From b1bbea09cce7969dabaf18b4d871ed2334af18a2 Mon Sep 17 00:00:00 2001 From: Claromes Date: Sun, 16 Jun 2024 05:55:01 -0300 Subject: [PATCH] update docs, review exceptions --- README.md | 2 +- app/app.py | 34 ++++---- docs/conf.py | 2 +- docs/{errors.rst => exceptions.rst} | 2 +- docs/index.rst | 2 +- docs/quickstart.rst | 4 +- docs/streamlit.rst | 2 + docs/workflow.rst | 2 +- waybacktweets/api/export_tweets.py | 4 + waybacktweets/api/parse_tweets.py | 125 ++++++++++++++-------------- waybacktweets/api/request_tweets.py | 22 ++--- waybacktweets/api/viz_tweets.py | 4 + waybacktweets/cli/main.py | 3 +- waybacktweets/utils/utils.py | 31 ++++--- 14 files changed, 127 insertions(+), 112 deletions(-) rename docs/{errors.rst => exceptions.rst} (99%) diff --git a/README.md b/README.md index 5a8890f..1ddd1b3 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,7 @@ if archived_tweets: ### Using Wayback Tweets as a Web App -[Access the application](https://waybacktweets.streamlit.app), a prototype written in Python with the Streamlit framework and hosted on Streamlit Cloud. +[Open the application](https://waybacktweets.streamlit.app), a prototype written in Python with the Streamlit framework and hosted on Streamlit Cloud. 
## Documentation diff --git a/app/app.py b/app/app.py index e0245da..87d630f 100644 --- a/app/app.py +++ b/app/app.py @@ -1,6 +1,5 @@ import datetime -import requests import streamlit as st import streamlit.components.v1 as components @@ -26,11 +25,11 @@ st.set_page_config( "About": f""" [![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/claromes/waybacktweets?include_prereleases)](https://github.com/claromes/waybacktweets/releases) [![License](https://img.shields.io/github/license/claromes/waybacktweets)](https://github.com/claromes/waybacktweets/blob/main/LICENSE.md) [![Star](https://img.shields.io/github/stars/claromes/waybacktweets?style=social)](https://github.com/claromes/waybacktweets) - Aplication that displays multiple archived tweets on Wayback Machine to avoid opening each link manually. + Application that displays multiple archived tweets on Wayback Machine to avoid opening each link manually. The application is a prototype hosted on Streamlit Cloud, allowing users to apply filters and view tweets that lack the original URL. [Read more](https://claromes.github.io/waybacktweets/streamlit.html). 
- © Copyright 2023 - {datetime.datetime.now().year}, [Claromes](https://claromes.com) · Icon by The Doodle Library + © 2023 - {datetime.datetime.now().year}, [Claromes](https://claromes.com) · Icon by The Doodle Library · Title font by Google, licensed under the Open Font License --- """, # noqa: E501 @@ -129,24 +128,23 @@ def next_page(): def tweets_count(username, archived_timestamp_filter): url = f"https://web.archive.org/cdx/search/cdx?url=https://twitter.com/{username}/status/*&output=json&from={archived_timestamp_filter[0]}&to={archived_timestamp_filter[1]}" # noqa: E501 - try: - response = get_response(url=url) - - if response.status_code == 200: - data = response.json() - if data and len(data) > 1: - total_tweets = len(data) - 1 - return total_tweets - else: - return 0 - except requests.exceptions.ReadTimeout: - st.error("Connection to web.archive.org timed out.") + response, error, error_type = get_response(url=url) + + if response.status_code == 200: + data = response.json() + if data and len(data) > 1: + total_tweets = len(data) - 1 + return total_tweets + else: + return 0 + elif error and error_type == "ReadTimeout": + st.error("Failed to establish a new connection with web.archive.org.") st.stop() - except requests.exceptions.ConnectionError: + elif error and error_type == "ConnectionError": st.error("Failed to establish a new connection with web.archive.org.") st.stop() - except Exception as e: - st.error(f"{e}") + elif error and error_type: + st.error(f"{error}") st.stop() diff --git a/docs/conf.py b/docs/conf.py index 643113f..5692fea 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -4,7 +4,7 @@ from pallets_sphinx_themes import ProjectLink, get_version project = "Wayback Tweets" release, version = get_version("waybacktweets") -copyright = f"2023 - {datetime.datetime.now().year}, Claromes · Icon by The Doodle Library · Title Font by Google, licensed under the Open Font License · Wayback Tweets v{version}" # noqa: E501 +copyright = f"2023 - 
{datetime.datetime.now().year}, Claromes · Icon by The Doodle Library · Title font by Google, licensed under the Open Font License · Wayback Tweets v{version}" # noqa: E501 author = "Claromes" # -- General configuration --------------------------------------------------- diff --git a/docs/errors.rst b/docs/exceptions.rst similarity index 99% rename from docs/errors.rst rename to docs/exceptions.rst index 38a8f1b..109e41b 100644 --- a/docs/errors.rst +++ b/docs/exceptions.rst @@ -1,4 +1,4 @@ -Errors +Exceptions ================ These are the most common errors and are handled by the ``waybacktweets`` package. diff --git a/docs/index.rst b/docs/index.rst index c6e5175..9bf5bcd 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -19,7 +19,7 @@ User Guide quickstart workflow result - errors + exceptions contribute todo diff --git a/docs/quickstart.rst b/docs/quickstart.rst index f98a503..4e3c4d7 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -10,7 +10,7 @@ waybacktweets [OPTIONS] USERNAME .. code-block:: shell - waybacktweets --from 20150101 --to 20191231 --limit 250 jack` + waybacktweets --from 20150101 --to 20191231 --limit 250 jack Module @@ -46,4 +46,4 @@ Web App Using Wayback Tweets as a Streamlit Web App -`Access the application <https://waybacktweets.streamlit.app>`_, a prototype written in Python with the Streamlit framework and hosted on Streamlit Cloud. +`Open the application <https://waybacktweets.streamlit.app>`_, a prototype written in Python with the Streamlit framework and hosted on Streamlit Cloud. diff --git a/docs/streamlit.rst b/docs/streamlit.rst index 78da866..b8de7d9 100644 --- a/docs/streamlit.rst +++ b/docs/streamlit.rst @@ -3,6 +3,8 @@ Web App Aplication that displays multiple archived tweets on Wayback Machine to avoid opening each link manually. The application is a prototype written in Python with the Streamlit framework and hosted on Streamlit Cloud, allowing users to apply filters and view tweets that lack the original URL. +`Open the application <https://waybacktweets.streamlit.app>`_. 
+ Filters ---------- diff --git a/docs/workflow.rst b/docs/workflow.rst index c3ffd32..2480b35 100644 --- a/docs/workflow.rst +++ b/docs/workflow.rst @@ -19,5 +19,5 @@ Use the mouse to zoom in and out the flowchart. C--> |4xx| E[return None] E--> F{request Archived\nTweet URL} F--> |4xx| G[return Only CDX data] - F--> |TODO: 2xx/3xx: application/json| J[return JSON text] + F--> |2xx/3xx: application/json| J[return JSON text] F--> |2xx/3xx: text/html, warc/revisit, unk| K[return HTML iframe tag] diff --git a/waybacktweets/api/export_tweets.py b/waybacktweets/api/export_tweets.py index 4cd5d83..a6daf41 100644 --- a/waybacktweets/api/export_tweets.py +++ b/waybacktweets/api/export_tweets.py @@ -1,3 +1,7 @@ +""" +Exports the parsed archived tweets. +""" + import datetime import os import re diff --git a/waybacktweets/api/parse_tweets.py b/waybacktweets/api/parse_tweets.py index 28404e8..585aec2 100644 --- a/waybacktweets/api/parse_tweets.py +++ b/waybacktweets/api/parse_tweets.py @@ -1,10 +1,13 @@ +""" +Parses the returned data from the Wayback CDX Server API. +""" + import re from concurrent.futures import ThreadPoolExecutor, as_completed from contextlib import nullcontext from typing import Any, Dict, List, Optional, Tuple from urllib.parse import unquote -from requests import exceptions from rich import print as rprint from rich.progress import Progress @@ -47,49 +50,52 @@ class TwitterEmbed: availability statuses, and URLs, respectively. If no tweets are available, returns None. """ - try: - url = f"https://publish.twitter.com/oembed?url={self.tweet_url}" - response = get_response(url=url) - - if response: - json_response = response.json() - html = json_response["html"] - author_name = json_response["author_name"] - - regex = re.compile( - r'