comment JsonParser
author Claromes <claromes@hey.com>
Fri, 14 Jun 2024 19:39:23 +0000 (16:39 -0300)
committer Claromes <claromes@hey.com>
Fri, 14 Jun 2024 19:39:23 +0000 (16:39 -0300)
app/app.py
docs/api.rst
docs/errors.rst
docs/result.rst
poetry.lock
pyproject.toml
waybacktweets/api/parse_tweets.py
waybacktweets/api/request_tweets.py
waybacktweets/api/viz_tweets.py
waybacktweets/cli/main.py
waybacktweets/utils/utils.py

index c4a7ab73c9d7b16a2dc561dfa1057de30d0749f7..7a3821341e553ff4e2116acb202556888c828f92 100644 (file)
@@ -126,6 +126,9 @@ def tweets_count(username, archived_timestamp_filter):
     except requests.exceptions.ConnectionError:
         st.error("Failed to establish a new connection with web.archive.org.")
         st.stop()
+    except Exception as e:
+        st.error(f"{e}")
+        st.stop()
 
 
 # Interface Settings
@@ -437,3 +440,6 @@ if query or st.session_state.count:
         If the problem persists [open an issue](https://github.com/claromes/waybacktweets/issues)."""  # noqa: E501
         )
         st.session_state.offset = 0
+    except Exception as e:
+        st.error(f"{e}")
+        st.stop()
index 283e4299e9054ec56ff3cf07669e072d1bff14e9..ced6bf4d97ba851bb16ff48e7c8c0e3859082a50 100644 (file)
@@ -22,8 +22,9 @@ Parse
 .. autoclass:: TwitterEmbed
     :members:
 
-.. autoclass:: JsonParser
-    :members:
+.. TODO: JSON Issue
+.. .. autoclass:: JsonParser
+..     :members:
 
 
 Export
index 492279b3beebeac0721e63c394c3e3325d31ae6a..d12c436d81e56eb5646158b84d639d22c85a1b3b 100644 (file)
@@ -17,9 +17,10 @@ This error is raised when the package fails to establish a new connection with w
 
 The output message from the package would be: ``Failed to establish a new connection with web.archive.org. Max retries exceeded.``
 
-This is the error often returned when performing experimental parsing of URLs with the mimetype ``application/json``.
+.. TODO: JSON Issue
+.. This is the error often returned when performing experimental parsing of URLs with the mimetype ``application/json``.
 
-The warning output message from the package would be: ``Connection error with https://web.archive.org/web/<TIMESTAMP>/https://twitter.com/<USERNAME>/status/<TWEET_ID>. Max retries exceeded. Error parsing the JSON, but the CDX data was saved.``
+.. The warning output message from the package would be: ``Connection error with https://web.archive.org/web/<TIMESTAMP>/https://twitter.com/<USERNAME>/status/<TWEET_ID>. Max retries exceeded. Error parsing the JSON, but the CDX data was saved.``
 
 HTTPError
 ----------------
index 2794f40dbe9a12e8365d454461c50cd27b013651..38b5498e6483ead2cf78bb73d56d6f7832b959d3 100644 (file)
@@ -15,7 +15,8 @@ The package saves in three formats: CSV, JSON, and HTML. The files have the foll
 
 - ``parsed_archived_tweet_url``: (`str`) The original archived URL after parsing. `Check the utility functions <api.html#module-waybacktweets.utils.utils>`_.
 
-- ``parsed_tweet_text_mimetype_json``: (`str`) The tweet text extracted from the archived URL that has mimetype ``application/json``.
+.. TODO: JSON Issue
+.. - ``parsed_tweet_text_mimetype_json``: (`str`) The tweet text extracted from the archived URL that has mimetype ``application/json``.
 
 - ``available_tweet_text``: (`str`) The tweet text extracted from the URL that is still available on the Twitter account.
 
index 12756fd6e260ce7669db8e29a566b92b0ebbea23..83469905cda535471c221b2fdd3c8a2d30390be7 100644 (file)
@@ -36,28 +36,6 @@ all = ["altair-tiles (>=0.3.0)", "anywidget (>=0.9.0)", "pyarrow (>=11)", "vega-
 dev = ["geopandas", "hatch", "ipython", "m2r", "mypy", "pandas-stubs", "pytest", "pytest-cov", "ruff (>=0.3.0)", "types-jsonschema", "types-setuptools"]
 doc = ["docutils", "jinja2", "myst-parser", "numpydoc", "pillow (>=9,<10)", "pydata-sphinx-theme (>=0.14.1)", "scipy", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinxext-altair"]
 
-[[package]]
-name = "anyio"
-version = "4.4.0"
-description = "High level compatibility layer for multiple asynchronous event loop implementations"
-optional = false
-python-versions = ">=3.8"
-files = [
-    {file = "anyio-4.4.0-py3-none-any.whl", hash = "sha256:c1b2d8f46a8a812513012e1107cb0e68c17159a7a594208005a57dc776e1bdc7"},
-    {file = "anyio-4.4.0.tar.gz", hash = "sha256:5aadc6a1bbb7cdb0bede386cac5e2940f5e2ff3aa20277e991cf028e0585ce94"},
-]
-
-[package.dependencies]
-exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""}
-idna = ">=2.8"
-sniffio = ">=1.1"
-typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""}
-
-[package.extras]
-doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"]
-test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"]
-trio = ["trio (>=0.23)"]
-
 [[package]]
 name = "attrs"
 version = "23.2.0"
@@ -327,20 +305,6 @@ files = [
     {file = "docutils-0.21.2.tar.gz", hash = "sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f"},
 ]
 
-[[package]]
-name = "exceptiongroup"
-version = "1.2.1"
-description = "Backport of PEP 654 (exception groups)"
-optional = false
-python-versions = ">=3.7"
-files = [
-    {file = "exceptiongroup-1.2.1-py3-none-any.whl", hash = "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad"},
-    {file = "exceptiongroup-1.2.1.tar.gz", hash = "sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16"},
-]
-
-[package.extras]
-test = ["pytest (>=6)"]
-
 [[package]]
 name = "filelock"
 version = "3.15.1"
@@ -422,62 +386,6 @@ gitdb = ">=4.0.1,<5"
 doc = ["sphinx (==4.3.2)", "sphinx-autodoc-typehints", "sphinx-rtd-theme", "sphinxcontrib-applehelp (>=1.0.2,<=1.0.4)", "sphinxcontrib-devhelp (==1.0.2)", "sphinxcontrib-htmlhelp (>=2.0.0,<=2.0.1)", "sphinxcontrib-qthelp (==1.0.3)", "sphinxcontrib-serializinghtml (==1.1.5)"]
 test = ["coverage[toml]", "ddt (>=1.1.1,!=1.4.3)", "mock", "mypy", "pre-commit", "pytest (>=7.3.1)", "pytest-cov", "pytest-instafail", "pytest-mock", "pytest-sugar", "typing-extensions"]
 
-[[package]]
-name = "h11"
-version = "0.14.0"
-description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
-optional = false
-python-versions = ">=3.7"
-files = [
-    {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"},
-    {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
-]
-
-[[package]]
-name = "httpcore"
-version = "1.0.5"
-description = "A minimal low-level HTTP client."
-optional = false
-python-versions = ">=3.8"
-files = [
-    {file = "httpcore-1.0.5-py3-none-any.whl", hash = "sha256:421f18bac248b25d310f3cacd198d55b8e6125c107797b609ff9b7a6ba7991b5"},
-    {file = "httpcore-1.0.5.tar.gz", hash = "sha256:34a38e2f9291467ee3b44e89dd52615370e152954ba21721378a87b2960f7a61"},
-]
-
-[package.dependencies]
-certifi = "*"
-h11 = ">=0.13,<0.15"
-
-[package.extras]
-asyncio = ["anyio (>=4.0,<5.0)"]
-http2 = ["h2 (>=3,<5)"]
-socks = ["socksio (==1.*)"]
-trio = ["trio (>=0.22.0,<0.26.0)"]
-
-[[package]]
-name = "httpx"
-version = "0.27.0"
-description = "The next generation HTTP client."
-optional = false
-python-versions = ">=3.8"
-files = [
-    {file = "httpx-0.27.0-py3-none-any.whl", hash = "sha256:71d5465162c13681bff01ad59b2cc68dd838ea1f10e51574bac27103f00c91a5"},
-    {file = "httpx-0.27.0.tar.gz", hash = "sha256:a0cb88a46f32dc874e04ee956e4c2764aba2aa228f650b06788ba6bda2962ab5"},
-]
-
-[package.dependencies]
-anyio = "*"
-certifi = "*"
-httpcore = "==1.*"
-idna = "*"
-sniffio = "*"
-
-[package.extras]
-brotli = ["brotli", "brotlicffi"]
-cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"]
-http2 = ["h2 (>=3,<5)"]
-socks = ["socksio (==1.*)"]
-
 [[package]]
 name = "identify"
 version = "2.5.36"
@@ -1403,17 +1311,6 @@ files = [
     {file = "smmap-5.0.1.tar.gz", hash = "sha256:dceeb6c0028fdb6734471eb07c0cd2aae706ccaecab45965ee83f11c8d3b1f62"},
 ]
 
-[[package]]
-name = "sniffio"
-version = "1.3.1"
-description = "Sniff out which async library your code is running under"
-optional = false
-python-versions = ">=3.7"
-files = [
-    {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"},
-    {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
-]
-
 [[package]]
 name = "snowballstemmer"
 version = "2.2.0"
@@ -1843,4 +1740,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools",
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.9,<3.9.7 || >3.9.7,<4.0"
-content-hash = "6b4f6eedd706b20782b173657a9e8936e20853014e2ea504f064765cd42be4f7"
+content-hash = "e2870692e02e31ac100b8f245b07118ea693b67898444ea22ab43963b8feb944"
index 71544262d51b4c23f2db032bf1002bb4f319333b..38b47069c14d2af7b33356da9d9542a73c6b60ee 100644 (file)
@@ -11,7 +11,6 @@ python = ">=3.9,<3.9.7 || >3.9.7,<4.0"
 requests = "^2.30.0"
 streamlit = "1.35.0"
 rich = "^13.6.0"
-httpx = "^0.27.0"
 click = "^8.1.7"
 
 [tool.poetry.group.docs.dependencies]
index bc1704a82e27ba8f1e355ea6a9daea7c68eb5425..465287fac9c292ce788c81712a846b4a6d9765c5 100644 (file)
@@ -65,8 +65,12 @@ class TwitterEmbed:
         except exceptions:
             rprint("[yellow]Error parsing the tweet, but the CDX data was saved.")
             return None
+        except Exception as e:
+            rprint(f"[red]{e}")
+            return None
 
 
+# TODO: JSON Issue - Create a separate function to handle the returned JSON without hitting rate limits # noqa: E501
 class JsonParser:
     """Handles parsing of tweets when the mimetype is application/json."""
 
@@ -99,6 +103,9 @@ class JsonParser:
             rprint("[yellow]Error parsing the JSON, but the CDX data was saved.")
 
             return ""
+        except Exception as e:
+            rprint(f"[red]{e}")
+            return ""
 
 
 class TweetsParser:
@@ -155,15 +162,18 @@ class TweetsParser:
             self._add_field("available_tweet_is_RT", content[1][0])
             self._add_field("available_tweet_info", semicolon_parser(content[2][0]))
 
-        parsed_text_json = ""
+        # TODO: JSON Issue
+        # parsed_text_json = ""
+
+        # if response[3] == "application/json":
+        #     json_parser = JsonParser(encoded_parsed_archived_tweet)
+        #     text_json = json_parser.parse()
+
+        #     if text_json:
+        #         parsed_text_json = semicolon_parser(text_json)
 
-        if response[3] == "application/json":
-            json_parser = JsonParser(encoded_parsed_archived_tweet)
-            if json_parser:
-                text_json = json_parser.parse()
-                parsed_text_json = semicolon_parser(text_json)
+        # self._add_field("parsed_tweet_text_mimetype_json", parsed_text_json)
 
-        self._add_field("parsed_tweet_text_mimetype_json", parsed_text_json)
         self._add_field("archived_urlkey", response[0])
         self._add_field("archived_timestamp", response[1])
         self._add_field("original_tweet_url", encoded_tweet)
@@ -192,7 +202,7 @@ class TweetsParser:
                     try:
                         future.result()
                     except Exception as e:
-                        rprint(f"[red]{e}...")
+                        rprint(f"[red]{e}")
 
                     progress.update(task, advance=1)
 
index 72eaf89f952a5cf492c0968500d7ee80f9feb1c2..7bb9dd10cb7734a27120fbcb693d85cd5c51d963 100644 (file)
@@ -55,3 +55,5 @@ class WaybackTweets:
             rprint(
                 "[red]Temporarily Offline: Internet Archive services are temporarily offline. Please check Internet Archive Twitter feed (https://twitter.com/internetarchive) for the latest information."  # noqa: E501
             )
+        except Exception as e:
+            rprint(f"[red]{e}")
index 61b242e9654618e4636b829c5561d65bbf46662d..229bf86be7a226697de6683ea97865a65e52547f 100644 (file)
@@ -38,17 +38,22 @@ class HTMLTweetsVisualizer:
         for tweet in self.json_content:
             html += '<div class="tweet">\n'
 
+            # TODO: JSON Issue
+            # if (
+            #     (
+            #         tweet["archived_mimetype"] != "application/json"
+            #         and not tweet["parsed_tweet_text_mimetype_json"]
+            #     )
+            #     and not tweet["available_tweet_text"]
+            # ) or (
+            #     (
+            #         tweet["archived_mimetype"] == "application/json"
+            #         and not tweet["parsed_tweet_text_mimetype_json"]
+            #     )
+            #     and not tweet["available_tweet_text"]
+            # ):
             if (
-                (
-                    tweet["archived_mimetype"] != "application/json"
-                    and not tweet["parsed_tweet_text_mimetype_json"]
-                )
-                and not tweet["available_tweet_text"]
-            ) or (
-                (
-                    tweet["archived_mimetype"] == "application/json"
-                    and not tweet["parsed_tweet_text_mimetype_json"]
-                )
+                tweet["archived_mimetype"] != "application/json"
                 and not tweet["available_tweet_text"]
             ):
                 html += f'<iframe src="{tweet["parsed_archived_tweet_url"]}" frameborder="0" scrolling="auto"></iframe>\n'
@@ -64,11 +69,12 @@ class HTMLTweetsVisualizer:
                 html += f'<p><strong class="content">Available Tweet Is Retweet:</strong> {tweet["available_tweet_is_RT"]}</p>\n'
                 html += f'<p><strong class="content">Available Tweet Username:</strong> {tweet["available_tweet_info"]}</p>\n'
 
-            if (
-                tweet["archived_mimetype"] == "application/json"
-                and tweet["parsed_tweet_text_mimetype_json"]
-            ) and not tweet["available_tweet_text"]:
-                html += f'<p><strong class="content">Parsed Tweet Text (application/json):</strong> {tweet["parsed_tweet_text_mimetype_json"]}</p>\n'
+            # TODO: JSON Issue
+            # if (
+            #     tweet["archived_mimetype"] == "application/json"
+            #     and tweet["parsed_tweet_text_mimetype_json"]
+            # ) and not tweet["available_tweet_text"]:
+            #     html += f'<p><strong class="content">Parsed Tweet Text (application/json):</strong> {tweet["parsed_tweet_text_mimetype_json"]}</p>\n'
 
             html += "<br>\n"
             html += f'<p><strong>Archived URL Key:</strong> {tweet["archived_urlkey"]}</p>\n'
index 7d01d05b4693ac74909726c7025aafa45f128eb2..421980d8d2e593c990f65e46d0dee5a2292d5ae7 100644 (file)
@@ -75,7 +75,7 @@ def cli(
                 "archived_tweet_url",
                 "parsed_tweet_url",
                 "parsed_archived_tweet_url",
-                "parsed_tweet_text_mimetype_json",
+                # "parsed_tweet_text_mimetype_json", # TODO: JSON Issue
                 "available_tweet_text",
                 "available_tweet_is_RT",
                 "available_tweet_info",
index eff374a3c016e861ac5e2cc0557d3ed30b241d40..cfa29bb924f3e894cb460020351392ec842ce040 100644 (file)
@@ -25,8 +25,10 @@ def get_response(url, params=None):
 
     response = session.get(url, params=params, headers=headers)
 
-    if not 400 <= response.status_code <= 511:
-        return response
+    if 400 <= response.status_code <= 511:
+        return None
+
+    return response
 
 
 def clean_tweet_url(tweet_url, username):