dev = ["geopandas", "hatch", "ipython", "m2r", "mypy", "pandas-stubs", "pytest", "pytest-cov", "ruff (>=0.3.0)", "types-jsonschema", "types-setuptools"]
doc = ["docutils", "jinja2", "myst-parser", "numpydoc", "pillow (>=9,<10)", "pydata-sphinx-theme (>=0.14.1)", "scipy", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinxext-altair"]
-[[package]]
-name = "anyio"
-version = "4.4.0"
-description = "High level compatibility layer for multiple asynchronous event loop implementations"
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "anyio-4.4.0-py3-none-any.whl", hash = "sha256:c1b2d8f46a8a812513012e1107cb0e68c17159a7a594208005a57dc776e1bdc7"},
- {file = "anyio-4.4.0.tar.gz", hash = "sha256:5aadc6a1bbb7cdb0bede386cac5e2940f5e2ff3aa20277e991cf028e0585ce94"},
-]
-
-[package.dependencies]
-exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""}
-idna = ">=2.8"
-sniffio = ">=1.1"
-typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""}
-
-[package.extras]
-doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"]
-test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"]
-trio = ["trio (>=0.23)"]
-
[[package]]
name = "attrs"
version = "23.2.0"
{file = "docutils-0.21.2.tar.gz", hash = "sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f"},
]
-[[package]]
-name = "exceptiongroup"
-version = "1.2.1"
-description = "Backport of PEP 654 (exception groups)"
-optional = false
-python-versions = ">=3.7"
-files = [
- {file = "exceptiongroup-1.2.1-py3-none-any.whl", hash = "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad"},
- {file = "exceptiongroup-1.2.1.tar.gz", hash = "sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16"},
-]
-
-[package.extras]
-test = ["pytest (>=6)"]
-
[[package]]
name = "filelock"
version = "3.15.1"
doc = ["sphinx (==4.3.2)", "sphinx-autodoc-typehints", "sphinx-rtd-theme", "sphinxcontrib-applehelp (>=1.0.2,<=1.0.4)", "sphinxcontrib-devhelp (==1.0.2)", "sphinxcontrib-htmlhelp (>=2.0.0,<=2.0.1)", "sphinxcontrib-qthelp (==1.0.3)", "sphinxcontrib-serializinghtml (==1.1.5)"]
test = ["coverage[toml]", "ddt (>=1.1.1,!=1.4.3)", "mock", "mypy", "pre-commit", "pytest (>=7.3.1)", "pytest-cov", "pytest-instafail", "pytest-mock", "pytest-sugar", "typing-extensions"]
-[[package]]
-name = "h11"
-version = "0.14.0"
-description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
-optional = false
-python-versions = ">=3.7"
-files = [
- {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"},
- {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
-]
-
-[[package]]
-name = "httpcore"
-version = "1.0.5"
-description = "A minimal low-level HTTP client."
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "httpcore-1.0.5-py3-none-any.whl", hash = "sha256:421f18bac248b25d310f3cacd198d55b8e6125c107797b609ff9b7a6ba7991b5"},
- {file = "httpcore-1.0.5.tar.gz", hash = "sha256:34a38e2f9291467ee3b44e89dd52615370e152954ba21721378a87b2960f7a61"},
-]
-
-[package.dependencies]
-certifi = "*"
-h11 = ">=0.13,<0.15"
-
-[package.extras]
-asyncio = ["anyio (>=4.0,<5.0)"]
-http2 = ["h2 (>=3,<5)"]
-socks = ["socksio (==1.*)"]
-trio = ["trio (>=0.22.0,<0.26.0)"]
-
-[[package]]
-name = "httpx"
-version = "0.27.0"
-description = "The next generation HTTP client."
-optional = false
-python-versions = ">=3.8"
-files = [
- {file = "httpx-0.27.0-py3-none-any.whl", hash = "sha256:71d5465162c13681bff01ad59b2cc68dd838ea1f10e51574bac27103f00c91a5"},
- {file = "httpx-0.27.0.tar.gz", hash = "sha256:a0cb88a46f32dc874e04ee956e4c2764aba2aa228f650b06788ba6bda2962ab5"},
-]
-
-[package.dependencies]
-anyio = "*"
-certifi = "*"
-httpcore = "==1.*"
-idna = "*"
-sniffio = "*"
-
-[package.extras]
-brotli = ["brotli", "brotlicffi"]
-cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"]
-http2 = ["h2 (>=3,<5)"]
-socks = ["socksio (==1.*)"]
-
[[package]]
name = "identify"
version = "2.5.36"
{file = "smmap-5.0.1.tar.gz", hash = "sha256:dceeb6c0028fdb6734471eb07c0cd2aae706ccaecab45965ee83f11c8d3b1f62"},
]
-[[package]]
-name = "sniffio"
-version = "1.3.1"
-description = "Sniff out which async library your code is running under"
-optional = false
-python-versions = ">=3.7"
-files = [
- {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"},
- {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"},
-]
-
[[package]]
name = "snowballstemmer"
version = "2.2.0"
[metadata]
lock-version = "2.0"
python-versions = ">=3.9,<3.9.7 || >3.9.7,<4.0"
-content-hash = "6b4f6eedd706b20782b173657a9e8936e20853014e2ea504f064765cd42be4f7"
+content-hash = "e2870692e02e31ac100b8f245b07118ea693b67898444ea22ab43963b8feb944"
except exceptions:
rprint("[yellow]Error parsing the tweet, but the CDX data was saved.")
return None
+ except Exception as e:
+ rprint(f"[red]{e}")
+ return None
+# TODO: JSON Issue - Create a separate function that handles the JSON response without hitting the rate limit # noqa: E501
class JsonParser:
"""Handles parsing of tweets when the mimetype is application/json."""
rprint("[yellow]Error parsing the JSON, but the CDX data was saved.")
return ""
+ except Exception as e:
+ rprint(f"[red]{e}")
+ return ""
class TweetsParser:
self._add_field("available_tweet_is_RT", content[1][0])
self._add_field("available_tweet_info", semicolon_parser(content[2][0]))
- parsed_text_json = ""
+ # TODO: JSON Issue - JSON text parsing is disabled for now; original logic kept below.
+ # parsed_text_json = ""
+
+ # if response[3] == "application/json":
+ # json_parser = JsonParser(encoded_parsed_archived_tweet)
+ # text_json = json_parser.parse()
+
+ # if text_json:
+ # parsed_text_json = semicolon_parser(text_json)
- if response[3] == "application/json":
- json_parser = JsonParser(encoded_parsed_archived_tweet)
- if json_parser:
- text_json = json_parser.parse()
- parsed_text_json = semicolon_parser(text_json)
+ # self._add_field("parsed_tweet_text_mimetype_json", parsed_text_json)
- self._add_field("parsed_tweet_text_mimetype_json", parsed_text_json)
self._add_field("archived_urlkey", response[0])
self._add_field("archived_timestamp", response[1])
self._add_field("original_tweet_url", encoded_tweet)
try:
future.result()
except Exception as e:
- rprint(f"[red]{e}...")
+ rprint(f"[red]{e}")
progress.update(task, advance=1)
for tweet in self.json_content:
html += '<div class="tweet">\n'
+ # TODO: JSON Issue - JSON-aware condition is disabled for now; original kept below.
+ # if (
+ # (
+ # tweet["archived_mimetype"] != "application/json"
+ # and not tweet["parsed_tweet_text_mimetype_json"]
+ # )
+ # and not tweet["available_tweet_text"]
+ # ) or (
+ # (
+ # tweet["archived_mimetype"] == "application/json"
+ # and not tweet["parsed_tweet_text_mimetype_json"]
+ # )
+ # and not tweet["available_tweet_text"]
+ # ):
if (
- (
- tweet["archived_mimetype"] != "application/json"
- and not tweet["parsed_tweet_text_mimetype_json"]
- )
- and not tweet["available_tweet_text"]
- ) or (
- (
- tweet["archived_mimetype"] == "application/json"
- and not tweet["parsed_tweet_text_mimetype_json"]
- )
+ tweet["archived_mimetype"] != "application/json"
and not tweet["available_tweet_text"]
):
html += f'<iframe src="{tweet["parsed_archived_tweet_url"]}" frameborder="0" scrolling="auto"></iframe>\n'
html += f'<p><strong class="content">Available Tweet Is Retweet:</strong> {tweet["available_tweet_is_RT"]}</p>\n'
html += f'<p><strong class="content">Available Tweet Username:</strong> {tweet["available_tweet_info"]}</p>\n'
- if (
- tweet["archived_mimetype"] == "application/json"
- and tweet["parsed_tweet_text_mimetype_json"]
- ) and not tweet["available_tweet_text"]:
- html += f'<p><strong class="content">Parsed Tweet Text (application/json):</strong> {tweet["parsed_tweet_text_mimetype_json"]}</p>\n'
+ # TODO: JSON Issue - parsed JSON text output is disabled for now; original kept below.
+ # if (
+ # tweet["archived_mimetype"] == "application/json"
+ # and tweet["parsed_tweet_text_mimetype_json"]
+ # ) and not tweet["available_tweet_text"]:
+ # html += f'<p><strong class="content">Parsed Tweet Text (application/json):</strong> {tweet["parsed_tweet_text_mimetype_json"]}</p>\n'
html += "<br>\n"
html += f'<p><strong>Archived URL Key:</strong> {tweet["archived_urlkey"]}</p>\n'