*.csv
*.json
*.html
+
waybacktweets/__pycache__
waybacktweets/api/__pycache__
-waybacktweets/cli/__pycache__
+waybacktweets/config/__pycache__
+waybacktweets/exceptions/__pycache__
waybacktweets/utils/__pycache__
+
docs/_build/
notes.md
import streamlit as st
import streamlit.components.v1 as components
-from waybacktweets.api.export_tweets import TweetsExporter
-from waybacktweets.api.parse_tweets import JsonParser, TweetsParser
-from waybacktweets.api.request_tweets import WaybackTweets
+from waybacktweets.api.export import TweetsExporter
+from waybacktweets.api.parse import JsonParser, TweetsParser
+from waybacktweets.api.request import WaybackTweets
from waybacktweets.config.config import config
from waybacktweets.exceptions.exceptions import (
ConnectionError,
Export
---------
-.. automodule:: waybacktweets.api.export_tweets
+.. automodule:: waybacktweets.api.export
.. autoclass:: TweetsExporter
:members:
Parse
---------
-.. automodule:: waybacktweets.api.parse_tweets
+.. automodule:: waybacktweets.api.parse
.. autoclass:: TweetsParser
:members:
Request
---------
-.. automodule:: waybacktweets.api.request_tweets
+.. automodule:: waybacktweets.api.request
.. autoclass:: WaybackTweets
:members:
Visualizer
-----------
-.. automodule:: waybacktweets.api.viz_tweets
+.. automodule:: waybacktweets.api.visualize
.. autoclass:: HTMLTweetsVisualizer
:members:
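For orientation, a minimal end-to-end sketch using the renamed modules; the username and field list below are placeholder values, and the signatures follow the classes added in this changeset::

    from waybacktweets.api.export import TweetsExporter
    from waybacktweets.api.parse import TweetsParser
    from waybacktweets.api.request import WaybackTweets

    username = "jack"  # placeholder handle
    fields = [
        "archived_urlkey",
        "archived_timestamp",
        "original_tweet_url",
        "archived_tweet_url",
    ]

    api = WaybackTweets(username)
    response = api.get()  # CDX data (list of rows), or None on failure

    if response:
        parser = TweetsParser(response, username, fields)
        parsed_tweets = parser.parse(print_progress=True)

        exporter = TweetsExporter(parsed_tweets, username, fields)
        exporter.save_to_csv()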
- ``assets``: Title and logo images
- ``docs``: Documentation generated with Sphinx
- ``waybacktweets/api``: Main package modules
-- ``waybacktweets/cli``: Command line Interface module
+- ``waybacktweets/config``: Global configuration module
+- ``waybacktweets/exceptions``: Wayback Tweets Exceptions
- ``waybacktweets/utils``: Helper functions used in the package
Sponsoring
------------
+------------
You can also donate to the project's developer and maintainer, `Claromes <https://claromes.com>`_, via `GitHub Sponsors <https://github.com/sponsors/claromes>`_. If you are interested in sponsoring the project, contact the maintainer by email at support at claromes dot com.
<input type="checkbox">
-|uncheck| Code: JSON Issue: Create a separate function to handle JSON return, apply JsonParser (``waybacktweets/api/parse_tweets.py:73``), and avoid rate limiting (`Planned for v1.1`)
-
-|uncheck| Docs: Add tutorial on how to save Tweet via command line (`Planned for v1.1`)
+|uncheck| Code: JSON Parser: Create a separate function to handle JSON return, apply JsonParser (``waybacktweets/api/parse.py:111``), and avoid rate limiting (`Planned for v1.1`)
|uncheck| Code: Download images when tweet URL has extensions like JPG or PNG (`Planned for v1.2`)
# flake8: noqa: F401
-from waybacktweets.api.export_tweets import TweetsExporter
-from waybacktweets.api.parse_tweets import JsonParser, TweetsParser, TwitterEmbed
-from waybacktweets.api.request_tweets import WaybackTweets
-from waybacktweets.api.viz_tweets import HTMLTweetsVisualizer
+from waybacktweets.api.export import TweetsExporter
+from waybacktweets.api.parse import JsonParser, TweetsParser, TwitterEmbed
+from waybacktweets.api.request import WaybackTweets
+from waybacktweets.api.visualize import HTMLTweetsVisualizer
__version__ = "1.0"
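Assuming the hunk above is the package root ``waybacktweets/__init__.py``, the re-exports should also allow top-level imports, e.g.::

    from waybacktweets import TweetsExporter, TweetsParser, WaybackTweets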
import click
from rich import print as rprint
-from waybacktweets.api.export_tweets import TweetsExporter
-from waybacktweets.api.parse_tweets import TweetsParser
-from waybacktweets.api.request_tweets import WaybackTweets
+from waybacktweets.api.export import TweetsExporter
+from waybacktweets.api.parse import TweetsParser
+from waybacktweets.api.request import WaybackTweets
from waybacktweets.config.config import config
--- /dev/null
+"""
+Exports the parsed archived tweets.
+"""
+
+import datetime
+import os
+import re
+from typing import Any, Dict, List, Optional
+
+import pandas as pd
+
+from waybacktweets.api.visualize import HTMLTweetsVisualizer
+
+
+class TweetsExporter:
+ """
+ Class responsible for exporting parsed archived tweets.
+
+ :param data: The parsed archived tweets data.
+ :param username: The username associated with the tweets.
+ :param field_options: The fields to be included in the exported data. Options include "archived_urlkey", "archived_timestamp", "original_tweet_url", "archived_tweet_url", "parsed_tweet_url", "parsed_archived_tweet_url", "available_tweet_text", "available_tweet_is_RT", "available_tweet_info", "archived_mimetype", "archived_statuscode", "archived_digest", "archived_length".
+ """ # noqa: E501
+
+ def __init__(
+ self, data: Dict[str, List[Any]], username: str, field_options: List[str]
+ ):
+ self.data = data
+ self.username = username
+ self.field_options = field_options
+ self.formatted_datetime = self._datetime_now()
+ self.filename = f"{self.username}_tweets_{self.formatted_datetime}"
+ self.dataframe = self._create_dataframe()
+
+ @staticmethod
+ def _datetime_now() -> str:
+ """
+ Returns the current datetime, formatted as a string.
+
+ :returns: The current datetime.
+ """
+ now = datetime.datetime.now()
+ formatted_now = now.strftime("%Y%m%d%H%M%S")
+ formatted_now = re.sub(r"\W+", "", formatted_now)
+
+ return formatted_now
+
+ @staticmethod
+ def _transpose_matrix(
+ data: Dict[str, List[Any]], fill_value: Optional[Any] = None
+ ) -> List[List[Any]]:
+ """
+ Transposes a matrix,
+ filling in missing values with a specified fill value if needed.
+
+ :param data: The matrix to be transposed.
+ :param fill_value: The value to fill in missing values with.
+
+ :returns: The transposed matrix.
+ """
+        max_length = max(
+            (len(sublist) for sublist in data.values()), default=0
+        )
+
+ filled_data = {
+ key: value + [fill_value] * (max_length - len(value))
+ for key, value in data.items()
+ }
+
+ data_transposed = [list(row) for row in zip(*filled_data.values())]
+
+ return data_transposed
+
+ def _create_dataframe(self) -> pd.DataFrame:
+ """
+ Creates a DataFrame from the transposed data.
+
+ :returns: The DataFrame representation of the data.
+ """
+ data_transposed = self._transpose_matrix(self.data)
+
+ df = pd.DataFrame(data_transposed, columns=self.field_options)
+
+ return df
+
+ def save_to_csv(self) -> None:
+ """
+ Saves the DataFrame to a CSV file.
+ """
+ csv_file_path = f"{self.filename}.csv"
+ self.dataframe.to_csv(csv_file_path, index=False)
+
+ print(f"Saved to {csv_file_path}")
+
+ def save_to_json(self) -> None:
+ """
+ Saves the DataFrame to a JSON file.
+ """
+ json_file_path = f"{self.filename}.json"
+ self.dataframe.to_json(json_file_path, orient="records", lines=False)
+
+ print(f"Saved to {json_file_path}")
+
+ def save_to_html(self) -> None:
+ """
+ Saves the DataFrame to an HTML file.
+ """
+ json_file_path = f"{self.filename}.json"
+
+ if not os.path.exists(json_file_path):
+ self.save_to_json()
+
+ html_file_path = f"{self.filename}.html"
+
+ html = HTMLTweetsVisualizer(json_file_path, html_file_path, self.username)
+
+ html_content = html.generate()
+ html.save(html_content)
+
+ print(f"Saved to {html_file_path}")
+++ /dev/null
-"""
-Exports the parsed archived tweets.
-"""
-
-import datetime
-import os
-import re
-from typing import Any, Dict, List, Optional
-
-import pandas as pd
-
-from waybacktweets.api.viz_tweets import HTMLTweetsVisualizer
-
-
-class TweetsExporter:
- """
- Class responsible for exporting parsed archived tweets.
-
- :param data: The parsed archived tweets data.
- :param username: The username associated with the tweets.
- :param field_options: The fields to be included in the exported data. Options include "archived_urlkey", "archived_timestamp", "original_tweet_url", "archived_tweet_url", "parsed_tweet_url", "parsed_archived_tweet_url", "available_tweet_text", "available_tweet_is_RT", "available_tweet_info", "archived_mimetype", "archived_statuscode", "archived_digest", "archived_length".
- """ # noqa: E501
-
- def __init__(
- self, data: Dict[str, List[Any]], username: str, field_options: List[str]
- ):
- self.data = data
- self.username = username
- self.field_options = field_options
- self.formatted_datetime = self._datetime_now()
- self.filename = f"{self.username}_tweets_{self.formatted_datetime}"
- self.dataframe = self._create_dataframe()
-
- @staticmethod
- def _datetime_now() -> str:
- """
- Returns the current datetime, formatted as a string.
-
- :returns: The current datetime.
- """
- now = datetime.datetime.now()
- formatted_now = now.strftime("%Y%m%d%H%M%S")
- formatted_now = re.sub(r"\W+", "", formatted_now)
-
- return formatted_now
-
- @staticmethod
- def _transpose_matrix(
- data: Dict[str, List[Any]], fill_value: Optional[Any] = None
- ) -> List[List[Any]]:
- """
- Transposes a matrix,
- filling in missing values with a specified fill value if needed.
-
- :param data: The matrix to be transposed.
- :param fill_value: The value to fill in missing values with.
-
- :returns: The transposed matrix.
- """
- max_length = max(len(sublist) for sublist in data.values())
-
- filled_data = {
- key: value + [fill_value] * (max_length - len(value))
- for key, value in data.items()
- }
-
- data_transposed = [list(row) for row in zip(*filled_data.values())]
-
- return data_transposed
-
- def _create_dataframe(self) -> pd.DataFrame:
- """
- Creates a DataFrame from the transposed data.
-
- :returns: The DataFrame representation of the data.
- """
- data_transposed = self._transpose_matrix(self.data)
-
- df = pd.DataFrame(data_transposed, columns=self.field_options)
-
- return df
-
- def save_to_csv(self) -> None:
- """
- Saves the DataFrame to a CSV file.
- """
- csv_file_path = f"{self.filename}.csv"
- self.dataframe.to_csv(csv_file_path, index=False)
-
- print(f"Saved to {csv_file_path}")
-
- def save_to_json(self) -> None:
- """
- Saves the DataFrame to a JSON file.
- """
- json_file_path = f"{self.filename}.json"
- self.dataframe.to_json(json_file_path, orient="records", lines=False)
-
- print(f"Saved to {json_file_path}")
-
- def save_to_html(self) -> None:
- """
- Saves the DataFrame to an HTML file.
- """
- json_file_path = f"{self.filename}.json"
-
- if not os.path.exists(json_file_path):
- self.save_to_json()
-
- html_file_path = f"{self.filename}.html"
-
- html = HTMLTweetsVisualizer(json_file_path, html_file_path, self.username)
-
- html_content = html.generate()
- html.save(html_content)
-
- print(f"Saved to {html_file_path}")
--- /dev/null
+"""
+Parses the returned data from the Wayback CDX Server API.
+"""
+
+import re
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from contextlib import nullcontext
+from typing import Any, Dict, List, Optional, Tuple
+from urllib.parse import unquote
+
+from rich import print as rprint
+from rich.progress import Progress
+
+from waybacktweets.config.config import config
+from waybacktweets.exceptions.exceptions import (
+ ConnectionError,
+ GetResponseError,
+ HTTPError,
+)
+from waybacktweets.utils.utils import (
+ check_double_status,
+ check_pattern_tweet,
+ clean_tweet_url,
+ delete_tweet_pathnames,
+ get_response,
+ is_tweet_url,
+ semicolon_parser,
+)
+
+
+class TwitterEmbed:
+ """
+ Class responsible for parsing tweets using the Twitter Publish service.
+
+ :param tweet_url: The URL of the tweet to be parsed.
+ """
+
+ def __init__(self, tweet_url: str):
+ self.tweet_url = tweet_url
+
+ def embed(self) -> Optional[Tuple[List[str], List[bool], List[str]]]:
+ """
+ Parses the archived tweets when they are still available.
+
+ This function goes through each archived tweet and checks
+ if it is still available.
+ If the tweet is available, it extracts the necessary information
+ and adds it to the respective lists.
+ The function returns a tuple of three lists:
+ - The first list contains the tweet texts.
+        - The second list contains boolean values indicating whether each
+        tweet is a retweet.
+        - The third list contains the user information of each tweet.
+
+        :returns: A tuple of three lists containing the tweet texts,
+        retweet flags, and user information, respectively. If no tweets are
+        available, returns None.
+ """
+ try:
+ url = f"https://publish.twitter.com/oembed?url={self.tweet_url}"
+ response = get_response(url=url)
+ if response:
+ json_response = response.json()
+ html = json_response["html"]
+ author_name = json_response["author_name"]
+
+ regex = re.compile(
+ r'<blockquote class="twitter-tweet"(?: [^>]+)?><p[^>]*>(.*?)<\/p>.*?— (.*?)<\/a>', # noqa
+ re.DOTALL,
+ )
+ regex_author = re.compile(r"^(.*?)\s*\(")
+
+ matches_html = regex.findall(html)
+
+ tweet_content = []
+ user_info = []
+ is_RT = []
+
+ for match in matches_html:
+ tweet_content_match = re.sub(
+ r"<a[^>]*>|<\/a>", "", match[0].strip()
+ ).replace("<br>", "\n")
+ user_info_match = re.sub(
+ r"<a[^>]*>|<\/a>", "", match[1].strip()
+ ).replace(")", "), ")
+ match_author = regex_author.search(user_info_match)
+ author_tweet = match_author.group(1) if match_author else ""
+
+ if tweet_content_match:
+ tweet_content.append(tweet_content_match)
+ if user_info_match:
+ user_info.append(user_info_match)
+ is_RT.append(author_name != author_tweet)
+
+ return tweet_content, is_RT, user_info
+ except ConnectionError:
+ if config.verbose:
+ rprint("[yellow]Error parsing the tweet, but the CDX data was saved.")
+ except HTTPError:
+ if config.verbose:
+ rprint(
+ f"[yellow]{self.tweet_url} not available on the user's account, but the CDX data was saved." # noqa: E501
+ )
+ except GetResponseError as e:
+ if config.verbose:
+ rprint(f"[red]An error occurred: {str(e)}")
+
+ return None
+
+
+class JsonParser:
+ """
+ Class responsible for parsing tweets when the mimetype is application/json.\n
+ Note: This class is in an experimental phase, but it is currently being
+ used by the Streamlit Web App.
+
+ :param archived_tweet_url: The URL of the archived tweet to be parsed.
+ """
+
+ def __init__(self, archived_tweet_url: str):
+ self.archived_tweet_url = archived_tweet_url
+
+    def parse(self) -> Optional[str]:
+ """
+ Parses the archived tweets in JSON format.
+
+ :returns: The parsed tweet text.
+ """
+ try:
+ response = get_response(url=self.archived_tweet_url)
+
+ if response:
+ json_data = response.json()
+
+ if "data" in json_data:
+ return json_data["data"].get("text", json_data["data"])
+
+ if "retweeted_status" in json_data:
+ return json_data["retweeted_status"].get(
+ "text", json_data["retweeted_status"]
+ )
+
+ return json_data.get("text", json_data)
+ except ConnectionError:
+ if config.verbose:
+ rprint(
+ f"[yellow]Connection error with {self.archived_tweet_url}. Max retries exceeded. Error parsing the JSON, but the CDX data was saved." # noqa: E501
+ )
+ except GetResponseError as e:
+ if config.verbose:
+ rprint(f"[red]An error occurred: {str(e)}")
+
+ return None
+
+
+class TweetsParser:
+ """
+ Class responsible for the overall parsing of archived tweets.
+
+ :param archived_tweets_response: The response from the archived tweets.
+ :param username: The username associated with the tweets.
+ :param field_options: The fields to be included in the parsed data. Options include "archived_urlkey", "archived_timestamp", "original_tweet_url", "archived_tweet_url", "parsed_tweet_url", "parsed_archived_tweet_url", "available_tweet_text", "available_tweet_is_RT", "available_tweet_info", "archived_mimetype", "archived_statuscode", "archived_digest", "archived_length".
+ """ # noqa: E501
+
+ def __init__(
+ self,
+ archived_tweets_response: List[str],
+ username: str,
+ field_options: List[str],
+ ):
+ self.archived_tweets_response = archived_tweets_response
+ self.username = username
+ self.field_options = field_options
+ self.parsed_tweets = {option: [] for option in self.field_options}
+
+ def _add_field(self, key: str, value: Any) -> None:
+ """
+ Appends a value to a list in the parsed data structure.
+
+ :param key: The key in the parsed data structure.
+ :param value: The value to be appended.
+ """
+ if key in self.parsed_tweets:
+ self.parsed_tweets[key].append(value)
+
+ def _process_response(self, response: List[str]) -> None:
+ """
+ Processes the archived tweet's response and adds the relevant CDX data.
+
+ :param response: The response from the archived tweet.
+ """
+ tweet_remove_char = unquote(response[2]).replace("’", "")
+ cleaned_tweet = check_pattern_tweet(tweet_remove_char).strip('"')
+
+ wayback_machine_url = (
+ f"https://web.archive.org/web/{response[1]}/{tweet_remove_char}"
+ )
+ original_tweet = delete_tweet_pathnames(
+ clean_tweet_url(cleaned_tweet, self.username)
+ )
+ parsed_wayback_machine_url = (
+ f"https://web.archive.org/web/{response[1]}/{original_tweet}"
+ )
+
+ double_status = check_double_status(wayback_machine_url, original_tweet)
+
+ if double_status:
+ original_tweet = delete_tweet_pathnames(
+ f"https://twitter.com/{original_tweet}"
+ )
+ elif "://" not in original_tweet:
+ original_tweet = delete_tweet_pathnames(f"https://{original_tweet}")
+
+ encoded_tweet = semicolon_parser(response[2])
+ encoded_archived_tweet = semicolon_parser(wayback_machine_url)
+ encoded_parsed_tweet = semicolon_parser(original_tweet)
+ encoded_parsed_archived_tweet = semicolon_parser(parsed_wayback_machine_url)
+
+ available_tweet_text = None
+ available_tweet_is_RT = None
+ available_tweet_info = None
+
+ is_tweet = is_tweet_url(encoded_tweet)
+
+ if is_tweet:
+ embed_parser = TwitterEmbed(encoded_tweet)
+ content = embed_parser.embed()
+
+ if content:
+ available_tweet_text = semicolon_parser(content[0][0])
+ available_tweet_is_RT = content[1][0]
+ available_tweet_info = semicolon_parser(content[2][0])
+
+ self._add_field("available_tweet_text", available_tweet_text)
+ self._add_field("available_tweet_is_RT", available_tweet_is_RT)
+ self._add_field("available_tweet_info", available_tweet_info)
+
+ self._add_field("archived_urlkey", response[0])
+ self._add_field("archived_timestamp", response[1])
+ self._add_field("original_tweet_url", encoded_tweet)
+ self._add_field("archived_tweet_url", encoded_archived_tweet)
+ self._add_field("parsed_tweet_url", encoded_parsed_tweet)
+ self._add_field("parsed_archived_tweet_url", encoded_parsed_archived_tweet)
+ self._add_field("archived_mimetype", response[3])
+ self._add_field("archived_statuscode", response[4])
+ self._add_field("archived_digest", response[5])
+ self._add_field("archived_length", response[6])
+
+    def parse(self, print_progress: bool = False) -> Dict[str, List[Any]]:
+ """
+ Parses the archived tweets CDX data and structures it.
+
+ :param print_progress: A boolean indicating whether to print progress or not.
+
+ :returns: The parsed tweets data.
+ """
+ with ThreadPoolExecutor(max_workers=10) as executor:
+
+ futures = {
+ executor.submit(self._process_response, response): response
+ for response in self.archived_tweets_response[1:]
+ }
+
+ progress_context = Progress() if print_progress else nullcontext()
+ with progress_context as progress:
+ task = None
+ if print_progress:
+ task = progress.add_task(
+ f"Waybacking @{self.username} tweets\n", total=len(futures)
+ )
+
+ for future in as_completed(futures):
+ try:
+ future.result()
+ except Exception as e:
+ rprint(f"[red]{e}")
+
+ if print_progress:
+ progress.update(task, advance=1)
+
+ return self.parsed_tweets
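A hedged sketch of ``TwitterEmbed`` used standalone, checking whether a tweet is still live via the Twitter Publish (oEmbed) service; the status URL is a placeholder::

    embed_parser = TwitterEmbed("https://twitter.com/jack/status/20")  # placeholder URL
    content = embed_parser.embed()

    if content:
        texts, is_retweet_flags, user_infos = content
        print(texts[0], is_retweet_flags[0], user_infos[0])

``embed()`` returns ``None`` when the tweet is gone or the request fails, which is why ``TweetsParser._process_response`` guards on the result before reading it.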
+++ /dev/null
-"""
-Parses the returned data from the Wayback CDX Server API.
-"""
-
-import re
-from concurrent.futures import ThreadPoolExecutor, as_completed
-from contextlib import nullcontext
-from typing import Any, Dict, List, Optional, Tuple
-from urllib.parse import unquote
-
-from rich import print as rprint
-from rich.progress import Progress
-
-from waybacktweets.config.config import config
-from waybacktweets.exceptions.exceptions import (
- ConnectionError,
- GetResponseError,
- HTTPError,
-)
-from waybacktweets.utils.utils import (
- check_double_status,
- check_pattern_tweet,
- clean_tweet_url,
- delete_tweet_pathnames,
- get_response,
- is_tweet_url,
- semicolon_parser,
-)
-
-
-class TwitterEmbed:
- """
- Class responsible for parsing tweets using the Twitter Publish service.
-
- :param tweet_url: The URL of the tweet to be parsed.
- """
-
- def __init__(self, tweet_url: str):
- self.tweet_url = tweet_url
-
- def embed(self) -> Optional[Tuple[List[str], List[bool], List[str]]]:
- """
- Parses the archived tweets when they are still available.
-
- This function goes through each archived tweet and checks
- if it is still available.
- If the tweet is available, it extracts the necessary information
- and adds it to the respective lists.
- The function returns a tuple of three lists:
- - The first list contains the tweet texts.
- - The second list contains boolean values indicating whether each tweet
- is still available.
- - The third list contains the URLs of the tweets.
-
- :returns: A tuple of three lists containing the tweet texts,
- availability statuses, and URLs, respectively. If no tweets are available,
- returns None.
- """
- try:
- url = f"https://publish.twitter.com/oembed?url={self.tweet_url}"
- response = get_response(url=url)
- if response:
- json_response = response.json()
- html = json_response["html"]
- author_name = json_response["author_name"]
-
- regex = re.compile(
- r'<blockquote class="twitter-tweet"(?: [^>]+)?><p[^>]*>(.*?)<\/p>.*?— (.*?)<\/a>', # noqa
- re.DOTALL,
- )
- regex_author = re.compile(r"^(.*?)\s*\(")
-
- matches_html = regex.findall(html)
-
- tweet_content = []
- user_info = []
- is_RT = []
-
- for match in matches_html:
- tweet_content_match = re.sub(
- r"<a[^>]*>|<\/a>", "", match[0].strip()
- ).replace("<br>", "\n")
- user_info_match = re.sub(
- r"<a[^>]*>|<\/a>", "", match[1].strip()
- ).replace(")", "), ")
- match_author = regex_author.search(user_info_match)
- author_tweet = match_author.group(1) if match_author else ""
-
- if tweet_content_match:
- tweet_content.append(tweet_content_match)
- if user_info_match:
- user_info.append(user_info_match)
- is_RT.append(author_name != author_tweet)
-
- return tweet_content, is_RT, user_info
- except ConnectionError:
- if config.verbose:
- rprint("[yellow]Error parsing the tweet, but the CDX data was saved.")
- except HTTPError:
- if config.verbose:
- rprint(
- f"[yellow]{self.tweet_url} not available on the user's account, but the CDX data was saved." # noqa: E501
- )
- except GetResponseError as e:
- if config.verbose:
- rprint(f"[red]An error occurred: {str(e)}")
-
- return None
-
-
-# TODO: JSON Issue - Create separate function to handle JSON return without hitting rate limiting # noqa: E501
-class JsonParser:
- """
- Class responsible for parsing tweets when the mimetype is application/json.\n
- Note: This class is in an experimental phase, but it is currently being
- used by the Streamlit Web App.
-
- :param archived_tweet_url: The URL of the archived tweet to be parsed.
- """
-
- def __init__(self, archived_tweet_url: str):
- self.archived_tweet_url = archived_tweet_url
-
- def parse(self) -> str:
- """
- Parses the archived tweets in JSON format.
-
- :returns: The parsed tweet text.
- """
- try:
- response = get_response(url=self.archived_tweet_url)
-
- if response:
- json_data = response.json()
-
- if "data" in json_data:
- return json_data["data"].get("text", json_data["data"])
-
- if "retweeted_status" in json_data:
- return json_data["retweeted_status"].get(
- "text", json_data["retweeted_status"]
- )
-
- return json_data.get("text", json_data)
- except ConnectionError:
- if config.verbose:
- rprint(
- f"[yellow]Connection error with {self.archived_tweet_url}. Max retries exceeded. Error parsing the JSON, but the CDX data was saved." # noqa: E501
- )
- except GetResponseError as e:
- if config.verbose:
- rprint(f"[red]An error occurred: {str(e)}")
-
- return None
-
-
-class TweetsParser:
- """
- Class responsible for the overall parsing of archived tweets.
-
- :param archived_tweets_response: The response from the archived tweets.
- :param username: The username associated with the tweets.
- :param field_options: The fields to be included in the parsed data. Options include "archived_urlkey", "archived_timestamp", "original_tweet_url", "archived_tweet_url", "parsed_tweet_url", "parsed_archived_tweet_url", "available_tweet_text", "available_tweet_is_RT", "available_tweet_info", "archived_mimetype", "archived_statuscode", "archived_digest", "archived_length".
- """ # noqa: E501
-
- def __init__(
- self,
- archived_tweets_response: List[str],
- username: str,
- field_options: List[str],
- ):
- self.archived_tweets_response = archived_tweets_response
- self.username = username
- self.field_options = field_options
- self.parsed_tweets = {option: [] for option in self.field_options}
-
- def _add_field(self, key: str, value: Any) -> None:
- """
- Appends a value to a list in the parsed data structure.
-
- :param key: The key in the parsed data structure.
- :param value: The value to be appended.
- """
- if key in self.parsed_tweets:
- self.parsed_tweets[key].append(value)
-
- def _process_response(self, response: List[str]) -> None:
- """
- Processes the archived tweet's response and adds the relevant CDX data.
-
- :param response: The response from the archived tweet.
- """
- tweet_remove_char = unquote(response[2]).replace("’", "")
- cleaned_tweet = check_pattern_tweet(tweet_remove_char).strip('"')
-
- wayback_machine_url = (
- f"https://web.archive.org/web/{response[1]}/{tweet_remove_char}"
- )
- original_tweet = delete_tweet_pathnames(
- clean_tweet_url(cleaned_tweet, self.username)
- )
- parsed_wayback_machine_url = (
- f"https://web.archive.org/web/{response[1]}/{original_tweet}"
- )
-
- double_status = check_double_status(wayback_machine_url, original_tweet)
-
- if double_status:
- original_tweet = delete_tweet_pathnames(
- f"https://twitter.com/{original_tweet}"
- )
- elif "://" not in original_tweet:
- original_tweet = delete_tweet_pathnames(f"https://{original_tweet}")
-
- encoded_tweet = semicolon_parser(response[2])
- encoded_archived_tweet = semicolon_parser(wayback_machine_url)
- encoded_parsed_tweet = semicolon_parser(original_tweet)
- encoded_parsed_archived_tweet = semicolon_parser(parsed_wayback_machine_url)
-
- available_tweet_text = None
- available_tweet_is_RT = None
- available_tweet_info = None
-
- is_tweet = is_tweet_url(encoded_tweet)
-
- if is_tweet:
- embed_parser = TwitterEmbed(encoded_tweet)
- content = embed_parser.embed()
-
- if content:
- available_tweet_text = semicolon_parser(content[0][0])
- available_tweet_is_RT = content[1][0]
- available_tweet_info = semicolon_parser(content[2][0])
-
- self._add_field("available_tweet_text", available_tweet_text)
- self._add_field("available_tweet_is_RT", available_tweet_is_RT)
- self._add_field("available_tweet_info", available_tweet_info)
-
- # TODO: JSON Issue
- # parsed_text_json = ""
-
- # if response[3] == "application/json":
- # json_parser = JsonParser(encoded_parsed_archived_tweet)
- # text_json = json_parser.parse()
-
- # if text_json:
- # parsed_text_json = semicolon_parser(text_json)
-
- # self._add_field("parsed_tweet_text_mimetype_json", parsed_text_json)
-
- self._add_field("archived_urlkey", response[0])
- self._add_field("archived_timestamp", response[1])
- self._add_field("original_tweet_url", encoded_tweet)
- self._add_field("archived_tweet_url", encoded_archived_tweet)
- self._add_field("parsed_tweet_url", encoded_parsed_tweet)
- self._add_field("parsed_archived_tweet_url", encoded_parsed_archived_tweet)
- self._add_field("archived_mimetype", response[3])
- self._add_field("archived_statuscode", response[4])
- self._add_field("archived_digest", response[5])
- self._add_field("archived_length", response[6])
-
- def parse(self, print_progress=False) -> Dict[str, List[Any]]:
- """
- Parses the archived tweets CDX data and structures it.
-
- :param print_progress: A boolean indicating whether to print progress or not.
-
- :returns: The parsed tweets data.
- """
- with ThreadPoolExecutor(max_workers=10) as executor:
-
- futures = {
- executor.submit(self._process_response, response): response
- for response in self.archived_tweets_response[1:]
- }
-
- progress_context = Progress() if print_progress else nullcontext()
- with progress_context as progress:
- task = None
- if print_progress:
- task = progress.add_task(
- f"Waybacking @{self.username} tweets\n", total=len(futures)
- )
-
- for future in as_completed(futures):
- try:
- future.result()
- except Exception as e:
- rprint(f"[red]{e}")
-
- if print_progress:
- progress.update(task, advance=1)
-
- return self.parsed_tweets
--- /dev/null
+"""
+Requests data from the Wayback Machine API.
+"""
+
+from typing import Any, Dict, Optional
+
+from rich import print as rprint
+
+from waybacktweets.config.config import config
+from waybacktweets.exceptions.exceptions import (
+ ConnectionError,
+ EmptyResponseError,
+ GetResponseError,
+ HTTPError,
+ ReadTimeoutError,
+)
+from waybacktweets.utils.utils import get_response
+
+
+class WaybackTweets:
+ """
+ Class responsible for requesting data from the Wayback CDX Server API.
+
+ :param username: The username associated with the tweets.
+ :param collapse: The field to collapse duplicate lines on.
+ :param timestamp_from: The timestamp to start retrieving tweets from.
+ :param timestamp_to: The timestamp to stop retrieving tweets at.
+ :param limit: The maximum number of results to return.
+ :param offset: The number of lines to skip in the results.
+    :param matchtype: The scope of the URL match: a certain prefix, a certain host, or all subdomains.
+ """ # noqa: E501
+
+ def __init__(
+ self,
+ username: str,
+ collapse: str = None,
+ timestamp_from: str = None,
+ timestamp_to: str = None,
+ limit: int = None,
+ offset: int = None,
+ matchtype: str = None,
+ ):
+ self.username = username
+ self.collapse = collapse
+ self.timestamp_from = timestamp_from
+ self.timestamp_to = timestamp_to
+ self.limit = limit
+ self.offset = offset
+ self.matchtype = matchtype
+
+ def get(self) -> Optional[Dict[str, Any]]:
+ """
+ Sends a GET request to the Internet Archive's CDX API
+ to retrieve archived tweets.
+
+ :returns: The response from the CDX API in JSON format, if successful.
+ """
+ url = "https://web.archive.org/cdx/search/cdx"
+
+ status_pathname = "status/*"
+ if self.matchtype:
+ status_pathname = ""
+
+ params = {
+ "url": f"https://twitter.com/{self.username}/{status_pathname}",
+ "output": "json",
+ }
+
+ if self.collapse:
+ params["collapse"] = self.collapse
+
+ if self.timestamp_from:
+ params["from"] = self.timestamp_from
+
+ if self.timestamp_to:
+ params["to"] = self.timestamp_to
+
+ if self.limit:
+ params["limit"] = self.limit
+
+ if self.offset:
+ params["offset"] = self.offset
+
+ if self.matchtype:
+ params["matchType"] = self.matchtype
+
+ try:
+            response = get_response(url=url, params=params)
+
+            if response:
+                return response.json()
+ except ReadTimeoutError:
+ if config.verbose:
+ rprint("[red]Connection to web.archive.org timed out.")
+ except ConnectionError:
+ if config.verbose:
+ rprint(
+ "[red]Failed to establish a new connection with web.archive.org. Max retries exceeded. Please wait a few minutes and try again." # noqa: E501
+ )
+ except HTTPError as e:
+ if config.verbose:
+ rprint(f"[red]HTTP error occurred: {str(e)}")
+ except EmptyResponseError:
+ if config.verbose:
+ rprint("[red]No data was saved due to an empty response.")
+ except GetResponseError as e:
+ if config.verbose:
+ rprint(f"[red]An error occurred: {str(e)}")
+
+ return None
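A usage sketch with placeholder arguments. With ``output=json`` the CDX server returns a list of lists whose first row is a header row, which is why ``TweetsParser`` iterates over ``archived_tweets_response[1:]``::

    api = WaybackTweets("jack", timestamp_from="20150101", limit=100)  # placeholder values
    rows = api.get()

    if rows:
        header, records = rows[0], rows[1:]
        # For the default field set, header is expected to be
        # ["urlkey", "timestamp", "original", "mimetype",
        #  "statuscode", "digest", "length"]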
+++ /dev/null
-"""
-Requests data from the Wayback Machine API.
-"""
-
-from typing import Any, Dict, Optional
-
-from rich import print as rprint
-
-from waybacktweets.config.config import config
-from waybacktweets.exceptions.exceptions import (
- ConnectionError,
- EmptyResponseError,
- GetResponseError,
- HTTPError,
- ReadTimeoutError,
-)
-from waybacktweets.utils.utils import get_response
-
-
-class WaybackTweets:
- """
- Class responsible for requesting data from the Wayback CDX Server API.
-
- :param username: The username associated with the tweets.
- :param collapse: The field to collapse duplicate lines on.
- :param timestamp_from: The timestamp to start retrieving tweets from.
- :param timestamp_to: The timestamp to stop retrieving tweets at.
- :param limit: The maximum number of results to return.
- :param offset: The number of lines to skip in the results.
- :param matchType: Results matching a certain prefix, a certain host or all subdomains.
- """ # noqa: E501
-
- def __init__(
- self,
- username: str,
- collapse: str = None,
- timestamp_from: str = None,
- timestamp_to: str = None,
- limit: int = None,
- offset: int = None,
- matchtype: str = None,
- ):
- self.username = username
- self.collapse = collapse
- self.timestamp_from = timestamp_from
- self.timestamp_to = timestamp_to
- self.limit = limit
- self.offset = offset
- self.matchtype = matchtype
-
- def get(self) -> Optional[Dict[str, Any]]:
- """
- Sends a GET request to the Internet Archive's CDX API
- to retrieve archived tweets.
-
- :returns: The response from the CDX API in JSON format, if successful.
- """
- url = "https://web.archive.org/cdx/search/cdx"
-
- status_pathname = "status/*"
- if self.matchtype:
- status_pathname = ""
-
- params = {
- "url": f"https://twitter.com/{self.username}/{status_pathname}",
- "output": "json",
- }
-
- if self.collapse:
- params["collapse"] = self.collapse
-
- if self.timestamp_from:
- params["from"] = self.timestamp_from
-
- if self.timestamp_to:
- params["to"] = self.timestamp_to
-
- if self.limit:
- params["limit"] = self.limit
-
- if self.offset:
- params["offset"] = self.offset
-
- if self.matchtype:
- params["matchType"] = self.matchtype
-
- try:
- response = get_response(url=url, params=params)
- return response.json()
- except ReadTimeoutError:
- if config.verbose:
- rprint("[red]Connection to web.archive.org timed out.")
- except ConnectionError:
- if config.verbose:
- rprint(
- "[red]Failed to establish a new connection with web.archive.org. Max retries exceeded. Please wait a few minutes and try again." # noqa: E501
- )
- except HTTPError as e:
- if config.verbose:
- rprint(f"[red]HTTP error occurred: {str(e)}")
- except EmptyResponseError:
- if config.verbose:
- rprint("[red]No data was saved due to an empty response.")
- except GetResponseError as e:
- if config.verbose:
- rprint(f"[red]An error occurred: {str(e)}")
-
- return None
--- /dev/null
+# flake8: noqa: E501
+"""
+Generates an HTML file to visualize the parsed data.
+"""
+
+import json
+from typing import Any, Dict, List
+
+
+class HTMLTweetsVisualizer:
+ """
+ Class responsible for generating an HTML file to visualize the parsed data.
+
+    :param json_file_path: The path of the JSON file.
+ :param html_file_path: The path where the HTML file will be saved.
+ :param username: The username associated with the tweets.
+ """
+
+ def __init__(self, json_file_path: str, html_file_path: str, username: str):
+ self.json_content = self._json_loader(json_file_path)
+ self.html_file_path = html_file_path
+ self.username = username
+
+ @staticmethod
+ def _json_loader(json_file_path: str) -> List[Dict[str, Any]]:
+ """
+ Reads and loads JSON data from a specified file path.
+
+ :param json_file_path: The path of the JSON file.
+
+ :returns: The content of the JSON file.
+ """
+ with open(json_file_path, "r", encoding="utf-8") as f:
+ return json.load(f)
+
+ def generate(self) -> str:
+ """
+ Generates an HTML string that represents the parsed data.
+
+ :returns: The generated HTML string.
+ """
+
+ html = f"<html>\n<head>\n<title>@{self.username} archived tweets</title>\n"
+ html += "<style>\n"
+ html += "body { font-family: monospace; background-color: #f5f8fa; color: #1c1e21; margin: 0; padding: 20px; }\n"
+ html += ".container { display: flex; flex-wrap: wrap; gap: 20px; }\n"
+ html += ".tweet { flex: 0 1 calc(33.33% - 20px); background-color: #fff; border: 1px solid #e1e8ed; border-radius: 10px; padding: 15px; overflow-wrap: break-word; margin: auto; }\n"
+ html += ".tweet strong { font-weight: bold; }\n"
+ html += ".tweet a { color: #ef5552; text-decoration: none; }\n"
+ html += ".content { color: #ef5552; }\n"
+ html += ".tweet a:hover { text-decoration: underline; }\n"
+ html += "h1, h3 { text-align: center; }\n"
+ html += "iframe { width: 600px; height: 600px; }\n"
+ html += "</style>\n"
+ html += "</head>\n<body>\n"
+ html += f"<h1>@{self.username} archived tweets</h1>\n"
+ html += '<div class="container">\n'
+
+ for tweet in self.json_content:
+ html += '<div class="tweet">\n'
+
+ # TODO: JSON Issue
+ # if (
+ # (
+ # tweet["archived_mimetype"] != "application/json"
+ # and not tweet["parsed_tweet_text_mimetype_json"]
+ # )
+ # and not tweet["available_tweet_text"]
+ # ) or (
+ # (
+ # tweet["archived_mimetype"] == "application/json"
+ # and not tweet["parsed_tweet_text_mimetype_json"]
+ # )
+ # and not tweet["available_tweet_text"]
+ # ):
+ if (
+ tweet["archived_mimetype"] != "application/json"
+ and not tweet["available_tweet_text"]
+ ):
+ html += f'<iframe src="{tweet["parsed_archived_tweet_url"]}" frameborder="0" scrolling="auto"></iframe>\n'
+
+ html += f'<p><a href="{tweet["original_tweet_url"]}" target="_blank"><strong>Original Tweet↗</strong></a> · \n'
+ html += f'<a href="{tweet["parsed_tweet_url"]}" target="_blank"><strong>Parsed Tweet↗</strong></a> · \n'
+ html += f'<a href="{tweet["archived_tweet_url"]}" target="_blank"><strong>Archived Tweet↗</strong></a> · \n'
+ html += f'<a href="{tweet["parsed_archived_tweet_url"]}" target="_blank"><strong>Parsed Archived Tweet↗</strong></a></p>\n'
+
+ if tweet["available_tweet_text"]:
+ html += "<br>\n"
+ html += f'<p><strong class="content">Available Tweet Content:</strong> {tweet["available_tweet_text"]}</p>\n'
+ html += f'<p><strong class="content">Available Tweet Is Retweet:</strong> {tweet["available_tweet_is_RT"]}</p>\n'
+ html += f'<p><strong class="content">Available Tweet Username:</strong> {tweet["available_tweet_info"]}</p>\n'
+
+ # TODO: JSON Issue
+ # if (
+ # tweet["archived_mimetype"] == "application/json"
+ # and tweet["parsed_tweet_text_mimetype_json"]
+ # ) and not tweet["available_tweet_text"]:
+ # html += f'<p><strong class="content">Parsed Tweet Text (application/json):</strong> {tweet["parsed_tweet_text_mimetype_json"]}</p>\n'
+
+ html += "<br>\n"
+ html += f'<p><strong>Archived URL Key:</strong> {tweet["archived_urlkey"]}</p>\n'
+ html += f'<p><strong>Archived Timestamp:</strong> {tweet["archived_timestamp"]}</p>\n'
+            html += f'<p><strong>Archived Mimetype:</strong> {tweet["archived_mimetype"]}</p>\n'
+ html += f'<p><strong>Archived Statuscode:</strong> {tweet["archived_statuscode"]}</p>\n'
+ html += (
+ f'<p><strong>Archived Digest:</strong> {tweet["archived_digest"]}</p>\n'
+ )
+ html += (
+ f'<p><strong>Archived Length:</strong> {tweet["archived_length"]}</p>\n'
+ )
+ html += "</div>\n"
+
+ html += "</div>\n"
+ html += '<h3>generated by <a href="https://github.com/claromes/waybacktweets" target="_blank">Wayback Tweets↗</a></h3>\n'
+ html += "</body>\n</html>"
+
+ return html
+
+ def save(self, html_content: str) -> None:
+ """
+ Saves the generated HTML string to a file.
+
+ :param html_content: The HTML string to be saved.
+ """
+ with open(self.html_file_path, "w", encoding="utf-8") as f:
+ f.write(html_content)
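Used standalone, the visualizer mirrors what ``TweetsExporter.save_to_html()`` does above; a minimal sketch with placeholder file names::

    viz = HTMLTweetsVisualizer("jack_tweets_20240101000000.json", "jack_tweets.html", "jack")
    viz.save(viz.generate())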
+++ /dev/null
-# flake8: noqa: E501
-"""
-Generates an HTML file to visualize the parsed data.
-"""
-
-import json
-from typing import Any, Dict, List
-
-
-class HTMLTweetsVisualizer:
- """
- Class responsible for generating an HTML file to visualize the parsed data.
-
- :param json_content: The content of the JSON file.
- :param html_file_path: The path where the HTML file will be saved.
- :param username: The username associated with the tweets.
- """
-
- def __init__(self, json_file_path: str, html_file_path: str, username: str):
- self.json_content = self._json_loader(json_file_path)
- self.html_file_path = html_file_path
- self.username = username
-
- @staticmethod
- def _json_loader(json_file_path: str) -> List[Dict[str, Any]]:
- """
- Reads and loads JSON data from a specified file path.
-
- :param json_file_path: The path of the JSON file.
-
- :returns: The content of the JSON file.
- """
- with open(json_file_path, "r", encoding="utf-8") as f:
- return json.load(f)
-
- def generate(self) -> str:
- """
- Generates an HTML string that represents the parsed data.
-
- :returns: The generated HTML string.
- """
-
- html = f"<html>\n<head>\n<title>@{self.username} archived tweets</title>\n"
- html += "<style>\n"
- html += "body { font-family: monospace; background-color: #f5f8fa; color: #1c1e21; margin: 0; padding: 20px; }\n"
- html += ".container { display: flex; flex-wrap: wrap; gap: 20px; }\n"
- html += ".tweet { flex: 0 1 calc(33.33% - 20px); background-color: #fff; border: 1px solid #e1e8ed; border-radius: 10px; padding: 15px; overflow-wrap: break-word; margin: auto; }\n"
- html += ".tweet strong { font-weight: bold; }\n"
- html += ".tweet a { color: #ef5552; text-decoration: none; }\n"
- html += ".content { color: #ef5552; }\n"
- html += ".tweet a:hover { text-decoration: underline; }\n"
- html += "h1, h3 { text-align: center; }\n"
- html += "iframe { width: 600px; height: 600px; }\n"
- html += "</style>\n"
- html += "</head>\n<body>\n"
- html += f"<h1>@{self.username} archived tweets</h1>\n"
- html += '<div class="container">\n'
-
- for tweet in self.json_content:
- html += '<div class="tweet">\n'
-
- # TODO: JSON Issue
- # if (
- # (
- # tweet["archived_mimetype"] != "application/json"
- # and not tweet["parsed_tweet_text_mimetype_json"]
- # )
- # and not tweet["available_tweet_text"]
- # ) or (
- # (
- # tweet["archived_mimetype"] == "application/json"
- # and not tweet["parsed_tweet_text_mimetype_json"]
- # )
- # and not tweet["available_tweet_text"]
- # ):
- if (
- tweet["archived_mimetype"] != "application/json"
- and not tweet["available_tweet_text"]
- ):
- html += f'<iframe src="{tweet["parsed_archived_tweet_url"]}" frameborder="0" scrolling="auto"></iframe>\n'
-
- html += f'<p><a href="{tweet["original_tweet_url"]}" target="_blank"><strong>Original Tweet↗</strong></a> · \n'
- html += f'<a href="{tweet["parsed_tweet_url"]}" target="_blank"><strong>Parsed Tweet↗</strong></a> · \n'
- html += f'<a href="{tweet["archived_tweet_url"]}" target="_blank"><strong>Archived Tweet↗</strong></a> · \n'
- html += f'<a href="{tweet["parsed_archived_tweet_url"]}" target="_blank"><strong>Parsed Archived Tweet↗</strong></a></p>\n'
-
- if tweet["available_tweet_text"]:
- html += "<br>\n"
- html += f'<p><strong class="content">Available Tweet Content:</strong> {tweet["available_tweet_text"]}</p>\n'
- html += f'<p><strong class="content">Available Tweet Is Retweet:</strong> {tweet["available_tweet_is_RT"]}</p>\n'
- html += f'<p><strong class="content">Available Tweet Username:</strong> {tweet["available_tweet_info"]}</p>\n'
-
- # TODO: JSON Issue
- # if (
- # tweet["archived_mimetype"] == "application/json"
- # and tweet["parsed_tweet_text_mimetype_json"]
- # ) and not tweet["available_tweet_text"]:
- # html += f'<p><strong class="content">Parsed Tweet Text (application/json):</strong> {tweet["parsed_tweet_text_mimetype_json"]}</p>\n'
-
- html += "<br>\n"
- html += f'<p><strong>Archived URL Key:</strong> {tweet["archived_urlkey"]}</p>\n'
- html += f'<p><strong>Archived Timestamp:</strong> {tweet["archived_timestamp"]}</p>\n'
- html += f'<p><strong>Archived mimetype:</strong> {tweet["archived_mimetype"]}</p>\n'
- html += f'<p><strong>Archived Statuscode:</strong> {tweet["archived_statuscode"]}</p>\n'
- html += (
- f'<p><strong>Archived Digest:</strong> {tweet["archived_digest"]}</p>\n'
- )
- html += (
- f'<p><strong>Archived Length:</strong> {tweet["archived_length"]}</p>\n'
- )
- html += "</div>\n"
-
- html += "</div>\n"
- html += '<h3>generated by <a href="https://github.com/claromes/waybacktweets" target="_blank">Wayback Tweets↗</a></h3>\n'
- html += "</body>\n</html>"
-
- return html
-
- def save(self, html_content: str) -> None:
- """
- Saves the generated HTML string to a file.
-
- :param html_content: The HTML string to be saved.
- """
- with open(self.html_file_path, "w", encoding="utf-8") as f:
- f.write(html_content)
config = _Config()
"""
-Configuration settings..
+Configuration settings.
.. attribute:: verbose