-"""
-Exports the parsed archived tweets.
-"""
-
import pandas as pd
import re
import datetime
+import os
+from viz_tweets import HTMLTweetsVisualizer
+
+
+class TweetsExporter:
+ """Handles the exporting of parsed archived tweets."""
+
+ def __init__(self, data, username, metadata_options):
+ self.data = data
+ self.username = username
+ self.metadata_options = metadata_options
+ self.formatted_datetime = self.datetime_now()
+ self.filename = f'{self.username}_tweets_{self.formatted_datetime}'
+        self.dataframe = self.create_dataframe()
-from viz_tweets import *
+ @staticmethod
+ def datetime_now():
+        """Returns the current datetime formatted as YYYYMMDDHHMMSS."""
+ now = datetime.datetime.now()
+ formatted_now = now.strftime("%Y%m%d%H%M%S")
+ formatted_now = re.sub(r'\W+', '', formatted_now)
+ return formatted_now
-def datetime_now():
- """Formats datetime."""
- now = datetime.datetime.now()
+ @staticmethod
+ def transpose_matrix(data, fill_value=None):
+        """Pads the lists in the data dict to a common length, filling
+        missing values with the specified fill value."""
+ max_length = max(len(sublist) for sublist in data.values())
- formatted_now = now.strftime("%Y%m%d%H%M%S")
+ filled_data = {
+ key: value + [fill_value] * (max_length - len(value))
+ for key, value in data.items()
+ }
- formatted_now = re.sub(r'\W+', '', formatted_now)
+ return filled_data
- return formatted_now
+    def create_dataframe(self):
+        """Creates a DataFrame from the padded data."""
+        padded_data = self.transpose_matrix(self.data)
+        df = pd.DataFrame(padded_data, columns=self.metadata_options)
-def transpose_matrix(data, fill_value=None):
- """Transposes a matrix, filling in missing values with a specified fill value if needed."""
- max_length = max(len(sublist) for sublist in data)
- filled_data = [
- sublist + [fill_value] * (max_length - len(sublist))
- for sublist in data
- ]
+ return df
- data_transposed = [list(row) for row in zip(*filled_data)]
+ def save_to_csv(self):
+ """Saves the DataFrame to a CSV file."""
+ csv_file_path = f'{self.filename}.csv'
+ self.dataframe.to_csv(csv_file_path, index=False)
- return data_transposed
+ print(f'Saved to {csv_file_path}')
+ def save_to_json(self):
+ """Saves the DataFrame to a JSON file."""
+ json_file_path = f'{self.filename}.json'
+ self.dataframe.to_json(json_file_path, orient='records', lines=False)
-def save_tweets(data, username):
- """Saves parsed archived tweets in CSV, JSON, and HTML formats."""
- data_transposed = transpose_matrix(data)
+ print(f'Saved to {json_file_path}')
- formatted_datetime = datetime_now()
- filename = f'{username}_tweets_{formatted_datetime}'
+ def save_to_html(self):
+ """Saves the DataFrame to an HTML file."""
+ json_file_path = f'{self.filename}.json'
- df = pd.DataFrame(data_transposed,
- columns=[
- 'archived_urlkey', 'archived_timestamp', 'tweet',
- 'archived_tweet', 'parsed_tweet',
- 'parsed_tweet_mimetype_json',
- 'parsed_archived_tweet', 'archived_mimetype',
- 'archived_statuscode', 'archived_digest',
- 'archived_length', 'available_tweet_content',
- 'available_tweet_is_RT', 'available_tweet_username'
- ])
+ if not os.path.exists(json_file_path):
+ self.save_to_json()
- csv_file_path = f'{filename}.csv'
- df.to_csv(csv_file_path, index=False)
+ html_file_path = f'{self.filename}.html'
- json_file_path = f'{filename}.json'
- df.to_json(json_file_path, orient='records', lines=False)
+        visualizer = HTMLTweetsVisualizer(json_file_path, html_file_path,
+                                          self.username)
- html_file_path = f'{filename}.html'
- json_content = read_json(json_file_path)
- html_content = generate_html(json_content, username)
- save_html(html_file_path, html_content)
+        html_content = visualizer.generate()
+        visualizer.save(html_content)
- print(
- f'Done. Check the files {filename}.csv, {filename}.json and {filename}.html'
- )
+ print(f'Saved to {html_file_path}')
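
# Usage sketch: a hypothetical dict standing in for TweetsParser.parse()
# output, showing how TweetsExporter pads ragged columns with None before
# writing the files. The option names are real; the values are made up.
if __name__ == '__main__':
    sample_options = ['archived_urlkey', 'archived_timestamp', 'tweet']
    sample_data = {
        'archived_urlkey': ['com,twitter)/claromes/status/1',
                            'com,twitter)/claromes/status/2'],
        'archived_timestamp': ['20200101000000', '20200102000000'],
        'tweet': ['https://twitter.com/claromes/status/1'],  # shorter: gets padded
    }
    exporter = TweetsExporter(sample_data, 'claromes', sample_options)
    print(exporter.dataframe)  # two rows; the 'tweet' column ends in None
    exporter.save_to_csv()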
"""
Main function for retrieving archived tweets.
"""
-from request_tweets import *
-from parse_tweets import *
-from export_tweets import *
+from request_tweets import WaybackTweets
+from parse_tweets import TweetsParser
+from export_tweets import TweetsExporter
username = 'claromes'
unique = False
def main():
- """Invokes the functions to retrieve archived tweets, perform necessary parsing, and save the data."""
+ """Invokes the classes to retrieve archived tweets, perform necessary parsing, and save the data."""
try:
- archived_tweets = get_archived_tweets(username, unique, datetime_from,
- datetime_to)
- if archived_tweets:
- data = parse_archived_tweets(archived_tweets, username)
-
- save_tweets(data, username)
+        api = WaybackTweets(username, unique)
+ archived_tweets = api.get()
- print(
- f'\nNeed help? Open an issue: https://github.com/claromes/waybacktweets/issues.'
- )
+ if archived_tweets:
+ metadata_options = [
+ 'archived_urlkey', 'archived_timestamp', 'tweet',
+ 'archived_tweet', 'parsed_tweet', 'parsed_tweet_mimetype_json',
+ 'available_tweet_content', 'available_tweet_is_RT',
+ 'available_tweet_username', 'parsed_archived_tweet',
+ 'archived_mimetype', 'archived_statuscode', 'archived_digest',
+ 'archived_length'
+ ]
+
+ parser = TweetsParser(archived_tweets, username, metadata_options)
+ parsed_tweets = parser.parse()
+
+ exporter = TweetsExporter(parsed_tweets, username,
+ metadata_options)
+ exporter.save_to_csv()
+ exporter.save_to_json()
+ exporter.save_to_html()
+
+            print(
+                '\nNeed help? Open an issue: https://github.com/claromes/waybacktweets/issues.'
+            )
except TypeError as e:
print(e)
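
# Note: WaybackTweets still accepts the collapse and date-range filters that
# the old get_archived_tweets() took. A hypothetical narrowed run (the dates
# are placeholders):
#
#     api = WaybackTweets(username, unique=True,
#                         timestamp_from='20230101', timestamp_to='20231231')
#     archived_tweets = api.get()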
-"""
-Parses the returned data from the Wayback CDX Server API.
-"""
-
import requests
import re
from urllib.parse import unquote
from utils import (check_double_status, clean_tweet_url,
                   delete_tweet_pathnames, pattern_tweet, semicolon_parse)
-def embed(tweet):
- """Parses the archived tweets when the tweets are still available using the Twitter Publish service from X.
- Returns the text of the tweet, if it's a retweet, and the username of the account."""
- try:
- url = f'https://publish.twitter.com/oembed?url={tweet}'
- response = requests.get(url)
-
- regex = r'<blockquote class="twitter-tweet"(?: [^>]+)?><p[^>]*>(.*?)<\/p>.*?— (.*?)<\/a>'
- regex_author = r'^(.*?)\s*\('
-
- if not (400 <= response.status_code <= 511):
- html = response.json()['html']
- author_name = response.json()['author_name']
-
- matches_html = re.findall(regex, html, re.DOTALL)
-
- tweet_content = []
- user_info = []
- is_RT = []
-
- for match in matches_html:
- tweet_content_match = re.sub(r'<a[^>]*>|<\/a>', '',
- match[0].strip())
- tweet_content_match = tweet_content_match.replace('<br>', '\n')
-
- user_info_match = re.sub(r'<a[^>]*>|<\/a>', '',
- match[1].strip())
- user_info_match = user_info_match.replace(')', '), ')
-
- match_author = re.search(regex_author, user_info_match)
- author_tweet = match_author.group(1)
-
- if tweet_content_match:
- tweet_content.append(tweet_content_match)
- if user_info_match:
- user_info.append(user_info_match)
-
- is_RT_match = False
- if author_name != author_tweet:
- is_RT_match = True
-
- is_RT.append(is_RT_match)
-
- return tweet_content, is_RT, user_info
- except:
- return None
-
-
-def parse_json_mimetype(tweet):
- """Parses the archived tweets when the mimetype is application/json and returns the text of the tweet."""
- response_json = requests.get(tweet)
-
- if not (400 <= response_json.status_code <= 511):
- json_data = response_json.json()
-
- if 'data' in json_data:
- if 'text' in json_data['data']:
- json_text = json_data['data']['text']
- return json_text
- else:
- json_text = json_data['data']
- return json_text
- else:
- if 'text' in json_data:
- json_text = json_data['text']
- return json_text
- else:
- json_text = json_data
- return json_text
-
-
-def parse_archived_tweets(archived_tweets_response, username):
- """Parses the archived tweets metadata and structures it in a more readable format."""
- archived_urlkey = []
- archived_timestamp = []
- tweet = []
- archived_tweet = []
- parsed_tweet = []
- parsed_tweet_mimetype_json = []
- available_tweet_content = []
- available_tweet_is_RT = []
- available_tweet_username = []
- parsed_archived_tweet = []
- archived_mimetype = []
- archived_statuscode = []
- archived_digest = []
- archived_length = []
-
- for response in archived_tweets_response[1:]:
- tweet_remove_char = unquote(response[2]).replace('’', '')
- cleaned_tweet = pattern_tweet(tweet_remove_char).strip('"')
-
- wayback_machine_url = f'https://web.archive.org/web/{response[1]}/{tweet_remove_char}'
-
- original_tweet = delete_tweet_pathnames(
- clean_tweet_url(cleaned_tweet, username))
-
- parsed_wayback_machine_url = f'https://web.archive.org/web/{response[1]}/{original_tweet}'
-
- double_status = check_double_status(wayback_machine_url,
- original_tweet)
-
- if double_status:
- original_tweet = delete_tweet_pathnames(
- f'https://twitter.com/{original_tweet}')
-
- elif not '://' in original_tweet:
+class TwitterEmbed:
+ """Handles parsing of tweets using the Twitter Publish service."""
+
+ def __init__(self, tweet_url):
+ self.tweet_url = tweet_url
+
+ def embed(self):
+        """Parses the archived tweets when they are still available.
+        Returns the tweet text, whether it is a retweet, and the account username."""
+ try:
+ url = f'https://publish.twitter.com/oembed?url={self.tweet_url}'
+ response = requests.get(url)
+ if not (400 <= response.status_code <= 511):
+ html = response.json()['html']
+ author_name = response.json()['author_name']
+
+ regex = r'<blockquote class="twitter-tweet"(?: [^>]+)?><p[^>]*>(.*?)<\/p>.*?— (.*?)<\/a>'
+ regex_author = r'^(.*?)\s*\('
+
+ matches_html = re.findall(regex, html, re.DOTALL)
+
+ tweet_content = []
+ user_info = []
+ is_RT = []
+
+ for match in matches_html:
+ tweet_content_match = re.sub(r'<a[^>]*>|<\/a>', '',
+ match[0].strip())
+ tweet_content_match = tweet_content_match.replace(
+ '<br>', '\n')
+
+ user_info_match = re.sub(r'<a[^>]*>|<\/a>', '',
+ match[1].strip())
+ user_info_match = user_info_match.replace(')', '), ')
+
+ match_author = re.search(regex_author, user_info_match)
+                    author_tweet = match_author.group(1) if match_author else ""
+
+ if tweet_content_match:
+ tweet_content.append(tweet_content_match)
+ if user_info_match:
+ user_info.append(user_info_match)
+
+ is_RT_match = False
+ if author_name != author_tweet:
+ is_RT_match = True
+
+ is_RT.append(is_RT_match)
+
+ return tweet_content, is_RT, user_info
+ except Exception as e:
+ print(f"Error parsing tweet: {e}")
+ return None
+
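# Usage sketch for TwitterEmbed, assuming the tweet is still live so the
# oEmbed endpoint returns HTML; the status URL is a placeholder.
#
#     embed = TwitterEmbed('https://twitter.com/jack/status/20')
#     result = embed.embed()
#     if result:
#         content, is_RT, user_info = result  # parallel lists, one entry per match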
+
+class JsonParser:
+ """Handles parsing of tweets when the mimetype is application/json."""
+
+ def __init__(self, tweet_url):
+ self.tweet_url = tweet_url
+
+ def parse(self):
+ """Parses the archived tweets in JSON format."""
+ try:
+ response = requests.get(self.tweet_url)
+ if not (400 <= response.status_code <= 511):
+ json_data = response.json()
+ if 'data' in json_data:
+ return json_data['data'].get('text', json_data['data'])
+ else:
+ return json_data.get('text', json_data)
+ except Exception as e:
+ print(f"Error parsing JSON mimetype tweet: {e}")
+ return None
+
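# Usage sketch for JsonParser: point it at an archived snapshot served with
# the application/json mimetype; the snapshot URL is a placeholder.
#
#     parser = JsonParser('https://web.archive.org/web/20200101000000/https://twitter.com/claromes/status/1')
#     text = parser.parse()  # tweet text, the raw payload, or None on failure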
+
+class TweetsParser:
+ """Handles the overall parsing of archived tweets."""
+
+ def __init__(self, archived_tweets_response, username, metadata_options):
+ self.archived_tweets_response = archived_tweets_response
+ self.username = username
+ self.metadata_options = metadata_options
+ self.parsed_tweets = {option: [] for option in self.metadata_options}
+
+ def add_metadata(self, key, value):
+ """Appends a value to a list in the parsed data structure.
+ Defines which data will be structured and saved."""
+ if key in self.parsed_tweets:
+ self.parsed_tweets[key].append(value)
+
+ def parse(self):
+ """Parses the archived tweets metadata and structures it."""
+ for response in self.archived_tweets_response[1:]:
+ tweet_remove_char = unquote(response[2]).replace('’', '')
+ cleaned_tweet = pattern_tweet(tweet_remove_char).strip('"')
+
+ wayback_machine_url = f'https://web.archive.org/web/{response[1]}/{tweet_remove_char}'
original_tweet = delete_tweet_pathnames(
- f'https://{original_tweet}')
-
- encoded_tweet = semicolon_parse(response[2])
- encoded_archived_tweet = semicolon_parse(wayback_machine_url)
- encoded_parsed_tweet = semicolon_parse(original_tweet)
- encoded_parsed_archived_tweet = semicolon_parse(
- parsed_wayback_machine_url)
-
- content = embed(encoded_tweet)
- if content:
- available_tweet_content.append(content[0][0])
- available_tweet_is_RT.append(content[1][0])
- available_tweet_username.append(content[2][0])
-
- if response[3] == 'application/json':
- json_mimetype = parse_json_mimetype(encoded_archived_tweet)
- parsed_tweet_mimetype_json.append(json_mimetype)
-
- archived_urlkey.append(response[0])
- archived_timestamp.append(response[1])
- tweet.append(encoded_tweet)
- archived_tweet.append(encoded_archived_tweet)
- parsed_tweet.append(encoded_parsed_tweet)
- parsed_archived_tweet.append(encoded_parsed_archived_tweet)
- archived_mimetype.append(response[3])
- archived_statuscode.append(response[4])
- archived_digest.append(response[5])
- archived_length.append(response[6])
-
- return archived_urlkey, archived_timestamp, tweet, archived_tweet, parsed_tweet, parsed_tweet_mimetype_json, parsed_archived_tweet, archived_mimetype, archived_statuscode, archived_digest, archived_length, available_tweet_content, available_tweet_is_RT, available_tweet_username
+ clean_tweet_url(cleaned_tweet, self.username))
+ parsed_wayback_machine_url = f'https://web.archive.org/web/{response[1]}/{original_tweet}'
+
+ double_status = check_double_status(wayback_machine_url,
+ original_tweet)
+
+ if double_status:
+ original_tweet = delete_tweet_pathnames(
+ f'https://twitter.com/{original_tweet}')
+            elif '://' not in original_tweet:
+ original_tweet = delete_tweet_pathnames(
+ f'https://{original_tweet}')
+
+ encoded_tweet = semicolon_parse(response[2])
+ encoded_archived_tweet = semicolon_parse(wayback_machine_url)
+ encoded_parsed_tweet = semicolon_parse(original_tweet)
+ encoded_parsed_archived_tweet = semicolon_parse(
+ parsed_wayback_machine_url)
+
+ embed_parser = TwitterEmbed(encoded_tweet)
+ content = embed_parser.embed()
+ if content:
+ self.add_metadata('available_tweet_content', content[0][0])
+ self.add_metadata('available_tweet_is_RT', content[1][0])
+ self.add_metadata('available_tweet_username', content[2][0])
+
+ if response[3] == 'application/json':
+ json_parser = JsonParser(encoded_archived_tweet)
+ json_mimetype = json_parser.parse()
+ self.add_metadata('parsed_tweet_mimetype_json', json_mimetype)
+
+ self.add_metadata('archived_urlkey', response[0])
+ self.add_metadata('archived_timestamp', response[1])
+ self.add_metadata('tweet', encoded_tweet)
+ self.add_metadata('archived_tweet', encoded_archived_tweet)
+ self.add_metadata('parsed_tweet', encoded_parsed_tweet)
+ self.add_metadata('parsed_archived_tweet',
+ encoded_parsed_archived_tweet)
+ self.add_metadata('archived_mimetype', response[3])
+ self.add_metadata('archived_statuscode', response[4])
+ self.add_metadata('archived_digest', response[5])
+ self.add_metadata('archived_length', response[6])
+
+ return self.parsed_tweets
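
# Usage sketch: feeds a live CDX response into TweetsParser. Assumes this
# project's request_tweets module is importable and network access is
# available; the username and the trimmed option list are illustrative.
if __name__ == '__main__':
    from request_tweets import WaybackTweets

    sample_options = ['archived_urlkey', 'archived_timestamp', 'tweet']
    response = WaybackTweets('claromes').get()
    if response:
        parser = TweetsParser(response, 'claromes', sample_options)
        parsed = parser.parse()  # dict of lists keyed by the options above
        print(parsed['archived_timestamp'])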
-"""
-Requests data from the Wayback Machine API.
-"""
-
import requests
-def get_archived_tweets(username,
- unique=False,
- timestamp_from='',
- timestamp_to=''):
+class WaybackTweets:
"""Requests data from the Wayback CDX Server API and returns it in JSON format."""
- unique = f'&collapse=urlkey' if unique else ''
-
- if timestamp_from:
- timestamp_from = f'&from={timestamp_from}'
-
- if timestamp_to:
- timestamp_to = f'&to={timestamp_to}'
- url = f'https://web.archive.org/cdx/search/cdx?url=https://twitter.com/{username}/status/*&output=json{unique}{timestamp_from}{timestamp_to}&limit=100'
- print(f'Getting and parsing archived tweets from {url}')
-
- try:
- response = requests.get(url)
- response.raise_for_status()
-
- if not (400 <= response.status_code <= 511):
- return response.json()
- except requests.exceptions.Timeout as e:
- print(f'{e}.\nConnection to web.archive.org timed out.')
- except requests.exceptions.ConnectionError as e:
- print(
- f'{e}.\nFailed to establish a new connection with web.archive.org.'
- )
- except requests.exceptions.HTTPError as e:
- print(
- f'{e}.\nTemporarily Offline: Internet Archive services are temporarily offline. Please check Internet Archive [Twitter feed](https://twitter.com/internetarchive/) for the latest information.'
+ def __init__(self,
+ username,
+ unique=False,
+ timestamp_from='',
+ timestamp_to=''):
+ self.username = username
+ self.unique = unique
+ self.timestamp_from = timestamp_from
+ self.timestamp_to = timestamp_to
+
+    def get(self):
+        """Sends the request to the Wayback CDX Server API and returns the JSON response."""
+ unique_param = '&collapse=urlkey' if self.unique else ''
+ timestamp_from_param = f'&from={self.timestamp_from}' if self.timestamp_from else ''
+ timestamp_to_param = f'&to={self.timestamp_to}' if self.timestamp_to else ''
+
+ url = (
+ f'https://web.archive.org/cdx/search/cdx?url=https://twitter.com/{self.username}/status/*'
+            f'&output=json{unique_param}{timestamp_from_param}{timestamp_to_param}&limit=100'
)
- except UnboundLocalError as e:
- print(e)
+ print(f'Getting and parsing archived tweets from {url}')
+
+ try:
+ response = requests.get(url)
+ response.raise_for_status()
+
+ if not (400 <= response.status_code <= 511):
+ return response.json()
+ except requests.exceptions.Timeout as e:
+ print(f'{e}.\nConnection to web.archive.org timed out.')
+ except requests.exceptions.ConnectionError as e:
+ print(
+ f'{e}.\nFailed to establish a new connection with web.archive.org.'
+ )
+ except requests.exceptions.HTTPError as e:
+ print(
+ f'{e}.\nTemporarily Offline: Internet Archive services are temporarily offline. Please check Internet Archive [Twitter feed](https://twitter.com/internetarchive/) for the latest information.'
+ )
+ except UnboundLocalError as e:
+ print(e)
+ return None
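
# Usage sketch: a narrowed query. The dates are placeholders; the CDX API
# accepts 1-14 digit timestamps, and the first row of the JSON response is
# the CDX field header (which TweetsParser skips with response[1:]).
if __name__ == '__main__':
    api = WaybackTweets('claromes', unique=True,
                        timestamp_from='20200101', timestamp_to='20201231')
    results = api.get()
    if results:
        print(f'{len(results) - 1} archived snapshots returned')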
-"""
-Generates an HTML file to visualize the parsed data.
-"""
-
import json
-def read_json(json_file_path):
- """Reads and loads JSON data from a specified file path."""
- with open(json_file_path, 'r', encoding='utf-8') as f:
- return json.load(f)
-
-
-def generate_html(json_content, username):
- """Generates an HTML file."""
- html = f'<html>\n<head>\n<title>@{username} archived tweets</title>\n'
- html += '<style>\n'
- html += 'body { font-family: monospace; background-color: #f5f8fa; color: #1c1e21; margin: 0; padding: 20px; }\n'
- html += '.container { display: flex; flex-wrap: wrap; gap: 20px; }\n'
- html += '.tweet { flex: 0 1 calc(33.33% - 20px); background-color: #fff; border: 1px solid #e1e8ed; border-radius: 10px; padding: 15px; overflow-wrap: break-word; margin: auto; }\n'
- html += '.tweet strong { font-weight: bold; }\n'
- html += '.tweet a { color: #1da1f2; text-decoration: none; }\n'
- html += '.tweet a:hover { text-decoration: underline; }\n'
- html += 'h1 { text-align: center; }\n'
- html += '</style>\n'
- html += '</head>\n<body>\n'
- html += f'<h1>@{username} archived tweets</h1>\n'
- html += '<div class="container">\n'
-
- for tweet in json_content:
- html += '<div class="tweet">\n'
- html += f'<p><strong>Archived Timestamp:</strong> {tweet["archived_timestamp"]}</p>\n'
- html += f'<p><strong>Archived URL Key:</strong> {tweet["archived_urlkey"]}</p>\n'
- html += f'<p><strong>Tweet:</strong> <a href="{tweet["tweet"]}">{tweet["tweet"]}</a></p>\n'
- html += f'<p><strong>Archived Tweet:</strong> <a href="{tweet["archived_tweet"]}">{tweet["archived_tweet"]}</a></p>\n'
- html += f'<p><strong>Parsed Tweet:</strong> <a href="{tweet["parsed_tweet"]}">{tweet["parsed_tweet"]}</a></p>\n'
- html += f'<p><strong>Parsed Tweet Mimetype JSON:</strong> {tweet["parsed_tweet_mimetype_json"]}</p>\n'
- html += f'<p><strong>Parsed Archived Tweet:</strong> <a href="{tweet["parsed_archived_tweet"]}">{tweet["parsed_archived_tweet"]}</a></p>\n'
- html += f'<p><strong>Archived Mimetype:</strong> {tweet["archived_mimetype"]}</p>\n'
- html += f'<p><strong>Archived Statuscode:</strong> {tweet["archived_statuscode"]}</p>\n'
- html += f'<p><strong>Archived Digest:</strong> {tweet["archived_digest"]}</p>\n'
- html += f'<p><strong>Archived Length:</strong> {tweet["archived_length"]}</p>\n'
- html += f'<p><strong>Available Tweet Content:</strong> {tweet["available_tweet_content"]}</p>\n'
- html += f'<p><strong>Available Tweet Is Retweet:</strong> {tweet["available_tweet_is_RT"]}</p>\n'
- html += f'<p><strong>Available Tweet Username:</strong> {tweet["available_tweet_username"]}</p>\n'
- html += '</div>\n'
-
- html += '</div>\n'
- html += '</body>\n</html>'
+class HTMLTweetsVisualizer:
+ """Generates an HTML file to visualize the parsed data."""
+
+ def __init__(self, json_file_path, html_file_path, username):
+ self.json_content = self.json_loader(json_file_path)
+ self.html_file_path = html_file_path
+ self.username = username
+
+ @staticmethod
+ def json_loader(json_file_path):
+ """Reads and loads JSON data from a specified file path."""
+ with open(json_file_path, 'r', encoding='utf-8') as f:
+ return json.load(f)
+
+ def generate(self):
+ """Generates an HTML file."""
+ html = f'<html>\n<head>\n<title>@{self.username} archived tweets</title>\n'
+ html += '<style>\n'
+ html += 'body { font-family: monospace; background-color: #f5f8fa; color: #1c1e21; margin: 0; padding: 20px; }\n'
+ html += '.container { display: flex; flex-wrap: wrap; gap: 20px; }\n'
+ html += '.tweet { flex: 0 1 calc(33.33% - 20px); background-color: #fff; border: 1px solid #e1e8ed; border-radius: 10px; padding: 15px; overflow-wrap: break-word; margin: auto; }\n'
+ html += '.tweet strong { font-weight: bold; }\n'
+ html += '.tweet a { color: #1da1f2; text-decoration: none; }\n'
+ html += '.tweet a:hover { text-decoration: underline; }\n'
+ html += 'h1 { text-align: center; }\n'
+ html += '</style>\n'
+ html += '</head>\n<body>\n'
+ html += f'<h1>@{self.username} archived tweets</h1>\n'
+ html += '<div class="container">\n'
+
+ for tweet in self.json_content:
+ html += '<div class="tweet">\n'
+ html += f'<p><strong>Archived Timestamp:</strong> {tweet["archived_timestamp"]}</p>\n'
+ html += f'<p><strong>Archived URL Key:</strong> {tweet["archived_urlkey"]}</p>\n'
+ html += f'<p><strong>Tweet:</strong> <a href="{tweet["tweet"]}">{tweet["tweet"]}</a></p>\n'
+ html += f'<p><strong>Archived Tweet:</strong> <a href="{tweet["archived_tweet"]}">{tweet["archived_tweet"]}</a></p>\n'
+ html += f'<p><strong>Parsed Tweet:</strong> <a href="{tweet["parsed_tweet"]}">{tweet["parsed_tweet"]}</a></p>\n'
+ html += f'<p><strong>Parsed Tweet Mimetype JSON:</strong> {tweet["parsed_tweet_mimetype_json"]}</p>\n'
+ html += f'<p><strong>Parsed Archived Tweet:</strong> <a href="{tweet["parsed_archived_tweet"]}">{tweet["parsed_archived_tweet"]}</a></p>\n'
+ html += f'<p><strong>Archived Mimetype:</strong> {tweet["archived_mimetype"]}</p>\n'
+ html += f'<p><strong>Archived Statuscode:</strong> {tweet["archived_statuscode"]}</p>\n'
+ html += f'<p><strong>Archived Digest:</strong> {tweet["archived_digest"]}</p>\n'
+ html += f'<p><strong>Archived Length:</strong> {tweet["archived_length"]}</p>\n'
+ html += f'<p><strong>Available Tweet Content:</strong> {tweet["available_tweet_content"]}</p>\n'
+ html += f'<p><strong>Available Tweet Is Retweet:</strong> {tweet["available_tweet_is_RT"]}</p>\n'
+ html += f'<p><strong>Available Tweet Username:</strong> {tweet["available_tweet_username"]}</p>\n'
+ html += '</div>\n'
- return html
+ html += '</div>\n'
+ html += '</body>\n</html>'
+ return html
-def save_html(html_file_path, html_content):
- """Saves the generated HTML."""
- with open(html_file_path, 'w', encoding='utf-8') as f:
- f.write(html_content)
+ def save(self, html_content):
+ """Saves the generated HTML."""
+ with open(self.html_file_path, 'w', encoding='utf-8') as f:
+ f.write(html_content)
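
# Usage sketch: renders a page from an existing export. The JSON file name is
# hypothetical; TweetsExporter.save_to_json() produces files of this shape.
if __name__ == '__main__':
    viz = HTMLTweetsVisualizer('claromes_tweets_20200101000000.json',
                               'claromes_tweets.html', 'claromes')
    viz.save(viz.generate())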