-"""
-Exports the parsed archived tweets.
-"""
-
import pandas as pd
import re
import datetime
+import os
+from viz_tweets import HTMLTweetsVisualizer
+
+
+class TweetsExporter:
+ """Handles the exporting of parsed archived tweets."""
+
+ def __init__(self, data, username, metadata_options):
+ self.data = data
+ self.username = username
+ self.metadata_options = metadata_options
+ self.formatted_datetime = self.datetime_now()
+ self.filename = f'{self.username}_tweets_{self.formatted_datetime}'
+        self.dataframe = self.create_dataframe()
-from viz_tweets import *
+ @staticmethod
+ def datetime_now():
+        """Returns the current datetime formatted as YYYYMMDDHHMMSS."""
+ now = datetime.datetime.now()
+ formatted_now = now.strftime("%Y%m%d%H%M%S")
+ formatted_now = re.sub(r'\W+', '', formatted_now)
+ return formatted_now
-def datetime_now():
- """Formats datetime."""
- now = datetime.datetime.now()
+ @staticmethod
+ def transpose_matrix(data, fill_value=None):
+        """Pads the lists in the data dict to a common length, filling
+        missing values with the specified fill value."""
+ max_length = max(len(sublist) for sublist in data.values())
- formatted_now = now.strftime("%Y%m%d%H%M%S")
+ filled_data = {
+ key: value + [fill_value] * (max_length - len(value))
+ for key, value in data.items()
+ }
- formatted_now = re.sub(r'\W+', '', formatted_now)
+ return filled_data
- return formatted_now
+    def create_dataframe(self):
+        """Creates a DataFrame from the padded data."""
+        padded_data = self.transpose_matrix(self.data)
+        df = pd.DataFrame(padded_data, columns=self.metadata_options)
-def transpose_matrix(data, fill_value=None):
- """Transposes a matrix, filling in missing values with a specified fill value if needed."""
- max_length = max(len(sublist) for sublist in data)
- filled_data = [
- sublist + [fill_value] * (max_length - len(sublist))
- for sublist in data
- ]
+ return df
- data_transposed = [list(row) for row in zip(*filled_data)]
+ def save_to_csv(self):
+ """Saves the DataFrame to a CSV file."""
+ csv_file_path = f'{self.filename}.csv'
+ self.dataframe.to_csv(csv_file_path, index=False)
- return data_transposed
+ print(f'Saved to {csv_file_path}')
+ def save_to_json(self):
+ """Saves the DataFrame to a JSON file."""
+ json_file_path = f'{self.filename}.json'
+ self.dataframe.to_json(json_file_path, orient='records', lines=False)
-def save_tweets(data, username):
- """Saves parsed archived tweets in CSV, JSON, and HTML formats."""
- data_transposed = transpose_matrix(data)
+ print(f'Saved to {json_file_path}')
- formatted_datetime = datetime_now()
- filename = f'{username}_tweets_{formatted_datetime}'
+ def save_to_html(self):
+ """Saves the DataFrame to an HTML file."""
+ json_file_path = f'{self.filename}.json'
- df = pd.DataFrame(data_transposed,
- columns=[
- 'archived_urlkey', 'archived_timestamp', 'tweet',
- 'archived_tweet', 'parsed_tweet',
- 'parsed_tweet_mimetype_json',
- 'parsed_archived_tweet', 'archived_mimetype',
- 'archived_statuscode', 'archived_digest',
- 'archived_length', 'available_tweet_content',
- 'available_tweet_is_RT', 'available_tweet_username'
- ])
+ if not os.path.exists(json_file_path):
+ self.save_to_json()
- csv_file_path = f'{filename}.csv'
- df.to_csv(csv_file_path, index=False)
+ html_file_path = f'{self.filename}.html'
- json_file_path = f'{filename}.json'
- df.to_json(json_file_path, orient='records', lines=False)
+        visualizer = HTMLTweetsVisualizer(json_file_path, html_file_path,
+                                          self.username)
- html_file_path = f'{filename}.html'
- json_content = read_json(json_file_path)
- html_content = generate_html(json_content, username)
- save_html(html_file_path, html_content)
+        html_content = visualizer.generate()
+        visualizer.save(html_content)
- print(
- f'Done. Check the files {filename}.csv, {filename}.json and {filename}.html'
- )
+ print(f'Saved to {html_file_path}')
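
# Usage sketch: a hypothetical dict standing in for TweetsParser.parse()
# output, showing how TweetsExporter pads ragged columns with None before
# writing the files. The option names are real; the values are made up.
if __name__ == '__main__':
    sample_options = ['archived_urlkey', 'archived_timestamp', 'tweet']
    sample_data = {
        'archived_urlkey': ['com,twitter)/claromes/status/1',
                            'com,twitter)/claromes/status/2'],
        'archived_timestamp': ['20200101000000', '20200102000000'],
        'tweet': ['https://twitter.com/claromes/status/1'],  # shorter: gets padded
    }
    exporter = TweetsExporter(sample_data, 'claromes', sample_options)
    print(exporter.dataframe)  # two rows; the 'tweet' column ends in None
    exporter.save_to_csv()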
"""
Main function for retrieving archived tweets.
"""
-from request_tweets import *
-from parse_tweets import *
-from export_tweets import *
+from request_tweets import WaybackTweets
+from parse_tweets import TweetsParser
+from export_tweets import TweetsExporter
username = 'claromes'
unique = False
def main():
- """Invokes the functions to retrieve archived tweets, perform necessary parsing, and save the data."""
+ """Invokes the classes to retrieve archived tweets, perform necessary parsing, and save the data."""
try:
- archived_tweets = get_archived_tweets(username, unique, datetime_from,
- datetime_to)
- if archived_tweets:
- data = parse_archived_tweets(archived_tweets, username)
-
- save_tweets(data, username)
+        api = WaybackTweets(username, unique)
+ archived_tweets = api.get()
- print(
- f'\nNeed help? Open an issue: https://github.com/claromes/waybacktweets/issues.'
- )
+ if archived_tweets:
+ metadata_options = [
+ 'archived_urlkey', 'archived_timestamp', 'tweet',
+ 'archived_tweet', 'parsed_tweet', 'parsed_tweet_mimetype_json',
+ 'available_tweet_content', 'available_tweet_is_RT',
+ 'available_tweet_username', 'parsed_archived_tweet',
+ 'archived_mimetype', 'archived_statuscode', 'archived_digest',
+ 'archived_length'
+ ]
+
+ parser = TweetsParser(archived_tweets, username, metadata_options)
+ parsed_tweets = parser.parse()
+
+ exporter = TweetsExporter(parsed_tweets, username,
+ metadata_options)
+ exporter.save_to_csv()
+ exporter.save_to_json()
+ exporter.save_to_html()
+
+            print(
+                '\nNeed help? Open an issue: https://github.com/claromes/waybacktweets/issues.'
+            )
except TypeError as e:
print(e)
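
# Note: WaybackTweets still accepts the collapse and date-range filters that
# the old get_archived_tweets() took. A hypothetical narrowed run (the dates
# are placeholders):
#
#     api = WaybackTweets(username, unique=True,
#                         timestamp_from='20230101', timestamp_to='20231231')
#     archived_tweets = api.get()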
-"""
-Parses the returned data from the Wayback CDX Server API.
-"""
-
import requests
import re
from urllib.parse import unquote
from utils import (check_double_status, clean_tweet_url,
                   delete_tweet_pathnames, pattern_tweet, semicolon_parse)
-def embed(tweet):
- """Parses the archived tweets when the tweets are still available using the Twitter Publish service from X.
- Returns the text of the tweet, if it's a retweet, and the username of the account."""
- try:
- url = f'https://publish.twitter.com/oembed?url={tweet}'
- response = requests.get(url)
-
- regex = r'<blockquote class="twitter-tweet"(?: [^>]+)?><p[^>]*>(.*?)<\/p>.*?— (.*?)<\/a>'
- regex_author = r'^(.*?)\s*\('
-
- if not (400 <= response.status_code <= 511):
- html = response.json()['html']
- author_name = response.json()['author_name']
-
- matches_html = re.findall(regex, html, re.DOTALL)
-
- tweet_content = []
- user_info = []
- is_RT = []
-
- for match in matches_html:
- tweet_content_match = re.sub(r'<a[^>]*>|<\/a>', '',
- match[0].strip())
- tweet_content_match = tweet_content_match.replace('<br>', '\n')
-
- user_info_match = re.sub(r'<a[^>]*>|<\/a>', '',
- match[1].strip())
- user_info_match = user_info_match.replace(')', '), ')
-
- match_author = re.search(regex_author, user_info_match)
- author_tweet = match_author.group(1)
-
- if tweet_content_match:
- tweet_content.append(tweet_content_match)
- if user_info_match:
- user_info.append(user_info_match)
-
- is_RT_match = False
- if author_name != author_tweet:
- is_RT_match = True
-
- is_RT.append(is_RT_match)
-
- return tweet_content, is_RT, user_info
- except:
- return None
-
-
-def parse_json_mimetype(tweet):
- """Parses the archived tweets when the mimetype is application/json and returns the text of the tweet."""
- response_json = requests.get(tweet)
-
- if not (400 <= response_json.status_code <= 511):
- json_data = response_json.json()
-
- if 'data' in json_data:
- if 'text' in json_data['data']:
- json_text = json_data['data']['text']
- return json_text
- else:
- json_text = json_data['data']
- return json_text
- else:
- if 'text' in json_data:
- json_text = json_data['text']
- return json_text
- else:
- json_text = json_data
- return json_text
-
-
-def parse_archived_tweets(archived_tweets_response, username):
- """Parses the archived tweets metadata and structures it in a more readable format."""
- archived_urlkey = []
- archived_timestamp = []
- tweet = []
- archived_tweet = []
- parsed_tweet = []
- parsed_tweet_mimetype_json = []
- available_tweet_content = []
- available_tweet_is_RT = []
- available_tweet_username = []
- parsed_archived_tweet = []
- archived_mimetype = []
- archived_statuscode = []
- archived_digest = []
- archived_length = []
-
- for response in archived_tweets_response[1:]:
- tweet_remove_char = unquote(response[2]).replace('’', '')
- cleaned_tweet = pattern_tweet(tweet_remove_char).strip('"')
-
- wayback_machine_url = f'https://web.archive.org/web/{response[1]}/{tweet_remove_char}'
-
- original_tweet = delete_tweet_pathnames(
- clean_tweet_url(cleaned_tweet, username))
-
- parsed_wayback_machine_url = f'https://web.archive.org/web/{response[1]}/{original_tweet}'
-
- double_status = check_double_status(wayback_machine_url,
- original_tweet)
-
- if double_status:
- original_tweet = delete_tweet_pathnames(
- f'https://twitter.com/{original_tweet}')
-
- elif not '://' in original_tweet:
+class TwitterEmbed:
+ """Handles parsing of tweets using the Twitter Publish service."""
+
+ def __init__(self, tweet_url):
+ self.tweet_url = tweet_url
+
+ def embed(self):
+        """Parses the archived tweets when they are still available.
+        Returns the tweet text, whether it is a retweet, and the account username."""
+ try:
+ url = f'https://publish.twitter.com/oembed?url={self.tweet_url}'
+ response = requests.get(url)
+ if not (400 <= response.status_code <= 511):
+ html = response.json()['html']
+ author_name = response.json()['author_name']
+
+ regex = r'<blockquote class="twitter-tweet"(?: [^>]+)?><p[^>]*>(.*?)<\/p>.*?— (.*?)<\/a>'
+ regex_author = r'^(.*?)\s*\('
+
+ matches_html = re.findall(regex, html, re.DOTALL)
+
+ tweet_content = []
+ user_info = []
+ is_RT = []
+
+ for match in matches_html:
+ tweet_content_match = re.sub(r'<a[^>]*>|<\/a>', '',
+ match[0].strip())
+ tweet_content_match = tweet_content_match.replace(
+ '<br>', '\n')
+
+ user_info_match = re.sub(r'<a[^>]*>|<\/a>', '',
+ match[1].strip())
+ user_info_match = user_info_match.replace(')', '), ')
+
+ match_author = re.search(regex_author, user_info_match)
+                    author_tweet = match_author.group(1) if match_author else ""
+
+ if tweet_content_match:
+ tweet_content.append(tweet_content_match)
+ if user_info_match:
+ user_info.append(user_info_match)
+
+ is_RT_match = False
+ if author_name != author_tweet:
+ is_RT_match = True
+
+ is_RT.append(is_RT_match)
+
+ return tweet_content, is_RT, user_info
+ except Exception as e:
+ print(f"Error parsing tweet: {e}")
+ return None
+
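# Usage sketch for TwitterEmbed, assuming the tweet is still live so the
# oEmbed endpoint returns HTML; the status URL is a placeholder.
#
#     embed = TwitterEmbed('https://twitter.com/jack/status/20')
#     result = embed.embed()
#     if result:
#         content, is_RT, user_info = result  # parallel lists, one entry per match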
+
+class JsonParser:
+ """Handles parsing of tweets when the mimetype is application/json."""
+
+ def __init__(self, tweet_url):
+ self.tweet_url = tweet_url
+
+ def parse(self):
+ """Parses the archived tweets in JSON format."""
+ try:
+ response = requests.get(self.tweet_url)
+ if not (400 <= response.status_code <= 511):
+ json_data = response.json()
+ if 'data' in json_data:
+ return json_data['data'].get('text', json_data['data'])
+ else:
+ return json_data.get('text', json_data)
+ except Exception as e:
+ print(f"Error parsing JSON mimetype tweet: {e}")
+ return None
+
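# Usage sketch for JsonParser: point it at an archived snapshot served with
# the application/json mimetype; the snapshot URL is a placeholder.
#
#     parser = JsonParser('https://web.archive.org/web/20200101000000/https://twitter.com/claromes/status/1')
#     text = parser.parse()  # tweet text, the raw payload, or None on failure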
+
+class TweetsParser:
+ """Handles the overall parsing of archived tweets."""
+
+ def __init__(self, archived_tweets_response, username, metadata_options):
+ self.archived_tweets_response = archived_tweets_response
+ self.username = username
+ self.metadata_options = metadata_options
+ self.parsed_tweets = {option: [] for option in self.metadata_options}
+
+ def add_metadata(self, key, value):
+ """Appends a value to a list in the parsed data structure.
+ Defines which data will be structured and saved."""
+ if key in self.parsed_tweets:
+ self.parsed_tweets[key].append(value)
+
+ def parse(self):
+ """Parses the archived tweets metadata and structures it."""
+ for response in self.archived_tweets_response[1:]:
+ tweet_remove_char = unquote(response[2]).replace('’', '')
+ cleaned_tweet = pattern_tweet(tweet_remove_char).strip('"')
+
+ wayback_machine_url = f'https://web.archive.org/web/{response[1]}/{tweet_remove_char}'
original_tweet = delete_tweet_pathnames(
- f'https://{original_tweet}')
-
- encoded_tweet = semicolon_parse(response[2])
- encoded_archived_tweet = semicolon_parse(wayback_machine_url)
- encoded_parsed_tweet = semicolon_parse(original_tweet)
- encoded_parsed_archived_tweet = semicolon_parse(
- parsed_wayback_machine_url)
-
- content = embed(encoded_tweet)
- if content:
- available_tweet_content.append(content[0][0])
- available_tweet_is_RT.append(content[1][0])
- available_tweet_username.append(content[2][0])
-
- if response[3] == 'application/json':
- json_mimetype = parse_json_mimetype(encoded_archived_tweet)
- parsed_tweet_mimetype_json.append(json_mimetype)
-
- archived_urlkey.append(response[0])
- archived_timestamp.append(response[1])
- tweet.append(encoded_tweet)
- archived_tweet.append(encoded_archived_tweet)
- parsed_tweet.append(encoded_parsed_tweet)
- parsed_archived_tweet.append(encoded_parsed_archived_tweet)
- archived_mimetype.append(response[3])
- archived_statuscode.append(response[4])
- archived_digest.append(response[5])
- archived_length.append(response[6])
-
- return archived_urlkey, archived_timestamp, tweet, archived_tweet, parsed_tweet, parsed_tweet_mimetype_json, parsed_archived_tweet, archived_mimetype, archived_statuscode, archived_digest, archived_length, available_tweet_content, available_tweet_is_RT, available_tweet_username
+ clean_tweet_url(cleaned_tweet, self.username))
+ parsed_wayback_machine_url = f'https://web.archive.org/web/{response[1]}/{original_tweet}'
+
+ double_status = check_double_status(wayback_machine_url,
+ original_tweet)
+
+ if double_status:
+ original_tweet = delete_tweet_pathnames(
+ f'https://twitter.com/{original_tweet}')
+            elif '://' not in original_tweet:
+ original_tweet = delete_tweet_pathnames(
+ f'https://{original_tweet}')
+
+ encoded_tweet = semicolon_parse(response[2])
+ encoded_archived_tweet = semicolon_parse(wayback_machine_url)
+ encoded_parsed_tweet = semicolon_parse(original_tweet)
+ encoded_parsed_archived_tweet = semicolon_parse(
+ parsed_wayback_machine_url)
+
+ embed_parser = TwitterEmbed(encoded_tweet)
+ content = embed_parser.embed()
+ if content:
+ self.add_metadata('available_tweet_content', content[0][0])
+ self.add_metadata('available_tweet_is_RT', content[1][0])
+ self.add_metadata('available_tweet_username', content[2][0])
+
+ if response[3] == 'application/json':
+ json_parser = JsonParser(encoded_archived_tweet)
+ json_mimetype = json_parser.parse()
+ self.add_metadata('parsed_tweet_mimetype_json', json_mimetype)
+
+ self.add_metadata('archived_urlkey', response[0])
+ self.add_metadata('archived_timestamp', response[1])
+ self.add_metadata('tweet', encoded_tweet)
+ self.add_metadata('archived_tweet', encoded_archived_tweet)
+ self.add_metadata('parsed_tweet', encoded_parsed_tweet)
+ self.add_metadata('parsed_archived_tweet',
+ encoded_parsed_archived_tweet)
+ self.add_metadata('archived_mimetype', response[3])
+ self.add_metadata('archived_statuscode', response[4])
+ self.add_metadata('archived_digest', response[5])
+ self.add_metadata('archived_length', response[6])
+
+ return self.parsed_tweets
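
# Usage sketch: feeds a live CDX response into TweetsParser. Assumes this
# project's request_tweets module is importable and network access is
# available; the username and the trimmed option list are illustrative.
if __name__ == '__main__':
    from request_tweets import WaybackTweets

    sample_options = ['archived_urlkey', 'archived_timestamp', 'tweet']
    response = WaybackTweets('claromes').get()
    if response:
        parser = TweetsParser(response, 'claromes', sample_options)
        parsed = parser.parse()  # dict of lists keyed by the options above
        print(parsed['archived_timestamp'])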
-"""
-Requests data from the Wayback Machine API.
-"""
-
import requests
-def get_archived_tweets(username,
- unique=False,
- timestamp_from='',
- timestamp_to=''):
+class WaybackTweets:
"""Requests data from the Wayback CDX Server API and returns it in JSON format."""
- unique = f'&collapse=urlkey' if unique else ''
-
- if timestamp_from:
- timestamp_from = f'&from={timestamp_from}'
-
- if timestamp_to:
- timestamp_to = f'&to={timestamp_to}'
- url = f'https://web.archive.org/cdx/search/cdx?url=https://twitter.com/{username}/status/*&output=json{unique}{timestamp_from}{timestamp_to}&limit=100'
- print(f'Getting and parsing archived tweets from {url}')
-
- try:
- response = requests.get(url)
- response.raise_for_status()
-
- if not (400 <= response.status_code <= 511):
- return response.json()
- except requests.exceptions.Timeout as e:
- print(f'{e}.\nConnection to web.archive.org timed out.')
- except requests.exceptions.ConnectionError as e:
- print(
- f'{e}.\nFailed to establish a new connection with web.archive.org.'
- )
- except requests.exceptions.HTTPError as e:
- print(
- f'{e}.\nTemporarily Offline: Internet Archive services are temporarily offline. Please check Internet Archive [Twitter feed](https://twitter.com/internetarchive/) for the latest information.'
+ def __init__(self,
+ username,
+ unique=False,
+ timestamp_from='',
+ timestamp_to=''):
+ self.username = username
+ self.unique = unique
+ self.timestamp_from = timestamp_from
+ self.timestamp_to = timestamp_to
+
+    def get(self):
+        """Sends the request to the Wayback CDX Server API and returns the JSON response."""
+ unique_param = '&collapse=urlkey' if self.unique else ''
+ timestamp_from_param = f'&from={self.timestamp_from}' if self.timestamp_from else ''
+ timestamp_to_param = f'&to={self.timestamp_to}' if self.timestamp_to else ''
+
+ url = (
+ f'https://web.archive.org/cdx/search/cdx?url=https://twitter.com/{self.username}/status/*'
+            f'&output=json{unique_param}{timestamp_from_param}{timestamp_to_param}&limit=100'
)
- except UnboundLocalError as e:
- print(e)
+ print(f'Getting and parsing archived tweets from {url}')
+
+ try:
+ response = requests.get(url)
+ response.raise_for_status()
+
+ if not (400 <= response.status_code <= 511):
+ return response.json()
+ except requests.exceptions.Timeout as e:
+ print(f'{e}.\nConnection to web.archive.org timed out.')
+ except requests.exceptions.ConnectionError as e:
+ print(
+ f'{e}.\nFailed to establish a new connection with web.archive.org.'
+ )
+ except requests.exceptions.HTTPError as e:
+ print(
+ f'{e}.\nTemporarily Offline: Internet Archive services are temporarily offline. Please check Internet Archive [Twitter feed](https://twitter.com/internetarchive/) for the latest information.'
+ )
+ except UnboundLocalError as e:
+ print(e)
+ return None
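
# Usage sketch: a narrowed query. The dates are placeholders; the CDX API
# accepts 1-14 digit timestamps, and the first row of the JSON response is
# the CDX field header (which TweetsParser skips with response[1:]).
if __name__ == '__main__':
    api = WaybackTweets('claromes', unique=True,
                        timestamp_from='20200101', timestamp_to='20201231')
    results = api.get()
    if results:
        print(f'{len(results) - 1} archived snapshots returned')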
-"""
-Generates an HTML file to visualize the parsed data.
-"""
-
import json
-def read_json(json_file_path):
- """Reads and loads JSON data from a specified file path."""
- with open(json_file_path, 'r', encoding='utf-8') as f:
- return json.load(f)
-
-
-def generate_html(json_content, username):
- """Generates an HTML file."""
- html = f'<html>\n<head>\n<title>@{username} archived tweets</title>\n'
- html += '<style>\n'
- html += 'body { font-family: monospace; background-color: #f5f8fa; color: #1c1e21; margin: 0; padding: 20px; }\n'
- html += '.container { display: flex; flex-wrap: wrap; gap: 20px; }\n'
- html += '.tweet { flex: 0 1 calc(33.33% - 20px); background-color: #fff; border: 1px solid #e1e8ed; border-radius: 10px; padding: 15px; overflow-wrap: break-word; margin: auto; }\n'
- html += '.tweet strong { font-weight: bold; }\n'
- html += '.tweet a { color: #1da1f2; text-decoration: none; }\n'
- html += '.tweet a:hover { text-decoration: underline; }\n'
- html += 'h1 { text-align: center; }\n'
- html += '</style>\n'
- html += '</head>\n<body>\n'
- html += f'<h1>@{username} archived tweets</h1>\n'
- html += '<div class="container">\n'
-
- for tweet in json_content:
- html += '<div class="tweet">\n'
- html += f'<p><strong>Archived Timestamp:</strong> {tweet["archived_timestamp"]}</p>\n'
- html += f'<p><strong>Archived URL Key:</strong> {tweet["archived_urlkey"]}</p>\n'
- html += f'<p><strong>Tweet:</strong> <a href="{tweet["tweet"]}">{tweet["tweet"]}</a></p>\n'
- html += f'<p><strong>Archived Tweet:</strong> <a href="{tweet["archived_tweet"]}">{tweet["archived_tweet"]}</a></p>\n'
- html += f'<p><strong>Parsed Tweet:</strong> <a href="{tweet["parsed_tweet"]}">{tweet["parsed_tweet"]}</a></p>\n'
- html += f'<p><strong>Parsed Tweet Mimetype JSON:</strong> {tweet["parsed_tweet_mimetype_json"]}</p>\n'
- html += f'<p><strong>Parsed Archived Tweet:</strong> <a href="{tweet["parsed_archived_tweet"]}">{tweet["parsed_archived_tweet"]}</a></p>\n'
- html += f'<p><strong>Archived Mimetype:</strong> {tweet["archived_mimetype"]}</p>\n'
- html += f'<p><strong>Archived Statuscode:</strong> {tweet["archived_statuscode"]}</p>\n'
- html += f'<p><strong>Archived Digest:</strong> {tweet["archived_digest"]}</p>\n'
- html += f'<p><strong>Archived Length:</strong> {tweet["archived_length"]}</p>\n'
- html += f'<p><strong>Available Tweet Content:</strong> {tweet["available_tweet_content"]}</p>\n'
- html += f'<p><strong>Available Tweet Is Retweet:</strong> {tweet["available_tweet_is_RT"]}</p>\n'
- html += f'<p><strong>Available Tweet Username:</strong> {tweet["available_tweet_username"]}</p>\n'
- html += '</div>\n'
-
- html += '</div>\n'
- html += '</body>\n</html>'
+class HTMLTweetsVisualizer:
+ """Generates an HTML file to visualize the parsed data."""
+
+ def __init__(self, json_file_path, html_file_path, username):
+ self.json_content = self.json_loader(json_file_path)
+ self.html_file_path = html_file_path
+ self.username = username
+
+ @staticmethod
+ def json_loader(json_file_path):
+ """Reads and loads JSON data from a specified file path."""
+ with open(json_file_path, 'r', encoding='utf-8') as f:
+ return json.load(f)
+
+ def generate(self):
+ """Generates an HTML file."""
+ html = f'<html>\n<head>\n<title>@{self.username} archived tweets</title>\n'
+ html += '<style>\n'
+ html += 'body { font-family: monospace; background-color: #f5f8fa; color: #1c1e21; margin: 0; padding: 20px; }\n'
+ html += '.container { display: flex; flex-wrap: wrap; gap: 20px; }\n'
+ html += '.tweet { flex: 0 1 calc(33.33% - 20px); background-color: #fff; border: 1px solid #e1e8ed; border-radius: 10px; padding: 15px; overflow-wrap: break-word; margin: auto; }\n'
+ html += '.tweet strong { font-weight: bold; }\n'
+ html += '.tweet a { color: #1da1f2; text-decoration: none; }\n'
+ html += '.tweet a:hover { text-decoration: underline; }\n'
+ html += 'h1 { text-align: center; }\n'
+ html += '</style>\n'
+ html += '</head>\n<body>\n'
+ html += f'<h1>@{self.username} archived tweets</h1>\n'
+ html += '<div class="container">\n'
+
+ for tweet in self.json_content:
+ html += '<div class="tweet">\n'
+ html += f'<p><strong>Archived Timestamp:</strong> {tweet["archived_timestamp"]}</p>\n'
+ html += f'<p><strong>Archived URL Key:</strong> {tweet["archived_urlkey"]}</p>\n'
+ html += f'<p><strong>Tweet:</strong> <a href="{tweet["tweet"]}">{tweet["tweet"]}</a></p>\n'
+ html += f'<p><strong>Archived Tweet:</strong> <a href="{tweet["archived_tweet"]}">{tweet["archived_tweet"]}</a></p>\n'
+ html += f'<p><strong>Parsed Tweet:</strong> <a href="{tweet["parsed_tweet"]}">{tweet["parsed_tweet"]}</a></p>\n'
+ html += f'<p><strong>Parsed Tweet Mimetype JSON:</strong> {tweet["parsed_tweet_mimetype_json"]}</p>\n'
+ html += f'<p><strong>Parsed Archived Tweet:</strong> <a href="{tweet["parsed_archived_tweet"]}">{tweet["parsed_archived_tweet"]}</a></p>\n'
+ html += f'<p><strong>Archived Mimetype:</strong> {tweet["archived_mimetype"]}</p>\n'
+ html += f'<p><strong>Archived Statuscode:</strong> {tweet["archived_statuscode"]}</p>\n'
+ html += f'<p><strong>Archived Digest:</strong> {tweet["archived_digest"]}</p>\n'
+ html += f'<p><strong>Archived Length:</strong> {tweet["archived_length"]}</p>\n'
+ html += f'<p><strong>Available Tweet Content:</strong> {tweet["available_tweet_content"]}</p>\n'
+ html += f'<p><strong>Available Tweet Is Retweet:</strong> {tweet["available_tweet_is_RT"]}</p>\n'
+ html += f'<p><strong>Available Tweet Username:</strong> {tweet["available_tweet_username"]}</p>\n'
+ html += '</div>\n'
- return html
+ html += '</div>\n'
+ html += '</body>\n</html>'
+ return html
-def save_html(html_file_path, html_content):
- """Saves the generated HTML."""
- with open(html_file_path, 'w', encoding='utf-8') as f:
- f.write(html_content)
+ def save(self, html_content):
+ """Saves the generated HTML."""
+ with open(self.html_file_path, 'w', encoding='utf-8') as f:
+ f.write(html_content)
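
# Usage sketch: renders a page from an existing export. The JSON file name is
# hypothetical; TweetsExporter.save_to_json() produces files of this shape.
if __name__ == '__main__':
    viz = HTMLTweetsVisualizer('claromes_tweets_20200101000000.json',
                               'claromes_tweets.html', 'claromes')
    viz.save(viz.generate())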