From 8a4debb7f9a7dc8682c4b24d7bdb8a5b137d10d3 Mon Sep 17 00:00:00 2001
From: Claromes <claromes@hey.com>
Date: Tue, 28 May 2024 06:30:07 -0300
Subject: [PATCH] add generate_html

---
 .gitignore                                    |  1 +
 waybacktweets/export_tweets.py                | 14 +++++-
 waybacktweets/main.py                         |  4 +-
 .../{tweet_parse.py => parse_tweets.py}       | 39 ---------------
 waybacktweets/request_tweets.py               |  2 +-
 waybacktweets/viz_tweets.py                   | 50 +++++++++++++++++++
 6 files changed, 66 insertions(+), 44 deletions(-)
 rename waybacktweets/{tweet_parse.py => parse_tweets.py} (82%)
 create mode 100644 waybacktweets/viz_tweets.py

diff --git a/.gitignore b/.gitignore
index 64b33db..04e940c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 .venv/
 *.csv
 *.json
+*.html
 waybacktweets/__pycache__
 waybacktweets/notes.md
diff --git a/waybacktweets/export_tweets.py b/waybacktweets/export_tweets.py
index efcd015..03847c9 100644
--- a/waybacktweets/export_tweets.py
+++ b/waybacktweets/export_tweets.py
@@ -2,6 +2,8 @@ import pandas as pd
 import re
 import datetime
 
+from viz_tweets import *
+
 
 def datetime_now():
     now = datetime.datetime.now()
@@ -25,7 +27,7 @@ def transpose_matrix(data, fill_value=None):
     return data_transposed
 
 
-def response_tweets_csv(data, username):
+def response_tweets(data, username):
     data_transposed = transpose_matrix(data)
 
     formatted_datetime = datetime_now()
@@ -48,4 +50,12 @@ def response_tweets_csv(data, username):
     json_file_path = f'{filename}.json'
     df.to_json(json_file_path, orient='records', lines=False)
 
-    print(f'Done. Check the files {filename}.csv and {filename}.json')
+    html_file_path = f'{filename}.html'
+
+    json_content = read_json(json_file_path)
+    html_content = generate_html(json_content, username)
+    save_html(html_file_path, html_content)
+
+    print(
+        f'Done. Check the files {filename}.csv, {filename}.json and {filename}.html'
+    )
diff --git a/waybacktweets/main.py b/waybacktweets/main.py
index ae63093..eb1f795 100644
--- a/waybacktweets/main.py
+++ b/waybacktweets/main.py
@@ -1,5 +1,5 @@
 from request_tweets import *
-from tweet_parse import *
+from parse_tweets import *
 from export_tweets import *
 
 username = 'claromes'
@@ -15,7 +15,7 @@ def main():
         if archived_tweets:
             data = parse_archived_tweets(archived_tweets, username)
 
-            response_tweets_csv(data, username)
+            response_tweets(data, username)
 
         print(
             f'\nNeed help? Open an issue: https://github.com/claromes/waybacktweets/issues.'
diff --git a/waybacktweets/tweet_parse.py b/waybacktweets/parse_tweets.py
similarity index 82%
rename from waybacktweets/tweet_parse.py
rename to waybacktweets/parse_tweets.py
index 167e1ce..5486326 100644
--- a/waybacktweets/tweet_parse.py
+++ b/waybacktweets/parse_tweets.py
@@ -172,42 +172,3 @@ def parse_archived_tweets(archived_tweets_response, username):
 #             st.info(f'Replying to {st.session_state.current_handle}')
 #         else:
 #             components.iframe(clean_link(link), height=500, scrolling=True)
-
-#     elif mimetype[i] == 'application/json':
-#         try:
-#             response_json = requests.get(link)
-
-#             if response_json.status_code == 200:
-#                 json_data = response_json.json()
-
-#                 if 'data' in json_data:
-#                     if 'text' in json_data['data']:
-#                         json_text = json_data['data']['text']
-#                     else:
-#                         json_text = json_data['data']
-#                 else:
-#                     if 'text' in json_data:
-#                         json_text = json_data['text']
-#                     else:
-#                         json_text = json_data
-
-#                 st.code(json_text)
-#                 st.json(json_data, expanded=False)
-
-#                 st.divider()
-#             else:
-#                 st.error(response_json.status_code)
-
-#                 st.divider()
-#         except requests.exceptions.Timeout:
-#             st.error('Connection to web.archive.org timed out.')
-#             st.divider()
-#         except requests.exceptions.ConnectionError:
-#             st.error(
-#                 'Failed to establish a new connection with web.archive.org.')
-#             st.divider()
-#         except UnboundLocalError:
-#             st.empty()
-#     else:
-#         st.warning('MIME Type was not parsed.')
-#         st.divider()
diff --git a/waybacktweets/request_tweets.py b/waybacktweets/request_tweets.py
index cc4b2a2..72f9f0e 100644
--- a/waybacktweets/request_tweets.py
+++ b/waybacktweets/request_tweets.py
@@ -14,7 +14,7 @@ def get_archived_tweets(username,
     if timestamp_to:
         timestamp_to = f'&to={timestamp_to}'
 
-    url = f'https://web.archive.org/cdx/search/cdx?url=https://twitter.com/{username}/status/*&output=json{unique}{timestamp_from}{timestamp_to}&limit=10'
+    url = f'https://web.archive.org/cdx/search/cdx?url=https://twitter.com/{username}/status/*&output=json{unique}{timestamp_from}{timestamp_to}&limit=100'
     print(f'Getting and parsing archived tweets from {url}')
 
     try:
diff --git a/waybacktweets/viz_tweets.py b/waybacktweets/viz_tweets.py
new file mode 100644
index 0000000..ac47297
--- /dev/null
+++ b/waybacktweets/viz_tweets.py
@@ -0,0 +1,101 @@
+import json
+
+
+# (label, record key, render as link) for every row of a tweet card, in
+# display order. Underscore-prefixed so star-imports do not re-export it.
+_TWEET_FIELDS = (
+    ('Archived Timestamp', 'archived_timestamp', False),
+    ('Archived URL Key', 'archived_urlkey', False),
+    ('Tweet', 'tweet', True),
+    ('Archived Tweet', 'archived_tweet', True),
+    ('Parsed Tweet', 'parsed_tweet', True),
+    ('Parsed Tweet Mimetype JSON', 'parsed_tweet_mimetype_json', False),
+    ('Parsed Archived Tweet', 'parsed_archived_tweet', True),
+    ('Archived Mimetype', 'archived_mimetype', False),
+    ('Archived Statuscode', 'archived_statuscode', False),
+    ('Archived Digest', 'archived_digest', False),
+    ('Archived Length', 'archived_length', False),
+    ('Available Tweet Content', 'available_tweet_content', False),
+    ('Available Tweet Is Retweet', 'available_tweet_is_RT', False),
+    ('Available Tweet Username', 'available_tweet_username', False),
+)
+
+# Stylesheet embedded in the generated page.
+_PAGE_STYLE = (
+    'body { font-family: monospace; background-color: #f5f8fa; color: #1c1e21; margin: 0; padding: 20px; }\n'
+    '.container { display: flex; flex-wrap: wrap; gap: 20px; }\n'
+    '.tweet { flex: 0 1 calc(33.33% - 20px); background-color: #fff; border: 1px solid #e1e8ed; border-radius: 10px; padding: 15px; overflow-wrap: break-word; margin: auto; }\n'
+    '.tweet strong { font-weight: bold; }\n'
+    '.tweet a { color: #1da1f2; text-decoration: none; }\n'
+    '.tweet a:hover { text-decoration: underline; }\n'
+    'h1 { text-align: center; }\n'
+)
+
+
+def read_json(json_file_path):
+    """Load and return the parsed JSON document stored at ``json_file_path``.
+
+    The file is decoded as UTF-8.
+    """
+    with open(json_file_path, 'r', encoding='utf-8') as f:
+        return json.load(f)
+
+
+def generate_html(json_content, username):
+    """Build a standalone HTML page with one card per archived tweet.
+
+    Args:
+        json_content: Iterable of dict-like records; each must provide every
+            key listed in ``_TWEET_FIELDS``.
+        username: Handle shown in the page title and heading.
+
+    Returns:
+        The complete HTML document as a single string.
+
+    Raises:
+        KeyError: If a record lacks one of the expected keys.
+    """
+    # Imported locally so `from viz_tweets import *` exposes no extra name.
+    from html import escape
+
+    # Every interpolated value is escaped: it originates from archived
+    # (untrusted) data and must not be able to inject markup.
+    safe_username = escape(str(username))
+    parts = [
+        '<!DOCTYPE html>\n',
+        '<html>\n<head>\n',
+        # The file is written as UTF-8 by save_html; declare it so browsers
+        # do not guess a different encoding.
+        '<meta charset="utf-8">\n',
+        f'<title>@{safe_username} archived tweets</title>\n',
+        '<style>\n',
+        _PAGE_STYLE,
+        '</style>\n',
+        '</head>\n<body>\n',
+        f'<h1>@{safe_username} archived tweets</h1>\n',
+        '<div class="container">\n',
+    ]
+
+    for tweet in json_content:
+        parts.append('<div class="tweet">\n')
+        for label, key, is_link in _TWEET_FIELDS:
+            raw = tweet[key]
+            text = escape(str(raw))
+            # Only http(s) values become anchors; anything else (e.g. a
+            # null field) is shown as plain text instead of a broken link.
+            if (is_link and isinstance(raw, str)
+                    and raw.startswith(('http://', 'https://'))):
+                text = f'<a href="{text}">{text}</a>'
+            parts.append(f'<p><strong>{label}:</strong> {text}</p>\n')
+        parts.append('</div>\n')
+
+    parts.append('</div>\n')
+    parts.append('</body>\n</html>')
+
+    return ''.join(parts)
+
+
+def save_html(html_file_path, html_content):
+    """Write ``html_content`` to ``html_file_path`` encoded as UTF-8."""
+    with open(html_file_path, 'w', encoding='utf-8') as f:
+        f.write(html_content)
-- 
2.34.1