add prints
author Claromes <claromes@hey.com>
Wed, 5 Jun 2024 22:17:05 +0000 (19:17 -0300)
committer Claromes <claromes@hey.com>
Wed, 5 Jun 2024 22:17:05 +0000 (19:17 -0300)
requirements.txt
waybacktweets/main.py
waybacktweets/parse_tweets.py
waybacktweets/request_tweets.py
waybacktweets/viz_tweets.py

index af02bb2bdc048d83467db860064c754c7c61d8e2..b50860ba298aa1a97abfd186e5193ab4d7917ae8 100644 (file)
@@ -2,3 +2,4 @@ requests==2.30.0
 streamlit==1.27.0
 mkdocs==1.6.0
 mkdocs-material==9.5.25
+rich==13.6.0
index aa2203713724a743dc217766162cd7a028af9480..5ec3cd83ecacf5e8838da61557214ad3ad6c057e 100644 (file)
@@ -1,22 +1,23 @@
 """
 Main function for retrieving archived tweets.
 """
+from rich import print as rprint
 
 from request_tweets import WaybackTweets
 from parse_tweets import TweetsParser
 from export_tweets import TweetsExporter
 
-username = 'claromes'
+username = 'dfrlab'
 unique = False
-datetime_from = ''
-datetime_to = ''
+datetime_from = '2020-01-01'
+datetime_to = '2024-05-31'
 ascending = False
 
 
 def main():
     """Invokes the classes to retrieve archived tweets, perform necessary parsing, and save the data."""
     try:
-        api = WaybackTweets(username)
+        api = WaybackTweets(username, unique, datetime_from, datetime_to)
         archived_tweets = api.get()
 
         if archived_tweets:
@@ -34,15 +35,18 @@ def main():
 
             exporter = TweetsExporter(parsed_tweets, username,
                                       metadata_options, ascending)
-            exporter.save_to_csv()
-            exporter.save_to_json()
+            exporter.save_to_csv()
+            exporter.save_to_json()
             exporter.save_to_html()
+        else:
+            print('Nothing here.')
 
-            print(
-                f'\nNeed help? Open an issue: https://github.com/claromes/waybacktweets/issues.'
-            )
     except TypeError as e:
         print(e)
+    finally:
+        rprint(
+            f'[yellow]\nNeed help? Open an issue: https://github.com/claromes/waybacktweets/issues'
+        )
 
 
 if __name__ == '__main__':
index 17624bc32112f482d9a71fc533ce282eb044eb1f..09d4fecca697049e454a376773f40dbaabcc4c1c 100644 (file)
@@ -2,6 +2,7 @@ import requests
 import re
 from urllib.parse import unquote
 from utils import *
+from rich.progress import track
 
 
 class TwitterEmbed:
@@ -101,7 +102,9 @@ class TweetsParser:
 
     def parse(self):
         """Parses the archived tweets metadata and structures it."""
-        for response in self.archived_tweets_response[1:]:
+        for response in track(
+                self.archived_tweets_response[1:],
+                description=f'Wayback @{self.username} tweets\n'):
             tweet_remove_char = unquote(response[2]).replace('’', '')
             cleaned_tweet = pattern_tweet(tweet_remove_char).strip('"')
 
index 540f693a8b2ac051f36cad056818b4e27e3069b3..6a8451a26c97d6a0d0f075918e045dfde2a7444c 100644 (file)
@@ -23,7 +23,7 @@ class WaybackTweets:
             f'https://web.archive.org/cdx/search/cdx?url=https://twitter.com/{self.username}/status/*'
             f'&output=json{unique_param}{timestamp_from_param}{timestamp_to_param}&limit=20'
         )
-        print(f'Getting and parsing archived tweets from {url}')
+        print('Hi, archivist...')
 
         try:
             response = requests.get(url)
index ff19364f702bbad38c791c6fcef8cbb1016fafce..906fa1fed4fca6bc5e10c83e9ad43e04006ddff8 100644 (file)
@@ -36,7 +36,8 @@ class HTMLTweetsVisualizer:
         for tweet in self.json_content:
             html += '<div class="tweet">\n'
 
-            if tweet["archived_mimetype"] != 'application/json':
+            if tweet["archived_mimetype"] != 'application/json' and not tweet[
+                    "available_tweet_text"]:
                 html += f'<iframe src="{tweet["parsed_archived_tweet_url"]}" frameborder="0" scrolling="auto"></iframe>\n'
 
             html += f'<p><a href="{tweet["original_tweet_url"]}" target="_blank"><strong>Original Tweet↗</strong></a> · \n'
@@ -45,6 +46,7 @@ class HTMLTweetsVisualizer:
             html += f'<a href="{tweet["parsed_archived_tweet_url"]}" target="_blank"><strong>Parsed Archived Tweet↗</strong></a></p>\n'
 
             if tweet["available_tweet_text"]:
+                html += f'<br>\n'
                 html += f'<p><strong class="content">Available Tweet Content:</strong> {tweet["available_tweet_text"]}</p>\n'
                 html += f'<p><strong class="content">Available Tweet Is Retweet:</strong> {tweet["available_tweet_is_RT"]}</p>\n'
                 html += f'<p><strong class="content">Available Tweet Username:</strong> {tweet["available_tweet_username"]}</p>\n'