From 33aeb4da1802cfda3e7e19ea4bb05698627c62d6 Mon Sep 17 00:00:00 2001 From: Claromes Date: Wed, 5 Jun 2024 19:17:05 -0300 Subject: [PATCH] add prints --- requirements.txt | 1 + waybacktweets/main.py | 22 +++++++++++++--------- waybacktweets/parse_tweets.py | 5 ++++- waybacktweets/request_tweets.py | 2 +- waybacktweets/viz_tweets.py | 4 +++- 5 files changed, 22 insertions(+), 12 deletions(-) diff --git a/requirements.txt b/requirements.txt index af02bb2..b50860b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,4 @@ requests==2.30.0 streamlit==1.27.0 mkdocs==1.6.0 mkdocs-material==9.5.25 +rich==13.6.0 diff --git a/waybacktweets/main.py b/waybacktweets/main.py index aa22037..5ec3cd8 100644 --- a/waybacktweets/main.py +++ b/waybacktweets/main.py @@ -1,22 +1,23 @@ """ Main function for retrieving archived tweets. """ +from rich import print as rprint from request_tweets import WaybackTweets from parse_tweets import TweetsParser from export_tweets import TweetsExporter -username = 'claromes' +username = 'dfrlab' unique = False -datetime_from = '' -datetime_to = '' +datetime_from = '2020-01-01' +datetime_to = '2024-05-31' ascending = False def main(): """Invokes the classes to retrieve archived tweets, perform necessary parsing, and save the data.""" try: - api = WaybackTweets(username) + api = WaybackTweets(username, unique, datetime_from, datetime_to) archived_tweets = api.get() if archived_tweets: @@ -34,15 +35,18 @@ def main(): exporter = TweetsExporter(parsed_tweets, username, metadata_options, ascending) - # exporter.save_to_csv() - # exporter.save_to_json() + exporter.save_to_csv() + exporter.save_to_json() exporter.save_to_html() + else: + print('Nothing here.') - print( - f'\nNeed help? Open an issue: https://github.com/claromes/waybacktweets/issues.' - ) except TypeError as e: print(e) + finally: + rprint( + f'[yellow]\nNeed help? Open an issue: https://github.com/claromes/waybacktweets/issues' + ) if __name__ == '__main__': diff --git a/waybacktweets/parse_tweets.py b/waybacktweets/parse_tweets.py index 17624bc..09d4fec 100644 --- a/waybacktweets/parse_tweets.py +++ b/waybacktweets/parse_tweets.py @@ -2,6 +2,7 @@ import requests import re from urllib.parse import unquote from utils import * +from rich.progress import track class TwitterEmbed: @@ -101,7 +102,9 @@ class TweetsParser: def parse(self): """Parses the archived tweets metadata and structures it.""" - for response in self.archived_tweets_response[1:]: + for response in track( + self.archived_tweets_response[1:], + description=f'Wayback @{self.username} tweets\n'): tweet_remove_char = unquote(response[2]).replace('’', '') cleaned_tweet = pattern_tweet(tweet_remove_char).strip('"') diff --git a/waybacktweets/request_tweets.py b/waybacktweets/request_tweets.py index 540f693..6a8451a 100644 --- a/waybacktweets/request_tweets.py +++ b/waybacktweets/request_tweets.py @@ -23,7 +23,7 @@ class WaybackTweets: f'https://web.archive.org/cdx/search/cdx?url=https://twitter.com/{self.username}/status/*' f'&output=json{unique_param}{timestamp_from_param}{timestamp_to_param}&limit=20' ) - print(f'Getting and parsing archived tweets from {url}') + print('Hi, archivist...') try: response = requests.get(url) diff --git a/waybacktweets/viz_tweets.py b/waybacktweets/viz_tweets.py index ff19364..906fa1f 100644 --- a/waybacktweets/viz_tweets.py +++ b/waybacktweets/viz_tweets.py @@ -36,7 +36,8 @@ class HTMLTweetsVisualizer: for tweet in self.json_content: html += '
\n' - if tweet["archived_mimetype"] != 'application/json': + if tweet["archived_mimetype"] != 'application/json' and not tweet[ + "available_tweet_text"]: html += f'\n' html += f'

Original Tweet↗ · \n' @@ -45,6 +46,7 @@ class HTMLTweetsVisualizer: html += f'Parsed Archived Tweet↗

\n' if tweet["available_tweet_text"]: + html += f'
\n' html += f'

Available Tweet Content: {tweet["available_tweet_text"]}

\n' html += f'

Available Tweet Is Retweet: {tweet["available_tweet_is_RT"]}

\n' html += f'

Available Tweet Username: {tweet["available_tweet_username"]}

\n' -- 2.34.1