streamlit==1.27.0
mkdocs==1.6.0
mkdocs-material==9.5.25
+rich==13.6.0
"""
Main function for retrieving archived tweets.
"""
+from rich import print as rprint
from request_tweets import WaybackTweets
from parse_tweets import TweetsParser
from export_tweets import TweetsExporter
-username = 'claromes'
+username = 'dfrlab'
unique = False
-datetime_from = ''
-datetime_to = ''
+datetime_from = '2020-01-01'
+datetime_to = '2024-05-31'
ascending = False
def main():
"""Invokes the classes to retrieve archived tweets, perform necessary parsing, and save the data."""
try:
- api = WaybackTweets(username)
+ api = WaybackTweets(username, unique, datetime_from, datetime_to)
archived_tweets = api.get()
if archived_tweets:
exporter = TweetsExporter(parsed_tweets, username,
metadata_options, ascending)
- # exporter.save_to_csv()
- # exporter.save_to_json()
+ exporter.save_to_csv()
+ exporter.save_to_json()
exporter.save_to_html()
+ else:
+ print('Nothing here.')
- print(
- f'\nNeed help? Open an issue: https://github.com/claromes/waybacktweets/issues.'
- )
except TypeError as e:
print(e)
+ finally:
+ rprint(
+ f'[yellow]\nNeed help? Open an issue: https://github.com/claromes/waybacktweets/issues'
+ )
if __name__ == '__main__':
import re
from urllib.parse import unquote
from utils import *
+from rich.progress import track
class TwitterEmbed:
def parse(self):
"""Parses the archived tweets metadata and structures it."""
- for response in self.archived_tweets_response[1:]:
+ for response in track(
+ self.archived_tweets_response[1:],
+ description=f'Wayback @{self.username} tweets\n'):
tweet_remove_char = unquote(response[2]).replace('’', '')
cleaned_tweet = pattern_tweet(tweet_remove_char).strip('"')
f'https://web.archive.org/cdx/search/cdx?url=https://twitter.com/{self.username}/status/*'
f'&output=json{unique_param}{timestamp_from_param}{timestamp_to_param}&limit=20'
)
- print(f'Getting and parsing archived tweets from {url}')
+ print('Hi, archivist...')
try:
response = requests.get(url)
for tweet in self.json_content:
html += '<div class="tweet">\n'
- if tweet["archived_mimetype"] != 'application/json':
+ if tweet["archived_mimetype"] != 'application/json' and not tweet[
+ "available_tweet_text"]:
html += f'<iframe src="{tweet["parsed_archived_tweet_url"]}" frameborder="0" scrolling="auto"></iframe>\n'
html += f'<p><a href="{tweet["original_tweet_url"]}" target="_blank"><strong>Original Tweet↗</strong></a> · \n'
html += f'<a href="{tweet["parsed_archived_tweet_url"]}" target="_blank"><strong>Parsed Archived Tweet↗</strong></a></p>\n'
if tweet["available_tweet_text"]:
+ html += f'<br>\n'
html += f'<p><strong class="content">Available Tweet Content:</strong> {tweet["available_tweet_text"]}</p>\n'
html += f'<p><strong class="content">Available Tweet Is Retweet:</strong> {tweet["available_tweet_is_RT"]}</p>\n'
html += f'<p><strong class="content">Available Tweet Username:</strong> {tweet["available_tweet_username"]}</p>\n'