From: Claromes Date: Tue, 4 Jun 2024 20:05:03 +0000 (-0300) Subject: review parse text X-Git-Url: https://git.claromes.com/?a=commitdiff_plain;h=ef5526ec2a8f9e847d66bff55362e4425795dc31;p=waybacktweets.git review parse text --- diff --git a/waybacktweets/export_tweets.py b/waybacktweets/export_tweets.py index 1bd42a4..2e0708b 100644 --- a/waybacktweets/export_tweets.py +++ b/waybacktweets/export_tweets.py @@ -35,7 +35,9 @@ class TweetsExporter: for key, value in data.items() } - return filled_data + data_transposed = [list(row) for row in zip(*filled_data.values())] + + return data_transposed @staticmethod def create_dataframe(self): diff --git a/waybacktweets/main.py b/waybacktweets/main.py index 8dac791..cd9d75d 100644 --- a/waybacktweets/main.py +++ b/waybacktweets/main.py @@ -34,8 +34,8 @@ def main(): exporter = TweetsExporter(parsed_tweets, username, metadata_options) exporter.save_to_csv() - exporter.save_to_json() - exporter.save_to_html() + # exporter.save_to_json() + # exporter.save_to_html() print( f'\nNeed help? Open an issue: https://github.com/claromes/waybacktweets/issues.' diff --git a/waybacktweets/parse_tweets.py b/waybacktweets/parse_tweets.py index 6f86356..d3f9755 100644 --- a/waybacktweets/parse_tweets.py +++ b/waybacktweets/parse_tweets.py @@ -62,17 +62,21 @@ class TwitterEmbed: class JsonParser: """Handles parsing of tweets when the mimetype is application/json.""" - def __init__(self, tweet_url): - self.tweet_url = tweet_url + def __init__(self, archived_tweet_url): + self.archived_tweet_url = archived_tweet_url def parse(self): """Parses the archived tweets in JSON format.""" try: - response = requests.get(self.tweet_url) + response = requests.get(self.archived_tweet_url) if not (400 <= response.status_code <= 511): json_data = response.json() + if 'data' in json_data: return json_data['data'].get('text', json_data['data']) + elif 'retweeted_status' in json_data: + return json_data['retweeted_status'].get( + 'text', json_data['retweeted_status']) else: return json_data.get('text', json_data) except Exception as e: @@ -124,15 +128,22 @@ class TweetsParser: embed_parser = TwitterEmbed(encoded_tweet) content = embed_parser.embed() + if content: - self.add_metadata('available_tweet_content', content[0][0]) + self.add_metadata('available_tweet_content', + semicolon_parse(content[0][0])) self.add_metadata('available_tweet_is_RT', content[1][0]) - self.add_metadata('available_tweet_username', content[2][0]) + self.add_metadata('available_tweet_username', + semicolon_parse(content[2][0])) if response[3] == 'application/json': json_parser = JsonParser(encoded_archived_tweet) - json_mimetype = json_parser.parse() - self.add_metadata('parsed_tweet_mimetype_json', json_mimetype) + text_json = json_parser.parse() + parsed_text_json = semicolon_parse(text_json) + else: + parsed_text_json = None + + self.add_metadata('parsed_tweet_mimetype_json', parsed_text_json) self.add_metadata('archived_urlkey', response[0]) self.add_metadata('archived_timestamp', response[1])