From 9e4ccddd4265a61b489ba736bfe7c1bec79c682a Mon Sep 17 00:00:00 2001 From: Claromes Date: Mon, 13 Nov 2023 18:35:53 -0300 Subject: [PATCH] update readme, review display --- README.md | 42 +++++++++--------- app.py | 125 ++++++++++++++++++++++++++++-------------------------- 2 files changed, 84 insertions(+), 83 deletions(-) diff --git a/README.md b/README.md index 453e230..7461d71 100644 --- a/README.md +++ b/README.md @@ -5,52 +5,50 @@ Tool that displays, via [Wayback CDX Server API](https://github.com/internetarchive/wayback/tree/master/wayback-cdx-server), multiple archived tweets on Wayback Machine to avoid opening each link manually. The app is a prototype written in Python with Streamlit and hosted at Streamlit Cloud. -- Tweets per page defined by user -- Filter by years -- Filter by only deleted tweets +Users can define the number of tweets displayed per page and apply filters based on specific years. There is also an option to filter and view only deleted tweets. *Thanks Tristan Lee for the idea.* -## Development - -### Requirement - -- Python 3.8+ +## Testimonials -### Installation +>"Original way to find deleted tweets." — [Henk Van Ess](https://twitter.com/henkvaness/status/1693298101765701676) -$ `git clone git@github.com:claromes/waybacktweets.git` +>"This is an excellent tool to use now that most Twitter API-based tools have gone down with changes to the pricing structure over at X." — [The OSINT Newsletter - Issue #22](https://osintnewsletter.com/p/22#%C2%A7osint-community) -$ `cd waybacktweets` +>"One of the keys to using the Wayback Machine effectively is knowing what it can and can’t archive. It can, and has, archived many, many Twitter accounts... Utilize fun tools such as Wayback Tweets to do so more effectively." — [Ari Ben Am](https://memeticwarfareweekly.substack.com/p/mww-paradise-by-the-telegram-dashboard) -$ `pip install -r requirements.txt` +>"Want to see archived tweets on Wayback Machine in bulk? You can use Wayback Tweets." — [Daily OSINT](https://twitter.com/DailyOsint/status/1695065018662855102) -$ `streamlit run app.py` +>"Untuk mempermudah penelusuran arsip, gunakan Wayback Tweets." — [GIJN Indonesia](https://twitter.com/gijnIndonesia/status/1685912219408805888) -Streamlit will be served at http://localhost:8501 +>"A tool to quickly view tweets saved on archive.org." — [Irina_Tech_Tips Newsletter #3](https://irinatechtips.substack.com/p/irina_tech_tips-newsletter-3-2023#%C2%A7wayback-tweets) ## Docs - [Roadmap](docs/ROADMAP.md) - [Changelog](docs/CHANGELOG.md) -## Testimonials +## Development ->"Original way to find deleted tweets." — [Henk Van Ess](https://twitter.com/henkvaness/status/1693298101765701676) +### Requirement ->"This is an excellent tool to use now that most Twitter API-based tools have gone down with changes to the pricing structure over at X." — [The OSINT Newsletter - Issue #22](https://osintnewsletter.com/p/22#%C2%A7osint-community) +- Python 3.8+ ->"One of the keys to using the Wayback Machine effectively is knowing what it can and can’t archive. It can, and has, archived many, many Twitter accounts... Utilize fun tools such as Wayback Tweets to do so more effectively." — [Ari Ben Am](https://memeticwarfareweekly.substack.com/p/mww-paradise-by-the-telegram-dashboard) +### Installation ->"Want to see archived tweets on Wayback Machine in bulk? You can use Wayback Tweets." — [Daily OSINT](https://twitter.com/DailyOsint/status/1695065018662855102) +$ `git clone git@github.com:claromes/waybacktweets.git` ->"Untuk mempermudah penelusuran arsip, gunakan Wayback Tweets." — [GIJN Indonesia](https://twitter.com/gijnIndonesia/status/1685912219408805888) +$ `cd waybacktweets` ->"A tool to quickly view tweets saved on archive.org." — [Irina_Tech_Tips Newsletter #3](https://irinatechtips.substack.com/p/irina_tech_tips-newsletter-3-2023#%C2%A7wayback-tweets) +$ `pip install -r requirements.txt` + +$ `streamlit run app.py` + +Streamlit will be served at http://localhost:8501 ## Contributing -PRs are welcome. Please, check the [roadmap](docs/ROADMAP.md) or add a new feature. +PRs are welcome. Check the roadmap or add a new feature. > [!NOTE] > If the application is down, please check the [Streamlit Cloud Status](https://www.streamlitstatus.com/). \ No newline at end of file diff --git a/app.py b/app.py index 9fa91e1..3dfc615 100644 --- a/app.py +++ b/app.py @@ -18,11 +18,7 @@ st.set_page_config( [![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/claromes/waybacktweets?include_prereleases)](https://github.com/claromes/waybacktweets/releases) [![License](https://img.shields.io/github/license/claromes/waybacktweets)](https://github.com/claromes/waybacktweets/blob/main/LICENSE.md) - Tool that displays, via Wayback CDX Server API, multiple archived tweets on Wayback Machine to avoid opening each link manually. - - - Tweets per page defined by user - - Filter by years - - Filter by only deleted tweets + Tool that displays, via Wayback CDX Server API, multiple archived tweets on Wayback Machine to avoid opening each link manually. Users can define the number of tweets displayed per page and apply filters based on specific years. There is also an option to filter and view only deleted tweets. This tool is a prototype, please feel free to send your [feedbacks](https://github.com/claromes/waybacktweets/issues). Created and maintained by [@claromes](https://github.com/claromes). @@ -212,6 +208,60 @@ def next_page(): st.session_state.update_component += 1 scroll_into_view() +def display_tweet(): + if is_RT[0] == True: + st.info('*Retweet*') + st.write(tweet_content[0]) + st.write(f'**{user_info[0]}**') + + st.divider() + +def display_not_tweet(): + if mimetype[i] == 'application/json': + st.error('Tweet has been deleted.') + try: + response_json = requests.get(link) + + if response_json.status_code == 200: + json_data = response_json.json() + + if 'data' in json_data: + if 'text' in json_data['data']: + json_text = json_data['data']['text'] + else: + json_text = json_data['data'] + else: + if 'text' in json_data: + json_text = json_data['text'] + else: + json_text = json_data + + st.code(json_text) + st.json(json_data, expanded=False) + else: + st.error(response_json.status_code) + except requests.exceptions.Timeout: + st.error('Connection to web.archive.org timed out.') + except requests.exceptions.ConnectionError: + st.error('Failed to establish a new connection with web.archive.org.') + except UnboundLocalError: + st.empty() + + if mimetype[i] == 'text/html': + st.error('Tweet has been deleted.') + + components.iframe(link, height=500, scrolling=True) + + st.divider() + if mimetype[i] == 'warc/revisit': + st.warning('''MIME Type was not parsed.''') + + st.divider() + if mimetype[i] == 'text/plain': + st.warning('''MIME Type was not parsed.''') + + st.divider() + # UI st.title('Wayback Tweets [![Star](https://img.shields.io/github/stars/claromes/waybacktweets?style=social)](https://github.com/claromes/waybacktweets)', anchor=False) st.write('Display multiple archived tweets on Wayback Machine and avoid opening each link manually') @@ -227,6 +277,7 @@ only_deleted = st.checkbox('Only deleted tweets') query = st.button('Query', type='primary', use_container_width=True) if handle != st.session_state.current_handle: + st.session_state.current_handle = handle st.session_state.offset = 0 if query or st.session_state.count: @@ -255,62 +306,7 @@ if query or st.session_state.count: st.session_state.current_handle = handle return_none_count = 0 - - def display_tweet(): - if is_RT[0] == True: - st.info('*Retweet*') - st.write(tweet_content[0]) - st.write(f'**{user_info[0]}**') - - st.divider() - - def display_not_tweet(): - if mimetype[i] == 'application/json': - st.error('Tweet has been deleted.') - try: - response_json = requests.get(link) - - if response_json.status_code == 200: - json_data = response_json.json() - - if 'data' in json_data: - if 'text' in json_data['data']: - json_text = json_data['data']['text'] - else: - json_text = json_data['data'] - else: - if 'text' in json_data: - json_text = json_data['text'] - else: - json_text = json_data - - st.code(json_text) - st.json(json_data, expanded=False) - else: - st.error(response_json.status_code) - except requests.exceptions.Timeout: - st.error('Connection to web.archive.org timed out.') - except requests.exceptions.ConnectionError: - st.error('Failed to establish a new connection with web.archive.org.') - except UnboundLocalError: - st.empty() - - st.divider() - if mimetype[i] == 'text/html': - st.error('Tweet has been deleted.') - - components.iframe(link, height=500, scrolling=True) - - st.divider() - if mimetype[i] == 'warc/revisit': - st.warning('''MIME Type was not parsed.''') - - st.divider() - if mimetype[i] == 'text/plain': - st.warning('''MIME Type was not parsed.''') - - st.divider() - + start_index = st.session_state.offset end_index = min(st.session_state.count, start_index + tweets_per_page) @@ -334,6 +330,13 @@ if query or st.session_state.count: if mimetype[i] == 'text/html': display_tweet() + + if mimetype[i] == 'warc/revisit': + st.warning('''MIME Type was not parsed.''') + + st.divider() + if mimetype[i] == 'text/plain': + st.warning('''MIME Type was not parsed.''') elif not tweet: display_not_tweet() -- 2.34.1