From aa51d9901f6f44ecb067713ecc8e5f6b68df1c4e Mon Sep 17 00:00:00 2001
From: Claromes
Date: Thu, 11 May 2023 01:14:07 -0300
Subject: [PATCH] v0.0.1

---
 .gitignore       |   1 +
 README.md        |  13 ++++
 app.py           | 157 +++++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt |   2 +
 4 files changed, 173 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 app.py
 create mode 100644 requirements.txt

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..eba74f4
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+venv/
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e79d03a
--- /dev/null
+++ b/README.md
@@ -0,0 +1,13 @@
+# Wayback Tweets
+
+Archived tweets on Wayback Machine in an easy way
+
+*Thanks Tristan Lee for the idea.*
+
+## TODO
+
+- [ ] Feedback
+- [ ] Pagination
+- [ ] Prevent duplicate URLs
+- [ ] Grid
+- [ ] About
\ No newline at end of file
diff --git a/app.py b/app.py
new file mode 100644
index 0000000..307e559
--- /dev/null
+++ b/app.py
@@ -0,0 +1,157 @@
+import datetime
+from urllib.parse import quote
+
+import requests
+import streamlit as st
+import streamlit.components.v1 as components
+
+# Seconds to wait for the oEmbed and CDX endpoints before giving up.
+REQUEST_TIMEOUT = 30
+
+st.set_page_config(
+    page_title='Wayback Tweets',
+    page_icon='🏛️',
+    layout='centered'
+)
+
+
+def embed(tweet):
+    """Return the oEmbed HTML for a tweet URL, or None when unavailable.
+
+    Callers treat None as "tweet has been deleted"; request failures and
+    malformed responses also yield None instead of aborting the page.
+    """
+    try:
+        # params= percent-encodes the tweet URL inside the query string.
+        response = requests.get(
+            'https://publish.twitter.com/oembed',
+            params={'url': tweet},
+            timeout=REQUEST_TIMEOUT,
+        )
+    except requests.RequestException:
+        return None
+
+    if response.status_code not in (200, 304):
+        return None
+
+    try:
+        return response.json()['html']
+    except (ValueError, KeyError):
+        return None
+
+
+def query_api(handle):
+    """Return the Wayback Machine CDX rows for a handle's tweets, or None.
+
+    Stops the Streamlit run with an error when no handle was typed.
+    """
+    handle = (handle or '').strip().lstrip('@')
+    if not handle:
+        st.error("Type Twitter's handle")
+        st.stop()
+
+    url = 'https://web.archive.org/cdx/search/cdx?url=https://twitter.com/{}/status/*&output=json'.format(quote(handle, safe=''))
+    try:
+        response = requests.get(url, timeout=REQUEST_TIMEOUT)
+    except requests.RequestException:
+        return None
+
+    if response.status_code != 200:
+        return None
+
+    try:
+        return response.json()
+    except ValueError:
+        return None
+
+
+def parse_links(links):
+    """Split CDX rows into parallel lists.
+
+    Returns (parsed_links, tweet_links, parsed_mimetype, timestamp). The
+    first row is skipped; a None or empty input yields four empty lists.
+    """
+    parsed_links = []
+    timestamp = []
+    tweet_links = []
+    parsed_mimetype = []
+
+    for link in (links or [])[1:]:
+        url = 'https://web.archive.org/web/{}/{}'.format(link[1], link[2])
+
+        parsed_links.append(url)
+        timestamp.append(link[1])
+        tweet_links.append(link[2])
+        parsed_mimetype.append(link[3])
+
+    return parsed_links, tweet_links, parsed_mimetype, timestamp
+
+
+def attr(i):
+    """Render the details of capture number i.
+
+    Reads the module-level link, mimetype, timestamp and tweet_links set by
+    the query loop below, so it must only be called from inside that loop.
+    """
+    st.markdown('''
+    {}. [Wayback Machine link]({})
+
+    **MIME Type:** {}
+
+    **From:** {}
+
+    [Tweet link]({})
+    '''.format(i+1, link, mimetype[i], datetime.datetime.strptime(timestamp[i], "%Y%m%d%H%M%S"), tweet_links[i]))
+
+    st.markdown('**Preview:**')
+
+
+st.title('Wayback Tweets', anchor=False)
+st.write('Archived tweets on Wayback Machine')
+
+username = st.text_input('Type Twitter username', placeholder='Type Twitter username', label_visibility='collapsed')
+
+# Created outside `if query:` because st.button is True only on the rerun
+# its click triggers; a checkbox inside that branch would discard the
+# results as soon as it was toggled.
+only_deleted = st.checkbox('Only deleted tweets')
+
+query = st.button('Query', type='primary', use_container_width=True)
+
+
+if query:
+    links = query_api(username)
+
+    if links:
+        # Parse once, and only after the query succeeded.
+        parsed_links, tweet_links, mimetype, timestamp = parse_links(links)
+
+        st.write('{} URLs have been captured'.format(len(parsed_links)))
+
+        st.divider()
+
+        for i, link in enumerate(parsed_links):
+            tweet = embed(tweet_links[i])
+            deleted = tweet is None
+
+            # "Only deleted" mode shows nothing for tweets that still exist.
+            if only_deleted and not deleted:
+                continue
+
+            if not only_deleted:
+                attr(i)
+
+            if deleted:
+                st.error('Tweet has been deleted.')
+
+                if only_deleted:
+                    attr(i)
+
+                # Show the archived capture in place of the missing embed.
+                components.iframe(link, height=700, scrolling=True)
+            else:
+                components.html(tweet, height=700, scrolling=True)
+
+            st.divider()
+    else:
+        st.error('Unable to query the Wayback Machine API.')
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..3f27b86
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+requests==2.30.0
+streamlit==1.22.0
-- 
2.34.1