v0.0.1
authorClaromes <claromes@hey.com>
Thu, 11 May 2023 04:14:07 +0000 (01:14 -0300)
committerClaromes <claromes@hey.com>
Thu, 11 May 2023 04:14:07 +0000 (01:14 -0300)
.gitignore [new file with mode: 0644]
README.md [new file with mode: 0644]
app.py [new file with mode: 0644]
requirements.txt [new file with mode: 0644]

diff --git a/.gitignore b/.gitignore
new file mode 100644 (file)
index 0000000..eba74f4
--- /dev/null
@@ -0,0 +1 @@
+venv/
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644 (file)
index 0000000..e79d03a
--- /dev/null
+++ b/README.md
@@ -0,0 +1,13 @@
+# Wayback Tweets
+
+Archived tweets on Wayback Machine in an easy way
+
+*Thanks Tristan Lee for the idea.*
+
+## TODO
+
+- [ ] Feedback
+- [ ] Pagination
+- [ ] Prevent duplicate URLs
+- [ ] Grid
+- [ ] About
\ No newline at end of file
diff --git a/app.py b/app.py
new file mode 100644 (file)
index 0000000..307e559
--- /dev/null
+++ b/app.py
@@ -0,0 +1,109 @@
+import datetime
+import html
+
+import requests
+import streamlit as st
+import streamlit.components.v1 as components
+
+# Browser-tab title, icon and page layout for the app.
+st.set_page_config(page_title='Wayback Tweets', page_icon='🏛️', layout='centered')
+
+def embed(tweet):
+    """Return the embeddable HTML for a tweet, or None if it is unavailable.
+
+    Asks Twitter's oEmbed endpoint (publish.twitter.com) for the tweet at
+    the given URL.
+
+    Args:
+        tweet: URL of the tweet to embed.
+
+    Returns:
+        The value of the ``html`` field of the oEmbed JSON response when the
+        endpoint answers 200 or 304; None for any other status code or when
+        the response carries no ``html`` field.
+    """
+    # Pass the tweet URL through ``params`` so it is percent-encoded.
+    # Archived URLs often carry their own query string, and an unencoded
+    # '&' or '#' would truncate or corrupt the ``url`` parameter.
+    response = requests.get(
+        'https://publish.twitter.com/oembed',
+        params={'url': tweet},
+    )
+
+    if response.status_code in (200, 304):
+        return response.json().get('html')
+    return None
+
+def query_api(handle):
+    """Query the Wayback Machine CDX API for captures of a handle's tweets.
+
+    Args:
+        handle: Twitter handle typed by the user. When it is empty, an error
+            is shown and ``st.stop()`` is called.
+
+    Returns:
+        The decoded JSON body when the API answers 200, otherwise None.
+    """
+    # Guard clause: nothing to search for without a handle.
+    if not handle:
+        st.error("Type Twitter's handle")
+        st.stop()
+
+    cdx_url = 'https://web.archive.org/cdx/search/cdx?url=https://twitter.com/{}/status/*&output=json'.format(handle)
+    reply = requests.get(cdx_url)
+
+    return reply.json() if reply.status_code == 200 else None
+
+def parse_links(links):
+    """Split CDX result rows into four parallel lists.
+
+    The first row of ``links`` is skipped (presumably the CDX column-header
+    row; confirm against the API output). For every remaining row, index 1
+    is used as the capture timestamp, index 2 as the original tweet URL and
+    index 3 as the MIME type.
+
+    Args:
+        links: Sequence of rows, or None / empty when the query produced
+            nothing usable.
+
+    Returns:
+        A tuple ``(parsed_links, tweet_links, parsed_mimetype, timestamp)``
+        where ``parsed_links`` holds the Wayback Machine URL of each capture.
+        All four lists are empty when ``links`` is None or empty.
+    """
+    # A failed query yields None; treat it like an empty result instead of
+    # raising TypeError when slicing.
+    rows = links[1:] if links else []
+
+    timestamp = [row[1] for row in rows]
+    tweet_links = [row[2] for row in rows]
+    parsed_mimetype = [row[3] for row in rows]
+    parsed_links = [
+        'https://web.archive.org/web/{}/{}'.format(captured, original)
+        for captured, original in zip(timestamp, tweet_links)
+    ]
+
+    return parsed_links, tweet_links, parsed_mimetype, timestamp
+
+def attr(i):
+    """Render the details of the i-th capture as Markdown.
+
+    Reads the module-level names ``link``, ``mimetype``, ``timestamp`` and
+    ``tweet_links``, so it must be called only after they have been set.
+
+    Args:
+        i: Zero-based index of the capture; displayed as ``i + 1``.
+    """
+    # Capture timestamps are 14-digit strings: YYYYMMDDhhmmss.
+    captured_at = datetime.datetime.strptime(timestamp[i], "%Y%m%d%H%M%S")
+
+    details = '''
+    {}. [Wayback Machine link]({})
+
+    **MIME Type:** {}
+
+    **From:** {}
+
+    [Tweet link]({})
+    '''.format(i+1, link, mimetype[i], captured_at, tweet_links[i])
+
+    st.markdown(details)
+    st.markdown('**Preview:**')
+
+# Page body: title, handle input and the query button.
+st.title('Wayback Tweets', anchor=False)
+st.write('Archived tweets on Wayback Machine')
+
+username = st.text_input('Type Twitter username', placeholder='Type Twitter username', label_visibility='collapsed')
+
+query = st.button('Query', type='primary', use_container_width=True)
+
+
+if query:
+    links = query_api(username)
+
+    if not links:
+        # None (request failed) or an empty result list.
+        st.error('Unable to query the Wayback Machine API.')
+    else:
+        # Parse once, and only after the response is known to be usable.
+        # These stay module-level names because attr() reads them.
+        parsed_links, tweet_links, mimetype, timestamp = parse_links(links)
+
+        # NOTE(review): this checkbox only exists inside the button branch.
+        # If Streamlit reruns the script when it is toggled, the button will
+        # no longer be "pressed" and the results may disappear. Confirm this
+        # and consider keeping the results in st.session_state.
+        only_deleted = st.checkbox('Only deleted tweets')
+
+        st.write('{} URLs have been captured'.format(len(parsed_links)))
+
+        st.divider()
+
+        for i, link in enumerate(parsed_links):
+            tweet = embed(tweet_links[i])
+            is_deleted = tweet is None
+
+            # Archive URLs come from an external index, so escape them before
+            # placing them in an HTML attribute rendered with
+            # unsafe_allow_html.
+            snapshot_frame = '<iframe src="{}" height=700 width=550></iframe>'.format(
+                html.escape(link, quote=True)
+            )
+
+            if only_deleted:
+                # Show only tweets the oEmbed endpoint no longer serves.
+                if is_deleted:
+                    st.error('Tweet has been deleted.')
+                    attr(i)
+                    st.markdown(snapshot_frame, unsafe_allow_html=True)
+                    st.divider()
+                else:
+                    st.empty()
+                continue
+
+            attr(i)
+
+            if is_deleted:
+                # Fall back to the archived snapshot when the live embed is gone.
+                st.error('Tweet has been deleted.')
+                st.markdown(snapshot_frame, unsafe_allow_html=True)
+            else:
+                components.html(tweet, height=700, scrolling=True)
+
+            st.divider()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644 (file)
index 0000000..3f27b86
--- /dev/null
@@ -0,0 +1,2 @@
+requests==2.30.0
+streamlit==1.22.0