fix only_deleted, add cache, display processing count
author Claromes <claromes@hey.com>
Sat, 13 May 2023 00:44:55 +0000 (21:44 -0300)
committer Claromes <claromes@hey.com>
Sat, 13 May 2023 00:44:55 +0000 (21:44 -0300)
README.md
app.py

index 375eeb26c6d3f67b5f0982e1dba9db512869f27c..40d3fe6cc5e523806488a73df30b9489ff8a4375 100644 (file)
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@ Archived tweets on Wayback Machine in a easy way
 ## Bugs
 
 - [ ] "web.archive.org took too long to respond."
-- [ ] `only_deleted` checkbox selected for handles without deleted tweets
+- [x] `only_deleted` checkbox selected for handles without deleted tweets
 
 ## TODO
 
diff --git a/app.py b/app.py
index 2550b179b9268deb5d7d93d8b465d302d8f00286..3809f9d46c89713fe1a2d3d1776af8c212c8d7f9 100644 (file)
--- a/app.py
+++ b/app.py
@@ -11,6 +11,36 @@ st.set_page_config(
     layout='centered'
 )
 
+# https://discuss.streamlit.io/t/remove-hide-running-man-animation-on-top-of-page/21773/3
+hide_streamlit_style = """
+                <style>
+                div[data-testid="stToolbar"] {
+                visibility: hidden;
+                height: 0%;
+                position: fixed;
+                }
+                div[data-testid="stDecoration"] {
+                visibility: hidden;
+                height: 0%;
+                position: fixed;
+                }
+                div[data-testid="stStatusWidget"] {
+                visibility: hidden;
+                height: 0%;
+                position: fixed;
+                }
+                #MainMenu {
+                visibility: hidden;
+                height: 0%;
+                }
+                header {
+                visibility: hidden;
+                height: 0%;
+                }
+                </style>
+                """
+st.markdown(hide_streamlit_style, unsafe_allow_html=True)
+
 def embed(tweet):
     api = 'https://publish.twitter.com/oembed?url={}'.format(tweet)
     response = requests.get(api)
@@ -20,9 +50,10 @@ def embed(tweet):
     else:
         return None
 
+@st.cache_data(show_spinner=False)
 def query_api(handle):
     if not handle:
-        st.error("Type Twitter handle.")
+        st.warning('username, please!')
         st.stop()
 
     url = 'https://web.archive.org/cdx/search/cdx?url=https://twitter.com/{}/status/*&output=json'.format(handle)
@@ -38,6 +69,7 @@ def parse_links(links):
     tweet_links = []
     parsed_mimetype = []
 
+
     for link in links[1:]:
         url = 'https://web.archive.org/web/{}/{}'.format(link[1], link[2])
 
@@ -50,61 +82,56 @@ def parse_links(links):
 
 def attr(i):
     st.markdown('''
-    {}. [Wayback Machine link]({})
-
-    **MIME Type:** {}
-
-    **From:** {}
-
-    [Tweet link]({})
+    {}. **Wayback Machine:** [link]({}) | **MIME Type:** {} | **From:** {} | **Tweet:** [link]({})
     '''.format(i+1, link, mimetype[i], datetime.datetime.strptime(timestamp[i], "%Y%m%d%H%M%S"), tweet_links[i]))
 
-    st.markdown('**Preview:**')
-
 st.title('Wayback Tweets', anchor=False)
 st.write('Archived tweets on Wayback Machine')
 
-handle = st.text_input('Type Twitter handle', placeholder='Type Twitter handle', label_visibility='collapsed')
-
-query = st.button('Query', type='primary', use_container_width=True)
+handle = st.text_input('username', placeholder='username', label_visibility='collapsed')
+query = st.button('Query', type='primary', use_container_width=True, key='init')
+only_deleted = st.checkbox('Only deleted tweets')
 
+if query or handle:
+    with st.spinner(''):
+        progress = st.empty()
+        links = query_api(handle)
+        parsed_links = parse_links(links)[0]
+        tweet_links = parse_links(links)[1]
+        mimetype = parse_links(links)[2]
+        timestamp = parse_links(links)[3]
 
-if query:
-    links = query_api(handle)
-    parsed_links = parse_links(links)[0]
-    tweet_links = parse_links(links)[1]
-    mimetype = parse_links(links)[2]
-    timestamp = parse_links(links)[3]
+        if links or stop:
+            st.divider()
 
-    if links:
-        only_deleted = st.checkbox('Only deleted tweets')
+            return_none_count = 0
 
-        st.write('{} URLs have been captured'.format(len(parsed_links)))
+            for i, link in enumerate(parsed_links):
+                tweet = embed('{}'.format(tweet_links[i]))
 
-        st.divider()
+                if not only_deleted:
+                    attr(i)
 
-        for i, link in enumerate(parsed_links):
-            tweet = embed('{}'.format(tweet_links[i]))
+                    if tweet == None:
+                        st.error('Tweet has been deleted.')
+                        st.markdown('<iframe src="{}" height=700 width=700 scrolling="no"></iframe>'.format(link), unsafe_allow_html=True)
+                        st.divider()
+                    else:
+                        components.html(tweet,width=700, height=700, scrolling=True)
+                        st.divider()
 
-            if not only_deleted:
-                attr(i)
+                    progress.write('{}/{} URLs have been captured'.format(i + 1, len(parsed_links)))
 
-                if tweet == None:
-                    st.error('Tweet has been deleted.')
-                    st.markdown('<iframe src="{}" height=700 width=550></iframe>'.format(link), unsafe_allow_html=True)
-                    st.divider()
-                else:
-                    components.html(tweet, height=700, scrolling=True)
-                    st.divider()
+                if only_deleted:
+                    if tweet == None:
+                        return_none_count += 1
+                        attr(i)
 
-            if only_deleted:
-                if tweet == None:
-                    attr(i)
+                        st.error('Tweet has been deleted.')
+                        st.markdown('<iframe src="{}" height=700 width=700 scrolling="no"></iframe>'.format(link), unsafe_allow_html=True)
+                        st.divider()
 
-                    st.error('Tweet has been deleted.')
-                    st.markdown('<iframe src="{}" height=700 width=550></iframe>'.format(link), unsafe_allow_html=True)
-                    st.divider()
-                else: st.empty()
+                        progress.write('{}/{} URLs have been captured'.format(return_none_count, len(parsed_links)))
 
-    if not links:
-        st.error('Unable to query the Wayback Machine API.')
+        if not links:
+            st.error('Unable to query the Wayback Machine API.')