From: Claromes Date: Sat, 13 May 2023 00:44:55 +0000 (-0300) Subject: fix only_deleted, add cache, display processing count X-Git-Url: https://git.claromes.com/?a=commitdiff_plain;h=8dcf71d9320f5f1dc6a787bd5024251d0aa36e55;p=waybacktweets.git fix only_deleted, add cache, display processing count --- diff --git a/README.md b/README.md index 375eeb2..40d3fe6 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Archived tweets on Wayback Machine in a easy way ## Bugs - [ ] "web.archive.org took too long to respond." -- [ ] `only_deleted` checkbox selected for handles without deleted tweets +- [x] `only_deleted` checkbox selected for handles without deleted tweets ## TODO diff --git a/app.py b/app.py index 2550b17..3809f9d 100644 --- a/app.py +++ b/app.py @@ -11,6 +11,36 @@ st.set_page_config( layout='centered' ) +# https://discuss.streamlit.io/t/remove-hide-running-man-animation-on-top-of-page/21773/3 +hide_streamlit_style = """ + + """ +st.markdown(hide_streamlit_style, unsafe_allow_html=True) + def embed(tweet): api = 'https://publish.twitter.com/oembed?url={}'.format(tweet) response = requests.get(api) @@ -20,9 +50,10 @@ def embed(tweet): else: return None +@st.cache_data(show_spinner=False) def query_api(handle): if not handle: - st.error("Type Twitter handle.") + st.warning('username, please!') st.stop() url = 'https://web.archive.org/cdx/search/cdx?url=https://twitter.com/{}/status/*&output=json'.format(handle) @@ -38,6 +69,7 @@ def parse_links(links): tweet_links = [] parsed_mimetype = [] + for link in links[1:]: url = 'https://web.archive.org/web/{}/{}'.format(link[1], link[2]) @@ -50,61 +82,56 @@ def parse_links(links): def attr(i): st.markdown(''' - {}. [Wayback Machine link]({}) - - **MIME Type:** {} - - **From:** {} - - [Tweet link]({}) + {}. **Wayback Machine:** [link]({}) | **MIME Type:** {} | **From:** {} | **Tweet:** [link]({}) '''.format(i+1, link, mimetype[i], datetime.datetime.strptime(timestamp[i], "%Y%m%d%H%M%S"), tweet_links[i])) - st.markdown('**Preview:**') - st.title('Wayback Tweets', anchor=False) st.write('Archived tweets on Wayback Machine') -handle = st.text_input('Type Twitter handle', placeholder='Type Twitter handle', label_visibility='collapsed') - -query = st.button('Query', type='primary', use_container_width=True) +handle = st.text_input('username', placeholder='username', label_visibility='collapsed') +query = st.button('Query', type='primary', use_container_width=True, key='init') +only_deleted = st.checkbox('Only deleted tweets') +if query or handle: + with st.spinner(''): + progress = st.empty() + links = query_api(handle) + parsed_links = parse_links(links)[0] + tweet_links = parse_links(links)[1] + mimetype = parse_links(links)[2] + timestamp = parse_links(links)[3] -if query: - links = query_api(handle) - parsed_links = parse_links(links)[0] - tweet_links = parse_links(links)[1] - mimetype = parse_links(links)[2] - timestamp = parse_links(links)[3] + if links or stop: + st.divider() - if links: - only_deleted = st.checkbox('Only deleted tweets') + return_none_count = 0 - st.write('{} URLs have been captured'.format(len(parsed_links))) + for i, link in enumerate(parsed_links): + tweet = embed('{}'.format(tweet_links[i])) - st.divider() + if not only_deleted: + attr(i) - for i, link in enumerate(parsed_links): - tweet = embed('{}'.format(tweet_links[i])) + if tweet == None: + st.error('Tweet has been deleted.') + st.markdown(''.format(link), unsafe_allow_html=True) + st.divider() + else: + components.html(tweet,width=700, height=700, scrolling=True) + st.divider() - if not only_deleted: - attr(i) + progress.write('{}/{} URLs have been captured'.format(i + 1, len(parsed_links))) - if tweet == None: - st.error('Tweet has been deleted.') - st.markdown(''.format(link), unsafe_allow_html=True) - st.divider() - else: - components.html(tweet, height=700, scrolling=True) - st.divider() + if only_deleted: + if tweet == None: + return_none_count += 1 + attr(i) - if only_deleted: - if tweet == None: - attr(i) + st.error('Tweet has been deleted.') + st.markdown(''.format(link), unsafe_allow_html=True) + st.divider() - st.error('Tweet has been deleted.') - st.markdown(''.format(link), unsafe_allow_html=True) - st.divider() - else: st.empty() + progress.write('{}/{} URLs have been captured'.format(return_none_count, len(parsed_links))) - if not links: - st.error('Unable to query the Wayback Machine API.') + if not links: + st.error('Unable to query the Wayback Machine API.')