import datetime
import re

import requests
import streamlit as st
import streamlit.components.v1 as components

year = datetime.datetime.now().year
-st.set_page_config(
- page_title='Wayback Tweets',
- page_icon='🏛️',
- layout='centered',
- menu_items={
-
- 'About': '''
+st.set_page_config(page_title='Wayback Tweets',
+ page_icon='🏛️',
+ layout='centered',
+ menu_items={
+ 'About':
+ '''
## 🏛️ Wayback Tweets
[Release](https://github.com/claromes/waybacktweets/releases) [License](https://github.com/claromes/waybacktweets/blob/main/LICENSE.md)
-------
''',
- 'Report a bug': 'https://github.com/claromes/waybacktweets/issues'
- }
-)
+ 'Report a bug':
+ 'https://github.com/claromes/waybacktweets/issues'
+ })
# https://discuss.streamlit.io/t/remove-hide-running-man-animation-on-top-of-page/21773/3
hide_streamlit_style = '''
if 'count' not in st.session_state:
st.session_state.count = False
+
def scroll_into_view():
js = f'''
<script>
components.html(js, width=0, height=0)
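    # Assumed mechanism (not shown in this excerpt): the f-string above
    # presumably interpolates st.session_state.update_component, so bumping
    # that counter changes the payload and makes Streamlit re-run the script
    # on each page change.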
+
def clean_tweet(tweet):
handle = st.session_state.current_handle.lower()
tweet_lower = tweet.lower()
else:
return tweet
+
def clean_link(link):
handle = st.session_state.current_handle.lower()
link = link.lower()
else:
return link
+
def pattern_tweet(tweet):
# Reply: /status//
# Link: /status///
else:
return tweet
+
def pattern_tweet_id(tweet):
# Delete sub-endpoint (/photos, /likes, /retweet...)
pattern_username = re.compile(r'https://twitter\.com/([^/]+)/status/\d+')
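    # e.g. 'https://twitter.com/jack/status/20' matches, with group(1) == 'jack'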
else:
return tweet
+
def check_double_status(url_wb, url_tweet):
    if url_wb.count('/status/') == 2 and 'twitter.com' not in url_tweet:
return True
return False
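# Illustrative note (assumed from the patterns above): a reply capture can
# repeat '/status/' in the archived URL while the parsed tweet link lacks the
# 'twitter.com' host; callers then rebuild the original URL by hand.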
+
def embed(tweet):
try:
url = f'https://publish.twitter.com/oembed?url={clean_tweet(tweet)}'
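        # Twitter's oEmbed endpoint (publish.twitter.com/oembed) returns JSON
        # whose 'html' field holds the embeddable blockquote markup; the
        # regexes below presumably pull tweet text and author out of it.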
is_RT = []
for match in matches_html:
- tweet_content_match = re.sub(r'<a[^>]*>|<\/a>', '', match[0].strip())
+ tweet_content_match = re.sub(r'<a[^>]*>|<\/a>', '',
+ match[0].strip())
tweet_content_match = tweet_content_match.replace('<br>', '\n')
- user_info_match = re.sub(r'<a[^>]*>|<\/a>', '', match[1].strip())
+ user_info_match = re.sub(r'<a[^>]*>|<\/a>', '',
+ match[1].strip())
user_info_match = user_info_match.replace(')', '), ')
match_author = re.search(regex_author, user_info_match)
except UnboundLocalError:
st.empty()
+
@st.cache_data(ttl=1800, show_spinner=False)
def tweets_count(handle, saved_at):
url = f'https://web.archive.org/cdx/search/cdx?url=https://twitter.com/{handle}/status/*&collapse=timestamp:8&output=json&from={saved_at[0]}&to={saved_at[1]}'
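    # Wayback CDX API notes: 'collapse=timestamp:8' keeps at most one capture
    # per day (the first 8 timestamp digits are YYYYMMDD), and 'output=json'
    # returns a header row followed by rows of
    # [urlkey, timestamp, original, mimetype, statuscode, digest, length].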
except UnboundLocalError:
st.empty()
+
@st.cache_data(ttl=1800, show_spinner=False)
def query_api(handle, limit, offset, saved_at):
if not handle:
except requests.exceptions.HTTPError:
st.error('''
**Temporarily Offline**
-
+
    Internet Archive services are temporarily offline. Please check the Internet Archive's [Twitter feed](https://twitter.com/internetarchive/) for the latest information.
''')
st.stop()
+
@st.cache_data(ttl=1800, show_spinner=False)
def parse_links(links):
parsed_links = []
return parsed_links, tweet_links, parsed_mimetype, timestamp
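# Assumed invariant: parse_links returns four parallel lists, so
# parsed_links[i], tweet_links[i], parsed_mimetype[i] and timestamp[i] all
# describe the same capture (the loops below index them together).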
+
def attr(i):
original_tweet = pattern_tweet_id(clean_tweet(tweet_links[i]))
if status:
- original_tweet = pattern_tweet_id(f'https://twitter.com/{tweet_links[i]}')
+ original_tweet = pattern_tweet_id(
+ f'https://twitter.com/{tweet_links[i]}')
    elif '://' not in tweet_links[i]:
original_tweet = pattern_tweet_id(f'https://{tweet_links[i]}')
- st.markdown(f'{i+1 + st.session_state.offset}. [**archived url**]({link}) · [**original url**]({original_tweet}) · **MIME Type:** {mimetype[i]} · **Saved at:** {datetime.datetime.strptime(timestamp[i], "%Y%m%d%H%M%S")}')
+ st.markdown(
+ f'{i+1 + st.session_state.offset}. [**archived url**]({link}) · [**original url**]({original_tweet}) · **MIME Type:** {mimetype[i]} · **Saved at:** {datetime.datetime.strptime(timestamp[i], "%Y%m%d%H%M%S")}'
+ )
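    # e.g. timestamp '20230101123456' parses to datetime.datetime(2023, 1, 1, 12, 34, 56)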
+
def display_tweet():
- if mimetype[i] == 'application/json' or mimetype[i] == 'text/html' or mimetype[i] == 'unk' or mimetype[i] == 'warc/revisit':
+    if mimetype[i] in ('application/json', 'text/html', 'unk',
+                       'warc/revisit'):
        if is_RT[0]:
st.info('*Retweet*')
st.write(tweet_content[0])
st.divider()
+
def display_not_tweet():
original_link = pattern_tweet_id(clean_tweet(tweet_links[i]))
if status:
- original_link = pattern_tweet_id(f'https://twitter.com/{tweet_links[i]}')
+ original_link = pattern_tweet_id(
+ f'https://twitter.com/{tweet_links[i]}')
    elif '://' not in tweet_links[i]:
original_link = pattern_tweet_id(f'https://{tweet_links[i]}')
response_html = requests.get(original_link)
- if mimetype[i] == 'text/html' or mimetype[i] == 'warc/revisit' or mimetype[i] == 'unk':
- if ('.jpg' in tweet_links[i] or '.png' in tweet_links[i]) and response_html.status_code == 200:
+    if mimetype[i] in ('text/html', 'warc/revisit', 'unk'):
+        if (('.jpg' in tweet_links[i] or '.png' in tweet_links[i])
+                and response_html.status_code == 200):
components.iframe(tweet_links[i], height=500, scrolling=True)
elif '/status/' not in original_link:
st.info("This isn't a status or is not available")
st.error('Connection to web.archive.org timed out.')
st.divider()
except requests.exceptions.ConnectionError:
- st.error('Failed to establish a new connection with web.archive.org.')
+ st.error(
+ 'Failed to establish a new connection with web.archive.org.')
st.divider()
except UnboundLocalError:
st.empty()
st.warning('MIME Type was not parsed.')
st.divider()
+
def prev_page():
st.session_state.offset -= tweets_per_page
st.session_state.update_component += 1
scroll_into_view()
+
def next_page():
st.session_state.offset += tweets_per_page
    # scroll to top: bump the counter so the injected scroll script re-runs
st.session_state.update_component += 1
scroll_into_view()
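# e.g. with tweets_per_page == 25: offset 0 shows captures 1-25; Next moves
# the offset to 25 (captures 26-50) and Previous moves it back to 0.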
-
+
+
# UI
-st.title('Wayback Tweets [Release](https://github.com/claromes/waybacktweets/releases) [GitHub](https://github.com/claromes/waybacktweets)', anchor=False)
-st.write('Display multiple archived tweets on Wayback Machine and avoid opening each link manually')
+st.title(
+    'Wayback Tweets [Release](https://github.com/claromes/waybacktweets/releases) [GitHub](https://github.com/claromes/waybacktweets)',
+    anchor=False)
+st.write(
+    'Display multiple archived tweets on the Wayback Machine without opening each link manually'
+)
handle = st.text_input('Username', placeholder='jack')
-st.session_state.saved_at = st.slider('Tweets saved between', 2006, year, (2006, year))
+st.session_state.saved_at = st.slider('Tweets saved between', 2006, year,
+ (2006, year))
-not_available = st.checkbox('Original URLs not available', help='Due to changes in X, it is possible to find available tweets if you are logged into X')
+not_available = st.checkbox(
+    'Original URLs not available',
+    help='Due to changes in X, some tweets may still be available if you are logged into X'
+)
query = st.button('Query', type='primary', use_container_width=True)
st.session_state.count = tweets_count(handle, st.session_state.saved_at)
- st.caption('The search optimization uses an 8-digit [collapsing strategy](https://github.com/internetarchive/wayback/blob/master/wayback-cdx-server/README.md?ref=hackernoon.com#collapsing), refining the captures to one per day. The number of tweets per page is set to 25, and this is a fixed value due to the API rate limit.')
+    st.caption(
+        'The search optimization uses an 8-digit [collapsing strategy](https://github.com/internetarchive/wayback/blob/master/wayback-cdx-server/README.md?ref=hackernoon.com#collapsing), refining the captures to one per day. The number of tweets per page is fixed at 25 due to the API rate limit.'
+    )
st.write(f'**{st.session_state.count} URLs have been captured**')
if st.session_state.count:
try:
progress = st.empty()
- links = query_api(handle, tweets_per_page, st.session_state.offset, st.session_state.saved_at)
+ links = query_api(handle, tweets_per_page, st.session_state.offset,
+ st.session_state.saved_at)
parse = parse_links(links)
parsed_links = parse[0]
st.session_state.current_handle = handle
return_none_count = 0
-
+
start_index = st.session_state.offset
- end_index = min(st.session_state.count, start_index + tweets_per_page)
+ end_index = min(st.session_state.count,
+ start_index + tweets_per_page)
with st.spinner('Fetching...'):
for i in range(tweets_per_page):
tweet = embed(tweet_links[i])
status = check_double_status(link, tweet_links[i])
-
+
if not not_available:
attr(i)
display_not_tweet()
- progress.write(f'{return_none_count} URLs have been captured in the range {start_index}-{end_index}')
+ progress.write(
+ f'{return_none_count} URLs have been captured in the range {start_index}-{end_index}'
+ )
if start_index <= 0:
st.session_state.prev_disabled = True
st.session_state.next_disabled = True
- prev, _ , next = st.columns([3, 4, 3])
-
- prev.button('Previous', disabled=st.session_state.prev_disabled, key='prev_button_key', on_click=prev_page, type='primary', use_container_width=True)
- next.button('Next', disabled=st.session_state.next_disabled, key='next_button_key', on_click=next_page, type='primary', use_container_width=True)
+    prev_col, _, next_col = st.columns([3, 4, 3])
+
+    prev_col.button('Previous',
+                    disabled=st.session_state.prev_disabled,
+                    key='prev_button_key',
+                    on_click=prev_page,
+                    type='primary',
+                    use_container_width=True)
+    next_col.button('Next',
+                    disabled=st.session_state.next_disabled,
+                    key='next_button_key',
+                    on_click=next_page,
+                    type='primary',
+                    use_container_width=True)
if not links:
st.error('Unable to query the Wayback Machine API.')