test screenshot

author Claromes <claromes@hey.com>

Mon, 14 Aug 2023 20:48:52 +0000 (17:48 -0300)

committer Claromes <claromes@hey.com>

Mon, 14 Aug 2023 20:48:52 +0000 (17:48 -0300)
author Claromes <claromes@hey.com>
Mon, 14 Aug 2023 20:48:52 +0000 (17:48 -0300)
committer Claromes <claromes@hey.com>
Mon, 14 Aug 2023 20:48:52 +0000 (17:48 -0300)
diff --git a/README.md b/README.md

index c7cf677dae13cc815d2fc2ea0d38e71b97a3b46c..ea8eaf39e4eb26e9a42b80b15d62508d0a76f3b3 100644 (file)
--- a/README.md
+++ b/README.md
@@ -60,5 +60,7 @@ Streamlit will be served at http://localhost:8501
  - [ ] Add current page to page title
  - [ ] Parse MIME type `warc/revisit`
  - [ ] Filter by period/datetime
+- [ ] Apply filters by API endpoints
+- [ ] Add contributing guidelines
  
  ## [Changelog](/CHANGELOG.md)
diff --git a/app.py b/app.py

index 299dddca5c2e03c124518d5cd8e1718178466419..41225d0387797cb6fec09ace15d91a33490593de 100644 (file)
--- a/app.py
+++ b/app.py
@@ -4,7 +4,8 @@ import streamlit as st
  import streamlit.components.v1 as components
  import json
  import re
-from bs4 import BeautifulSoup
+import os
+from selenium import webdriver
  
  __version__ = '0.2'
  
@@ -92,7 +93,7 @@ def scroll_into_view():
  def embed(tweet):
      try:
          url = 'https://publish.twitter.com/oembed?url={}'.format(tweet)
-        response = requests.get(url, timeout=1)
+        response = requests.get(url)
  
          regex = r'<blockquote class="twitter-tweet"><p[^>]*>(.*?)<\/p>.*?&mdash; (.*?)<\/a>'
          regex_author = r'^(.*?)\s*\('
@@ -141,7 +142,7 @@ def embed(tweet):
  def tweets_count(handle):
      url = 'https://web.archive.org/cdx/search/cdx?url=https://twitter.com/{}/status/*&output=json'.format(handle)
      try:
-        response = requests.get(url, timeout=10)
+        response = requests.get(url)
  
          if response.status_code == 200:
              data = response.json()
@@ -162,7 +163,7 @@ def query_api(handle, limit, offset):
  
      url = 'https://web.archive.org/cdx/search/cdx?url=https://twitter.com/{}/status/*&output=json&limit={}&offset={}'.format(handle, limit, offset)
      try:
-        response = requests.get(url, timeout=1)
+        response = requests.get(url)
  
          if response.status_code == 200 or response.status_code == 304:
              return response.json()
@@ -204,7 +205,7 @@ Display multiple archived tweets on Wayback Machine and avoid opening each link
  handle = st.text_input('username', placeholder='username', label_visibility='collapsed')
  query = st.button('Query', type='primary', use_container_width=True)
  
-bar = st.progress(0)
+bar = st.empty()
  
  if query or handle:
      if handle != st.session_state.current_handle:
@@ -222,6 +223,7 @@ if query or handle:
      only_deleted = st.checkbox('Only deleted tweets')
  
      try:
+        bar.progress(0)
          progress = st.empty()
          links = query_api(handle, tweets_per_page, st.session_state.offset)
          parsed_links = parse_links(links)[0]
@@ -263,15 +265,38 @@ if query or handle:
              def display_not_tweet():
                  if mimetype[i] == 'application/json':
                      st.error('Tweet has been deleted.')
-                    response = requests.get(link, timeout=5)
+                    response = requests.get(link)
                      json_data = response.json()
+                    json_text = response.json()['text']
  
+                    st.code(json_text)
                      st.json(json_data, expanded=False)
  
                      st.divider()
                  if mimetype[i] == 'text/html':
                      st.error('Tweet has been deleted.')
-                    components.iframe(link, height=500)
+
+                    re_link = re.search(r'[^/]+$', link)
+                    re_link = re_link.group()
+                    screenshot_filename = 'img_{}.jpg'.format(re_link)
+
+                    if not os.path.exists(screenshot_filename):
+                        options = webdriver.ChromeOptions()
+                        options.add_argument('--headless')
+
+                        driver = webdriver.Chrome(options=options)
+                        driver.get(link)
+                        driver.set_window_size(700, 700)
+
+                        current_directory = os.getcwd()
+                        screenshot_path = os.path.join(current_directory, screenshot_filename)
+
+                        driver.save_screenshot(screenshot_path)
+                        driver.quit()
+
+                    st.image(screenshot_filename)
+                    # components.iframe(link, height=500, width=700)
+                    # st.markdown('<iframe src="{}" loading="lazy" height=500 width=auto></iframe>'.format(link), unsafe_allow_html=True)
  
                      st.divider()
  
diff --git a/requirements.txt b/requirements.txt

index 5f0beb71e67b80d807d9feb18f8a3d63eaa43f6b..dee3de622d141a739d923d720221472d73dfe58e 100644 (file)
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,2 @@
  requests==2.30.0
-streamlit==1.23.1
+streamlit==1.25.0
author	Claromes <claromes@hey.com>
	Mon, 14 Aug 2023 20:48:52 +0000 (17:48 -0300)
committer	Claromes <claromes@hey.com>
	Mon, 14 Aug 2023 20:48:52 +0000 (17:48 -0300)
README.md		patch | blob | history
app.py		patch | blob | history
requirements.txt		patch | blob | history