From: Claromes <claromes@hey.com>
Date: Mon, 14 Aug 2023 20:48:52 +0000 (-0300)
Subject: test screenshot
X-Git-Url: https://git.claromes.com/?a=commitdiff_plain;h=2a4ba380b3f6f368eb7e4b1e5bcacb0976277886;p=waybacktweets.git

test screenshot
---

diff --git a/README.md b/README.md
index c7cf677..ea8eaf3 100644
--- a/README.md
+++ b/README.md
@@ -60,5 +60,7 @@ Streamlit will be served at http://localhost:8501
 - [ ] Add current page to page title
 - [ ] Parse MIME type `warc/revisit`
 - [ ] Filter by period/datetime
+- [ ] Apply filters by API endpoints
+- [ ] Add contributing guidelines
 
 ## [Changelog](/CHANGELOG.md)
diff --git a/app.py b/app.py
index 299dddc..41225d0 100644
--- a/app.py
+++ b/app.py
@@ -4,7 +4,8 @@ import streamlit as st
 import streamlit.components.v1 as components
 import json
 import re
-from bs4 import BeautifulSoup
+import os
+from selenium import webdriver
 
 __version__ = '0.2'
 
@@ -92,7 +93,7 @@ def scroll_into_view():
 def embed(tweet):
     try:
         url = 'https://publish.twitter.com/oembed?url={}'.format(tweet)
-        response = requests.get(url, timeout=1)
+        response = requests.get(url)
 
         regex = r'<blockquote class="twitter-tweet"><p[^>]*>(.*?)<\/p>.*?&mdash; (.*?)<\/a>'
         regex_author = r'^(.*?)\s*\('
@@ -141,7 +142,7 @@ def embed(tweet):
 def tweets_count(handle):
     url = 'https://web.archive.org/cdx/search/cdx?url=https://twitter.com/{}/status/*&output=json'.format(handle)
     try:
-        response = requests.get(url, timeout=10)
+        response = requests.get(url)
 
         if response.status_code == 200:
             data = response.json()
@@ -162,7 +163,7 @@ def query_api(handle, limit, offset):
 
     url = 'https://web.archive.org/cdx/search/cdx?url=https://twitter.com/{}/status/*&output=json&limit={}&offset={}'.format(handle, limit, offset)
     try:
-        response = requests.get(url, timeout=1)
+        response = requests.get(url)
 
         if response.status_code == 200 or response.status_code == 304:
             return response.json()
@@ -204,7 +205,7 @@ Display multiple archived tweets on Wayback Machine and avoid opening each link
 handle = st.text_input('username', placeholder='username', label_visibility='collapsed')
 query = st.button('Query', type='primary', use_container_width=True)
 
-bar = st.progress(0)
+bar = st.empty()
 
 if query or handle:
     if handle != st.session_state.current_handle:
@@ -222,6 +223,7 @@ if query or handle:
     only_deleted = st.checkbox('Only deleted tweets')
 
     try:
+        bar.progress(0)
         progress = st.empty()
         links = query_api(handle, tweets_per_page, st.session_state.offset)
         parsed_links = parse_links(links)[0]
@@ -263,15 +265,38 @@ if query or handle:
             def display_not_tweet():
                 if mimetype[i] == 'application/json':
                     st.error('Tweet has been deleted.')
-                    response = requests.get(link, timeout=5)
+                    response = requests.get(link)
                     json_data = response.json()
+                    json_text = response.json()['text']
 
+                    st.code(json_text)
                     st.json(json_data, expanded=False)
 
                     st.divider()
                 if mimetype[i] == 'text/html':
                     st.error('Tweet has been deleted.')
-                    components.iframe(link, height=500)
+
+                    re_link = re.search(r'[^/]+$', link)
+                    re_link = re_link.group()
+                    screenshot_filename = 'img_{}.jpg'.format(re_link)
+
+                    if not os.path.exists(screenshot_filename):
+                        options = webdriver.ChromeOptions()
+                        options.add_argument('--headless')
+
+                        driver = webdriver.Chrome(options=options)
+                        driver.get(link)
+                        driver.set_window_size(700, 700)
+
+                        current_directory = os.getcwd()
+                        screenshot_path = os.path.join(current_directory, screenshot_filename)
+
+                        driver.save_screenshot(screenshot_path)
+                        driver.quit()
+
+                    st.image(screenshot_filename)
+                    # components.iframe(link, height=500, width=700)
+                    # st.markdown('<iframe src="{}" loading="lazy" height=500 width=auto></iframe>'.format(link), unsafe_allow_html=True)
 
                     st.divider()
 
diff --git a/requirements.txt b/requirements.txt
index 5f0beb7..dee3de6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,2 @@
 requests==2.30.0
-streamlit==1.23.1
+streamlit==1.25.0