Update HTML output, docs app and spinner, and print messages; delete preview image
author Claromes <claromes@hey.com>
Mon, 24 Jun 2024 11:40:21 +0000 (08:40 -0300)
committer Claromes <claromes@hey.com>
Mon, 24 Jun 2024 11:40:21 +0000 (08:40 -0300)
app/app.py
assets/preview_image.jpg [deleted file]
waybacktweets/_cli.py
waybacktweets/api/export.py
waybacktweets/api/parse.py
waybacktweets/api/visualize.py
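
Note on the spinner changes below: the cached helpers in app/app.py now pass `show_spinner=False` to `st.cache_data`, and each call site wraps the work in an explicit `st.spinner` context so the message names the current stage. A minimal sketch of the pattern, with `fetch_archived_tweets` as a hypothetical stand-in for the cached helpers:

```python
# Sketch of the caching + spinner pattern adopted in app/app.py.
# fetch_archived_tweets is a hypothetical stand-in, not part of the package.
import streamlit as st


@st.cache_data(ttl=600, show_spinner=False)  # suppress the default cache spinner
def fetch_archived_tweets(username: str) -> list:
    # the expensive Wayback Machine CDX request would go here
    return []


username = "example"
with st.spinner(f"Waybacking @{username}'s archived tweets"):
    archived_tweets = fetch_archived_tweets(username)
```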

diff --git a/app/app.py b/app/app.py
index 8a209d51257010d78989ad13b7c904652f456771..19804497c2fe108f23f944b42e5680e566b86a84 100644 (file)
@@ -1,5 +1,5 @@
 import base64
-from datetime import datetime
+from datetime import datetime, timedelta
 
 import streamlit as st
 
@@ -13,13 +13,13 @@ from waybacktweets.config import FIELD_OPTIONS, config
 
 PAGE_ICON = "assets/parthenon.png"
 TITLE = "assets/waybacktweets.png"
-PREVIEW_IMAGE = "assets/preview_image.jpg"
 DOWNLOAD = "assets/download.svg"
 
 collapse = None
 matchtype = None
-start_date = datetime(2006, 1, 1)
+start_date = datetime.now() - timedelta(days=365 * 2)
 end_date = datetime.now()
+min_date = datetime(2006, 1, 1)
 
 # ------ Verbose Mode Configuration ------ #
 
@@ -81,7 +81,7 @@ st.html(
 # ------ Requests ------ #
 
 
-@st.cache_data(ttl=600, show_spinner=True)
+@st.cache_data(ttl=600, show_spinner=False)
 def wayback_tweets(
     username,
     collapse,
@@ -105,7 +105,7 @@ def wayback_tweets(
     return archived_tweets
 
 
-@st.cache_data(ttl=600, show_spinner=True)
+@st.cache_data(ttl=600, show_spinner=False)
 def tweets_parser(archived_tweets, username, field_options):
     parser = TweetsParser(archived_tweets, username, field_options)
     parsed_tweets = parser.parse()
@@ -113,7 +113,7 @@ def tweets_parser(archived_tweets, username, field_options):
     return parsed_tweets
 
 
-@st.cache_data(ttl=600, show_spinner=True)
+@st.cache_data(ttl=600, show_spinner=False)
 def tweets_exporter(parsed_tweets, username, field_options):
     exporter = TweetsExporter(parsed_tweets, username, field_options)
 
@@ -135,11 +135,11 @@ st.caption(
 )
 st.write("Retrieve archived tweets CDX data in CSV, JSON, and HTML formats.")
 
-st.caption(
+st.write(
     "This application uses the Wayback Tweets Python package, which can be used as a module or as a standalone command line tool. [Read the documentation](https://claromes.github.io/waybacktweets)."  # noqa: E501
 )
 
-st.caption(
+st.write(
     "To access the legacy version of Wayback Tweets [click here](https://waybacktweets-legacy.streamlit.app)."  # noqa: E501
 )
 
@@ -150,13 +150,14 @@ st.divider()
 username = st.text_input("Username *", key="username", placeholder="Without @")
 
 with st.expander("Filtering"):
-    start_date = datetime(2006, 1, 1)
-    end_date = datetime.now()
 
+    st.caption(
+        ":orange[A large date range takes a long time to process, and the app's resources may not be sufficient. Try to perform searches with smaller ranges to get faster results.]"  # noqa: E501
+    )
     st.session_state.archived_timestamp_filter = st.date_input(
         "Tweets saved between",
         (start_date, end_date),
-        start_date,
+        min_date,
         end_date,
         format="YYYY/MM/DD",
         help="Using the `from` and `to` filters. Format: YYYY/MM/DD",
@@ -178,21 +179,11 @@ with st.expander("Filtering"):
             help="Allows for a simple way to scroll through the results",
         )
 
-    col3, col4 = st.columns(2)
-
-    with col3:
-        not_available = st.checkbox(
-            "Only tweets not available",
-            key="not_available",
-            help="Checks if the archived URL still exists on Twitter",
-        )
-
-    with col4:
-        unique = st.checkbox(
-            "Only unique Wayback Machine URLs",
-            key="unique",
-            help="Filtering by the collapse option using the `urlkey` field and the URL Match Scope `prefix`",  # noqa: E501
-        )
+    unique = st.checkbox(
+        "Only unique Wayback Machine URLs",
+        key="unique",
+        help="Filtering by the collapse option using the `urlkey` field and the URL Match Scope `prefix`",  # noqa: E501
+    )
 
 
 query = st.button("Query", type="primary", use_container_width=True)
@@ -208,102 +199,111 @@ if query or st.session_state.count:
         matchtype = "prefix"
 
     try:
-        wayback_tweets = wayback_tweets(
-            st.session_state.current_username,
-            collapse,
-            st.session_state.archived_timestamp_filter[0],
-            st.session_state.archived_timestamp_filter[1],
-            limit,
-            offset,
-            matchtype,
-        )
+        with st.spinner(
+            f"Waybacking @{st.session_state.current_username}'s archived tweets"
+        ):
+            wayback_tweets = wayback_tweets(
+                st.session_state.current_username,
+                collapse,
+                st.session_state.archived_timestamp_filter[0],
+                st.session_state.archived_timestamp_filter[1],
+                limit,
+                offset,
+                matchtype,
+            )
 
         if not wayback_tweets:
             st.error("No data was saved due to an empty response.")
             st.stop()
 
-        parsed_tweets = tweets_parser(
-            wayback_tweets, st.session_state.current_username, FIELD_OPTIONS
-        )
+        with st.spinner(
+            f"Parsing @{st.session_state.current_username}'s archived tweets"
+        ):
+            parsed_tweets = tweets_parser(
+                wayback_tweets, st.session_state.current_username, FIELD_OPTIONS
+            )
 
-        df, file_name = tweets_exporter(
-            parsed_tweets, st.session_state.current_username, FIELD_OPTIONS
-        )
+            df, file_name = tweets_exporter(
+                parsed_tweets, st.session_state.current_username, FIELD_OPTIONS
+            )
 
         csv_data = df.to_csv(index=False)
         json_data = df.to_json(orient="records", lines=False)
         html = HTMLTweetsVisualizer(username, json_data)
         html_content = html.generate()
 
-        st.session_state.count = len(df)
-        st.write(f"**{st.session_state.count} URLs have been captured**")
+        # -- Rendering -- #
 
-        # -- HTML -- #
+        if csv_data and json_data and html_content:
+            st.session_state.count = len(df)
+            st.write(f"**{st.session_state.count} URLs have been captured**")
 
-        st.header("HTML", divider="gray")
-        st.write(
-            f"Visualize tweets more efficiently through iframes. Download the @{st.session_state.current_username}'s archived tweets in HTML."  # noqa: E501
-        )
+            # -- HTML -- #
 
-        col5, col6 = st.columns([1, 18])
+            st.header("HTML", divider="gray")
+            st.write(
+                f"Visualize tweets more efficiently through iframes. Download the @{st.session_state.current_username}'s archived tweets in HTML."  # noqa: E501
+            )
 
-        with col5:
-            st.image(DOWNLOAD, width=22)
+            col5, col6 = st.columns([1, 18])
 
-        with col6:
-            b64_html = base64.b64encode(html_content.encode()).decode()
-            href_html = f"data:text/html;base64,{b64_html}"
+            with col5:
+                st.image(DOWNLOAD, width=22)
 
-            st.markdown(
-                f'<a href="{href_html}" download="{file_name}.html" title="Download {file_name}.html">{file_name}.html</a>',  # noqa: E501
-                unsafe_allow_html=True,
-            )
+            with col6:
+                b64_html = base64.b64encode(html_content.encode()).decode()
+                href_html = f"data:text/html;base64,{b64_html}"
 
-        st.image(PREVIEW_IMAGE, "Preview image")
+                st.markdown(
+                    f'<a href="{href_html}" download="{file_name}.html" title="Download {file_name}.html">{file_name}.html</a>',  # noqa: E501
+                    unsafe_allow_html=True,
+                )
 
-        # -- CSV -- #
+            # -- CSV -- #
 
-        st.header("CSV", divider="gray")
-        st.write(
-            "Check the data returned in the dataframe below and download the file."
-        )
+            st.header("CSV", divider="gray")
+            st.write(
+                "Check the data returned in the dataframe below and download the file."
+            )
 
-        col7, col8 = st.columns([1, 18])
+            col7, col8 = st.columns([1, 18])
 
-        with col7:
-            st.image(DOWNLOAD, width=22)
+            with col7:
+                st.image(DOWNLOAD, width=22)
 
-        with col8:
-            b64_csv = base64.b64encode(csv_data.encode()).decode()
-            href_csv = f"data:file/csv;base64,{b64_csv}"
+            with col8:
+                b64_csv = base64.b64encode(csv_data.encode()).decode()
+                href_csv = f"data:file/csv;base64,{b64_csv}"
 
-            st.markdown(
-                f'<a href="{href_csv}" download="{file_name}.csv" title="Download {file_name}.csv">{file_name}.csv</a>',  # noqa: E501
-                unsafe_allow_html=True,
-            )
+                st.markdown(
+                    f'<a href="{href_csv}" download="{file_name}.csv" title="Download {file_name}.csv">{file_name}.csv</a>',  # noqa: E501
+                    unsafe_allow_html=True,
+                )
 
-        st.dataframe(df, use_container_width=True)
+            st.dataframe(df, use_container_width=True)
 
-        # -- JSON -- #
+            # -- JSON -- #
 
-        st.header("JSON", divider="gray")
-        st.write("Check the data returned in JSON format below and download the file.")
+            st.header("JSON", divider="gray")
+            st.write(
+                "Check the data returned in JSON format below and download the file."
+            )
 
-        col9, col10 = st.columns([1, 18])
+            col9, col10 = st.columns([1, 18])
 
-        with col9:
-            st.image(DOWNLOAD, width=22)
+            with col9:
+                st.image(DOWNLOAD, width=22)
 
-        with col10:
-            b64_json = base64.b64encode(json_data.encode()).decode()
-            href_json = f"data:file/json;base64,{b64_json}"
+            with col10:
+                b64_json = base64.b64encode(json_data.encode()).decode()
+                href_json = f"data:file/json;base64,{b64_json}"
 
-            st.markdown(
-                f'<a href="{href_json}" download="{file_name}.json" title="Download {file_name}.json">{file_name}.json</a>',  # noqa: E501
-                unsafe_allow_html=True,
-            )
+                st.markdown(
+                    f'<a href="{href_json}" download="{file_name}.json" title="Download {file_name}.json">{file_name}.json</a>',  # noqa: E501
+                    unsafe_allow_html=True,
+                )
 
-        st.json(json_data, expanded=False)
+            st.json(json_data, expanded=False)
     except TypeError as e:
         st.error(
             f"""
diff --git a/assets/preview_image.jpg b/assets/preview_image.jpg
deleted file mode 100644 (file)
index cf4633d..0000000
Binary files a/assets/preview_image.jpg and /dev/null differ
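
The download links in app/app.py embed the exported content directly as base64 `data:` URLs rather than writing temporary files. A standalone sketch of that pattern (`html_content` and `file_name` are placeholder values):

```python
# Sketch of the data-URL download link rendered in app/app.py.
# html_content and file_name are placeholders.
import base64

import streamlit as st

html_content = "<html><body>example</body></html>"
file_name = "example_user_tweets"

b64_html = base64.b64encode(html_content.encode()).decode()
href_html = f"data:text/html;base64,{b64_html}"

st.markdown(
    f'<a href="{href_html}" download="{file_name}.html" title="Download {file_name}.html">{file_name}.html</a>',
    unsafe_allow_html=True,
)
```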
diff --git a/waybacktweets/_cli.py b/waybacktweets/_cli.py
index d115c09bd30ac8faef0d28d44b54efb9e1208d24..4048fc780def996c25f2ef3b5c861bbb944ae339 100644 (file)
@@ -121,7 +121,7 @@ def main(
             username, collapse, timestamp_from, timestamp_to, limit, offset, matchtype
         )
 
-        print("Making a request to the Internet Archive...")
+        print(f"Waybacking @{username}'s archived tweets...")
         archived_tweets = api.get()
 
         if archived_tweets:
diff --git a/waybacktweets/api/export.py b/waybacktweets/api/export.py
index af8e804798dbdef442ce0d754fdc4c7e176550a2..6524bfee34984eec6c48b104f81162f2e616d4dd 100644 (file)
@@ -97,23 +97,23 @@ class TweetsExporter:
         """
         Saves the DataFrame to a JSON file.
         """
-        json_file_path = f"{self.filename}.json"
-        self.dataframe.to_json(json_file_path, orient="records", lines=False)
+        json_path = f"{self.filename}.json"
+        self.dataframe.to_json(json_path, orient="records", lines=False)
 
-        print(f"Saved to {json_file_path}")
+        print(f"Saved to {json_path}")
 
     def save_to_html(self) -> None:
         """
         Saves the DataFrame to an HTML file.
         """
-        json_file_path = f"{self.filename}.json"
+        json_path = f"{self.filename}.json"
 
-        if not os.path.exists(json_file_path):
+        if not os.path.exists(json_path):
             self.save_to_json()
 
         html_file_path = f"{self.filename}.html"
 
-        html = HTMLTweetsVisualizer(self.username, json_file_path, html_file_path)
+        html = HTMLTweetsVisualizer(self.username, json_path, html_file_path)
 
         html_content = html.generate()
         html.save(html_content)
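
A hedged usage sketch for the exporter after the `json_path` rename; `parsed_tweets` is placeholder data standing in for the output of `TweetsParser.parse()`:

```python
# Sketch only: parsed_tweets stands in for TweetsParser.parse() output,
# and the placeholder shape is an assumption.
from waybacktweets.api.export import TweetsExporter
from waybacktweets.config import FIELD_OPTIONS

parsed_tweets = {field: [] for field in FIELD_OPTIONS}  # placeholder shape

exporter = TweetsExporter(parsed_tweets, "example_user", FIELD_OPTIONS)

# save_to_html() writes <filename>.json first if it is missing,
# then renders it through HTMLTweetsVisualizer.
exporter.save_to_html()
```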
diff --git a/waybacktweets/api/parse.py b/waybacktweets/api/parse.py
index 65ad04177c757c15f6daa3acea65a8f38c9d30e7..31e0e3430b10b62e137ebf62ad9fe937f3c2a482 100644 (file)
@@ -279,7 +279,8 @@ class TweetsParser:
                 task = None
                 if print_progress:
                     task = progress.add_task(
-                        f"Waybacking @{self.username} tweets\n", total=len(futures)
+                        f"Parsing @{self.username}'s archived tweets\n",
+                        total=len(futures),
                     )
 
                 for future in as_completed(futures):
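
The renamed progress message sits inside the `rich` + `concurrent.futures` pattern that TweetsParser uses; a minimal self-contained sketch (`parse_one` and `items` are hypothetical stand-ins for the real parsing work):

```python
# Sketch of the progress-reporting pattern used in TweetsParser.
# parse_one and items are hypothetical stand-ins.
from concurrent.futures import ThreadPoolExecutor, as_completed

from rich.progress import Progress


def parse_one(item):
    return item  # placeholder for per-tweet parsing


username = "example"
items = list(range(10))

with Progress() as progress:
    with ThreadPoolExecutor() as executor:
        futures = [executor.submit(parse_one, item) for item in items]
        task = progress.add_task(
            f"Parsing @{username}'s archived tweets\n", total=len(futures)
        )
        for future in as_completed(futures):
            future.result()
            progress.advance(task)
```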
diff --git a/waybacktweets/api/visualize.py b/waybacktweets/api/visualize.py
index 3f5bfcce2b0e3e105fa48e0722a9e0aa76a3b65a..e1e9e8e14d2a0edf56fc921eaef9b18104e745b2 100644 (file)
@@ -16,36 +16,36 @@ class HTMLTweetsVisualizer:
 
     Args:
         username (str): The username associated with the tweets.
-        json_file_path (Union[str, List[str]]): The path of the JSON file or the JSON data itself.
+        json_path (Union[str, List[str]]): The path of the JSON file or the JSON data itself.
         html_file_path (str, optional): The path where the HTML file will be saved.
     """
 
     def __init__(
         self,
         username: str,
-        json_file_path: Union[str, List[str]],
+        json_path: Union[str, List[str]],
         html_file_path: str = None,
     ):
         self.username = username
-        self.json_file_path = self._json_loader(json_file_path)
+        self.json_path = self._json_loader(json_path)
         self.html_file_path = html_file_path
 
     @staticmethod
-    def _json_loader(json_file_path: Union[str, List[str]]) -> List[Dict[str, Any]]:
+    def _json_loader(json_path: Union[str, List[str]]) -> List[Dict[str, Any]]:
         """
         Reads and loads JSON data from a specified file path or JSON string.
 
         Args:
-            json_file_path (Union[str, List[str]]): The path of the JSON file or the JSON data itself.
+            json_path (Union[str, List[str]]): The path of the JSON file or the JSON data itself.
 
         Returns:
             The content of the JSON file or data.
         """
-        if os.path.isfile(json_file_path):
-            with open(json_file_path, "r", encoding="utf-8") as f:
+        if os.path.isfile(json_path):
+            with open(json_path, "r", encoding="utf-8") as f:
                 return json.load(f)
 
-        return json.loads(json_file_path)
+        return json.loads(json_path)
 
     def generate(self) -> str:
         """
@@ -104,7 +104,7 @@ class HTMLTweetsVisualizer:
         html += f"<h1>@{self.username}'s archived tweets</h1>\n"
         html += '<div class="container">\n'
 
-        for index, tweet in enumerate(self.json_file_path):
+        for index, tweet in enumerate(self.json_path):
             html += '<div class="tweet">\n'
 
             if not tweet["available_tweet_text"]:
@@ -115,10 +115,6 @@ class HTMLTweetsVisualizer:
                     "Parsed Tweet": tweet["parsed_tweet_url"],
                 }
 
-                html += f'<p class="source">{tweet["original_tweet_url"]}</p>\n'
-                html += f'<p class="source">{tweet["archived_mimetype"]}</p>\n'
-                html += "<br>\n"
-
                 for key, value in iframe_src.items():
                     key_cleaned = key.replace(" ", "_")
 
@@ -155,6 +151,12 @@ class HTMLTweetsVisualizer:
                 html += f'<p><strong class="content">Available Tweet Username:</strong> {tweet["available_tweet_info"]}</p>\n'
 
             html += "<br>\n"
+            html += f'<p><strong>Archived Tweet:</strong> {tweet["archived_tweet_url"]}</p>\n'
+            html += f'<p><strong>Parsed Archived Tweet:</strong> {tweet["parsed_archived_tweet_url"]}</p>\n'
+            html += f'<p><strong>Original Tweet:</strong> {tweet["original_tweet_url"]}</p>\n'
+            html += (
+                f'<p><strong>Parsed Tweet:</strong> {tweet["parsed_tweet_url"]}</p>\n'
+            )
             html += f'<p><strong>Archived URL Key:</strong> {tweet["archived_urlkey"]}</p>\n'
             html += f'<p><strong>Archived Timestamp:</strong> {timestamp_parser(tweet["archived_timestamp"])} ({tweet["archived_timestamp"]})</p>\n'
             html += f'<p><strong>Archived mimetype:</strong> {tweet["archived_mimetype"]}</p>\n'
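
Because `_json_loader` checks `os.path.isfile` before falling back to `json.loads`, `HTMLTweetsVisualizer` accepts either a JSON file path or an in-memory JSON string. A hedged sketch of both modes (file names are examples; real records need the full set of parsed fields used by `generate()`):

```python
# Sketch of the two input modes _json_loader supports; names are examples.
from waybacktweets.api.visualize import HTMLTweetsVisualizer

# From a JSON string, as app/app.py does ("[]" keeps generate() trivial):
viz = HTMLTweetsVisualizer("example_user", "[]")
html_content = viz.generate()

# From a JSON file on disk, with an output path, as save_to_html() does:
viz = HTMLTweetsVisualizer(
    "example_user", "example_user_tweets.json", "example_user_tweets.html"
)
html_content = viz.generate()
viz.save(html_content)  # writes to html_file_path
```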