From: Claromes Date: Thu, 4 Jul 2024 19:04:08 +0000 (-0300) Subject: app - add tabs, legacy app - update descriptions, module - update CLI help text,... X-Git-Url: https://git.claromes.com/?a=commitdiff_plain;h=95850a22dc0684c94494e2df5d953f05018f70b3;p=waybacktweets.git app - add tabs, legacy app - update descriptions, module - update CLI help text, add Donate button, add hands-on docs page --- diff --git a/README.md b/README.md index 2bd1b23..8668792 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,9 @@ # Wayback Tweets -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.12528448.svg)](https://doi.org/10.5281/zenodo.12528448) [![PyPI](https://img.shields.io/pypi/v/waybacktweets)](https://pypi.org/project/waybacktweets) [![docs](https://github.com/claromes/waybacktweets/actions/workflows/docs.yml/badge.svg)](https://github.com/claromes/waybacktweets/actions/workflows/docs.yml) [![Streamlit App](https://static.streamlit.io/badges/streamlit_badge_black_white.svg)](https://waybacktweets.streamlit.app) +[![PyPI](https://img.shields.io/pypi/v/waybacktweets)](https://pypi.org/project/waybacktweets) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.12528448.svg)](https://doi.org/10.5281/zenodo.12528448) [![Streamlit App](https://static.streamlit.io/badges/streamlit_badge_black_white.svg)](https://waybacktweets.streamlit.app) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1zRqi6uTMiGi5z8GQ-PC0tbpCJWULCqMO?usp=sharing) -Retrieves archived tweets CDX data from the Wayback Machine, performs necessary parsing (see [Field Options](https://claromes.github.io/waybacktweets/field_options.html)), and saves the data in HTML (for easy viewing of the tweets using the `iframe` tag), CSV, and JSON formats. 
+ +Retrieves archived tweets CDX data from the Wayback Machine, performs necessary parsing (see [Field Options](https://claromes.github.io/waybacktweets/field_options.html)), and saves the data in HTML, for easy viewing of the tweets using the iframe tags, CSV, and JSON formats. ## Installation @@ -57,7 +58,7 @@ if archived_tweets: ## Acknowledgements - Tristan Lee (Bellingcat's Data Scientist) for the idea of the application. -- Jessica Smith (Snowflake's Marketing Specialist) and Streamlit/Snowflake teams for the additional server resources on Streamlit Cloud. +- Jessica Smith (Snowflake's Community Growth Specialist) and Streamlit/Snowflake team for the additional server resources on Streamlit Cloud. - OSINT Community for recommending the application. > [!NOTE] diff --git a/app/app.py b/app/app.py index df41e67..a608dad 100644 --- a/app/app.py +++ b/app/app.py @@ -34,7 +34,7 @@ st.set_page_config( layout="centered", menu_items={ "About": f""" - [![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/claromes/waybacktweets?include_prereleases)](https://github.com/claromes/waybacktweets/releases) [![License](https://img.shields.io/github/license/claromes/waybacktweets)](https://github.com/claromes/waybacktweets/blob/main/LICENSE.md) [![Star](https://img.shields.io/github/stars/claromes/waybacktweets?style=social)](https://github.com/claromes/waybacktweets) + [![License](https://img.shields.io/github/license/claromes/waybacktweets)](https://github.com/claromes/waybacktweets/blob/main/LICENSE.md) The application is a prototype hosted on Streamlit Cloud, serving as an alternative to the command line tool. 
@@ -168,16 +168,12 @@ if st.query_params.username != "": # ------ User Interface Settings ------ # -st.info( - "🥳 [**Pre-release 1.0x: Python module, CLI, and new Streamlit app**](https://github.com/claromes/waybacktweets/releases)" # noqa: E501 -) - st.image(TITLE, use_column_width="never") st.caption( - "[![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/claromes/waybacktweets?include_prereleases)](https://github.com/claromes/waybacktweets/releases) [![Star](https://img.shields.io/github/stars/claromes/waybacktweets?style=social)](https://github.com/claromes/waybacktweets)" # noqa: E501 + "[![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/claromes/waybacktweets?include_prereleases)](https://github.com/claromes/waybacktweets/releases) [![sponsor](https://img.shields.io/badge/Donate-via%20Sponsors-ff69b4.svg?logo=github)](https://github.com/sponsors/claromes)" # noqa: E501 ) st.write( - "Retrieves archived tweets CDX data in HTML (for easy viewing of the tweets using the `iframe` tag), CSV, and JSON formats." # noqa: E501 + "Retrieves archived tweets CDX data in HTML (for easy viewing of the tweets using the iframe tag), CSV, and JSON formats." # noqa: E501 ) st.write( @@ -291,15 +287,15 @@ if st.session_state.query or st.session_state.count: # -- Rendering -- # - if csv_data and json_data and html_content: - st.session_state.count = len(df) - st.write(f"**{st.session_state.count} URLs have been captured**") + st.session_state.count = len(df) + st.write(f"**{st.session_state.count} URLs have been captured**") - # -- HTML -- # + tab1, tab2, tab3 = st.tabs(["HTML", "CSV", "JSON"]) - st.header("HTML", divider="gray", anchor=False) + # -- HTML -- # + with tab1: st.write( - f"Visualize tweets more efficiently through `iframes`. Download the @{st.session_state.current_username}'s archived tweets in HTML." # noqa: E501 + f"Visualize tweets more efficiently through iframe tags. 
Download the @{st.session_state.current_username}'s archived tweets in HTML." # noqa: E501 ) col5, col6 = st.columns([1, 18]) @@ -317,8 +313,7 @@ if st.session_state.query or st.session_state.count: ) # -- CSV -- # - - st.header("CSV", divider="gray", anchor=False) + with tab2: st.write( "Check the data returned in the dataframe below and download the file." ) @@ -340,8 +335,7 @@ if st.session_state.query or st.session_state.count: st.dataframe(df, use_container_width=True) # -- JSON -- # - - st.header("JSON", divider="gray", anchor=False) + with tab3: st.write( "Check the data returned in JSON format below and download the file." ) diff --git a/docs/conf.py b/docs/conf.py index 4a4419e..b6304e9 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -20,6 +20,7 @@ extensions = [ "sphinx_new_tab_link", "sphinx_click.ext", "sphinx_autodoc_typehints", + "sphinxcontrib.youtube", ] templates_path = ["_templates"] diff --git a/docs/handson.rst b/docs/handson.rst new file mode 100644 index 0000000..79ec10f --- /dev/null +++ b/docs/handson.rst @@ -0,0 +1,22 @@ +Hands-On Examples +==================== + +- **Notebook** + + This notebook demonstrates how to fetch, parse, and export archived tweets for a specific user using the ``waybacktweets`` library. + + .. image:: https://colab.research.google.com/assets/colab-badge.svg + :target: https://colab.research.google.com/drive/1zRqi6uTMiGi5z8GQ-PC0tbpCJWULCqMO?usp=sharing + :alt: Open In Colab + +.. raw:: html + 
+
+ +- **Video** + + Demonstration of how to use Wayback Tweets and other tools to retrieve tweets (in Spanish) + + .. youtube:: qy3wOnUxe6A + :width: 100% diff --git a/docs/index.rst b/docs/index.rst index f6a5578..300dec9 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -9,10 +9,11 @@ Wayback Tweets Pre-release: |release| -Retrieves archived tweets CDX data from the Wayback Machine, performs necessary parsing (see :ref:`field_options`), and saves the data in HTML (for easy viewing of the tweets using the ``iframe`` tag), CSV, and JSON formats. +Retrieves archived tweets CDX data from the Wayback Machine, performs necessary parsing (see :ref:`field_options`), and saves the data in HTML, for easy viewing of the tweets using the iframe tags, CSV, and JSON formats. -.. image:: https://zenodo.org/badge/DOI/10.5281/zenodo.12528448.svg - :target: https://doi.org/10.5281/zenodo.12528448 +.. image:: https://img.shields.io/badge/Donate-via%20Sponsors-ff69b4.svg?logo=github + :target: https://github.com/sponsors/claromes + :alt: GitHub Sponsors .. note:: Intensive queries can lead to rate limiting, resulting in a temporary ban of a few minutes from web.archive.org. @@ -30,6 +31,7 @@ User Guide field_options outputs exceptions + handson contribute todo diff --git a/legacy_app/legacy_app.py b/legacy_app/legacy_app.py index 82059a2..ba2df0e 100644 --- a/legacy_app/legacy_app.py +++ b/legacy_app/legacy_app.py @@ -14,11 +14,7 @@ st.set_page_config( layout="centered", menu_items={ "About": """ - ## 🏛️ Wayback Tweets - - Tool that displays, via Wayback CDX Server API, multiple archived tweets on Wayback Machine to avoid opening each link manually. Users can apply filters based on specific years and view tweets that do not have the original URL available. - - This tool is a prototype, please feel free to send your [feedbacks](https://github.com/claromes/waybacktweets/issues). Created by [@claromes](https://claromes.com). 
+ This is the legacy application of [Wayback Tweets](https://waybacktweets.streamlit.app/). ------- """, # noqa: E501 @@ -386,7 +382,7 @@ def next_page(): # UI st.title( - "Wayback Tweets [![Star](https://img.shields.io/github/stars/claromes/waybacktweets?style=social)](https://github.com/claromes/waybacktweets)", # noqa: E501 + "Wayback Tweets", # noqa: E501 anchor=False, help="v0.4.3", ) diff --git a/poetry.lock b/poetry.lock index 825cb2a..05d1e4f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1524,6 +1524,26 @@ lint = ["docutils-stubs", "flake8", "mypy"] standalone = ["Sphinx (>=5)"] test = ["pytest"] +[[package]] +name = "sphinxcontrib-youtube" +version = "1.4.1" +description = "Sphinx \"youtube\" extension." +optional = false +python-versions = "*" +files = [ + {file = "sphinxcontrib_youtube-1.4.1-py2.py3-none-any.whl", hash = "sha256:de9cb454f066d580a1e7ad64efae7dd9e12c1b1567a31faa330b1aeaeed40460"}, + {file = "sphinxcontrib_youtube-1.4.1.tar.gz", hash = "sha256:eb7871c8af47fd2b5c9727615354b7f95bce554be8be45b9fa8e5bc022f88059"}, +] + +[package.dependencies] +requests = "*" +Sphinx = ">=6.1" + +[package.extras] +dev = ["nox"] +doc = ["pydata-sphinx-theme", "sphinx-copybutton", "sphinx-design"] +test = ["beautifulsoup4", "pytest", "pytest-cov", "pytest-regressions"] + [[package]] name = "streamlit" version = "1.36.0" @@ -1733,4 +1753,4 @@ watchmedo = ["PyYAML (>=3.10)"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "4017fc7af7b13a774406ad205ef03952ef96dc5c3e0413c624c8a459e0619a4c" +content-hash = "e41f880cd350ecafc461396adeec717dd632a56071c030fab761265acc0773f6" diff --git a/pyproject.toml b/pyproject.toml index 9dbd986..7296f3a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "waybacktweets" -version = "1.0a6" +version = "1.0a7" description = "Retrieves archived tweets CDX data from the Wayback Machine, performs necessary parsing, and saves the data." 
authors = ["Claromes "] license = "GPLv3" @@ -46,6 +46,7 @@ sphinxcontrib-mermaid = "^0.9.2" sphinx-new-tab-link = "^0.4.0" sphinx-click = "^6.0.0" sphinx-autodoc-typehints = "^2.1.1" +sphinxcontrib-youtube = "^1.4.1" [tool.poetry.group.dev.dependencies] streamlit = "1.36.0" diff --git a/waybacktweets/_cli.py b/waybacktweets/_cli.py index 4048fc7..f003efc 100644 --- a/waybacktweets/_cli.py +++ b/waybacktweets/_cli.py @@ -97,7 +97,7 @@ def _parse_date( "verbose", is_flag=True, default=False, - help="Shows the error log.", + help="Shows the log.", ) def main( username: str,