From: Claromes Date: Sat, 8 Feb 2025 07:56:16 +0000 (-0300) Subject: delete donate links, update docs X-Git-Url: https://git.claromes.com/?a=commitdiff_plain;h=d3624f1f8b83435115edbb9cbe6414a89b325081;p=waybacktweets.git delete donate links, update docs --- diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml deleted file mode 100644 index df3f45f..0000000 --- a/.github/FUNDING.yml +++ /dev/null @@ -1 +0,0 @@ -github: [claromes] diff --git a/app/app.py b/app/app.py index 58e5e4a..edeab95 100644 --- a/app/app.py +++ b/app/app.py @@ -168,7 +168,7 @@ if st.query_params.username != "": st.image(TITLE, use_column_width="never") st.caption( - "[![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/claromes/waybacktweets?include_prereleases)](https://github.com/claromes/waybacktweets/releases) [![read the documentation](https://img.shields.io/badge/read_the-documentation-0a507a?logo=sphinx)](https://claromes.github.io/waybacktweets) [![sponsors](https://img.shields.io/badge/donate-via%20sponsors-ff69b4.svg?logo=github)](https://github.com/sponsors/claromes)" # noqa: E501 + "[![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/claromes/waybacktweets?include_prereleases)](https://github.com/claromes/waybacktweets/releases)" # noqa: E501 ) st.write( "Retrieves archived tweets CDX data in HTML (for easy viewing of the tweets), CSV, and JSON formats." # noqa: E501 diff --git a/build/.buildinfo b/build/.buildinfo new file mode 100644 index 0000000..36d5a2c --- /dev/null +++ b/build/.buildinfo @@ -0,0 +1,4 @@ +# Sphinx build info version 1 +# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. 
+config: 28a617ea8be2c355c226cdf202693234 +tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/build/.doctrees/api.doctree b/build/.doctrees/api.doctree new file mode 100644 index 0000000..ff016bc Binary files /dev/null and b/build/.doctrees/api.doctree differ diff --git a/build/.doctrees/cli.doctree b/build/.doctrees/cli.doctree new file mode 100644 index 0000000..57e13b3 Binary files /dev/null and b/build/.doctrees/cli.doctree differ diff --git a/build/.doctrees/contribute.doctree b/build/.doctrees/contribute.doctree new file mode 100644 index 0000000..097347b Binary files /dev/null and b/build/.doctrees/contribute.doctree differ diff --git a/build/.doctrees/environment.pickle b/build/.doctrees/environment.pickle new file mode 100644 index 0000000..a6e615e Binary files /dev/null and b/build/.doctrees/environment.pickle differ diff --git a/build/.doctrees/exceptions.doctree b/build/.doctrees/exceptions.doctree new file mode 100644 index 0000000..cf09982 Binary files /dev/null and b/build/.doctrees/exceptions.doctree differ diff --git a/build/.doctrees/field_options.doctree b/build/.doctrees/field_options.doctree new file mode 100644 index 0000000..20ba38d Binary files /dev/null and b/build/.doctrees/field_options.doctree differ diff --git a/build/.doctrees/handson.doctree b/build/.doctrees/handson.doctree new file mode 100644 index 0000000..ba23be5 Binary files /dev/null and b/build/.doctrees/handson.doctree differ diff --git a/build/.doctrees/index.doctree b/build/.doctrees/index.doctree new file mode 100644 index 0000000..b585f6a Binary files /dev/null and b/build/.doctrees/index.doctree differ diff --git a/build/.doctrees/installation.doctree b/build/.doctrees/installation.doctree new file mode 100644 index 0000000..192e2f9 Binary files /dev/null and b/build/.doctrees/installation.doctree differ diff --git a/build/.doctrees/outputs.doctree b/build/.doctrees/outputs.doctree new file mode 100644 index 0000000..7a14815 Binary files /dev/null and 
b/build/.doctrees/outputs.doctree differ diff --git a/build/.doctrees/quickstart.doctree b/build/.doctrees/quickstart.doctree new file mode 100644 index 0000000..a287a95 Binary files /dev/null and b/build/.doctrees/quickstart.doctree differ diff --git a/build/.doctrees/streamlit.doctree b/build/.doctrees/streamlit.doctree new file mode 100644 index 0000000..92f3807 Binary files /dev/null and b/build/.doctrees/streamlit.doctree differ diff --git a/build/.doctrees/todo.doctree b/build/.doctrees/todo.doctree new file mode 100644 index 0000000..5aef23a Binary files /dev/null and b/build/.doctrees/todo.doctree differ diff --git a/build/.doctrees/workflow.doctree b/build/.doctrees/workflow.doctree new file mode 100644 index 0000000..72c63a3 Binary files /dev/null and b/build/.doctrees/workflow.doctree differ diff --git a/build/_images/waybacktweets_title.png b/build/_images/waybacktweets_title.png new file mode 100644 index 0000000..937a666 Binary files /dev/null and b/build/_images/waybacktweets_title.png differ diff --git a/build/_sources/api.rst.txt b/build/_sources/api.rst.txt new file mode 100644 index 0000000..6a5a2b4 --- /dev/null +++ b/build/_sources/api.rst.txt @@ -0,0 +1,72 @@ +API +==== + +Request +--------- + +.. automodule:: waybacktweets.api.request + +.. autoclass:: WaybackTweets + :members: + +.. _parser: + +Parse +--------- + +.. automodule:: waybacktweets.api.parse + +.. autoclass:: TweetsParser + :members: + :private-members: + +.. autoclass:: TwitterEmbed + :members: + +.. autoclass:: JsonParser + :members: + +.. _exporter: + +Export +--------- + +.. automodule:: waybacktweets.api.export + +.. autoclass:: TweetsExporter + :members: + :private-members: + +Visualize +----------- + +.. automodule:: waybacktweets.api.visualize + +.. autoclass:: HTMLTweetsVisualizer + :members: + :private-members: + +.. _utils: + +Utils +------- + +.. automodule:: waybacktweets.utils.utils + +.. autofunction:: check_double_status +.. autofunction:: check_pattern_tweet +.. 
autofunction:: check_url_scheme +.. autofunction:: clean_tweet_url +.. autofunction:: clean_wayback_machine_url +.. autofunction:: delete_tweet_pathnames +.. autofunction:: get_response +.. autofunction:: is_tweet_url +.. autofunction:: semicolon_parser +.. autofunction:: timestamp_parser + + +Config +------------ + +.. automodule:: waybacktweets.config.config + :members: diff --git a/build/_sources/cli.rst.txt b/build/_sources/cli.rst.txt new file mode 100644 index 0000000..2a16040 --- /dev/null +++ b/build/_sources/cli.rst.txt @@ -0,0 +1,77 @@ +CLI +================ + +Usage +--------- + +.. click:: waybacktweets._cli:main + :prog: waybacktweets + :nested: full + +Collapsing +------------ + +The Wayback Tweets command line tool recommends the use of three types of "collapse": ``urlkey``, ``digest``, and ``timestamp`` field. + +- ``urlkey``: (`str`) A canonical transformation of the URL you supplied, for example, ``org,eserver,tc)/``. Such keys are useful for indexing. + +- ``digest``: (`str`) The ``SHA1`` hash digest of the content, excluding the headers. It's usually a base-32-encoded string. + +- ``timestamp``: (`datetime`) A 14 digit date-time representation in the ``YYYYMMDDhhmmss`` format. We recommend ``YYYYMMDD``. + +However, it is possible to use it with other options. Read below text extracted from the official Wayback CDX Server API (Beta) documentation. + +.. note:: + + A new form of filtering is the option to "collapse" results based on a field, or a substring of a field. Collapsing is done on adjacent CDX lines where all captures after the first one that are duplicate are filtered out. This is useful for filtering out captures that are "too dense" or when looking for unique captures. + + To use collapsing, add one or more ``collapse=field`` or ``collapse=field:N`` where ``N`` is the first ``N`` characters of field to test. + + - Ex: Only show at most 1 capture per hour (compare the first 10 digits of the ``timestamp`` field). 
Given 2 captures ``20130226010000`` and ``20130226010800``, since first 10 digits ``2013022601`` match, the 2nd capture will be filtered out: + + http://web.archive.org/cdx/search/cdx?url=google.com&collapse=timestamp:10 + + The calendar page at `web.archive.org` uses this filter by default: `http://web.archive.org/web/*/archive.org` + + - Ex: Only show unique captures by ``digest`` (note that only adjacent digest are collapsed, duplicates elsewhere in the cdx are not affected): + + http://web.archive.org/cdx/search/cdx?url=archive.org&collapse=digest + + - Ex: Only show unique urls in a prefix query (filtering out captures except first capture of a given url). This is similar to the old prefix query in wayback (note: this query may be slow at the moment): + + http://web.archive.org/cdx/search/cdx?url=archive.org&collapse=urlkey&matchType=prefix + + +URL Match Scope +----------------- + +The CDX Server can return results matching a certain prefix, a certain host or all subdomains by using the ``matchType`` param. + +The package ``waybacktweets`` uses the pathname ``/status`` followed by the wildcard '*' at the end of the URL to retrieve only tweets. However, if a value is provided for this parameter, the search will be made from the URL `twitter.com/`. + +Read below text extracted from the official Wayback CDX Server API (Beta) documentation. + +.. 
note:: + + For example, if given the url: archive.org/about/ and: + + - ``matchType=exact`` (default if omitted) will return results matching exactly archive.org/about/ + + - ``matchType=prefix`` will return results for all results under the path archive.org/about/ + + http://web.archive.org/cdx/search/cdx?url=archive.org/about/&matchType=prefix&limit=1000 + + - ``matchType=host`` will return results from host archive.org + + http://web.archive.org/cdx/search/cdx?url=archive.org/about/&matchType=host&limit=1000 + + - ``matchType=domain`` will return results from host archive.org and all subhosts \*.archive.org + + http://web.archive.org/cdx/search/cdx?url=archive.org/about/&matchType=domain&limit=1000 + + The matchType may also be set implicitly by using wildcard '*' at end or beginning of the url: + + - If url is ends in '/\*', eg url=archive.org/\* the query is equivalent to url=archive.org/&matchType=prefix + - If url starts with '\*.', eg url=\*.archive.org/ the query is equivalent to url=archive.org/&matchType=domain + + (Note: The domain mode is only available if the CDX is in `SURT `_-order format.) diff --git a/build/_sources/contribute.rst.txt b/build/_sources/contribute.rst.txt new file mode 100644 index 0000000..568efd4 --- /dev/null +++ b/build/_sources/contribute.rst.txt @@ -0,0 +1,33 @@ +Contribute +================ + +Here are all the ways you can contribute to this project. + +Testing +--------- + +The best way to help is by using the package, either on the command line or as a module, suggesting improvements and reporting bugs. You're very welcome to `open an issue `_. + + +Hacking +--------- + +If you have Python skills, contribute to the `code `_. + +These are the prerequisites: + +- Python 3.10+ +- Poetry + +Install from the source, following the :ref:`installation_from_source` instructions. 
+ +Brief explanation about the code under the Wayback Tweets directory: + +- ``app``: Streamlit application code +- ``assets``: Title and logo images +- ``docs``: Documentation generated with Sphinx +- ``legacy_app``: Legacy Streamlit application code +- ``waybacktweets/api``: Main package modules +- ``waybacktweets/config``: Global configuration module +- ``waybacktweets/exceptions``: Wayback Tweets Exceptions +- ``waybacktweets/utils``: Helper functions used in the package diff --git a/build/_sources/exceptions.rst.txt b/build/_sources/exceptions.rst.txt new file mode 100644 index 0000000..a7500a7 --- /dev/null +++ b/build/_sources/exceptions.rst.txt @@ -0,0 +1,44 @@ +Exceptions +================ + +These are the most common errors and are handled by the ``waybacktweets`` package. + +ReadTimeoutError +------------------ + +This error occurs when a request to the web.archive.org server takes too long to respond. The server could be overloaded or there could be network issues. + +The output message from the package would be: ``Connection to web.archive.org timed out.`` + +ConnectionError +------------------ + +This error is raised when the package fails to establish a new connection with web.archive.org. This could be due to network issues or the server being down. + +The output message from the package would be: ``Failed to establish a new connection with web.archive.org. Max retries exceeded.`` + + +This is the error often returned when performing experimental parsing of URLs with the mimetype ``application/json``. + +The warning output message from the package would be: ``Connection error with https://web.archive.org/web//https://twitter.com//status/. Max retries exceeded. Error parsing the JSON, but the CDX data was saved.`` + +HTTPError +------------------ + +This error occurs when the Internet Archive services are temporarily offline. This could be due to maintenance or server issues. 
+ +The output message from the package would be: ``Temporarily Offline: Internet Archive services are temporarily offline. Please check Internet Archive Twitter feed (https://twitter.com/internetarchive) for the latest information.`` + +EmptyResponseError +--------------------- + +This exception is raised for empty responses. + +The output message from the package would be: ``No data was saved due to an empty response.`` + +Warning +------------------ + +It is possible to encounter the following warning when running the ``TweetsParser`` class (:ref:`parser`): `` not available on the user's Twitter account, but the CDX data was saved.`` + +This occurs when the original tweet is no longer available on Twitter and has possibly been deleted. diff --git a/build/_sources/field_options.rst.txt b/build/_sources/field_options.rst.txt new file mode 100644 index 0000000..3c4a0ae --- /dev/null +++ b/build/_sources/field_options.rst.txt @@ -0,0 +1,42 @@ +.. _field_options: + +Field Options +================ + +The package performs several parses to facilitate the analysis of archived tweets and types of tweets. The fields below are available, which can be passed to the :ref:`parser` and :ref:`exporter`, in addition, the command line tool returns all these fields. + +- ``archived_urlkey``: (`str`) A canonical transformation of the URL you supplied, for example, ``org,eserver,tc)/``. Such keys are useful for indexing. + +- ``archived_timestamp``: (`str`) A 14 digit date-time representation in the ``YYYYMMDDhhmmss`` format. + +- ``parsed_archived_timestamp``: (`str`) The ``archived_timestamp`` in human-readable format. + +- ``archived_tweet_url``: (`str`) The archived URL. + +- ``parsed_archived_tweet_url``: (`str`) The archived URL after parsing. It is not guaranteed that this option will be archived, it is just a facilitator, as the originally archived URL does not always exist, due to changes in URLs and web services of the social network Twitter. Check the :ref:`utils`. 
+ +- ``original_tweet_url``: (`str`) The original tweet URL. + +- ``parsed_tweet_url``: (`str`) The original tweet URL after parsing. Old URLs were archived in a nested manner. The parsing applied here unnests these URLs, when necessary. Check the :ref:`utils`. + +- ``available_tweet_text``: (`str`) The tweet text extracted from the URL that is still available on the Twitter account. + +- ``available_tweet_is_RT``: (`bool`) Whether the tweet from the ``available_tweet_text`` field is a retweet or not. + +- ``available_tweet_info``: (`str`) Name and date of the tweet from the ``available_tweet_text`` field. + +- ``archived_mimetype``: (`str`) The mimetype of the archived content, which can be one of these: + + - ``text/html`` + + - ``warc/revisit`` + + - ``application/json`` + + - ``unk`` + +- ``archived_statuscode``: (`str`) The HTTP status code of the snapshot. If the mimetype is ``warc/revisit``, the value returned for the ``statuscode`` key can be blank, but the actual value is the same as that of any other entry that has the same ``digest`` as this entry. If the mimetype is ``application/json``, the value is usually empty or ``-``. + +- ``archived_digest``: (`str`) The ``SHA1`` hash digest of the content, excluding the headers. It's usually a base-32-encoded string. + +- ``archived_length``: (`int`) The compressed byte size of the corresponding WARC record, which includes WARC headers, HTTP headers, and content payload. diff --git a/build/_sources/handson.rst.txt b/build/_sources/handson.rst.txt new file mode 100644 index 0000000..648a046 --- /dev/null +++ b/build/_sources/handson.rst.txt @@ -0,0 +1,22 @@ +Hands-On Examples +==================== + +- **Notebook** + + This notebook demonstrates how to fetch, parse, and export archived tweets for a specific user using the ``waybacktweets`` library. + + .. 
image:: https://colab.research.google.com/assets/colab-badge.svg + :target: https://colab.research.google.com/drive/1tnaM3rMWpoSHBZ4P_6iHFPjraWRQ3OGe?usp=sharing + :alt: Open In Colab + +.. raw:: html + +
+
+ +- **Video** + + Demonstration of how to use Wayback Tweets and other tools to retrieve tweets (in Spanish) + + .. youtube:: qy3wOnUxe6A + :width: 100% diff --git a/build/_sources/index.rst.txt b/build/_sources/index.rst.txt new file mode 100644 index 0000000..c06b4d1 --- /dev/null +++ b/build/_sources/index.rst.txt @@ -0,0 +1,81 @@ +.. rst-class:: hide-header + +Wayback Tweets +================ + +.. image:: ../assets/waybacktweets_title.png + :alt: Wayback Tweets + :align: center + +Pre-release: |release| + +Retrieves archived tweets CDX data from the Wayback Machine, performs necessary parsing (see :ref:`field_options`), and saves the data in HTML, for easy viewing of the tweets using the iframe tags, CSV, and JSON formats. + +.. note:: + Intensive queries can lead to rate limiting, resulting in a temporary ban of a few minutes from web.archive.org. + + +User Guide +------------ + +.. toctree:: + :maxdepth: 2 + + installation + quickstart + workflow + field_options + outputs + exceptions + handson + contribute + todo + + +Command-Line Interface +------------------------ +.. toctree:: + :maxdepth: 2 + + cli + +Streamlit Web App +------------------- + +.. toctree:: + :maxdepth: 2 + + streamlit + + +API Reference +--------------- + +.. toctree:: + :maxdepth: 2 + + api + + +Additional Information +----------------------- + +.. toctree:: + :maxdepth: 1 + +.. raw:: html + + + +Indices and tables +---------------------- + +.. toctree:: + :maxdepth: 2 + + genindex + modindex + search diff --git a/build/_sources/installation.rst.txt b/build/_sources/installation.rst.txt new file mode 100644 index 0000000..9c942b1 --- /dev/null +++ b/build/_sources/installation.rst.txt @@ -0,0 +1,85 @@ +Installation +================ + +**It is compatible with Python versions 3.10 and above.** + +Using pip +------------ + + .. code-block:: shell + + pip install waybacktweets + +Using Poetry +------------ + + .. code-block:: shell + + poetry add waybacktweets + +.. 
_installation_from_source: + +From source +------------- + + **Clone the repository:** + + .. code-block:: shell + + git clone git@github.com:claromes/waybacktweets.git + + **Change directory:** + + .. code-block:: shell + + cd waybacktweets + + **Install Poetry, if you haven't already:** + + .. code-block:: shell + + pip install poetry + + **Install the dependencies:** + + .. code-block:: shell + + poetry install + + **Install the pre-commit:** + + .. code-block:: shell + + poetry run pre-commit install + + **Run the CLI:** + + .. code-block:: shell + + poetry run waybacktweets [SUBCOMMANDS] + + **Run the Streamlit App:** + + - Starts a new shell and activates the virtual environment: + + .. code-block:: shell + + poetry shell + + - Run the Streamlit: + + .. code-block:: shell + + streamlit run app/app.py + + **Build the docs:** + + .. code-block:: shell + + cd docs + + .. code-block:: shell + + make clean html + +`Read the Poetry CLI documentation `_. diff --git a/build/_sources/outputs.rst.txt b/build/_sources/outputs.rst.txt new file mode 100644 index 0000000..365db72 --- /dev/null +++ b/build/_sources/outputs.rst.txt @@ -0,0 +1,29 @@ +Outputs +========== + +It is possible to save the CDX data in three formats. In the command line tool, these three formats are saved automatically. + +HTML +-------- + +This format allows for easy viewing of the archived tweets, through the use of the ``iframe`` tag. Each tweet contains four viewing options, which render when clicking on the accordion: + +- ``archived_tweet_url``: (`str`) The archived URL. + +- ``parsed_archived_tweet_url``: (`str`) The archived URL after parsing. It is not guaranteed that this option will be archived, it is just a facilitator, as the originally archived URL does not always exist, due to changes in URLs and web services of the social network Twitter. Check the :ref:`utils`. + +- ``original_tweet_url``: (`str`) The original tweet URL. 
+ +- ``parsed_tweet_url``: (`str`) The original tweet URL after parsing. Old URLs were archived in a nested manner. The parsing applied here unnests these URLs, when necessary. Check the :ref:`utils`. + +Additionally, other fields are displayed. + +CSV +-------- + +Option to analyze the CDX data in comma-separated values. + +JSON +-------- + +Option to analyze the data in JavaScript Object Notation. diff --git a/build/_sources/quickstart.rst.txt b/build/_sources/quickstart.rst.txt new file mode 100644 index 0000000..d05e5c7 --- /dev/null +++ b/build/_sources/quickstart.rst.txt @@ -0,0 +1,48 @@ +Quickstart +================ + +CLI +------------- + +Using Wayback Tweets as a standalone command line tool. + +waybacktweets [OPTIONS] USERNAME + +.. code-block:: shell + + waybacktweets --from 20150101 --to 20191231 --limit 250 jack + +Web App +------------- + +Using Wayback Tweets as a Streamlit Web App. + +`Open the application `_, a prototype written in Python with the Streamlit framework and hosted on Streamlit Cloud. + +Module +------------- + +Using Wayback Tweets as a Python Module. + +.. code-block:: python + + from waybacktweets import WaybackTweets, TweetsParser, TweetsExporter + + USERNAME = "jack" + + api = WaybackTweets(USERNAME) + archived_tweets = api.get() + + if archived_tweets: + field_options = [ + "archived_timestamp", + "original_tweet_url", + "archived_tweet_url", + "archived_statuscode", + ] + + parser = TweetsParser(archived_tweets, USERNAME, field_options) + parsed_tweets = parser.parse() + + exporter = TweetsExporter(parsed_tweets, USERNAME, field_options) + exporter.save_to_csv() diff --git a/build/_sources/streamlit.rst.txt b/build/_sources/streamlit.rst.txt new file mode 100644 index 0000000..bf748e2 --- /dev/null +++ b/build/_sources/streamlit.rst.txt @@ -0,0 +1,63 @@ +Web App +========= + +The application is a prototype hosted on Streamlit Cloud, serving as an alternative to the command line tool. + +`Open the application `_. 
+ + +Filters +---------- + +- Filtering by date range: Using the ``from`` and ``to`` filters + +- Limit: Query result limits. + +- Resumption Key: Allows for a simple way to scroll through the results. Key to continue the query from the end of the previous query. + +- Only unique Wayback Machine URLs: Filtering by the collapse option using the ``urlkey`` field and the URL Match Scope ``prefix`` + + +Username Query Parameter +-------------------------- + +An alternative way to access the application is by using the ``username`` query parameter. This allows for automatic configuration of the Username input and automatically searches. Additionally, when the ``username`` parameter is sent, the accordion with the filters will already be open. + +Example URL format: + +``https://waybacktweets.streamlit.app?username=`` + + +Community Comments +-------------------- + +.. raw:: html + +
    +
  • "We're always delighted when we see our community members create tools for open source research." Bellingcat
  • +
    +
  • "#myOSINTtip Clarissa Mendes launched a new tool for accessing old tweets via archive.org called the Wayback Tweets app. For those who love to look deeper at #osint tools, it is available on GitHub and uses the Wayback CDX Server API server (which is a hidden gem for accessing archive.org data!)" My OSINT Training
  • +
    +
  • "Original way to find deleted tweets." Henk Van Ess
  • +
    +
  • "This is an excellent tool to use now that most Twitter API-based tools have gone down with changes to the pricing structure over at X." The OSINT Newsletter - Issue #22
  • +
    +
  • "One of the keys to using the Wayback Machine effectively is knowing what it can and can't archive. It can, and has, archived many, many Twitter accounts... Utilize fun tools such as Wayback Tweets to do so more effectively." Ari Ben Am
  • +
    +
  • "Want to see archived tweets on Wayback Machine in bulk? You can use Wayback Tweets." Daily OSINT
  • +
    +
  • "Untuk mempermudah penelusuran arsip, gunakan Wayback Tweets." GIJN Indonesia
  • +
    +
  • "A tool to quickly view tweets saved on archive.org." Irina_Tech_Tips Newsletter #3
  • +
    +
+ +Legacy App +------------- + +To access the legacy version of Wayback Tweets `click here `_. + +.. note:: + + If the application is down, please check the `Streamlit Cloud Status `_. + diff --git a/build/_sources/todo.rst.txt b/build/_sources/todo.rst.txt new file mode 100644 index 0000000..89e5586 --- /dev/null +++ b/build/_sources/todo.rst.txt @@ -0,0 +1,18 @@ +TODO +================ + +.. |uncheck| raw:: html + + + +|uncheck| Unit Tests + +|uncheck| JSON Parser: Create a separate function to handle JSON return, apply JsonParser (``waybacktweets/api/parse.py:110``), and avoid rate limiting + +|uncheck| Download images when tweet URL has extensions like JPG or PNG + +|uncheck| Implement logging system (remove print statements) + +|uncheck| Mapping and parsing of other Twitter-related URLs + +|uncheck| Download snapshots from https://archive.today diff --git a/build/_sources/workflow.rst.txt b/build/_sources/workflow.rst.txt new file mode 100644 index 0000000..09361c5 --- /dev/null +++ b/build/_sources/workflow.rst.txt @@ -0,0 +1,25 @@ +.. _flowchart: + +Workflow +================ + +The tool was written following a proposal not only to retrieve data from archived tweets, but also to facilitate the reading of these tweets. Therefore, a flow is defined to obtain these results in the best possible way. + +Due to limitations of the Wayback CDX Server API, it is not always possible to parse the results with the mimetype ``application/json``, regardless, the data in CDX format are saved. + +Use the mouse to zoom in and out the flowchart. + +.. 
mermaid:: + :zoom: + :align: center + + flowchart TB + A[input Username]--> B[(Wayback Machine)] + B--> B1[save Archived Tweets CDX data] + B1--> |parsing| C{embed Tweet URL\nvia Twitter Publisher} + C--> |2xx/3xx| D[return Tweet text] + C--> |4xx| E[return None] + E--> F{request Archived\nTweet URL} + F--> |4xx| G[return Only CDX data] + F--> |TODO: 2xx/3xx: application/json| J[return JSON text] + F--> |2xx/3xx: text/html, warc/revisit, unk| K[return HTML iframe tag] diff --git a/build/_static/basic.css b/build/_static/basic.css new file mode 100644 index 0000000..f316efc --- /dev/null +++ b/build/_static/basic.css @@ -0,0 +1,925 @@ +/* + * basic.css + * ~~~~~~~~~ + * + * Sphinx stylesheet -- basic theme. + * + * :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. + * + */ + +/* -- main layout ----------------------------------------------------------- */ + +div.clearer { + clear: both; +} + +div.section::after { + display: block; + content: ''; + clear: left; +} + +/* -- relbar ---------------------------------------------------------------- */ + +div.related { + width: 100%; + font-size: 90%; +} + +div.related h3 { + display: none; +} + +div.related ul { + margin: 0; + padding: 0 0 0 10px; + list-style: none; +} + +div.related li { + display: inline; +} + +div.related li.right { + float: right; + margin-right: 5px; +} + +/* -- sidebar --------------------------------------------------------------- */ + +div.sphinxsidebarwrapper { + padding: 10px 5px 0 10px; +} + +div.sphinxsidebar { + float: left; + width: 230px; + margin-left: -100%; + font-size: 90%; + word-wrap: break-word; + overflow-wrap : break-word; +} + +div.sphinxsidebar ul { + list-style: none; +} + +div.sphinxsidebar ul ul, +div.sphinxsidebar ul.want-points { + margin-left: 20px; + list-style: square; +} + +div.sphinxsidebar ul ul { + margin-top: 0; + margin-bottom: 0; +} + +div.sphinxsidebar form { + margin-top: 10px; +} + +div.sphinxsidebar 
input { + border: 1px solid #98dbcc; + font-family: sans-serif; + font-size: 1em; +} + +div.sphinxsidebar #searchbox form.search { + overflow: hidden; +} + +div.sphinxsidebar #searchbox input[type="text"] { + float: left; + width: 80%; + padding: 0.25em; + box-sizing: border-box; +} + +div.sphinxsidebar #searchbox input[type="submit"] { + float: left; + width: 20%; + border-left: none; + padding: 0.25em; + box-sizing: border-box; +} + + +img { + border: 0; + max-width: 100%; +} + +/* -- search page ----------------------------------------------------------- */ + +ul.search { + margin: 10px 0 0 20px; + padding: 0; +} + +ul.search li { + padding: 5px 0 5px 20px; + background-image: url(file.png); + background-repeat: no-repeat; + background-position: 0 7px; +} + +ul.search li a { + font-weight: bold; +} + +ul.search li p.context { + color: #888; + margin: 2px 0 0 30px; + text-align: left; +} + +ul.keywordmatches li.goodmatch a { + font-weight: bold; +} + +/* -- index page ------------------------------------------------------------ */ + +table.contentstable { + width: 90%; + margin-left: auto; + margin-right: auto; +} + +table.contentstable p.biglink { + line-height: 150%; +} + +a.biglink { + font-size: 1.3em; +} + +span.linkdescr { + font-style: italic; + padding-top: 5px; + font-size: 90%; +} + +/* -- general index --------------------------------------------------------- */ + +table.indextable { + width: 100%; +} + +table.indextable td { + text-align: left; + vertical-align: top; +} + +table.indextable ul { + margin-top: 0; + margin-bottom: 0; + list-style-type: none; +} + +table.indextable > tbody > tr > td > ul { + padding-left: 0em; +} + +table.indextable tr.pcap { + height: 10px; +} + +table.indextable tr.cap { + margin-top: 10px; + background-color: #f2f2f2; +} + +img.toggler { + margin-right: 3px; + margin-top: 3px; + cursor: pointer; +} + +div.modindex-jumpbox { + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; + margin: 1em 0 1em 0; + padding: 
0.4em; +} + +div.genindex-jumpbox { + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; + margin: 1em 0 1em 0; + padding: 0.4em; +} + +/* -- domain module index --------------------------------------------------- */ + +table.modindextable td { + padding: 2px; + border-collapse: collapse; +} + +/* -- general body styles --------------------------------------------------- */ + +div.body { + min-width: 360px; + max-width: 800px; +} + +div.body p, div.body dd, div.body li, div.body blockquote { + -moz-hyphens: auto; + -ms-hyphens: auto; + -webkit-hyphens: auto; + hyphens: auto; +} + +a.headerlink { + visibility: hidden; +} + +a:visited { + color: #551A8B; +} + +h1:hover > a.headerlink, +h2:hover > a.headerlink, +h3:hover > a.headerlink, +h4:hover > a.headerlink, +h5:hover > a.headerlink, +h6:hover > a.headerlink, +dt:hover > a.headerlink, +caption:hover > a.headerlink, +p.caption:hover > a.headerlink, +div.code-block-caption:hover > a.headerlink { + visibility: visible; +} + +div.body p.caption { + text-align: inherit; +} + +div.body td { + text-align: left; +} + +.first { + margin-top: 0 !important; +} + +p.rubric { + margin-top: 30px; + font-weight: bold; +} + +img.align-left, figure.align-left, .figure.align-left, object.align-left { + clear: left; + float: left; + margin-right: 1em; +} + +img.align-right, figure.align-right, .figure.align-right, object.align-right { + clear: right; + float: right; + margin-left: 1em; +} + +img.align-center, figure.align-center, .figure.align-center, object.align-center { + display: block; + margin-left: auto; + margin-right: auto; +} + +img.align-default, figure.align-default, .figure.align-default { + display: block; + margin-left: auto; + margin-right: auto; +} + +.align-left { + text-align: left; +} + +.align-center { + text-align: center; +} + +.align-default { + text-align: center; +} + +.align-right { + text-align: right; +} + +/* -- sidebars -------------------------------------------------------------- */ + 
+div.sidebar, +aside.sidebar { + margin: 0 0 0.5em 1em; + border: 1px solid #ddb; + padding: 7px; + background-color: #ffe; + width: 40%; + float: right; + clear: right; + overflow-x: auto; +} + +p.sidebar-title { + font-weight: bold; +} + +nav.contents, +aside.topic, +div.admonition, div.topic, blockquote { + clear: left; +} + +/* -- topics ---------------------------------------------------------------- */ + +nav.contents, +aside.topic, +div.topic { + border: 1px solid #ccc; + padding: 7px; + margin: 10px 0 10px 0; +} + +p.topic-title { + font-size: 1.1em; + font-weight: bold; + margin-top: 10px; +} + +/* -- admonitions ----------------------------------------------------------- */ + +div.admonition { + margin-top: 10px; + margin-bottom: 10px; + padding: 7px; +} + +div.admonition dt { + font-weight: bold; +} + +p.admonition-title { + margin: 0px 10px 5px 0px; + font-weight: bold; +} + +div.body p.centered { + text-align: center; + margin-top: 25px; +} + +/* -- content of sidebars/topics/admonitions -------------------------------- */ + +div.sidebar > :last-child, +aside.sidebar > :last-child, +nav.contents > :last-child, +aside.topic > :last-child, +div.topic > :last-child, +div.admonition > :last-child { + margin-bottom: 0; +} + +div.sidebar::after, +aside.sidebar::after, +nav.contents::after, +aside.topic::after, +div.topic::after, +div.admonition::after, +blockquote::after { + display: block; + content: ''; + clear: both; +} + +/* -- tables ---------------------------------------------------------------- */ + +table.docutils { + margin-top: 10px; + margin-bottom: 10px; + border: 0; + border-collapse: collapse; +} + +table.align-center { + margin-left: auto; + margin-right: auto; +} + +table.align-default { + margin-left: auto; + margin-right: auto; +} + +table caption span.caption-number { + font-style: italic; +} + +table caption span.caption-text { +} + +table.docutils td, table.docutils th { + padding: 1px 8px 1px 5px; + border-top: 0; + border-left: 0; + 
border-right: 0; + border-bottom: 1px solid #aaa; +} + +th { + text-align: left; + padding-right: 5px; +} + +table.citation { + border-left: solid 1px gray; + margin-left: 1px; +} + +table.citation td { + border-bottom: none; +} + +th > :first-child, +td > :first-child { + margin-top: 0px; +} + +th > :last-child, +td > :last-child { + margin-bottom: 0px; +} + +/* -- figures --------------------------------------------------------------- */ + +div.figure, figure { + margin: 0.5em; + padding: 0.5em; +} + +div.figure p.caption, figcaption { + padding: 0.3em; +} + +div.figure p.caption span.caption-number, +figcaption span.caption-number { + font-style: italic; +} + +div.figure p.caption span.caption-text, +figcaption span.caption-text { +} + +/* -- field list styles ----------------------------------------------------- */ + +table.field-list td, table.field-list th { + border: 0 !important; +} + +.field-list ul { + margin: 0; + padding-left: 1em; +} + +.field-list p { + margin: 0; +} + +.field-name { + -moz-hyphens: manual; + -ms-hyphens: manual; + -webkit-hyphens: manual; + hyphens: manual; +} + +/* -- hlist styles ---------------------------------------------------------- */ + +table.hlist { + margin: 1em 0; +} + +table.hlist td { + vertical-align: top; +} + +/* -- object description styles --------------------------------------------- */ + +.sig { + font-family: 'Consolas', 'Menlo', 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', monospace; +} + +.sig-name, code.descname { + background-color: transparent; + font-weight: bold; +} + +.sig-name { + font-size: 1.1em; +} + +code.descname { + font-size: 1.2em; +} + +.sig-prename, code.descclassname { + background-color: transparent; +} + +.optional { + font-size: 1.3em; +} + +.sig-paren { + font-size: larger; +} + +.sig-param.n { + font-style: italic; +} + +/* C++ specific styling */ + +.sig-inline.c-texpr, +.sig-inline.cpp-texpr { + font-family: unset; +} + +.sig.c .k, .sig.c .kt, +.sig.cpp .k, .sig.cpp .kt { + color: 
#0033B3; +} + +.sig.c .m, +.sig.cpp .m { + color: #1750EB; +} + +.sig.c .s, .sig.c .sc, +.sig.cpp .s, .sig.cpp .sc { + color: #067D17; +} + + +/* -- other body styles ----------------------------------------------------- */ + +ol.arabic { + list-style: decimal; +} + +ol.loweralpha { + list-style: lower-alpha; +} + +ol.upperalpha { + list-style: upper-alpha; +} + +ol.lowerroman { + list-style: lower-roman; +} + +ol.upperroman { + list-style: upper-roman; +} + +:not(li) > ol > li:first-child > :first-child, +:not(li) > ul > li:first-child > :first-child { + margin-top: 0px; +} + +:not(li) > ol > li:last-child > :last-child, +:not(li) > ul > li:last-child > :last-child { + margin-bottom: 0px; +} + +ol.simple ol p, +ol.simple ul p, +ul.simple ol p, +ul.simple ul p { + margin-top: 0; +} + +ol.simple > li:not(:first-child) > p, +ul.simple > li:not(:first-child) > p { + margin-top: 0; +} + +ol.simple p, +ul.simple p { + margin-bottom: 0; +} + +aside.footnote > span, +div.citation > span { + float: left; +} +aside.footnote > span:last-of-type, +div.citation > span:last-of-type { + padding-right: 0.5em; +} +aside.footnote > p { + margin-left: 2em; +} +div.citation > p { + margin-left: 4em; +} +aside.footnote > p:last-of-type, +div.citation > p:last-of-type { + margin-bottom: 0em; +} +aside.footnote > p:last-of-type:after, +div.citation > p:last-of-type:after { + content: ""; + clear: both; +} + +dl.field-list { + display: grid; + grid-template-columns: fit-content(30%) auto; +} + +dl.field-list > dt { + font-weight: bold; + word-break: break-word; + padding-left: 0.5em; + padding-right: 5px; +} + +dl.field-list > dd { + padding-left: 0.5em; + margin-top: 0em; + margin-left: 0em; + margin-bottom: 0em; +} + +dl { + margin-bottom: 15px; +} + +dd > :first-child { + margin-top: 0px; +} + +dd ul, dd table { + margin-bottom: 10px; +} + +dd { + margin-top: 3px; + margin-bottom: 10px; + margin-left: 30px; +} + +.sig dd { + margin-top: 0px; + margin-bottom: 0px; +} + +.sig dl { + 
margin-top: 0px; + margin-bottom: 0px; +} + +dl > dd:last-child, +dl > dd:last-child > :last-child { + margin-bottom: 0; +} + +dt:target, span.highlighted { + background-color: #fbe54e; +} + +rect.highlighted { + fill: #fbe54e; +} + +dl.glossary dt { + font-weight: bold; + font-size: 1.1em; +} + +.versionmodified { + font-style: italic; +} + +.system-message { + background-color: #fda; + padding: 5px; + border: 3px solid red; +} + +.footnote:target { + background-color: #ffa; +} + +.line-block { + display: block; + margin-top: 1em; + margin-bottom: 1em; +} + +.line-block .line-block { + margin-top: 0; + margin-bottom: 0; + margin-left: 1.5em; +} + +.guilabel, .menuselection { + font-family: sans-serif; +} + +.accelerator { + text-decoration: underline; +} + +.classifier { + font-style: oblique; +} + +.classifier:before { + font-style: normal; + margin: 0 0.5em; + content: ":"; + display: inline-block; +} + +abbr, acronym { + border-bottom: dotted 1px; + cursor: help; +} + +.translated { + background-color: rgba(207, 255, 207, 0.2) +} + +.untranslated { + background-color: rgba(255, 207, 207, 0.2) +} + +/* -- code displays --------------------------------------------------------- */ + +pre { + overflow: auto; + overflow-y: hidden; /* fixes display issues on Chrome browsers */ +} + +pre, div[class*="highlight-"] { + clear: both; +} + +span.pre { + -moz-hyphens: none; + -ms-hyphens: none; + -webkit-hyphens: none; + hyphens: none; + white-space: nowrap; +} + +div[class*="highlight-"] { + margin: 1em 0; +} + +td.linenos pre { + border: 0; + background-color: transparent; + color: #aaa; +} + +table.highlighttable { + display: block; +} + +table.highlighttable tbody { + display: block; +} + +table.highlighttable tr { + display: flex; +} + +table.highlighttable td { + margin: 0; + padding: 0; +} + +table.highlighttable td.linenos { + padding-right: 0.5em; +} + +table.highlighttable td.code { + flex: 1; + overflow: hidden; +} + +.highlight .hll { + display: block; +} + 
+div.highlight pre, +table.highlighttable pre { + margin: 0; +} + +div.code-block-caption + div { + margin-top: 0; +} + +div.code-block-caption { + margin-top: 1em; + padding: 2px 5px; + font-size: small; +} + +div.code-block-caption code { + background-color: transparent; +} + +table.highlighttable td.linenos, +span.linenos, +div.highlight span.gp { /* gp: Generic.Prompt */ + user-select: none; + -webkit-user-select: text; /* Safari fallback only */ + -webkit-user-select: none; /* Chrome/Safari */ + -moz-user-select: none; /* Firefox */ + -ms-user-select: none; /* IE10+ */ +} + +div.code-block-caption span.caption-number { + padding: 0.1em 0.3em; + font-style: italic; +} + +div.code-block-caption span.caption-text { +} + +div.literal-block-wrapper { + margin: 1em 0; +} + +code.xref, a code { + background-color: transparent; + font-weight: bold; +} + +h1 code, h2 code, h3 code, h4 code, h5 code, h6 code { + background-color: transparent; +} + +.viewcode-link { + float: right; +} + +.viewcode-back { + float: right; + font-family: sans-serif; +} + +div.viewcode-block:target { + margin: -1px -10px; + padding: 0 10px; +} + +/* -- math display ---------------------------------------------------------- */ + +img.math { + vertical-align: middle; +} + +div.body div.math p { + text-align: center; +} + +span.eqno { + float: right; +} + +span.eqno a.headerlink { + position: absolute; + z-index: 1; +} + +div.math:hover a.headerlink { + visibility: visible; +} + +/* -- printout stylesheet --------------------------------------------------- */ + +@media print { + div.document, + div.documentwrapper, + div.bodywrapper { + margin: 0 !important; + width: 100%; + } + + div.sphinxsidebar, + div.related, + div.footer, + #top-link { + display: none; + } +} \ No newline at end of file diff --git a/build/_static/card.png b/build/_static/card.png new file mode 100644 index 0000000..28efa17 Binary files /dev/null and b/build/_static/card.png differ diff --git a/build/_static/css/custom.css 
b/build/_static/css/custom.css new file mode 100644 index 0000000..773294a --- /dev/null +++ b/build/_static/css/custom.css @@ -0,0 +1,14 @@ +body { + font-family: Georgia, 'Times New Roman', Times, serif; + background-color: whitesmoke; +} + +a:hover { + background-color: whitesmoke !important; +} + +#cli #usage #waybacktweets h3, +#cli .admonition-title, +.sphinxsidebarwrapper li ul li ul:has(a[href="#waybacktweets"]):last-child { + display: none; +} diff --git a/build/_static/doctools.js b/build/_static/doctools.js new file mode 100644 index 0000000..4d67807 --- /dev/null +++ b/build/_static/doctools.js @@ -0,0 +1,156 @@ +/* + * doctools.js + * ~~~~~~~~~~~ + * + * Base JavaScript utilities for all Sphinx HTML documentation. + * + * :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. + * + */ +"use strict"; + +const BLACKLISTED_KEY_CONTROL_ELEMENTS = new Set([ + "TEXTAREA", + "INPUT", + "SELECT", + "BUTTON", +]); + +const _ready = (callback) => { + if (document.readyState !== "loading") { + callback(); + } else { + document.addEventListener("DOMContentLoaded", callback); + } +}; + +/** + * Small JavaScript module for the documentation. + */ +const Documentation = { + init: () => { + Documentation.initDomainIndexTable(); + Documentation.initOnKeyListeners(); + }, + + /** + * i18n support + */ + TRANSLATIONS: {}, + PLURAL_EXPR: (n) => (n === 1 ? 
0 : 1), + LOCALE: "unknown", + + // gettext and ngettext don't access this so that the functions + // can safely bound to a different name (_ = Documentation.gettext) + gettext: (string) => { + const translated = Documentation.TRANSLATIONS[string]; + switch (typeof translated) { + case "undefined": + return string; // no translation + case "string": + return translated; // translation exists + default: + return translated[0]; // (singular, plural) translation tuple exists + } + }, + + ngettext: (singular, plural, n) => { + const translated = Documentation.TRANSLATIONS[singular]; + if (typeof translated !== "undefined") + return translated[Documentation.PLURAL_EXPR(n)]; + return n === 1 ? singular : plural; + }, + + addTranslations: (catalog) => { + Object.assign(Documentation.TRANSLATIONS, catalog.messages); + Documentation.PLURAL_EXPR = new Function( + "n", + `return (${catalog.plural_expr})` + ); + Documentation.LOCALE = catalog.locale; + }, + + /** + * helper function to focus on search bar + */ + focusSearchBar: () => { + document.querySelectorAll("input[name=q]")[0]?.focus(); + }, + + /** + * Initialise the domain index toggle buttons + */ + initDomainIndexTable: () => { + const toggler = (el) => { + const idNumber = el.id.substr(7); + const toggledRows = document.querySelectorAll(`tr.cg-${idNumber}`); + if (el.src.substr(-9) === "minus.png") { + el.src = `${el.src.substr(0, el.src.length - 9)}plus.png`; + toggledRows.forEach((el) => (el.style.display = "none")); + } else { + el.src = `${el.src.substr(0, el.src.length - 8)}minus.png`; + toggledRows.forEach((el) => (el.style.display = "")); + } + }; + + const togglerElements = document.querySelectorAll("img.toggler"); + togglerElements.forEach((el) => + el.addEventListener("click", (event) => toggler(event.currentTarget)) + ); + togglerElements.forEach((el) => (el.style.display = "")); + if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) togglerElements.forEach(toggler); + }, + + initOnKeyListeners: () => { + // only 
install a listener if it is really needed + if ( + !DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS && + !DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS + ) + return; + + document.addEventListener("keydown", (event) => { + // bail for input elements + if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return; + // bail with special keys + if (event.altKey || event.ctrlKey || event.metaKey) return; + + if (!event.shiftKey) { + switch (event.key) { + case "ArrowLeft": + if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break; + + const prevLink = document.querySelector('link[rel="prev"]'); + if (prevLink && prevLink.href) { + window.location.href = prevLink.href; + event.preventDefault(); + } + break; + case "ArrowRight": + if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break; + + const nextLink = document.querySelector('link[rel="next"]'); + if (nextLink && nextLink.href) { + window.location.href = nextLink.href; + event.preventDefault(); + } + break; + } + } + + // some keyboard layouts may need Shift to get / + switch (event.key) { + case "/": + if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) break; + Documentation.focusSearchBar(); + event.preventDefault(); + } + }); + }, +}; + +// quick alias for translations +const _ = Documentation.gettext; + +_ready(Documentation.init); diff --git a/build/_static/documentation_options.js b/build/_static/documentation_options.js new file mode 100644 index 0000000..8b844b7 --- /dev/null +++ b/build/_static/documentation_options.js @@ -0,0 +1,13 @@ +const DOCUMENTATION_OPTIONS = { + VERSION: '1.0rc1', + LANGUAGE: 'en', + COLLAPSE_INDEX: false, + BUILDER: 'html', + FILE_SUFFIX: '.html', + LINK_SUFFIX: '.html', + HAS_SOURCE: true, + SOURCELINK_SUFFIX: '.txt', + NAVIGATION_WITH_KEYS: false, + SHOW_SEARCH_SUMMARY: true, + ENABLE_SEARCH_SHORTCUTS: true, +}; \ No newline at end of file diff --git a/build/_static/file.png b/build/_static/file.png new file mode 100644 index 0000000..a858a41 Binary files /dev/null 
and b/build/_static/file.png differ diff --git a/build/_static/flask.css b/build/_static/flask.css new file mode 100644 index 0000000..e37830d --- /dev/null +++ b/build/_static/flask.css @@ -0,0 +1,15 @@ +@import url("pocoo.css"); + +a, a.reference, a.footnote-reference { + color: #004b6b; + text-decoration-color: #004b6b; +} + +a:hover { + color: #6d4100; + text-decoration-color: #6d4100; +} + +p.version-warning { + background-color: #004b6b; +} diff --git a/build/_static/language_data.js b/build/_static/language_data.js new file mode 100644 index 0000000..367b8ed --- /dev/null +++ b/build/_static/language_data.js @@ -0,0 +1,199 @@ +/* + * language_data.js + * ~~~~~~~~~~~~~~~~ + * + * This script contains the language-specific data used by searchtools.js, + * namely the list of stopwords, stemmer, scorer and splitter. + * + * :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. + * + */ + +var stopwords = ["a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "near", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"]; + + +/* Non-minified version is copied as a separate JS file, if available */ + +/** + * Porter Stemmer + */ +var Stemmer = function() { + + var step2list = { + ational: 'ate', + tional: 'tion', + enci: 'ence', + anci: 'ance', + izer: 'ize', + bli: 'ble', + alli: 'al', + entli: 'ent', + eli: 'e', + ousli: 'ous', + ization: 'ize', + ation: 'ate', + ator: 'ate', + alism: 'al', + iveness: 'ive', + fulness: 'ful', + ousness: 'ous', + aliti: 'al', + iviti: 'ive', + biliti: 'ble', + logi: 'log' + }; + + var step3list = { + icate: 'ic', + ative: '', + alize: 'al', + iciti: 'ic', + ical: 'ic', + ful: '', + ness: '' + }; + + var c = "[^aeiou]"; // consonant + var v = "[aeiouy]"; // vowel + var C = c + "[^aeiouy]*"; // consonant sequence + var V = v + "[aeiou]*"; // vowel sequence + + var 
mgr0 = "^(" + C + ")?" + V + C; // [C]VC... is m>0 + var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1 + var mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1 + var s_v = "^(" + C + ")?" + v; // vowel in stem + + this.stemWord = function (w) { + var stem; + var suffix; + var firstch; + var origword = w; + + if (w.length < 3) + return w; + + var re; + var re2; + var re3; + var re4; + + firstch = w.substr(0,1); + if (firstch == "y") + w = firstch.toUpperCase() + w.substr(1); + + // Step 1a + re = /^(.+?)(ss|i)es$/; + re2 = /^(.+?)([^s])s$/; + + if (re.test(w)) + w = w.replace(re,"$1$2"); + else if (re2.test(w)) + w = w.replace(re2,"$1$2"); + + // Step 1b + re = /^(.+?)eed$/; + re2 = /^(.+?)(ed|ing)$/; + if (re.test(w)) { + var fp = re.exec(w); + re = new RegExp(mgr0); + if (re.test(fp[1])) { + re = /.$/; + w = w.replace(re,""); + } + } + else if (re2.test(w)) { + var fp = re2.exec(w); + stem = fp[1]; + re2 = new RegExp(s_v); + if (re2.test(stem)) { + w = stem; + re2 = /(at|bl|iz)$/; + re3 = new RegExp("([^aeiouylsz])\\1$"); + re4 = new RegExp("^" + C + v + "[^aeiouwxy]$"); + if (re2.test(w)) + w = w + "e"; + else if (re3.test(w)) { + re = /.$/; + w = w.replace(re,""); + } + else if (re4.test(w)) + w = w + "e"; + } + } + + // Step 1c + re = /^(.+?)y$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(s_v); + if (re.test(stem)) + w = stem + "i"; + } + + // Step 2 + re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + suffix = fp[2]; + re = new RegExp(mgr0); + if (re.test(stem)) + w = stem + step2list[suffix]; + } + + // Step 3 + re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + suffix = fp[2]; + re = new RegExp(mgr0); + if (re.test(stem)) + w = stem + step3list[suffix]; + } + + // Step 4 + re 
= /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/; + re2 = /^(.+?)(s|t)(ion)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(mgr1); + if (re.test(stem)) + w = stem; + } + else if (re2.test(w)) { + var fp = re2.exec(w); + stem = fp[1] + fp[2]; + re2 = new RegExp(mgr1); + if (re2.test(stem)) + w = stem; + } + + // Step 5 + re = /^(.+?)e$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(mgr1); + re2 = new RegExp(meq1); + re3 = new RegExp("^" + C + v + "[^aeiouwxy]$"); + if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) + w = stem; + } + re = /ll$/; + re2 = new RegExp(mgr1); + if (re.test(w) && re2.test(w)) { + re = /.$/; + w = w.replace(re,""); + } + + // and turn initial Y back to y + if (firstch == "y") + w = firstch.toLowerCase() + w.substr(1); + return w; + } +} + diff --git a/build/_static/minus.png b/build/_static/minus.png new file mode 100644 index 0000000..d96755f Binary files /dev/null and b/build/_static/minus.png differ diff --git a/build/_static/parthenon.png b/build/_static/parthenon.png new file mode 100644 index 0000000..191b5b5 Binary files /dev/null and b/build/_static/parthenon.png differ diff --git a/build/_static/plus.png b/build/_static/plus.png new file mode 100644 index 0000000..7107cec Binary files /dev/null and b/build/_static/plus.png differ diff --git a/build/_static/pocoo.css b/build/_static/pocoo.css new file mode 100644 index 0000000..4f14b31 --- /dev/null +++ b/build/_static/pocoo.css @@ -0,0 +1,525 @@ +@import url("basic.css"); + +/* -- page layout --------------------------------------------------- */ + +body { + font-family: 'Garamond', 'Georgia', serif; + font-size: 17px; + background-color: #fff; + color: #3e4349; + margin: 0; + padding: 0; +} + +div.related { + max-width: 1140px; + margin: 10px auto; + + /* displayed on mobile */ + display: none; +} + +div.document { + max-width: 1140px; + margin: 10px auto; +} + 
+div.documentwrapper { + float: left; + width: 100%; +} + +div.bodywrapper { + margin: 0 0 0 220px; +} + +div.body { + min-width: initial; + max-width: initial; + padding: 0 30px; +} + +div.sphinxsidebarwrapper { + padding: 10px; +} + +div.sphinxsidebar { + width: 220px; + font-size: 14px; + line-height: 1.5; + color: #444; +} + +div.sphinxsidebar li { + overflow: hidden; + text-overflow: ellipsis; +} + +div.sphinxsidebar li:hover { + overflow: visible; +} + +div.sphinxsidebar a, +div.sphinxsidebar a code { + color: #444; + border-color: #444; +} + +div.sphinxsidebar a:hover { + background-color:#fff; +} + +div.sphinxsidebar p.logo { + margin: 0; + text-align: center; +} + +div.sphinxsidebar h3, +div.sphinxsidebar h4 { + font-size: 24px; + color: #444; +} + +div.sphinxsidebar p.logo a, +div.sphinxsidebar h3 a, +div.sphinxsidebar p.logo a:hover, +div.sphinxsidebar h3 a:hover { + border: none; +} + +div.sphinxsidebar p, +div.sphinxsidebar h3, +div.sphinxsidebar h4 { + margin: 10px 0; +} + +div.sphinxsidebar ul { + margin: 10px 0; + padding: 0; +} + +div.sphinxsidebar input { + border: 1px solid #999; + font-size: 1em; +} + +div.footer { + max-width: 1140px; + margin: 20px auto; + font-size: 14px; + text-align: right; + color: #888; +} + +div.footer a { + color: #888; + border-color: #888; +} + +/* -- quick search -------------------------------------------------- */ + +div.sphinxsidebar #searchbox form { + display: flex; +} + +div.sphinxsidebar #searchbox form > div { + display: flex; + flex: 1 1 auto; +} + +div.sphinxsidebar #searchbox input[type=text] { + flex: 1 1 auto; + width: 1% !important; +} + +div.sphinxsidebar #searchbox input[type=submit] { + border-left-width: 0; +} + +/* -- versions ------------------------------------------------------ */ + +div.sphinxsidebar ul.versions a.current { + font-style: italic; + border-bottom: 1px solid #000; + color: #000; +} + +div.sphinxsidebar ul.versions span.note { + color: #999; +} + +/* -- version warning 
----------------------------------------------- */ + +p.version-warning { + top: 10px; + position: sticky; + + margin: 10px 0; + padding: 5px 10px; + border-radius: 4px; + + letter-spacing: 1px; + color: #fff; + text-shadow: 0 0 2px #000; + text-align: center; + + background: #d40 repeating-linear-gradient( + 135deg, + transparent, + transparent 56px, + rgba(255, 255, 255, 0.2) 56px, + rgba(255, 255, 255, 0.2) 112px + ); +} + +p.version-warning a { + color: #fff; + border-color: #fff; +} + +/* -- body styles --------------------------------------------------- */ + +a { + text-decoration: underline; + text-decoration-style: dotted; + text-decoration-color: #000; + text-decoration-thickness: 1px; +} + +a:hover { + text-decoration-style: solid; +} + +h1, h2, h3, h4, h5, h6 { + font-weight: normal; + margin: 30px 0 10px; + padding: 0; + color: black; +} + +div.body h1 { + font-size: 240%; +} + +div.body h2 { + font-size: 180%; +} + +div.body h3 { + font-size: 150%; +} + +div.body h4 { + font-size: 130%; +} + +div.body h5 { + font-size: 100%; +} + +div.body h6 { + font-size: 100%; +} + +div.body h1:first-of-type { + margin-top: 0; +} + +a.headerlink { + color: #ddd; + margin: 0 0.2em; + padding: 0 0.2em; + border: none; +} + +a.headerlink:hover { + color: #444; +} + +div.body p, +div.body dd, +div.body li { + line-height: 1.4; +} + +img.screenshot { + box-shadow: 2px 2px 4px #eee; +} + +hr { + border: 1px solid #999; +} + +blockquote { + margin: 0 0 0 30px; + padding: 0; +} + +ul, ol { + margin: 10px 0 10px 30px; + padding: 0; +} + +a.footnote-reference { + font-size: 0.7em; + vertical-align: top; +} + +/* -- admonitions --------------------------------------------------- */ + +div.admonition, +div.topic { + background-color: #fafafa; + margin: 10px -10px; + padding: 10px; + border-top: 1px solid #ccc; + border-right: none; + border-bottom: 1px solid #ccc; + border-left: none; +} + +div.admonition p.admonition-title, +div.topic p.topic-title { + font-weight: normal; + 
font-size: 24px; + margin: 0 0 10px 0; + padding: 0; + line-height: 1; + display: inline; +} + +p.admonition-title::after { + content: ":"; +} + +div.admonition p.last, +div.topic p:last-child { + margin-bottom: 0; +} + +div.danger, div.error { + background-color: #fff0f0; + border-color: #ffb0b0; +} + +div.seealso { + background-color: #fffff0; + border-color: #f0f0a8; +} + +/* -- changelog ----------------------------------------------------- */ + +details.changelog summary { + cursor: pointer; + font-style: italic; + margin-bottom: 10px; +} + +/* -- search highlight ---------------------------------------------- */ + +dt:target, +.footnote:target, +span.highlighted { + background-color: #ffdf80; +} + +rect.highlighted { + fill: #ffdf80; +} + +/* -- code displays ------------------------------------------------- */ + +pre, code { + font-family: 'Consolas', 'Menlo', 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', monospace; + font-size: 0.9em; +} + +pre { + margin: 0; + padding: 0; + line-height: 1.3; +} + +div.literal-block-wrapper { + padding: 10px 0 0; +} + +div.code-block-caption { + padding: 0; +} + +div.highlight, div.literal-block-wrapper div.highlight { + margin: 10px -10px; + padding: 10px; +} + +code { + color: #222; + background: #e8eff0; +} + +/* -- tables -------------------------------------------------------- */ + +table.docutils { + border: 1px solid #888; + box-shadow: 2px 2px 4px #eee; +} + +table.docutils td, +table.docutils th { + border: 1px solid #888; + padding: 0.25em 0.7em; +} + +table.field-list, +table.footnote { + border: none; + box-shadow: none; +} + +table.footnote { + margin: 15px 0; + width: 100%; + border: 1px solid #eee; + background-color: #fafafa; + font-size: 0.9em; +} + +table.footnote + table.footnote { + margin-top: -15px; + border-top: none; +} + +table.field-list th { + padding: 0 0.8em 0 0; +} + +table.field-list td { + padding: 0; +} + +table.footnote td.label { + width: 0; + padding: 0.3em 0 0.3em 0.5em; +} + 
+table.footnote td { + padding: 0.3em 0.5em; +} + +/* -- responsive screen --------------------------------------------- */ + +@media screen and (max-width: 1139px) { + p.version-warning { + margin: 10px; + } + + div.footer { + margin: 20px 10px; + } +} + +/* -- small screen -------------------------------------------------- */ + +@media screen and (max-width: 767px) { + body { + padding: 0 20px; + } + + div.related { + display: block; + } + + p.version-warning { + margin: 10px 0; + } + + div.documentwrapper { + float: none; + } + + div.bodywrapper { + margin: 0; + } + + div.body { + min-height: 0; + padding: 0; + } + + div.sphinxsidebar { + float: none; + width: 100%; + margin: 0 -20px -10px; + padding: 0 20px; + background-color: #333; + color: #ccc; + } + + div.sphinxsidebar a, + div.sphinxsidebar a code, + div.sphinxsidebar h3, + div.sphinxsidebar h4, + div.footer a { + color: #ccc; + border-color: #ccc; + } + + div.sphinxsidebar p.logo { + display: none; + } + + div.sphinxsidebar ul.versions a.current { + border-bottom-color: #fff; + color: #fff; + } + + div.footer { + text-align: left; + margin: 0 -20px; + padding: 20px; + background-color: #333; + color: #ccc; + } +} + +/* https://github.com/twbs/bootstrap/blob + /0e8831505ac845f3102fa2c5996a7141c9ab01ee + /scss/mixins/_screen-reader.scss */ +.hide-header > h1:first-child { + position: absolute; + width: 1px; + height: 1px; + padding: 0; + overflow: hidden; + clip: rect(0, 0, 0, 0); + white-space: nowrap; + border: 0; +} + +/* -- sphinx-tabs -------------------------------------------------- */ + +.sphinx-tabs { + margin-bottom: 0; +} + +.sphinx-tabs .ui.menu { + font-family: 'Garamond', 'Georgia', serif !important; +} + +.sphinx-tabs .ui.attached.menu { + border-bottom: none +} + +.sphinx-tabs .ui.tabular.menu .item { + border-bottom: 2px solid transparent; + border-left: none; + border-right: none; + border-top: none; + padding: .3em 0.6em; +} + +.sphinx-tabs .ui.attached.segment, .ui.segment { + border: 
0; + padding: 0; +} diff --git a/build/_static/pygments.css b/build/_static/pygments.css new file mode 100644 index 0000000..57c7df3 --- /dev/null +++ b/build/_static/pygments.css @@ -0,0 +1,84 @@ +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +.highlight .hll { background-color: #ffffcc } +.highlight { background: #f8f8f8; } +.highlight .c { color: #8f5902; font-style: italic } /* Comment */ +.highlight .err { color: #a40000; border: 1px solid #ef2929 } /* Error */ +.highlight .g { color: #000000 } /* Generic */ +.highlight .k { color: #004461; font-weight: bold } /* Keyword */ +.highlight .l { color: #000000 } /* Literal */ +.highlight .n { color: #000000 } /* Name */ +.highlight .o { color: #582800 } /* Operator */ +.highlight .x { color: #000000 } /* Other */ +.highlight .p { color: #000000; font-weight: bold } /* Punctuation */ +.highlight .ch { color: #8f5902; font-style: italic } /* Comment.Hashbang */ +.highlight .cm { color: #8f5902; font-style: italic } /* Comment.Multiline */ +.highlight .cp { color: #8f5902 } /* Comment.Preproc */ +.highlight .cpf { color: #8f5902; font-style: italic } /* Comment.PreprocFile */ +.highlight .c1 { color: #8f5902; font-style: italic } /* Comment.Single */ +.highlight .cs { color: #8f5902; font-style: italic } /* Comment.Special */ +.highlight .gd { color: #a40000 } /* Generic.Deleted */ +.highlight .ge { color: #000000; font-style: italic } /* Generic.Emph */ +.highlight .ges { color: #000000 } /* Generic.EmphStrong */ +.highlight .gr { color: #ef2929 } /* Generic.Error */ +.highlight .gh { color: #000080; font-weight: 
bold } /* Generic.Heading */ +.highlight .gi { color: #00A000 } /* Generic.Inserted */ +.highlight .go { color: #888888 } /* Generic.Output */ +.highlight .gp { color: #745334 } /* Generic.Prompt */ +.highlight .gs { color: #000000; font-weight: bold } /* Generic.Strong */ +.highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +.highlight .gt { color: #a40000; font-weight: bold } /* Generic.Traceback */ +.highlight .kc { color: #004461; font-weight: bold } /* Keyword.Constant */ +.highlight .kd { color: #004461; font-weight: bold } /* Keyword.Declaration */ +.highlight .kn { color: #004461; font-weight: bold } /* Keyword.Namespace */ +.highlight .kp { color: #004461; font-weight: bold } /* Keyword.Pseudo */ +.highlight .kr { color: #004461; font-weight: bold } /* Keyword.Reserved */ +.highlight .kt { color: #004461; font-weight: bold } /* Keyword.Type */ +.highlight .ld { color: #000000 } /* Literal.Date */ +.highlight .m { color: #990000 } /* Literal.Number */ +.highlight .s { color: #4e9a06 } /* Literal.String */ +.highlight .na { color: #c4a000 } /* Name.Attribute */ +.highlight .nb { color: #004461 } /* Name.Builtin */ +.highlight .nc { color: #000000 } /* Name.Class */ +.highlight .no { color: #000000 } /* Name.Constant */ +.highlight .nd { color: #888888 } /* Name.Decorator */ +.highlight .ni { color: #ce5c00 } /* Name.Entity */ +.highlight .ne { color: #cc0000; font-weight: bold } /* Name.Exception */ +.highlight .nf { color: #000000 } /* Name.Function */ +.highlight .nl { color: #f57900 } /* Name.Label */ +.highlight .nn { color: #000000 } /* Name.Namespace */ +.highlight .nx { color: #000000 } /* Name.Other */ +.highlight .py { color: #000000 } /* Name.Property */ +.highlight .nt { color: #004461; font-weight: bold } /* Name.Tag */ +.highlight .nv { color: #000000 } /* Name.Variable */ +.highlight .ow { color: #004461; font-weight: bold } /* Operator.Word */ +.highlight .pm { color: #000000; font-weight: bold } /* Punctuation.Marker 
*/ +.highlight .w { color: #f8f8f8; text-decoration: underline } /* Text.Whitespace */ +.highlight .mb { color: #990000 } /* Literal.Number.Bin */ +.highlight .mf { color: #990000 } /* Literal.Number.Float */ +.highlight .mh { color: #990000 } /* Literal.Number.Hex */ +.highlight .mi { color: #990000 } /* Literal.Number.Integer */ +.highlight .mo { color: #990000 } /* Literal.Number.Oct */ +.highlight .sa { color: #4e9a06 } /* Literal.String.Affix */ +.highlight .sb { color: #4e9a06 } /* Literal.String.Backtick */ +.highlight .sc { color: #4e9a06 } /* Literal.String.Char */ +.highlight .dl { color: #4e9a06 } /* Literal.String.Delimiter */ +.highlight .sd { color: #8f5902; font-style: italic } /* Literal.String.Doc */ +.highlight .s2 { color: #4e9a06 } /* Literal.String.Double */ +.highlight .se { color: #4e9a06 } /* Literal.String.Escape */ +.highlight .sh { color: #4e9a06 } /* Literal.String.Heredoc */ +.highlight .si { color: #4e9a06 } /* Literal.String.Interpol */ +.highlight .sx { color: #4e9a06 } /* Literal.String.Other */ +.highlight .sr { color: #4e9a06 } /* Literal.String.Regex */ +.highlight .s1 { color: #4e9a06 } /* Literal.String.Single */ +.highlight .ss { color: #4e9a06 } /* Literal.String.Symbol */ +.highlight .bp { color: #3465a4 } /* Name.Builtin.Pseudo */ +.highlight .fm { color: #000000 } /* Name.Function.Magic */ +.highlight .vc { color: #000000 } /* Name.Variable.Class */ +.highlight .vg { color: #000000 } /* Name.Variable.Global */ +.highlight .vi { color: #000000 } /* Name.Variable.Instance */ +.highlight .vm { color: #000000 } /* Name.Variable.Magic */ +.highlight .il { color: #990000 } /* Literal.Number.Integer.Long */ \ No newline at end of file diff --git a/build/_static/searchtools.js b/build/_static/searchtools.js new file mode 100644 index 0000000..92da3f8 --- /dev/null +++ b/build/_static/searchtools.js @@ -0,0 +1,619 @@ +/* + * searchtools.js + * ~~~~~~~~~~~~~~~~ + * + * Sphinx JavaScript utilities for the full-text search. 
+ * + * :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. + * + */ +"use strict"; + +/** + * Simple result scoring code. + */ +if (typeof Scorer === "undefined") { + var Scorer = { + // Implement the following function to further tweak the score for each result + // The function takes a result array [docname, title, anchor, descr, score, filename] + // and returns the new score. + /* + score: result => { + const [docname, title, anchor, descr, score, filename] = result + return score + }, + */ + + // query matches the full name of an object + objNameMatch: 11, + // or matches in the last dotted part of the object name + objPartialMatch: 6, + // Additive scores depending on the priority of the object + objPrio: { + 0: 15, // used to be importantResults + 1: 5, // used to be objectResults + 2: -5, // used to be unimportantResults + }, + // Used when the priority is not in the mapping. + objPrioDefault: 0, + + // query found in title + title: 15, + partialTitle: 7, + // query found in terms + term: 5, + partialTerm: 2, + }; +} + +const _removeChildren = (element) => { + while (element && element.lastChild) element.removeChild(element.lastChild); +}; + +/** + * See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#escaping + */ +const _escapeRegExp = (string) => + string.replace(/[.*+\-?^${}()|[\]\\]/g, "\\$&"); // $& means the whole matched string + +const _displayItem = (item, searchTerms, highlightTerms) => { + const docBuilder = DOCUMENTATION_OPTIONS.BUILDER; + const docFileSuffix = DOCUMENTATION_OPTIONS.FILE_SUFFIX; + const docLinkSuffix = DOCUMENTATION_OPTIONS.LINK_SUFFIX; + const showSearchSummary = DOCUMENTATION_OPTIONS.SHOW_SEARCH_SUMMARY; + const contentRoot = document.documentElement.dataset.content_root; + + const [docName, title, anchor, descr, score, _filename] = item; + + let listItem = document.createElement("li"); + let requestUrl; + let linkUrl; + if (docBuilder 
=== "dirhtml") { + // dirhtml builder + let dirname = docName + "/"; + if (dirname.match(/\/index\/$/)) + dirname = dirname.substring(0, dirname.length - 6); + else if (dirname === "index/") dirname = ""; + requestUrl = contentRoot + dirname; + linkUrl = requestUrl; + } else { + // normal html builders + requestUrl = contentRoot + docName + docFileSuffix; + linkUrl = docName + docLinkSuffix; + } + let linkEl = listItem.appendChild(document.createElement("a")); + linkEl.href = linkUrl + anchor; + linkEl.dataset.score = score; + linkEl.innerHTML = title; + if (descr) { + listItem.appendChild(document.createElement("span")).innerHTML = + " (" + descr + ")"; + // highlight search terms in the description + if (SPHINX_HIGHLIGHT_ENABLED) // set in sphinx_highlight.js + highlightTerms.forEach((term) => _highlightText(listItem, term, "highlighted")); + } + else if (showSearchSummary) + fetch(requestUrl) + .then((responseData) => responseData.text()) + .then((data) => { + if (data) + listItem.appendChild( + Search.makeSearchSummary(data, searchTerms, anchor) + ); + // highlight search terms in the summary + if (SPHINX_HIGHLIGHT_ENABLED) // set in sphinx_highlight.js + highlightTerms.forEach((term) => _highlightText(listItem, term, "highlighted")); + }); + Search.output.appendChild(listItem); +}; +const _finishSearch = (resultCount) => { + Search.stopPulse(); + Search.title.innerText = _("Search Results"); + if (!resultCount) + Search.status.innerText = Documentation.gettext( + "Your search did not match any documents. Please make sure that all words are spelled correctly and that you've selected enough categories." + ); + else + Search.status.innerText = _( + "Search finished, found ${resultCount} page(s) matching the search query." 
+ ).replace('${resultCount}', resultCount); +}; +const _displayNextItem = ( + results, + resultCount, + searchTerms, + highlightTerms, +) => { + // results left, load the summary and display it + // this is intended to be dynamic (don't sub resultsCount) + if (results.length) { + _displayItem(results.pop(), searchTerms, highlightTerms); + setTimeout( + () => _displayNextItem(results, resultCount, searchTerms, highlightTerms), + 5 + ); + } + // search finished, update title and status message + else _finishSearch(resultCount); +}; +// Helper function used by query() to order search results. +// Each input is an array of [docname, title, anchor, descr, score, filename]. +// Order the results by score (in opposite order of appearance, since the +// `_displayNextItem` function uses pop() to retrieve items) and then alphabetically. +const _orderResultsByScoreThenName = (a, b) => { + const leftScore = a[4]; + const rightScore = b[4]; + if (leftScore === rightScore) { + // same score: sort alphabetically + const leftTitle = a[1].toLowerCase(); + const rightTitle = b[1].toLowerCase(); + if (leftTitle === rightTitle) return 0; + return leftTitle > rightTitle ? -1 : 1; // inverted is intentional + } + return leftScore > rightScore ? 1 : -1; +}; + +/** + * Default splitQuery function. Can be overridden in ``sphinx.search`` with a + * custom function per language. + * + * The regular expression works by splitting the string on consecutive characters + * that are not Unicode letters, numbers, underscores, or emoji characters. + * This is the same as ``\W+`` in Python, preserving the surrogate pair area. 
+ */ +if (typeof splitQuery === "undefined") { + var splitQuery = (query) => query + .split(/[^\p{Letter}\p{Number}_\p{Emoji_Presentation}]+/gu) + .filter(term => term) // remove remaining empty strings +} + +/** + * Search Module + */ +const Search = { + _index: null, + _queued_query: null, + _pulse_status: -1, + + htmlToText: (htmlString, anchor) => { + const htmlElement = new DOMParser().parseFromString(htmlString, 'text/html'); + for (const removalQuery of [".headerlinks", "script", "style"]) { + htmlElement.querySelectorAll(removalQuery).forEach((el) => { el.remove() }); + } + if (anchor) { + const anchorContent = htmlElement.querySelector(`[role="main"] ${anchor}`); + if (anchorContent) return anchorContent.textContent; + + console.warn( + `Anchored content block not found. Sphinx search tries to obtain it via DOM query '[role=main] ${anchor}'. Check your theme or template.` + ); + } + + // if anchor not specified or not found, fall back to main content + const docContent = htmlElement.querySelector('[role="main"]'); + if (docContent) return docContent.textContent; + + console.warn( + "Content block not found. Sphinx search tries to obtain it via DOM query '[role=main]'. Check your theme or template." 
+ ); + return ""; + }, + + init: () => { + const query = new URLSearchParams(window.location.search).get("q"); + document + .querySelectorAll('input[name="q"]') + .forEach((el) => (el.value = query)); + if (query) Search.performSearch(query); + }, + + loadIndex: (url) => + (document.body.appendChild(document.createElement("script")).src = url), + + setIndex: (index) => { + Search._index = index; + if (Search._queued_query !== null) { + const query = Search._queued_query; + Search._queued_query = null; + Search.query(query); + } + }, + + hasIndex: () => Search._index !== null, + + deferQuery: (query) => (Search._queued_query = query), + + stopPulse: () => (Search._pulse_status = -1), + + startPulse: () => { + if (Search._pulse_status >= 0) return; + + const pulse = () => { + Search._pulse_status = (Search._pulse_status + 1) % 4; + Search.dots.innerText = ".".repeat(Search._pulse_status); + if (Search._pulse_status >= 0) window.setTimeout(pulse, 500); + }; + pulse(); + }, + + /** + * perform a search for something (or wait until index is loaded) + */ + performSearch: (query) => { + // create the required interface elements + const searchText = document.createElement("h2"); + searchText.textContent = _("Searching"); + const searchSummary = document.createElement("p"); + searchSummary.classList.add("search-summary"); + searchSummary.innerText = ""; + const searchList = document.createElement("ul"); + searchList.classList.add("search"); + + const out = document.getElementById("search-results"); + Search.title = out.appendChild(searchText); + Search.dots = Search.title.appendChild(document.createElement("span")); + Search.status = out.appendChild(searchSummary); + Search.output = out.appendChild(searchList); + + const searchProgress = document.getElementById("search-progress"); + // Some themes don't use the search progress node + if (searchProgress) { + searchProgress.innerText = _("Preparing search..."); + } + Search.startPulse(); + + // index already loaded, the 
browser was quick! + if (Search.hasIndex()) Search.query(query); + else Search.deferQuery(query); + }, + + _parseQuery: (query) => { + // stem the search terms and add them to the correct list + const stemmer = new Stemmer(); + const searchTerms = new Set(); + const excludedTerms = new Set(); + const highlightTerms = new Set(); + const objectTerms = new Set(splitQuery(query.toLowerCase().trim())); + splitQuery(query.trim()).forEach((queryTerm) => { + const queryTermLower = queryTerm.toLowerCase(); + + // maybe skip this "word" + // stopwords array is from language_data.js + if ( + stopwords.indexOf(queryTermLower) !== -1 || + queryTerm.match(/^\d+$/) + ) + return; + + // stem the word + let word = stemmer.stemWord(queryTermLower); + // select the correct list + if (word[0] === "-") excludedTerms.add(word.substr(1)); + else { + searchTerms.add(word); + highlightTerms.add(queryTermLower); + } + }); + + if (SPHINX_HIGHLIGHT_ENABLED) { // set in sphinx_highlight.js + localStorage.setItem("sphinx_highlight_terms", [...highlightTerms].join(" ")) + } + + // console.debug("SEARCH: searching for:"); + // console.info("required: ", [...searchTerms]); + // console.info("excluded: ", [...excludedTerms]); + + return [query, searchTerms, excludedTerms, highlightTerms, objectTerms]; + }, + + /** + * execute search (requires search index to be loaded) + */ + _performSearch: (query, searchTerms, excludedTerms, highlightTerms, objectTerms) => { + const filenames = Search._index.filenames; + const docNames = Search._index.docnames; + const titles = Search._index.titles; + const allTitles = Search._index.alltitles; + const indexEntries = Search._index.indexentries; + + // Collect multiple result groups to be sorted separately and then ordered. + // Each is an array of [docname, title, anchor, descr, score, filename]. 
+ const normalResults = []; + const nonMainIndexResults = []; + + _removeChildren(document.getElementById("search-progress")); + + const queryLower = query.toLowerCase().trim(); + for (const [title, foundTitles] of Object.entries(allTitles)) { + if (title.toLowerCase().trim().includes(queryLower) && (queryLower.length >= title.length/2)) { + for (const [file, id] of foundTitles) { + let score = Math.round(100 * queryLower.length / title.length) + normalResults.push([ + docNames[file], + titles[file] !== title ? `${titles[file]} > ${title}` : title, + id !== null ? "#" + id : "", + null, + score, + filenames[file], + ]); + } + } + } + + // search for explicit entries in index directives + for (const [entry, foundEntries] of Object.entries(indexEntries)) { + if (entry.includes(queryLower) && (queryLower.length >= entry.length/2)) { + for (const [file, id, isMain] of foundEntries) { + const score = Math.round(100 * queryLower.length / entry.length); + const result = [ + docNames[file], + titles[file], + id ? "#" + id : "", + null, + score, + filenames[file], + ]; + if (isMain) { + normalResults.push(result); + } else { + nonMainIndexResults.push(result); + } + } + } + } + + // lookup as object + objectTerms.forEach((term) => + normalResults.push(...Search.performObjectSearch(term, objectTerms)) + ); + + // lookup as search terms in fulltext + normalResults.push(...Search.performTermsSearch(searchTerms, excludedTerms)); + + // let the scorer override scores with a custom scoring function + if (Scorer.score) { + normalResults.forEach((item) => (item[4] = Scorer.score(item))); + nonMainIndexResults.forEach((item) => (item[4] = Scorer.score(item))); + } + + // Sort each group of results by score and then alphabetically by name. + normalResults.sort(_orderResultsByScoreThenName); + nonMainIndexResults.sort(_orderResultsByScoreThenName); + + // Combine the result groups in (reverse) order. 
+ // Non-main index entries are typically arbitrary cross-references, + // so display them after other results. + let results = [...nonMainIndexResults, ...normalResults]; + + // remove duplicate search results + // note the reversing of results, so that in the case of duplicates, the highest-scoring entry is kept + let seen = new Set(); + results = results.reverse().reduce((acc, result) => { + let resultStr = result.slice(0, 4).concat([result[5]]).map(v => String(v)).join(','); + if (!seen.has(resultStr)) { + acc.push(result); + seen.add(resultStr); + } + return acc; + }, []); + + return results.reverse(); + }, + + query: (query) => { + const [searchQuery, searchTerms, excludedTerms, highlightTerms, objectTerms] = Search._parseQuery(query); + const results = Search._performSearch(searchQuery, searchTerms, excludedTerms, highlightTerms, objectTerms); + + // for debugging + //Search.lastresults = results.slice(); // a copy + // console.info("search results:", Search.lastresults); + + // print the results + _displayNextItem(results, results.length, searchTerms, highlightTerms); + }, + + /** + * search for object names + */ + performObjectSearch: (object, objectTerms) => { + const filenames = Search._index.filenames; + const docNames = Search._index.docnames; + const objects = Search._index.objects; + const objNames = Search._index.objnames; + const titles = Search._index.titles; + + const results = []; + + const objectSearchCallback = (prefix, match) => { + const name = match[4] + const fullname = (prefix ? prefix + "." : "") + name; + const fullnameLower = fullname.toLowerCase(); + if (fullnameLower.indexOf(object) < 0) return; + + let score = 0; + const parts = fullnameLower.split("."); + + // check for different match types: exact matches of full name or + // "last name" (i.e. 
last dotted part) + if (fullnameLower === object || parts.slice(-1)[0] === object) + score += Scorer.objNameMatch; + else if (parts.slice(-1)[0].indexOf(object) > -1) + score += Scorer.objPartialMatch; // matches in last name + + const objName = objNames[match[1]][2]; + const title = titles[match[0]]; + + // If more than one term searched for, we require other words to be + // found in the name/title/description + const otherTerms = new Set(objectTerms); + otherTerms.delete(object); + if (otherTerms.size > 0) { + const haystack = `${prefix} ${name} ${objName} ${title}`.toLowerCase(); + if ( + [...otherTerms].some((otherTerm) => haystack.indexOf(otherTerm) < 0) + ) + return; + } + + let anchor = match[3]; + if (anchor === "") anchor = fullname; + else if (anchor === "-") anchor = objNames[match[1]][1] + "-" + fullname; + + const descr = objName + _(", in ") + title; + + // add custom score for some objects according to scorer + if (Scorer.objPrio.hasOwnProperty(match[2])) + score += Scorer.objPrio[match[2]]; + else score += Scorer.objPrioDefault; + + results.push([ + docNames[match[0]], + fullname, + "#" + anchor, + descr, + score, + filenames[match[0]], + ]); + }; + Object.keys(objects).forEach((prefix) => + objects[prefix].forEach((array) => + objectSearchCallback(prefix, array) + ) + ); + return results; + }, + + /** + * search for full-text terms in the index + */ + performTermsSearch: (searchTerms, excludedTerms) => { + // prepare search + const terms = Search._index.terms; + const titleTerms = Search._index.titleterms; + const filenames = Search._index.filenames; + const docNames = Search._index.docnames; + const titles = Search._index.titles; + + const scoreMap = new Map(); + const fileMap = new Map(); + + // perform the search on the required terms + searchTerms.forEach((word) => { + const files = []; + const arr = [ + { files: terms[word], score: Scorer.term }, + { files: titleTerms[word], score: Scorer.title }, + ]; + // add support for partial matches + 
if (word.length > 2) { + const escapedWord = _escapeRegExp(word); + if (!terms.hasOwnProperty(word)) { + Object.keys(terms).forEach((term) => { + if (term.match(escapedWord)) + arr.push({ files: terms[term], score: Scorer.partialTerm }); + }); + } + if (!titleTerms.hasOwnProperty(word)) { + Object.keys(titleTerms).forEach((term) => { + if (term.match(escapedWord)) + arr.push({ files: titleTerms[term], score: Scorer.partialTitle }); + }); + } + } + + // no match but word was a required one + if (arr.every((record) => record.files === undefined)) return; + + // found search word in contents + arr.forEach((record) => { + if (record.files === undefined) return; + + let recordFiles = record.files; + if (recordFiles.length === undefined) recordFiles = [recordFiles]; + files.push(...recordFiles); + + // set score for the word in each file + recordFiles.forEach((file) => { + if (!scoreMap.has(file)) scoreMap.set(file, {}); + scoreMap.get(file)[word] = record.score; + }); + }); + + // create the mapping + files.forEach((file) => { + if (!fileMap.has(file)) fileMap.set(file, [word]); + else if (fileMap.get(file).indexOf(word) === -1) fileMap.get(file).push(word); + }); + }); + + // now check if the files don't contain excluded terms + const results = []; + for (const [file, wordList] of fileMap) { + // check if all requirements are matched + + // as search terms with length < 3 are discarded + const filteredTermCount = [...searchTerms].filter( + (term) => term.length > 2 + ).length; + if ( + wordList.length !== searchTerms.size && + wordList.length !== filteredTermCount + ) + continue; + + // ensure that none of the excluded terms is in the search result + if ( + [...excludedTerms].some( + (term) => + terms[term] === file || + titleTerms[term] === file || + (terms[term] || []).includes(file) || + (titleTerms[term] || []).includes(file) + ) + ) + break; + + // select one (max) score for the file. 
+ const score = Math.max(...wordList.map((w) => scoreMap.get(file)[w])); + // add result to the result list + results.push([ + docNames[file], + titles[file], + "", + null, + score, + filenames[file], + ]); + } + return results; + }, + + /** + * helper function to return a node containing the + * search summary for a given text. keywords is a list + * of stemmed words. + */ + makeSearchSummary: (htmlText, keywords, anchor) => { + const text = Search.htmlToText(htmlText, anchor); + if (text === "") return null; + + const textLower = text.toLowerCase(); + const actualStartPosition = [...keywords] + .map((k) => textLower.indexOf(k.toLowerCase())) + .filter((i) => i > -1) + .slice(-1)[0]; + const startWithContext = Math.max(actualStartPosition - 120, 0); + + const top = startWithContext === 0 ? "" : "..."; + const tail = startWithContext + 240 < text.length ? "..." : ""; + + let summary = document.createElement("p"); + summary.classList.add("context"); + summary.textContent = top + text.substr(startWithContext, 240).trim() + tail; + + return summary; + }, +}; + +_ready(Search.init); diff --git a/build/_static/sphinx_highlight.js b/build/_static/sphinx_highlight.js new file mode 100644 index 0000000..8a96c69 --- /dev/null +++ b/build/_static/sphinx_highlight.js @@ -0,0 +1,154 @@ +/* Highlighting utilities for Sphinx HTML documentation. */ +"use strict"; + +const SPHINX_HIGHLIGHT_ENABLED = true + +/** + * highlight a given string on a node by wrapping it in + * span elements with the given class name. 
+ */ +const _highlight = (node, addItems, text, className) => { + if (node.nodeType === Node.TEXT_NODE) { + const val = node.nodeValue; + const parent = node.parentNode; + const pos = val.toLowerCase().indexOf(text); + if ( + pos >= 0 && + !parent.classList.contains(className) && + !parent.classList.contains("nohighlight") + ) { + let span; + + const closestNode = parent.closest("body, svg, foreignObject"); + const isInSVG = closestNode && closestNode.matches("svg"); + if (isInSVG) { + span = document.createElementNS("http://www.w3.org/2000/svg", "tspan"); + } else { + span = document.createElement("span"); + span.classList.add(className); + } + + span.appendChild(document.createTextNode(val.substr(pos, text.length))); + const rest = document.createTextNode(val.substr(pos + text.length)); + parent.insertBefore( + span, + parent.insertBefore( + rest, + node.nextSibling + ) + ); + node.nodeValue = val.substr(0, pos); + /* There may be more occurrences of search term in this node. So call this + * function recursively on the remaining fragment. + */ + _highlight(rest, addItems, text, className); + + if (isInSVG) { + const rect = document.createElementNS( + "http://www.w3.org/2000/svg", + "rect" + ); + const bbox = parent.getBBox(); + rect.x.baseVal.value = bbox.x; + rect.y.baseVal.value = bbox.y; + rect.width.baseVal.value = bbox.width; + rect.height.baseVal.value = bbox.height; + rect.setAttribute("class", className); + addItems.push({ parent: parent, target: rect }); + } + } + } else if (node.matches && !node.matches("button, select, textarea")) { + node.childNodes.forEach((el) => _highlight(el, addItems, text, className)); + } +}; +const _highlightText = (thisNode, text, className) => { + let addItems = []; + _highlight(thisNode, addItems, text, className); + addItems.forEach((obj) => + obj.parent.insertAdjacentElement("beforebegin", obj.target) + ); +}; + +/** + * Small JavaScript module for the documentation. 
+ */ +const SphinxHighlight = { + + /** + * highlight the search words provided in localstorage in the text + */ + highlightSearchWords: () => { + if (!SPHINX_HIGHLIGHT_ENABLED) return; // bail if no highlight + + // get and clear terms from localstorage + const url = new URL(window.location); + const highlight = + localStorage.getItem("sphinx_highlight_terms") + || url.searchParams.get("highlight") + || ""; + localStorage.removeItem("sphinx_highlight_terms") + url.searchParams.delete("highlight"); + window.history.replaceState({}, "", url); + + // get individual terms from highlight string + const terms = highlight.toLowerCase().split(/\s+/).filter(x => x); + if (terms.length === 0) return; // nothing to do + + // There should never be more than one element matching "div.body" + const divBody = document.querySelectorAll("div.body"); + const body = divBody.length ? divBody[0] : document.querySelector("body"); + window.setTimeout(() => { + terms.forEach((term) => _highlightText(body, term, "highlighted")); + }, 10); + + const searchBox = document.getElementById("searchbox"); + if (searchBox === null) return; + searchBox.appendChild( + document + .createRange() + .createContextualFragment( + '" + ) + ); + }, + + /** + * helper function to hide the search marks again + */ + hideSearchWords: () => { + document + .querySelectorAll("#searchbox .highlight-link") + .forEach((el) => el.remove()); + document + .querySelectorAll("span.highlighted") + .forEach((el) => el.classList.remove("highlighted")); + localStorage.removeItem("sphinx_highlight_terms") + }, + + initEscapeListener: () => { + // only install a listener if it is really needed + if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) return; + + document.addEventListener("keydown", (event) => { + // bail for input elements + if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return; + // bail with special keys + if (event.shiftKey || event.altKey || event.ctrlKey || event.metaKey) return; + 
if (DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS && (event.key === "Escape")) { + SphinxHighlight.hideSearchWords(); + event.preventDefault(); + } + }); + }, +}; + +_ready(() => { + /* Do not call highlightSearchWords() when we are on the search page. + * It will highlight words from the *previous* search query. + */ + if (typeof Search === "undefined") SphinxHighlight.highlightSearchWords(); + SphinxHighlight.initEscapeListener(); +}); diff --git a/build/_static/version_warning_offset.js b/build/_static/version_warning_offset.js new file mode 100644 index 0000000..c7f9f49 --- /dev/null +++ b/build/_static/version_warning_offset.js @@ -0,0 +1,40 @@ +/* +When showing the sticky version warning, the warning will cover the +scroll target when navigating to #id hash locations. Take over scrolling +to adjust the position to account for the height of the warning. +*/ +$(() => { + const versionWarning = $('.version-warning') + + // Skip if there is no version warning, regular browser behavior is + // fine in that case. + if (versionWarning.length) { + const height = versionWarning.outerHeight(true) + const target = $(':target') + + // Adjust position when the initial link has a hash. + if (target.length) { + // Use absolute scrollTo instead of relative scrollBy to avoid + // scrolling when the viewport is already at the bottom of the + // document and has space. + const y = target.offset().top - height + // Delayed because the initial browser scroll doesn't seem to + // happen until after the document ready event, so scrolling + // immediately will be overridden. + setTimeout(() => scrollTo(0, y), 100) + } + + // Listen to clicks on hash anchors. + $('a[href^="#"]').on('click', e => { + // Stop default scroll. Also stops the automatic URL hash update. + e.preventDefault() + // Get the id to scroll to and set the URL hash manually. + const id = $(e.currentTarget).attr('href').substring(1) + location.hash = id + // Use getElementById since the hash may have dots in it. 
+ const target = $(document.getElementById(id)) + // Scroll to top of target with space for the version warning. + scrollTo(0, target.offset().top - height) + }) + } +}) diff --git a/build/objects.inv b/build/objects.inv new file mode 100644 index 0000000..25a3f77 Binary files /dev/null and b/build/objects.inv differ diff --git a/build/searchindex.js b/build/searchindex.js new file mode 100644 index 0000000..4254b82 --- /dev/null +++ b/build/searchindex.js @@ -0,0 +1 @@ +Search.setIndex({"alltitles": {"API": [[0, "api"]], "API Reference": [[6, "api-reference"]], "Additional Information": [[6, "additional-information"]], "CLI": [[1, "cli"], [9, "cli"]], "CSV": [[8, "csv"]], "Collapsing": [[1, "collapsing"]], "Command-Line Interface": [[6, "command-line-interface"]], "Community Comments": [[10, "community-comments"]], "Config": [[0, "module-waybacktweets.config.config"]], "ConnectionError": [[3, "connectionerror"]], "Contribute": [[2, "contribute"]], "EmptyResponseError": [[3, "emptyresponseerror"]], "Exceptions": [[3, "exceptions"]], "Export": [[0, "module-waybacktweets.api.export"]], "Field Options": [[4, "field-options"]], "Filters": [[10, "filters"]], "From source": [[7, "from-source"]], "HTML": [[8, "html"]], "HTTPError": [[3, "httperror"]], "Hacking": [[2, "hacking"]], "Hands-On Examples": [[5, "hands-on-examples"]], "Indices and tables": [[6, "indices-and-tables"]], "Installation": [[7, "installation"]], "JSON": [[8, "json"]], "Legacy App": [[10, "legacy-app"]], "Module": [[9, "module"]], "Outputs": [[8, "outputs"]], "Parse": [[0, "module-waybacktweets.api.parse"]], "Quickstart": [[9, "quickstart"]], "ReadTimeoutError": [[3, "readtimeouterror"]], "Request": [[0, "module-waybacktweets.api.request"]], "Streamlit Web App": [[6, "streamlit-web-app"]], "TODO": [[11, "todo"]], "Testing": [[2, "testing"]], "URL Match Scope": [[1, "url-match-scope"]], "Usage": [[1, "usage"]], "User Guide": [[6, "user-guide"]], "Username Query Parameter": [[10, 
"username-query-parameter"]], "Using Poetry": [[7, "using-poetry"]], "Using pip": [[7, "using-pip"]], "Utils": [[0, "utils"]], "Visualize": [[0, "module-waybacktweets.api.visualize"]], "Warning": [[3, "warning"]], "Wayback Tweets": [[6, "wayback-tweets"]], "Web App": [[9, "web-app"], [10, "web-app"]], "Workflow": [[12, "workflow"]], "waybacktweets": [[1, "waybacktweets"]]}, "docnames": ["api", "cli", "contribute", "exceptions", "field_options", "handson", "index", "installation", "outputs", "quickstart", "streamlit", "todo", "workflow"], "envversion": {"sphinx": 61, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1}, "filenames": ["api.rst", "cli.rst", "contribute.rst", "exceptions.rst", "field_options.rst", "handson.rst", "index.rst", "installation.rst", "outputs.rst", "quickstart.rst", "streamlit.rst", "todo.rst", "workflow.rst"], "indexentries": {"--collapse": [[1, "cmdoption-waybacktweets-c", false]], "--from": [[1, "cmdoption-waybacktweets-f", false]], "--limit": [[1, "cmdoption-waybacktweets-l", false]], "--matchtype": [[1, "cmdoption-waybacktweets-mt", false]], "--resumption_key": [[1, "cmdoption-waybacktweets-rk", false]], "--to": [[1, "cmdoption-waybacktweets-t", false]], "--verbose": [[1, "cmdoption-waybacktweets-v", false]], "-c": [[1, "cmdoption-waybacktweets-c", false]], "-f": [[1, "cmdoption-waybacktweets-f", false]], "-l": [[1, "cmdoption-waybacktweets-l", false]], "-mt": [[1, "cmdoption-waybacktweets-mt", false]], "-rk": [[1, "cmdoption-waybacktweets-rk", false]], "-t": [[1, "cmdoption-waybacktweets-t", false]], "-v": [[1, "cmdoption-waybacktweets-v", false]], "_add_field() (waybacktweets.api.parse.tweetsparser method)": [[0, "waybacktweets.api.parse.TweetsParser._add_field", false]], 
"_add_resumption_key() (waybacktweets.api.parse.tweetsparser method)": [[0, "waybacktweets.api.parse.TweetsParser._add_resumption_key", false]], "_create_dataframe() (waybacktweets.api.export.tweetsexporter method)": [[0, "waybacktweets.api.export.TweetsExporter._create_dataframe", false]], "_datetime_now() (waybacktweets.api.export.tweetsexporter static method)": [[0, "waybacktweets.api.export.TweetsExporter._datetime_now", false]], "_json_loader() (waybacktweets.api.visualize.htmltweetsvisualizer static method)": [[0, "waybacktweets.api.visualize.HTMLTweetsVisualizer._json_loader", false]], "_process_response() (waybacktweets.api.parse.tweetsparser method)": [[0, "waybacktweets.api.parse.TweetsParser._process_response", false]], "_transpose_matrix() (waybacktweets.api.export.tweetsexporter static method)": [[0, "waybacktweets.api.export.TweetsExporter._transpose_matrix", false]], "check_double_status() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.check_double_status", false]], "check_pattern_tweet() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.check_pattern_tweet", false]], "check_url_scheme() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.check_url_scheme", false]], "clean_tweet_url() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.clean_tweet_url", false]], "clean_wayback_machine_url() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.clean_wayback_machine_url", false]], "config (in module waybacktweets.config.config)": [[0, "waybacktweets.config.config.config", false]], "delete_tweet_pathnames() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.delete_tweet_pathnames", false]], "embed() (waybacktweets.api.parse.twitterembed method)": [[0, "waybacktweets.api.parse.TwitterEmbed.embed", false]], "generate() (waybacktweets.api.visualize.htmltweetsvisualizer method)": [[0, 
"waybacktweets.api.visualize.HTMLTweetsVisualizer.generate", false]], "get() (waybacktweets.api.request.waybacktweets method)": [[0, "waybacktweets.api.request.WaybackTweets.get", false]], "get_response() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.get_response", false]], "htmltweetsvisualizer (class in waybacktweets.api.visualize)": [[0, "waybacktweets.api.visualize.HTMLTweetsVisualizer", false]], "is_tweet_url() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.is_tweet_url", false]], "jsonparser (class in waybacktweets.api.parse)": [[0, "waybacktweets.api.parse.JsonParser", false]], "module": [[0, "module-waybacktweets.api.export", false], [0, "module-waybacktweets.api.parse", false], [0, "module-waybacktweets.api.request", false], [0, "module-waybacktweets.api.visualize", false], [0, "module-waybacktweets.config.config", false], [0, "module-waybacktweets.utils.utils", false]], "parse() (waybacktweets.api.parse.jsonparser method)": [[0, "waybacktweets.api.parse.JsonParser.parse", false]], "parse() (waybacktweets.api.parse.tweetsparser method)": [[0, "waybacktweets.api.parse.TweetsParser.parse", false]], "save() (waybacktweets.api.visualize.htmltweetsvisualizer method)": [[0, "waybacktweets.api.visualize.HTMLTweetsVisualizer.save", false]], "save_to_csv() (waybacktweets.api.export.tweetsexporter method)": [[0, "waybacktweets.api.export.TweetsExporter.save_to_csv", false]], "save_to_html() (waybacktweets.api.export.tweetsexporter method)": [[0, "waybacktweets.api.export.TweetsExporter.save_to_html", false]], "save_to_json() (waybacktweets.api.export.tweetsexporter method)": [[0, "waybacktweets.api.export.TweetsExporter.save_to_json", false]], "semicolon_parser() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.semicolon_parser", false]], "timestamp_parser() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.timestamp_parser", false]], "tweetsexporter (class in 
waybacktweets.api.export)": [[0, "waybacktweets.api.export.TweetsExporter", false]], "tweetsparser (class in waybacktweets.api.parse)": [[0, "waybacktweets.api.parse.TweetsParser", false]], "twitterembed (class in waybacktweets.api.parse)": [[0, "waybacktweets.api.parse.TwitterEmbed", false]], "username": [[1, "cmdoption-waybacktweets-arg-USERNAME", false]], "verbose (in module waybacktweets.config.config)": [[0, "waybacktweets.config.config.verbose", false]], "waybacktweets (class in waybacktweets.api.request)": [[0, "waybacktweets.api.request.WaybackTweets", false]], "waybacktweets command line option": [[1, "cmdoption-waybacktweets-arg-USERNAME", false], [1, "cmdoption-waybacktweets-c", false], [1, "cmdoption-waybacktweets-f", false], [1, "cmdoption-waybacktweets-l", false], [1, "cmdoption-waybacktweets-mt", false], [1, "cmdoption-waybacktweets-rk", false], [1, "cmdoption-waybacktweets-t", false], [1, "cmdoption-waybacktweets-v", false]], "waybacktweets.api.export": [[0, "module-waybacktweets.api.export", false]], "waybacktweets.api.parse": [[0, "module-waybacktweets.api.parse", false]], "waybacktweets.api.request": [[0, "module-waybacktweets.api.request", false]], "waybacktweets.api.visualize": [[0, "module-waybacktweets.api.visualize", false]], "waybacktweets.config.config": [[0, "module-waybacktweets.config.config", false]], "waybacktweets.utils.utils": [[0, "module-waybacktweets.utils.utils", false]]}, "objects": {"waybacktweets": [[1, 6, 1, "cmdoption-waybacktweets-c", "--collapse"], [1, 6, 1, "cmdoption-waybacktweets-f", "--from"], [1, 6, 1, "cmdoption-waybacktweets-l", "--limit"], [1, 6, 1, "cmdoption-waybacktweets-mt", "--matchtype"], [1, 6, 1, "cmdoption-waybacktweets-rk", "--resumption_key"], [1, 6, 1, "cmdoption-waybacktweets-t", "--to"], [1, 6, 1, "cmdoption-waybacktweets-v", "--verbose"], [1, 6, 1, "cmdoption-waybacktweets-c", "-c"], [1, 6, 1, "cmdoption-waybacktweets-f", "-f"], [1, 6, 1, "cmdoption-waybacktweets-l", "-l"], [1, 6, 1, 
"cmdoption-waybacktweets-mt", "-mt"], [1, 6, 1, "cmdoption-waybacktweets-rk", "-rk"], [1, 6, 1, "cmdoption-waybacktweets-t", "-t"], [1, 6, 1, "cmdoption-waybacktweets-v", "-v"], [1, 6, 1, "cmdoption-waybacktweets-arg-USERNAME", "USERNAME"]], "waybacktweets.api": [[0, 0, 0, "-", "export"], [0, 0, 0, "-", "parse"], [0, 0, 0, "-", "request"], [0, 0, 0, "-", "visualize"]], "waybacktweets.api.export": [[0, 1, 1, "", "TweetsExporter"]], "waybacktweets.api.export.TweetsExporter": [[0, 2, 1, "", "_create_dataframe"], [0, 2, 1, "", "_datetime_now"], [0, 2, 1, "", "_transpose_matrix"], [0, 2, 1, "", "save_to_csv"], [0, 2, 1, "", "save_to_html"], [0, 2, 1, "", "save_to_json"]], "waybacktweets.api.parse": [[0, 1, 1, "", "JsonParser"], [0, 1, 1, "", "TweetsParser"], [0, 1, 1, "", "TwitterEmbed"]], "waybacktweets.api.parse.JsonParser": [[0, 2, 1, "", "parse"]], "waybacktweets.api.parse.TweetsParser": [[0, 2, 1, "", "_add_field"], [0, 2, 1, "", "_add_resumption_key"], [0, 2, 1, "", "_process_response"], [0, 2, 1, "", "parse"]], "waybacktweets.api.parse.TwitterEmbed": [[0, 2, 1, "", "embed"]], "waybacktweets.api.request": [[0, 1, 1, "", "WaybackTweets"]], "waybacktweets.api.request.WaybackTweets": [[0, 2, 1, "", "get"]], "waybacktweets.api.visualize": [[0, 1, 1, "", "HTMLTweetsVisualizer"]], "waybacktweets.api.visualize.HTMLTweetsVisualizer": [[0, 2, 1, "", "_json_loader"], [0, 2, 1, "", "generate"], [0, 2, 1, "", "save"]], "waybacktweets.config": [[0, 0, 0, "-", "config"]], "waybacktweets.config.config": [[0, 3, 1, "", "config"], [0, 4, 1, "", "verbose"]], "waybacktweets.utils": [[0, 0, 0, "-", "utils"]], "waybacktweets.utils.utils": [[0, 5, 1, "", "check_double_status"], [0, 5, 1, "", "check_pattern_tweet"], [0, 5, 1, "", "check_url_scheme"], [0, 5, 1, "", "clean_tweet_url"], [0, 5, 1, "", "clean_wayback_machine_url"], [0, 5, 1, "", "delete_tweet_pathnames"], [0, 5, 1, "", "get_response"], [0, 5, 1, "", "is_tweet_url"], [0, 5, 1, "", "semicolon_parser"], [0, 5, 1, "", 
"timestamp_parser"]]}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"], "3": ["py", "data", "Python data"], "4": ["py", "attribute", "Python attribute"], "5": ["py", "function", "Python function"], "6": ["std", "cmdoption", "program option"]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method", "3": "py:data", "4": "py:attribute", "5": "py:function", "6": "std:cmdoption"}, "terms": {"": [0, 1, 3, 4], "0": 6, "0rc1": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], "1": 1, "10": [1, 2, 7], "1000": 1, "110": 11, "14": [1, 4], "2": 1, "2013022601": 1, "20130226010000": 1, "20130226010800": 1, "20150101": 9, "20191231": 9, "20200305": 1, "20231231": 1, "22": 10, "250": 9, "2nd": 1, "3": [2, 6, 7, 10], "300": 1, "32": [1, 4], "3b": 0, "4": 1, "A": [0, 1, 4, 10], "For": [0, 1, 10], "If": [0, 1, 2, 4, 10], "In": 8, "It": [0, 1, 3, 4, 7, 8, 10], "No": 3, "On": 6, "One": 10, "Such": [1, 4], "The": [0, 1, 2, 3, 4, 8, 10, 12], "These": [2, 3], "To": [1, 10], "With": 1, "_add_field": 0, "_add_resumption_kei": 0, "_config": 0, "_create_datafram": 0, "_datetime_now": 0, "_json_load": 0, "_process_respons": 0, "_transpose_matrix": 0, "about": [1, 2], "abov": 7, "access": 10, "accordion": [8, 10], "account": [3, 4, 10], "activ": 7, "actual": 4, "add": [0, 1, 7], "addit": 4, "addition": [8, 10], "adjac": 1, "affect": 1, "after": [1, 4, 8], "all": [0, 1, 2, 4], "allow": [1, 8, 10], "alreadi": [7, 10], "also": [0, 1, 12], "altern": 10, "alwai": [4, 8, 10, 12], "am": 10, "an": [0, 2, 3, 10], "analysi": 4, "analyz": 8, "ani": [0, 4], "api": [1, 2, 9, 10, 11, 12], "app": [2, 7], "append": 0, "appli": [4, 8, 11], "applic": [0, 2, 3, 4, 9, 10, 12], "ar": [0, 1, 2, 3, 4, 8, 12], "archiv": [0, 1, 3, 4, 5, 6, 8, 10, 11, 12], "archived_digest": 4, "archived_length": 4, "archived_mimetyp": 4, "archived_statuscod": [4, 9], "archived_timestamp": [0, 4, 9], "archived_tweet": 9, "archived_tweet_url": [0, 4, 
8, 9], "archived_tweets_respons": 0, "archived_urlkei": 4, "argument": 1, "ari": 10, "arsip": 10, "asset": 2, "associ": 0, "automat": [8, 10], "avail": [0, 1, 3, 4, 10], "available_tweet_info": 4, "available_tweet_is_rt": 4, "available_tweet_text": 4, "avoid": 11, "ban": 6, "base": [1, 4, 10], "been": 3, "begin": 1, "being": 3, "bellingcat": 10, "below": [1, 4], "ben": 10, "best": [2, 12], "beta": 1, "blank": 4, "bool": [0, 4], "boolean": 0, "brief": 2, "bug": 2, "build": 7, "bulk": 10, "byte": 4, "c": 1, "calendar": 1, "call": 10, "can": [1, 2, 4, 6, 10], "canon": [1, 4], "captur": 1, "cd": 7, "cdx": [0, 1, 3, 6, 8, 10, 12], "certain": [0, 1], "chang": [4, 6, 7, 8, 10], "charact": 1, "check": [0, 3, 4, 8, 10], "check_double_statu": 0, "check_pattern_tweet": 0, "check_url_schem": 0, "clarissa": 10, "clarom": [1, 7], "class": [0, 3], "clean": [0, 7], "clean_tweet_url": 0, "clean_wayback_machine_url": 0, "cli": [6, 7], "click": [8, 10], "clone": 7, "cloud": [9, 10], "code": [2, 4], "collaps": [0, 6, 10], "com": [0, 1, 3, 7], "comma": 8, "command": [1, 2, 4, 8, 9, 10], "comment": 6, "commit": 7, "common": [0, 3], "commun": 6, "compar": 1, "compat": 7, "compress": 4, "condit": 0, "config": [2, 6], "configur": [0, 2, 10], "connect": [0, 3], "connectionerror": [0, 6], "contain": [0, 8], "content": [0, 1, 4], "continu": [0, 1, 10], "contribut": 6, "correct": 0, "correspond": 4, "could": [0, 3], "creat": [0, 10, 11], "csv": [0, 6], "current": 0, "d": 0, "daili": 10, "data": [0, 3, 6, 8, 10, 12], "datafram": 0, "date": [1, 4, 10], "datetim": [0, 1], "deeper": 10, "default": 1, "defin": 12, "delet": [3, 10], "delete_tweet_pathnam": 0, "delight": 10, "demonstr": 5, "dens": 1, "depend": 7, "detail": 0, "determin": 0, "dict": 0, "dictionari": 0, "digest": [1, 4], "digit": [1, 4], "directori": [2, 7], "displai": 8, "do": 10, "doc": [2, 7], "document": [1, 2, 7], "doe": [0, 4, 8], "domain": 1, "done": 1, "down": [3, 10], "download": 11, "due": [3, 4, 8, 12], "duplic": [0, 1], 
"each": [0, 8], "easi": [6, 8], "effect": 10, "eg": 1, "either": 2, "elsewher": 1, "emb": 0, "empti": [0, 3, 4], "emptyresponseerror": [0, 6], "enabl": 0, "encod": [1, 4], "encount": 3, "end": [0, 1, 10], "ensur": 0, "entri": 4, "environ": 7, "equival": 1, "error": [0, 3], "eserv": [1, 4], "ess": 10, "establish": 3, "ex": 1, "exact": 1, "exactli": [0, 1], "exampl": [1, 4, 6, 10], "exceed": 3, "excel": 10, "except": [1, 2, 6], "exclud": [1, 4], "exist": [4, 8], "experiment": [0, 3], "explan": 2, "export": [4, 5, 6, 9], "express": 0, "extens": 11, "extract": [0, 1, 4], "f": 1, "facilit": [4, 8, 12], "fail": 3, "fals": 0, "feed": 3, "fetch": 5, "few": 6, "field": [0, 1, 6, 8, 10], "field_opt": [0, 9], "file": 0, "fill": 0, "fill_valu": 0, "filter": [1, 6], "find": [0, 10], "first": [0, 1], "flow": 12, "flowchart": 12, "follow": [0, 1, 2, 3, 12], "form": 1, "format": [0, 1, 4, 6, 8, 10, 12], "four": 8, "framework": 9, "from": [0, 1, 2, 3, 4, 6, 9, 10, 11, 12], "fun": 10, "function": [0, 2, 11], "gem": 10, "gener": [0, 2], "get": [0, 9], "get_respons": 0, "gijn": 10, "git": 7, "github": [1, 7, 10], "given": 1, "global": [0, 2], "goe": 0, "gone": 10, "googl": 1, "gpl": 6, "guarante": [4, 8], "gunakan": 10, "h": 0, "ha": [3, 4, 10, 11], "hack": 6, "hand": 6, "handl": [0, 3, 11], "hash": [1, 4], "have": [2, 10], "haven": 7, "header": [1, 4], "help": 2, "helper": 2, "henk": 10, "here": [2, 4, 8, 10], "hidden": 10, "host": [0, 1, 9, 10], "hour": 1, "how": [0, 5], "howev": 1, "html": [0, 4, 6, 7], "html_content": 0, "html_file_path": 0, "htmltweetsvisu": 0, "http": [0, 1, 3, 4, 10, 11], "httperror": [0, 6], "human": 4, "i": [0, 1, 2, 3, 4, 7, 8, 10, 12], "ifram": [6, 8], "imag": [2, 11], "implement": 11, "implicitli": 1, "import": 9, "improv": 2, "includ": [0, 4], "index": [1, 4, 6], "indic": 0, "indonesia": 10, "inform": [0, 3], "input": 10, "instal": [2, 6], "instanc": 0, "instruct": [0, 2], "int": [0, 4], "integ": 1, "intens": 6, "internet": [0, 3], "internetarch": 3, 
"io": 1, "irina_tech_tip": 10, "is_tweet_url": 0, "issu": [2, 3, 10], "item": 0, "itself": 0, "jack": [1, 9], "javascript": 8, "jpg": 11, "json": [0, 3, 4, 6, 11, 12], "json_path": 0, "jsonpars": [0, 11], "just": [4, 8], "kei": [0, 1, 4, 10], "know": 10, "l": 1, "last": 0, "latest": 3, "launch": 10, "lead": 6, "least": 1, "legaci": [2, 6], "legacy_app": 2, "librari": 5, "licens": 6, "like": 11, "limit": [0, 1, 6, 9, 10, 11, 12], "line": [0, 1, 2, 4, 8, 9, 10], "link": 0, "list": 0, "load": 0, "log": [0, 1, 11], "logo": 2, "long": 3, "longer": 3, "look": [1, 10], "love": 10, "m": 0, "machin": [0, 6, 10], "made": 1, "mai": 1, "main": 2, "mainten": 3, "make": 7, "manag": 0, "mani": 10, "manipul": 0, "manner": [4, 8], "map": 11, "match": [0, 6, 10], "matchtyp": [0, 1], "matrix": 0, "max": 3, "maximum": 0, "member": 10, "mempermudah": 10, "mend": 10, "messag": 3, "met": 0, "method": 0, "mimetyp": [0, 3, 4, 12], "minut": 6, "miss": 0, "mode": 1, "modul": [0, 2, 6], "moment": 1, "more": [0, 1, 10], "most": [1, 3, 10], "mous": 12, "mt": 1, "my": 10, "myosinttip": 10, "n": 1, "name": 4, "necessari": [0, 4, 6, 8], "need": 0, "nest": [4, 8], "network": [3, 4, 8], "new": [1, 3, 7, 10], "newslett": 10, "none": 0, "notat": 8, "note": [0, 1], "notebook": 5, "now": 10, "number": 0, "object": 8, "obtain": 12, "occur": [0, 3], "occurr": 0, "offici": 1, "offlin": 3, "often": 3, "old": [1, 4, 8, 10], "omit": 1, "onc": 0, "one": [1, 4], "onli": [0, 1, 10, 12], "onward": 1, "open": [2, 9, 10], "option": [0, 1, 6, 8, 9, 10], "order": 1, "org": [1, 3, 4, 6, 10], "origin": [0, 3, 4, 8, 10], "original_tweet_url": [0, 4, 8, 9], "osint": 10, "other": [1, 4, 5, 8, 11], "otherwis": 0, "our": 10, "out": [1, 3, 12], "output": [1, 3, 6], "over": 10, "overal": 0, "overload": 3, "packag": [1, 2, 3, 4], "page": [1, 6], "param": [0, 1], "paramet": [0, 1, 6], "pars": [3, 4, 5, 6, 8, 9, 11, 12], "parsed_archived_timestamp": 4, "parsed_archived_tweet_url": [4, 8], "parsed_tweet": 9, "parsed_tweet_url": 
[4, 8], "parser": [9, 11], "pass": 4, "path": [0, 1], "pathnam": [0, 1], "pattern": 0, "payload": 4, "pb": 0, "penelusuran": 10, "per": 1, "perform": [3, 4, 6], "phase": 0, "pip": 6, "pleas": [3, 10], "png": 11, "poetri": [2, 6], "possibl": [1, 3, 8, 12], "possibli": 3, "pre": [6, 7], "prefix": [0, 1, 10], "prerequisit": 2, "previou": [0, 1, 10], "price": 10, "print": [0, 11], "print_progress": 0, "process": 0, "progress": 0, "project": 2, "propos": 12, "prototyp": [9, 10], "provid": [0, 1], "publish": 0, "py": [7, 11], "python": [2, 7, 9], "queri": [0, 1, 6], "quickli": 10, "quickstart": 6, "rais": [0, 3], "rang": [1, 10], "rate": [6, 11], "re": [2, 10], "read": [0, 1, 7, 12], "readabl": 4, "readtimeouterror": [0, 6], "recommend": 1, "record": 4, "regardless": 12, "regular": 0, "relat": 11, "releas": 6, "relev": 0, "remov": [0, 11], "render": 8, "replac": 0, "repli": 0, "report": 2, "repositori": 7, "repres": 0, "represent": [0, 1, 4], "request": [3, 6], "requir": 1, "research": 10, "respect": 0, "respond": 3, "respons": [0, 3], "result": [0, 1, 6, 10, 12], "resumpt": [0, 10], "resumption_kei": [0, 1], "retri": 3, "retriev": [0, 1, 5, 6, 12], "return": [0, 1, 3, 4, 11], "retweet": 4, "revisit": 4, "rk": 1, "run": [3, 7], "same": 4, "save": [0, 3, 6, 8, 10, 12], "save_to_csv": [0, 9], "save_to_html": 0, "save_to_json": 0, "scheme": 0, "scope": [6, 10], "scroll": [1, 10], "search": [1, 6, 10], "second": 0, "see": [6, 10], "semicolon": 0, "semicolon_pars": 0, "send": 0, "sent": 10, "separ": [8, 11], "serv": 10, "server": [0, 1, 3, 10, 12], "servic": [0, 3, 4, 8], "set": [0, 1], "sever": 4, "sha1": [1, 4], "shell": 7, "should": 0, "show": 1, "similar": 1, "simpl": [1, 10], "sinc": 1, "size": 4, "skill": 2, "slash": 0, "slow": 1, "snapshot": [4, 11], "so": 10, "social": [4, 8], "sourc": [2, 6, 10], "spanish": 5, "specif": 5, "specifi": 0, "sphinx": 2, "standalon": 9, "start": [0, 1, 7], "statement": 11, "static": 0, "statu": [0, 1, 3, 4, 10], "status": 0, "statuscod": 
4, "still": [0, 4], "stop": 0, "str": [0, 1, 4, 8], "streamlit": [2, 7, 9, 10], "string": [0, 1, 4], "structur": [0, 10], "subcommand": 7, "subdomain": [0, 1], "subhost": 1, "substr": 1, "success": 0, "suggest": 2, "suppli": [1, 4], "surt": 1, "system": 11, "t": [1, 7, 10], "tag": [6, 8], "take": 3, "tc": [1, 4], "temporari": 6, "temporarili": 3, "test": [1, 6, 11], "text": [0, 1, 4], "than": 0, "thei": 0, "therefor": 12, "thi": [0, 1, 2, 3, 4, 5, 8, 10], "third": 0, "those": 10, "three": [0, 1, 8], "through": [0, 1, 8, 10], "throughout": 0, "time": [1, 3, 4], "timeout": 0, "timestamp": [0, 1, 3], "timestamp_from": 0, "timestamp_pars": 0, "timestamp_to": 0, "titl": 2, "todai": 11, "todo": 6, "too": [1, 3], "tool": [1, 4, 5, 8, 9, 10, 12], "train": 10, "transform": [1, 4], "transpos": 0, "true": 0, "tupl": 0, "tweet": [0, 1, 2, 3, 4, 5, 8, 9, 10, 11, 12], "tweet_id": 3, "tweet_url": [0, 3], "tweetsexport": [0, 9], "tweetspars": [0, 3, 9], "twimg": 0, "twitter": [0, 1, 3, 4, 8, 10, 11], "twitter_url": 0, "twitteremb": 0, "two": 0, "type": [0, 1, 4], "under": [1, 2], "union": 0, "uniqu": [1, 10], "unit": 11, "unk": 4, "unnest": [4, 8], "untuk": 10, "up": 1, "url": [0, 3, 4, 6, 8, 10, 11], "urlkei": [1, 10], "us": [0, 1, 2, 4, 5, 6, 8, 9, 10, 12], "usag": 6, "user": [3, 5], "usernam": [0, 1, 3, 6, 9], "usual": [1, 4], "util": [2, 4, 6, 8, 10], "v": 1, "v1": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], "valu": [0, 1, 4, 8], "valueerror": 0, "van": 10, "verbos": [0, 1], "veri": 2, "version": [7, 10], "via": 10, "video": 5, "view": [6, 8, 10], "virtual": 7, "visit": 0, "visual": 6, "wa": [3, 12], "wai": [1, 2, 10, 12], "want": 10, "warc": 4, "warn": 6, "wayback": [0, 1, 2, 5, 9, 10, 12], "wayback_machine_url": 0, "waybacktweet": [0, 2, 3, 5, 7, 9, 10, 11], "we": [1, 10], "web": [1, 3, 4, 8], "welcom": 2, "were": [4, 8], "what": 10, "when": [0, 1, 3, 4, 8, 10, 11], "where": [0, 1], "whether": [0, 4], "which": [0, 4, 8, 10], "who": 10, "wildcard": 1, "without": [0, 1], 
"workflow": 6, "would": 3, "written": [9, 12], "x": 10, "xx": 1, "y": 0, "year": 1, "you": [1, 2, 4, 7, 10], "yyyymmdd": 1, "yyyymmddhhmmss": [1, 4], "zoom": 12}, "titles": ["API", "CLI", "Contribute", "Exceptions", "Field Options", "Hands-On Examples", "Wayback Tweets", "Installation", "Outputs", "Quickstart", "Web App", "TODO", "Workflow"], "titleterms": {"On": 5, "addit": 6, "api": [0, 6], "app": [6, 9, 10], "cli": [1, 9], "collaps": 1, "command": 6, "comment": 10, "commun": 10, "config": 0, "connectionerror": 3, "contribut": 2, "csv": 8, "emptyresponseerror": 3, "exampl": 5, "except": 3, "export": 0, "field": 4, "filter": 10, "from": 7, "guid": 6, "hack": 2, "hand": 5, "html": 8, "httperror": 3, "indic": 6, "inform": 6, "instal": 7, "interfac": 6, "json": 8, "legaci": 10, "line": 6, "match": 1, "modul": 9, "option": 4, "output": 8, "paramet": 10, "pars": 0, "pip": 7, "poetri": 7, "queri": 10, "quickstart": 9, "readtimeouterror": 3, "refer": 6, "request": 0, "scope": 1, "sourc": 7, "streamlit": 6, "tabl": 6, "test": 2, "todo": 11, "tweet": 6, "url": 1, "us": 7, "usag": 1, "user": 6, "usernam": 10, "util": 0, "visual": 0, "warn": 3, "wayback": 6, "waybacktweet": 1, "web": [6, 9, 10], "workflow": 12}}) \ No newline at end of file diff --git a/docs/contribute.rst b/docs/contribute.rst index edaab0c..568efd4 100644 --- a/docs/contribute.rst +++ b/docs/contribute.rst @@ -31,8 +31,3 @@ Brief explanation about the code under the Wayback Tweets directory: - ``waybacktweets/config``: Global configuration module - ``waybacktweets/exceptions``: Wayback Tweets Exceptions - ``waybacktweets/utils``: Helper functions used in the package - -Sponsoring ------------- - -You can also donate to the project's developer and maintainer, `Claromes `_, via `GitHub Sponsor `_ or if you are interested in sponsoring the project you can contact via email at support at claromes dot com. 
diff --git a/docs/index.rst b/docs/index.rst index 23922ec..c06b4d1 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -11,10 +11,6 @@ Pre-release: |release| Retrieves archived tweets CDX data from the Wayback Machine, performs necessary parsing (see :ref:`field_options`), and saves the data in HTML, for easy viewing of the tweets using the iframe tags, CSV, and JSON formats. -.. image:: https://img.shields.io/badge/donate-via%20sponsors-ff69b4.svg?logo=github - :target: https://github.com/sponsors/claromes - :alt: sponsors - .. note:: Intensive queries can lead to rate limiting, resulting in a temporary ban of a few minutes from web.archive.org. diff --git a/docs/installation.rst b/docs/installation.rst index dd35b7a..9c942b1 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -40,7 +40,6 @@ From source pip install poetry - **Install the dependencies:** .. code-block:: shell @@ -59,17 +58,19 @@ From source poetry run waybacktweets [SUBCOMMANDS] - **Starts a new shell and activates the virtual environment:** + **Run the Streamlit App:** - .. code-block:: shell + - Starts a new shell and activates the virtual environment: - poetry shell + .. code-block:: shell - **Run the Streamlit App:** + poetry shell - .. code-block:: shell + - Run the Streamlit: + + .. 
code-block:: shell - streamlit run app/app.py + streamlit run app/app.py **Build the docs:** diff --git a/docs/todo.rst b/docs/todo.rst index 3c8d469..89e5586 100644 --- a/docs/todo.rst +++ b/docs/todo.rst @@ -15,4 +15,4 @@ TODO |uncheck| Mapping and parsing of other Twitter-related URLs -|uncheck| Develop a scraper to download snapshots from https://archive.today +|uncheck| Download snapshots from https://archive.today diff --git a/waybacktweets/_cli.py b/waybacktweets/_cli.py index 9c78349..c7bed4e 100644 --- a/waybacktweets/_cli.py +++ b/waybacktweets/_cli.py @@ -41,7 +41,14 @@ def _parse_date( @click.command( context_settings={"help_option_names": ["-h", "--help"]}, - epilog="Check out our docs at https://claromes.github.io/waybacktweets for more details", # noqa: E501 + epilog=""" +Examples:\n + Retrieve all tweets: waybacktweets jack\n\n + With options and verbose output: waybacktweets --from 20200305 --to 20231231 --limit 300 --verbose jack\n\n + +Documentation:\n + https://claromes.github.io/waybacktweets/ + """, # noqa: E501 ) @click.argument("username", type=str) @click.option( @@ -112,10 +119,8 @@ def main( verbose: Optional[bool], ) -> None: """ - Retrieves archived tweets CDX data from the Wayback Machine, performs necessary parsing, and saves the data. - USERNAME: The Twitter username without @ - """ # noqa: E501 + """ try: config.verbose = verbose