update dirs
author Claromes <claromes@hey.com>
Sun, 16 Jun 2024 11:55:39 +0000 (08:55 -0300)
committer Claromes <claromes@hey.com>
Sun, 16 Jun 2024 11:55:39 +0000 (08:55 -0300)
LICENSE.md
docs/cli.rst
pyproject.toml
waybacktweets/_cli.py [new file with mode: 0644]
waybacktweets/cli/__init__.py [deleted file]
waybacktweets/cli/main.py [deleted file]
waybacktweets/utils/__init__.py

diff --git a/LICENSE.md b/LICENSE.md
index f240dcad2de9c1c5fa61602cf58c4d634a93c892..624d3761b62aff59d6871040623a85438da0a3f3 100644 (file)
--- a/LICENSE.md
+++ b/LICENSE.md
@@ -631,7 +631,7 @@ to attach them to the start of each source file to most effectively
 state the exclusion of warranty; and each file should have at least
 the "copyright" line and a pointer to where the full notice is found.
 
-    Wayback Tweets - Retrieves archived tweets' CDX data from the Wayback Machine, performs necessary parsing, and saves the data.
+    Wayback Tweets - Retrieves archived tweets CDX data from the Wayback Machine, performs necessary parsing, and saves the data.
     Copyright (C) 2023  Clarissa Mendes (Claromes)
 
     This program is free software: you can redistribute it and/or modify
diff --git a/docs/cli.rst b/docs/cli.rst
index f6f19fc1908eb61873d4b595546ca1346358f259..2a160407406a66d263f6e53ad0bb6336b41ef844 100644 (file)
--- a/docs/cli.rst
+++ b/docs/cli.rst
@@ -4,7 +4,7 @@ CLI
 Usage
 ---------
 
-.. click:: waybacktweets.cli.main:cli
+.. click:: waybacktweets._cli:main
    :prog: waybacktweets
    :nested: full
 
diff --git a/pyproject.toml b/pyproject.toml
index 38b47069c14d2af7b33356da9d9542a73c6b60ee..abd71e5056e4875bea02ff9046c568c11f453a44 100644 (file)
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -35,7 +35,7 @@ max-line-length = 88
 extend-ignore = ["E203", "E701"]
 
 [tool.poetry.scripts]
-waybacktweets = 'waybacktweets.cli.main:cli'
+waybacktweets = 'waybacktweets._cli:main'
 
 [build-system]
 requires = ["poetry-core"]
diff --git a/waybacktweets/_cli.py b/waybacktweets/_cli.py
new file mode 100644 (file)
index 0000000..753b8f5
--- /dev/null
+++ b/waybacktweets/_cli.py
@@ -0,0 +1,135 @@
+"""
+CLI functions for retrieving archived tweets.
+"""
+
+from datetime import datetime
+from typing import Any, Optional
+
+import click
+from rich import print as rprint
+
+from waybacktweets.api.export_tweets import TweetsExporter
+from waybacktweets.api.parse_tweets import TweetsParser
+from waybacktweets.api.request_tweets import WaybackTweets
+
+
+def parse_date(
+    ctx: Optional[Any] = None, param: Optional[Any] = None, value: Optional[str] = None
+) -> Optional[str]:
+    """
+    Parses a date string and returns it in the format "YYYYMMDD".
+
+    :param ctx: Necessary when used with the click package. Defaults to None.
+    :param param: Necessary when used with the click package. Defaults to None.
+    :param value: A date string in the "YYYYMMDD" format. Defaults to None.
+
+    :returns: The input date string formatted in the "YYYYMMDD" format,
+        or None if no date string was provided.
+    """
+    try:
+        if value is None:
+            return None
+
+        date = datetime.strptime(value, "%Y%m%d")
+
+        return date.strftime("%Y%m%d")
+    except ValueError:
+        raise click.BadParameter("Date must be in format YYYYmmdd")
+
+
+@click.command()
+@click.argument("username", type=str)
+@click.option(
+    "--collapse",
+    type=click.Choice(["urlkey", "digest", "timestamp:XX"], case_sensitive=False),
+    default=None,
+    help="Collapse results based on a field, or a substring of a field. XX in the timestamp value ranges from 1 to 14, comparing the first XX digits of the timestamp field. It is recommended to use from 4 onwards, to compare at least by years.",  # noqa: E501
+)
+@click.option(
+    "--from",
+    "timestamp_from",
+    type=click.UNPROCESSED,
+    metavar="DATE",
+    callback=parse_date,
+    default=None,
+    help="Filtering by date range from this date. Format: YYYYmmdd",
+)
+@click.option(
+    "--to",
+    "timestamp_to",
+    type=click.UNPROCESSED,
+    metavar="DATE",
+    callback=parse_date,
+    default=None,
+    help="Filtering by date range up to this date. Format: YYYYmmdd",
+)
+@click.option(
+    "--limit", type=int, metavar="INTEGER", default=None, help="Query result limits."
+)
+@click.option(
+    "--offset",
+    type=int,
+    metavar="INTEGER",
+    default=None,
+    help="Allows for a simple way to scroll through the results.",
+)
+@click.option(
+    "--matchtype",
+    type=click.Choice(["exact", "prefix", "host", "domain"], case_sensitive=False),
+    default=None,
+    help="Results matching a certain prefix, a certain host or all subdomains.",  # noqa: E501
+)
+def main(
+    username: str,
+    collapse: Optional[str],
+    timestamp_from: Optional[str],
+    timestamp_to: Optional[str],
+    limit: Optional[int],
+    offset: Optional[int],
+    matchtype: Optional[str],
+) -> None:
+    """
+    Retrieves archived tweets CDX data from the Wayback Machine,
+    performs necessary parsing, and saves the data.
+
+    USERNAME: The Twitter username without @.
+    """
+    try:
+        api = WaybackTweets(
+            username, collapse, timestamp_from, timestamp_to, limit, offset, matchtype
+        )
+
+        print("Making a request to the Internet Archive...")
+        archived_tweets = api.get()
+
+        if archived_tweets:
+            field_options = [
+                "archived_urlkey",
+                "archived_timestamp",
+                "original_tweet_url",
+                "archived_tweet_url",
+                "parsed_tweet_url",
+                "parsed_archived_tweet_url",
+                "available_tweet_text",
+                "available_tweet_is_RT",
+                "available_tweet_info",
+                "archived_mimetype",
+                "archived_statuscode",
+                "archived_digest",
+                "archived_length",
+            ]
+
+            parser = TweetsParser(archived_tweets, username, field_options)
+            parsed_tweets = parser.parse(print_progress=True)
+
+            exporter = TweetsExporter(parsed_tweets, username, field_options)
+
+            exporter.save_to_csv()
+            exporter.save_to_json()
+            exporter.save_to_html()
+    except Exception as e:
+        rprint(f"[red]{e}")
+    finally:
+        rprint(
+            "[yellow]\nNeed help? Read the docs: https://claromes.github.io/waybacktweets"  # noqa: E501
+        )
diff --git a/waybacktweets/cli/__init__.py b/waybacktweets/cli/__init__.py
deleted file mode 100644 (file)
index e38525e..0000000
--- a/waybacktweets/cli/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-# flake8: noqa: F401
-
-from waybacktweets.cli.main import cli
diff --git a/waybacktweets/cli/main.py b/waybacktweets/cli/main.py
deleted file mode 100644 (file)
index 7c31e6a..0000000
--- a/waybacktweets/cli/main.py
+++ /dev/null
@@ -1,135 +0,0 @@
-"""
-CLI functions for retrieving archived tweets.
-"""
-
-from datetime import datetime
-from typing import Any, Optional
-
-import click
-from rich import print as rprint
-
-from waybacktweets.api.export_tweets import TweetsExporter
-from waybacktweets.api.parse_tweets import TweetsParser
-from waybacktweets.api.request_tweets import WaybackTweets
-
-
-def parse_date(
-    ctx: Optional[Any] = None, param: Optional[Any] = None, value: Optional[str] = None
-) -> Optional[str]:
-    """
-    Parses a date string and returns it in the format "YYYYMMDD".
-
-    :param ctx: Necessary when used with the click package. Defaults to None.
-    :param param: Necessary when used with the click package. Defaults to None.
-    :param value: A date string in the "YYYYMMDD" format. Defaults to None.
-
-    :returns: The input date string formatted in the "YYYYMMDD" format,
-        or None if no date string was provided.
-    """
-    try:
-        if value is None:
-            return None
-
-        date = datetime.strptime(value, "%Y%m%d")
-
-        return date.strftime("%Y%m%d")
-    except ValueError:
-        raise click.BadParameter("Date must be in format YYYYmmdd")
-
-
-@click.command()
-@click.argument("username", type=str)
-@click.option(
-    "--collapse",
-    type=click.Choice(["urlkey", "digest", "timestamp:XX"], case_sensitive=False),
-    default=None,
-    help="Collapse results based on a field, or a substring of a field. XX in the timestamp value ranges from 1 to 14, comparing the first XX digits of the timestamp field. It is recommended to use from 4 onwards, to compare at least by years.",  # noqa: E501
-)
-@click.option(
-    "--from",
-    "timestamp_from",
-    type=click.UNPROCESSED,
-    metavar="DATE",
-    callback=parse_date,
-    default=None,
-    help="Filtering by date range from this date. Format: YYYYmmdd",
-)
-@click.option(
-    "--to",
-    "timestamp_to",
-    type=click.UNPROCESSED,
-    metavar="DATE",
-    callback=parse_date,
-    default=None,
-    help="Filtering by date range up to this date. Format: YYYYmmdd",
-)
-@click.option(
-    "--limit", type=int, metavar="INTEGER", default=None, help="Query result limits."
-)
-@click.option(
-    "--offset",
-    type=int,
-    metavar="INTEGER",
-    default=None,
-    help="Allows for a simple way to scroll through the results.",
-)
-@click.option(
-    "--matchtype",
-    type=click.Choice(["exact", "prefix", "host", "domain"], case_sensitive=False),
-    default=None,
-    help="Results matching a certain prefix, a certain host or all subdomains.",  # noqa: E501
-)
-def cli(
-    username: str,
-    collapse: Optional[str],
-    timestamp_from: Optional[str],
-    timestamp_to: Optional[str],
-    limit: Optional[int],
-    offset: Optional[int],
-    matchtype: Optional[str],
-) -> None:
-    """
-    Retrieves archived tweets CDX data from the Wayback Machine,
-    performs necessary parsing, and saves the data.
-
-    USERNAME: The Twitter username without @.
-    """
-    try:
-        api = WaybackTweets(
-            username, collapse, timestamp_from, timestamp_to, limit, offset, matchtype
-        )
-
-        print("Making a request to the Internet Archive...")
-        archived_tweets = api.get()
-
-        if archived_tweets:
-            field_options = [
-                "archived_urlkey",
-                "archived_timestamp",
-                "original_tweet_url",
-                "archived_tweet_url",
-                "parsed_tweet_url",
-                "parsed_archived_tweet_url",
-                "available_tweet_text",
-                "available_tweet_is_RT",
-                "available_tweet_info",
-                "archived_mimetype",
-                "archived_statuscode",
-                "archived_digest",
-                "archived_length",
-            ]
-
-            parser = TweetsParser(archived_tweets, username, field_options)
-            parsed_tweets = parser.parse(print_progress=True)
-
-            exporter = TweetsExporter(parsed_tweets, username, field_options)
-
-            exporter.save_to_csv()
-            exporter.save_to_json()
-            exporter.save_to_html()
-    except Exception as e:
-        rprint(f"[red]{e}")
-    finally:
-        rprint(
-            "[yellow]\nNeed help? Read the docs: https://claromes.github.io/waybacktweets"  # noqa: E501
-        )
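diff --git a/waybacktweets/utils/__init__.py b/waybacktweets/utils/__init__.py
index 6752ce23dbe830a7f089fe26b36c453884bc7775..8a7685589280c2c61bb91c392c44e158932633f0 100644 (file)
--- a/waybacktweets/utils/__init__.py
+++ b/waybacktweets/utils/__init__.py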
@@ -1,3 +1,12 @@
 # flake8: noqa: F401
 
-from waybacktweets.utils.utils import *
+from waybacktweets.utils.utils import (
+    check_double_status,
+    check_pattern_tweet,
+    clean_tweet_url,
+    clean_wayback_machine_url,
+    delete_tweet_pathnames,
+    get_response,
+    is_tweet_url,
+    semicolon_parser,
+)