- Limit: Query result limits.
-- Offset: Allows for a simple way to scroll through the results.
+- Resumption Key: Allows for a simple way to scroll through the results. Use the key returned by a query to continue from the end of that query.
- Only unique Wayback Machine URLs: Filtering by the collapse option using the ``urlkey`` field and the URL Match Scope ``prefix``
help="Query result limits.",
)
@click.option(
- "-o",
- "--offset",
- type=int,
- metavar="INTEGER",
+ "-rk",
+ "--resumption_key",
+ type=str,
default=None,
- help="Allows for a simple way to scroll through the results.",
+ help="Allows for a simple way to scroll through the results. Key to continue the query from the end of the previous query.", # noqa: E501
)
@click.option(
"-mt",
timestamp_from: Optional[str],
timestamp_to: Optional[str],
limit: Optional[int],
- offset: Optional[int],
+ resumption_key: Optional[str],
matchtype: Optional[str],
verbose: Optional[bool],
) -> None:
config.verbose = verbose
api = WaybackTweets(
- username, collapse, timestamp_from, timestamp_to, limit, offset, matchtype
+ username,
+ collapse,
+ timestamp_from,
+ timestamp_to,
+ limit,
+ resumption_key,
+ matchtype,
)
print(f"Waybacking @{username}'s archived tweets...")
"archived_statuscode",
"archived_digest",
"archived_length",
+ "resumption_key",
]
parser = TweetsParser(archived_tweets, username, field_options)
self.field_options = field_options
self.parsed_tweets = {option: [] for option in self.field_options}
+ if "resumption_key" not in self.parsed_tweets:
+ self.parsed_tweets["resumption_key"] = []
+
+ self._add_resumption_key()
+
+ def _add_resumption_key(self):
+     """Record and print the resumption key, if the response carries one.
+
+     Per the Wayback CDX Server API documentation, a query made with
+     ``showResumeKey=true`` ends with an empty row followed by a
+     single-element row holding the resumption key, and only when more
+     results remain beyond the current query. When that trailer is absent
+     the last row is an ordinary capture row, so nothing is recorded or
+     printed instead of mistaking its first field for a key.
+
+     NOTE(review): assumes ``self.archived_tweets_response`` is the decoded
+     JSON list of rows returned by the CDX server -- confirm against the
+     caller that builds it.
+
+     Raises:
+         ValueError: If the list of archived tweet responses is empty.
+
+     """
+     rows = self.archived_tweets_response
+     if not rows:
+         raise ValueError("The list of archived tweet responses is empty.")
+
+     # The key trailer is `[]` followed by `[key]`; anything else means the
+     # results were complete and no resumption key was returned.
+     has_resume_key = len(rows) >= 2 and not rows[-2] and len(rows[-1]) == 1
+     if not has_resume_key:
+         return
+
+     resumption_key = rows[-1][0]
+     self.parsed_tweets["resumption_key"].append(resumption_key)
+
+     rprint(
+         f'[blue]\nResumption Key: [bold]{resumption_key}[/bold]\nIf you are using the "limit" option, use this key in the "resumption_key" option and continue the query from the end of the previous query.\n' # noqa: E501
+     )
+
def _add_field(self, key: str, value: Any) -> None:
"""
Appends a value to a list in the parsed data structure.
for future in as_completed(futures):
try:
future.result()
+ except IndexError:
+ pass
except Exception as e:
rprint(f"[red]{e}")
timestamp_from (str, optional): The timestamp to start retrieving tweets from.
timestamp_to (str, optional): The timestamp to stop retrieving tweets at.
limit (int, optional): The maximum number of results to return.
- offset (int, optional): The number of lines to skip in the results.
+ resumption_key (str, optional): Key to continue the query from the end of the previous query.
matchtype (str, optional): Results matching a certain prefix, a certain host or all subdomains.
""" # noqa: E501
timestamp_from: str = None,
timestamp_to: str = None,
limit: int = None,
- offset: int = None,
+ resumption_key: str = None,
matchtype: str = None,
):
self.username = username
self.timestamp_from = timestamp_from
self.timestamp_to = timestamp_to
self.limit = limit
- self.offset = offset
+ self.resumption_key = resumption_key
self.matchtype = matchtype
def get(self) -> Optional[Dict[str, Any]]:
params = {
"url": f"https://twitter.com/{self.username}/status{wildcard_pathname}",
+ "showResumeKey": "true",
"output": "json",
}
if self.limit:
params["limit"] = self.limit
- if self.offset:
- params["offset"] = self.offset
+ if self.resumption_key:
+ params["resumption_key"] = self.resumption_key
if self.matchtype:
params["matchType"] = self.matchtype
"archived_statuscode",
"archived_digest",
"archived_length",
+ "resumption_key",
]