From 6c2a264d0ff441efd12629cea7f0f388d51b475a Mon Sep 17 00:00:00 2001 From: Claromes Date: Wed, 13 Dec 2023 17:52:25 -0300 Subject: [PATCH] add msgs and add collapse by timestamp --- app.py | 3 ++- docs/CHANGELOG.md | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/app.py b/app.py index 4807727..29f9385 100644 --- a/app.py +++ b/app.py @@ -220,7 +220,7 @@ def query_api(handle, limit, offset, saved_at): st.warning('username, please!') st.stop() - url = f'https://web.archive.org/cdx/search/cdx?url=https://twitter.com/{handle}/status/*&output=json&limit={limit}&offset={offset}&from={saved_at[0]}&to={saved_at[1]}' + url = f'https://web.archive.org/cdx/search/cdx?url=https://twitter.com/{handle}/status/*&collapse=timestamp:8&output=json&limit={limit}&offset={offset}&from={saved_at[0]}&to={saved_at[1]}' try: response = requests.get(url) response.raise_for_status() @@ -378,6 +378,7 @@ if query or st.session_state.count: st.session_state.count = tweets_count(handle, st.session_state.saved_at) + st.caption('The search optimization uses an 8-digit [collapsing strategy](https://github.com/internetarchive/wayback/blob/master/wayback-cdx-server/README.md?ref=hackernoon.com#collapsing), refining the captures to one per day. The number of tweets per page is set to 25, and this is a fixed value due to the API rate limit.') st.write(f'**{st.session_state.count} URLs have been captured**') if st.session_state.count: diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 7f2d82e..11cc44c 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## [v0.4.3](https://github.com/claromes/waybacktweets/releases/tag/v0.4.3) - 2023-12-13 +- Add: + - 8-digit collapsing strategy (one capture per day) + - Messages about collapsing strategy and number of tweets displayed + ## [v0.4.2](https://github.com/claromes/waybacktweets/releases/tag/v0.4.2) - 2023-12-13 - Add: - Parse tweet URLs to delete `/photos`, `/likes`, `/retweets` and other sub-endpoints -- 2.34.1