add msgs and add collapse by timestamp
author Claromes <claromes@hey.com>
Wed, 13 Dec 2023 20:52:25 +0000 (17:52 -0300)
committer Claromes <claromes@hey.com>
Wed, 13 Dec 2023 20:52:25 +0000 (17:52 -0300)
app.py
docs/CHANGELOG.md

diff --git a/app.py b/app.py
index 4807727b4719ccacd8f6db9371e06ae954cd056d..29f9385c51d5ced9eaa0ecc2c5b949f2fd38fdb9 100644 (file)
--- a/app.py
+++ b/app.py
@@ -220,7 +220,7 @@ def query_api(handle, limit, offset, saved_at):
         st.warning('username, please!')
         st.stop()
 
-    url = f'https://web.archive.org/cdx/search/cdx?url=https://twitter.com/{handle}/status/*&output=json&limit={limit}&offset={offset}&from={saved_at[0]}&to={saved_at[1]}'
+    url = f'https://web.archive.org/cdx/search/cdx?url=https://twitter.com/{handle}/status/*&collapse=timestamp:8&output=json&limit={limit}&offset={offset}&from={saved_at[0]}&to={saved_at[1]}'
     try:
         response = requests.get(url)
         response.raise_for_status()
@@ -378,6 +378,7 @@ if query or st.session_state.count:
 
     st.session_state.count = tweets_count(handle, st.session_state.saved_at)
 
+    st.caption('The search optimization uses an 8-digit [collapsing strategy](https://github.com/internetarchive/wayback/blob/master/wayback-cdx-server/README.md?ref=hackernoon.com#collapsing), refining the captures to one per day. The number of tweets per page is set to 25, and this is a fixed value due to the API rate limit.')
     st.write(f'**{st.session_state.count} URLs have been captured**')
 
     if st.session_state.count:
diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
index 7f2d82e84d95cecf93afb4cac20e424b1f1366b0..11cc44c82ab2e614c424e63b855dc851ffc7ca0a 100644 (file)
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -1,5 +1,10 @@
 # Changelog
 
+## [v0.4.3](https://github.com/claromes/waybacktweets/releases/tag/v0.4.3) - 2023-12-13
+- Add:
+  - 8-digit collapsing strategy (one capture per day)
+  - Messages about collapsing strategy and number of tweets displayed
+
 ## [v0.4.2](https://github.com/claromes/waybacktweets/releases/tag/v0.4.2) - 2023-12-13
 - Add:
   - Parse tweet URLs to delete `/photos`, `/likes`, `/retweets` and other sub-endpoints