From 05caf43bc98ef4e1a8323a8aedb17310742c0a1c Mon Sep 17 00:00:00 2001 From: claromes Date: Fri, 14 Jun 2024 10:12:58 +0000 Subject: [PATCH 01/63] Initial gh-pages commit From eec076e539e8774cf403114df60d7ff3797a72dc Mon Sep 17 00:00:00 2001 From: claromes Date: Fri, 14 Jun 2024 10:12:59 +0000 Subject: [PATCH 02/63] =?UTF-8?q?Deploying=20to=20gh-pages=20from=20@=20cl?= =?UTF-8?q?aromes/waybacktweets@f266279d0ddf463c6b7d63873718bc899b794174?= =?UTF-8?q?=20=F0=9F=9A=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .nojekyll | 0 404.html | 89 +++ _sources/api.rst.txt | 59 ++ _sources/cli.rst.txt | 9 + _sources/contribute.rst.txt | 35 ++ _sources/errors.rst.txt | 31 + _sources/index.rst.txt | 74 +++ _sources/installation.rst.txt | 40 ++ _sources/modules.rst.txt | 7 + _sources/quickstart.rst.txt | 42 ++ _sources/result.rst.txt | 40 ++ _sources/streamlit.rst.txt | 50 ++ _sources/workflow.rst.txt | 23 + _static/basic.css | 925 ++++++++++++++++++++++++++++++ _static/css/custom.css | 4 + _static/doctools.js | 156 +++++ _static/documentation_options.js | 13 + _static/file.png | Bin 0 -> 286 bytes _static/flask.css | 15 + _static/language_data.js | 199 +++++++ _static/minus.png | Bin 0 -> 90 bytes _static/parthenon.svg | 26 + _static/plus.png | Bin 0 -> 90 bytes _static/pocoo.css | 525 +++++++++++++++++ _static/pygments.css | 84 +++ _static/searchtools.js | 619 ++++++++++++++++++++ _static/sphinx_highlight.js | 154 +++++ _static/version_warning_offset.js | 40 ++ api.html | 330 +++++++++++ cli.html | 161 ++++++ contribute.html | 132 +++++ errors.html | 123 ++++ genindex.html | 351 ++++++++++++ index.html | 202 +++++++ installation.html | 134 +++++ modules.html | 141 +++++ objects.inv | Bin 0 -> 760 bytes py-modindex.html | 129 +++++ quickstart.html | 137 +++++ result.html | 121 ++++ search.html | 106 ++++ searchindex.js | 1 + streamlit.html | 138 +++++ workflow.html | 127 ++++ 44 files changed, 5592 insertions(+) create mode 100644 .nojekyll create mode 100644 404.html create mode 100644 _sources/api.rst.txt create mode 100644 _sources/cli.rst.txt create mode 100644 _sources/contribute.rst.txt create mode 100644 _sources/errors.rst.txt create mode 100644 _sources/index.rst.txt create mode 100644 _sources/installation.rst.txt create mode 100644 _sources/modules.rst.txt create mode 100644 _sources/quickstart.rst.txt create mode 100644 _sources/result.rst.txt create mode 100644 _sources/streamlit.rst.txt create mode 100644 _sources/workflow.rst.txt create mode 100644 _static/basic.css create mode 100644 _static/css/custom.css create mode 100644 _static/doctools.js create mode 100644 _static/documentation_options.js create mode 100644 _static/file.png create mode 100644 _static/flask.css create mode 100644 _static/language_data.js create mode 100644 _static/minus.png create mode 100644 _static/parthenon.svg create mode 100644 _static/plus.png create mode 100644 _static/pocoo.css create mode 100644 _static/pygments.css create mode 100644 _static/searchtools.js create mode 100644 _static/sphinx_highlight.js create mode 100644 _static/version_warning_offset.js create mode 100644 api.html create mode 100644 cli.html create mode 100644 contribute.html create mode 100644 errors.html create mode 100644 genindex.html create mode 100644 index.html create mode 100644 installation.html create mode 100644 modules.html create mode 100644 objects.inv create mode 100644 py-modindex.html create mode 100644 quickstart.html create mode 100644 result.html create mode 100644 
search.html create mode 100644 searchindex.js create mode 100644 streamlit.html create mode 100644 workflow.html diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 0000000..e69de29 diff --git a/404.html b/404.html new file mode 100644 index 0000000..43a7e90 --- /dev/null +++ b/404.html @@ -0,0 +1,89 @@ + + + + + + + + Page Not Found — Wayback Tweets Documentation (1.0.x) + + + + + + + + + + + + + + +
+Page Not Found
+
+The page you requested does not exist. You may have followed a bad
+link, or the page may have been moved or removed.
+
+Go to the overview or search.
+
+ + + + + + \ No newline at end of file diff --git a/_sources/api.rst.txt b/_sources/api.rst.txt new file mode 100644 index 0000000..283e429 --- /dev/null +++ b/_sources/api.rst.txt @@ -0,0 +1,59 @@ +API +==== + +Request +--------- + +.. module:: waybacktweets.api.request_tweets + +.. autoclass:: WaybackTweets + :members: + + + +Parse +--------- + +.. module:: waybacktweets.api.parse_tweets + +.. autoclass:: TweetsParser + :members: + +.. autoclass:: TwitterEmbed + :members: + +.. autoclass:: JsonParser + :members: + + +Export +--------- + +.. module:: waybacktweets.api.export_tweets + +.. autoclass:: TweetsExporter + :members: + + +Visualizer +----------- + +.. module:: waybacktweets.api.viz_tweets + +.. autoclass:: HTMLTweetsVisualizer + :members: + + +Utils +------- + +.. module:: waybacktweets.utils.utils + +.. autofunction:: check_double_status +.. autofunction:: check_pattern_tweet +.. autofunction:: clean_tweet_url +.. autofunction:: clean_wayback_machine_url +.. autofunction:: delete_tweet_pathnames +.. autofunction:: get_response +.. autofunction:: parse_date +.. autofunction:: semicolon_parser diff --git a/_sources/cli.rst.txt b/_sources/cli.rst.txt new file mode 100644 index 0000000..9276250 --- /dev/null +++ b/_sources/cli.rst.txt @@ -0,0 +1,9 @@ +CLI +================ + +Usage +--------- + +.. click:: waybacktweets.cli.main:cli + :prog: waybacktweets + :nested: full diff --git a/_sources/contribute.rst.txt b/_sources/contribute.rst.txt new file mode 100644 index 0000000..298ec43 --- /dev/null +++ b/_sources/contribute.rst.txt @@ -0,0 +1,35 @@ +Contribute +================ + +Here are all the ways you can contribute to this project. + +Testing +--------- + +The best way to help is by using the package, either on the command line or as a module, suggesting improvements and reporting bugs. You're very welcome to `open an issue `_. + + +Hacking +--------- + +If you have Python skills, contribute to the `code `_. + +These are the prerequisites: + +- Python 3.11+ +- Poetry + +Install from the source, following `these instructions `_. + +Brief explanation about the code under the Wayback Tweets directory: + +- ``app``: Streamlit app +- ``docs``: Documentation generated with Sphinx +- ``waybacktweets/api``: Main package modules +- ``waybacktweets/cli``: Command line Interface module +- ``waybacktweets/utils``: Helper functions used in the package + +Sponsoring +----------- + +You can also donate to the project's developer and maintainer, `Claromes `_, via `GitHub Sponsor `_ or if you are interested in sponsoring the project you can contact via email at support at claromes dot com. diff --git a/_sources/errors.rst.txt b/_sources/errors.rst.txt new file mode 100644 index 0000000..492279b --- /dev/null +++ b/_sources/errors.rst.txt @@ -0,0 +1,31 @@ +Errors +================ + +These are the most common errors and are handled by the ``waybacktweets`` package. + +ReadTimeout +---------------- + +This error occurs when a request to the web.archive.org server takes too long to respond. The server could be overloaded or there could be network issues. + +The output message from the package would be: ``Connection to web.archive.org timed out.`` + +ConnectionError +---------------- + +This error is raised when the package fails to establish a new connection with web.archive.org. This could be due to network issues or the server being down. + +The output message from the package would be: ``Failed to establish a new connection with web.archive.org. 
Max retries exceeded.`` + +This is the error often returned when performing experimental parsing of URLs with the mimetype ``application/json``. + +The warning output message from the package would be: ``Connection error with https://web.archive.org/web//https://twitter.com//status/. Max retries exceeded. Error parsing the JSON, but the CDX data was saved.`` + +HTTPError +---------------- + +This error occurs when the Internet Archive services are temporarily offline. This could be due to maintenance or server issues. + +The output message from the package would be: ``Temporarily Offline: Internet Archive services are temporarily offline. Please check Internet Archive Twitter feed (https://twitter.com/internetarchive) for the latest information.`` + + diff --git a/_sources/index.rst.txt b/_sources/index.rst.txt new file mode 100644 index 0000000..b41345c --- /dev/null +++ b/_sources/index.rst.txt @@ -0,0 +1,74 @@ +.. rst-class:: hide-header + +Wayback Tweets +================ + + +Wayback Tweets Documentation +------------------------------ + +Retrieves archived tweets' CDX data from the Wayback Machine, performs necessary parsing, and saves the data. + + +User Guide +------------ + +.. toctree:: + :maxdepth: 2 + + installation + quickstart + workflow + result + errors + contribute + + +Command-Line Interface +------------------------ +.. toctree:: + :maxdepth: 2 + + cli + + +API Reference +--------------- + +.. toctree:: + :maxdepth: 2 + + api + + +Streamlit Web App +------------------- + +.. toctree:: + :maxdepth: 2 + + streamlit + + +Additional Information +----------------------- + +.. toctree:: + :maxdepth: 1 + +.. raw:: html + + + +Indices and tables +---------------------- + +.. toctree:: + :maxdepth: 2 + + genindex + modindex + search diff --git a/_sources/installation.rst.txt b/_sources/installation.rst.txt new file mode 100644 index 0000000..725f6c7 --- /dev/null +++ b/_sources/installation.rst.txt @@ -0,0 +1,40 @@ +Installation +================ + + +Using pip +------------ + + .. code-block:: shell + + pip install waybacktweets + +From source +------------- + + Clone the repository: + + .. code-block:: shell + + git clone git@github.com:claromes/waybacktweets.git + + Change directory: + + .. code-block:: shell + + cd waybacktweets + + Install poetry, if you haven't already: + + .. code-block:: shell + + pip install poetry + + + Install the dependencies: + + .. code-block:: shell + + poetry install + +`Read the Poetry CLI documentation `_. diff --git a/_sources/modules.rst.txt b/_sources/modules.rst.txt new file mode 100644 index 0000000..88fb379 --- /dev/null +++ b/_sources/modules.rst.txt @@ -0,0 +1,7 @@ +waybacktweets +============= + +.. toctree:: + :maxdepth: 4 + + api diff --git a/_sources/quickstart.rst.txt b/_sources/quickstart.rst.txt new file mode 100644 index 0000000..916996b --- /dev/null +++ b/_sources/quickstart.rst.txt @@ -0,0 +1,42 @@ +Quickstart +================ + +CLI +------------- + +Using Wayback Tweets as a standalone command line tool + +waybacktweets [OPTIONS] USERNAME + +.. code-block:: shell + + waybacktweets --from 20150101 --to 20191231 --limit 250 jack` + + +Module +------------- + +Using Wayback Tweets as a Python Module + +.. 
code-block:: python + + from waybacktweets import WaybackTweets + from waybacktweets.utils import parse_date + + username = "jack" + collapse = "urlkey" + timestamp_from = parse_date("20150101") + timestamp_to = parse_date("20191231") + limit = 250 + offset = 0 + + api = WaybackTweets(username, collapse, timestamp_from, timestamp_to, limit, offset) + + archived_tweets = api.get() + +Web App +------------- + +Using Wayback Tweets as a Streamlit Web App + +`Access the application `_, a prototype written in Python with the Streamlit framework and hosted on Streamlit Cloud. diff --git a/_sources/result.rst.txt b/_sources/result.rst.txt new file mode 100644 index 0000000..2794f40 --- /dev/null +++ b/_sources/result.rst.txt @@ -0,0 +1,40 @@ +Result +================ + +The package saves in three formats: CSV, JSON, and HTML. The files have the following fields: + +- ``archived_urlkey``: (`str`) A canonical transformation of the URL you supplied, for example, ``org,eserver,tc)/``. Such keys are useful for indexing. + +- ``archived_timestamp``: (`datetime`) A 14 digit date-time representation in the ``YYYYMMDDhhmmss`` format. + +- ``original_tweet_url``: (`str`) The original tweet URL. + +- ``archived_tweet_url``: (`str`) The original archived URL. + +- ``parsed_tweet_url``: (`str`) The original tweet URL after parsing. `Check the utility functions `_. + +- ``parsed_archived_tweet_url``: (`str`) The original archived URL after parsing. `Check the utility functions `_. + +- ``parsed_tweet_text_mimetype_json``: (`str`) The tweet text extracted from the archived URL that has mimetype ``application/json``. + +- ``available_tweet_text``: (`str`) The tweet text extracted from the URL that is still available on the Twitter account. + +- ``available_tweet_is_RT``: (`bool`) Whether the tweet from the ``available_tweet_text`` field is a retweet or not. + +- ``available_tweet_info``: (`str`) Name and date of the tweet from the ``available_tweet_text`` field. + +- ``archived_mimetype``: (`str`) The mimetype of the archived content, which can be one of these: + + - ``text/html`` + + - ``warc/revisit`` + + - ``application/json`` + + - ``unk`` + +- ``archived_statuscode``: (`str`) The HTTP status code of the snapshot. If the mimetype is ``warc/revisit``, the value returned for the ``statuscode`` key can be blank, but the actual value is the same as that of any other entry that has the same ``digest`` as this entry. If the mimetype is ``application/json``, the value is usually empty or ``-``. + +- ``archived_digest``: (`str`) The ``SHA1`` hash digest of the content, excluding the headers. It's usually a base-32-encoded string. + +- ``archived_length``: (`int`) The compressed byte size of the corresponding WARC record, which includes WARC headers, HTTP headers, and content payload. diff --git a/_sources/streamlit.rst.txt b/_sources/streamlit.rst.txt new file mode 100644 index 0000000..5fc21cd --- /dev/null +++ b/_sources/streamlit.rst.txt @@ -0,0 +1,50 @@ +Web App +========= + +Aplication that displays multiple archived tweets on Wayback Machine to avoid opening each link manually. The application is a prototype written in Python with the Streamlit framework and hosted on Streamlit Cloud, allowing users to apply filters and view tweets that lack the original URL. 
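+The web app wraps the same retrieval pipeline exposed by the Python module shown in the quickstart. A minimal sketch of that pipeline (retrieve, parse, export) follows; it assumes ``TweetsParser`` and ``TweetsExporter`` are importable from the top-level package like ``WaybackTweets``, and their constructor arguments and the exporter method names are inferred from the API reference and the Result fields rather than confirmed from the source.
+
+.. code-block:: python
+
+    from waybacktweets import TweetsExporter, TweetsParser, WaybackTweets
+
+    username = "jack"
+
+    # Output fields, taken from the list documented under Result
+    field_options = [
+        "archived_urlkey",
+        "archived_timestamp",
+        "original_tweet_url",
+        "archived_tweet_url",
+        "archived_mimetype",
+        "archived_statuscode",
+    ]
+
+    # Retrieve the archived tweets' CDX data, as in the quickstart;
+    # the extra filter arguments (collapse, timestamps, limit, offset)
+    # are assumed optional and omitted here
+    api = WaybackTweets(username)
+    archived_tweets = api.get()
+
+    if archived_tweets:
+        # Parse the CDX rows into the documented fields (assumed signature)
+        parser = TweetsParser(archived_tweets, username, field_options)
+        parsed_tweets = parser.parse()
+
+        # Save the parsed data; the package documents CSV, JSON, and HTML output
+        exporter = TweetsExporter(parsed_tweets, username, field_options)
+        exporter.save_to_csv()
+        exporter.save_to_json()
+        exporter.save_to_html()
+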
+ + +Filters +---------- + +- Calendar: Filtering by date range + +- Checkbox: Only tweets not available + +- Checkbox: Only unique URLs (filtering by the collapse option using the urlkey field) + + +Pagination +------------ + +Pagination is automatic and allows viewing up to 25 tweets per page. This is a fixed value due to the API rate limit. + + +Community Comments +-------------------- + +.. raw:: html + +
+    • "We're always delighted when we see our community members create tools for open source research." Bellingcat
+
+    • "#myOSINTtip Clarissa Mendes launched a new tool for accessing old tweets via archive.org called the Wayback Tweets app. For those who love to look deeper at #osint tools, it is available on GitHub and uses the Wayback CDX Server API server (which is a hidden gem for accessing archive.org data!)" My OSINT Training
+
+    • "Original way to find deleted tweets." Henk Van Ess
+
+    • "This is an excellent tool to use now that most Twitter API-based tools have gone down with changes to the pricing structure over at X." The OSINT Newsletter - Issue #22
+
+    • "One of the keys to using the Wayback Machine effectively is knowing what it can and can't archive. It can, and has, archived many, many Twitter accounts... Utilize fun tools such as Wayback Tweets to do so more effectively." Ari Ben Am
+
+    • "Want to see archived tweets on Wayback Machine in bulk? You can use Wayback Tweets." Daily OSINT
+
+    • "Untuk mempermudah penelusuran arsip, gunakan Wayback Tweets." (To make searching the archives easier, use Wayback Tweets.) GIJN Indonesia
+
+    • "A tool to quickly view tweets saved on archive.org." Irina_Tech_Tips Newsletter #3
+
+ +.. note:: + + If the application is down, please check the `Streamlit Cloud Status `_. + diff --git a/_sources/workflow.rst.txt b/_sources/workflow.rst.txt new file mode 100644 index 0000000..2480b35 --- /dev/null +++ b/_sources/workflow.rst.txt @@ -0,0 +1,23 @@ +Workflow +================ + +The tool was written following a proposal not only to Retrieve data from archived tweets, but also to facilitate the reading of these tweets. Therefore, a flow is defined to obtain these results in the best possible way. + +Due to limitations of the Wayback CDX Server API, it is not always possible to parse the results with the mimetype ``application/json``, regardless, the data in CDX format are saved. + +Use the mouse to zoom in and out the flowchart. + +.. mermaid:: + :zoom: + :align: center + + flowchart TB + A[input Username]--> B[(Wayback Machine)] + B--> B1[save Archived Tweets CDX data] + B1--> |parsing| C{embed Tweet URL\nvia Twitter Publisher} + C--> |2xx/3xx| D[return Tweet text] + C--> |4xx| E[return None] + E--> F{request Archived\nTweet URL} + F--> |4xx| G[return Only CDX data] + F--> |2xx/3xx: application/json| J[return JSON text] + F--> |2xx/3xx: text/html, warc/revisit, unk| K[return HTML iframe tag] diff --git a/_static/basic.css b/_static/basic.css new file mode 100644 index 0000000..f316efc --- /dev/null +++ b/_static/basic.css @@ -0,0 +1,925 @@ +/* + * basic.css + * ~~~~~~~~~ + * + * Sphinx stylesheet -- basic theme. + * + * :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. + * + */ + +/* -- main layout ----------------------------------------------------------- */ + +div.clearer { + clear: both; +} + +div.section::after { + display: block; + content: ''; + clear: left; +} + +/* -- relbar ---------------------------------------------------------------- */ + +div.related { + width: 100%; + font-size: 90%; +} + +div.related h3 { + display: none; +} + +div.related ul { + margin: 0; + padding: 0 0 0 10px; + list-style: none; +} + +div.related li { + display: inline; +} + +div.related li.right { + float: right; + margin-right: 5px; +} + +/* -- sidebar --------------------------------------------------------------- */ + +div.sphinxsidebarwrapper { + padding: 10px 5px 0 10px; +} + +div.sphinxsidebar { + float: left; + width: 230px; + margin-left: -100%; + font-size: 90%; + word-wrap: break-word; + overflow-wrap : break-word; +} + +div.sphinxsidebar ul { + list-style: none; +} + +div.sphinxsidebar ul ul, +div.sphinxsidebar ul.want-points { + margin-left: 20px; + list-style: square; +} + +div.sphinxsidebar ul ul { + margin-top: 0; + margin-bottom: 0; +} + +div.sphinxsidebar form { + margin-top: 10px; +} + +div.sphinxsidebar input { + border: 1px solid #98dbcc; + font-family: sans-serif; + font-size: 1em; +} + +div.sphinxsidebar #searchbox form.search { + overflow: hidden; +} + +div.sphinxsidebar #searchbox input[type="text"] { + float: left; + width: 80%; + padding: 0.25em; + box-sizing: border-box; +} + +div.sphinxsidebar #searchbox input[type="submit"] { + float: left; + width: 20%; + border-left: none; + padding: 0.25em; + box-sizing: border-box; +} + + +img { + border: 0; + max-width: 100%; +} + +/* -- search page ----------------------------------------------------------- */ + +ul.search { + margin: 10px 0 0 20px; + padding: 0; +} + +ul.search li { + padding: 5px 0 5px 20px; + background-image: url(file.png); + background-repeat: no-repeat; + background-position: 0 7px; +} + +ul.search li a { + font-weight: bold; +} + 
+ul.search li p.context { + color: #888; + margin: 2px 0 0 30px; + text-align: left; +} + +ul.keywordmatches li.goodmatch a { + font-weight: bold; +} + +/* -- index page ------------------------------------------------------------ */ + +table.contentstable { + width: 90%; + margin-left: auto; + margin-right: auto; +} + +table.contentstable p.biglink { + line-height: 150%; +} + +a.biglink { + font-size: 1.3em; +} + +span.linkdescr { + font-style: italic; + padding-top: 5px; + font-size: 90%; +} + +/* -- general index --------------------------------------------------------- */ + +table.indextable { + width: 100%; +} + +table.indextable td { + text-align: left; + vertical-align: top; +} + +table.indextable ul { + margin-top: 0; + margin-bottom: 0; + list-style-type: none; +} + +table.indextable > tbody > tr > td > ul { + padding-left: 0em; +} + +table.indextable tr.pcap { + height: 10px; +} + +table.indextable tr.cap { + margin-top: 10px; + background-color: #f2f2f2; +} + +img.toggler { + margin-right: 3px; + margin-top: 3px; + cursor: pointer; +} + +div.modindex-jumpbox { + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; + margin: 1em 0 1em 0; + padding: 0.4em; +} + +div.genindex-jumpbox { + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; + margin: 1em 0 1em 0; + padding: 0.4em; +} + +/* -- domain module index --------------------------------------------------- */ + +table.modindextable td { + padding: 2px; + border-collapse: collapse; +} + +/* -- general body styles --------------------------------------------------- */ + +div.body { + min-width: 360px; + max-width: 800px; +} + +div.body p, div.body dd, div.body li, div.body blockquote { + -moz-hyphens: auto; + -ms-hyphens: auto; + -webkit-hyphens: auto; + hyphens: auto; +} + +a.headerlink { + visibility: hidden; +} + +a:visited { + color: #551A8B; +} + +h1:hover > a.headerlink, +h2:hover > a.headerlink, +h3:hover > a.headerlink, +h4:hover > a.headerlink, +h5:hover > a.headerlink, +h6:hover > a.headerlink, +dt:hover > a.headerlink, +caption:hover > a.headerlink, +p.caption:hover > a.headerlink, +div.code-block-caption:hover > a.headerlink { + visibility: visible; +} + +div.body p.caption { + text-align: inherit; +} + +div.body td { + text-align: left; +} + +.first { + margin-top: 0 !important; +} + +p.rubric { + margin-top: 30px; + font-weight: bold; +} + +img.align-left, figure.align-left, .figure.align-left, object.align-left { + clear: left; + float: left; + margin-right: 1em; +} + +img.align-right, figure.align-right, .figure.align-right, object.align-right { + clear: right; + float: right; + margin-left: 1em; +} + +img.align-center, figure.align-center, .figure.align-center, object.align-center { + display: block; + margin-left: auto; + margin-right: auto; +} + +img.align-default, figure.align-default, .figure.align-default { + display: block; + margin-left: auto; + margin-right: auto; +} + +.align-left { + text-align: left; +} + +.align-center { + text-align: center; +} + +.align-default { + text-align: center; +} + +.align-right { + text-align: right; +} + +/* -- sidebars -------------------------------------------------------------- */ + +div.sidebar, +aside.sidebar { + margin: 0 0 0.5em 1em; + border: 1px solid #ddb; + padding: 7px; + background-color: #ffe; + width: 40%; + float: right; + clear: right; + overflow-x: auto; +} + +p.sidebar-title { + font-weight: bold; +} + +nav.contents, +aside.topic, +div.admonition, div.topic, blockquote { + clear: left; +} + +/* -- topics 
---------------------------------------------------------------- */ + +nav.contents, +aside.topic, +div.topic { + border: 1px solid #ccc; + padding: 7px; + margin: 10px 0 10px 0; +} + +p.topic-title { + font-size: 1.1em; + font-weight: bold; + margin-top: 10px; +} + +/* -- admonitions ----------------------------------------------------------- */ + +div.admonition { + margin-top: 10px; + margin-bottom: 10px; + padding: 7px; +} + +div.admonition dt { + font-weight: bold; +} + +p.admonition-title { + margin: 0px 10px 5px 0px; + font-weight: bold; +} + +div.body p.centered { + text-align: center; + margin-top: 25px; +} + +/* -- content of sidebars/topics/admonitions -------------------------------- */ + +div.sidebar > :last-child, +aside.sidebar > :last-child, +nav.contents > :last-child, +aside.topic > :last-child, +div.topic > :last-child, +div.admonition > :last-child { + margin-bottom: 0; +} + +div.sidebar::after, +aside.sidebar::after, +nav.contents::after, +aside.topic::after, +div.topic::after, +div.admonition::after, +blockquote::after { + display: block; + content: ''; + clear: both; +} + +/* -- tables ---------------------------------------------------------------- */ + +table.docutils { + margin-top: 10px; + margin-bottom: 10px; + border: 0; + border-collapse: collapse; +} + +table.align-center { + margin-left: auto; + margin-right: auto; +} + +table.align-default { + margin-left: auto; + margin-right: auto; +} + +table caption span.caption-number { + font-style: italic; +} + +table caption span.caption-text { +} + +table.docutils td, table.docutils th { + padding: 1px 8px 1px 5px; + border-top: 0; + border-left: 0; + border-right: 0; + border-bottom: 1px solid #aaa; +} + +th { + text-align: left; + padding-right: 5px; +} + +table.citation { + border-left: solid 1px gray; + margin-left: 1px; +} + +table.citation td { + border-bottom: none; +} + +th > :first-child, +td > :first-child { + margin-top: 0px; +} + +th > :last-child, +td > :last-child { + margin-bottom: 0px; +} + +/* -- figures --------------------------------------------------------------- */ + +div.figure, figure { + margin: 0.5em; + padding: 0.5em; +} + +div.figure p.caption, figcaption { + padding: 0.3em; +} + +div.figure p.caption span.caption-number, +figcaption span.caption-number { + font-style: italic; +} + +div.figure p.caption span.caption-text, +figcaption span.caption-text { +} + +/* -- field list styles ----------------------------------------------------- */ + +table.field-list td, table.field-list th { + border: 0 !important; +} + +.field-list ul { + margin: 0; + padding-left: 1em; +} + +.field-list p { + margin: 0; +} + +.field-name { + -moz-hyphens: manual; + -ms-hyphens: manual; + -webkit-hyphens: manual; + hyphens: manual; +} + +/* -- hlist styles ---------------------------------------------------------- */ + +table.hlist { + margin: 1em 0; +} + +table.hlist td { + vertical-align: top; +} + +/* -- object description styles --------------------------------------------- */ + +.sig { + font-family: 'Consolas', 'Menlo', 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', monospace; +} + +.sig-name, code.descname { + background-color: transparent; + font-weight: bold; +} + +.sig-name { + font-size: 1.1em; +} + +code.descname { + font-size: 1.2em; +} + +.sig-prename, code.descclassname { + background-color: transparent; +} + +.optional { + font-size: 1.3em; +} + +.sig-paren { + font-size: larger; +} + +.sig-param.n { + font-style: italic; +} + +/* C++ specific styling */ + +.sig-inline.c-texpr, 
+.sig-inline.cpp-texpr { + font-family: unset; +} + +.sig.c .k, .sig.c .kt, +.sig.cpp .k, .sig.cpp .kt { + color: #0033B3; +} + +.sig.c .m, +.sig.cpp .m { + color: #1750EB; +} + +.sig.c .s, .sig.c .sc, +.sig.cpp .s, .sig.cpp .sc { + color: #067D17; +} + + +/* -- other body styles ----------------------------------------------------- */ + +ol.arabic { + list-style: decimal; +} + +ol.loweralpha { + list-style: lower-alpha; +} + +ol.upperalpha { + list-style: upper-alpha; +} + +ol.lowerroman { + list-style: lower-roman; +} + +ol.upperroman { + list-style: upper-roman; +} + +:not(li) > ol > li:first-child > :first-child, +:not(li) > ul > li:first-child > :first-child { + margin-top: 0px; +} + +:not(li) > ol > li:last-child > :last-child, +:not(li) > ul > li:last-child > :last-child { + margin-bottom: 0px; +} + +ol.simple ol p, +ol.simple ul p, +ul.simple ol p, +ul.simple ul p { + margin-top: 0; +} + +ol.simple > li:not(:first-child) > p, +ul.simple > li:not(:first-child) > p { + margin-top: 0; +} + +ol.simple p, +ul.simple p { + margin-bottom: 0; +} + +aside.footnote > span, +div.citation > span { + float: left; +} +aside.footnote > span:last-of-type, +div.citation > span:last-of-type { + padding-right: 0.5em; +} +aside.footnote > p { + margin-left: 2em; +} +div.citation > p { + margin-left: 4em; +} +aside.footnote > p:last-of-type, +div.citation > p:last-of-type { + margin-bottom: 0em; +} +aside.footnote > p:last-of-type:after, +div.citation > p:last-of-type:after { + content: ""; + clear: both; +} + +dl.field-list { + display: grid; + grid-template-columns: fit-content(30%) auto; +} + +dl.field-list > dt { + font-weight: bold; + word-break: break-word; + padding-left: 0.5em; + padding-right: 5px; +} + +dl.field-list > dd { + padding-left: 0.5em; + margin-top: 0em; + margin-left: 0em; + margin-bottom: 0em; +} + +dl { + margin-bottom: 15px; +} + +dd > :first-child { + margin-top: 0px; +} + +dd ul, dd table { + margin-bottom: 10px; +} + +dd { + margin-top: 3px; + margin-bottom: 10px; + margin-left: 30px; +} + +.sig dd { + margin-top: 0px; + margin-bottom: 0px; +} + +.sig dl { + margin-top: 0px; + margin-bottom: 0px; +} + +dl > dd:last-child, +dl > dd:last-child > :last-child { + margin-bottom: 0; +} + +dt:target, span.highlighted { + background-color: #fbe54e; +} + +rect.highlighted { + fill: #fbe54e; +} + +dl.glossary dt { + font-weight: bold; + font-size: 1.1em; +} + +.versionmodified { + font-style: italic; +} + +.system-message { + background-color: #fda; + padding: 5px; + border: 3px solid red; +} + +.footnote:target { + background-color: #ffa; +} + +.line-block { + display: block; + margin-top: 1em; + margin-bottom: 1em; +} + +.line-block .line-block { + margin-top: 0; + margin-bottom: 0; + margin-left: 1.5em; +} + +.guilabel, .menuselection { + font-family: sans-serif; +} + +.accelerator { + text-decoration: underline; +} + +.classifier { + font-style: oblique; +} + +.classifier:before { + font-style: normal; + margin: 0 0.5em; + content: ":"; + display: inline-block; +} + +abbr, acronym { + border-bottom: dotted 1px; + cursor: help; +} + +.translated { + background-color: rgba(207, 255, 207, 0.2) +} + +.untranslated { + background-color: rgba(255, 207, 207, 0.2) +} + +/* -- code displays --------------------------------------------------------- */ + +pre { + overflow: auto; + overflow-y: hidden; /* fixes display issues on Chrome browsers */ +} + +pre, div[class*="highlight-"] { + clear: both; +} + +span.pre { + -moz-hyphens: none; + -ms-hyphens: none; + -webkit-hyphens: none; + hyphens: 
none; + white-space: nowrap; +} + +div[class*="highlight-"] { + margin: 1em 0; +} + +td.linenos pre { + border: 0; + background-color: transparent; + color: #aaa; +} + +table.highlighttable { + display: block; +} + +table.highlighttable tbody { + display: block; +} + +table.highlighttable tr { + display: flex; +} + +table.highlighttable td { + margin: 0; + padding: 0; +} + +table.highlighttable td.linenos { + padding-right: 0.5em; +} + +table.highlighttable td.code { + flex: 1; + overflow: hidden; +} + +.highlight .hll { + display: block; +} + +div.highlight pre, +table.highlighttable pre { + margin: 0; +} + +div.code-block-caption + div { + margin-top: 0; +} + +div.code-block-caption { + margin-top: 1em; + padding: 2px 5px; + font-size: small; +} + +div.code-block-caption code { + background-color: transparent; +} + +table.highlighttable td.linenos, +span.linenos, +div.highlight span.gp { /* gp: Generic.Prompt */ + user-select: none; + -webkit-user-select: text; /* Safari fallback only */ + -webkit-user-select: none; /* Chrome/Safari */ + -moz-user-select: none; /* Firefox */ + -ms-user-select: none; /* IE10+ */ +} + +div.code-block-caption span.caption-number { + padding: 0.1em 0.3em; + font-style: italic; +} + +div.code-block-caption span.caption-text { +} + +div.literal-block-wrapper { + margin: 1em 0; +} + +code.xref, a code { + background-color: transparent; + font-weight: bold; +} + +h1 code, h2 code, h3 code, h4 code, h5 code, h6 code { + background-color: transparent; +} + +.viewcode-link { + float: right; +} + +.viewcode-back { + float: right; + font-family: sans-serif; +} + +div.viewcode-block:target { + margin: -1px -10px; + padding: 0 10px; +} + +/* -- math display ---------------------------------------------------------- */ + +img.math { + vertical-align: middle; +} + +div.body div.math p { + text-align: center; +} + +span.eqno { + float: right; +} + +span.eqno a.headerlink { + position: absolute; + z-index: 1; +} + +div.math:hover a.headerlink { + visibility: visible; +} + +/* -- printout stylesheet --------------------------------------------------- */ + +@media print { + div.document, + div.documentwrapper, + div.bodywrapper { + margin: 0 !important; + width: 100%; + } + + div.sphinxsidebar, + div.related, + div.footer, + #top-link { + display: none; + } +} \ No newline at end of file diff --git a/_static/css/custom.css b/_static/css/custom.css new file mode 100644 index 0000000..e2cc030 --- /dev/null +++ b/_static/css/custom.css @@ -0,0 +1,4 @@ +#cli #usage #waybacktweets h3, +.sphinxsidebarwrapper li ul li ul:has(a[href="#waybacktweets"]):last-child{ + display: none; +} diff --git a/_static/doctools.js b/_static/doctools.js new file mode 100644 index 0000000..4d67807 --- /dev/null +++ b/_static/doctools.js @@ -0,0 +1,156 @@ +/* + * doctools.js + * ~~~~~~~~~~~ + * + * Base JavaScript utilities for all Sphinx HTML documentation. + * + * :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. + * + */ +"use strict"; + +const BLACKLISTED_KEY_CONTROL_ELEMENTS = new Set([ + "TEXTAREA", + "INPUT", + "SELECT", + "BUTTON", +]); + +const _ready = (callback) => { + if (document.readyState !== "loading") { + callback(); + } else { + document.addEventListener("DOMContentLoaded", callback); + } +}; + +/** + * Small JavaScript module for the documentation. 
+ */ +const Documentation = { + init: () => { + Documentation.initDomainIndexTable(); + Documentation.initOnKeyListeners(); + }, + + /** + * i18n support + */ + TRANSLATIONS: {}, + PLURAL_EXPR: (n) => (n === 1 ? 0 : 1), + LOCALE: "unknown", + + // gettext and ngettext don't access this so that the functions + // can safely bound to a different name (_ = Documentation.gettext) + gettext: (string) => { + const translated = Documentation.TRANSLATIONS[string]; + switch (typeof translated) { + case "undefined": + return string; // no translation + case "string": + return translated; // translation exists + default: + return translated[0]; // (singular, plural) translation tuple exists + } + }, + + ngettext: (singular, plural, n) => { + const translated = Documentation.TRANSLATIONS[singular]; + if (typeof translated !== "undefined") + return translated[Documentation.PLURAL_EXPR(n)]; + return n === 1 ? singular : plural; + }, + + addTranslations: (catalog) => { + Object.assign(Documentation.TRANSLATIONS, catalog.messages); + Documentation.PLURAL_EXPR = new Function( + "n", + `return (${catalog.plural_expr})` + ); + Documentation.LOCALE = catalog.locale; + }, + + /** + * helper function to focus on search bar + */ + focusSearchBar: () => { + document.querySelectorAll("input[name=q]")[0]?.focus(); + }, + + /** + * Initialise the domain index toggle buttons + */ + initDomainIndexTable: () => { + const toggler = (el) => { + const idNumber = el.id.substr(7); + const toggledRows = document.querySelectorAll(`tr.cg-${idNumber}`); + if (el.src.substr(-9) === "minus.png") { + el.src = `${el.src.substr(0, el.src.length - 9)}plus.png`; + toggledRows.forEach((el) => (el.style.display = "none")); + } else { + el.src = `${el.src.substr(0, el.src.length - 8)}minus.png`; + toggledRows.forEach((el) => (el.style.display = "")); + } + }; + + const togglerElements = document.querySelectorAll("img.toggler"); + togglerElements.forEach((el) => + el.addEventListener("click", (event) => toggler(event.currentTarget)) + ); + togglerElements.forEach((el) => (el.style.display = "")); + if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) togglerElements.forEach(toggler); + }, + + initOnKeyListeners: () => { + // only install a listener if it is really needed + if ( + !DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS && + !DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS + ) + return; + + document.addEventListener("keydown", (event) => { + // bail for input elements + if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return; + // bail with special keys + if (event.altKey || event.ctrlKey || event.metaKey) return; + + if (!event.shiftKey) { + switch (event.key) { + case "ArrowLeft": + if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break; + + const prevLink = document.querySelector('link[rel="prev"]'); + if (prevLink && prevLink.href) { + window.location.href = prevLink.href; + event.preventDefault(); + } + break; + case "ArrowRight": + if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break; + + const nextLink = document.querySelector('link[rel="next"]'); + if (nextLink && nextLink.href) { + window.location.href = nextLink.href; + event.preventDefault(); + } + break; + } + } + + // some keyboard layouts may need Shift to get / + switch (event.key) { + case "/": + if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) break; + Documentation.focusSearchBar(); + event.preventDefault(); + } + }); + }, +}; + +// quick alias for translations +const _ = Documentation.gettext; + +_ready(Documentation.init); diff --git 
a/_static/documentation_options.js b/_static/documentation_options.js new file mode 100644 index 0000000..529239f --- /dev/null +++ b/_static/documentation_options.js @@ -0,0 +1,13 @@ +const DOCUMENTATION_OPTIONS = { + VERSION: '1.0', + LANGUAGE: 'en', + COLLAPSE_INDEX: false, + BUILDER: 'html', + FILE_SUFFIX: '.html', + LINK_SUFFIX: '.html', + HAS_SOURCE: true, + SOURCELINK_SUFFIX: '.txt', + NAVIGATION_WITH_KEYS: false, + SHOW_SEARCH_SUMMARY: true, + ENABLE_SEARCH_SHORTCUTS: true, +}; \ No newline at end of file diff --git a/_static/file.png b/_static/file.png new file mode 100644 index 0000000000000000000000000000000000000000..a858a410e4faa62ce324d814e4b816fff83a6fb3 GIT binary patch literal 286 zcmV+(0pb3MP)s`hMrGg#P~ix$^RISR_I47Y|r1 z_CyJOe}D1){SET-^Amu_i71Lt6eYfZjRyw@I6OQAIXXHDfiX^GbOlHe=Ae4>0m)d(f|Me07*qoM6N<$f}vM^LjV8( literal 0 HcmV?d00001 diff --git a/_static/flask.css b/_static/flask.css new file mode 100644 index 0000000..e37830d --- /dev/null +++ b/_static/flask.css @@ -0,0 +1,15 @@ +@import url("pocoo.css"); + +a, a.reference, a.footnote-reference { + color: #004b6b; + text-decoration-color: #004b6b; +} + +a:hover { + color: #6d4100; + text-decoration-color: #6d4100; +} + +p.version-warning { + background-color: #004b6b; +} diff --git a/_static/language_data.js b/_static/language_data.js new file mode 100644 index 0000000..367b8ed --- /dev/null +++ b/_static/language_data.js @@ -0,0 +1,199 @@ +/* + * language_data.js + * ~~~~~~~~~~~~~~~~ + * + * This script contains the language-specific data used by searchtools.js, + * namely the list of stopwords, stemmer, scorer and splitter. + * + * :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. + * + */ + +var stopwords = ["a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "near", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"]; + + +/* Non-minified version is copied as a separate JS file, if available */ + +/** + * Porter Stemmer + */ +var Stemmer = function() { + + var step2list = { + ational: 'ate', + tional: 'tion', + enci: 'ence', + anci: 'ance', + izer: 'ize', + bli: 'ble', + alli: 'al', + entli: 'ent', + eli: 'e', + ousli: 'ous', + ization: 'ize', + ation: 'ate', + ator: 'ate', + alism: 'al', + iveness: 'ive', + fulness: 'ful', + ousness: 'ous', + aliti: 'al', + iviti: 'ive', + biliti: 'ble', + logi: 'log' + }; + + var step3list = { + icate: 'ic', + ative: '', + alize: 'al', + iciti: 'ic', + ical: 'ic', + ful: '', + ness: '' + }; + + var c = "[^aeiou]"; // consonant + var v = "[aeiouy]"; // vowel + var C = c + "[^aeiouy]*"; // consonant sequence + var V = v + "[aeiou]*"; // vowel sequence + + var mgr0 = "^(" + C + ")?" + V + C; // [C]VC... is m>0 + var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1 + var mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1 + var s_v = "^(" + C + ")?" 
+ v; // vowel in stem + + this.stemWord = function (w) { + var stem; + var suffix; + var firstch; + var origword = w; + + if (w.length < 3) + return w; + + var re; + var re2; + var re3; + var re4; + + firstch = w.substr(0,1); + if (firstch == "y") + w = firstch.toUpperCase() + w.substr(1); + + // Step 1a + re = /^(.+?)(ss|i)es$/; + re2 = /^(.+?)([^s])s$/; + + if (re.test(w)) + w = w.replace(re,"$1$2"); + else if (re2.test(w)) + w = w.replace(re2,"$1$2"); + + // Step 1b + re = /^(.+?)eed$/; + re2 = /^(.+?)(ed|ing)$/; + if (re.test(w)) { + var fp = re.exec(w); + re = new RegExp(mgr0); + if (re.test(fp[1])) { + re = /.$/; + w = w.replace(re,""); + } + } + else if (re2.test(w)) { + var fp = re2.exec(w); + stem = fp[1]; + re2 = new RegExp(s_v); + if (re2.test(stem)) { + w = stem; + re2 = /(at|bl|iz)$/; + re3 = new RegExp("([^aeiouylsz])\\1$"); + re4 = new RegExp("^" + C + v + "[^aeiouwxy]$"); + if (re2.test(w)) + w = w + "e"; + else if (re3.test(w)) { + re = /.$/; + w = w.replace(re,""); + } + else if (re4.test(w)) + w = w + "e"; + } + } + + // Step 1c + re = /^(.+?)y$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(s_v); + if (re.test(stem)) + w = stem + "i"; + } + + // Step 2 + re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + suffix = fp[2]; + re = new RegExp(mgr0); + if (re.test(stem)) + w = stem + step2list[suffix]; + } + + // Step 3 + re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + suffix = fp[2]; + re = new RegExp(mgr0); + if (re.test(stem)) + w = stem + step3list[suffix]; + } + + // Step 4 + re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/; + re2 = /^(.+?)(s|t)(ion)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(mgr1); + if (re.test(stem)) + w = stem; + } + else if (re2.test(w)) { + var fp = re2.exec(w); + stem = fp[1] + fp[2]; + re2 = new RegExp(mgr1); + if (re2.test(stem)) + w = stem; + } + + // Step 5 + re = /^(.+?)e$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(mgr1); + re2 = new RegExp(meq1); + re3 = new RegExp("^" + C + v + "[^aeiouwxy]$"); + if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) + w = stem; + } + re = /ll$/; + re2 = new RegExp(mgr1); + if (re.test(w) && re2.test(w)) { + re = /.$/; + w = w.replace(re,""); + } + + // and turn initial Y back to y + if (firstch == "y") + w = firstch.toLowerCase() + w.substr(1); + return w; + } +} + diff --git a/_static/minus.png b/_static/minus.png new file mode 100644 index 0000000000000000000000000000000000000000..d96755fdaf8bb2214971e0db9c1fd3077d7c419d GIT binary patch literal 90 zcmeAS@N?(olHy`uVBq!ia0vp^+#t*WBp7;*Yy1LIik>cxAr*|t7R?Mi>2?kWtu=nj kDsEF_5m^0CR;1wuP-*O&G^0G}KYk!hp00i_>zopr08q^qX#fBK literal 0 HcmV?d00001 diff --git a/_static/parthenon.svg b/_static/parthenon.svg new file mode 100644 index 0000000..3be2d40 --- /dev/null +++ b/_static/parthenon.svg @@ -0,0 +1,26 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/_static/plus.png b/_static/plus.png new file mode 100644 index 0000000000000000000000000000000000000000..7107cec93a979b9a5f64843235a16651d563ce2d GIT binary patch literal 90 zcmeAS@N?(olHy`uVBq!ia0vp^+#t*WBp7;*Yy1LIik>cxAr*|t7R?Mi>2?kWtu>-2 m3q%Vub%g%s<8sJhVPMczOq}xhg9DJoz~JfX=d#Wzp$Pyb1r*Kz literal 0 HcmV?d00001 diff --git 
a/_static/pocoo.css b/_static/pocoo.css new file mode 100644 index 0000000..4f14b31 --- /dev/null +++ b/_static/pocoo.css @@ -0,0 +1,525 @@ +@import url("basic.css"); + +/* -- page layout --------------------------------------------------- */ + +body { + font-family: 'Garamond', 'Georgia', serif; + font-size: 17px; + background-color: #fff; + color: #3e4349; + margin: 0; + padding: 0; +} + +div.related { + max-width: 1140px; + margin: 10px auto; + + /* displayed on mobile */ + display: none; +} + +div.document { + max-width: 1140px; + margin: 10px auto; +} + +div.documentwrapper { + float: left; + width: 100%; +} + +div.bodywrapper { + margin: 0 0 0 220px; +} + +div.body { + min-width: initial; + max-width: initial; + padding: 0 30px; +} + +div.sphinxsidebarwrapper { + padding: 10px; +} + +div.sphinxsidebar { + width: 220px; + font-size: 14px; + line-height: 1.5; + color: #444; +} + +div.sphinxsidebar li { + overflow: hidden; + text-overflow: ellipsis; +} + +div.sphinxsidebar li:hover { + overflow: visible; +} + +div.sphinxsidebar a, +div.sphinxsidebar a code { + color: #444; + border-color: #444; +} + +div.sphinxsidebar a:hover { + background-color:#fff; +} + +div.sphinxsidebar p.logo { + margin: 0; + text-align: center; +} + +div.sphinxsidebar h3, +div.sphinxsidebar h4 { + font-size: 24px; + color: #444; +} + +div.sphinxsidebar p.logo a, +div.sphinxsidebar h3 a, +div.sphinxsidebar p.logo a:hover, +div.sphinxsidebar h3 a:hover { + border: none; +} + +div.sphinxsidebar p, +div.sphinxsidebar h3, +div.sphinxsidebar h4 { + margin: 10px 0; +} + +div.sphinxsidebar ul { + margin: 10px 0; + padding: 0; +} + +div.sphinxsidebar input { + border: 1px solid #999; + font-size: 1em; +} + +div.footer { + max-width: 1140px; + margin: 20px auto; + font-size: 14px; + text-align: right; + color: #888; +} + +div.footer a { + color: #888; + border-color: #888; +} + +/* -- quick search -------------------------------------------------- */ + +div.sphinxsidebar #searchbox form { + display: flex; +} + +div.sphinxsidebar #searchbox form > div { + display: flex; + flex: 1 1 auto; +} + +div.sphinxsidebar #searchbox input[type=text] { + flex: 1 1 auto; + width: 1% !important; +} + +div.sphinxsidebar #searchbox input[type=submit] { + border-left-width: 0; +} + +/* -- versions ------------------------------------------------------ */ + +div.sphinxsidebar ul.versions a.current { + font-style: italic; + border-bottom: 1px solid #000; + color: #000; +} + +div.sphinxsidebar ul.versions span.note { + color: #999; +} + +/* -- version warning ----------------------------------------------- */ + +p.version-warning { + top: 10px; + position: sticky; + + margin: 10px 0; + padding: 5px 10px; + border-radius: 4px; + + letter-spacing: 1px; + color: #fff; + text-shadow: 0 0 2px #000; + text-align: center; + + background: #d40 repeating-linear-gradient( + 135deg, + transparent, + transparent 56px, + rgba(255, 255, 255, 0.2) 56px, + rgba(255, 255, 255, 0.2) 112px + ); +} + +p.version-warning a { + color: #fff; + border-color: #fff; +} + +/* -- body styles --------------------------------------------------- */ + +a { + text-decoration: underline; + text-decoration-style: dotted; + text-decoration-color: #000; + text-decoration-thickness: 1px; +} + +a:hover { + text-decoration-style: solid; +} + +h1, h2, h3, h4, h5, h6 { + font-weight: normal; + margin: 30px 0 10px; + padding: 0; + color: black; +} + +div.body h1 { + font-size: 240%; +} + +div.body h2 { + font-size: 180%; +} + +div.body h3 { + font-size: 150%; +} + +div.body h4 { + 
font-size: 130%; +} + +div.body h5 { + font-size: 100%; +} + +div.body h6 { + font-size: 100%; +} + +div.body h1:first-of-type { + margin-top: 0; +} + +a.headerlink { + color: #ddd; + margin: 0 0.2em; + padding: 0 0.2em; + border: none; +} + +a.headerlink:hover { + color: #444; +} + +div.body p, +div.body dd, +div.body li { + line-height: 1.4; +} + +img.screenshot { + box-shadow: 2px 2px 4px #eee; +} + +hr { + border: 1px solid #999; +} + +blockquote { + margin: 0 0 0 30px; + padding: 0; +} + +ul, ol { + margin: 10px 0 10px 30px; + padding: 0; +} + +a.footnote-reference { + font-size: 0.7em; + vertical-align: top; +} + +/* -- admonitions --------------------------------------------------- */ + +div.admonition, +div.topic { + background-color: #fafafa; + margin: 10px -10px; + padding: 10px; + border-top: 1px solid #ccc; + border-right: none; + border-bottom: 1px solid #ccc; + border-left: none; +} + +div.admonition p.admonition-title, +div.topic p.topic-title { + font-weight: normal; + font-size: 24px; + margin: 0 0 10px 0; + padding: 0; + line-height: 1; + display: inline; +} + +p.admonition-title::after { + content: ":"; +} + +div.admonition p.last, +div.topic p:last-child { + margin-bottom: 0; +} + +div.danger, div.error { + background-color: #fff0f0; + border-color: #ffb0b0; +} + +div.seealso { + background-color: #fffff0; + border-color: #f0f0a8; +} + +/* -- changelog ----------------------------------------------------- */ + +details.changelog summary { + cursor: pointer; + font-style: italic; + margin-bottom: 10px; +} + +/* -- search highlight ---------------------------------------------- */ + +dt:target, +.footnote:target, +span.highlighted { + background-color: #ffdf80; +} + +rect.highlighted { + fill: #ffdf80; +} + +/* -- code displays ------------------------------------------------- */ + +pre, code { + font-family: 'Consolas', 'Menlo', 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', monospace; + font-size: 0.9em; +} + +pre { + margin: 0; + padding: 0; + line-height: 1.3; +} + +div.literal-block-wrapper { + padding: 10px 0 0; +} + +div.code-block-caption { + padding: 0; +} + +div.highlight, div.literal-block-wrapper div.highlight { + margin: 10px -10px; + padding: 10px; +} + +code { + color: #222; + background: #e8eff0; +} + +/* -- tables -------------------------------------------------------- */ + +table.docutils { + border: 1px solid #888; + box-shadow: 2px 2px 4px #eee; +} + +table.docutils td, +table.docutils th { + border: 1px solid #888; + padding: 0.25em 0.7em; +} + +table.field-list, +table.footnote { + border: none; + box-shadow: none; +} + +table.footnote { + margin: 15px 0; + width: 100%; + border: 1px solid #eee; + background-color: #fafafa; + font-size: 0.9em; +} + +table.footnote + table.footnote { + margin-top: -15px; + border-top: none; +} + +table.field-list th { + padding: 0 0.8em 0 0; +} + +table.field-list td { + padding: 0; +} + +table.footnote td.label { + width: 0; + padding: 0.3em 0 0.3em 0.5em; +} + +table.footnote td { + padding: 0.3em 0.5em; +} + +/* -- responsive screen --------------------------------------------- */ + +@media screen and (max-width: 1139px) { + p.version-warning { + margin: 10px; + } + + div.footer { + margin: 20px 10px; + } +} + +/* -- small screen -------------------------------------------------- */ + +@media screen and (max-width: 767px) { + body { + padding: 0 20px; + } + + div.related { + display: block; + } + + p.version-warning { + margin: 10px 0; + } + + div.documentwrapper { + float: none; + } + + div.bodywrapper { + 
margin: 0; + } + + div.body { + min-height: 0; + padding: 0; + } + + div.sphinxsidebar { + float: none; + width: 100%; + margin: 0 -20px -10px; + padding: 0 20px; + background-color: #333; + color: #ccc; + } + + div.sphinxsidebar a, + div.sphinxsidebar a code, + div.sphinxsidebar h3, + div.sphinxsidebar h4, + div.footer a { + color: #ccc; + border-color: #ccc; + } + + div.sphinxsidebar p.logo { + display: none; + } + + div.sphinxsidebar ul.versions a.current { + border-bottom-color: #fff; + color: #fff; + } + + div.footer { + text-align: left; + margin: 0 -20px; + padding: 20px; + background-color: #333; + color: #ccc; + } +} + +/* https://github.com/twbs/bootstrap/blob + /0e8831505ac845f3102fa2c5996a7141c9ab01ee + /scss/mixins/_screen-reader.scss */ +.hide-header > h1:first-child { + position: absolute; + width: 1px; + height: 1px; + padding: 0; + overflow: hidden; + clip: rect(0, 0, 0, 0); + white-space: nowrap; + border: 0; +} + +/* -- sphinx-tabs -------------------------------------------------- */ + +.sphinx-tabs { + margin-bottom: 0; +} + +.sphinx-tabs .ui.menu { + font-family: 'Garamond', 'Georgia', serif !important; +} + +.sphinx-tabs .ui.attached.menu { + border-bottom: none +} + +.sphinx-tabs .ui.tabular.menu .item { + border-bottom: 2px solid transparent; + border-left: none; + border-right: none; + border-top: none; + padding: .3em 0.6em; +} + +.sphinx-tabs .ui.attached.segment, .ui.segment { + border: 0; + padding: 0; +} diff --git a/_static/pygments.css b/_static/pygments.css new file mode 100644 index 0000000..57c7df3 --- /dev/null +++ b/_static/pygments.css @@ -0,0 +1,84 @@ +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +.highlight .hll { background-color: #ffffcc } +.highlight { background: #f8f8f8; } +.highlight .c { color: #8f5902; font-style: italic } /* Comment */ +.highlight .err { color: #a40000; border: 1px solid #ef2929 } /* Error */ +.highlight .g { color: #000000 } /* Generic */ +.highlight .k { color: #004461; font-weight: bold } /* Keyword */ +.highlight .l { color: #000000 } /* Literal */ +.highlight .n { color: #000000 } /* Name */ +.highlight .o { color: #582800 } /* Operator */ +.highlight .x { color: #000000 } /* Other */ +.highlight .p { color: #000000; font-weight: bold } /* Punctuation */ +.highlight .ch { color: #8f5902; font-style: italic } /* Comment.Hashbang */ +.highlight .cm { color: #8f5902; font-style: italic } /* Comment.Multiline */ +.highlight .cp { color: #8f5902 } /* Comment.Preproc */ +.highlight .cpf { color: #8f5902; font-style: italic } /* Comment.PreprocFile */ +.highlight .c1 { color: #8f5902; font-style: italic } /* Comment.Single */ +.highlight .cs { color: #8f5902; font-style: italic } /* Comment.Special */ +.highlight .gd { color: #a40000 } /* Generic.Deleted */ +.highlight .ge { color: #000000; font-style: italic } /* Generic.Emph */ +.highlight .ges { color: #000000 } /* Generic.EmphStrong */ +.highlight .gr { color: #ef2929 } /* Generic.Error */ +.highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +.highlight .gi { color: #00A000 } /* Generic.Inserted */ +.highlight .go { color: #888888 } /* 
Generic.Output */ +.highlight .gp { color: #745334 } /* Generic.Prompt */ +.highlight .gs { color: #000000; font-weight: bold } /* Generic.Strong */ +.highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +.highlight .gt { color: #a40000; font-weight: bold } /* Generic.Traceback */ +.highlight .kc { color: #004461; font-weight: bold } /* Keyword.Constant */ +.highlight .kd { color: #004461; font-weight: bold } /* Keyword.Declaration */ +.highlight .kn { color: #004461; font-weight: bold } /* Keyword.Namespace */ +.highlight .kp { color: #004461; font-weight: bold } /* Keyword.Pseudo */ +.highlight .kr { color: #004461; font-weight: bold } /* Keyword.Reserved */ +.highlight .kt { color: #004461; font-weight: bold } /* Keyword.Type */ +.highlight .ld { color: #000000 } /* Literal.Date */ +.highlight .m { color: #990000 } /* Literal.Number */ +.highlight .s { color: #4e9a06 } /* Literal.String */ +.highlight .na { color: #c4a000 } /* Name.Attribute */ +.highlight .nb { color: #004461 } /* Name.Builtin */ +.highlight .nc { color: #000000 } /* Name.Class */ +.highlight .no { color: #000000 } /* Name.Constant */ +.highlight .nd { color: #888888 } /* Name.Decorator */ +.highlight .ni { color: #ce5c00 } /* Name.Entity */ +.highlight .ne { color: #cc0000; font-weight: bold } /* Name.Exception */ +.highlight .nf { color: #000000 } /* Name.Function */ +.highlight .nl { color: #f57900 } /* Name.Label */ +.highlight .nn { color: #000000 } /* Name.Namespace */ +.highlight .nx { color: #000000 } /* Name.Other */ +.highlight .py { color: #000000 } /* Name.Property */ +.highlight .nt { color: #004461; font-weight: bold } /* Name.Tag */ +.highlight .nv { color: #000000 } /* Name.Variable */ +.highlight .ow { color: #004461; font-weight: bold } /* Operator.Word */ +.highlight .pm { color: #000000; font-weight: bold } /* Punctuation.Marker */ +.highlight .w { color: #f8f8f8; text-decoration: underline } /* Text.Whitespace */ +.highlight .mb { color: #990000 } /* Literal.Number.Bin */ +.highlight .mf { color: #990000 } /* Literal.Number.Float */ +.highlight .mh { color: #990000 } /* Literal.Number.Hex */ +.highlight .mi { color: #990000 } /* Literal.Number.Integer */ +.highlight .mo { color: #990000 } /* Literal.Number.Oct */ +.highlight .sa { color: #4e9a06 } /* Literal.String.Affix */ +.highlight .sb { color: #4e9a06 } /* Literal.String.Backtick */ +.highlight .sc { color: #4e9a06 } /* Literal.String.Char */ +.highlight .dl { color: #4e9a06 } /* Literal.String.Delimiter */ +.highlight .sd { color: #8f5902; font-style: italic } /* Literal.String.Doc */ +.highlight .s2 { color: #4e9a06 } /* Literal.String.Double */ +.highlight .se { color: #4e9a06 } /* Literal.String.Escape */ +.highlight .sh { color: #4e9a06 } /* Literal.String.Heredoc */ +.highlight .si { color: #4e9a06 } /* Literal.String.Interpol */ +.highlight .sx { color: #4e9a06 } /* Literal.String.Other */ +.highlight .sr { color: #4e9a06 } /* Literal.String.Regex */ +.highlight .s1 { color: #4e9a06 } /* Literal.String.Single */ +.highlight .ss { color: #4e9a06 } /* Literal.String.Symbol */ +.highlight .bp { color: #3465a4 } /* Name.Builtin.Pseudo */ +.highlight .fm { color: #000000 } /* Name.Function.Magic */ +.highlight .vc { color: #000000 } /* Name.Variable.Class */ +.highlight .vg { color: #000000 } /* Name.Variable.Global */ +.highlight .vi { color: #000000 } /* Name.Variable.Instance */ +.highlight .vm { color: #000000 } /* Name.Variable.Magic */ +.highlight .il { color: #990000 } /* Literal.Number.Integer.Long */ \ No 
newline at end of file diff --git a/_static/searchtools.js b/_static/searchtools.js new file mode 100644 index 0000000..92da3f8 --- /dev/null +++ b/_static/searchtools.js @@ -0,0 +1,619 @@ +/* + * searchtools.js + * ~~~~~~~~~~~~~~~~ + * + * Sphinx JavaScript utilities for the full-text search. + * + * :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. + * + */ +"use strict"; + +/** + * Simple result scoring code. + */ +if (typeof Scorer === "undefined") { + var Scorer = { + // Implement the following function to further tweak the score for each result + // The function takes a result array [docname, title, anchor, descr, score, filename] + // and returns the new score. + /* + score: result => { + const [docname, title, anchor, descr, score, filename] = result + return score + }, + */ + + // query matches the full name of an object + objNameMatch: 11, + // or matches in the last dotted part of the object name + objPartialMatch: 6, + // Additive scores depending on the priority of the object + objPrio: { + 0: 15, // used to be importantResults + 1: 5, // used to be objectResults + 2: -5, // used to be unimportantResults + }, + // Used when the priority is not in the mapping. + objPrioDefault: 0, + + // query found in title + title: 15, + partialTitle: 7, + // query found in terms + term: 5, + partialTerm: 2, + }; +} + +const _removeChildren = (element) => { + while (element && element.lastChild) element.removeChild(element.lastChild); +}; + +/** + * See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#escaping + */ +const _escapeRegExp = (string) => + string.replace(/[.*+\-?^${}()|[\]\\]/g, "\\$&"); // $& means the whole matched string + +const _displayItem = (item, searchTerms, highlightTerms) => { + const docBuilder = DOCUMENTATION_OPTIONS.BUILDER; + const docFileSuffix = DOCUMENTATION_OPTIONS.FILE_SUFFIX; + const docLinkSuffix = DOCUMENTATION_OPTIONS.LINK_SUFFIX; + const showSearchSummary = DOCUMENTATION_OPTIONS.SHOW_SEARCH_SUMMARY; + const contentRoot = document.documentElement.dataset.content_root; + + const [docName, title, anchor, descr, score, _filename] = item; + + let listItem = document.createElement("li"); + let requestUrl; + let linkUrl; + if (docBuilder === "dirhtml") { + // dirhtml builder + let dirname = docName + "/"; + if (dirname.match(/\/index\/$/)) + dirname = dirname.substring(0, dirname.length - 6); + else if (dirname === "index/") dirname = ""; + requestUrl = contentRoot + dirname; + linkUrl = requestUrl; + } else { + // normal html builders + requestUrl = contentRoot + docName + docFileSuffix; + linkUrl = docName + docLinkSuffix; + } + let linkEl = listItem.appendChild(document.createElement("a")); + linkEl.href = linkUrl + anchor; + linkEl.dataset.score = score; + linkEl.innerHTML = title; + if (descr) { + listItem.appendChild(document.createElement("span")).innerHTML = + " (" + descr + ")"; + // highlight search terms in the description + if (SPHINX_HIGHLIGHT_ENABLED) // set in sphinx_highlight.js + highlightTerms.forEach((term) => _highlightText(listItem, term, "highlighted")); + } + else if (showSearchSummary) + fetch(requestUrl) + .then((responseData) => responseData.text()) + .then((data) => { + if (data) + listItem.appendChild( + Search.makeSearchSummary(data, searchTerms, anchor) + ); + // highlight search terms in the summary + if (SPHINX_HIGHLIGHT_ENABLED) // set in sphinx_highlight.js + highlightTerms.forEach((term) => _highlightText(listItem, term, "highlighted")); 
+ }); + Search.output.appendChild(listItem); +}; +const _finishSearch = (resultCount) => { + Search.stopPulse(); + Search.title.innerText = _("Search Results"); + if (!resultCount) + Search.status.innerText = Documentation.gettext( + "Your search did not match any documents. Please make sure that all words are spelled correctly and that you've selected enough categories." + ); + else + Search.status.innerText = _( + "Search finished, found ${resultCount} page(s) matching the search query." + ).replace('${resultCount}', resultCount); +}; +const _displayNextItem = ( + results, + resultCount, + searchTerms, + highlightTerms, +) => { + // results left, load the summary and display it + // this is intended to be dynamic (don't sub resultsCount) + if (results.length) { + _displayItem(results.pop(), searchTerms, highlightTerms); + setTimeout( + () => _displayNextItem(results, resultCount, searchTerms, highlightTerms), + 5 + ); + } + // search finished, update title and status message + else _finishSearch(resultCount); +}; +// Helper function used by query() to order search results. +// Each input is an array of [docname, title, anchor, descr, score, filename]. +// Order the results by score (in opposite order of appearance, since the +// `_displayNextItem` function uses pop() to retrieve items) and then alphabetically. +const _orderResultsByScoreThenName = (a, b) => { + const leftScore = a[4]; + const rightScore = b[4]; + if (leftScore === rightScore) { + // same score: sort alphabetically + const leftTitle = a[1].toLowerCase(); + const rightTitle = b[1].toLowerCase(); + if (leftTitle === rightTitle) return 0; + return leftTitle > rightTitle ? -1 : 1; // inverted is intentional + } + return leftScore > rightScore ? 1 : -1; +}; + +/** + * Default splitQuery function. Can be overridden in ``sphinx.search`` with a + * custom function per language. + * + * The regular expression works by splitting the string on consecutive characters + * that are not Unicode letters, numbers, underscores, or emoji characters. + * This is the same as ``\W+`` in Python, preserving the surrogate pair area. + */ +if (typeof splitQuery === "undefined") { + var splitQuery = (query) => query + .split(/[^\p{Letter}\p{Number}_\p{Emoji_Presentation}]+/gu) + .filter(term => term) // remove remaining empty strings +} + +/** + * Search Module + */ +const Search = { + _index: null, + _queued_query: null, + _pulse_status: -1, + + htmlToText: (htmlString, anchor) => { + const htmlElement = new DOMParser().parseFromString(htmlString, 'text/html'); + for (const removalQuery of [".headerlinks", "script", "style"]) { + htmlElement.querySelectorAll(removalQuery).forEach((el) => { el.remove() }); + } + if (anchor) { + const anchorContent = htmlElement.querySelector(`[role="main"] ${anchor}`); + if (anchorContent) return anchorContent.textContent; + + console.warn( + `Anchored content block not found. Sphinx search tries to obtain it via DOM query '[role=main] ${anchor}'. Check your theme or template.` + ); + } + + // if anchor not specified or not found, fall back to main content + const docContent = htmlElement.querySelector('[role="main"]'); + if (docContent) return docContent.textContent; + + console.warn( + "Content block not found. Sphinx search tries to obtain it via DOM query '[role=main]'. Check your theme or template." 
+ ); + return ""; + }, + + init: () => { + const query = new URLSearchParams(window.location.search).get("q"); + document + .querySelectorAll('input[name="q"]') + .forEach((el) => (el.value = query)); + if (query) Search.performSearch(query); + }, + + loadIndex: (url) => + (document.body.appendChild(document.createElement("script")).src = url), + + setIndex: (index) => { + Search._index = index; + if (Search._queued_query !== null) { + const query = Search._queued_query; + Search._queued_query = null; + Search.query(query); + } + }, + + hasIndex: () => Search._index !== null, + + deferQuery: (query) => (Search._queued_query = query), + + stopPulse: () => (Search._pulse_status = -1), + + startPulse: () => { + if (Search._pulse_status >= 0) return; + + const pulse = () => { + Search._pulse_status = (Search._pulse_status + 1) % 4; + Search.dots.innerText = ".".repeat(Search._pulse_status); + if (Search._pulse_status >= 0) window.setTimeout(pulse, 500); + }; + pulse(); + }, + + /** + * perform a search for something (or wait until index is loaded) + */ + performSearch: (query) => { + // create the required interface elements + const searchText = document.createElement("h2"); + searchText.textContent = _("Searching"); + const searchSummary = document.createElement("p"); + searchSummary.classList.add("search-summary"); + searchSummary.innerText = ""; + const searchList = document.createElement("ul"); + searchList.classList.add("search"); + + const out = document.getElementById("search-results"); + Search.title = out.appendChild(searchText); + Search.dots = Search.title.appendChild(document.createElement("span")); + Search.status = out.appendChild(searchSummary); + Search.output = out.appendChild(searchList); + + const searchProgress = document.getElementById("search-progress"); + // Some themes don't use the search progress node + if (searchProgress) { + searchProgress.innerText = _("Preparing search..."); + } + Search.startPulse(); + + // index already loaded, the browser was quick! 
+ if (Search.hasIndex()) Search.query(query); + else Search.deferQuery(query); + }, + + _parseQuery: (query) => { + // stem the search terms and add them to the correct list + const stemmer = new Stemmer(); + const searchTerms = new Set(); + const excludedTerms = new Set(); + const highlightTerms = new Set(); + const objectTerms = new Set(splitQuery(query.toLowerCase().trim())); + splitQuery(query.trim()).forEach((queryTerm) => { + const queryTermLower = queryTerm.toLowerCase(); + + // maybe skip this "word" + // stopwords array is from language_data.js + if ( + stopwords.indexOf(queryTermLower) !== -1 || + queryTerm.match(/^\d+$/) + ) + return; + + // stem the word + let word = stemmer.stemWord(queryTermLower); + // select the correct list + if (word[0] === "-") excludedTerms.add(word.substr(1)); + else { + searchTerms.add(word); + highlightTerms.add(queryTermLower); + } + }); + + if (SPHINX_HIGHLIGHT_ENABLED) { // set in sphinx_highlight.js + localStorage.setItem("sphinx_highlight_terms", [...highlightTerms].join(" ")) + } + + // console.debug("SEARCH: searching for:"); + // console.info("required: ", [...searchTerms]); + // console.info("excluded: ", [...excludedTerms]); + + return [query, searchTerms, excludedTerms, highlightTerms, objectTerms]; + }, + + /** + * execute search (requires search index to be loaded) + */ + _performSearch: (query, searchTerms, excludedTerms, highlightTerms, objectTerms) => { + const filenames = Search._index.filenames; + const docNames = Search._index.docnames; + const titles = Search._index.titles; + const allTitles = Search._index.alltitles; + const indexEntries = Search._index.indexentries; + + // Collect multiple result groups to be sorted separately and then ordered. + // Each is an array of [docname, title, anchor, descr, score, filename]. + const normalResults = []; + const nonMainIndexResults = []; + + _removeChildren(document.getElementById("search-progress")); + + const queryLower = query.toLowerCase().trim(); + for (const [title, foundTitles] of Object.entries(allTitles)) { + if (title.toLowerCase().trim().includes(queryLower) && (queryLower.length >= title.length/2)) { + for (const [file, id] of foundTitles) { + let score = Math.round(100 * queryLower.length / title.length) + normalResults.push([ + docNames[file], + titles[file] !== title ? `${titles[file]} > ${title}` : title, + id !== null ? "#" + id : "", + null, + score, + filenames[file], + ]); + } + } + } + + // search for explicit entries in index directives + for (const [entry, foundEntries] of Object.entries(indexEntries)) { + if (entry.includes(queryLower) && (queryLower.length >= entry.length/2)) { + for (const [file, id, isMain] of foundEntries) { + const score = Math.round(100 * queryLower.length / entry.length); + const result = [ + docNames[file], + titles[file], + id ? 
"#" + id : "", + null, + score, + filenames[file], + ]; + if (isMain) { + normalResults.push(result); + } else { + nonMainIndexResults.push(result); + } + } + } + } + + // lookup as object + objectTerms.forEach((term) => + normalResults.push(...Search.performObjectSearch(term, objectTerms)) + ); + + // lookup as search terms in fulltext + normalResults.push(...Search.performTermsSearch(searchTerms, excludedTerms)); + + // let the scorer override scores with a custom scoring function + if (Scorer.score) { + normalResults.forEach((item) => (item[4] = Scorer.score(item))); + nonMainIndexResults.forEach((item) => (item[4] = Scorer.score(item))); + } + + // Sort each group of results by score and then alphabetically by name. + normalResults.sort(_orderResultsByScoreThenName); + nonMainIndexResults.sort(_orderResultsByScoreThenName); + + // Combine the result groups in (reverse) order. + // Non-main index entries are typically arbitrary cross-references, + // so display them after other results. + let results = [...nonMainIndexResults, ...normalResults]; + + // remove duplicate search results + // note the reversing of results, so that in the case of duplicates, the highest-scoring entry is kept + let seen = new Set(); + results = results.reverse().reduce((acc, result) => { + let resultStr = result.slice(0, 4).concat([result[5]]).map(v => String(v)).join(','); + if (!seen.has(resultStr)) { + acc.push(result); + seen.add(resultStr); + } + return acc; + }, []); + + return results.reverse(); + }, + + query: (query) => { + const [searchQuery, searchTerms, excludedTerms, highlightTerms, objectTerms] = Search._parseQuery(query); + const results = Search._performSearch(searchQuery, searchTerms, excludedTerms, highlightTerms, objectTerms); + + // for debugging + //Search.lastresults = results.slice(); // a copy + // console.info("search results:", Search.lastresults); + + // print the results + _displayNextItem(results, results.length, searchTerms, highlightTerms); + }, + + /** + * search for object names + */ + performObjectSearch: (object, objectTerms) => { + const filenames = Search._index.filenames; + const docNames = Search._index.docnames; + const objects = Search._index.objects; + const objNames = Search._index.objnames; + const titles = Search._index.titles; + + const results = []; + + const objectSearchCallback = (prefix, match) => { + const name = match[4] + const fullname = (prefix ? prefix + "." : "") + name; + const fullnameLower = fullname.toLowerCase(); + if (fullnameLower.indexOf(object) < 0) return; + + let score = 0; + const parts = fullnameLower.split("."); + + // check for different match types: exact matches of full name or + // "last name" (i.e. 
last dotted part) + if (fullnameLower === object || parts.slice(-1)[0] === object) + score += Scorer.objNameMatch; + else if (parts.slice(-1)[0].indexOf(object) > -1) + score += Scorer.objPartialMatch; // matches in last name + + const objName = objNames[match[1]][2]; + const title = titles[match[0]]; + + // If more than one term searched for, we require other words to be + // found in the name/title/description + const otherTerms = new Set(objectTerms); + otherTerms.delete(object); + if (otherTerms.size > 0) { + const haystack = `${prefix} ${name} ${objName} ${title}`.toLowerCase(); + if ( + [...otherTerms].some((otherTerm) => haystack.indexOf(otherTerm) < 0) + ) + return; + } + + let anchor = match[3]; + if (anchor === "") anchor = fullname; + else if (anchor === "-") anchor = objNames[match[1]][1] + "-" + fullname; + + const descr = objName + _(", in ") + title; + + // add custom score for some objects according to scorer + if (Scorer.objPrio.hasOwnProperty(match[2])) + score += Scorer.objPrio[match[2]]; + else score += Scorer.objPrioDefault; + + results.push([ + docNames[match[0]], + fullname, + "#" + anchor, + descr, + score, + filenames[match[0]], + ]); + }; + Object.keys(objects).forEach((prefix) => + objects[prefix].forEach((array) => + objectSearchCallback(prefix, array) + ) + ); + return results; + }, + + /** + * search for full-text terms in the index + */ + performTermsSearch: (searchTerms, excludedTerms) => { + // prepare search + const terms = Search._index.terms; + const titleTerms = Search._index.titleterms; + const filenames = Search._index.filenames; + const docNames = Search._index.docnames; + const titles = Search._index.titles; + + const scoreMap = new Map(); + const fileMap = new Map(); + + // perform the search on the required terms + searchTerms.forEach((word) => { + const files = []; + const arr = [ + { files: terms[word], score: Scorer.term }, + { files: titleTerms[word], score: Scorer.title }, + ]; + // add support for partial matches + if (word.length > 2) { + const escapedWord = _escapeRegExp(word); + if (!terms.hasOwnProperty(word)) { + Object.keys(terms).forEach((term) => { + if (term.match(escapedWord)) + arr.push({ files: terms[term], score: Scorer.partialTerm }); + }); + } + if (!titleTerms.hasOwnProperty(word)) { + Object.keys(titleTerms).forEach((term) => { + if (term.match(escapedWord)) + arr.push({ files: titleTerms[term], score: Scorer.partialTitle }); + }); + } + } + + // no match but word was a required one + if (arr.every((record) => record.files === undefined)) return; + + // found search word in contents + arr.forEach((record) => { + if (record.files === undefined) return; + + let recordFiles = record.files; + if (recordFiles.length === undefined) recordFiles = [recordFiles]; + files.push(...recordFiles); + + // set score for the word in each file + recordFiles.forEach((file) => { + if (!scoreMap.has(file)) scoreMap.set(file, {}); + scoreMap.get(file)[word] = record.score; + }); + }); + + // create the mapping + files.forEach((file) => { + if (!fileMap.has(file)) fileMap.set(file, [word]); + else if (fileMap.get(file).indexOf(word) === -1) fileMap.get(file).push(word); + }); + }); + + // now check if the files don't contain excluded terms + const results = []; + for (const [file, wordList] of fileMap) { + // check if all requirements are matched + + // as search terms with length < 3 are discarded + const filteredTermCount = [...searchTerms].filter( + (term) => term.length > 2 + ).length; + if ( + wordList.length !== searchTerms.size && + 
wordList.length !== filteredTermCount + ) + continue; + + // ensure that none of the excluded terms is in the search result + if ( + [...excludedTerms].some( + (term) => + terms[term] === file || + titleTerms[term] === file || + (terms[term] || []).includes(file) || + (titleTerms[term] || []).includes(file) + ) + ) + break; + + // select one (max) score for the file. + const score = Math.max(...wordList.map((w) => scoreMap.get(file)[w])); + // add result to the result list + results.push([ + docNames[file], + titles[file], + "", + null, + score, + filenames[file], + ]); + } + return results; + }, + + /** + * helper function to return a node containing the + * search summary for a given text. keywords is a list + * of stemmed words. + */ + makeSearchSummary: (htmlText, keywords, anchor) => { + const text = Search.htmlToText(htmlText, anchor); + if (text === "") return null; + + const textLower = text.toLowerCase(); + const actualStartPosition = [...keywords] + .map((k) => textLower.indexOf(k.toLowerCase())) + .filter((i) => i > -1) + .slice(-1)[0]; + const startWithContext = Math.max(actualStartPosition - 120, 0); + + const top = startWithContext === 0 ? "" : "..."; + const tail = startWithContext + 240 < text.length ? "..." : ""; + + let summary = document.createElement("p"); + summary.classList.add("context"); + summary.textContent = top + text.substr(startWithContext, 240).trim() + tail; + + return summary; + }, +}; + +_ready(Search.init); diff --git a/_static/sphinx_highlight.js b/_static/sphinx_highlight.js new file mode 100644 index 0000000..8a96c69 --- /dev/null +++ b/_static/sphinx_highlight.js @@ -0,0 +1,154 @@ +/* Highlighting utilities for Sphinx HTML documentation. */ +"use strict"; + +const SPHINX_HIGHLIGHT_ENABLED = true + +/** + * highlight a given string on a node by wrapping it in + * span elements with the given class name. + */ +const _highlight = (node, addItems, text, className) => { + if (node.nodeType === Node.TEXT_NODE) { + const val = node.nodeValue; + const parent = node.parentNode; + const pos = val.toLowerCase().indexOf(text); + if ( + pos >= 0 && + !parent.classList.contains(className) && + !parent.classList.contains("nohighlight") + ) { + let span; + + const closestNode = parent.closest("body, svg, foreignObject"); + const isInSVG = closestNode && closestNode.matches("svg"); + if (isInSVG) { + span = document.createElementNS("http://www.w3.org/2000/svg", "tspan"); + } else { + span = document.createElement("span"); + span.classList.add(className); + } + + span.appendChild(document.createTextNode(val.substr(pos, text.length))); + const rest = document.createTextNode(val.substr(pos + text.length)); + parent.insertBefore( + span, + parent.insertBefore( + rest, + node.nextSibling + ) + ); + node.nodeValue = val.substr(0, pos); + /* There may be more occurrences of search term in this node. So call this + * function recursively on the remaining fragment. 
+ */ + _highlight(rest, addItems, text, className); + + if (isInSVG) { + const rect = document.createElementNS( + "http://www.w3.org/2000/svg", + "rect" + ); + const bbox = parent.getBBox(); + rect.x.baseVal.value = bbox.x; + rect.y.baseVal.value = bbox.y; + rect.width.baseVal.value = bbox.width; + rect.height.baseVal.value = bbox.height; + rect.setAttribute("class", className); + addItems.push({ parent: parent, target: rect }); + } + } + } else if (node.matches && !node.matches("button, select, textarea")) { + node.childNodes.forEach((el) => _highlight(el, addItems, text, className)); + } +}; +const _highlightText = (thisNode, text, className) => { + let addItems = []; + _highlight(thisNode, addItems, text, className); + addItems.forEach((obj) => + obj.parent.insertAdjacentElement("beforebegin", obj.target) + ); +}; + +/** + * Small JavaScript module for the documentation. + */ +const SphinxHighlight = { + + /** + * highlight the search words provided in localstorage in the text + */ + highlightSearchWords: () => { + if (!SPHINX_HIGHLIGHT_ENABLED) return; // bail if no highlight + + // get and clear terms from localstorage + const url = new URL(window.location); + const highlight = + localStorage.getItem("sphinx_highlight_terms") + || url.searchParams.get("highlight") + || ""; + localStorage.removeItem("sphinx_highlight_terms") + url.searchParams.delete("highlight"); + window.history.replaceState({}, "", url); + + // get individual terms from highlight string + const terms = highlight.toLowerCase().split(/\s+/).filter(x => x); + if (terms.length === 0) return; // nothing to do + + // There should never be more than one element matching "div.body" + const divBody = document.querySelectorAll("div.body"); + const body = divBody.length ? divBody[0] : document.querySelector("body"); + window.setTimeout(() => { + terms.forEach((term) => _highlightText(body, term, "highlighted")); + }, 10); + + const searchBox = document.getElementById("searchbox"); + if (searchBox === null) return; + searchBox.appendChild( + document + .createRange() + .createContextualFragment( + '" + ) + ); + }, + + /** + * helper function to hide the search marks again + */ + hideSearchWords: () => { + document + .querySelectorAll("#searchbox .highlight-link") + .forEach((el) => el.remove()); + document + .querySelectorAll("span.highlighted") + .forEach((el) => el.classList.remove("highlighted")); + localStorage.removeItem("sphinx_highlight_terms") + }, + + initEscapeListener: () => { + // only install a listener if it is really needed + if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) return; + + document.addEventListener("keydown", (event) => { + // bail for input elements + if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return; + // bail with special keys + if (event.shiftKey || event.altKey || event.ctrlKey || event.metaKey) return; + if (DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS && (event.key === "Escape")) { + SphinxHighlight.hideSearchWords(); + event.preventDefault(); + } + }); + }, +}; + +_ready(() => { + /* Do not call highlightSearchWords() when we are on the search page. + * It will highlight words from the *previous* search query. 
+ */ + if (typeof Search === "undefined") SphinxHighlight.highlightSearchWords(); + SphinxHighlight.initEscapeListener(); +}); diff --git a/_static/version_warning_offset.js b/_static/version_warning_offset.js new file mode 100644 index 0000000..c7f9f49 --- /dev/null +++ b/_static/version_warning_offset.js @@ -0,0 +1,40 @@ +/* +When showing the sticky version warning, the warning will cover the +scroll target when navigating to #id hash locations. Take over scrolling +to adjust the position to account for the height of the warning. +*/ +$(() => { + const versionWarning = $('.version-warning') + + // Skip if there is no version warning, regular browser behavior is + // fine in that case. + if (versionWarning.length) { + const height = versionWarning.outerHeight(true) + const target = $(':target') + + // Adjust position when the initial link has a hash. + if (target.length) { + // Use absolute scrollTo instead of relative scrollBy to avoid + // scrolling when the viewport is already at the bottom of the + // document and has space. + const y = target.offset().top - height + // Delayed because the initial browser scroll doesn't seem to + // happen until after the document ready event, so scrolling + // immediately will be overridden. + setTimeout(() => scrollTo(0, y), 100) + } + + // Listen to clicks on hash anchors. + $('a[href^="#"]').on('click', e => { + // Stop default scroll. Also stops the automatic URL hash update. + e.preventDefault() + // Get the id to scroll to and set the URL hash manually. + const id = $(e.currentTarget).attr('href').substring(1) + location.hash = id + // Use getElementById since the hash may have dots in it. + const target = $(document.getElementById(id)) + // Scroll to top of target with space for the version warning. + scrollTo(0, target.offset().top - height) + }) + } +}) diff --git a/api.html b/api.html new file mode 100644 index 0000000..9e4b196 --- /dev/null +++ b/api.html @@ -0,0 +1,330 @@ + + + + + + + + API — Wayback Tweets Documentation (1.0.x) + + + + + + + + + + + + + + +
+
+
+
+ +
+

API

+
+

Request

+
+
+class waybacktweets.api.request_tweets.WaybackTweets(username, collapse, timestamp_from, timestamp_to, limit, offset)
+

Requests data from the Wayback CDX Server API and returns it in JSON format.

+
+
+get()
+

Sends a GET request to the Internet Archive’s CDX API to retrieve archived tweets.

+
+ +
+ +
+
+

Parse

+
+
+class waybacktweets.api.parse_tweets.TweetsParser(archived_tweets_response, username, field_options)
+

Handles the overall parsing of archived tweets.

+
+
+parse()
+

Parses the archived tweets CDX data and structures it.

+
+ +
+ +
+
+class waybacktweets.api.parse_tweets.TwitterEmbed(tweet_url)
+

Handles parsing of tweets using the Twitter Publish service.

+
+
+embed()
+

Parses the archived tweets when they are still available.

+
+ +
+ +
+
+class waybacktweets.api.parse_tweets.JsonParser(archived_tweet_url)
+

Handles parsing of tweets when the mimetype is application/json.

+
+
+parse()
+

Parses the archived tweets in JSON format.

+
+ +
+ +
+
+

Export

+
+
+class waybacktweets.api.export_tweets.TweetsExporter(data, username, field_options)
+

Handles the exporting of parsed archived tweets.

+
+
+save_to_csv()
+

Saves the DataFrame to a CSV file.

+
+ +
+
+save_to_html()
+

Saves the DataFrame to an HTML file.

+
+ +
+
+save_to_json()
+

Saves the DataFrame to a JSON file.

+
+ +
+ +
+
+

Visualizer

+
+
+class waybacktweets.api.viz_tweets.HTMLTweetsVisualizer(json_file_path, html_file_path, username)
+

Generates an HTML file to visualize the parsed data.

+
+
+generate()
+

Generates an HTML file.

+
+ +
+
+save(html_content)
+

Saves the generated HTML.
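A minimal sketch of how these two methods might be combined, based only on the signatures documented above; the file paths are placeholders, and it assumes generate() returns the HTML content that save() expects:

from waybacktweets.api.viz_tweets import HTMLTweetsVisualizer

# Placeholder paths: a JSON file previously exported by TweetsExporter
# and the HTML file to be written (names are not taken from the docs).
viz = HTMLTweetsVisualizer("jack_tweets.json", "jack_tweets.html", "jack")
html_content = viz.generate()  # assumed to return the generated HTML as a string
viz.save(html_content)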

+
+ +
+ +
+
+

Utils

+
+
+waybacktweets.utils.utils.check_double_status(wayback_machine_url, original_tweet_url)
+

Checks if a Wayback Machine URL contains two occurrences of “/status/” and if the original tweet URL does not contain “twitter.com”. Returns a boolean.

+
+ +
+
+waybacktweets.utils.utils.check_pattern_tweet(tweet_url)
+

Extracts tweet IDs from various types of tweet URLs or tweet-related patterns.

+

Reply pattern: /status// +Link pattern: /status/// +Twimg pattern: /status/https://pbs

+
+ +
+
+waybacktweets.utils.utils.clean_tweet_url(tweet_url, username)
+

Converts the tweet URL to lowercase and checks if it contains a tweet URL associated with the username. Returns the original tweet URL with correct casing; otherwise, returns the original tweet.

+
+ +
+
+waybacktweets.utils.utils.clean_wayback_machine_url(wayback_machine_url, archived_timestamp, username)
+

Converts the Wayback Machine URL to lowercase and checks if it contains a tweet URL associated with the username. Returns the original tweet URL with correct casing and archived timestamp; otherwise, returns the original Wayback Machine URL.

+
+ +
+
+waybacktweets.utils.utils.delete_tweet_pathnames(tweet_url)
+

Removes any pathnames (/photos, /likes, /retweet…) from the tweet URL.

+
+ +
+
+waybacktweets.utils.utils.get_response(url, params=None)
+

Sends a GET request to the specified URL and returns the response.

+
+ +
+
+waybacktweets.utils.utils.parse_date(ctx=None, param=None, value=None)
+

Parses a date string and returns it in the format “YYYYMMDD”.

+

This function takes an optional date string as input, +and if a date string is provided, it parses the date string into a datetime object +and then formats it in the “YYYYMMDD” format.

+
+
Args:

ctx (None, optional): Necessary when used with the click package. +Defaults to None. +param (None, optional): Necessary when used with the click package. +Defaults to None. +value (str, optional): A date string in the “YYYYMMDD” format. Defaults to None.

+
+
Returns:

str: The input date string formatted in the “YYYYMMDD” format, +or None if no date string was provided.

+
+
+
+ +
+
+waybacktweets.utils.utils.semicolon_parser(string)
+

Replaces semicolons in a string with %3B.
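A quick sketch of two of these helpers in use, assuming they behave exactly as described above (the example URL is made up):

from waybacktweets.utils.utils import parse_date, semicolon_parser

# parse_date returns the date string formatted as "YYYYMMDD"
print(parse_date(value="20150101"))  # 20150101

# semicolon_parser percent-encodes semicolons in the string
print(semicolon_parser("https://twitter.com/jack/status/20;ref_src=twsrc"))
# https://twitter.com/jack/status/20%3Bref_src=twsrc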

+
+ +
+
+ + +
+
+
+
+ + +
+
+ + + + + + \ No newline at end of file diff --git a/cli.html b/cli.html new file mode 100644 index 0000000..d47e0ff --- /dev/null +++ b/cli.html @@ -0,0 +1,161 @@ + + + + + + + + CLI — Wayback Tweets Documentation (1.0.x) + + + + + + + + + + + + + + +
+
+
+
+ +
+

CLI

+
+

Usage

+
+

waybacktweets

+

Retrieves archived tweets CDX data from the Wayback Machine, +performs necessary parsing, and saves the data.

+

USERNAME: The Twitter username without @.

+
waybacktweets [OPTIONS] USERNAME
+
+
+

Options

+
+
+--collapse <collapse>
+

Collapse results based on a field, or a substring of a field. In the timestamp value, XX ranges from 1 to 14 and compares the first XX digits of the timestamp field. Using 4 or more digits is recommended, so that results are compared at least by year (an example command is shown at the end of this page).

+
+
Options:
+

urlkey | digest | timestamp:XX

+
+
+
+ +
+
+--from <DATE>
+

Filtering by date range from this date. Format: YYYYmmdd

+
+ +
+
+--to <DATE>
+

Filtering by date range up to this date. Format: YYYYmmdd

+
+ +
+
+--limit <limit>
+

Query result limits.

+
+ +
+
+--offset <offset>
+

Allows for a simple way to scroll through the results.

+
+ +

Arguments

+
+
+USERNAME
+

Required argument
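For example, a sketch of an invocation that combines the options above, collapsing snapshots by year (timestamp:4); the username jack is just an example:

waybacktweets --collapse timestamp:4 --from 20150101 --to 20191231 --limit 250 jack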

+
+ +
+
+
+ + +
+
+
+
+ + +
+
+ + + + + + \ No newline at end of file diff --git a/contribute.html b/contribute.html new file mode 100644 index 0000000..5b1b8d3 --- /dev/null +++ b/contribute.html @@ -0,0 +1,132 @@ + + + + + + + + Contribute — Wayback Tweets Documentation (1.0.x) + + + + + + + + + + + + + + +
+
+
+
+ +
+

Contribute

+

Here are all the ways you can contribute to this project.

+
+

Testing

+

The best way to help is by using the package, either on the command line or as a module, and by suggesting improvements and reporting bugs. You’re very welcome to open an issue.

+
+
+

Hacking

+

If you have Python skills, contribute to the code.

+

These are the prerequisites:

+
    +
  • Python 3.11+

  • +
  • Poetry

  • +
+

Install from the source, following these instructions.

+

A brief explanation of the code under the Wayback Tweets directory:

+
    +
  • app: Streamlit app

  • +
  • docs: Documentation generated with Sphinx

  • +
  • waybacktweets/api: Main package modules

  • +
  • waybacktweets/cli: Command Line Interface module

  • +
  • waybacktweets/utils: Helper functions used in the package

  • +
+
+
+

Sponsoring

+

You can also donate to the project’s developer and maintainer, Claromes, via GitHub Sponsors. If you are interested in sponsoring the project, you can get in touch by email at support at claromes dot com.

+
+
+ + +
+
+
+
+ + +
+
+ + + + + + \ No newline at end of file diff --git a/errors.html b/errors.html new file mode 100644 index 0000000..9215aae --- /dev/null +++ b/errors.html @@ -0,0 +1,123 @@ + + + + + + + + Errors — Wayback Tweets Documentation (1.0.x) + + + + + + + + + + + + + + +
+
+
+
+ +
+

Errors

+

These are the most common errors, all of which are handled by the waybacktweets package.

+
+

ReadTimeout

+

This error occurs when the web.archive.org server takes too long to respond to a request. The server could be overloaded, or there could be network issues.

+

The output message from the package would be: Connection to web.archive.org timed out.

+
+
+

ConnectionError

+

This error is raised when the package fails to establish a new connection with web.archive.org. This could be due to network issues or the server being down.

+

The output message from the package would be: Failed to establish a new connection with web.archive.org. Max retries exceeded.

+

This is the error often returned when performing experimental parsing of URLs with the mimetype application/json.

+

The warning output message from the package would be: Connection error with https://web.archive.org/web/<TIMESTAMP>/https://twitter.com/<USERNAME>/status/<TWEET_ID>. Max retries exceeded. Error parsing the JSON, but the CDX data was saved.

+
+
+

HTTPError

+

This error occurs when the Internet Archive services are temporarily offline. This could be due to maintenance or server issues.

+

The output message from the package would be: Temporarily Offline: Internet Archive services are temporarily offline. Please check Internet Archive Twitter feed (https://twitter.com/internetarchive) for the latest information.

+
+
+ + +
+
+
+
+ + +
+
+ + + + + + \ No newline at end of file diff --git a/genindex.html b/genindex.html new file mode 100644 index 0000000..c9e8c52 --- /dev/null +++ b/genindex.html @@ -0,0 +1,351 @@ + + + + + + + + Index — Wayback Tweets Documentation (1.0.x) + + + + + + + + + + + + + + +
+
+
+
+ + +

Index

+ +
+ Symbols + | C + | D + | E + | G + | H + | J + | M + | P + | S + | T + | U + | W + +
+

Symbols

+ + + +
+ +

C

+ + + +
+ +

D

+ + +
+ +

E

+ + +
+ +

G

+ + + +
+ +

H

+ + +
+ +

J

+ + +
+ +

M

+ + +
+ +

P

+ + + +
+ +

S

+ + + +
+ +

T

+ + + +
+ +

U

+ + +
+ +

W

+ + + +
    +
  • + waybacktweets.api.parse_tweets + +
  • +
  • + waybacktweets.api.request_tweets + +
  • +
  • + waybacktweets.api.viz_tweets + +
  • +
  • + waybacktweets.utils.utils + +
  • +
+ + + +
+
+
+
+ + +
+
+ + + + + + \ No newline at end of file diff --git a/index.html b/index.html new file mode 100644 index 0000000..b5a5f75 --- /dev/null +++ b/index.html @@ -0,0 +1,202 @@ + + + + + + + + Wayback Tweets — Wayback Tweets Documentation (1.0.x) + + + + + + + + + + + + + +
+
+
+
+ +
+

Wayback Tweets

+
+

Wayback Tweets Documentation

+

Retrieves archived tweets’ CDX data from the Wayback Machine, performs necessary parsing, and saves the data.

+
+
+

User Guide

+ +
+
+

Command-Line Interface

+
+ +
+
+
+

API Reference

+ +
+
+

Streamlit Web App

+ +
+
+

Additional Information

+
+
+
+
+

Indices and tables

+ +
+
+ + +
+
+
+
+ + +
+
+ + + + + + \ No newline at end of file diff --git a/installation.html b/installation.html new file mode 100644 index 0000000..5410549 --- /dev/null +++ b/installation.html @@ -0,0 +1,134 @@ + + + + + + + + Installation — Wayback Tweets Documentation (1.0.x) + + + + + + + + + + + + + + +
+
+
+
+ +
+

Installation

+
+

Using pip

+
+
pip install waybacktweets
+
+
+
+
+
+

From source

+
+

Clone the repository:

+
git clone git@github.com:claromes/waybacktweets.git
+
+
+

Change directory:

+
cd waybacktweets
+
+
+

Install poetry, if you haven’t already:

+
pip install poetry
+
+
+

Install the dependencies:

+
poetry install
+
+
+
+

Read the Poetry CLI documentation.

+
+
+ + +
+
+
+
+ + +
+
+ + + + + + \ No newline at end of file diff --git a/modules.html b/modules.html new file mode 100644 index 0000000..19643c5 --- /dev/null +++ b/modules.html @@ -0,0 +1,141 @@ + + + + + + + + waybacktweets — Wayback Tweets Documentation (1.0.x) + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/objects.inv b/objects.inv new file mode 100644 index 0000000000000000000000000000000000000000..70f9cccc61f8bd391c09bb258cb3901ad17ec252 GIT binary patch literal 760 zcmVNERX>N99Zgg*Qc_4OWa&u{KZXhxWBOp+6Z)#;@bUGkcVR>R< zV{0H(cV%UCa|$CMR%LQ?X>V>iATcg5E_ezfAXI2&AaZ4GVQFq;WpW^IW*~HEX>%ZE zX>4U6X>%ZBZ*6dLWpi_7WFU2OX>MmAdTeQ8E(&#Cl0SA|+A>a?A+bnFD~)F_A2WCn=Ok zQ3>VorWAS2A^aLP*20wh3YF+f7zKy-Ck^#R6bBil!p0tFbA0qQkn!omtX*n0O1ur=~r7o?p)p9DiRg?=XWuOxJ|7 zXRoIDfMlzxKoJA;N_(BzjQhE~+A!{ZJ>C7}hYU_LGf4#tgaQ)5w)5@sHhorY9_mZHL!$De@j zNA7yhKVC!0sNkYXW&e`CMX`%eoBSphG>isYxLDl=luvASiMDTd)+Q>WSXSE{$1c6< zJo|M3N(rUM&ZwYc(tZba=2(@(Q|rb_2E1|XRCK*TGyk5yg9;42HF-x3F%nC$0m}j3%)|WOM@EC{g+Y+o)3*XQivmi-uvRII-Din89;#r$8h8 qVK(L^==Vc%|8ucS{9#0GqdwYVh1zYnIE + + + + + + + Python Module Index — Wayback Tweets Documentation (1.0.x) + + + + + + + + + + + + + + + + + +
+
+
+
+ + +

Python Module Index

+ +
+ w +
+ + + + + + + + + + + + + + + + + + + + + + +
 
+ w
+ waybacktweets +
    + waybacktweets.api.export_tweets +
    + waybacktweets.api.parse_tweets +
    + waybacktweets.api.request_tweets +
    + waybacktweets.api.viz_tweets +
    + waybacktweets.utils.utils +
+ + +
+
+
+
+ + +
+
+ + + + + + \ No newline at end of file diff --git a/quickstart.html b/quickstart.html new file mode 100644 index 0000000..692b32e --- /dev/null +++ b/quickstart.html @@ -0,0 +1,137 @@ + + + + + + + + Quickstart — Wayback Tweets Documentation (1.0.x) + + + + + + + + + + + + + + +
+
+
+
+ +
+

Quickstart

+
+

CLI

+

Using Wayback Tweets as a standalone command line tool

+

waybacktweets [OPTIONS] USERNAME

+
waybacktweets --from 20150101 --to 20191231 --limit 250 jack
+
+
+
+
+

Module

+

Using Wayback Tweets as a Python Module

+
from waybacktweets import WaybackTweets
+from waybacktweets.utils import parse_date
+
+username = "jack"
+collapse = "urlkey"
+timestamp_from = parse_date("20150101")
+timestamp_to = parse_date("20191231")
+limit = 250
+offset = 0
+
+api = WaybackTweets(username, collapse, timestamp_from, timestamp_to, limit, offset)
+
+archived_tweets = api.get()
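Continuing the snippet above, the CDX response can be parsed and exported with the classes documented in the API reference. This is a sketch rather than verbatim documentation: it assumes field_options is a list of the field names described on the Result page, and that parse() returns the structure TweetsExporter expects.

from waybacktweets.api.parse_tweets import TweetsParser
from waybacktweets.api.export_tweets import TweetsExporter

# Assumption: field_options takes a subset of the field names listed on the Result page
field_options = [
    "archived_timestamp",
    "original_tweet_url",
    "archived_tweet_url",
    "archived_mimetype",
]

parser = TweetsParser(archived_tweets, username, field_options)
parsed_tweets = parser.parse()  # assumed to return data accepted by TweetsExporter

exporter = TweetsExporter(parsed_tweets, username, field_options)
exporter.save_to_csv()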
+
+
+
+
+

Web App

+

Using Wayback Tweets as a Streamlit Web App

+

Access the application, a prototype written in Python with the Streamlit framework and hosted on Streamlit Cloud.

+
+
+ + +
+
+
+
+ + +
+
+ + + + + + \ No newline at end of file diff --git a/result.html b/result.html new file mode 100644 index 0000000..73310f0 --- /dev/null +++ b/result.html @@ -0,0 +1,121 @@ + + + + + + + + Result — Wayback Tweets Documentation (1.0.x) + + + + + + + + + + + + + + +
+
+
+
+ +
+

Result

+

The package saves the data in three formats: CSV, JSON, and HTML. The files have the following fields:

+
    +
  • archived_urlkey: (str) A canonical transformation of the URL you supplied, for example, org,eserver,tc)/. Such keys are useful for indexing.

  • +
  • archived_timestamp: (datetime) A 14 digit date-time representation in the YYYYMMDDhhmmss format.

  • +
  • original_tweet_url: (str) The original tweet URL.

  • +
  • archived_tweet_url: (str) The original archived URL.

  • +
  • parsed_tweet_url: (str) The original tweet URL after parsing. Check the utility functions.

  • +
  • parsed_archived_tweet_url: (str) The original archived URL after parsing. Check the utility functions.

  • +
  • parsed_tweet_text_mimetype_json: (str) The tweet text extracted from the archived URL that has mimetype application/json.

  • +
  • available_tweet_text: (str) The tweet text extracted from the URL that is still available on the Twitter account.

  • +
  • available_tweet_is_RT: (bool) Whether the tweet from the available_tweet_text field is a retweet or not.

  • +
  • available_tweet_info: (str) Name and date of the tweet from the available_tweet_text field.

  • +
  • archived_mimetype: (str) The mimetype of the archived content, which can be one of these:

    +
    +
      +
    • text/html

    • +
    • warc/revisit

    • +
    • application/json

    • +
    • unk

    • +
    +
    +
  • +
  • archived_statuscode: (str) The HTTP status code of the snapshot. If the mimetype is warc/revisit, the value returned for the statuscode key can be blank, but the actual value is the same as that of any other entry that has the same digest as this entry. If the mimetype is application/json, the value is usually empty or -.

  • +
  • archived_digest: (str) The SHA1 hash digest of the content, excluding the headers. It’s usually a base-32-encoded string.

  • +
  • archived_length: (int) The compressed byte size of the corresponding WARC record, which includes WARC headers, HTTP headers, and content payload.
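As a sketch of working with the exported data, the CSV can be inspected with pandas using the field names above; the filename is a placeholder, since the actual name is generated by TweetsExporter:

import pandas as pd

df = pd.read_csv("jack_tweets.csv")  # placeholder filename

# Keep only snapshots whose archived content is plain HTML
html_snapshots = df[df["archived_mimetype"] == "text/html"]
print(html_snapshots[["archived_timestamp", "archived_tweet_url"]].head())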

  • +
+
+ + +
+
+
+
+ + +
+
+ + + + + + \ No newline at end of file diff --git a/search.html b/search.html new file mode 100644 index 0000000..74030ce --- /dev/null +++ b/search.html @@ -0,0 +1,106 @@ + + + + + + + + Search — Wayback Tweets Documentation (1.0.x) + + + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +

Search

+ + + + +

+ Searching for multiple words only shows matches that contain + all words. +

+ + +
+ + + +
+ + +
+ + +
+
+
+
+ + +
+
+ + + + + + \ No newline at end of file diff --git a/searchindex.js b/searchindex.js new file mode 100644 index 0000000..a3dd02a --- /dev/null +++ b/searchindex.js @@ -0,0 +1 @@ +Search.setIndex({"alltitles": {"API": [[0, "api"]], "API Reference": [[4, "api-reference"]], "Additional Information": [[4, "additional-information"]], "CLI": [[1, "cli"], [7, "cli"]], "Command-Line Interface": [[4, "command-line-interface"]], "Community Comments": [[9, "community-comments"]], "ConnectionError": [[3, "connectionerror"]], "Contribute": [[2, "contribute"]], "Errors": [[3, "errors"]], "Export": [[0, "module-waybacktweets.api.export_tweets"]], "Filters": [[9, "filters"]], "From source": [[5, "from-source"]], "HTTPError": [[3, "httperror"]], "Hacking": [[2, "hacking"]], "Indices and tables": [[4, "indices-and-tables"]], "Installation": [[5, "installation"]], "Module": [[7, "module"]], "Pagination": [[9, "pagination"]], "Parse": [[0, "module-waybacktweets.api.parse_tweets"]], "Quickstart": [[7, "quickstart"]], "ReadTimeout": [[3, "readtimeout"]], "Request": [[0, "module-waybacktweets.api.request_tweets"]], "Result": [[8, "result"]], "Sponsoring": [[2, "sponsoring"]], "Streamlit Web App": [[4, "streamlit-web-app"]], "Testing": [[2, "testing"]], "Usage": [[1, "usage"]], "User Guide": [[4, "user-guide"]], "Using pip": [[5, "using-pip"]], "Utils": [[0, "module-waybacktweets.utils.utils"]], "Visualizer": [[0, "module-waybacktweets.api.viz_tweets"]], "Wayback Tweets": [[4, "wayback-tweets"]], "Wayback Tweets Documentation": [[4, "wayback-tweets-documentation"]], "Web App": [[7, "web-app"], [9, "web-app"]], "Workflow": [[10, "workflow"]], "waybacktweets": [[1, "waybacktweets"], [6, "waybacktweets"]]}, "docnames": ["api", "cli", "contribute", "errors", "index", "installation", "modules", "quickstart", "result", "streamlit", "workflow"], "envversion": {"sphinx": 61, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1}, "filenames": ["api.rst", "cli.rst", "contribute.rst", "errors.rst", "index.rst", "installation.rst", "modules.rst", "quickstart.rst", "result.rst", "streamlit.rst", "workflow.rst"], "indexentries": {"--collapse": [[1, "cmdoption-waybacktweets-collapse", false]], "--from": [[1, "cmdoption-waybacktweets-from", false]], "--limit": [[1, "cmdoption-waybacktweets-limit", false]], "--offset": [[1, "cmdoption-waybacktweets-offset", false]], "--to": [[1, "cmdoption-waybacktweets-to", false]], "check_double_status() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.check_double_status", false]], "check_pattern_tweet() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.check_pattern_tweet", false]], "clean_tweet_url() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.clean_tweet_url", false]], "clean_wayback_machine_url() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.clean_wayback_machine_url", false]], "delete_tweet_pathnames() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.delete_tweet_pathnames", false]], "embed() (waybacktweets.api.parse_tweets.twitterembed method)": [[0, "waybacktweets.api.parse_tweets.TwitterEmbed.embed", false]], "generate() (waybacktweets.api.viz_tweets.htmltweetsvisualizer method)": [[0, 
"waybacktweets.api.viz_tweets.HTMLTweetsVisualizer.generate", false]], "get() (waybacktweets.api.request_tweets.waybacktweets method)": [[0, "waybacktweets.api.request_tweets.WaybackTweets.get", false]], "get_response() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.get_response", false]], "htmltweetsvisualizer (class in waybacktweets.api.viz_tweets)": [[0, "waybacktweets.api.viz_tweets.HTMLTweetsVisualizer", false]], "jsonparser (class in waybacktweets.api.parse_tweets)": [[0, "waybacktweets.api.parse_tweets.JsonParser", false]], "module": [[0, "module-waybacktweets.api.export_tweets", false], [0, "module-waybacktweets.api.parse_tweets", false], [0, "module-waybacktweets.api.request_tweets", false], [0, "module-waybacktweets.api.viz_tweets", false], [0, "module-waybacktweets.utils.utils", false]], "parse() (waybacktweets.api.parse_tweets.jsonparser method)": [[0, "waybacktweets.api.parse_tweets.JsonParser.parse", false]], "parse() (waybacktweets.api.parse_tweets.tweetsparser method)": [[0, "waybacktweets.api.parse_tweets.TweetsParser.parse", false]], "parse_date() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.parse_date", false]], "save() (waybacktweets.api.viz_tweets.htmltweetsvisualizer method)": [[0, "waybacktweets.api.viz_tweets.HTMLTweetsVisualizer.save", false]], "save_to_csv() (waybacktweets.api.export_tweets.tweetsexporter method)": [[0, "waybacktweets.api.export_tweets.TweetsExporter.save_to_csv", false]], "save_to_html() (waybacktweets.api.export_tweets.tweetsexporter method)": [[0, "waybacktweets.api.export_tweets.TweetsExporter.save_to_html", false]], "save_to_json() (waybacktweets.api.export_tweets.tweetsexporter method)": [[0, "waybacktweets.api.export_tweets.TweetsExporter.save_to_json", false]], "semicolon_parser() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.semicolon_parser", false]], "tweetsexporter (class in waybacktweets.api.export_tweets)": [[0, "waybacktweets.api.export_tweets.TweetsExporter", false]], "tweetsparser (class in waybacktweets.api.parse_tweets)": [[0, "waybacktweets.api.parse_tweets.TweetsParser", false]], "twitterembed (class in waybacktweets.api.parse_tweets)": [[0, "waybacktweets.api.parse_tweets.TwitterEmbed", false]], "username": [[1, "cmdoption-waybacktweets-arg-USERNAME", false]], "waybacktweets (class in waybacktweets.api.request_tweets)": [[0, "waybacktweets.api.request_tweets.WaybackTweets", false]], "waybacktweets command line option": [[1, "cmdoption-waybacktweets-arg-USERNAME", false], [1, "cmdoption-waybacktweets-collapse", false], [1, "cmdoption-waybacktweets-from", false], [1, "cmdoption-waybacktweets-limit", false], [1, "cmdoption-waybacktweets-offset", false], [1, "cmdoption-waybacktweets-to", false]], "waybacktweets.api.export_tweets": [[0, "module-waybacktweets.api.export_tweets", false]], "waybacktweets.api.parse_tweets": [[0, "module-waybacktweets.api.parse_tweets", false]], "waybacktweets.api.request_tweets": [[0, "module-waybacktweets.api.request_tweets", false]], "waybacktweets.api.viz_tweets": [[0, "module-waybacktweets.api.viz_tweets", false]], "waybacktweets.utils.utils": [[0, "module-waybacktweets.utils.utils", false]]}, "objects": {"waybacktweets": [[1, 4, 1, "cmdoption-waybacktweets-collapse", "--collapse"], [1, 4, 1, "cmdoption-waybacktweets-from", "--from"], [1, 4, 1, "cmdoption-waybacktweets-limit", "--limit"], [1, 4, 1, "cmdoption-waybacktweets-offset", "--offset"], [1, 4, 1, "cmdoption-waybacktweets-to", "--to"], [1, 4, 1, 
"cmdoption-waybacktweets-arg-USERNAME", "USERNAME"]], "waybacktweets.api": [[0, 0, 0, "-", "export_tweets"], [0, 0, 0, "-", "parse_tweets"], [0, 0, 0, "-", "request_tweets"], [0, 0, 0, "-", "viz_tweets"]], "waybacktweets.api.export_tweets": [[0, 1, 1, "", "TweetsExporter"]], "waybacktweets.api.export_tweets.TweetsExporter": [[0, 2, 1, "", "save_to_csv"], [0, 2, 1, "", "save_to_html"], [0, 2, 1, "", "save_to_json"]], "waybacktweets.api.parse_tweets": [[0, 1, 1, "", "JsonParser"], [0, 1, 1, "", "TweetsParser"], [0, 1, 1, "", "TwitterEmbed"]], "waybacktweets.api.parse_tweets.JsonParser": [[0, 2, 1, "", "parse"]], "waybacktweets.api.parse_tweets.TweetsParser": [[0, 2, 1, "", "parse"]], "waybacktweets.api.parse_tweets.TwitterEmbed": [[0, 2, 1, "", "embed"]], "waybacktweets.api.request_tweets": [[0, 1, 1, "", "WaybackTweets"]], "waybacktweets.api.request_tweets.WaybackTweets": [[0, 2, 1, "", "get"]], "waybacktweets.api.viz_tweets": [[0, 1, 1, "", "HTMLTweetsVisualizer"]], "waybacktweets.api.viz_tweets.HTMLTweetsVisualizer": [[0, 2, 1, "", "generate"], [0, 2, 1, "", "save"]], "waybacktweets.utils": [[0, 0, 0, "-", "utils"]], "waybacktweets.utils.utils": [[0, 3, 1, "", "check_double_status"], [0, 3, 1, "", "check_pattern_tweet"], [0, 3, 1, "", "clean_tweet_url"], [0, 3, 1, "", "clean_wayback_machine_url"], [0, 3, 1, "", "delete_tweet_pathnames"], [0, 3, 1, "", "get_response"], [0, 3, 1, "", "parse_date"], [0, 3, 1, "", "semicolon_parser"]]}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"], "3": ["py", "function", "Python function"], "4": ["std", "cmdoption", "program option"]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method", "3": "py:function", "4": "std:cmdoption"}, "terms": {"": [0, 2, 8], "0": [4, 7], "1": 1, "11": 2, "14": [1, 8], "20150101": 7, "20191231": 7, "22": 9, "25": 9, "250": 7, "3": [2, 4, 9], "32": 8, "3b": 0, "4": 1, "A": [0, 8, 9], "For": 9, "If": [2, 8, 9], "It": [1, 8, 9], "One": 9, "Such": 8, "The": [0, 1, 2, 3, 8, 9, 10], "These": [2, 3], "about": 2, "access": [7, 9], "account": [8, 9], "actual": 8, "after": 8, "all": 2, "allow": [1, 9], "alreadi": 5, "also": [2, 10], "alwai": [9, 10], "am": 9, "an": [0, 2, 9], "ani": [0, 8], "api": [2, 6, 7, 9, 10], "aplic": 9, "app": 2, "appli": 9, "applic": [0, 3, 7, 8, 9, 10], "ar": [0, 2, 3, 8, 10], "archiv": [0, 1, 3, 4, 8, 9, 10], "archived_digest": 8, "archived_length": 8, "archived_mimetyp": 8, "archived_statuscod": 8, "archived_timestamp": [0, 8], "archived_tweet": 7, "archived_tweet_url": [0, 8], "archived_tweets_respons": 0, "archived_urlkei": 8, "arg": 0, "argument": 1, "ari": 9, "arsip": 9, "associ": 0, "automat": 9, "avail": [0, 8, 9], "available_tweet_info": 8, "available_tweet_is_rt": 8, "available_tweet_text": 8, "avoid": 9, "base": [1, 8, 9], "being": 3, "bellingcat": 9, "ben": 9, "best": [2, 10], "blank": 8, "bool": 8, "boolean": 0, "brief": 2, "bug": 2, "bulk": 9, "byte": 8, "calendar": 9, "call": 9, "can": [2, 8, 9], "canon": 8, "case": 0, "cd": 5, "cdx": [0, 1, 3, 4, 9, 10], "chang": [4, 5, 9], "check": [0, 3, 8, 9], "check_double_statu": [0, 6], "check_pattern_tweet": [0, 6], "checkbox": 9, "clarissa": 9, "clarom": [2, 5], "class": 0, "clean_tweet_url": [0, 6], "clean_wayback_machine_url": [0, 6], "cli": [2, 4, 5], "click": 0, "clone": 5, "cloud": [7, 9], "code": [2, 8], "collaps": [0, 1, 7, 9], "com": [0, 2, 3, 5], "command": [2, 7], "comment": 4, "common": 3, "commun": 4, "compar": 1, "compress": 8, "connect": 3, 
"connectionerror": 4, "contact": 2, "contain": 0, "content": 8, "contribut": 4, "convert": 0, "correct": 0, "correspond": 8, "could": 3, "creat": 9, "csv": [0, 8], "ctx": 0, "daili": 9, "data": [0, 1, 3, 4, 9, 10], "datafram": 0, "date": [0, 1, 8, 9], "datetim": [0, 8], "deeper": 9, "default": 0, "defin": 10, "delet": 9, "delete_tweet_pathnam": [0, 6], "delight": 9, "depend": 5, "develop": 2, "digest": [1, 8], "digit": [1, 8], "directori": [2, 5], "displai": 9, "do": 9, "doc": 2, "document": [2, 5], "doe": 0, "donat": 2, "dot": 2, "down": [3, 9], "due": [3, 9, 10], "each": 9, "effect": 9, "either": 2, "email": 2, "emb": [0, 6], "empti": 8, "encod": 8, "entri": 8, "error": 4, "eserv": 8, "ess": 9, "establish": 3, "exampl": 8, "exceed": 3, "excel": 9, "exclud": 8, "experiment": 3, "explan": 2, "export": [4, 6], "export_tweet": 0, "extract": [0, 8], "facilit": 10, "fail": 3, "feed": 3, "field": [1, 8, 9], "field_opt": 0, "file": [0, 8], "filter": [1, 4], "find": 9, "first": 1, "fix": 9, "flow": 10, "flowchart": 10, "follow": [2, 8, 10], "format": [0, 1, 8, 10], "framework": [7, 9], "from": [0, 1, 2, 3, 4, 7, 8, 10], "fun": 9, "function": [0, 2, 8], "gem": 9, "gener": [0, 2, 6], "get": [0, 6, 7], "get_respons": [0, 6], "gijn": 9, "git": 5, "github": [2, 5, 9], "gone": 9, "gpl": 4, "gunakan": 9, "ha": [8, 9], "hack": 4, "handl": [0, 3], "hash": 8, "have": [2, 8, 9], "haven": 5, "header": 8, "help": 2, "helper": 2, "henk": 9, "here": 2, "hidden": 9, "host": [7, 9], "html": [0, 8], "html_content": 0, "html_file_path": 0, "htmltweetsvisu": [0, 6], "http": [0, 3, 8], "httperror": 4, "i": [0, 1, 2, 3, 8, 9, 10], "id": 0, "import": 7, "improv": 2, "includ": 8, "index": [4, 8], "indonesia": 9, "inform": 3, "input": 0, "instal": [2, 4], "instruct": 2, "int": 8, "interest": 2, "interfac": 2, "internet": [0, 3], "internetarch": 3, "irina_tech_tip": 9, "issu": [2, 3, 9], "jack": 7, "json": [0, 3, 8, 10], "json_file_path": 0, "jsonpars": [0, 6], "kei": [8, 9], "know": 9, "lack": 9, "latest": 3, "launch": 9, "least": 1, "licens": 4, "like": 0, "limit": [0, 1, 7, 9, 10], "line": [2, 7], "link": [0, 9], "long": 3, "look": 9, "love": 9, "lowercas": 0, "machin": [0, 1, 4, 9], "main": 2, "maintain": 2, "mainten": 3, "mani": 9, "manual": 9, "max": 3, "member": 9, "mempermudah": 9, "mend": 9, "messag": 3, "mimetyp": [0, 3, 8, 10], "modul": [2, 4], "more": 9, "most": [3, 9], "mous": 10, "multipl": 9, "my": 9, "myosinttip": 9, "name": 8, "necessari": [0, 1, 4], "network": 3, "new": [3, 9], "newslett": 9, "none": 0, "now": 9, "object": 0, "obtain": 10, "occur": 3, "occurr": 0, "offlin": 3, "offset": [0, 1, 7], "often": 3, "old": 9, "one": 8, "onli": [9, 10], "onward": 1, "open": [2, 9], "option": [0, 1, 7, 9], "org": [3, 8, 9], "origin": [0, 8, 9], "original_tweet_url": [0, 8], "osint": 9, "other": 8, "otherwis": 0, "our": 9, "out": [3, 10], "output": 3, "over": 9, "overal": 0, "overload": 3, "packag": [0, 2, 3, 8], "page": [4, 9], "pagin": 4, "param": 0, "pars": [1, 3, 4, 6, 8, 10], "parse_d": [0, 6, 7], "parse_tweet": 0, "parsed_archived_tweet_url": 8, "parsed_tweet_text_mimetype_json": 8, "parsed_tweet_url": 8, "pathnam": 0, "pattern": 0, "payload": 8, "pb": 0, "penelusuran": 9, "per": 9, "perform": [1, 3, 4], "photo": 0, "pip": 4, "pleas": [3, 9], "poetri": [2, 5], "possibl": 10, "prerequisit": 2, "price": 9, "project": 2, "propos": 10, "prototyp": [7, 9], "provid": 0, "publish": 0, "python": [2, 7, 9], "queri": 1, "quickli": 9, "quickstart": 4, "rais": 3, "rang": [1, 9], "rate": 9, "re": [2, 9], "read": [5, 10], 
"readtimeout": 4, "recommend": 1, "record": 8, "regardless": 10, "relat": 0, "remov": 0, "replac": 0, "repli": 0, "report": 2, "repositori": 5, "represent": 8, "request": [3, 4, 6], "request_tweet": 0, "requir": 1, "research": 9, "respond": 3, "respons": 0, "result": [1, 4, 10], "retri": 3, "retriev": [0, 1, 4, 10], "return": [0, 3, 8], "retweet": [0, 8], "revisit": 8, "same": 8, "save": [0, 1, 3, 4, 6, 8, 9, 10], "save_to_csv": [0, 6], "save_to_html": [0, 6], "save_to_json": [0, 6], "scroll": 1, "search": 4, "see": 9, "semicolon": 0, "semicolon_pars": [0, 6], "send": 0, "server": [0, 3, 9, 10], "servic": [0, 3], "sha1": 8, "simpl": 1, "size": 8, "skill": 2, "snapshot": 8, "so": 9, "sourc": [2, 4, 9], "specifi": 0, "sphinx": 2, "sponsor": 4, "standalon": 7, "statu": [0, 3, 8, 9], "statuscod": 8, "still": [0, 8], "str": [0, 8], "streamlit": [2, 7, 9], "string": [0, 8], "structur": [0, 9], "substr": 1, "suggest": 2, "suppli": 8, "support": 2, "t": [5, 9], "take": [0, 3], "tc": 8, "temporarili": 3, "test": 4, "text": 8, "thei": 0, "therefor": 10, "thi": [0, 1, 2, 3, 8, 9], "those": 9, "three": 8, "through": 1, "time": [3, 8], "timestamp": [0, 1, 3], "timestamp_from": [0, 7], "timestamp_to": [0, 7], "too": 3, "tool": [7, 9, 10], "train": 9, "transform": 8, "tweet": [0, 1, 2, 7, 8, 9, 10], "tweet_id": 3, "tweet_url": 0, "tweetsexport": [0, 6], "tweetspars": [0, 6], "twimg": 0, "twitter": [0, 1, 3, 8, 9], "twitteremb": [0, 6], "two": 0, "type": 0, "under": 2, "uniqu": 9, "unk": 8, "untuk": 9, "up": [1, 9], "url": [0, 3, 8, 9], "urlkei": [1, 7, 9], "us": [0, 1, 2, 4, 7, 8, 9, 10], "usag": 4, "user": 9, "usernam": [0, 1, 3, 7], "usual": 8, "util": [2, 4, 6, 7, 8, 9], "valu": [0, 1, 8, 9], "van": 9, "variou": 0, "veri": 2, "via": [2, 9], "view": 9, "visual": [4, 6], "viz_tweet": 0, "wa": [0, 3, 10], "wai": [1, 2, 9, 10], "want": 9, "warc": 8, "warn": 3, "wayback": [0, 1, 2, 7, 9, 10], "wayback_machine_url": 0, "waybacktweet": [0, 2, 3, 5, 7], "we": 9, "web": 3, "welcom": 2, "what": 9, "when": [0, 3, 9], "whether": 8, "which": [8, 9], "who": 9, "without": 1, "workflow": 4, "would": 3, "written": [7, 9, 10], "x": 9, "xx": 1, "year": 1, "you": [2, 5, 8, 9], "yyyymmdd": [0, 1], "yyyymmddhhmmss": 8, "zoom": 10}, "titles": ["API", "CLI", "Contribute", "Errors", "Wayback Tweets", "Installation", "waybacktweets", "Quickstart", "Result", "Web App", "Workflow"], "titleterms": {"addit": 4, "api": [0, 4], "app": [4, 7, 9], "cli": [1, 7], "command": 4, "comment": 9, "commun": 9, "connectionerror": 3, "contribut": 2, "document": 4, "error": 3, "export": 0, "filter": 9, "from": 5, "guid": 4, "hack": 2, "httperror": 3, "indic": 4, "inform": 4, "instal": 5, "interfac": 4, "line": 4, "modul": 7, "pagin": 9, "pars": 0, "pip": 5, "quickstart": 7, "readtimeout": 3, "refer": 4, "request": 0, "result": 8, "sourc": 5, "sponsor": 2, "streamlit": 4, "tabl": 4, "test": 2, "tweet": 4, "us": 5, "usag": 1, "user": 4, "util": 0, "visual": 0, "wayback": 4, "waybacktweet": [1, 6], "web": [4, 7, 9], "workflow": 10}}) \ No newline at end of file diff --git a/streamlit.html b/streamlit.html new file mode 100644 index 0000000..c89f7a6 --- /dev/null +++ b/streamlit.html @@ -0,0 +1,138 @@ + + + + + + + + Web App — Wayback Tweets Documentation (1.0.x) + + + + + + + + + + + + + +
+
+
+
+ +
+

Web App

+

Application that displays multiple archived tweets on the Wayback Machine, so you do not have to open each link manually. The application is a prototype written in Python with the Streamlit framework and hosted on Streamlit Cloud, allowing users to apply filters and view tweets that lack the original URL.

+
+

Filters

+
    +
  • Calendar: Filtering by date range

  • +
  • Checkbox: Only tweets not available

  • +
  • Checkbox: Only unique URLs (filtering by the collapse option using the urlkey field)

  • +
+
+
+

Pagination

+

Pagination is automatic and allows viewing up to 25 tweets per page. This is a fixed value due to the API rate limit.

+
+
+

Community Comments

+
    +
  • "We're always delighted when we see our community members create tools for open source research." Bellingcat
  • +
    +
  • "#myOSINTtip Clarissa Mendes launched a new tool for accessing old tweets via archive.org called the Wayback Tweets app. For those who love to look deeper at #osint tools, it is available on GitHub and uses the Wayback CDX Server API server (which is a hidden gem for accessing archive.org data!)" My OSINT Training
  • +
    +
  • "Original way to find deleted tweets." Henk Van Ess
  • +
    +
  • "This is an excellent tool to use now that most Twitter API-based tools have gone down with changes to the pricing structure over at X." The OSINT Newsletter - Issue #22
  • +
    +
  • "One of the keys to using the Wayback Machine effectively is knowing what it can and can't archive. It can, and has, archived many, many Twitter accounts... Utilize fun tools such as Wayback Tweets to do so more effectively." Ari Ben Am
  • +
    +
  • "Want to see archived tweets on Wayback Machine in bulk? You can use Wayback Tweets." Daily OSINT
  • +
    +
  • "To make searching the archives easier, use Wayback Tweets." GIJN Indonesia
  • +
    +
  • "A tool to quickly view tweets saved on archive.org." Irina_Tech_Tips Newsletter #3
  • +
    +
+

Note

+

If the application is down, please check the Streamlit Cloud Status.

+
+
+
+ + +
+
+
+
+ + +
+
+ + + + + + \ No newline at end of file diff --git a/workflow.html b/workflow.html new file mode 100644 index 0000000..351822d --- /dev/null +++ b/workflow.html @@ -0,0 +1,127 @@ + + + + + + + + Workflow — Wayback Tweets Documentation (1.0.x) + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Workflow

+

The tool was written with a proposal not only to retrieve data from archived tweets, but also to make those tweets easier to read. Therefore, a flow is defined to obtain these results in the best possible way.

+

Due to limitations of the Wayback CDX Server API, it is not always possible to parse results with the mimetype application/json; regardless, the data in CDX format is saved.

+

Use the mouse to zoom in and out of the flowchart.

+
flowchart TB
    A[input Username]--> B[(Wayback Machine)]
    B--> B1[save Archived Tweets CDX data]
    B1--> |parsing| C{embed Tweet URL\nvia Twitter Publisher}
    C--> |2xx/3xx| D[return Tweet text]
    C--> |4xx| E[return None]
    E--> F{request Archived\nTweet URL}
    F--> |4xx| G[return Only CDX data]
    F--> |2xx/3xx: application/json| J[return JSON text]
    F--> |2xx/3xx: text/html, warc/revisit, unk| K[return HTML iframe tag]
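In code, the flow above corresponds roughly to chaining the package's request, parse, and export classes. The snippet below is a minimal sketch based on the signatures documented in the API reference; the username, date range, collapse field, and field names are placeholder values.

from waybacktweets.api.request_tweets import WaybackTweets
from waybacktweets.api.parse_tweets import TweetsParser
from waybacktweets.api.export_tweets import TweetsExporter

# Field names as listed on the Result page; assumed here to be valid field_options values
field_options = ["archived_urlkey", "archived_timestamp", "original_tweet_url", "archived_tweet_url"]

# Request the archived tweets CDX data
api = WaybackTweets("jack", "urlkey", "20150101", "20191231", 250, 0)
archived_tweets = api.get()

if archived_tweets:
    # Parse the CDX data into the selected fields
    parser = TweetsParser(archived_tweets, "jack", field_options)
    parsed_tweets = parser.parse()

    # Save the parsed data in CSV, JSON, and HTML formats
    exporter = TweetsExporter(parsed_tweets, "jack", field_options)
    exporter.save_to_csv()
    exporter.save_to_json()
    exporter.save_to_html()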
+
+ + +
+
+
+
+ + +
+
+ + + + + + \ No newline at end of file From 742046f717255e7ba8c62203687b1dea64a0b62c Mon Sep 17 00:00:00 2001 From: claromes Date: Fri, 14 Jun 2024 22:06:56 +0000 Subject: [PATCH 03/63] =?UTF-8?q?Deploying=20to=20gh-pages=20from=20@=20cl?= =?UTF-8?q?aromes/waybacktweets@88ef5e2c472f4b55713d65abec7646ecf2869c15?= =?UTF-8?q?=20=F0=9F=9A=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 404.html | 2 +- _sources/api.rst.txt | 5 +- _sources/cli.rst.txt | 33 ++++ _sources/errors.rst.txt | 5 +- _sources/index.rst.txt | 3 +- _sources/installation.rst.txt | 6 + _sources/result.rst.txt | 3 +- _sources/todo.rst.txt | 13 ++ _sources/workflow.rst.txt | 2 +- _static/css/custom.css | 3 +- api.html | 288 ++++++++++++++++++++++++++++------ cli.html | 42 ++++- contribute.html | 8 +- errors.html | 4 +- genindex.html | 17 +- index.html | 6 +- installation.html | 6 +- modules.html | 6 +- objects.inv | Bin 760 -> 763 bytes py-modindex.html | 2 +- quickstart.html | 2 +- result.html | 7 +- search.html | 2 +- searchindex.js | 2 +- streamlit.html | 2 +- todo.html | 98 ++++++++++++ workflow.html | 8 +- 27 files changed, 468 insertions(+), 107 deletions(-) create mode 100644 _sources/todo.rst.txt create mode 100644 todo.html diff --git a/404.html b/404.html index 43a7e90..3c2bdb8 100644 --- a/404.html +++ b/404.html @@ -8,7 +8,7 @@ Page Not Found — Wayback Tweets Documentation (1.0.x) - + diff --git a/_sources/api.rst.txt b/_sources/api.rst.txt index 283e429..ced6bf4 100644 --- a/_sources/api.rst.txt +++ b/_sources/api.rst.txt @@ -22,8 +22,9 @@ Parse .. autoclass:: TwitterEmbed :members: -.. autoclass:: JsonParser - :members: +.. TODO: JSON Issue +.. .. autoclass:: JsonParser +.. :members: Export diff --git a/_sources/cli.rst.txt b/_sources/cli.rst.txt index 9276250..49abef2 100644 --- a/_sources/cli.rst.txt +++ b/_sources/cli.rst.txt @@ -7,3 +7,36 @@ Usage .. click:: waybacktweets.cli.main:cli :prog: waybacktweets :nested: full + +Collapsing +------------ + +The Wayback Tweets command line tool recommends the use of three types of "collapse": ``urlkey``, ``digest``, and ``timestamp`` field. + +- ``urlkey``: (`str`) A canonical transformation of the URL you supplied, for example, ``org,eserver,tc)/``. Such keys are useful for indexing. + +- ``digest``: (`str`) The ``SHA1`` hash digest of the content, excluding the headers. It's usually a base-32-encoded string. + +- ``timestamp``: (`datetime`) A 14 digit date-time representation in the ``YYYYMMDDhhmmss`` format. We recommend ``YYYYMMDD``. + +However, it is possible to use it with other options. Read below text extracted from the official Wayback CDX Server API (Beta) documentation. + +.. note:: + + A new form of filtering is the option to "collapse" results based on a field, or a substring of a field. Collapsing is done on adjacent CDX lines where all captures after the first one that are duplicate are filtered out. This is useful for filtering out captures that are "too dense" or when looking for unique captures. + + To use collapsing, add one or more ``collapse=field`` or ``collapse=field:N`` where ``N`` is the first ``N`` characters of field to test. + + - Ex: Only show at most 1 capture per hour (compare the first 10 digits of the ``timestamp`` field). 
Given 2 captures ``20130226010000`` and ``20130226010800``, since first 10 digits ``2013022601`` match, the 2nd capture will be filtered out: + + http://web.archive.org/cdx/search/cdx?url=google.com&collapse=timestamp:10 + + The calendar page at `web.archive.org` uses this filter by default: `http://web.archive.org/web/*/archive.org` + + - Ex: Only show unique captures by ``digest`` (note that only adjacent digest are collapsed, duplicates elsewhere in the cdx are not affected): + + http://web.archive.org/cdx/search/cdx?url=archive.org&collapse=digest + + - Ex: Only show unique urls in a prefix query (filtering out captures except first capture of a given url). This is similar to the old prefix query in wayback (note: this query may be slow at the moment): + + http://web.archive.org/cdx/search/cdx?url=archive.org&collapse=urlkey&matchType=prefix diff --git a/_sources/errors.rst.txt b/_sources/errors.rst.txt index 492279b..d12c436 100644 --- a/_sources/errors.rst.txt +++ b/_sources/errors.rst.txt @@ -17,9 +17,10 @@ This error is raised when the package fails to establish a new connection with w The output message from the package would be: ``Failed to establish a new connection with web.archive.org. Max retries exceeded.`` -This is the error often returned when performing experimental parsing of URLs with the mimetype ``application/json``. +.. TODO: JSON Issue +.. This is the error often returned when performing experimental parsing of URLs with the mimetype ``application/json``. -The warning output message from the package would be: ``Connection error with https://web.archive.org/web//https://twitter.com//status/. Max retries exceeded. Error parsing the JSON, but the CDX data was saved.`` +.. The warning output message from the package would be: ``Connection error with https://web.archive.org/web//https://twitter.com//status/. Max retries exceeded. Error parsing the JSON, but the CDX data was saved.`` HTTPError ---------------- diff --git a/_sources/index.rst.txt b/_sources/index.rst.txt index b41345c..df48609 100644 --- a/_sources/index.rst.txt +++ b/_sources/index.rst.txt @@ -7,7 +7,7 @@ Wayback Tweets Wayback Tweets Documentation ------------------------------ -Retrieves archived tweets' CDX data from the Wayback Machine, performs necessary parsing, and saves the data. +Retrieves archived tweets CDX data from the Wayback Machine, performs necessary parsing, and saves the data in CSV, JSON, and HTML formats. User Guide @@ -22,6 +22,7 @@ User Guide result errors contribute + todo Command-Line Interface diff --git a/_sources/installation.rst.txt b/_sources/installation.rst.txt index 725f6c7..e2df76b 100644 --- a/_sources/installation.rst.txt +++ b/_sources/installation.rst.txt @@ -37,4 +37,10 @@ From source poetry install + Run Streamlit App: + + .. code-block:: shell + + streamlit run app/app.py + `Read the Poetry CLI documentation `_. diff --git a/_sources/result.rst.txt b/_sources/result.rst.txt index 2794f40..38b5498 100644 --- a/_sources/result.rst.txt +++ b/_sources/result.rst.txt @@ -15,7 +15,8 @@ The package saves in three formats: CSV, JSON, and HTML. The files have the foll - ``parsed_archived_tweet_url``: (`str`) The original archived URL after parsing. `Check the utility functions `_. -- ``parsed_tweet_text_mimetype_json``: (`str`) The tweet text extracted from the archived URL that has mimetype ``application/json``. +.. TODO: JSON Issue +.. - ``parsed_tweet_text_mimetype_json``: (`str`) The tweet text extracted from the archived URL that has mimetype ``application/json``. 
- ``available_tweet_text``: (`str`) The tweet text extracted from the URL that is still available on the Twitter account. diff --git a/_sources/todo.rst.txt b/_sources/todo.rst.txt new file mode 100644 index 0000000..58d1feb --- /dev/null +++ b/_sources/todo.rst.txt @@ -0,0 +1,13 @@ +TODO +================ + +.. |uncheck| raw:: html + + + +|uncheck| Code: JSON Issue: Create a separate function to handle JSON return, apply JsonParser (``waybacktweets/api/parse_tweets.py:73``), and avoid rate limiting + +|uncheck| Docs: Add tutorial on how to save Tweet via command line + +|uncheck| Web App: Return complete JSON when mimetype is ``application/json`` + diff --git a/_sources/workflow.rst.txt b/_sources/workflow.rst.txt index 2480b35..c3ffd32 100644 --- a/_sources/workflow.rst.txt +++ b/_sources/workflow.rst.txt @@ -19,5 +19,5 @@ Use the mouse to zoom in and out the flowchart. C--> |4xx| E[return None] E--> F{request Archived\nTweet URL} F--> |4xx| G[return Only CDX data] - F--> |2xx/3xx: application/json| J[return JSON text] + F--> |TODO: 2xx/3xx: application/json| J[return JSON text] F--> |2xx/3xx: text/html, warc/revisit, unk| K[return HTML iframe tag] diff --git a/_static/css/custom.css b/_static/css/custom.css index e2cc030..6429ee5 100644 --- a/_static/css/custom.css +++ b/_static/css/custom.css @@ -1,4 +1,5 @@ #cli #usage #waybacktweets h3, -.sphinxsidebarwrapper li ul li ul:has(a[href="#waybacktweets"]):last-child{ +#cli .admonition-title, +.sphinxsidebarwrapper li ul li ul:has(a[href="#waybacktweets"]):last-child { display: none; } diff --git a/api.html b/api.html index 9e4b196..ac8ece1 100644 --- a/api.html +++ b/api.html @@ -8,7 +8,7 @@ API — Wayback Tweets Documentation (1.0.x) - + @@ -50,11 +50,32 @@

API

class waybacktweets.api.request_tweets.WaybackTweets(username, collapse, timestamp_from, timestamp_to, limit, offset)
-

Requests data from the Wayback CDX Server API and returns it in JSON format.

+

Class responsible for requesting data from the Wayback CDX Server API.

+
+
Parameters:
+
    +
  • username (str) – The username associated with the tweets.

  • +
  • collapse (str) – The field to collapse duplicate lines on.

  • +
  • timestamp_from (str) – The timestamp to start retrieving tweets from.

  • +
  • timestamp_to (str) – The timestamp to stop retrieving tweets at.

  • +
  • limit (int) – The maximum number of results to return.

  • +
  • offset (int) – The number of lines to skip in the results.

  • +
+
+
get()
-

GET request to the Internet Archive’s CDX API to retrieve archived tweets.

+

Sends a GET request to the Internet Archive’s CDX API to retrieve archived tweets.

+
+
Returns:
+

The response from the CDX API in JSON format, if successful.

+
+
Return type:
+

Dict[str, Any] | None
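A minimal usage sketch, assuming the constructor signature shown above; the username, collapse field, date range, limit, and offset are placeholder values.

from waybacktweets.api.request_tweets import WaybackTweets

# Collapse duplicate lines on "urlkey" and cap the query at 250 results
api = WaybackTweets("jack", "urlkey", "20150101", "20191231", 250, 0)
archived_tweets = api.get()  # CDX response as a dict, or None if the request fails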

+
+
@@ -65,11 +86,31 @@

API

class waybacktweets.api.parse_tweets.TweetsParser(archived_tweets_response, username, field_options)
-

Handles the overall parsing of archived tweets.

+

Class responsible for the overall parsing of archived tweets.

+
+
Parameters:
+
    +
  • archived_tweets_response (List[str]) – The response from the archived tweets.

  • +
  • username (str) – The username associated with the tweets.

  • +
  • field_options (List[str]) – The fields to be included in the parsed data.

  • +
+
+
-parse()
+parse(print_progress=False)

Parses the archived tweets CDX data and structures it.

+
+
Parameters:
+

print_progress – A boolean indicating whether to print progress or not.

+
+
Returns:
+

The parsed tweets data.

+
+
Return type:
+

Dict[str, List[Any]]
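A short sketch of the parsing step; it assumes the response comes from WaybackTweets.get() and that the field names listed on the Result page are valid field_options values.

from waybacktweets.api.request_tweets import WaybackTweets
from waybacktweets.api.parse_tweets import TweetsParser

field_options = ["archived_urlkey", "archived_timestamp", "original_tweet_url"]
archived_tweets = WaybackTweets("jack", "urlkey", "20150101", "20191231", 250, 0).get()

if archived_tweets:
    parser = TweetsParser(archived_tweets, "jack", field_options)
    parsed_tweets = parser.parse(print_progress=True)  # Dict[str, List[Any]]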

+
+
@@ -77,23 +118,35 @@

API

class waybacktweets.api.parse_tweets.TwitterEmbed(tweet_url)
-

Handles parsing of tweets using the Twitter Publish service.

+

Class responsible for parsing tweets using the Twitter Publish service.

+
+
Parameters:
+

tweet_url (str) – The URL of the tweet to be parsed.

+
+
embed()

Parses the archived tweets when they are still available.

-
- -
- -
-
-class waybacktweets.api.parse_tweets.JsonParser(archived_tweet_url)
-

Handles parsing of tweets when the mimetype is application/json.

-
-
-parse()
-

Parses the archived tweets in JSON format.

+

This function goes through each archived tweet and checks if it is still available. If the tweet is available, it extracts the necessary information and adds it to the respective lists. The function returns a tuple of three lists:
- The first list contains the tweet texts.
- The second list contains boolean values indicating whether each tweet is still available.
- The third list contains the URLs of the tweets.

+
+
Returns:
+

A tuple of three lists containing the tweet texts, availability statuses, and URLs, respectively. If no tweets are available, returns None.

+
+
Return type:
+

Tuple[List[str], List[bool], List[str]] | None
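An illustrative check of a single tweet through the Twitter Publish service; the tweet URL is a placeholder.

from waybacktweets.api.parse_tweets import TwitterEmbed

embed_parser = TwitterEmbed("https://twitter.com/jack/status/20")
result = embed_parser.embed()

if result:
    tweet_texts, is_available, tweet_urls = result  # three parallel lists, as described above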

+
+
@@ -104,23 +157,47 @@

API

class waybacktweets.api.export_tweets.TweetsExporter(data, username, field_options)
-

Handles the exporting of parsed archived tweets.

+

Class responsible for exporting parsed archived tweets.

+
+
Parameters:
+
    +
  • data (Dict[str, List[Any]]) – The parsed archived tweets data.

  • +
  • username (str) – The username associated with the tweets.

  • +
  • field_options (List[str]) – The fields to be included in the exported data.

  • +
+
+
save_to_csv()

Saves the DataFrame to a CSV file.

+
+
Return type:
+

None

+
+
save_to_html()

Saves the DataFrame to an HTML file.

+
+
Return type:
+

None

+
+
save_to_json()

Saves the DataFrame to a JSON file.

+
+
Return type:
+

None
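A self-contained sketch of the export step. The dictionary below is a hand-made stand-in for the output of TweetsParser.parse(); the exact keys expected are an assumption based on the field names listed on the Result page.

from waybacktweets.api.export_tweets import TweetsExporter

field_options = ["archived_timestamp", "original_tweet_url"]
parsed_tweets = {
    "archived_timestamp": ["20150101000000"],
    "original_tweet_url": ["https://twitter.com/jack/status/20"],
}

exporter = TweetsExporter(parsed_tweets, "jack", field_options)
exporter.save_to_csv()   # writes the DataFrame as CSV
exporter.save_to_json()  # writes the DataFrame as JSON
exporter.save_to_html()  # writes the DataFrame as HTML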

+
+
@@ -131,17 +208,43 @@

API

class waybacktweets.api.viz_tweets.HTMLTweetsVisualizer(json_file_path, html_file_path, username)
-

Generates an HTML file to visualize the parsed data.

+

Class responsible for generating an HTML file to visualize the parsed data.

+
+
Parameters:
+
    +
  • json_content – The content of the JSON file.

  • +
  • html_file_path (str) – The path where the HTML file will be saved.

  • +
  • username (str) – The username associated with the tweets.

  • +
  • json_file_path (str)

  • +
+
+
generate()
-

Generates an HTML file.

+

Generates an HTML string that represents the parsed data.

+
+
Returns:
+

The generated HTML string.

+
+
Return type:
+

str

+
+
save(html_content)
-

Saves the generated HTML.

+

Saves the generated HTML string to a file.

+
+
Parameters:
+

html_content (str) – The HTML string to be saved.

+
+
Return type:
+

None
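A sketch of generating the standalone HTML visualization; the file paths are placeholders, and the JSON file is assumed to have been written previously by TweetsExporter.

from waybacktweets.api.viz_tweets import HTMLTweetsVisualizer

viz = HTMLTweetsVisualizer("jack_tweets.json", "jack_tweets.html", "jack")
html_content = viz.generate()  # builds the HTML string from the parsed JSON
viz.save(html_content)         # writes the string to html_file_path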

+
+
@@ -153,66 +256,140 @@

API

waybacktweets.utils.utils.check_double_status(wayback_machine_url, original_tweet_url)

Checks if a Wayback Machine URL contains two occurrences of “/status/”
-and if the original tweet does not contain “twitter.com”.
-Returns a boolean.

+and if the original tweet does not contain “twitter.com”.

+
+
Parameters:
+
    +
  • wayback_machine_url (str) – The Wayback Machine URL to check.

  • +
  • original_tweet_url (str) – The original tweet URL to check.

  • +
+
+
Returns:
+

True if the conditions are met, False otherwise.

+
+
Return type:
+

bool
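An illustrative call with placeholder URLs; per the description above, it returns True only when the Wayback Machine URL contains “/status/” twice and the original URL does not contain “twitter.com”.

from waybacktweets.utils.utils import check_double_status

wayback_machine_url = "https://web.archive.org/web/20200101000000/https://twitter.com/jack/status/20/status/20"
original_tweet_url = "/jack/status/20"

check_double_status(wayback_machine_url, original_tweet_url)  # expected: True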

+
+
waybacktweets.utils.utils.check_pattern_tweet(tweet_url)
-

Extracts tweet IDs from various types of tweet URLs or tweet-related patterns.

-

Reply pattern: /status//
-Link pattern: /status///
-Twimg pattern: /status/https://pbs

+

Extracts the tweet ID from a tweet URL.

+
+
Parameters:
+

tweet_url (str) – The tweet URL to extract the ID from.

+
+
Returns:
+

The extracted tweet ID.

+
+
Return type:
+

str

+
+
waybacktweets.utils.utils.clean_tweet_url(tweet_url, username)
-

Converts the tweet to lowercase,
-checks if it contains a tweet URL associated with the username.
-Returns the original tweet URL with correct casing;
-or returns the original tweet.

+

Cleans a tweet URL by ensuring it is associated with the correct username.

+
+
Parameters:
+
    +
  • tweet_url (str) – The tweet URL to clean.

  • +
  • username (str) – The username to associate with the tweet URL.

  • +
+
+
Returns:
+

The cleaned tweet URL.

+
+
Return type:
+

str

+
+
waybacktweets.utils.utils.clean_wayback_machine_url(wayback_machine_url, archived_timestamp, username)
-

Converts the Wayback Machine URL to lowercase,
-checks if it contains a tweet URL associated with the username.
-Returns the original tweet URL with correct casing and archived timestamp;
-otherwise, it returns the original Wayback Machine URL.

+

Cleans a Wayback Machine URL by ensuring it is associated with the correct username and timestamp.

+
+
Parameters:
+
    +
  • wayback_machine_url (str) – The Wayback Machine URL to clean.

  • +
  • archived_timestamp (str) – The timestamp to associate with the Wayback Machine URL.

  • +
  • username (str) – The username to associate with the Wayback Machine URL.

  • +
+
+
Returns:
+

The cleaned Wayback Machine URL.

+
+
Return type:
+

str

+
+
waybacktweets.utils.utils.delete_tweet_pathnames(tweet_url)
-

Removes any pathnames (/photos, /likes, /retweet…) from the tweet URL.

+

Removes any pathnames from a tweet URL.

+
+
Parameters:
+

tweet_url (str) – The tweet URL to remove pathnames from.

+
+
Returns:
+

The tweet URL without any pathnames.

+
+
Return type:
+

str
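Illustrative calls to the URL-cleaning helpers; the URLs, timestamp, and username are placeholders, and each function returns the cleaned URL string.

from waybacktweets.utils.utils import (
    clean_tweet_url,
    clean_wayback_machine_url,
    delete_tweet_pathnames,
)

clean_tweet_url("https://twitter.com/JACK/status/20", "jack")
clean_wayback_machine_url(
    "https://web.archive.org/web/20200101000000/https://twitter.com/JACK/status/20",
    "20200101000000",
    "jack",
)
delete_tweet_pathnames("https://twitter.com/jack/status/20/photo/1")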

+
+
waybacktweets.utils.utils.get_response(url, params=None)

Sends a GET request to the specified URL and returns the response.

+
+
Parameters:
+
    +
  • url (str) – The URL to send the GET request to.

  • +
  • params (dict | None) – The parameters to include in the GET request.

  • +
+
+
Returns:
+

The response from the server if the status code is not in the 400-511 range; None if the status code is in the 400-511 range.

+
+
Return type:
+

Response | None
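A hedged example of the request helper, querying the CDX endpoint directly; the parameters shown are illustrative.

from waybacktweets.utils.utils import get_response

response = get_response(
    "https://web.archive.org/cdx/search/cdx",
    params={"url": "https://twitter.com/jack/status", "output": "json"},
)

if response is not None:  # None when the status code falls in the 400-511 range
    data = response.json()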

+
+
waybacktweets.utils.utils.parse_date(ctx=None, param=None, value=None)

Parses a date string and returns it in the format “YYYYMMDD”.

-

This function takes an optional date string as input,
-and if a date string is provided, it parses the date string into a datetime object
-and then formats it in the “YYYYMMDD” format.

-
-
Args:

ctx (None, optional): Necessary when used with the click package.
-Defaults to None.
-param (None, optional): Necessary when used with the click package.
-Defaults to None.
-value (str, optional): A date string in the “YYYYMMDD” format. Defaults to None.

-
-
Returns:

str: The input date string formatted in the “YYYYMMDD” format, +

+
Parameters:
+
    +
  • ctx (Any | None) – Necessary when used with the click package. Defaults to None.

  • +
  • param (Any | None) – Necessary when used with the click package. Defaults to None.

  • +
  • value (str | None) – A date string in the “YYYYMMDD” format. Defaults to None.

  • +
+
+
Returns:
+

The input date string formatted in the “YYYYMMDD” format, or None if no date string was provided.

+
Return type:
+

str | None
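A couple of illustrative calls; ctx and param are only needed when the function is wired up as a click callback.

from waybacktweets.utils.utils import parse_date

parse_date(value="20150101")  # expected: "20150101"
parse_date(value=None)        # expected: None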

+
@@ -220,6 +397,17 @@

API

waybacktweets.utils.utils.semicolon_parser(string)

Replaces semicolons in a string with %3B.

+
+
Parameters:
+

string (str) – The string to replace semicolons in.

+
+
Returns:
+

The string with semicolons replaced by %3B.

+
+
Return type:
+

str
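For example, with an illustrative input string:

from waybacktweets.utils.utils import semicolon_parser

semicolon_parser("https://twitter.com/jack/status/20;ref=abc")
# expected: "https://twitter.com/jack/status/20%3Bref=abc"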

+
+
@@ -259,10 +447,6 @@

Contents

  • TwitterEmbed.embed()
  • -
  • JsonParser -
  • Export
      diff --git a/cli.html b/cli.html index d47e0ff..15eb7cc 100644 --- a/cli.html +++ b/cli.html @@ -8,7 +8,7 @@ CLI — Wayback Tweets Documentation (1.0.x) - + @@ -16,7 +16,7 @@ - +
  • +
  • Collapsing
  • @@ -131,7 +165,7 @@

    Navigation

    • Overview
    • diff --git a/contribute.html b/contribute.html index 5b1b8d3..35f1485 100644 --- a/contribute.html +++ b/contribute.html @@ -8,14 +8,14 @@ Contribute — Wayback Tweets Documentation (1.0.x) - + - +
    diff --git a/errors.html b/errors.html index 9215aae..d5d5af6 100644 --- a/errors.html +++ b/errors.html @@ -8,7 +8,7 @@ Errors — Wayback Tweets Documentation (1.0.x) - + @@ -55,8 +55,6 @@

    ReadTimeout

    This error is raised when the package fails to establish a new connection with web.archive.org. This could be due to network issues or the server being down.

    The output message from the package would be: Failed to establish a new connection with web.archive.org. Max retries exceeded.

    -

    This is the error often returned when performing experimental parsing of URLs with the mimetype application/json.

    -

    The warning output message from the package would be: Connection error with https://web.archive.org/web/<TIMESTAMP>/https://twitter.com/<USERNAME>/status/<TWEET_ID>. Max retries exceeded. Error parsing the JSON, but the CDX data was saved.

    HTTPError

    diff --git a/genindex.html b/genindex.html index c9e8c52..8e89746 100644 --- a/genindex.html +++ b/genindex.html @@ -8,7 +8,7 @@ Index — Wayback Tweets Documentation (1.0.x) - + @@ -47,7 +47,6 @@

    Index

    | E | G | H - | J | M | P | S @@ -153,14 +152,6 @@

    H

    -

    J

    - - -
    -

    M

    - + @@ -114,11 +172,15 @@

    C

  • check_pattern_tweet() (in module waybacktweets.utils.utils)
  • - - +
      @@ -185,12 +176,8 @@

      M

      P

      -
      • parse_date() (in module waybacktweets.utils.utils) diff --git a/index.html b/index.html index b5a5f75..0f3f572 100644 --- a/index.html +++ b/index.html @@ -8,7 +8,7 @@ Wayback Tweets — Wayback Tweets Documentation (1.0.x) - + @@ -43,7 +43,7 @@

        Navigation

        Wayback Tweets

        Wayback Tweets Documentation

        -

        Retrieves archived tweets’ CDX data from the Wayback Machine, performs necessary parsing, and saves the data.

        +

        Retrieves archived tweets CDX data from the Wayback Machine, performs necessary parsing, and saves the data in CSV, JSON, and HTML formats.

        User Guide

        @@ -74,6 +74,7 @@

        User GuideSponsoring

      +
    • TODO
    • @@ -83,6 +84,7 @@

      Command-Line Interface
    • CLI
    • diff --git a/installation.html b/installation.html index 5410549..521755a 100644 --- a/installation.html +++ b/installation.html @@ -8,7 +8,7 @@ Installation — Wayback Tweets Documentation (1.0.x) - + @@ -72,6 +72,10 @@

      From source
      poetry install
       
      +

      Run Streamlit App:

      +
      streamlit run app/app.py
      +
      +

      Read the Poetry CLI documentation.

      diff --git a/modules.html b/modules.html index 19643c5..7e9139e 100644 --- a/modules.html +++ b/modules.html @@ -8,7 +8,7 @@ waybacktweets — Wayback Tweets Documentation (1.0.x) - + @@ -56,10 +56,6 @@

      waybacktweetsTwitterEmbed.embed() -
    • JsonParser -
    • Export
        diff --git a/objects.inv b/objects.inv index 70f9cccc61f8bd391c09bb258cb3901ad17ec252..6b5c2d9cedabe5184405c24fd9c0733cbbec1c8d 100644 GIT binary patch delta 642 zcmV-|0)74X1^We%gnx6}AP|P{{0hyaS81ASZl2aN%}m_5iQ67E7_BX8BnTio&aW@h zqOUj~M#;q%@4owJS1cUpaZTCI9ssOKsNxAcDyeOXN~Dfgh0H1e(buSs=C%+QsKj5Q zm>k~DPgc#}ywO0{I3}nu(Pd8y$0UaCA+<7PX5Ul@Hka8aF@Nj)Plx_9vJ8PLstruv zgJHTaX<17l8}KRvBU8I^C~n=SnHOuwrkB$*5@<>1mZRaJ0`q|p9|MxVXjpZ)Gh2EK zlWf6;^t9(s)BEX@lOLFJU?^ z0|C|_*}BKPqy){>)i|_ts&{s40$NLLhJR*M&^T|Ni92(=%;2%H3AzSR z8(S5v*YzXmZJsu8_}eMM^LeL0E5l(f=Q-&2{rv9dY%vdq cQN4}3Y=@P4w-NF%9D-K!qvBWOKe2yWGn(l93z` z%3n=CBBz(cdPzPaB~k}+%n03?1Ax&nkt~KMDU?Z33FYyo6nV`d{2Dga!j$|9mFPK=eM>)|tydG%&m{^lNq(-Dv@7oN)6e9m5W`7g^>Ck^#R6bBil!p0t zFbA0 zqQkn!omtX*n0O1ur=~r7o?p)p9DiRg?=XWuOxJ|7XRoIDfMlzxKoJA;N_(BzjQhE~ z+A!{ZJ zrzN2Ut6)AZ2M)%KY*S-VkrHMel%0lDftI4em&c!g??>)>&p%#6$*AC>N@f3&zD2Q% zP@DWF7c`6pTew)=29!^1c8Rucch)8VG`@bpT2UrN_>wpkva02X^LI zmBUl(#z_XeaqLudy=^{&!OhV|9`1Dtar~^S?R(BiTmB?s&^z>?1a`b$$weNc&VVsX zoJ-6KM{-i7lhAo`XTgZ|!6vJteJ`r%UXL}SJDaaEAJJ)va4Q< zCb2_gbOPNdQThYhs8bkcrK`+~hGC~TvDs*t!Ef`?Python Module Index — Wayback Tweets Documentation (1.0.x) - + diff --git a/quickstart.html b/quickstart.html index 692b32e..63adfa3 100644 --- a/quickstart.html +++ b/quickstart.html @@ -8,7 +8,7 @@ Quickstart — Wayback Tweets Documentation (1.0.x) - + diff --git a/result.html b/result.html index 73310f0..eef30f8 100644 --- a/result.html +++ b/result.html @@ -8,7 +8,7 @@ Result — Wayback Tweets Documentation (1.0.x) - + @@ -46,14 +46,15 @@

        Navigation

        Result

        The package saves in three formats: CSV, JSON, and HTML. The files have the following fields:

        -
          +
          • archived_urlkey: (str) A canonical transformation of the URL you supplied, for example, org,eserver,tc)/. Such keys are useful for indexing.

          • archived_timestamp: (datetime) A 14 digit date-time representation in the YYYYMMDDhhmmss format.

          • original_tweet_url: (str) The original tweet URL.

          • archived_tweet_url: (str) The original archived URL.

          • parsed_tweet_url: (str) The original tweet URL after parsing. Check the utility functions.

          • parsed_archived_tweet_url: (str) The original archived URL after parsing. Check the utility functions.

          • -
          • parsed_tweet_text_mimetype_json: (str) The tweet text extracted from the archived URL that has mimetype application/json.

          • +
          +
          • available_tweet_text: (str) The tweet text extracted from the URL that is still available on the Twitter account.

          • available_tweet_is_RT: (bool) Whether the tweet from the available_tweet_text field is a retweet or not.

          • available_tweet_info: (str) Name and date of the tweet from the available_tweet_text field.

          • diff --git a/search.html b/search.html index 74030ce..b2c9eb1 100644 --- a/search.html +++ b/search.html @@ -8,7 +8,7 @@ Search — Wayback Tweets Documentation (1.0.x) - + diff --git a/searchindex.js b/searchindex.js index a3dd02a..43eb431 100644 --- a/searchindex.js +++ b/searchindex.js @@ -1 +1 @@ -Search.setIndex({"alltitles": {"API": [[0, "api"]], "API Reference": [[4, "api-reference"]], "Additional Information": [[4, "additional-information"]], "CLI": [[1, "cli"], [7, "cli"]], "Command-Line Interface": [[4, "command-line-interface"]], "Community Comments": [[9, "community-comments"]], "ConnectionError": [[3, "connectionerror"]], "Contribute": [[2, "contribute"]], "Errors": [[3, "errors"]], "Export": [[0, "module-waybacktweets.api.export_tweets"]], "Filters": [[9, "filters"]], "From source": [[5, "from-source"]], "HTTPError": [[3, "httperror"]], "Hacking": [[2, "hacking"]], "Indices and tables": [[4, "indices-and-tables"]], "Installation": [[5, "installation"]], "Module": [[7, "module"]], "Pagination": [[9, "pagination"]], "Parse": [[0, "module-waybacktweets.api.parse_tweets"]], "Quickstart": [[7, "quickstart"]], "ReadTimeout": [[3, "readtimeout"]], "Request": [[0, "module-waybacktweets.api.request_tweets"]], "Result": [[8, "result"]], "Sponsoring": [[2, "sponsoring"]], "Streamlit Web App": [[4, "streamlit-web-app"]], "Testing": [[2, "testing"]], "Usage": [[1, "usage"]], "User Guide": [[4, "user-guide"]], "Using pip": [[5, "using-pip"]], "Utils": [[0, "module-waybacktweets.utils.utils"]], "Visualizer": [[0, "module-waybacktweets.api.viz_tweets"]], "Wayback Tweets": [[4, "wayback-tweets"]], "Wayback Tweets Documentation": [[4, "wayback-tweets-documentation"]], "Web App": [[7, "web-app"], [9, "web-app"]], "Workflow": [[10, "workflow"]], "waybacktweets": [[1, "waybacktweets"], [6, "waybacktweets"]]}, "docnames": ["api", "cli", "contribute", "errors", "index", "installation", "modules", "quickstart", "result", "streamlit", "workflow"], "envversion": {"sphinx": 61, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1}, "filenames": ["api.rst", "cli.rst", "contribute.rst", "errors.rst", "index.rst", "installation.rst", "modules.rst", "quickstart.rst", "result.rst", "streamlit.rst", "workflow.rst"], "indexentries": {"--collapse": [[1, "cmdoption-waybacktweets-collapse", false]], "--from": [[1, "cmdoption-waybacktweets-from", false]], "--limit": [[1, "cmdoption-waybacktweets-limit", false]], "--offset": [[1, "cmdoption-waybacktweets-offset", false]], "--to": [[1, "cmdoption-waybacktweets-to", false]], "check_double_status() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.check_double_status", false]], "check_pattern_tweet() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.check_pattern_tweet", false]], "clean_tweet_url() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.clean_tweet_url", false]], "clean_wayback_machine_url() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.clean_wayback_machine_url", false]], "delete_tweet_pathnames() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.delete_tweet_pathnames", false]], "embed() (waybacktweets.api.parse_tweets.twitterembed method)": [[0, 
"waybacktweets.api.parse_tweets.TwitterEmbed.embed", false]], "generate() (waybacktweets.api.viz_tweets.htmltweetsvisualizer method)": [[0, "waybacktweets.api.viz_tweets.HTMLTweetsVisualizer.generate", false]], "get() (waybacktweets.api.request_tweets.waybacktweets method)": [[0, "waybacktweets.api.request_tweets.WaybackTweets.get", false]], "get_response() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.get_response", false]], "htmltweetsvisualizer (class in waybacktweets.api.viz_tweets)": [[0, "waybacktweets.api.viz_tweets.HTMLTweetsVisualizer", false]], "jsonparser (class in waybacktweets.api.parse_tweets)": [[0, "waybacktweets.api.parse_tweets.JsonParser", false]], "module": [[0, "module-waybacktweets.api.export_tweets", false], [0, "module-waybacktweets.api.parse_tweets", false], [0, "module-waybacktweets.api.request_tweets", false], [0, "module-waybacktweets.api.viz_tweets", false], [0, "module-waybacktweets.utils.utils", false]], "parse() (waybacktweets.api.parse_tweets.jsonparser method)": [[0, "waybacktweets.api.parse_tweets.JsonParser.parse", false]], "parse() (waybacktweets.api.parse_tweets.tweetsparser method)": [[0, "waybacktweets.api.parse_tweets.TweetsParser.parse", false]], "parse_date() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.parse_date", false]], "save() (waybacktweets.api.viz_tweets.htmltweetsvisualizer method)": [[0, "waybacktweets.api.viz_tweets.HTMLTweetsVisualizer.save", false]], "save_to_csv() (waybacktweets.api.export_tweets.tweetsexporter method)": [[0, "waybacktweets.api.export_tweets.TweetsExporter.save_to_csv", false]], "save_to_html() (waybacktweets.api.export_tweets.tweetsexporter method)": [[0, "waybacktweets.api.export_tweets.TweetsExporter.save_to_html", false]], "save_to_json() (waybacktweets.api.export_tweets.tweetsexporter method)": [[0, "waybacktweets.api.export_tweets.TweetsExporter.save_to_json", false]], "semicolon_parser() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.semicolon_parser", false]], "tweetsexporter (class in waybacktweets.api.export_tweets)": [[0, "waybacktweets.api.export_tweets.TweetsExporter", false]], "tweetsparser (class in waybacktweets.api.parse_tweets)": [[0, "waybacktweets.api.parse_tweets.TweetsParser", false]], "twitterembed (class in waybacktweets.api.parse_tweets)": [[0, "waybacktweets.api.parse_tweets.TwitterEmbed", false]], "username": [[1, "cmdoption-waybacktweets-arg-USERNAME", false]], "waybacktweets (class in waybacktweets.api.request_tweets)": [[0, "waybacktweets.api.request_tweets.WaybackTweets", false]], "waybacktweets command line option": [[1, "cmdoption-waybacktweets-arg-USERNAME", false], [1, "cmdoption-waybacktweets-collapse", false], [1, "cmdoption-waybacktweets-from", false], [1, "cmdoption-waybacktweets-limit", false], [1, "cmdoption-waybacktweets-offset", false], [1, "cmdoption-waybacktweets-to", false]], "waybacktweets.api.export_tweets": [[0, "module-waybacktweets.api.export_tweets", false]], "waybacktweets.api.parse_tweets": [[0, "module-waybacktweets.api.parse_tweets", false]], "waybacktweets.api.request_tweets": [[0, "module-waybacktweets.api.request_tweets", false]], "waybacktweets.api.viz_tweets": [[0, "module-waybacktweets.api.viz_tweets", false]], "waybacktweets.utils.utils": [[0, "module-waybacktweets.utils.utils", false]]}, "objects": {"waybacktweets": [[1, 4, 1, "cmdoption-waybacktweets-collapse", "--collapse"], [1, 4, 1, "cmdoption-waybacktweets-from", "--from"], [1, 4, 1, "cmdoption-waybacktweets-limit", 
"--limit"], [1, 4, 1, "cmdoption-waybacktweets-offset", "--offset"], [1, 4, 1, "cmdoption-waybacktweets-to", "--to"], [1, 4, 1, "cmdoption-waybacktweets-arg-USERNAME", "USERNAME"]], "waybacktweets.api": [[0, 0, 0, "-", "export_tweets"], [0, 0, 0, "-", "parse_tweets"], [0, 0, 0, "-", "request_tweets"], [0, 0, 0, "-", "viz_tweets"]], "waybacktweets.api.export_tweets": [[0, 1, 1, "", "TweetsExporter"]], "waybacktweets.api.export_tweets.TweetsExporter": [[0, 2, 1, "", "save_to_csv"], [0, 2, 1, "", "save_to_html"], [0, 2, 1, "", "save_to_json"]], "waybacktweets.api.parse_tweets": [[0, 1, 1, "", "JsonParser"], [0, 1, 1, "", "TweetsParser"], [0, 1, 1, "", "TwitterEmbed"]], "waybacktweets.api.parse_tweets.JsonParser": [[0, 2, 1, "", "parse"]], "waybacktweets.api.parse_tweets.TweetsParser": [[0, 2, 1, "", "parse"]], "waybacktweets.api.parse_tweets.TwitterEmbed": [[0, 2, 1, "", "embed"]], "waybacktweets.api.request_tweets": [[0, 1, 1, "", "WaybackTweets"]], "waybacktweets.api.request_tweets.WaybackTweets": [[0, 2, 1, "", "get"]], "waybacktweets.api.viz_tweets": [[0, 1, 1, "", "HTMLTweetsVisualizer"]], "waybacktweets.api.viz_tweets.HTMLTweetsVisualizer": [[0, 2, 1, "", "generate"], [0, 2, 1, "", "save"]], "waybacktweets.utils": [[0, 0, 0, "-", "utils"]], "waybacktweets.utils.utils": [[0, 3, 1, "", "check_double_status"], [0, 3, 1, "", "check_pattern_tweet"], [0, 3, 1, "", "clean_tweet_url"], [0, 3, 1, "", "clean_wayback_machine_url"], [0, 3, 1, "", "delete_tweet_pathnames"], [0, 3, 1, "", "get_response"], [0, 3, 1, "", "parse_date"], [0, 3, 1, "", "semicolon_parser"]]}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"], "3": ["py", "function", "Python function"], "4": ["std", "cmdoption", "program option"]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method", "3": "py:function", "4": "std:cmdoption"}, "terms": {"": [0, 2, 8], "0": [4, 7], "1": 1, "11": 2, "14": [1, 8], "20150101": 7, "20191231": 7, "22": 9, "25": 9, "250": 7, "3": [2, 4, 9], "32": 8, "3b": 0, "4": 1, "A": [0, 8, 9], "For": 9, "If": [2, 8, 9], "It": [1, 8, 9], "One": 9, "Such": 8, "The": [0, 1, 2, 3, 8, 9, 10], "These": [2, 3], "about": 2, "access": [7, 9], "account": [8, 9], "actual": 8, "after": 8, "all": 2, "allow": [1, 9], "alreadi": 5, "also": [2, 10], "alwai": [9, 10], "am": 9, "an": [0, 2, 9], "ani": [0, 8], "api": [2, 6, 7, 9, 10], "aplic": 9, "app": 2, "appli": 9, "applic": [0, 3, 7, 8, 9, 10], "ar": [0, 2, 3, 8, 10], "archiv": [0, 1, 3, 4, 8, 9, 10], "archived_digest": 8, "archived_length": 8, "archived_mimetyp": 8, "archived_statuscod": 8, "archived_timestamp": [0, 8], "archived_tweet": 7, "archived_tweet_url": [0, 8], "archived_tweets_respons": 0, "archived_urlkei": 8, "arg": 0, "argument": 1, "ari": 9, "arsip": 9, "associ": 0, "automat": 9, "avail": [0, 8, 9], "available_tweet_info": 8, "available_tweet_is_rt": 8, "available_tweet_text": 8, "avoid": 9, "base": [1, 8, 9], "being": 3, "bellingcat": 9, "ben": 9, "best": [2, 10], "blank": 8, "bool": 8, "boolean": 0, "brief": 2, "bug": 2, "bulk": 9, "byte": 8, "calendar": 9, "call": 9, "can": [2, 8, 9], "canon": 8, "case": 0, "cd": 5, "cdx": [0, 1, 3, 4, 9, 10], "chang": [4, 5, 9], "check": [0, 3, 8, 9], "check_double_statu": [0, 6], "check_pattern_tweet": [0, 6], "checkbox": 9, "clarissa": 9, "clarom": [2, 5], "class": 0, "clean_tweet_url": [0, 6], "clean_wayback_machine_url": [0, 6], "cli": [2, 4, 5], "click": 0, "clone": 5, "cloud": [7, 9], "code": [2, 8], "collaps": [0, 1, 7, 9], 
"com": [0, 2, 3, 5], "command": [2, 7], "comment": 4, "common": 3, "commun": 4, "compar": 1, "compress": 8, "connect": 3, "connectionerror": 4, "contact": 2, "contain": 0, "content": 8, "contribut": 4, "convert": 0, "correct": 0, "correspond": 8, "could": 3, "creat": 9, "csv": [0, 8], "ctx": 0, "daili": 9, "data": [0, 1, 3, 4, 9, 10], "datafram": 0, "date": [0, 1, 8, 9], "datetim": [0, 8], "deeper": 9, "default": 0, "defin": 10, "delet": 9, "delete_tweet_pathnam": [0, 6], "delight": 9, "depend": 5, "develop": 2, "digest": [1, 8], "digit": [1, 8], "directori": [2, 5], "displai": 9, "do": 9, "doc": 2, "document": [2, 5], "doe": 0, "donat": 2, "dot": 2, "down": [3, 9], "due": [3, 9, 10], "each": 9, "effect": 9, "either": 2, "email": 2, "emb": [0, 6], "empti": 8, "encod": 8, "entri": 8, "error": 4, "eserv": 8, "ess": 9, "establish": 3, "exampl": 8, "exceed": 3, "excel": 9, "exclud": 8, "experiment": 3, "explan": 2, "export": [4, 6], "export_tweet": 0, "extract": [0, 8], "facilit": 10, "fail": 3, "feed": 3, "field": [1, 8, 9], "field_opt": 0, "file": [0, 8], "filter": [1, 4], "find": 9, "first": 1, "fix": 9, "flow": 10, "flowchart": 10, "follow": [2, 8, 10], "format": [0, 1, 8, 10], "framework": [7, 9], "from": [0, 1, 2, 3, 4, 7, 8, 10], "fun": 9, "function": [0, 2, 8], "gem": 9, "gener": [0, 2, 6], "get": [0, 6, 7], "get_respons": [0, 6], "gijn": 9, "git": 5, "github": [2, 5, 9], "gone": 9, "gpl": 4, "gunakan": 9, "ha": [8, 9], "hack": 4, "handl": [0, 3], "hash": 8, "have": [2, 8, 9], "haven": 5, "header": 8, "help": 2, "helper": 2, "henk": 9, "here": 2, "hidden": 9, "host": [7, 9], "html": [0, 8], "html_content": 0, "html_file_path": 0, "htmltweetsvisu": [0, 6], "http": [0, 3, 8], "httperror": 4, "i": [0, 1, 2, 3, 8, 9, 10], "id": 0, "import": 7, "improv": 2, "includ": 8, "index": [4, 8], "indonesia": 9, "inform": 3, "input": 0, "instal": [2, 4], "instruct": 2, "int": 8, "interest": 2, "interfac": 2, "internet": [0, 3], "internetarch": 3, "irina_tech_tip": 9, "issu": [2, 3, 9], "jack": 7, "json": [0, 3, 8, 10], "json_file_path": 0, "jsonpars": [0, 6], "kei": [8, 9], "know": 9, "lack": 9, "latest": 3, "launch": 9, "least": 1, "licens": 4, "like": 0, "limit": [0, 1, 7, 9, 10], "line": [2, 7], "link": [0, 9], "long": 3, "look": 9, "love": 9, "lowercas": 0, "machin": [0, 1, 4, 9], "main": 2, "maintain": 2, "mainten": 3, "mani": 9, "manual": 9, "max": 3, "member": 9, "mempermudah": 9, "mend": 9, "messag": 3, "mimetyp": [0, 3, 8, 10], "modul": [2, 4], "more": 9, "most": [3, 9], "mous": 10, "multipl": 9, "my": 9, "myosinttip": 9, "name": 8, "necessari": [0, 1, 4], "network": 3, "new": [3, 9], "newslett": 9, "none": 0, "now": 9, "object": 0, "obtain": 10, "occur": 3, "occurr": 0, "offlin": 3, "offset": [0, 1, 7], "often": 3, "old": 9, "one": 8, "onli": [9, 10], "onward": 1, "open": [2, 9], "option": [0, 1, 7, 9], "org": [3, 8, 9], "origin": [0, 8, 9], "original_tweet_url": [0, 8], "osint": 9, "other": 8, "otherwis": 0, "our": 9, "out": [3, 10], "output": 3, "over": 9, "overal": 0, "overload": 3, "packag": [0, 2, 3, 8], "page": [4, 9], "pagin": 4, "param": 0, "pars": [1, 3, 4, 6, 8, 10], "parse_d": [0, 6, 7], "parse_tweet": 0, "parsed_archived_tweet_url": 8, "parsed_tweet_text_mimetype_json": 8, "parsed_tweet_url": 8, "pathnam": 0, "pattern": 0, "payload": 8, "pb": 0, "penelusuran": 9, "per": 9, "perform": [1, 3, 4], "photo": 0, "pip": 4, "pleas": [3, 9], "poetri": [2, 5], "possibl": 10, "prerequisit": 2, "price": 9, "project": 2, "propos": 10, "prototyp": [7, 9], "provid": 0, "publish": 0, "python": 
[2, 7, 9], "queri": 1, "quickli": 9, "quickstart": 4, "rais": 3, "rang": [1, 9], "rate": 9, "re": [2, 9], "read": [5, 10], "readtimeout": 4, "recommend": 1, "record": 8, "regardless": 10, "relat": 0, "remov": 0, "replac": 0, "repli": 0, "report": 2, "repositori": 5, "represent": 8, "request": [3, 4, 6], "request_tweet": 0, "requir": 1, "research": 9, "respond": 3, "respons": 0, "result": [1, 4, 10], "retri": 3, "retriev": [0, 1, 4, 10], "return": [0, 3, 8], "retweet": [0, 8], "revisit": 8, "same": 8, "save": [0, 1, 3, 4, 6, 8, 9, 10], "save_to_csv": [0, 6], "save_to_html": [0, 6], "save_to_json": [0, 6], "scroll": 1, "search": 4, "see": 9, "semicolon": 0, "semicolon_pars": [0, 6], "send": 0, "server": [0, 3, 9, 10], "servic": [0, 3], "sha1": 8, "simpl": 1, "size": 8, "skill": 2, "snapshot": 8, "so": 9, "sourc": [2, 4, 9], "specifi": 0, "sphinx": 2, "sponsor": 4, "standalon": 7, "statu": [0, 3, 8, 9], "statuscod": 8, "still": [0, 8], "str": [0, 8], "streamlit": [2, 7, 9], "string": [0, 8], "structur": [0, 9], "substr": 1, "suggest": 2, "suppli": 8, "support": 2, "t": [5, 9], "take": [0, 3], "tc": 8, "temporarili": 3, "test": 4, "text": 8, "thei": 0, "therefor": 10, "thi": [0, 1, 2, 3, 8, 9], "those": 9, "three": 8, "through": 1, "time": [3, 8], "timestamp": [0, 1, 3], "timestamp_from": [0, 7], "timestamp_to": [0, 7], "too": 3, "tool": [7, 9, 10], "train": 9, "transform": 8, "tweet": [0, 1, 2, 7, 8, 9, 10], "tweet_id": 3, "tweet_url": 0, "tweetsexport": [0, 6], "tweetspars": [0, 6], "twimg": 0, "twitter": [0, 1, 3, 8, 9], "twitteremb": [0, 6], "two": 0, "type": 0, "under": 2, "uniqu": 9, "unk": 8, "untuk": 9, "up": [1, 9], "url": [0, 3, 8, 9], "urlkei": [1, 7, 9], "us": [0, 1, 2, 4, 7, 8, 9, 10], "usag": 4, "user": 9, "usernam": [0, 1, 3, 7], "usual": 8, "util": [2, 4, 6, 7, 8, 9], "valu": [0, 1, 8, 9], "van": 9, "variou": 0, "veri": 2, "via": [2, 9], "view": 9, "visual": [4, 6], "viz_tweet": 0, "wa": [0, 3, 10], "wai": [1, 2, 9, 10], "want": 9, "warc": 8, "warn": 3, "wayback": [0, 1, 2, 7, 9, 10], "wayback_machine_url": 0, "waybacktweet": [0, 2, 3, 5, 7], "we": 9, "web": 3, "welcom": 2, "what": 9, "when": [0, 3, 9], "whether": 8, "which": [8, 9], "who": 9, "without": 1, "workflow": 4, "would": 3, "written": [7, 9, 10], "x": 9, "xx": 1, "year": 1, "you": [2, 5, 8, 9], "yyyymmdd": [0, 1], "yyyymmddhhmmss": 8, "zoom": 10}, "titles": ["API", "CLI", "Contribute", "Errors", "Wayback Tweets", "Installation", "waybacktweets", "Quickstart", "Result", "Web App", "Workflow"], "titleterms": {"addit": 4, "api": [0, 4], "app": [4, 7, 9], "cli": [1, 7], "command": 4, "comment": 9, "commun": 9, "connectionerror": 3, "contribut": 2, "document": 4, "error": 3, "export": 0, "filter": 9, "from": 5, "guid": 4, "hack": 2, "httperror": 3, "indic": 4, "inform": 4, "instal": 5, "interfac": 4, "line": 4, "modul": 7, "pagin": 9, "pars": 0, "pip": 5, "quickstart": 7, "readtimeout": 3, "refer": 4, "request": 0, "result": 8, "sourc": 5, "sponsor": 2, "streamlit": 4, "tabl": 4, "test": 2, "tweet": 4, "us": 5, "usag": 1, "user": 4, "util": 0, "visual": 0, "wayback": 4, "waybacktweet": [1, 6], "web": [4, 7, 9], "workflow": 10}}) \ No newline at end of file +Search.setIndex({"alltitles": {"API": [[0, "api"]], "API Reference": [[4, "api-reference"]], "Additional Information": [[4, "additional-information"]], "CLI": [[1, "cli"], [7, "cli"]], "Collapsing": [[1, "collapsing"]], "Command-Line Interface": [[4, "command-line-interface"]], "Community Comments": [[9, "community-comments"]], "ConnectionError": [[3, 
"connectionerror"]], "Contribute": [[2, "contribute"]], "Errors": [[3, "errors"]], "Export": [[0, "module-waybacktweets.api.export_tweets"]], "Filters": [[9, "filters"]], "From source": [[5, "from-source"]], "HTTPError": [[3, "httperror"]], "Hacking": [[2, "hacking"]], "Indices and tables": [[4, "indices-and-tables"]], "Installation": [[5, "installation"]], "Module": [[7, "module"]], "Pagination": [[9, "pagination"]], "Parse": [[0, "module-waybacktweets.api.parse_tweets"]], "Quickstart": [[7, "quickstart"]], "ReadTimeout": [[3, "readtimeout"]], "Request": [[0, "module-waybacktweets.api.request_tweets"]], "Result": [[8, "result"]], "Sponsoring": [[2, "sponsoring"]], "Streamlit Web App": [[4, "streamlit-web-app"]], "TODO": [[10, "todo"]], "Testing": [[2, "testing"]], "Usage": [[1, "usage"]], "User Guide": [[4, "user-guide"]], "Using pip": [[5, "using-pip"]], "Utils": [[0, "module-waybacktweets.utils.utils"]], "Visualizer": [[0, "module-waybacktweets.api.viz_tweets"]], "Wayback Tweets": [[4, "wayback-tweets"]], "Wayback Tweets Documentation": [[4, "wayback-tweets-documentation"]], "Web App": [[7, "web-app"], [9, "web-app"]], "Workflow": [[11, "workflow"]], "waybacktweets": [[1, "waybacktweets"], [6, "waybacktweets"]]}, "docnames": ["api", "cli", "contribute", "errors", "index", "installation", "modules", "quickstart", "result", "streamlit", "todo", "workflow"], "envversion": {"sphinx": 61, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1}, "filenames": ["api.rst", "cli.rst", "contribute.rst", "errors.rst", "index.rst", "installation.rst", "modules.rst", "quickstart.rst", "result.rst", "streamlit.rst", "todo.rst", "workflow.rst"], "indexentries": {"--collapse": [[1, "cmdoption-waybacktweets-collapse", false]], "--from": [[1, "cmdoption-waybacktweets-from", false]], "--limit": [[1, "cmdoption-waybacktweets-limit", false]], "--offset": [[1, "cmdoption-waybacktweets-offset", false]], "--to": [[1, "cmdoption-waybacktweets-to", false]], "check_double_status() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.check_double_status", false]], "check_pattern_tweet() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.check_pattern_tweet", false]], "clean_tweet_url() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.clean_tweet_url", false]], "clean_wayback_machine_url() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.clean_wayback_machine_url", false]], "delete_tweet_pathnames() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.delete_tweet_pathnames", false]], "embed() (waybacktweets.api.parse_tweets.twitterembed method)": [[0, "waybacktweets.api.parse_tweets.TwitterEmbed.embed", false]], "generate() (waybacktweets.api.viz_tweets.htmltweetsvisualizer method)": [[0, "waybacktweets.api.viz_tweets.HTMLTweetsVisualizer.generate", false]], "get() (waybacktweets.api.request_tweets.waybacktweets method)": [[0, "waybacktweets.api.request_tweets.WaybackTweets.get", false]], "get_response() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.get_response", false]], "htmltweetsvisualizer (class in waybacktweets.api.viz_tweets)": [[0, "waybacktweets.api.viz_tweets.HTMLTweetsVisualizer", false]], "module": [[0, 
"module-waybacktweets.api.export_tweets", false], [0, "module-waybacktweets.api.parse_tweets", false], [0, "module-waybacktweets.api.request_tweets", false], [0, "module-waybacktweets.api.viz_tweets", false], [0, "module-waybacktweets.utils.utils", false]], "parse() (waybacktweets.api.parse_tweets.tweetsparser method)": [[0, "waybacktweets.api.parse_tweets.TweetsParser.parse", false]], "parse_date() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.parse_date", false]], "save() (waybacktweets.api.viz_tweets.htmltweetsvisualizer method)": [[0, "waybacktweets.api.viz_tweets.HTMLTweetsVisualizer.save", false]], "save_to_csv() (waybacktweets.api.export_tweets.tweetsexporter method)": [[0, "waybacktweets.api.export_tweets.TweetsExporter.save_to_csv", false]], "save_to_html() (waybacktweets.api.export_tweets.tweetsexporter method)": [[0, "waybacktweets.api.export_tweets.TweetsExporter.save_to_html", false]], "save_to_json() (waybacktweets.api.export_tweets.tweetsexporter method)": [[0, "waybacktweets.api.export_tweets.TweetsExporter.save_to_json", false]], "semicolon_parser() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.semicolon_parser", false]], "tweetsexporter (class in waybacktweets.api.export_tweets)": [[0, "waybacktweets.api.export_tweets.TweetsExporter", false]], "tweetsparser (class in waybacktweets.api.parse_tweets)": [[0, "waybacktweets.api.parse_tweets.TweetsParser", false]], "twitterembed (class in waybacktweets.api.parse_tweets)": [[0, "waybacktweets.api.parse_tweets.TwitterEmbed", false]], "username": [[1, "cmdoption-waybacktweets-arg-USERNAME", false]], "waybacktweets (class in waybacktweets.api.request_tweets)": [[0, "waybacktweets.api.request_tweets.WaybackTweets", false]], "waybacktweets command line option": [[1, "cmdoption-waybacktweets-arg-USERNAME", false], [1, "cmdoption-waybacktweets-collapse", false], [1, "cmdoption-waybacktweets-from", false], [1, "cmdoption-waybacktweets-limit", false], [1, "cmdoption-waybacktweets-offset", false], [1, "cmdoption-waybacktweets-to", false]], "waybacktweets.api.export_tweets": [[0, "module-waybacktweets.api.export_tweets", false]], "waybacktweets.api.parse_tweets": [[0, "module-waybacktweets.api.parse_tweets", false]], "waybacktweets.api.request_tweets": [[0, "module-waybacktweets.api.request_tweets", false]], "waybacktweets.api.viz_tweets": [[0, "module-waybacktweets.api.viz_tweets", false]], "waybacktweets.utils.utils": [[0, "module-waybacktweets.utils.utils", false]]}, "objects": {"waybacktweets": [[1, 4, 1, "cmdoption-waybacktweets-collapse", "--collapse"], [1, 4, 1, "cmdoption-waybacktweets-from", "--from"], [1, 4, 1, "cmdoption-waybacktweets-limit", "--limit"], [1, 4, 1, "cmdoption-waybacktweets-offset", "--offset"], [1, 4, 1, "cmdoption-waybacktweets-to", "--to"], [1, 4, 1, "cmdoption-waybacktweets-arg-USERNAME", "USERNAME"]], "waybacktweets.api": [[0, 0, 0, "-", "export_tweets"], [0, 0, 0, "-", "parse_tweets"], [0, 0, 0, "-", "request_tweets"], [0, 0, 0, "-", "viz_tweets"]], "waybacktweets.api.export_tweets": [[0, 1, 1, "", "TweetsExporter"]], "waybacktweets.api.export_tweets.TweetsExporter": [[0, 2, 1, "", "save_to_csv"], [0, 2, 1, "", "save_to_html"], [0, 2, 1, "", "save_to_json"]], "waybacktweets.api.parse_tweets": [[0, 1, 1, "", "TweetsParser"], [0, 1, 1, "", "TwitterEmbed"]], "waybacktweets.api.parse_tweets.TweetsParser": [[0, 2, 1, "", "parse"]], "waybacktweets.api.parse_tweets.TwitterEmbed": [[0, 2, 1, "", "embed"]], "waybacktweets.api.request_tweets": [[0, 1, 1, "", 
"WaybackTweets"]], "waybacktweets.api.request_tweets.WaybackTweets": [[0, 2, 1, "", "get"]], "waybacktweets.api.viz_tweets": [[0, 1, 1, "", "HTMLTweetsVisualizer"]], "waybacktweets.api.viz_tweets.HTMLTweetsVisualizer": [[0, 2, 1, "", "generate"], [0, 2, 1, "", "save"]], "waybacktweets.utils": [[0, 0, 0, "-", "utils"]], "waybacktweets.utils.utils": [[0, 3, 1, "", "check_double_status"], [0, 3, 1, "", "check_pattern_tweet"], [0, 3, 1, "", "clean_tweet_url"], [0, 3, 1, "", "clean_wayback_machine_url"], [0, 3, 1, "", "delete_tweet_pathnames"], [0, 3, 1, "", "get_response"], [0, 3, 1, "", "parse_date"], [0, 3, 1, "", "semicolon_parser"]]}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"], "3": ["py", "function", "Python function"], "4": ["std", "cmdoption", "program option"]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method", "3": "py:function", "4": "std:cmdoption"}, "terms": {"": [0, 1, 2, 8], "0": [4, 7], "1": 1, "10": 1, "11": 2, "14": [1, 8], "2": 1, "2013022601": 1, "20130226010000": 1, "20130226010800": 1, "20150101": 7, "20191231": 7, "22": 9, "25": 9, "250": 7, "2nd": 1, "3": [2, 4, 9], "32": [1, 8], "3b": 0, "4": 1, "400": 0, "511": 0, "73": 10, "A": [0, 1, 8, 9], "For": 9, "If": [0, 2, 8, 9], "It": [1, 8, 9], "One": 9, "Such": [1, 8], "The": [0, 1, 2, 3, 8, 9, 11], "These": [2, 3], "To": 1, "about": 2, "access": [7, 9], "account": [8, 9], "actual": 8, "add": [0, 1, 10], "adjac": 1, "affect": 1, "after": [1, 8], "all": [1, 2], "allow": [1, 9], "alreadi": 5, "also": [2, 11], "alwai": [9, 11], "am": 9, "an": [0, 2, 9], "ani": [0, 8], "api": [1, 2, 6, 7, 9, 10, 11], "aplic": 9, "app": [2, 5, 10], "appli": [9, 10], "applic": [7, 8, 9, 10, 11], "ar": [0, 1, 2, 3, 8, 11], "archiv": [0, 1, 3, 4, 8, 9, 11], "archived_digest": 8, "archived_length": 8, "archived_mimetyp": 8, "archived_statuscod": 8, "archived_timestamp": [0, 8], "archived_tweet": 7, "archived_tweet_url": 8, "archived_tweets_respons": 0, "archived_urlkei": 8, "argument": 1, "ari": 9, "arsip": 9, "associ": 0, "automat": 9, "avail": [0, 8, 9], "available_tweet_info": 8, "available_tweet_is_rt": 8, "available_tweet_text": 8, "avoid": [9, 10], "base": [1, 8, 9], "being": 3, "bellingcat": 9, "below": 1, "ben": 9, "best": [2, 11], "beta": 1, "blank": 8, "bool": [0, 8], "boolean": 0, "brief": 2, "bug": 2, "bulk": 9, "byte": 8, "calendar": [1, 9], "call": 9, "can": [2, 8, 9], "canon": [1, 8], "captur": 1, "cd": 5, "cdx": [0, 1, 4, 9, 11], "chang": [4, 5, 9], "charact": 1, "check": [0, 3, 8, 9], "check_double_statu": [0, 6], "check_pattern_tweet": [0, 6], "checkbox": 9, "clarissa": 9, "clarom": [2, 5], "class": 0, "clean": 0, "clean_tweet_url": [0, 6], "clean_wayback_machine_url": [0, 6], "cli": [2, 4, 5], "click": 0, "clone": 5, "cloud": [7, 9], "code": [0, 2, 8, 10], "collaps": [0, 4, 7, 9], "com": [0, 1, 2, 3, 5], "command": [1, 2, 7, 10], "comment": 4, "common": 3, "commun": 4, "compar": 1, "complet": 10, "compress": 8, "condit": 0, "connect": 3, "connectionerror": 4, "contact": 2, "contain": 0, "content": [0, 1, 8], "contribut": 4, "correct": 0, "correspond": 8, "could": 3, "creat": [9, 10], "csv": [0, 4, 8], "ctx": 0, "daili": 9, "data": [0, 1, 4, 9, 11], "datafram": 0, "date": [0, 1, 8, 9], "datetim": [1, 8], "deeper": 9, "default": [0, 1], "defin": 11, "delet": 9, "delete_tweet_pathnam": [0, 6], "delight": 9, "dens": 1, "depend": 5, "develop": 2, "dict": 0, "digest": [1, 8], "digit": [1, 8], "directori": [2, 5], "displai": 9, "do": 9, 
"doc": [2, 10], "document": [1, 2, 5], "doe": 0, "donat": 2, "done": 1, "dot": 2, "down": [3, 9], "due": [3, 9, 11], "duplic": [0, 1], "each": [0, 9], "effect": 9, "either": 2, "elsewher": 1, "email": 2, "emb": [0, 6], "empti": 8, "encod": [1, 8], "ensur": 0, "entri": 8, "error": 4, "eserv": [1, 8], "ess": 9, "establish": 3, "ex": 1, "exampl": [1, 8], "exceed": 3, "excel": 9, "except": 1, "exclud": [1, 8], "explan": 2, "export": [4, 6], "export_tweet": 0, "extract": [0, 1, 8], "facilit": 11, "fail": 3, "fals": 0, "feed": 3, "field": [0, 1, 8, 9], "field_opt": 0, "file": [0, 8], "filter": [1, 4], "find": 9, "first": [0, 1], "fix": 9, "flow": 11, "flowchart": 11, "follow": [2, 8, 11], "form": 1, "format": [0, 1, 4, 8, 11], "framework": [7, 9], "from": [0, 1, 2, 3, 4, 7, 8, 11], "fun": 9, "function": [0, 2, 8, 10], "gem": 9, "gener": [0, 2, 6], "get": [0, 6, 7], "get_respons": [0, 6], "gijn": 9, "git": 5, "github": [2, 5, 9], "given": 1, "goe": 0, "gone": 9, "googl": 1, "gpl": 4, "gunakan": 9, "ha": [8, 9], "hack": 4, "handl": [3, 10], "hash": [1, 8], "have": [2, 8, 9], "haven": 5, "header": [1, 8], "help": 2, "helper": 2, "henk": 9, "here": 2, "hidden": 9, "host": [7, 9], "hour": 1, "how": 10, "howev": 1, "html": [0, 4, 8], "html_content": 0, "html_file_path": 0, "htmltweetsvisu": [0, 6], "http": [1, 3, 8], "httperror": 4, "i": [0, 1, 2, 3, 8, 9, 10, 11], "id": 0, "import": 7, "improv": 2, "includ": [0, 8], "index": [1, 4, 8], "indic": 0, "indonesia": 9, "inform": [0, 3], "input": 0, "instal": [2, 4], "instruct": 2, "int": [0, 8], "interest": 2, "interfac": 2, "internet": [0, 3], "internetarch": 3, "irina_tech_tip": 9, "issu": [2, 3, 9, 10], "jack": 7, "json": [0, 4, 8, 10, 11], "json_cont": 0, "json_file_path": 0, "jsonpars": 10, "kei": [1, 8, 9], "know": 9, "lack": 9, "latest": 3, "launch": 9, "least": 1, "licens": 4, "limit": [0, 1, 7, 9, 10, 11], "line": [0, 1, 2, 7, 10], "link": 9, "list": 0, "long": 3, "look": [1, 9], "love": 9, "machin": [0, 1, 4, 9], "mai": 1, "main": 2, "maintain": 2, "mainten": 3, "mani": 9, "manual": 9, "match": 1, "matchtyp": 1, "max": 3, "maximum": 0, "member": 9, "mempermudah": 9, "mend": 9, "messag": 3, "met": 0, "mimetyp": [8, 10, 11], "modul": [2, 4], "moment": 1, "more": [1, 9], "most": [1, 3, 9], "mous": 11, "multipl": 9, "my": 9, "myosinttip": 9, "n": 1, "name": 8, "necessari": [0, 1, 4], "network": 3, "new": [1, 3, 9], "newslett": 9, "none": 0, "note": 1, "now": 9, "number": 0, "obtain": 11, "occur": 3, "occurr": 0, "offici": 1, "offlin": 3, "offset": [0, 1, 7], "old": [1, 9], "one": [1, 8], "onli": [1, 9, 11], "onward": 1, "open": [2, 9], "option": [1, 7, 9], "org": [1, 3, 8, 9], "origin": [0, 8, 9], "original_tweet_url": [0, 8], "osint": 9, "other": [1, 8], "otherwis": 0, "our": 9, "out": [1, 3, 11], "output": 3, "over": 9, "overal": 0, "overload": 3, "packag": [0, 2, 3, 8], "page": [1, 4, 9], "pagin": 4, "param": 0, "paramet": 0, "pars": [1, 4, 6, 8, 11], "parse_d": [0, 6, 7], "parse_tweet": [0, 10], "parsed_archived_tweet_url": 8, "parsed_tweet_url": 8, "path": 0, "pathnam": 0, "payload": 8, "penelusuran": 9, "per": [1, 9], "perform": [1, 4], "pip": 4, "pleas": [3, 9], "poetri": [2, 5], "possibl": [1, 11], "prefix": 1, "prerequisit": 2, "price": 9, "print": 0, "print_progress": 0, "progress": 0, "project": 2, "propos": 11, "prototyp": [7, 9], "provid": 0, "publish": 0, "py": [5, 10], "python": [2, 7, 9], "queri": 1, "quickli": 9, "quickstart": 4, "rais": 3, "rang": [0, 1, 9], "rate": [9, 10], "re": [2, 9], "read": [1, 5, 11], "readtimeout": 4, 
"recommend": 1, "record": 8, "regardless": 11, "remov": 0, "replac": 0, "report": 2, "repositori": 5, "repres": 0, "represent": [1, 8], "request": [3, 4, 6], "request_tweet": 0, "requir": 1, "research": 9, "respect": 0, "respond": 3, "respons": 0, "result": [0, 1, 4, 11], "retri": 3, "retriev": [0, 1, 4, 11], "return": [0, 8, 10], "retweet": 8, "revisit": 8, "run": 5, "same": 8, "save": [0, 1, 4, 6, 8, 9, 10, 11], "save_to_csv": [0, 6], "save_to_html": [0, 6], "save_to_json": [0, 6], "scroll": 1, "search": [1, 4], "second": 0, "see": 9, "semicolon": 0, "semicolon_pars": [0, 6], "send": 0, "separ": 10, "server": [0, 1, 3, 9, 11], "servic": [0, 3], "sha1": [1, 8], "show": 1, "similar": 1, "simpl": 1, "sinc": 1, "size": 8, "skill": 2, "skip": 0, "slow": 1, "snapshot": 8, "so": 9, "sourc": [2, 4, 9], "specifi": 0, "sphinx": 2, "sponsor": 4, "standalon": 7, "start": 0, "statu": [0, 8, 9], "status": 0, "statuscod": 8, "still": [0, 8], "stop": 0, "str": [0, 1, 8], "streamlit": [2, 5, 7, 9], "string": [0, 1, 8], "structur": [0, 9], "substr": 1, "success": 0, "suggest": 2, "suppli": [1, 8], "support": 2, "t": [5, 9], "take": 3, "tc": [1, 8], "temporarili": 3, "test": [1, 4], "text": [0, 1, 8], "thei": 0, "therefor": 11, "thi": [0, 1, 2, 3, 8, 9], "third": 0, "those": 9, "three": [0, 1, 8], "through": [0, 1], "time": [1, 3, 8], "timestamp": [0, 1], "timestamp_from": [0, 7], "timestamp_to": [0, 7], "todo": 4, "too": [1, 3], "tool": [1, 7, 9, 11], "train": 9, "transform": [1, 8], "true": 0, "tupl": 0, "tutori": 10, "tweet": [0, 1, 2, 7, 8, 9, 10, 11], "tweet_url": 0, "tweetsexport": [0, 6], "tweetspars": [0, 6], "twitter": [0, 1, 3, 8, 9], "twitteremb": [0, 6], "two": 0, "type": [0, 1], "under": 2, "uniqu": [1, 9], "unk": 8, "untuk": 9, "up": [1, 9], "url": [0, 1, 8, 9], "urlkei": [1, 7, 9], "us": [0, 1, 2, 4, 7, 8, 9, 11], "usag": 4, "user": 9, "usernam": [0, 1, 7], "usual": [1, 8], "util": [2, 4, 6, 7, 8, 9], "valu": [0, 1, 8, 9], "van": 9, "veri": 2, "via": [2, 9, 10], "view": 9, "visual": [4, 6], "viz_tweet": 0, "wa": [0, 11], "wai": [1, 2, 9, 11], "want": 9, "warc": 8, "wayback": [0, 1, 2, 7, 9, 11], "wayback_machine_url": 0, "waybacktweet": [0, 2, 3, 5, 7, 10], "we": [1, 9], "web": [1, 3, 10], "welcom": 2, "what": 9, "when": [0, 1, 3, 9, 10], "where": [0, 1], "whether": [0, 8], "which": [8, 9], "who": 9, "without": [0, 1], "workflow": 4, "would": 3, "written": [7, 9, 11], "x": 9, "xx": 1, "year": 1, "you": [1, 2, 5, 8, 9], "yyyymmdd": [0, 1], "yyyymmddhhmmss": [1, 8], "zoom": 11}, "titles": ["API", "CLI", "Contribute", "Errors", "Wayback Tweets", "Installation", "waybacktweets", "Quickstart", "Result", "Web App", "TODO", "Workflow"], "titleterms": {"addit": 4, "api": [0, 4], "app": [4, 7, 9], "cli": [1, 7], "collaps": 1, "command": 4, "comment": 9, "commun": 9, "connectionerror": 3, "contribut": 2, "document": 4, "error": 3, "export": 0, "filter": 9, "from": 5, "guid": 4, "hack": 2, "httperror": 3, "indic": 4, "inform": 4, "instal": 5, "interfac": 4, "line": 4, "modul": 7, "pagin": 9, "pars": 0, "pip": 5, "quickstart": 7, "readtimeout": 3, "refer": 4, "request": 0, "result": 8, "sourc": 5, "sponsor": 2, "streamlit": 4, "tabl": 4, "test": 2, "todo": 10, "tweet": 4, "us": 5, "usag": 1, "user": 4, "util": 0, "visual": 0, "wayback": 4, "waybacktweet": [1, 6], "web": [4, 7, 9], "workflow": 11}}) \ No newline at end of file diff --git a/streamlit.html b/streamlit.html index c89f7a6..58b81eb 100644 --- a/streamlit.html +++ b/streamlit.html @@ -8,7 +8,7 @@ Web App — Wayback Tweets Documentation (1.0.x) 
- + diff --git a/todo.html b/todo.html new file mode 100644 index 0000000..e6720d5 --- /dev/null +++ b/todo.html @@ -0,0 +1,98 @@ + + + + + + + + TODO — Wayback Tweets Documentation (1.0.x) + + + + + + + + + + + + + + +
            +
            +
            +
            + +
            +

            TODO

            +

            Code: JSON Issue: Create a separate function to handle JSON return, apply JsonParser (waybacktweets/api/parse_tweets.py:73), and avoid rate limiting

            +

            Docs: Add tutorial on how to save Tweet via command line

            +

            Web App: Return complete JSON when mimetype is application/json
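For the first item above, a minimal sketch of what such a separate JSON handler might look like (the helper name parse_json_mimetype and the fixed delay are hypothetical; only JsonParser and its parse() method come from the package):

    import time

    from waybacktweets.api.parse_tweets import JsonParser

    def parse_json_mimetype(archived_tweet_url, delay=1.0):
        # Hypothetical helper: hand application/json snapshots to JsonParser
        # and pause between requests to stay under the Wayback Machine rate limit.
        parsed_text = JsonParser(archived_tweet_url).parse()
        time.sleep(delay)
        return parsed_text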

            +
            + + +
            +
            +
            +
            + + +
            +
            + + + + + + \ No newline at end of file diff --git a/workflow.html b/workflow.html index 351822d..66add5a 100644 --- a/workflow.html +++ b/workflow.html @@ -8,7 +8,7 @@ Workflow — Wayback Tweets Documentation (1.0.x) - + @@ -17,7 +17,7 @@ diff --git a/_static/css/custom.css b/_static/css/custom.css index 6429ee5..6b9a1dc 100644 --- a/_static/css/custom.css +++ b/_static/css/custom.css @@ -1,3 +1,7 @@ +body { + font-family: Georgia, 'Times New Roman', Times, serif; +} + #cli #usage #waybacktweets h3, #cli .admonition-title, .sphinxsidebarwrapper li ul li ul:has(a[href="#waybacktweets"]):last-child { diff --git a/api.html b/api.html index 4992e33..2deb29b 100644 --- a/api.html +++ b/api.html @@ -8,7 +8,7 @@ API — Wayback Tweets Documentation (1.0.x) - + diff --git a/cli.html b/cli.html index daaf738..2f01efc 100644 --- a/cli.html +++ b/cli.html @@ -8,7 +8,7 @@ CLI — Wayback Tweets Documentation (1.0.x) - + diff --git a/contribute.html b/contribute.html index 35f1485..1ce1a38 100644 --- a/contribute.html +++ b/contribute.html @@ -8,7 +8,7 @@ Contribute — Wayback Tweets Documentation (1.0.x) - + diff --git a/errors.html b/errors.html index 95f7f57..1b46693 100644 --- a/errors.html +++ b/errors.html @@ -8,7 +8,7 @@ Errors — Wayback Tweets Documentation (1.0.x) - + diff --git a/genindex.html b/genindex.html index 350dc03..93d1fc3 100644 --- a/genindex.html +++ b/genindex.html @@ -8,7 +8,7 @@ Index — Wayback Tweets Documentation (1.0.x) - + diff --git a/index.html b/index.html index 9bfa88a..a45beb9 100644 --- a/index.html +++ b/index.html @@ -8,7 +8,7 @@ Wayback Tweets — Wayback Tweets Documentation (1.0.x) - + diff --git a/installation.html b/installation.html index 521755a..fb0641d 100644 --- a/installation.html +++ b/installation.html @@ -8,7 +8,7 @@ Installation — Wayback Tweets Documentation (1.0.x) - + diff --git a/modules.html b/modules.html index 32a17cc..9361612 100644 --- a/modules.html +++ b/modules.html @@ -8,7 +8,7 @@ waybacktweets — Wayback Tweets Documentation (1.0.x) - + diff --git a/py-modindex.html b/py-modindex.html index 8716190..cfa9ef1 100644 --- a/py-modindex.html +++ b/py-modindex.html @@ -8,7 +8,7 @@ Python Module Index — Wayback Tweets Documentation (1.0.x) - + diff --git a/quickstart.html b/quickstart.html index bf6c222..682748b 100644 --- a/quickstart.html +++ b/quickstart.html @@ -8,7 +8,7 @@ Quickstart — Wayback Tweets Documentation (1.0.x) - + diff --git a/result.html b/result.html index eef30f8..a6b4ec3 100644 --- a/result.html +++ b/result.html @@ -8,7 +8,7 @@ Result — Wayback Tweets Documentation (1.0.x) - + diff --git a/search.html b/search.html index b2c9eb1..4624cb3 100644 --- a/search.html +++ b/search.html @@ -8,7 +8,7 @@ Search — Wayback Tweets Documentation (1.0.x) - + diff --git a/streamlit.html b/streamlit.html index 16f33ad..edbd362 100644 --- a/streamlit.html +++ b/streamlit.html @@ -8,7 +8,7 @@ Web App — Wayback Tweets Documentation (1.0.x) - + diff --git a/todo.html b/todo.html index e6720d5..2d72313 100644 --- a/todo.html +++ b/todo.html @@ -8,7 +8,7 @@ TODO — Wayback Tweets Documentation (1.0.x) - + diff --git a/workflow.html b/workflow.html index 76458cd..a7b3089 100644 --- a/workflow.html +++ b/workflow.html @@ -8,7 +8,7 @@ Workflow — Wayback Tweets Documentation (1.0.x) - + @@ -17,7 +17,7 @@ diff --git a/_images/waybacktweets.png b/_images/waybacktweets.png new file mode 100644 index 0000000000000000000000000000000000000000..937a666e7ff9e7f71035d891aff09287d35728e0 GIT binary patch literal 5648 
zU|^LJ@qF;WI>(l0>()9_erOc!`+eQ!x>z*LrmB$J(o5X<>cH}~viKxo&6Vm@RG0Q} zy6UL}mPm2;l&)tL8B+Oxx%u(7v+RP0(gh-%`KoW{E9UKie*Xq#n)thx+s_k~4op20 zAK!+n;%PYlNnN@XWvyC(g?nFQoro4KI{q165P2LGb&=co!VkOa*@LC~@<>29?v2cDB8?yGzb{0}l2Co02Jb zGuqo-j+UYJ^GvMf&G0P}8>l6p(wiM|IeBVAtNZSBE_XBH)3jU{=vZGEMej*!P+62B zd+1bjM}2PHm*+d$u_&O3aBF@($JBF$Pb|3hl@mU{Sp-~rlzw@WS#mv~xaL(D9NPwFa4TSRM_V~qA+Xs-+=OQKf(*Alr*DZU1VbMl)B8g=* zJG=G5t*rnUn2!6ECRIQF>H#(+20SBU*9XhuN}mW0EtS1OXMmCq(^6bdW`Xt_!Aa(( zfChH8PVMs=^L}ITm6ur93zd@t^EKe(zv30$Kgu#WDxF;Bzpp+eId$18X^YSQ0hk?d Ai~s-t literal 0 HcmV?d00001 diff --git a/_sources/api.rst.txt b/_sources/api.rst.txt index 820cbd8..12537b9 100644 --- a/_sources/api.rst.txt +++ b/_sources/api.rst.txt @@ -54,7 +54,6 @@ Utils .. autofunction:: clean_tweet_url .. autofunction:: clean_wayback_machine_url .. autofunction:: delete_tweet_pathnames -.. autofunction:: is_tweet_url .. autofunction:: get_response -.. autofunction:: parse_date +.. autofunction:: is_tweet_url .. autofunction:: semicolon_parser diff --git a/_sources/cli.rst.txt b/_sources/cli.rst.txt index f64b299..f6f19fc 100644 --- a/_sources/cli.rst.txt +++ b/_sources/cli.rst.txt @@ -47,7 +47,7 @@ URL Match Scope The CDX Server can return results matching a certain prefix, a certain host or all subdomains by using the ``matchType`` param. -For example, with the value ``prefix`` it is possible to retrieve URLs beyond `/status/`. +The package ``waybacktweets`` uses the pathname ``/status`` followed by the wildcard '*' at the end of the URL to retrieve only tweets. However, if a value is provided for this parameter, the search will be made from the URL `twitter.com/`. Read below text extracted from the official Wayback CDX Server API (Beta) documentation. @@ -71,7 +71,7 @@ Read below text extracted from the official Wayback CDX Server API (Beta) docume The matchType may also be set implicitly by using wildcard '*' at end or beginning of the url: - - If url is ends in '/*', eg url=archive.org/* the query is equivalent to url=archive.org/&matchType=prefix - - If url starts with '*.', eg url=*.archive.org/ the query is equivalent to url=archive.org/&matchType=domain + - If url is ends in '/\*', eg url=archive.org/\* the query is equivalent to url=archive.org/&matchType=prefix + - If url starts with '\*.', eg url=\*.archive.org/ the query is equivalent to url=archive.org/&matchType=domain (Note: The domain mode is only available if the CDX is in `SURT `_-order format.) diff --git a/_sources/contribute.rst.txt b/_sources/contribute.rst.txt index 298ec43..84ed2cb 100644 --- a/_sources/contribute.rst.txt +++ b/_sources/contribute.rst.txt @@ -24,6 +24,7 @@ Install from the source, following `these instructions `_. Brief explanation about the code under the Wayback Tweets directory: - ``app``: Streamlit app +- ``assets``: Title and logo images - ``docs``: Documentation generated with Sphinx - ``waybacktweets/api``: Main package modules - ``waybacktweets/cli``: Command line Interface module diff --git a/_sources/errors.rst.txt b/_sources/exceptions.rst.txt similarity index 99% rename from _sources/errors.rst.txt rename to _sources/exceptions.rst.txt index 38a8f1b..109e41b 100644 --- a/_sources/errors.rst.txt +++ b/_sources/exceptions.rst.txt @@ -1,4 +1,4 @@ -Errors +Exceptions ================ These are the most common errors and are handled by the ``waybacktweets`` package. 
diff --git a/_sources/index.rst.txt b/_sources/index.rst.txt index df48609..9bf5bcd 100644 --- a/_sources/index.rst.txt +++ b/_sources/index.rst.txt @@ -3,9 +3,8 @@ Wayback Tweets ================ - -Wayback Tweets Documentation ------------------------------- +.. image:: ../assets/waybacktweets.png + :align: center Retrieves archived tweets CDX data from the Wayback Machine, performs necessary parsing, and saves the data in CSV, JSON, and HTML formats. @@ -20,7 +19,7 @@ User Guide quickstart workflow result - errors + exceptions contribute todo diff --git a/_sources/quickstart.rst.txt b/_sources/quickstart.rst.txt index 8700ee3..4e3c4d7 100644 --- a/_sources/quickstart.rst.txt +++ b/_sources/quickstart.rst.txt @@ -10,7 +10,7 @@ waybacktweets [OPTIONS] USERNAME .. code-block:: shell - waybacktweets --from 20150101 --to 20191231 --limit 250 jack` + waybacktweets --from 20150101 --to 20191231 --limit 250 jack Module @@ -20,24 +20,30 @@ Using Wayback Tweets as a Python Module .. code-block:: python - from waybacktweets import WaybackTweets - from waybacktweets.utils import parse_date + from waybacktweets import WaybackTweets, TweetsParser, TweetsExporter - username = "jack" - collapse = "urlkey" - timestamp_from = parse_date("20150101") - timestamp_to = parse_date("20191231") - limit = 250 - offset = 0 - matchtype = "exact" - - api = WaybackTweets(username, collapse, timestamp_from, timestamp_to, limit, offset, matchtype) + USERNAME = "jack" + api = WaybackTweets(USERNAME) archived_tweets = api.get() + if archived_tweets: + field_options = [ + "archived_timestamp", + "original_tweet_url", + "archived_tweet_url", + "archived_statuscode", + ] + + parser = TweetsParser(archived_tweets, USERNAME, field_options) + parsed_tweets = parser.parse() + + exporter = TweetsExporter(parsed_tweets, USERNAME, field_options) + exporter.save_to_csv() + Web App ------------- Using Wayback Tweets as a Streamlit Web App -`Access the application `_, a prototype written in Python with the Streamlit framework and hosted on Streamlit Cloud. +`Open the application `_, a prototype written in Python with the Streamlit framework and hosted on Streamlit Cloud. diff --git a/_sources/streamlit.rst.txt b/_sources/streamlit.rst.txt index 78da866..b8de7d9 100644 --- a/_sources/streamlit.rst.txt +++ b/_sources/streamlit.rst.txt @@ -3,6 +3,8 @@ Web App Aplication that displays multiple archived tweets on Wayback Machine to avoid opening each link manually. The application is a prototype written in Python with the Streamlit framework and hosted on Streamlit Cloud, allowing users to apply filters and view tweets that lack the original URL. +`Open the application `_. 
+ Filters ---------- diff --git a/_sources/todo.rst.txt b/_sources/todo.rst.txt index 58d1feb..cda3ea5 100644 --- a/_sources/todo.rst.txt +++ b/_sources/todo.rst.txt @@ -5,9 +5,15 @@ TODO -|uncheck| Code: JSON Issue: Create a separate function to handle JSON return, apply JsonParser (``waybacktweets/api/parse_tweets.py:73``), and avoid rate limiting +|uncheck| Code: JSON Issue: Create a separate function to handle JSON return, apply JsonParser (``waybacktweets/api/parse_tweets.py:73``), and avoid rate limiting (`Planned for v1.1`) -|uncheck| Docs: Add tutorial on how to save Tweet via command line +|uncheck| Docs: Add tutorial on how to save Tweet via command line (`Planned for v1.1`) -|uncheck| Web App: Return complete JSON when mimetype is ``application/json`` +|uncheck| Code: Download images when tweet URL has extensions like JPG or PNG (`Planned for v1.2`) + +|uncheck| Code: Develop a scraper to download snapshots from https://archive.today (`Not planned`) + +|uncheck| Code: Unit Tests (`Planned`) + +|uncheck| Code: Mapping and parsing of other Twitter-related URLs (`Planned`) diff --git a/_sources/workflow.rst.txt b/_sources/workflow.rst.txt index c3ffd32..2480b35 100644 --- a/_sources/workflow.rst.txt +++ b/_sources/workflow.rst.txt @@ -19,5 +19,5 @@ Use the mouse to zoom in and out the flowchart. C--> |4xx| E[return None] E--> F{request Archived\nTweet URL} F--> |4xx| G[return Only CDX data] - F--> |TODO: 2xx/3xx: application/json| J[return JSON text] + F--> |2xx/3xx: application/json| J[return JSON text] F--> |2xx/3xx: text/html, warc/revisit, unk| K[return HTML iframe tag] diff --git a/api.html b/api.html index 2deb29b..6068485 100644 --- a/api.html +++ b/api.html @@ -49,34 +49,30 @@

            API

            Request

            -class waybacktweets.api.request_tweets.WaybackTweets(username, collapse, timestamp_from, timestamp_to, limit, offset, matchtype)
            +class waybacktweets.api.request_tweets.WaybackTweets(username: str, collapse: str = None, timestamp_from: str = None, timestamp_to: str = None, limit: int = None, offset: int = None, matchtype: str = None)

            Class responsible for requesting data from the Wayback CDX Server API.

            Parameters:
              -
            • username (str) – The username associated with the tweets.

            • -
            • collapse (str) – The field to collapse duplicate lines on.

            • -
            • timestamp_from (str) – The timestamp to start retrieving tweets from.

            • -
            • timestamp_to (str) – The timestamp to stop retrieving tweets at.

            • -
            • limit (int) – The maximum number of results to return.

            • -
            • offset (int) – The number of lines to skip in the results.

            • -
            • matchType – Results matching a certain prefix, a certain host or all subdomains. # noqa: E501

            • -
            • matchtype (str)

            • +
            • username – The username associated with the tweets.

            • +
            • collapse – The field to collapse duplicate lines on.

            • +
            • timestamp_from – The timestamp to start retrieving tweets from.

            • +
            • timestamp_to – The timestamp to stop retrieving tweets at.

            • +
            • limit – The maximum number of results to return.

            • +
            • offset – The number of lines to skip in the results.

            • +
            • matchType – Results matching a certain prefix, a certain host or all subdomains.

            -get()
            +get() Dict[str, Any] | None

            Sends a GET request to the Internet Archive’s CDX API to retrieve archived tweets.

            Returns:

            The response from the CDX API in JSON format, if successful.

            -
            Return type:
            -

            Dict[str, Any] | None
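As a rough usage sketch of the signature above (only the username is required; the timestamp and limit values are placeholders borrowed from the CLI example):

    from waybacktweets import WaybackTweets

    # Only the username is required; the remaining keywords default to None.
    api = WaybackTweets(
        "jack",
        collapse="urlkey",
        timestamp_from="20150101",
        timestamp_to="20191231",
        limit=250,
    )
    archived_tweets = api.get()  # Dict[str, Any] | None, per the return type above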

            -
            @@ -87,20 +83,20 @@

            API

            Parse

            -class waybacktweets.api.parse_tweets.TweetsParser(archived_tweets_response, username, field_options)
            +class waybacktweets.api.parse_tweets.TweetsParser(archived_tweets_response: List[str], username: str, field_options: List[str])

            Class responsible for the overall parsing of archived tweets.

            Parameters:
              -
            • archived_tweets_response (List[str]) – The response from the archived tweets.

            • -
            • username (str) – The username associated with the tweets.

            • -
            • field_options (List[str]) – The fields to be included in the parsed data.

            • +
            • archived_tweets_response – The response from the archived tweets.

            • +
            • username – The username associated with the tweets.

            • +
            • field_options – The fields to be included in the parsed data. Options include “archived_urlkey”, “archived_timestamp”, “original_tweet_url”, “archived_tweet_url”, “parsed_tweet_url”, “parsed_archived_tweet_url”, “available_tweet_text”, “available_tweet_is_RT”, “available_tweet_info”, “archived_mimetype”, “archived_statuscode”, “archived_digest”, “archived_length”.

            -parse(print_progress=False)
            +parse(print_progress=False) Dict[str, List[Any]]

            Parses the archived tweets CDX data and structures it.

            Parameters:
            @@ -109,9 +105,6 @@

            API

            Returns:

            The parsed tweets data.

            -
            Return type:
            -

            Dict[str, List[Any]]
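A short, self-contained sketch of the parsing step, reusing the request class above and three of the documented field names:

    from waybacktweets import WaybackTweets, TweetsParser

    archived_tweets = WaybackTweets("jack").get()
    if archived_tweets:
        field_options = ["archived_timestamp", "original_tweet_url", "archived_tweet_url"]
        parser = TweetsParser(archived_tweets, "jack", field_options)
        parsed_tweets = parser.parse(print_progress=True)  # Dict[str, List[Any]]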

            -
            @@ -119,16 +112,16 @@

            API

            -class waybacktweets.api.parse_tweets.TwitterEmbed(tweet_url)
            +class waybacktweets.api.parse_tweets.TwitterEmbed(tweet_url: str)

            Class responsible for parsing tweets using the Twitter Publish service.

            Parameters:
            -

            tweet_url (str) – The URL of the tweet to be parsed.

            +

            tweet_url – The URL of the tweet to be parsed.

            -embed()
            +embed() Tuple[List[str], List[bool], List[str]] | None

            Parses the archived tweets when they are still available.

            This function goes through each archived tweet and checks if it is still available. @@ -145,9 +138,6 @@

            API

            -
            Return type:
            -

            Tuple[List[str], List[bool], List[str]] | None
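A minimal sketch of calling embed(); the mapping of the three returned lists to tweet text, retweet flag, and tweet info is an assumption based on the available_tweet_* fields described on the Result page:

    from waybacktweets.api.parse_tweets import TwitterEmbed

    embedded = TwitterEmbed("https://twitter.com/jack/status/20").embed()
    if embedded:
        # Assumed ordering: tweet texts, is-retweet flags, tweet info.
        texts, is_retweet, info = embedded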

            -
            @@ -155,26 +145,23 @@

            API

            -class waybacktweets.api.parse_tweets.JsonParser(archived_tweet_url)
            +class waybacktweets.api.parse_tweets.JsonParser(archived_tweet_url: str)

            Class responsible for parsing tweets when the mimetype is application/json.

            Note: This class is in an experimental phase, but it is currently being used by the Streamlit Web App.

            Parameters:
            -

            archived_tweet_url (str) – The URL of the archived tweet to be parsed.

            +

            archived_tweet_url – The URL of the archived tweet to be parsed.

            -parse()
            +parse() str

            Parses the archived tweets in JSON format.

            Returns:

            The parsed tweet text.

            -
            Return type:
            -

            str
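A minimal direct call, assuming a snapshot URL whose archived mimetype is application/json (the URL below is a placeholder):

    from waybacktweets.api.parse_tweets import JsonParser

    # Placeholder snapshot URL; any archived tweet served as application/json would do.
    archived_tweet_url = "https://web.archive.org/web/20130226010000/https://twitter.com/jack/status/20"
    tweet_text = JsonParser(archived_tweet_url).parse()  # str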

            -
            @@ -185,48 +172,33 @@

            API

            Export

            -class waybacktweets.api.export_tweets.TweetsExporter(data, username, field_options)
            +class waybacktweets.api.export_tweets.TweetsExporter(data: Dict[str, List[Any]], username: str, field_options: List[str])

            Class responsible for exporting parsed archived tweets.

            Parameters:
              -
            • data (Dict[str, List[Any]]) – The parsed archived tweets data.

            • -
            • username (str) – The username associated with the tweets.

            • -
            • field_options (List[str]) – The fields to be included in the exported data.

            • +
            • data – The parsed archived tweets data.

            • +
            • username – The username associated with the tweets.

            • +
            • field_options – The fields to be included in the exported data. Options include “archived_urlkey”, “archived_timestamp”, “original_tweet_url”, “archived_tweet_url”, “parsed_tweet_url”, “parsed_archived_tweet_url”, “available_tweet_text”, “available_tweet_is_RT”, “available_tweet_info”, “archived_mimetype”, “archived_statuscode”, “archived_digest”, “archived_length”.

            -save_to_csv()
            +save_to_csv() None

            Saves the DataFrame to a CSV file.

            -
            -
            Return type:
            -

            None

            -
            -
            -save_to_html()
            +save_to_html() None

            Saves the DataFrame to an HTML file.

            -
            -
            Return type:
            -

            None

            -
            -
            -save_to_json()
            +save_to_json() None

            Saves the DataFrame to a JSON file.

            -
            -
            Return type:
            -

            None

            -
            -
            @@ -236,42 +208,35 @@

            API

            Visualizer

            -class waybacktweets.api.viz_tweets.HTMLTweetsVisualizer(json_file_path, html_file_path, username)
            +class waybacktweets.api.viz_tweets.HTMLTweetsVisualizer(json_file_path: str, html_file_path: str, username: str)

            Class responsible for generating an HTML file to visualize the parsed data.

            Parameters:
            • json_content – The content of the JSON file.

            • -
            • html_file_path (str) – The path where the HTML file will be saved.

            • -
            • username (str) – The username associated with the tweets.

            • -
            • json_file_path (str)

            • +
            • html_file_path – The path where the HTML file will be saved.

            • +
            • username – The username associated with the tweets.

            -generate()
            +generate() str

            Generates an HTML string that represents the parsed data.

            Returns:

            The generated HTML string.

            -
            Return type:
            -

            str

            -
            -save(html_content)
            +save(html_content: str) None

            Saves the generated HTML string to a file.

            Parameters:
            -

            html_content (str) – The HTML string to be saved.

            -
            -
            Return type:
            -

            None

            +

            html_content – The HTML string to be saved.
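Putting the export and visualization steps together, a hedged sketch (file names are placeholders; the JSON file is assumed to have been produced earlier, for example by TweetsExporter.save_to_json()):

    from waybacktweets.api.viz_tweets import HTMLTweetsVisualizer

    # Placeholder paths; the first argument follows the json_file_path signature above.
    json_path = "jack_tweets.json"
    html_path = "jack_tweets.html"

    visualizer = HTMLTweetsVisualizer(json_path, html_path, "jack")
    html_content = visualizer.generate()
    visualizer.save(html_content)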

            @@ -283,178 +248,132 @@

            API

            Utils

            -waybacktweets.utils.utils.check_double_status(wayback_machine_url, original_tweet_url)
            +waybacktweets.utils.utils.check_double_status(wayback_machine_url: str, original_tweet_url: str) bool

            Checks if a Wayback Machine URL contains two occurrences of “/status/” and if the original tweet does not contain “twitter.com”.

            Parameters:
              -
            • wayback_machine_url (str) – The Wayback Machine URL to check.

            • -
            • original_tweet_url (str) – The original tweet URL to check.

            • +
            • wayback_machine_url – The Wayback Machine URL to check.

            • +
            • original_tweet_url – The original tweet URL to check.

            Returns:

            True if the conditions are met, False otherwise.

            -
            Return type:
            -

            bool

            -
            -waybacktweets.utils.utils.check_pattern_tweet(tweet_url)
            +waybacktweets.utils.utils.check_pattern_tweet(tweet_url: str) str

            Extracts the tweet ID from a tweet URL.

            Parameters:
            -

            tweet_url (str) – The tweet URL to extract the ID from.

            +

            tweet_url – The tweet URL to extract the ID from.

            Returns:

            The extracted tweet ID.

            -
            Return type:
            -

            str

            -
            -waybacktweets.utils.utils.clean_tweet_url(tweet_url, username)
            +waybacktweets.utils.utils.clean_tweet_url(tweet_url: str, username: str) str

            Cleans a tweet URL by ensuring it is associated with the correct username.

            Parameters:
              -
            • tweet_url (str) – The tweet URL to clean.

            • -
            • username (str) – The username to associate with the tweet URL.

            • +
            • tweet_url – The tweet URL to clean.

            • +
            • username – The username to associate with the tweet URL.

            Returns:

            The cleaned tweet URL.

            -
            Return type:
            -

            str

            -
            -waybacktweets.utils.utils.clean_wayback_machine_url(wayback_machine_url, archived_timestamp, username)
            +waybacktweets.utils.utils.clean_wayback_machine_url(wayback_machine_url: str, archived_timestamp: str, username: str) str

            Cleans a Wayback Machine URL by ensuring it is associated with the correct username and timestamp.

            Parameters:
              -
            • wayback_machine_url (str) – The Wayback Machine URL to clean.

            • -
            • archived_timestamp (str) – The timestamp to associate with the Wayback Machine URL.

            • -
            • username (str) – The username to associate with the Wayback Machine URL.

            • +
            • wayback_machine_url – The Wayback Machine URL to clean.

            • +
            • archived_timestamp – The timestamp to associate with the Wayback Machine URL.

            • +
            • username – The username to associate with the Wayback Machine URL.

            Returns:

            The cleaned Wayback Machine URL.

            -
            Return type:
            -

            str

            -
            -waybacktweets.utils.utils.delete_tweet_pathnames(tweet_url)
            +waybacktweets.utils.utils.delete_tweet_pathnames(tweet_url: str) str

            Removes any pathnames from a tweet URL.

            Parameters:
            -

            tweet_url (str) – The tweet URL to remove pathnames from.

            +

            tweet_url – The tweet URL to remove pathnames from.

            Returns:

            The tweet URL without any pathnames.

            -
            Return type:
            -

            str

            -
            -
            -
            - -
            -
            -waybacktweets.utils.utils.is_tweet_url(twitter_url)
            -

            Checks if the provided URL is a Twitter status URL.

            -

            This function checks if the provided URL contains “/status/” exactly once, -which is a common pattern in Twitter status URLs.

            -
            -
            Parameters:
            -

            twitter_url (str) – The URL to check.

            -
            -
            Returns:
            -

            True if the URL is a Twitter status URL, False otherwise.

            -
            -
            Return type:
            -

            bool

            -
            -waybacktweets.utils.utils.get_response(url, params=None)
            -

            Sends a GET request to the specified URL and returns the response.

            +waybacktweets.utils.utils.get_response(url: str, params: dict | None = None) Tuple[Response | None, str | None, str | None] +

            Sends a GET request to the specified URL and returns the response, +an error message if any, and the type of exception if any.

            Parameters:
              -
            • url (str) – The URL to send the GET request to.

            • -
            • params (dict | None) – The parameters to include in the GET request.

            • +
            • url – The URL to send the GET request to.

            • +
            • params – The parameters to include in the GET request.

            Returns:
            -

            The response from the server, -if the status code is not in the 400-511 range. -If the status code is in the 400-511 range.

            -
            -
            Return type:
            -

            Response | None

            +

            A tuple containing the response from the server or None, +an error message or None, and the type of exception or None.
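A short sketch of unpacking the three-value return described above (the Wayback CDX endpoint and parameters are shown only as an illustration):

    from waybacktweets.utils.utils import get_response

    response, error_message, exception_type = get_response(
        "https://web.archive.org/cdx/search/cdx",
        params={"url": "twitter.com/jack/status/*", "output": "json"},
    )
    if response is None:
        print(error_message, exception_type)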

            -
            -waybacktweets.utils.utils.parse_date(ctx=None, param=None, value=None)
            -

            Parses a date string and returns it in the format “YYYYMMDD”.

            +
            +waybacktweets.utils.utils.is_tweet_url(twitter_url: str) bool
            +

            Checks if the provided URL is a Twitter status URL.

            +

            This function checks if the provided URL contains “/status/” exactly once, +which is a common pattern in Twitter status URLs.

            Parameters:
            -
              -
            • ctx (Any | None) – Necessary when used with the click package. Defaults to None.

            • -
            • param (Any | None) – Necessary when used with the click package. Defaults to None.

            • -
            • value (str | None) – A date string in the “YYYYMMDD” format. Defaults to None.

            • -
            +

            twitter_url – The URL to check.

            Returns:
            -

            The input date string formatted in the “YYYYMMDD” format, -or None if no date string was provided.

            -
            -
            Return type:
            -

            str | None

            +

            True if the URL is a Twitter status URL, False otherwise.

            -waybacktweets.utils.utils.semicolon_parser(string)
            +waybacktweets.utils.utils.semicolon_parser(string: str) str

            Replaces semicolons in a string with %3B.

            Parameters:
            -

            string (str) – The string to replace semicolons in.

            +

            string – The string to replace semicolons in.

            Returns:

            The string with semicolons replaced by %3B.
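For a quick feel of these helpers, a small illustrative sketch; the commented results follow the descriptions above rather than verified output:

    from waybacktweets.utils.utils import is_tweet_url, semicolon_parser

    is_tweet_url("https://twitter.com/jack/status/20")  # True: "/status/" appears exactly once
    is_tweet_url("https://twitter.com/jack")             # False
    semicolon_parser("text;with;semicolons")             # "text%3Bwith%3Bsemicolons"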

            -
            Return type:
            -

            str

            -
            @@ -524,9 +443,8 @@

            Contents

          • clean_tweet_url()
          • clean_wayback_machine_url()
          • delete_tweet_pathnames()
          • -
          • is_tweet_url()
          • get_response()
          • -
          • parse_date()
          • +
          • is_tweet_url()
          • semicolon_parser()
          @@ -558,7 +476,7 @@

          Quick search

          diff --git a/cli.html b/cli.html index 2f01efc..1e860ac 100644 --- a/cli.html +++ b/cli.html @@ -94,7 +94,7 @@

          waybacktweets
          --matchtype <matchtype>
          -

          Results matching a certain prefix, a certain host or all subdomains. Default: exact

          +

          Results matching a certain prefix, a certain host or all subdomains.

          Options:

          exact | prefix | host | domain

          @@ -147,7 +147,7 @@

          Collapsing

          URL Match Scope

          The CDX Server can return results matching a certain prefix, a certain host or all subdomains by using the matchType param.

          -

          For example, with the value prefix it is possible to retrieve URLs beyond /status/.

          +

          The package waybacktweets uses the pathname /status followed by the wildcard ‘*’ at the end of the URL to retrieve only tweets. However, if a value is provided for this parameter, the search will be made from the URL twitter.com/<USERNAME>.
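For instance, running waybacktweets --matchtype prefix jack should make the CDX query match everything under twitter.com/jack rather than only the /status/ pages.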

          Read below text extracted from the official Wayback CDX Server API (Beta) documentation.

        (Note: The domain mode is only available if the CDX is in SURT-order format.)

        @@ -232,7 +232,7 @@

        Quick search

        diff --git a/contribute.html b/contribute.html index 1ce1a38..af2dbbc 100644 --- a/contribute.html +++ b/contribute.html @@ -16,7 +16,7 @@ - + diff --git a/errors.html b/exceptions.html similarity index 94% rename from errors.html rename to exceptions.html index 1b46693..032c175 100644 --- a/errors.html +++ b/exceptions.html @@ -5,7 +5,7 @@ - Errors — Wayback Tweets Documentation (1.0.x) + Exceptions — Wayback Tweets Documentation (1.0.x) @@ -34,7 +34,7 @@

        Navigation

        previous | - +
      @@ -43,8 +43,8 @@

      Navigation

      -
      -

      Errors

      +
      +

      Exceptions

      These are the most common errors and are handled by the waybacktweets package.

      ReadTimeout

      @@ -82,7 +82,7 @@

      HTTPErrorErrors

      diff --git a/genindex.html b/genindex.html index 93d1fc3..be26c0b 100644 --- a/genindex.html +++ b/genindex.html @@ -208,10 +208,6 @@

      P

    • S

      @@ -360,7 +356,7 @@

      Quick search

      diff --git a/index.html b/index.html index a45beb9..9704f3a 100644 --- a/index.html +++ b/index.html @@ -41,10 +41,8 @@

      Navigation

      Wayback Tweets

      -
      -

      Wayback Tweets Documentation

      +_images/waybacktweets.png

      Retrieves archived tweets CDX data from the Wayback Machine, performs necessary parsing, and saves the data in CSV, JSON, and HTML formats.

      -

      User Guide

      @@ -62,10 +60,10 @@

      User GuideWorkflow
    • Result
    • -
    • Errors
    • diff --git a/installation.html b/installation.html index fb0641d..ff464e0 100644 --- a/installation.html +++ b/installation.html @@ -129,7 +129,7 @@

      Quick search

      diff --git a/modules.html b/modules.html index 9361612..8fb2a3e 100644 --- a/modules.html +++ b/modules.html @@ -85,9 +85,8 @@

      waybacktweetsclean_tweet_url()
    • clean_wayback_machine_url()
    • delete_tweet_pathnames()
    • -
    • is_tweet_url()
    • get_response()
    • -
    • parse_date()
    • +
    • is_tweet_url()
    • semicolon_parser()
    @@ -133,7 +132,7 @@

    Quick search

    diff --git a/objects.inv b/objects.inv index fc162c302a359edd1c3ba21010fec2d3d07130b4..521907d41647a7ef5ed3e4f0763a193979e95bf0 100644 GIT binary patch delta 663 zcmV;I0%-k}29O4jg@2nc5Qgvi70jepnD&~Rr}a$Ji5oX@+an{y+Nc&tk%T|;>nm9C z73ag?Tx|4e--lhXt35p?lx^)Ez?z6E9>GCLZDUjdb-d1HS_%k02i0z7b8&@A_$dg< z{%wD>s{iJ-2D-vAL5&H%bhPo9$j}|cR>sWi>I}hVGW{TC<$wQh=szQiF;GRdf$86% zZ;yVscP}ep%<;cMKz(W@{&X%&rq8%I9d6xMn&%0m)6?k~5j3Q8)6wu)g1KPYhJffV z2CQ1#nI*l2i8f%z)U@MI)9dNX(f8Hj7Bl$6O-TiR8r3xKkX%&-Xlh|vX)iOYaX*(= zS}y$W$aCsBBY&0w+s3IZ69KWY)Rr}ET^7u8cjug&f(I#DHFJSQP6P>{9uSvWTmtd? zaZXvr3z&{eL4frmTh*8qG>3@?<)$IlV3aKI<{3}m=F;OakaH%5EMnE(rLR%kA}}_+ zP9+P%ZZDWEZwM2EJ-5WYueWYb7FP45bQ97KX;mNDu73#}7^o@((>HV~8s_bqxC!J% z3Wqve&;&$fXjBZpsUAre=Bd31e^&)Kj4sqofe^Glih-lk8+4}-H*7EDqK;v+fu03U zDQ1NOI;hr57(DoXq{n*aCCj6`J&NJG9!t))E?;f?uj^EF{%Y66wP+~@!Kh&1ef>-f zP|dPoI)A8P*aPk_=>~|g8fY%~fZ2-vTR;-blpCvM!tZI2p+))6%l1dtu)*B5EA zKH_{BITu^J?E7dJ?Xsi$jIxd00a%kz#S?f`Qri@jNbN66nb!iM&r!2m*iu}e5`T(f zvU}T~tlGbMtATECOi*K@FFh?BlNh>%)XJ2ZZJQz3LgpXDY=8V84*h3j6#`XM8<_qL zhW6x#d-t*tLXQ6x0_syM@uzcMGDF7M>2T}5(!9(dpPo+7NZ>#^H60BPHJCG|Z45~M zV!*1yomkRanB*R8NKIS*G`*hQoP1v`t}%l@+|*R?r=X^JhvcfNKvN6TN_&~vjQgp) z+H&E41J7yTOn+DbY?J1)&IF{!Qd_sQbzL#b-JNr43Ld0r)65kXIT0j)dO%uhaSp`q zrzK?tuV6YZ2LaZPY*S-V(Gn&el$(ZBgHf`=neH&E`oH{5Ta55OwbHO6L?TD^s0FzeVC^WBK-Xp;Q+f(H#tJkw)~N!*AM7kA#Oll$VKhr z!v=a5IN6vL4)v%?FJbWLn~?$QotG^4?#8Lc?|P^?+qitS9lowt(fO-e6W7+Y7zLw( zfw%25F@HccE5_-dhGBQOzohp-jMYF(!AHza^xp!qXr>ggGMB@!W{nZ6n{PMInmLYL z`wFTIHWw>%seJPun((3%Quick search diff --git a/quickstart.html b/quickstart.html index 682748b..bcfc9a0 100644 --- a/quickstart.html +++ b/quickstart.html @@ -49,34 +49,40 @@

    Quickstart

    Using Wayback Tweets as a standalone command line tool

    waybacktweets [OPTIONS] USERNAME

    -
    waybacktweets --from 20150101 --to 20191231 --limit 250 jack`
    +
    waybacktweets --from 20150101 --to 20191231 --limit 250 jack
     

    Module

    Using Wayback Tweets as a Python Module

    -
    from waybacktweets import WaybackTweets
    -from waybacktweets.utils import parse_date
    +
    from waybacktweets import WaybackTweets, TweetsParser, TweetsExporter
     
    -username = "jack"
    -collapse = "urlkey"
    -timestamp_from = parse_date("20150101")
    -timestamp_to = parse_date("20191231")
    -limit = 250
    -offset = 0
    -matchtype = "exact"
    -
    -api = WaybackTweets(username, collapse, timestamp_from, timestamp_to, limit, offset, matchtype)
    +USERNAME = "jack"
     
    +api = WaybackTweets(USERNAME)
     archived_tweets = api.get()
    +
    +if archived_tweets:
    +    field_options = [
    +        "archived_timestamp",
    +        "original_tweet_url",
    +        "archived_tweet_url",
    +        "archived_statuscode",
    +    ]
    +
    +    parser = TweetsParser(archived_tweets, USERNAME, field_options)
    +    parsed_tweets = parser.parse()
    +
    +    exporter = TweetsExporter(parsed_tweets, USERNAME, field_options)
    +    exporter.save_to_csv()
     

    Web App

    Using Wayback Tweets as a Streamlit Web App

    -

    Access the application, a prototype written in Python with the Streamlit framework and hosted on Streamlit Cloud.

    +

    Open the application, a prototype written in Python with the Streamlit framework and hosted on Streamlit Cloud.

    @@ -129,7 +135,7 @@

    Quick search

    diff --git a/result.html b/result.html index a6b4ec3..5be0195 100644 --- a/result.html +++ b/result.html @@ -15,7 +15,7 @@ - + diff --git a/search.html b/search.html index 4624cb3..e39af74 100644 --- a/search.html +++ b/search.html @@ -97,7 +97,7 @@

    Navigation

    diff --git a/searchindex.js b/searchindex.js index 409044a..96a9231 100644 --- a/searchindex.js +++ b/searchindex.js @@ -1 +1 @@ -Search.setIndex({"alltitles": {"API": [[0, "api"]], "API Reference": [[4, "api-reference"]], "Additional Information": [[4, "additional-information"]], "CLI": [[1, "cli"], [7, "cli"]], "Collapsing": [[1, "collapsing"]], "Command-Line Interface": [[4, "command-line-interface"]], "Community Comments": [[9, "community-comments"]], "ConnectionError": [[3, "connectionerror"]], "Contribute": [[2, "contribute"]], "Errors": [[3, "errors"]], "Export": [[0, "module-waybacktweets.api.export_tweets"]], "Filters": [[9, "filters"]], "From source": [[5, "from-source"]], "HTTPError": [[3, "httperror"]], "Hacking": [[2, "hacking"]], "Indices and tables": [[4, "indices-and-tables"]], "Installation": [[5, "installation"]], "Module": [[7, "module"]], "Pagination": [[9, "pagination"]], "Parse": [[0, "module-waybacktweets.api.parse_tweets"]], "Quickstart": [[7, "quickstart"]], "ReadTimeout": [[3, "readtimeout"]], "Request": [[0, "module-waybacktweets.api.request_tweets"]], "Result": [[8, "result"]], "Sponsoring": [[2, "sponsoring"]], "Streamlit Web App": [[4, "streamlit-web-app"]], "TODO": [[10, "todo"]], "Testing": [[2, "testing"]], "URL Match Scope": [[1, "url-match-scope"]], "Usage": [[1, "usage"]], "User Guide": [[4, "user-guide"]], "Using pip": [[5, "using-pip"]], "Utils": [[0, "module-waybacktweets.utils.utils"]], "Visualizer": [[0, "module-waybacktweets.api.viz_tweets"]], "Wayback Tweets": [[4, "wayback-tweets"]], "Wayback Tweets Documentation": [[4, "wayback-tweets-documentation"]], "Web App": [[7, "web-app"], [9, "web-app"]], "Workflow": [[11, "workflow"]], "waybacktweets": [[1, "waybacktweets"], [6, "waybacktweets"]]}, "docnames": ["api", "cli", "contribute", "errors", "index", "installation", "modules", "quickstart", "result", "streamlit", "todo", "workflow"], "envversion": {"sphinx": 61, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1}, "filenames": ["api.rst", "cli.rst", "contribute.rst", "errors.rst", "index.rst", "installation.rst", "modules.rst", "quickstart.rst", "result.rst", "streamlit.rst", "todo.rst", "workflow.rst"], "indexentries": {"--collapse": [[1, "cmdoption-waybacktweets-collapse", false]], "--from": [[1, "cmdoption-waybacktweets-from", false]], "--limit": [[1, "cmdoption-waybacktweets-limit", false]], "--matchtype": [[1, "cmdoption-waybacktweets-matchtype", false]], "--offset": [[1, "cmdoption-waybacktweets-offset", false]], "--to": [[1, "cmdoption-waybacktweets-to", false]], "check_double_status() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.check_double_status", false]], "check_pattern_tweet() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.check_pattern_tweet", false]], "clean_tweet_url() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.clean_tweet_url", false]], "clean_wayback_machine_url() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.clean_wayback_machine_url", false]], "delete_tweet_pathnames() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.delete_tweet_pathnames", false]], "embed() (waybacktweets.api.parse_tweets.twitterembed method)": [[0, 
"waybacktweets.api.parse_tweets.TwitterEmbed.embed", false]], "generate() (waybacktweets.api.viz_tweets.htmltweetsvisualizer method)": [[0, "waybacktweets.api.viz_tweets.HTMLTweetsVisualizer.generate", false]], "get() (waybacktweets.api.request_tweets.waybacktweets method)": [[0, "waybacktweets.api.request_tweets.WaybackTweets.get", false]], "get_response() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.get_response", false]], "htmltweetsvisualizer (class in waybacktweets.api.viz_tweets)": [[0, "waybacktweets.api.viz_tweets.HTMLTweetsVisualizer", false]], "is_tweet_url() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.is_tweet_url", false]], "jsonparser (class in waybacktweets.api.parse_tweets)": [[0, "waybacktweets.api.parse_tweets.JsonParser", false]], "module": [[0, "module-waybacktweets.api.export_tweets", false], [0, "module-waybacktweets.api.parse_tweets", false], [0, "module-waybacktweets.api.request_tweets", false], [0, "module-waybacktweets.api.viz_tweets", false], [0, "module-waybacktweets.utils.utils", false]], "parse() (waybacktweets.api.parse_tweets.jsonparser method)": [[0, "waybacktweets.api.parse_tweets.JsonParser.parse", false]], "parse() (waybacktweets.api.parse_tweets.tweetsparser method)": [[0, "waybacktweets.api.parse_tweets.TweetsParser.parse", false]], "parse_date() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.parse_date", false]], "save() (waybacktweets.api.viz_tweets.htmltweetsvisualizer method)": [[0, "waybacktweets.api.viz_tweets.HTMLTweetsVisualizer.save", false]], "save_to_csv() (waybacktweets.api.export_tweets.tweetsexporter method)": [[0, "waybacktweets.api.export_tweets.TweetsExporter.save_to_csv", false]], "save_to_html() (waybacktweets.api.export_tweets.tweetsexporter method)": [[0, "waybacktweets.api.export_tweets.TweetsExporter.save_to_html", false]], "save_to_json() (waybacktweets.api.export_tweets.tweetsexporter method)": [[0, "waybacktweets.api.export_tweets.TweetsExporter.save_to_json", false]], "semicolon_parser() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.semicolon_parser", false]], "tweetsexporter (class in waybacktweets.api.export_tweets)": [[0, "waybacktweets.api.export_tweets.TweetsExporter", false]], "tweetsparser (class in waybacktweets.api.parse_tweets)": [[0, "waybacktweets.api.parse_tweets.TweetsParser", false]], "twitterembed (class in waybacktweets.api.parse_tweets)": [[0, "waybacktweets.api.parse_tweets.TwitterEmbed", false]], "username": [[1, "cmdoption-waybacktweets-arg-USERNAME", false]], "waybacktweets (class in waybacktweets.api.request_tweets)": [[0, "waybacktweets.api.request_tweets.WaybackTweets", false]], "waybacktweets command line option": [[1, "cmdoption-waybacktweets-arg-USERNAME", false], [1, "cmdoption-waybacktweets-collapse", false], [1, "cmdoption-waybacktweets-from", false], [1, "cmdoption-waybacktweets-limit", false], [1, "cmdoption-waybacktweets-matchtype", false], [1, "cmdoption-waybacktweets-offset", false], [1, "cmdoption-waybacktweets-to", false]], "waybacktweets.api.export_tweets": [[0, "module-waybacktweets.api.export_tweets", false]], "waybacktweets.api.parse_tweets": [[0, "module-waybacktweets.api.parse_tweets", false]], "waybacktweets.api.request_tweets": [[0, "module-waybacktweets.api.request_tweets", false]], "waybacktweets.api.viz_tweets": [[0, "module-waybacktweets.api.viz_tweets", false]], "waybacktweets.utils.utils": [[0, "module-waybacktweets.utils.utils", false]]}, "objects": {"waybacktweets": 
[[1, 4, 1, "cmdoption-waybacktweets-collapse", "--collapse"], [1, 4, 1, "cmdoption-waybacktweets-from", "--from"], [1, 4, 1, "cmdoption-waybacktweets-limit", "--limit"], [1, 4, 1, "cmdoption-waybacktweets-matchtype", "--matchtype"], [1, 4, 1, "cmdoption-waybacktweets-offset", "--offset"], [1, 4, 1, "cmdoption-waybacktweets-to", "--to"], [1, 4, 1, "cmdoption-waybacktweets-arg-USERNAME", "USERNAME"]], "waybacktweets.api": [[0, 0, 0, "-", "export_tweets"], [0, 0, 0, "-", "parse_tweets"], [0, 0, 0, "-", "request_tweets"], [0, 0, 0, "-", "viz_tweets"]], "waybacktweets.api.export_tweets": [[0, 1, 1, "", "TweetsExporter"]], "waybacktweets.api.export_tweets.TweetsExporter": [[0, 2, 1, "", "save_to_csv"], [0, 2, 1, "", "save_to_html"], [0, 2, 1, "", "save_to_json"]], "waybacktweets.api.parse_tweets": [[0, 1, 1, "", "JsonParser"], [0, 1, 1, "", "TweetsParser"], [0, 1, 1, "", "TwitterEmbed"]], "waybacktweets.api.parse_tweets.JsonParser": [[0, 2, 1, "", "parse"]], "waybacktweets.api.parse_tweets.TweetsParser": [[0, 2, 1, "", "parse"]], "waybacktweets.api.parse_tweets.TwitterEmbed": [[0, 2, 1, "", "embed"]], "waybacktweets.api.request_tweets": [[0, 1, 1, "", "WaybackTweets"]], "waybacktweets.api.request_tweets.WaybackTweets": [[0, 2, 1, "", "get"]], "waybacktweets.api.viz_tweets": [[0, 1, 1, "", "HTMLTweetsVisualizer"]], "waybacktweets.api.viz_tweets.HTMLTweetsVisualizer": [[0, 2, 1, "", "generate"], [0, 2, 1, "", "save"]], "waybacktweets.utils": [[0, 0, 0, "-", "utils"]], "waybacktweets.utils.utils": [[0, 3, 1, "", "check_double_status"], [0, 3, 1, "", "check_pattern_tweet"], [0, 3, 1, "", "clean_tweet_url"], [0, 3, 1, "", "clean_wayback_machine_url"], [0, 3, 1, "", "delete_tweet_pathnames"], [0, 3, 1, "", "get_response"], [0, 3, 1, "", "is_tweet_url"], [0, 3, 1, "", "parse_date"], [0, 3, 1, "", "semicolon_parser"]]}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"], "3": ["py", "function", "Python function"], "4": ["std", "cmdoption", "program option"]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method", "3": "py:function", "4": "std:cmdoption"}, "terms": {"": [0, 1, 2, 8], "0": [4, 7], "1": 1, "10": 1, "1000": 1, "11": 2, "14": [1, 8], "2": 1, "2013022601": 1, "20130226010000": 1, "20130226010800": 1, "20150101": 7, "20191231": 7, "22": 9, "25": 9, "250": 7, "2nd": 1, "3": [2, 4, 9], "32": [1, 8], "3b": 0, "4": 1, "400": 0, "511": 0, "73": 10, "A": [0, 1, 8, 9], "For": [1, 9], "If": [0, 1, 2, 8, 9], "It": [1, 8, 9], "One": 9, "Such": [1, 8], "The": [0, 1, 2, 3, 8, 9, 11], "These": [2, 3], "To": 1, "about": [1, 2], "access": [7, 9], "account": [8, 9], "actual": 8, "add": [0, 1, 10], "adjac": 1, "affect": 1, "after": [1, 8], "all": [0, 1, 2], "allow": [1, 9], "alreadi": 5, "also": [1, 2, 11], "alwai": [9, 11], "am": 9, "an": [0, 2, 9], "ani": [0, 8], "api": [1, 2, 6, 7, 9, 10, 11], "aplic": 9, "app": [0, 2, 5, 10], "appli": [9, 10], "applic": [0, 3, 7, 8, 9, 10, 11], "ar": [0, 1, 2, 3, 8, 11], "archiv": [0, 1, 3, 4, 8, 9, 11], "archived_digest": 8, "archived_length": 8, "archived_mimetyp": 8, "archived_statuscod": 8, "archived_timestamp": [0, 8], "archived_tweet": 7, "archived_tweet_url": [0, 8], "archived_tweets_respons": 0, "archived_urlkei": 8, "argument": 1, "ari": 9, "arsip": 9, "associ": 0, "avail": [0, 1, 8, 9], "available_tweet_info": 8, "available_tweet_is_rt": 8, "available_tweet_text": 8, "avoid": [9, 10], "base": [1, 8, 9], "begin": 1, "being": [0, 3], "bellingcat": 9, "below": 1, "ben": 9, 
"best": [2, 11], "beta": 1, "beyond": 1, "blank": 8, "bool": [0, 8], "boolean": 0, "brief": 2, "bug": 2, "bulk": 9, "byte": 8, "calendar": 1, "call": 9, "can": [1, 2, 8, 9], "canon": [1, 8], "captur": 1, "cd": 5, "cdx": [0, 1, 3, 4, 9, 11], "certain": [0, 1], "chang": [4, 5, 9], "charact": 1, "check": [0, 3, 8, 9], "check_double_statu": [0, 6], "check_pattern_tweet": [0, 6], "clarissa": 9, "clarom": [2, 5], "class": 0, "clean": 0, "clean_tweet_url": [0, 6], "clean_wayback_machine_url": [0, 6], "cli": [2, 4, 5], "click": 0, "clone": 5, "cloud": [7, 9], "code": [0, 2, 8, 10], "collaps": [0, 4, 7, 9], "com": [0, 1, 2, 3, 5], "command": [1, 2, 7, 10], "comment": 4, "common": [0, 3], "commun": 4, "compar": 1, "complet": 10, "compress": 8, "condit": 0, "connect": 3, "connectionerror": 4, "contact": 2, "contain": 0, "content": [0, 1, 8], "contribut": 4, "correct": 0, "correspond": 8, "could": 3, "creat": [9, 10], "csv": [0, 4, 8], "ctx": 0, "current": 0, "daili": 9, "data": [0, 1, 3, 4, 9, 11], "datafram": 0, "date": [0, 1, 8, 9], "datetim": [1, 8], "deeper": 9, "default": [0, 1], "defin": 11, "delet": 9, "delete_tweet_pathnam": [0, 6], "delight": 9, "dens": 1, "depend": 5, "develop": 2, "dict": 0, "digest": [1, 8], "digit": [1, 8], "directori": [2, 5], "displai": 9, "do": 9, "doc": [2, 10], "document": [1, 2, 5], "doe": 0, "domain": 1, "donat": 2, "done": 1, "dot": 2, "down": [3, 9], "due": [3, 11], "duplic": [0, 1], "e501": 0, "each": [0, 9], "effect": 9, "eg": 1, "either": 2, "elsewher": 1, "email": 2, "emb": [0, 6], "empti": 8, "encod": [1, 8], "end": 1, "ensur": 0, "entri": 8, "equival": 1, "error": 4, "eserv": [1, 8], "ess": 9, "establish": 3, "ex": 1, "exact": [1, 7], "exactli": [0, 1], "exampl": [1, 8], "exceed": 3, "excel": 9, "except": 1, "exclud": [1, 8], "exist": 9, "experiment": [0, 3], "explan": 2, "export": [4, 6], "export_tweet": 0, "extract": [0, 1, 8], "facilit": 11, "fail": 3, "fals": 0, "feed": 3, "field": [0, 1, 8, 9], "field_opt": 0, "file": [0, 8], "filter": [1, 4], "find": 9, "first": [0, 1], "flow": 11, "flowchart": [9, 11], "follow": [2, 8, 11], "form": 1, "format": [0, 1, 4, 8, 11], "framework": [7, 9], "from": [0, 1, 2, 3, 4, 7, 8, 9, 11], "fun": 9, "function": [0, 2, 8, 10], "gem": 9, "gener": [0, 2, 6], "get": [0, 6, 7], "get_respons": [0, 6], "gijn": 9, "git": 5, "github": [2, 5, 9], "given": 1, "goe": 0, "gone": 9, "googl": 1, "gpl": 4, "gunakan": 9, "ha": [8, 9], "hack": 4, "handl": [3, 10], "hash": [1, 8], "have": [2, 8, 9], "haven": 5, "header": [1, 8], "help": [2, 9], "helper": 2, "henk": 9, "here": 2, "hidden": 9, "host": [0, 1, 7, 9], "hour": 1, "how": 10, "howev": 1, "html": [0, 4, 8], "html_content": 0, "html_file_path": 0, "htmltweetsvisu": [0, 6], "http": [1, 3, 8], "httperror": 4, "i": [0, 1, 2, 3, 8, 9, 10, 11], "id": 0, "implicitli": 1, "import": 7, "improv": 2, "includ": [0, 8], "index": [1, 4, 8], "indic": 0, "indonesia": 9, "inform": [0, 3], "input": 0, "instal": [2, 4], "instruct": 2, "int": [0, 8], "integ": 1, "interest": 2, "interfac": 2, "internet": [0, 3], "internetarch": 3, "irina_tech_tip": 9, "is_tweet_url": [0, 6], "issu": [2, 3, 9, 10], "jack": 7, "json": [0, 3, 4, 8, 9, 10, 11], "json_cont": 0, "json_file_path": 0, "jsonpars": [0, 6, 10], "kei": [1, 8, 9], "know": 9, "lack": 9, "latest": 3, "launch": 9, "least": 1, "licens": 4, "limit": [0, 1, 7, 9, 10, 11], "line": [0, 1, 2, 7, 10], "link": 9, "list": 0, "long": 3, "look": [1, 9], "love": 9, "machin": [0, 1, 4, 9], "mai": 1, "main": 2, "maintain": 2, "mainten": 3, "mani": 9, "manual": 9, 
"match": [0, 4, 9], "matchtyp": [0, 1, 7], "max": 3, "maximum": 0, "member": 9, "mempermudah": 9, "mend": 9, "messag": 3, "met": 0, "mimetyp": [0, 3, 8, 9, 10, 11], "mode": 1, "modul": [2, 4], "moment": 1, "more": [1, 9], "most": [1, 3, 9], "mous": 11, "multipl": 9, "my": 9, "myosinttip": 9, "n": 1, "name": 8, "necessari": [0, 1, 4], "network": 3, "new": [1, 3, 9], "newslett": 9, "none": 0, "noqa": 0, "note": [0, 1], "now": 9, "number": 0, "obtain": 11, "occur": 3, "occurr": 0, "offici": 1, "offlin": 3, "offset": [0, 1, 7], "often": 3, "old": [1, 9], "omit": 1, "onc": 0, "one": [1, 8], "onli": [1, 9, 11], "onward": 1, "open": [2, 9], "option": [1, 7, 9], "order": 1, "org": [1, 3, 8, 9], "origin": [0, 8, 9], "original_tweet_url": [0, 8], "osint": 9, "other": [1, 8], "otherwis": 0, "our": 9, "out": [1, 3, 11], "output": 3, "over": 9, "overal": 0, "overload": 3, "packag": [0, 2, 3, 8], "page": [1, 4, 9], "pagin": 4, "param": [0, 1], "paramet": 0, "pars": [1, 3, 4, 6, 8, 9, 11], "parse_d": [0, 6, 7], "parse_tweet": [0, 10], "parsed_archived_tweet_url": 8, "parsed_tweet_url": 8, "path": [0, 1], "pathnam": 0, "pattern": 0, "payload": 8, "penelusuran": 9, "per": [1, 9], "perform": [1, 3, 4], "phase": 0, "pip": 4, "pleas": [3, 9], "poetri": [2, 5], "possibl": [1, 11], "prefix": [0, 1, 9], "prerequisit": 2, "price": 9, "print": 0, "print_progress": 0, "progress": 0, "project": 2, "propos": 11, "prototyp": [7, 9], "provid": 0, "publish": 0, "py": [5, 10], "python": [2, 7, 9], "queri": 1, "quickli": 9, "quickstart": 4, "rais": 3, "rang": [0, 1, 9], "rate": [9, 10], "re": [2, 9], "read": [1, 5, 11], "readtimeout": 4, "recommend": 1, "record": 8, "regardless": 11, "remov": 0, "replac": 0, "report": 2, "repositori": 5, "repres": 0, "represent": [1, 8], "request": [3, 4, 6], "request_tweet": 0, "requir": 1, "research": 9, "respect": 0, "respond": 3, "respons": 0, "result": [0, 1, 4, 11], "retri": 3, "retriev": [0, 1, 4, 11], "return": [0, 1, 3, 8, 9, 10], "retweet": 8, "revisit": 8, "run": 5, "same": 8, "save": [0, 1, 3, 4, 6, 8, 9, 10, 11], "save_to_csv": [0, 6], "save_to_html": [0, 6], "save_to_json": [0, 6], "scope": [4, 9], "scroll": 1, "search": [1, 4], "second": 0, "see": 9, "semicolon": 0, "semicolon_pars": [0, 6], "send": 0, "separ": 10, "server": [0, 1, 3, 9, 11], "servic": [0, 3], "set": 1, "sha1": [1, 8], "show": 1, "similar": 1, "simpl": 1, "sinc": 1, "size": 8, "skill": 2, "skip": 0, "slow": 1, "snapshot": 8, "so": 9, "sourc": [2, 4, 9], "specifi": 0, "sphinx": 2, "sponsor": 4, "standalon": 7, "start": [0, 1], "statu": [0, 1, 3, 8, 9], "status": 0, "statuscod": 8, "still": [0, 8, 9], "stop": 0, "str": [0, 1, 8], "streamlit": [0, 2, 5, 7, 9], "string": [0, 1, 8], "structur": [0, 9], "subdomain": [0, 1], "subhost": 1, "substr": 1, "success": 0, "suggest": 2, "suppli": [1, 8], "support": 2, "surt": 1, "t": [5, 9], "take": 3, "tc": [1, 8], "temporarili": 3, "test": [1, 4], "text": [0, 1, 8], "thei": 0, "therefor": 11, "thi": [0, 1, 2, 3, 8, 9], "third": 0, "those": 9, "three": [0, 1, 8], "through": [0, 1], "time": [1, 3, 8], "timestamp": [0, 1, 3], "timestamp_from": [0, 7], "timestamp_to": [0, 7], "todo": 4, "too": [1, 3], "tool": [1, 7, 9, 11], "train": 9, "transform": [1, 8], "true": 0, "tupl": 0, "tutori": 10, "tweet": [0, 1, 2, 7, 8, 9, 10, 11], "tweet_id": 3, "tweet_url": 0, "tweetsexport": [0, 6], "tweetspars": [0, 6], "twitter": [0, 1, 3, 8, 9], "twitter_url": 0, "twitteremb": [0, 6], "two": 0, "type": [0, 1], "unavail": 9, "under": [1, 2], "uniqu": [1, 9], "unk": 8, "untuk": 9, "up": [1, 
9], "url": [0, 3, 4, 8, 9], "urlkei": [1, 7, 9], "us": [0, 1, 2, 4, 7, 8, 9, 11], "usag": 4, "user": 9, "usernam": [0, 1, 3, 7], "usual": [1, 8], "util": [2, 4, 6, 7, 8, 9], "valu": [0, 1, 8], "van": 9, "veri": 2, "via": [2, 9, 10], "view": 9, "visual": [4, 6], "viz_tweet": 0, "wa": [0, 3, 11], "wai": [1, 2, 9, 11], "want": 9, "warc": 8, "warn": 3, "wayback": [0, 1, 2, 7, 9, 11], "wayback_machine_url": 0, "waybacktweet": [0, 2, 3, 5, 7, 10], "we": [1, 9], "web": [0, 1, 3, 10], "welcom": 2, "what": 9, "when": [0, 1, 3, 9, 10], "where": [0, 1], "whether": [0, 8], "which": [0, 8, 9], "who": 9, "wildcard": 1, "without": [0, 1], "workflow": 4, "would": 3, "written": [7, 9, 11], "x": 9, "xx": 1, "year": 1, "you": [1, 2, 5, 8, 9], "yyyymmdd": [0, 1], "yyyymmddhhmmss": [1, 8], "zoom": 11}, "titles": ["API", "CLI", "Contribute", "Errors", "Wayback Tweets", "Installation", "waybacktweets", "Quickstart", "Result", "Web App", "TODO", "Workflow"], "titleterms": {"addit": 4, "api": [0, 4], "app": [4, 7, 9], "cli": [1, 7], "collaps": 1, "command": 4, "comment": 9, "commun": 9, "connectionerror": 3, "contribut": 2, "document": 4, "error": 3, "export": 0, "filter": 9, "from": 5, "guid": 4, "hack": 2, "httperror": 3, "indic": 4, "inform": 4, "instal": 5, "interfac": 4, "line": 4, "match": 1, "modul": 7, "pagin": 9, "pars": 0, "pip": 5, "quickstart": 7, "readtimeout": 3, "refer": 4, "request": 0, "result": 8, "scope": 1, "sourc": 5, "sponsor": 2, "streamlit": 4, "tabl": 4, "test": 2, "todo": 10, "tweet": 4, "url": 1, "us": 5, "usag": 1, "user": 4, "util": 0, "visual": 0, "wayback": 4, "waybacktweet": [1, 6], "web": [4, 7, 9], "workflow": 11}}) \ No newline at end of file +Search.setIndex({"alltitles": {"API": [[0, "api"]], "API Reference": [[4, "api-reference"]], "Additional Information": [[4, "additional-information"]], "CLI": [[1, "cli"], [7, "cli"]], "Collapsing": [[1, "collapsing"]], "Command-Line Interface": [[4, "command-line-interface"]], "Community Comments": [[9, "community-comments"]], "ConnectionError": [[3, "connectionerror"]], "Contribute": [[2, "contribute"]], "Exceptions": [[3, "exceptions"]], "Export": [[0, "module-waybacktweets.api.export_tweets"]], "Filters": [[9, "filters"]], "From source": [[5, "from-source"]], "HTTPError": [[3, "httperror"]], "Hacking": [[2, "hacking"]], "Indices and tables": [[4, "indices-and-tables"]], "Installation": [[5, "installation"]], "Module": [[7, "module"]], "Pagination": [[9, "pagination"]], "Parse": [[0, "module-waybacktweets.api.parse_tweets"]], "Quickstart": [[7, "quickstart"]], "ReadTimeout": [[3, "readtimeout"]], "Request": [[0, "module-waybacktweets.api.request_tweets"]], "Result": [[8, "result"]], "Sponsoring": [[2, "sponsoring"]], "Streamlit Web App": [[4, "streamlit-web-app"]], "TODO": [[10, "todo"]], "Testing": [[2, "testing"]], "URL Match Scope": [[1, "url-match-scope"]], "Usage": [[1, "usage"]], "User Guide": [[4, "user-guide"]], "Using pip": [[5, "using-pip"]], "Utils": [[0, "module-waybacktweets.utils.utils"]], "Visualizer": [[0, "module-waybacktweets.api.viz_tweets"]], "Wayback Tweets": [[4, "wayback-tweets"]], "Web App": [[7, "web-app"], [9, "web-app"]], "Workflow": [[11, "workflow"]], "waybacktweets": [[1, "waybacktweets"], [6, "waybacktweets"]]}, "docnames": ["api", "cli", "contribute", "exceptions", "index", "installation", "modules", "quickstart", "result", "streamlit", "todo", "workflow"], "envversion": {"sphinx": 61, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, 
"sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1}, "filenames": ["api.rst", "cli.rst", "contribute.rst", "exceptions.rst", "index.rst", "installation.rst", "modules.rst", "quickstart.rst", "result.rst", "streamlit.rst", "todo.rst", "workflow.rst"], "indexentries": {"--collapse": [[1, "cmdoption-waybacktweets-collapse", false]], "--from": [[1, "cmdoption-waybacktweets-from", false]], "--limit": [[1, "cmdoption-waybacktweets-limit", false]], "--matchtype": [[1, "cmdoption-waybacktweets-matchtype", false]], "--offset": [[1, "cmdoption-waybacktweets-offset", false]], "--to": [[1, "cmdoption-waybacktweets-to", false]], "check_double_status() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.check_double_status", false]], "check_pattern_tweet() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.check_pattern_tweet", false]], "clean_tweet_url() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.clean_tweet_url", false]], "clean_wayback_machine_url() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.clean_wayback_machine_url", false]], "delete_tweet_pathnames() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.delete_tweet_pathnames", false]], "embed() (waybacktweets.api.parse_tweets.twitterembed method)": [[0, "waybacktweets.api.parse_tweets.TwitterEmbed.embed", false]], "generate() (waybacktweets.api.viz_tweets.htmltweetsvisualizer method)": [[0, "waybacktweets.api.viz_tweets.HTMLTweetsVisualizer.generate", false]], "get() (waybacktweets.api.request_tweets.waybacktweets method)": [[0, "waybacktweets.api.request_tweets.WaybackTweets.get", false]], "get_response() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.get_response", false]], "htmltweetsvisualizer (class in waybacktweets.api.viz_tweets)": [[0, "waybacktweets.api.viz_tweets.HTMLTweetsVisualizer", false]], "is_tweet_url() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.is_tweet_url", false]], "jsonparser (class in waybacktweets.api.parse_tweets)": [[0, "waybacktweets.api.parse_tweets.JsonParser", false]], "module": [[0, "module-waybacktweets.api.export_tweets", false], [0, "module-waybacktweets.api.parse_tweets", false], [0, "module-waybacktweets.api.request_tweets", false], [0, "module-waybacktweets.api.viz_tweets", false], [0, "module-waybacktweets.utils.utils", false]], "parse() (waybacktweets.api.parse_tweets.jsonparser method)": [[0, "waybacktweets.api.parse_tweets.JsonParser.parse", false]], "parse() (waybacktweets.api.parse_tweets.tweetsparser method)": [[0, "waybacktweets.api.parse_tweets.TweetsParser.parse", false]], "save() (waybacktweets.api.viz_tweets.htmltweetsvisualizer method)": [[0, "waybacktweets.api.viz_tweets.HTMLTweetsVisualizer.save", false]], "save_to_csv() (waybacktweets.api.export_tweets.tweetsexporter method)": [[0, "waybacktweets.api.export_tweets.TweetsExporter.save_to_csv", false]], "save_to_html() (waybacktweets.api.export_tweets.tweetsexporter method)": [[0, "waybacktweets.api.export_tweets.TweetsExporter.save_to_html", false]], "save_to_json() (waybacktweets.api.export_tweets.tweetsexporter method)": [[0, "waybacktweets.api.export_tweets.TweetsExporter.save_to_json", false]], "semicolon_parser() (in module waybacktweets.utils.utils)": [[0, "waybacktweets.utils.utils.semicolon_parser", false]], "tweetsexporter (class 
in waybacktweets.api.export_tweets)": [[0, "waybacktweets.api.export_tweets.TweetsExporter", false]], "tweetsparser (class in waybacktweets.api.parse_tweets)": [[0, "waybacktweets.api.parse_tweets.TweetsParser", false]], "twitterembed (class in waybacktweets.api.parse_tweets)": [[0, "waybacktweets.api.parse_tweets.TwitterEmbed", false]], "username": [[1, "cmdoption-waybacktweets-arg-USERNAME", false]], "waybacktweets (class in waybacktweets.api.request_tweets)": [[0, "waybacktweets.api.request_tweets.WaybackTweets", false]], "waybacktweets command line option": [[1, "cmdoption-waybacktweets-arg-USERNAME", false], [1, "cmdoption-waybacktweets-collapse", false], [1, "cmdoption-waybacktweets-from", false], [1, "cmdoption-waybacktweets-limit", false], [1, "cmdoption-waybacktweets-matchtype", false], [1, "cmdoption-waybacktweets-offset", false], [1, "cmdoption-waybacktweets-to", false]], "waybacktweets.api.export_tweets": [[0, "module-waybacktweets.api.export_tweets", false]], "waybacktweets.api.parse_tweets": [[0, "module-waybacktweets.api.parse_tweets", false]], "waybacktweets.api.request_tweets": [[0, "module-waybacktweets.api.request_tweets", false]], "waybacktweets.api.viz_tweets": [[0, "module-waybacktweets.api.viz_tweets", false]], "waybacktweets.utils.utils": [[0, "module-waybacktweets.utils.utils", false]]}, "objects": {"waybacktweets": [[1, 4, 1, "cmdoption-waybacktweets-collapse", "--collapse"], [1, 4, 1, "cmdoption-waybacktweets-from", "--from"], [1, 4, 1, "cmdoption-waybacktweets-limit", "--limit"], [1, 4, 1, "cmdoption-waybacktweets-matchtype", "--matchtype"], [1, 4, 1, "cmdoption-waybacktweets-offset", "--offset"], [1, 4, 1, "cmdoption-waybacktweets-to", "--to"], [1, 4, 1, "cmdoption-waybacktweets-arg-USERNAME", "USERNAME"]], "waybacktweets.api": [[0, 0, 0, "-", "export_tweets"], [0, 0, 0, "-", "parse_tweets"], [0, 0, 0, "-", "request_tweets"], [0, 0, 0, "-", "viz_tweets"]], "waybacktweets.api.export_tweets": [[0, 1, 1, "", "TweetsExporter"]], "waybacktweets.api.export_tweets.TweetsExporter": [[0, 2, 1, "", "save_to_csv"], [0, 2, 1, "", "save_to_html"], [0, 2, 1, "", "save_to_json"]], "waybacktweets.api.parse_tweets": [[0, 1, 1, "", "JsonParser"], [0, 1, 1, "", "TweetsParser"], [0, 1, 1, "", "TwitterEmbed"]], "waybacktweets.api.parse_tweets.JsonParser": [[0, 2, 1, "", "parse"]], "waybacktweets.api.parse_tweets.TweetsParser": [[0, 2, 1, "", "parse"]], "waybacktweets.api.parse_tweets.TwitterEmbed": [[0, 2, 1, "", "embed"]], "waybacktweets.api.request_tweets": [[0, 1, 1, "", "WaybackTweets"]], "waybacktweets.api.request_tweets.WaybackTweets": [[0, 2, 1, "", "get"]], "waybacktweets.api.viz_tweets": [[0, 1, 1, "", "HTMLTweetsVisualizer"]], "waybacktweets.api.viz_tweets.HTMLTweetsVisualizer": [[0, 2, 1, "", "generate"], [0, 2, 1, "", "save"]], "waybacktweets.utils": [[0, 0, 0, "-", "utils"]], "waybacktweets.utils.utils": [[0, 3, 1, "", "check_double_status"], [0, 3, 1, "", "check_pattern_tweet"], [0, 3, 1, "", "clean_tweet_url"], [0, 3, 1, "", "clean_wayback_machine_url"], [0, 3, 1, "", "delete_tweet_pathnames"], [0, 3, 1, "", "get_response"], [0, 3, 1, "", "is_tweet_url"], [0, 3, 1, "", "semicolon_parser"]]}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"], "3": ["py", "function", "Python function"], "4": ["std", "cmdoption", "program option"]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method", "3": "py:function", "4": "std:cmdoption"}, "terms": {"": [0, 1, 2, 8], "0": 4, "1": [1, 10], 
"10": 1, "1000": 1, "11": 2, "14": [1, 8], "2": [1, 10], "2013022601": 1, "20130226010000": 1, "20130226010800": 1, "20150101": 7, "20191231": 7, "22": 9, "25": 9, "250": 7, "2nd": 1, "3": [2, 4, 9], "32": [1, 8], "3b": 0, "4": 1, "73": 10, "A": [0, 1, 8, 9], "For": [1, 9], "If": [0, 1, 2, 8, 9], "It": [1, 8, 9], "Not": 10, "One": 9, "Such": [1, 8], "The": [0, 1, 2, 3, 8, 9, 11], "These": [2, 3], "To": 1, "about": [1, 2], "access": 9, "account": [8, 9], "actual": 8, "add": [0, 1, 10], "adjac": 1, "affect": 1, "after": [1, 8], "all": [0, 1, 2], "allow": [1, 9], "alreadi": 5, "also": [1, 2, 11], "alwai": [9, 11], "am": 9, "an": [0, 2, 9], "ani": [0, 8], "api": [1, 2, 6, 7, 9, 10, 11], "aplic": 9, "app": [0, 2, 5], "appli": [9, 10], "applic": [0, 3, 7, 8, 9, 11], "ar": [0, 1, 2, 3, 8, 11], "archiv": [0, 1, 3, 4, 8, 9, 10, 11], "archived_digest": [0, 8], "archived_length": [0, 8], "archived_mimetyp": [0, 8], "archived_statuscod": [0, 7, 8], "archived_timestamp": [0, 7, 8], "archived_tweet": 7, "archived_tweet_url": [0, 7, 8], "archived_tweets_respons": 0, "archived_urlkei": [0, 8], "argument": 1, "ari": 9, "arsip": 9, "asset": 2, "associ": 0, "avail": [0, 1, 8, 9], "available_tweet_info": [0, 8], "available_tweet_is_rt": [0, 8], "available_tweet_text": [0, 8], "avoid": [9, 10], "base": [1, 8, 9], "begin": 1, "being": [0, 3], "bellingcat": 9, "below": 1, "ben": 9, "best": [2, 11], "beta": 1, "blank": 8, "bool": [0, 8], "boolean": 0, "brief": 2, "bug": 2, "bulk": 9, "byte": 8, "calendar": 1, "call": 9, "can": [1, 2, 8, 9], "canon": [1, 8], "captur": 1, "cd": 5, "cdx": [0, 1, 3, 4, 9, 11], "certain": [0, 1], "chang": [4, 5, 9], "charact": 1, "check": [0, 3, 8, 9], "check_double_statu": [0, 6], "check_pattern_tweet": [0, 6], "clarissa": 9, "clarom": [2, 5], "class": 0, "clean": 0, "clean_tweet_url": [0, 6], "clean_wayback_machine_url": [0, 6], "cli": [2, 4, 5], "clone": 5, "cloud": [7, 9], "code": [2, 8, 10], "collaps": [0, 4, 9], "com": [0, 1, 2, 3, 5], "command": [1, 2, 7, 10], "comment": 4, "common": [0, 3], "commun": 4, "compar": 1, "compress": 8, "condit": 0, "connect": 3, "connectionerror": 4, "contact": 2, "contain": 0, "content": [0, 1, 8], "contribut": 4, "correct": 0, "correspond": 8, "could": 3, "creat": [9, 10], "csv": [0, 4, 8], "current": 0, "daili": 9, "data": [0, 1, 3, 4, 9, 11], "datafram": 0, "date": [1, 8, 9], "datetim": [1, 8], "deeper": 9, "default": 1, "defin": 11, "delet": 9, "delete_tweet_pathnam": [0, 6], "delight": 9, "dens": 1, "depend": 5, "develop": [2, 10], "dict": 0, "digest": [1, 8], "digit": [1, 8], "directori": [2, 5], "displai": 9, "do": 9, "doc": [2, 10], "document": [1, 2, 5], "doe": 0, "domain": 1, "donat": 2, "done": 1, "dot": 2, "down": [3, 9], "download": 10, "due": [3, 11], "duplic": [0, 1], "each": [0, 9], "effect": 9, "eg": 1, "either": 2, "elsewher": 1, "email": 2, "emb": [0, 6], "empti": 8, "encod": [1, 8], "end": 1, "ensur": 0, "entri": 8, "equival": 1, "error": [0, 3], "eserv": [1, 8], "ess": 9, "establish": 3, "ex": 1, "exact": 1, "exactli": [0, 1], "exampl": [1, 8], "exceed": 3, "excel": 9, "except": [0, 1, 4], "exclud": [1, 8], "exist": 9, "experiment": [0, 3], "explan": 2, "export": [4, 6, 7], "export_tweet": 0, "extens": 10, "extract": [0, 1, 8], "facilit": 11, "fail": 3, "fals": 0, "feed": 3, "field": [0, 1, 8, 9], "field_opt": [0, 7], "file": [0, 8], "filter": [1, 4], "find": 9, "first": [0, 1], "flow": 11, "flowchart": [9, 11], "follow": [1, 2, 8, 11], "form": 1, "format": [0, 1, 4, 8, 11], "framework": [7, 9], "from": [0, 1, 2, 3, 4, 7, 8, 
9, 10, 11], "fun": 9, "function": [0, 2, 8, 10], "gem": 9, "gener": [0, 2, 6], "get": [0, 6, 7], "get_respons": [0, 6], "gijn": 9, "git": 5, "github": [2, 5, 9], "given": 1, "goe": 0, "gone": 9, "googl": 1, "gpl": 4, "gunakan": 9, "ha": [8, 9, 10], "hack": 4, "handl": [3, 10], "hash": [1, 8], "have": [2, 8, 9], "haven": 5, "header": [1, 8], "help": [2, 9], "helper": 2, "henk": 9, "here": 2, "hidden": 9, "host": [0, 1, 7, 9], "hour": 1, "how": 10, "howev": 1, "html": [0, 4, 8], "html_content": 0, "html_file_path": 0, "htmltweetsvisu": [0, 6], "http": [1, 3, 8, 10], "httperror": 4, "i": [0, 1, 2, 3, 8, 9, 11], "id": 0, "imag": [2, 10], "implicitli": 1, "import": 7, "improv": 2, "includ": [0, 8], "index": [1, 4, 8], "indic": 0, "indonesia": 9, "inform": [0, 3], "instal": [2, 4], "instruct": 2, "int": [0, 8], "integ": 1, "interest": 2, "interfac": 2, "internet": [0, 3], "internetarch": 3, "irina_tech_tip": 9, "is_tweet_url": [0, 6], "issu": [2, 3, 9, 10], "jack": 7, "jpg": 10, "json": [0, 3, 4, 8, 9, 10, 11], "json_cont": 0, "json_file_path": 0, "jsonpars": [0, 6, 10], "kei": [1, 8, 9], "know": 9, "lack": 9, "latest": 3, "launch": 9, "least": 1, "licens": 4, "like": 10, "limit": [0, 1, 7, 9, 10, 11], "line": [0, 1, 2, 7, 10], "link": 9, "list": 0, "logo": 2, "long": 3, "look": [1, 9], "love": 9, "machin": [0, 1, 4, 9], "made": 1, "mai": 1, "main": 2, "maintain": 2, "mainten": 3, "mani": 9, "manual": 9, "map": 10, "match": [0, 4, 9], "matchtyp": [0, 1], "max": 3, "maximum": 0, "member": 9, "mempermudah": 9, "mend": 9, "messag": [0, 3], "met": 0, "mimetyp": [0, 3, 8, 9, 11], "mode": 1, "modul": [2, 4], "moment": 1, "more": [1, 9], "most": [1, 3, 9], "mous": 11, "multipl": 9, "my": 9, "myosinttip": 9, "n": 1, "name": 8, "necessari": [0, 1, 4], "network": 3, "new": [1, 3, 9], "newslett": 9, "none": 0, "note": [0, 1], "now": 9, "number": 0, "obtain": 11, "occur": 3, "occurr": 0, "offici": 1, "offlin": 3, "offset": [0, 1], "often": 3, "old": [1, 9], "omit": 1, "onc": 0, "one": [1, 8], "onli": [1, 9, 11], "onward": 1, "open": [2, 7, 9], "option": [0, 1, 7, 9], "order": 1, "org": [1, 3, 8, 9], "origin": [0, 8, 9], "original_tweet_url": [0, 7, 8], "osint": 9, "other": [1, 8, 10], "otherwis": 0, "our": 9, "out": [1, 3, 11], "output": 3, "over": 9, "overal": 0, "overload": 3, "packag": [1, 2, 3, 8], "page": [1, 4, 9], "pagin": 4, "param": [0, 1], "paramet": [0, 1], "pars": [1, 3, 4, 6, 7, 8, 9, 10, 11], "parse_tweet": [0, 10], "parsed_archived_tweet_url": [0, 8], "parsed_tweet": 7, "parsed_tweet_url": [0, 8], "parser": 7, "path": [0, 1], "pathnam": [0, 1], "pattern": 0, "payload": 8, "penelusuran": 9, "per": [1, 9], "perform": [1, 3, 4], "phase": 0, "pip": 4, "plan": 10, "pleas": [3, 9], "png": 10, "poetri": [2, 5], "possibl": [1, 11], "prefix": [0, 1, 9], "prerequisit": 2, "price": 9, "print": 0, "print_progress": 0, "progress": 0, "project": 2, "propos": 11, "prototyp": [7, 9], "provid": [0, 1], "publish": 0, "py": [5, 10], "python": [2, 7, 9], "queri": 1, "quickli": 9, "quickstart": 4, "rais": 3, "rang": [1, 9], "rate": [9, 10], "re": [2, 9], "read": [1, 5, 11], "readtimeout": 4, "recommend": 1, "record": 8, "regardless": 11, "relat": 10, "remov": 0, "replac": 0, "report": 2, "repositori": 5, "repres": 0, "represent": [1, 8], "request": [3, 4, 6], "request_tweet": 0, "requir": 1, "research": 9, "respect": 0, "respond": 3, "respons": 0, "result": [0, 1, 4, 11], "retri": 3, "retriev": [0, 1, 4, 11], "return": [0, 1, 3, 8, 9, 10], "retweet": 8, "revisit": 8, "run": 5, "same": 8, "save": [0, 1, 3, 4, 6, 
8, 9, 10, 11], "save_to_csv": [0, 6, 7], "save_to_html": [0, 6], "save_to_json": [0, 6], "scope": [4, 9], "scraper": 10, "scroll": 1, "search": [1, 4], "second": 0, "see": 9, "semicolon": 0, "semicolon_pars": [0, 6], "send": 0, "separ": 10, "server": [0, 1, 3, 9, 11], "servic": [0, 3], "set": 1, "sha1": [1, 8], "show": 1, "similar": 1, "simpl": 1, "sinc": 1, "size": 8, "skill": 2, "skip": 0, "slow": 1, "snapshot": [8, 10], "so": 9, "sourc": [2, 4, 9], "specifi": 0, "sphinx": 2, "sponsor": 4, "standalon": 7, "start": [0, 1], "statu": [0, 1, 3, 8, 9], "status": 0, "statuscod": 8, "still": [0, 8, 9], "stop": 0, "str": [0, 1, 8], "streamlit": [0, 2, 5, 7, 9], "string": [0, 1, 8], "structur": [0, 9], "subdomain": [0, 1], "subhost": 1, "substr": 1, "success": 0, "suggest": 2, "suppli": [1, 8], "support": 2, "surt": 1, "t": [5, 9], "take": 3, "tc": [1, 8], "temporarili": 3, "test": [1, 4, 10], "text": [0, 1, 8], "thei": 0, "therefor": 11, "thi": [0, 1, 2, 3, 8, 9], "third": 0, "those": 9, "three": [0, 1, 8], "through": [0, 1], "time": [1, 3, 8], "timestamp": [0, 1, 3], "timestamp_from": 0, "timestamp_to": 0, "titl": 2, "todai": 10, "todo": 4, "too": [1, 3], "tool": [1, 7, 9, 11], "train": 9, "transform": [1, 8], "true": 0, "tupl": 0, "tutori": 10, "tweet": [0, 1, 2, 7, 8, 9, 10, 11], "tweet_id": 3, "tweet_url": 0, "tweetsexport": [0, 6, 7], "tweetspars": [0, 6, 7], "twitter": [0, 1, 3, 8, 9, 10], "twitter_url": 0, "twitteremb": [0, 6], "two": 0, "type": [0, 1], "unavail": 9, "under": [1, 2], "uniqu": [1, 9], "unit": 10, "unk": 8, "untuk": 9, "up": [1, 9], "url": [0, 3, 4, 8, 9, 10], "urlkei": [1, 9], "us": [0, 1, 2, 4, 7, 8, 9, 11], "usag": 4, "user": 9, "usernam": [0, 1, 3, 7], "usual": [1, 8], "util": [2, 4, 6, 8, 9], "v1": 10, "valu": [0, 1, 8], "van": 9, "veri": 2, "via": [2, 9, 10], "view": 9, "visual": [4, 6], "viz_tweet": 0, "wa": [3, 11], "wai": [1, 2, 9, 11], "want": 9, "warc": 8, "warn": 3, "wayback": [0, 1, 2, 7, 9, 11], "wayback_machine_url": 0, "waybacktweet": [0, 2, 3, 5, 7, 10], "we": [1, 9], "web": [0, 1, 3], "welcom": 2, "what": 9, "when": [0, 1, 3, 9, 10], "where": [0, 1], "whether": [0, 8], "which": [0, 8, 9], "who": 9, "wildcard": 1, "without": [0, 1], "workflow": 4, "would": 3, "written": [7, 9, 11], "x": 9, "xx": 1, "year": 1, "you": [1, 2, 5, 8, 9], "yyyymmdd": 1, "yyyymmddhhmmss": [1, 8], "zoom": 11}, "titles": ["API", "CLI", "Contribute", "Exceptions", "Wayback Tweets", "Installation", "waybacktweets", "Quickstart", "Result", "Web App", "TODO", "Workflow"], "titleterms": {"addit": 4, "api": [0, 4], "app": [4, 7, 9], "cli": [1, 7], "collaps": 1, "command": 4, "comment": 9, "commun": 9, "connectionerror": 3, "contribut": 2, "except": 3, "export": 0, "filter": 9, "from": 5, "guid": 4, "hack": 2, "httperror": 3, "indic": 4, "inform": 4, "instal": 5, "interfac": 4, "line": 4, "match": 1, "modul": 7, "pagin": 9, "pars": 0, "pip": 5, "quickstart": 7, "readtimeout": 3, "refer": 4, "request": 0, "result": 8, "scope": 1, "sourc": 5, "sponsor": 2, "streamlit": 4, "tabl": 4, "test": 2, "todo": 10, "tweet": 4, "url": 1, "us": 5, "usag": 1, "user": 4, "util": 0, "visual": 0, "wayback": 4, "waybacktweet": [1, 6], "web": [4, 7, 9], "workflow": 11}}) \ No newline at end of file diff --git a/streamlit.html b/streamlit.html index edbd362..e44557b 100644 --- a/streamlit.html +++ b/streamlit.html @@ -42,6 +42,7 @@

    Navigation

    Web App

Application that displays multiple archived tweets on the Wayback Machine to avoid opening each link manually. The application is a prototype written in Python with the Streamlit framework and hosted on Streamlit Cloud, allowing users to apply filters and view tweets that lack the original URL.

    +

    Open the application.

    Filters

      @@ -129,7 +130,7 @@

      Quick search

      diff --git a/todo.html b/todo.html index 2d72313..af92f06 100644 --- a/todo.html +++ b/todo.html @@ -45,9 +45,12 @@

      Navigation

      TODO

      -

      Code: JSON Issue: Create a separate function to handle JSON return, apply JsonParser (waybacktweets/api/parse_tweets.py:73), and avoid rate limiting

      -

      Docs: Add tutorial on how to save Tweet via command line

      -

      Web App: Return complete JSON when mimetype is application/json

      +

      Code: JSON Issue: Create a separate function to handle JSON return, apply JsonParser (waybacktweets/api/parse_tweets.py:73), and avoid rate limiting (Planned for v1.1)

      +

      Docs: Add tutorial on how to save Tweet via command line (Planned for v1.1)

      +

      Code: Download images when tweet URL has extensions like JPG or PNG (Planned for v1.2)

      +

      Code: Develop a scraper to download snapshots from https://archive.today (Not planned)

      +

      Code: Unit Tests (Planned)

      +

      Code: Mapping and parsing of other Twitter-related URLs (Planned)

      @@ -89,7 +92,7 @@

      Quick search

      diff --git a/workflow.html b/workflow.html index a7b3089..e6819ab 100644 --- a/workflow.html +++ b/workflow.html @@ -17,7 +17,7 @@ From 479e75d09d7cbefd2cfd5afa30e909dcde8f4083 Mon Sep 17 00:00:00 2001 From: claromes Date: Mon, 17 Jun 2024 01:58:49 +0000 Subject: [PATCH 07/63] =?UTF-8?q?Deploying=20to=20gh-pages=20from=20@=20cl?= =?UTF-8?q?aromes/waybacktweets@90a8611c2f4ee85ae8647995e3095c0605a97c0e?= =?UTF-8?q?=20=F0=9F=9A=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- _sources/api.rst.txt | 61 ++++++-- _sources/cli.rst.txt | 2 +- _sources/contribute.rst.txt | 5 +- _sources/exceptions.rst.txt | 7 +- _sources/todo.rst.txt | 4 +- api.html | 304 ++++++++++++++++++++++-------------- cli.html | 30 ++-- contribute.html | 3 +- exceptions.html | 12 +- genindex.html | 210 +++++++++++++++++++------ index.html | 13 +- modules.html | 51 +++--- objects.inv | 6 +- py-modindex.html | 18 ++- searchindex.js | 2 +- todo.html | 3 +- workflow.html | 4 +- 17 files changed, 500 insertions(+), 235 deletions(-) diff --git a/_sources/api.rst.txt b/_sources/api.rst.txt index 12537b9..7e7ab9f 100644 --- a/_sources/api.rst.txt +++ b/_sources/api.rst.txt @@ -1,53 +1,71 @@ API ==== -Request ---------- - -.. module:: waybacktweets.api.request_tweets +Config +------------ -.. autoclass:: WaybackTweets +.. automodule:: waybacktweets.config.config :members: +Exceptions +------------ -Parse ---------- +.. automodule:: waybacktweets.exceptions.exceptions -.. module:: waybacktweets.api.parse_tweets +.. autoclass:: ReadTimeoutError + :members: -.. autoclass:: TweetsParser +.. autoclass:: ConnectionError :members: -.. autoclass:: TwitterEmbed +.. autoclass:: HTTPError :members: -.. autoclass:: JsonParser +.. autoclass:: EmptyResponseError + :members: + +.. autoclass:: GetResponseError :members: Export --------- -.. module:: waybacktweets.api.export_tweets +.. automodule:: waybacktweets.api.export .. autoclass:: TweetsExporter :members: -Visualizer ------------ +Parse +--------- -.. module:: waybacktweets.api.viz_tweets +.. automodule:: waybacktweets.api.parse -.. autoclass:: HTMLTweetsVisualizer +.. autoclass:: TweetsParser + :members: + +.. autoclass:: TwitterEmbed + :members: + +.. autoclass:: JsonParser + :members: + + +Request +--------- + +.. automodule:: waybacktweets.api.request + +.. autoclass:: WaybackTweets :members: Utils ------- -.. module:: waybacktweets.utils.utils +.. automodule:: waybacktweets.utils.utils .. autofunction:: check_double_status .. autofunction:: check_pattern_tweet @@ -57,3 +75,12 @@ Utils .. autofunction:: get_response .. autofunction:: is_tweet_url .. autofunction:: semicolon_parser + + +Visualizer +----------- + +.. automodule:: waybacktweets.api.visualize + +.. autoclass:: HTMLTweetsVisualizer + :members: diff --git a/_sources/cli.rst.txt b/_sources/cli.rst.txt index f6f19fc..2a16040 100644 --- a/_sources/cli.rst.txt +++ b/_sources/cli.rst.txt @@ -4,7 +4,7 @@ CLI Usage --------- -.. click:: waybacktweets.cli.main:cli +.. 
click:: waybacktweets._cli:main :prog: waybacktweets :nested: full diff --git a/_sources/contribute.rst.txt b/_sources/contribute.rst.txt index 84ed2cb..0191658 100644 --- a/_sources/contribute.rst.txt +++ b/_sources/contribute.rst.txt @@ -27,10 +27,11 @@ Brief explanation about the code under the Wayback Tweets directory: - ``assets``: Title and logo images - ``docs``: Documentation generated with Sphinx - ``waybacktweets/api``: Main package modules -- ``waybacktweets/cli``: Command line Interface module +- ``waybacktweets/config``: Global configuration module +- ``waybacktweets/exceptions``: Wayback Tweets Exceptions - ``waybacktweets/utils``: Helper functions used in the package Sponsoring ------------ +------------ You can also donate to the project's developer and maintainer, `Claromes `_, via `GitHub Sponsor `_ or if you are interested in sponsoring the project you can contact via email at support at claromes dot com. diff --git a/_sources/exceptions.rst.txt b/_sources/exceptions.rst.txt index 109e41b..22f0f3f 100644 --- a/_sources/exceptions.rst.txt +++ b/_sources/exceptions.rst.txt @@ -3,7 +3,7 @@ Exceptions These are the most common errors and are handled by the ``waybacktweets`` package. -ReadTimeout +ReadTimeoutError ---------------- This error occurs when a request to the web.archive.org server takes too long to respond. The server could be overloaded or there could be network issues. @@ -29,4 +29,9 @@ This error occurs when the Internet Archive services are temporarily offline. Th The output message from the package would be: ``Temporarily Offline: Internet Archive services are temporarily offline. Please check Internet Archive Twitter feed (https://twitter.com/internetarchive) for the latest information.`` +EmptyResponseError +---------------------- +This exception raised for empty responses. + +The output message from the package would be: ``No data was saved due to an empty response.`` diff --git a/_sources/todo.rst.txt b/_sources/todo.rst.txt index cda3ea5..eaced03 100644 --- a/_sources/todo.rst.txt +++ b/_sources/todo.rst.txt @@ -5,9 +5,7 @@ TODO -|uncheck| Code: JSON Issue: Create a separate function to handle JSON return, apply JsonParser (``waybacktweets/api/parse_tweets.py:73``), and avoid rate limiting (`Planned for v1.1`) - -|uncheck| Docs: Add tutorial on how to save Tweet via command line (`Planned for v1.1`) +|uncheck| Code: JSON Parser: Create a separate function to handle JSON return, apply JsonParser (``waybacktweets/api/parse.py:111``), and avoid rate limiting (`Planned for v1.1`) |uncheck| Code: Download images when tweet URL has extensions like JPG or PNG (`Planned for v1.2`) diff --git a/api.html b/api.html index 6068485..9e05172 100644 --- a/api.html +++ b/api.html @@ -45,45 +45,99 @@

      Navigation

      API

      -
      -

      Request

      +
      +

      Config

      +

      Manages global configuration settings throughout the application.

      +
      +
      +waybacktweets.config.config.config = <waybacktweets.config.config._Config object>
      +

      Configuration settings.

      +
      +
      +waybacktweets.config.config.verbose
      +

      Determines if verbose logging should be enabled.
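A minimal sketch of how this setting might be flipped from user code, assuming the shared config object documented above exposes verbose as a plain writable attribute (direct assignment is an assumption, not confirmed by this page):

    from waybacktweets.config.config import config

    # Assumption: verbose is a writable attribute on the shared _Config instance.
    config.verbose = True  # enable verbose logging for subsequent calls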

      +
      + +
      + +
      +
      +

      Exceptions

      +

      Wayback Tweets Exceptions

      -
      -class waybacktweets.api.request_tweets.WaybackTweets(username: str, collapse: str = None, timestamp_from: str = None, timestamp_to: str = None, limit: int = None, offset: int = None, matchtype: str = None)
      -

      Class responsible for requesting data from the Wayback CDX Server API.

      +
      +class waybacktweets.exceptions.exceptions.ReadTimeoutError
      +

      Exception raised for read timeout errors.

      +
      + +
      +
      +class waybacktweets.exceptions.exceptions.ConnectionError
      +

      Exception raised for connection errors.

      +
      + +
      +
      +class waybacktweets.exceptions.exceptions.HTTPError
      +

      Exception raised for HTTP errors.

      +
      + +
      +
      +class waybacktweets.exceptions.exceptions.EmptyResponseError
      +

      Exception raised for empty responses.

      +
      + +
      +
      +class waybacktweets.exceptions.exceptions.GetResponseError
      +

      Base class for exceptions in get_response.
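A hedged sketch of catching these exceptions around a request, assuming they propagate out of WaybackTweets.get() (the class and module paths are taken from this page; the control flow itself is illustrative only):

    from waybacktweets.api.request import WaybackTweets
    from waybacktweets.exceptions.exceptions import (
        ConnectionError,  # note: shadows the builtin of the same name
        EmptyResponseError,
        HTTPError,
        ReadTimeoutError,
    )

    api = WaybackTweets("jack")
    try:
        archived_tweets = api.get()
    except EmptyResponseError:
        print("No data was saved due to an empty response.")
    except (ReadTimeoutError, ConnectionError, HTTPError) as err:
        # Network-level failures documented in this Exceptions section.
        print(f"Request failed: {err}")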

      +
      + +
      +
      +

      Export

      +

      Exports the parsed archived tweets.

      +
      +
      +class waybacktweets.api.export.TweetsExporter(data: Dict[str, List[Any]], username: str, field_options: List[str])
      +

      Class responsible for exporting parsed archived tweets.

      Parameters:
        +
      • data – The parsed archived tweets data.

      • username – The username associated with the tweets.

      • -
      • collapse – The field to collapse duplicate lines on.

      • -
      • timestamp_from – The timestamp to start retrieving tweets from.

      • -
      • timestamp_to – The timestamp to stop retrieving tweets at.

      • -
      • limit – The maximum number of results to return.

      • -
      • offset – The number of lines to skip in the results.

      • -
      • matchType – Results matching a certain prefix, a certain host or all subdomains.

      • +
      • field_options – The fields to be included in the exported data. Options include “archived_urlkey”, “archived_timestamp”, “original_tweet_url”, “archived_tweet_url”, “parsed_tweet_url”, “parsed_archived_tweet_url”, “available_tweet_text”, “available_tweet_is_RT”, “available_tweet_info”, “archived_mimetype”, “archived_statuscode”, “archived_digest”, “archived_length”.

      -
      -get() Dict[str, Any] | None
      -

      Sends a GET request to the Internet Archive’s CDX API -to retrieve archived tweets.

      -
      -
      Returns:
      -

      The response from the CDX API in JSON format, if successful.

      -
      -
      +
      +save_to_csv() None
      +

      Saves the DataFrame to a CSV file.

      +
      + +
      +
      +save_to_html() None
      +

      Saves the DataFrame to an HTML file.

      +
      + +
      +
      +save_to_json() None
      +

      Saves the DataFrame to a JSON file.
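A short sketch of the export step. The data dictionary below is a hand-built stand-in for what TweetsParser.parse() normally returns, and the field names are reused from the field_options list above; output file naming is handled by the class and not shown here:

    from waybacktweets.api.export import TweetsExporter

    # Stand-in for the dictionary normally returned by TweetsParser.parse()
    data = {
        "archived_urlkey": ["com,twitter)/jack/status/20"],
        "archived_timestamp": ["20150101000000"],
        "original_tweet_url": ["https://twitter.com/jack/status/20"],
        "archived_statuscode": ["200"],
    }
    field_options = list(data)

    exporter = TweetsExporter(data, "jack", field_options)
    exporter.save_to_csv()
    exporter.save_to_json()
    exporter.save_to_html()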

      -
      -

      Parse

      +
      +

      Parse

      +

      Parses the returned data from the Wayback CDX Server API.

      -
      -class waybacktweets.api.parse_tweets.TweetsParser(archived_tweets_response: List[str], username: str, field_options: List[str])
      +
      +class waybacktweets.api.parse.TweetsParser(archived_tweets_response: List[str], username: str, field_options: List[str])

      Class responsible for the overall parsing of archived tweets.

      Parameters:
      @@ -95,8 +149,8 @@

      API

      -
      -parse(print_progress=False) Dict[str, List[Any]]
      +
      +parse(print_progress=False) Dict[str, List[Any]]

      Parses the archived tweets CDX data and structures it.

      Parameters:
      @@ -111,8 +165,8 @@

      API

      -
      -class waybacktweets.api.parse_tweets.TwitterEmbed(tweet_url: str)
      +
      +class waybacktweets.api.parse.TwitterEmbed(tweet_url: str)

      Class responsible for parsing tweets using the Twitter Publish service.

      Parameters:
      @@ -120,8 +174,8 @@

      API

      -
      -embed() Tuple[List[str], List[bool], List[str]] | None
      +
      +embed() Tuple[List[str], List[bool], List[str]] | None

      Parses the archived tweets when they are still available.

      This function goes through each archived tweet and checks if it is still available. @@ -144,8 +198,8 @@
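A sketch of checking a single tweet through the Twitter Publish service, based on the embed() signature above (the tweet URL is only an example, and the three returned lists are assumed to line up index by index):

    from waybacktweets.api.parse import TwitterEmbed

    embed_parser = TwitterEmbed("https://twitter.com/jack/status/20")
    result = embed_parser.embed()
    if result is not None:
        texts, is_retweet_flags, info = result
        for text, is_rt in zip(texts, is_retweet_flags):
            print("RT" if is_rt else "tweet", "-", text)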

      API

      -
      -class waybacktweets.api.parse_tweets.JsonParser(archived_tweet_url: str)
      +
      +class waybacktweets.api.parse.JsonParser(archived_tweet_url: str)

      Class responsible for parsing tweets when the mimetype is application/json.

      Note: This class is in an experimental phase, but it is currently being used by the Streamlit Web App.
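A sketch of this experimental JSON path, assuming the caller already knows the archived mimetype is application/json (the snapshot URL is a placeholder; in practice it comes from the CDX response):

    from waybacktweets.api.parse import JsonParser

    # Placeholder Wayback Machine snapshot URL for an application/json capture.
    archived_url = "https://web.archive.org/web/20150101000000/https://twitter.com/jack/status/20"
    text = JsonParser(archived_url).parse()
    print(text)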

      @@ -155,8 +209,8 @@

      API

      -
      -parse() str
      +
      +parse() str

      Parses the archived tweets in JSON format.

      Returns:
      @@ -168,75 +222,34 @@

      API

      -
      -

      Export

      -
      -
      -class waybacktweets.api.export_tweets.TweetsExporter(data: Dict[str, List[Any]], username: str, field_options: List[str])
      -

      Class responsible for exporting parsed archived tweets.

      -
      -
      Parameters:
      -
        -
      • data – The parsed archived tweets data.

      • -
      • username – The username associated with the tweets.

      • -
      • field_options – The fields to be included in the exported data. Options include “archived_urlkey”, “archived_timestamp”, “original_tweet_url”, “archived_tweet_url”, “parsed_tweet_url”, “parsed_archived_tweet_url”, “available_tweet_text”, “available_tweet_is_RT”, “available_tweet_info”, “archived_mimetype”, “archived_statuscode”, “archived_digest”, “archived_length”.

      • -
      -
      -
      -
      -
      -save_to_csv() None
      -

      Saves the DataFrame to a CSV file.

      -
      - -
      -
      -save_to_html() None
      -

      Saves the DataFrame to an HTML file.

      -
      - -
      -
      -save_to_json() None
      -

      Saves the DataFrame to a JSON file.

      -
      - -
      - -
      -
      -

      Visualizer

      +
      +

      Request

      +

      Requests data from the Wayback Machine API.

      -
      -class waybacktweets.api.viz_tweets.HTMLTweetsVisualizer(json_file_path: str, html_file_path: str, username: str)
      -

      Class responsible for generating an HTML file to visualize the parsed data.

      +
      +class waybacktweets.api.request.WaybackTweets(username: str, collapse: str = None, timestamp_from: str = None, timestamp_to: str = None, limit: int = None, offset: int = None, matchtype: str = None)
      +

      Class responsible for requesting data from the Wayback CDX Server API.

      Parameters:
        -
      • json_content – The content of the JSON file.

      • -
      • html_file_path – The path where the HTML file will be saved.

      • username – The username associated with the tweets.

      • +
      • collapse – The field to collapse duplicate lines on.

      • +
      • timestamp_from – The timestamp to start retrieving tweets from.

      • +
      • timestamp_to – The timestamp to stop retrieving tweets at.

      • +
      • limit – The maximum number of results to return.

      • +
      • offset – The number of lines to skip in the results.

      • +
      • matchType – Results matching a certain prefix, a certain host or all subdomains.

      -
      -generate() str
      -

      Generates an HTML string that represents the parsed data.

      +
      +get() Dict[str, Any] | None
      +

      Sends a GET request to the Internet Archive’s CDX API +to retrieve archived tweets.

      Returns:
      -

      The generated HTML string.

      -
      -
      -
      - -
      -
      -save(html_content: str) None
      -

      Saves the generated HTML string to a file.

      -
      -
      Parameters:
      -

      html_content – The HTML string to be saved.

      +

      The response from the CDX API in JSON format, if successful.

      @@ -246,6 +259,7 @@

      API

      Utils

      +

      Utility functions for handling HTTP requests and manipulating URLs.
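Before the individual helpers, a small illustrative use of the get_response helper documented below, aimed directly at the CDX endpoint (the endpoint URL and query parameters are illustrative and not taken from this page; see the signature and Returns description below for the exact return shape):

    from waybacktweets.utils.utils import get_response

    cdx_url = "https://web.archive.org/cdx/search/cdx"
    params = {"url": "https://twitter.com/jack/status/*", "output": "json"}
    response = get_response(cdx_url, params=params)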

      waybacktweets.utils.utils.check_double_status(wayback_machine_url: str, original_tweet_url: str) bool
      @@ -331,8 +345,7 @@

      API

      waybacktweets.utils.utils.get_response(url: str, params: dict | None = None) Tuple[Response | None, str | None, str | None]
      -

      Sends a GET request to the specified URL and returns the response, -an error message if any, and the type of exception if any.

      +

      Sends a GET request to the specified URL and returns the response.

      Parameters:
        @@ -341,8 +354,15 @@

        API

      Returns:
      -

      A tuple containing the response from the server or None, -an error message or None, and the type of exception or None.

      +

      The response from the server.

      +
      +
      Raises:
      +
      @@ -377,6 +397,47 @@

      API

      +
      +
      +

      Visualizer

      +

      Generates an HTML file to visualize the parsed data.

      +
      +
      +class waybacktweets.api.visualize.HTMLTweetsVisualizer(json_file_path: str, html_file_path: str, username: str)
      +

      Class responsible for generating an HTML file to visualize the parsed data.

      +
      +
      Parameters:
      +
        +
      • json_content – The content of the JSON file.

      • +
      • html_file_path – The path where the HTML file will be saved.

      • +
      • username – The username associated with the tweets.

      • +
      +
      +
      +
      +
      +generate() str
      +

      Generates an HTML string that represents the parsed data.

      +
      +
      Returns:
      +

      The generated HTML string.

      +
      +
      +
      + +
      +
      +save(html_content: str) None
      +

      Saves the generated HTML string to a file.

      +
      +
      Parameters:
      +

      html_content – The HTML string to be saved.
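Finally, a sketch of the visualization tail of the pipeline, assuming the parsed tweets were first written out with TweetsExporter.save_to_json() (both file paths are hypothetical):

    from waybacktweets.api.visualize import HTMLTweetsVisualizer

    # Hypothetical paths: the JSON export feeds the HTML visualization.
    visualizer = HTMLTweetsVisualizer("jack_tweets.json", "jack_tweets.html", "jack")
    html_content = visualizer.generate()
    visualizer.save(html_content)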

      +
      +
      +
      + +
      +
      @@ -398,41 +459,46 @@

      API

      Contents
      diff --git a/exceptions.html b/exceptions.html index 032c175..09155bf 100644 --- a/exceptions.html +++ b/exceptions.html @@ -46,8 +46,8 @@

      Navigation

      Exceptions

      These are the most common errors and are handled by the waybacktweets package.

      -
      -

      ReadTimeout

      +
      +

      ReadTimeoutError

      This error occurs when a request to the web.archive.org server takes too long to respond. The server could be overloaded or there could be network issues.

      The output message from the package would be: Connection to web.archive.org timed out.

      @@ -63,6 +63,11 @@

      HTTPErrorTemporarily Offline: Internet Archive services are temporarily offline. Please check Internet Archive Twitter feed (https://twitter.com/internetarchive) for the latest information.

      +
      +

      EmptyResponseError

      +

This exception is raised for empty responses.

      +

      The output message from the package would be: No data was saved due to an empty response.

      +
      @@ -83,9 +88,10 @@

      HTTPErrorExceptions

    diff --git a/genindex.html b/genindex.html index be26c0b..36632d1 100644 --- a/genindex.html +++ b/genindex.html @@ -51,9 +51,11 @@

    Index

    | J | M | P + | R | S | T | U + | V | W @@ -64,44 +66,100 @@

    Symbols

    --collapse
  • --from
  • --limit
  • -
    @@ -134,7 +196,11 @@

    D

    E

    +
    @@ -142,13 +208,15 @@

    E

    G

    @@ -156,7 +224,11 @@

    G

    H

    +
    @@ -172,7 +244,7 @@

    I

    J

    @@ -184,13 +256,17 @@

    M

    module