first commit
commit 56b709a551
.gitignore (new file)
@@ -0,0 +1 @@
mydata*
mastodon-get-posts.org (new file)
@@ -0,0 +1,69 @@
* Import
#+begin_src python
import json
import inspect # used below to find this file's directory when saving
import requests
import pandas as pd
from mastodon import Mastodon # to get the user id
from datetime import date # to get the current date
import subprocess # for getting the access token from pass
from bs4 import BeautifulSoup # to more easily read the HTML output
#+end_src
* Get/refresh data
- The setup follows [[https://jrashford.com/2023/02/13/how-to-scrape-mastodon-timelines-using-python-and-pandas/][this tutorial]].
#+begin_src python
# Get the access token from pass
personal_access_token = subprocess.check_output(
    ["pass", "mastodon/access_token"]).strip().decode("utf-8")

# Set up access
instance = "https://social.edu.nl"
mastodon = Mastodon(api_base_url=instance, access_token=personal_access_token)

# Get the user's info
me = mastodon.me()
my_id = me["id"]

URL = f'{instance}/api/v1/accounts/{my_id}/statuses'
params = {
    'limit': 40  # maximum page size for this endpoint
}

results = []

# Page through all statuses: each request returns up to 'limit' toots,
# and passing the id of the oldest toot seen as max_id requests the
# next-older page, until an empty page signals the end.
while True:
    r = requests.get(URL, params=params)
    toots = json.loads(r.text)

    if len(toots) == 0:
        break

    results.extend(toots)

    max_id = toots[-1]['id']
    params['max_id'] = max_id

df = pd.DataFrame(results)

# Save a date-stamped CSV next to this file
current_date = date.today()
current_dir = "/".join(inspect.getfile(inspect.currentframe()).split("/")[:-1])
file_name_save = f'{current_dir}/mydata_{current_date}.csv'
df.to_csv(file_name_save, index=False)

#+end_src
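
Note that without authentication the statuses endpoint only returns publicly visible toots. A minimal sketch, assuming the same ~personal_access_token~ as above, that sends the token as a bearer header so the account's followers-only and private posts are included too:

#+begin_src python
# Authenticated variant of the request inside the loop above
auth_headers = {"Authorization": f"Bearer {personal_access_token}"}
r = requests.get(URL, params=params, headers=auth_headers)
#+end_src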
* Use/search data
Reuse data that is already on disk instead of hitting the API every time. In a fresh session ~file_name_save~ from the block above is undefined; the sketch below picks the newest dump instead.
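A minimal sketch, assuming the working directory is the one holding the date-stamped dumps (standard library only):

#+begin_src python
import glob
import os

# Pick the most recently written mydata_<date>.csv
file_name_save = max(glob.glob("mydata_*.csv"), key=os.path.getmtime)
#+end_src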
#+begin_src python
df = pd.read_csv(file_name_save)

query = "test"

# Search every toot's HTML for the query and print matches as plain text
for i in df['content']:
    if isinstance(i, str):  # skip non-string rows (NaN after the CSV round-trip)
        if query in i:
            soup = BeautifulSoup(i, 'html.parser')
            readable_text = soup.get_text(separator=' ', strip=True)
            print(readable_text)
            print("----")
#+end_src
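
The same search can also be written as a vectorized pandas filter instead of the explicit loop; a sketch that should print the same matches:

#+begin_src python
# Let pandas select the matching rows, then strip the HTML per match
matches = df[df['content'].str.contains(query, regex=False, na=False)]
for html in matches['content']:
    print(BeautifulSoup(html, 'html.parser').get_text(separator=' ', strip=True))
    print("----")
#+end_src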