commit 56b709a5511b3872b1a9de98554925ed14b8de90
Author: Misha
Date:   Tue Aug 8 00:58:15 2023 -0500

    first commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..8479e7a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+mydata*
\ No newline at end of file
diff --git a/mastodon-get-posts.org b/mastodon-get-posts.org
new file mode 100644
index 0000000..60639aa
--- /dev/null
+++ b/mastodon-get-posts.org
@@ -0,0 +1,80 @@
+* Import
+#+begin_src python
+import inspect # to locate the running script when saving
+import json
+import subprocess # for getting access token from pass
+from datetime import date # to get the current date
+from pathlib import Path # to build the output path
+
+import pandas as pd
+import requests
+from bs4 import BeautifulSoup # to more easily read the html output
+from mastodon import Mastodon # to get the user id
+#+end_src
+* Get/refresh data
+- I used [[https://jrashford.com/2023/02/13/how-to-scrape-mastodon-timelines-using-python-and-pandas/][this]] setup.
+#+begin_src python
+# Get access token from the `pass` password store
+personal_access_token = subprocess.check_output(["pass", 'mastodon/access_token']).strip().decode('utf-8')
+
+# Set up access
+instance = "https://social.edu.nl"
+mastodon = Mastodon(api_base_url=instance, access_token=personal_access_token)
+
+# Get user's info
+me = mastodon.me()
+my_id = me["id"]
+
+URL = f'{instance}/api/v1/accounts/{my_id}/statuses'
+params = {
+    'limit': 40
+}
+
+results = []
+
+# Page through the timeline: each request passes the id of the last status
+# received as max_id, until the server returns an empty page.
+while True:
+    # A timeout keeps a stalled connection from hanging the loop forever.
+    r = requests.get(URL, params=params, timeout=30)
+    # Fail loudly on HTTP errors (e.g. rate limiting) instead of treating
+    # the JSON error object as a page of statuses.
+    r.raise_for_status()
+    toots = json.loads(r.text)
+
+    if not toots:
+        break
+
+    results.extend(toots)
+    params['max_id'] = toots[-1]['id']
+
+df = pd.DataFrame(results)
+
+# Save next to this script. resolve() makes the path absolute, so a bare
+# file name (or "<stdin>" in an interactive session) yields the current
+# working directory instead of an empty string, which previously turned the
+# target into "/mydata_<date>.csv" at the filesystem root.
+current_date = date.today()
+current_dir = Path(inspect.getfile(inspect.currentframe())).resolve().parent
+file_name_save = str(current_dir / f'mydata_{current_date}.csv')
+df.to_csv(file_name_save, index=False)
+
+#+end_src
+
+* Use/search data
+Use existing data multiple times.
+#+begin_src python
+# file_name_save is set by the "Get/refresh data" block; run that first.
+df = pd.read_csv(file_name_save)
+query = "test"
+# Search the rendered text rather than the raw HTML so that tag names and
+# attribute values (class="...", href="...") cannot produce false hits.
+for html in df['content']:
+    if not isinstance(html, str):  # skip non-string cells (e.g. NaN)
+        continue
+    readable_text = BeautifulSoup(html, 'html.parser').get_text(separator=' ', strip=True)
+    if query in readable_text:
+        print(readable_text)
+        print("----")
+#+end_src
+