first commit

2023-08-08 00:58:15 -05:00 · 2023-08-08 00:58:15 -05:00 · 56b709a551
commit 56b709a551
2 changed files with 70 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1 @@
+mydata*
--- a/mastodon-get-posts.org
+++ b/mastodon-get-posts.org
@ -0,0 +1,69 @@
+* Import 
+#+begin_src python
+import json
+import requests
+import pandas as pd
+from mastodon import Mastodon # to get the user id
+from datetime import date # to get the current date
+import subprocess # for getting access token from pass
+from bs4 import BeautifulSoup # to more easily read the html output
+#+end_src
+* Get/refresh data
+- The fetching code below is based on [[https://jrashford.com/2023/02/13/how-to-scrape-mastodon-timelines-using-python-and-pandas/][this guide on scraping Mastodon timelines with Python and pandas]].
+#+begin_src python
+# Get access token
+personal_access_token=subprocess.check_output(["pass", 'mastodon/access_token']).strip().decode('utf-8')
+
+#  Set up access
+instance = "https://social.edu.nl"
+mastodon = Mastodon(api_base_url=instance, access_token=personal_access_token)
+
+#  Get user's info
+me = mastodon.me()
+my_id = me["id"]
+
+URL = f'{instance}/api/v1/accounts/{my_id}/statuses'
+params = {
+    'limit': 40
+}
+
+results = []
+
+while True:
+    r = requests.get(URL, params=params)
+    toots = json.loads(r.text)
+
+    if len(toots) == 0:
+        break
+    
+    results.extend(toots)
+    
+    max_id = toots[-1]['id']
+    params['max_id'] = max_id
+    
+df = pd.DataFrame(results)
+
+current_date = date.today()
+current_dir="/".join(inspect.getfile(inspect.currentframe()).split("/")[:-1])
+file_name_save=f'{current_dir}/mydata_{current_date}.csv'
+df.to_csv(file_name_save, index=False)
+
+#+end_src
+
+* Use/search data
+Search the saved CSV for a query string. This block can be re-run with different queries without fetching the data again.
+#+begin_src python
+df=pd.read_csv(file_name_save)
+
+query="test"
+
+# Search for words
+for i in df['content']:
+    if isinstance(i,str):
+      if query in i:
+          soup = BeautifulSoup(i, 'html.parser')
+          readable_text = soup.get_text(separator=' ', strip=True)
+          print(readable_text)
+          print("----")
+#+end_src
+