From 789f5bf2526b3d5c4655ec722cbb680386eef3ed Mon Sep 17 00:00:00 2001 From: Misha Date: Tue, 8 Aug 2023 17:28:22 -0500 Subject: [PATCH] Some new comments --- mastodon-get-posts.org | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/mastodon-get-posts.org b/mastodon-get-posts.org index 615ed57..9e3401d 100644 --- a/mastodon-get-posts.org +++ b/mastodon-get-posts.org @@ -1,7 +1,10 @@ * Initialize +- Run this part before running either of the following two sections. #+begin_src python import json # to parse data import requests # to get data +from datetime import date # to get the current date +import os # to get user id instance = "https://social.edu.nl" @@ -18,16 +21,15 @@ file_name_save=f'{download_dir}/mydata_{current_date}_{username}.csv' #+end_src * Get/refresh data - I used [[https://jrashford.com/2023/02/13/how-to-scrape-mastodon-timelines-using-python-and-pandas/][this]] setup. -- Only have to be refreshed (run) every now and then +- The results are saved in a CSV file, so you don't have to download all messages for every text search. (You only have to refresh the data every now and then.) #+begin_src python import json # to parse data import requests # to get data import pandas as pd # work with data -from datetime import date # to get the current date import subprocess # for getting access token from pass import os # to remove file -# To not append to existing file +# To start with a fresh file os.remove(file_name_save) url = f'{instance}/api/v1/accounts/{id}/statuses' @@ -65,7 +67,7 @@ while True: num_done=num_done-40 #+end_src * Use/search data -- You don't have to load all data for every search. +- You can use the CSV file saved in the previous section to search posts. #+begin_src python import pandas as pd # work with data from bs4 import BeautifulSoup # to more easily read the html output