Some new comments
This commit is contained in:
parent
4a1259599c
commit
789f5bf252
|
@ -1,7 +1,10 @@
|
||||||
* Initialize
|
* Initialize
|
||||||
|
- Run this part first: both of the following two sections depend on the variables it defines.
|
||||||
#+begin_src python
|
#+begin_src python
|
||||||
import json # to parse data
|
import json # to parse data
|
||||||
import requests # to get data
|
import requests # to get data
|
||||||
|
from datetime import date # to get the current date
|
||||||
|
import os
|
||||||
|
|
||||||
# get user id
|
# get user id
|
||||||
instance = "https://social.edu.nl"
|
instance = "https://social.edu.nl"
|
||||||
|
@ -18,16 +21,15 @@ file_name_save=f'{download_dir}/mydata_{current_date}_{username}.csv'
|
||||||
#+end_src
|
#+end_src
|
||||||
* Get/refresh data
|
* Get/refresh data
|
||||||
- I used [[https://jrashford.com/2023/02/13/how-to-scrape-mastodon-timelines-using-python-and-pandas/][this]] setup.
|
- I used [[https://jrashford.com/2023/02/13/how-to-scrape-mastodon-timelines-using-python-and-pandas/][this]] setup.
|
||||||
- Only have to be refreshed (run) every now and then
|
- The results are saved in a CSV file, so you don't have to download all messages again for every text search. (You only have to refresh the data every now and then.)
|
||||||
#+begin_src python
|
#+begin_src python
|
||||||
import json # to parse data
|
import json # to parse data
|
||||||
import requests # to get data
|
import requests # to get data
|
||||||
import pandas as pd # work with data
|
import pandas as pd # work with data
|
||||||
from datetime import date # to get the current date
|
|
||||||
import subprocess # for getting access token from pass
|
import subprocess # for getting access token from pass
|
||||||
import os # to remove file
|
import os # to remove file
|
||||||
|
|
||||||
# To not append to existing file
|
# To start with a fresh file
|
||||||
os.remove(file_name_save)
|
os.remove(file_name_save)
|
||||||
|
|
||||||
url = f'{instance}/api/v1/accounts/{id}/statuses'
|
url = f'{instance}/api/v1/accounts/{id}/statuses'
|
||||||
|
@ -65,7 +67,7 @@ while True:
|
||||||
num_done=num_done-40
|
num_done=num_done-40
|
||||||
#+end_src
|
#+end_src
|
||||||
* Use/search data
|
* Use/search data
|
||||||
- You don't have to load all data for every search.
|
- You can use the CSV file saved in the previous section to search your posts.
|
||||||
#+begin_src python
|
#+begin_src python
|
||||||
import pandas as pd # work with data
|
import pandas as pd # work with data
|
||||||
from bs4 import BeautifulSoup # to more easily read the html output
|
from bs4 import BeautifulSoup # to more easily read the html output
|
||||||
|
|
Loading…
Reference in a new issue