From 527754828977c913b16f80a139439b6082f8e35a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moussouni=2C=20Ya=C3=ABl?= Date: Mon, 22 Sep 2025 10:31:25 +0200 Subject: [PATCH] 2025-09-22: RSS feed query and parsing --- .DS_Store | Bin 0 -> 6148 bytes cfg/filters/authors.txt | 2 + cfg/filters/keywords.txt | 15 +++++++- cfg/general_config.cfg | 7 ++++ src/arxtic.py | 81 ++++++++++++++++++++++++++++++++++++--- 5 files changed, 98 insertions(+), 7 deletions(-) create mode 100644 .DS_Store diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..136fdf0ad054faad1c04cf10a1f1c28d4add1ad7 GIT binary patch literal 6148 zcmeHKu}T9$5S_gQiCCDzBIVU0(exH)I4c!|fVCviC?sCcnEF0OEfoZ7)1|WVCoKF3 z-|TFXT`pRR$PCQBy_ubR?7QRUmWWLAw%Z`85>W-t7_DMhVw`6$SmYvHZ1^7iUZd5? z2E)Qi4g6IF_}P`IPCdG$QTY6xHvHTmOVf5gYa>?KuHBCgcc*({zWZZ-c;USp(FCPb zq7I!?FJxW$SY&+8m(Q*Gk#M@YLAGElW&=Lj5xyI`rcTJWeq#A-#^-yyo4mGWd@j4- zD`rDJmKC~=>5Q&u>@!B=gEv3tb3Q-3A8*cdb+!I3m*sulXlnP#+-ja%DVk|r2v0Y9k5!6u?Pz6E-N^)7{{=fhI`9CbuD^)-h z_*V*;a@tCp*pln5h0SrVjp3(oHeQzt&Py<{tr%;$71!X_5RZ8ROdRHdh`{tmz{;S5 JD)6fcd;x@#c&q>b literal 0 HcmV?d00001 diff --git a/cfg/filters/authors.txt b/cfg/filters/authors.txt index eb2fb74..fd2d8af 100644 --- a/cfg/filters/authors.txt +++ b/cfg/filters/authors.txt @@ -1,5 +1,7 @@ +#FIELD="author" Moussouni Voggel +Lan\ccon Lançon Lancon Hilker diff --git a/cfg/filters/keywords.txt b/cfg/filters/keywords.txt index 7fb4802..2053397 100644 --- a/cfg/filters/keywords.txt +++ b/cfg/filters/keywords.txt @@ -1,7 +1,18 @@ +#FIELD="title" +#FIELD="title_detail" +#FIELD="tags" +#FIELD="summary" + +# GC +Globular cluster Globular clusters +Galaxy: globular cluster Galaxy: globular clusters -Galaxy: globular clusters -Galaxies: clusters: intracluster medium GC GCs +# ICM +Intracluster medium +Clusters: intracluster medium +Galaxies: clusters: intracluster medium + diff --git a/cfg/general_config.cfg b/cfg/general_config.cfg index 89452b8..98531f7 100644 --- 
# Settings read from the environment by this module. The accompanying
# cfg/general_config.cfg defines them with the values below (presumably
# exported into the environment before this module is imported -- TODO
# confirm against the launcher):
#
#   ## Directories
#   VENV_DIR="./venv/"
#   DB_DIR="./db/"
#   FILTERS_DIR="./cfg/filters/"
#
#   ## Remote URLs
#   QUERY_URL="https://export.arxiv.org/api/query?"
#   RSS_URL="https://rss.arxiv.org/atom/astro-ph"
#
#   ## FILTERS
#   FILTERS_LIST="authors.txt,keywords.txt"

import os
import feedparser as fp

FILTERS_DIR = os.environ.get("FILTERS_DIR")
DB_DIR = os.environ.get("DB_DIR")
QUERY_URL = os.environ.get("QUERY_URL")
RSS_URL = os.environ.get("RSS_URL")
# Comma-separated file names. An unset or empty variable yields an empty list
# instead of raising AttributeError on None at import time.
FILTERS_LIST = [
    name.strip()
    for name in os.environ.get("FILTERS_LIST", "").split(",")
    if name.strip()
]

COLOUR_DEFAULT = "\033[0m"
COLOUR_INPUT = "\033[36m"
# NOTE(review): COLOUR_OUTPUT (used by print_entries) is defined on a line of
# the file that is not part of this excerpt; its value is not visible here.
COLOUR_INFO = "\033[34m"
COLOUR_WARNING = "\033[93m"
COLOUR_ERROR = "\033[91m"


def get_rss():
    """Fetch and parse the feed located at RSS_URL with feedparser."""
    return fp.parse(RSS_URL)


def get_entries(rss):
    """Return the ``"entries"`` item of a parsed feed."""
    return rss["entries"]


def print_entries(entries, fields=None):
    """Print a coloured summary of every entry to standard output.

    Parameters
    ----------
    entries : sequence of mappings
        Each entry must provide the keys ``id``, ``arxiv_announce_type``,
        ``link``, ``title``, ``author`` and ``summary``.
    fields : sequence of sequences of str, optional
        ``fields[i]`` lists the names of the fields of ``entries[i]`` that
        matched a filter; printed on an extra line when given.

    Returns
    -------
    int
        Always 0.
    """
    for index, entry in enumerate(entries):
        # The first line of the summary is dropped; only the rest is shown.
        abstract = "\n".join(entry["summary"].split("\n")[1:])
        print(
            f"{COLOUR_INFO}{entry['id']} ({entry['arxiv_announce_type']}) "
            f"[{entry['link']}]{COLOUR_DEFAULT}"
        )
        print(f"{COLOUR_DEFAULT}{entry['title']}{COLOUR_DEFAULT}")
        print(f"{COLOUR_OUTPUT}{entry['author']}{COLOUR_DEFAULT}")
        print(f"{COLOUR_INPUT}{abstract}{COLOUR_DEFAULT}")
        if fields is not None:
            matched = ", ".join(fields[index])
            print(f"{COLOUR_ERROR}Filtered field(s): {matched}{COLOUR_DEFAULT}")
        print("")
    return 0


def get_filters():
    """Load every filter file named in FILTERS_LIST from FILTERS_DIR.

    File format, one item per line:

    * ``#FIELD="name"`` declares an entry field the file applies to;
    * any other line starting with ``#`` is a comment;
    * blank lines are ignored;
    * every remaining line is a value to look for (kept verbatim, minus
      the line terminator).

    Returns
    -------
    list of dict
        One ``{"fields": [...], "values": [...]}`` dictionary per file, in
        the order of FILTERS_LIST.

    Raises
    ------
    OSError
        If a filter file cannot be opened.
    """
    filters = []
    for name in FILTERS_LIST:
        # os.path.join tolerates a directory without a trailing separator;
        # an unset FILTERS_DIR falls back to the current directory.
        path = os.path.join(FILTERS_DIR or "", name)
        spec = {"fields": [], "values": []}
        # The filter files contain non-ASCII names, so the encoding is pinned
        # rather than left to the platform default. The context manager
        # closes the file even if parsing raises.
        with open(path, encoding="utf-8") as filter_file:
            for line in filter_file:
                stripped = line.strip()
                if not stripped:
                    continue
                if stripped.startswith("#FIELD"):
                    # partition() never raises when "=" is missing.
                    raw_field = stripped.partition("=")[2]
                    field = raw_field.replace("\"", "").strip()
                    if field:
                        spec["fields"].append(field)
                elif stripped.startswith("#"):
                    continue
                else:
                    spec["values"].append(line.rstrip("\r\n"))
        filters.append(spec)
    return filters


def _flatten_text(raw):
    """Return all text contained in a field value as a single string.

    Strings are returned unchanged. Mappings contribute the text of their
    values, lists and tuples the text of their items, each joined with
    newlines. Anything else (including None) contributes nothing.
    """
    if isinstance(raw, str):
        return raw
    if isinstance(raw, dict):
        return "\n".join(_flatten_text(item) for item in raw.values())
    if isinstance(raw, (list, tuple)):
        return "\n".join(_flatten_text(item) for item in raw)
    return ""


def filter_entries(filters, entries):
    """Keep the entries in which at least one filter value occurs.

    Parameters
    ----------
    filters : list of dict
        Dictionaries with the keys ``"fields"`` and ``"values"``, as
        returned by get_filters().
    entries : iterable of mappings
        Feed entries; a field missing from an entry simply does not match.

    Returns
    -------
    tuple of (list, list)
        The matching entries, and for each of them the list of field names
        that matched. Each name appears once, in the order first encountered.
    """
    filtered_entries = []
    filtered_fields = []
    for entry in entries:
        matched = []
        for filter_ in filters:
            values = filter_["values"]
            for field in filter_["fields"]:
                if field in matched:
                    continue
                # Flattening makes dict- and list-valued fields searchable,
                # where a plain `in` would test keys or whole elements.
                text = _flatten_text(entry.get(field))
                if any(value in text for value in values):
                    matched.append(field)
        if matched:
            filtered_entries.append(entry)
            filtered_fields.append(matched)
    return filtered_entries, filtered_fields


def today_arxiv():
    """Fetch the feed, filter its entries, print the matches and return them.

    Returns
    -------
    tuple of (list, list)
        The filtered entries and their matching field names, as returned by
        filter_entries().
    """
    filters = get_filters()
    rss = get_rss()
    entries = get_entries(rss)
    entries, fields = filter_entries(filters, entries)
    print_entries(entries, fields)
    return entries, fields

## Find using arxiv id