From 527754828977c913b16f80a139439b6082f8e35a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Moussouni=2C=20Ya=C3=ABl?= <yael-ii@noreply.codeberg.org>
Date: Mon, 22 Sep 2025 10:31:25 +0200
Subject: [PATCH] 2025-09-22: RSS feed query and parsing

---
 .DS_Store                | Bin 0 -> 6148 bytes
 cfg/filters/authors.txt  |   2 +
 cfg/filters/keywords.txt |  15 +++++++-
 cfg/general_config.cfg   |   7 ++++
 src/arxtic.py            |  81 ++++++++++++++++++++++++++++++++++++---
 5 files changed, 98 insertions(+), 7 deletions(-)
 create mode 100644 .DS_Store
diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..136fdf0ad054faad1c04cf10a1f1c28d4add1ad7
GIT binary patch
literal 6148
zcmeHKu}T9$5S_gQiCCDzBIVU0(exH)I4c!|fVCviC?sCcnEF0OEfoZ7)1|WVCoKF3
z-|TFXT`pRR$PCQBy_ubR?7QRUmWWLAw%Z`85>W-t7_DMhVw`6$SmYvHZ1^7iUZd5?
z2E)Qi4g6IF_}P`IPCdG$QTY6xHvHTmOVf5gYa>?KuHBCgcc*({zWZZ-c;USp(FCPb
zq7I!?FJxW$SY&+8m(Q*Gk#M@YLAGElW&=Lj5xyI`rcTJWeq#A-#^-yyo4mGWd@j4-
zD`rDJmKC~=>5Q&u>@!B=gEv3tb3Q-3A8*cdb+!I3m*sulXlnP#+-j<TDxeDdqyTz0
zTV+j9M^!)-PzB}+@b@8xGbRpmLHp^zV2=R6GQ!p{xBouCm;=DXVJ?UW%t$FvN{u^W
z7%4|QcIy&{xuBGjvCO!SV>a%DVk|r2v0Y9k5!6u?Pz6E-N^)7{{=fhI`9CbuD^)-h
z_*V*;a@tCp*pln5h0SrVjp3(oHeQzt&Py<{tr%;$71!X_5RZ8ROdRHdh`{tmz{;S5
JD)6fcd;x@#c&q>b

literal 0
HcmV?d00001

diff --git a/cfg/filters/authors.txt b/cfg/filters/authors.txt
index eb2fb74..fd2d8af 100644
--- a/cfg/filters/authors.txt
+++ b/cfg/filters/authors.txt
@@ -1,5 +1,7 @@
+#FIELD="author"
 Moussouni
 Voggel
+Lan\ccon
 Lançon
 Lancon
 Hilker
diff --git a/cfg/filters/keywords.txt b/cfg/filters/keywords.txt
index 7fb4802..2053397 100644
--- a/cfg/filters/keywords.txt
+++ b/cfg/filters/keywords.txt
@@ -1,7 +1,18 @@
+#FIELD="title"
+#FIELD="title_detail"
+#FIELD="tags"
+#FIELD="summary"
+
+# GC
+Globular cluster
 Globular clusters
+Galaxy: globular cluster
 Galaxy: globular clusters
-Galaxy: globular clusters
-Galaxies: clusters: intracluster medium
 GC
 GCs
 
+# ICM
+Intracluster medium
+Clusters: intracluster medium
+Galaxies: clusters: intracluster medium
+
diff --git a/cfg/general_config.cfg b/cfg/general_config.cfg
index 89452b8..98531f7 100644
--- a/cfg/general_config.cfg
+++ b/cfg/general_config.cfg
@@ -30,5 +30,12 @@
 
 ## Directories
 VENV_DIR="./venv/"
+DB_DIR="./db/"
+FILTERS_DIR="./cfg/filters/"
+
+## Remote URLs
 QUERY_URL="https://export.arxiv.org/api/query?"
 RSS_URL="https://rss.arxiv.org/atom/astro-ph"
+
+## Filters
+FILTERS_LIST="authors.txt,keywords.txt"
diff --git a/src/arxtic.py b/src/arxtic.py
index 0e3547d..f038a85 100644
--- a/src/arxtic.py
+++ b/src/arxtic.py
@@ -33,8 +33,11 @@ along with this program. If not, see www.gnu.org/licenses/.
 import os
 import feedparser as fp
 
+FILTERS_DIR = os.environ.get("FILTERS_DIR")  # prefix joined to each filter file name
+DB_DIR = os.environ.get("DB_DIR")
 QUERY_URL = os.environ.get("QUERY_URL")
 RSS_URL = os.environ.get("RSS_URL")
+FILTERS_LIST = [f for f in os.environ.get("FILTERS_LIST", "").split(",") if f]  # [] if unset
 
 COLOUR_DEFAULT="\033[0m"
 COLOUR_INPUT="\033[36m"
@@ -43,11 +46,79 @@ COLOUR_INFO="\033[34m"
 COLOUR_WARNING="\033[93m"
 COLOUR_ERROR="\033[91m"
 
-d = fp.parse(RSS_URL)
+def get_rss():
+    """Parse the feed located at RSS_URL with feedparser and return the result."""
+    return fp.parse(RSS_URL)
 
-fields = list(d)
-entries = d["entries"]
+def get_entries(rss):
+    """Return the value stored under the "entries" key of the parsed feed `rss`."""
+    return rss["entries"]
 
-for i in range(len(entries)):
-    print(entries[i]["published"] + ": " + entries[i]["id"])
+def print_entries(entries, fields=None):
+    """Print every entry to stdout using the ANSI colour constants; return 0.
+
+    If `fields` is given, fields[i] is joined with ", " and printed after
+    entries[i] on a "Filtered field(s): " line.
+    """
+    for position, item in enumerate(entries):
+        # Header line: identifier, announce type, then the link in brackets.
+        heading = item["id"] + " (" + item["arxiv_announce_type"] + ") ["
+        heading = heading + item["link"] + "]"
+        print(COLOUR_INFO + heading + COLOUR_DEFAULT)
+        print(COLOUR_DEFAULT + item["title"] + COLOUR_DEFAULT)
+        print(COLOUR_OUTPUT + item["author"] + COLOUR_DEFAULT)
+        # Only the text after the first line of the summary is shown.
+        summary_tail = item["summary"].split("\n")[1:]
+        print(COLOUR_INPUT + "\n".join(summary_tail) + COLOUR_DEFAULT)
+        if fields is not None:
+            matched = ", ".join(fields[position])
+            print(COLOUR_ERROR + "Filtered field(s): " + matched + COLOUR_DEFAULT)
+        print("")
+    return 0
 
+def get_filters():
+    """Read every file of FILTERS_LIST (inside FILTERS_DIR) into a filter dict.
+
+    Returns one {"fields": [...], "values": [...]} per file: "#FIELD" lines name
+    a field, other "#" lines and blank lines are skipped, the rest are values.
+    """
+    filters = []
+    for name in FILTERS_LIST:
+        dic = {"fields": [], "values": []}
+        # Explicit UTF-8: filter values may hold non-ASCII author names.
+        with open(os.path.join(FILTERS_DIR, name), encoding="utf-8") as filter_file:
+            for line in (raw.rstrip("\r\n") for raw in filter_file):
+                if "#FIELD" in line:
+                    dic["fields"].append(line.split("=")[1].replace("\"", "").strip())
+                elif line.strip() and not line.startswith("#"):
+                    dic["values"].append(line)
+        filters.append(dic)
+    return filters
+
+def filter_entries(filters, entries):
+    """Return (kept, fields): the entries in which a filter value occurs and,
+    for each kept entry, the matching field names, listed once each.
+    """
+    filtered_entries, filtered_fields = [], []
+    for entry in entries:
+        matched = []
+        for filter_ in filters:
+            for field in filter_["fields"]:
+                # .get(): a field missing from this entry cannot match (no KeyError).
+                content = entry.get(field, "")
+                if field not in matched and any(v in content for v in filter_["values"]):
+                    matched.append(field)
+        if matched:
+            filtered_entries.append(entry)
+            filtered_fields.append(matched)
+    return filtered_entries, filtered_fields
+
+def today_arxiv():
+    """Load the filters, fetch the feed, print the matching entries, return them."""
+    active_filters = get_filters()
+    feed = get_rss()
+    matches, matched_fields = filter_entries(active_filters, get_entries(feed))
+    print_entries(matches, matched_fields)
+    return matches, matched_fields
+
+## Find using arxiv id