mirror of
https://codeberg.org/Yael-II/ArXtic.git
synced 2026-03-15 06:16:26 +01:00
2025-09-22: RSS feed query and parsing
This commit is contained in:
@@ -1,5 +1,7 @@
|
|||||||
|
#FIELD="author"
|
||||||
Moussouni
|
Moussouni
|
||||||
Voggel
|
Voggel
|
||||||
|
Lan\ccon
|
||||||
Lançon
|
Lançon
|
||||||
Lancon
|
Lancon
|
||||||
Hilker
|
Hilker
|
||||||
|
|||||||
@@ -1,7 +1,18 @@
|
|||||||
|
#FIELD="title"
|
||||||
|
#FIELD="title_detail"
|
||||||
|
#FIELD="tags"
|
||||||
|
#FIELD="summary"
|
||||||
|
|
||||||
|
# GC
|
||||||
|
Globular cluster
|
||||||
Globular clusters
|
Globular clusters
|
||||||
|
Galaxy: globular cluster
|
||||||
Galaxy: globular clusters
|
Galaxy: globular clusters
|
||||||
Galaxy: globular clusters
|
|
||||||
Galaxies: clusters: intracluster medium
|
|
||||||
GC
|
GC
|
||||||
GCs
|
GCs
|
||||||
|
|
||||||
|
# ICM
|
||||||
|
Intracluster medium
|
||||||
|
Clusters: intracluster medium
|
||||||
|
Galaxies: clusters: intracluster medium
|
||||||
|
|
||||||
|
|||||||
@@ -30,5 +30,12 @@
|
|||||||
|
|
||||||
## Directories
|
## Directories
|
||||||
VENV_DIR="./venv/"
|
VENV_DIR="./venv/"
|
||||||
|
DB_DIR="./db/"
|
||||||
|
FILTERS_DIR="./cfg/filters/"
|
||||||
|
|
||||||
|
## Remote URLs
|
||||||
QUERY_URL="https://export.arxiv.org/api/query?"
|
QUERY_URL="https://export.arxiv.org/api/query?"
|
||||||
RSS_URL="https://rss.arxiv.org/atom/astro-ph"
|
RSS_URL="https://rss.arxiv.org/atom/astro-ph"
|
||||||
|
|
||||||
|
## FILTERS
|
||||||
|
FILTERS_LIST="authors.txt,keywords.txt"
|
||||||
|
|||||||
@@ -33,8 +33,11 @@ along with this program. If not, see www.gnu.org/licenses/.
|
|||||||
import os

import feedparser as fp

# Runtime configuration, injected via environment variables
# (exported by the project's config file before launch).
FILTERS_DIR = os.environ.get("FILTERS_DIR")
DB_DIR = os.environ.get("DB_DIR")
QUERY_URL = os.environ.get("QUERY_URL")
RSS_URL = os.environ.get("RSS_URL")
# NOTE(review): raises AttributeError when FILTERS_LIST is unset — confirm intended.
FILTERS_LIST = os.environ.get("FILTERS_LIST").split(",")

# ANSI escape sequences for coloured terminal output.
COLOUR_DEFAULT = "\033[0m"
COLOUR_INPUT = "\033[36m"
COLOUR_INFO = "\033[34m"
COLOUR_WARNING = "\033[93m"
COLOUR_ERROR = "\033[91m"
def get_rss():
    """Fetch and parse the Atom feed configured in ``RSS_URL``.

    Returns the :mod:`feedparser` result object for the arXiv feed.
    """
    return fp.parse(RSS_URL)
def get_entries(rss):
    """Return the list of entries contained in a parsed feed object."""
    return rss["entries"]
def print_entries(entries, fields=None):
    """Pretty-print feed entries to stdout with ANSI colours.

    Parameters
    ----------
    entries : list
        Parsed feed entries (feedparser entry dicts); each is expected to
        carry "id", "arxiv_announce_type", "link", "title", "author" and
        "summary" keys — TODO confirm all announce types provide them.
    fields : list of list of str, optional
        Parallel to ``entries``: the filter field names each entry matched.
        When None, no "Filtered field(s)" line is printed.

    Returns
    -------
    int
        Always 0.
    """
    # enumerate() instead of range(len(...)) — i is still needed to index
    # the parallel `fields` list.
    for i, entry in enumerate(entries):
        # Header: id, announce type (new/replace/...) and the abstract link.
        print(COLOUR_INFO
              + entry["id"]
              + " ("
              + entry["arxiv_announce_type"]
              + ") ["
              + entry["link"]
              + "]"
              + COLOUR_DEFAULT)
        print(COLOUR_DEFAULT + entry["title"] + COLOUR_DEFAULT)
        print(COLOUR_OUTPUT + entry["author"] + COLOUR_DEFAULT)
        # Drop the first line of the summary — presumably the
        # "arXiv:ID Announce Type" preamble; verify against the feed.
        print(COLOUR_INPUT + "\n".join(entry["summary"].split("\n")[1:]) + COLOUR_DEFAULT)
        if fields is not None:
            print(COLOUR_ERROR
                  + "Filtered field(s): "
                  + ", ".join(fields[i])
                  + COLOUR_DEFAULT)
        print("")
    return 0
|
def get_filters(filters_dir=None, filters_list=None):
    """Load the filter definition files into a list of dicts.

    Each filter file contains ``#FIELD="name"`` directives (which entry
    fields to search), ``#``-comment lines, blank lines, and plain value
    lines (the strings to search for).

    Parameters
    ----------
    filters_dir : str, optional
        Directory prefix of the filter files; defaults to the module-level
        FILTERS_DIR (taken from the environment).
    filters_list : list of str, optional
        Filter file names; defaults to the module-level FILTERS_LIST.

    Returns
    -------
    list of dict
        One ``{"fields": [...], "values": [...]}`` dict per filter file.
    """
    if filters_dir is None:
        filters_dir = FILTERS_DIR
    if filters_list is None:
        filters_list = FILTERS_LIST
    filters = []
    for name in filters_list:
        path = filters_dir + name
        dic = {"fields": [], "values": []}
        # `with` guarantees the file is closed even if parsing raises.
        with open(path) as filter_file:
            for line in filter_file:
                if "#FIELD" in line:
                    # e.g. #FIELD="author"  ->  author
                    field = line.split("=")[1].replace("\"", "").strip()
                    dic["fields"].append(field)
                # Blank-line check first: line[0] would raise IndexError on "".
                elif line in [" \n", "\n", ""] or line[0] == "#":
                    continue
                else:
                    dic["values"].append(line.replace("\n", ""))
        filters.append(dic)
    return filters
|
def filter_entries(filters, entries):
    """Select the entries that match at least one filter value.

    Parameters
    ----------
    filters : list of dict
        ``{"fields": [...], "values": [...]}`` dicts from get_filters().
    entries : list
        Feed entries; each must contain every field named by the filters
        (a missing field raises KeyError, as before).

    Returns
    -------
    (list, list of list of str)
        The matching entries, and — parallel to them — the field names
        each entry matched, in match order.  Fix: a field is now recorded
        once per entry even when several values match it (the original
        appended duplicates, e.g. "author, author").
    """
    filtered_entries = []
    filtered_fields = []
    for entry in entries:
        matched = []
        for filter_ in filters:
            for field in filter_["fields"]:
                for value in filter_["values"]:
                    if value in entry[field] and field not in matched:
                        matched.append(field)
        if matched:
            filtered_entries.append(entry)
            filtered_fields.append(matched)
    return filtered_entries, filtered_fields
|
def today_arxiv():
    """Fetch today's feed, apply the configured filters, print and return.

    Returns the matching entries and, parallel to them, the filter field
    names each entry matched.
    """
    active_filters = get_filters()
    feed = get_rss()
    all_entries = get_entries(feed)
    matches, matched_fields = filter_entries(active_filters, all_entries)
    print_entries(matches, matched_fields)
    return matches, matched_fields
|
## Find using arxiv id
|
||||||
|
|||||||
Reference in New Issue
Block a user