mirror of
https://codeberg.org/Yael-II/ArXtic.git
synced 2026-03-14 22:06:27 +01:00
2025-10-07: Update
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -6,3 +6,4 @@ log/
|
|||||||
venv/
|
venv/
|
||||||
.venv/
|
.venv/
|
||||||
filters/
|
filters/
|
||||||
|
*.secret
|
||||||
|
|||||||
@@ -33,6 +33,12 @@ VENV_DIR="./venv/"
|
|||||||
DB_DIR="./db/"
|
DB_DIR="./db/"
|
||||||
FILTERS_DIR="./cfg/filters/"
|
FILTERS_DIR="./cfg/filters/"
|
||||||
|
|
||||||
|
## API Keys
|
||||||
|
ARXIV_API_KEY=""
|
||||||
|
ADSABS_API_KEY="$(cat cfg/adsabs.secret)"
|
||||||
|
|
||||||
## Remote URLs
|
## Remote URLs
|
||||||
QUERY_URL="https://export.arxiv.org/api/query?"
|
ARXIV_QUERY_URL="https://export.arxiv.org/api/query?"
|
||||||
RSS_URL="https://rss.arxiv.org/atom/astro-ph"
|
ARXIV_RSS_URL="https://rss.arxiv.org/atom/astro-ph"
|
||||||
|
|
||||||
|
ADSABS_QUERY_URL="https://api.adsabs.harvard.edu/v1/search/query?q="
|
||||||
|
|||||||
@@ -2,3 +2,5 @@
|
|||||||
2509.15337v1
|
2509.15337v1
|
||||||
2509.15720v1
|
2509.15720v1
|
||||||
2509.16168v1
|
2509.16168v1
|
||||||
|
2509.18650v1
|
||||||
|
2509.19068v1
|
||||||
|
|||||||
142
src/arxtic.py
142
src/arxtic.py
@@ -36,8 +36,9 @@ import feedparser as fp
|
|||||||
|
|
||||||
FILTERS_DIR = os.environ.get("FILTERS_DIR")
|
FILTERS_DIR = os.environ.get("FILTERS_DIR")
|
||||||
DB_DIR = os.environ.get("DB_DIR")
|
DB_DIR = os.environ.get("DB_DIR")
|
||||||
QUERY_URL = os.environ.get("QUERY_URL")
|
ARXIV_QUERY_URL = os.environ.get("ARXIV_QUERY_URL")
|
||||||
RSS_URL = os.environ.get("RSS_URL")
|
ARXIV_RSS_URL = os.environ.get("ARXIV_RSS_URL")
|
||||||
|
ADSABS_QUERY_URL = os.environ.get("ADSABS_QUERY_URL")
|
||||||
|
|
||||||
COLOUR_DEFAULT="\033[0m"
|
COLOUR_DEFAULT="\033[0m"
|
||||||
COLOUR_INPUT="\033[36m"
|
COLOUR_INPUT="\033[36m"
|
||||||
@@ -46,18 +47,60 @@ COLOUR_INFO="\033[34m"
|
|||||||
COLOUR_WARNING="\033[93m"
|
COLOUR_WARNING="\033[93m"
|
||||||
COLOUR_ERROR="\033[91m"
|
COLOUR_ERROR="\033[91m"
|
||||||
|
|
||||||
|
## General
|
||||||
|
|
||||||
def wrap(txt, length=80):
    """Re-flow ``txt`` so that no line exceeds ``length`` columns.

    Words longer than ``length`` are kept whole rather than split.

    Args:
        txt: the text to wrap.
        length: maximum line width (default 80).

    Returns:
        The wrapped text as a single newline-joined string.
    """
    lines = tw.wrap(txt, length, break_long_words=False)
    return "\n".join(lines)
|
||||||
|
|
||||||
def get_rss():
|
|
||||||
feed = fp.parse(RSS_URL)
|
|
||||||
return feed
|
|
||||||
|
|
||||||
def get_entries(rss):
    """Return the value stored under the ``"entries"`` key of a parsed feed."""
    return rss["entries"]
|
||||||
|
|
||||||
|
def get_filters():
    """Load every filter definition stored in ``FILTERS_DIR``.

    Each non-hidden regular file of the directory is read line by line:

    * a line containing ``#FIELD`` declares an entry field to search; the
      text following the first ``=`` (double quotes removed, surrounding
      whitespace stripped) is the field name;
    * any other line starting with ``#`` and any blank or whitespace-only
      line is ignored;
    * every remaining line is a search value, kept as written except for
      the trailing newline.

    Returns:
        A list with one ``{"fields": [...], "values": [...]}`` dictionary
        per filter file, in alphabetical order of the file names.
    """
    filters = []
    # Sorted so the result does not depend on the file-system listing order.
    names = sorted(f for f in os.listdir(FILTERS_DIR) if not f.startswith("."))
    for name in names:
        # os.path.join works whether or not FILTERS_DIR ends with a slash.
        path = os.path.join(FILTERS_DIR, name)
        if not os.path.isfile(path):
            continue  # sub-directories cannot be opened as filter files
        dic = {"fields": [], "values": []}
        with open(path) as filter_file:
            for line in filter_file:
                if "#FIELD" in line:
                    parts = line.split("=")
                    if len(parts) < 2:
                        continue  # malformed declaration without '='
                    field = parts[1].replace("\"", "").strip()
                    if field:
                        dic["fields"].append(field)
                elif line.startswith("#") or not line.strip():
                    # Comment or blank line: a whitespace-only "value"
                    # would otherwise match almost every entry.
                    continue
                else:
                    dic["values"].append(line.rstrip("\n"))
        filters.append(dic)
    return filters
|
||||||
|
|
||||||
|
## ArXiV Entries
|
||||||
|
|
||||||
|
def filter_entries(filters, entries):
    """Select the entries matching at least one filter value.

    A value matches when it occurs, case-insensitively, in the string
    form of one of the filter's fields of the entry. Fields that an entry
    does not provide are skipped instead of aborting the whole run.

    Args:
        filters: list of ``{"fields": [...], "values": [...]}`` dicts.
        entries: iterable of mapping-like feed entries.

    Returns:
        A tuple ``(filtered_entries, filtered_fields, filtered_keywords)``
        of three parallel lists: the matching entries, and for each of
        them the distinct fields and the distinct values that matched
        (in order of first match).
    """
    filtered_entries = []
    filtered_fields = []
    filtered_keywords = []
    for entry in entries:
        matched_fields = []
        matched_keywords = []
        for filter_ in filters:
            values = filter_["values"]
            for field in filter_["fields"]:
                try:
                    # Upper-case the searched text once per field.
                    haystack = str(entry[field]).upper()
                except KeyError:
                    continue  # this entry has no such field
                for value in values:
                    if value.upper() not in haystack:
                        continue
                    if field not in matched_fields:
                        matched_fields.append(field)
                    if value not in matched_keywords:
                        matched_keywords.append(value)
        if matched_fields:
            filtered_entries.append(entry)
            filtered_fields.append(matched_fields)
            filtered_keywords.append(matched_keywords)
    return filtered_entries, filtered_fields, filtered_keywords
|
||||||
|
|
||||||
def print_entries(entries, fields=None, keywords=None):
|
def print_entries(entries, fields=None, keywords=None):
|
||||||
for i in range(len(entries)):
|
for i in range(len(entries)):
|
||||||
entry = entries[i]
|
entry = entries[i]
|
||||||
@@ -89,67 +132,27 @@ def print_entries(entries, fields=None, keywords=None):
|
|||||||
print("")
|
print("")
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
def get_filters():
|
# ArXiV IDs
|
||||||
filters = []
|
|
||||||
filters_list = [f for f in os.listdir(FILTERS_DIR) if not f[0] == "."]
|
|
||||||
for i in range(len(filters_list)):
|
|
||||||
path = FILTERS_DIR + filters_list[i]
|
|
||||||
with open(path) as filter_file:
|
|
||||||
dic = {"fields": [], "values": []}
|
|
||||||
for line in filter_file.readlines():
|
|
||||||
if "#FIELD" in line:
|
|
||||||
field = line.split("=")[1].replace("\"", "").strip()
|
|
||||||
dic["fields"].append(field)
|
|
||||||
elif line[0] == "#" or line in [" \n", "\n", ""]:
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
value = line.replace("\n", "")
|
|
||||||
dic["values"].append(value)
|
|
||||||
filters.append(dic)
|
|
||||||
return filters
|
|
||||||
|
|
||||||
def filter_entries(filters, entries):
|
def get_arxiv_ids(entries):
|
||||||
filtered_entries = []
|
|
||||||
filtered_fields = []
|
|
||||||
filtered_keywords = []
|
|
||||||
for entry in entries:
|
|
||||||
added = False
|
|
||||||
for filter_ in filters:
|
|
||||||
fields = filter_["fields"]
|
|
||||||
values = filter_["values"]
|
|
||||||
for field in fields:
|
|
||||||
for value in values:
|
|
||||||
if not added and value.upper() in str(entry[field]).upper():
|
|
||||||
filtered_entries.append(entry)
|
|
||||||
filtered_fields.append([field])
|
|
||||||
filtered_keywords.append([value])
|
|
||||||
added = True
|
|
||||||
elif added and value.upper() in str(entry[field]).upper():
|
|
||||||
if not field in filtered_fields[-1]:
|
|
||||||
filtered_fields[-1].append(field)
|
|
||||||
if not value in filtered_keywords[-1]:
|
|
||||||
filtered_keywords[-1].append(value)
|
|
||||||
return filtered_entries, filtered_fields, filtered_keywords
|
|
||||||
|
|
||||||
def get_ids(entries):
|
|
||||||
ids = []
|
ids = []
|
||||||
for entry in entries:
|
for entry in entries:
|
||||||
ids.append(entry["id"])
|
ids.append(entry["id"])
|
||||||
return ids
|
return ids
|
||||||
|
|
||||||
def save_ids(ids, library="saved"):
|
def save_arxiv_ids(ids, library="saved"):
|
||||||
if isinstance(ids, list) or isinstance(ids, np.ndarray):
|
if isinstance(ids, list) or isinstance(ids, np.ndarray):
|
||||||
ids = [i.replace("oai:", "").replace("arXiv.org:", "") for i in ids]
|
ids = [i.replace("oai:", "").replace("arXiv.org:", "") for i in ids]
|
||||||
elif isinstance(ids, str):
|
elif isinstance(ids, str):
|
||||||
ids = [ids.replace("oai:", "").replace("arXiv.org:", "")]
|
ids = [ids.replace("oai:", "").replace("arXiv.org:", "")]
|
||||||
else:
|
else:
|
||||||
raise Exception("The type of ids ({}) is not recognized".format(type(ids)))
|
raise Exception(
|
||||||
|
"The type of ids ({}) is not recognized".format(type(ids))
|
||||||
|
)
|
||||||
with open(DB_DIR + library + ".txt", "a+") as db_file:
|
with open(DB_DIR + library + ".txt", "a+") as db_file:
|
||||||
None # creates the file if not already in the directory
|
None # creates the file if not already in the directory
|
||||||
with open(DB_DIR + library + ".txt", "r+") as db_file:
|
with open(DB_DIR + library + ".txt", "r+") as db_file:
|
||||||
known_ids = [line.replace("\n", "") for line in db_file.readlines()]
|
known_ids = [line.replace("\n", "") for line in db_file.readlines()]
|
||||||
|
|
||||||
print(known_ids)
|
|
||||||
with open(DB_DIR + library + ".txt", "a+") as db_file:
|
with open(DB_DIR + library + ".txt", "a+") as db_file:
|
||||||
for i in ids:
|
for i in ids:
|
||||||
if not i in known_ids:
|
if not i in known_ids:
|
||||||
@@ -157,34 +160,39 @@ def save_ids(ids, library="saved"):
|
|||||||
db_file.write("\n")
|
db_file.write("\n")
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
## ArXiV
|
||||||
|
|
||||||
|
def get_arxiv_rss():
    """Parse the feed located at ``ARXIV_RSS_URL`` and return the result."""
    return fp.parse(ARXIV_RSS_URL)
|
||||||
|
|
||||||
def today_arxiv():
    """Fetch the arXiv RSS feed, keep the entries matching the filters,
    record their identifiers and print them.

    Returns:
        The tuple ``(entries, fields, keywords)`` produced by
        ``filter_entries`` for the current feed.
    """
    active_filters = get_filters()
    rss_entries = get_entries(get_arxiv_rss())
    matched, fields, keywords = filter_entries(active_filters, rss_entries)
    # Identifiers go to the default library of save_arxiv_ids.
    save_arxiv_ids(get_arxiv_ids(matched))
    print_entries(matched, fields, keywords)
    return matched, fields, keywords
|
||||||
|
|
||||||
def get_arxiv_from_ids(ids):
    """Query the arXiv API for the given identifiers.

    Args:
        ids: a single identifier string, or a list, tuple or numpy array
            of identifier strings. The ``oai:`` and ``arXiv.org:``
            markers are removed from every identifier before querying.

    Returns:
        The feed returned by ``fp.parse`` for the ``id_list`` query built
        on ``ARXIV_QUERY_URL``.

    Raises:
        TypeError: if ``ids`` is neither a string nor one of the accepted
            sequence types.
    """
    if isinstance(ids, str):
        ids = [ids]
    elif not isinstance(ids, (list, tuple, np.ndarray)):
        raise TypeError(
            "The type of ids ({}) is not recognized".format(type(ids))
        )
    bare_ids = [i.replace("oai:", "").replace("arXiv.org:", "") for i in ids]
    query = ARXIV_QUERY_URL + "id_list=" + ",".join(bare_ids)
    return fp.parse(query)
|
||||||
|
|
||||||
|
## ADS-ABS
|
||||||
|
|
||||||
|
def get_adsabs_from_ids(ids):
    """Placeholder for the ADS-ABS lookup; currently always returns None.

    Args:
        ids: accepted for interface compatibility, not used yet.
    """
    # TODO: not implemented yet.
    return None
|
||||||
|
|
||||||
"""
|
entries, fields, keywords = today_arxiv()
|
||||||
ids = ["oai:arXiv.org:2509.13163"]
|
|
||||||
feed = get_api_ids(ids)
|
|
||||||
entries = get_entries(feed)
|
|
||||||
print_entries(entries)
|
|
||||||
"""
|
|
||||||
today_arxiv()
|
|
||||||
|
|||||||
Reference in New Issue
Block a user