mirror of
https://codeberg.org/Yael-II/ArXtic.git
synced 2026-03-15 06:16:26 +01:00
2025-09-22: Added saves of retrieved ids
This commit is contained in:
@@ -58,7 +58,7 @@ def get_entries(rss):
|
||||
entries = rss["entries"]
|
||||
return entries
|
||||
|
||||
def print_entries(entries, fields=None):
|
||||
def print_entries(entries, fields=None, keywords=None):
|
||||
for i in range(len(entries)):
|
||||
entry = entries[i]
|
||||
|
||||
@@ -81,6 +81,11 @@ def print_entries(entries, fields=None):
|
||||
+ "Filtered field(s): "
|
||||
+ ", ".join(fields[i])
|
||||
+ COLOUR_DEFAULT)
|
||||
if keywords is not None:
|
||||
print(COLOUR_ERROR
|
||||
+ "Filtered keyword(s): "
|
||||
+ ", ".join(keywords[i])
|
||||
+ COLOUR_DEFAULT)
|
||||
print("")
|
||||
return 0
|
||||
|
||||
@@ -89,24 +94,24 @@ def get_filters():
|
||||
filters_list = [f for f in os.listdir(FILTERS_DIR) if not f[0] == "."]
|
||||
for i in range(len(filters_list)):
|
||||
path = FILTERS_DIR + filters_list[i]
|
||||
filter_file = open(path)
|
||||
dic = {"fields": [], "values": []}
|
||||
for line in filter_file.readlines():
|
||||
if "#FIELD" in line:
|
||||
field = line.split("=")[1].replace("\"", "").strip()
|
||||
dic["fields"].append(field)
|
||||
elif line[0] == "#" or line in [" \n", "\n", ""]:
|
||||
continue
|
||||
else:
|
||||
value = line.replace("\n", "")
|
||||
dic["values"].append(value)
|
||||
filters.append(dic)
|
||||
filter_file.close()
|
||||
with open(path) as filter_file:
|
||||
dic = {"fields": [], "values": []}
|
||||
for line in filter_file.readlines():
|
||||
if "#FIELD" in line:
|
||||
field = line.split("=")[1].replace("\"", "").strip()
|
||||
dic["fields"].append(field)
|
||||
elif line[0] == "#" or line in [" \n", "\n", ""]:
|
||||
continue
|
||||
else:
|
||||
value = line.replace("\n", "")
|
||||
dic["values"].append(value)
|
||||
filters.append(dic)
|
||||
return filters
|
||||
|
||||
def filter_entries(filters, entries):
|
||||
filtered_entries = []
|
||||
filtered_fields = []
|
||||
filtered_keywords = []
|
||||
for entry in entries:
|
||||
added = False
|
||||
for filter_ in filters:
|
||||
@@ -114,21 +119,52 @@ def filter_entries(filters, entries):
|
||||
values = filter_["values"]
|
||||
for field in fields:
|
||||
for value in values:
|
||||
if not added and value in entry[field]:
|
||||
if not added and value.upper() in str(entry[field]).upper():
|
||||
filtered_entries.append(entry)
|
||||
filtered_fields.append([field])
|
||||
filtered_keywords.append([value])
|
||||
added = True
|
||||
elif added and value in entry[field]:
|
||||
elif added and value.upper() in str(entry[field]).upper():
|
||||
if not field in filtered_fields[-1]:
|
||||
filtered_fields[-1].append(field)
|
||||
return filtered_entries, filtered_fields
|
||||
if not value in filtered_keywords[-1]:
|
||||
filtered_keywords[-1].append(value)
|
||||
return filtered_entries, filtered_fields, filtered_keywords
|
||||
|
||||
def get_ids(entries):
    """Return the ``"id"`` value of every entry, in input order.

    Args:
        entries: Iterable of mapping-like entries; each one must support
            ``entry["id"]`` lookup.

    Returns:
        A new list holding ``entry["id"]`` for each entry (empty when
        *entries* is empty).
    """
    return [entry["id"] for entry in entries]
|
||||
|
||||
def save_ids(ids, library="saved"):
    """Append previously unseen ids to the ``<library>.txt`` database file.

    The ``"oai:"`` and ``"arXiv.org:"`` substrings are stripped from every id
    before it is compared with, or written to, the database. The database is
    a plain text file under ``DB_DIR`` holding one id per line; it is created
    if it does not exist yet.

    Args:
        ids: A single id string, or a list / numpy array of id strings.
        library: Base name (without ``.txt``) of the database file.

    Returns:
        0 once the database has been updated.

    Raises:
        TypeError: If *ids* is not a str, a list or a numpy array.
    """
    if isinstance(ids, str):
        ids = [ids]
    elif not isinstance(ids, (list, np.ndarray)):
        raise TypeError(
            "The type of ids ({}) is not recognized".format(type(ids)))
    ids = [i.replace("oai:", "").replace("arXiv.org:", "") for i in ids]

    path = DB_DIR + library + ".txt"
    try:
        with open(path, "r") as db_file:
            # A set gives O(1) membership tests while writing below.
            known_ids = {line.replace("\n", "") for line in db_file}
    except FileNotFoundError:
        # First use of this library: nothing is known yet.
        known_ids = set()

    # Append mode creates the file when missing, even if nothing is written.
    with open(path, "a") as db_file:
        for i in ids:
            if i not in known_ids:
                db_file.write(i + "\n")
                # Remember it so a duplicate later in this batch is skipped.
                known_ids.add(i)
    return 0
|
||||
|
||||
def today_arxiv():
    """Fetch the feed, keep the entries matching the filters, save and print them.

    The steps are: load the filters, fetch the RSS feed, extract its entries,
    filter them, store the ids of the kept entries with ``save_ids`` and
    print the kept entries with the fields and keywords that matched.

    Returns:
        A tuple ``(entries, fields)`` as produced by ``filter_entries``: the
        kept entries and, for each of them, the list of matching fields.
    """
    filters = get_filters()
    feed = get_rss()
    entries = get_entries(feed)
    # filter_entries returns three parallel lists; unpack all of them in a
    # single call instead of filtering (and printing) twice.
    entries, fields, keywords = filter_entries(filters, entries)
    ids = get_ids(entries)
    save_ids(ids)
    print_entries(entries, fields, keywords)
    # The return shape stays (entries, fields); keywords are only printed.
    return entries, fields
|
||||
|
||||
def get_api_ids(ids):
|
||||
@@ -142,11 +178,13 @@ def get_api_ids(ids):
|
||||
feed = fp.parse(query)
|
||||
return feed
|
||||
|
||||
|
||||
|
||||
|
||||
"""
|
||||
ids = ["oai:arXiv.org:2509.13163"]
|
||||
feed = get_api_ids(ids)
|
||||
entries = get_entries(feed)
|
||||
print_entries(entries)
|
||||
"""
|
||||
|
||||
# Module-level entry point: runs the whole fetch / filter / save / print
# pipeline every time this file is executed or imported.
today_arxiv()
|
||||
|
||||
Reference in New Issue
Block a user