mirror of
https://codeberg.org/Yael-II/ArXtic.git
synced 2026-03-14 22:06:27 +01:00
2025-09-22: Added saves of retrieved ids
This commit is contained in:
4
db/saved.txt
Normal file
4
db/saved.txt
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
2509.15307v1
|
||||||
|
2509.15337v1
|
||||||
|
2509.15720v1
|
||||||
|
2509.16168v1
|
||||||
@@ -58,7 +58,7 @@ def get_entries(rss):
|
|||||||
entries = rss["entries"]
|
entries = rss["entries"]
|
||||||
return entries
|
return entries
|
||||||
|
|
||||||
def print_entries(entries, fields=None):
|
def print_entries(entries, fields=None, keywords=None):
|
||||||
for i in range(len(entries)):
|
for i in range(len(entries)):
|
||||||
entry = entries[i]
|
entry = entries[i]
|
||||||
|
|
||||||
@@ -81,6 +81,11 @@ def print_entries(entries, fields=None):
|
|||||||
+ "Filtered field(s): "
|
+ "Filtered field(s): "
|
||||||
+ ", ".join(fields[i])
|
+ ", ".join(fields[i])
|
||||||
+ COLOUR_DEFAULT)
|
+ COLOUR_DEFAULT)
|
||||||
|
if keywords is not None:
|
||||||
|
print(COLOUR_ERROR
|
||||||
|
+ "Filtered keyword(s): "
|
||||||
|
+ ", ".join(keywords[i])
|
||||||
|
+ COLOUR_DEFAULT)
|
||||||
print("")
|
print("")
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
@@ -89,24 +94,24 @@ def get_filters():
|
|||||||
filters_list = [f for f in os.listdir(FILTERS_DIR) if not f[0] == "."]
|
filters_list = [f for f in os.listdir(FILTERS_DIR) if not f[0] == "."]
|
||||||
for i in range(len(filters_list)):
|
for i in range(len(filters_list)):
|
||||||
path = FILTERS_DIR + filters_list[i]
|
path = FILTERS_DIR + filters_list[i]
|
||||||
filter_file = open(path)
|
with open(path) as filter_file:
|
||||||
dic = {"fields": [], "values": []}
|
dic = {"fields": [], "values": []}
|
||||||
for line in filter_file.readlines():
|
for line in filter_file.readlines():
|
||||||
if "#FIELD" in line:
|
if "#FIELD" in line:
|
||||||
field = line.split("=")[1].replace("\"", "").strip()
|
field = line.split("=")[1].replace("\"", "").strip()
|
||||||
dic["fields"].append(field)
|
dic["fields"].append(field)
|
||||||
elif line[0] == "#" or line in [" \n", "\n", ""]:
|
elif line[0] == "#" or line in [" \n", "\n", ""]:
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
value = line.replace("\n", "")
|
value = line.replace("\n", "")
|
||||||
dic["values"].append(value)
|
dic["values"].append(value)
|
||||||
filters.append(dic)
|
filters.append(dic)
|
||||||
filter_file.close()
|
|
||||||
return filters
|
return filters
|
||||||
|
|
||||||
def filter_entries(filters, entries):
|
def filter_entries(filters, entries):
|
||||||
filtered_entries = []
|
filtered_entries = []
|
||||||
filtered_fields = []
|
filtered_fields = []
|
||||||
|
filtered_keywords = []
|
||||||
for entry in entries:
|
for entry in entries:
|
||||||
added = False
|
added = False
|
||||||
for filter_ in filters:
|
for filter_ in filters:
|
||||||
@@ -114,21 +119,52 @@ def filter_entries(filters, entries):
|
|||||||
values = filter_["values"]
|
values = filter_["values"]
|
||||||
for field in fields:
|
for field in fields:
|
||||||
for value in values:
|
for value in values:
|
||||||
if not added and value in entry[field]:
|
if not added and value.upper() in str(entry[field]).upper():
|
||||||
filtered_entries.append(entry)
|
filtered_entries.append(entry)
|
||||||
filtered_fields.append([field])
|
filtered_fields.append([field])
|
||||||
|
filtered_keywords.append([value])
|
||||||
added = True
|
added = True
|
||||||
elif added and value in entry[field]:
|
elif added and value.upper() in str(entry[field]).upper():
|
||||||
if not field in filtered_fields[-1]:
|
if not field in filtered_fields[-1]:
|
||||||
filtered_fields[-1].append(field)
|
filtered_fields[-1].append(field)
|
||||||
return filtered_entries, filtered_fields
|
if not value in filtered_keywords[-1]:
|
||||||
|
filtered_keywords[-1].append(value)
|
||||||
|
return filtered_entries, filtered_fields, filtered_keywords
|
||||||
|
|
||||||
|
def get_ids(entries):
    """Collect the identifier of every entry.

    @param entries: iterable of mapping-like entries, each exposing an
                    "id" key
    @return: list with the "id" value of each entry, in input order
    """
    return [entry["id"] for entry in entries]
|
||||||
|
|
||||||
|
def save_ids(ids, library="saved"):
    """Append new arXiv ids to the text database of a library.

    The "oai:" and "arXiv.org:" prefixes are stripped from every id. Ids
    already present in the library file, or repeated within the same
    call, are written only once. The file is created if it does not
    exist yet.

    @param ids: a single id (str), or a list / numpy array of ids
    @param library: name of the library; ids go to DB_DIR + library + ".txt"
    @return: 0 on success
    @raise TypeError: if ids is neither a str, a list nor a numpy array
    """
    if isinstance(ids, str):
        ids = [ids]
    elif not (isinstance(ids, list) or isinstance(ids, np.ndarray)):
        raise TypeError(
            "The type of ids ({}) is not recognized".format(type(ids)))
    cleaned = [i.replace("oai:", "").replace("arXiv.org:", "") for i in ids]

    # "a+" creates the file when missing, allows reading after a seek,
    # and always writes at the end of the file.
    with open(DB_DIR + library + ".txt", "a+") as db_file:
        db_file.seek(0)
        content = db_file.read()
        known_ids = set(content.splitlines())
        new_ids = []
        for i in cleaned:
            if i not in known_ids:
                # Remember it so a duplicate in this batch is skipped too
                known_ids.add(i)
                new_ids.append(i)
        if new_ids:
            # Do not glue the first new id to an unterminated last line
            if content and not content.endswith("\n"):
                db_file.write("\n")
            db_file.write("".join(i + "\n" for i in new_ids))
    return 0
|
||||||
|
|
||||||
def today_arxiv():
|
def today_arxiv():
|
||||||
filters = get_filters()
|
filters = get_filters()
|
||||||
feed = get_rss()
|
feed = get_rss()
|
||||||
entries = get_entries(feed)
|
entries = get_entries(feed)
|
||||||
entries, fields = filter_entries(filters, entries)
|
entries, fields, keywords = filter_entries(filters, entries)
|
||||||
print_entries(entries, fields)
|
ids = get_ids(entries)
|
||||||
|
save_ids(ids)
|
||||||
|
print_entries(entries, fields, keywords)
|
||||||
return entries, fields
|
return entries, fields
|
||||||
|
|
||||||
def get_api_ids(ids):
|
def get_api_ids(ids):
|
||||||
@@ -142,11 +178,13 @@ def get_api_ids(ids):
|
|||||||
feed = fp.parse(query)
|
feed = fp.parse(query)
|
||||||
return feed
|
return feed
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
ids = ["oai:arXiv.org:2509.13163"]
|
ids = ["oai:arXiv.org:2509.13163"]
|
||||||
feed = get_api_ids(ids)
|
feed = get_api_ids(ids)
|
||||||
entries = get_entries(feed)
|
entries = get_entries(feed)
|
||||||
print_entries(entries)
|
print_entries(entries)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
today_arxiv()
|
today_arxiv()
|
||||||
|
|||||||
Reference in New Issue
Block a user