diff --git a/.gitignore b/.gitignore index 9be6f28..12048e4 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ log/ venv/ .venv/ filters/ +*.secret diff --git a/cfg/general_config.cfg b/cfg/general_config.cfg index bb884a0..d812b33 100644 --- a/cfg/general_config.cfg +++ b/cfg/general_config.cfg @@ -33,6 +33,12 @@ VENV_DIR="./venv/" DB_DIR="./db/" FILTERS_DIR="./cfg/filters/" +## API Keys +ARXIV_API_KEY="" +ADSABS_API_KEY="$(cat cfg/adsabs.secret)" + ## Remote URLs -QUERY_URL="https://export.arxiv.org/api/query?" -RSS_URL="https://rss.arxiv.org/atom/astro-ph" +ARXIV_QUERY_URL="https://export.arxiv.org/api/query?" +ARXIV_RSS_URL="https://rss.arxiv.org/atom/astro-ph" + +ADSABS_QUERY_URL="https://api.adsabs.harvard.edu/v1/search/query?q=" diff --git a/db/saved.txt b/db/saved.txt index 91aa528..146119f 100644 --- a/db/saved.txt +++ b/db/saved.txt @@ -2,3 +2,5 @@ 2509.15337v1 2509.15720v1 2509.16168v1 +2509.18650v1 +2509.19068v1 diff --git a/src/arxtic.py b/src/arxtic.py index 7ead428..814f8d8 100644 --- a/src/arxtic.py +++ b/src/arxtic.py @@ -36,8 +36,9 @@ import feedparser as fp FILTERS_DIR = os.environ.get("FILTERS_DIR") DB_DIR = os.environ.get("DB_DIR") -QUERY_URL = os.environ.get("QUERY_URL") -RSS_URL = os.environ.get("RSS_URL") +ARXIV_QUERY_URL = os.environ.get("ARXIV_QUERY_URL") +ARXIV_RSS_URL = os.environ.get("ARXIV_RSS_URL") +ADSABS_QUERY_URL = os.environ.get("ADSABS_QUERY_URL") COLOUR_DEFAULT="\033[0m" COLOUR_INPUT="\033[36m" @@ -46,18 +47,60 @@ COLOUR_INFO="\033[34m" COLOUR_WARNING="\033[93m" COLOUR_ERROR="\033[91m" +## General + def wrap(txt, length=80): wrapped_txt = '\n'.join(tw.wrap(txt, length, break_long_words=False)) return wrapped_txt -def get_rss(): - feed = fp.parse(RSS_URL) - return feed - def get_entries(rss): entries = rss["entries"] return entries +def get_filters(): + filters = [] + filters_list = [f for f in os.listdir(FILTERS_DIR) if not f[0] == "."] + for i in range(len(filters_list)): + path = FILTERS_DIR + filters_list[i] + with open(path) as filter_file: + dic = {"fields": [], "values": []} + for line in filter_file.readlines(): + if "#FIELD" in line: + field = line.split("=")[1].replace("\"", "").strip() + dic["fields"].append(field) + elif line[0] == "#" or line in [" \n", "\n", ""]: + continue + else: + value = line.replace("\n", "") + dic["values"].append(value) + filters.append(dic) + return filters + +## ArXiV Entries + +def filter_entries(filters, entries): + filtered_entries = [] + filtered_fields = [] + filtered_keywords = [] + for entry in entries: + added = False + for filter_ in filters: + fields = filter_["fields"] + values = filter_["values"] + for field in fields: + for value in values: + if not added and value.upper() in str(entry[field]).upper(): + filtered_entries.append(entry) + filtered_fields.append([field]) + filtered_keywords.append([value]) + added = True + elif added and value.upper() in str(entry[field]).upper(): + if not field in filtered_fields[-1]: + filtered_fields[-1].append(field) + if not value in filtered_keywords[-1]: + filtered_keywords[-1].append(value) + return filtered_entries, filtered_fields, filtered_keywords + def print_entries(entries, fields=None, keywords=None): for i in range(len(entries)): entry = entries[i] @@ -89,67 +132,27 @@ def print_entries(entries, fields=None, keywords=None): print("") return 0 -def get_filters(): - filters = [] - filters_list = [f for f in os.listdir(FILTERS_DIR) if not f[0] == "."] - for i in range(len(filters_list)): - path = FILTERS_DIR + filters_list[i] - with open(path) as filter_file: - dic = {"fields": [], "values": []} - for line in filter_file.readlines(): - if "#FIELD" in line: - field = line.split("=")[1].replace("\"", "").strip() - dic["fields"].append(field) - elif line[0] == "#" or line in [" \n", "\n", ""]: - continue - else: - value = line.replace("\n", "") - dic["values"].append(value) - filters.append(dic) - return filters +# ArXiV IDs -def filter_entries(filters, entries): - filtered_entries = [] - filtered_fields = [] - filtered_keywords = [] - for entry in entries: - added = False - for filter_ in filters: - fields = filter_["fields"] - values = filter_["values"] - for field in fields: - for value in values: - if not added and value.upper() in str(entry[field]).upper(): - filtered_entries.append(entry) - filtered_fields.append([field]) - filtered_keywords.append([value]) - added = True - elif added and value.upper() in str(entry[field]).upper(): - if not field in filtered_fields[-1]: - filtered_fields[-1].append(field) - if not value in filtered_keywords[-1]: - filtered_keywords[-1].append(value) - return filtered_entries, filtered_fields, filtered_keywords - -def get_ids(entries): +def get_arxiv_ids(entries): ids = [] for entry in entries: ids.append(entry["id"]) return ids -def save_ids(ids, library="saved"): +def save_arxiv_ids(ids, library="saved"): if isinstance(ids, list) or isinstance(ids, np.ndarray): ids = [i.replace("oai:", "").replace("arXiv.org:", "") for i in ids] elif isinstance(ids, str): ids = [ids.replace("oai:", "").replace("arXiv.org:", "")] else: - raise Exception("The type of ids ({}) is not recognized".format(type(ids))) + raise Exception( + "The type of ids ({}) is not recognized".format(type(ids)) + ) with open(DB_DIR + library + ".txt", "a+") as db_file: None # creates the file if not already in the directory with open(DB_DIR + library + ".txt", "r+") as db_file: known_ids = [line.replace("\n", "") for line in db_file.readlines()] - - print(known_ids) with open(DB_DIR + library + ".txt", "a+") as db_file: for i in ids: if not i in known_ids: @@ -157,34 +160,39 @@ def save_ids(ids, library="saved"): db_file.write("\n") return 0 +## ArXiV + +def get_arxiv_rss(): + feed = fp.parse(ARXIV_RSS_URL) + return feed + def today_arxiv(): filters = get_filters() - feed = get_rss() + feed = get_arxiv_rss() entries = get_entries(feed) entries, fields, keywords = filter_entries(filters, entries) - ids = get_ids(entries) - save_ids(ids) + ids = get_arxiv_ids(entries) + save_arxiv_ids(ids) print_entries(entries, fields, keywords) - return entries, fields + return entries, fields, keywords -def get_api_ids(ids): +def get_arxiv_from_ids(ids): if isinstance(ids, list) or isinstance(ids, np.ndarray): ids = [i.replace("oai:", "").replace("arXiv.org:", "") for i in ids] elif isinstance(ids, str): ids = [ids.replace("oai:", "").replace("arXiv.org:", "")] else: - raise Exception("The type of ids ({}) is not recognized".format(type(ids))) - query = QUERY_URL + "id_list=" + ",".join(ids) + raise Exception( + "The type of ids ({}) is not recognized".format(type(ids)) + ) + query = ARXIV_QUERY_URL + "id_list=" + ",".join(ids) feed = fp.parse(query) return feed +## ADS-ABS - +def get_adsabs_from_ids(ids): + return None -""" -ids = ["oai:arXiv.org:2509.13163"] -feed = get_api_ids(ids) -entries = get_entries(feed) -print_entries(entries) -""" -today_arxiv() +entries, fields, keywords = today_arxiv() +