diff --git a/setup.sh b/setup.sh index 2f48983..a39416b 100755 --- a/setup.sh +++ b/setup.sh @@ -32,6 +32,7 @@ mkdir -p -v cfg/filters mkdir -p -v db touch db/read.bib touch db/unread.bib +touch db/library.bib chmod u+x *.sh echo "=== Done ===" diff --git a/src/__pycache__/ads_api.cpython-312.pyc b/src/__pycache__/ads_api.cpython-312.pyc new file mode 100644 index 0000000..31e03aa Binary files /dev/null and b/src/__pycache__/ads_api.cpython-312.pyc differ diff --git a/src/__pycache__/arxiv_api.cpython-312.pyc b/src/__pycache__/arxiv_api.cpython-312.pyc new file mode 100644 index 0000000..13b0891 Binary files /dev/null and b/src/__pycache__/arxiv_api.cpython-312.pyc differ diff --git a/src/__pycache__/bibtex_interface.cpython-312.pyc b/src/__pycache__/bibtex_interface.cpython-312.pyc new file mode 100644 index 0000000..85bbabe Binary files /dev/null and b/src/__pycache__/bibtex_interface.cpython-312.pyc differ diff --git a/src/__pycache__/local_api.cpython-312.pyc b/src/__pycache__/local_api.cpython-312.pyc new file mode 100644 index 0000000..fe30ffd Binary files /dev/null and b/src/__pycache__/local_api.cpython-312.pyc differ diff --git a/src/__pycache__/utils.cpython-312.pyc b/src/__pycache__/utils.cpython-312.pyc new file mode 100644 index 0000000..79a6144 Binary files /dev/null and b/src/__pycache__/utils.cpython-312.pyc differ diff --git a/src/ads_api.py b/src/ads_api.py new file mode 100644 index 0000000..3028e20 --- /dev/null +++ b/src/ads_api.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python +#[TLP:AMBER] LIMITED DISTRIBUTION: WORK IN PROGRESS +""" +ArXtic: + +ArXtic queries arXiv and filters the output. 
+ +@ Author: Moussouni, Yaël (MSc student; yael.moussouni@etu.unistra.fr) +@ Institution: Université de Strasbourg, CNRS, Observatoire astronomique + de Strasbourg, UMR 7550, F-67000 Strasbourg, France +@ Date: 2025-10-10 + +Licence: +ArXtic +Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr) + +ads_api.py +Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr) + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see www.gnu.org/licenses/. 
+""" +import os +from urllib.parse import urlencode, quote_plus + +import requests as rq +import numpy as np +import textwrap as tw +import feedparser as fp +import bibtexparser as bib + +FILTERS_DIR = os.environ.get("FILTERS_DIR") +DB_DIR = os.environ.get("DB_DIR") +PDF_DIR = os.environ.get("PDF_DIR") + +ADSABS_QUERY_URL = os.environ.get("ADSABS_QUERY_URL") +ADSABS_EXPORT_URL = os.environ.get("ADSABS_EXPORT_URL") +ADSABS_API_KEY = os.environ.get("ADSABS_API_KEY") + +COLOUR_DEFAULT="\033[0m" +COLOUR_INPUT="\033[36m" +COLOUR_OUTPUT="\033[32m" +COLOUR_INFO="\033[34m" +COLOUR_WARNING="\033[93m" +COLOUR_ERROR="\033[91m" + +## Parse Entries + +def parse_entries(feed): + num = len(feed.json()["response"]["docs"]) + bibcodes = [] + for i in range(num): + entry = feed.json()["response"]["docs"][i] + bibcodes.append(entry["bibcode"]) + return bibcodes + +## ADS API + +def ads_bibcode_search(query, num=1, sort="date"): + query = urlencode({"q": query, + "fl": "bibcode", + "rows": num, + "sort": sort}) + url = ADSABS_QUERY_URL + query + header = "Bearer " + ADSABS_API_KEY + feed = rq.get(url, headers={'Authorization': header}) + bibcodes = parse_entries(feed) + return bibcodes + +def ads_bibcode(bibcodes): + if isinstance(bibcodes, list) or isinstance(bibcodes, np.ndarray): + bibcodes = bibcodes + elif isinstance(bibcodes, str): + bibcodes = [bibcodes] + else: + raise Exception(("The type of bibcodes ({}) is not recognized" + .format(type(bibcodes)))) + bibentries = "" + for bibcode in bibcodes: + url = ADSABS_EXPORT_URL + bibcode + header = "Bearer " + ADSABS_API_KEY + feed = rq.get(url, headers={'Authorization': header}) + bibentry = feed.text + bibentry = bibentry[:-2] + bibentry += (",\n" + "\tarxtic_notes={},\n" + "\tarxtic_category={},\n" + "\tarxtic_keywords={},\n" + "\tarxtic_score={-1},\n" + "\tarxtic_filename={},\n" + "\tarxtic_date_read={},\n" + "}") + bibentry = (bibentry + .replace("adsurl", "url") + .replace("\n", " ") + .replace("\t", "") + .replace(" ", "")) + 
bibentries += bibentry + + library = bib.parse_string(bibentries) + library.remove(library.failed_blocks) + return library diff --git a/src/arxiv_api.py b/src/arxiv_api.py new file mode 100644 index 0000000..dd424f6 --- /dev/null +++ b/src/arxiv_api.py @@ -0,0 +1,149 @@ +#!/usr/bin/env python +#[TLP:AMBER] LIMITED DISTRIBUTION: WORK IN PROGRESS +""" +ArXtic: + +ArXtic queries arXiv and filters the output. + +@ Author: Moussouni, Yaël (MSc student; yael.moussouni@etu.unistra.fr) +@ Institution: Université de Strasbourg, CNRS, Observatoire astronomique + de Strasbourg, UMR 7550, F-67000 Strasbourg, France +@ Date: 2025-10-10 + +Licence: +ArXtic +Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr) + +arxiv_api.py +Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr) + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see www.gnu.org/licenses/. 
+""" +import os +from urllib.parse import urlencode, quote_plus + +import requests as rq +import numpy as np +import textwrap as tw +import feedparser as fp +import bibtexparser as bib + +FILTERS_DIR = os.environ.get("FILTERS_DIR") +DB_DIR = os.environ.get("DB_DIR") +PDF_DIR = os.environ.get("PDF_DIR") + +ARXIV_QUERY_URL = os.environ.get("ARXIV_QUERY_URL") +ARXIV_RSS_URL = os.environ.get("ARXIV_RSS_URL") + +COLOUR_DEFAULT="\033[0m" +COLOUR_INPUT="\033[36m" +COLOUR_OUTPUT="\033[32m" +COLOUR_INFO="\033[34m" +COLOUR_WARNING="\033[93m" +COLOUR_ERROR="\033[91m" + +## Parse Entries + +def parse_entries(feed): + entries_old = feed["entries"] + entries = [] + for entry_old in entries_old: + entry = {} + entry["id"] = (entry_old["id"] + .replace("oai:", "") + .replace("arXiv.org:", "")) + entry["url"] = entry_old["link"] + entry["title"] = entry_old["title"] + tmp = [] + for element in entry_old["authors"]: + if isinstance(element, dict): + tmp += element["name"].split(",") + entry["author"] = [a.strip() for a in tmp] + entry["abstract"] = entry_old["summary"] + entry["pubdate"] = entry_old["published"][0:10] + entries.append(entry) + return entries + +## Parse Bibtex + +def parse_bibtex(entries, + arxtic_notes = "", + arxtic_category = "", + arxtic_keywords = "", + arxtic_score = 0, + arxtic_filename = "", + arxtic_date_read = ""): + if not (isinstance(entries, list) or isinstance(entries, np.ndarray)): + entries = [entries] + bibentries = "" + for entry in entries: + key = entry["id"].replace("http://arxiv.org/abs/", "").replace("/", "_") + title = entry["title"] + author = " and ".join(entry["author"]) + year = entry["pubdate"][0:4] + abstract = entry["abstract"] + eprint = key + url = entry["url"] + bibentry = (f"@misc{{{key},\n" + f"\ttitle={{{title}}},\n" + f"\tauthor={{{author}}},\n" + f"\tyear={{{year}}},\n" + f"\teprint={{{eprint}}},\n" + f"\turl={{{url}}},\n" + f"\tabstract={{{abstract}}},\n" + "\tarxtic_notes={},\n" + "\tarxtic_category={},\n" + 
"\tarxtic_keywords={},\n" + "\tarxtic_score={-1},\n" + "\tarxtic_filename={},\n" + "\tarxtic_date_read={},\n" + "}") + bibentries += bibentry + library = bib.parse_string(bibentries) + library.remove(library.failed_blocks) + return library + +## arXiv API + +def arxiv_today(): + feed = fp.parse(ARXIV_RSS_URL) + for i in range(len(feed["entries"])): + feed["entries"][i]["summary"] = (" " + .join(feed["entries"][i]["summary"].split("\n")[1:]) + .replace("Abstract: ", "")) + entries = parse_entries(feed) + library = parse_bibtex(entries) + return library + +def arxiv_id(ids): + if isinstance(ids, list) or isinstance(ids, np.ndarray): + ids = [i.replace("oai:", "").replace("arXiv.org:", "") for i in ids] + elif isinstance(ids, str): + ids = [ids.replace("oai:", "").replace("arXiv.org:", "")] + else: + raise Exception(("The type of ids ({}) is not recognized" + .format(type(ids)))) + query = urlencode({"id_list": ",".join(ids)}) + url = ARXIV_QUERY_URL + query + feed = fp.parse(url) + for i in range(len(feed["entries"])): + feed["entries"][i]["summary"] = (feed["entries"][i]["summary"] + .replace("\n", " ") + .replace("\t", "") + .replace("Abstract: ", "")) + entries = parse_entries(feed) + library = parse_bibtex(entries) + return library + + diff --git a/src/arxtic.py b/src/arxtic.py index aaae729..554f012 100644 --- a/src/arxtic.py +++ b/src/arxtic.py @@ -8,7 +8,7 @@ ArXtic queries arXiv and filters the output. @ Author: Moussouni, Yaël (MSc student; yael.moussouni@etu.unistra.fr) @ Institution: Université de Strasbourg, CNRS, Observatoire astronomique de Strasbourg, UMR 7550, F-67000 Strasbourg, France -@ Date: 2025-09-15 +@ Date: 2025-10-10 Licence: ArXtic @@ -31,21 +31,22 @@ You should have received a copy of the GNU General Public License along with this program. If not, see www.gnu.org/licenses/. 
""" import os +from urllib.parse import urlencode, quote_plus + +import requests as rq +import numpy as np import textwrap as tw import feedparser as fp import bibtexparser as bib -import requests as rq -import numpy as np -from urllib.parse import urlencode, quote_plus + +import arxiv_api +import ads_api +import local_api +import utils FILTERS_DIR = os.environ.get("FILTERS_DIR") DB_DIR = os.environ.get("DB_DIR") PDF_DIR = os.environ.get("PDF_DIR") -ARXIV_QUERY_URL = os.environ.get("ARXIV_QUERY_URL") -ARXIV_RSS_URL = os.environ.get("ARXIV_RSS_URL") -ADSABS_QUERY_URL = os.environ.get("ADSABS_QUERY_URL") -ADSABS_EXPORT_URL = os.environ.get("ADSABS_EXPORT_URL") -ADSABS_API_KEY = os.environ.get("ADSABS_API_KEY") COLOUR_DEFAULT="\033[0m" COLOUR_INPUT="\033[36m" @@ -54,328 +55,13 @@ COLOUR_INFO="\033[34m" COLOUR_WARNING="\033[93m" COLOUR_ERROR="\033[91m" -## General +ids = ["2510.06329", "2509.13163"] +bibcodes = ["2022A&A...658A.152V", "2021A&A...649A..97L"] +query = "first_author:\"Voggel, K\"year:(2022)" -def wrap(txt, length=80): - wrapped_txt = '\n'.join(tw.wrap(txt, length, break_long_words=False)) - return wrapped_txt +utils.print_reference(arxiv_api.arxiv_today()) +utils.print_title_author(arxiv_api.arxiv_id(ids)) +print(ads_api.ads_bibcode_search(query, num=2)) +utils.print_abstract(ads_api.ads_bibcode(bibcodes)) -## Filters - -def get_filters(): - filters = [] - filters_list = [f for f in os.listdir(FILTERS_DIR) if not f[0] == "."] - for i in range(len(filters_list)): - path = FILTERS_DIR + filters_list[i] - with open(path) as filter_file: - dic = {"fields": [], "values": [], "score": 1} - for line in filter_file.readlines(): - if "#FIELD" in line: - field = line.split("=")[1].replace("\"", "").strip() - dic["fields"].append(field) - elif "#SCORE" in line: - field = line.split("=")[1].strip() - dic["score"] = int(field) - elif line[0] == "#" or line in [" \n", "\n", ""]: - continue - else: - value = line.replace("\n", "") - dic["values"].append(value) - 
filters.append(dic) - return filters - -def filter_entries(filters, entries): - filtered_entries = [] - filtered_fields = [] - filtered_keywords = [] - filtered_score = [] - for entry in entries: - added = False - for filter_ in filters: - fields = filter_["fields"] - values = filter_["values"] - score = filter_["score"] - for field in fields: - for value in values: - if field in list(entry): - val = entry[field] - else: - val = "" - if not added and value.upper() in str(val).upper(): - filtered_entries.append(entry) - filtered_fields.append([field]) - filtered_keywords.append([value]) - filtered_score.append(score) - added = True - elif added and value.upper() in str(val).upper(): - filtered_score[-1] = filtered_score[-1] + score - if not field in filtered_fields[-1]: - filtered_fields[-1].append(field) - if not value in filtered_keywords[-1]: - filtered_keywords[-1].append(value) - filtered_data = {"fields": filtered_fields, - "keywords": filtered_keywords, - "score": filtered_score} - return filtered_entries, filtered_data - -## Print entries - -def print_entries(entries, data=None): - for i in range(len(entries)): - entry = entries[i] - - print(COLOUR_INFO, end="") - if "bibcode" in list(entry): - print(entry["bibcode"], end="") - if "id" in list(entry): - print(entry["id"], end="") - if "arxiv_announce_type" in list(entry) : - print(" (" + entry["arxiv_announce_type"] + ")", end="") - print(" [" + entry["link"] + "]", end="") - print(COLOUR_DEFAULT) - - print(COLOUR_DEFAULT + wrap(entry["title"]) + COLOUR_DEFAULT) - print(COLOUR_OUTPUT - + wrap(", ".join(entry["author"])) - + COLOUR_DEFAULT) - print(COLOUR_INPUT - + wrap(entry["abstract"]) - + COLOUR_DEFAULT) - if data is not None: - print(COLOUR_ERROR - + "Filtered field(s): " - + ", ".join(data["fields"][i]) - + COLOUR_DEFAULT) - print(COLOUR_ERROR - + "Filtered keyword(s): " - + ", ".join(data["keywords"][i]) - + COLOUR_DEFAULT) - print(COLOUR_ERROR - + "Filtered score: " - + str(data["score"][i]) - + 
COLOUR_DEFAULT) - print("") - return 0 - -# IDs - -def get_arxiv_ids(entries): - ids = [] - for entry in entries: - ids.append(entry["id"]) - return ids - -def save_arxiv_ids(ids, library="saved"): - if isinstance(ids, list) or isinstance(ids, np.ndarray): - ids = [i.replace("oai:", "").replace("arXiv.org:", "") for i in ids] - elif isinstance(ids, str): - ids = [ids.replace("oai:", "").replace("arXiv.org:", "")] - else: - raise Exception( - "The type of ids ({}) is not recognized".format(type(ids)) - ) - with open(DB_DIR + library + ".txt", "a+") as db_file: - None # creates the file if not already in the directory - with open(DB_DIR + library + ".txt", "r+") as db_file: - known_ids = [line.replace("\n", "") for line in db_file.readlines()] - with open(DB_DIR + library + ".txt", "a+") as db_file: - for i in ids: - if not i in known_ids: - db_file.write(i) - db_file.write("\n") - return 0 - -## ArXiV - -def get_arxiv_rss(): - feed = fp.parse(ARXIV_RSS_URL) - return feed - -def today_arxiv(): - filters = get_filters() - feed = get_arxiv_rss() - entries = get_arxiv_entries(feed) - entries, data = filter_entries(filters, entries) - print_entries(entries, data) - return entries, data - -def get_arxiv_from_ids(ids): - if isinstance(ids, list) or isinstance(ids, np.ndarray): - ids = [i.replace("oai:", "").replace("arXiv.org:", "") for i in ids] - elif isinstance(ids, str): - ids = [ids.replace("oai:", "").replace("arXiv.org:", "")] - else: - raise Exception( - "The type of ids ({}) is not recognized".format(type(ids)) - ) - query = urlencode({"id_list": ",".join(ids)}) - url = ARXIV_QUERY_URL + query - feed = fp.parse(url) - return feed - -## ADS-ABS - -def ads_search(query, num=5, sort="date"): - query = urlencode({"q": query, - "fl": ("bibcode,title,author,abstract,bibstem,doi," - "keyword,citation,pubdate"), - "rows": num, - "sort": sort}) - url = ADSABS_QUERY_URL + query - header = "Bearer " + ADSABS_API_KEY - feed = rq.get(url, headers={'Authorization': header}) - 
return feed - -def ads_author(author, num=10, sort="date"): - filters = get_filters() - feed = ads_search("author:" + author, num=num, sort=sort) - entries = get_ads_entries(feed) - entries, data = filter_entries(filters, entries) - print_entries(entries, data) - return entries, data - -# Entries - -def get_arxiv_entries(rss): - entries_old = rss["entries"] - entries = [] - for entry_old in entries_old: - entry = {} - entry["id"] = entry_old["id"].replace("oai:", "").replace("arXiv.org:", "") - entry["link"] = entry_old["link"] - entry["title"] = entry_old["title"] - tmp = [] - for element in entry_old["authors"]: - if isinstance(element, dict): - tmp += element["name"].split(",") - entry["author"] = [a.strip() for a in tmp] - entry["abstract"] = "\n".join(entry_old["summary"].split("\n")[1:])[10:] - entry["pubdate"] = entry_old["published"][0:10] - entries.append(entry) - return entries - - -def get_ads_entries(feed): - num = len(feed.json()["response"]["docs"]) - entries = [] - for i in range(num): - entry = feed.json()["response"]["docs"][i] - entry["link"] = "https://ui.adsabs.harvard.edu/abs/" + entry["bibcode"] - entry["title"] = entry["title"][0] - entry["publisher"] = entry["bibstem"][0] - entries.append(entry) - return entries - -# BibTeX - -def arxiv_to_bibtex(entry, - arxtic_notes = "", - arxtic_category = "", - arxtic_keywords = "", - arxtic_score = 0, - arxtic_filename = ""): - key = entry["id"] - title = entry["title"] - author = " and ".join(entry["author"]) - year = entry["pubdate"][0:4] - eprint = key - url = entry["link"] - bibentry = (f"@misc{{{key},\n" - f"\ttitle={{{title}}},\n" - f"\tauthor={{{author}}},\n" - f"\tyear={{{year}}},\n" - f"\teprint={{{eprint}}},\n" - f"\turl={{{url}}},\n" - f"\tarxtic_notes={{{arxtic_notes}}},\n" - f"\tarxtic_category={{{arxtic_category}}},\n" - f"\tarxtic_keywords={{{arxtic_keywords}}},\n" - f"\tarxtic_score={{{str(arxtic_score)}}},\n" - f"\tarxtic_filename={{{str(arxtic_filename)}}},\n" - "}") - bibtex = 
bib.parse_string(bibentry) - return bibtex - -def ads_to_bibtex(entry, - arxtic_notes = "", - arxtic_category = "", - arxtic_keywords = "", - arxtic_score = 0, - arxtic_filename = ""): - bibcode = entry["bibcode"] - url = ADSABS_EXPORT_URL + bibcode - header = "Bearer " + ADSABS_API_KEY - feed = rq.get(url, headers={'Authorization': header}) - bibentry = feed.text - bibentry = bibentry[:-2] - bibentry += (",\n" - f"\tarxtic_notes={{{arxtic_notes}}},\n" - f"\tarxtic_category={{{arxtic_category}}},\n" - f"\tarxtic_keywords={{{arxtic_keywords}}},\n" - f"\tarxtic_score={{{str(arxtic_score)}}},\n" - f"\tarxtic_filename={{{str(arxtic_filename)}}},\n" - "}") - bibtex = bib.parse_string(bibentry) - return bibtex - -def list_pdf(): - bibtex_list = [] - pdf_names = [f for f in os.listdir(PDF_DIR) - if not f[0] == "." and ".pdf" in f] - for pdf_name in pdf_names: - fields = pdf_name.replace(".pdf", "").split("_") - if len(fields) < 2: - print(COLOUR_WARNING - + f"Warning: {pdf_name} has not been correctly identified. " - + "(unrecognized format #1)" - + COLOUR+DEFAULT) - elif fields[1].upper() == "ARXIV": - arxiv_id = "/".join(fields[2:]) - feed = get_arxiv_from_ids(arxiv_id) - entries = get_arxiv_entries(feed) - if len(entries) == 1: - entry = entries[0] - bibtex = arxiv_to_bibtex(entry, - arxtic_score=99, - arxtic_filename=pdf_name) - bibtex_list.append(bibtex) - else: - print(COLOUR_WARNING - + f"Warning: {pdf_name} has not been correctly identified. 
" - + "(ambiguous #1)" - + COLOUR_DEFAULT) - elif len(fields) == 5: - first_author = fields[0] - year = fields[1] - bibstem = fields[2] - volume = fields[3] - page = fields[4] - if bibstem == "AA": bibstem = "A&A" - query=(f"first_author:\"{first_author}\"" - f"year:({year})" - f"bibstem:\"{bibstem}\"" - f"volume:\"{volume}\"" - f"page:\"{page}\"") - feed = ads_search(query, num=2) - entries = get_ads_entries(feed) - if len(entries) == 1: - entry = entries[0] - bibtex = ads_to_bibtex(entry, - arxtic_score=99, - arxtic_filename=pdf_name) - bibtex_list.append(bibtex) - else: - print(COLOUR_WARNING - + f"Warning: {pdf_name} has not been correctly identified. " - + "(ambiguous #2)" - + COLOUR_DEFAULT) - else: - print(COLOUR_WARNING - + f"Warning: {pdf_name} has not been correctly identified. " - + "(unrecognized format #2)" - + COLOUR_DEFAULT) - return None - - -list_pdf() - -#entries, data = today_arxiv() +utils.print_reference(local_api.update_local_pdf()) diff --git a/src/legacy.py b/src/legacy.py new file mode 100644 index 0000000..6b16a2f --- /dev/null +++ b/src/legacy.py @@ -0,0 +1,194 @@ +#!/usr/bin/env python +#[TLP:AMBER] LIMITED DISTRIBUTION: WORK IN PROGRESS +""" +ArXtic: + +ArXtic queries arXiv and filters the output. + +@ Author: Moussouni, Yaël (MSc student; yael.moussouni@etu.unistra.fr) +@ Institution: Université de Strasbourg, CNRS, Observatoire astronomique + de Strasbourg, UMR 7550, F-67000 Strasbourg, France +@ Date: 2025-09-15 + +Licence: +ArXtic +Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr) + +legacy.py +Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr) + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3 of the License, or +(at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see www.gnu.org/licenses/. +""" +import os +import textwrap as tw +import feedparser as fp +import bibtexparser as bib +import requests as rq +import numpy as np +from urllib.parse import urlencode, quote_plus + +FILTERS_DIR = os.environ.get("FILTERS_DIR") +DB_DIR = os.environ.get("DB_DIR") +PDF_DIR = os.environ.get("PDF_DIR") +ARXIV_QUERY_URL = os.environ.get("ARXIV_QUERY_URL") +ARXIV_RSS_URL = os.environ.get("ARXIV_RSS_URL") +ADSABS_QUERY_URL = os.environ.get("ADSABS_QUERY_URL") +ADSABS_EXPORT_URL = os.environ.get("ADSABS_EXPORT_URL") +ADSABS_API_KEY = os.environ.get("ADSABS_API_KEY") + +COLOUR_DEFAULT="\033[0m" +COLOUR_INPUT="\033[36m" +COLOUR_OUTPUT="\033[32m" +COLOUR_INFO="\033[34m" +COLOUR_WARNING="\033[93m" +COLOUR_ERROR="\033[91m" + +## Filters + +def get_filters(): + filters = [] + filters_list = [f for f in os.listdir(FILTERS_DIR) if not f[0] == "."] + for i in range(len(filters_list)): + path = FILTERS_DIR + filters_list[i] + with open(path) as filter_file: + dic = {"fields": [], "values": [], "score": 1} + for line in filter_file.readlines(): + if "#FIELD" in line: + field = line.split("=")[1].replace("\"", "").strip() + dic["fields"].append(field) + elif "#SCORE" in line: + field = line.split("=")[1].strip() + dic["score"] = int(field) + elif line[0] == "#" or line in [" \n", "\n", ""]: + continue + else: + value = line.replace("\n", "") + dic["values"].append(value) + filters.append(dic) + return filters + +def filter_entries(filters, entries): + filtered_entries = [] + filtered_fields = [] + filtered_keywords = [] + filtered_score = [] + for entry in entries: + added = False + for filter_ in filters: + fields = 
filter_["fields"] + values = filter_["values"] + score = filter_["score"] + for field in fields: + for value in values: + if field in list(entry): + val = entry[field] + else: + val = "" + if not added and value.upper() in str(val).upper(): + filtered_entries.append(entry) + filtered_fields.append([field]) + filtered_keywords.append([value]) + filtered_score.append(score) + added = True + elif added and value.upper() in str(val).upper(): + filtered_score[-1] = filtered_score[-1] + score + if not field in filtered_fields[-1]: + filtered_fields[-1].append(field) + if not value in filtered_keywords[-1]: + filtered_keywords[-1].append(value) + filtered_data = {"fields": filtered_fields, + "keywords": filtered_keywords, + "score": filtered_score} + return filtered_entries, filtered_data + +if __name__ == "__main__": + read = file_to_bibtex("read.bib") + unread = file_to_bibtex("unread.bib") + library = file_to_bibtex("library.bib") + quit_action = False + + working_bibtex = bib.Library() + + while not quit_action: + read_keys = [b.key for b in read.blocks] + unread_keys = [b.key for b in unread.blocks] + library_keys = [b.key for b in library.blocks] + library_keys = [b.key for b in library.blocks] + + + print(COLOUR_INPUT + "Select an action") + action = input("> " + COLOUR_DEFAULT) + if action.upper() in ["QUIT", "EXIT", "Q"]: + quit_action = True + elif action in ["", " ", "help", "h"]: + print(COLOUR_OUTPUT + + "Available commands:\n" + + "\t- quit, exit, q: exit\n" + + COLOUR_DEFAULT) + + # Print + elif action.split(" ")[0].upper() in ["PRINT", "P"]: + if len(action.split(" ")) == 1: + print_bibtex(working_bibtex) + elif action.split(" ")[1].upper() == "READ": + print_bibtex(read) + elif action.split(" ")[1].upper() == "UNREAD": + print_bibtex(unread) + elif action.split(" ")[1].upper() == "LIBRARY": + print_bibtex(library) + else: + search_key = action.split(" ")[1] + + if search_key in read_keys: + print_bibtex(read.blocks[read_keys.index(search_key)]) + elif 
search_key in unread_keys: + print_bibtex(unread.blocks[unread_keys.index(search_key)]) + elif search_key in library_keys: + print_bibtex(library.blocks[library_keys.index(search_key)]) + else: + print(COLOUR_WARNING + + f"Warning: {search_key} cannot be found" + + COLOUR_DEFAULT) + # Clear + elif action.upper() in ["CLEAR", "CLEAN"]: + working_bibtex = bib.Library() + # Today + elif action.upper() in ["TODAY"]: + today_bibtex = today_arxiv() + working_bibtex.add(today_bibtex.blocks) + + # Library + elif action.upper() in ["LIBRARY"]: + library = list_pdf(library) + bibtex_to_file(library, "library.bib") + + # Arxiv + elif action.split(" ")[0].upper() == "ARXIV": + arxiv_ids = action.split(" ")[1:] + feed = get_arxiv_from_ids(arxiv_ids) + entries = get_arxiv_entries(feed) + for entry in entries: + bibtex_entry = arxiv_to_bibtex(entry, + arxtic_score=99) + working_bibtex.add(bibtex_entry.blocks) + + # ADS + elif action.split(" ")[0].upper() == "ADS": + ads_bibcode = "".join(action.split(" ")[1:]) + feed = get_ads_from_bibcode(ads_bibcode) + entries = get_ads_entries(feed) + for entry in entries: + bibtex_entry = ads_to_bibtex(entry, + arxtic_score=99) + working_bibtex.add(bibtex_entry.blocks) diff --git a/src/local_api.py b/src/local_api.py new file mode 100644 index 0000000..030ce10 --- /dev/null +++ b/src/local_api.py @@ -0,0 +1,127 @@ +#!/usr/bin/env python +#[TLP:AMBER] LIMITED DISTRIBUTION: WORK IN PROGRESS +""" +ArXtic: + +ArXtic queries arXiv and filters the output. 
+ +@ Author: Moussouni, Yaël (MSc student; yael.moussouni@etu.unistra.fr) +@ Institution: Université de Strasbourg, CNRS, Observatoire astronomique + de Strasbourg, UMR 7550, F-67000 Strasbourg, France +@ Date: 2025-10-10 + +Licence: +ArXtic +Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr) + +local_api.py +Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr) + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see www.gnu.org/licenses/. 
+"""
+import os
+from urllib.parse import urlencode, quote_plus
+
+import requests as rq
+import numpy as np
+import textwrap as tw
+import feedparser as fp
+import bibtexparser as bib
+
+import arxiv_api
+import ads_api
+
+FILTERS_DIR = os.environ.get("FILTERS_DIR")
+DB_DIR = os.environ.get("DB_DIR")
+PDF_DIR = os.environ.get("PDF_DIR")
+
+COLOUR_DEFAULT="\033[0m"
+COLOUR_INPUT="\033[36m"
+COLOUR_OUTPUT="\033[32m"
+COLOUR_INFO="\033[34m"
+COLOUR_WARNING="\033[93m"
+COLOUR_ERROR="\033[91m"
+
+def file_to_bibtex(filename, directory=DB_DIR):
+    if not ".bib" in filename:
+        filename += ".bib"
+    bibtex = bib.parse_file(directory + filename)
+    bibtex.remove(bibtex.failed_blocks)
+    return bibtex
+
+def bibtex_to_file(bibtex, filename, directory=DB_DIR):
+    if not ".bib" in filename:
+        filename += ".bib"
+    bibentry = bib.write_file(directory+filename, bibtex)
+    return bibtex
+
+def update_local_pdf(library=None, directory=PDF_DIR):
+    # TODO: delete entry in library if the PDF file is deleted.
+    if library is None:
+        library = bib.Library()
+        known_pdf = []
+    else:
+        blocks = [b for b in library.blocks if isinstance(b, bib.model.Block)]
+        known_pdf = [block["arxtic_filename"] for block in blocks]
+
+    folder_pdf = [f for f in os.listdir(directory)
+                  if not f[0] == "." and ".pdf" in f]
+
+    for pdf in folder_pdf:
+        fields = pdf.replace(".pdf", "").split("_")
+
+        if pdf in known_pdf:
+            continue
+        elif len(fields) < 2:
+            print(COLOUR_WARNING
+                  + f"Warning: {pdf} has not been correctly identified. "
+                  + "(unrecognized format #1)"
+                  + COLOUR_DEFAULT)
+        elif fields[1].upper() == "ARXIV":
+            arxiv_id = "/".join(fields[2:])
+            arxiv_library = arxiv_api.arxiv_id(arxiv_id)
+            if len(arxiv_library.blocks) == 1:
+                library.add(arxiv_library.blocks)
+            else:
+                print(COLOUR_WARNING
+                      + f"Warning: {pdf} has not been correctly identified. 
" + + "(ambiguous #1)" + + COLOUR_DEFAULT) + elif len(fields) == 5: + first_author = fields[0] + year = fields[1] + bibstem = fields[2] + volume = fields[3] + page = fields[4] + if bibstem == "AA": bibstem = "A&A" + query=(f"first_author:\"{first_author}\"" + f"year:({year})" + f"bibstem:\"{bibstem}\"" + f"volume:\"{volume}\"" + f"page:\"{page}\"") + bibcodes = ads_api.ads_bibcode_search(query, num=2) + if len(bibcodes) == 1: + ads_library = ads_api.ads_bibcode(bibcodes) + library.add(ads_library.blocks) + else: + print(COLOUR_WARNING + + f"Warning: {pdf} has not been correctly identified. " + + "(ambiguous #2)" + + COLOUR_DEFAULT) + else: + print(COLOUR_WARNING + + f"Warning: {pdf} has not been correctly identified. " + + "(unrecognized format #2)" + + COLOUR_DEFAULT) + return library diff --git a/src/utils.py b/src/utils.py new file mode 100644 index 0000000..3fb7576 --- /dev/null +++ b/src/utils.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python +#[TLP:AMBER] LIMITED DISTRIBUTION: WORK IN PROGRESS +""" +ArXtic: + +ArXtic queries arXiv and filters the output. + +@ Author: Moussouni, Yaël (MSc student; yael.moussouni@etu.unistra.fr) +@ Institution: Université de Strasbourg, CNRS, Observatoire astronomique + de Strasbourg, UMR 7550, F-67000 Strasbourg, France +@ Date: 2025-10-10 + +Licence: +ArXtic +Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr) + +utils.py +Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr) + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program. If not, see www.gnu.org/licenses/. +""" +import os +from urllib.parse import urlencode, quote_plus + +import requests as rq +import numpy as np +import textwrap as tw +import feedparser as fp +import bibtexparser as bib + +FILTERS_DIR = os.environ.get("FILTERS_DIR") +DB_DIR = os.environ.get("DB_DIR") +PDF_DIR = os.environ.get("PDF_DIR") + +COLOUR_DEFAULT="\033[0m" +COLOUR_INPUT="\033[36m" +COLOUR_OUTPUT="\033[32m" +COLOUR_INFO="\033[34m" +COLOUR_WARNING="\033[93m" +COLOUR_ERROR="\033[91m" + +def wrap(txt, length=80): + wrapped_txt = '\n'.join(tw.wrap(txt, length, break_long_words=False)) + return wrapped_txt + +def print_abstract(library): + if not isinstance(library, bib.Library): + library = bib.Library(library) + for block in library.blocks: + if isinstance(block, bib.model.Block): + print(COLOUR_INFO, end="") + print(block.key, end="") + print(" [" + block["url"] + "]", end="") + print(COLOUR_DEFAULT) + + print(COLOUR_DEFAULT + wrap(block["title"]) + COLOUR_DEFAULT) + print(COLOUR_OUTPUT + + wrap(", ".join(block["author"].split(" and "))) + + COLOUR_DEFAULT) + print(COLOUR_INPUT + + wrap(block["abstract"]) + + COLOUR_DEFAULT) + print("") + return 0 + +def print_title_author(library): + if not isinstance(library, bib.Library): + library = bib.Library(library) + for block in library.blocks: + if isinstance(block, bib.model.Block): + print(COLOUR_INFO, end="") + print(block.key, end="") + print(" [" + block["url"] + "]", end="") + print(COLOUR_DEFAULT) + + print(COLOUR_DEFAULT + wrap(block["title"]) + COLOUR_DEFAULT) + print(COLOUR_OUTPUT + + wrap(", ".join(block["author"].split(" and "))) + + COLOUR_DEFAULT) + print("") + return 0 + +def print_reference(library): + if not isinstance(library, bib.Library): + library = bib.Library(library) + for block in library.blocks: + if isinstance(block, bib.model.Block): + print(COLOUR_INFO, end="") + print(block.key, end="") + 
print(" [" + block["url"] + "]", end="") + print(COLOUR_DEFAULT) + return 0