2025-10-11: Rewriting and segmentation of the code

2026-03-15 06:16:26 +01:00 · 2025-10-11 11:38:33 +02:00
parent 6c17b76c37
commit 7baa9137bf
12 changed files with 704 additions and 332 deletions
--- a/setup.sh
+++ b/setup.sh
@@ -32,6 +32,7 @@ mkdir -p -v cfg/filters
 mkdir -p -v db
 touch db/read.bib
 touch db/unread.bib
 touch db/library.bib
 chmod u+x *.sh
 echo "=== Done ==="
--- a/src/pycache/ads_api.cpython-312.pyc
+++ b/src/pycache/ads_api.cpython-312.pyc
--- a/src/pycache/arxiv_api.cpython-312.pyc
+++ b/src/pycache/arxiv_api.cpython-312.pyc
--- a/src/pycache/bibtex_interface.cpython-312.pyc
+++ b/src/pycache/bibtex_interface.cpython-312.pyc
--- a/src/pycache/local_api.cpython-312.pyc
+++ b/src/pycache/local_api.cpython-312.pyc
--- a/src/pycache/utils.cpython-312.pyc
+++ b/src/pycache/utils.cpython-312.pyc
--- a/src/ads_api.py
+++ b/src/ads_api.py
@@ -0,0 +1,112 @@
 #!/usr/bin/env python
 #[TLP:AMBER] LIMITED DISTRIBUTION: WORK IN PROGRESS
 """
 ArXtic: 
 ArXtic queries arXiv and filters the output.
@ Author: Moussouni, Yaël (MSc student; yael.moussouni@etu.unistra.fr)
@ Institution:  Université de Strasbourg, CNRS, Observatoire astronomique
                de Strasbourg, UMR 7550, F-67000 Strasbourg, France
@ Date: 2025-10-10
 Licence:
 ArXtic
 Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr)
 ads_api.py
 Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr)
 This program is free software: you can redistribute it and/or modify 
 it under the terms of the GNU General Public License as published by 
 the Free Software Foundation; either version 3 of the License, or 
 (at your option) any later version.
 This program is distributed in the hope that it will be useful, 
 but WITHOUT ANY WARRANTY; without even the implied warranty of 
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 
 GNU General Public License for more details.
 You should have received a copy of the GNU General Public License 
 along with this program. If not, see www.gnu.org/licenses/.
 """
 import os
 from urllib.parse import urlencode, quote_plus
 import requests as rq
 import numpy as np
 import textwrap as tw
 import feedparser as fp
 import bibtexparser as bib
 FILTERS_DIR = os.environ.get("FILTERS_DIR")
 DB_DIR = os.environ.get("DB_DIR")
 PDF_DIR = os.environ.get("PDF_DIR")
 ADSABS_QUERY_URL = os.environ.get("ADSABS_QUERY_URL")
 ADSABS_EXPORT_URL = os.environ.get("ADSABS_EXPORT_URL")
 ADSABS_API_KEY = os.environ.get("ADSABS_API_KEY")
 COLOUR_DEFAULT="\033[0m"
 COLOUR_INPUT="\033[36m"
 COLOUR_OUTPUT="\033[32m"
 COLOUR_INFO="\033[34m"
 COLOUR_WARNING="\033[93m"
 COLOUR_ERROR="\033[91m"
 ## Parse Entries
 def parse_entries(feed):
    """
    Collect the bibcodes listed in an ADS search response.

    feed: object whose json() method returns a mapping holding the
          documents under ["response"]["docs"].
    Returns the list of every document's "bibcode" value, in order.
    """
    # Decode the payload a single time rather than once per document.
    docs = feed.json()["response"]["docs"]
    return [doc["bibcode"] for doc in docs]
 ## ADS API
 def ads_bibcode_search(query, num=1, sort="date"):
    """
    Query ADS and return the bibcodes of the matching records.

    query: search expression sent as the "q" parameter.
    num: value sent as the "rows" parameter.
    sort: value sent as the "sort" parameter.
    Returns the list produced by parse_entries() for the response.
    """
    # Only the bibcode field is requested from the service.
    params = {"q": query, "fl": "bibcode", "rows": num, "sort": sort}
    url = ADSABS_QUERY_URL + urlencode(params)
    auth = {'Authorization': "Bearer " + ADSABS_API_KEY}
    return parse_entries(rq.get(url, headers=auth))
 def ads_bibcode(bibcodes):
    """
    Download the BibTeX export of one or several ADS bibcodes.

    bibcodes: a single bibcode string, or a list / numpy array of them.
    Returns the library obtained by parsing the exported entries, each one
    extended with empty arxtic_* fields (arxtic_score set to -1); blocks
    reported as failed by the parser are removed.
    Raises Exception when bibcodes is neither a string, a list nor an array.
    """
    if isinstance(bibcodes, str):
        bibcodes = [bibcodes]
    elif not isinstance(bibcodes, (list, np.ndarray)):
        raise Exception(("The type of bibcodes ({}) is not recognized"
                         .format(type(bibcodes))))
    # Extra fields appended to every exported entry, with the closing brace.
    arxtic_tail = (",\n"
                   "\tarxtic_notes={},\n"
                   "\tarxtic_category={},\n"
                   "\tarxtic_keywords={},\n"
                   "\tarxtic_score={-1},\n"
                   "\tarxtic_filename={},\n"
                   "\tarxtic_date_read={},\n"
                   "}")
    # Applied in this order to flatten each entry onto a single line.
    substitutions = (("adsurl", "url"),
                     ("\n", " "),
                     ("\t", ""),
                     ("        ", ""))
    flattened = []
    for code in bibcodes:
        target = ADSABS_EXPORT_URL + code
        auth = {'Authorization': "Bearer " + ADSABS_API_KEY}
        reply = rq.get(target, headers=auth)
        # The last two characters of the export are dropped before the
        # arxtic fields are appended.
        text = reply.text[:-2] + arxtic_tail
        for old, new in substitutions:
            text = text.replace(old, new)
        flattened.append(text)
    library = bib.parse_string("".join(flattened))
    library.remove(library.failed_blocks)
    return library
--- a/src/arxiv_api.py
+++ b/src/arxiv_api.py
@@ -0,0 +1,149 @@
 #!/usr/bin/env python
 #[TLP:AMBER] LIMITED DISTRIBUTION: WORK IN PROGRESS
 """
 ArXtic: 
 ArXtic queries arXiv and filters the output.
@ Author: Moussouni, Yaël (MSc student; yael.moussouni@etu.unistra.fr)
@ Institution:  Université de Strasbourg, CNRS, Observatoire astronomique
                de Strasbourg, UMR 7550, F-67000 Strasbourg, France
@ Date: 2025-10-10
 Licence:
 ArXtic
 Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr)
 arxiv_api.py
 Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr)
 This program is free software: you can redistribute it and/or modify 
 it under the terms of the GNU General Public License as published by 
 the Free Software Foundation; either version 3 of the License, or 
 (at your option) any later version.
 This program is distributed in the hope that it will be useful, 
 but WITHOUT ANY WARRANTY; without even the implied warranty of 
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 
 GNU General Public License for more details.
 You should have received a copy of the GNU General Public License 
 along with this program. If not, see www.gnu.org/licenses/.
 """
 import os
 from urllib.parse import urlencode, quote_plus
 import requests as rq
 import numpy as np
 import textwrap as tw
 import feedparser as fp
 import bibtexparser as bib
 FILTERS_DIR = os.environ.get("FILTERS_DIR")
 DB_DIR = os.environ.get("DB_DIR")
 PDF_DIR = os.environ.get("PDF_DIR")
 ARXIV_QUERY_URL = os.environ.get("ARXIV_QUERY_URL")
 ARXIV_RSS_URL = os.environ.get("ARXIV_RSS_URL")
 COLOUR_DEFAULT="\033[0m"
 COLOUR_INPUT="\033[36m"
 COLOUR_OUTPUT="\033[32m"
 COLOUR_INFO="\033[34m"
 COLOUR_WARNING="\033[93m"
 COLOUR_ERROR="\033[91m"
 ## Parse Entries
 def parse_entries(feed):
    """
    Convert the entries of a parsed arXiv feed into plain dictionaries.

    feed: mapping whose "entries" item is a sequence of mappings with the
          keys "id", "link", "title", "authors", "summary" and "published".
    Returns a list of dictionaries with the keys "id", "url", "title",
    "author" (list of names), "abstract" and "pubdate" (first ten
    characters of "published").
    """
    parsed = []
    for raw in feed["entries"]:
        parsed.append({
            # Strip the OAI prefixes so only the bare identifier remains.
            "id": raw["id"].replace("oai:", "").replace("arXiv.org:", ""),
            "url": raw["link"],
            "title": raw["title"],
            # Each author item may hold several comma-separated names;
            # items that are not dictionaries are ignored.
            "author": [name.strip()
                       for person in raw["authors"]
                       if isinstance(person, dict)
                       for name in person["name"].split(",")],
            "abstract": raw["summary"],
            "pubdate": raw["published"][0:10],
        })
    return parsed
 ## Parse Bibtex
 def parse_bibtex(entries,
                 arxtic_notes = "",
                 arxtic_category = "",
                 arxtic_keywords = "",
                 arxtic_score = 0,
                 arxtic_filename = "",
                 arxtic_date_read = ""):
    """
    Build a BibTeX library from entries produced by parse_entries().

    entries: one entry dictionary, or a list / numpy array of them, with
             the keys "id", "title", "author", "pubdate", "abstract", "url".
    Returns the parsed library; blocks reported as failed by the parser are
    removed.
    """
    # NOTE(review): the arxtic_* arguments are accepted but not used below;
    # every entry is written with empty arxtic fields and a score of -1.
    if not isinstance(entries, (list, np.ndarray)):
        entries = [entries]
    # Constant tail shared by every generated entry.
    arxtic_tail = ("\tarxtic_notes={},\n"
                   "\tarxtic_category={},\n"
                   "\tarxtic_keywords={},\n"
                   "\tarxtic_score={-1},\n"
                   "\tarxtic_filename={},\n"
                   "\tarxtic_date_read={},\n"
                   "}")
    chunks = []
    for item in entries:
        # The citation key doubles as the eprint identifier.
        key = item["id"].replace("http://arxiv.org/abs/", "").replace("/", "_")
        title = item["title"]
        author = " and ".join(item["author"])
        year = item["pubdate"][0:4]
        abstract = item["abstract"]
        url = item["url"]
        head = (f"@misc{{{key},\n"
                f"\ttitle={{{title}}},\n"
                f"\tauthor={{{author}}},\n"
                f"\tyear={{{year}}},\n"
                f"\teprint={{{key}}},\n"
                f"\turl={{{url}}},\n"
                f"\tabstract={{{abstract}}},\n")
        chunks.append(head + arxtic_tail)
    library = bib.parse_string("".join(chunks))
    library.remove(library.failed_blocks)
    return library
 ## arXiv API
 def arxiv_today():
    """
    Fetch the feed at ARXIV_RSS_URL and return it as a BibTeX library.

    The first line of every summary is discarded, the remaining lines are
    joined with spaces and the "Abstract: " marker is removed before the
    entries are converted with parse_entries() and parse_bibtex().
    """
    feed = fp.parse(ARXIV_RSS_URL)
    for item in feed["entries"]:
        body_lines = item["summary"].split("\n")[1:]
        item["summary"] = " ".join(body_lines).replace("Abstract: ", "")
    return parse_bibtex(parse_entries(feed))
 def arxiv_id(ids):
    """
    Query arXiv for the given identifiers and return a BibTeX library.

    ids: a single identifier string, or a list / numpy array of them; the
         "oai:" and "arXiv.org:" prefixes are removed before querying.
    Raises Exception when ids is neither a string, a list nor an array.
    """
    def _bare(identifier):
        # Remove the OAI prefixes from one identifier.
        return identifier.replace("oai:", "").replace("arXiv.org:", "")

    if isinstance(ids, str):
        ids = [_bare(ids)]
    elif isinstance(ids, (list, np.ndarray)):
        ids = [_bare(i) for i in ids]
    else:
        raise Exception(("The type of ids ({}) is not recognized"
                         .format(type(ids))))
    params = urlencode({"id_list": ",".join(ids)})
    feed = fp.parse(ARXIV_QUERY_URL + params)
    for item in feed["entries"]:
        # Flatten the abstract onto one line and drop its marker.
        cleaned = item["summary"].replace("\n", " ").replace("\t", "")
        item["summary"] = cleaned.replace("Abstract: ", "")
    return parse_bibtex(parse_entries(feed))
--- a/src/arxtic.py
+++ b/src/arxtic.py
@@ -8,7 +8,7 @@ ArXtic queries arXiv and filters the output.
@ Author: Moussouni, Yaël (MSc student; yael.moussouni@etu.unistra.fr)
@ Institution:  Université de Strasbourg, CNRS, Observatoire astronomique
                de Strasbourg, UMR 7550, F-67000 Strasbourg, France
-@ Date: 2025-09-15
+@ Date: 2025-10-10
 Licence:
 ArXtic
@@ -31,21 +31,22 @@ You should have received a copy of the GNU General Public License
 along with this program. If not, see www.gnu.org/licenses/.
 """
 import os
 from urllib.parse import urlencode, quote_plus
 import requests as rq
 import numpy as np
 import textwrap as tw
 import feedparser as fp
 import bibtexparser as bib
-import requests as rq
+
-import numpy as np
+import arxiv_api
-from urllib.parse import urlencode, quote_plus
+import ads_api
 import local_api
 import utils
 FILTERS_DIR = os.environ.get("FILTERS_DIR")
 DB_DIR = os.environ.get("DB_DIR")
 PDF_DIR = os.environ.get("PDF_DIR")
 ARXIV_QUERY_URL = os.environ.get("ARXIV_QUERY_URL")
 ARXIV_RSS_URL = os.environ.get("ARXIV_RSS_URL")
 ADSABS_QUERY_URL = os.environ.get("ADSABS_QUERY_URL")
 ADSABS_EXPORT_URL = os.environ.get("ADSABS_EXPORT_URL")
 ADSABS_API_KEY = os.environ.get("ADSABS_API_KEY")
 COLOUR_DEFAULT="\033[0m"
 COLOUR_INPUT="\033[36m"
@@ -54,328 +55,13 @@ COLOUR_INFO="\033[34m"
 COLOUR_WARNING="\033[93m"
 COLOUR_ERROR="\033[91m"
-## General
+ids = ["2510.06329", "2509.13163"]
 bibcodes = ["2022A&A...658A.152V", "2021A&A...649A..97L"]
 query =  "first_author:\"Voggel, K\"year:(2022)"
-def wrap(txt, length=80):
+utils.print_reference(arxiv_api.arxiv_today())
-    wrapped_txt = '\n'.join(tw.wrap(txt, length, break_long_words=False))
+utils.print_title_author(arxiv_api.arxiv_id(ids))
-    return wrapped_txt
+print(ads_api.ads_bibcode_search(query, num=2))
 utils.print_abstract(ads_api.ads_bibcode(bibcodes))
-## Filters
+utils.print_reference(local_api.update_local_pdf())
 def get_filters():
    """
    Load every filter definition found in FILTERS_DIR.

    Each file whose name does not start with "." yields one dictionary:
    - "fields": names given by the lines containing "#FIELD" (text after
      the first "=", quotes removed);
    - "score": integer given by a line containing "#SCORE" (default 1);
    - "values": one keyword per remaining line, newline removed.
    Other lines starting with "#" and blank lines are skipped.
    Returns the list of these dictionaries.
    """
    filters = []
    for name in os.listdir(FILTERS_DIR):
        if name[0] == ".":
            continue
        # os.path.join keeps the path valid whether or not FILTERS_DIR ends
        # with a path separator.
        with open(os.path.join(FILTERS_DIR, name)) as filter_file:
            dic = {"fields": [], "values": [], "score": 1}
            for line in filter_file:
                if "#FIELD" in line:
                    field = line.split("=")[1].replace("\"", "").strip()
                    dic["fields"].append(field)
                elif "#SCORE" in line:
                    dic["score"] = int(line.split("=")[1].strip())
                elif line[0] == "#" or not line.strip():
                    # Lines made only of spaces or tabs must not be kept
                    # as keywords.
                    continue
                else:
                    dic["values"].append(line.replace("\n", ""))
            filters.append(dic)
    return filters
 def filter_entries(filters, entries):
    """
    Keep the entries matching at least one filter keyword and score them.

    filters: list of dictionaries with the keys "fields", "values", "score".
    entries: list of mappings; a field missing from an entry is treated as
             an empty string.
    Matching is a case-insensitive substring test of each value against
    the string form of each listed field. The first match selects the
    entry; every later match adds the filter score again.
    Returns (filtered_entries, filtered_data) where filtered_data holds the
    parallel lists "fields", "keywords" and "score".
    """
    kept = []
    hit_fields = []
    hit_keywords = []
    hit_scores = []
    for entry in entries:
        seen = False
        for rule in filters:
            fields = rule["fields"]
            values = rule["values"]
            weight = rule["score"]
            for field in fields:
                for value in values:
                    present = field in list(entry)
                    haystack = str(entry[field] if present else "").upper()
                    if value.upper() not in haystack:
                        continue
                    if not seen:
                        # First hit: open a new record for this entry.
                        kept.append(entry)
                        hit_fields.append([field])
                        hit_keywords.append([value])
                        hit_scores.append(weight)
                        seen = True
                    else:
                        # Later hits accumulate on the record just opened.
                        hit_scores[-1] = hit_scores[-1] + weight
                        if field not in hit_fields[-1]:
                            hit_fields[-1].append(field)
                        if value not in hit_keywords[-1]:
                            hit_keywords[-1].append(value)
    filtered_data = {"fields": hit_fields,
                     "keywords": hit_keywords,
                     "score": hit_scores}
    return kept, filtered_data
 ##  Print entries
 def print_entries(entries, data=None):
    """
    Print the entries with colours, optionally followed by filter details.

    entries: sequence of mappings with at least "link", "title", "author"
             and "abstract"; "bibcode", "id" and "arxiv_announce_type" are
             printed when present.
    data: optional mapping with the parallel lists "fields", "keywords" and
          "score", indexed like entries.
    Returns 0.
    """
    for index, entry in enumerate(entries):
        keys = list(entry)
        # Header line: identifiers, announce type and link.
        print(COLOUR_INFO, end="")
        for name in ("bibcode", "id"):
            if name in keys:
                print(entry[name], end="")
        if "arxiv_announce_type" in keys:
            print(" (" + entry["arxiv_announce_type"] + ")", end="")
        print(" [" + entry["link"] + "]", end="")
        print(COLOUR_DEFAULT)
        print(COLOUR_DEFAULT + wrap(entry["title"]) + COLOUR_DEFAULT)
        print(COLOUR_OUTPUT + wrap(", ".join(entry["author"])) + COLOUR_DEFAULT)
        print(COLOUR_INPUT + wrap(entry["abstract"]) + COLOUR_DEFAULT)
        if data is not None:
            matched_fields = ", ".join(data["fields"][index])
            print(COLOUR_ERROR + "Filtered field(s): " + matched_fields
                  + COLOUR_DEFAULT)
            matched_keywords = ", ".join(data["keywords"][index])
            print(COLOUR_ERROR + "Filtered keyword(s): " + matched_keywords
                  + COLOUR_DEFAULT)
            print(COLOUR_ERROR + "Filtered score: " + str(data["score"][index])
                  + COLOUR_DEFAULT)
        # Blank line between entries.
        print("")
    return 0
 # IDs
 def get_arxiv_ids(entries):
    """Return the "id" value of every entry, in order."""
    return [entry["id"] for entry in entries]
 def save_arxiv_ids(ids, library="saved"):
    """
    Append arXiv identifiers to the text database DB_DIR + library + ".txt".

    ids: a single identifier string, or a list / numpy array of them; the
         "oai:" and "arXiv.org:" prefixes are removed first.
    library: base name of the database file (one identifier per line).
    Identifiers already stored, or repeated inside ids, are written once.
    Returns 0.
    Raises Exception when ids is neither a string, a list nor an array.
    """
    if isinstance(ids, list) or isinstance(ids, np.ndarray):
        ids = [i.replace("oai:", "").replace("arXiv.org:", "") for i in ids]
    elif isinstance(ids, str):
        ids = [ids.replace("oai:", "").replace("arXiv.org:", "")]
    else:
        raise Exception(
                "The type of ids ({}) is not recognized".format(type(ids))
                )
    # "a+" creates the file when it is missing and always writes at the end;
    # rewind first to read what is already stored.
    with open(DB_DIR + library + ".txt", "a+") as db_file:
        db_file.seek(0)
        known_ids = {line.replace("\n", "") for line in db_file.readlines()}
        for i in ids:
            if i not in known_ids:
                db_file.write(i + "\n")
                # Remember it so a duplicate inside ids is not written again.
                known_ids.add(i)
    return 0
 ## ArXiV
 def get_arxiv_rss():
    """Return the feed parsed by feedparser from ARXIV_RSS_URL."""
    return fp.parse(ARXIV_RSS_URL)
 def today_arxiv():
    """
    Fetch the arXiv feed, keep the entries matching the filters, print them.

    Returns (entries, data) as produced by filter_entries().
    """
    # Arguments are evaluated left to right: filters are loaded before the
    # feed is fetched.
    entries, data = filter_entries(get_filters(),
                                   get_arxiv_entries(get_arxiv_rss()))
    print_entries(entries, data)
    return entries, data
 def get_arxiv_from_ids(ids):
    """
    Query arXiv for the given identifiers and return the parsed feed.

    ids: a single identifier string, or a list / numpy array of them; the
         "oai:" and "arXiv.org:" prefixes are removed before querying.
    Raises Exception when ids is neither a string, a list nor an array.
    """
    def _bare(identifier):
        # Remove the OAI prefixes from one identifier.
        return identifier.replace("oai:", "").replace("arXiv.org:", "")

    if isinstance(ids, str):
        ids = [_bare(ids)]
    elif isinstance(ids, (list, np.ndarray)):
        ids = [_bare(i) for i in ids]
    else:
        raise Exception(
                "The type of ids ({}) is not recognized".format(type(ids))
                )
    params = urlencode({"id_list": ",".join(ids)})
    return fp.parse(ARXIV_QUERY_URL + params)
 ## ADS-ABS
 def ads_search(query, num=5, sort="date"):
    """
    Send a search request to ADS and return the raw response.

    query: search expression sent as the "q" parameter.
    num: value sent as the "rows" parameter.
    sort: value sent as the "sort" parameter.
    """
    # Fields requested for every returned document.
    wanted = ("bibcode,title,author,abstract,bibstem,doi,"
              "keyword,citation,pubdate")
    params = urlencode({"q": query, "fl": wanted, "rows": num, "sort": sort})
    url = ADSABS_QUERY_URL + params
    auth = {'Authorization': "Bearer " + ADSABS_API_KEY}
    return rq.get(url, headers=auth)
 def ads_author(author, num=10, sort="date"):
    """
    Search ADS for an author, filter the results and print them.

    author: text appended to "author:" to build the query.
    num, sort: forwarded to ads_search().
    Returns (entries, data) as produced by filter_entries().
    """
    filters = get_filters()
    response = ads_search("author:" + author, num=num, sort=sort)
    entries, data = filter_entries(filters, get_ads_entries(response))
    print_entries(entries, data)
    return entries, data
 # Entries
 def get_arxiv_entries(rss):
    """
    Convert the entries of a parsed arXiv feed into plain dictionaries.

    rss: mapping whose "entries" item is a sequence of mappings with the
         keys "id", "link", "title", "authors", "summary" and "published".
    Returns a list of dictionaries with the keys "id", "link", "title",
    "author" (list of names), "abstract" and "pubdate".
    """
    converted = []
    for raw in rss["entries"]:
        converted.append({
            "id": raw["id"].replace("oai:", "").replace("arXiv.org:", ""),
            "link": raw["link"],
            "title": raw["title"],
            # Each author item may hold several comma-separated names;
            # items that are not dictionaries are ignored.
            "author": [name.strip()
                       for person in raw["authors"]
                       if isinstance(person, dict)
                       for name in person["name"].split(",")],
            # Drop the first summary line, then the first ten characters of
            # what remains.
            "abstract": "\n".join(raw["summary"].split("\n")[1:])[10:],
            "pubdate": raw["published"][0:10],
        })
    return converted
 def get_ads_entries(feed):
    """
    Turn an ADS search response into a list of entry dictionaries.

    feed: object whose json() method returns a mapping holding the
          documents under ["response"]["docs"]; each document needs the
          keys "bibcode", "title" (list) and "bibstem" (list).
    Every document gains a "link" to its ADS abstract page, has "title"
    replaced by its first element and gains "publisher" (first "bibstem").
    Returns the list of these documents, in order.
    """
    # Decode the payload a single time rather than once per document.
    docs = feed.json()["response"]["docs"]
    entries = []
    for entry in docs:
        entry["link"] = "https://ui.adsabs.harvard.edu/abs/" + entry["bibcode"]
        entry["title"] = entry["title"][0]
        entry["publisher"] = entry["bibstem"][0]
        entries.append(entry)
    return entries
 # BibTeX
 def arxiv_to_bibtex(entry,
                     arxtic_notes = "",
                     arxtic_category = "",
                     arxtic_keywords = "",
                     arxtic_score = 0,
                     arxtic_filename = ""):
    """
    Build a one-entry BibTeX library (@misc) from an arXiv entry.

    entry: mapping with the keys "id", "title", "author", "pubdate", "link".
    arxtic_*: values written to the matching arxtic_* fields.
    Returns the result of parsing the generated BibTeX text.
    """
    key = entry["id"]
    title = entry["title"]
    authors = " and ".join(entry["author"])
    year = entry["pubdate"][0:4]
    link = entry["link"]
    # Field order of the generated entry; the key is reused as eprint.
    fields = (("title", title),
              ("author", authors),
              ("year", year),
              ("eprint", key),
              ("url", link),
              ("arxtic_notes", arxtic_notes),
              ("arxtic_category", arxtic_category),
              ("arxtic_keywords", arxtic_keywords),
              ("arxtic_score", str(arxtic_score)),
              ("arxtic_filename", str(arxtic_filename)))
    body = "".join(f"\t{name}={{{value}}},\n" for name, value in fields)
    return bib.parse_string(f"@misc{{{key},\n" + body + "}")
 def ads_to_bibtex(entry,
                   arxtic_notes = "",
                   arxtic_category = "",
                   arxtic_keywords = "",
                   arxtic_score = 0,
                   arxtic_filename = ""):
    """
    Download the ADS BibTeX export of an entry and add the arxtic fields.

    entry: mapping with a "bibcode" key.
    arxtic_*: values written to the matching arxtic_* fields.
    Returns the result of parsing the extended BibTeX text.
    """
    bibcode = entry["bibcode"]
    target = ADSABS_EXPORT_URL + bibcode
    auth = {'Authorization': "Bearer " + ADSABS_API_KEY}
    reply = rq.get(target, headers=auth)
    extras = (("arxtic_notes", arxtic_notes),
              ("arxtic_category", arxtic_category),
              ("arxtic_keywords", arxtic_keywords),
              ("arxtic_score", str(arxtic_score)),
              ("arxtic_filename", str(arxtic_filename)))
    tail = "".join(f"\t{name}={{{value}}},\n" for name, value in extras)
    # The last two characters of the export are dropped before the extra
    # fields and the closing brace are appended.
    return bib.parse_string(reply.text[:-2] + ",\n" + tail + "}")
 def list_pdf():
    """
    Identify the PDF files stored in PDF_DIR and build a BibTeX entry for
    each recognised one.

    File names are split on "_" after removing ".pdf":
    - a second field equal to "arxiv" (any case) means the remaining fields
      form an arXiv identifier, resolved through the arXiv query;
    - exactly five fields are read as author, year, bibstem, volume and
      page and resolved through an ADS search ("AA" becomes "A&A").
    A warning is printed for every file that cannot be identified or whose
    lookup does not return exactly one entry.
    Returns None; the entries collected in bibtex_list are not returned.
    """
    def _warn(pdf_name, reason):
        # Single place for the warning layout shared by all failure cases.
        print(COLOUR_WARNING
              + f"Warning: {pdf_name} has not been correctly identified. "
              + reason
              + COLOUR_DEFAULT)

    bibtex_list = []
    pdf_names = [f for f in os.listdir(PDF_DIR)
                 if not f[0] == "." and ".pdf" in f]
    for pdf_name in pdf_names:
        fields = pdf_name.replace(".pdf", "").split("_")
        if len(fields) < 2:
            _warn(pdf_name, "(unrecognized format #1)")
        elif fields[1].upper() == "ARXIV":
            # Old-style identifiers contain "/", stored as "_" in file names.
            arxiv_id = "/".join(fields[2:])
            feed = get_arxiv_from_ids(arxiv_id)
            entries = get_arxiv_entries(feed)
            if len(entries) == 1:
                bibtex = arxiv_to_bibtex(entries[0],
                                         arxtic_score=99,
                                         arxtic_filename=pdf_name)
                bibtex_list.append(bibtex)
            else:
                _warn(pdf_name, "(ambiguous #1)")
        elif len(fields) == 5:
            first_author, year, bibstem, volume, page = fields
            if bibstem == "AA":
                bibstem = "A&A"
            query = (f"first_author:\"{first_author}\""
                     f"year:({year})"
                     f"bibstem:\"{bibstem}\""
                     f"volume:\"{volume}\""
                     f"page:\"{page}\"")
            # Two rows are requested so an ambiguous match can be detected.
            feed = ads_search(query, num=2)
            entries = get_ads_entries(feed)
            if len(entries) == 1:
                bibtex = ads_to_bibtex(entries[0],
                                       arxtic_score=99,
                                       arxtic_filename=pdf_name)
                bibtex_list.append(bibtex)
            else:
                _warn(pdf_name, "(ambiguous #2)")
        else:
            _warn(pdf_name, "(unrecognized format #2)")
    return None
 list_pdf()
 #entries, data = today_arxiv()
--- a/src/legacy.py
+++ b/src/legacy.py
@@ -0,0 +1,194 @@
 #!/usr/bin/env python
 #[TLP:AMBER] LIMITED DISTRIBUTION: WORK IN PROGRESS
 """
 ArXtic: 
 ArXtic queries arXiv and filters the output.
@ Author: Moussouni, Yaël (MSc student; yael.moussouni@etu.unistra.fr)
@ Institution:  Université de Strasbourg, CNRS, Observatoire astronomique
                de Strasbourg, UMR 7550, F-67000 Strasbourg, France
@ Date: 2025-09-15
 Licence:
 ArXtic
 Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr)
 legacy.py
 Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr)
 This program is free software: you can redistribute it and/or modify 
 it under the terms of the GNU General Public License as published by 
 the Free Software Foundation; either version 3 of the License, or 
 (at your option) any later version.
 This program is distributed in the hope that it will be useful, 
 but WITHOUT ANY WARRANTY; without even the implied warranty of 
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 
 GNU General Public License for more details.
 You should have received a copy of the GNU General Public License 
 along with this program. If not, see www.gnu.org/licenses/.
 """
 import os
 import textwrap as tw
 import feedparser as fp
 import bibtexparser as bib
 import requests as rq
 import numpy as np
 from urllib.parse import urlencode, quote_plus
 FILTERS_DIR = os.environ.get("FILTERS_DIR")
 DB_DIR = os.environ.get("DB_DIR")
 PDF_DIR = os.environ.get("PDF_DIR")
 ARXIV_QUERY_URL = os.environ.get("ARXIV_QUERY_URL")
 ARXIV_RSS_URL = os.environ.get("ARXIV_RSS_URL")
 ADSABS_QUERY_URL = os.environ.get("ADSABS_QUERY_URL")
 ADSABS_EXPORT_URL = os.environ.get("ADSABS_EXPORT_URL")
 ADSABS_API_KEY = os.environ.get("ADSABS_API_KEY")
 COLOUR_DEFAULT="\033[0m"
 COLOUR_INPUT="\033[36m"
 COLOUR_OUTPUT="\033[32m"
 COLOUR_INFO="\033[34m"
 COLOUR_WARNING="\033[93m"
 COLOUR_ERROR="\033[91m"
 ## Filters
 def get_filters():
    """
    Load every filter definition found in FILTERS_DIR.

    Each file whose name does not start with "." yields one dictionary:
    - "fields": names given by the lines containing "#FIELD" (text after
      the first "=", quotes removed);
    - "score": integer given by a line containing "#SCORE" (default 1);
    - "values": one keyword per remaining line, newline removed.
    Other lines starting with "#" and blank lines are skipped.
    Returns the list of these dictionaries.
    """
    filters = []
    for name in os.listdir(FILTERS_DIR):
        if name[0] == ".":
            continue
        # os.path.join keeps the path valid whether or not FILTERS_DIR ends
        # with a path separator.
        with open(os.path.join(FILTERS_DIR, name)) as filter_file:
            dic = {"fields": [], "values": [], "score": 1}
            for line in filter_file:
                if "#FIELD" in line:
                    field = line.split("=")[1].replace("\"", "").strip()
                    dic["fields"].append(field)
                elif "#SCORE" in line:
                    dic["score"] = int(line.split("=")[1].strip())
                elif line[0] == "#" or not line.strip():
                    # Lines made only of spaces or tabs must not be kept
                    # as keywords.
                    continue
                else:
                    dic["values"].append(line.replace("\n", ""))
            filters.append(dic)
    return filters
 def filter_entries(filters, entries):
    """
    Keep the entries matching at least one filter keyword and score them.

    filters: list of dictionaries with the keys "fields", "values", "score".
    entries: list of mappings; a field missing from an entry is treated as
             an empty string.
    Matching is a case-insensitive substring test of each value against
    the string form of each listed field. The first match selects the
    entry; every later match adds the filter score again.
    Returns (filtered_entries, filtered_data) where filtered_data holds the
    parallel lists "fields", "keywords" and "score".
    """
    kept = []
    hit_fields = []
    hit_keywords = []
    hit_scores = []
    for entry in entries:
        seen = False
        for rule in filters:
            fields = rule["fields"]
            values = rule["values"]
            weight = rule["score"]
            for field in fields:
                for value in values:
                    present = field in list(entry)
                    haystack = str(entry[field] if present else "").upper()
                    if value.upper() not in haystack:
                        continue
                    if not seen:
                        # First hit: open a new record for this entry.
                        kept.append(entry)
                        hit_fields.append([field])
                        hit_keywords.append([value])
                        hit_scores.append(weight)
                        seen = True
                    else:
                        # Later hits accumulate on the record just opened.
                        hit_scores[-1] = hit_scores[-1] + weight
                        if field not in hit_fields[-1]:
                            hit_fields[-1].append(field)
                        if value not in hit_keywords[-1]:
                            hit_keywords[-1].append(value)
    filtered_data = {"fields": hit_fields,
                     "keywords": hit_keywords,
                     "score": hit_scores}
    return kept, filtered_data
 if __name__ == "__main__":
    # Interactive command loop: loads the three .bib databases, then reads
    # commands from standard input until a quit command is entered.
    # NOTE(review): file_to_bibtex, bibtex_to_file, print_bibtex, today_arxiv,
    # list_pdf, get_arxiv_from_ids, get_arxiv_entries, arxiv_to_bibtex,
    # get_ads_from_bibcode, get_ads_entries and ads_to_bibtex are neither
    # defined nor imported in the visible part of this file -- confirm where
    # they are expected to come from.
    read = file_to_bibtex("read.bib")
    unread = file_to_bibtex("unread.bib")
    library = file_to_bibtex("library.bib")
    quit_action = False
    # Scratch library filled by the TODAY, ARXIV and ADS commands.
    working_bibtex = bib.Library()
    while not quit_action:
        # Keys are recomputed on every iteration (library may be replaced by
        # the LIBRARY command below).
        read_keys = [b.key for b in read.blocks]
        unread_keys = [b.key for b in unread.blocks]
        library_keys = [b.key for b in library.blocks]
        # NOTE(review): the next line repeats the previous one.
        library_keys = [b.key for b in library.blocks]
        print(COLOUR_INPUT + "Select an action")
        action = input("> " + COLOUR_DEFAULT)
        if action.upper() in ["QUIT", "EXIT", "Q"]:
            quit_action = True
        elif action in ["", " ", "help", "h"]:
            print(COLOUR_OUTPUT
                  + "Available commands:\n"
                  + "\t- quit, exit, q: exit\n"
                  + COLOUR_DEFAULT)
        # Print: no argument prints the working library; READ, UNREAD or
        # LIBRARY prints that database; any other argument is looked up as a
        # key in read, then unread, then library.
        elif action.split(" ")[0].upper() in ["PRINT", "P"]:
            if len(action.split(" ")) == 1:
                print_bibtex(working_bibtex)
            elif action.split(" ")[1].upper() == "READ":
                print_bibtex(read)
            elif action.split(" ")[1].upper() == "UNREAD":
                print_bibtex(unread)
            elif action.split(" ")[1].upper() == "LIBRARY":
                print_bibtex(library)
            else:
                search_key = action.split(" ")[1]
                if search_key in read_keys:
                    print_bibtex(read.blocks[read_keys.index(search_key)])
                elif search_key in unread_keys:
                    print_bibtex(unread.blocks[unread_keys.index(search_key)])
                elif search_key in library_keys:
                    print_bibtex(library.blocks[library_keys.index(search_key)])
                else:
                    print(COLOUR_WARNING 
                          + f"Warning: {search_key} cannot be found"
                          + COLOUR_DEFAULT)
        # Clear: replace the working library with an empty one.
        elif action.upper() in ["CLEAR", "CLEAN"]:
            working_bibtex = bib.Library()
        # Today: add the blocks returned by today_arxiv() to the working
        # library.
        elif action.upper() in ["TODAY"]:
            today_bibtex = today_arxiv()
            working_bibtex.add(today_bibtex.blocks)
        # Library: rebuild the library through list_pdf() and write it to
        # library.bib.
        elif action.upper() in ["LIBRARY"]:
            library = list_pdf(library)
            bibtex_to_file(library, "library.bib")
        # Arxiv: every word after the command is treated as an arXiv id.
        elif action.split(" ")[0].upper() == "ARXIV":
            arxiv_ids = action.split(" ")[1:]
            feed = get_arxiv_from_ids(arxiv_ids)
            entries = get_arxiv_entries(feed)
            for entry in entries:
                bibtex_entry = arxiv_to_bibtex(entry, 
                                               arxtic_score=99)
                working_bibtex.add(bibtex_entry.blocks)
        # ADS: the words after the command are joined without spaces into a
        # single bibcode.
        elif action.split(" ")[0].upper() == "ADS":
            ads_bibcode = "".join(action.split(" ")[1:])
            feed = get_ads_from_bibcode(ads_bibcode)
            entries = get_ads_entries(feed)
            for entry in entries:
                bibtex_entry = ads_to_bibtex(entry, 
                                             arxtic_score=99)
                working_bibtex.add(bibtex_entry.blocks)
--- a/src/local_api.py
+++ b/src/local_api.py
@@ -0,0 +1,127 @@
 #!/usr/bin/env python
 #[TLP:AMBER] LIMITED DISTRIBUTION: WORK IN PROGRESS
 """
 ArXtic: 
 ArXtic queries arXiv and filters the output.
@ Author: Moussouni, Yaël (MSc student; yael.moussouni@etu.unistra.fr)
@ Institution:  Université de Strasbourg, CNRS, Observatoire astronomique
                de Strasbourg, UMR 7550, F-67000 Strasbourg, France
@ Date: 2025-10-10
 Licence:
 ArXtic
 Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr)
 local_api.py
 Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr)
 This program is free software: you can redistribute it and/or modify 
 it under the terms of the GNU General Public License as published by 
 the Free Software Foundation; either version 3 of the License, or 
 (at your option) any later version.
 This program is distributed in the hope that it will be useful, 
 but WITHOUT ANY WARRANTY; without even the implied warranty of 
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 
 GNU General Public License for more details.
 You should have received a copy of the GNU General Public License 
 along with this program. If not, see www.gnu.org/licenses/.
 """
 import os
 from urllib.parse import urlencode, quote_plus
 import requests as rq
 import numpy as np
 import textwrap as tw
 import feedparser as fp
 import bibtexparser as bib
 import arxiv_api
 import ads_api
 # Working directories, taken from the environment. Each one is None when
 # the corresponding variable is not set.
 FILTERS_DIR = os.environ.get("FILTERS_DIR")
 DB_DIR = os.environ.get("DB_DIR")
 PDF_DIR = os.environ.get("PDF_DIR")
 # ANSI escape sequences used to colour terminal output
 # (0: reset, 36: cyan, 32: green, 34: blue, 93: bright yellow,
 # 91: bright red).
 COLOUR_DEFAULT="\033[0m"
 COLOUR_INPUT="\033[36m"
 COLOUR_OUTPUT="\033[32m"
 COLOUR_INFO="\033[34m"
 COLOUR_WARNING="\033[93m"
 COLOUR_ERROR="\033[91m"
 def file_to_bibtex(filename, directory=DB_DIR):
    """Parse a BibTeX file located in ``directory`` and return the library.

    Parameters
    ----------
    filename : str
        Name of the file. The ".bib" extension is appended when the name
        does not already end with it.
    directory : str
        Directory holding the file (defaults to ``DB_DIR``).

    Returns
    -------
    The object returned by ``bib.parse_file``, with the blocks listed in
    its ``failed_blocks`` attribute removed.
    """
    # Test the suffix, not a substring: a name that merely contains ".bib"
    # (e.g. "my.biblio") still needs the extension.
    if not filename.endswith(".bib"):
        filename += ".bib"
    # os.path.join copes with ``directory`` given with or without a
    # trailing separator, unlike plain string concatenation.
    bibtex = bib.parse_file(os.path.join(directory, filename))
    bibtex.remove(bibtex.failed_blocks)
    return bibtex
 def bibtex_to_file(bibtex, filename, directory=DB_DIR):
    """Write ``bibtex`` to a BibTeX file located in ``directory``.

    Parameters
    ----------
    bibtex :
        Library handed to ``bib.write_file``.
    filename : str
        Name of the file. The ".bib" extension is appended when the name
        does not already end with it.
    directory : str
        Directory receiving the file (defaults to ``DB_DIR``).

    Returns
    -------
    The ``bibtex`` argument, unchanged, so that calls can be chained.
    """
    # Test the suffix, not a substring: a name that merely contains ".bib"
    # (e.g. "my.biblio") still needs the extension.
    if not filename.endswith(".bib"):
        filename += ".bib"
    # os.path.join copes with ``directory`` given with or without a
    # trailing separator, unlike plain string concatenation.
    bib.write_file(os.path.join(directory, filename), bibtex)
    return bibtex
 def _warn_unidentified_pdf(pdf, reason):
    """Print the "not correctly identified" warning for ``pdf``."""
    print(COLOUR_WARNING
          + f"Warning: {pdf} has not been correctly identified. "
          + f"({reason})"
          + COLOUR_DEFAULT)
 def update_local_pdf(library=None, directory=PDF_DIR):
    """Add to ``library`` an entry for each new PDF found in ``directory``.

    File names are split on "_" after removing the ".pdf" extension and
    two layouts are recognised:

    * ``<anything>_arXiv_<id parts...>``: the parts after the second field
      are joined with "/" and looked up through ``arxiv_api.arxiv_id``;
    * ``<first_author>_<year>_<bibstem>_<volume>_<page>``: looked up
      through ``ads_api.ads_bibcode_search`` (the bibstem "AA" is turned
      into "A&A").

    A warning is printed for every file that matches neither layout or
    whose lookup does not yield exactly one result.

    Parameters
    ----------
    library :
        Existing library; files whose name appears in the
        "arxtic_filename" field of one of its blocks are skipped. A new
        ``bib.Library`` is created when None.
    directory : str
        Directory scanned for PDF files (defaults to ``PDF_DIR``).

    Returns
    -------
    The library, including the newly added blocks.
    """
    # TODO: delete entry in library if the PDF file is deleted. <YM, 2025-10-11>
    if library is None:
        library = bib.Library()
        known_pdf = []
    else:
        blocks = [b for b in library.blocks if isinstance(b, bib.model.Block)]
        known_pdf = [block["arxtic_filename"] for block in blocks]
    # Keep only visible files that really carry the ".pdf" extension. The
    # previous test used the bare literal ".pdf" (always true), so every
    # non-hidden file of the directory was processed.
    folder_pdf = [f for f in os.listdir(directory)
                  if f.endswith(".pdf") and not f.startswith(".")]
    for pdf in folder_pdf:
        if pdf in known_pdf:
            continue
        # Strip the extension only, not every ".pdf" occurring in the name.
        fields = os.path.splitext(pdf)[0].split("_")
        if len(fields) < 2:
            _warn_unidentified_pdf(pdf, "unrecognized format #1")
        elif fields[1].upper() == "ARXIV":
            arxiv_id = "/".join(fields[2:])
            arxiv_library = arxiv_api.arxiv_id(arxiv_id)
            if len(arxiv_library.blocks) == 1:
                library.add(arxiv_library.blocks)
            else:
                _warn_unidentified_pdf(pdf, "ambiguous #1")
        elif len(fields) == 5:
            first_author, year, bibstem, volume, page = fields
            if bibstem == "AA":
                bibstem = "A&A"
            # NOTE(review): the clauses are concatenated without any
            # separator; confirm that the ADS query parser accepts this.
            query=(f"first_author:\"{first_author}\""
                   f"year:({year})"
                   f"bibstem:\"{bibstem}\""
                   f"volume:\"{volume}\""
                   f"page:\"{page}\"")
            # Presumably num=2 caps the number of results, which is enough
            # to tell a unique match from an ambiguous one; verify against
            # ads_api.
            bibcodes = ads_api.ads_bibcode_search(query, num=2)
            if len(bibcodes) == 1:
                ads_library = ads_api.ads_bibcode(bibcodes)
                library.add(ads_library.blocks)
            else:
                _warn_unidentified_pdf(pdf, "ambiguous #2")
        else:
            _warn_unidentified_pdf(pdf, "unrecognized format #2")
    return library
--- a/src/utils.py
+++ b/src/utils.py
@@ -0,0 +1,103 @@
 #!/usr/bin/env python
 #[TLP:AMBER] LIMITED DISTRIBUTION: WORK IN PROGRESS
 """
 ArXtic: 
 ArXtic queries arXiv and filters the output.
@ Author: Moussouni, Yaël (MSc student; yael.moussouni@etu.unistra.fr)
@ Institution:  Université de Strasbourg, CNRS, Observatoire astronomique
                de Strasbourg, UMR 7550, F-67000 Strasbourg, France
@ Date: 2025-10-10
 Licence:
 ArXtic
 Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr)
 utils.py
 Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr)
 This program is free software: you can redistribute it and/or modify 
 it under the terms of the GNU General Public License as published by 
 the Free Software Foundation; either version 3 of the License, or 
 (at your option) any later version.
 This program is distributed in the hope that it will be useful, 
 but WITHOUT ANY WARRANTY; without even the implied warranty of 
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 
 GNU General Public License for more details.
 You should have received a copy of the GNU General Public License 
 along with this program. If not, see www.gnu.org/licenses/.
 """
 import os
 from urllib.parse import urlencode, quote_plus
 import requests as rq
 import numpy as np
 import textwrap as tw
 import feedparser as fp
 import bibtexparser as bib
 # Working directories, taken from the environment. Each one is None when
 # the corresponding variable is not set.
 FILTERS_DIR = os.environ.get("FILTERS_DIR")
 DB_DIR = os.environ.get("DB_DIR")
 PDF_DIR = os.environ.get("PDF_DIR")
 # ANSI escape sequences used to colour terminal output
 # (0: reset, 36: cyan, 32: green, 34: blue, 93: bright yellow,
 # 91: bright red).
 COLOUR_DEFAULT="\033[0m"
 COLOUR_INPUT="\033[36m"
 COLOUR_OUTPUT="\033[32m"
 COLOUR_INFO="\033[34m"
 COLOUR_WARNING="\033[93m"
 COLOUR_ERROR="\033[91m"
 def wrap(txt, length=80):
    """Return ``txt`` re-flowed into lines of at most ``length`` characters.

    Words longer than ``length`` are kept whole instead of being split.
    The lines are joined with newline characters; an empty input gives an
    empty string.
    """
    wrapper = tw.TextWrapper(width=length, break_long_words=False)
    return wrapper.fill(txt)
 def print_abstract(library):
    """Print key, URL, title, authors and abstract of each block.

    ``library`` is used as is when it is a ``bib.Library``; any other
    value is passed to ``bib.Library(...)`` first. Only blocks that are
    instances of ``bib.model.Block`` are printed. For each of them the
    output is a header line (key and URL), the wrapped title, the wrapped
    author list (" and " separators shown as ", "), the wrapped abstract
    and an empty line.

    Always returns 0.
    """
    if isinstance(library, bib.Library):
        entries = library
    else:
        entries = bib.Library(library)
    for item in entries.blocks:
        if not isinstance(item, bib.model.Block):
            continue
        # Header line: "<key> [<url>]".
        print(COLOUR_INFO, end="")
        print(item.key, end="")
        link = "".join([" [", item["url"], "]"])
        print(link, end="")
        print(COLOUR_DEFAULT)
        title = wrap(item["title"])
        print(f"{COLOUR_DEFAULT}{title}{COLOUR_DEFAULT}")
        authors = wrap(item["author"].replace(" and ", ", "))
        print(f"{COLOUR_OUTPUT}{authors}{COLOUR_DEFAULT}")
        abstract = wrap(item["abstract"])
        print(f"{COLOUR_INPUT}{abstract}{COLOUR_DEFAULT}")
        print()
    return 0
 def print_title_author(library):
    """Print key, URL, title and authors of each block.

    ``library`` is used as is when it is a ``bib.Library``; any other
    value is passed to ``bib.Library(...)`` first. Only blocks that are
    instances of ``bib.model.Block`` are printed. For each of them the
    output is a header line (key and URL), the wrapped title, the wrapped
    author list (" and " separators shown as ", ") and an empty line.

    Always returns 0.
    """
    if isinstance(library, bib.Library):
        entries = library
    else:
        entries = bib.Library(library)
    for item in entries.blocks:
        if not isinstance(item, bib.model.Block):
            continue
        # Header line: "<key> [<url>]".
        print(COLOUR_INFO, end="")
        print(item.key, end="")
        link = "".join([" [", item["url"], "]"])
        print(link, end="")
        print(COLOUR_DEFAULT)
        title = wrap(item["title"])
        print(f"{COLOUR_DEFAULT}{title}{COLOUR_DEFAULT}")
        authors = wrap(item["author"].replace(" and ", ", "))
        print(f"{COLOUR_OUTPUT}{authors}{COLOUR_DEFAULT}")
        print()
    return 0
 def print_reference(library):
    """Print one "<key> [<url>]" line for each block.

    ``library`` is used as is when it is a ``bib.Library``; any other
    value is passed to ``bib.Library(...)`` first. Only blocks that are
    instances of ``bib.model.Block`` are printed.

    Always returns 0.
    """
    if isinstance(library, bib.Library):
        entries = library
    else:
        entries = bib.Library(library)
    for item in entries.blocks:
        if not isinstance(item, bib.model.Block):
            continue
        print(COLOUR_INFO, end="")
        print(item.key, end="")
        link = "".join([" [", item["url"], "]"])
        print(link, end="")
        print(COLOUR_DEFAULT)
    return 0