2025-10-11: Rewriting and segmentation of the code

This commit is contained in:
Moussouni, Yaël
2025-10-11 11:38:33 +02:00
parent 6c17b76c37
commit 7baa9137bf
12 changed files with 704 additions and 332 deletions

View File

@@ -32,6 +32,7 @@ mkdir -p -v cfg/filters
mkdir -p -v db mkdir -p -v db
touch db/read.bib touch db/read.bib
touch db/unread.bib touch db/unread.bib
touch db/library.bib
chmod u+x *.sh chmod u+x *.sh
echo "=== Done ===" echo "=== Done ==="

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

112
src/ads_api.py Normal file
View File

@@ -0,0 +1,112 @@
#!/usr/bin/env python
#[TLP:AMBER] LIMITED DISTRIBUTION: WORK IN PROGRESS
"""
ArXtic:
ArXtic queries arXiv and filters the output.
@ Author: Moussouni, Yaël (MSc student; yael.moussouni@etu.unistra.fr)
@ Institution: Université de Strasbourg, CNRS, Observatoire astronomique
de Strasbourg, UMR 7550, F-67000 Strasbourg, France
@ Date: 2025-10-10
Licence:
ArXtic
Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr)
ads_api.py
Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see www.gnu.org/licenses/.
"""
import os
from urllib.parse import urlencode, quote_plus
import requests as rq
import numpy as np
import textwrap as tw
import feedparser as fp
import bibtexparser as bib
FILTERS_DIR = os.environ.get("FILTERS_DIR")
DB_DIR = os.environ.get("DB_DIR")
PDF_DIR = os.environ.get("PDF_DIR")
ADSABS_QUERY_URL = os.environ.get("ADSABS_QUERY_URL")
ADSABS_EXPORT_URL = os.environ.get("ADSABS_EXPORT_URL")
ADSABS_API_KEY = os.environ.get("ADSABS_API_KEY")
COLOUR_DEFAULT="\033[0m"
COLOUR_INPUT="\033[36m"
COLOUR_OUTPUT="\033[32m"
COLOUR_INFO="\033[34m"
COLOUR_WARNING="\033[93m"
COLOUR_ERROR="\033[91m"
## Parse Entries
def parse_entries(feed):
    """Return the bibcodes listed in an ADS search response.

    Args:
        feed: response object exposing ``json()``; the decoded payload must
            contain ``["response"]["docs"]``, a list of records that each
            hold a ``"bibcode"`` key.

    Returns:
        list: the ``"bibcode"`` of every record, in response order.
    """
    # Decode the payload once instead of re-parsing it on every iteration.
    docs = feed.json()["response"]["docs"]
    return [doc["bibcode"] for doc in docs]
## ADS API
def ads_bibcode_search(query, num=1, sort="date"):
    """Run an ADS search and return the bibcodes of the hits.

    Args:
        query: ADS query string, sent as the ``q`` parameter.
        num: value sent as the ``rows`` parameter.
        sort: value sent as the ``sort`` parameter.

    Returns:
        list: bibcodes extracted from the response by ``parse_entries``.
    """
    # Only the bibcode field ("fl") is requested.
    params = {"q": query, "fl": "bibcode", "rows": num, "sort": sort}
    url = ADSABS_QUERY_URL + urlencode(params)
    auth = {'Authorization': "Bearer " + ADSABS_API_KEY}
    return parse_entries(rq.get(url, headers=auth))
def ads_bibcode(bibcodes):
    """Fetch the BibTeX export of one or more ADS bibcodes.

    Every exported entry is extended with the ArXtic bookkeeping fields
    (notes, category, keywords, score, filename, date read) before parsing.

    Args:
        bibcodes: a single bibcode (str), or a list / numpy array of them.

    Returns:
        The library returned by ``bib.parse_string`` with its failed blocks
        removed.

    Raises:
        TypeError: if ``bibcodes`` is not a str, list or numpy array.
    """
    if isinstance(bibcodes, str):
        bibcodes = [bibcodes]
    elif not isinstance(bibcodes, (list, np.ndarray)):
        raise TypeError("The type of bibcodes ({}) is not recognized"
                        .format(type(bibcodes)))
    arxtic_fields = (",\n"
                     "\tarxtic_notes={},\n"
                     "\tarxtic_category={},\n"
                     "\tarxtic_keywords={},\n"
                     "\tarxtic_score={-1},\n"
                     "\tarxtic_filename={},\n"
                     "\tarxtic_date_read={},\n"
                     "}")
    bibentries = []
    for bibcode in bibcodes:
        url = ADSABS_EXPORT_URL + bibcode
        header = "Bearer " + ADSABS_API_KEY
        feed = rq.get(url, headers={'Authorization': header})
        # The last two characters of the export are dropped so the extra
        # fields can be appended (presumably the closing brace and a
        # newline -- TODO confirm against the export format).
        bibentry = feed.text[:-2] + arxtic_fields
        # NOTE(review): as written, the last replace removes every space
        # from the entry; confirm it is not meant to collapse runs of
        # spaces instead.
        bibentry = (bibentry
                    .replace("adsurl", "url")
                    .replace("\n", " ")
                    .replace("\t", "")
                    .replace(" ", ""))
        bibentries.append(bibentry)
    library = bib.parse_string("".join(bibentries))
    library.remove(library.failed_blocks)
    return library

149
src/arxiv_api.py Normal file
View File

@@ -0,0 +1,149 @@
#!/usr/bin/env python
#[TLP:AMBER] LIMITED DISTRIBUTION: WORK IN PROGRESS
"""
ArXtic:
ArXtic queries arXiv and filters the output.
@ Author: Moussouni, Yaël (MSc student; yael.moussouni@etu.unistra.fr)
@ Institution: Université de Strasbourg, CNRS, Observatoire astronomique
de Strasbourg, UMR 7550, F-67000 Strasbourg, France
@ Date: 2025-10-10
Licence:
ArXtic
Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr)
arxiv_api.py
Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see www.gnu.org/licenses/.
"""
import os
from urllib.parse import urlencode, quote_plus
import requests as rq
import numpy as np
import textwrap as tw
import feedparser as fp
import bibtexparser as bib
FILTERS_DIR = os.environ.get("FILTERS_DIR")
DB_DIR = os.environ.get("DB_DIR")
PDF_DIR = os.environ.get("PDF_DIR")
ARXIV_QUERY_URL = os.environ.get("ARXIV_QUERY_URL")
ARXIV_RSS_URL = os.environ.get("ARXIV_RSS_URL")
COLOUR_DEFAULT="\033[0m"
COLOUR_INPUT="\033[36m"
COLOUR_OUTPUT="\033[32m"
COLOUR_INFO="\033[34m"
COLOUR_WARNING="\033[93m"
COLOUR_ERROR="\033[91m"
## Parse Entries
def parse_entries(feed):
    """Convert the entries of a parsed arXiv feed into plain dictionaries.

    Args:
        feed: mapping with an ``"entries"`` list; each entry provides
            ``id``, ``link``, ``title``, ``authors``, ``summary`` and
            ``published``.

    Returns:
        list: one dict per entry with the keys ``id``, ``url``, ``title``,
        ``author`` (list of names), ``abstract`` and ``pubdate``
        (first 10 characters of ``published``).
    """
    parsed = []
    for raw in feed["entries"]:
        parsed.append({
            "id": raw["id"].replace("oai:", "").replace("arXiv.org:", ""),
            "url": raw["link"],
            "title": raw["title"],
            # A single "name" may hold several comma-separated authors;
            # non-dict elements of "authors" are ignored.
            "author": [name.strip()
                       for person in raw["authors"]
                       if isinstance(person, dict)
                       for name in person["name"].split(",")],
            "abstract": raw["summary"],
            "pubdate": raw["published"][0:10],
        })
    return parsed
## Parse Bibtex
def parse_bibtex(entries,
                 arxtic_notes = "",
                 arxtic_category = "",
                 arxtic_keywords = "",
                 arxtic_score = 0,
                 arxtic_filename = "",
                 arxtic_date_read = ""):
    """Build a BibTeX library from parsed arXiv entries.

    Args:
        entries: one entry dict, or a list / numpy array of them, each with
            the keys ``id``, ``title``, ``author`` (list), ``pubdate``,
            ``abstract`` and ``url``.
        arxtic_notes, arxtic_category, arxtic_keywords, arxtic_filename,
        arxtic_date_read: values written into the matching ``arxtic_*``
            field of every entry (empty by default).
        arxtic_score: currently not written, see the note in the body.

    Returns:
        The library returned by ``bib.parse_string`` with its failed blocks
        removed.
    """
    if not isinstance(entries, (list, np.ndarray)):
        entries = [entries]
    bibentries = []
    for entry in entries:
        # The arXiv id becomes the citation key; "/" (old-style ids) is
        # replaced by "_".
        key = entry["id"].replace("http://arxiv.org/abs/", "").replace("/", "_")
        title = entry["title"]
        author = " and ".join(entry["author"])
        year = entry["pubdate"][0:4]
        abstract = entry["abstract"]
        eprint = key
        url = entry["url"]
        # The string-valued arxtic_* arguments used to be ignored (the
        # fields were hard-coded empty); they are now written, which leaves
        # the output unchanged for the default values.
        # NOTE(review): the score stays hard-coded at -1 because the
        # declared default (0) disagrees with the emitted value; decide
        # which one is intended before wiring the argument through.
        bibentries.append(f"@misc{{{key},\n"
                          f"\ttitle={{{title}}},\n"
                          f"\tauthor={{{author}}},\n"
                          f"\tyear={{{year}}},\n"
                          f"\teprint={{{eprint}}},\n"
                          f"\turl={{{url}}},\n"
                          f"\tabstract={{{abstract}}},\n"
                          f"\tarxtic_notes={{{arxtic_notes}}},\n"
                          f"\tarxtic_category={{{arxtic_category}}},\n"
                          f"\tarxtic_keywords={{{arxtic_keywords}}},\n"
                          "\tarxtic_score={-1},\n"
                          f"\tarxtic_filename={{{arxtic_filename}}},\n"
                          f"\tarxtic_date_read={{{arxtic_date_read}}},\n"
                          "}")
    library = bib.parse_string("".join(bibentries))
    library.remove(library.failed_blocks)
    return library
## arXiv API
def arxiv_today():
    """Fetch the arXiv RSS feed and return its entries as a BibTeX library.

    Returns:
        The library built by ``parse_bibtex`` from the feed at
        ``ARXIV_RSS_URL``.
    """
    feed = fp.parse(ARXIV_RSS_URL)
    for item in feed["entries"]:
        # The first line of the summary is dropped and the remaining lines
        # are joined into a single one, without the "Abstract: " label.
        body_lines = item["summary"].split("\n")[1:]
        item["summary"] = " ".join(body_lines).replace("Abstract: ", "")
    return parse_bibtex(parse_entries(feed))
def arxiv_id(ids):
    """Query arXiv for the given identifiers and return a BibTeX library.

    Args:
        ids: one identifier (str), or a list / numpy array of them; the
            "oai:" and "arXiv.org:" prefixes are stripped.

    Returns:
        The library built by ``parse_bibtex`` from the query result.

    Raises:
        TypeError: if ``ids`` is not a str, list or numpy array.
    """
    if isinstance(ids, str):
        ids = [ids]
    elif not isinstance(ids, (list, np.ndarray)):
        raise TypeError("The type of ids ({}) is not recognized"
                        .format(type(ids)))
    ids = [i.replace("oai:", "").replace("arXiv.org:", "") for i in ids]
    query = urlencode({"id_list": ",".join(ids)})
    feed = fp.parse(ARXIV_QUERY_URL + query)
    for item in feed["entries"]:
        # Flatten the abstract to one line and drop the "Abstract: " label.
        item["summary"] = (item["summary"]
                           .replace("\n", " ")
                           .replace("\t", "")
                           .replace("Abstract: ", ""))
    entries = parse_entries(feed)
    return parse_bibtex(entries)

View File

@@ -8,7 +8,7 @@ ArXtic queries arXiv and filters the output.
@ Author: Moussouni, Yaël (MSc student; yael.moussouni@etu.unistra.fr) @ Author: Moussouni, Yaël (MSc student; yael.moussouni@etu.unistra.fr)
@ Institution: Université de Strasbourg, CNRS, Observatoire astronomique @ Institution: Université de Strasbourg, CNRS, Observatoire astronomique
de Strasbourg, UMR 7550, F-67000 Strasbourg, France de Strasbourg, UMR 7550, F-67000 Strasbourg, France
@ Date: 2025-09-15 @ Date: 2025-10-10
Licence: Licence:
ArXtic ArXtic
@@ -31,21 +31,22 @@ You should have received a copy of the GNU General Public License
along with this program. If not, see www.gnu.org/licenses/. along with this program. If not, see www.gnu.org/licenses/.
""" """
import os import os
from urllib.parse import urlencode, quote_plus
import requests as rq
import numpy as np
import textwrap as tw import textwrap as tw
import feedparser as fp import feedparser as fp
import bibtexparser as bib import bibtexparser as bib
import requests as rq
import numpy as np import arxiv_api
from urllib.parse import urlencode, quote_plus import ads_api
import local_api
import utils
FILTERS_DIR = os.environ.get("FILTERS_DIR") FILTERS_DIR = os.environ.get("FILTERS_DIR")
DB_DIR = os.environ.get("DB_DIR") DB_DIR = os.environ.get("DB_DIR")
PDF_DIR = os.environ.get("PDF_DIR") PDF_DIR = os.environ.get("PDF_DIR")
ARXIV_QUERY_URL = os.environ.get("ARXIV_QUERY_URL")
ARXIV_RSS_URL = os.environ.get("ARXIV_RSS_URL")
ADSABS_QUERY_URL = os.environ.get("ADSABS_QUERY_URL")
ADSABS_EXPORT_URL = os.environ.get("ADSABS_EXPORT_URL")
ADSABS_API_KEY = os.environ.get("ADSABS_API_KEY")
COLOUR_DEFAULT="\033[0m" COLOUR_DEFAULT="\033[0m"
COLOUR_INPUT="\033[36m" COLOUR_INPUT="\033[36m"
@@ -54,328 +55,13 @@ COLOUR_INFO="\033[34m"
COLOUR_WARNING="\033[93m" COLOUR_WARNING="\033[93m"
COLOUR_ERROR="\033[91m" COLOUR_ERROR="\033[91m"
## General ids = ["2510.06329", "2509.13163"]
bibcodes = ["2022A&A...658A.152V", "2021A&A...649A..97L"]
query = "first_author:\"Voggel, K\"year:(2022)"
def wrap(txt, length=80): utils.print_reference(arxiv_api.arxiv_today())
wrapped_txt = '\n'.join(tw.wrap(txt, length, break_long_words=False)) utils.print_title_author(arxiv_api.arxiv_id(ids))
return wrapped_txt print(ads_api.ads_bibcode_search(query, num=2))
utils.print_abstract(ads_api.ads_bibcode(bibcodes))
## Filters utils.print_reference(local_api.update_local_pdf())
def get_filters():
    """Load every filter file found in ``FILTERS_DIR``.

    Hidden files (leading ".") are skipped. In a filter file, a line
    containing "#FIELD" adds a field name, a line containing "#SCORE" sets
    the integer score, other "#" lines and blank lines are ignored, and any
    remaining line is a value to search for.

    Returns:
        list: one dict per file with the keys ``fields``, ``values`` and
        ``score`` (1 unless the file sets it).
    """
    filters = []
    for name in [f for f in os.listdir(FILTERS_DIR) if not f[0] == "."]:
        spec = {"fields": [], "values": [], "score": 1}
        with open(FILTERS_DIR + name) as filter_file:
            for line in filter_file.readlines():
                if "#FIELD" in line:
                    # Text after "=", without quotes or surrounding blanks.
                    spec["fields"].append(
                        line.split("=")[1].replace("\"", "").strip())
                elif "#SCORE" in line:
                    spec["score"] = int(line.split("=")[1].strip())
                elif not (line[0] == "#" or line in [" \n", "\n", ""]):
                    spec["values"].append(line.replace("\n", ""))
        filters.append(spec)
    return filters
def filter_entries(filters, entries):
    """Keep the entries matched by at least one filter value.

    A value matches when it occurs, case-insensitively, in the string form
    of one of the filter's fields of the entry (a missing field counts as
    empty). The first match records the entry; every further match adds the
    filter score again and records new fields / keywords.

    Args:
        filters: list of dicts with the keys ``fields``, ``values`` and
            ``score``.
        entries: list of entry mappings.

    Returns:
        tuple: ``(kept_entries, data)`` where ``data`` holds the parallel
        lists ``fields``, ``keywords`` and ``score``.
    """
    kept = []
    hit_fields = []
    hit_keywords = []
    hit_scores = []
    for entry in entries:
        added = False
        for filter_ in filters:
            fields = filter_["fields"]
            values = filter_["values"]
            score = filter_["score"]
            for field in fields:
                for value in values:
                    val = entry[field] if field in list(entry) else ""
                    if value.upper() not in str(val).upper():
                        continue
                    if not added:
                        kept.append(entry)
                        hit_fields.append([field])
                        hit_keywords.append([value])
                        hit_scores.append(score)
                        added = True
                    else:
                        hit_scores[-1] = hit_scores[-1] + score
                        if not field in hit_fields[-1]:
                            hit_fields[-1].append(field)
                        if not value in hit_keywords[-1]:
                            hit_keywords[-1].append(value)
    filtered_data = {"fields": hit_fields,
                     "keywords": hit_keywords,
                     "score": hit_scores}
    return kept, filtered_data
## Print entries
def print_entries(entries, data=None):
"""Print each entry (identifier, link, title, authors, abstract) in colour.

When ``data`` is given, the filtered fields, keywords and score stored at
the same index as the entry are printed as well. Always returns 0.
"""
for i in range(len(entries)):
entry = entries[i]
# Header line: bibcode and/or id, optional announce type, then the link.
print(COLOUR_INFO, end="")
if "bibcode" in list(entry):
print(entry["bibcode"], end="")
if "id" in list(entry):
print(entry["id"], end="")
if "arxiv_announce_type" in list(entry) :
print(" (" + entry["arxiv_announce_type"] + ")", end="")
# NOTE(review): confirm whether the link is printed for every entry or
# only under one of the conditions above.
print(" [" + entry["link"] + "]", end="")
print(COLOUR_DEFAULT)
print(COLOUR_DEFAULT + wrap(entry["title"]) + COLOUR_DEFAULT)
# "author" is joined with ", ", so it is expected to be a list of names.
print(COLOUR_OUTPUT
+ wrap(", ".join(entry["author"]))
+ COLOUR_DEFAULT)
print(COLOUR_INPUT
+ wrap(entry["abstract"])
+ COLOUR_DEFAULT)
# data["fields"], data["keywords"] and data["score"] are indexed like entries.
if data is not None:
print(COLOUR_ERROR
+ "Filtered field(s): "
+ ", ".join(data["fields"][i])
+ COLOUR_DEFAULT)
print(COLOUR_ERROR
+ "Filtered keyword(s): "
+ ", ".join(data["keywords"][i])
+ COLOUR_DEFAULT)
print(COLOUR_ERROR
+ "Filtered score: "
+ str(data["score"][i])
+ COLOUR_DEFAULT)
# Blank separator line.
print("")
return 0
# IDs
def get_arxiv_ids(entries):
    """Return the ``"id"`` value of every entry, in order."""
    return [entry["id"] for entry in entries]
def save_arxiv_ids(ids, library="saved"):
    """Append arXiv identifiers to ``DB_DIR + library + ".txt"``.

    Identifiers already present in the file are not written again, and an
    identifier repeated inside ``ids`` is written only once.

    Args:
        ids: one identifier (str), or a list / numpy array of them; the
            "oai:" and "arXiv.org:" prefixes are stripped.
        library: base name of the text file (without extension).

    Returns:
        int: always 0.

    Raises:
        TypeError: if ``ids`` is not a str, list or numpy array.
    """
    if isinstance(ids, str):
        ids = [ids]
    elif not isinstance(ids, (list, np.ndarray)):
        raise TypeError(
                "The type of ids ({}) is not recognized".format(type(ids))
                )
    ids = [i.replace("oai:", "").replace("arXiv.org:", "") for i in ids]
    # "a+" creates the file when it is missing; reading needs an explicit
    # rewind, while writes always go to the end of the file.
    with open(DB_DIR + library + ".txt", "a+") as db_file:
        db_file.seek(0)
        known_ids = {line.replace("\n", "") for line in db_file}
        for i in ids:
            if i not in known_ids:
                db_file.write(i + "\n")
                known_ids.add(i)
    return 0
## ArXiV
def get_arxiv_rss():
    """Return the feed parsed from ``ARXIV_RSS_URL``."""
    return fp.parse(ARXIV_RSS_URL)
def today_arxiv():
    """Fetch the arXiv RSS feed, filter its entries and print the matches.

    Returns:
        tuple: ``(entries, data)`` as returned by ``filter_entries``.
    """
    filters = get_filters()
    feed = get_arxiv_rss()
    entries, data = filter_entries(filters, get_arxiv_entries(feed))
    print_entries(entries, data)
    return entries, data
def get_arxiv_from_ids(ids):
    """Query arXiv for the given identifiers and return the parsed feed.

    Args:
        ids: one identifier (str), or a list / numpy array of them; the
            "oai:" and "arXiv.org:" prefixes are stripped.

    Returns:
        The feed parsed from ``ARXIV_QUERY_URL`` plus the ``id_list`` query.

    Raises:
        TypeError: if ``ids`` is not a str, list or numpy array.
    """
    if isinstance(ids, str):
        ids = [ids]
    elif not isinstance(ids, (list, np.ndarray)):
        raise TypeError(
                "The type of ids ({}) is not recognized".format(type(ids))
                )
    ids = [i.replace("oai:", "").replace("arXiv.org:", "") for i in ids]
    query = urlencode({"id_list": ",".join(ids)})
    return fp.parse(ARXIV_QUERY_URL + query)
## ADS-ABS
def ads_search(query, num=5, sort="date"):
    """Send a search query to ADS and return the raw response.

    Args:
        query: ADS query string, sent as the ``q`` parameter.
        num: value sent as the ``rows`` parameter.
        sort: value sent as the ``sort`` parameter.

    Returns:
        The response object returned by ``rq.get``.
    """
    # Fields requested for every record ("fl").
    wanted = ("bibcode,title,author,abstract,bibstem,doi,"
              "keyword,citation,pubdate")
    params = {"q": query, "fl": wanted, "rows": num, "sort": sort}
    url = ADSABS_QUERY_URL + urlencode(params)
    auth = {'Authorization': "Bearer " + ADSABS_API_KEY}
    return rq.get(url, headers=auth)
def ads_author(author, num=10, sort="date"):
    """Search ADS by author, filter the results and print the matches.

    Args:
        author: author name, appended to the ``author:`` query prefix.
        num: number of rows requested from ``ads_search``.
        sort: sort key passed to ``ads_search``.

    Returns:
        tuple: ``(entries, data)`` as returned by ``filter_entries``.
    """
    filters = get_filters()
    feed = ads_search("author:" + author, num=num, sort=sort)
    entries, data = filter_entries(filters, get_ads_entries(feed))
    print_entries(entries, data)
    return entries, data
# Entries
def get_arxiv_entries(rss):
    """Convert the entries of a parsed arXiv feed into plain dictionaries.

    Args:
        rss: mapping with an ``"entries"`` list; each entry provides
            ``id``, ``link``, ``title``, ``authors``, ``summary`` and
            ``published``.

    Returns:
        list: one dict per entry with the keys ``id``, ``link``, ``title``,
        ``author`` (list of names), ``abstract`` and ``pubdate``.
    """
    parsed = []
    for raw in rss["entries"]:
        parsed.append({
            "id": raw["id"].replace("oai:", "").replace("arXiv.org:", ""),
            "link": raw["link"],
            "title": raw["title"],
            # A single "name" may hold several comma-separated authors;
            # non-dict elements of "authors" are ignored.
            "author": [name.strip()
                       for person in raw["authors"]
                       if isinstance(person, dict)
                       for name in person["name"].split(",")],
            # Drop the first summary line, then the first 10 characters of
            # what remains.
            "abstract": "\n".join(raw["summary"].split("\n")[1:])[10:],
            "pubdate": raw["published"][0:10],
        })
    return parsed
def get_ads_entries(feed):
    """Return the records of an ADS search response, lightly normalised.

    Each record gains a ``"link"`` to its ADS abstract page, its ``"title"``
    is replaced by the first element of the title list, and ``"publisher"``
    is set to the first element of ``"bibstem"``.

    Args:
        feed: response object exposing ``json()``; the decoded payload must
            contain ``["response"]["docs"]``.

    Returns:
        list: the normalised record dicts, in response order.
    """
    # Decode the payload once instead of re-parsing it on every iteration.
    docs = feed.json()["response"]["docs"]
    for entry in docs:
        entry["link"] = "https://ui.adsabs.harvard.edu/abs/" + entry["bibcode"]
        entry["title"] = entry["title"][0]
        entry["publisher"] = entry["bibstem"][0]
    return list(docs)
# BibTeX
def arxiv_to_bibtex(entry,
                    arxtic_notes = "",
                    arxtic_category = "",
                    arxtic_keywords = "",
                    arxtic_score = 0,
                    arxtic_filename = ""):
    """Build a ``@misc`` BibTeX entry from one parsed arXiv entry.

    Args:
        entry: dict with the keys ``id``, ``title``, ``author`` (list),
            ``pubdate`` and ``link``.
        arxtic_notes, arxtic_category, arxtic_keywords, arxtic_score,
        arxtic_filename: values written into the matching ``arxtic_*``
            fields.

    Returns:
        The result of ``bib.parse_string`` on the generated entry.
    """
    key = entry["id"]
    # (field name, value) pairs in the order they are written; the arXiv id
    # serves both as citation key and as eprint.
    fields = [("title", entry["title"]),
              ("author", " and ".join(entry["author"])),
              ("year", entry["pubdate"][0:4]),
              ("eprint", key),
              ("url", entry["link"]),
              ("arxtic_notes", arxtic_notes),
              ("arxtic_category", arxtic_category),
              ("arxtic_keywords", arxtic_keywords),
              ("arxtic_score", str(arxtic_score)),
              ("arxtic_filename", str(arxtic_filename))]
    body = "".join(f"\t{name}={{{value}}},\n" for name, value in fields)
    return bib.parse_string(f"@misc{{{key},\n" + body + "}")
def ads_to_bibtex(entry,
                  arxtic_notes = "",
                  arxtic_category = "",
                  arxtic_keywords = "",
                  arxtic_score = 0,
                  arxtic_filename = ""):
    """Fetch the ADS BibTeX export of an entry and add the ArXtic fields.

    Args:
        entry: dict holding at least the key ``bibcode``.
        arxtic_notes, arxtic_category, arxtic_keywords, arxtic_score,
        arxtic_filename: values written into the matching ``arxtic_*``
            fields.

    Returns:
        The result of ``bib.parse_string`` on the extended entry.
    """
    url = ADSABS_EXPORT_URL + entry["bibcode"]
    auth = {'Authorization': "Bearer " + ADSABS_API_KEY}
    exported = rq.get(url, headers=auth).text
    extras = [("arxtic_notes", arxtic_notes),
              ("arxtic_category", arxtic_category),
              ("arxtic_keywords", arxtic_keywords),
              ("arxtic_score", str(arxtic_score)),
              ("arxtic_filename", str(arxtic_filename))]
    # The last two characters of the export are dropped before the extra
    # fields and a new closing brace are appended.
    tail = "".join(f"\t{name}={{{value}}},\n" for name, value in extras)
    return bib.parse_string(exported[:-2] + ",\n" + tail + "}")
def list_pdf():
    """Identify the PDF files of ``PDF_DIR`` from their file names.

    Two naming schemes are recognised once the name is split on "_":
    ``<x>_arxiv_<id parts...>.pdf`` (looked up on arXiv) and
    ``<first author>_<year>_<bibstem>_<volume>_<page>.pdf`` (looked up on
    ADS, with bibstem "AA" read as "A&A"). A warning is printed for every
    file that cannot be identified unambiguously.

    Returns:
        None.
    """
    def warn(pdf_name, reason):
        # Shared warning text; only the reason in parentheses varies.
        print(COLOUR_WARNING
              + f"Warning: {pdf_name} has not been correctly identified. "
              + f"({reason})"
              + COLOUR_DEFAULT)

    # NOTE(review): the collected entries are never returned; confirm
    # whether callers are meant to receive bibtex_list.
    bibtex_list = []
    pdf_names = [f for f in os.listdir(PDF_DIR)
                 if not f[0] == "." and ".pdf" in f]
    for pdf_name in pdf_names:
        fields = pdf_name.replace(".pdf", "").split("_")
        if len(fields) < 2:
            # Fix: this branch used the undefined expression
            # COLOUR+DEFAULT and raised NameError instead of warning.
            warn(pdf_name, "unrecognized format #1")
        elif fields[1].upper() == "ARXIV":
            # Old-style ids contain "/", stored as "_" in the file name.
            arxiv_id = "/".join(fields[2:])
            feed = get_arxiv_from_ids(arxiv_id)
            entries = get_arxiv_entries(feed)
            if len(entries) == 1:
                bibtex_list.append(arxiv_to_bibtex(entries[0],
                                                   arxtic_score=99,
                                                   arxtic_filename=pdf_name))
            else:
                warn(pdf_name, "ambiguous #1")
        elif len(fields) == 5:
            first_author, year, bibstem, volume, page = fields
            if bibstem == "AA":
                bibstem = "A&A"
            query = (f"first_author:\"{first_author}\""
                     f"year:({year})"
                     f"bibstem:\"{bibstem}\""
                     f"volume:\"{volume}\""
                     f"page:\"{page}\"")
            # Two rows are requested so that an ambiguous match is detected.
            feed = ads_search(query, num=2)
            entries = get_ads_entries(feed)
            if len(entries) == 1:
                bibtex_list.append(ads_to_bibtex(entries[0],
                                                 arxtic_score=99,
                                                 arxtic_filename=pdf_name))
            else:
                warn(pdf_name, "ambiguous #2")
        else:
            warn(pdf_name, "unrecognized format #2")
    return None
list_pdf()
#entries, data = today_arxiv()

194
src/legacy.py Normal file
View File

@@ -0,0 +1,194 @@
#!/usr/bin/env python
#[TLP:AMBER] LIMITED DISTRIBUTION: WORK IN PROGRESS
"""
ArXtic:
ArXtic queries arXiv and filters the output.
@ Author: Moussouni, Yaël (MSc student; yael.moussouni@etu.unistra.fr)
@ Institution: Université de Strasbourg, CNRS, Observatoire astronomique
de Strasbourg, UMR 7550, F-67000 Strasbourg, France
@ Date: 2025-09-15
Licence:
ArXtic
Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr)
legacy.py
Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see www.gnu.org/licenses/.
"""
import os
import textwrap as tw
import feedparser as fp
import bibtexparser as bib
import requests as rq
import numpy as np
from urllib.parse import urlencode, quote_plus
FILTERS_DIR = os.environ.get("FILTERS_DIR")
DB_DIR = os.environ.get("DB_DIR")
PDF_DIR = os.environ.get("PDF_DIR")
ARXIV_QUERY_URL = os.environ.get("ARXIV_QUERY_URL")
ARXIV_RSS_URL = os.environ.get("ARXIV_RSS_URL")
ADSABS_QUERY_URL = os.environ.get("ADSABS_QUERY_URL")
ADSABS_EXPORT_URL = os.environ.get("ADSABS_EXPORT_URL")
ADSABS_API_KEY = os.environ.get("ADSABS_API_KEY")
COLOUR_DEFAULT="\033[0m"
COLOUR_INPUT="\033[36m"
COLOUR_OUTPUT="\033[32m"
COLOUR_INFO="\033[34m"
COLOUR_WARNING="\033[93m"
COLOUR_ERROR="\033[91m"
## Filters
def get_filters():
    """Load every filter file found in ``FILTERS_DIR``.

    Hidden files (leading ".") are skipped. In a filter file, a line
    containing "#FIELD" adds a field name, a line containing "#SCORE" sets
    the integer score, other "#" lines and blank lines are ignored, and any
    remaining line is a value to search for.

    Returns:
        list: one dict per file with the keys ``fields``, ``values`` and
        ``score`` (1 unless the file sets it).
    """
    filters = []
    for name in [f for f in os.listdir(FILTERS_DIR) if not f[0] == "."]:
        spec = {"fields": [], "values": [], "score": 1}
        with open(FILTERS_DIR + name) as filter_file:
            for line in filter_file.readlines():
                if "#FIELD" in line:
                    # Text after "=", without quotes or surrounding blanks.
                    spec["fields"].append(
                        line.split("=")[1].replace("\"", "").strip())
                elif "#SCORE" in line:
                    spec["score"] = int(line.split("=")[1].strip())
                elif not (line[0] == "#" or line in [" \n", "\n", ""]):
                    spec["values"].append(line.replace("\n", ""))
        filters.append(spec)
    return filters
def filter_entries(filters, entries):
    """Keep the entries matched by at least one filter value.

    A value matches when it occurs, case-insensitively, in the string form
    of one of the filter's fields of the entry (a missing field counts as
    empty). The first match records the entry; every further match adds the
    filter score again and records new fields / keywords.

    Args:
        filters: list of dicts with the keys ``fields``, ``values`` and
            ``score``.
        entries: list of entry mappings.

    Returns:
        tuple: ``(kept_entries, data)`` where ``data`` holds the parallel
        lists ``fields``, ``keywords`` and ``score``.
    """
    kept = []
    hit_fields = []
    hit_keywords = []
    hit_scores = []
    for entry in entries:
        added = False
        for filter_ in filters:
            fields = filter_["fields"]
            values = filter_["values"]
            score = filter_["score"]
            for field in fields:
                for value in values:
                    val = entry[field] if field in list(entry) else ""
                    if value.upper() not in str(val).upper():
                        continue
                    if not added:
                        kept.append(entry)
                        hit_fields.append([field])
                        hit_keywords.append([value])
                        hit_scores.append(score)
                        added = True
                    else:
                        hit_scores[-1] = hit_scores[-1] + score
                        if not field in hit_fields[-1]:
                            hit_fields[-1].append(field)
                        if not value in hit_keywords[-1]:
                            hit_keywords[-1].append(value)
    filtered_data = {"fields": hit_fields,
                     "keywords": hit_keywords,
                     "score": hit_scores}
    return kept, filtered_data
# Interactive command loop: loads the three BibTeX databases, then reads
# commands until the user quits. Results of "today", "arxiv" and "ads" are
# accumulated in working_bibtex.
# NOTE(review): file_to_bibtex, bibtex_to_file, print_bibtex, today_arxiv,
# list_pdf, get_arxiv_from_ids, get_arxiv_entries, arxiv_to_bibtex,
# get_ads_from_bibcode, get_ads_entries and ads_to_bibtex are neither defined
# nor imported in the visible part of this module -- confirm where they are
# expected to come from.
if __name__ == "__main__":
read = file_to_bibtex("read.bib")
unread = file_to_bibtex("unread.bib")
library = file_to_bibtex("library.bib")
quit_action = False
working_bibtex = bib.Library()
while not quit_action:
# Keys are recomputed on every iteration because the libraries may change.
read_keys = [b.key for b in read.blocks]
unread_keys = [b.key for b in unread.blocks]
library_keys = [b.key for b in library.blocks]
# NOTE(review): the next line repeats the previous assignment.
library_keys = [b.key for b in library.blocks]
print(COLOUR_INPUT + "Select an action")
action = input("> " + COLOUR_DEFAULT)
# Quit (case-insensitive)
if action.upper() in ["QUIT", "EXIT", "Q"]:
quit_action = True
# Help (also shown for empty input)
elif action in ["", " ", "help", "h"]:
print(COLOUR_OUTPUT
+ "Available commands:\n"
+ "\t- quit, exit, q: exit\n"
+ COLOUR_DEFAULT)
# Print: no argument prints the working library; READ, UNREAD or LIBRARY
# prints that database; anything else is looked up as an entry key.
elif action.split(" ")[0].upper() in ["PRINT", "P"]:
if len(action.split(" ")) == 1:
print_bibtex(working_bibtex)
elif action.split(" ")[1].upper() == "READ":
print_bibtex(read)
elif action.split(" ")[1].upper() == "UNREAD":
print_bibtex(unread)
elif action.split(" ")[1].upper() == "LIBRARY":
print_bibtex(library)
else:
# The key is searched in read, then unread, then library.
search_key = action.split(" ")[1]
if search_key in read_keys:
print_bibtex(read.blocks[read_keys.index(search_key)])
elif search_key in unread_keys:
print_bibtex(unread.blocks[unread_keys.index(search_key)])
elif search_key in library_keys:
print_bibtex(library.blocks[library_keys.index(search_key)])
else:
print(COLOUR_WARNING
+ f"Warning: {search_key} cannot be found"
+ COLOUR_DEFAULT)
# Clear: start again from an empty working library
elif action.upper() in ["CLEAR", "CLEAN"]:
working_bibtex = bib.Library()
# Today: add the blocks returned by today_arxiv to the working library
elif action.upper() in ["TODAY"]:
today_bibtex = today_arxiv()
working_bibtex.add(today_bibtex.blocks)
# Library: refresh the library through list_pdf and write it to library.bib
elif action.upper() in ["LIBRARY"]:
library = list_pdf(library)
bibtex_to_file(library, "library.bib")
# Arxiv: every word after the command is treated as an arXiv id
elif action.split(" ")[0].upper() == "ARXIV":
arxiv_ids = action.split(" ")[1:]
feed = get_arxiv_from_ids(arxiv_ids)
entries = get_arxiv_entries(feed)
for entry in entries:
bibtex_entry = arxiv_to_bibtex(entry,
arxtic_score=99)
working_bibtex.add(bibtex_entry.blocks)
# ADS: the words after the command are concatenated into a single bibcode
elif action.split(" ")[0].upper() == "ADS":
ads_bibcode = "".join(action.split(" ")[1:])
feed = get_ads_from_bibcode(ads_bibcode)
entries = get_ads_entries(feed)
for entry in entries:
bibtex_entry = ads_to_bibtex(entry,
arxtic_score=99)
working_bibtex.add(bibtex_entry.blocks)

127
src/local_api.py Normal file
View File

@@ -0,0 +1,127 @@
#!/usr/bin/env python
#[TLP:AMBER] LIMITED DISTRIBUTION: WORK IN PROGRESS
"""
ArXtic:
ArXtic queries arXiv and filters the output.
@ Author: Moussouni, Yaël (MSc student; yael.moussouni@etu.unistra.fr)
@ Institution: Université de Strasbourg, CNRS, Observatoire astronomique
de Strasbourg, UMR 7550, F-67000 Strasbourg, France
@ Date: 2025-10-10
Licence:
ArXtic
Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr)
local_api.py
Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see www.gnu.org/licenses/.
"""
import os
from urllib.parse import urlencode, quote_plus
import requests as rq
import numpy as np
import textwrap as tw
import feedparser as fp
import bibtexparser as bib
import arxiv_api
import ads_api
FILTERS_DIR = os.environ.get("FILTERS_DIR")
DB_DIR = os.environ.get("DB_DIR")
PDF_DIR = os.environ.get("PDF_DIR")
COLOUR_DEFAULT="\033[0m"
COLOUR_INPUT="\033[36m"
COLOUR_OUTPUT="\033[32m"
COLOUR_INFO="\033[34m"
COLOUR_WARNING="\033[93m"
COLOUR_ERROR="\033[91m"
def file_to_bibtex(filename, directory=DB_DIR):
    """Parse a BibTeX file and return its library without failed blocks.

    Args:
        filename: file name; ".bib" is appended when the name does not
            contain it.
        directory: prefix concatenated directly in front of ``filename``.

    Returns:
        The library returned by ``bib.parse_file`` with its failed blocks
        removed.
    """
    path = directory + (filename if ".bib" in filename else filename + ".bib")
    library = bib.parse_file(path)
    library.remove(library.failed_blocks)
    return library
def bibtex_to_file(bibtex, filename, directory=DB_DIR):
    """Write a BibTeX library to a file and return the library.

    Args:
        bibtex: library to write.
        filename: file name; ".bib" is appended when the name does not
            contain it.
        directory: prefix concatenated directly in front of ``filename``.

    Returns:
        The ``bibtex`` argument, unchanged.
    """
    if not ".bib" in filename:
        filename += ".bib"
    # The result of write_file was stored in an unused local; it is dropped.
    bib.write_file(directory+filename, bibtex)
    return bibtex
def update_local_pdf(library=None, directory=PDF_DIR):
    """Add to ``library`` the PDF files of ``directory`` not yet listed in it.

    Two naming schemes are recognised once the name is split on "_":
    ``<x>_arxiv_<id parts...>.pdf`` (looked up on arXiv) and
    ``<first author>_<year>_<bibstem>_<volume>_<page>.pdf`` (looked up on
    ADS, with bibstem "AA" read as "A&A"). A warning is printed for every
    file that cannot be identified unambiguously.

    Args:
        library: existing library to extend; a new one is created when None.
        directory: folder that is scanned for PDF files.

    Returns:
        The extended library.
    """
    # TODO: delete entry in library if the PDF file is deleted. <YM, 2025-10-11>
    def warn(pdf, reason):
        # Shared warning text; only the reason in parentheses varies.
        print(COLOUR_WARNING
              + f"Warning: {pdf} has not been correctly identified. "
              + f"({reason})"
              + COLOUR_DEFAULT)

    if library is None:
        library = bib.Library()
        known_pdf = []
    else:
        # Only entries carry fields; bib.model.Block is the base class of
        # every block type, so it does not single them out.
        entries = [b for b in library.blocks
                   if isinstance(b, bib.model.Entry)]
        known_pdf = [entry["arxtic_filename"] for entry in entries]
    # Fix: the filter read `".pdf" if f`, a second always-true condition,
    # so every non-hidden file was treated as a PDF.
    folder_pdf = [f for f in os.listdir(directory)
                  if not f[0] == "." and ".pdf" in f]
    # NOTE(review): the entries added below are not given the PDF name in
    # "arxtic_filename", so they are presumably never recognised as known on
    # a later run -- confirm.
    for pdf in folder_pdf:
        fields = pdf.replace(".pdf", "").split("_")
        if pdf in known_pdf:
            continue
        elif len(fields) < 2:
            warn(pdf, "unrecognized format #1")
        elif fields[1].upper() == "ARXIV":
            # Old-style ids contain "/", stored as "_" in the file name.
            arxiv_id = "/".join(fields[2:])
            arxiv_library = arxiv_api.arxiv_id(arxiv_id)
            if len(arxiv_library.blocks) == 1:
                library.add(arxiv_library.blocks)
            else:
                warn(pdf, "ambiguous #1")
        elif len(fields) == 5:
            first_author, year, bibstem, volume, page = fields
            if bibstem == "AA":
                bibstem = "A&A"
            query = (f"first_author:\"{first_author}\""
                     f"year:({year})"
                     f"bibstem:\"{bibstem}\""
                     f"volume:\"{volume}\""
                     f"page:\"{page}\"")
            # Two rows are requested so that an ambiguous match is detected.
            bibcodes = ads_api.ads_bibcode_search(query, num=2)
            if len(bibcodes) == 1:
                ads_library = ads_api.ads_bibcode(bibcodes)
                library.add(ads_library.blocks)
            else:
                warn(pdf, "ambiguous #2")
        else:
            warn(pdf, "unrecognized format #2")
    return library

103
src/utils.py Normal file
View File

@@ -0,0 +1,103 @@
#!/usr/bin/env python
#[TLP:AMBER] LIMITED DISTRIBUTION: WORK IN PROGRESS
"""
ArXtic:
ArXtic queries arXiv and filters the output.
@ Author: Moussouni, Yaël (MSc student; yael.moussouni@etu.unistra.fr)
@ Institution: Université de Strasbourg, CNRS, Observatoire astronomique
de Strasbourg, UMR 7550, F-67000 Strasbourg, France
@ Date: 2025-10-10
Licence:
ArXtic
Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr)
utils.py
Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see www.gnu.org/licenses/.
"""
import os
from urllib.parse import urlencode, quote_plus
import requests as rq
import numpy as np
import textwrap as tw
import feedparser as fp
import bibtexparser as bib
FILTERS_DIR = os.environ.get("FILTERS_DIR")
DB_DIR = os.environ.get("DB_DIR")
PDF_DIR = os.environ.get("PDF_DIR")
COLOUR_DEFAULT="\033[0m"
COLOUR_INPUT="\033[36m"
COLOUR_OUTPUT="\033[32m"
COLOUR_INFO="\033[34m"
COLOUR_WARNING="\033[93m"
COLOUR_ERROR="\033[91m"
def wrap(txt, length=80):
    """Wrap ``txt`` to lines of at most ``length`` characters.

    Words longer than ``length`` are kept whole instead of being split.

    Returns:
        str: the wrapped lines joined with newlines.
    """
    lines = tw.wrap(txt, width=length, break_long_words=False)
    return '\n'.join(lines)
def print_abstract(library):
    """Print key, URL, title, authors and abstract of every entry.

    Args:
        library: a ``bib.Library``, or anything accepted by the
            ``bib.Library`` constructor.

    Returns:
        int: always 0.
    """
    if not isinstance(library, bib.Library):
        library = bib.Library(library)
    for block in library.blocks:
        # Only entries have a key and fields; the previous check against
        # bib.model.Block (the base class of every block type) also let
        # comments, strings and preambles through.
        if not isinstance(block, bib.model.Entry):
            continue
        print(COLOUR_INFO, end="")
        print(block.key, end="")
        print(" [" + block["url"] + "]", end="")
        print(COLOUR_DEFAULT)
        print(COLOUR_DEFAULT + wrap(block["title"]) + COLOUR_DEFAULT)
        # BibTeX separates authors with " and "; show them comma-separated.
        print(COLOUR_OUTPUT
              + wrap(", ".join(block["author"].split(" and ")))
              + COLOUR_DEFAULT)
        print(COLOUR_INPUT
              + wrap(block["abstract"])
              + COLOUR_DEFAULT)
        print("")
    return 0
def print_title_author(library):
    """Print key, URL, title and authors of every entry.

    Args:
        library: a ``bib.Library``, or anything accepted by the
            ``bib.Library`` constructor.

    Returns:
        int: always 0.
    """
    if not isinstance(library, bib.Library):
        library = bib.Library(library)
    for block in library.blocks:
        # Only entries have a key and fields; the previous check against
        # bib.model.Block (the base class of every block type) also let
        # comments, strings and preambles through.
        if not isinstance(block, bib.model.Entry):
            continue
        print(COLOUR_INFO, end="")
        print(block.key, end="")
        print(" [" + block["url"] + "]", end="")
        print(COLOUR_DEFAULT)
        print(COLOUR_DEFAULT + wrap(block["title"]) + COLOUR_DEFAULT)
        # BibTeX separates authors with " and "; show them comma-separated.
        print(COLOUR_OUTPUT
              + wrap(", ".join(block["author"].split(" and ")))
              + COLOUR_DEFAULT)
        print("")
    return 0
def print_reference(library):
    """Print the key and URL of every entry, one line per entry.

    Args:
        library: a ``bib.Library``, or anything accepted by the
            ``bib.Library`` constructor.

    Returns:
        int: always 0.
    """
    if not isinstance(library, bib.Library):
        library = bib.Library(library)
    for block in library.blocks:
        # Only entries have a key and fields; the previous check against
        # bib.model.Block (the base class of every block type) also let
        # comments, strings and preambles through.
        if not isinstance(block, bib.model.Entry):
            continue
        print(COLOUR_INFO, end="")
        print(block.key, end="")
        print(" [" + block["url"] + "]", end="")
        print(COLOUR_DEFAULT)
    return 0