mirror of
https://codeberg.org/Yael-II/ArXtic.git
synced 2026-03-14 22:06:27 +01:00
2025-10-11: Rewriting and segmentation of the code
This commit is contained in:
1
setup.sh
1
setup.sh
@@ -32,6 +32,7 @@ mkdir -p -v cfg/filters
|
|||||||
mkdir -p -v db
|
mkdir -p -v db
|
||||||
touch db/read.bib
|
touch db/read.bib
|
||||||
touch db/unread.bib
|
touch db/unread.bib
|
||||||
|
touch db/library.bib
|
||||||
chmod u+x *.sh
|
chmod u+x *.sh
|
||||||
echo "=== Done ==="
|
echo "=== Done ==="
|
||||||
|
|
||||||
|
|||||||
BIN
src/__pycache__/ads_api.cpython-312.pyc
Normal file
BIN
src/__pycache__/ads_api.cpython-312.pyc
Normal file
Binary file not shown.
BIN
src/__pycache__/arxiv_api.cpython-312.pyc
Normal file
BIN
src/__pycache__/arxiv_api.cpython-312.pyc
Normal file
Binary file not shown.
BIN
src/__pycache__/bibtex_interface.cpython-312.pyc
Normal file
BIN
src/__pycache__/bibtex_interface.cpython-312.pyc
Normal file
Binary file not shown.
BIN
src/__pycache__/local_api.cpython-312.pyc
Normal file
BIN
src/__pycache__/local_api.cpython-312.pyc
Normal file
Binary file not shown.
BIN
src/__pycache__/utils.cpython-312.pyc
Normal file
BIN
src/__pycache__/utils.cpython-312.pyc
Normal file
Binary file not shown.
112
src/ads_api.py
Normal file
112
src/ads_api.py
Normal file
@@ -0,0 +1,112 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
#[TLP:AMBER] LIMITED DISTRIBUTION: WORK IN PROGRESS
|
||||||
|
"""
|
||||||
|
ArXtic:
|
||||||
|
|
||||||
|
ArXtic queries arXiv and filters the output.
|
||||||
|
|
||||||
|
@ Author: Moussouni, Yaël (MSc student; yael.moussouni@etu.unistra.fr)
|
||||||
|
@ Institution: Université de Strasbourg, CNRS, Observatoire astronomique
|
||||||
|
de Strasbourg, UMR 7550, F-67000 Strasbourg, France
|
||||||
|
@ Date: 2025-10-10
|
||||||
|
|
||||||
|
Licence:
|
||||||
|
ArXtic
|
||||||
|
Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr)
|
||||||
|
|
||||||
|
ads_api.py
|
||||||
|
Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr)
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program. If not, see www.gnu.org/licenses/.
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
from urllib.parse import urlencode, quote_plus
|
||||||
|
|
||||||
|
import requests as rq
|
||||||
|
import numpy as np
|
||||||
|
import textwrap as tw
|
||||||
|
import feedparser as fp
|
||||||
|
import bibtexparser as bib
|
||||||
|
|
||||||
|
FILTERS_DIR = os.environ.get("FILTERS_DIR")
|
||||||
|
DB_DIR = os.environ.get("DB_DIR")
|
||||||
|
PDF_DIR = os.environ.get("PDF_DIR")
|
||||||
|
|
||||||
|
ADSABS_QUERY_URL = os.environ.get("ADSABS_QUERY_URL")
|
||||||
|
ADSABS_EXPORT_URL = os.environ.get("ADSABS_EXPORT_URL")
|
||||||
|
ADSABS_API_KEY = os.environ.get("ADSABS_API_KEY")
|
||||||
|
|
||||||
|
COLOUR_DEFAULT="\033[0m"
|
||||||
|
COLOUR_INPUT="\033[36m"
|
||||||
|
COLOUR_OUTPUT="\033[32m"
|
||||||
|
COLOUR_INFO="\033[34m"
|
||||||
|
COLOUR_WARNING="\033[93m"
|
||||||
|
COLOUR_ERROR="\033[91m"
|
||||||
|
|
||||||
|
## Parse Entries
|
||||||
|
|
||||||
|
def parse_entries(feed):
|
||||||
|
num = len(feed.json()["response"]["docs"])
|
||||||
|
bibcodes = []
|
||||||
|
for i in range(num):
|
||||||
|
entry = feed.json()["response"]["docs"][i]
|
||||||
|
bibcodes.append(entry["bibcode"])
|
||||||
|
return bibcodes
|
||||||
|
|
||||||
|
## ADS API
|
||||||
|
|
||||||
|
def ads_bibcode_search(query, num=1, sort="date"):
|
||||||
|
query = urlencode({"q": query,
|
||||||
|
"fl": "bibcode",
|
||||||
|
"rows": num,
|
||||||
|
"sort": sort})
|
||||||
|
url = ADSABS_QUERY_URL + query
|
||||||
|
header = "Bearer " + ADSABS_API_KEY
|
||||||
|
feed = rq.get(url, headers={'Authorization': header})
|
||||||
|
bibcodes = parse_entries(feed)
|
||||||
|
return bibcodes
|
||||||
|
|
||||||
|
def ads_bibcode(bibcodes):
|
||||||
|
if isinstance(bibcodes, list) or isinstance(bibcodes, np.ndarray):
|
||||||
|
bibcodes = bibcodes
|
||||||
|
elif isinstance(bibcodes, str):
|
||||||
|
bibcodes = [bibcodes]
|
||||||
|
else:
|
||||||
|
raise Exception(("The type of bibcodes ({}) is not recognized"
|
||||||
|
.format(type(bibcodes))))
|
||||||
|
bibentries = ""
|
||||||
|
for bibcode in bibcodes:
|
||||||
|
url = ADSABS_EXPORT_URL + bibcode
|
||||||
|
header = "Bearer " + ADSABS_API_KEY
|
||||||
|
feed = rq.get(url, headers={'Authorization': header})
|
||||||
|
bibentry = feed.text
|
||||||
|
bibentry = bibentry[:-2]
|
||||||
|
bibentry += (",\n"
|
||||||
|
"\tarxtic_notes={},\n"
|
||||||
|
"\tarxtic_category={},\n"
|
||||||
|
"\tarxtic_keywords={},\n"
|
||||||
|
"\tarxtic_score={-1},\n"
|
||||||
|
"\tarxtic_filename={},\n"
|
||||||
|
"\tarxtic_date_read={},\n"
|
||||||
|
"}")
|
||||||
|
bibentry = (bibentry
|
||||||
|
.replace("adsurl", "url")
|
||||||
|
.replace("\n", " ")
|
||||||
|
.replace("\t", "")
|
||||||
|
.replace(" ", ""))
|
||||||
|
bibentries += bibentry
|
||||||
|
|
||||||
|
library = bib.parse_string(bibentries)
|
||||||
|
library.remove(library.failed_blocks)
|
||||||
|
return library
|
||||||
149
src/arxiv_api.py
Normal file
149
src/arxiv_api.py
Normal file
@@ -0,0 +1,149 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
#[TLP:AMBER] LIMITED DISTRIBUTION: WORK IN PROGRESS
|
||||||
|
"""
|
||||||
|
ArXtic:
|
||||||
|
|
||||||
|
ArXtic queries arXiv and filters the output.
|
||||||
|
|
||||||
|
@ Author: Moussouni, Yaël (MSc student; yael.moussouni@etu.unistra.fr)
|
||||||
|
@ Institution: Université de Strasbourg, CNRS, Observatoire astronomique
|
||||||
|
de Strasbourg, UMR 7550, F-67000 Strasbourg, France
|
||||||
|
@ Date: 2025-10-10
|
||||||
|
|
||||||
|
Licence:
|
||||||
|
ArXtic
|
||||||
|
Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr)
|
||||||
|
|
||||||
|
arxiv_api.py
|
||||||
|
Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr)
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program. If not, see www.gnu.org/licenses/.
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
from urllib.parse import urlencode, quote_plus
|
||||||
|
|
||||||
|
import requests as rq
|
||||||
|
import numpy as np
|
||||||
|
import textwrap as tw
|
||||||
|
import feedparser as fp
|
||||||
|
import bibtexparser as bib
|
||||||
|
|
||||||
|
FILTERS_DIR = os.environ.get("FILTERS_DIR")
|
||||||
|
DB_DIR = os.environ.get("DB_DIR")
|
||||||
|
PDF_DIR = os.environ.get("PDF_DIR")
|
||||||
|
|
||||||
|
ARXIV_QUERY_URL = os.environ.get("ARXIV_QUERY_URL")
|
||||||
|
ARXIV_RSS_URL = os.environ.get("ARXIV_RSS_URL")
|
||||||
|
|
||||||
|
COLOUR_DEFAULT="\033[0m"
|
||||||
|
COLOUR_INPUT="\033[36m"
|
||||||
|
COLOUR_OUTPUT="\033[32m"
|
||||||
|
COLOUR_INFO="\033[34m"
|
||||||
|
COLOUR_WARNING="\033[93m"
|
||||||
|
COLOUR_ERROR="\033[91m"
|
||||||
|
|
||||||
|
## Parse Entries
|
||||||
|
|
||||||
|
def parse_entries(feed):
|
||||||
|
entries_old = feed["entries"]
|
||||||
|
entries = []
|
||||||
|
for entry_old in entries_old:
|
||||||
|
entry = {}
|
||||||
|
entry["id"] = (entry_old["id"]
|
||||||
|
.replace("oai:", "")
|
||||||
|
.replace("arXiv.org:", ""))
|
||||||
|
entry["url"] = entry_old["link"]
|
||||||
|
entry["title"] = entry_old["title"]
|
||||||
|
tmp = []
|
||||||
|
for element in entry_old["authors"]:
|
||||||
|
if isinstance(element, dict):
|
||||||
|
tmp += element["name"].split(",")
|
||||||
|
entry["author"] = [a.strip() for a in tmp]
|
||||||
|
entry["abstract"] = entry_old["summary"]
|
||||||
|
entry["pubdate"] = entry_old["published"][0:10]
|
||||||
|
entries.append(entry)
|
||||||
|
return entries
|
||||||
|
|
||||||
|
## Parse Bibtex
|
||||||
|
|
||||||
|
def parse_bibtex(entries,
|
||||||
|
arxtic_notes = "",
|
||||||
|
arxtic_category = "",
|
||||||
|
arxtic_keywords = "",
|
||||||
|
arxtic_score = 0,
|
||||||
|
arxtic_filename = "",
|
||||||
|
arxtic_date_read = ""):
|
||||||
|
if not (isinstance(entries, list) or isinstance(entries, np.ndarray)):
|
||||||
|
entries = [entries]
|
||||||
|
bibentries = ""
|
||||||
|
for entry in entries:
|
||||||
|
key = entry["id"].replace("http://arxiv.org/abs/", "").replace("/", "_")
|
||||||
|
title = entry["title"]
|
||||||
|
author = " and ".join(entry["author"])
|
||||||
|
year = entry["pubdate"][0:4]
|
||||||
|
abstract = entry["abstract"]
|
||||||
|
eprint = key
|
||||||
|
url = entry["url"]
|
||||||
|
bibentry = (f"@misc{{{key},\n"
|
||||||
|
f"\ttitle={{{title}}},\n"
|
||||||
|
f"\tauthor={{{author}}},\n"
|
||||||
|
f"\tyear={{{year}}},\n"
|
||||||
|
f"\teprint={{{eprint}}},\n"
|
||||||
|
f"\turl={{{url}}},\n"
|
||||||
|
f"\tabstract={{{abstract}}},\n"
|
||||||
|
"\tarxtic_notes={},\n"
|
||||||
|
"\tarxtic_category={},\n"
|
||||||
|
"\tarxtic_keywords={},\n"
|
||||||
|
"\tarxtic_score={-1},\n"
|
||||||
|
"\tarxtic_filename={},\n"
|
||||||
|
"\tarxtic_date_read={},\n"
|
||||||
|
"}")
|
||||||
|
bibentries += bibentry
|
||||||
|
library = bib.parse_string(bibentries)
|
||||||
|
library.remove(library.failed_blocks)
|
||||||
|
return library
|
||||||
|
|
||||||
|
## arXiv API
|
||||||
|
|
||||||
|
def arxiv_today():
|
||||||
|
feed = fp.parse(ARXIV_RSS_URL)
|
||||||
|
for i in range(len(feed["entries"])):
|
||||||
|
feed["entries"][i]["summary"] = (" "
|
||||||
|
.join(feed["entries"][i]["summary"].split("\n")[1:])
|
||||||
|
.replace("Abstract: ", ""))
|
||||||
|
entries = parse_entries(feed)
|
||||||
|
library = parse_bibtex(entries)
|
||||||
|
return library
|
||||||
|
|
||||||
|
def arxiv_id(ids):
|
||||||
|
if isinstance(ids, list) or isinstance(ids, np.ndarray):
|
||||||
|
ids = [i.replace("oai:", "").replace("arXiv.org:", "") for i in ids]
|
||||||
|
elif isinstance(ids, str):
|
||||||
|
ids = [ids.replace("oai:", "").replace("arXiv.org:", "")]
|
||||||
|
else:
|
||||||
|
raise Exception(("The type of ids ({}) is not recognized"
|
||||||
|
.format(type(ids))))
|
||||||
|
query = urlencode({"id_list": ",".join(ids)})
|
||||||
|
url = ARXIV_QUERY_URL + query
|
||||||
|
feed = fp.parse(url)
|
||||||
|
for i in range(len(feed["entries"])):
|
||||||
|
feed["entries"][i]["summary"] = (feed["entries"][i]["summary"]
|
||||||
|
.replace("\n", " ")
|
||||||
|
.replace("\t", "")
|
||||||
|
.replace("Abstract: ", ""))
|
||||||
|
entries = parse_entries(feed)
|
||||||
|
library = parse_bibtex(entries)
|
||||||
|
return library
|
||||||
|
|
||||||
|
|
||||||
350
src/arxtic.py
350
src/arxtic.py
@@ -8,7 +8,7 @@ ArXtic queries arXiv and filters the output.
|
|||||||
@ Author: Moussouni, Yaël (MSc student; yael.moussouni@etu.unistra.fr)
|
@ Author: Moussouni, Yaël (MSc student; yael.moussouni@etu.unistra.fr)
|
||||||
@ Institution: Université de Strasbourg, CNRS, Observatoire astronomique
|
@ Institution: Université de Strasbourg, CNRS, Observatoire astronomique
|
||||||
de Strasbourg, UMR 7550, F-67000 Strasbourg, France
|
de Strasbourg, UMR 7550, F-67000 Strasbourg, France
|
||||||
@ Date: 2025-09-15
|
@ Date: 2025-10-10
|
||||||
|
|
||||||
Licence:
|
Licence:
|
||||||
ArXtic
|
ArXtic
|
||||||
@@ -31,21 +31,22 @@ You should have received a copy of the GNU General Public License
|
|||||||
along with this program. If not, see www.gnu.org/licenses/.
|
along with this program. If not, see www.gnu.org/licenses/.
|
||||||
"""
|
"""
|
||||||
import os
|
import os
|
||||||
|
from urllib.parse import urlencode, quote_plus
|
||||||
|
|
||||||
|
import requests as rq
|
||||||
|
import numpy as np
|
||||||
import textwrap as tw
|
import textwrap as tw
|
||||||
import feedparser as fp
|
import feedparser as fp
|
||||||
import bibtexparser as bib
|
import bibtexparser as bib
|
||||||
import requests as rq
|
|
||||||
import numpy as np
|
import arxiv_api
|
||||||
from urllib.parse import urlencode, quote_plus
|
import ads_api
|
||||||
|
import local_api
|
||||||
|
import utils
|
||||||
|
|
||||||
FILTERS_DIR = os.environ.get("FILTERS_DIR")
|
FILTERS_DIR = os.environ.get("FILTERS_DIR")
|
||||||
DB_DIR = os.environ.get("DB_DIR")
|
DB_DIR = os.environ.get("DB_DIR")
|
||||||
PDF_DIR = os.environ.get("PDF_DIR")
|
PDF_DIR = os.environ.get("PDF_DIR")
|
||||||
ARXIV_QUERY_URL = os.environ.get("ARXIV_QUERY_URL")
|
|
||||||
ARXIV_RSS_URL = os.environ.get("ARXIV_RSS_URL")
|
|
||||||
ADSABS_QUERY_URL = os.environ.get("ADSABS_QUERY_URL")
|
|
||||||
ADSABS_EXPORT_URL = os.environ.get("ADSABS_EXPORT_URL")
|
|
||||||
ADSABS_API_KEY = os.environ.get("ADSABS_API_KEY")
|
|
||||||
|
|
||||||
COLOUR_DEFAULT="\033[0m"
|
COLOUR_DEFAULT="\033[0m"
|
||||||
COLOUR_INPUT="\033[36m"
|
COLOUR_INPUT="\033[36m"
|
||||||
@@ -54,328 +55,13 @@ COLOUR_INFO="\033[34m"
|
|||||||
COLOUR_WARNING="\033[93m"
|
COLOUR_WARNING="\033[93m"
|
||||||
COLOUR_ERROR="\033[91m"
|
COLOUR_ERROR="\033[91m"
|
||||||
|
|
||||||
## General
|
ids = ["2510.06329", "2509.13163"]
|
||||||
|
bibcodes = ["2022A&A...658A.152V", "2021A&A...649A..97L"]
|
||||||
|
query = "first_author:\"Voggel, K\"year:(2022)"
|
||||||
|
|
||||||
def wrap(txt, length=80):
|
utils.print_reference(arxiv_api.arxiv_today())
|
||||||
wrapped_txt = '\n'.join(tw.wrap(txt, length, break_long_words=False))
|
utils.print_title_author(arxiv_api.arxiv_id(ids))
|
||||||
return wrapped_txt
|
print(ads_api.ads_bibcode_search(query, num=2))
|
||||||
|
utils.print_abstract(ads_api.ads_bibcode(bibcodes))
|
||||||
|
|
||||||
## Filters
|
utils.print_reference(local_api.update_local_pdf())
|
||||||
|
|
||||||
def get_filters():
|
|
||||||
filters = []
|
|
||||||
filters_list = [f for f in os.listdir(FILTERS_DIR) if not f[0] == "."]
|
|
||||||
for i in range(len(filters_list)):
|
|
||||||
path = FILTERS_DIR + filters_list[i]
|
|
||||||
with open(path) as filter_file:
|
|
||||||
dic = {"fields": [], "values": [], "score": 1}
|
|
||||||
for line in filter_file.readlines():
|
|
||||||
if "#FIELD" in line:
|
|
||||||
field = line.split("=")[1].replace("\"", "").strip()
|
|
||||||
dic["fields"].append(field)
|
|
||||||
elif "#SCORE" in line:
|
|
||||||
field = line.split("=")[1].strip()
|
|
||||||
dic["score"] = int(field)
|
|
||||||
elif line[0] == "#" or line in [" \n", "\n", ""]:
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
value = line.replace("\n", "")
|
|
||||||
dic["values"].append(value)
|
|
||||||
filters.append(dic)
|
|
||||||
return filters
|
|
||||||
|
|
||||||
def filter_entries(filters, entries):
|
|
||||||
filtered_entries = []
|
|
||||||
filtered_fields = []
|
|
||||||
filtered_keywords = []
|
|
||||||
filtered_score = []
|
|
||||||
for entry in entries:
|
|
||||||
added = False
|
|
||||||
for filter_ in filters:
|
|
||||||
fields = filter_["fields"]
|
|
||||||
values = filter_["values"]
|
|
||||||
score = filter_["score"]
|
|
||||||
for field in fields:
|
|
||||||
for value in values:
|
|
||||||
if field in list(entry):
|
|
||||||
val = entry[field]
|
|
||||||
else:
|
|
||||||
val = ""
|
|
||||||
if not added and value.upper() in str(val).upper():
|
|
||||||
filtered_entries.append(entry)
|
|
||||||
filtered_fields.append([field])
|
|
||||||
filtered_keywords.append([value])
|
|
||||||
filtered_score.append(score)
|
|
||||||
added = True
|
|
||||||
elif added and value.upper() in str(val).upper():
|
|
||||||
filtered_score[-1] = filtered_score[-1] + score
|
|
||||||
if not field in filtered_fields[-1]:
|
|
||||||
filtered_fields[-1].append(field)
|
|
||||||
if not value in filtered_keywords[-1]:
|
|
||||||
filtered_keywords[-1].append(value)
|
|
||||||
filtered_data = {"fields": filtered_fields,
|
|
||||||
"keywords": filtered_keywords,
|
|
||||||
"score": filtered_score}
|
|
||||||
return filtered_entries, filtered_data
|
|
||||||
|
|
||||||
## Print entries
|
|
||||||
|
|
||||||
def print_entries(entries, data=None):
|
|
||||||
for i in range(len(entries)):
|
|
||||||
entry = entries[i]
|
|
||||||
|
|
||||||
print(COLOUR_INFO, end="")
|
|
||||||
if "bibcode" in list(entry):
|
|
||||||
print(entry["bibcode"], end="")
|
|
||||||
if "id" in list(entry):
|
|
||||||
print(entry["id"], end="")
|
|
||||||
if "arxiv_announce_type" in list(entry) :
|
|
||||||
print(" (" + entry["arxiv_announce_type"] + ")", end="")
|
|
||||||
print(" [" + entry["link"] + "]", end="")
|
|
||||||
print(COLOUR_DEFAULT)
|
|
||||||
|
|
||||||
print(COLOUR_DEFAULT + wrap(entry["title"]) + COLOUR_DEFAULT)
|
|
||||||
print(COLOUR_OUTPUT
|
|
||||||
+ wrap(", ".join(entry["author"]))
|
|
||||||
+ COLOUR_DEFAULT)
|
|
||||||
print(COLOUR_INPUT
|
|
||||||
+ wrap(entry["abstract"])
|
|
||||||
+ COLOUR_DEFAULT)
|
|
||||||
if data is not None:
|
|
||||||
print(COLOUR_ERROR
|
|
||||||
+ "Filtered field(s): "
|
|
||||||
+ ", ".join(data["fields"][i])
|
|
||||||
+ COLOUR_DEFAULT)
|
|
||||||
print(COLOUR_ERROR
|
|
||||||
+ "Filtered keyword(s): "
|
|
||||||
+ ", ".join(data["keywords"][i])
|
|
||||||
+ COLOUR_DEFAULT)
|
|
||||||
print(COLOUR_ERROR
|
|
||||||
+ "Filtered score: "
|
|
||||||
+ str(data["score"][i])
|
|
||||||
+ COLOUR_DEFAULT)
|
|
||||||
print("")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
# IDs
|
|
||||||
|
|
||||||
def get_arxiv_ids(entries):
|
|
||||||
ids = []
|
|
||||||
for entry in entries:
|
|
||||||
ids.append(entry["id"])
|
|
||||||
return ids
|
|
||||||
|
|
||||||
def save_arxiv_ids(ids, library="saved"):
|
|
||||||
if isinstance(ids, list) or isinstance(ids, np.ndarray):
|
|
||||||
ids = [i.replace("oai:", "").replace("arXiv.org:", "") for i in ids]
|
|
||||||
elif isinstance(ids, str):
|
|
||||||
ids = [ids.replace("oai:", "").replace("arXiv.org:", "")]
|
|
||||||
else:
|
|
||||||
raise Exception(
|
|
||||||
"The type of ids ({}) is not recognized".format(type(ids))
|
|
||||||
)
|
|
||||||
with open(DB_DIR + library + ".txt", "a+") as db_file:
|
|
||||||
None # creates the file if not already in the directory
|
|
||||||
with open(DB_DIR + library + ".txt", "r+") as db_file:
|
|
||||||
known_ids = [line.replace("\n", "") for line in db_file.readlines()]
|
|
||||||
with open(DB_DIR + library + ".txt", "a+") as db_file:
|
|
||||||
for i in ids:
|
|
||||||
if not i in known_ids:
|
|
||||||
db_file.write(i)
|
|
||||||
db_file.write("\n")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
## ArXiV
|
|
||||||
|
|
||||||
def get_arxiv_rss():
|
|
||||||
feed = fp.parse(ARXIV_RSS_URL)
|
|
||||||
return feed
|
|
||||||
|
|
||||||
def today_arxiv():
|
|
||||||
filters = get_filters()
|
|
||||||
feed = get_arxiv_rss()
|
|
||||||
entries = get_arxiv_entries(feed)
|
|
||||||
entries, data = filter_entries(filters, entries)
|
|
||||||
print_entries(entries, data)
|
|
||||||
return entries, data
|
|
||||||
|
|
||||||
def get_arxiv_from_ids(ids):
|
|
||||||
if isinstance(ids, list) or isinstance(ids, np.ndarray):
|
|
||||||
ids = [i.replace("oai:", "").replace("arXiv.org:", "") for i in ids]
|
|
||||||
elif isinstance(ids, str):
|
|
||||||
ids = [ids.replace("oai:", "").replace("arXiv.org:", "")]
|
|
||||||
else:
|
|
||||||
raise Exception(
|
|
||||||
"The type of ids ({}) is not recognized".format(type(ids))
|
|
||||||
)
|
|
||||||
query = urlencode({"id_list": ",".join(ids)})
|
|
||||||
url = ARXIV_QUERY_URL + query
|
|
||||||
feed = fp.parse(url)
|
|
||||||
return feed
|
|
||||||
|
|
||||||
## ADS-ABS
|
|
||||||
|
|
||||||
def ads_search(query, num=5, sort="date"):
|
|
||||||
query = urlencode({"q": query,
|
|
||||||
"fl": ("bibcode,title,author,abstract,bibstem,doi,"
|
|
||||||
"keyword,citation,pubdate"),
|
|
||||||
"rows": num,
|
|
||||||
"sort": sort})
|
|
||||||
url = ADSABS_QUERY_URL + query
|
|
||||||
header = "Bearer " + ADSABS_API_KEY
|
|
||||||
feed = rq.get(url, headers={'Authorization': header})
|
|
||||||
return feed
|
|
||||||
|
|
||||||
def ads_author(author, num=10, sort="date"):
|
|
||||||
filters = get_filters()
|
|
||||||
feed = ads_search("author:" + author, num=num, sort=sort)
|
|
||||||
entries = get_ads_entries(feed)
|
|
||||||
entries, data = filter_entries(filters, entries)
|
|
||||||
print_entries(entries, data)
|
|
||||||
return entries, data
|
|
||||||
|
|
||||||
# Entries
|
|
||||||
|
|
||||||
def get_arxiv_entries(rss):
|
|
||||||
entries_old = rss["entries"]
|
|
||||||
entries = []
|
|
||||||
for entry_old in entries_old:
|
|
||||||
entry = {}
|
|
||||||
entry["id"] = entry_old["id"].replace("oai:", "").replace("arXiv.org:", "")
|
|
||||||
entry["link"] = entry_old["link"]
|
|
||||||
entry["title"] = entry_old["title"]
|
|
||||||
tmp = []
|
|
||||||
for element in entry_old["authors"]:
|
|
||||||
if isinstance(element, dict):
|
|
||||||
tmp += element["name"].split(",")
|
|
||||||
entry["author"] = [a.strip() for a in tmp]
|
|
||||||
entry["abstract"] = "\n".join(entry_old["summary"].split("\n")[1:])[10:]
|
|
||||||
entry["pubdate"] = entry_old["published"][0:10]
|
|
||||||
entries.append(entry)
|
|
||||||
return entries
|
|
||||||
|
|
||||||
|
|
||||||
def get_ads_entries(feed):
|
|
||||||
num = len(feed.json()["response"]["docs"])
|
|
||||||
entries = []
|
|
||||||
for i in range(num):
|
|
||||||
entry = feed.json()["response"]["docs"][i]
|
|
||||||
entry["link"] = "https://ui.adsabs.harvard.edu/abs/" + entry["bibcode"]
|
|
||||||
entry["title"] = entry["title"][0]
|
|
||||||
entry["publisher"] = entry["bibstem"][0]
|
|
||||||
entries.append(entry)
|
|
||||||
return entries
|
|
||||||
|
|
||||||
# BibTeX
|
|
||||||
|
|
||||||
def arxiv_to_bibtex(entry,
|
|
||||||
arxtic_notes = "",
|
|
||||||
arxtic_category = "",
|
|
||||||
arxtic_keywords = "",
|
|
||||||
arxtic_score = 0,
|
|
||||||
arxtic_filename = ""):
|
|
||||||
key = entry["id"]
|
|
||||||
title = entry["title"]
|
|
||||||
author = " and ".join(entry["author"])
|
|
||||||
year = entry["pubdate"][0:4]
|
|
||||||
eprint = key
|
|
||||||
url = entry["link"]
|
|
||||||
bibentry = (f"@misc{{{key},\n"
|
|
||||||
f"\ttitle={{{title}}},\n"
|
|
||||||
f"\tauthor={{{author}}},\n"
|
|
||||||
f"\tyear={{{year}}},\n"
|
|
||||||
f"\teprint={{{eprint}}},\n"
|
|
||||||
f"\turl={{{url}}},\n"
|
|
||||||
f"\tarxtic_notes={{{arxtic_notes}}},\n"
|
|
||||||
f"\tarxtic_category={{{arxtic_category}}},\n"
|
|
||||||
f"\tarxtic_keywords={{{arxtic_keywords}}},\n"
|
|
||||||
f"\tarxtic_score={{{str(arxtic_score)}}},\n"
|
|
||||||
f"\tarxtic_filename={{{str(arxtic_filename)}}},\n"
|
|
||||||
"}")
|
|
||||||
bibtex = bib.parse_string(bibentry)
|
|
||||||
return bibtex
|
|
||||||
|
|
||||||
def ads_to_bibtex(entry,
|
|
||||||
arxtic_notes = "",
|
|
||||||
arxtic_category = "",
|
|
||||||
arxtic_keywords = "",
|
|
||||||
arxtic_score = 0,
|
|
||||||
arxtic_filename = ""):
|
|
||||||
bibcode = entry["bibcode"]
|
|
||||||
url = ADSABS_EXPORT_URL + bibcode
|
|
||||||
header = "Bearer " + ADSABS_API_KEY
|
|
||||||
feed = rq.get(url, headers={'Authorization': header})
|
|
||||||
bibentry = feed.text
|
|
||||||
bibentry = bibentry[:-2]
|
|
||||||
bibentry += (",\n"
|
|
||||||
f"\tarxtic_notes={{{arxtic_notes}}},\n"
|
|
||||||
f"\tarxtic_category={{{arxtic_category}}},\n"
|
|
||||||
f"\tarxtic_keywords={{{arxtic_keywords}}},\n"
|
|
||||||
f"\tarxtic_score={{{str(arxtic_score)}}},\n"
|
|
||||||
f"\tarxtic_filename={{{str(arxtic_filename)}}},\n"
|
|
||||||
"}")
|
|
||||||
bibtex = bib.parse_string(bibentry)
|
|
||||||
return bibtex
|
|
||||||
|
|
||||||
def list_pdf():
|
|
||||||
bibtex_list = []
|
|
||||||
pdf_names = [f for f in os.listdir(PDF_DIR)
|
|
||||||
if not f[0] == "." and ".pdf" in f]
|
|
||||||
for pdf_name in pdf_names:
|
|
||||||
fields = pdf_name.replace(".pdf", "").split("_")
|
|
||||||
if len(fields) < 2:
|
|
||||||
print(COLOUR_WARNING
|
|
||||||
+ f"Warning: {pdf_name} has not been correctly identified. "
|
|
||||||
+ "(unrecognized format #1)"
|
|
||||||
+ COLOUR+DEFAULT)
|
|
||||||
elif fields[1].upper() == "ARXIV":
|
|
||||||
arxiv_id = "/".join(fields[2:])
|
|
||||||
feed = get_arxiv_from_ids(arxiv_id)
|
|
||||||
entries = get_arxiv_entries(feed)
|
|
||||||
if len(entries) == 1:
|
|
||||||
entry = entries[0]
|
|
||||||
bibtex = arxiv_to_bibtex(entry,
|
|
||||||
arxtic_score=99,
|
|
||||||
arxtic_filename=pdf_name)
|
|
||||||
bibtex_list.append(bibtex)
|
|
||||||
else:
|
|
||||||
print(COLOUR_WARNING
|
|
||||||
+ f"Warning: {pdf_name} has not been correctly identified. "
|
|
||||||
+ "(ambiguous #1)"
|
|
||||||
+ COLOUR_DEFAULT)
|
|
||||||
elif len(fields) == 5:
|
|
||||||
first_author = fields[0]
|
|
||||||
year = fields[1]
|
|
||||||
bibstem = fields[2]
|
|
||||||
volume = fields[3]
|
|
||||||
page = fields[4]
|
|
||||||
if bibstem == "AA": bibstem = "A&A"
|
|
||||||
query=(f"first_author:\"{first_author}\""
|
|
||||||
f"year:({year})"
|
|
||||||
f"bibstem:\"{bibstem}\""
|
|
||||||
f"volume:\"{volume}\""
|
|
||||||
f"page:\"{page}\"")
|
|
||||||
feed = ads_search(query, num=2)
|
|
||||||
entries = get_ads_entries(feed)
|
|
||||||
if len(entries) == 1:
|
|
||||||
entry = entries[0]
|
|
||||||
bibtex = ads_to_bibtex(entry,
|
|
||||||
arxtic_score=99,
|
|
||||||
arxtic_filename=pdf_name)
|
|
||||||
bibtex_list.append(bibtex)
|
|
||||||
else:
|
|
||||||
print(COLOUR_WARNING
|
|
||||||
+ f"Warning: {pdf_name} has not been correctly identified. "
|
|
||||||
+ "(ambiguous #2)"
|
|
||||||
+ COLOUR_DEFAULT)
|
|
||||||
else:
|
|
||||||
print(COLOUR_WARNING
|
|
||||||
+ f"Warning: {pdf_name} has not been correctly identified. "
|
|
||||||
+ "(unrecognized format #2)"
|
|
||||||
+ COLOUR_DEFAULT)
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
list_pdf()
|
|
||||||
|
|
||||||
#entries, data = today_arxiv()
|
|
||||||
|
|||||||
194
src/legacy.py
Normal file
194
src/legacy.py
Normal file
@@ -0,0 +1,194 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
#[TLP:AMBER] LIMITED DISTRIBUTION: WORK IN PROGRESS
|
||||||
|
"""
|
||||||
|
ArXtic:
|
||||||
|
|
||||||
|
ArXtic queries arXiv and filters the output.
|
||||||
|
|
||||||
|
@ Author: Moussouni, Yaël (MSc student; yael.moussouni@etu.unistra.fr)
|
||||||
|
@ Institution: Université de Strasbourg, CNRS, Observatoire astronomique
|
||||||
|
de Strasbourg, UMR 7550, F-67000 Strasbourg, France
|
||||||
|
@ Date: 2025-09-15
|
||||||
|
|
||||||
|
Licence:
|
||||||
|
ArXtic
|
||||||
|
Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr)
|
||||||
|
|
||||||
|
legacy.py
|
||||||
|
Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr)
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program. If not, see www.gnu.org/licenses/.
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import textwrap as tw
|
||||||
|
import feedparser as fp
|
||||||
|
import bibtexparser as bib
|
||||||
|
import requests as rq
|
||||||
|
import numpy as np
|
||||||
|
from urllib.parse import urlencode, quote_plus
|
||||||
|
|
||||||
|
FILTERS_DIR = os.environ.get("FILTERS_DIR")
|
||||||
|
DB_DIR = os.environ.get("DB_DIR")
|
||||||
|
PDF_DIR = os.environ.get("PDF_DIR")
|
||||||
|
ARXIV_QUERY_URL = os.environ.get("ARXIV_QUERY_URL")
|
||||||
|
ARXIV_RSS_URL = os.environ.get("ARXIV_RSS_URL")
|
||||||
|
ADSABS_QUERY_URL = os.environ.get("ADSABS_QUERY_URL")
|
||||||
|
ADSABS_EXPORT_URL = os.environ.get("ADSABS_EXPORT_URL")
|
||||||
|
ADSABS_API_KEY = os.environ.get("ADSABS_API_KEY")
|
||||||
|
|
||||||
|
COLOUR_DEFAULT="\033[0m"
|
||||||
|
COLOUR_INPUT="\033[36m"
|
||||||
|
COLOUR_OUTPUT="\033[32m"
|
||||||
|
COLOUR_INFO="\033[34m"
|
||||||
|
COLOUR_WARNING="\033[93m"
|
||||||
|
COLOUR_ERROR="\033[91m"
|
||||||
|
|
||||||
|
## Filters
|
||||||
|
|
||||||
|
def get_filters():
|
||||||
|
filters = []
|
||||||
|
filters_list = [f for f in os.listdir(FILTERS_DIR) if not f[0] == "."]
|
||||||
|
for i in range(len(filters_list)):
|
||||||
|
path = FILTERS_DIR + filters_list[i]
|
||||||
|
with open(path) as filter_file:
|
||||||
|
dic = {"fields": [], "values": [], "score": 1}
|
||||||
|
for line in filter_file.readlines():
|
||||||
|
if "#FIELD" in line:
|
||||||
|
field = line.split("=")[1].replace("\"", "").strip()
|
||||||
|
dic["fields"].append(field)
|
||||||
|
elif "#SCORE" in line:
|
||||||
|
field = line.split("=")[1].strip()
|
||||||
|
dic["score"] = int(field)
|
||||||
|
elif line[0] == "#" or line in [" \n", "\n", ""]:
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
value = line.replace("\n", "")
|
||||||
|
dic["values"].append(value)
|
||||||
|
filters.append(dic)
|
||||||
|
return filters
|
||||||
|
|
||||||
|
def filter_entries(filters, entries):
    """Keep the entries matched by at least one filter and score them.

    An entry is kept the first time any keyword of any filter appears
    (case-insensitively) in one of that filter's fields; every further
    keyword hit on the same entry adds the filter's score and records
    the matching field/keyword.

    Returns:
        (kept_entries, data) where data is a dict with parallel lists
        "fields", "keywords" and "score", one element per kept entry.
    """
    kept = []
    kept_fields = []
    kept_keywords = []
    kept_scores = []
    for entry in entries:
        matched = False
        for flt in filters:
            score = flt["score"]
            for field in flt["fields"]:
                # Missing fields behave as an empty string.
                val = entry[field] if field in list(entry) else ""
                haystack = str(val).upper()
                for keyword in flt["values"]:
                    if keyword.upper() not in haystack:
                        continue
                    if not matched:
                        # First hit: register the entry.
                        kept.append(entry)
                        kept_fields.append([field])
                        kept_keywords.append([keyword])
                        kept_scores.append(score)
                        matched = True
                    else:
                        # Further hits accumulate onto the last entry.
                        kept_scores[-1] += score
                        if field not in kept_fields[-1]:
                            kept_fields[-1].append(field)
                        if keyword not in kept_keywords[-1]:
                            kept_keywords[-1].append(keyword)
    data = {"fields": kept_fields,
            "keywords": kept_keywords,
            "score": kept_scores}
    return kept, data
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
read = file_to_bibtex("read.bib")
|
||||||
|
unread = file_to_bibtex("unread.bib")
|
||||||
|
library = file_to_bibtex("library.bib")
|
||||||
|
quit_action = False
|
||||||
|
|
||||||
|
working_bibtex = bib.Library()
|
||||||
|
|
||||||
|
while not quit_action:
|
||||||
|
read_keys = [b.key for b in read.blocks]
|
||||||
|
unread_keys = [b.key for b in unread.blocks]
|
||||||
|
library_keys = [b.key for b in library.blocks]
|
||||||
|
library_keys = [b.key for b in library.blocks]
|
||||||
|
|
||||||
|
|
||||||
|
print(COLOUR_INPUT + "Select an action")
|
||||||
|
action = input("> " + COLOUR_DEFAULT)
|
||||||
|
if action.upper() in ["QUIT", "EXIT", "Q"]:
|
||||||
|
quit_action = True
|
||||||
|
elif action in ["", " ", "help", "h"]:
|
||||||
|
print(COLOUR_OUTPUT
|
||||||
|
+ "Available commands:\n"
|
||||||
|
+ "\t- quit, exit, q: exit\n"
|
||||||
|
+ COLOUR_DEFAULT)
|
||||||
|
|
||||||
|
# Print
|
||||||
|
elif action.split(" ")[0].upper() in ["PRINT", "P"]:
|
||||||
|
if len(action.split(" ")) == 1:
|
||||||
|
print_bibtex(working_bibtex)
|
||||||
|
elif action.split(" ")[1].upper() == "READ":
|
||||||
|
print_bibtex(read)
|
||||||
|
elif action.split(" ")[1].upper() == "UNREAD":
|
||||||
|
print_bibtex(unread)
|
||||||
|
elif action.split(" ")[1].upper() == "LIBRARY":
|
||||||
|
print_bibtex(library)
|
||||||
|
else:
|
||||||
|
search_key = action.split(" ")[1]
|
||||||
|
|
||||||
|
if search_key in read_keys:
|
||||||
|
print_bibtex(read.blocks[read_keys.index(search_key)])
|
||||||
|
elif search_key in unread_keys:
|
||||||
|
print_bibtex(unread.blocks[unread_keys.index(search_key)])
|
||||||
|
elif search_key in library_keys:
|
||||||
|
print_bibtex(library.blocks[library_keys.index(search_key)])
|
||||||
|
else:
|
||||||
|
print(COLOUR_WARNING
|
||||||
|
+ f"Warning: {search_key} cannot be found"
|
||||||
|
+ COLOUR_DEFAULT)
|
||||||
|
# Clear
|
||||||
|
elif action.upper() in ["CLEAR", "CLEAN"]:
|
||||||
|
working_bibtex = bib.Library()
|
||||||
|
# Today
|
||||||
|
elif action.upper() in ["TODAY"]:
|
||||||
|
today_bibtex = today_arxiv()
|
||||||
|
working_bibtex.add(today_bibtex.blocks)
|
||||||
|
|
||||||
|
# Library
|
||||||
|
elif action.upper() in ["LIBRARY"]:
|
||||||
|
library = list_pdf(library)
|
||||||
|
bibtex_to_file(library, "library.bib")
|
||||||
|
|
||||||
|
# Arxiv
|
||||||
|
elif action.split(" ")[0].upper() == "ARXIV":
|
||||||
|
arxiv_ids = action.split(" ")[1:]
|
||||||
|
feed = get_arxiv_from_ids(arxiv_ids)
|
||||||
|
entries = get_arxiv_entries(feed)
|
||||||
|
for entry in entries:
|
||||||
|
bibtex_entry = arxiv_to_bibtex(entry,
|
||||||
|
arxtic_score=99)
|
||||||
|
working_bibtex.add(bibtex_entry.blocks)
|
||||||
|
|
||||||
|
# ADS
|
||||||
|
elif action.split(" ")[0].upper() == "ADS":
|
||||||
|
ads_bibcode = "".join(action.split(" ")[1:])
|
||||||
|
feed = get_ads_from_bibcode(ads_bibcode)
|
||||||
|
entries = get_ads_entries(feed)
|
||||||
|
for entry in entries:
|
||||||
|
bibtex_entry = ads_to_bibtex(entry,
|
||||||
|
arxtic_score=99)
|
||||||
|
working_bibtex.add(bibtex_entry.blocks)
|
||||||
127
src/local_api.py
Normal file
127
src/local_api.py
Normal file
@@ -0,0 +1,127 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
#[TLP:AMBER] LIMITED DISTRIBUTION: WORK IN PROGRESS
|
||||||
|
"""
|
||||||
|
ArXtic:
|
||||||
|
|
||||||
|
ArXtic queries arXiv and filters the output.
|
||||||
|
|
||||||
|
@ Author: Moussouni, Yaël (MSc student; yael.moussouni@etu.unistra.fr)
|
||||||
|
@ Institution: Université de Strasbourg, CNRS, Observatoire astronomique
|
||||||
|
de Strasbourg, UMR 7550, F-67000 Strasbourg, France
|
||||||
|
@ Date: 2025-10-10
|
||||||
|
|
||||||
|
Licence:
|
||||||
|
ArXtic
|
||||||
|
Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr)
|
||||||
|
|
||||||
|
local_api.py
|
||||||
|
Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr)
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program. If not, see www.gnu.org/licenses/.
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
from urllib.parse import urlencode, quote_plus
|
||||||
|
|
||||||
|
import requests as rq
|
||||||
|
import numpy as np
|
||||||
|
import textwrap as tw
|
||||||
|
import feedparser as fp
|
||||||
|
import bibtexparser as bib
|
||||||
|
|
||||||
|
import arxiv_api
|
||||||
|
import ads_api
|
||||||
|
|
||||||
|
# Paths read from the environment (presumably exported by the launch
# script — confirm against setup/run scripts).
FILTERS_DIR = os.environ.get("FILTERS_DIR")  # filter definition files
DB_DIR = os.environ.get("DB_DIR")            # .bib databases (read/unread/library)
PDF_DIR = os.environ.get("PDF_DIR")          # downloaded PDF files

# ANSI escape sequences used to colour terminal output.
COLOUR_DEFAULT="\033[0m"   # reset to default
COLOUR_INPUT="\033[36m"    # cyan
COLOUR_OUTPUT="\033[32m"   # green
COLOUR_INFO="\033[34m"     # blue
COLOUR_WARNING="\033[93m"  # bright yellow
COLOUR_ERROR="\033[91m"    # bright red
|
||||||
|
|
||||||
|
def file_to_bibtex(filename, directory=DB_DIR):
    """Parse <directory><filename> as BibTeX and return the library.

    A ".bib" suffix is appended to *filename* when missing; blocks
    that failed to parse are dropped from the returned library.
    """
    if ".bib" not in filename:
        filename = filename + ".bib"
    parsed = bib.parse_file(directory + filename)
    parsed.remove(parsed.failed_blocks)
    return parsed
|
||||||
|
|
||||||
|
def bibtex_to_file(bibtex, filename, directory=DB_DIR):
    """Write *bibtex* to <directory><filename> and return it unchanged.

    A ".bib" suffix is appended to *filename* when missing.
    """
    if ".bib" not in filename:
        filename += ".bib"
    # The original bound write_file's result to an unused local
    # ("bibentry"); the call is kept for its side effect only.
    bib.write_file(directory + filename, bibtex)
    return bibtex
|
||||||
|
|
||||||
|
def update_local_pdf(library=None, directory=PDF_DIR):
    """Scan *directory* for PDF files and add missing entries to *library*.

    Filenames are expected to follow one of two underscore-separated
    schemes (extension stripped before splitting):
      <prefix>_ARXIV_<id parts...>.pdf           -> resolved via arxiv_api
      <author>_<year>_<bibstem>_<volume>_<page>.pdf -> resolved via ADS
    Files already referenced by an entry's "arxtic_filename" field are
    skipped; unrecognized or ambiguous files only produce a warning.

    Returns the (possibly newly created) library.
    """
    # TODO: delete entry in library if the PDF file is deleted. <YM, 2025-10-11>
    if library is None:
        library = bib.Library()
        known_pdf = []
    else:
        blocks = [b for b in library.blocks if isinstance(b, bib.model.Block)]
        known_pdf = [block["arxtic_filename"] for block in blocks]

    # BUG FIX: the original condition read `and ".pdf" if f`, a bare
    # truthiness test, so the extension was never checked and every
    # non-hidden file was treated as a PDF.
    folder_pdf = [f for f in os.listdir(directory)
                  if not f[0] == "." and ".pdf" in f]

    for pdf in folder_pdf:
        fields = pdf.replace(".pdf", "").split("_")

        if pdf in known_pdf:
            continue
        elif len(fields) < 2:
            print(COLOUR_WARNING
                  + f"Warning: {pdf} has not been correctly identified. "
                  + "(unrecognized format #1)"
                  + COLOUR_DEFAULT)
        elif fields[1].upper() == "ARXIV":
            # Rejoin the remaining parts with "/" — presumably to restore
            # old-style arXiv IDs that contain a slash; TODO confirm.
            arxiv_id = "/".join(fields[2:])
            arxiv_library = arxiv_api.arxiv_id(arxiv_id)
            if len(arxiv_library.blocks) == 1:
                library.add(arxiv_library.blocks)
            else:
                print(COLOUR_WARNING
                      + f"Warning: {pdf} has not been correctly identified. "
                      + "(ambiguous #1)"
                      + COLOUR_DEFAULT)
        elif len(fields) == 5:
            first_author = fields[0]
            year = fields[1]
            bibstem = fields[2]
            volume = fields[3]
            page = fields[4]
            # "AA" in a filename stands for the journal bibstem "A&A"
            # ("&" being awkward in filenames).
            if bibstem == "AA": bibstem = "A&A"
            query = (f"first_author:\"{first_author}\""
                     f"year:({year})"
                     f"bibstem:\"{bibstem}\""
                     f"volume:\"{volume}\""
                     f"page:\"{page}\"")
            # num=2 so that a second hit reveals the query as ambiguous.
            bibcodes = ads_api.ads_bibcode_search(query, num=2)
            if len(bibcodes) == 1:
                ads_library = ads_api.ads_bibcode(bibcodes)
                library.add(ads_library.blocks)
            else:
                print(COLOUR_WARNING
                      + f"Warning: {pdf} has not been correctly identified. "
                      + "(ambiguous #2)"
                      + COLOUR_DEFAULT)
        else:
            print(COLOUR_WARNING
                  + f"Warning: {pdf} has not been correctly identified. "
                  + "(unrecognized format #2)"
                  + COLOUR_DEFAULT)
    return library
|
||||||
103
src/utils.py
Normal file
103
src/utils.py
Normal file
@@ -0,0 +1,103 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
#[TLP:AMBER] LIMITED DISTRIBUTION: WORK IN PROGRESS
|
||||||
|
"""
|
||||||
|
ArXtic:
|
||||||
|
|
||||||
|
ArXtic queries arXiv and filters the output.
|
||||||
|
|
||||||
|
@ Author: Moussouni, Yaël (MSc student; yael.moussouni@etu.unistra.fr)
|
||||||
|
@ Institution: Université de Strasbourg, CNRS, Observatoire astronomique
|
||||||
|
de Strasbourg, UMR 7550, F-67000 Strasbourg, France
|
||||||
|
@ Date: 2025-10-10
|
||||||
|
|
||||||
|
Licence:
|
||||||
|
ArXtic
|
||||||
|
Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr)
|
||||||
|
|
||||||
|
utils.py
|
||||||
|
Copyright (C) 2025 Yaël Moussouni (yael.moussouni@etu.unistra.fr)
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program. If not, see www.gnu.org/licenses/.
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
from urllib.parse import urlencode, quote_plus
|
||||||
|
|
||||||
|
import requests as rq
|
||||||
|
import numpy as np
|
||||||
|
import textwrap as tw
|
||||||
|
import feedparser as fp
|
||||||
|
import bibtexparser as bib
|
||||||
|
|
||||||
|
# Paths read from the environment (presumably exported by the launch
# script — confirm against setup/run scripts).
FILTERS_DIR = os.environ.get("FILTERS_DIR")  # filter definition files
DB_DIR = os.environ.get("DB_DIR")            # .bib databases (read/unread/library)
PDF_DIR = os.environ.get("PDF_DIR")          # downloaded PDF files

# ANSI escape sequences used to colour terminal output.
COLOUR_DEFAULT="\033[0m"   # reset to default
COLOUR_INPUT="\033[36m"    # cyan
COLOUR_OUTPUT="\033[32m"   # green
COLOUR_INFO="\033[34m"     # blue
COLOUR_WARNING="\033[93m"  # bright yellow
COLOUR_ERROR="\033[91m"    # bright red
|
||||||
|
|
||||||
|
def wrap(txt, length=80):
    """Wrap *txt* to lines of at most *length* characters.

    Words longer than *length* are kept intact rather than broken.
    Returns a single newline-joined string (empty for empty input).
    """
    pieces = tw.wrap(txt, length, break_long_words=False)
    return "\n".join(pieces)
|
||||||
|
|
||||||
|
def print_abstract(library):
    """Print key, URL, title, author list and abstract for each entry.

    *library* may be a bibtexparser Library or anything its constructor
    accepts. Always returns 0.
    """
    if not isinstance(library, bib.Library):
        library = bib.Library(library)
    for entry in library.blocks:
        if not isinstance(entry, bib.model.Block):
            continue
        # Header line: key [url], coloured as informational.
        print(COLOUR_INFO, end="")
        print(entry.key, end="")
        print(" [" + entry["url"] + "]", end="")
        print(COLOUR_DEFAULT)

        print(COLOUR_DEFAULT + wrap(entry["title"]) + COLOUR_DEFAULT)
        authors = ", ".join(entry["author"].split(" and "))
        print(COLOUR_OUTPUT + wrap(authors) + COLOUR_DEFAULT)
        print(COLOUR_INPUT + wrap(entry["abstract"]) + COLOUR_DEFAULT)
        print("")
    return 0
|
||||||
|
|
||||||
|
def print_title_author(library):
    """Print key, URL, title and author list for each entry.

    *library* may be a bibtexparser Library or anything its constructor
    accepts. Always returns 0.
    """
    if not isinstance(library, bib.Library):
        library = bib.Library(library)
    for entry in library.blocks:
        if not isinstance(entry, bib.model.Block):
            continue
        # Header line: key [url], coloured as informational.
        print(COLOUR_INFO, end="")
        print(entry.key, end="")
        print(" [" + entry["url"] + "]", end="")
        print(COLOUR_DEFAULT)

        print(COLOUR_DEFAULT + wrap(entry["title"]) + COLOUR_DEFAULT)
        authors = ", ".join(entry["author"].split(" and "))
        print(COLOUR_OUTPUT + wrap(authors) + COLOUR_DEFAULT)
        print("")
    return 0
|
||||||
|
|
||||||
|
def print_reference(library):
    """Print the one-line "key [url]" reference for each entry.

    *library* may be a bibtexparser Library or anything its constructor
    accepts. Always returns 0.
    """
    if not isinstance(library, bib.Library):
        library = bib.Library(library)
    for entry in library.blocks:
        if not isinstance(entry, bib.model.Block):
            continue
        print(COLOUR_INFO, end="")
        print(entry.key, end="")
        print(" [" + entry["url"] + "]", end="")
        print(COLOUR_DEFAULT)
    return 0
|
||||||
Reference in New Issue
Block a user