diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 074ec2041e6ec6e9900274746712488c8e078ea1..159b6ed256d90944cee6ed45517ba1a24d8350cf 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -297,24 +297,13 @@ get-zotero: extends: .df-wiki-cli-run stage: zotero script: - - df-wiki-cli articles --key ${ZOTERO_API_KEY} --output content/_data/_articles.json + - df-wiki-cli articles missing-doi --dir ./content/ --key ${ZOTERO_API_KEY} + - df-wiki-cli articles fetch-from-zotero --key ${ZOTERO_API_KEY} --output content/_data/_articles.json artifacts: paths: - content/_data/_articles.json rules: - - if: $CI_COMMIT_BRANCH == "main" - -# get-pfam: -# extends: .df-wiki-cli-run -# stage: get-data -# script: -# - df-wiki-cli pfam --output public/pfam-a-hmm.csv -# artifacts: -# paths: -# - public/pfam-a-hmm.csv - # rules: - # - if: $CI_COMMIT_BRANCH == "main" - + - if: $CI_COMMIT_BRANCH == "main" || $CI_COMMIT_BRANCH == "rework-references" ################ BUILD ########################## diff --git a/content/3.defense-systems/paris.md b/content/3.defense-systems/paris.md index 7aaafe5058f11deef029c573d18a4118bfa2909d..a09b72fca555cde404bc032bccbf8a9fd83f9ce0 100644 --- a/content/3.defense-systems/paris.md +++ b/content/3.defense-systems/paris.md @@ -23,7 +23,7 @@ PARIS (for Phage Anti-Restriction-Induced System) is a novel anti-phage system. ## Molecular mechanisms -This system relies on an unknown [Abortive infection](/general-concepts/abortive-infection) mechanism to trigger growth arrest upon sensing a phage-encoded protein (Ocr). Interestingly, the Ocr protein has been found to inhibit R-M systems and BREX systems, making PARIS a suitable defense mechanism against RM resistant and/or BREX resistant phages :ref{doi=0.1016/j.chom.2022.02.018,10.1093/nar/gkaa510,10.1007/BF01036001}. +This system relies on an unknown [Abortive infection](/general-concepts/abortive-infection) mechanism to trigger growth arrest upon sensing a phage-encoded protein (Ocr). 
def _crossref_creators(message):
    """Build Zotero creator entries from the ``author`` list of a Crossref record."""
    creators = []
    for author in message.get("author", []):
        family = author.get("family")
        if family:
            creators.append(
                {
                    "creatorType": "author",
                    "firstName": author.get("given", ""),
                    "lastName": family,
                }
            )
            continue
        # Organisation / consortium authors have no given/family split; Zotero
        # stores those as a single-field creator ("name").
        single_name = author.get("name") or author.get("given")
        if single_name:
            creators.append({"creatorType": "author", "name": single_name})
    return creators


def _fill_zotero_item(zitem, message):
    """Copy the fields of a Crossref work record into a Zotero item template."""
    titles = message.get("title") or []
    if titles:
        zitem["title"] = titles[0]
    if "page" in message:
        zitem["pages"] = message["page"]
    if "abstract" in message:
        zitem["abstractNote"] = message["abstract"]

    container_titles = message.get("container-title") or []
    if container_titles:
        zitem["publicationTitle"] = container_titles[0]
    short_titles = message.get("short-container-title") or []
    if short_titles:
        zitem["journalAbbreviation"] = short_titles[0]

    zitem["creators"] = _crossref_creators(message)
    zitem["libraryCatalog"] = "DOI.org (Crossref)"
    if "ISSN" in message:
        zitem["ISSN"] = ", ".join(message["ISSN"])

    # Prefer the publisher landing page; fall back to the record's top-level
    # URL (NOTE(review): expected to be the doi.org link - confirm).
    url = message.get("resource", {}).get("primary", {}).get("URL") or message.get(
        "URL"
    )
    if url:
        zitem["url"] = url

    # "published" is absent on some records; "issued" is used as a fallback.
    # Unknown date components come back as None and are dropped.
    published = message.get("published") or message.get("issued") or {}
    date_parts = (published.get("date-parts") or [[]])[0]
    date = "/".join(str(part) for part in date_parts if part is not None)
    if date:
        zitem["date"] = date

    # These Crossref fields share their name with the Zotero field.
    for field in ("DOI", "volume", "issue", "language"):
        if field in message:
            zitem[field] = message[field]
    return zitem


def add_articles_to_zotero_from_doi(doi, key):
    """Create a Zotero journal article from Crossref metadata and file it.

    The record for *doi* is fetched from Crossref, mapped onto a
    ``journalArticle`` template obtained from Zotero, created in the group
    library ``5151022`` and added to the collection ``BSWL96X3``.

    Args:
        doi: DOI of the article to add.
        key: Zotero API key with write access to the group library.

    Raises:
        RuntimeError: if Zotero does not report the item as created.
    """
    cr = Crossref(mailto="defense-finder@pasteur.fr")
    message = cr.works(ids=[doi])["message"]

    zot = zotero.Zotero("5151022", "group", key)
    # pyzotero grabs the template dict from the server
    zitem = _fill_zotero_item(zot.item_template("journalArticle"), message)

    items_to_add = [zitem]
    zot.check_items(items_to_add)
    response = zot.create_items(items_to_add)
    created = response.get("successful", {})
    if "0" not in created:
        # Surface Zotero's own explanation instead of an opaque KeyError.
        raise RuntimeError(
            f"Zotero did not create an item for doi {doi}: {response.get('failed')}"
        )
    zot.addto_collection("BSWL96X3", created["0"])
    console.print(f"add doi {doi}")
"""Typer sub-commands that keep the Zotero collection in sync with the wiki content."""

import json
import re
import tempfile
from pathlib import Path

import frontmatter
import typer
from habanero import Crossref
from pyzotero import zotero
from rich.console import Console
from typing_extensions import Annotated

from df_wiki_cli.articles import (
    add_articles_to_zotero_from_doi,
    fetch_articles_from_zotero,
)

app = typer.Typer()
console = Console()

# Inline citation directive of the markdown content, e.g.
# ``:ref{doi=10.1093/nar/gkaa510,10.1007/BF01036001}``.
_REF_DOI_PATTERN = re.compile(r":ref{doi=(.*?)}")


def _require_key(key: str) -> None:
    """Abort the command with exit code 1 when no Zotero API key was given."""
    if key == "":
        print("You must provide a zotero api key")
        raise typer.Exit(code=1)


def _normalize_dois(raw_dois) -> set[str]:
    """Return the stripped, lower-cased, non-empty DOI strings of *raw_dois*."""
    return {
        doi.strip().lower()
        for doi in raw_dois
        if isinstance(doi, str) and doi.strip()
    }


def _dois_in_markdown(text: str) -> set[str]:
    """Collect the DOIs cited in a markdown document (front matter and body)."""
    metadata, content = frontmatter.parse(text)
    abstracts = metadata.get("relevantAbstracts") or []
    dois = _normalize_dois(
        entry.get("doi") for entry in abstracts if isinstance(entry, dict)
    )
    for group in _REF_DOI_PATTERN.findall(content):
        # One directive may list several comma-separated DOIs.
        dois |= _normalize_dois(group.split(","))
    return dois


def _dois_in_zotero(key: str, batch_size: int) -> set[str]:
    """Return the DOIs of every item currently in the Zotero collection."""
    # fetch_articles_from_zotero only writes to a file, so dump into a private
    # temporary directory that is removed afterwards.
    with tempfile.TemporaryDirectory() as tmp_dir:
        dump = Path(tmp_dir) / "zotero-dois.json"
        fetch_articles_from_zotero(key, batch_size, dump)
        with open(dump, encoding="utf-8") as zotero_f:
            zotero_data = json.load(zotero_f)
    return _normalize_dois(
        item.get("DOI") for item in zotero_data if isinstance(item, dict)
    )


@app.command()
def fetch_from_zotero(
    output: Annotated[
        Path,
        typer.Option(
            exists=False,
            file_okay=True,
            writable=True,
        ),
    ] = Path("articles.json"),
    key: Annotated[str, typer.Option(help="Zotero api key")] = "",
    batch_size: Annotated[
        int, typer.Option(help="Number articles get per request")
    ] = 100,
):
    """
    Get articles metadata from Zotero collection

    """
    _require_key(key)
    fetch_articles_from_zotero(key, batch_size, output)


@app.command()
def missing_doi(
    dir: Annotated[
        Path,
        # exists=True: a mistyped directory must fail instead of silently
        # scanning nothing.
        typer.Option(exists=True, file_okay=False, readable=True, dir_okay=True),
    ],
    key: Annotated[str, typer.Option(help="Zotero api key")] = "",
    batch_size: Annotated[
        int, typer.Option(help="Number articles get per request")
    ] = 100,
):
    """
    Add to Zotero every DOI cited in the markdown content that is missing there

    The DOIs are read from the `relevantAbstracts` front matter entries and
    from the `:ref{doi=...}` directives of each `.md` file found under DIR.
    """
    _require_key(key)

    # get current list of dois in zotero
    zotero_dois_set = _dois_in_zotero(key, batch_size)

    # parse content to look at dois
    dois_set = set()
    for file in sorted(dir.rglob("*.md")):
        if not file.is_file():
            continue
        console.rule(f"[bold blue]{file.name}", style="blue")
        with open(file, encoding="utf-8") as f:
            dois_set |= _dois_in_markdown(f.read())

    # NOTE(review): an invalid DOI makes the Crossref lookup raise and stops
    # the remaining additions, exactly as before this change.
    for doi in sorted(dois_set - zotero_dois_set):
        add_articles_to_zotero_from_doi(doi, key)


@app.command()
def from_apicrossref(
    doi: Annotated[str, typer.Option(help="DOI identifier")],
    key: Annotated[str, typer.Option(help="Zotero api key")] = "",
):
    """
    Add one article to the Zotero collection from its Crossref metadata

    """
    _require_key(key)
    add_articles_to_zotero_from_doi(doi, key)
3b10a841198f28079546217a59f1b62eabd5f82a..8d3b6e0c6dc2126973b96b586c8c0ad2b8250d32 100644 --- a/packages/df-wiki-cli/poetry.lock +++ b/packages/df-wiki-cli/poetry.lock @@ -373,6 +373,26 @@ ufo = ["fs (>=2.2.0,<3)"] unicode = ["unicodedata2 (>=15.1.0)"] woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"] +[[package]] +name = "habanero" +version = "1.2.6" +description = "Low Level Client for Crossref Search API" +optional = false +python-versions = ">=3.7" +files = [ + {file = "habanero-1.2.6-py2.py3-none-any.whl", hash = "sha256:ccc57e7dc9cc1850961b4e6f85fc7749896cba6ddcc06ea15297dbad9af2b62e"}, + {file = "habanero-1.2.6.tar.gz", hash = "sha256:b206d49f44f41c2289f0ad731f259a50d4376c747d8ecbb219a73874d45309d4"}, +] + +[package.dependencies] +requests = ">=2.7.0" +tqdm = "*" + +[package.extras] +all = ["pytest"] +bibtex = ["bibtexparser (>=2.0.0)"] +test = ["pytest"] + [[package]] name = "idna" version = "3.4" @@ -1297,6 +1317,26 @@ files = [ {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, ] +[[package]] +name = "tqdm" +version = "4.66.2" +description = "Fast, Extensible Progress Meter" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tqdm-4.66.2-py3-none-any.whl", hash = "sha256:1ee4f8a893eb9bef51c6e35730cebf234d5d0b6bd112b0271e10ed7c24a02bd9"}, + {file = "tqdm-4.66.2.tar.gz", hash = "sha256:6cd52cdf0fef0e0f543299cfc96fec90d7b8a7e88745f411ec33eb44d5ed3531"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"] +notebook = ["ipywidgets (>=6)"] +slack = ["slack-sdk"] +telegram = ["requests"] + [[package]] name = "typer" version = "0.9.0" @@ -1378,4 +1418,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = ">=3.11,<3.13" -content-hash = 
"c424b174806be51ccfd8655e9f31c5ac7e1d8a7a362ba99f69746b2e71a59174" +content-hash = "d526b2f2acfadf5a8eccfae5e902ab702eaedd8ca88ec310d2f6c4dab88bea8a" diff --git a/packages/df-wiki-cli/pyproject.toml b/packages/df-wiki-cli/pyproject.toml index 22f5ffe80cd5293c84c985efe6daf7e7e3c715e5..d0a3341443d8102a33b93a44f44f7b16797bb3ed 100644 --- a/packages/df-wiki-cli/pyproject.toml +++ b/packages/df-wiki-cli/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "df-wiki-cli" -version = "0.1.5" +version = "0.1.6" description = "" authors = ["Remi PLANEL <rplanel@pasteur.fr>"] readme = "README.md" @@ -18,6 +18,7 @@ pydantic = "^2.4.2" pydantic-yaml = "^1.2.0" python-frontmatter = "^1.0.1" matplotlib = "^3.8.2" +habanero = "^1.2.6" [tool.poetry.group.dev.dependencies]