Source code for opencitations_client.json_api_client

"""A client for the OpenCitations JSON API."""

from __future__ import annotations

import re
from collections.abc import Iterator
from typing import Literal, overload

import pystow
import requests
from curies import Reference
from ratelimit import limits, sleep_and_retry

from .models import (
    Citation,
    CitationReturnType,
    Work,
    get_reference_with_prefix,
    handle_input,
    process_citation,
    process_work,
)
from .version import get_version

# Names exported by ``from ... import *``; these are the module's public functions.
__all__ = [
    "get_articles",
    "get_articles_for_author",
    "get_articles_for_editor",
    "get_incoming_citations_from_api",
    "get_outgoing_citations_from_api",
]

# Base URL of the OpenCitations Meta API (v1); prefixed onto paths by ``_get_meta_v1``.
META_V1 = "https://api.opencitations.net/meta/v1"
# Base URL of the OpenCitations Index API (v2); prefixed onto paths by ``_get_index_v2``.
BASE_V2 = "https://api.opencitations.net/index/v2"
# Value sent as the ``User-Agent`` header on every request made by ``_get``.
AGENT = f"python-opencitations-client v{get_version()}"


# NOTE: only the ``"citation"`` overload gives ``return_type`` a default, mirroring
# the implementation's ``return_type="citation"`` default. If every overload
# declared a default, a call that omits ``return_type`` would be matched against
# the first overload and typed as ``list[str]`` although it returns ``list[Citation]``.


# docstr-coverage:excused `overload`
@overload
def get_outgoing_citations_from_api(
    reference: str | Reference,
    *,
    token: str | None = ...,
    return_type: Literal["str"],
) -> list[str]: ...


# docstr-coverage:excused `overload`
@overload
def get_outgoing_citations_from_api(
    reference: str | Reference,
    *,
    token: str | None = ...,
    return_type: Literal["reference"],
) -> list[Reference]: ...


# docstr-coverage:excused `overload`
@overload
def get_outgoing_citations_from_api(
    reference: str | Reference,
    *,
    token: str | None = ...,
    return_type: Literal["citation"] = ...,
) -> list[Citation]: ...


def get_outgoing_citations_from_api(
    reference: str | Reference,
    *,
    token: str | None = None,
    return_type: CitationReturnType = "citation",
) -> list[Citation] | list[Reference] | list[str]:
    """Look up the works cited by the given article in OpenCitations.

    :param reference: The article whose outgoing citations should be retrieved
    :param token: The token to use for authentication. Loaded via
        :func:`pystow.get_config` if not given explicitly
    :param return_type: How each citation is represented in the result. For
        references or strings, only cited works that have an identifier with the
        same prefix as the query reference are kept
    :return: A list of citations, references, or local identifier strings

    .. seealso:: https://api.opencitations.net/index/v2#/references/{id}
    """
    query = handle_input(reference)
    response = _get_index_v2(f"/references/{query.curie}", token=token)
    response.raise_for_status()
    records = response.json()

    if return_type == "citation":
        return [process_citation(record) for record in records]

    # keep only the cited works that can be expressed with the query's prefix
    cited_references = []
    for record in records:
        cited = get_reference_with_prefix(process_citation(record).cited, query.prefix)
        if cited:
            cited_references.append(cited)

    if return_type == "reference":
        return cited_references
    return [cited.identifier for cited in cited_references]


# NOTE: only the ``"citation"`` overload gives ``return_type`` a default, mirroring
# the implementation's ``return_type="citation"`` default. If every overload
# declared a default, a call that omits ``return_type`` would be matched against
# the first overload and typed as ``list[str]`` although it returns ``list[Citation]``.


# docstr-coverage:excused `overload`
@overload
def get_incoming_citations_from_api(
    reference: str | Reference,
    *,
    token: str | None = ...,
    return_type: Literal["str"],
) -> list[str]: ...


# docstr-coverage:excused `overload`
@overload
def get_incoming_citations_from_api(
    reference: str | Reference,
    *,
    token: str | None = ...,
    return_type: Literal["reference"],
) -> list[Reference]: ...


# docstr-coverage:excused `overload`
@overload
def get_incoming_citations_from_api(
    reference: str | Reference,
    *,
    token: str | None = ...,
    return_type: Literal["citation"] = ...,
) -> list[Citation]: ...


def get_incoming_citations_from_api(
    reference: str | Reference,
    *,
    token: str | None = None,
    return_type: CitationReturnType = "citation",
) -> list[Citation] | list[Reference] | list[str]:
    """Look up the works that cite the given article in OpenCitations.

    :param reference: The article whose incoming citations should be retrieved
    :param token: The token to use for authentication. Loaded via
        :func:`pystow.get_config` if not given explicitly
    :param return_type: How each citation is represented in the result. For
        references or strings, only citing works that have an identifier with the
        same prefix as the query reference are kept
    :return: A list of citations, references, or local identifier strings

    .. seealso:: https://api.opencitations.net/index/v2#/citations/{id}
    """
    query = handle_input(reference)
    response = _get_index_v2(f"/citations/{query.curie}", token=token)
    response.raise_for_status()
    citations = list(map(process_citation, response.json()))
    if return_type == "citation":
        return citations

    # keep only the citing works that can be expressed with the query's prefix
    candidates = (
        get_reference_with_prefix(citation.citing, query.prefix) for citation in citations
    )
    citing_references = [candidate for candidate in candidates if candidate]
    if return_type == "reference":
        return citing_references
    return [citing.identifier for citing in citing_references]


def _get_index_v2(part: str, *, token: str | None = None) -> requests.Response:
    """Send a GET request to a path below the OpenCitations Index v2 base URL.

    :param part: The path of the endpoint; leading slashes are stripped before
        it is joined onto ``BASE_V2``
    :param token: The token to use for authentication, forwarded to ``_get``
    :return: The response returned by ``_get``
    """
    endpoint = part.lstrip("/")
    return _get(f"{BASE_V2}/{endpoint}", token=token)


# One prefixed identifier, optionally followed by more that are joined with ``__``
METADATA_ID_RE = re.compile(
    r"(doi|issn|isbn|omid|openalex|pmid|pmcid):.+?(__(doi|issn|isbn|omid|openalex|pmid|pmcid):.+?)*$"
)

# Prefixes that ``get_articles`` accepts for its references
ALLOWED_ARTICLE_PREFIXES = {
    "doi",
    "issn",
    "isbn",
    "omid",
    "openalex",
    "pmid",
    "pmcid",
}


def get_articles(references: list[Reference], *, token: str | None = None) -> list[Work]:
    """Get documents by reference.

    :param references: A list of references to articles, each using one of the
        prefixes in ``ALLOWED_ARTICLE_PREFIXES``
    :param token: The token to use for authentication. Loaded via
        :func:`pystow.get_config` if not given explicitly
    :return: A list of articles for the references. Empty if no references
        are given
    :raises ValueError: If any reference uses a prefix that is not in
        ``ALLOWED_ARTICLE_PREFIXES``

    .. seealso:: https://api.opencitations.net/meta/v1#/metadata/{ids}
    """
    if not references:
        # Nothing to look up: joining zero references would produce the bare
        # ``/metadata/`` path, so skip the request entirely.
        return []

    invalid_references = [
        reference for reference in references if reference.prefix not in ALLOWED_ARTICLE_PREFIXES
    ]
    if invalid_references:
        raise ValueError(f"invalid references: {invalid_references}")

    # the endpoint takes several identifiers at once, separated by a double underscore
    value = "__".join(reference.curie for reference in references)
    res = _get_meta_v1(f"/metadata/{value}", token=token)
    res.raise_for_status()
    return [process_work(record) for record in res.json()]


def get_articles_for_author(reference: Reference, *, token: str | None = None) -> list[Work]:
    """Retrieve the documents associated with an author.

    :param reference: A reference for an author, using ``orcid`` or ``omid`` as
        a prefix
    :param token: The token to use for authentication. Loaded via
        :func:`pystow.get_config` if not given explicitly
    :return: A list of articles associated with the author

    .. seealso:: https://api.opencitations.net/meta/v1#/author/{id}
    """
    _raise_for_invalid_person(reference)
    endpoint = f"/author/{reference.curie}"
    response = _get_meta_v1(endpoint, token=token)
    response.raise_for_status()
    return list(map(process_work, response.json()))


def get_articles_for_editor(reference: Reference, *, token: str | None = None) -> list[Work]:
    """Retrieve the documents associated with an editor.

    :param reference: A reference for an editor, using ``orcid`` or ``omid`` as
        a prefix
    :param token: The token to use for authentication. Loaded via
        :func:`pystow.get_config` if not given explicitly
    :return: A list of articles associated with the editor

    .. seealso:: https://api.opencitations.net/meta/v1#/editor/{id}
    """
    _raise_for_invalid_person(reference)
    endpoint = f"/editor/{reference.curie}"
    response = _get_meta_v1(endpoint, token=token)
    response.raise_for_status()
    return list(map(process_work, response.json()))


def _raise_for_invalid_person(reference: Reference) -> None:
    """Check that a reference is acceptable for the author/editor lookups.

    Any ``orcid`` reference is accepted. An ``omid`` reference is accepted only
    if its local identifier starts with ``ra/``.

    :param reference: The reference for a person to check
    :raises ValueError: If the prefix is neither ``orcid`` nor ``omid``, or if
        an ``omid`` identifier does not start with ``ra/``
    """
    if reference.prefix == "orcid":
        return
    if reference.prefix != "omid":
        raise ValueError(
            f"a person reference must use the `orcid` or `omid` prefix, got: {reference.curie}"
        )
    # NOTE(review): ``ra/`` is presumably the OMID namespace for people - confirm upstream
    if not reference.identifier.startswith("ra/"):
        raise ValueError(
            f"an OMID for a person must have an identifier starting with `ra/`, "
            f"got: {reference.curie}"
        )


def _get_meta_v1(part: str, *, token: str | None = None) -> requests.Response:
    """Send a GET request to a path below the OpenCitations Meta v1 base URL.

    :param part: The path of the endpoint; leading slashes are stripped before
        it is joined onto ``META_V1``
    :param token: The token to use for authentication, forwarded to ``_get``
    :return: The response returned by ``_get``
    """
    return _get(f"{META_V1}/{part.lstrip('/')}", token=token)


@sleep_and_retry
@limits(calls=180, period=60)  # the OpenCitations team told me 180 calls per minute
def _get(url: str, *, token: str | None = None) -> requests.Response:
    """Send a rate-limited, authenticated GET request.

    :param url: The full URL to request
    :param token: The token to use for authentication. Loaded via
        :func:`pystow.get_config` if not given explicitly
    :return: The response, which is not checked for an error status here;
        callers are expected to call ``raise_for_status`` themselves
    """
    # ``raise_on_missing=True`` is passed so a missing token is reported by pystow
    token = pystow.get_config("opencitations", "token", passthrough=token, raise_on_missing=True)
    return requests.get(url, headers={"authorization": token, "User-Agent": AGENT}, timeout=15)