Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace STCN reader #53

Open
wants to merge 32 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
8374cf9
Outfactor generic CERL part of SBTI API
tijmenbaarda Oct 24, 2024
e386722
Fix bug: previous records were being overwritten
tijmenbaarda Oct 24, 2024
6488e56
Add fields that belong to STCN
tijmenbaarda Oct 24, 2024
3cc511b
Remove redundant expression
tijmenbaarda Oct 24, 2024
9ac0a31
Formatting
tijmenbaarda Oct 24, 2024
4976183
Replace STCN reader: use different API
tijmenbaarda Oct 24, 2024
4a8a7dc
Black formatting
tijmenbaarda Oct 24, 2024
8d7805d
Revert "Black formatting"
tijmenbaarda Oct 24, 2024
466a418
Avoid format attribute name
tijmenbaarda Oct 24, 2024
87becc4
Fix name of "name" field in bibliographical record
tijmenbaarda Nov 7, 2024
9390bc9
Create a separate persons reader for STCN
tijmenbaarda Dec 19, 2024
2f15e2b
Remove unused and nonexisting ActivityField import
tijmenbaarda Dec 19, 2024
facfbf1
Align KVCS to new BiographicalRecord fields
tijmenbaarda Dec 19, 2024
e603efe
Add STCNPersonsReader to shell
tijmenbaarda Dec 19, 2024
3e579c1
Fix errors detected by ruff
tijmenbaarda Dec 19, 2024
6dd7dd5
Python 3.8 compatibility
tijmenbaarda Dec 19, 2024
dc1fc0c
More Python 3.8 compatibility
tijmenbaarda Dec 19, 2024
8b6c45b
More Python 3.8 compatibility
tijmenbaarda Dec 19, 2024
e03e407
More Python 3.8 compatibility
tijmenbaarda Dec 19, 2024
04575d5
Clarify difference between STCN Persons and STCN Printers databases
tijmenbaarda Dec 23, 2024
29969e6
Add spacing after docstring
tijmenbaarda Dec 23, 2024
28962d0
Clarify what the STCN Persons database contains
tijmenbaarda Dec 23, 2024
9fb1c5f
Remove leftover print
tijmenbaarda Dec 23, 2024
c150a6d
Add a check for the length of attribute_chain
tijmenbaarda Dec 23, 2024
c6e9228
Add unit tests for safeget function
tijmenbaarda Dec 23, 2024
721e9a6
Simpler formatting
tijmenbaarda Dec 23, 2024
145a6fc
Simplify _get_contributors method
tijmenbaarda Dec 23, 2024
ede0473
Add docstring to safeget method and an additional test
tijmenbaarda Dec 23, 2024
d34f9a4
Simplify code for record creation
tijmenbaarda Dec 23, 2024
4ad8f6b
Use list comprehension for holdings list
tijmenbaarda Dec 23, 2024
3318704
Consistently use safeget
tijmenbaarda Dec 23, 2024
edc52ef
Remove unused import
tijmenbaarda Dec 23, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 3 additions & 14 deletions edpop_explorer/cerl.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ class CERLReader(Reader):
This is an abstract class -- to use, derive from this class, set the
``API_URL``, ``API_BY_ID_BASE_URL`` and ``LINK_BASE_URL`` constant
attributes, and implement the ``_convert_record`` class method."""

API_URL: str
tijmenbaarda marked this conversation as resolved.
Show resolved Hide resolved
"""The base URL of the search API, of the form ``https://data.cerl.org/<CATALOGUE>/_search``."""
API_BY_ID_BASE_URL: str
Expand Down Expand Up @@ -49,7 +50,6 @@ def _perform_query(self, start_record: int, maximum_records: Optional[int]) -> L
assert isinstance(self.prepared_query, str)
if maximum_records is None:
maximum_records = self.DEFAULT_RECORDS_PER_PAGE
print(f'The query is: {self.prepared_query}')
try:
response = requests.get(
self.API_URL,
Expand All @@ -64,9 +64,7 @@ def _perform_query(self, start_record: int, maximum_records: Optional[int]) -> L
'Accept': 'application/json'
}
).json()
except (
requests.exceptions.RequestException
) as err:
except requests.exceptions.RequestException as err:
raise ReaderError('Error during server request: ' + str(err))

# TODO: check for error responses
Expand All @@ -78,16 +76,7 @@ def _perform_query(self, start_record: int, maximum_records: Optional[int]) -> L
except KeyError:
raise ReaderError('Number of hits not given in server response')

if 'rows' not in response:
# There are no rows in the response, so stop here
return []

records: List[Record] = []
for rawrecord in response['rows']:
record = self._convert_record(rawrecord)
records.append(record)

return records
return [self._convert_record(x) for x in response['rows']] if 'rows' in response else []

@classmethod
def transform_query(cls, query) -> str:
Expand Down
2 changes: 1 addition & 1 deletion edpop_explorer/edpopxshell.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ def do_stcn(self, args) -> None:
self._query(STCNReader, args)

def do_stcnpers(self, args) -> None:
'Short Title Catalogue Netherlands – Persons'
'Short Title Catalogue Netherlands – Persons (authors and other contributors)'
self._query(STCNPersonsReader, args)

def do_sbti(self, args) -> None:
Expand Down
42 changes: 24 additions & 18 deletions edpop_explorer/readers/stcn.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ def _remove_markup(input_str: str) -> str:


def safeget(dictionary: Optional[dict], attribute_chain: tuple, first: bool = False):
tijmenbaarda marked this conversation as resolved.
Show resolved Hide resolved
"""Safely get a (nested) attribute in a JSON-like structure. If the
result is a list and ``first`` is ``True``, return the first item
of the list."""
if len(attribute_chain) == 0:
raise ValueError("The attribute_chain argument cannot be empty")
attribute = attribute_chain[0]
tijmenbaarda marked this conversation as resolved.
Show resolved Hide resolved
if dictionary is None or attribute not in dictionary:
return None
Expand All @@ -25,6 +30,20 @@ def safeget(dictionary: Optional[dict], attribute_chain: tuple, first: bool = Fa
return safeget(value, attribute_chain[1:], first)


def _wrap_contributor(actor_data: dict) -> ContributorField:
    """Turn one raw agent entry into a ``ContributorField``.

    The entry's ``'preferred'`` value is used both as the constructor
    argument of the field and as its ``name``; a ``KeyError`` is raised
    if that key is absent. The ``role`` is looked up with ``safeget``
    using ``first=True`` and is ``None`` when the entry has no
    ``'role'`` key.
    """
    preferred_name = actor_data['preferred']
    contributor = ContributorField(preferred_name)
    contributor.name = preferred_name
    contributor.role = safeget(actor_data, ('role',), first=True)
    return contributor


def _wrap_holding(holding_data: dict) -> Field:
    """Summarise one raw holding entry as a ``Field``.

    The summary has the form ``"<institution> - <shelfmark>"``, built from
    ``data.institutionName`` and ``data.shelfmark`` of ``holding_data``.
    A part that is missing is left out (together with the separator)
    instead of being rendered as the literal text ``None``; when both
    parts are missing the summary is an empty string.
    """
    institution = safeget(holding_data, ("data", "institutionName"))
    shelfmark = safeget(holding_data, ("data", "shelfmark"))
    # safeget yields None for absent keys; formatting that directly would
    # put the word "None" into the text, so only keep the parts that exist.
    parts = [str(part) for part in (institution, shelfmark) if part is not None]
    return Field(" - ".join(parts))


class STCNBaseReader(CERLReader):
"""STCN uses the same search API for its bibliographical records and
its biographical records (persons and publishers/printers), but the
Expand All @@ -34,6 +53,8 @@ class STCNBaseReader(CERLReader):


class STCNPersonsReader(STCNBaseReader):
"""STCN Persons reader. This reader does not include printers and
publishers, because they are in a separate database."""
API_BY_ID_BASE_URL = 'https://data.cerl.org/stcn_persons/'
LINK_BASE_URL = 'https://data.cerl.org/stcn_persons/'
CATALOG_URIREF = URIRef(
Expand Down Expand Up @@ -114,24 +135,15 @@ def _get_contributors(cls, rawrecord: dict) -> List[Field]:
actors = safeget(rawrecord, ("data", "agent"))
if not actors:
return []
contributors = []
for actor in actors:
name = actor.get("preferred", None)
if name is None:
continue
contributor = ContributorField(name)
contributor.name = name
contributor.role = safeget(actor, ('role',), first=True)
contributors.append(contributor)
return contributors
return [_wrap_contributor(x) for x in actors if x.get('preferred')]

@classmethod
def _get_publisher_or_printer(cls, rawrecord: dict) -> Optional[Field]:
# TODO: support multiple publishers/printers
provision_agent = safeget(rawrecord, ("data", "provisionAgent"), first=True)
if provision_agent is None:
return None
name = provision_agent.get("preferred", None)
name = safeget(provision_agent, ("preferred",))
if name is None:
return None
field = Field(name)
Expand Down Expand Up @@ -211,13 +223,7 @@ def _get_holdings(cls, rawrecord: dict) -> List[Field]:
holdings = safeget(rawrecord, ("data", "holdings"))
if holdings is None:
return []
fields = []
for holding in holdings:
institution = safeget(holding, ("data", "institutionName"))
shelfmark = safeget(holding, ("data", "shelfmark"))
summary = f"{institution} - {shelfmark}"
fields.append(Field(summary))
return fields
return [_wrap_holding(x) for x in holdings]

@classmethod
def _convert_record(cls, rawrecord: dict) -> BibliographicalRecord:
Expand Down
Empty file added tests/readers/__init__.py
Empty file.
46 changes: 46 additions & 0 deletions tests/readers/test_stcn.py

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good!

Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import pytest

from edpop_explorer.readers.stcn import safeget


def test_safeget_empty_attribute_chain():
    """An empty attribute chain must be rejected with ``ValueError``."""
    empty_chain = ()
    with pytest.raises(ValueError):
        safeget(None, empty_chain)

def test_safeget_empty_dict():
    """Looking up any attribute in an empty dict yields ``None``."""
    result = safeget({}, ("attribute",))
    assert result is None

def test_safeget_none():
    """Passing ``None`` instead of a dict yields ``None``."""
    result = safeget(None, ("attribute",))
    assert result is None

def test_safeget_simple():
    """A single-step chain returns the value stored under that key."""
    source = {"attribute": "value"}
    result = safeget(source, ("attribute",))
    assert result == "value"

def test_safeget_nested():
    """A two-step chain descends into the nested dict."""
    source = {"attribute": {"attribute2": "value"}}
    result = safeget(source, ("attribute", "attribute2"))
    assert result == "value"

def test_safeget_nested_first_attribute_none():
    """A ``None`` value at the first step yields ``None`` for the chain."""
    source = {"attribute": None}
    result = safeget(source, ("attribute", "attribute2"))
    assert result is None

def test_safeget_nested_first_attribute_nonexistent():
    """A chain whose first key is absent yields ``None``."""
    source = {"other_attribute": None}
    result = safeget(source, ("attribute", "attribute2"))
    assert result is None

def test_safeget_nested_second_attribute_nonexistent():
    """A chain whose second key is absent in the nested dict yields ``None``."""
    source = {"attribute": {"other_attribute": "value"}}
    result = safeget(source, ("attribute", "attribute2"))
    assert result is None

def test_safeget_first():
    """With ``first`` set, a list value is reduced to its first item."""
    source = {"attribute": ["value1", "value2"]}
    result = safeget(source, ("attribute",), True)
    assert result == "value1"
Loading