diff --git a/extras/fileformats/extras/application/medical.py b/extras/fileformats/extras/application/medical.py index 07f4c14..4020014 100644 --- a/extras/fileformats/extras/application/medical.py +++ b/extras/fileformats/extras/application/medical.py @@ -1,5 +1,4 @@ import typing as ty -from random import Random from pathlib import Path import pydicom from fileformats.core import FileSet @@ -8,8 +7,10 @@ @FileSet.read_metadata.register -def dicom_read_metadata(dicom: Dicom, specific_tags=None) -> ty.Mapping[str, ty.Any]: - dcm = pydicom.dcmread(dicom.fspath, specific_tags=specific_tags) +def dicom_read_metadata( + dicom: Dicom, selected_keys: ty.Optional[ty.Sequence[str]] = None +) -> ty.Mapping[str, ty.Any]: + dcm = pydicom.dcmread(dicom.fspath, specific_tags=selected_keys) [getattr(dcm, a, None) for a in dir(dcm)] # Ensure all keywords are set metadata = { e.keyword: e.value # type: ignore[union-attr] @@ -21,8 +22,8 @@ def dicom_read_metadata(dicom: Dicom, specific_tags=None) -> ty.Mapping[str, ty. @FileSet.generate_sample_data.register def dicom_generate_sample_data( - dicom: Dicom, dest_dir: Path, seed: ty.Union[int, Random], stem: ty.Optional[str] -): + dicom: Dicom, dest_dir: Path, seed: int, stem: ty.Optional[str] +) -> ty.Iterable[Path]: return next( medimages4tests.dummy.dicom.mri.t1w.siemens.skyra.syngo_d13c.get_image().iterdir() ) diff --git a/extras/fileformats/extras/application/serialization.py b/extras/fileformats/extras/application/serialization.py index cd2a000..8456197 100644 --- a/extras/fileformats/extras/application/serialization.py +++ b/extras/fileformats/extras/application/serialization.py @@ -25,13 +25,13 @@ def convert_data_serialization( @DataSerialization.load.register -def yaml_load(yml: Yaml): +def yaml_load(yml: Yaml) -> dict: with open(yml.fspath) as f: data = yaml.load(f, Loader=yaml.Loader) return data @DataSerialization.save.register -def yaml_save(yml: Yaml, data): +def yaml_save(yml: Yaml, data: dict): with open(yml.fspath, "w") as f: yaml.dump(data, f) diff --git a/fileformats/application/serialization.py b/fileformats/application/serialization.py index fdedc9e..5c25ce0 100644 --- a/fileformats/application/serialization.py +++ b/fileformats/application/serialization.py @@ -2,7 +2,7 @@ import typing as ty from random import Random from pathlib import Path -from ..core import hook, DataType +from ..core import hook, DataType, FileSet from ..core.mixin import WithClassifiers from ..generic import File from ..core.exceptions import FormatMismatchError @@ -42,12 +42,12 @@ class DataSerialization(WithClassifiers, File): iana_mime = None @hook.extra - def load(self): + def load(self) -> dict: """Load the contents of the file into a dictionary""" raise NotImplementedError @hook.extra - def save(data): + def save(self, data: dict): """Serialise a dictionary to a new file""" raise NotImplementedError @@ -93,10 +93,10 @@ class Toml(DataSerialization): ext = ".toml" -@File.generate_sample_data.register +@FileSet.generate_sample_data.register def generate_json_sample_data( js: Json, dest_dir: Path, seed: int, stem: ty.Optional[str] -) -> ty.List[Path]: +) -> ty.Iterable[Path]: js_file = dest_dir / gen_filename(seed, file_type=js, stem=stem) rng = Random(seed + 1) with open(js_file, "w") as f: @@ -107,10 +107,10 @@ def generate_json_sample_data( return [js_file] -@File.generate_sample_data.register +@FileSet.generate_sample_data.register def generate_yaml_sample_data( yml: Yaml, dest_dir: Path, seed: int, stem: ty.Optional[str] -) -> ty.List[Path]: +) -> ty.Iterable[Path]: yml_file = dest_dir / gen_filename(seed, file_type=yml, stem=stem) rng = Random(seed + 1) with open(yml_file, "w") as f: diff --git a/fileformats/core/__init__.py b/fileformats/core/__init__.py index 9e64352..c3cf12a 100644 --- a/fileformats/core/__init__.py +++ b/fileformats/core/__init__.py @@ -1,7 +1,7 @@ from ._version import __version__ from .classifier import Classifier, ClassifierCategory from .datatype import DataType -from .fileset import FileSet +from .fileset import FileSet, MockMixin from .field import Field from .utils import ( to_mime, diff --git a/fileformats/core/fileset.py b/fileformats/core/fileset.py index 61246b2..08051c3 100644 --- a/fileformats/core/fileset.py +++ b/fileformats/core/fileset.py @@ -1,4 +1,3 @@ -from __future__ import annotations import os from copy import copy import struct @@ -189,18 +188,31 @@ def metadata(self) -> ty.Dict[str, ty.Any]: return metadata @hook.extra - def read_metadata(self) -> ty.Dict[str, ty.Any]: - """Reads any metadata associated with the fileset and returns it as a dict""" + def read_metadata( + self, selected_keys: ty.Optional[ty.Sequence[str]] = None + ) -> ty.Mapping[str, ty.Any]: + """Reads any metadata associated with the fileset and returns it as a dict + + Parameters + ---------- + selected_keys : Sequence[str], optional + selected keys to load instead of loading the complete metadata set + + Returns + ------- + metadata : Mapping[str, Any] + a mapping from names of the metadata fields to their values + """ raise NotImplementedError @classmethod - def required_properties(cls): + def required_properties(cls) -> ty.Generator[str, None, None]: """Find all properties required to treat file-set as being in the format specified by the class Returns ------- - iter(str) + Generator[str, None, None] an iterator over all properties names marked as "required" """ fileset_props = dir(FileSet) @@ -235,7 +247,7 @@ def required_paths(self) -> ty.Set[Path]: required.add(path) return required - def nested_filesets(self) -> ty.List[FileSet]: + def nested_filesets(self) -> ty.List["FileSet"]: """Returns all nested filesets that are required for the format Returns @@ -609,7 +621,7 @@ def byte_chunks( relative_to: ty.Optional[os.PathLike] = None, ignore_hidden_files: bool = False, ignore_hidden_dirs: bool = False, - ) -> ty.Generator[str, ty.Generator[bytes]]: + ) -> ty.Generator[ty.Tuple[str, bytes], None, None]: """Yields relative paths for all files within the file-set along with iterators over their byte-contents. To be used when generating hashes for the file set. @@ -859,7 +871,7 @@ def sample( @hook.extra def generate_sample_data( - self, dest_dir: Path, seed: int = 0, stem: str = None + self, dest_dir: Path, seed: int = 0, stem: ty.Optional[str] = None ) -> ty.Iterable[Path]: """Generate test data at the fspaths of the file-set diff --git a/fileformats/core/hook.py b/fileformats/core/hook.py index 317b535..1bf529a 100644 --- a/fileformats/core/hook.py +++ b/fileformats/core/hook.py @@ -1,5 +1,7 @@ import importlib import typing as ty +import inspect +from itertools import zip_longest import functools import attrs import urllib.error @@ -158,5 +160,57 @@ def decorated(obj, *args, **kwargs): ) raise FileFormatsExtrasError(msg) - decorated.register = dispatch_method.register + decorated.register = ExtraRegisterer(dispatch_method) return decorated + + +class ExtraRegisterer: + def __init__(self, dispatch): + self.dispatch = dispatch + + def __call__(self, function: ty.Callable): + method = self.dispatch.__wrapped__ + msig = inspect.signature(method) + fsig = inspect.signature(function) + + def type_match(a, b) -> bool: + return isinstance(a, str) or isinstance(b, str) or a == b + + differences = [] + for i, (mparam, fparam) in enumerate( + zip_longest( + list(msig.parameters.values())[1:], list(fsig.parameters.values())[1:] + ) + ): + if mparam is None: + differences.append( + f"found additional argument, '{fparam.name}', at position {i}" + ) + continue + if fparam is None: + if mparam.default is mparam.empty: + differences.append( + f"override missing required argument '{mparam.name}'" + ) + continue + mname = mparam.name + fname = fparam.name + mtype = mparam.annotation + ftype = fparam.annotation + if mname != fname: + differences.append( + f"name of parameter at position {i}: {mname} vs {fname}" + ) + elif not type_match(mtype, ftype): + differences.append(f"Type of '{mname}' arg: {mtype} vs {ftype}") + if not type_match(msig.return_annotation, fsig.return_annotation): + differences.append( + f"return type: {msig.return_annotation} vs {fsig.return_annotation}" + ) + if differences: + raise TypeError( + f"Arguments differ between the signature of the " + f"decorated method {method} and the registered override {function}:\n" + + "\n".join(differences) + ) + self.dispatch.register(function) diff --git a/fileformats/core/tests/test_hooks.py b/fileformats/core/tests/test_hooks.py new file mode 100644 index 0000000..3b18559 --- /dev/null +++ b/fileformats/core/tests/test_hooks.py @@ -0,0 +1,73 @@ +from pathlib import Path +import platform +import typing as ty +import pytest +from fileformats.core import hook, MockMixin, FileSet +from fileformats.testing import Foo + + +def test_sample(): + test_inst = Foo.sample() + assert test_inst.fspath.exists() + assert test_inst.fspath.suffix == ".foo" + + +def test_mock(): + mock = Foo.mock() + if platform.system() == "Windows": + expected = Path(f"{Path().cwd().drive}\\mock\\foo.foo") + else: + expected = Path("/mock/foo.foo") + assert mock.fspath == expected + assert not mock.fspath.exists() + assert isinstance(mock, MockMixin) + + +class Woo(FileSet): + @hook.extra + def test_hook(self, a: int, b: float, c: ty.Optional[str] = None) -> float: + raise NotImplementedError + + +def test_hook_signature_no_default(): + @Woo.test_hook.register + def woo_test_hook(woo: Woo, a: int, b: float) -> float: + pass + + +def test_hook_signature1(): + + with pytest.raises(TypeError, match="missing required argument"): + + @Woo.test_hook.register + def woo_test_hook(woo: Woo, a: int) -> float: + pass + + +def test_hook_signature2(): + + with pytest.raises(TypeError, match="name of parameter"): + + @Woo.test_hook.register + def woo_test_hook(woo: Woo, a: int, d: str) -> float: + pass + + +def test_hook_signature3(): + + with pytest.raises(TypeError, match="found additional argument"): + + @Woo.test_hook.register + def woo_test_hook( + woo: Woo, a: int, b: float, c: ty.Optional[str], d: int + ) -> float: + pass + + +def test_hook_signature4(): + + with pytest.raises(TypeError, match="return type"): + + @Woo.test_hook.register + def woo_test_hook(woo: Woo, a: int, b: str) -> int: + pass diff --git a/fileformats/core/tests/test_test_extras.py b/fileformats/core/tests/test_test_extras.py deleted file mode 100644 index f087d94..0000000 --- a/fileformats/core/tests/test_test_extras.py +++ /dev/null @@ -1,21 +0,0 @@ -from pathlib import Path -import platform -from fileformats.core.fileset import MockMixin -from fileformats.testing import Foo - - -def test_sample(): - test_inst = Foo.sample() - assert test_inst.fspath.exists() - assert test_inst.fspath.suffix == ".foo" - - -def test_mock(): - mock = Foo.mock() - if platform.system() == "Windows": - expected = Path(f"{Path().cwd().drive}\\mock\\foo.foo") - else: - expected = Path("/mock/foo.foo") - assert mock.fspath == expected - assert not mock.fspath.exists() - assert isinstance(mock, MockMixin) diff --git a/fileformats/generic/__init__.py b/fileformats/generic/__init__.py index 98ea046..54427fb 100644 --- a/fileformats/generic/__init__.py +++ b/fileformats/generic/__init__.py @@ -279,7 +279,7 @@ class SetOf(WithClassifiers, TypedSet): @FileSet.generate_sample_data.register def fsobject_generate_sample_data( fsobject: FsObject, dest_dir: Path, seed: int, stem: ty.Optional[str] -) -> ty.List[Path]: +) -> ty.Iterable[Path]: a_file = dest_dir / gen_filename(seed, file_type=fsobject, stem=stem) a_file.write_text("".join(random.choices(string.printable, k=FILE_LENGTH))) return [a_file] @@ -288,7 +288,7 @@ def fsobject_generate_sample_data( @FileSet.generate_sample_data.register def file_generate_sample_data( file: File, dest_dir: Path, seed: int, stem: ty.Optional[str] -) -> ty.List[Path]: +) -> ty.Iterable[Path]: fname = gen_filename(seed, file_type=file, stem=stem) stem = fname[: -len(file.strext)] a_file = dest_dir / fname @@ -322,7 +322,7 @@ def file_generate_sample_data( @FileSet.generate_sample_data.register def directory_generate_sample_data( directory: Directory, dest_dir: Path, seed: int, stem: ty.Optional[str] -) -> ty.List[Path]: +) -> ty.Iterable[Path]: rng = Random(str(seed) + directory.mime_like) a_dir = dest_dir / gen_filename(rng, stem=stem) a_dir.mkdir() @@ -333,7 +333,7 @@ def directory_generate_sample_data( @FileSet.generate_sample_data.register def directory_containing_generate_sample_data( directory: DirectoryContaining, dest_dir: Path, seed: int, stem: ty.Optional[str] -) -> ty.List[Path]: +) -> ty.Iterable[Path]: rng = Random(str(seed) + directory.mime_like) a_dir = dest_dir / gen_filename(rng, stem=stem) a_dir.mkdir() @@ -345,7 +345,7 @@ def directory_containing_generate_sample_data( @FileSet.generate_sample_data.register def set_of_sample_data( set_of: SetOf, dest_dir: Path, seed: int, stem: ty.Optional[str] -) -> ty.List[Path]: +) -> ty.Iterable[Path]: rng = Random(str(seed) + set_of.mime_like) return list( itertools.chain(