From dbadbd53d7a19806384cacab4c5945fe5b03dacf Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Wed, 17 Apr 2024 16:52:10 -0500 Subject: [PATCH 01/40] reconcile io path validator differences --- src/pynwb/validate.py | 119 ++++++++++++++++++++++++------------------ 1 file changed, 67 insertions(+), 52 deletions(-) diff --git a/src/pynwb/validate.py b/src/pynwb/validate.py index aecfb2556..281443aba 100644 --- a/src/pynwb/validate.py +++ b/src/pynwb/validate.py @@ -27,10 +27,64 @@ def _validate_helper(io: HDMFIO, namespace: str = CORE_NAMESPACE) -> list: builder = io.read_builder() validator = ValidatorMap(io.manager.namespace_catalog.get_namespace(name=namespace)) return validator.validate(builder) + +def _check_namespaces_to_validate(io = None, path = None, use_cached_namespaces = False, namespace = None, verbose = False, driver = None): + status = 0 + namespaces_to_validate = [] + namespace_message = "PyNWB namespace information" + io_kwargs = dict(path=path, mode="r", driver=driver) + + if use_cached_namespaces: + if io is not None: + cached_namespaces, manager, namespace_dependencies = _get_cached_namespaces_to_validate(io=io) + else: + cached_namespaces, manager, namespace_dependencies = _get_cached_namespaces_to_validate( + path=path, driver=driver + ) + io_kwargs.update(manager=manager) + + if any(cached_namespaces): + namespaces_to_validate = cached_namespaces + namespace_message = "cached namespace information" + else: + namespaces_to_validate = [CORE_NAMESPACE] + if verbose: + print( + f"The file {path} has no cached namespace information. Falling back to {namespace_message}.", + file=sys.stderr, + ) + else: + io_kwargs.update(load_namespaces=False) + namespaces_to_validate = [CORE_NAMESPACE] + + if namespace is not None: + if namespace in namespaces_to_validate: + namespaces_to_validate = [namespace] + elif use_cached_namespaces and namespace in namespace_dependencies: # validating against a dependency + for namespace_dependency in namespace_dependencies: + if namespace in namespace_dependencies[namespace_dependency]: + status = 1 + print( + f"The namespace '{namespace}' is included by the namespace " + f"'{namespace_dependency}'. Please validate against that namespace instead.", + file=sys.stderr, + ) + else: + status = 1 + print( + f"The namespace '{namespace}' could not be found in {namespace_message} as only " + f"{namespaces_to_validate} is present.", + file=sys.stderr, + ) + return status, namespaces_to_validate, io_kwargs, namespace_message def _get_cached_namespaces_to_validate( +<<<<<<< HEAD path: str, driver: Optional[str] = None, aws_region: Optional[str] = None, +======= + path: Optional[str] = None, driver: Optional[str] = None, io: Optional[HDMFIO] = None +>>>>>>> 3e6dee50 (reconcile io path validator differences) ) -> Tuple[List[str], BuildManager, Dict[str, str]]: """ Determine the most specific namespace(s) that are cached in the given NWBFile that can be used for validation. @@ -58,12 +112,13 @@ def _get_cached_namespaces_to_validate( catalog = NamespaceCatalog( group_spec_cls=NWBGroupSpec, dataset_spec_cls=NWBDatasetSpec, spec_namespace_cls=NWBNamespace ) - namespace_dependencies = NWBHDF5IO.load_namespaces( - namespace_catalog=catalog, - path=path, - driver=driver, - aws_region=aws_region - ) + + if io is not None: + # do I want to load these here if it's already an io object? Or just somehow get the dependencies? + #namespace_dependencies = io.manager.namespace_catalog._NamespaceCatalog__namespaces these are not dependencies + namespace_dependencies = io.load_namespaces(namespace_catalog=catalog, file=io._HDF5IO__file, aws_region=aws_region) + else: + namespace_dependencies = NWBHDF5IO.load_namespaces(namespace_catalog=catalog, path=path, driver=driver, aws_region=aws_region) # Determine which namespaces are the most specific (i.e. extensions) and validate against those candidate_namespaces = set(namespace_dependencies.keys()) @@ -138,56 +193,17 @@ def validate(**kwargs): assert io != paths, "Both 'io' and 'paths' were specified! Please choose only one." if io is not None: - validation_errors = _validate_helper(io=io, namespace=namespace or CORE_NAMESPACE) + status, namespaces_to_validate, io_kwargs, namespace_message = _check_namespaces_to_validate(io, paths, use_cached_namespaces, namespace, verbose, driver) + for validation_namespace in namespaces_to_validate: + if verbose: + print(f"Validating against {namespace_message} using namespace '{validation_namespace}'.") + validation_errors = _validate_helper(io=io, namespace=validation_namespace) return validation_errors status = 0 validation_errors = list() for path in paths: - namespaces_to_validate = [] - namespace_message = "PyNWB namespace information" - io_kwargs = dict(path=path, mode="r", driver=driver) - - if use_cached_namespaces: - cached_namespaces, manager, namespace_dependencies = _get_cached_namespaces_to_validate( - path=path, driver=driver - ) - io_kwargs.update(manager=manager) - - if any(cached_namespaces): - namespaces_to_validate = cached_namespaces - namespace_message = "cached namespace information" - else: - namespaces_to_validate = [CORE_NAMESPACE] - if verbose: - print( - f"The file {path} has no cached namespace information. Falling back to {namespace_message}.", - file=sys.stderr, - ) - else: - io_kwargs.update(load_namespaces=False) - namespaces_to_validate = [CORE_NAMESPACE] - - if namespace is not None: - if namespace in namespaces_to_validate: - namespaces_to_validate = [namespace] - elif use_cached_namespaces and namespace in namespace_dependencies: # validating against a dependency - for namespace_dependency in namespace_dependencies: - if namespace in namespace_dependencies[namespace_dependency]: - status = 1 - print( - f"The namespace '{namespace}' is included by the namespace " - f"'{namespace_dependency}'. Please validate against that namespace instead.", - file=sys.stderr, - ) - else: - status = 1 - print( - f"The namespace '{namespace}' could not be found in {namespace_message} as only " - f"{namespaces_to_validate} is present.", - file=sys.stderr, - ) - + status, namespaces_to_validate, io_kwargs, namespace_message = _check_namespaces_to_validate(io, path, use_cached_namespaces, namespace, verbose, driver) if status == 1: continue @@ -227,7 +243,6 @@ def validate_cli(): ) parser.set_defaults(no_cached_namespace=False) args = parser.parse_args() - status = 0 if args.list_namespaces: for path in args.paths: From fe81ad3ff0be52b7935fe0cdc2e8a338056a0a7a Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Thu, 18 Apr 2024 09:52:03 -0500 Subject: [PATCH 02/40] make io and path output the same --- src/pynwb/validate.py | 16 ++++++---------- validation_testing.py | 26 ++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 10 deletions(-) create mode 100644 validation_testing.py diff --git a/src/pynwb/validate.py b/src/pynwb/validate.py index 281443aba..4d6dab769 100644 --- a/src/pynwb/validate.py +++ b/src/pynwb/validate.py @@ -80,11 +80,7 @@ def _check_namespaces_to_validate(io = None, path = None, use_cached_namespaces return status, namespaces_to_validate, io_kwargs, namespace_message def _get_cached_namespaces_to_validate( -<<<<<<< HEAD - path: str, driver: Optional[str] = None, aws_region: Optional[str] = None, -======= - path: Optional[str] = None, driver: Optional[str] = None, io: Optional[HDMFIO] = None ->>>>>>> 3e6dee50 (reconcile io path validator differences) + path: Optional[str] = None, driver: Optional[str] = None, aws_region: Optional[str] = None, io: Optional[HDMFIO] = None ) -> Tuple[List[str], BuildManager, Dict[str, str]]: """ Determine the most specific namespace(s) that are cached in the given NWBFile that can be used for validation. @@ -115,7 +111,7 @@ def _get_cached_namespaces_to_validate( if io is not None: # do I want to load these here if it's already an io object? Or just somehow get the dependencies? - #namespace_dependencies = io.manager.namespace_catalog._NamespaceCatalog__namespaces these are not dependencies + #namespace_dependencies = io.manager.namespace_catalog._NamespaceCatalog__namespaces namespace_dependencies = io.load_namespaces(namespace_catalog=catalog, file=io._HDF5IO__file, aws_region=aws_region) else: namespace_dependencies = NWBHDF5IO.load_namespaces(namespace_catalog=catalog, path=path, driver=driver, aws_region=aws_region) @@ -192,16 +188,16 @@ def validate(**kwargs): ) assert io != paths, "Both 'io' and 'paths' were specified! Please choose only one." + status = 0 + validation_errors = list() if io is not None: status, namespaces_to_validate, io_kwargs, namespace_message = _check_namespaces_to_validate(io, paths, use_cached_namespaces, namespace, verbose, driver) for validation_namespace in namespaces_to_validate: if verbose: print(f"Validating against {namespace_message} using namespace '{validation_namespace}'.") - validation_errors = _validate_helper(io=io, namespace=validation_namespace) - return validation_errors + validation_errors += _validate_helper(io=io, namespace=validation_namespace) + return validation_errors, status - status = 0 - validation_errors = list() for path in paths: status, namespaces_to_validate, io_kwargs, namespace_message = _check_namespaces_to_validate(io, path, use_cached_namespaces, namespace, verbose, driver) if status == 1: diff --git a/validation_testing.py b/validation_testing.py new file mode 100644 index 000000000..df3773ab5 --- /dev/null +++ b/validation_testing.py @@ -0,0 +1,26 @@ +import h5py +from pynwb.validate import validate +from pynwb import NWBHDF5IO + +path = 'tests/back_compat/2.1.0_nwbfile_with_extension.nwb' +io = NWBHDF5IO('tests/back_compat/2.1.0_nwbfile_with_extension.nwb') + +print('Validating all with paths') +validate(paths=[path], namespace="core", verbose=True) + +print('Validating core with io') +validate(io=io, namespace="core", verbose=True) + +print('Validating core with paths') +validate(paths=[path], namespace="core", verbose=True) + +print('Validating all with io') +validate(io=io, verbose=True, use_cached_namespaces=False) + +#namespace_dependencies = NWBHDF5IO.get_namespaces(namespace_catalog=catalog, file=io._HDF5IO__file) +#io.get_namespaces(file=io._HDF5IO__file) +# manager = io.manager +# io.manager.namespace_catalog.get_namespace('name') then can get dependencies from that +# manager.namespace_catalog._NamespaceCatalog__namespaces + +print('done') \ No newline at end of file From 18826f84cf7d0f0be91f491bef02395cfafdede1 Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Thu, 18 Apr 2024 15:59:30 -0500 Subject: [PATCH 03/40] update tests for io validation --- src/pynwb/validate.py | 25 +++++++--- tests/validation/test_validate.py | 81 +++++++++++++++++++++++-------- 2 files changed, 78 insertions(+), 28 deletions(-) diff --git a/src/pynwb/validate.py b/src/pynwb/validate.py index 4d6dab769..910aecb03 100644 --- a/src/pynwb/validate.py +++ b/src/pynwb/validate.py @@ -28,7 +28,12 @@ def _validate_helper(io: HDMFIO, namespace: str = CORE_NAMESPACE) -> list: validator = ValidatorMap(io.manager.namespace_catalog.get_namespace(name=namespace)) return validator.validate(builder) -def _check_namespaces_to_validate(io = None, path = None, use_cached_namespaces = False, namespace = None, verbose = False, driver = None): +def _check_namespaces_to_validate(io = None, + path = None, + use_cached_namespaces = False, + namespace = None, + verbose = False, + driver = None): status = 0 namespaces_to_validate = [] namespace_message = "PyNWB namespace information" @@ -110,8 +115,6 @@ def _get_cached_namespaces_to_validate( ) if io is not None: - # do I want to load these here if it's already an io object? Or just somehow get the dependencies? - #namespace_dependencies = io.manager.namespace_catalog._NamespaceCatalog__namespaces namespace_dependencies = io.load_namespaces(namespace_catalog=catalog, file=io._HDF5IO__file, aws_region=aws_region) else: namespace_dependencies = NWBHDF5IO.load_namespaces(namespace_catalog=catalog, path=path, driver=driver, aws_region=aws_region) @@ -177,9 +180,6 @@ def _get_cached_namespaces_to_validate( ) def validate(**kwargs): """Validate NWB file(s) against a namespace or its cached namespaces. - - NOTE: If an io object is provided and no namespace name is specified, then the file will be validated - against the core namespace, even if use_cached_namespaces is True. """ from . import NWBHDF5IO # TODO: modularize to avoid circular import @@ -191,7 +191,11 @@ def validate(**kwargs): status = 0 validation_errors = list() if io is not None: - status, namespaces_to_validate, io_kwargs, namespace_message = _check_namespaces_to_validate(io, paths, use_cached_namespaces, namespace, verbose, driver) + (status, + namespaces_to_validate, + io_kwargs, + namespace_message) = _check_namespaces_to_validate(io, paths, use_cached_namespaces, namespace, verbose, + driver) for validation_namespace in namespaces_to_validate: if verbose: print(f"Validating against {namespace_message} using namespace '{validation_namespace}'.") @@ -199,7 +203,11 @@ def validate(**kwargs): return validation_errors, status for path in paths: - status, namespaces_to_validate, io_kwargs, namespace_message = _check_namespaces_to_validate(io, path, use_cached_namespaces, namespace, verbose, driver) + (status, + namespaces_to_validate, + io_kwargs, + namespace_message) = _check_namespaces_to_validate(io, path, use_cached_namespaces, namespace, verbose, + driver) if status == 1: continue @@ -239,6 +247,7 @@ def validate_cli(): ) parser.set_defaults(no_cached_namespace=False) args = parser.parse_args() + status = 0 if args.list_namespaces: for path in args.paths: diff --git a/tests/validation/test_validate.py b/tests/validation/test_validate.py index c2829ee1f..cc0d73370 100644 --- a/tests/validation/test_validate.py +++ b/tests/validation/test_validate.py @@ -194,41 +194,63 @@ def get_io(self, path): def test_validate_io_no_cache(self): """Test that validating a file with no cached spec against the core namespace succeeds.""" with self.get_io('tests/back_compat/1.0.2_nwbfile.nwb') as io: - errors = validate(io) + errors, _ = validate(io) self.assertEqual(errors, []) def test_validate_io_no_cache_bad_ns(self): """Test that validating a file with no cached spec against a specified, unknown namespace fails.""" - with self.get_io('tests/back_compat/1.0.2_nwbfile.nwb') as io: - with self.assertRaisesWith(KeyError, "\"'notfound' not a namespace\""): - validate(io, 'notfound') + with patch("sys.stderr", new=StringIO()) as fake_err: + with patch("sys.stdout", new=StringIO()) as fake_out: + with self.get_io('tests/back_compat/1.0.2_nwbfile.nwb') as io: + results, status = validate(io=io, namespace='notfound') + self.assertEqual(results, []) + self.assertEqual(status, 1) + stderr_regex = ( + r"The namespace 'notfound' could not be found in PyNWB namespace information as only " + r"\['core'\] is present.\n" + ) + self.assertRegex(fake_err.getvalue(), stderr_regex) + self.assertEqual(fake_out.getvalue(), "") def test_validate_io_cached(self): """Test that validating a file with cached spec against its cached namespace succeeds.""" with self.get_io('tests/back_compat/1.1.2_nwbfile.nwb') as io: - errors = validate(io) + errors, _ = validate(io) self.assertEqual(errors, []) def test_validate_io_cached_extension(self): """Test that validating a file with cached spec against its cached namespaces succeeds.""" with self.get_io('tests/back_compat/2.1.0_nwbfile_with_extension.nwb') as io: - errors = validate(io) + errors, _ = validate(io) self.assertEqual(errors, []) def test_validate_io_cached_extension_pass_ns(self): """Test that validating a file with cached extension spec against the extension namespace succeeds.""" with self.get_io('tests/back_compat/2.1.0_nwbfile_with_extension.nwb') as io: - errors = validate(io, 'ndx-testextension') + errors, _ = validate(io, 'ndx-testextension') self.assertEqual(errors, []) def test_validate_io_cached_core_with_io(self): """ - For back-compatability, test that validating a file with cached extension spec against the core - namespace succeeds when using the `io` + `namespace` keywords. + Test that validating a file with cached extension spec against the core + namespace succeeds rases an error when using the `io` + `namespace` keywords. """ - with self.get_io(path='tests/back_compat/2.1.0_nwbfile_with_extension.nwb') as io: - results = validate(io=io, namespace="core") - self.assertEqual(results, []) + with patch("sys.stderr", new=StringIO()) as fake_err: + with patch("sys.stdout", new=StringIO()) as fake_out: + with self.get_io(path='tests/back_compat/2.1.0_nwbfile_with_extension.nwb') as io: + results, status = validate(io=io, namespace="core", verbose=True) + self.assertEqual(results, []) + self.assertEqual(status, 1) + self.assertEqual( + fake_err.getvalue(), + ( + "The namespace 'core' is included by the namespace 'ndx-testextension'. " + "Please validate against that namespace instead.\n" + ) + ) + self.assertEqual(fake_out.getvalue(), + "Validating against cached namespace information using " + "namespace 'ndx-testextension'.\n") def test_validate_file_cached_extension(self): """ @@ -282,14 +304,33 @@ def test_validate_file_cached_no_cache_bad_ns(self): def test_validate_io_cached_bad_ns(self): """Test that validating a file with cached spec against a specified, unknown namespace fails.""" - with self.get_io('tests/back_compat/1.1.2_nwbfile.nwb') as io: - with self.assertRaisesWith(KeyError, "\"'notfound' not a namespace\""): - validate(io, 'notfound') + with patch("sys.stderr", new=StringIO()) as fake_err: + with patch("sys.stdout", new=StringIO()) as fake_out: + with self.get_io('tests/back_compat/1.1.2_nwbfile.nwb') as io: + results, status = validate(io=io, namespace='notfound') + self.assertEqual(results, []) + self.assertEqual(status, 1) + stderr_regex = ( + r"The namespace 'notfound' could not be found in cached namespace information as only " + r"\['core'\] is present.\n" + ) + self.assertRegex(fake_err.getvalue(), stderr_regex) + self.assertEqual(fake_out.getvalue(), "") def test_validate_io_cached_hdmf_common(self): """Test that validating a file with cached spec against the hdmf-common namespace fails.""" - with self.get_io('tests/back_compat/1.1.2_nwbfile.nwb') as io: - # TODO this error should not be different from the error when using the validate script above - msg = "builder must have data type defined with attribute 'data_type'" - with self.assertRaisesWith(ValueError, msg): - validate(io, 'hdmf-common') + with patch("sys.stderr", new=StringIO()) as fake_err: + with patch("sys.stdout", new=StringIO()) as fake_out: + with self.get_io(path='tests/back_compat/1.1.2_nwbfile.nwb') as io: + results, status = validate(io=io, namespace="hdmf-common", verbose=True) + self.assertEqual(results, []) + self.assertEqual(status, 1) + self.assertEqual( + fake_err.getvalue(), + ( + "The namespace 'hdmf-common' is included by the namespace 'core'. " + "Please validate against that namespace instead.\n" + ) + ) + self.assertEqual(fake_out.getvalue(), + "Validating against cached namespace information using namespace 'core'.\n") From 923c1d37b0b27e0ed5c0b03d6e7ac9bada37cb2f Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Thu, 18 Apr 2024 16:31:34 -0500 Subject: [PATCH 04/40] add test that io and path output is same --- tests/validation/test_validate.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/validation/test_validate.py b/tests/validation/test_validate.py index cc0d73370..7345c4da8 100644 --- a/tests/validation/test_validate.py +++ b/tests/validation/test_validate.py @@ -334,3 +334,24 @@ def test_validate_io_cached_hdmf_common(self): ) self.assertEqual(fake_out.getvalue(), "Validating against cached namespace information using namespace 'core'.\n") + + def test_validate_io_and_path_same(self): + """Test that validating a file with an io object and a path return the same results.""" + path = 'tests/back_compat/1.1.2_nwbfile.nwb' + with patch("sys.stderr", new=StringIO()) as fake_err: + with patch("sys.stdout", new=StringIO()) as fake_out: + with self.get_io(path=path) as io: + results_io, status_io = validate(io=io, namespace="hdmf-common", verbose=True) + fake_err_io = fake_err.getvalue() + fake_out_io = fake_out.getvalue() + + with patch("sys.stderr", new=StringIO()) as fake_err: + with patch("sys.stdout", new=StringIO()) as v: + results_path, status_path = validate(paths=[path], namespace="hdmf-common", verbose=True) + fake_err_path = fake_err.getvalue() + fake_out_path = fake_out.getvalue() + + self.assertEqual(results_io, results_path) + self.assertEqual(status_io, status_path) + self.assertEqual(fake_err_io, fake_err_path) + self.assertEqual(fake_out_io, fake_out_path) From 41d9b9ded897dd7a5d11df3edaa652d37f1e7e51 Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Wed, 29 May 2024 10:33:32 -0700 Subject: [PATCH 05/40] update tests for io output with status --- tests/back_compat/test_read.py | 2 +- tests/integration/hdf5/test_io.py | 2 +- tests/integration/hdf5/test_modular_storage.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/back_compat/test_read.py b/tests/back_compat/test_read.py index 16a119690..4ba685597 100644 --- a/tests/back_compat/test_read.py +++ b/tests/back_compat/test_read.py @@ -60,7 +60,7 @@ def test_read(self): with warnings.catch_warnings(record=True) as warnings_on_read: warnings.simplefilter("always") with self.get_io(f) as io: - errors = validate(io) + errors, _ = validate(io) io.read() for w in warnings_on_read: if f.name in self.expected_warnings: diff --git a/tests/integration/hdf5/test_io.py b/tests/integration/hdf5/test_io.py index d68334c89..ffd5fce37 100644 --- a/tests/integration/hdf5/test_io.py +++ b/tests/integration/hdf5/test_io.py @@ -289,7 +289,7 @@ def test_append(self): np.testing.assert_equal(nwb.acquisition['timeseries2'].data[:], ts2.data) self.assertIs(nwb.processing['test_proc_mod']['LFP'].electrical_series['test_es'].electrodes, nwb.acquisition['timeseries2'].electrodes) - errors = validate(io) + errors, _ = validate(io) self.assertEqual(len(errors), 0, errors) def test_electrode_id_uniqueness(self): diff --git a/tests/integration/hdf5/test_modular_storage.py b/tests/integration/hdf5/test_modular_storage.py index fba5d02db..1a936a1c7 100644 --- a/tests/integration/hdf5/test_modular_storage.py +++ b/tests/integration/hdf5/test_modular_storage.py @@ -192,7 +192,7 @@ def validate(self): for fn in filenames: if os.path.exists(fn): with NWBHDF5IO(fn, mode='r') as io: - errors = pynwb_validate(io) + errors, _ = pynwb_validate(io) if errors: for err in errors: raise Exception(err) From 8c921a3a1f61d6efd8854b4658c2810ce89a1ce4 Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Wed, 29 May 2024 10:35:30 -0700 Subject: [PATCH 06/40] add validate helper function and fix status tracking --- src/pynwb/validate.py | 144 ++++++++++++++++++++---------------------- 1 file changed, 69 insertions(+), 75 deletions(-) diff --git a/src/pynwb/validate.py b/src/pynwb/validate.py index 910aecb03..46292b04f 100644 --- a/src/pynwb/validate.py +++ b/src/pynwb/validate.py @@ -27,62 +27,7 @@ def _validate_helper(io: HDMFIO, namespace: str = CORE_NAMESPACE) -> list: builder = io.read_builder() validator = ValidatorMap(io.manager.namespace_catalog.get_namespace(name=namespace)) return validator.validate(builder) - -def _check_namespaces_to_validate(io = None, - path = None, - use_cached_namespaces = False, - namespace = None, - verbose = False, - driver = None): - status = 0 - namespaces_to_validate = [] - namespace_message = "PyNWB namespace information" - io_kwargs = dict(path=path, mode="r", driver=driver) - - if use_cached_namespaces: - if io is not None: - cached_namespaces, manager, namespace_dependencies = _get_cached_namespaces_to_validate(io=io) - else: - cached_namespaces, manager, namespace_dependencies = _get_cached_namespaces_to_validate( - path=path, driver=driver - ) - io_kwargs.update(manager=manager) - - if any(cached_namespaces): - namespaces_to_validate = cached_namespaces - namespace_message = "cached namespace information" - else: - namespaces_to_validate = [CORE_NAMESPACE] - if verbose: - print( - f"The file {path} has no cached namespace information. Falling back to {namespace_message}.", - file=sys.stderr, - ) - else: - io_kwargs.update(load_namespaces=False) - namespaces_to_validate = [CORE_NAMESPACE] - if namespace is not None: - if namespace in namespaces_to_validate: - namespaces_to_validate = [namespace] - elif use_cached_namespaces and namespace in namespace_dependencies: # validating against a dependency - for namespace_dependency in namespace_dependencies: - if namespace in namespace_dependencies[namespace_dependency]: - status = 1 - print( - f"The namespace '{namespace}' is included by the namespace " - f"'{namespace_dependency}'. Please validate against that namespace instead.", - file=sys.stderr, - ) - else: - status = 1 - print( - f"The namespace '{namespace}' could not be found in {namespace_message} as only " - f"{namespaces_to_validate} is present.", - file=sys.stderr, - ) - - return status, namespaces_to_validate, io_kwargs, namespace_message def _get_cached_namespaces_to_validate( path: Optional[str] = None, driver: Optional[str] = None, aws_region: Optional[str] = None, io: Optional[HDMFIO] = None @@ -99,9 +44,10 @@ def _get_cached_namespaces_to_validate( >>> path = "my_nwb_file.nwb" >>> validate_namespaces, manager, cached_namespaces = _get_cached_namespaces_to_validate(path) >>> with NWBHDF5IO(path, "r", manager=manager) as reader: - >>> errors = [] + >>> validation_errors = [] >>> for ns in validate_namespaces: - >>> errors += validate(io=reader, namespace=ns) + errors, status = validate(io=reader, namespace=ns) + >>> validation_errors += errors :param path: Path for the NWB file :return: Tuple with: - List of strings with the most specific namespace(s) to use for validation. @@ -136,6 +82,64 @@ def _get_cached_namespaces_to_validate( return cached_namespaces, manager, namespace_dependencies +def _check_namespaces_to_validate(io = None, + path = None, + use_cached_namespaces = False, + namespace = None, + verbose = False, + driver = None, + status = 0): + namespaces_to_validate = [] + namespace_message = "PyNWB namespace information" + io_kwargs = dict(path=path, mode="r", driver=driver) + + if use_cached_namespaces: + cached_namespaces, manager, namespace_dependencies = _get_cached_namespaces_to_validate(path=path, driver=driver, io=io) + io_kwargs.update(manager=manager) + + if any(cached_namespaces): + namespaces_to_validate = cached_namespaces + namespace_message = "cached namespace information" + else: + namespaces_to_validate = [CORE_NAMESPACE] + if verbose: + print( + f"The file {path} has no cached namespace information. Falling back to {namespace_message}.", + file=sys.stderr, + ) + else: + io_kwargs.update(load_namespaces=False) + namespaces_to_validate = [CORE_NAMESPACE] + + if namespace is not None: + if namespace in namespaces_to_validate: + namespaces_to_validate = [namespace] + elif use_cached_namespaces and namespace in namespace_dependencies: # validating against a dependency + for namespace_dependency in namespace_dependencies: + if namespace in namespace_dependencies[namespace_dependency]: + status = 1 + print( + f"The namespace '{namespace}' is included by the namespace " + f"'{namespace_dependency}'. Please validate against that namespace instead.", + file=sys.stderr, + ) + else: + status = 1 + print( + f"The namespace '{namespace}' could not be found in {namespace_message} as only " + f"{namespaces_to_validate} is present.", + file=sys.stderr, + ) + + return status, namespaces_to_validate, io_kwargs, namespace_message + +def _validate_against_namespaces(io, namespaces_to_validate, namespace_message, verbose): + validation_errors = [] + for validation_namespace in namespaces_to_validate: + if verbose: + print(f"Validating against {namespace_message} using namespace '{validation_namespace}'.") + validation_errors += _validate_helper(io=io, namespace=validation_namespace) + return validation_errors @docval( { @@ -191,31 +195,21 @@ def validate(**kwargs): status = 0 validation_errors = list() if io is not None: - (status, - namespaces_to_validate, - io_kwargs, - namespace_message) = _check_namespaces_to_validate(io, paths, use_cached_namespaces, namespace, verbose, - driver) - for validation_namespace in namespaces_to_validate: - if verbose: - print(f"Validating against {namespace_message} using namespace '{validation_namespace}'.") - validation_errors += _validate_helper(io=io, namespace=validation_namespace) + status, namespaces_to_validate, io_kwargs, namespace_message = _check_namespaces_to_validate( + io, paths, use_cached_namespaces, namespace, verbose, driver, status + ) + validation_errors += _validate_against_namespaces(io, namespaces_to_validate, namespace_message, verbose) return validation_errors, status for path in paths: - (status, - namespaces_to_validate, - io_kwargs, - namespace_message) = _check_namespaces_to_validate(io, path, use_cached_namespaces, namespace, verbose, - driver) + status, namespaces_to_validate, io_kwargs, namespace_message = _check_namespaces_to_validate( + io, path, use_cached_namespaces, namespace, verbose, driver, status + ) if status == 1: continue with NWBHDF5IO(**io_kwargs) as io: - for validation_namespace in namespaces_to_validate: - if verbose: - print(f"Validating {path} against {namespace_message} using namespace '{validation_namespace}'.") - validation_errors += _validate_helper(io=io, namespace=validation_namespace) + validation_errors += _validate_against_namespaces(io, namespaces_to_validate, namespace_message, verbose) return validation_errors, status From 95556a828ac51f2aa4cb317df89723f86dfa53ba Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Wed, 29 May 2024 12:03:49 -0700 Subject: [PATCH 07/40] update arguments and status checks --- src/pynwb/validate.py | 73 ++++++++++++++++++++++++++++--------------- 1 file changed, 48 insertions(+), 25 deletions(-) diff --git a/src/pynwb/validate.py b/src/pynwb/validate.py index 46292b04f..3d6ab7931 100644 --- a/src/pynwb/validate.py +++ b/src/pynwb/validate.py @@ -29,8 +29,10 @@ def _validate_helper(io: HDMFIO, namespace: str = CORE_NAMESPACE) -> list: return validator.validate(builder) -def _get_cached_namespaces_to_validate( - path: Optional[str] = None, driver: Optional[str] = None, aws_region: Optional[str] = None, io: Optional[HDMFIO] = None +def _get_cached_namespaces_to_validate(path: Optional[str] = None, + driver: Optional[str] = None, + aws_region: Optional[str] = None, + io: Optional[HDMFIO] = None ) -> Tuple[List[str], BuildManager, Dict[str, str]]: """ Determine the most specific namespace(s) that are cached in the given NWBFile that can be used for validation. @@ -61,9 +63,13 @@ def _get_cached_namespaces_to_validate( ) if io is not None: - namespace_dependencies = io.load_namespaces(namespace_catalog=catalog, file=io._HDF5IO__file, aws_region=aws_region) + namespace_dependencies = io.load_namespaces(namespace_catalog=catalog, + file=io._HDF5IO__file) else: - namespace_dependencies = NWBHDF5IO.load_namespaces(namespace_catalog=catalog, path=path, driver=driver, aws_region=aws_region) + namespace_dependencies = NWBHDF5IO.load_namespaces(namespace_catalog=catalog, + path=path, + driver=driver, + aws_region=aws_region) # Determine which namespaces are the most specific (i.e. extensions) and validate against those candidate_namespaces = set(namespace_dependencies.keys()) @@ -82,19 +88,21 @@ def _get_cached_namespaces_to_validate( return cached_namespaces, manager, namespace_dependencies -def _check_namespaces_to_validate(io = None, - path = None, - use_cached_namespaces = False, - namespace = None, - verbose = False, - driver = None, - status = 0): +def _check_namespaces_to_validate(io: Optional[HDMFIO] = None, + path: Optional[str] = None, + use_cached_namespaces: Optional[bool] = False, + namespace: Optional[str] = None, + verbose: Optional[bool] = False, + driver: Optional[str] = None) -> Tuple[int, List[str], Dict[str, str], str]: + status = 0 namespaces_to_validate = [] namespace_message = "PyNWB namespace information" io_kwargs = dict(path=path, mode="r", driver=driver) if use_cached_namespaces: - cached_namespaces, manager, namespace_dependencies = _get_cached_namespaces_to_validate(path=path, driver=driver, io=io) + cached_namespaces, manager, namespace_dependencies = _get_cached_namespaces_to_validate(path=path, + driver=driver, + io=io) io_kwargs.update(manager=manager) if any(cached_namespaces): @@ -133,11 +141,16 @@ def _check_namespaces_to_validate(io = None, return status, namespaces_to_validate, io_kwargs, namespace_message -def _validate_against_namespaces(io, namespaces_to_validate, namespace_message, verbose): +def _validate_against_namespaces(io: Optional[HDMFIO] = None, + path: Optional[str] = None, + namespaces_to_validate: Optional[List[str]] = None, + namespace_message: Optional[str] = None, + verbose: Optional[bool] = False): validation_errors = [] for validation_namespace in namespaces_to_validate: if verbose: - print(f"Validating against {namespace_message} using namespace '{validation_namespace}'.") + print(f"Validating {f'{path} ' if path is not None else ''}against " + f"{namespace_message} using namespace '{validation_namespace}'.") validation_errors += _validate_helper(io=io, namespace=validation_namespace) return validation_errors @@ -196,20 +209,30 @@ def validate(**kwargs): validation_errors = list() if io is not None: status, namespaces_to_validate, io_kwargs, namespace_message = _check_namespaces_to_validate( - io, paths, use_cached_namespaces, namespace, verbose, driver, status + io=io, use_cached_namespaces=use_cached_namespaces, namespace=namespace, verbose=verbose, ) - validation_errors += _validate_against_namespaces(io, namespaces_to_validate, namespace_message, verbose) - return validation_errors, status + if status == 0: + validation_errors += _validate_against_namespaces(io=io, + namespaces_to_validate=namespaces_to_validate, + namespace_message=namespace_message, + verbose=verbose) + else: + for path in paths: + path_status, namespaces_to_validate, io_kwargs, namespace_message = _check_namespaces_to_validate( + path=path, driver=driver, use_cached_namespaces=use_cached_namespaces, namespace=namespace, + verbose=verbose, + ) + status = status or path_status + if path_status == 1: + continue - for path in paths: - status, namespaces_to_validate, io_kwargs, namespace_message = _check_namespaces_to_validate( - io, path, use_cached_namespaces, namespace, verbose, driver, status - ) - if status == 1: - continue + with NWBHDF5IO(**io_kwargs) as io: + validation_errors += _validate_against_namespaces(io=io, + path=path, + namespaces_to_validate=namespaces_to_validate, + namespace_message=namespace_message, + verbose=verbose) - with NWBHDF5IO(**io_kwargs) as io: - validation_errors += _validate_against_namespaces(io, namespaces_to_validate, namespace_message, verbose) return validation_errors, status From 87b1f1edd11ac2083d861d74f5fbecc5b2cadf2f Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Wed, 29 May 2024 12:04:16 -0700 Subject: [PATCH 08/40] fix test comparing io and path outputs --- tests/validation/test_validate.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tests/validation/test_validate.py b/tests/validation/test_validate.py index 7345c4da8..2d6c2e4e1 100644 --- a/tests/validation/test_validate.py +++ b/tests/validation/test_validate.py @@ -248,9 +248,7 @@ def test_validate_io_cached_core_with_io(self): "Please validate against that namespace instead.\n" ) ) - self.assertEqual(fake_out.getvalue(), - "Validating against cached namespace information using " - "namespace 'ndx-testextension'.\n") + self.assertEqual(fake_out.getvalue(),"") def test_validate_file_cached_extension(self): """ @@ -332,8 +330,7 @@ def test_validate_io_cached_hdmf_common(self): "Please validate against that namespace instead.\n" ) ) - self.assertEqual(fake_out.getvalue(), - "Validating against cached namespace information using namespace 'core'.\n") + self.assertEqual(fake_out.getvalue(), "") def test_validate_io_and_path_same(self): """Test that validating a file with an io object and a path return the same results.""" @@ -346,7 +343,7 @@ def test_validate_io_and_path_same(self): fake_out_io = fake_out.getvalue() with patch("sys.stderr", new=StringIO()) as fake_err: - with patch("sys.stdout", new=StringIO()) as v: + with patch("sys.stdout", new=StringIO()) as fake_out: results_path, status_path = validate(paths=[path], namespace="hdmf-common", verbose=True) fake_err_path = fake_err.getvalue() fake_out_path = fake_out.getvalue() From 6a748a5fc08d53e588803b240024dd0bd315c737 Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Wed, 29 May 2024 13:00:26 -0700 Subject: [PATCH 09/40] update error message for io inputs --- src/pynwb/validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pynwb/validate.py b/src/pynwb/validate.py index 3d6ab7931..6737ad38c 100644 --- a/src/pynwb/validate.py +++ b/src/pynwb/validate.py @@ -112,7 +112,7 @@ def _check_namespaces_to_validate(io: Optional[HDMFIO] = None, namespaces_to_validate = [CORE_NAMESPACE] if verbose: print( - f"The file {path} has no cached namespace information. Falling back to {namespace_message}.", + f"The file {f'{path} ' if path is not None else ''}has no cached namespace information. Falling back to {namespace_message}.", file=sys.stderr, ) else: From 250513e19ead7e65b8b10c638595eb13dd4c5f8c Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Wed, 29 May 2024 13:04:02 -0700 Subject: [PATCH 10/40] add tests to compare all io and path outputs --- tests/validation/test_validate.py | 84 ++++++++++++------------------- 1 file changed, 32 insertions(+), 52 deletions(-) diff --git a/tests/validation/test_validate.py b/tests/validation/test_validate.py index 2d6c2e4e1..96852a075 100644 --- a/tests/validation/test_validate.py +++ b/tests/validation/test_validate.py @@ -230,26 +230,6 @@ def test_validate_io_cached_extension_pass_ns(self): errors, _ = validate(io, 'ndx-testextension') self.assertEqual(errors, []) - def test_validate_io_cached_core_with_io(self): - """ - Test that validating a file with cached extension spec against the core - namespace succeeds rases an error when using the `io` + `namespace` keywords. - """ - with patch("sys.stderr", new=StringIO()) as fake_err: - with patch("sys.stdout", new=StringIO()) as fake_out: - with self.get_io(path='tests/back_compat/2.1.0_nwbfile_with_extension.nwb') as io: - results, status = validate(io=io, namespace="core", verbose=True) - self.assertEqual(results, []) - self.assertEqual(status, 1) - self.assertEqual( - fake_err.getvalue(), - ( - "The namespace 'core' is included by the namespace 'ndx-testextension'. " - "Please validate against that namespace instead.\n" - ) - ) - self.assertEqual(fake_out.getvalue(),"") - def test_validate_file_cached_extension(self): """ Test that validating a file with cached extension spec against the core @@ -300,20 +280,6 @@ def test_validate_file_cached_no_cache_bad_ns(self): self.assertRegex(fake_err.getvalue(), stderr_regex) self.assertEqual(fake_out.getvalue(), "") - def test_validate_io_cached_bad_ns(self): - """Test that validating a file with cached spec against a specified, unknown namespace fails.""" - with patch("sys.stderr", new=StringIO()) as fake_err: - with patch("sys.stdout", new=StringIO()) as fake_out: - with self.get_io('tests/back_compat/1.1.2_nwbfile.nwb') as io: - results, status = validate(io=io, namespace='notfound') - self.assertEqual(results, []) - self.assertEqual(status, 1) - stderr_regex = ( - r"The namespace 'notfound' could not be found in cached namespace information as only " - r"\['core'\] is present.\n" - ) - self.assertRegex(fake_err.getvalue(), stderr_regex) - self.assertEqual(fake_out.getvalue(), "") def test_validate_io_cached_hdmf_common(self): """Test that validating a file with cached spec against the hdmf-common namespace fails.""" @@ -334,21 +300,35 @@ def test_validate_io_cached_hdmf_common(self): def test_validate_io_and_path_same(self): """Test that validating a file with an io object and a path return the same results.""" - path = 'tests/back_compat/1.1.2_nwbfile.nwb' - with patch("sys.stderr", new=StringIO()) as fake_err: - with patch("sys.stdout", new=StringIO()) as fake_out: - with self.get_io(path=path) as io: - results_io, status_io = validate(io=io, namespace="hdmf-common", verbose=True) - fake_err_io = fake_err.getvalue() - fake_out_io = fake_out.getvalue() - - with patch("sys.stderr", new=StringIO()) as fake_err: - with patch("sys.stdout", new=StringIO()) as fake_out: - results_path, status_path = validate(paths=[path], namespace="hdmf-common", verbose=True) - fake_err_path = fake_err.getvalue() - fake_out_path = fake_out.getvalue() - - self.assertEqual(results_io, results_path) - self.assertEqual(status_io, status_path) - self.assertEqual(fake_err_io, fake_err_path) - self.assertEqual(fake_out_io, fake_out_path) + tests = [('tests/back_compat/1.0.2_nwbfile.nwb', None), + ('tests/back_compat/1.0.2_nwbfile.nwb', 'notfound'), + ('tests/back_compat/1.1.2_nwbfile.nwb', None), + ('tests/back_compat/1.1.2_nwbfile.nwb', 'core'), + ('tests/back_compat/1.1.2_nwbfile.nwb', 'notfound'), + ('tests/back_compat/1.1.2_nwbfile.nwb', 'hdmf-common'), + ('tests/back_compat/2.1.0_nwbfile_with_extension.nwb', None), + ('tests/back_compat/2.1.0_nwbfile_with_extension.nwb', 'core'), + ('tests/back_compat/2.1.0_nwbfile_with_extension.nwb', 'ndx-testextension'),] + + for path, ns in tests: + with patch("sys.stderr", new=StringIO()) as fake_err: + with patch("sys.stdout", new=StringIO()) as fake_out: + with self.get_io(path=path) as io: + results_io, status_io = validate(io=io, namespace=ns, verbose=True) + fake_err_io = fake_err.getvalue() + fake_out_io = fake_out.getvalue() + + with patch("sys.stderr", new=StringIO()) as fake_err: + with patch("sys.stdout", new=StringIO()) as fake_out: + results_path, status_path = validate(paths=[path], namespace=ns, verbose=True) + fake_err_path = fake_err.getvalue() + fake_out_path = fake_out.getvalue() + + # remove path from error messages since it will not be included in io outputs + fake_err_path = fake_err_path.replace(f'{path} ', '') + fake_out_path = fake_out_path.replace(f'{path} ', '') + + self.assertEqual(results_io, results_path) + self.assertEqual(status_io, status_path) + self.assertEqual(fake_err_io, fake_err_path) + self.assertEqual(fake_out_io, fake_out_path) From 76a4f0f76fe20ab1bda6c5472bb667e0eb586b6a Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Wed, 29 May 2024 14:23:09 -0700 Subject: [PATCH 11/40] remove script for testing --- validation_testing.py | 26 -------------------------- 1 file changed, 26 deletions(-) delete mode 100644 validation_testing.py diff --git a/validation_testing.py b/validation_testing.py deleted file mode 100644 index df3773ab5..000000000 --- a/validation_testing.py +++ /dev/null @@ -1,26 +0,0 @@ -import h5py -from pynwb.validate import validate -from pynwb import NWBHDF5IO - -path = 'tests/back_compat/2.1.0_nwbfile_with_extension.nwb' -io = NWBHDF5IO('tests/back_compat/2.1.0_nwbfile_with_extension.nwb') - -print('Validating all with paths') -validate(paths=[path], namespace="core", verbose=True) - -print('Validating core with io') -validate(io=io, namespace="core", verbose=True) - -print('Validating core with paths') -validate(paths=[path], namespace="core", verbose=True) - -print('Validating all with io') -validate(io=io, verbose=True, use_cached_namespaces=False) - -#namespace_dependencies = NWBHDF5IO.get_namespaces(namespace_catalog=catalog, file=io._HDF5IO__file) -#io.get_namespaces(file=io._HDF5IO__file) -# manager = io.manager -# io.manager.namespace_catalog.get_namespace('name') then can get dependencies from that -# manager.namespace_catalog._NamespaceCatalog__namespaces - -print('done') \ No newline at end of file From 25fc892a564b09312ad2b25bc3de39f7cf28aa71 Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Wed, 13 Nov 2024 09:36:06 -0800 Subject: [PATCH 12/40] add initial json export option --- src/pynwb/validate.py | 57 +++++++++++++++++-------------------------- 1 file changed, 22 insertions(+), 35 deletions(-) diff --git a/src/pynwb/validate.py b/src/pynwb/validate.py index f69f22092..5a001772a 100644 --- a/src/pynwb/validate.py +++ b/src/pynwb/validate.py @@ -1,7 +1,9 @@ """Command line tool to Validate an NWB file against a namespace.""" +import json import sys from argparse import ArgumentParser from typing import Tuple, List, Dict, Optional +from pathlib import Path from hdmf.spec import NamespaceCatalog from hdmf.build import BuildManager @@ -194,6 +196,12 @@ def _validate_against_namespaces(io: Optional[HDMFIO] = None, "doc": "Driver for h5py to use when opening the HDF5 file.", "default": None, }, + { + "name": "json_file_path", + "type": str, + "doc": "Write json output to this location", + "default": None, + }, returns="Validation errors in the file.", rtype=(list, (list, bool)), is_method=False, @@ -201,40 +209,18 @@ def _validate_against_namespaces(io: Optional[HDMFIO] = None, def validate(**kwargs): """Validate NWB file(s) against a namespace or its cached namespaces. """ - from . import NWBHDF5IO # TODO: modularize to avoid circular import - io, paths, use_cached_namespaces, namespace, verbose, driver = getargs( - "io", "paths", "use_cached_namespaces", "namespace", "verbose", "driver", kwargs + io, path, use_cached_namespaces, namespace, verbose, driver, json_file_path = getargs( + "io", "path", "use_cached_namespaces", "namespace", "verbose", "driver", "json_file_path", kwargs ) - assert io != paths, "Both 'io' and 'paths' were specified! Please choose only one." - - status = 0 - validation_errors = list() - if io is not None: - status, namespaces_to_validate, io_kwargs, namespace_message = _check_namespaces_to_validate( - io=io, use_cached_namespaces=use_cached_namespaces, namespace=namespace, verbose=verbose, - ) - if status == 0: - validation_errors += _validate_against_namespaces(io=io, - namespaces_to_validate=namespaces_to_validate, - namespace_message=namespace_message, - verbose=verbose) + assert io != path, "Both 'io' and 'path' were specified! Please choose only one." + path = str(path) if isinstance(path, Path) else path else: - for path in paths: - path_status, namespaces_to_validate, io_kwargs, namespace_message = _check_namespaces_to_validate( - path=path, driver=driver, use_cached_namespaces=use_cached_namespaces, namespace=namespace, - verbose=verbose, - ) - status = status or path_status - if path_status == 1: - continue - - with NWBHDF5IO(**io_kwargs) as io: - validation_errors += _validate_against_namespaces(io=io, - path=path, - namespaces_to_validate=namespaces_to_validate, - namespace_message=namespace_message, - verbose=verbose) + # write output to json file + if json_file_path is not None: + with open(json_file_path, "w") as f: + json.dump(obj=validation_errors, f=f) + print(f"Report saved to {str(Path(json_file_path).absolute())}!") return validation_errors, status @@ -258,6 +244,7 @@ def validate_cli(): # Common args to the API validate parser.add_argument("paths", type=str, nargs="+", help="NWB file paths") parser.add_argument("-n", "--ns", type=str, help="the namespace to validate against") + parser.add_argument("--json-file-path", help="Write json output to this location.") feature_parser = parser.add_mutually_exclusive_group(required=False) feature_parser.add_argument( "--no-cached-namespace", @@ -274,10 +261,10 @@ def validate_cli(): cached_namespaces, _, _ = get_cached_namespaces_to_validate(path=path) print("\n".join(cached_namespaces)) else: - validation_errors, validation_status = validate( - paths=args.paths, use_cached_namespaces=not args.no_cached_namespace, namespace=args.ns, verbose=True - ) - if not validation_status: + validation_errors = validate( + path=args.path, use_cached_namespaces=not args.no_cached_namespace, namespace=args.ns, verbose=True, + json_file_path=args.json_file_path + ) _print_errors(validation_errors=validation_errors) status = status or validation_status or (validation_errors is not None and len(validation_errors) > 0) From 6caa46bca1f6cc7305e326b616867e44a1e820f5 Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Wed, 13 Nov 2024 09:37:36 -0800 Subject: [PATCH 13/40] update validator inputs and outputs, remove hdf5io references --- src/pynwb/validate.py | 169 ++++++++++++++++++++---------------------- 1 file changed, 82 insertions(+), 87 deletions(-) diff --git a/src/pynwb/validate.py b/src/pynwb/validate.py index 5a001772a..51345c3ab 100644 --- a/src/pynwb/validate.py +++ b/src/pynwb/validate.py @@ -6,8 +6,7 @@ from pathlib import Path from hdmf.spec import NamespaceCatalog -from hdmf.build import BuildManager -from hdmf.build import TypeMap as TypeMap +from hdmf.build import BuildManager, TypeMap from hdmf.utils import docval, getargs from hdmf.backends.io import HDMFIO from hdmf.validate import ValidatorMap @@ -24,6 +23,12 @@ def _print_errors(validation_errors: list): else: print(" - no errors found.") +def _get_backend(path: str): + from . import NWBHDF5IO + from hdmf_zarr import NWBZarrIO + + backend_io_classes = [NWBHDF5IO, NWBZarrIO] + return [b for b in backend_io_classes if b.can_read(path=path)][0] def _validate_helper(io: HDMFIO, namespace: str = CORE_NAMESPACE) -> list: builder = io.read_builder() @@ -33,8 +38,8 @@ def _validate_helper(io: HDMFIO, namespace: str = CORE_NAMESPACE) -> list: def get_cached_namespaces_to_validate(path: Optional[str] = None, driver: Optional[str] = None, - aws_region: Optional[str] = None, - io: Optional[HDMFIO] = None + aws_region: Optional[str] = None, + io: Optional[HDMFIO] = None ) -> Tuple[List[str], BuildManager, Dict[str, str]]: """ Determine the most specific namespace(s) that are cached in the given NWBFile that can be used for validation. @@ -61,7 +66,6 @@ def get_cached_namespaces_to_validate(path: Optional[str] = None, - BuildManager object for opening the file for validation - Dict with the full result from NWBHDF5IO.load_namespaces """ - from . import NWBHDF5IO # TODO: modularize to avoid circular import catalog = NamespaceCatalog( group_spec_cls=NWBGroupSpec, dataset_spec_cls=NWBDatasetSpec, spec_namespace_cls=NWBNamespace @@ -69,12 +73,14 @@ def get_cached_namespaces_to_validate(path: Optional[str] = None, if io is not None: namespace_dependencies = io.load_namespaces(namespace_catalog=catalog, - file=io._HDF5IO__file) + file=io.file) # TODO would need to update HDMFIO, ZarrIO to support file input else: - namespace_dependencies = NWBHDF5IO.load_namespaces(namespace_catalog=catalog, - path=path, - driver=driver, - aws_region=aws_region) + backend_io = _get_backend(path) + namespace_dependencies = backend_io.load_namespaces(namespace_catalog=catalog, + path=path, + driver=driver, + aws_region=aws_region) + # Determine which namespaces are the most specific (i.e. extensions) and validate against those candidate_namespaces = set(namespace_dependencies.keys()) @@ -93,72 +99,6 @@ def get_cached_namespaces_to_validate(path: Optional[str] = None, return cached_namespaces, manager, namespace_dependencies -def _check_namespaces_to_validate(io: Optional[HDMFIO] = None, - path: Optional[str] = None, - use_cached_namespaces: Optional[bool] = False, - namespace: Optional[str] = None, - verbose: Optional[bool] = False, - driver: Optional[str] = None) -> Tuple[int, List[str], Dict[str, str], str]: - status = 0 - namespaces_to_validate = [] - namespace_message = "PyNWB namespace information" - io_kwargs = dict(path=path, mode="r", driver=driver) - - if use_cached_namespaces: - cached_namespaces, manager, namespace_dependencies = get_cached_namespaces_to_validate(path=path, - driver=driver, - io=io) - io_kwargs.update(manager=manager) - - if any(cached_namespaces): - namespaces_to_validate = cached_namespaces - namespace_message = "cached namespace information" - else: - namespaces_to_validate = [CORE_NAMESPACE] - if verbose: - print( - f"The file {f'{path} ' if path is not None else ''}has no cached namespace information. Falling back to {namespace_message}.", - file=sys.stderr, - ) - else: - io_kwargs.update(load_namespaces=False) - namespaces_to_validate = [CORE_NAMESPACE] - - if namespace is not None: - if namespace in namespaces_to_validate: - namespaces_to_validate = [namespace] - elif use_cached_namespaces and namespace in namespace_dependencies: # validating against a dependency - for namespace_dependency in namespace_dependencies: - if namespace in namespace_dependencies[namespace_dependency]: - status = 1 - print( - f"The namespace '{namespace}' is included by the namespace " - f"'{namespace_dependency}'. Please validate against that namespace instead.", - file=sys.stderr, - ) - else: - status = 1 - print( - f"The namespace '{namespace}' could not be found in {namespace_message} as only " - f"{namespaces_to_validate} is present.", - file=sys.stderr, - ) - - return status, namespaces_to_validate, io_kwargs, namespace_message - -def _validate_against_namespaces(io: Optional[HDMFIO] = None, - path: Optional[str] = None, - namespaces_to_validate: Optional[List[str]] = None, - namespace_message: Optional[str] = None, - verbose: Optional[bool] = False): - validation_errors = [] - for validation_namespace in namespaces_to_validate: - if verbose: - print(f"Validating {f'{path} ' if path is not None else ''}against " - f"{namespace_message} using namespace '{validation_namespace}'.") - validation_errors += _validate_helper(io=io, namespace=validation_namespace) - return validation_errors - @docval( { "name": "io", @@ -173,9 +113,9 @@ def _validate_against_namespaces(io: Optional[HDMFIO] = None, "default": None, }, # Argument order is for back-compatability { - "name": "paths", - "type": list, - "doc": "List of NWB file paths.", + "name": "path", + "type": (str, Path), + "doc": "NWB file path.", "default": None, }, { @@ -215,14 +155,66 @@ def validate(**kwargs): ) assert io != path, "Both 'io' and 'path' were specified! Please choose only one." path = str(path) if isinstance(path, Path) else path + + # get namespaces to validate + namespace_message = "PyNWB namespace information" + io_kwargs = dict(path=path, mode="r", driver=driver) + + if use_cached_namespaces: + cached_namespaces, manager, namespace_dependencies = get_cached_namespaces_to_validate(path=path, + driver=driver, + io=io) + io_kwargs.update(manager=manager) + + if any(cached_namespaces): + namespaces_to_validate = cached_namespaces + namespace_message = "cached namespace information" + else: + namespaces_to_validate = [CORE_NAMESPACE] + if verbose: + raise UserWarning( + f"The file {f'{path} ' if path is not None else ''}has no cached namespace information. " + "Falling back to {namespace_message}.", + ) else: + io_kwargs.update(load_namespaces=False) + namespaces_to_validate = [CORE_NAMESPACE] + + # get io object if not provided + if io is None: + backend_io = _get_backend(path) + io = backend_io(**io_kwargs) + + # check namespaces are accurate + if namespace is not None: + if namespace in namespaces_to_validate: + namespaces_to_validate = [namespace] + elif use_cached_namespaces and namespace in namespace_dependencies: # validating against a dependency + for namespace_dependency in namespace_dependencies: + if namespace in namespace_dependencies[namespace_dependency]: + raise ValueError( + f"The namespace '{namespace}' is included by the namespace " + f"'{namespace_dependency}'. Please validate against that namespace instead.") + else: + raise ValueError( + f"The namespace '{namespace}' could not be found in {namespace_message} as only " + f"{namespaces_to_validate} is present.",) + + # validate against namespaces + validation_errors = [] + for validation_namespace in namespaces_to_validate: + if verbose: + print(f"Validating {f'{path} ' if path is not None else ''}against " + f"{namespace_message} using namespace '{validation_namespace}'.") + validation_errors += _validate_helper(io=io, namespace=validation_namespace) + # write output to json file if json_file_path is not None: with open(json_file_path, "w") as f: json.dump(obj=validation_errors, f=f) print(f"Report saved to {str(Path(json_file_path).absolute())}!") - return validation_errors, status + return validation_errors def validate_cli(): @@ -242,7 +234,7 @@ def validate_cli(): ) # Common args to the API validate - parser.add_argument("paths", type=str, nargs="+", help="NWB file paths") + parser.add_argument("path", type=str, help="NWB file path") parser.add_argument("-n", "--ns", type=str, help="the namespace to validate against") parser.add_argument("--json-file-path", help="Write json output to this location.") feature_parser = parser.add_mutually_exclusive_group(required=False) @@ -254,20 +246,23 @@ def validate_cli(): ) parser.set_defaults(no_cached_namespace=False) args = parser.parse_args() - status = 0 if args.list_namespaces: - for path in args.paths: - cached_namespaces, _, _ = get_cached_namespaces_to_validate(path=path) - print("\n".join(cached_namespaces)) + cached_namespaces, _, _ = get_cached_namespaces_to_validate(path=args.path) + print("\n".join(cached_namespaces)) + status = 0 else: + try: validation_errors = validate( path=args.path, use_cached_namespaces=not args.no_cached_namespace, namespace=args.ns, verbose=True, json_file_path=args.json_file_path ) _print_errors(validation_errors=validation_errors) - status = status or validation_status or (validation_errors is not None and len(validation_errors) > 0) - + status = validation_errors is not None and len(validation_errors) > 0 + except ValueError as e: + print(e, file=sys.stderr) + status = 1 + sys.exit(status) From 4c48c6766707fdd0581b8f5595298602aa86f1d9 Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Wed, 13 Nov 2024 11:02:13 -0800 Subject: [PATCH 14/40] update tests for non status output --- src/pynwb/testing/testh5io.py | 8 +- src/pynwb/validate.py | 20 ++- tests/back_compat/test_read.py | 2 +- tests/integration/hdf5/test_io.py | 2 +- .../integration/hdf5/test_modular_storage.py | 2 +- tests/integration/ros3/test_ros3.py | 3 +- tests/validation/test_validate.py | 147 +++++++----------- 7 files changed, 81 insertions(+), 103 deletions(-) diff --git a/src/pynwb/testing/testh5io.py b/src/pynwb/testing/testh5io.py index 7234e79f5..265cd5944 100644 --- a/src/pynwb/testing/testh5io.py +++ b/src/pynwb/testing/testh5io.py @@ -163,12 +163,12 @@ def getContainer(self, nwbfile): def validate(self): """ Validate the created files """ if os.path.exists(self.filename): - errors, _ = pynwb_validate(paths=[self.filename]) + errors = pynwb_validate(path=self.filename) if errors: raise Exception("\n".join(errors)) if os.path.exists(self.export_filename): - errors, _ = pynwb_validate(paths=[self.export_filename]) + errors = pynwb_validate(path=self.export_filename) if errors: raise Exception("\n".join(errors)) @@ -366,11 +366,11 @@ def roundtripExportContainer(self, cache_spec=True): def validate(self): """Validate the created files.""" if os.path.exists(self.filename): - errors, _ = pynwb_validate(paths=[self.filename]) + errors = pynwb_validate(path=self.filename) if errors: raise Exception("\n".join(errors)) if os.path.exists(self.export_filename): - errors, _ = pynwb_validate(paths=[self.export_filename]) + errors = pynwb_validate(path=self.export_filename) if errors: raise Exception("\n".join(errors)) diff --git a/src/pynwb/validate.py b/src/pynwb/validate.py index 51345c3ab..2fe1c8c37 100644 --- a/src/pynwb/validate.py +++ b/src/pynwb/validate.py @@ -4,6 +4,7 @@ from argparse import ArgumentParser from typing import Tuple, List, Dict, Optional from pathlib import Path +from warnings import warn from hdmf.spec import NamespaceCatalog from hdmf.build import BuildManager, TypeMap @@ -28,7 +29,16 @@ def _get_backend(path: str): from hdmf_zarr import NWBZarrIO backend_io_classes = [NWBHDF5IO, NWBZarrIO] - return [b for b in backend_io_classes if b.can_read(path=path)][0] + backend_options = [b for b in backend_io_classes if b.can_read(path=path)] + if len(backend_options) == 0: + warn(f"Could not find an IO to read the file '{path}'." + f"This may be due to an older file version or invalid file." + f"Defaulting to NWBHDF5IO.", UserWarning) + return NWBHDF5IO + elif len(backend_options) > 1: + raise ValueError(f"Multiple backends found for file '{path}': {backend_options}") + else: + return backend_options[0] def _validate_helper(io: HDMFIO, namespace: str = CORE_NAMESPACE) -> list: builder = io.read_builder() @@ -172,10 +182,8 @@ def validate(**kwargs): else: namespaces_to_validate = [CORE_NAMESPACE] if verbose: - raise UserWarning( - f"The file {f'{path} ' if path is not None else ''}has no cached namespace information. " - "Falling back to {namespace_message}.", - ) + warn(f"The file {f'{path} ' if path is not None else ''}has no cached namespace information. " + f"Falling back to {namespace_message}.", UserWarning) else: io_kwargs.update(load_namespaces=False) namespaces_to_validate = [CORE_NAMESPACE] @@ -205,7 +213,7 @@ def validate(**kwargs): for validation_namespace in namespaces_to_validate: if verbose: print(f"Validating {f'{path} ' if path is not None else ''}against " - f"{namespace_message} using namespace '{validation_namespace}'.") + f"{namespace_message} using namespace '{validation_namespace}'.") validation_errors += _validate_helper(io=io, namespace=validation_namespace) # write output to json file diff --git a/tests/back_compat/test_read.py b/tests/back_compat/test_read.py index 4ba685597..16a119690 100644 --- a/tests/back_compat/test_read.py +++ b/tests/back_compat/test_read.py @@ -60,7 +60,7 @@ def test_read(self): with warnings.catch_warnings(record=True) as warnings_on_read: warnings.simplefilter("always") with self.get_io(f) as io: - errors, _ = validate(io) + errors = validate(io) io.read() for w in warnings_on_read: if f.name in self.expected_warnings: diff --git a/tests/integration/hdf5/test_io.py b/tests/integration/hdf5/test_io.py index e619104e3..1e6ed0593 100644 --- a/tests/integration/hdf5/test_io.py +++ b/tests/integration/hdf5/test_io.py @@ -296,7 +296,7 @@ def test_append(self): np.testing.assert_equal(nwb.acquisition['timeseries2'].data[:], ts2.data) self.assertIs(nwb.processing['test_proc_mod']['LFP'].electrical_series['test_es'].electrodes, nwb.acquisition['timeseries2'].electrodes) - errors, _ = validate(io) + errors = validate(io) self.assertEqual(len(errors), 0, errors) def test_electrode_id_uniqueness(self): diff --git a/tests/integration/hdf5/test_modular_storage.py b/tests/integration/hdf5/test_modular_storage.py index 1a936a1c7..fba5d02db 100644 --- a/tests/integration/hdf5/test_modular_storage.py +++ b/tests/integration/hdf5/test_modular_storage.py @@ -192,7 +192,7 @@ def validate(self): for fn in filenames: if os.path.exists(fn): with NWBHDF5IO(fn, mode='r') as io: - errors, _ = pynwb_validate(io) + errors = pynwb_validate(io) if errors: for err in errors: raise Exception(err) diff --git a/tests/integration/ros3/test_ros3.py b/tests/integration/ros3/test_ros3.py index 2571e6199..4327b2d66 100644 --- a/tests/integration/ros3/test_ros3.py +++ b/tests/integration/ros3/test_ros3.py @@ -93,7 +93,6 @@ def test_dandi_get_cached_namespaces(self): self.assertDictEqual(d1=expected_namespace_dependencies, d2=expected_namespace_dependencies) def test_dandi_validate(self): - result, status = validate(paths=[self.s3_test_path], driver="ros3") + result = validate(path=self.s3_test_path, driver="ros3") assert result == [] - assert status == 0 diff --git a/tests/validation/test_validate.py b/tests/validation/test_validate.py index 96852a075..3a5297a3a 100644 --- a/tests/validation/test_validate.py +++ b/tests/validation/test_validate.py @@ -54,13 +54,15 @@ def test_validate_file_no_cache_bad_ns(self): """Test that validating a file with no cached spec against a specified, unknown namespace fails.""" result = run_coverage(["tests/back_compat/1.0.2_nwbfile.nwb", "--ns", "notfound"]) - stderr_regex = re.compile( + stderr_regex_1 = re.compile( r"The file tests/back_compat/1\.0\.2_nwbfile\.nwb has no cached namespace information\. " - r"Falling back to PyNWB namespace information\.\s*" + r"Falling back to PyNWB namespace information\.\s*") + stderr_regex_2 = re.compile( r"The namespace 'notfound' could not be found in PyNWB namespace information as only " - r"\['core'\] is present\.\s*" - ) - self.assertRegex(result.stderr.decode('utf-8'), stderr_regex) + r"\['core'\] is present\.\s*") + + self.assertRegex(result.stderr.decode('utf-8'), stderr_regex_1) + self.assertRegex(result.stderr.decode('utf-8'), stderr_regex_2) self.assertEqual(result.stdout.decode('utf-8'), '') @@ -194,40 +196,33 @@ def get_io(self, path): def test_validate_io_no_cache(self): """Test that validating a file with no cached spec against the core namespace succeeds.""" with self.get_io('tests/back_compat/1.0.2_nwbfile.nwb') as io: - errors, _ = validate(io) + errors = validate(io) self.assertEqual(errors, []) def test_validate_io_no_cache_bad_ns(self): """Test that validating a file with no cached spec against a specified, unknown namespace fails.""" - with patch("sys.stderr", new=StringIO()) as fake_err: - with patch("sys.stdout", new=StringIO()) as fake_out: - with self.get_io('tests/back_compat/1.0.2_nwbfile.nwb') as io: - results, status = validate(io=io, namespace='notfound') - self.assertEqual(results, []) - self.assertEqual(status, 1) - stderr_regex = ( - r"The namespace 'notfound' could not be found in PyNWB namespace information as only " - r"\['core'\] is present.\n" - ) - self.assertRegex(fake_err.getvalue(), stderr_regex) - self.assertEqual(fake_out.getvalue(), "") + expected_error = ("The namespace 'notfound' could not be found in PyNWB namespace information as only " + "['core'] is present.") + with self.assertRaisesWith(ValueError, expected_error): + with self.get_io('tests/back_compat/1.0.2_nwbfile.nwb') as io: + validate(io=io, namespace='notfound') def test_validate_io_cached(self): """Test that validating a file with cached spec against its cached namespace succeeds.""" with self.get_io('tests/back_compat/1.1.2_nwbfile.nwb') as io: - errors, _ = validate(io) + errors = validate(io) self.assertEqual(errors, []) def test_validate_io_cached_extension(self): """Test that validating a file with cached spec against its cached namespaces succeeds.""" with self.get_io('tests/back_compat/2.1.0_nwbfile_with_extension.nwb') as io: - errors, _ = validate(io) + errors = validate(io) self.assertEqual(errors, []) def test_validate_io_cached_extension_pass_ns(self): """Test that validating a file with cached extension spec against the extension namespace succeeds.""" with self.get_io('tests/back_compat/2.1.0_nwbfile_with_extension.nwb') as io: - errors, _ = validate(io, 'ndx-testextension') + errors = validate(io, 'ndx-testextension') self.assertEqual(errors, []) def test_validate_file_cached_extension(self): @@ -236,99 +231,75 @@ def test_validate_file_cached_extension(self): namespace raises an error with the new CLI-mimicing paths keyword. """ nwbfile_path = "tests/back_compat/2.1.0_nwbfile_with_extension.nwb" - with patch("sys.stderr", new=StringIO()) as fake_err: - with patch("sys.stdout", new=StringIO()) as fake_out: - results, status = validate(paths=[nwbfile_path], namespace="core", verbose=True) - self.assertEqual(results, []) - self.assertEqual(status, 1) - self.assertEqual( - fake_err.getvalue(), - ( - "The namespace 'core' is included by the namespace 'ndx-testextension'. " - "Please validate against that namespace instead.\n" - ) - ) - self.assertEqual(fake_out.getvalue(), "") + expected_error = ("The namespace 'core' is included by the namespace 'ndx-testextension'. " + "Please validate against that namespace instead.") + with self.assertRaisesWith(ValueError, expected_error): + validate(path=nwbfile_path, namespace="core", verbose=True) def test_validate_file_cached_core(self): """ Test that validating a file with cached core spec with verbose=False. """ nwbfile_path = "tests/back_compat/1.1.2_nwbfile.nwb" - with patch("sys.stderr", new=StringIO()) as fake_err: - with patch("sys.stdout", new=StringIO()) as fake_out: - results, status = validate(paths=[nwbfile_path], namespace="core") - self.assertEqual(results, []) - self.assertEqual(status, 0) - self.assertEqual(fake_err.getvalue(), "") - self.assertEqual(fake_out.getvalue(), "") + results = validate(path=nwbfile_path, namespace="core") + self.assertEqual(results, []) def test_validate_file_cached_no_cache_bad_ns(self): """ Test that validating a file with no cached namespace, a namespace that is not found, and verbose=False. """ nwbfile_path = "tests/back_compat/1.0.2_nwbfile.nwb" - with patch("sys.stderr", new=StringIO()) as fake_err: - with patch("sys.stdout", new=StringIO()) as fake_out: - results, status = validate(paths=[nwbfile_path], namespace="notfound") - self.assertEqual(results, []) - self.assertEqual(status, 1) - stderr_regex = ( - r"The namespace 'notfound' could not be found in PyNWB namespace information as only " - r"\['core'\] is present.\n" - ) - self.assertRegex(fake_err.getvalue(), stderr_regex) - self.assertEqual(fake_out.getvalue(), "") + expected_error = ("The namespace 'notfound' could not be found in PyNWB namespace information as only " + "['core'] is present.") + with self.assertRaisesWith(ValueError, expected_error): + validate(path=nwbfile_path, namespace="notfound") def test_validate_io_cached_hdmf_common(self): """Test that validating a file with cached spec against the hdmf-common namespace fails.""" - with patch("sys.stderr", new=StringIO()) as fake_err: - with patch("sys.stdout", new=StringIO()) as fake_out: - with self.get_io(path='tests/back_compat/1.1.2_nwbfile.nwb') as io: - results, status = validate(io=io, namespace="hdmf-common", verbose=True) - self.assertEqual(results, []) - self.assertEqual(status, 1) - self.assertEqual( - fake_err.getvalue(), - ( - "The namespace 'hdmf-common' is included by the namespace 'core'. " - "Please validate against that namespace instead.\n" - ) - ) - self.assertEqual(fake_out.getvalue(), "") + expected_error = ("The namespace 'hdmf-common' is included by the namespace 'core'. " + "Please validate against that namespace instead.") + with self.assertRaisesWith(ValueError, expected_error): + with self.get_io(path='tests/back_compat/1.1.2_nwbfile.nwb') as io: + validate(io=io, namespace="hdmf-common", verbose=True) def test_validate_io_and_path_same(self): """Test that validating a file with an io object and a path return the same results.""" tests = [('tests/back_compat/1.0.2_nwbfile.nwb', None), - ('tests/back_compat/1.0.2_nwbfile.nwb', 'notfound'), ('tests/back_compat/1.1.2_nwbfile.nwb', None), ('tests/back_compat/1.1.2_nwbfile.nwb', 'core'), - ('tests/back_compat/1.1.2_nwbfile.nwb', 'notfound'), - ('tests/back_compat/1.1.2_nwbfile.nwb', 'hdmf-common'), ('tests/back_compat/2.1.0_nwbfile_with_extension.nwb', None), - ('tests/back_compat/2.1.0_nwbfile_with_extension.nwb', 'core'), ('tests/back_compat/2.1.0_nwbfile_with_extension.nwb', 'ndx-testextension'),] + tests_with_error = [('tests/back_compat/1.0.2_nwbfile.nwb', 'notfound'), + ('tests/back_compat/1.1.2_nwbfile.nwb', 'notfound'), + ('tests/back_compat/1.1.2_nwbfile.nwb', 'hdmf-common'), + ('tests/back_compat/2.1.0_nwbfile_with_extension.nwb', 'core'),] + + # paths that cause errors for path, ns in tests: - with patch("sys.stderr", new=StringIO()) as fake_err: - with patch("sys.stdout", new=StringIO()) as fake_out: - with self.get_io(path=path) as io: - results_io, status_io = validate(io=io, namespace=ns, verbose=True) - fake_err_io = fake_err.getvalue() - fake_out_io = fake_out.getvalue() - - with patch("sys.stderr", new=StringIO()) as fake_err: - with patch("sys.stdout", new=StringIO()) as fake_out: - results_path, status_path = validate(paths=[path], namespace=ns, verbose=True) - fake_err_path = fake_err.getvalue() - fake_out_path = fake_out.getvalue() + with patch("sys.stdout", new=StringIO()) as fake_out: + with self.get_io(path=path) as io: + results_io = validate(io=io, namespace=ns, verbose=True) + out_io = fake_out.getvalue() - # remove path from error messages since it will not be included in io outputs - fake_err_path = fake_err_path.replace(f'{path} ', '') - fake_out_path = fake_out_path.replace(f'{path} ', '') + with patch("sys.stdout", new=StringIO()) as fake_out: + results_path = validate(path=path, namespace=ns, verbose=True) + out_path = fake_out.getvalue() + # remove path from error messages since it will not be included in io outputs + out_path = out_path.replace(f'{path} ', '') self.assertEqual(results_io, results_path) - self.assertEqual(status_io, status_path) - self.assertEqual(fake_err_io, fake_err_path) - self.assertEqual(fake_out_io, fake_out_path) + self.assertEqual(out_io, out_path) + + # paths that return no errors + for path, ns in tests_with_error: + with self.assertRaises(ValueError) as e_io: + with self.get_io(path=path) as io: + results_io = validate(io=io, namespace=ns, verbose=True) + + with self.assertRaises(ValueError) as e_path: + results_path = validate(path=path, namespace=ns, verbose=True) + + # remove path from error messages since it will not be included in io outputs + self.assertEqual(str(e_io.exception), str(e_path.exception)) From 216315a735ef7564416c5c38309557fe89a69d87 Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Wed, 13 Nov 2024 12:50:42 -0800 Subject: [PATCH 15/40] add tests for json validator output --- src/pynwb/validate.py | 31 +++++++++++++------------------ tests/validation/test_validate.py | 9 ++++++++- 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/src/pynwb/validate.py b/src/pynwb/validate.py index 2fe1c8c37..21f4f007e 100644 --- a/src/pynwb/validate.py +++ b/src/pynwb/validate.py @@ -145,13 +145,7 @@ def get_cached_namespaces_to_validate(path: Optional[str] = None, "type": str, "doc": "Driver for h5py to use when opening the HDF5 file.", "default": None, - }, - { - "name": "json_file_path", - "type": str, - "doc": "Write json output to this location", - "default": None, - }, + }, returns="Validation errors in the file.", rtype=(list, (list, bool)), is_method=False, @@ -160,8 +154,8 @@ def validate(**kwargs): """Validate NWB file(s) against a namespace or its cached namespaces. """ - io, path, use_cached_namespaces, namespace, verbose, driver, json_file_path = getargs( - "io", "path", "use_cached_namespaces", "namespace", "verbose", "driver", "json_file_path", kwargs + io, path, use_cached_namespaces, namespace, verbose, driver = getargs( + "io", "path", "use_cached_namespaces", "namespace", "verbose", "driver", kwargs ) assert io != path, "Both 'io' and 'path' were specified! Please choose only one." path = str(path) if isinstance(path, Path) else path @@ -216,12 +210,6 @@ def validate(**kwargs): f"{namespace_message} using namespace '{validation_namespace}'.") validation_errors += _validate_helper(io=io, namespace=validation_namespace) - # write output to json file - if json_file_path is not None: - with open(json_file_path, "w") as f: - json.dump(obj=validation_errors, f=f) - print(f"Report saved to {str(Path(json_file_path).absolute())}!") - return validation_errors @@ -244,10 +232,10 @@ def validate_cli(): # Common args to the API validate parser.add_argument("path", type=str, help="NWB file path") parser.add_argument("-n", "--ns", type=str, help="the namespace to validate against") - parser.add_argument("--json-file-path", help="Write json output to this location.") + parser.add_argument("--json-file-path", dest="json_file_path", type=str, help="Write json output to this location.") feature_parser = parser.add_mutually_exclusive_group(required=False) feature_parser.add_argument( - "--no-cached-namespace", + "--no-cached-namespace", # NOTE - update to match validate inputs? dest="no_cached_namespace", action="store_true", help="Use the PyNWB loaded namespace (true) or use the cached namespace (false; default).", @@ -260,10 +248,10 @@ def validate_cli(): print("\n".join(cached_namespaces)) status = 0 else: + validation_errors = [] try: validation_errors = validate( path=args.path, use_cached_namespaces=not args.no_cached_namespace, namespace=args.ns, verbose=True, - json_file_path=args.json_file_path ) _print_errors(validation_errors=validation_errors) status = validation_errors is not None and len(validation_errors) > 0 @@ -271,6 +259,13 @@ def validate_cli(): print(e, file=sys.stderr) status = 1 + # write output to json file + if args.json_file_path is not None: + with open(args.json_file_path, "w") as f: + json_report = {'status': int(status), 'validation_errors': [str(e) for e in validation_errors]} + json.dump(obj=json_report, fp=f) + print(f"Report saved to {str(Path(args.json_file_path).absolute())}!") + sys.exit(status) diff --git a/tests/validation/test_validate.py b/tests/validation/test_validate.py index 3a5297a3a..a99862d7f 100644 --- a/tests/validation/test_validate.py +++ b/tests/validation/test_validate.py @@ -1,5 +1,6 @@ import subprocess import re +import os import sys from unittest.mock import patch from io import StringIO @@ -176,6 +177,13 @@ def test_validate_file_list_namespaces_extension(self): stdout_regex = re.compile(r"ndx-testextension\s*") self.assertRegex(result.stdout.decode('utf-8'), stdout_regex) + def test_validate_file_json_output(self): + """Test that validating an invalid file with the json flag ouputs a json file.""" + json_path = "test_validation.json" + run_coverage(["tests/back_compat/1.0.2_str_experimenter.nwb", "--no-cached-namespace", "--json-file-path", json_path]) + self.assertTrue(os.path.exists(json_path)) + os.remove(json_path) + class TestValidateFunction(TestCase): @@ -254,7 +262,6 @@ def test_validate_file_cached_no_cache_bad_ns(self): with self.assertRaisesWith(ValueError, expected_error): validate(path=nwbfile_path, namespace="notfound") - def test_validate_io_cached_hdmf_common(self): """Test that validating a file with cached spec against the hdmf-common namespace fails.""" expected_error = ("The namespace 'hdmf-common' is included by the namespace 'core'. " From 7c5f20fc44ac37f40faf2910f4c6202cdd992685 Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Wed, 13 Nov 2024 12:59:29 -0800 Subject: [PATCH 16/40] move pynwb.validate into validation module --- src/pynwb/__init__.py | 2 +- src/pynwb/{validate.py => validation.py} | 2 +- test.py | 6 +++--- tests/integration/ros3/test_ros3.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) rename src/pynwb/{validate.py => validation.py} (99%) diff --git a/src/pynwb/__init__.py b/src/pynwb/__init__.py index 3a4d95e98..166bcdabd 100644 --- a/src/pynwb/__init__.py +++ b/src/pynwb/__init__.py @@ -23,7 +23,7 @@ CORE_NAMESPACE = 'core' from .spec import NWBDatasetSpec, NWBGroupSpec, NWBNamespace # noqa E402 -from .validate import validate # noqa: F401, E402 +from .validation import validate # noqa: F401, E402 try: # see https://effigies.gitlab.io/posts/python-packaging-2023/ diff --git a/src/pynwb/validate.py b/src/pynwb/validation.py similarity index 99% rename from src/pynwb/validate.py rename to src/pynwb/validation.py index 21f4f007e..6381c5754 100644 --- a/src/pynwb/validate.py +++ b/src/pynwb/validation.py @@ -62,7 +62,7 @@ def get_cached_namespaces_to_validate(path: Optional[str] = None, .. code-block:: python from pynwb import validate - from pynwb.validate import get_cached_namespaces_to_validate + from pynwb.validation import get_cached_namespaces_to_validate path = "my_nwb_file.nwb" validate_namespaces, manager, cached_namespaces = get_cached_namespaces_to_validate(path) with NWBHDF5IO(path, "r", manager=manager) as reader: diff --git a/test.py b/test.py index f64fcd75d..f58951d6d 100644 --- a/test.py +++ b/test.py @@ -157,7 +157,7 @@ def validate_nwbs(): examples_nwbs = [x for x in examples_nwbs if not x.startswith('sub-')] import pynwb - from pynwb.validate import get_cached_namespaces_to_validate + from pynwb.validation import get_cached_namespaces_to_validate, validate for nwb in examples_nwbs: try: @@ -169,7 +169,7 @@ def validate_nwbs(): is_family_nwb_file = False try: with pynwb.NWBHDF5IO(nwb, mode='r') as io: - errors = pynwb.validate(io) + errors = validate(io) except OSError as e: # if the file was created with the family driver, need to use the family driver to open it if 'family driver should be used' in str(e): @@ -179,7 +179,7 @@ def validate_nwbs(): memb_size = 1024**2 # note: the memb_size must be the same as the one used to create the file with h5py.File(filename_pattern, mode='r', driver='family', memb_size=memb_size) as f: with pynwb.NWBHDF5IO(file=f, manager=None, mode='r') as io: - errors = pynwb.validate(io) + errors = validate(io) else: raise e diff --git a/tests/integration/ros3/test_ros3.py b/tests/integration/ros3/test_ros3.py index 4327b2d66..835172a3c 100644 --- a/tests/integration/ros3/test_ros3.py +++ b/tests/integration/ros3/test_ros3.py @@ -1,6 +1,6 @@ from pynwb import NWBHDF5IO from pynwb import validate -from pynwb.validate import get_cached_namespaces_to_validate +from pynwb.validation import get_cached_namespaces_to_validate from pynwb.testing import TestCase import urllib.request import h5py From d791f6f3cfb7a815adbfc5b87797e0f132e1481f Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Wed, 13 Nov 2024 13:17:35 -0800 Subject: [PATCH 17/40] update json report --- src/pynwb/validation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pynwb/validation.py b/src/pynwb/validation.py index 6381c5754..1e4003f14 100644 --- a/src/pynwb/validation.py +++ b/src/pynwb/validation.py @@ -254,7 +254,7 @@ def validate_cli(): path=args.path, use_cached_namespaces=not args.no_cached_namespace, namespace=args.ns, verbose=True, ) _print_errors(validation_errors=validation_errors) - status = validation_errors is not None and len(validation_errors) > 0 + status = int(validation_errors is not None and len(validation_errors) > 0) except ValueError as e: print(e, file=sys.stderr) status = 1 @@ -262,7 +262,7 @@ def validate_cli(): # write output to json file if args.json_file_path is not None: with open(args.json_file_path, "w") as f: - json_report = {'status': int(status), 'validation_errors': [str(e) for e in validation_errors]} + json_report = {'exitcode': status, 'errors': [str(e) for e in validation_errors]} json.dump(obj=json_report, fp=f) print(f"Report saved to {str(Path(args.json_file_path).absolute())}!") From 096013e371c2e157a4ad88e8d33f6cb165ba23f5 Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Wed, 13 Nov 2024 13:17:56 -0800 Subject: [PATCH 18/40] add validation entry point --- pyproject.toml | 3 +++ tests/validation/test_validate.py | 11 +++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 14a11f5d5..d3733c361 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,6 +48,9 @@ dynamic = ["version"] # the build backend will compute the version dynamically f "Homepage" = "https://github.com/NeurodataWithoutBorders/pynwb" "Bug Tracker" = "https://github.com/NeurodataWithoutBorders/pynwb/issues" +[project.scripts] +pynwb-validate = "pynwb.validation:validate_cli" + [tool.hatch.version] source = "vcs" diff --git a/tests/validation/test_validate.py b/tests/validation/test_validate.py index a99862d7f..ef06ab7af 100644 --- a/tests/validation/test_validate.py +++ b/tests/validation/test_validate.py @@ -24,7 +24,7 @@ # combine the individual coverage reports into one .coverage file. def run_coverage(extra_args: list[str]): return subprocess.run( - [sys.executable, "-m", "coverage", "run", "-p", "-m", "pynwb.validate"] + [sys.executable, "-m", "coverage", "run", "-p", "-m", "pynwb.validation"] + extra_args, capture_output=True ) @@ -178,12 +178,19 @@ def test_validate_file_list_namespaces_extension(self): self.assertRegex(result.stdout.decode('utf-8'), stdout_regex) def test_validate_file_json_output(self): - """Test that validating an invalid file with the json flag ouputs a json file.""" + """Test that validating a file with the json flag ouputs a json file.""" json_path = "test_validation.json" run_coverage(["tests/back_compat/1.0.2_str_experimenter.nwb", "--no-cached-namespace", "--json-file-path", json_path]) self.assertTrue(os.path.exists(json_path)) os.remove(json_path) + def test_validation_entry_point(self): + """Test that using the validation entry point succesfully executes the validate CLI.""" + json_path = "test_validation_entry_point.json" + subprocess.run(["pynwb-validate", "tests/back_compat/1.0.2_str_experimenter.nwb", "--json-file-path", json_path]) + self.assertTrue(os.path.exists(json_path)) + os.remove(json_path) + class TestValidateFunction(TestCase): From ea4af1e2738ecd1ea42c374812df0e98bb8a1640 Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Wed, 13 Nov 2024 14:51:19 -0800 Subject: [PATCH 19/40] separate cli and validation function files --- pyproject.toml | 2 +- src/pynwb/validation.py | 72 +---------------------------- src/pynwb/validation_cli.py | 76 +++++++++++++++++++++++++++++++ test.py | 8 ++-- tests/validation/test_validate.py | 2 +- 5 files changed, 83 insertions(+), 77 deletions(-) create mode 100644 src/pynwb/validation_cli.py diff --git a/pyproject.toml b/pyproject.toml index d3733c361..c2458b420 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,7 +49,7 @@ dynamic = ["version"] # the build backend will compute the version dynamically f "Bug Tracker" = "https://github.com/NeurodataWithoutBorders/pynwb/issues" [project.scripts] -pynwb-validate = "pynwb.validation:validate_cli" +pynwb-validate = "pynwb.validation_cli:validation_cli" [tool.hatch.version] source = "vcs" diff --git a/src/pynwb/validation.py b/src/pynwb/validation.py index 1e4003f14..55c8938b6 100644 --- a/src/pynwb/validation.py +++ b/src/pynwb/validation.py @@ -1,7 +1,4 @@ -"""Command line tool to Validate an NWB file against a namespace.""" -import json -import sys -from argparse import ArgumentParser +"""Module to validate an NWB file against a namespace.""" from typing import Tuple, List, Dict, Optional from pathlib import Path from warnings import warn @@ -16,14 +13,6 @@ from pynwb.spec import NWBDatasetSpec, NWBGroupSpec, NWBNamespace -def _print_errors(validation_errors: list): - if validation_errors: - print(" - found the following errors:", file=sys.stderr) - for err in validation_errors: - print(str(err), file=sys.stderr) - else: - print(" - no errors found.") - def _get_backend(path: str): from . import NWBHDF5IO from hdmf_zarr import NWBZarrIO @@ -212,62 +201,3 @@ def validate(**kwargs): return validation_errors - -def validate_cli(): - """CLI wrapper around pynwb.validate.""" - parser = ArgumentParser( - description="Validate an NWB file", - epilog="If --ns is not specified, validate against all namespaces in the NWB file.", - ) - - # Special arg specific to CLI - parser.add_argument( - "-lns", - "--list-namespaces", - dest="list_namespaces", - action="store_true", - help="List the available namespaces and exit.", - ) - - # Common args to the API validate - parser.add_argument("path", type=str, help="NWB file path") - parser.add_argument("-n", "--ns", type=str, help="the namespace to validate against") - parser.add_argument("--json-file-path", dest="json_file_path", type=str, help="Write json output to this location.") - feature_parser = parser.add_mutually_exclusive_group(required=False) - feature_parser.add_argument( - "--no-cached-namespace", # NOTE - update to match validate inputs? - dest="no_cached_namespace", - action="store_true", - help="Use the PyNWB loaded namespace (true) or use the cached namespace (false; default).", - ) - parser.set_defaults(no_cached_namespace=False) - args = parser.parse_args() - - if args.list_namespaces: - cached_namespaces, _, _ = get_cached_namespaces_to_validate(path=args.path) - print("\n".join(cached_namespaces)) - status = 0 - else: - validation_errors = [] - try: - validation_errors = validate( - path=args.path, use_cached_namespaces=not args.no_cached_namespace, namespace=args.ns, verbose=True, - ) - _print_errors(validation_errors=validation_errors) - status = int(validation_errors is not None and len(validation_errors) > 0) - except ValueError as e: - print(e, file=sys.stderr) - status = 1 - - # write output to json file - if args.json_file_path is not None: - with open(args.json_file_path, "w") as f: - json_report = {'exitcode': status, 'errors': [str(e) for e in validation_errors]} - json.dump(obj=json_report, fp=f) - print(f"Report saved to {str(Path(args.json_file_path).absolute())}!") - - sys.exit(status) - - -if __name__ == "__main__": # pragma: no cover - validate_cli() diff --git a/src/pynwb/validation_cli.py b/src/pynwb/validation_cli.py new file mode 100644 index 000000000..5cc6261a8 --- /dev/null +++ b/src/pynwb/validation_cli.py @@ -0,0 +1,76 @@ +"""Command line tool to Validate an NWB file against a namespace.""" +import json +import sys +from argparse import ArgumentParser +from pathlib import Path + +from pynwb.validation import validate, get_cached_namespaces_to_validate + + +def _print_errors(validation_errors: list): + if validation_errors: + print(" - found the following errors:", file=sys.stderr) + for err in validation_errors: + print(str(err), file=sys.stderr) + else: + print(" - no errors found.") + + +def validation_cli(): + """CLI wrapper around pynwb.validate.""" + parser = ArgumentParser( + description="Validate an NWB file", + epilog="If --ns is not specified, validate against all namespaces in the NWB file.", + ) + + # Special arg specific to CLI + parser.add_argument( + "-lns", + "--list-namespaces", + dest="list_namespaces", + action="store_true", + help="List the available namespaces and exit.", + ) + + # Common args to the API validate + parser.add_argument("path", type=str, help="NWB file path") + parser.add_argument("-n", "--ns", type=str, help="the namespace to validate against") + parser.add_argument("--json-file-path", dest="json_file_path", type=str, help="Write json output to this location.") + feature_parser = parser.add_mutually_exclusive_group(required=False) + feature_parser.add_argument( + "--no-cached-namespace", # NOTE - update to match validate inputs? + dest="no_cached_namespace", + action="store_true", + help="Use the PyNWB loaded namespace (true) or use the cached namespace (false; default).", + ) + parser.set_defaults(no_cached_namespace=False) + args = parser.parse_args() + + if args.list_namespaces: + cached_namespaces, _, _ = get_cached_namespaces_to_validate(path=args.path) + print("\n".join(cached_namespaces)) + status = 0 + else: + validation_errors = [] + try: + validation_errors = validate( + path=args.path, use_cached_namespaces=not args.no_cached_namespace, namespace=args.ns, verbose=True, + ) + _print_errors(validation_errors=validation_errors) + status = int(validation_errors is not None and len(validation_errors) > 0) + except ValueError as e: + print(e, file=sys.stderr) + status = 1 + + # write output to json file + if args.json_file_path is not None: + with open(args.json_file_path, "w") as f: + json_report = {'exitcode': status, 'errors': [str(e) for e in validation_errors]} + json.dump(obj=json_report, fp=f) + print(f"Report saved to {str(Path(args.json_file_path).absolute())}!") + + sys.exit(status) + + +if __name__ == "__main__": # pragma: no cover + validation_cli() diff --git a/test.py b/test.py index f58951d6d..df1bc8354 100644 --- a/test.py +++ b/test.py @@ -157,7 +157,7 @@ def validate_nwbs(): examples_nwbs = [x for x in examples_nwbs if not x.startswith('sub-')] import pynwb - from pynwb.validation import get_cached_namespaces_to_validate, validate + from pynwb.validation import validate, get_cached_namespaces_to_validate for nwb in examples_nwbs: try: @@ -202,14 +202,14 @@ def validate_nwbs(): ERRORS += 1 cmds = [] - cmds += [["python", "-m", "pynwb.validate", nwb]] - cmds += [["python", "-m", "pynwb.validate", "--no-cached-namespace", nwb]] + cmds += [["pynwb-validate", nwb]] + cmds += [["pynwb-validate", "--no-cached-namespace", nwb]] for ns in namespaces: # for some reason, this logging command is necessary to correctly printing the namespace in the # next logging command logging.info("Namespace found: %s" % ns) - cmds += [["python", "-m", "pynwb.validate", "--ns", ns, nwb]] + cmds += [["pynwb-validate", "--ns", ns, nwb]] for cmd in cmds: logging.info("Validating with \"%s\"." % (" ".join(cmd[:-1]))) diff --git a/tests/validation/test_validate.py b/tests/validation/test_validate.py index ef06ab7af..11c3029db 100644 --- a/tests/validation/test_validate.py +++ b/tests/validation/test_validate.py @@ -24,7 +24,7 @@ # combine the individual coverage reports into one .coverage file. def run_coverage(extra_args: list[str]): return subprocess.run( - [sys.executable, "-m", "coverage", "run", "-p", "-m", "pynwb.validation"] + [sys.executable, "-m", "coverage", "run", "-p", "-m", "pynwb.validation_cli"] + extra_args, capture_output=True ) From c14931a84595233c1df2eb5aa5e88f8261ccf2ec Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Thu, 21 Nov 2024 15:37:55 -0800 Subject: [PATCH 20/40] update validation tutorial --- docs/source/validation.rst | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/docs/source/validation.rst b/docs/source/validation.rst index 73c138127..65272db3f 100644 --- a/docs/source/validation.rst +++ b/docs/source/validation.rst @@ -3,12 +3,21 @@ Validating NWB files ==================== +.. note:: + + The pynwb validation CLI checks for structural compliance of NWB files with the NWB schema. + It is recommended to use the `NWBInspector CLI `_ + for more comprehensive validation of both structural compliance with the NWB schema and + compliance of data with NWB best practices. The NWBInspector runs both PyNWB validation as + described here and additional data checks. + + Validating NWB files is handled by a command-line tool available in :py:mod:`~pynwb`. The validator can be invoked like so: .. code-block:: bash - python -m pynwb.validate test.nwb + pynwb-validate test.nwb If the file contains no NWB extensions, then this command will validate the file ``test.nwb`` against the *core* NWB specification. On success, the output will be: @@ -29,7 +38,7 @@ within the ``test.nwb`` file. .. code-block:: bash - python -m pynwb.validate -n ndx-my-extension test.nwb + pynwb-validate -n ndx-my-extension test.nwb To validate against the version of the **core** NWB specification that is included with the installed version of PyNWB, use the ``--no-cached-namespace`` flag. This can be useful in validating files against newer or older versions @@ -37,27 +46,28 @@ of the **core** NWB specification that are installed with newer or older version .. code-block:: bash - python -m pynwb.validate --no-cached-namespace test.nwb + pynwb-validate --no-cached-namespace test.nwb .. Last updated 8/13/2021 .. code-block:: text - $python -m pynwb.validate --help - usage: validate.py [-h] [-n NS] [-lns] [--cached-namespace | --no-cached-namespace] paths [paths ...] + $pynwb-validate --help + usage: pynwb-validate [-h] [-lns] [-n NS] [--json-file-path JSON_FILE_PATH] [--no-cached-namespace] paths [paths ...] Validate an NWB file positional arguments: paths NWB file paths - optional arguments: + options: -h, --help show this help message and exit - -n NS, --ns NS the namespace to validate against -lns, --list-namespaces List the available namespaces and exit. - --cached-namespace Use the cached namespace (default). + -n NS, --ns NS the namespace to validate against + --json-file-path JSON_FILE_PATH + Write json output to this location. --no-cached-namespace - Don't use the cached namespace. + Use the PyNWB loaded namespace (true) or use the cached namespace (false; default). If --ns is not specified, validate against all namespaces in the NWB file. From c2a95228a321669ce7f4d2461da3c87844ec9ee5 Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Thu, 21 Nov 2024 15:47:53 -0800 Subject: [PATCH 21/40] move get_backend to pynwb init --- src/pynwb/__init__.py | 18 ++++++++++++++++ src/pynwb/validation.py | 24 ++++++--------------- src/pynwb/validation_cli.py | 43 +++++++++++++++++++++---------------- 3 files changed, 50 insertions(+), 35 deletions(-) diff --git a/src/pynwb/__init__.py b/src/pynwb/__init__.py index 166bcdabd..d2004cd0d 100644 --- a/src/pynwb/__init__.py +++ b/src/pynwb/__init__.py @@ -354,6 +354,24 @@ def get_sum(self, a, b): return __TYPE_MAP.get_dt_container_cls(neurodata_type, namespace) +@docval({'name': 'path', 'type': str, 'doc': 'the neurodata_type to get the NWBContainer class for'}, + is_method=False) +def _get_backend(path: str): + from hdmf_zarr import NWBZarrIO + + backend_io_classes = [NWBHDF5IO, NWBZarrIO] + backend_options = [b for b in backend_io_classes if b.can_read(path=path)] + if len(backend_options) == 0: + warn(f"Could not find an IO to read the file '{path}'." + f"This may be due to an older file version or invalid file." + f"Defaulting to NWBHDF5IO.", UserWarning) + return NWBHDF5IO + elif len(backend_options) > 1: + raise ValueError(f"Multiple backends found for file '{path}': {backend_options}") + else: + return backend_options[0] + + class NWBHDF5IO(_HDF5IO): @staticmethod diff --git a/src/pynwb/validation.py b/src/pynwb/validation.py index 55c8938b6..c5be91aed 100644 --- a/src/pynwb/validation.py +++ b/src/pynwb/validation.py @@ -13,22 +13,6 @@ from pynwb.spec import NWBDatasetSpec, NWBGroupSpec, NWBNamespace -def _get_backend(path: str): - from . import NWBHDF5IO - from hdmf_zarr import NWBZarrIO - - backend_io_classes = [NWBHDF5IO, NWBZarrIO] - backend_options = [b for b in backend_io_classes if b.can_read(path=path)] - if len(backend_options) == 0: - warn(f"Could not find an IO to read the file '{path}'." - f"This may be due to an older file version or invalid file." - f"Defaulting to NWBHDF5IO.", UserWarning) - return NWBHDF5IO - elif len(backend_options) > 1: - raise ValueError(f"Multiple backends found for file '{path}': {backend_options}") - else: - return backend_options[0] - def _validate_helper(io: HDMFIO, namespace: str = CORE_NAMESPACE) -> list: builder = io.read_builder() validator = ValidatorMap(io.manager.namespace_catalog.get_namespace(name=namespace)) @@ -72,8 +56,9 @@ def get_cached_namespaces_to_validate(path: Optional[str] = None, if io is not None: namespace_dependencies = io.load_namespaces(namespace_catalog=catalog, - file=io.file) # TODO would need to update HDMFIO, ZarrIO to support file input + file=io._file) # TODO would need to update HDMFIO, ZarrIO to support file input else: + from pynwb import _get_backend backend_io = _get_backend(path) namespace_dependencies = backend_io.load_namespaces(namespace_catalog=catalog, path=path, @@ -141,6 +126,10 @@ def get_cached_namespaces_to_validate(path: Optional[str] = None, ) def validate(**kwargs): """Validate NWB file(s) against a namespace or its cached namespaces. + + Note: this function checks for compliance with the NWB schema. + It is recommended to use the NWBInspector for more comprehensive validation of both + compliance with the schema and compliance of data with NWB best practices. """ io, path, use_cached_namespaces, namespace, verbose, driver = getargs( @@ -173,6 +162,7 @@ def validate(**kwargs): # get io object if not provided if io is None: + from pynwb import _get_backend backend_io = _get_backend(path) io = backend_io(**io_kwargs) diff --git a/src/pynwb/validation_cli.py b/src/pynwb/validation_cli.py index 5cc6261a8..e11562642 100644 --- a/src/pynwb/validation_cli.py +++ b/src/pynwb/validation_cli.py @@ -17,7 +17,12 @@ def _print_errors(validation_errors: list): def validation_cli(): - """CLI wrapper around pynwb.validate.""" + """CLI wrapper around pynwb.validate. + + Note: this CLI wrapper checks for compliance with the NWB schema. + It is recommended to use the NWBInspector CLI for more comprehensive validation of both + compliance with the schema and compliance of data with NWB best practices. + """ parser = ArgumentParser( description="Validate an NWB file", epilog="If --ns is not specified, validate against all namespaces in the NWB file.", @@ -33,12 +38,12 @@ def validation_cli(): ) # Common args to the API validate - parser.add_argument("path", type=str, help="NWB file path") + parser.add_argument("paths", type=str, nargs="+", help="NWB file paths") parser.add_argument("-n", "--ns", type=str, help="the namespace to validate against") parser.add_argument("--json-file-path", dest="json_file_path", type=str, help="Write json output to this location.") feature_parser = parser.add_mutually_exclusive_group(required=False) feature_parser.add_argument( - "--no-cached-namespace", # NOTE - update to match validate inputs? + "--no-cached-namespace", dest="no_cached_namespace", action="store_true", help="Use the PyNWB loaded namespace (true) or use the cached namespace (false; default).", @@ -46,21 +51,23 @@ def validation_cli(): parser.set_defaults(no_cached_namespace=False) args = parser.parse_args() - if args.list_namespaces: - cached_namespaces, _, _ = get_cached_namespaces_to_validate(path=args.path) - print("\n".join(cached_namespaces)) - status = 0 - else: - validation_errors = [] - try: - validation_errors = validate( - path=args.path, use_cached_namespaces=not args.no_cached_namespace, namespace=args.ns, verbose=True, - ) - _print_errors(validation_errors=validation_errors) - status = int(validation_errors is not None and len(validation_errors) > 0) - except ValueError as e: - print(e, file=sys.stderr) - status = 1 + status = 0 + for path in args.paths: + if args.list_namespaces: + cached_namespaces, _, _ = get_cached_namespaces_to_validate(path=path) + print("\n".join(cached_namespaces)) + else: + validation_errors = [] + try: + val_errors = validate( + path=path, use_cached_namespaces=not args.no_cached_namespace, namespace=args.ns, verbose=True, + ) + _print_errors(validation_errors=val_errors) + status = status or int(val_errors is not None and len(val_errors) > 0) + validation_errors.append(val_errors) + except ValueError as e: + print(e, file=sys.stderr) + status = 1 # write output to json file if args.json_file_path is not None: From 2224b4540d012282b7c7245caa8f3c6780271a2e Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Fri, 22 Nov 2024 16:25:40 -0800 Subject: [PATCH 22/40] update example validation for new io behavior --- src/pynwb/validation.py | 4 +++- test.py | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/pynwb/validation.py b/src/pynwb/validation.py index c5be91aed..b8784a073 100644 --- a/src/pynwb/validation.py +++ b/src/pynwb/validation.py @@ -55,8 +55,10 @@ def get_cached_namespaces_to_validate(path: Optional[str] = None, ) if io is not None: + # TODO update HDF5IO to have .file property to make consistent with ZarrIO + # then update input arguments here namespace_dependencies = io.load_namespaces(namespace_catalog=catalog, - file=io._file) # TODO would need to update HDMFIO, ZarrIO to support file input + file=io._file) else: from pynwb import _get_backend backend_io = _get_backend(path) diff --git a/test.py b/test.py index df1bc8354..d00a0d07c 100644 --- a/test.py +++ b/test.py @@ -169,7 +169,7 @@ def validate_nwbs(): is_family_nwb_file = False try: with pynwb.NWBHDF5IO(nwb, mode='r') as io: - errors = validate(io) + errors = validate(io, use_cached_namespaces=False) # previously io did not validate against cached namespaces except OSError as e: # if the file was created with the family driver, need to use the family driver to open it if 'family driver should be used' in str(e): @@ -179,7 +179,7 @@ def validate_nwbs(): memb_size = 1024**2 # note: the memb_size must be the same as the one used to create the file with h5py.File(filename_pattern, mode='r', driver='family', memb_size=memb_size) as f: with pynwb.NWBHDF5IO(file=f, manager=None, mode='r') as io: - errors = validate(io) + errors = validate(io, use_cached_namespaces=False) else: raise e From f0912f37bddedc9cd82484f4edb270aa25320ccb Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Fri, 22 Nov 2024 17:13:36 -0800 Subject: [PATCH 23/40] update ruff ignores --- pyproject.toml | 2 +- src/pynwb/validation.py | 4 ++-- tests/validation/test_validate.py | 6 ++++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c2458b420..1b80cb839 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -114,7 +114,7 @@ line-length = 120 "docs/gallery/*" = ["E402", "T201"] "src/*/__init__.py" = ["F401"] "src/pynwb/_version.py" = ["T201"] -"src/pynwb/validate.py" = ["T201"] +"src/pynwb/validation_cli.py" = ["T201"] "scripts/*" = ["T201"] # "test_gallery.py" = ["T201"] # Uncomment when test_gallery.py is created diff --git a/src/pynwb/validation.py b/src/pynwb/validation.py index b8784a073..ad856d3b1 100644 --- a/src/pynwb/validation.py +++ b/src/pynwb/validation.py @@ -187,8 +187,8 @@ def validate(**kwargs): validation_errors = [] for validation_namespace in namespaces_to_validate: if verbose: - print(f"Validating {f'{path} ' if path is not None else ''}against " - f"{namespace_message} using namespace '{validation_namespace}'.") + print(f"Validating {f'{path} ' if path is not None else ''}against " # noqa: T201 + f"{namespace_message} using namespace '{validation_namespace}'.") validation_errors += _validate_helper(io=io, namespace=validation_namespace) return validation_errors diff --git a/tests/validation/test_validate.py b/tests/validation/test_validate.py index 11c3029db..45493403b 100644 --- a/tests/validation/test_validate.py +++ b/tests/validation/test_validate.py @@ -180,14 +180,16 @@ def test_validate_file_list_namespaces_extension(self): def test_validate_file_json_output(self): """Test that validating a file with the json flag ouputs a json file.""" json_path = "test_validation.json" - run_coverage(["tests/back_compat/1.0.2_str_experimenter.nwb", "--no-cached-namespace", "--json-file-path", json_path]) + run_coverage(["tests/back_compat/1.0.2_str_experimenter.nwb", "--no-cached-namespace", + "--json-file-path", json_path]) self.assertTrue(os.path.exists(json_path)) os.remove(json_path) def test_validation_entry_point(self): """Test that using the validation entry point succesfully executes the validate CLI.""" json_path = "test_validation_entry_point.json" - subprocess.run(["pynwb-validate", "tests/back_compat/1.0.2_str_experimenter.nwb", "--json-file-path", json_path]) + subprocess.run(["pynwb-validate", "tests/back_compat/1.0.2_str_experimenter.nwb", + "--json-file-path", json_path]) self.assertTrue(os.path.exists(json_path)) os.remove(json_path) From 8263b778d4d94995a1dba83b205a08354be8d261 Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Fri, 22 Nov 2024 17:14:18 -0800 Subject: [PATCH 24/40] fix test comments --- tests/validation/test_validate.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/validation/test_validate.py b/tests/validation/test_validate.py index 45493403b..8290687cc 100644 --- a/tests/validation/test_validate.py +++ b/tests/validation/test_validate.py @@ -178,7 +178,7 @@ def test_validate_file_list_namespaces_extension(self): self.assertRegex(result.stdout.decode('utf-8'), stdout_regex) def test_validate_file_json_output(self): - """Test that validating a file with the json flag ouputs a json file.""" + """Test that validating a file with the json flag outputs a json file.""" json_path = "test_validation.json" run_coverage(["tests/back_compat/1.0.2_str_experimenter.nwb", "--no-cached-namespace", "--json-file-path", json_path]) @@ -186,7 +186,7 @@ def test_validate_file_json_output(self): os.remove(json_path) def test_validation_entry_point(self): - """Test that using the validation entry point succesfully executes the validate CLI.""" + """Test that using the validation entry point successfully executes the validate CLI.""" json_path = "test_validation_entry_point.json" subprocess.run(["pynwb-validate", "tests/back_compat/1.0.2_str_experimenter.nwb", "--json-file-path", json_path]) From 8e5aa62efea21874eaa773b598265d12d5d5c86a Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Wed, 27 Nov 2024 13:40:59 -0500 Subject: [PATCH 25/40] update CHANGELOG --- CHANGELOG.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b0157f54d..9f4cefded 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,18 @@ # PyNWB Changelog +## PyNWB 3.0.0 (Upcoming) + +### Breaking changes +- The validation methods have been updated with multiple breaking changes. @stephprince [#1911](https://github.com/NeurodataWithoutBorders/pynwb/pull/1911) + - The behavior of `pynwb.validate(io=...)` now matches the behavior of `pynwb.validate(path=...)`. In previous pynwb versions, `pynwb.validate(io=...)` did not use the cached namespaces during validation. To obtain the same behavior as in previous versions, you can update the function call to `pynwb.validate(io=..., used_cached_namespaces=False)` + - The validate module has been renamed to `validation.py`. The validate method can be + imported using `import pynwb; pynwb.validate` or `from pynwb.validation import validate` + +### Enhancements and minor changes +- Added enhancements to the validation CLI. @stephprince [#1911](https://github.com/NeurodataWithoutBorders/pynwb/pull/1911) + - Added an entry point for the validation module. You can now use `pynwb-validate "file.nwb"` . + - Added the `--json-file-path` CLI argument to output validation results in a machine readable format. + ## PyNWB 2.8.3 (November 19, 2024) ### Enhancements and minor changes From 72779d54a643d367c7a353d9547c756b02706994 Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Wed, 27 Nov 2024 13:41:31 -0500 Subject: [PATCH 26/40] add tests for _get_backend --- src/pynwb/__init__.py | 7 +----- tests/integration/utils/test_io_utils.py | 27 ++++++++++++++++++++++-- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/src/pynwb/__init__.py b/src/pynwb/__init__.py index d2004cd0d..64cc15cfb 100644 --- a/src/pynwb/__init__.py +++ b/src/pynwb/__init__.py @@ -362,12 +362,7 @@ def _get_backend(path: str): backend_io_classes = [NWBHDF5IO, NWBZarrIO] backend_options = [b for b in backend_io_classes if b.can_read(path=path)] if len(backend_options) == 0: - warn(f"Could not find an IO to read the file '{path}'." - f"This may be due to an older file version or invalid file." - f"Defaulting to NWBHDF5IO.", UserWarning) - return NWBHDF5IO - elif len(backend_options) > 1: - raise ValueError(f"Multiple backends found for file '{path}': {backend_options}") + raise ValueError(f"Could not find an IO to read the file '{path}'.") else: return backend_options[0] diff --git a/tests/integration/utils/test_io_utils.py b/tests/integration/utils/test_io_utils.py index 73732924f..fe3a5010b 100644 --- a/tests/integration/utils/test_io_utils.py +++ b/tests/integration/utils/test_io_utils.py @@ -1,10 +1,13 @@ """Tests related to pynwb.io.utils.""" import pytest +from datetime import datetime +from dateutil.tz import tzutc + from hdmf.build import GroupBuilder from pynwb.io.utils import get_nwb_version -from pynwb.testing import TestCase - +from pynwb.testing import TestCase, remove_test_file +from pynwb import NWBFile, NWBHDF5IO, _get_backend class TestGetNWBVersion(TestCase): @@ -53,3 +56,23 @@ def test_get_nwb_version_20b(self): builder1.set_attribute(name="nwb_version", value="2.0b") assert get_nwb_version(builder1) == (2, 0, 0) assert get_nwb_version(builder1, include_prerelease=True) == (2, 0, 0, "b") + +class TestGetNWBVersion(TestCase): + def setUp(self): + self.nwbfile = NWBFile(session_description='a test NWB File', + identifier='TEST123', + session_start_time=datetime(1970, 1, 1, 12, tzinfo=tzutc())) + self.hdf5_path = "test_pynwb_io_nwbhdf5.nwb" + with NWBHDF5IO(self.hdf5_path, 'w') as io: + io.write(self.nwbfile) + + def tearDown(self): + remove_test_file(self.hdf5_path) + + def test_get_backend_invalid_file(self): + with self.assertRaises(ValueError): + _get_backend('not_a_file.nwb') + + def test_get_backend_HDF5(self): + backend_io = _get_backend(self.hdf5_path) + self.assertEqual(backend_io, NWBHDF5IO) \ No newline at end of file From be666b673a2f85aa78b5703214622f005827a89b Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Wed, 27 Nov 2024 14:23:58 -0500 Subject: [PATCH 27/40] update backend imports for optional zarr --- src/pynwb/__init__.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/pynwb/__init__.py b/src/pynwb/__init__.py index 64cc15cfb..6ccce7ee8 100644 --- a/src/pynwb/__init__.py +++ b/src/pynwb/__init__.py @@ -357,12 +357,16 @@ def get_sum(self, a, b): @docval({'name': 'path', 'type': str, 'doc': 'the neurodata_type to get the NWBContainer class for'}, is_method=False) def _get_backend(path: str): - from hdmf_zarr import NWBZarrIO + try: + from hdmf_zarr import NWBZarrIO + backend_io_classes = [NWBHDF5IO, NWBZarrIO] + except ImportError: + backend_io_classes = [NWBHDF5IO] - backend_io_classes = [NWBHDF5IO, NWBZarrIO] backend_options = [b for b in backend_io_classes if b.can_read(path=path)] if len(backend_options) == 0: - raise ValueError(f"Could not find an IO to read the file '{path}'.") + raise ValueError(f"Could not find an IO to read the file '{path}'. If you are trying to read " + f"a Zarr file, make sure you have hdmf-zarr installed.") else: return backend_options[0] From f319e882ad7880f5f9d6785b33abe5b281f6841b Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Wed, 27 Nov 2024 16:38:49 -0500 Subject: [PATCH 28/40] fix test name --- tests/integration/utils/test_io_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/utils/test_io_utils.py b/tests/integration/utils/test_io_utils.py index fe3a5010b..44e7baffb 100644 --- a/tests/integration/utils/test_io_utils.py +++ b/tests/integration/utils/test_io_utils.py @@ -57,7 +57,7 @@ def test_get_nwb_version_20b(self): assert get_nwb_version(builder1) == (2, 0, 0) assert get_nwb_version(builder1, include_prerelease=True) == (2, 0, 0, "b") -class TestGetNWBVersion(TestCase): +class TestGetNWBBackend(TestCase): def setUp(self): self.nwbfile = NWBFile(session_description='a test NWB File', identifier='TEST123', From 12bc84c4b6966a1244006833d90a89d067b8eab6 Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Wed, 27 Nov 2024 16:41:20 -0500 Subject: [PATCH 29/40] update test filename --- tests/integration/utils/test_io_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/utils/test_io_utils.py b/tests/integration/utils/test_io_utils.py index 44e7baffb..7f36e5474 100644 --- a/tests/integration/utils/test_io_utils.py +++ b/tests/integration/utils/test_io_utils.py @@ -62,7 +62,7 @@ def setUp(self): self.nwbfile = NWBFile(session_description='a test NWB File', identifier='TEST123', session_start_time=datetime(1970, 1, 1, 12, tzinfo=tzutc())) - self.hdf5_path = "test_pynwb_io_nwbhdf5.nwb" + self.hdf5_path = "test_pynwb_nwb_backend.nwb" with NWBHDF5IO(self.hdf5_path, 'w') as io: io.write(self.nwbfile) From 35181ed35bbf7d4844330e132b43ee2a7ffb6fdf Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Mon, 16 Dec 2024 14:49:32 -0800 Subject: [PATCH 30/40] close io after validation --- src/pynwb/validation.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/pynwb/validation.py b/src/pynwb/validation.py index ad856d3b1..583f23682 100644 --- a/src/pynwb/validation.py +++ b/src/pynwb/validation.py @@ -67,7 +67,6 @@ def get_cached_namespaces_to_validate(path: Optional[str] = None, driver=driver, aws_region=aws_region) - # Determine which namespaces are the most specific (i.e. extensions) and validate against those candidate_namespaces = set(namespace_dependencies.keys()) for namespace_dependency in namespace_dependencies: @@ -163,7 +162,7 @@ def validate(**kwargs): namespaces_to_validate = [CORE_NAMESPACE] # get io object if not provided - if io is None: + if path is not None: from pynwb import _get_backend backend_io = _get_backend(path) io = backend_io(**io_kwargs) @@ -191,5 +190,8 @@ def validate(**kwargs): f"{namespace_message} using namespace '{validation_namespace}'.") validation_errors += _validate_helper(io=io, namespace=validation_namespace) + if path is not None: + io.close() # close the io object if it was created within this function, otherwise leave as is + return validation_errors From d2f7561b469d105502ec4c84d36b41a898c5abf3 Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Mon, 16 Dec 2024 14:49:53 -0800 Subject: [PATCH 31/40] fix test assertion --- tests/integration/ros3/test_ros3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/ros3/test_ros3.py b/tests/integration/ros3/test_ros3.py index 835172a3c..347b070e4 100644 --- a/tests/integration/ros3/test_ros3.py +++ b/tests/integration/ros3/test_ros3.py @@ -90,7 +90,7 @@ def test_dandi_get_cached_namespaces(self): ) self.assertCountEqual(first=found_namespaces, second=expected_namespaces) - self.assertDictEqual(d1=expected_namespace_dependencies, d2=expected_namespace_dependencies) + self.assertDictEqual(d1=found_namespace_dependencies, d2=expected_namespace_dependencies) def test_dandi_validate(self): result = validate(path=self.s3_test_path, driver="ros3") From 11cc2328130f9420359282cee4657536415a449b Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Mon, 16 Dec 2024 15:08:55 -0800 Subject: [PATCH 32/40] add condition for ros3 validation --- src/pynwb/__init__.py | 6 +++++- src/pynwb/validation.py | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/pynwb/__init__.py b/src/pynwb/__init__.py index 6ccce7ee8..37779a2bc 100644 --- a/src/pynwb/__init__.py +++ b/src/pynwb/__init__.py @@ -355,8 +355,12 @@ def get_sum(self, a, b): @docval({'name': 'path', 'type': str, 'doc': 'the neurodata_type to get the NWBContainer class for'}, + {"name": "method", "type": str, "doc": "the method to use when opening the file", 'default': None}, is_method=False) -def _get_backend(path: str): +def _get_backend(path: str, method: str = None): + if method == "ros3": + return NWBHDF5IO # TODO - add additional conditions for other streaming methods + try: from hdmf_zarr import NWBZarrIO backend_io_classes = [NWBHDF5IO, NWBZarrIO] diff --git a/src/pynwb/validation.py b/src/pynwb/validation.py index 583f23682..01b62627d 100644 --- a/src/pynwb/validation.py +++ b/src/pynwb/validation.py @@ -61,7 +61,7 @@ def get_cached_namespaces_to_validate(path: Optional[str] = None, file=io._file) else: from pynwb import _get_backend - backend_io = _get_backend(path) + backend_io = _get_backend(path, method=driver) namespace_dependencies = backend_io.load_namespaces(namespace_catalog=catalog, path=path, driver=driver, @@ -164,7 +164,7 @@ def validate(**kwargs): # get io object if not provided if path is not None: from pynwb import _get_backend - backend_io = _get_backend(path) + backend_io = _get_backend(path, method=driver) io = backend_io(**io_kwargs) # check namespaces are accurate From bb4c31c62664db041cec498af4ebdb937edb86a3 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Sun, 22 Dec 2024 10:16:47 -0800 Subject: [PATCH 33/40] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index df06cdd06..0694aa427 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ ### Breaking changes - The validation methods have been updated with multiple breaking changes. @stephprince [#1911](https://github.com/NeurodataWithoutBorders/pynwb/pull/1911) - - The behavior of `pynwb.validate(io=...)` now matches the behavior of `pynwb.validate(path=...)`. In previous pynwb versions, `pynwb.validate(io=...)` did not use the cached namespaces during validation. To obtain the same behavior as in previous versions, you can update the function call to `pynwb.validate(io=..., used_cached_namespaces=False)` + - The behavior of `pynwb.validate(io=...)` now matches the behavior of `pynwb.validate(path=...)`. In previous pynwb versions, `pynwb.validate(io=...)` did not use the cached namespaces during validation. To obtain the same behavior as in previous versions, you can update the function call to `pynwb.validate(io=..., use_cached_namespaces=False)` - The validate module has been renamed to `validation.py`. The validate method can be imported using `import pynwb; pynwb.validate` or `from pynwb.validation import validate` From 46ade5438da1e515fe87c57abb502a4fd00f59c1 Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Mon, 23 Dec 2024 10:27:54 -0500 Subject: [PATCH 34/40] Apply suggestions from code review Co-authored-by: Ryan Ly --- CHANGELOG.md | 2 +- src/pynwb/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0694aa427..6d0bb5e87 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,7 @@ - The validation methods have been updated with multiple breaking changes. @stephprince [#1911](https://github.com/NeurodataWithoutBorders/pynwb/pull/1911) - The behavior of `pynwb.validate(io=...)` now matches the behavior of `pynwb.validate(path=...)`. In previous pynwb versions, `pynwb.validate(io=...)` did not use the cached namespaces during validation. To obtain the same behavior as in previous versions, you can update the function call to `pynwb.validate(io=..., use_cached_namespaces=False)` - The validate module has been renamed to `validation.py`. The validate method can be - imported using `import pynwb; pynwb.validate` or `from pynwb.validation import validate` + imported using `import pynwb; pynwb.validate` or `from pynwb import validate` ### Enhancements and minor changes - Added support for NWB schema 2.8.0. @rly [#2001](https://github.com/NeurodataWithoutBorders/pynwb/pull/2001) diff --git a/src/pynwb/__init__.py b/src/pynwb/__init__.py index 611c90c06..b42f07b24 100644 --- a/src/pynwb/__init__.py +++ b/src/pynwb/__init__.py @@ -350,7 +350,7 @@ def get_sum(self, a, b): return __TYPE_MAP.get_dt_container_cls(neurodata_type, namespace) -@docval({'name': 'path', 'type': str, 'doc': 'the neurodata_type to get the NWBContainer class for'}, +@docval({'name': 'path', 'type': str, 'doc': 'Path to the NWB file which can be an HDF5 file or a Zarr store.'}, {"name": "method", "type": str, "doc": "the method to use when opening the file", 'default': None}, is_method=False) def _get_backend(path: str, method: str = None): From 9ff9eba21695094f8fcbf5e3d436c822d69b3cd8 Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Mon, 23 Dec 2024 10:52:04 -0500 Subject: [PATCH 35/40] update cli args and docs --- docs/source/validation.rst | 4 ++-- src/pynwb/validation_cli.py | 10 +++++----- tests/validation/test_validate.py | 8 ++++---- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/source/validation.rst b/docs/source/validation.rst index 65272db3f..f267a88cb 100644 --- a/docs/source/validation.rst +++ b/docs/source/validation.rst @@ -64,10 +64,10 @@ of the **core** NWB specification that are installed with newer or older version -lns, --list-namespaces List the available namespaces and exit. -n NS, --ns NS the namespace to validate against - --json-file-path JSON_FILE_PATH + --json-output-path JSON_OUTPUT_PATH Write json output to this location. --no-cached-namespace - Use the PyNWB loaded namespace (true) or use the cached namespace (false; default). + Use the namespaces installed by PyNWB (true) or use the cached namespaces (false; default). If --ns is not specified, validate against all namespaces in the NWB file. diff --git a/src/pynwb/validation_cli.py b/src/pynwb/validation_cli.py index e11562642..01de93729 100644 --- a/src/pynwb/validation_cli.py +++ b/src/pynwb/validation_cli.py @@ -40,13 +40,13 @@ def validation_cli(): # Common args to the API validate parser.add_argument("paths", type=str, nargs="+", help="NWB file paths") parser.add_argument("-n", "--ns", type=str, help="the namespace to validate against") - parser.add_argument("--json-file-path", dest="json_file_path", type=str, help="Write json output to this location.") + parser.add_argument("--json-output-path", dest="json_output_path", type=str, help="Write json output to this location.") feature_parser = parser.add_mutually_exclusive_group(required=False) feature_parser.add_argument( "--no-cached-namespace", dest="no_cached_namespace", action="store_true", - help="Use the PyNWB loaded namespace (true) or use the cached namespace (false; default).", + help="Use the namespaces installed by PyNWB (true) or use the cached namespaces (false; default).", ) parser.set_defaults(no_cached_namespace=False) args = parser.parse_args() @@ -70,11 +70,11 @@ def validation_cli(): status = 1 # write output to json file - if args.json_file_path is not None: - with open(args.json_file_path, "w") as f: + if args.json_output_path is not None: + with open(args.json_output_path, "w") as f: json_report = {'exitcode': status, 'errors': [str(e) for e in validation_errors]} json.dump(obj=json_report, fp=f) - print(f"Report saved to {str(Path(args.json_file_path).absolute())}!") + print(f"Report saved to {str(Path(args.json_output_path).absolute())}!") sys.exit(status) diff --git a/tests/validation/test_validate.py b/tests/validation/test_validate.py index 8290687cc..8b2c34888 100644 --- a/tests/validation/test_validate.py +++ b/tests/validation/test_validate.py @@ -181,7 +181,7 @@ def test_validate_file_json_output(self): """Test that validating a file with the json flag outputs a json file.""" json_path = "test_validation.json" run_coverage(["tests/back_compat/1.0.2_str_experimenter.nwb", "--no-cached-namespace", - "--json-file-path", json_path]) + "--json-output-path", json_path]) self.assertTrue(os.path.exists(json_path)) os.remove(json_path) @@ -189,7 +189,7 @@ def test_validation_entry_point(self): """Test that using the validation entry point successfully executes the validate CLI.""" json_path = "test_validation_entry_point.json" subprocess.run(["pynwb-validate", "tests/back_compat/1.0.2_str_experimenter.nwb", - "--json-file-path", json_path]) + "--json-output-path", json_path]) self.assertTrue(os.path.exists(json_path)) os.remove(json_path) @@ -292,7 +292,7 @@ def test_validate_io_and_path_same(self): ('tests/back_compat/1.1.2_nwbfile.nwb', 'hdmf-common'), ('tests/back_compat/2.1.0_nwbfile_with_extension.nwb', 'core'),] - # paths that cause errors + # paths that cause no errors for path, ns in tests: with patch("sys.stdout", new=StringIO()) as fake_out: with self.get_io(path=path) as io: @@ -308,7 +308,7 @@ def test_validate_io_and_path_same(self): self.assertEqual(results_io, results_path) self.assertEqual(out_io, out_path) - # paths that return no errors + # paths that return errors for path, ns in tests_with_error: with self.assertRaises(ValueError) as e_io: with self.get_io(path=path) as io: From 0337442416371ad96bbf377c5d20696594437d6b Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Mon, 23 Dec 2024 10:52:43 -0500 Subject: [PATCH 36/40] fix formatting --- src/pynwb/validation_cli.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/pynwb/validation_cli.py b/src/pynwb/validation_cli.py index 01de93729..ad774f25b 100644 --- a/src/pynwb/validation_cli.py +++ b/src/pynwb/validation_cli.py @@ -40,7 +40,8 @@ def validation_cli(): # Common args to the API validate parser.add_argument("paths", type=str, nargs="+", help="NWB file paths") parser.add_argument("-n", "--ns", type=str, help="the namespace to validate against") - parser.add_argument("--json-output-path", dest="json_output_path", type=str, help="Write json output to this location.") + parser.add_argument("--json-output-path", dest="json_output_path", type=str, + help="Write json output to this location.") feature_parser = parser.add_mutually_exclusive_group(required=False) feature_parser.add_argument( "--no-cached-namespace", From 0d4a40bf7671a0db3a88111cd57a1d1fe34b8da3 Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Mon, 23 Dec 2024 14:25:48 -0500 Subject: [PATCH 37/40] update json-file-path argname --- CHANGELOG.md | 2 +- docs/source/validation.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 35ff87df4..857c1cec5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,7 +17,7 @@ - Deprecated `ImageMaskSeries` neurodata type. @rly [#1941](https://github.com/NeurodataWithoutBorders/pynwb/pull/1941) - Added enhancements to the validation CLI. @stephprince [#1911](https://github.com/NeurodataWithoutBorders/pynwb/pull/1911) - Added an entry point for the validation module. You can now use `pynwb-validate "file.nwb"`. - - Added the `--json-file-path` CLI argument to output validation results in a machine readable format. + - Added the `--json-outpath-path` CLI argument to output validation results in a machine readable format. - Removed python 3.8 support, added python 3.13 support. @stephprince [#2007](https://github.com/NeurodataWithoutBorders/pynwb/pull/2007) ### Documentation and tutorial enhancements diff --git a/docs/source/validation.rst b/docs/source/validation.rst index f267a88cb..6e3ac7d2d 100644 --- a/docs/source/validation.rst +++ b/docs/source/validation.rst @@ -52,7 +52,7 @@ of the **core** NWB specification that are installed with newer or older version .. code-block:: text $pynwb-validate --help - usage: pynwb-validate [-h] [-lns] [-n NS] [--json-file-path JSON_FILE_PATH] [--no-cached-namespace] paths [paths ...] + usage: pynwb-validate [-h] [-lns] [-n NS] [--json-output-path JSON_OUTPUT_PATH] [--no-cached-namespace] paths [paths ...] Validate an NWB file From a02147553a5481033a20d4968d98fdfb20ac0203 Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Mon, 23 Dec 2024 15:48:17 -0500 Subject: [PATCH 38/40] update extension tutorial --- docs/gallery/general/extensions.py | 5 ++--- test.py | 4 +++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/gallery/general/extensions.py b/docs/gallery/general/extensions.py index ddf9159c7..3e41e7a91 100644 --- a/docs/gallery/general/extensions.py +++ b/docs/gallery/general/extensions.py @@ -37,8 +37,7 @@ ns_builder = NWBNamespaceBuilder( "Extension for use in my Lab", "mylab", version="0.1.0" ) - -ns_builder.include_type("ElectricalSeries", namespace="core") +ns_builder.include_namespace("core") ext = NWBGroupSpec( "A custom ElectricalSeries for my lab", @@ -264,7 +263,7 @@ def __init__(self, **kwargs): ext_source = name + ".extensions.yaml" ns_builder = NWBNamespaceBuilder(name + " extensions", name, version="0.1.0") -ns_builder.include_type("NWBDataInterface", namespace="core") +ns_builder.include_namespace("core") potato = NWBGroupSpec( neurodata_type_def="Potato", diff --git a/test.py b/test.py index d00a0d07c..721cd803f 100644 --- a/test.py +++ b/test.py @@ -169,7 +169,8 @@ def validate_nwbs(): is_family_nwb_file = False try: with pynwb.NWBHDF5IO(nwb, mode='r') as io: - errors = validate(io, use_cached_namespaces=False) # previously io did not validate against cached namespaces + errors = validate(io, use_cached_namespaces=False) + errors.extend(validate(io, use_cached_namespaces=True)) except OSError as e: # if the file was created with the family driver, need to use the family driver to open it if 'family driver should be used' in str(e): @@ -180,6 +181,7 @@ def validate_nwbs(): with h5py.File(filename_pattern, mode='r', driver='family', memb_size=memb_size) as f: with pynwb.NWBHDF5IO(file=f, manager=None, mode='r') as io: errors = validate(io, use_cached_namespaces=False) + errors.extend(validate(io, use_cached_namespaces=True)) else: raise e From f7de447e6fd3f44448c74add7aba829313c4f58a Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Mon, 23 Dec 2024 16:34:48 -0500 Subject: [PATCH 39/40] Apply suggestions from code review Co-authored-by: Ryan Ly --- CHANGELOG.md | 1 + src/pynwb/validation.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 857c1cec5..0c11c71eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ ### Breaking changes - The validation methods have been updated with multiple breaking changes. @stephprince [#1911](https://github.com/NeurodataWithoutBorders/pynwb/pull/1911) - The behavior of `pynwb.validate(io=...)` now matches the behavior of `pynwb.validate(path=...)`. In previous pynwb versions, `pynwb.validate(io=...)` did not use the cached namespaces during validation. To obtain the same behavior as in previous versions, you can update the function call to `pynwb.validate(io=..., use_cached_namespaces=False)` + - `pynwb.validate` will return only a list of validation errors instead of a tuple: (list of validation_errors, status code) - The validate module has been renamed to `validation.py`. The validate method can be imported using `import pynwb; pynwb.validate` or `from pynwb import validate` diff --git a/src/pynwb/validation.py b/src/pynwb/validation.py index 01b62627d..fee80adff 100644 --- a/src/pynwb/validation.py +++ b/src/pynwb/validation.py @@ -122,7 +122,7 @@ def get_cached_namespaces_to_validate(path: Optional[str] = None, "default": None, }, returns="Validation errors in the file.", - rtype=(list, (list, bool)), + rtype=list, is_method=False, ) def validate(**kwargs): From e91a483dbe96b884c2193a34a4b7e9b836ca31f6 Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Mon, 23 Dec 2024 16:38:03 -0500 Subject: [PATCH 40/40] warn for positional args in validation --- src/pynwb/validation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/pynwb/validation.py b/src/pynwb/validation.py index fee80adff..d2fb5cb17 100644 --- a/src/pynwb/validation.py +++ b/src/pynwb/validation.py @@ -5,7 +5,7 @@ from hdmf.spec import NamespaceCatalog from hdmf.build import BuildManager, TypeMap -from hdmf.utils import docval, getargs +from hdmf.utils import docval, getargs, AllowPositional from hdmf.backends.io import HDMFIO from hdmf.validate import ValidatorMap @@ -124,6 +124,7 @@ def get_cached_namespaces_to_validate(path: Optional[str] = None, returns="Validation errors in the file.", rtype=list, is_method=False, + allow_positional=AllowPositional.WARNING, ) def validate(**kwargs): """Validate NWB file(s) against a namespace or its cached namespaces.