From f02e61b4bad762703ea443c363f172a16d3bac52 Mon Sep 17 00:00:00 2001
From: Steph Prince <40640337+stephprince@users.noreply.github.com>
Date: Fri, 3 Jan 2025 12:02:06 -0500
Subject: [PATCH] Upgrade validation methods (#1911)

* reconcile io path validator differences

* make io and path output the same

* update tests for io validation

* add test that io and path output is same

* update tests for io output with status

* add validate helper function and fix status tracking

* update arguments and status checks

* fix test comparing io and path outputs

* update error message for io inputs

* add tests to compare all io and path outputs

* remove script for testing

* add initial json export option

* update validator inputs and outputs, remove hdf5io references

* update tests for non status output

* add tests for json validator output

* move pynwb.validate into validation module

* update json report

* add validation entry point

* separate cli and validation function files

* update validation tutorial

* move get_backend to pynwb init

* update example validation for new io behavior

* update ruff ignores

* fix test comments

* update CHANGELOG

* add tests for _get_backend

* update backend imports for optional zarr

* fix test name

* update test filename

* close io after validation

* fix test assertion

* add condition for ros3 validation

* Update CHANGELOG.md

* Apply suggestions from code review

Co-authored-by: Ryan Ly <rly@lbl.gov>

* update cli args and docs

* fix formatting

* update json-file-path argname

* update extension tutorial

* Apply suggestions from code review

Co-authored-by: Ryan Ly <rly@lbl.gov>

* warn for positional args in validation

---------

Co-authored-by: Ryan Ly <rly@lbl.gov>
---
 CHANGELOG.md                             |  10 +
 docs/gallery/general/extensions.py       |   5 +-
 docs/source/validation.rst               |  28 ++-
 pyproject.toml                           |   5 +-
 src/pynwb/__init__.py                    |  23 ++-
 src/pynwb/testing/testh5io.py            |   8 +-
 src/pynwb/validate.py                    | 252 -----------------------
 src/pynwb/validation.py                  | 198 ++++++++++++++++++
 src/pynwb/validation_cli.py              |  84 ++++++++
 test.py                                  |  14 +-
 tests/integration/ros3/test_ros3.py      |   7 +-
 tests/integration/utils/test_io_utils.py |  27 ++-
 tests/validation/test_validate.py        | 146 +++++++------
 13 files changed, 465 insertions(+), 342 deletions(-)
 delete mode 100644 src/pynwb/validate.py
 create mode 100644 src/pynwb/validation.py
 create mode 100644 src/pynwb/validation_cli.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 503df39e6..b416b5025 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,13 @@
 
 ## PyNWB 3.0.0 (Upcoming)
 
+### Breaking changes
+- The validation methods have been updated with multiple breaking changes. @stephprince [#1911](https://github.com/NeurodataWithoutBorders/pynwb/pull/1911)
+   - The behavior of `pynwb.validate(io=...)` now matches the behavior of `pynwb.validate(path=...)`. In previous pynwb versions, `pynwb.validate(io=...)` did not use the cached namespaces during validation. To obtain the same behavior as in previous versions, you can update the function call to `pynwb.validate(io=..., use_cached_namespaces=False)`
+   - `pynwb.validate` will return only a list of validation errors instead of a tuple: (list of validation_errors, status code)
+   - The validate module has been renamed to `validation.py`. The validate method can be 
+   imported using `import pynwb; pynwb.validate` or `from pynwb import validate`
+
 ### Deprecations
 - The following deprecated classes will now raise errors when creating new instances of these classes: ``ClusteringWaveforms``, ``Clustering``, ``SweepTable``. Reading files using these data types will continue to be supported.
 - The following methods and arguments have been deprecated:
@@ -30,6 +37,9 @@
   - Added support for `model_number`, `model_name`, and `serial_number` fields to `Device`. @stephprince [#1997](https://github.com/NeurodataWithoutBorders/pynwb/pull/1997)
   - Deprecated `EventWaveform` neurodata type. @rly [#1940](https://github.com/NeurodataWithoutBorders/pynwb/pull/1940)
   - Deprecated `ImageMaskSeries` neurodata type. @rly [#1941](https://github.com/NeurodataWithoutBorders/pynwb/pull/1941)
+- Added enhancements to the validation CLI. @stephprince [#1911](https://github.com/NeurodataWithoutBorders/pynwb/pull/1911)
+  - Added an entry point for the validation module. You can now use `pynwb-validate "file.nwb"`.
+  - Added the `--json-outpath-path` CLI argument to output validation results in a machine readable format.
 - Removed python 3.8 support, added python 3.13 support. @stephprince [#2007](https://github.com/NeurodataWithoutBorders/pynwb/pull/2007)
 - Added warnings when using positional arguments in `Container` constructor methods. Positional arguments will raise errors in the next major release. @stephprince [#1972](https://github.com/NeurodataWithoutBorders/pynwb/pull/1972)
 
diff --git a/docs/gallery/general/extensions.py b/docs/gallery/general/extensions.py
index ddf9159c7..3e41e7a91 100644
--- a/docs/gallery/general/extensions.py
+++ b/docs/gallery/general/extensions.py
@@ -37,8 +37,7 @@
 ns_builder = NWBNamespaceBuilder(
     "Extension for use in my Lab", "mylab", version="0.1.0"
 )
-
-ns_builder.include_type("ElectricalSeries", namespace="core")
+ns_builder.include_namespace("core")
 
 ext = NWBGroupSpec(
     "A custom ElectricalSeries for my lab",
@@ -264,7 +263,7 @@ def __init__(self, **kwargs):
 ext_source = name + ".extensions.yaml"
 
 ns_builder = NWBNamespaceBuilder(name + " extensions", name, version="0.1.0")
-ns_builder.include_type("NWBDataInterface", namespace="core")
+ns_builder.include_namespace("core")
 
 potato = NWBGroupSpec(
     neurodata_type_def="Potato",
diff --git a/docs/source/validation.rst b/docs/source/validation.rst
index 73c138127..6e3ac7d2d 100644
--- a/docs/source/validation.rst
+++ b/docs/source/validation.rst
@@ -3,12 +3,21 @@
 Validating NWB files
 ====================
 
+.. note:: 
+  
+  The pynwb validation CLI checks for structural compliance of NWB files with the NWB schema. 
+  It is recommended to use the `NWBInspector CLI <https://nwbinspector.readthedocs.io/en/dev/>`_ 
+  for more comprehensive validation of both structural compliance with the NWB schema and 
+  compliance of data with NWB best practices. The NWBInspector runs both PyNWB validation as 
+  described here and additional data checks.
+
+
 Validating NWB files is handled by a command-line tool available in :py:mod:`~pynwb`.
 The validator can be invoked like so:
 
 .. code-block:: bash
 
-  python -m pynwb.validate test.nwb
+  pynwb-validate test.nwb
 
 If the file contains no NWB extensions, then this command will validate the file ``test.nwb`` against the
 *core* NWB specification. On success, the output will be:
@@ -29,7 +38,7 @@ within the ``test.nwb`` file.
 
 .. code-block:: bash
 
-  python -m pynwb.validate -n ndx-my-extension test.nwb
+  pynwb-validate -n ndx-my-extension test.nwb
 
 To validate against the version of the **core** NWB specification that is included with the installed version of
 PyNWB, use the ``--no-cached-namespace`` flag. This can be useful in validating files against newer or older versions
@@ -37,27 +46,28 @@ of the **core** NWB specification that are installed with newer or older version
 
 .. code-block:: bash
 
-  python -m pynwb.validate --no-cached-namespace test.nwb
+  pynwb-validate --no-cached-namespace test.nwb
 
 .. Last updated 8/13/2021
 .. code-block:: text
 
-  $python -m pynwb.validate --help
-  usage: validate.py [-h] [-n NS] [-lns] [--cached-namespace | --no-cached-namespace] paths [paths ...]
+  $pynwb-validate --help
+  usage: pynwb-validate [-h] [-lns] [-n NS] [--json-output-path JSON_OUTPUT_PATH] [--no-cached-namespace] paths [paths ...]
 
   Validate an NWB file
 
   positional arguments:
     paths                 NWB file paths
 
-  optional arguments:
+  options:
     -h, --help            show this help message and exit
-    -n NS, --ns NS        the namespace to validate against
     -lns, --list-namespaces
                           List the available namespaces and exit.
-    --cached-namespace    Use the cached namespace (default).
+    -n NS, --ns NS        the namespace to validate against
+    --json-output-path JSON_OUTPUT_PATH
+                          Write json output to this location.
     --no-cached-namespace
-                          Don't use the cached namespace.
+                          Use the namespaces installed by PyNWB (true) or use the cached namespaces (false; default).
 
   If --ns is not specified, validate against all namespaces in the NWB file.
 
diff --git a/pyproject.toml b/pyproject.toml
index 122fbd05b..755d3384e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,6 +48,9 @@ dynamic = ["version"] # the build backend will compute the version dynamically f
 "Homepage" = "https://github.com/NeurodataWithoutBorders/pynwb"
 "Bug Tracker" = "https://github.com/NeurodataWithoutBorders/pynwb/issues"
 
+[project.scripts]
+pynwb-validate = "pynwb.validation_cli:validation_cli"
+
 [tool.hatch.version]
 source = "vcs"
 
@@ -111,7 +114,7 @@ line-length = 120
 "docs/gallery/*" = ["E402", "T201"]
 "src/*/__init__.py" = ["F401"]
 "src/pynwb/_version.py" = ["T201"]
-"src/pynwb/validate.py" = ["T201"]
+"src/pynwb/validation_cli.py" = ["T201"]
 "scripts/*" = ["T201"]
 
 # "test_gallery.py" = ["T201"] # Uncomment when test_gallery.py is created
diff --git a/src/pynwb/__init__.py b/src/pynwb/__init__.py
index 5f8fec157..409c28c34 100644
--- a/src/pynwb/__init__.py
+++ b/src/pynwb/__init__.py
@@ -23,7 +23,7 @@
 CORE_NAMESPACE = 'core'
 
 from .spec import NWBDatasetSpec, NWBGroupSpec, NWBNamespace  # noqa E402
-from .validate import validate  # noqa: F401, E402
+from .validation import validate  # noqa: F401, E402
 
 try:
     # see https://effigies.gitlab.io/posts/python-packaging-2023/
@@ -344,6 +344,27 @@ def get_sum(self, a, b):
     return __TYPE_MAP.get_dt_container_cls(neurodata_type, namespace)
 
 
+@docval({'name': 'path', 'type': str, 'doc': 'Path to the NWB file which can be an HDF5 file or a Zarr store.'},
+        {"name": "method", "type": str, "doc": "the method to use when opening the file", 'default': None},
+        is_method=False)
+def _get_backend(path: str, method: str = None):
+    if method == "ros3":
+        return NWBHDF5IO  # TODO - add additional conditions for other streaming methods
+
+    try:
+        from hdmf_zarr import NWBZarrIO
+        backend_io_classes = [NWBHDF5IO, NWBZarrIO]
+    except ImportError:
+        backend_io_classes = [NWBHDF5IO]
+
+    backend_options = [b for b in backend_io_classes if b.can_read(path=path)]
+    if len(backend_options) == 0:
+        raise ValueError(f"Could not find an IO to read the file '{path}'. If you are trying to read "
+                         f"a Zarr file, make sure you have hdmf-zarr installed.")
+    else:
+        return backend_options[0]
+
+
 class NWBHDF5IO(_HDF5IO):
 
     @staticmethod
diff --git a/src/pynwb/testing/testh5io.py b/src/pynwb/testing/testh5io.py
index 7234e79f5..265cd5944 100644
--- a/src/pynwb/testing/testh5io.py
+++ b/src/pynwb/testing/testh5io.py
@@ -163,12 +163,12 @@ def getContainer(self, nwbfile):
     def validate(self):
         """ Validate the created files """
         if os.path.exists(self.filename):
-            errors, _ = pynwb_validate(paths=[self.filename])
+            errors = pynwb_validate(path=self.filename)
             if errors:
                 raise Exception("\n".join(errors))
 
         if os.path.exists(self.export_filename):
-            errors, _ = pynwb_validate(paths=[self.export_filename])
+            errors = pynwb_validate(path=self.export_filename)
             if errors:
                 raise Exception("\n".join(errors))
 
@@ -366,11 +366,11 @@ def roundtripExportContainer(self, cache_spec=True):
     def validate(self):
         """Validate the created files."""
         if os.path.exists(self.filename):
-            errors, _ = pynwb_validate(paths=[self.filename])
+            errors = pynwb_validate(path=self.filename)
             if errors:
                 raise Exception("\n".join(errors))
 
         if os.path.exists(self.export_filename):
-            errors, _ = pynwb_validate(paths=[self.export_filename])
+            errors = pynwb_validate(path=self.export_filename)
             if errors:
                 raise Exception("\n".join(errors))
diff --git a/src/pynwb/validate.py b/src/pynwb/validate.py
deleted file mode 100644
index 880f860a6..000000000
--- a/src/pynwb/validate.py
+++ /dev/null
@@ -1,252 +0,0 @@
-"""Command line tool to Validate an NWB file against a namespace."""
-import sys
-from argparse import ArgumentParser
-from typing import Tuple, List, Dict, Optional
-
-from hdmf.spec import NamespaceCatalog
-from hdmf.build import BuildManager
-from hdmf.build import TypeMap as TypeMap
-from hdmf.utils import docval, getargs
-from hdmf.backends.io import HDMFIO
-from hdmf.validate import ValidatorMap
-
-from pynwb import CORE_NAMESPACE
-from pynwb.spec import NWBDatasetSpec, NWBGroupSpec, NWBNamespace
-
-
-def _print_errors(validation_errors: list):
-    if validation_errors:
-        print(" - found the following errors:", file=sys.stderr)
-        for err in validation_errors:
-            print(str(err), file=sys.stderr)
-    else:
-        print(" - no errors found.")
-
-
-def _validate_helper(io: HDMFIO, namespace: str = CORE_NAMESPACE) -> list:
-    builder = io.read_builder()
-    validator = ValidatorMap(io.manager.namespace_catalog.get_namespace(name=namespace))
-    return validator.validate(builder)
-
-
-def get_cached_namespaces_to_validate(
-    path: str, driver: Optional[str] = None, aws_region: Optional[str] = None,
-) -> Tuple[List[str], BuildManager, Dict[str, str]]:
-    """
-    Determine the most specific namespace(s) that are cached in the given NWBFile that can be used for validation.
-
-    Example
-    -------
-    The following example illustrates how we can use this function to validate against namespaces
-    cached in a file. This is useful, e.g., when a file was created using an extension
-
-    .. code-block:: python
-
-        from pynwb import validate
-        from pynwb.validate import get_cached_namespaces_to_validate
-        path = "my_nwb_file.nwb"
-        validate_namespaces, manager, cached_namespaces = get_cached_namespaces_to_validate(path)
-        with NWBHDF5IO(path, "r", manager=manager) as reader:
-            errors = []
-            for ns in validate_namespaces:
-                errors += validate(io=reader, namespace=ns)
-
-    :param path: Path for the NWB file
-    :return: Tuple with:
-      - List of strings with the most specific namespace(s) to use for validation.
-      - BuildManager object for opening the file for validation
-      - Dict with the full result from NWBHDF5IO.load_namespaces
-    """
-    from . import NWBHDF5IO  # TODO: modularize to avoid circular import
-
-    catalog = NamespaceCatalog(
-        group_spec_cls=NWBGroupSpec, dataset_spec_cls=NWBDatasetSpec, spec_namespace_cls=NWBNamespace
-    )
-    namespace_dependencies = NWBHDF5IO.load_namespaces(
-        namespace_catalog=catalog,
-        path=path,
-        driver=driver,
-        aws_region=aws_region
-    )
-
-    # Determine which namespaces are the most specific (i.e. extensions) and validate against those
-    candidate_namespaces = set(namespace_dependencies.keys())
-    for namespace_dependency in namespace_dependencies:
-        candidate_namespaces -= namespace_dependencies[namespace_dependency].keys()
-
-    # TODO: remove this workaround for issue https://github.com/NeurodataWithoutBorders/pynwb/issues/1357
-    candidate_namespaces.discard("hdmf-experimental")  # remove validation of hdmf-experimental for now
-    cached_namespaces = sorted(candidate_namespaces)
-
-    if len(cached_namespaces) > 0:
-        type_map = TypeMap(namespaces=catalog)
-        manager = BuildManager(type_map=type_map)
-    else:
-        manager = None
-
-    return cached_namespaces, manager, namespace_dependencies
-
-
-@docval(
-    {
-        "name": "io",
-        "type": HDMFIO,
-        "doc": "An open IO to an NWB file.",
-        "default": None,
-    },  # For back-compatability
-    {
-        "name": "namespace",
-        "type": str,
-        "doc": "A specific namespace to validate against.",
-        "default": None,
-    },  # Argument order is for back-compatability
-    {
-        "name": "paths",
-        "type": list,
-        "doc": "List of NWB file paths.",
-        "default": None,
-    },
-    {
-        "name": "use_cached_namespaces",
-        "type": bool,
-        "doc": "Whether to use namespaces cached within the file for validation.",
-        "default": True,
-    },
-    {
-        "name": "verbose",
-        "type": bool,
-        "doc": "Whether or not to print messages to stdout.",
-        "default": False,
-    },
-    {
-        "name": "driver",
-        "type": str,
-        "doc": "Driver for h5py to use when opening the HDF5 file.",
-        "default": None,
-    },
-    returns="Validation errors in the file.",
-    rtype=(list, (list, bool)),
-    is_method=False,
-)
-def validate(**kwargs):
-    """Validate NWB file(s) against a namespace or its cached namespaces.
-
-    NOTE: If an io object is provided and no namespace name is specified, then the file will be validated
-    against the core namespace, even if use_cached_namespaces is True.
-    """
-    from . import NWBHDF5IO  # TODO: modularize to avoid circular import
-
-    io, paths, use_cached_namespaces, namespace, verbose, driver = getargs(
-        "io", "paths", "use_cached_namespaces", "namespace", "verbose", "driver", kwargs
-    )
-    assert io != paths, "Both 'io' and 'paths' were specified! Please choose only one."
-
-    if io is not None:
-        validation_errors = _validate_helper(io=io, namespace=namespace or CORE_NAMESPACE)
-        return validation_errors
-
-    status = 0
-    validation_errors = list()
-    for path in paths:
-        namespaces_to_validate = []
-        namespace_message = "PyNWB namespace information"
-        io_kwargs = dict(path=path, mode="r", driver=driver)
-
-        if use_cached_namespaces:
-            cached_namespaces, manager, namespace_dependencies = get_cached_namespaces_to_validate(
-                path=path, driver=driver
-            )
-            io_kwargs.update(manager=manager)
-
-            if any(cached_namespaces):
-                namespaces_to_validate = cached_namespaces
-                namespace_message = "cached namespace information"
-            else:
-                namespaces_to_validate = [CORE_NAMESPACE]
-                if verbose:
-                    print(
-                        f"The file {path} has no cached namespace information. Falling back to {namespace_message}.",
-                        file=sys.stderr,
-                    )
-        else:
-            io_kwargs.update(load_namespaces=False)
-            namespaces_to_validate = [CORE_NAMESPACE]
-
-        if namespace is not None:
-            if namespace in namespaces_to_validate:
-                namespaces_to_validate = [namespace]
-            elif use_cached_namespaces and namespace in namespace_dependencies:  # validating against a dependency
-                for namespace_dependency in namespace_dependencies:
-                    if namespace in namespace_dependencies[namespace_dependency]:
-                        status = 1
-                        print(
-                            f"The namespace '{namespace}' is included by the namespace "
-                            f"'{namespace_dependency}'. Please validate against that namespace instead.",
-                            file=sys.stderr,
-                        )
-            else:
-                status = 1
-                print(
-                    f"The namespace '{namespace}' could not be found in {namespace_message} as only "
-                    f"{namespaces_to_validate} is present.",
-                    file=sys.stderr,
-                )
-
-        if status == 1:
-            continue
-
-        with NWBHDF5IO(**io_kwargs) as io:
-            for validation_namespace in namespaces_to_validate:
-                if verbose:
-                    print(f"Validating {path} against {namespace_message} using namespace '{validation_namespace}'.")
-                validation_errors += _validate_helper(io=io, namespace=validation_namespace)
-    return validation_errors, status
-
-
-def validate_cli():
-    """CLI wrapper around pynwb.validate."""
-    parser = ArgumentParser(
-        description="Validate an NWB file",
-        epilog="If --ns is not specified, validate against all namespaces in the NWB file.",
-    )
-
-    # Special arg specific to CLI
-    parser.add_argument(
-        "-lns",
-        "--list-namespaces",
-        dest="list_namespaces",
-        action="store_true",
-        help="List the available namespaces and exit.",
-    )
-
-    # Common args to the API validate
-    parser.add_argument("paths", type=str, nargs="+", help="NWB file paths")
-    parser.add_argument("-n", "--ns", type=str, help="the namespace to validate against")
-    feature_parser = parser.add_mutually_exclusive_group(required=False)
-    feature_parser.add_argument(
-        "--no-cached-namespace",
-        dest="no_cached_namespace",
-        action="store_true",
-        help="Use the PyNWB loaded namespace (true) or use the cached namespace (false; default).",
-    )
-    parser.set_defaults(no_cached_namespace=False)
-    args = parser.parse_args()
-    status = 0
-
-    if args.list_namespaces:
-        for path in args.paths:
-            cached_namespaces, _, _ = get_cached_namespaces_to_validate(path=path)
-            print("\n".join(cached_namespaces))
-    else:
-        validation_errors, validation_status = validate(
-            paths=args.paths, use_cached_namespaces=not args.no_cached_namespace, namespace=args.ns, verbose=True
-        )
-        if not validation_status:
-            _print_errors(validation_errors=validation_errors)
-        status = status or validation_status or (validation_errors is not None and len(validation_errors) > 0)
-
-    sys.exit(status)
-
-
-if __name__ == "__main__":  # pragma: no cover
-    validate_cli()
diff --git a/src/pynwb/validation.py b/src/pynwb/validation.py
new file mode 100644
index 000000000..d2fb5cb17
--- /dev/null
+++ b/src/pynwb/validation.py
@@ -0,0 +1,198 @@
+"""Module to validate an NWB file against a namespace."""
+from typing import Tuple, List, Dict, Optional
+from pathlib import Path
+from warnings import warn
+
+from hdmf.spec import NamespaceCatalog
+from hdmf.build import BuildManager, TypeMap
+from hdmf.utils import docval, getargs, AllowPositional
+from hdmf.backends.io import HDMFIO
+from hdmf.validate import ValidatorMap
+
+from pynwb import CORE_NAMESPACE
+from pynwb.spec import NWBDatasetSpec, NWBGroupSpec, NWBNamespace
+
+
+def _validate_helper(io: HDMFIO, namespace: str = CORE_NAMESPACE) -> list:
+    builder = io.read_builder()
+    validator = ValidatorMap(io.manager.namespace_catalog.get_namespace(name=namespace))
+    return validator.validate(builder)
+
+
+def get_cached_namespaces_to_validate(path: Optional[str] = None, 
+                                      driver: Optional[str] = None, 
+                                      aws_region: Optional[str] = None,
+                                      io: Optional[HDMFIO] = None 
+) -> Tuple[List[str], BuildManager, Dict[str, str]]:
+    """
+    Determine the most specific namespace(s) that are cached in the given NWBFile that can be used for validation.
+
+    Example
+    -------
+    The following example illustrates how we can use this function to validate against namespaces
+    cached in a file. This is useful, e.g., when a file was created using an extension
+
+    .. code-block:: python
+
+        from pynwb import validate
+        from pynwb.validation import get_cached_namespaces_to_validate
+        path = "my_nwb_file.nwb"
+        validate_namespaces, manager, cached_namespaces = get_cached_namespaces_to_validate(path)
+        with NWBHDF5IO(path, "r", manager=manager) as reader:
+            errors = []
+            for ns in validate_namespaces:
+                errors += validate(io=reader, namespace=ns)
+
+    :param path: Path for the NWB file
+    :return: Tuple with:
+      - List of strings with the most specific namespace(s) to use for validation.
+      - BuildManager object for opening the file for validation
+      - Dict with the full result from NWBHDF5IO.load_namespaces
+    """
+
+    catalog = NamespaceCatalog(
+        group_spec_cls=NWBGroupSpec, dataset_spec_cls=NWBDatasetSpec, spec_namespace_cls=NWBNamespace
+    )
+
+    if io is not None:
+        # TODO update HDF5IO to have .file property to make consistent with ZarrIO
+        # then update input arguments here
+        namespace_dependencies = io.load_namespaces(namespace_catalog=catalog, 
+                                                    file=io._file)
+    else:
+        from pynwb import _get_backend
+        backend_io = _get_backend(path, method=driver)
+        namespace_dependencies = backend_io.load_namespaces(namespace_catalog=catalog, 
+                                                            path=path, 
+                                                            driver=driver, 
+                                                            aws_region=aws_region)
+
+    # Determine which namespaces are the most specific (i.e. extensions) and validate against those
+    candidate_namespaces = set(namespace_dependencies.keys())
+    for namespace_dependency in namespace_dependencies:
+        candidate_namespaces -= namespace_dependencies[namespace_dependency].keys()
+
+    # TODO: remove this workaround for issue https://github.com/NeurodataWithoutBorders/pynwb/issues/1357
+    candidate_namespaces.discard("hdmf-experimental")  # remove validation of hdmf-experimental for now
+    cached_namespaces = sorted(candidate_namespaces)
+
+    if len(cached_namespaces) > 0:
+        type_map = TypeMap(namespaces=catalog)
+        manager = BuildManager(type_map=type_map)
+    else:
+        manager = None
+
+    return cached_namespaces, manager, namespace_dependencies
+
+@docval(
+    {
+        "name": "io",
+        "type": HDMFIO,
+        "doc": "An open IO to an NWB file.",
+        "default": None,
+    },  # For back-compatability
+    {
+        "name": "namespace",
+        "type": str,
+        "doc": "A specific namespace to validate against.",
+        "default": None,
+    },  # Argument order is for back-compatability
+    {
+        "name": "path",
+        "type": (str, Path),
+        "doc": "NWB file path.",
+        "default": None,
+    },
+    {
+        "name": "use_cached_namespaces",
+        "type": bool,
+        "doc": "Whether to use namespaces cached within the file for validation.",
+        "default": True,
+    },
+    {
+        "name": "verbose",
+        "type": bool,
+        "doc": "Whether or not to print messages to stdout.",
+        "default": False,
+    },
+    {
+        "name": "driver",
+        "type": str,
+        "doc": "Driver for h5py to use when opening the HDF5 file.",
+        "default": None,
+    }, 
+    returns="Validation errors in the file.",
+    rtype=list,
+    is_method=False,
+    allow_positional=AllowPositional.WARNING,
+)
+def validate(**kwargs):
+    """Validate NWB file(s) against a namespace or its cached namespaces.
+
+    Note: this function checks for compliance with the NWB schema. 
+    It is recommended to use the NWBInspector for more comprehensive validation of both
+    compliance with the schema and compliance of data with NWB best practices.
+    """
+
+    io, path, use_cached_namespaces, namespace, verbose, driver = getargs(
+        "io", "path", "use_cached_namespaces", "namespace", "verbose", "driver", kwargs
+    )
+    assert io != path, "Both 'io' and 'path' were specified! Please choose only one."
+    path = str(path) if isinstance(path, Path) else path
+
+    # get namespaces to validate
+    namespace_message = "PyNWB namespace information"
+    io_kwargs = dict(path=path, mode="r", driver=driver)
+        
+    if use_cached_namespaces:
+        cached_namespaces, manager, namespace_dependencies = get_cached_namespaces_to_validate(path=path, 
+                                                                                               driver=driver, 
+                                                                                               io=io)
+        io_kwargs.update(manager=manager)
+
+        if any(cached_namespaces):
+            namespaces_to_validate = cached_namespaces
+            namespace_message = "cached namespace information"
+        else:
+            namespaces_to_validate = [CORE_NAMESPACE]
+            if verbose:
+                warn(f"The file {f'{path} ' if path is not None else ''}has no cached namespace information. "
+                     f"Falling back to {namespace_message}.", UserWarning)
+    else:
+        io_kwargs.update(load_namespaces=False)
+        namespaces_to_validate = [CORE_NAMESPACE]
+
+    # get io object if not provided
+    if path is not None:
+        from pynwb import _get_backend
+        backend_io = _get_backend(path, method=driver)
+        io = backend_io(**io_kwargs)
+
+    # check namespaces are accurate
+    if namespace is not None:
+        if namespace in namespaces_to_validate:
+            namespaces_to_validate = [namespace]
+        elif use_cached_namespaces and namespace in namespace_dependencies:  # validating against a dependency
+            for namespace_dependency in namespace_dependencies:
+                if namespace in namespace_dependencies[namespace_dependency]:
+                    raise ValueError(
+                        f"The namespace '{namespace}' is included by the namespace "
+                        f"'{namespace_dependency}'. Please validate against that namespace instead.")
+        else:
+            raise ValueError(
+                f"The namespace '{namespace}' could not be found in {namespace_message} as only "
+                f"{namespaces_to_validate} is present.",)
+  
+    # validate against namespaces
+    validation_errors = []
+    for validation_namespace in namespaces_to_validate:
+        if verbose:
+            print(f"Validating {f'{path} ' if path is not None else ''}against "  # noqa: T201
+                  f"{namespace_message} using namespace '{validation_namespace}'.")  
+        validation_errors += _validate_helper(io=io, namespace=validation_namespace)
+
+    if path is not None:
+        io.close()  # close the io object if it was created within this function, otherwise leave as is
+    
+    return validation_errors
+
diff --git a/src/pynwb/validation_cli.py b/src/pynwb/validation_cli.py
new file mode 100644
index 000000000..ad774f25b
--- /dev/null
+++ b/src/pynwb/validation_cli.py
@@ -0,0 +1,84 @@
+"""Command line tool to Validate an NWB file against a namespace."""
+import json
+import sys
+from argparse import ArgumentParser
+from pathlib import Path
+
+from pynwb.validation import validate, get_cached_namespaces_to_validate
+
+
+def _print_errors(validation_errors: list):
+    if validation_errors:
+        print(" - found the following errors:", file=sys.stderr)
+        for err in validation_errors:
+            print(str(err), file=sys.stderr)
+    else:
+        print(" - no errors found.")
+
+
+def validation_cli():
+    """CLI wrapper around pynwb.validate.
+    
+    Note: this CLI wrapper checks for compliance with the NWB schema. 
+    It is recommended to use the NWBInspector CLI for more comprehensive validation of both
+    compliance with the schema and compliance of data with NWB best practices.
+    """
+    parser = ArgumentParser(
+        description="Validate an NWB file",
+        epilog="If --ns is not specified, validate against all namespaces in the NWB file.",
+    )
+
+    # Special arg specific to CLI
+    parser.add_argument(
+        "-lns",
+        "--list-namespaces",
+        dest="list_namespaces",
+        action="store_true",
+        help="List the available namespaces and exit.",
+    )
+
+    # Common args to the API validate
+    parser.add_argument("paths", type=str, nargs="+", help="NWB file paths")
+    parser.add_argument("-n", "--ns", type=str, help="the namespace to validate against")
+    parser.add_argument("--json-output-path", dest="json_output_path", type=str, 
+                        help="Write json output to this location.")
+    feature_parser = parser.add_mutually_exclusive_group(required=False)
+    feature_parser.add_argument(
+        "--no-cached-namespace",
+        dest="no_cached_namespace",
+        action="store_true",
+        help="Use the namespaces installed by PyNWB (true) or use the cached namespaces (false; default).",
+    )
+    parser.set_defaults(no_cached_namespace=False)
+    args = parser.parse_args()
+
+    status = 0
+    for path in args.paths:
+        if args.list_namespaces:
+            cached_namespaces, _, _ = get_cached_namespaces_to_validate(path=path)
+            print("\n".join(cached_namespaces))
+        else:
+            validation_errors = []
+            try:
+                val_errors = validate(
+                    path=path, use_cached_namespaces=not args.no_cached_namespace, namespace=args.ns, verbose=True, 
+                )
+                _print_errors(validation_errors=val_errors)
+                status = status or int(val_errors is not None and len(val_errors) > 0)
+                validation_errors.append(val_errors)
+            except ValueError as e:
+                print(e, file=sys.stderr)
+                status = 1
+        
+    # write output to json file
+    if args.json_output_path is not None:
+        with open(args.json_output_path, "w") as f:
+            json_report = {'exitcode': status, 'errors': [str(e) for e in validation_errors]}
+            json.dump(obj=json_report, fp=f)
+            print(f"Report saved to {str(Path(args.json_output_path).absolute())}!")
+
+    sys.exit(status)
+
+
+if __name__ == "__main__":  # pragma: no cover
+    validation_cli()
diff --git a/test.py b/test.py
index de3c2c762..a93f6237d 100644
--- a/test.py
+++ b/test.py
@@ -156,7 +156,7 @@ def validate_nwbs():
     examples_nwbs = [x for x in examples_nwbs if not x.startswith('sub-')]
 
     import pynwb
-    from pynwb.validate import get_cached_namespaces_to_validate
+    from pynwb.validation import validate, get_cached_namespaces_to_validate
 
     for nwb in examples_nwbs:
         try:
@@ -168,7 +168,8 @@ def validate_nwbs():
                 is_family_nwb_file = False
                 try:
                     with pynwb.NWBHDF5IO(nwb, mode='r') as io:
-                        errors = pynwb.validate(io)
+                        errors = validate(io, use_cached_namespaces=False)
+                        errors.extend(validate(io, use_cached_namespaces=True))
                 except OSError as e:
                     # if the file was created with the family driver, need to use the family driver to open it
                     if 'family driver should be used' in str(e):
@@ -178,7 +179,8 @@ def validate_nwbs():
                         memb_size = 1024**2  # note: the memb_size must be the same as the one used to create the file
                         with h5py.File(filename_pattern, mode='r', driver='family', memb_size=memb_size) as f:
                             with pynwb.NWBHDF5IO(file=f, manager=None, mode='r') as io:
-                                errors = pynwb.validate(io)
+                                errors = validate(io, use_cached_namespaces=False)
+                                errors.extend(validate(io, use_cached_namespaces=True))
                     else:
                         raise e
 
@@ -201,14 +203,14 @@ def validate_nwbs():
                     ERRORS += 1
 
                 cmds = []
-                cmds += [["python", "-m", "pynwb.validate", nwb]]
-                cmds += [["python", "-m", "pynwb.validate", "--no-cached-namespace", nwb]]
+                cmds += [["pynwb-validate", nwb]]
+                cmds += [["pynwb-validate", "--no-cached-namespace", nwb]]
 
                 for ns in namespaces:
                     # for some reason, this logging command is necessary to correctly printing the namespace in the
                     # next logging command
                     logging.info("Namespace found: %s" % ns)
-                    cmds += [["python", "-m", "pynwb.validate", "--ns", ns, nwb]]
+                    cmds += [["pynwb-validate", "--ns", ns, nwb]]
 
                 for cmd in cmds:
                     logging.info("Validating with \"%s\"." % (" ".join(cmd[:-1])))
diff --git a/tests/integration/ros3/test_ros3.py b/tests/integration/ros3/test_ros3.py
index 2571e6199..347b070e4 100644
--- a/tests/integration/ros3/test_ros3.py
+++ b/tests/integration/ros3/test_ros3.py
@@ -1,6 +1,6 @@
 from pynwb import NWBHDF5IO
 from pynwb import validate
-from pynwb.validate import get_cached_namespaces_to_validate
+from pynwb.validation import get_cached_namespaces_to_validate
 from pynwb.testing import TestCase
 import urllib.request
 import h5py
@@ -90,10 +90,9 @@ def test_dandi_get_cached_namespaces(self):
         )
 
         self.assertCountEqual(first=found_namespaces, second=expected_namespaces)
-        self.assertDictEqual(d1=expected_namespace_dependencies, d2=expected_namespace_dependencies)
+        self.assertDictEqual(d1=found_namespace_dependencies, d2=expected_namespace_dependencies)
 
     def test_dandi_validate(self):
-        result, status = validate(paths=[self.s3_test_path], driver="ros3")
+        result = validate(path=self.s3_test_path, driver="ros3")
 
         assert result == []
-        assert status == 0
diff --git a/tests/integration/utils/test_io_utils.py b/tests/integration/utils/test_io_utils.py
index 73732924f..7f36e5474 100644
--- a/tests/integration/utils/test_io_utils.py
+++ b/tests/integration/utils/test_io_utils.py
@@ -1,10 +1,13 @@
 """Tests related to pynwb.io.utils."""
 import pytest
 
+from datetime import datetime
+from dateutil.tz import tzutc
+
 from hdmf.build import GroupBuilder
 from pynwb.io.utils import get_nwb_version
-from pynwb.testing import TestCase
-
+from pynwb.testing import TestCase, remove_test_file
+from pynwb import NWBFile, NWBHDF5IO, _get_backend
 
 class TestGetNWBVersion(TestCase):
 
@@ -53,3 +56,23 @@ def test_get_nwb_version_20b(self):
         builder1.set_attribute(name="nwb_version", value="2.0b")
         assert get_nwb_version(builder1) == (2, 0, 0)
         assert get_nwb_version(builder1, include_prerelease=True) == (2, 0, 0, "b")
+
+class TestGetNWBBackend(TestCase):
+    def setUp(self):
+        self.nwbfile = NWBFile(session_description='a test NWB File',
+                               identifier='TEST123',
+                               session_start_time=datetime(1970, 1, 1, 12, tzinfo=tzutc()))
+        self.hdf5_path = "test_pynwb_nwb_backend.nwb"
+        with NWBHDF5IO(self.hdf5_path, 'w') as io:
+            io.write(self.nwbfile)
+
+    def tearDown(self):
+        remove_test_file(self.hdf5_path)
+
+    def test_get_backend_invalid_file(self):
+        with self.assertRaises(ValueError):
+            _get_backend('not_a_file.nwb')
+
+    def test_get_backend_HDF5(self):
+        backend_io = _get_backend(self.hdf5_path)
+        self.assertEqual(backend_io, NWBHDF5IO)
\ No newline at end of file
diff --git a/tests/validation/test_validate.py b/tests/validation/test_validate.py
index c2829ee1f..8b2c34888 100644
--- a/tests/validation/test_validate.py
+++ b/tests/validation/test_validate.py
@@ -1,5 +1,6 @@
 import subprocess
 import re
+import os
 import sys
 from unittest.mock import patch
 from io import StringIO
@@ -23,7 +24,7 @@
 # combine the individual coverage reports into one .coverage file.
 def run_coverage(extra_args: list[str]):
     return subprocess.run(
-        [sys.executable, "-m", "coverage", "run", "-p", "-m", "pynwb.validate"]
+        [sys.executable, "-m", "coverage", "run", "-p", "-m", "pynwb.validation_cli"]
         + extra_args,
         capture_output=True
     )
@@ -54,13 +55,15 @@ def test_validate_file_no_cache_bad_ns(self):
         """Test that validating a file with no cached spec against a specified, unknown namespace fails."""
         result = run_coverage(["tests/back_compat/1.0.2_nwbfile.nwb", "--ns", "notfound"])
 
-        stderr_regex = re.compile(
+        stderr_regex_1 = re.compile(
             r"The file tests/back_compat/1\.0\.2_nwbfile\.nwb has no cached namespace information\. "
-            r"Falling back to PyNWB namespace information\.\s*"
+            r"Falling back to PyNWB namespace information\.\s*")
+        stderr_regex_2 = re.compile(
             r"The namespace 'notfound' could not be found in PyNWB namespace information as only "
-            r"\['core'\] is present\.\s*"
-        )
-        self.assertRegex(result.stderr.decode('utf-8'), stderr_regex)
+            r"\['core'\] is present\.\s*")
+        
+        self.assertRegex(result.stderr.decode('utf-8'), stderr_regex_1)
+        self.assertRegex(result.stderr.decode('utf-8'), stderr_regex_2)
 
         self.assertEqual(result.stdout.decode('utf-8'), '')
 
@@ -174,6 +177,22 @@ def test_validate_file_list_namespaces_extension(self):
         stdout_regex = re.compile(r"ndx-testextension\s*")
         self.assertRegex(result.stdout.decode('utf-8'), stdout_regex)
 
+    def test_validate_file_json_output(self):
+        """Test that validating a file with the json flag outputs a json file."""
+        json_path = "test_validation.json"
+        run_coverage(["tests/back_compat/1.0.2_str_experimenter.nwb", "--no-cached-namespace", 
+                      "--json-output-path", json_path])
+        self.assertTrue(os.path.exists(json_path))
+        os.remove(json_path)
+
+    def test_validation_entry_point(self):
+        """Test that using the validation entry point successfully executes the validate CLI."""
+        json_path = "test_validation_entry_point.json"
+        subprocess.run(["pynwb-validate", "tests/back_compat/1.0.2_str_experimenter.nwb", 
+                        "--json-output-path", json_path])
+        self.assertTrue(os.path.exists(json_path))
+        os.remove(json_path)
+
 
 class TestValidateFunction(TestCase):
 
@@ -199,9 +218,11 @@ def test_validate_io_no_cache(self):
 
     def test_validate_io_no_cache_bad_ns(self):
         """Test that validating a file with no cached spec against a specified, unknown namespace fails."""
-        with self.get_io('tests/back_compat/1.0.2_nwbfile.nwb') as io:
-            with self.assertRaisesWith(KeyError, "\"'notfound' not a namespace\""):
-                validate(io, 'notfound')
+        expected_error = ("The namespace 'notfound' could not be found in PyNWB namespace information as only "
+                          "['core'] is present.")
+        with self.assertRaisesWith(ValueError, expected_error):
+            with self.get_io('tests/back_compat/1.0.2_nwbfile.nwb') as io:
+                validate(io=io, namespace='notfound')
 
     def test_validate_io_cached(self):
         """Test that validating a file with cached spec against its cached namespace succeeds."""
@@ -221,75 +242,80 @@ def test_validate_io_cached_extension_pass_ns(self):
             errors = validate(io, 'ndx-testextension')
             self.assertEqual(errors, [])
 
-    def test_validate_io_cached_core_with_io(self):
-        """
-        For back-compatability, test that validating a file with cached extension spec against the core
-        namespace succeeds when using the `io` + `namespace` keywords.
-        """
-        with self.get_io(path='tests/back_compat/2.1.0_nwbfile_with_extension.nwb') as io:
-            results = validate(io=io, namespace="core")
-            self.assertEqual(results, [])
-
     def test_validate_file_cached_extension(self):
         """
         Test that validating a file with cached extension spec against the core
         namespace raises an error with the new CLI-mimicing paths keyword.
         """
         nwbfile_path = "tests/back_compat/2.1.0_nwbfile_with_extension.nwb"
-        with patch("sys.stderr", new=StringIO()) as fake_err:
-            with patch("sys.stdout", new=StringIO()) as fake_out:
-                results, status = validate(paths=[nwbfile_path], namespace="core", verbose=True)
-                self.assertEqual(results, [])
-                self.assertEqual(status, 1)
-                self.assertEqual(
-                    fake_err.getvalue(),
-                    (
-                        "The namespace 'core' is included by the namespace 'ndx-testextension'. "
-                        "Please validate against that namespace instead.\n"
-                    )
-                )
-                self.assertEqual(fake_out.getvalue(), "")
+        expected_error = ("The namespace 'core' is included by the namespace 'ndx-testextension'. "
+                          "Please validate against that namespace instead.")
+        with self.assertRaisesWith(ValueError, expected_error):
+            validate(path=nwbfile_path, namespace="core", verbose=True)
 
     def test_validate_file_cached_core(self):
         """
         Test that validating a file with cached core spec with verbose=False.
         """
         nwbfile_path = "tests/back_compat/1.1.2_nwbfile.nwb"
-        with patch("sys.stderr", new=StringIO()) as fake_err:
-            with patch("sys.stdout", new=StringIO()) as fake_out:
-                results, status = validate(paths=[nwbfile_path], namespace="core")
-                self.assertEqual(results, [])
-                self.assertEqual(status, 0)
-                self.assertEqual(fake_err.getvalue(), "")
-                self.assertEqual(fake_out.getvalue(), "")
+        results = validate(path=nwbfile_path, namespace="core")
+        self.assertEqual(results, [])
 
     def test_validate_file_cached_no_cache_bad_ns(self):
         """
         Test that validating a file with no cached namespace, a namespace that is not found, and verbose=False.
         """
         nwbfile_path = "tests/back_compat/1.0.2_nwbfile.nwb"
-        with patch("sys.stderr", new=StringIO()) as fake_err:
-            with patch("sys.stdout", new=StringIO()) as fake_out:
-                results, status = validate(paths=[nwbfile_path], namespace="notfound")
-                self.assertEqual(results, [])
-                self.assertEqual(status, 1)
-                stderr_regex = (
-                    r"The namespace 'notfound' could not be found in PyNWB namespace information as only "
-                    r"\['core'\] is present.\n"
-                )
-                self.assertRegex(fake_err.getvalue(), stderr_regex)
-                self.assertEqual(fake_out.getvalue(), "")
-
-    def test_validate_io_cached_bad_ns(self):
-        """Test that validating a file with cached spec against a specified, unknown namespace fails."""
-        with self.get_io('tests/back_compat/1.1.2_nwbfile.nwb') as io:
-            with self.assertRaisesWith(KeyError, "\"'notfound' not a namespace\""):
-                validate(io, 'notfound')
+        expected_error = ("The namespace 'notfound' could not be found in PyNWB namespace information as only "
+                          "['core'] is present.")
+        with self.assertRaisesWith(ValueError, expected_error):
+            validate(path=nwbfile_path, namespace="notfound")
 
     def test_validate_io_cached_hdmf_common(self):
         """Test that validating a file with cached spec against the hdmf-common namespace fails."""
-        with self.get_io('tests/back_compat/1.1.2_nwbfile.nwb') as io:
-            # TODO this error should not be different from the error when using the validate script above
-            msg = "builder must have data type defined with attribute 'data_type'"
-            with self.assertRaisesWith(ValueError, msg):
-                validate(io, 'hdmf-common')
+        expected_error = ("The namespace 'hdmf-common' is included by the namespace 'core'. "
+                          "Please validate against that namespace instead.")
+        with self.assertRaisesWith(ValueError, expected_error):
+            with self.get_io(path='tests/back_compat/1.1.2_nwbfile.nwb') as io:
+                validate(io=io, namespace="hdmf-common", verbose=True)
+
+    def test_validate_io_and_path_same(self):
+        """Test that validating a file with an io object and a path return the same results."""
+        tests = [('tests/back_compat/1.0.2_nwbfile.nwb', None),
+                 ('tests/back_compat/1.1.2_nwbfile.nwb', None),
+                 ('tests/back_compat/1.1.2_nwbfile.nwb', 'core'),
+                 ('tests/back_compat/2.1.0_nwbfile_with_extension.nwb', None),
+                 ('tests/back_compat/2.1.0_nwbfile_with_extension.nwb', 'ndx-testextension'),]
+        
+        tests_with_error = [('tests/back_compat/1.0.2_nwbfile.nwb', 'notfound'),
+                            ('tests/back_compat/1.1.2_nwbfile.nwb', 'notfound'), 
+                            ('tests/back_compat/1.1.2_nwbfile.nwb', 'hdmf-common'),
+                            ('tests/back_compat/2.1.0_nwbfile_with_extension.nwb', 'core'),]
+        
+        # paths that cause no errors
+        for path, ns in tests:
+            with patch("sys.stdout", new=StringIO()) as fake_out:
+                with self.get_io(path=path) as io:
+                    results_io = validate(io=io, namespace=ns, verbose=True)
+                out_io = fake_out.getvalue()        
+
+            with patch("sys.stdout", new=StringIO()) as fake_out:
+                results_path = validate(path=path, namespace=ns, verbose=True)
+                out_path = fake_out.getvalue()
+
+            # remove path from error messages since it will not be included in io outputs
+            out_path = out_path.replace(f'{path} ', '')
+            self.assertEqual(results_io, results_path)
+            self.assertEqual(out_io, out_path)
+        
+        # paths that return errors
+        for path, ns in tests_with_error:
+            with self.assertRaises(ValueError) as e_io:
+                with self.get_io(path=path) as io:
+                    results_io = validate(io=io, namespace=ns, verbose=True)
+            
+            with self.assertRaises(ValueError) as e_path:
+                results_path = validate(path=path, namespace=ns, verbose=True)
+
+            # remove path from error messages since it will not be included in io outputs
+            self.assertEqual(str(e_io.exception), str(e_path.exception))