diff --git a/.github/workflows/run_tests.yaml b/.github/workflows/run_tests.yaml
new file mode 100644
index 0000000..a2867f6
--- /dev/null
+++ b/.github/workflows/run_tests.yaml
@@ -0,0 +1,49 @@
+name: Tests with Spark
+
+on:
+  push:
+    branches:
+      - dev-tests
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+
+      - uses: actions/setup-java@v4
+        with:
+          java-version: '21'
+          distribution: temurin
+
+      - uses: vemonet/setup-spark@v1
+        with:
+          spark-version: '3.4.0'
+          hadoop-version: '3'
+
+      - run: spark-submit --version
+
+      # - name: Install dependencies
+      #   run: |
+      #     python -m pip install --upgrade pip
+      #     pip install pytest pytest-cov
+
+      # - name: Run tests with pytest
+      #   run: |
+      #     pytest --cov=voluseg --cov-report=term-missing
+
+      # - name: Upload coverage report
+      #   if: success()
+      #   uses: actions/upload-artifact@v3
+      #   with:
+      #     name: coverage-report
+      #     path: htmlcov/
diff --git a/.gitignore b/.gitignore
index 918f73b..b37bc0e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,6 +14,6 @@
 coverage_html/
 htmlcov/
 # Local data
-data/*
+sample_data/*
 output/*
 logs/*
\ No newline at end of file
diff --git a/README_docker.md b/README_docker.md
index d1505ef..c50c4b5 100644
--- a/README_docker.md
+++ b/README_docker.md
@@ -13,7 +13,7 @@ docker run -v $(pwd)/data:/voluseg/data voluseg
 Run with local data volume mount and hot reload for the voluseg package:
 ```bash
 docker run \
--v $(pwd)/data:/voluseg/data \
+-v $(pwd)/sample_data:/voluseg/data \
 -v $(pwd)/output:/voluseg/output \
 -v $(pwd)/voluseg:/voluseg/voluseg \
 -v $(pwd)/app:/voluseg/app \
diff --git a/tests/README.md b/tests/README.md
new file mode 100644
index 0000000..12c5ece
--- /dev/null
+++ b/tests/README.md
@@ -0,0 +1,7 @@
+# Tests
+
+Run tests locally with `pytest`:
+
+```bash
+pytest -s
+```
\ No newline at end of file
diff --git a/tests/requirements.txt b/tests/requirements.txt
new file mode 100644
index 0000000..efe3dd5
--- /dev/null
+++ b/tests/requirements.txt
@@ -0,0 +1,2 @@
+pytest==8.3.2
+pytest-cov==5.0.0
\ No newline at end of file
diff --git a/tests/tests.py b/tests/test_voluseg.py
similarity index 85%
rename from tests/tests.py
rename to tests/test_voluseg.py
index 81367ca..3abdc77 100644
--- a/tests/tests.py
+++ b/tests/test_voluseg.py
@@ -2,6 +2,7 @@
 import pprint
 import voluseg
 import pytest
+from pathlib import Path
 
 
 @pytest.fixture
@@ -9,7 +10,8 @@ def setup_parameters(tmp_path):
     # Define parameters and paths
     parameters0 = voluseg.parameter_dictionary()
     parameters0['dir_ants'] = "/home/luiz/Downloads/ants-2.5.2/bin" # Change this to your actual ANTs bin path
-    parameters0['dir_input'] = '/home/luiz/Desktop/voluseg/input/' # Change this to your actual input directory
+    # parameters0['dir_input'] = str((Path(".").resolve().parent / "sample_data"))
+    parameters0['dir_input'] = "/mnt/shared_storage/taufferconsulting/client_catalystneuro/project_voluseg/sample_data"
     parameters0['dir_output'] = str(tmp_path) # Use pytest's tmp_path fixture for output
     parameters0['registration'] = 'high'
     parameters0['diam_cell'] = 5.0
@@ -22,10 +24,8 @@ def setup_parameters(tmp_path):
     return parameters0
 
 
-def test_voluseg_pipeline(setup_parameters):
+def test_voluseg_pipeline_h5_dir(setup_parameters):
     parameters = setup_parameters
-
-    # Load and print parameters (for debugging purposes)
     filename_parameters = os.path.join(parameters['dir_output'], 'parameters.pickle')
     parameters = voluseg.load_parameters(filename_parameters)
     pprint.pprint(parameters)
@@ -54,5 +54,5 @@
     # assert os.path.exists(os.path.join(parameters['dir_output'], 'step5_output_file.ext')), "Step 5 output file missing"
 
 
-if __name__ == "__main__":
-    pytest.main()
+# if __name__ == "__main__":
+#     pytest.main()
diff --git a/voluseg/_tools/nwb.py b/voluseg/_tools/nwb.py
index 4e2144a..166e98b 100644
--- a/voluseg/_tools/nwb.py
+++ b/voluseg/_tools/nwb.py
@@ -1,4 +1,10 @@
 import pynwb
+import h5py
+import numpy as np
+from datetime import datetime
+from uuid import uuid4
+from dateutil import tz
+from pathlib import Path
 from contextlib import contextmanager
 
 
@@ -47,4 +53,71 @@ def get_nwbfile_volume(
     pynwb.NWBFile
         NWB file.
     """
-    return nwbfile.acquisition[acquisition_name].data[volume_index]
\ No newline at end of file
+    return nwbfile.acquisition[acquisition_name].data[volume_index]
+
+
+def h5_dir_to_nwbfile(
+    h5_dir: str,
+    acquisition_name: str = "TwoPhotonSeries",
+    max_volumes: int | None = None,
+) -> pynwb.NWBFile:
+    """
+    Convert a directory of HDF5 files to a single NWB file.
+    Each h5 file is assumed to contain a single volume.
+
+    Parameters
+    ----------
+    h5_dir : str
+        Directory of HDF5 files.
+    acquisition_name : str
+        Acquisition name.
+    max_volumes : int, optional
+        Maximum number of volumes to read.
+
+    Returns
+    -------
+    pynwb.NWBFile
+        NWB file.
+    """
+    sorted_paths = sorted([str(p.resolve()) for p in Path(h5_dir).glob("*.h5")])
+    datasets = []
+    if max_volumes is None:
+        max_volumes = len(sorted_paths)
+    for p in sorted_paths[:max_volumes]:
+        with h5py.File(p, 'r') as hdf:
+            dataset = hdf['default'][:]
+            datasets.append(dataset)
+    concatenated_dataset = np.concatenate(datasets, axis=0)
+    nwbfile = pynwb.NWBFile(
+        session_description="description",
+        identifier=str(uuid4()),
+        session_start_time=datetime(2018, 4, 25, 2, 30, 3, tzinfo=tz.gettz("US/Pacific")),
+    )
+    device = nwbfile.create_device(
+        name="Microscope",
+        description="My two-photon microscope",
+    )
+    optical_channel = pynwb.ophys.OpticalChannel(
+        name="OpticalChannel",
+        description="an optical channel",
+        emission_lambda=500.0,
+    )
+    imaging_plane = nwbfile.create_imaging_plane(
+        name="ImagingPlane",
+        optical_channel=optical_channel,
+        description="a very interesting part of the brain",
+        device=device,
+        excitation_lambda=600.0,
+        indicator="GFP",
+        location="V1",
+    )
+    two_p_series = pynwb.ophys.TwoPhotonSeries(
+        name=acquisition_name,
+        description="Raw 2p data",
+        data=concatenated_dataset,
+        imaging_plane=imaging_plane,
+        rate=1.0,
+        unit="normalized amplitude",
+    )
+    nwbfile.add_acquisition(two_p_series)
+    return nwbfile
\ No newline at end of file