From 5005e3506a3c54b79ba829991eea13db52f3fa13 Mon Sep 17 00:00:00 2001 From: Ivan Vilata-i-Balaguer Date: Tue, 12 Dec 2023 14:28:21 +0100 Subject: [PATCH 01/14] Readme: Update instructions for implicit/explicit enabling. --- README.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 47d4577..9c95c2d 100644 --- a/README.rst +++ b/README.rst @@ -15,15 +15,15 @@ Benchmarks of this technique show 2x-5x speed-ups compared with normal filter-ba Usage ----- -This optimized access works for slices with step 1 on Blosc2-compressed datasets using the native byte order. It is enabled by monkey-patching the ``h5py.Dataset`` class to extend the slicing operation. This is done on module import, so the only thing you need to do is:: +This optimized access works for slices with step 1 on Blosc2-compressed datasets using the native byte order. It is enabled by monkey-patching the ``h5py.Dataset`` class to extend the slicing operation. The easiest way to do this is:: - import b2h5py + import b2h5py.auto After that, optimization will be attempted for any slicing of a dataset (of the form ``dataset[...]`` or ``dataset.__getitem__(...)``). If the optimization is not possible in a particular case, normal h5py slicing code will be used (which performs HDF5 filter-based access, backed by hdf5plugin_ to support Blosc2). .. _hdf5plugin: https://github.com/silx-kit/hdf5plugin -You may globally disable the optimization after importing ``b2h5py`` by calling ``b2h5py.disable_fast_slicing()``, and enable it again with ``b2h5py.enable_fast_slicing()``. You may also enable it temporarily by using ``b2h5py.fast_slicing()`` to get a context manager. +You may instead just ``import b2h5py`` and explicitly enable the optimization globally by calling ``b2h5py.enable_fast_slicing()``, and disable it again with ``b2h5py.disable_fast_slicing()``. You may also enable it temporarily by using ``b2h5py.fast_slicing()`` to get a context manager. 
Building -------- From ec429343d82b3109f867c6e5addf5ea32b3f61b8 Mon Sep 17 00:00:00 2001 From: Ivan Vilata-i-Balaguer Date: Tue, 12 Dec 2023 14:31:41 +0100 Subject: [PATCH 02/14] Readme: Better introduction of context manager. For explicit temporary optimization support. --- README.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 9c95c2d..ce5d9d8 100644 --- a/README.rst +++ b/README.rst @@ -23,7 +23,10 @@ After that, optimization will be attempted for any slicing of a dataset (of the .. _hdf5plugin: https://github.com/silx-kit/hdf5plugin -You may instead just ``import b2h5py`` and explicitly enable the optimization globally by calling ``b2h5py.enable_fast_slicing()``, and disable it again with ``b2h5py.disable_fast_slicing()``. You may also enable it temporarily by using ``b2h5py.fast_slicing()`` to get a context manager. +You may instead just ``import b2h5py`` and explicitly enable the optimization globally by calling ``b2h5py.enable_fast_slicing()``, and disable it again with ``b2h5py.disable_fast_slicing()``. You may also enable it temporarily by using a context manager:: + + with b2h5py.fast_slicing(): + # ... code that will use Blosc2 optimized slicing ... Building -------- From 9419dad78aec861322a2f7d2f81628abda522ee0 Mon Sep 17 00:00:00 2001 From: Ivan Vilata-i-Balaguer Date: Tue, 12 Dec 2023 14:35:59 +0100 Subject: [PATCH 03/14] Example: It no longer contains mentions to package dependencies. --- examples/blosc2_optimized_slicing.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/blosc2_optimized_slicing.py b/examples/blosc2_optimized_slicing.py index 831573d..e86482d 100644 --- a/examples/blosc2_optimized_slicing.py +++ b/examples/blosc2_optimized_slicing.py @@ -3,8 +3,7 @@ It creates a 2-dimensional dataset made of different chunks, compressed with Blosc2. Then it proceeds to slice the dataset in ways that may and may not benefit from Blosc2 optimized slicing. 
Some hints about forcing the use of -the HDF5 filter pipeline are included, as well as comments on the Python -package dependencies required for the different use cases. +the HDF5 filter pipeline are included. Optimized slicing can provide considerable speed-ups in certain use cases, please see `this benchmark`__ which evaluates applying the same technique in From 4f688d22d102b8491bab3e85f6a6f04726f1d5a6 Mon Sep 17 00:00:00 2001 From: Ivan Vilata-i-Balaguer Date: Tue, 12 Dec 2023 14:38:50 +0100 Subject: [PATCH 04/14] Example: Update note about instructions to enable optimization. --- examples/blosc2_optimized_slicing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/blosc2_optimized_slicing.py b/examples/blosc2_optimized_slicing.py index e86482d..9908e2d 100644 --- a/examples/blosc2_optimized_slicing.py +++ b/examples/blosc2_optimized_slicing.py @@ -2,8 +2,8 @@ It creates a 2-dimensional dataset made of different chunks, compressed with Blosc2. Then it proceeds to slice the dataset in ways that may and may not -benefit from Blosc2 optimized slicing. Some hints about forcing the use of -the HDF5 filter pipeline are included. +benefit from Blosc2 optimized slicing. Examples of different ways to enable +Blosc2 optimized slicing are shown. Optimized slicing can provide considerable speed-ups in certain use cases, please see `this benchmark`__ which evaluates applying the same technique in From 71a7bd28941060b78215a5e5463b274d219026e0 Mon Sep 17 00:00:00 2001 From: Ivan Vilata-i-Balaguer Date: Wed, 13 Dec 2023 09:10:55 +0100 Subject: [PATCH 05/14] Add module for automatic, global enabling of optimizations. This provides a very simple interface to enabling the optimizations, while allowing the plain import to only perform the dataset class patching explicitly. 
--- b2h5py/auto.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 b2h5py/auto.py diff --git a/b2h5py/auto.py b/b2h5py/auto.py new file mode 100644 index 0000000..39053eb --- /dev/null +++ b/b2h5py/auto.py @@ -0,0 +1,14 @@ +"""Automatic activation of Blosc2 optimized slicing for h5py. + +Importing this module enables the optimization globally, just use:: + + import b2h5py.auto + +After that, all slicing operations on Blosc2-compressed datasets will be +transparently optimized when possible. +""" + +from .blosc2 import enable_fast_slicing + + +enable_fast_slicing() From d4cc1f9485aa934f6a236d055dd494ac2a0de489 Mon Sep 17 00:00:00 2001 From: Ivan Vilata-i-Balaguer Date: Wed, 13 Dec 2023 09:22:40 +0100 Subject: [PATCH 06/14] Disable automatic patching when importing the main module. Now the patching needs to be explicit. --- b2h5py/__init__.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/b2h5py/__init__.py b/b2h5py/__init__.py index c7cabc2..74902ac 100644 --- a/b2h5py/__init__.py +++ b/b2h5py/__init__.py @@ -2,17 +2,17 @@ Optimizations are applied to slices of the form ``dataset[...]`` or ``dataset.__getitem__(...)`` with step 1 on Blosc2-compressed datasets using -the native byte order. +the native byte order. They are implemented by monkey-patching the +``h5py.Dataset`` class. -They are enabled automatically on module import, by monkey-patching the -``h5py.Dataset`` class. You may explicitly undo this patching and deactivate -optimization globally with `disable_fast_slicing()` and redo it and activate -it again with `enable_fast_slicing()`. You may also patch the class and -activate optimization temporarily using `fast_slicing()` to get a context -manager. +Optimizations need to be enabled explicitly. One option is to call +`enable_fast_slicing()` to enable them globally (by performing the patching). +Then `disable_fast_slicing()` may be called to disable them again (by undoing +the patching). 
As an alternative, you may also activate optimizations +temporarily using `fast_slicing()` to get a context manager. -**Note:** For testing and debugging purposes, you may force-disable the -optimization at any time by setting ``BLOSC2_FILTER=1`` in the environment. +**Note:** For testing and debugging purposes, you may force-disable +optimizations at any time by setting ``BLOSC2_FILTER=1`` in the environment. """ from .blosc2 import (disable_fast_slicing, @@ -25,6 +25,3 @@ 'enable_fast_slicing', 'fast_slicing', 'is_fast_slicing_enabled'] - - -enable_fast_slicing() From edbcc5759979bc07fb4a86d6f1daa822764cff49 Mon Sep 17 00:00:00 2001 From: Ivan Vilata-i-Balaguer Date: Wed, 13 Dec 2023 09:28:22 +0100 Subject: [PATCH 07/14] Example: Update to explicit patching. --- examples/blosc2_optimized_slicing.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/examples/blosc2_optimized_slicing.py b/examples/blosc2_optimized_slicing.py index 9908e2d..ab602da 100644 --- a/examples/blosc2_optimized_slicing.py +++ b/examples/blosc2_optimized_slicing.py @@ -65,11 +65,11 @@ def printl(*args, **kwargs): # Benefitting from Blosc2 optimized slicing # ----------------------------------------- -# After importing `b2h5py`, +# After importing `b2h5py.auto`, # support for Blosc2 optimized slicing is enabled by default. print("# Using Blosc2 optimized slicing") with h5py.File(file_name, 'r') as f: - import b2h5py + import b2h5py.auto assert(b2h5py.is_fast_slicing_enabled()) # One just uses slicing as usual. 
dataset = f[dataset_name] @@ -82,21 +82,22 @@ def printl(*args, **kwargs): printl("Sparse slice from dataset (filter):", dataset[150::2, 150::2]) printl("Sparse slice from input array:", data[150::2, 150::2]) print() +b2h5py.disable_fast_slicing() # back to normal -# Disabling Blosc2 optimized slicing -# ---------------------------------- +# Enabling Blosc2 optimized slicing +# --------------------------------- # Utility functions are provided to enable and disable optimization globally. -print("# Disabling Blosc2 optimized slicing globally") +print("# Enabling Blosc2 optimized slicing globally") with h5py.File(file_name, 'r') as f: import b2h5py - assert(b2h5py.is_fast_slicing_enabled()) - b2h5py.disable_fast_slicing() assert(not b2h5py.is_fast_slicing_enabled()) + b2h5py.enable_fast_slicing() + assert(b2h5py.is_fast_slicing_enabled()) dataset = f[dataset_name] - printl("Slice from dataset (filter):", dataset[150:, 150:]) + printl("Slice from dataset (optimized):", dataset[150:, 150:]) printl("Slice from input array:", data[150:, 150:]) - b2h5py.enable_fast_slicing() # back to normal - assert(b2h5py.is_fast_slicing_enabled()) + b2h5py.disable_fast_slicing() # back to normal + assert(not b2h5py.is_fast_slicing_enabled()) print() # Enabling Blosc2 optimized slicing temporarily @@ -106,7 +107,6 @@ def printl(*args, **kwargs): print("# Enabling Blosc2 optimized slicing temporarily") with h5py.File(file_name, 'r') as f: import b2h5py - b2h5py.disable_fast_slicing() assert(not b2h5py.is_fast_slicing_enabled()) dataset = f[dataset_name] printl("Slice from dataset (filter):", dataset[150:, 150:]) From 68e61e8812b8c2e5d6a03820150edcc7d8b48690 Mon Sep 17 00:00:00 2001 From: Ivan Vilata-i-Balaguer Date: Wed, 13 Dec 2023 09:34:03 +0100 Subject: [PATCH 08/14] Run h5py tests under optimizing context manager. Since `import b2h5py` no longer enables optimization implicitly. 
--- b2h5py/tests/test_patched_h5py.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/b2h5py/tests/test_patched_h5py.py b/b2h5py/tests/test_patched_h5py.py index 1adcc8f..4f12d89 100644 --- a/b2h5py/tests/test_patched_h5py.py +++ b/b2h5py/tests/test_patched_h5py.py @@ -21,7 +21,8 @@ def run_h5py_tests(): os.path.dirname(h5py.tests.__file__), top_level_dir=os.path.dirname(os.path.dirname(h5py.__file__))) test_runner = unittest.TextTestRunner() - test_runner.run(test_suite) + with b2h5py.fast_slicing(): + test_runner.run(test_suite) if __name__ == '__main__': From 60f18662fd5acd132ebb5aedd45967a668281d25 Mon Sep 17 00:00:00 2001 From: Ivan Vilata-i-Balaguer Date: Wed, 13 Dec 2023 09:47:12 +0100 Subject: [PATCH 09/14] Update context manager tests to explicit patching. --- b2h5py/tests/test_dataset_patching.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/b2h5py/tests/test_dataset_patching.py b/b2h5py/tests/test_dataset_patching.py index 05d4e45..3724479 100644 --- a/b2h5py/tests/test_dataset_patching.py +++ b/b2h5py/tests/test_dataset_patching.py @@ -6,7 +6,7 @@ import contextlib import functools -import b2h5py # monkey-patches h5py.Dataset +import b2h5py from h5py import Dataset from h5py.tests.common import TestCase @@ -98,14 +98,6 @@ class ContextManagerTestCase(TestCase): shall_raise = False - def setUp(self): - super().setUp() - b2h5py.disable_fast_slicing() - - def tearDown(self): - b2h5py.enable_fast_slicing() - super().tearDown() - def patching_cmgr(self): """Checks for error if `self.shall_raise`, patches dataset class""" test_case = self @@ -142,10 +134,13 @@ def test_already_patched(self): """Not unpatching if already patched before entry""" b2h5py.enable_fast_slicing() self.assertTrue(b2h5py.is_fast_slicing_enabled()) - with self.patching_cmgr(): + try: + with self.patching_cmgr(): + self.assertTrue(b2h5py.is_fast_slicing_enabled()) + self.maybe_raise() 
self.assertTrue(b2h5py.is_fast_slicing_enabled()) - self.maybe_raise() - self.assertTrue(b2h5py.is_fast_slicing_enabled()) + finally: + b2h5py.disable_fast_slicing() def test_nested(self): """Nesting patching context managers""" From 0e6a55650df97854f726fe22dd32352078f6786f Mon Sep 17 00:00:00 2001 From: Ivan Vilata-i-Balaguer Date: Wed, 13 Dec 2023 09:56:07 +0100 Subject: [PATCH 10/14] Update dataset class patching tests to explicit patching. --- b2h5py/tests/test_dataset_patching.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/b2h5py/tests/test_dataset_patching.py b/b2h5py/tests/test_dataset_patching.py index 3724479..810c3f8 100644 --- a/b2h5py/tests/test_dataset_patching.py +++ b/b2h5py/tests/test_dataset_patching.py @@ -13,26 +13,22 @@ class Blosc2DatasetPatchingTestCase(TestCase): - def setUp(self): - super().setUp() - b2h5py.enable_fast_slicing() - def tearDown(self): - b2h5py.enable_fast_slicing() + b2h5py.disable_fast_slicing() super().tearDown() def test_default(self): - """Dataset class is patched by default""" - self.assertTrue(b2h5py.is_fast_slicing_enabled()) - - def test_unpatch_patch(self): - """Unpatching and patching dataset class again""" - b2h5py.disable_fast_slicing() + """Dataset class is not patched by default""" self.assertFalse(b2h5py.is_fast_slicing_enabled()) + def test_patch_unpatch(self): + """Patching and unpatching dataset class again""" b2h5py.enable_fast_slicing() self.assertTrue(b2h5py.is_fast_slicing_enabled()) + b2h5py.disable_fast_slicing() + self.assertFalse(b2h5py.is_fast_slicing_enabled()) + def test_patch_again(self): """Patching the dataset class twice""" b2h5py.enable_fast_slicing() @@ -53,8 +49,6 @@ def test_unpatch_again(self): def test_patch_patched(self): """Patching when already patched by someone else""" - b2h5py.disable_fast_slicing() - @functools.wraps(Dataset.__getitem__) def foreign_getitem(self, args, new_dtype=None): return 42 @@ -75,6 +69,7 @@ def 
foreign_getitem(self, args, new_dtype=None): def test_unpatch_foreign(self): """Unpatching when patched over by someone else""" + b2h5py.enable_fast_slicing() @functools.wraps(Dataset.__getitem__) def foreign_getitem(self, args, new_dtype=None): From d977c89ee17bb31881b0e98faa98215277c41e02 Mon Sep 17 00:00:00 2001 From: Ivan Vilata-i-Balaguer Date: Wed, 13 Dec 2023 10:26:36 +0100 Subject: [PATCH 11/14] Ensure that patching and context manager default tests run first. --- b2h5py/tests/test_dataset_patching.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/b2h5py/tests/test_dataset_patching.py b/b2h5py/tests/test_dataset_patching.py index 810c3f8..d93ca19 100644 --- a/b2h5py/tests/test_dataset_patching.py +++ b/b2h5py/tests/test_dataset_patching.py @@ -17,7 +17,7 @@ def tearDown(self): b2h5py.disable_fast_slicing() super().tearDown() - def test_default(self): + def test_00default(self): """Dataset class is not patched by default""" self.assertFalse(b2h5py.is_fast_slicing_enabled()) @@ -110,7 +110,7 @@ def maybe_raise(self): if self.shall_raise: raise CMTestError - def test_default(self): + def test_00default(self): """Dataset class is patched then unpatched""" self.assertFalse(b2h5py.is_fast_slicing_enabled()) with self.patching_cmgr(): From bedf90299fbcd119631757f315b468545c7fe965 Mon Sep 17 00:00:00 2001 From: Ivan Vilata-i-Balaguer Date: Wed, 13 Dec 2023 10:27:22 +0100 Subject: [PATCH 12/14] Add test for importing `b2h5py.auto`. 
--- b2h5py/tests/test_dataset_patching.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/b2h5py/tests/test_dataset_patching.py b/b2h5py/tests/test_dataset_patching.py index d93ca19..5e4eda8 100644 --- a/b2h5py/tests/test_dataset_patching.py +++ b/b2h5py/tests/test_dataset_patching.py @@ -83,6 +83,12 @@ def foreign_getitem(self, args, new_dtype=None): finally: Dataset.__getitem__ = foreign_getitem.__wrapped__ + def test_auto(self): + """Patching on importing auto module""" + self.assertFalse(b2h5py.is_fast_slicing_enabled()) + import b2h5py.auto as b2a + self.assertTrue(b2h5py.is_fast_slicing_enabled()) + class CMTestError(Exception): pass From cab125765e203f3c49ba610fedd448e340080a76 Mon Sep 17 00:00:00 2001 From: Ivan Vilata-i-Balaguer Date: Wed, 13 Dec 2023 10:46:59 +0100 Subject: [PATCH 13/14] Update optimized slicing tests to explicit patching. --- b2h5py/tests/test_slicing_blosc2.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/b2h5py/tests/test_slicing_blosc2.py b/b2h5py/tests/test_slicing_blosc2.py index 1c567bf..fb324ef 100644 --- a/b2h5py/tests/test_slicing_blosc2.py +++ b/b2h5py/tests/test_slicing_blosc2.py @@ -8,7 +8,7 @@ import functools import random -import b2h5py # monkey-patches h5py.Dataset +import b2h5py import hdf5plugin as h5p import numpy as np @@ -53,7 +53,7 @@ def checked_test(self): class Blosc2OptSlicingTestCase(TestCase, StoreArrayMixin): - """Blosc2 optimized slicing""" + """Blosc2 optimized slicing by patching dataset class""" blosc2_force_filter = False @@ -67,8 +67,10 @@ def setUp(self): self.blosc2_filter_env = os.environ.get('BLOSC2_FILTER', '0') os.environ['BLOSC2_FILTER'] = '1' if self.blosc2_force_filter else '0' + b2h5py.enable_fast_slicing() def tearDown(self): + b2h5py.disable_fast_slicing() os.environ['BLOSC2_FILTER'] = self.blosc2_filter_env super().tearDown() @@ -167,7 +169,7 @@ def test_astype(self): class Blosc2FiltSlicingTestCase(Blosc2OptSlicingTestCase): - """Blosc2 
filter slicing""" + """Blosc2 filter slicing forced by environment variable""" blosc2_force_filter = True @@ -179,10 +181,6 @@ def setUp(self): super().setUp() b2h5py.disable_fast_slicing() - def tearDown(self): - b2h5py.enable_fast_slicing() - super().tearDown() - def should_enable_opt(self): return False @@ -214,6 +212,11 @@ def setUp(self): self.chunks = (2, 2, 1) self.arr = np.arange(np.prod(shape), dtype="u1").reshape(shape) StoreArrayMixin.setUp(self) + b2h5py.enable_fast_slicing() + + def tearDown(self): + b2h5py.disable_fast_slicing() + TestCase.tearDown(self) def should_enable_opt(self): return True @@ -246,6 +249,11 @@ def setUp(self): arr[4, 4] = (9, 9) self.arr = arr StoreArrayMixin.setUp(self) + b2h5py.enable_fast_slicing() + + def tearDown(self): + b2h5py.disable_fast_slicing() + TestCase.tearDown(self) def should_enable_opt(self): return True From 2d47f23badf6e01f037a51ed484e556373d8efb8 Mon Sep 17 00:00:00 2001 From: Ivan Vilata-i-Balaguer Date: Wed, 13 Dec 2023 10:47:20 +0100 Subject: [PATCH 14/14] Do check for enabled optimization in optimized slicing tests. Otherwise the exception that should be raised never is. --- b2h5py/tests/test_slicing_blosc2.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/b2h5py/tests/test_slicing_blosc2.py b/b2h5py/tests/test_slicing_blosc2.py index fb324ef..5eb67b3 100644 --- a/b2h5py/tests/test_slicing_blosc2.py +++ b/b2h5py/tests/test_slicing_blosc2.py @@ -42,6 +42,9 @@ def check_opt_slicing(test): def checked_test(self): if not self.should_enable_opt(): return test(self) + # If the dataset class is not patched, + # the exception set below is never raised anyway. + self.assertTrue(b2h5py.is_fast_slicing_enabled()) # Force an exception if the optimization is not used. orig_exc = b2h5py.blosc2._no_opt_error b2h5py.blosc2._no_opt_error = Blosc2OptNotUsedError