Skip to content

Commit

Permalink
Merge branch 'explicit-patching'.
Browse files Browse the repository at this point in the history
This changes the default behaviour of importing `b2h5py` so that explicit
patching of the `h5py.Dataset` class is required, either by calling the
function `b2h5py.enable_fast_slicing()` or by using the
`b2h5py.fast_slicing()` context manager.  The old behaviour can still be
obtained by explicitly importing `b2h5py.auto`.

Documentation and tests have been updated accordingly.
  • Loading branch information
ivilata committed Dec 13, 2023
2 parents 0cd283e + 2d47f23 commit 229d25e
Show file tree
Hide file tree
Showing 7 changed files with 85 additions and 64 deletions.
9 changes: 6 additions & 3 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,18 @@ Benchmarks of this technique show 2x-5x speed-ups compared with normal filter-ba
Usage
-----

This optimized access works for slices with step 1 on Blosc2-compressed datasets using the native byte order. It is enabled by monkey-patching the ``h5py.Dataset`` class to extend the slicing operation. This is done on module import, so the only thing you need to do is::
This optimized access works for slices with step 1 on Blosc2-compressed datasets using the native byte order. It is enabled by monkey-patching the ``h5py.Dataset`` class to extend the slicing operation. The easiest way to do this is::

import b2h5py
import b2h5py.auto

After that, optimization will be attempted for any slicing of a dataset (of the form ``dataset[...]`` or ``dataset.__getitem__(...)``). If the optimization is not possible in a particular case, normal h5py slicing code will be used (which performs HDF5 filter-based access, backed by hdf5plugin_ to support Blosc2).

.. _hdf5plugin: https://github.com/silx-kit/hdf5plugin

You may globally disable the optimization after importing ``b2h5py`` by calling ``b2h5py.disable_fast_slicing()``, and enable it again with ``b2h5py.enable_fast_slicing()``. You may also enable it temporarily by using ``b2h5py.fast_slicing()`` to get a context manager.
You may instead just ``import b2h5py`` and explicitly enable the optimization globally by calling ``b2h5py.enable_fast_slicing()``, and disable it again with ``b2h5py.disable_fast_slicing()``. You may also enable it temporarily by using a context manager::

with b2h5py.fast_slicing():
# ... code that will use Blosc2 optimized slicing ...

Building
--------
Expand Down
21 changes: 9 additions & 12 deletions b2h5py/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,17 @@
Optimizations are applied to slices of the form ``dataset[...]`` or
``dataset.__getitem__(...)`` with step 1 on Blosc2-compressed datasets using
the native byte order.
the native byte order. They are implemented by monkey-patching the
``h5py.Dataset`` class.
They are enabled automatically on module import, by monkey-patching the
``h5py.Dataset`` class. You may explicitly undo this patching and deactivate
optimization globally with `disable_fast_slicing()` and redo it and activate
it again with `enable_fast_slicing()`. You may also patch the class and
activate optimization temporarily using `fast_slicing()` to get a context
manager.
Optimizations need to be enabled explicitly. One option is to call
`enable_fast_slicing()` to enable them globally (by performing the patching).
Then `disable_fast_slicing()` may be called to disable them again (by undoing
the patching). As an alternative, you may also activate optimizations
temporarily using `fast_slicing()` to get a context manager.
**Note:** For testing and debugging purposes, you may force-disable the
optimization at any time by setting ``BLOSC2_FILTER=1`` in the environment.
**Note:** For testing and debugging purposes, you may force-disable
optimizations at any time by setting ``BLOSC2_FILTER=1`` in the environment.
"""

from .blosc2 import (disable_fast_slicing,
Expand All @@ -25,6 +25,3 @@
'enable_fast_slicing',
'fast_slicing',
'is_fast_slicing_enabled']


enable_fast_slicing()
14 changes: 14 additions & 0 deletions b2h5py/auto.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"""Automatic activation of Blosc2 optimized slicing for h5py.
Importing this module enables the optimization globally; just use::
import b2h5py.auto
After that, all slicing operations on Blosc2-compressed datasets will be
transparently optimized when possible.
"""

from .blosc2 import enable_fast_slicing


enable_fast_slicing()
50 changes: 23 additions & 27 deletions b2h5py/tests/test_dataset_patching.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,33 +6,29 @@
import contextlib
import functools

import b2h5py # monkey-patches h5py.Dataset
import b2h5py

from h5py import Dataset
from h5py.tests.common import TestCase


class Blosc2DatasetPatchingTestCase(TestCase):
def setUp(self):
super().setUp()
b2h5py.enable_fast_slicing()

def tearDown(self):
b2h5py.enable_fast_slicing()
b2h5py.disable_fast_slicing()
super().tearDown()

def test_default(self):
"""Dataset class is patched by default"""
self.assertTrue(b2h5py.is_fast_slicing_enabled())

def test_unpatch_patch(self):
"""Unpatching and patching dataset class again"""
b2h5py.disable_fast_slicing()
def test_00default(self):
"""Dataset class is not patched by default"""
self.assertFalse(b2h5py.is_fast_slicing_enabled())

def test_patch_unpatch(self):
"""Patching and unpatching dataset class again"""
b2h5py.enable_fast_slicing()
self.assertTrue(b2h5py.is_fast_slicing_enabled())

b2h5py.disable_fast_slicing()
self.assertFalse(b2h5py.is_fast_slicing_enabled())

def test_patch_again(self):
"""Patching the dataset class twice"""
b2h5py.enable_fast_slicing()
Expand All @@ -53,8 +49,6 @@ def test_unpatch_again(self):

def test_patch_patched(self):
"""Patching when already patched by someone else"""
b2h5py.disable_fast_slicing()

@functools.wraps(Dataset.__getitem__)
def foreign_getitem(self, args, new_dtype=None):
return 42
Expand All @@ -75,6 +69,7 @@ def foreign_getitem(self, args, new_dtype=None):

def test_unpatch_foreign(self):
"""Unpatching when patched over by someone else"""
b2h5py.enable_fast_slicing()

@functools.wraps(Dataset.__getitem__)
def foreign_getitem(self, args, new_dtype=None):
Expand All @@ -88,6 +83,12 @@ def foreign_getitem(self, args, new_dtype=None):
finally:
Dataset.__getitem__ = foreign_getitem.__wrapped__

def test_auto(self):
"""Patching on importing auto module"""
self.assertFalse(b2h5py.is_fast_slicing_enabled())
import b2h5py.auto as b2a
self.assertTrue(b2h5py.is_fast_slicing_enabled())


class CMTestError(Exception):
pass
Expand All @@ -98,14 +99,6 @@ class ContextManagerTestCase(TestCase):

shall_raise = False

def setUp(self):
super().setUp()
b2h5py.disable_fast_slicing()

def tearDown(self):
b2h5py.enable_fast_slicing()
super().tearDown()

def patching_cmgr(self):
"""Checks for error if `self.shall_raise`, patches dataset class"""
test_case = self
Expand All @@ -123,7 +116,7 @@ def maybe_raise(self):
if self.shall_raise:
raise CMTestError

def test_default(self):
def test_00default(self):
"""Dataset class is patched then unpatched"""
self.assertFalse(b2h5py.is_fast_slicing_enabled())
with self.patching_cmgr():
Expand All @@ -142,10 +135,13 @@ def test_already_patched(self):
"""Not unpatching if already patched before entry"""
b2h5py.enable_fast_slicing()
self.assertTrue(b2h5py.is_fast_slicing_enabled())
with self.patching_cmgr():
try:
with self.patching_cmgr():
self.assertTrue(b2h5py.is_fast_slicing_enabled())
self.maybe_raise()
self.assertTrue(b2h5py.is_fast_slicing_enabled())
self.maybe_raise()
self.assertTrue(b2h5py.is_fast_slicing_enabled())
finally:
b2h5py.disable_fast_slicing()

def test_nested(self):
"""Nesting patching context managers"""
Expand Down
3 changes: 2 additions & 1 deletion b2h5py/tests/test_patched_h5py.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ def run_h5py_tests():
os.path.dirname(h5py.tests.__file__),
top_level_dir=os.path.dirname(os.path.dirname(h5py.__file__)))
test_runner = unittest.TextTestRunner()
test_runner.run(test_suite)
with b2h5py.fast_slicing():
test_runner.run(test_suite)


if __name__ == '__main__':
Expand Down
25 changes: 18 additions & 7 deletions b2h5py/tests/test_slicing_blosc2.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import functools
import random

import b2h5py # monkey-patches h5py.Dataset
import b2h5py
import hdf5plugin as h5p
import numpy as np

Expand Down Expand Up @@ -42,6 +42,9 @@ def check_opt_slicing(test):
def checked_test(self):
if not self.should_enable_opt():
return test(self)
# If the dataset class is not patched,
# the exception set below is never raised anyway.
self.assertTrue(b2h5py.is_fast_slicing_enabled())
# Force an exception if the optimization is not used.
orig_exc = b2h5py.blosc2._no_opt_error
b2h5py.blosc2._no_opt_error = Blosc2OptNotUsedError
Expand All @@ -53,7 +56,7 @@ def checked_test(self):


class Blosc2OptSlicingTestCase(TestCase, StoreArrayMixin):
"""Blosc2 optimized slicing"""
"""Blosc2 optimized slicing by patching dataset class"""

blosc2_force_filter = False

Expand All @@ -67,8 +70,10 @@ def setUp(self):

self.blosc2_filter_env = os.environ.get('BLOSC2_FILTER', '0')
os.environ['BLOSC2_FILTER'] = '1' if self.blosc2_force_filter else '0'
b2h5py.enable_fast_slicing()

def tearDown(self):
b2h5py.disable_fast_slicing()
os.environ['BLOSC2_FILTER'] = self.blosc2_filter_env
super().tearDown()

Expand Down Expand Up @@ -167,7 +172,7 @@ def test_astype(self):


class Blosc2FiltSlicingTestCase(Blosc2OptSlicingTestCase):
"""Blosc2 filter slicing"""
"""Blosc2 filter slicing forced by environment variable"""

blosc2_force_filter = True

Expand All @@ -179,10 +184,6 @@ def setUp(self):
super().setUp()
b2h5py.disable_fast_slicing()

def tearDown(self):
b2h5py.enable_fast_slicing()
super().tearDown()

def should_enable_opt(self):
return False

Expand Down Expand Up @@ -214,6 +215,11 @@ def setUp(self):
self.chunks = (2, 2, 1)
self.arr = np.arange(np.prod(shape), dtype="u1").reshape(shape)
StoreArrayMixin.setUp(self)
b2h5py.enable_fast_slicing()

def tearDown(self):
b2h5py.disable_fast_slicing()
TestCase().tearDown()

def should_enable_opt(self):
return True
Expand Down Expand Up @@ -246,6 +252,11 @@ def setUp(self):
arr[4, 4] = (9, 9)
self.arr = arr
StoreArrayMixin.setUp(self)
b2h5py.enable_fast_slicing()

def tearDown(self):
b2h5py.disable_fast_slicing()
TestCase().tearDown()

def should_enable_opt(self):
return True
Expand Down
27 changes: 13 additions & 14 deletions examples/blosc2_optimized_slicing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@
It creates a 2-dimensional dataset made of different chunks, compressed with
Blosc2. Then it proceeds to slice the dataset in ways that may and may not
benefit from Blosc2 optimized slicing. Some hints about forcing the use of
the HDF5 filter pipeline are included, as well as comments on the Python
package dependencies required for the different use cases.
benefit from Blosc2 optimized slicing. Examples of different ways to enable
Blosc2 optimized slicing are shown.
Optimized slicing can provide considerable speed-ups in certain use cases,
please see `this benchmark`__ which evaluates applying the same technique in
Expand Down Expand Up @@ -66,11 +65,11 @@ def printl(*args, **kwargs):

# Benefitting from Blosc2 optimized slicing
# -----------------------------------------
# After importing `b2h5py`,
# After importing `b2h5py.auto`,
# support for Blosc2 optimized slicing is enabled by default.
print("# Using Blosc2 optimized slicing")
with h5py.File(file_name, 'r') as f:
import b2h5py
import b2h5py.auto
assert(b2h5py.is_fast_slicing_enabled())
# One just uses slicing as usual.
dataset = f[dataset_name]
Expand All @@ -83,21 +82,22 @@ def printl(*args, **kwargs):
printl("Sparse slice from dataset (filter):", dataset[150::2, 150::2])
printl("Sparse slice from input array:", data[150::2, 150::2])
print()
b2h5py.disable_fast_slicing() # back to normal

# Disabling Blosc2 optimized slicing
# ----------------------------------
# Enabling Blosc2 optimized slicing
# ---------------------------------
# Utility functions are provided to enable and disable optimization globally.
print("# Disabling Blosc2 optimized slicing globally")
print("# Enabling Blosc2 optimized slicing globally")
with h5py.File(file_name, 'r') as f:
import b2h5py
assert(b2h5py.is_fast_slicing_enabled())
b2h5py.disable_fast_slicing()
assert(not b2h5py.is_fast_slicing_enabled())
b2h5py.enable_fast_slicing()
assert(b2h5py.is_fast_slicing_enabled())
dataset = f[dataset_name]
printl("Slice from dataset (filter):", dataset[150:, 150:])
printl("Slice from dataset (optimized):", dataset[150:, 150:])
printl("Slice from input array:", data[150:, 150:])
b2h5py.enable_fast_slicing() # back to normal
assert(b2h5py.is_fast_slicing_enabled())
b2h5py.disable_fast_slicing() # back to normal
assert(not b2h5py.is_fast_slicing_enabled())
print()

# Enabling Blosc2 optimized slicing temporarily
Expand All @@ -107,7 +107,6 @@ def printl(*args, **kwargs):
print("# Enabling Blosc2 optimized slicing temporarily")
with h5py.File(file_name, 'r') as f:
import b2h5py
b2h5py.disable_fast_slicing()
assert(not b2h5py.is_fast_slicing_enabled())
dataset = f[dataset_name]
printl("Slice from dataset (filter):", dataset[150:, 150:])
Expand Down

0 comments on commit 229d25e

Please sign in to comment.