Skip to content

Commit

Permalink
First Review: Enhanced Schunk Examples
Browse files Browse the repository at this point in the history
  • Loading branch information
omaech committed Sep 11, 2024
1 parent 7f80f0c commit 36d36aa
Showing 1 changed file with 53 additions and 47 deletions.
100 changes: 53 additions & 47 deletions src/blosc2/schunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,7 @@ def append_data(self, data):
>>> import numpy as np
>>> schunk = blosc2.SChunk(chunksize=200*1000*4)
>>> data = np.arange(200 * 1000, dtype='int32')
>>> print(schunk.append_data(data))
>>> schunk.append_data(data)
1
"""
blosc2_ext.check_access_mode(self.urlpath, self.mode)
Expand Down Expand Up @@ -440,8 +440,8 @@ def fill_special(self, nitems, special_value):
>>> nchunks = schunk.fill_special(nitems, special_value)
>>> print(f"Number of chunks filled: {nchunks}")
Number of chunks filled: 1
>>> print(f"Number of chunks after fill_special: {len(schunk)}")
Number of chunks after fill_special: 200000
>>> print(f"Number of chunks after fill_special: {schunk.nchunks}")
Number of chunks after fill_special: 1
"""
if not isinstance(special_value, SpecialValue):
raise TypeError("special_value must be a SpecialValue instance")
Expand Down Expand Up @@ -524,8 +524,8 @@ def get_chunk(self, nchunk):
>>> # Check the type and length of the compressed chunk
>>> print(type(chunk))
<class 'bytes'>
>>> print(len(chunk) > 0)
True
>>> # The compressed size is smaller than the chunk size (200*1000*4)
>>> print(len(chunk))
3742
"""
return super().get_chunk(nchunk)

Expand Down Expand Up @@ -596,7 +596,7 @@ def insert_chunk(self, nchunk, chunk):
>>> schunk = blosc2.SChunk(chunksize=200*1000*4, data=data, cparams={"typesize": 4})
>>> # Get a compressed chunk from the SChunk
>>> chunk = schunk.get_chunk(0)
>>> # Insert the chunk at a different position (in this case, at index 1)
>>> # Insert a chunk in the second position (index 1)
>>> schunk.insert_chunk(1, chunk)
>>> # Verify the total number of chunks after insertion
>>> print(schunk.nchunks)
Expand Down Expand Up @@ -634,8 +634,7 @@ def insert_data(self, nchunk, data, copy):
>>> # Create an SChunk with 2 chunks
>>> data = np.arange(400 * 1000, dtype=np.int32)
>>> schunk = blosc2.SChunk(chunksize=200*1000*4, data=data, cparams={"typesize": 4})
>>> # New data; create a NumPy array containing 200,000 sequential integers, starting from 0 up to 199,999.
>>> # Each element in the array is of type int32.
>>> # Create a new array to insert into the second chunk of the SChunk
>>> new_data = np.arange(200 * 1000, dtype=np.int32)
>>> # Insert the new data at position 1, compressing it
>>> schunk.insert_data(1, new_data, copy=True)
Expand Down Expand Up @@ -674,18 +673,16 @@ def update_chunk(self, nchunk, chunk):
>>> chunk_size = 200 * 1000 * 4
>>> data = np.arange(nchunks * chunk_size // 4, dtype=np.int32)
>>> schunk = blosc2.SChunk(chunksize=chunk_size, data=data, cparams={"typesize": 4})
>>> initial_nchunks = schunk.nchunks
>>> print(f"Initial number of chunks: {initial_nchunks}")
>>> print(f"Initial number of chunks: {schunk.nchunks}")
Initial number of chunks: 5
>>> chunk_index = 1
>>> new_data = np.full(chunk_size // 4, fill_value=chunk_index, dtype=np.int32).tobytes()
>>> compressed_data = blosc2.compress(new_data, typesize=4)
>>> c_index = 1
>>> new_data = np.full(chunk_size // 4, fill_value=c_index, dtype=np.int32).tobytes()
>>> compressed_data = blosc2.compress2(new_data, typesize=4)
>>> # Update the 2nd chunk (index 1) with new data
>>> nchunks = schunk.update_chunk(chunk_index, compressed_data)
>>> nchunks = schunk.update_chunk(c_index, compressed_data)
>>> print(f"Number of chunks after update: {nchunks}")
Number of chunks after update: 5
"""

blosc2_ext.check_access_mode(self.urlpath, self.mode)
return super().update_chunk(nchunk, chunk)

Expand Down Expand Up @@ -719,14 +716,12 @@ def update_data(self, nchunk, data, copy):
>>> chunk_size = 200 * 1000 * 4
>>> data = np.arange(nchunks * chunk_size // 4, dtype=np.int32)
>>> schunk = blosc2.SChunk(chunksize=chunk_size, data=data, cparams={"typesize": 4})
>>> initial_nchunks = schunk.nchunks
>>> print(f"Initial number of chunks: {initial_nchunks}")
>>> print(f"Initial number of chunks: {schunk.nchunks}")
Initial number of chunks: 4
>>> chunk_index = 1 # Update the 2nd chunk (index 1)
>>> new_data = np.full(chunk_size // 4, fill_value=chunk_index, dtype=np.int32).tobytes()
>>> nchunks = schunk.update_data(chunk_index, new_data, copy=True)
>>> final_nchunks = schunk.nchunks
>>> print(f"Number of chunks after update: {final_nchunks}")
>>> c_index = 1 # Update the 2nd chunk (index 1)
>>> new_data = np.full(chunk_size // 4, fill_value=c_index, dtype=np.int32).tobytes()
>>> nchunks = schunk.update_data(c_index, new_data, copy=True)
>>> print(f"Number of chunks after update: {schunk.nchunks}")
Number of chunks after update: 4
"""
blosc2_ext.check_access_mode(self.urlpath, self.mode)
Expand Down Expand Up @@ -785,16 +780,12 @@ def get_slice(self, start=0, stop=None, out=None):
>>> slice_size = stop_index - start_index
>>> out_buffer = bytearray(slice_size * 4) # Ensure the buffer is large enough
>>> result = schunk.get_slice(start=start_index, stop=stop_index, out=out_buffer)
>>> # Check if `result` is None and if the output buffer was filled
>>> if result is None:
>>> print(f"Data slice obtained successfully. Length of slice: {len(out_buffer)}")
Data slice obtained successfully. Length of slice: 400000
>>> # Convert bytearray to NumPy array for easier inspection
>>> slice_array = np.frombuffer(out_buffer, dtype=np.int32)
>>> print(f"Slice data: {slice_array[:10]} ...") # Print the first 10 elements
>>> # Assert that the output buffer matches the expected slice from the original data
>>> assert out_buffer == data[start_index:stop_index].tobytes(), "Slice data does not match expected values."
>>> # Convert bytearray to NumPy array for easier inspection
>>> slice_array = np.frombuffer(out_buffer, dtype=np.int32)
>>> print(f"Slice data: {slice_array[:10]} ...") # Print the first 10 elements
Slice data: [100000 100001 100002 100003 100004 100005 100006 100007 100008 100009] ...
>>> else:
>>> print("Data slice obtained successfully.")
"""
return super().get_slice(start, stop, out)

Expand Down Expand Up @@ -833,13 +824,15 @@ def __getitem__(self, item):
>>> chunk_size = 200 * 1000 * 4
>>> data = np.arange(nchunks * chunk_size // 4, dtype=np.int32)
>>> schunk = blosc2.SChunk(chunksize=chunk_size, data=data, cparams={"typesize": 4})
>>> # Define a slice of the data array that we want to compare with the result from SChunk
>>> # Define a slice of the original data array to compare with the result from __getitem__
>>> sl = data[150:155]
>>> # Use the get_slice method of SChunk to get the data from the same slice range
>>> res = schunk.get_slice(150, 155)
>>> # Check if the retrieved slice from SChunk matches the original slice
>>> # Convert the original slice to bytes and compare with the result from SChunk
>>> assert res == sl.tobytes()
>>> # Use __getitem__ to retrieve the same slice of data from the SChunk
>>> res = schunk[150:155]
>>> # Compare the result of __getitem__ with the original slice converted to bytes
>>> assert res == sl.tobytes(), "The result from __getitem__ does not match the original data."
>>> # Display the first few elements of the retrieved slice
>>> print(f"Slice data: {np.frombuffer(res, dtype=np.int32)}")
Slice data: [150 151 152 153 154]
"""
if isinstance(item, int):
if item == -1:
Expand Down Expand Up @@ -894,10 +887,12 @@ def __setitem__(self, key, value):
>>> start_ = 1000
>>> stop = 2000
>>> new_values = np.arange(start_, stop, dtype=np.int32) * 2
>>> schunk.__setitem__(slice(start_, stop), new_values)
>>> sl = schunk[start_:stop]
>>> res = schunk.get_slice(start_, stop)
>>> assert res == sl
>>> # Use __setitem__ by the slicing syntax to update the chunk
>>> schunk[start_:stop] = new_values
>>> # Retrieve the updated slice using the slicing syntax
>>> retrieved_slice = np.frombuffer(schunk[start_:stop], dtype=np.int32)
>>> # Compare the retrieved slice with the new values to ensure they match
>>> assert np.array_equal(retrieved_slice, new_values), "The updated slice does not match the new values."
>>> print("The slice comparison is successful!")
The slice comparison is successful!
"""
Expand Down Expand Up @@ -1128,6 +1123,10 @@ def remove_postfilter(self, func_name, _new_ctx=True):
>>> print("Data slice with postfilter applied (first 8 elements):", out[:8])
Data slice with postfilter applied (first 8 elements): [ 0 2 4 6 8 10 12 14]
>>> schunk.remove_postfilter('postfilter')
>>> retrieved_data = np.empty(data.size, dtype=dtype)
>>> schunk.get_slice(out=retrieved_data)
>>> # Compare the retrieved data to the original data
>>> assert np.array_equal(retrieved_data, data), "The data without postfilter does not match the original data."
>>> print("Original data (first 8 elements):", data[:8])
Original data (first 8 elements): [0 1 2 3 4 5 6 7]
"""
Expand Down Expand Up @@ -1297,23 +1296,30 @@ def remove_prefilter(self, func_name, _new_ctx=True):
>>> chunk_size = 20_000 * dtype.itemsize
>>> cparams = {"typesize": dtype.itemsize, "nthreads": 1}
>>> data = np.arange(1000, dtype=np.int32)
>>> output_dtype = np.float32
>>> schunk = blosc2.SChunk(chunksize=chunk_size, cparams=cparams)
>>> # Define the prefilter function
>>> @schunk.prefilter(input_dtype, output_dtype)
>>> def prefilter(input, output, offset):
>>> output[:] = input - np.pi
>>> schunk[:1000] = data
>>> # Retrieve compressed data with prefilter applied
>>> compressed_data_with_filter = schunk[0:5]
>>> compressed_data_with_filter = schunk.get_slice()
>>> # Convert the bytes to NumPy array for comparison
>>> compressed_array_with_filter = np.frombuffer(compressed_data_with_filter, dtype=output_dtype)
>>> print("Compressed data with prefilter applied:", compressed_array_with_filter)
Compressed data with prefilter applied: [-3.1415927 -2.1415927 -1.1415927 -0.14159274 0.85840726]
>>> print("Compressed data with prefilter applied (first 8 elements):", compressed_array_with_filter[:8])
Compressed data with prefilter applied (first 8 elements): [-3.1415927 -2.1415927 -1.1415926 -0.14159265 0.8584073 1.8584074
2.8584073 3.8584073 ]
>>> schunk.remove_prefilter('prefilter')
>>> schunk[:1000] = data
>>> compressed_data_without_filter = schunk[0:5]
>>> compressed_array_without_filter = np.frombuffer(schunk[0:5], dtype=output_dtype)
>>> print("Compressed data without prefilter:", compressed_array_without_filter)
>>> compressed_data_without_filter = schunk.get_slice()
>>> compressed_array_without_filter = np.frombuffer(compressed_data_without_filter, dtype=output_dtype)
>>> print("Compressed data without prefilter (first 8 elements):", compressed_array_without_filter[:8])
Compressed data without prefilter (first 8 elements): [0. 1. 2. 3. 4. 5. 6. 7.]
>>> # Compare the decompressed data to the original data
>>> assert np.array_equal(compressed_array_without_filter, data), "The data without prefilter does not match the original data."
>>> print("The slice comparison is successful!")
The slice comparison is successful!
"""
return super().remove_prefilter(func_name)

Expand Down

0 comments on commit 36d36aa

Please sign in to comment.