CNDB-9104: Port over chunk cache improvements from DSE #1495

Status: Open. Wants to merge 34 commits into base branch main. The diff below shows changes from 20 of the 34 commits.

Commits:
3968200
Remove unused BufferHolder methods
blambov Jan 8, 2025
1ad3ac4
DB-2904 port: Use same size buffers in chunk cache
blambov Jan 8, 2025
cfa0842
Always use off-heap memory for chunk cache.
blambov Jan 8, 2025
0f84fc2
Use networking buffer pool for compressed reads.
blambov Jan 8, 2025
31a5fda
Allow buffer pool to return one-buffer multi-page chunks
blambov Jan 8, 2025
6aa0130
Port over some chunk cache tests
blambov Jan 9, 2025
1a15fa5
Set up for on-heap memory usage test
blambov Jan 10, 2025
c4f14ee
Introduce fileID and invalidate file by dropping id.
blambov Jan 9, 2025
307aa7f
Store addresses and attachments to avoid a direct buffer per entry
blambov Jan 10, 2025
be317b2
Sleep for jmap
blambov Jan 10, 2025
1313839
Remove pre-computed key hash
blambov Jan 10, 2025
36be7e5
Revert "Sleep for jmap"
blambov Jan 10, 2025
9190348
Revert "Set up for on-heap memory usage test"
blambov Jan 10, 2025
d4e230e
Review changes and license fix
blambov Jan 10, 2025
d5a22c8
Drop ChunkReader reference from Key
blambov Jan 10, 2025
fa2551f
Revert unneeded change
blambov Jan 10, 2025
89817b4
Test improvements
blambov Jan 10, 2025
c0c4716
Use page splitting for large buffers too, to avoid having to store a …
blambov Jan 10, 2025
7ab70b0
Fix test.
blambov Jan 10, 2025
2539b18
Review comments
blambov Jan 13, 2025
c01dbb7
Move code unchanged in ChunkCache.java
blambov Jan 13, 2025
5262c59
Fix test compilation
blambov Jan 13, 2025
7f0d6ba
Change sizeOfFile to accept File
blambov Jan 13, 2025
6686f6d
Fix and test chunk cache retention after early open
blambov Jan 13, 2025
462fb34
Provide precise end position for early-open sstables
blambov Jan 13, 2025
d755d32
Move cache invalidation FileHandle.Builder creation
blambov Jan 14, 2025
9fc879e
Add comment and remove unused method
blambov Jan 14, 2025
09199b3
Test fix
blambov Jan 14, 2025
3f08d7f
Test fix
blambov Jan 14, 2025
ba1232f
Invalidate cache only on request by calling file handle builder's inv…
blambov Jan 14, 2025
226bc15
Revert "Invalidate cache only on request by calling file handle build…
blambov Jan 14, 2025
743e53a
Invalidate both on making SequentialWriter and on global SSTableReade…
blambov Jan 14, 2025
1bbc990
Remove invalidation in SequentialWriter and rely on invalidation duri…
blambov Jan 14, 2025
dac75ad
Change order of cache invalidation and obsoletion
blambov Jan 15, 2025
483 changes: 358 additions & 125 deletions src/java/org/apache/cassandra/cache/ChunkCache.java

Large diffs are not rendered by default.

49 changes: 16 additions & 33 deletions src/java/org/apache/cassandra/io/util/BufferManagingRebufferer.java
@@ -28,12 +28,14 @@

import org.apache.cassandra.utils.memory.BufferPools;

/**
* Buffer manager used for reading from a ChunkReader when cache is not in use. Instances of this class are
* reader-specific and thus do not need to be thread-safe since the reader itself isn't.
*
* The instances reuse themselves as the BufferHolder to avoid having to return a new object for each rebuffer call.
*/
/// Buffer manager used for reading from a [ChunkReader] when cache is not in use. They use a buffer produced by the
/// "networking" buffer pool, which is the one to be used for buffers that are not to be retained for a long time
/// (the lifetime of this object is contained by the lifetime of a [RandomAccessReader] which is contained in a read
/// operation's lifetime).
///
/// Instances of this class are reader-specific and thus do not need to be thread-safe since the reader itself isn't.
///
/// The instances reuse themselves as the BufferHolder to avoid having to return a new object for each rebuffer call.
public abstract class BufferManagingRebufferer implements Rebufferer, Rebufferer.BufferHolder
{
protected final ChunkReader source;
@@ -45,14 +47,20 @@ public abstract class BufferManagingRebufferer implements Rebufferer, Rebufferer
protected BufferManagingRebufferer(ChunkReader wrapped)
{
this.source = wrapped;
buffer = BufferPools.forChunkCache().get(wrapped.chunkSize(), wrapped.preferredBufferType()).order(ByteOrder.BIG_ENDIAN);
// Note: This class uses the networking buffer pool which makes better sense for short-lifetime buffers.
// Because this is meant to be used when the chunk cache is disabled, it also makes sense to use any memory
// that may have been allocated for in-flight data by using the chunk-cache pool.
// However, if some new functionality decides to use this class in the presence of the chunk cache (e.g.
// cache-bypassing compaction), using the chunk-cache pool here will certainly cause hard-to-diagnose issues
// that we would prefer to avoid.
buffer = BufferPools.forNetworking().get(wrapped.chunkSize(), wrapped.preferredBufferType()).order(ByteOrder.BIG_ENDIAN);
buffer.limit(0);
}

@Override
public void closeReader()
{
BufferPools.forChunkCache().put(buffer);
BufferPools.forNetworking().put(buffer);
offset = -1;
}
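
The constructor/closeReader pair above follows a strict acquire-release discipline against the short-lifetime "networking" pool: take the buffer when the rebufferer is created, return it exactly once when the reader closes. A minimal single-threaded sketch of such a recycling pool (the class and method names are hypothetical; the real BufferPools.forNetworking() implementation is considerably more elaborate):

```java
import java.nio.ByteBuffer;
import java.util.ArrayDeque;

// Hypothetical sketch of a recycling pool for short-lived buffers;
// illustrates only the get/put discipline used by the rebufferer.
public class ShortLivedPool
{
    private final ArrayDeque<ByteBuffer> free = new ArrayDeque<>();
    private final int bufferSize;

    public ShortLivedPool(int bufferSize)
    {
        this.bufferSize = bufferSize;
    }

    // acquire: reuse a returned buffer if one is available, else allocate
    public ByteBuffer get()
    {
        ByteBuffer b = free.poll();
        if (b == null)
            b = ByteBuffer.allocateDirect(bufferSize);
        b.clear();
        return b;
    }

    // release: make the buffer available to the next short-lived reader
    public void put(ByteBuffer b)
    {
        free.push(b);
    }
}
```

Releasing exactly once matters: a double put would hand the same buffer to two readers, which is the kind of hard-to-diagnose issue the code comment above warns about when pools are mixed.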

@@ -102,31 +110,6 @@ public ByteBuffer buffer()
return buffer.duplicate();
}

@Override
public ByteOrder order()
{
return buffer.order();
}

@Override
public FloatBuffer floatBuffer()
{
return buffer.asFloatBuffer();
}

@Override
public IntBuffer intBuffer()
{
return buffer.asIntBuffer();
}

@Override
public LongBuffer longBuffer()
{
return buffer.asLongBuffer();
}


public long offset()
{
return offset;
13 changes: 13 additions & 0 deletions src/java/org/apache/cassandra/io/util/ChunkReader.java
@@ -52,4 +52,17 @@ public interface ChunkReader extends RebuffererFactory
* This is not guaranteed to be fulfilled.
*/
BufferType preferredBufferType();

/**
* In some cases we may end up with both compressed and uncompressed data for the same file in
* the cache. This type is used to distinguish between them.
*/
enum ReaderType
{
SIMPLE,
COMPRESSED;
/** The number of types. Declared as a constant to avoid allocating on values(). */
public static final int COUNT = ReaderType.values().length;
}
ReaderType type();
}
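
The new ReaderType enum above keys cache entries by whether they hold plain or compressed data, and its COUNT constant lets callers size per-type arrays without the fresh array allocation that every ReaderType.values() call incurs. A standalone sketch of that pattern (the enum mirrors the diff; ReaderTypeStats and its methods are hypothetical illustrations, not project code):

```java
// Mirrors the enum added in the diff above.
enum ReaderType
{
    SIMPLE,
    COMPRESSED;

    // values() allocates a new array on every call; computing the length
    // once at class-load time avoids that in hot paths.
    public static final int COUNT = ReaderType.values().length;
}

// Hypothetical consumer: one counter slot per reader type.
class ReaderTypeStats
{
    private final long[] hits = new long[ReaderType.COUNT];

    void recordHit(ReaderType type)
    {
        hits[type.ordinal()]++;
    }

    long hitsFor(ReaderType type)
    {
        return hits[type.ordinal()];
    }
}
```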
88 changes: 43 additions & 45 deletions src/java/org/apache/cassandra/io/util/CompressedChunkReader.java
@@ -30,6 +30,7 @@
import org.apache.cassandra.io.sstable.CorruptSSTableException;
import org.apache.cassandra.io.storage.StorageProvider;
import org.apache.cassandra.utils.ChecksumType;
import org.apache.cassandra.utils.memory.BufferPools;

public abstract class CompressedChunkReader extends AbstractReaderFileProxy implements ChunkReader
{
@@ -91,11 +92,14 @@ public Rebufferer instantiateRebufferer()
return new BufferManagingRebufferer.Aligned(this);
}

public ReaderType type()
{
return ReaderType.COMPRESSED;
}

public static class Standard extends CompressedChunkReader
{
// we read the raw compressed bytes into this buffer, then uncompressed them into the provided one.
private final ThreadLocalByteBufferHolder bufferHolder;

public Standard(ChannelProxy channel, CompressionMetadata metadata)
{
this(channel, metadata, 0);
@@ -104,7 +108,6 @@ public Standard(ChannelProxy channel, CompressionMetadata metadata)
public Standard(ChannelProxy channel, CompressionMetadata metadata, long startOffset)
{
super(channel, metadata, startOffset);
bufferHolder = new ThreadLocalByteBufferHolder(metadata.compressor().preferredBufferType());
}

@Override
@@ -122,57 +125,54 @@ public void readChunk(long position, ByteBuffer uncompressed)
: chunk.length;

long chunkOffset = chunk.offset - onDiskStartOffset;
if (chunk.length < maxCompressedLength)
boolean shouldDecompress = chunk.length < maxCompressedLength;
if (shouldDecompress || shouldCheckCrc) // when we need to read the CRC too, follow the decompression path to avoid a second channel read call
{
ByteBuffer compressed = bufferHolder.getBuffer(length);
ByteBuffer compressed = BufferPools.forNetworking().getAtLeast(length, metadata.compressor().preferredBufferType());

if (channel.read(compressed, chunkOffset) != length)
throw new CorruptBlockException(channel.filePath(), chunk);

compressed.flip();
compressed.limit(chunk.length);
uncompressed.clear();

if (shouldCheckCrc)
try
{
int checksum = (int) ChecksumType.CRC32.of(compressed);

compressed.limit(length);
int storedChecksum = compressed.getInt();
if (storedChecksum != checksum)
throw new CorruptBlockException(channel.filePath(), chunk, storedChecksum, checksum);
if (channel.read(compressed, chunkOffset) != length)
throw new CorruptBlockException(channel.filePath(), chunk);

compressed.position(0).limit(chunk.length);
}
if (shouldCheckCrc)
{
// compute checksum of the compressed data
compressed.position(0).limit(chunk.length);
int checksum = (int) ChecksumType.CRC32.of(compressed);
// the remaining bytes are the checksum
compressed.limit(length);
int storedChecksum = compressed.getInt();
if (storedChecksum != checksum)
throw new CorruptBlockException(channel.filePath(), chunk, storedChecksum, checksum);
}

try
{
metadata.compressor().uncompress(compressed, uncompressed);
compressed.position(0).limit(chunk.length);
uncompressed.clear();

try
{
if (shouldDecompress)
metadata.compressor().uncompress(compressed, uncompressed);
else
uncompressed.put(compressed);
}
catch (IOException e)
{
throw new CorruptBlockException(channel.filePath(), chunk, e);
}
}
catch (IOException e)
finally
{
throw new CorruptBlockException(channel.filePath(), chunk, e);
BufferPools.forNetworking().put(compressed);
}
}
else
{
uncompressed.position(0).limit(chunk.length);
if (channel.read(uncompressed, chunkOffset) != chunk.length)
throw new CorruptBlockException(channel.filePath(), chunk);

if (shouldCheckCrc)
{
uncompressed.flip();
int checksum = (int) ChecksumType.CRC32.of(uncompressed);

ByteBuffer scratch = bufferHolder.getBuffer(Integer.BYTES);

if (channel.read(scratch, chunkOffset + chunk.length) != Integer.BYTES)
throw new CorruptBlockException(channel.filePath(), chunk);
int storedChecksum = scratch.getInt(0);
if (storedChecksum != checksum)
throw new CorruptBlockException(channel.filePath(), chunk, storedChecksum, checksum);
}
}
uncompressed.flip();
}
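
The rewritten readChunk above reads the compressed bytes and their trailing CRC in a single channel call whenever checksumming is enabled, then verifies before decompressing. The on-disk layout it checks, [chunk bytes][4-byte CRC32 of those bytes], can be sketched in isolation (this uses java.util.zip.CRC32 rather than the project's ChecksumType, and the class and method names are illustrative only):

```java
import java.nio.ByteBuffer;
import java.util.zip.CRC32;

// Illustrative sketch of the CRC layout verified by the diff's
// shouldCheckCrc branch: data bytes followed by a 4-byte CRC32.
public class ChunkCrcCheck
{
    static boolean verify(ByteBuffer chunk, int dataLength)
    {
        CRC32 crc = new CRC32();
        ByteBuffer data = chunk.duplicate();
        data.position(0).limit(dataLength);
        crc.update(data);                       // checksum of the payload bytes
        int stored = chunk.getInt(dataLength);  // trailing 4 bytes hold the stored CRC
        return stored == (int) crc.getValue();
    }

    public static void main(String[] args)
    {
        byte[] payload = { 1, 2, 3, 4 };
        CRC32 crc = new CRC32();
        crc.update(payload);
        ByteBuffer chunk = ByteBuffer.allocate(payload.length + Integer.BYTES);
        chunk.put(payload).putInt((int) crc.getValue()).flip();
        System.out.println(verify(chunk, payload.length)); // prints true
    }
}
```

Computing the checksum over a duplicate (or via absolute positioning, as the diff does with position(0).limit(chunk.length)) is what lets the same buffer be rewound and handed to the decompressor afterwards.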
@@ -223,24 +223,22 @@ public void readChunk(long position, ByteBuffer uncompressed)
int chunkOffsetInSegment = Ints.checkedCast(chunk.offset - segmentOffset);
ByteBuffer compressedChunk = region.buffer();

compressedChunk.position(chunkOffsetInSegment).limit(chunkOffsetInSegment + chunk.length);

uncompressed.clear();

try
{
if (shouldCheckCrc())
{
compressedChunk.position(chunkOffsetInSegment).limit(chunkOffsetInSegment + chunk.length);
int checksum = (int) ChecksumType.CRC32.of(compressedChunk);

compressedChunk.limit(compressedChunk.capacity());
int storedChecksum = compressedChunk.getInt();
if (storedChecksum != checksum)
throw new CorruptBlockException(channel.filePath(), chunk, storedChecksum, checksum);

compressedChunk.position(chunkOffsetInSegment).limit(chunkOffsetInSegment + chunk.length);
}

compressedChunk.position(chunkOffsetInSegment).limit(chunkOffsetInSegment + chunk.length);
uncompressed.clear();

if (chunk.length < maxCompressedLength)
metadata.compressor().uncompress(compressedChunk, uncompressed);
else
2 changes: 1 addition & 1 deletion src/java/org/apache/cassandra/io/util/FileHandle.java
@@ -238,7 +238,7 @@ public String name()

public void tidy()
{
chunkCache.ifPresent(cache -> cache.invalidateFile(name()));
ChunkCache.removeFileIdFromCache(channel.getFile());
Review comment:
I'd argue that not using the local chunkCache here is a regression, for 2 reasons:

  1. it assumes chunkCache.get() == ChunkCache.instance. But currently in CNDB we sometimes use a different chunk cache instance for some files, where this would be incorrect. Admittedly, the benefits of that separate instance are debatable, but it is used currently.
  2. it also kind of assumes that if the chunk cache is used, then this instance of FileHandle is meant to use it, which is technically not guaranteed. In theory, nothing quite forbids the same file from being opened in one place with use of the chunk cache but also in another where it doesn't. Given that FileHandle is used a lot, including in CNDB, I think it's better not to rely on this never being a legit use case.

Author's reply:
This whole code is now dropped, because it is obliterating the effect of early open. Whatever solution we choose, it cannot be done during handle cleanup.

try
{
if (compressionMetadata != null)
24 changes: 0 additions & 24 deletions src/java/org/apache/cassandra/io/util/MmappedRegions.java
@@ -254,30 +254,6 @@ public ByteBuffer buffer()
return buffer.duplicate();
}

@Override
public ByteOrder order()
{
return buffer.order();
}

public FloatBuffer floatBuffer()
{
// this does an implicit duplicate(), so we need to expose it directly to avoid doing it twice unnecessarily
return buffer.asFloatBuffer();
}

public IntBuffer intBuffer()
{
// this does an implicit duplicate(), so we need to expose it directly to avoid doing it twice unnecessarily
return buffer.asIntBuffer();
}

public LongBuffer longBuffer()
{
// this does an implicit duplicate(), so we need to expose it directly to avoid doing it twice unnecessarily
return buffer.asLongBuffer();
}

public long offset()
{
return offset;
54 changes: 0 additions & 54 deletions src/java/org/apache/cassandra/io/util/Rebufferer.java
@@ -51,30 +51,6 @@ interface BufferHolder
*/
ByteBuffer buffer();

/**
* Return the order of the underlying {@link ByteBuffer} held by this class. This is only relevant for the
* {@link #floatBuffer()}, {@link #intBuffer()} and {@link #longBuffer()} methods because the caller cannot
* change the order of those returned buffer objects. Further, it is not generally relevant for calls to
* {@link #buffer()} since the call to {@link ByteBuffer#duplicate()} sets the byte order to
* {@link ByteOrder#BIG_ENDIAN} and the caller can change the order of the returned buffer.
*/
ByteOrder order();

default FloatBuffer floatBuffer()
{
throw new UnsupportedOperationException("not implemented in " + this.getClass());
}

default IntBuffer intBuffer()
{
throw new UnsupportedOperationException("not implemented in " + this.getClass());
}

default LongBuffer longBuffer()
{
throw new UnsupportedOperationException("not implemented in " + this.getClass());
}

/**
* Position in the file of the start of the buffer.
*/
@@ -97,30 +73,6 @@ public ByteBuffer buffer()
return EMPTY_BUFFER;
}

@Override
public ByteOrder order()
{
return EMPTY_BUFFER.order();
}

@Override
public FloatBuffer floatBuffer()
{
return EMPTY_BUFFER.asFloatBuffer();
}

@Override
public IntBuffer intBuffer()
{
return EMPTY_BUFFER.asIntBuffer();
}

@Override
public LongBuffer longBuffer()
{
return EMPTY_BUFFER.asLongBuffer();
}

@Override
public long offset()
{
@@ -144,12 +96,6 @@ public ByteBuffer buffer()
return EMPTY.buffer();
}

@Override
public ByteOrder order()
{
return EMPTY.order();
}

@Override
public long offset()
{
4 changes: 4 additions & 0 deletions src/java/org/apache/cassandra/io/util/SequentialWriter.java
@@ -22,6 +22,7 @@
import java.nio.channels.FileChannel;
import java.nio.file.StandardOpenOption;

import org.apache.cassandra.cache.ChunkCache;
import org.apache.cassandra.io.FSReadError;
import org.apache.cassandra.io.FSWriteError;
import org.apache.cassandra.utils.PageAware;
@@ -124,6 +125,9 @@ private static FileChannel openChannel(File file)
try { channel.close(); }
catch (Throwable t2) { t.addSuppressed(t2); }
}

// Invalidate any cache entries that may exist for a previous file with the same name.
ChunkCache.removeFileIdFromCache(file);

Review comment:
In CNDB we use the ChunkCache also for SAI; should we add some specific handling?

cc @pcmanus

Author's reply:

Moved to the file handle builder to use the supplied cache instance.

return channel;
}
}
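
The invalidation added above guards against stale entries when a new file reuses an old name. It builds on the fileID scheme introduced by commit c4f14ee ("Introduce fileID and invalidate file by dropping id"): cache keys embed a per-file id, so invalidating a file is just dropping its id, which makes every existing entry unreachable without scanning the cache. A hypothetical standalone sketch of the idea (class and method names are illustrative, not the actual ChunkCache API):

```java
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;

// Hypothetical sketch of id-based invalidation: entries keyed with an old
// id simply become unreachable once the id is dropped.
public class FileIdRegistry
{
    private static final AtomicLong nextId = new AtomicLong();
    private static final Map<String, Long> idByName = new ConcurrentHashMap<>();

    // id used when building cache keys for reads of this file
    static long idFor(String fileName)
    {
        return idByName.computeIfAbsent(fileName, n -> nextId.incrementAndGet());
    }

    // dropping the id orphans every cache entry created under the old id
    static void invalidate(String fileName)
    {
        idByName.remove(fileName);
    }
}
```

The orphaned entries are then reclaimed by the cache's normal eviction rather than by an eager scan, which is what makes the invalidation cheap enough to run on every writer open.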
5 changes: 5 additions & 0 deletions src/java/org/apache/cassandra/io/util/SimpleChunkReader.java
@@ -78,6 +78,11 @@ public void invalidateIfCached(long position)
{
}

public ReaderType type()
{
return ReaderType.SIMPLE;
}

@Override
public String toString()
{