Skip to content

Commit

Permalink
Detect compressed files by magic numbers
Browse files Browse the repository at this point in the history
Compression is now detected from the file's magic numbers rather than from
its file extension. This makes us handle misnamed files correctly.

Also fix a bug in how we display the filename in the UI. Before this
change, we displayed the file name unchanged, compression extension
included, even if we had uncompressed the file.

With this change in place, we now display the filename without any
compression extension.
  • Loading branch information
walles committed Jan 5, 2025
1 parent 69a7f5d commit abb03b2
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 21 deletions.
2 changes: 1 addition & 1 deletion m/reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,7 @@ func NewReaderFromFilename(filename string, formatter chroma.Formatter, options
options.Lexer = lexers.Match(highlightingFilename)
}

returnMe := newReaderFromStream(stream, &filename, formatter, options)
returnMe := newReaderFromStream(stream, &highlightingFilename, formatter, options)

if options.Lexer == nil {
returnMe.highlightingDone.Store(true)
Expand Down
71 changes: 51 additions & 20 deletions m/zopen.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,37 +14,68 @@ import (
"github.com/ulikunitz/xz"
)

var gzipMagic = []byte{0x1f, 0x8b}
var bzip2Magic = []byte{0x42, 0x5a, 0x68}
var zstdMagic = []byte{0x28, 0xb5, 0x2f, 0xfd}
var xzMagic = []byte{0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00}

// The second return value is the file name with any compression extension removed.
func ZOpen(filename string) (io.ReadCloser, string, error) {
file, err := os.Open(filename)
if err != nil {
return nil, "", err
}

// Read the first 6 bytes to determine the compression type
firstBytes := make([]byte, 6)
_, err = file.Read(firstBytes)
if err != nil {
if err == io.EOF {
// File was empty
return file, filename, nil
}
return nil, "", fmt.Errorf("failed to read file: %w", err)
}

// Reset file reader to start of file
_, err = file.Seek(0, 0)
if err != nil {
return nil, "", fmt.Errorf("failed to seek to start of file: %w", err)
}

switch {
case strings.HasSuffix(filename, ".gz"):
case bytes.HasPrefix(firstBytes, gzipMagic):
reader, err := gzip.NewReader(file)
return reader, strings.TrimSuffix(filename, ".gz"), err
if err != nil {
return nil, "", err
}

// Ref: https://github.com/walles/moar/issues/194
case strings.HasSuffix(filename, ".tgz"):
reader, err := gzip.NewReader(file)
return reader, strings.TrimSuffix(filename, ".tgz"), err
newName := strings.TrimSuffix(filename, ".gz")

case strings.HasSuffix(filename, ".bz2"):
// Ref: https://github.com/walles/moar/issues/194
if strings.HasSuffix(newName, ".tgz") {
newName = strings.TrimSuffix(newName, ".tgz") + ".tar"
}

return reader, newName, err

case bytes.HasPrefix(firstBytes, bzip2Magic):
return struct {
io.Reader
io.Closer
}{bzip2.NewReader(file), file}, strings.TrimSuffix(filename, ".bz2"), nil

case strings.HasSuffix(filename, ".zst") || strings.HasSuffix(filename, ".zstd"):
case bytes.HasPrefix(firstBytes, zstdMagic):
decoder, err := zstd.NewReader(file)
if err != nil {
return nil, "", err
}
return decoder.IOReadCloser(), strings.TrimSuffix(filename, ".zst"), nil

case strings.HasSuffix(filename, ".xz"):
newName := strings.TrimSuffix(filename, ".zst")
newName = strings.TrimSuffix(newName, ".zstd")
return decoder.IOReadCloser(), newName, nil

case bytes.HasPrefix(firstBytes, xzMagic):
xzReader, err := xz.NewReader(file)
if err != nil {
return nil, "", err
Expand All @@ -66,30 +97,30 @@ func ZOpen(filename string) (io.ReadCloser, string, error) {
// Ref: https://github.com/walles/moar/issues/261
func ZReader(input io.Reader) (io.Reader, error) {
// Read the first 6 bytes to determine the compression type
buffer := make([]byte, 6)
_, err := input.Read(buffer)
firstBytes := make([]byte, 6)
_, err := input.Read(firstBytes)
if err != nil {
if err == io.EOF {
// Return a reader for the short input
return bytes.NewReader(buffer), nil
// Stream was empty
return input, nil
}
return nil, fmt.Errorf("failed to read input: %w", err)
return nil, fmt.Errorf("failed to read stream: %w", err)
}

// Reset input reader to start of stream
input = io.MultiReader(bytes.NewReader(buffer), input)
input = io.MultiReader(bytes.NewReader(firstBytes), input)

switch {
case bytes.HasPrefix(buffer, []byte{0x1f, 0x8b}): // Gzip magic numbers
case bytes.HasPrefix(firstBytes, gzipMagic):
log.Info("Input stream is gzip compressed")
return gzip.NewReader(input)
case bytes.HasPrefix(buffer, []byte{0x28, 0xb5, 0x2f, 0xfd}): // Zstd magic numbers
case bytes.HasPrefix(firstBytes, zstdMagic):
log.Info("Input stream is zstd compressed")
return zstd.NewReader(input)
case bytes.HasPrefix(buffer, []byte{0x42, 0x5a, 0x68}): // Bzip2 magic numbers
case bytes.HasPrefix(firstBytes, bzip2Magic):
log.Info("Input stream is bzip2 compressed")
return bzip2.NewReader(input), nil
case bytes.HasPrefix(buffer, []byte{0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00}): // XZ magic numbers
case bytes.HasPrefix(firstBytes, xzMagic):
log.Info("Input stream is xz compressed")
return xz.NewReader(input)
default:
Expand Down

0 comments on commit abb03b2

Please sign in to comment.