archive/zip: reduce CPU usage in index construction

Constructing the zip index (which is done once when first opening
a file in an archive) can consume large amounts of CPU when
processing deeply-nested directory paths.

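Switch to a more efficient algorithm: find each parent directory by
slicing the name at its last "/" instead of calling path.Dir
repeatedly, and stop walking up as soon as a directory that is
already in the index is reached.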

Thanks to Jakub Ciolek for reporting this issue.

	goos: darwin
	goarch: arm64
	pkg: archive/zip
	cpu: Apple M4 Pro
	                          │  /tmp/bench.0  │            /tmp/bench.1            │
	                          │     sec/op     │   sec/op     vs base               │
	ReaderOneDeepDir-14         25983.62m ± 2%   46.01m ± 2%  -99.82% (p=0.000 n=8)
	ReaderManyDeepDirs-14          16.221 ± 1%    2.763 ± 6%  -82.96% (p=0.000 n=8)
	ReaderManyShallowFiles-14      130.3m ± 1%   128.8m ± 2%   -1.20% (p=0.003 n=8)
	geomean                         3.801        253.9m       -93.32%

Fixes #77102
Fixes CVE-2025-61728

Change-Id: I2c9c864be01b2a2769eb67fbab1b250aeb8f6c42
Reviewed-on: https://go-internal-review.googlesource.com/c/go/+/3060
Reviewed-by: Nicholas Husin <husin@google.com>
Reviewed-by: Neal Patel <nealpatel@google.com>
Reviewed-on: https://go-review.googlesource.com/c/go/+/736713
Auto-Submit: Michael Pratt <mpratt@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
This commit is contained in:
Damien Neil
2025-11-04 17:00:33 -08:00
committed by Gopher Robot
parent 2dcaaa7512
commit bb7c0c717c
2 changed files with 91 additions and 1 deletions

View File

@@ -834,7 +834,16 @@ func (r *Reader) initFileList() {
continue
}
for dir := path.Dir(name); dir != "."; dir = path.Dir(dir) {
dir := name
for {
if idx := strings.LastIndex(dir, "/"); idx < 0 {
break
} else {
dir = dir[:idx]
}
if dirs[dir] {
break
}
dirs[dir] = true
}

View File

@@ -9,6 +9,7 @@ import (
"encoding/binary"
"encoding/hex"
"errors"
"fmt"
"internal/obscuretestdata"
"io"
"io/fs"
@@ -1874,3 +1875,83 @@ func TestBaseOffsetPlusOverflow(t *testing.T) {
// as the section reader offset & size were < 0.
NewReader(bytes.NewReader(data), int64(len(data))+1875)
}
// BenchmarkReaderOneDeepDir measures the cost of opening a reader and
// performing a first lookup on an archive whose entries all live along a
// single, progressively deeper directory chain: entry i is named with i
// repetitions of "a/" followed by "data", for i in [0, 4000).
func BenchmarkReaderOneDeepDir(b *testing.B) {
	var buf bytes.Buffer
	zw := NewWriter(&buf)
	for i := range 4000 {
		name := strings.Repeat("a/", i) + "data"
		// A failed header write would leave a truncated fixture and make
		// the timings meaningless, so abort instead of ignoring the error.
		if _, err := zw.CreateHeader(&FileHeader{
			Name:   name,
			Method: Store,
		}); err != nil {
			b.Fatal(err)
		}
	}
	if err := zw.Close(); err != nil {
		b.Fatal(err)
	}
	data := buf.Bytes()
	for b.Loop() {
		zr, err := NewReader(bytes.NewReader(data), int64(len(data)))
		if err != nil {
			b.Fatal(err)
		}
		// The lookup is expected to fail; the call is made only to trigger
		// the first-open index construction, so its result is discarded.
		zr.Open("does-not-exist")
	}
}
// BenchmarkReaderManyDeepDirs measures the cost of opening a reader and
// performing a first lookup on an archive containing many separate deep
// paths: entry i is the hexadecimal form of i repeated i+1 times and
// joined with "/", for i in [0, 2850).
func BenchmarkReaderManyDeepDirs(b *testing.B) {
	var buf bytes.Buffer
	zw := NewWriter(&buf)
	for i := range 2850 {
		name := fmt.Sprintf("%x", i)
		// Repeat "/<name>" i+1 times, then drop the leading slash so the
		// entry name is a relative path with i+1 identical components.
		name = strings.Repeat("/"+name, i+1)[1:]
		// A failed header write would leave a truncated fixture and make
		// the timings meaningless, so abort instead of ignoring the error.
		if _, err := zw.CreateHeader(&FileHeader{
			Name:   name,
			Method: Store,
		}); err != nil {
			b.Fatal(err)
		}
	}
	if err := zw.Close(); err != nil {
		b.Fatal(err)
	}
	data := buf.Bytes()
	for b.Loop() {
		zr, err := NewReader(bytes.NewReader(data), int64(len(data)))
		if err != nil {
			b.Fatal(err)
		}
		// The lookup is expected to fail; the call is made only to trigger
		// the first-open index construction, so its result is discarded.
		zr.Open("does-not-exist")
	}
}
// BenchmarkReaderManyShallowFiles measures the cost of opening a reader
// and performing a first lookup on an archive of 310000 entries whose
// names are the decimal form of their index and contain no "/" separator.
func BenchmarkReaderManyShallowFiles(b *testing.B) {
	var buf bytes.Buffer
	zw := NewWriter(&buf)
	for i := range 310000 {
		name := fmt.Sprintf("%v", i)
		// A failed header write would leave a truncated fixture and make
		// the timings meaningless, so abort instead of ignoring the error.
		if _, err := zw.CreateHeader(&FileHeader{
			Name:   name,
			Method: Store,
		}); err != nil {
			b.Fatal(err)
		}
	}
	if err := zw.Close(); err != nil {
		b.Fatal(err)
	}
	data := buf.Bytes()
	for b.Loop() {
		zr, err := NewReader(bytes.NewReader(data), int64(len(data)))
		if err != nil {
			b.Fatal(err)
		}
		// The lookup is expected to fail; the call is made only to trigger
		// the first-open index construction, so its result is discarded.
		zr.Open("does-not-exist")
	}
}