Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
huff0: Speed up symbol counting
The compiler inserts a nil check on s into the first loop in
countSimple. Lift that check out of the loop by touching s.count once
before the loop starts, for some extra throughput:

	goos: linux
	goarch: amd64
	pkg: github.com/klauspost/compress/huff0
	cpu: Intel(R) Core(TM) i7-3770K CPU @ 3.50GHz
	                                         │  huff0/old   │           huff0/nilcheck            │
	                                         │     B/s      │     B/s       vs base               │
	Compress1XReuseNone/digits-8               438.9Mi ± 0%   467.7Mi ± 1%   +6.55% (p=0.002 n=6)
	Compress1XReuseNone/gettysburg-8           247.3Mi ± 2%   257.4Mi ± 1%   +4.08% (p=0.002 n=6)
	Compress1XReuseNone/twain-8                349.2Mi ± 1%   367.9Mi ± 1%   +5.35% (p=0.002 n=6)
	Compress1XReuseNone/low-ent.10k-8          474.3Mi ± 1%   488.4Mi ± 0%   +2.98% (p=0.002 n=6)
	Compress1XReuseNone/superlow-ent-10k-8     303.4Mi ± 1%   341.6Mi ± 0%  +12.57% (p=0.002 n=6)
	Compress1XReuseNone/crash2-8               10.89Mi ± 3%   10.90Mi ± 1%        ~ (p=0.794 n=6)
	Compress1XReuseNone/endzerobits-8          15.40Mi ± 5%   15.54Mi ± 0%   +0.87% (p=0.006 n=6)
	Compress1XReuseNone/endnonzero-8           7.548Mi ± 2%   7.539Mi ± 5%        ~ (p=0.855 n=6)
	Compress1XReuseNone/case1-8                14.29Mi ± 1%   14.39Mi ± 1%        ~ (p=0.082 n=6)
	Compress1XReuseNone/case2-8                11.89Mi ± 1%   11.92Mi ± 0%        ~ (p=0.121 n=6)
	Compress1XReuseNone/case3-8                12.96Mi ± 0%   12.92Mi ± 0%        ~ (p=0.201 n=6)
	Compress1XReuseNone/pngdata.001-8          289.2Mi ± 1%   318.2Mi ± 0%  +10.03% (p=0.002 n=6)
	Compress1XReuseNone/normcount2-8           34.46Mi ± 1%   34.30Mi ± 1%        ~ (p=0.485 n=6)
	Compress1XReuseAllow/digits-8              458.0Mi ± 1%   490.6Mi ± 1%   +7.11% (p=0.002 n=6)
	Compress1XReuseAllow/gettysburg-8          279.5Mi ± 3%   293.1Mi ± 1%   +4.85% (p=0.002 n=6)
	Compress1XReuseAllow/twain-8               348.2Mi ± 1%   367.2Mi ± 0%   +5.45% (p=0.002 n=6)
	Compress1XReuseAllow/low-ent.10k-8         478.0Mi ± 1%   490.4Mi ± 1%   +2.58% (p=0.002 n=6)
	Compress1XReuseAllow/superlow-ent-10k-8    307.2Mi ± 0%   345.6Mi ± 0%  +12.49% (p=0.002 n=6)
	Compress1XReuseAllow/crash2-8              16.92Mi ± 1%   17.09Mi ± 1%   +0.99% (p=0.006 n=6)
	Compress1XReuseAllow/endzerobits-8         16.75Mi ± 2%   16.84Mi ± 0%   +0.54% (p=0.002 n=6)
	Compress1XReuseAllow/endnonzero-8          12.58Mi ± 1%   12.65Mi ± 0%   +0.57% (p=0.002 n=6)
	Compress1XReuseAllow/case1-8               19.77Mi ± 1%   19.81Mi ± 1%        ~ (p=0.589 n=6)
	Compress1XReuseAllow/case2-8               16.96Mi ± 3%   16.58Mi ± 3%        ~ (p=0.288 n=6)
	Compress1XReuseAllow/case3-8               18.04Mi ± 2%   17.90Mi ± 2%        ~ (p=0.818 n=6)
	Compress1XReuseAllow/pngdata.001-8         291.6Mi ± 0%   322.0Mi ± 0%  +10.44% (p=0.002 n=6)
	Compress1XReuseAllow/normcount2-8          48.58Mi ± 1%   48.38Mi ± 1%        ~ (p=0.258 n=6)
	Compress1XReusePrefer/digits-8             460.6Mi ± 0%   493.0Mi ± 0%   +7.04% (p=0.002 n=6)
	Compress1XReusePrefer/gettysburg-8         412.8Mi ± 1%   436.7Mi ± 2%   +5.77% (p=0.002 n=6)
	Compress1XReusePrefer/twain-8              350.4Mi ± 0%   369.4Mi ± 0%   +5.41% (p=0.002 n=6)
	Compress1XReusePrefer/low-ent.10k-8        481.8Mi ± 0%   493.6Mi ± 0%   +2.44% (p=0.002 n=6)
	Compress1XReusePrefer/superlow-ent-10k-8   311.3Mi ± 1%   351.8Mi ± 0%  +12.99% (p=0.002 n=6)
	Compress1XReusePrefer/crash2-8             63.51Mi ± 1%   65.02Mi ± 1%   +2.38% (p=0.002 n=6)
	Compress1XReusePrefer/endzerobits-8        24.28Mi ± 0%   24.38Mi ± 0%   +0.43% (p=0.004 n=6)
	Compress1XReusePrefer/endnonzero-8         33.18Mi ± 0%   33.35Mi ± 0%   +0.49% (p=0.017 n=6)
	Compress1XReusePrefer/case1-8              148.9Mi ± 1%   165.1Mi ± 0%  +10.88% (p=0.002 n=6)
	Compress1XReusePrefer/case2-8              141.4Mi ± 0%   142.9Mi ± 0%   +1.07% (p=0.002 n=6)
	Compress1XReusePrefer/case3-8              152.1Mi ± 0%   154.3Mi ± 0%   +1.42% (p=0.002 n=6)
	Compress1XReusePrefer/pngdata.001-8        299.3Mi ± 1%   331.3Mi ± 0%  +10.70% (p=0.002 n=6)
	Compress1XReusePrefer/normcount2-8         210.7Mi ± 1%   215.1Mi ± 1%   +2.07% (p=0.002 n=6)
	Compress4XReuseNone/digits-8               457.9Mi ± 1%   490.0Mi ± 0%   +7.01% (p=0.002 n=6)
	Compress4XReuseNone/gettysburg-8           245.4Mi ± 0%   255.5Mi ± 0%   +4.11% (p=0.002 n=6)
	Compress4XReuseNone/twain-8                348.3Mi ± 0%   367.9Mi ± 0%   +5.63% (p=0.002 n=6)
	Compress4XReuseNone/low-ent.10k-8          475.1Mi ± 1%   487.0Mi ± 0%   +2.50% (p=0.002 n=6)
	Compress4XReuseNone/superlow-ent-10k-8     302.4Mi ± 0%   339.3Mi ± 3%  +12.19% (p=0.002 n=6)
	Compress4XReuseNone/case1-8                14.31Mi ± 0%   14.24Mi ± 1%        ~ (p=0.119 n=6)
	Compress4XReuseNone/case2-8                11.69Mi ± 1%   11.66Mi ± 1%        ~ (p=0.502 n=6)
	Compress4XReuseNone/case3-8                12.72Mi ± 0%   12.67Mi ± 1%        ~ (p=0.102 n=6)
	Compress4XReuseNone/pngdata.001-8          289.0Mi ± 1%   317.7Mi ± 0%   +9.92% (p=0.002 n=6)
	Compress4XReuseNone/normcount2-8           33.35Mi ± 1%   33.45Mi ± 3%        ~ (p=0.909 n=6)
	Compress4XReuseAllow/digits-8              458.1Mi ± 2%   491.1Mi ± 0%   +7.21% (p=0.002 n=6)
	Compress4XReuseAllow/gettysburg-8          281.0Mi ± 1%   292.5Mi ± 0%   +4.09% (p=0.002 n=6)
	Compress4XReuseAllow/twain-8               348.8Mi ± 0%   368.4Mi ± 1%   +5.63% (p=0.002 n=6)
	Compress4XReuseAllow/low-ent.10k-8         477.3Mi ± 0%   488.7Mi ± 2%        ~ (p=0.065 n=6)
	Compress4XReuseAllow/superlow-ent-10k-8    305.8Mi ± 0%   344.4Mi ± 0%  +12.63% (p=0.002 n=6)
	Compress4XReuseAllow/case1-8               19.34Mi ± 1%   19.54Mi ± 2%   +1.01% (p=0.039 n=6)
	Compress4XReuseAllow/case2-8               16.57Mi ± 0%   15.89Mi ± 5%        ~ (p=0.061 n=6)
	Compress4XReuseAllow/case3-8               17.68Mi ± 0%   17.17Mi ± 8%        ~ (p=0.061 n=6)
	Compress4XReuseAllow/pngdata.001-8         291.2Mi ± 0%   319.6Mi ± 1%   +9.75% (p=0.002 n=6)
	Compress4XReuseAllow/normcount2-8          47.46Mi ± 1%   47.57Mi ± 1%        ~ (p=1.000 n=6)
	Compress4XReusePrefer/digits-8             460.0Mi ± 0%   492.9Mi ± 0%   +7.14% (p=0.002 n=6)
	Compress4XReusePrefer/gettysburg-8         408.3Mi ± 1%   432.6Mi ± 0%   +5.95% (p=0.002 n=6)
	Compress4XReusePrefer/twain-8              350.0Mi ± 0%   370.0Mi ± 1%   +5.70% (p=0.002 n=6)
	Compress4XReusePrefer/low-ent.10k-8        481.1Mi ± 0%   492.7Mi ± 0%   +2.41% (p=0.002 n=6)
	Compress4XReusePrefer/superlow-ent-10k-8   309.3Mi ± 1%   351.0Mi ± 0%  +13.50% (p=0.002 n=6)
	Compress4XReusePrefer/case1-8              130.5Mi ± 0%   140.2Mi ± 1%   +7.44% (p=0.002 n=6)
	Compress4XReusePrefer/case2-8              120.0Mi ± 0%   120.8Mi ± 1%   +0.69% (p=0.004 n=6)
	Compress4XReusePrefer/case3-8              126.3Mi ± 2%   129.6Mi ± 0%   +2.64% (p=0.002 n=6)
	Compress4XReusePrefer/pngdata.001-8        300.2Mi ± 1%   330.6Mi ± 0%  +10.13% (p=0.002 n=6)
	Compress4XReusePrefer/normcount2-8         183.7Mi ± 1%   187.2Mi ± 1%   +1.88% (p=0.009 n=6)
	geomean                                    111.6Mi        116.1Mi        +3.99%
  • Loading branch information
greatroar committed Nov 19, 2023
commit f2a79e9072ea31dc66224ebed86749291eb26fdf
1 change: 1 addition & 0 deletions huff0/compress.go
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,7 @@ func (s *Scratch) compress4Xp(src []byte) ([]byte, error) {
// Does not update s.clearCount.
func (s *Scratch) countSimple(in []byte) (max int, reuse bool) {
reuse = true
_ = s.count // Assert that s != nil to speed up the following loop.
for _, v := range in {
s.count[v]++
}
Expand Down