diff --git a/converter.go b/converter.go index 07d0903..faf49e4 100644 --- a/converter.go +++ b/converter.go @@ -20,24 +20,27 @@ import "image" // Keep value in [0,255] range. func clampUint8(in int32) uint8 { - if in < 0 { - return 0 + // Converting a negative int to a uint wraps around to a very large value. + // This behavior is exploited here and in other functions to achieve + // higher performance. + if uint32(in) < 256 { + return uint8(in) } if in > 255 { return 255 } - return uint8(in) + return 0 } // Keep value in [0,65535] range. func clampUint16(in int64) uint16 { - if in < 0 { - return 0 + if uint64(in) < 65536 { + return uint16(in) } if in > 65535 { return 65535 } - return uint16(in) + return 0 } func resizeGeneric(in image.Image, out *image.RGBA64, scale float64, coeffs []int32, offset []int, filterLength int) { @@ -102,12 +105,12 @@ func resizeRGBA(in *image.RGBA, out *image.RGBA, scale float64, coeffs []int16, if coeff != 0 { xi := start + i switch { - case xi < 0: - xi = 0 + case uint(xi) < uint(maxX): + xi *= 4 case xi >= maxX: xi = 4 * maxX default: - xi *= 4 + xi = 0 } rgba[0] += int32(coeff) * int32(row[xi+0]) rgba[1] += int32(coeff) * int32(row[xi+1]) @@ -142,12 +145,12 @@ func resizeRGBA64(in *image.RGBA64, out *image.RGBA64, scale float64, coeffs []i if coeff != 0 { xi := start + i switch { - case xi < 0: - xi = 0 + case uint(xi) < uint(maxX): + xi *= 8 case xi >= maxX: xi = 8 * maxX default: - xi *= 8 + xi = 0 } rgba[0] += int64(coeff) * int64(uint16(row[xi+0])<<8|uint16(row[xi+1])) rgba[1] += int64(coeff) * int64(uint16(row[xi+2])<<8|uint16(row[xi+3])) @@ -222,12 +225,12 @@ func resizeGray16(in *image.Gray16, out *image.Gray16, scale float64, coeffs []i if coeff != 0 { xi := start + i switch { - case xi < 0: - xi = 0 + case uint(xi) < uint(maxX): + xi *= 2 case xi >= maxX: xi = 2 * maxX default: - xi *= 2 + xi = 0 } gray += int64(coeff) * int64(uint16(row[xi+0])<<8|uint16(row[xi+1])) sum += int64(coeff) @@ -258,12 +261,12 @@ func 
resizeYCbCr(in *ycc, out *ycc, scale float64, coeffs []int16, offset []int, if coeff != 0 { xi := start + i switch { - case xi < 0: - xi = 0 + case uint(xi) < uint(maxX): + xi *= 3 case xi >= maxX: xi = 3 * maxX default: - xi *= 3 + xi = 0 } p[0] += int32(coeff) * int32(row[xi+0]) p[1] += int32(coeff) * int32(row[xi+1]) @@ -295,12 +298,12 @@ func nearestYCbCr(in *ycc, out *ycc, scale float64, coeffs []bool, offset []int, if coeffs[ci+i] { xi := start + i switch { - case xi < 0: - xi = 0 + case uint(xi) < uint(maxX): + xi *= 3 case xi >= maxX: xi = 3 * maxX default: - xi *= 3 + xi = 0 } p[0] += float32(row[xi+0]) p[1] += float32(row[xi+1]) diff --git a/nearest.go b/nearest.go index 0f8c321..78ad3cb 100644 --- a/nearest.go +++ b/nearest.go @@ -94,12 +94,12 @@ func nearestRGBA(in *image.RGBA, out *image.RGBA, scale float64, coeffs []bool, if coeffs[ci+i] { xi := start + i switch { - case xi < 0: - xi = 0 + case uint(xi) < uint(maxX): + xi *= 4 case xi >= maxX: xi = 4 * maxX default: - xi *= 4 + xi = 0 } rgba[0] += float32(row[xi+0]) rgba[1] += float32(row[xi+1]) @@ -133,12 +133,12 @@ func nearestRGBA64(in *image.RGBA64, out *image.RGBA64, scale float64, coeffs [] if coeffs[ci+i] { xi := start + i switch { - case xi < 0: - xi = 0 + case uint(xi) < uint(maxX): + xi *= 8 case xi >= maxX: xi = 8 * maxX default: - xi *= 8 + xi = 0 } rgba[0] += float32(uint16(row[xi+0])<<8 | uint16(row[xi+1])) rgba[1] += float32(uint16(row[xi+2])<<8 | uint16(row[xi+3])) @@ -211,12 +211,12 @@ func nearestGray16(in *image.Gray16, out *image.Gray16, scale float64, coeffs [] if coeffs[ci+i] { xi := start + i switch { - case xi < 0: - xi = 0 + case uint(xi) < uint(maxX): + xi *= 2 case xi >= maxX: xi = 2 * maxX default: - xi *= 2 + xi = 0 } gray += float32(uint16(row[xi+0])<<8 | uint16(row[xi+1])) sum++ diff --git a/resize.go b/resize.go index f6d24bb..a913c6f 100644 --- a/resize.go +++ b/resize.go @@ -97,7 +97,7 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i 
} taps, kernel := interp.kernel() - cpus := runtime.NumCPU() + cpus := runtime.GOMAXPROCS(0) wg := sync.WaitGroup{} // Generic access to image.Image is slow in tight loops. @@ -283,7 +283,7 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i func resizeNearest(width, height uint, scaleX, scaleY float64, img image.Image, interp InterpolationFunction) image.Image { taps, _ := interp.kernel() - cpus := runtime.NumCPU() + cpus := runtime.GOMAXPROCS(0) wg := sync.WaitGroup{} switch input := img.(type) { diff --git a/resize_test.go b/resize_test.go index f49bad6..3ba03f2 100644 --- a/resize_test.go +++ b/resize_test.go @@ -85,56 +85,105 @@ func Test_SameSizeReturnsOriginal(t *testing.T) { } } -func Benchmark_BigResizeLanczos3(b *testing.B) { - var m image.Image - for i := 0; i < b.N; i++ { - m = Resize(1000, 1000, img, Lanczos3) - } - m.At(0, 0) -} +const ( + // Use a small image size for benchmarks. We don't want memory performance + // to affect the benchmark results. + benchMaxX = 250 + benchMaxY = 250 -func Benchmark_Reduction(b *testing.B) { - largeImg := image.NewRGBA(image.Rect(0, 0, 1000, 1000)) + // Resize values near the original size increase the amount of time + // resize spends converting the image. + benchWidth = 200 + benchHeight = 200 +) - var m image.Image - for i := 0; i < b.N; i++ { - m = Resize(300, 300, largeImg, Bicubic) - } - m.At(0, 0) -} - -// Benchmark resize of 16 MPix jpeg image to 800px width. -func jpegThumb(b *testing.B, interp InterpolationFunction) { - input := image.NewYCbCr(image.Rect(0, 0, 4896, 3264), image.YCbCrSubsampleRatio422) - - var output image.Image - for i := 0; i < b.N; i++ { - output = Resize(800, 0, input, interp) +func benchRGBA(b *testing.B, interp InterpolationFunction) { + m := image.NewRGBA(image.Rect(0, 0, benchMaxX, benchMaxY)) + // Initialize m's pixels to create a non-uniform image. 
+ for y := m.Rect.Min.Y; y < m.Rect.Max.Y; y++ { + for x := m.Rect.Min.X; x < m.Rect.Max.X; x++ { + i := m.PixOffset(x, y) + m.Pix[i+0] = uint8(y + 4*x) + m.Pix[i+1] = uint8(y + 4*x) + m.Pix[i+2] = uint8(y + 4*x) + m.Pix[i+3] = uint8(4*y + x) + } } - output.At(0, 0) + var out image.Image + b.ResetTimer() + for i := 0; i < b.N; i++ { + out = Resize(benchWidth, benchHeight, m, interp) + } + out.At(0, 0) } -func Benchmark_LargeJpegThumbNearestNeighbor(b *testing.B) { - jpegThumb(b, NearestNeighbor) +// The names of some interpolation functions are truncated so that the columns +// of 'go test -bench' line up. +func Benchmark_Nearest_RGBA(b *testing.B) { + benchRGBA(b, NearestNeighbor) } -func Benchmark_LargeJpegThumbBilinear(b *testing.B) { - jpegThumb(b, Bilinear) +func Benchmark_Bilinear_RGBA(b *testing.B) { + benchRGBA(b, Bilinear) } -func Benchmark_LargeJpegThumbBicubic(b *testing.B) { - jpegThumb(b, Bicubic) +func Benchmark_Bicubic_RGBA(b *testing.B) { + benchRGBA(b, Bicubic) } -func Benchmark_LargeJpegThumbMitchellNetravali(b *testing.B) { - jpegThumb(b, MitchellNetravali) +func Benchmark_Mitchell_RGBA(b *testing.B) { + benchRGBA(b, MitchellNetravali) } -func Benchmark_LargeJpegThumbLanczos2(b *testing.B) { - jpegThumb(b, Lanczos2) +func Benchmark_Lanczos2_RGBA(b *testing.B) { + benchRGBA(b, Lanczos2) } -func Benchmark_LargeJpegThumbLanczos3(b *testing.B) { - jpegThumb(b, Lanczos3) +func Benchmark_Lanczos3_RGBA(b *testing.B) { + benchRGBA(b, Lanczos3) +} + +func benchYCbCr(b *testing.B, interp InterpolationFunction) { + m := image.NewYCbCr(image.Rect(0, 0, benchMaxX, benchMaxY), image.YCbCrSubsampleRatio422) + // Initialize m's pixels to create a non-uniform image. 
+ for y := m.Rect.Min.Y; y < m.Rect.Max.Y; y++ { + for x := m.Rect.Min.X; x < m.Rect.Max.X; x++ { + yi := m.YOffset(x, y) + ci := m.COffset(x, y) + m.Y[yi] = uint8(16*y + x) + m.Cb[ci] = uint8(y + 16*x) + m.Cr[ci] = uint8(y + 16*x) + } + } + var out image.Image + b.ResetTimer() + for i := 0; i < b.N; i++ { + out = Resize(benchWidth, benchHeight, m, interp) + } + out.At(0, 0) +} + +func Benchmark_Nearest_YCC(b *testing.B) { + benchYCbCr(b, NearestNeighbor) +} + +func Benchmark_Bilinear_YCC(b *testing.B) { + benchYCbCr(b, Bilinear) +} + +func Benchmark_Bicubic_YCC(b *testing.B) { + benchYCbCr(b, Bicubic) +} + +func Benchmark_Mitchell_YCC(b *testing.B) { + benchYCbCr(b, MitchellNetravali) +} + +func Benchmark_Lanczos2_YCC(b *testing.B) { + benchYCbCr(b, Lanczos2) +} + +func Benchmark_Lanczos3_YCC(b *testing.B) { + benchYCbCr(b, Lanczos3) }