From 80b3fc2b3f2d53ca32f7520fff4263dc14f55882 Mon Sep 17 00:00:00 2001 From: Charlie Vieth Date: Tue, 29 Jul 2014 16:53:35 -0400 Subject: [PATCH 1/2] Pre-calculate start offset, optimize bound check Increased performance by 30% for RGBA and 45% for Gray images, minor performance increase for 16-bit images. The start offset calculated by createWeights are stored in a slice and passed to the resize functions to prevent duplication of effort. --- converter.go | 205 ++++++++++++++++++++++++++++----------------------- filters.go | 18 +++-- resize.go | 48 ++++++------ 3 files changed, 146 insertions(+), 125 deletions(-) diff --git a/converter.go b/converter.go index 8c6ac84..f023d9c 100644 --- a/converter.go +++ b/converter.go @@ -43,32 +43,35 @@ func clampUint16(in int64) uint16 { return uint16(in) } -func resizeGeneric(in image.Image, out *image.RGBA64, scale float64, coeffs []int32, filterLength int) { +func resizeGeneric(in image.Image, out *image.RGBA64, scale float64, coeffs []int32, offset []int, filterLength int) { oldBounds := in.Bounds() newBounds := out.Bounds() for x := newBounds.Min.X; x < newBounds.Max.X; x++ { for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { - interpX := scale*(float64(y)+0.5) + float64(oldBounds.Min.X) - start := int(interpX) - filterLength/2 + 1 - var rgba [4]int64 var sum int64 + start := offset[y] + ci := (y - newBounds.Min.Y) * filterLength for i := 0; i < filterLength; i++ { - xx := start + i - if xx < oldBounds.Min.X { - xx = oldBounds.Min.X - } else if xx >= oldBounds.Max.X { - xx = oldBounds.Max.X - 1 + coeff := coeffs[ci+i] + if coeff != 0 { + xi := start + i + switch { + case uint(xi) < uint(oldBounds.Max.X): + break + case xi >= oldBounds.Max.X: + xi = oldBounds.Min.X + default: + xi = oldBounds.Max.X - 1 + } + r, g, b, a := in.At(xi, x).RGBA() + rgba[0] += int64(coeff) * int64(r) + rgba[1] += int64(coeff) * int64(g) + rgba[2] += int64(coeff) * int64(b) + rgba[3] += int64(coeff) * int64(a) + sum += int64(coeff) } - - coeff 
:= coeffs[(y-newBounds.Min.Y)*filterLength+i] - r, g, b, a := in.At(xx, x).RGBA() - rgba[0] += int64(coeff) * int64(r) - rgba[1] += int64(coeff) * int64(g) - rgba[2] += int64(coeff) * int64(b) - rgba[3] += int64(coeff) * int64(a) - sum += int64(coeff) } offset := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*8 @@ -88,114 +91,126 @@ func resizeGeneric(in image.Image, out *image.RGBA64, scale float64, coeffs []in } } -func resizeRGBA(in *image.RGBA, out *image.RGBA, scale float64, coeffs []int16, filterLength int) { +func resizeRGBA(in *image.RGBA, out *image.RGBA, scale float64, coeffs []int16, offset []int, filterLength int) { oldBounds := in.Bounds() newBounds := out.Bounds() + minX := oldBounds.Min.X * 4 + maxX := (oldBounds.Max.X - oldBounds.Min.X - 1) * 4 for x := newBounds.Min.X; x < newBounds.Max.X; x++ { row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:] for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { - interpX := scale*(float64(y)+0.5) + float64(oldBounds.Min.X) - start := int(interpX) - filterLength/2 + 1 - var rgba [4]int32 var sum int32 + start := offset[y] + ci := (y - newBounds.Min.Y) * filterLength for i := 0; i < filterLength; i++ { - xx := start + i - if xx < oldBounds.Min.X { - xx = oldBounds.Min.X - } else if xx >= oldBounds.Max.X { - xx = oldBounds.Max.X - 1 + coeff := coeffs[ci+i] + if coeff != 0 { + xi := start + i + switch { + case uint(xi) < uint(oldBounds.Max.X): + xi *= 4 + case xi >= oldBounds.Max.X: + xi = maxX + default: + xi = minX + } + rgba[0] += int32(coeff) * int32(row[xi+0]) + rgba[1] += int32(coeff) * int32(row[xi+1]) + rgba[2] += int32(coeff) * int32(row[xi+2]) + rgba[3] += int32(coeff) * int32(row[xi+3]) + sum += int32(coeff) } - - coeff := coeffs[(y-newBounds.Min.Y)*filterLength+i] - offset := (xx - oldBounds.Min.X) * 4 - rgba[0] += int32(coeff) * int32(row[offset+0]) - rgba[1] += int32(coeff) * int32(row[offset+1]) - rgba[2] += int32(coeff) * int32(row[offset+2]) - rgba[3] += int32(coeff) * int32(row[offset+3]) - sum += 
int32(coeff) } - offset := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*4 - out.Pix[offset+0] = clampUint8(rgba[0] / sum) - out.Pix[offset+1] = clampUint8(rgba[1] / sum) - out.Pix[offset+2] = clampUint8(rgba[2] / sum) - out.Pix[offset+3] = clampUint8(rgba[3] / sum) + xo := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*4 + out.Pix[xo+0] = clampUint8(rgba[0] / sum) + out.Pix[xo+1] = clampUint8(rgba[1] / sum) + out.Pix[xo+2] = clampUint8(rgba[2] / sum) + out.Pix[xo+3] = clampUint8(rgba[3] / sum) } } } -func resizeRGBA64(in *image.RGBA64, out *image.RGBA64, scale float64, coeffs []int32, filterLength int) { +func resizeRGBA64(in *image.RGBA64, out *image.RGBA64, scale float64, coeffs []int32, offset []int, filterLength int) { oldBounds := in.Bounds() newBounds := out.Bounds() + minX := oldBounds.Min.X * 8 + maxX := (oldBounds.Max.X - oldBounds.Min.X - 1) * 8 for x := newBounds.Min.X; x < newBounds.Max.X; x++ { row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:] for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { - interpX := scale*(float64(y)+0.5) + float64(oldBounds.Min.X) - start := int(interpX) - filterLength/2 + 1 - var rgba [4]int64 var sum int64 + start := offset[y] + ci := (y - newBounds.Min.Y) * filterLength for i := 0; i < filterLength; i++ { - xx := start + i - if xx < oldBounds.Min.X { - xx = oldBounds.Min.X - } else if xx >= oldBounds.Max.X { - xx = oldBounds.Max.X - 1 + coeff := coeffs[ci+i] + if coeff != 0 { + xi := start + i + switch { + case uint(xi) < uint(oldBounds.Max.X): + xi *= 8 + case xi >= oldBounds.Max.X: + xi = maxX + default: + xi = minX + } + rgba[0] += int64(coeff) * int64(uint16(row[xi+0])<<8|uint16(row[xi+1])) + rgba[1] += int64(coeff) * int64(uint16(row[xi+2])<<8|uint16(row[xi+3])) + rgba[2] += int64(coeff) * int64(uint16(row[xi+4])<<8|uint16(row[xi+5])) + rgba[3] += int64(coeff) * int64(uint16(row[xi+6])<<8|uint16(row[xi+7])) + sum += int64(coeff) } - - coeff := coeffs[(y-newBounds.Min.Y)*filterLength+i] - offset := (xx - 
oldBounds.Min.X) * 8 - rgba[0] += int64(coeff) * int64(uint16(row[offset+0])<<8|uint16(row[offset+1])) - rgba[1] += int64(coeff) * int64(uint16(row[offset+2])<<8|uint16(row[offset+3])) - rgba[2] += int64(coeff) * int64(uint16(row[offset+4])<<8|uint16(row[offset+5])) - rgba[3] += int64(coeff) * int64(uint16(row[offset+6])<<8|uint16(row[offset+7])) - sum += int64(coeff) } - offset := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*8 + xo := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*8 value := clampUint16(rgba[0] / sum) - out.Pix[offset+0] = uint8(value >> 8) - out.Pix[offset+1] = uint8(value) + out.Pix[xo+0] = uint8(value >> 8) + out.Pix[xo+1] = uint8(value) value = clampUint16(rgba[1] / sum) - out.Pix[offset+2] = uint8(value >> 8) - out.Pix[offset+3] = uint8(value) + out.Pix[xo+2] = uint8(value >> 8) + out.Pix[xo+3] = uint8(value) value = clampUint16(rgba[2] / sum) - out.Pix[offset+4] = uint8(value >> 8) - out.Pix[offset+5] = uint8(value) + out.Pix[xo+4] = uint8(value >> 8) + out.Pix[xo+5] = uint8(value) value = clampUint16(rgba[3] / sum) - out.Pix[offset+6] = uint8(value >> 8) - out.Pix[offset+7] = uint8(value) + out.Pix[xo+6] = uint8(value >> 8) + out.Pix[xo+7] = uint8(value) } } } -func resizeGray(in *image.Gray, out *image.Gray, scale float64, coeffs []int16, filterLength int) { +func resizeGray(in *image.Gray, out *image.Gray, scale float64, coeffs []int16, offset []int, filterLength int) { oldBounds := in.Bounds() newBounds := out.Bounds() + minX := oldBounds.Min.X + maxX := (oldBounds.Max.X - oldBounds.Min.X - 1) for x := newBounds.Min.X; x < newBounds.Max.X; x++ { row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:] for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { - interpX := scale*(float64(y)+0.5) + float64(oldBounds.Min.X) - start := int(interpX) - filterLength/2 + 1 - var gray int32 var sum int32 + start := offset[y] + ci := (y - newBounds.Min.Y) * filterLength for i := 0; i < filterLength; i++ { - xx := start + i - if xx < oldBounds.Min.X { 
- xx = oldBounds.Min.X - } else if xx >= oldBounds.Max.X { - xx = oldBounds.Max.X - 1 + coeff := coeffs[ci+i] + if coeff != 0 { + xi := start + i + switch { + case uint(xi) < uint(oldBounds.Max.X): + break + case xi >= oldBounds.Max.X: + xi = maxX + default: + xi = minX + } + gray += int32(coeff) * int32(row[xi]) + sum += int32(coeff) } - - coeff := coeffs[(y-newBounds.Min.Y)*filterLength+i] - offset := (xx - oldBounds.Min.X) - gray += int32(coeff) * int32(row[offset]) - sum += int32(coeff) } offset := (y-newBounds.Min.Y)*out.Stride + (x - newBounds.Min.X) @@ -204,30 +219,34 @@ func resizeGray(in *image.Gray, out *image.Gray, scale float64, coeffs []int16, } } -func resizeGray16(in *image.Gray16, out *image.Gray16, scale float64, coeffs []int32, filterLength int) { +func resizeGray16(in *image.Gray16, out *image.Gray16, scale float64, coeffs []int32, offset []int, filterLength int) { oldBounds := in.Bounds() newBounds := out.Bounds() + minX := oldBounds.Min.X * 2 + maxX := (oldBounds.Max.X - oldBounds.Min.X - 1) * 2 for x := newBounds.Min.X; x < newBounds.Max.X; x++ { row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:] for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { - interpX := scale*(float64(y)+0.5) + float64(oldBounds.Min.X) - start := int(interpX) - filterLength/2 + 1 - var gray int64 var sum int64 + start := offset[y] + ci := (y - newBounds.Min.Y) * filterLength for i := 0; i < filterLength; i++ { - xx := start + i - if xx < oldBounds.Min.X { - xx = oldBounds.Min.X - } else if xx >= oldBounds.Max.X { - xx = oldBounds.Max.X - 1 + coeff := coeffs[ci+i] + if coeff != 0 { + xi := start + i + switch { + case uint(xi) < uint(oldBounds.Max.X): + xi *= 2 + case xi >= oldBounds.Max.X: + xi = maxX + default: + xi = minX + } + gray += int64(coeff) * int64(uint16(row[xi+0])<<8|uint16(row[xi+1])) + sum += int64(coeff) } - - coeff := coeffs[(y-newBounds.Min.Y)*filterLength+i] - offset := (xx - oldBounds.Min.X) * 2 - gray += int64(coeff) * 
int64(uint16(row[offset+0])<<8|uint16(row[offset+1])) - sum += int64(coeff) } offset := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*2 diff --git a/filters.go b/filters.go index a6bd4e8..a5f6e79 100644 --- a/filters.go +++ b/filters.go @@ -80,37 +80,39 @@ func lanczos3(in float64) float64 { } // range [-256,256] -func createWeights8(dy, minx, filterLength int, blur, scale float64, kernel func(float64) float64) ([]int16, int) { +func createWeights8(dy, minx, filterLength int, blur, scale float64, kernel func(float64) float64) ([]int16, []int, int) { filterLength = filterLength * int(math.Max(math.Ceil(blur*scale), 1)) filterFactor := math.Min(1./(blur*scale), 1) coeffs := make([]int16, dy*filterLength) + start := make([]int, dy) for y := 0; y < dy; y++ { interpX := scale*(float64(y)+0.5) + float64(minx) - start := int(interpX) - filterLength/2 + 1 + start[y] = int(interpX) - filterLength/2 + 1 for i := 0; i < filterLength; i++ { - in := (interpX - float64(start) - float64(i)) * filterFactor + in := (interpX - float64(start[y]) - float64(i)) * filterFactor coeffs[y*filterLength+i] = int16(kernel(in) * 256) } } - return coeffs, filterLength + return coeffs, start, filterLength } // range [-65536,65536] -func createWeights16(dy, minx, filterLength int, blur, scale float64, kernel func(float64) float64) ([]int32, int) { +func createWeights16(dy, minx, filterLength int, blur, scale float64, kernel func(float64) float64) ([]int32, []int, int) { filterLength = filterLength * int(math.Max(math.Ceil(blur*scale), 1)) filterFactor := math.Min(1./(blur*scale), 1) coeffs := make([]int32, dy*filterLength) + start := make([]int, dy) for y := 0; y < dy; y++ { interpX := scale*(float64(y)+0.5) + float64(minx) - start := int(interpX) - filterLength/2 + 1 + start[y] = int(interpX) - filterLength/2 + 1 for i := 0; i < filterLength; i++ { - in := (interpX - float64(start) - float64(i)) * filterFactor + in := (interpX - float64(start[y]) - float64(i)) * filterFactor 
coeffs[y*filterLength+i] = int32(kernel(in) * 65536) } } - return coeffs, filterLength + return coeffs, start, filterLength } diff --git a/resize.go b/resize.go index 0c3ec9a..b9eb775 100644 --- a/resize.go +++ b/resize.go @@ -95,25 +95,25 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i result := image.NewRGBA(image.Rect(0, 0, int(width), int(height))) // horizontal filter, results in transposed temporary image - coeffs, filterLength := createWeights8(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel) + coeffs, offset, filterLength := createWeights8(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { slice := makeSlice(temp, i, cpus).(*image.RGBA) go func() { defer wg.Done() - resizeRGBA(input, slice, scaleX, coeffs, filterLength) + resizeRGBA(input, slice, scaleX, coeffs, offset, filterLength) }() } wg.Wait() // horizontal filter on transposed image, result is not transposed - coeffs, filterLength = createWeights8(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel) + coeffs, offset, filterLength = createWeights8(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { slice := makeSlice(result, i, cpus).(*image.RGBA) go func() { defer wg.Done() - resizeRGBA(temp, slice, scaleY, coeffs, filterLength) + resizeRGBA(temp, slice, scaleY, coeffs, offset, filterLength) }() } wg.Wait() @@ -127,25 +127,25 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i result := image.NewRGBA(image.Rect(0, 0, int(width), int(height))) // horizontal filter, results in transposed temporary image - coeffs, filterLength := createWeights8(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel) + coeffs, offset, filterLength := createWeights8(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { slice := 
makeSlice(temp, i, cpus).(*image.RGBA) go func() { defer wg.Done() - resizeRGBA(inputAsRGBA, slice, scaleX, coeffs, filterLength) + resizeRGBA(inputAsRGBA, slice, scaleX, coeffs, offset, filterLength) }() } wg.Wait() // horizontal filter on transposed image, result is not transposed - coeffs, filterLength = createWeights8(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel) + coeffs, offset, filterLength = createWeights8(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { slice := makeSlice(result, i, cpus).(*image.RGBA) go func() { defer wg.Done() - resizeRGBA(temp, slice, scaleY, coeffs, filterLength) + resizeRGBA(temp, slice, scaleY, coeffs, offset, filterLength) }() } wg.Wait() @@ -156,25 +156,25 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i result := image.NewRGBA64(image.Rect(0, 0, int(width), int(height))) // horizontal filter, results in transposed temporary image - coeffs, filterLength := createWeights16(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel) + coeffs, offset, filterLength := createWeights16(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { slice := makeSlice(temp, i, cpus).(*image.RGBA64) go func() { defer wg.Done() - resizeRGBA64(input, slice, scaleX, coeffs, filterLength) + resizeRGBA64(input, slice, scaleX, coeffs, offset, filterLength) }() } wg.Wait() // horizontal filter on transposed image, result is not transposed - coeffs, filterLength = createWeights16(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel) + coeffs, offset, filterLength = createWeights16(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { slice := makeSlice(result, i, cpus).(*image.RGBA64) go func() { defer wg.Done() - resizeGeneric(temp, slice, scaleY, coeffs, filterLength) + resizeGeneric(temp, 
slice, scaleY, coeffs, offset, filterLength) }() } wg.Wait() @@ -185,25 +185,25 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i result := image.NewGray(image.Rect(0, 0, int(width), int(height))) // horizontal filter, results in transposed temporary image - coeffs, filterLength := createWeights8(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel) + coeffs, offset, filterLength := createWeights8(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { slice := makeSlice(temp, i, cpus).(*image.Gray) go func() { defer wg.Done() - resizeGray(input, slice, scaleX, coeffs, filterLength) + resizeGray(input, slice, scaleX, coeffs, offset, filterLength) }() } wg.Wait() // horizontal filter on transposed image, result is not transposed - coeffs, filterLength = createWeights8(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel) + coeffs, offset, filterLength = createWeights8(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { slice := makeSlice(result, i, cpus).(*image.Gray) go func() { defer wg.Done() - resizeGray(temp, slice, scaleY, coeffs, filterLength) + resizeGray(temp, slice, scaleY, coeffs, offset, filterLength) }() } wg.Wait() @@ -214,25 +214,25 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i result := image.NewGray16(image.Rect(0, 0, int(width), int(height))) // horizontal filter, results in transposed temporary image - coeffs, filterLength := createWeights16(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel) + coeffs, offset, filterLength := createWeights16(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { slice := makeSlice(temp, i, cpus).(*image.Gray16) go func() { defer wg.Done() - resizeGray16(input, slice, scaleX, coeffs, filterLength) + resizeGray16(input, slice, 
scaleX, coeffs, offset, filterLength) }() } wg.Wait() // horizontal filter on transposed image, result is not transposed - coeffs, filterLength = createWeights16(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel) + coeffs, offset, filterLength = createWeights16(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { slice := makeSlice(result, i, cpus).(*image.Gray16) go func() { defer wg.Done() - resizeGray16(temp, slice, scaleY, coeffs, filterLength) + resizeGray16(temp, slice, scaleY, coeffs, offset, filterLength) }() } wg.Wait() @@ -243,25 +243,25 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i result := image.NewRGBA64(image.Rect(0, 0, int(width), int(height))) // horizontal filter, results in transposed temporary image - coeffs, filterLength := createWeights16(temp.Bounds().Dy(), img.Bounds().Min.X, taps, blur, scaleX, kernel) + coeffs, offset, filterLength := createWeights16(temp.Bounds().Dy(), img.Bounds().Min.X, taps, blur, scaleX, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { slice := makeSlice(temp, i, cpus).(*image.RGBA64) go func() { defer wg.Done() - resizeGeneric(img, slice, scaleX, coeffs, filterLength) + resizeGeneric(img, slice, scaleX, coeffs, offset, filterLength) }() } wg.Wait() // horizontal filter on transposed image, result is not transposed - coeffs, filterLength = createWeights16(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel) + coeffs, offset, filterLength = createWeights16(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { slice := makeSlice(result, i, cpus).(*image.RGBA64) go func() { defer wg.Done() - resizeRGBA64(temp, slice, scaleY, coeffs, filterLength) + resizeRGBA64(temp, slice, scaleY, coeffs, offset, filterLength) }() } wg.Wait() From 427b8d133e710b7781794fbf781ade2c1a71dd2e Mon Sep 17 00:00:00 2001 From: Charlie Vieth Date: Tue, 29 
Jul 2014 18:32:58 -0400 Subject: [PATCH 2/2] Optimized Nearest-Neighbor function - 2x faster --- filters.go | 29 +++++- nearest.go | 228 +++++++++++++++++++++++++++++++++++++++++++ nearest_test.go | 41 ++++++++ resize.go | 253 ++++++++++++++++++++++++++++++++++++++++++------ 4 files changed, 520 insertions(+), 31 deletions(-) create mode 100644 nearest.go create mode 100644 nearest_test.go diff --git a/filters.go b/filters.go index a5f6e79..0cc738c 100644 --- a/filters.go +++ b/filters.go @@ -89,8 +89,9 @@ func createWeights8(dy, minx, filterLength int, blur, scale float64, kernel func for y := 0; y < dy; y++ { interpX := scale*(float64(y)+0.5) + float64(minx) start[y] = int(interpX) - filterLength/2 + 1 + interpX -= float64(start[y]) for i := 0; i < filterLength; i++ { - in := (interpX - float64(start[y]) - float64(i)) * filterFactor + in := (interpX - float64(i)) * filterFactor coeffs[y*filterLength+i] = int16(kernel(in) * 256) } } @@ -108,11 +109,35 @@ func createWeights16(dy, minx, filterLength int, blur, scale float64, kernel fun for y := 0; y < dy; y++ { interpX := scale*(float64(y)+0.5) + float64(minx) start[y] = int(interpX) - filterLength/2 + 1 + interpX -= float64(start[y]) for i := 0; i < filterLength; i++ { - in := (interpX - float64(start[y]) - float64(i)) * filterFactor + in := (interpX - float64(i)) * filterFactor coeffs[y*filterLength+i] = int32(kernel(in) * 65536) } } return coeffs, start, filterLength } + +func createWeightsNearest(dy, minx, filterLength int, blur, scale float64) ([]bool, []int, int) { + filterLength = filterLength * int(math.Max(math.Ceil(blur*scale), 1)) + filterFactor := math.Min(1./(blur*scale), 1) + + coeffs := make([]bool, dy*filterLength) + start := make([]int, dy) + for y := 0; y < dy; y++ { + interpX := scale*(float64(y)+0.5) + float64(minx) + start[y] = int(interpX) - filterLength/2 + 1 + interpX -= float64(start[y]) + for i := 0; i < filterLength; i++ { + in := (interpX - float64(i)) * filterFactor + if in >= -0.5 && in 
< 0.5 { + coeffs[y*filterLength+i] = true + } else { + coeffs[y*filterLength+i] = false + } + } + } + + return coeffs, start, filterLength +} diff --git a/nearest.go b/nearest.go new file mode 100644 index 0000000..5708fd0 --- /dev/null +++ b/nearest.go @@ -0,0 +1,228 @@ +package resize + +import "image" + +func floatToUint8(x float32) uint8 { + // Nearest-neighbor values are always + // positive no need to check lower-bound. + if x > 0xfe { + return 0xff + } + return uint8(x) +} + +func floatToUint16(x float32) uint16 { + if x > 0xfffe { + return 0xffff + } + return uint16(x) +} + +func nearestGeneric(in image.Image, out *image.RGBA64, scale float64, coeffs []bool, offset []int, filterLength int) { + oldBounds := in.Bounds() + newBounds := out.Bounds() + + for x := newBounds.Min.X; x < newBounds.Max.X; x++ { + for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { + var rgba [4]float32 + var sum float32 + start := offset[y] + ci := (y - newBounds.Min.Y) * filterLength + for i := 0; i < filterLength; i++ { + if coeffs[ci+i] { + xi := start + i + switch { + case uint(xi) < uint(oldBounds.Max.X): + break + case xi >= oldBounds.Max.X: + xi = oldBounds.Min.X + default: + xi = oldBounds.Max.X - 1 + } + r, g, b, a := in.At(xi, x).RGBA() + rgba[0] += float32(r) + rgba[1] += float32(g) + rgba[2] += float32(b) + rgba[3] += float32(a) + sum++ + } + } + + offset := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*8 + value := floatToUint16(rgba[0] / sum) + out.Pix[offset+0] = uint8(value >> 8) + out.Pix[offset+1] = uint8(value) + value = floatToUint16(rgba[1] / sum) + out.Pix[offset+2] = uint8(value >> 8) + out.Pix[offset+3] = uint8(value) + value = floatToUint16(rgba[2] / sum) + out.Pix[offset+4] = uint8(value >> 8) + out.Pix[offset+5] = uint8(value) + value = floatToUint16(rgba[3] / sum) + out.Pix[offset+6] = uint8(value >> 8) + out.Pix[offset+7] = uint8(value) + } + } +} + +func nearestRGBA(in *image.RGBA, out *image.RGBA, scale float64, coeffs []bool, offset []int, 
filterLength int) { + oldBounds := in.Bounds() + newBounds := out.Bounds() + minX := oldBounds.Min.X * 4 + maxX := (oldBounds.Max.X - oldBounds.Min.X - 1) * 4 + + for x := newBounds.Min.X; x < newBounds.Max.X; x++ { + row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:] + for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { + var rgba [4]float32 + var sum float32 + start := offset[y] + ci := (y - newBounds.Min.Y) * filterLength + for i := 0; i < filterLength; i++ { + if coeffs[ci+i] { + xi := start + i + switch { + case uint(xi) < uint(oldBounds.Max.X): + xi *= 4 + case xi >= oldBounds.Max.X: + xi = maxX + default: + xi = minX + } + rgba[0] += float32(row[xi+0]) + rgba[1] += float32(row[xi+1]) + rgba[2] += float32(row[xi+2]) + rgba[3] += float32(row[xi+3]) + sum++ + } + } + + xo := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*4 + out.Pix[xo+0] = floatToUint8(rgba[0] / sum) + out.Pix[xo+1] = floatToUint8(rgba[1] / sum) + out.Pix[xo+2] = floatToUint8(rgba[2] / sum) + out.Pix[xo+3] = floatToUint8(rgba[3] / sum) + } + } +} + +func nearestRGBA64(in *image.RGBA64, out *image.RGBA64, scale float64, coeffs []bool, offset []int, filterLength int) { + oldBounds := in.Bounds() + newBounds := out.Bounds() + minX := oldBounds.Min.X * 8 + maxX := (oldBounds.Max.X - oldBounds.Min.X - 1) * 8 + + for x := newBounds.Min.X; x < newBounds.Max.X; x++ { + row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:] + for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { + var rgba [4]float32 + var sum float32 + start := offset[y] + ci := (y - newBounds.Min.Y) * filterLength + for i := 0; i < filterLength; i++ { + if coeffs[ci+i] { + xi := start + i + switch { + case uint(xi) < uint(oldBounds.Max.X): + xi *= 8 + case xi >= oldBounds.Max.X: + xi = maxX + default: + xi = minX + } + rgba[0] += float32(uint16(row[xi+0])<<8 | uint16(row[xi+1])) + rgba[1] += float32(uint16(row[xi+2])<<8 | uint16(row[xi+3])) + rgba[2] += float32(uint16(row[xi+4])<<8 | uint16(row[xi+5])) + rgba[3] += 
float32(uint16(row[xi+6])<<8 | uint16(row[xi+7])) + sum++ + } + } + + xo := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*8 + value := floatToUint16(rgba[0] / sum) + out.Pix[xo+0] = uint8(value >> 8) + out.Pix[xo+1] = uint8(value) + value = floatToUint16(rgba[1] / sum) + out.Pix[xo+2] = uint8(value >> 8) + out.Pix[xo+3] = uint8(value) + value = floatToUint16(rgba[2] / sum) + out.Pix[xo+4] = uint8(value >> 8) + out.Pix[xo+5] = uint8(value) + value = floatToUint16(rgba[3] / sum) + out.Pix[xo+6] = uint8(value >> 8) + out.Pix[xo+7] = uint8(value) + } + } +} + +func nearestGray(in *image.Gray, out *image.Gray, scale float64, coeffs []bool, offset []int, filterLength int) { + oldBounds := in.Bounds() + newBounds := out.Bounds() + minX := oldBounds.Min.X + maxX := (oldBounds.Max.X - oldBounds.Min.X - 1) + + for x := newBounds.Min.X; x < newBounds.Max.X; x++ { + row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:] + for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { + var gray float32 + var sum float32 + start := offset[y] + ci := (y - newBounds.Min.Y) * filterLength + for i := 0; i < filterLength; i++ { + if coeffs[ci+i] { + xi := start + i + switch { + case uint(xi) < uint(oldBounds.Max.X): + break + case xi >= oldBounds.Max.X: + xi = maxX + default: + xi = minX + } + gray += float32(row[xi]) + sum++ + } + } + + offset := (y-newBounds.Min.Y)*out.Stride + (x - newBounds.Min.X) + out.Pix[offset] = floatToUint8(gray / sum) + } + } +} + +func nearestGray16(in *image.Gray16, out *image.Gray16, scale float64, coeffs []bool, offset []int, filterLength int) { + oldBounds := in.Bounds() + newBounds := out.Bounds() + minX := oldBounds.Min.X * 2 + maxX := (oldBounds.Max.X - oldBounds.Min.X - 1) * 2 + + for x := newBounds.Min.X; x < newBounds.Max.X; x++ { + row := in.Pix[(x-oldBounds.Min.Y)*in.Stride:] + for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { + var gray float32 + var sum float32 + start := offset[y] + ci := (y - newBounds.Min.Y) * filterLength + for i := 0; i < 
filterLength; i++ { + if coeffs[ci+i] { + xi := start + i + switch { + case uint(xi) < uint(oldBounds.Max.X): + xi *= 2 + case xi >= oldBounds.Max.X: + xi = maxX + default: + xi = minX + } + gray += float32(uint16(row[xi+0])<<8 | uint16(row[xi+1])) + sum++ + } + } + + offset := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*2 + value := floatToUint16(gray / sum) + out.Pix[offset+0] = uint8(value >> 8) + out.Pix[offset+1] = uint8(value) + } + } +} diff --git a/nearest_test.go b/nearest_test.go new file mode 100644 index 0000000..08adbda --- /dev/null +++ b/nearest_test.go @@ -0,0 +1,41 @@ +package resize + +import "testing" + +func Test_FloatToUint8(t *testing.T) { + var testData = []struct { + in float32 + expected uint8 + }{ + {0, 0}, + {255, 255}, + {128, 128}, + {1, 1}, + {256, 255}, + } + for _, test := range testData { + actual := floatToUint8(test.in) + if actual != test.expected { + t.Fail() + } + } +} + +func Test_FloatToUint16(t *testing.T) { + var testData = []struct { + in float32 + expected uint16 + }{ + {0, 0}, + {65535, 65535}, + {128, 128}, + {1, 1}, + {65536, 65535}, + } + for _, test := range testData { + actual := floatToUint16(test.in) + if actual != test.expected { + t.Fail() + } + } +} diff --git a/resize.go b/resize.go index b9eb775..fad39b4 100644 --- a/resize.go +++ b/resize.go @@ -33,36 +33,41 @@ import ( // An InterpolationFunction provides the parameters that describe an // interpolation kernel. It returns the number of samples to take // and the kernel function to use for sampling. 
-type InterpolationFunction func() (int, func(float64) float64) +type InterpolationFunction int -// Nearest-neighbor interpolation -func NearestNeighbor() (int, func(float64) float64) { - return 2, nearest -} +// InterpolationFunction constants +const ( + // Nearest-neighbor interpolation + NearestNeighbor InterpolationFunction = iota + // Bilinear interpolation + Bilinear + // Bicubic interpolation (with cubic hermite spline) + Bicubic + // Mitchell-Netravali interpolation + MitchellNetravali + // Lanczos interpolation (a=2) + Lanczos2 + // Lanczos interpolation (a=3) + Lanczos3 +) -// Bilinear interpolation -func Bilinear() (int, func(float64) float64) { - return 2, linear -} - -// Bicubic interpolation (with cubic hermite spline) -func Bicubic() (int, func(float64) float64) { - return 4, cubic -} - -// Mitchell-Netravali interpolation -func MitchellNetravali() (int, func(float64) float64) { - return 4, mitchellnetravali -} - -// Lanczos interpolation (a=2) -func Lanczos2() (int, func(float64) float64) { - return 4, lanczos2 -} - -// Lanczos interpolation (a=3) -func Lanczos3() (int, func(float64) float64) { - return 6, lanczos3 +// kernel returns the InterpolationFunction's tap count and kernel function. +func (i InterpolationFunction) kernel() (int, func(float64) float64) { + switch i { + case Bilinear: + return 2, linear + case Bicubic: + return 4, cubic + case MitchellNetravali: + return 4, mitchellnetravali + case Lanczos2: + return 4, lanczos2 + case Lanczos3: + return 6, lanczos3 + default: + // Default to NearestNeighbor. 
+ return 2, nearest + } } // values <1 will sharpen the image @@ -81,8 +86,11 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i if height == 0 { height = uint(0.7 + float64(img.Bounds().Dy())/scaleY) } + if interp == NearestNeighbor { + return resizeNearest(width, height, scaleX, scaleY, img, interp) + } - taps, kernel := interp() + taps, kernel := interp.kernel() cpus := runtime.NumCPU() wg := sync.WaitGroup{} @@ -269,6 +277,193 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i } } +func resizeNearest(width, height uint, scaleX, scaleY float64, img image.Image, interp InterpolationFunction) image.Image { + taps, _ := interp.kernel() + cpus := runtime.NumCPU() + wg := sync.WaitGroup{} + + switch input := img.(type) { + case *image.RGBA: + // 8-bit precision + temp := image.NewRGBA(image.Rect(0, 0, input.Bounds().Dy(), int(width))) + result := image.NewRGBA(image.Rect(0, 0, int(width), int(height))) + + // horizontal filter, results in transposed temporary image + coeffs, offset, filterLength := createWeightsNearest(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(temp, i, cpus).(*image.RGBA) + go func() { + defer wg.Done() + nearestRGBA(input, slice, scaleX, coeffs, offset, filterLength) + }() + } + wg.Wait() + + // horizontal filter on transposed image, result is not transposed + coeffs, offset, filterLength = createWeightsNearest(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(result, i, cpus).(*image.RGBA) + go func() { + defer wg.Done() + nearestRGBA(temp, slice, scaleY, coeffs, offset, filterLength) + }() + } + wg.Wait() + return result + case *image.YCbCr: + // 8-bit precision + // accessing the YCbCr arrays in a tight loop is slow. + // converting the image before filtering will improve performance. 
+ inputAsRGBA := convertYCbCrToRGBA(input) + temp := image.NewRGBA(image.Rect(0, 0, input.Bounds().Dy(), int(width))) + result := image.NewRGBA(image.Rect(0, 0, int(width), int(height))) + + // horizontal filter, results in transposed temporary image + coeffs, offset, filterLength := createWeightsNearest(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(temp, i, cpus).(*image.RGBA) + go func() { + defer wg.Done() + nearestRGBA(inputAsRGBA, slice, scaleX, coeffs, offset, filterLength) + }() + } + wg.Wait() + + // horizontal filter on transposed image, result is not transposed + coeffs, offset, filterLength = createWeightsNearest(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(result, i, cpus).(*image.RGBA) + go func() { + defer wg.Done() + nearestRGBA(temp, slice, scaleY, coeffs, offset, filterLength) + }() + } + wg.Wait() + return result + case *image.RGBA64: + // 16-bit precision + temp := image.NewRGBA64(image.Rect(0, 0, input.Bounds().Dy(), int(width))) + result := image.NewRGBA64(image.Rect(0, 0, int(width), int(height))) + + // horizontal filter, results in transposed temporary image + coeffs, offset, filterLength := createWeightsNearest(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(temp, i, cpus).(*image.RGBA64) + go func() { + defer wg.Done() + nearestRGBA64(input, slice, scaleX, coeffs, offset, filterLength) + }() + } + wg.Wait() + + // horizontal filter on transposed image, result is not transposed + coeffs, offset, filterLength = createWeightsNearest(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(result, i, cpus).(*image.RGBA64) + go func() { + defer wg.Done() + nearestGeneric(temp, slice, scaleY, coeffs, offset, filterLength) + }() + } + 
wg.Wait() + return result + case *image.Gray: + // 8-bit precision + temp := image.NewGray(image.Rect(0, 0, input.Bounds().Dy(), int(width))) + result := image.NewGray(image.Rect(0, 0, int(width), int(height))) + + // horizontal filter, results in transposed temporary image + coeffs, offset, filterLength := createWeightsNearest(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(temp, i, cpus).(*image.Gray) + go func() { + defer wg.Done() + nearestGray(input, slice, scaleX, coeffs, offset, filterLength) + }() + } + wg.Wait() + + // horizontal filter on transposed image, result is not transposed + coeffs, offset, filterLength = createWeightsNearest(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(result, i, cpus).(*image.Gray) + go func() { + defer wg.Done() + nearestGray(temp, slice, scaleY, coeffs, offset, filterLength) + }() + } + wg.Wait() + return result + case *image.Gray16: + // 16-bit precision + temp := image.NewGray16(image.Rect(0, 0, input.Bounds().Dy(), int(width))) + result := image.NewGray16(image.Rect(0, 0, int(width), int(height))) + + // horizontal filter, results in transposed temporary image + coeffs, offset, filterLength := createWeightsNearest(temp.Bounds().Dy(), input.Bounds().Min.X, taps, blur, scaleX) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(temp, i, cpus).(*image.Gray16) + go func() { + defer wg.Done() + nearestGray16(input, slice, scaleX, coeffs, offset, filterLength) + }() + } + wg.Wait() + + // horizontal filter on transposed image, result is not transposed + coeffs, offset, filterLength = createWeightsNearest(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(result, i, cpus).(*image.Gray16) + go func() { + defer wg.Done() + nearestGray16(temp, slice, scaleY, coeffs, offset, 
filterLength) + }() + } + wg.Wait() + return result + default: + // 16-bit precision + temp := image.NewRGBA64(image.Rect(0, 0, img.Bounds().Dy(), int(width))) + result := image.NewRGBA64(image.Rect(0, 0, int(width), int(height))) + + // horizontal filter, results in transposed temporary image + coeffs, offset, filterLength := createWeightsNearest(temp.Bounds().Dy(), img.Bounds().Min.X, taps, blur, scaleX) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(temp, i, cpus).(*image.RGBA64) + go func() { + defer wg.Done() + nearestGeneric(img, slice, scaleX, coeffs, offset, filterLength) + }() + } + wg.Wait() + + // horizontal filter on transposed image, result is not transposed + coeffs, offset, filterLength = createWeightsNearest(result.Bounds().Dy(), temp.Bounds().Min.X, taps, blur, scaleY) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(result, i, cpus).(*image.RGBA64) + go func() { + defer wg.Done() + nearestRGBA64(temp, slice, scaleY, coeffs, offset, filterLength) + }() + } + wg.Wait() + return result + } + +} + // Calculates scaling factors using old and new image dimensions. func calcFactors(width, height uint, oldWidth, oldHeight float64) (scaleX, scaleY float64) { if width == 0 {