diff --git a/README.md b/README.md index ea145d8..2aefa75 100644 --- a/README.md +++ b/README.md @@ -90,7 +90,7 @@ func main() { Caveats ------- -* Optimized access routines are used for `image.RGBA`, `image.RGBA64`, `image.YCbCr`, `image.Gray`, and `image.Gray16` types. All other image types are accessed in a generic way that will result in slow processing speed. +* Optimized access routines are used for `image.RGBA`, `image.NRGBA`, `image.RGBA64`, `image.NRGBA64`, `image.YCbCr`, `image.Gray`, and `image.Gray16` types. All other image types are accessed in a generic way that will result in slow processing speed. * JPEG images are stored in `image.YCbCr`. This image format stores data in a way that will decrease processing speed. A resize may be up to 2 times slower than with `image.RGBA`. diff --git a/converter.go b/converter.go index 254592a..f4d9d0f 100644 --- a/converter.go +++ b/converter.go @@ -43,7 +43,7 @@ func clampUint16(in int64) uint16 { return 0 } -func resizeGeneric(in image.Image, out *image.RGBA64, scale float64, coeffs []int32, offset []int, filterLength int) { +func resizeGeneric(in image.Image, out *image.NRGBA64, scale float64, coeffs []int32, offset []int, filterLength int) { newBounds := out.Bounds() maxX := in.Bounds().Dx() - 1 @@ -89,7 +89,7 @@ func resizeGeneric(in image.Image, out *image.RGBA64, scale float64, coeffs []in } } -func resizeRGBA(in *image.RGBA, out *image.RGBA, scale float64, coeffs []int16, offset []int, filterLength int) { +func resizeRGBA(in *image.RGBA, out *image.NRGBA, scale float64, coeffs []int16, offset []int, filterLength int) { newBounds := out.Bounds() maxX := in.Bounds().Dx() - 1 @@ -129,7 +129,95 @@ func resizeRGBA(in *image.RGBA, out *image.RGBA, scale float64, coeffs []int16, } } -func resizeRGBA64(in *image.RGBA64, out *image.RGBA64, scale float64, coeffs []int32, offset []int, filterLength int) { +func resizeNRGBA(in *image.NRGBA, out *image.NRGBA, scale float64, coeffs []int16, offset []int, filterLength int) { + newBounds := out.Bounds() + maxX := in.Bounds().Dx() - 1 + + for x := newBounds.Min.X; x < newBounds.Max.X; x++ { + row := in.Pix[x*in.Stride:] + for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { + var rgba [4]int32 + var sum int32 + start := offset[y] + ci := y * filterLength + for i := 0; i < filterLength; i++ { + coeff := coeffs[ci+i] + if coeff != 0 { + xi := start + i + switch { + case uint(xi) < uint(maxX): + xi *= 4 + case xi >= maxX: + xi = 4 * maxX + default: + xi = 0 + } + rgba[0] += int32(coeff) * int32(row[xi+0]) + rgba[1] += int32(coeff) * int32(row[xi+1]) + rgba[2] += int32(coeff) * int32(row[xi+2]) + rgba[3] += int32(coeff) * int32(row[xi+3]) + sum += int32(coeff) + } + } + + xo := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*4 + out.Pix[xo+0] = clampUint8(rgba[0] / sum) + out.Pix[xo+1] = clampUint8(rgba[1] / sum) + out.Pix[xo+2] = clampUint8(rgba[2] / sum) + out.Pix[xo+3] = clampUint8(rgba[3] / sum) + } + } +} + +func resizeRGBA64(in *image.RGBA64, out *image.NRGBA64, scale float64, coeffs []int32, offset []int, filterLength int) { + newBounds := out.Bounds() + maxX := in.Bounds().Dx() - 1 + + for x := newBounds.Min.X; x < newBounds.Max.X; x++ { + row := in.Pix[x*in.Stride:] + for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { + var rgba [4]int64 + var sum int64 + start := offset[y] + ci := y * filterLength + for i := 0; i < filterLength; i++ { + coeff := coeffs[ci+i] + if coeff != 0 { + xi := start + i + switch { + case uint(xi) < uint(maxX): + xi *= 8 + case xi >= maxX: + xi = 8 * maxX + default: + xi = 0 + } + rgba[0] += int64(coeff) * int64(uint16(row[xi+0])<<8|uint16(row[xi+1])) + rgba[1] += int64(coeff) * int64(uint16(row[xi+2])<<8|uint16(row[xi+3])) + rgba[2] += int64(coeff) * int64(uint16(row[xi+4])<<8|uint16(row[xi+5])) + rgba[3] += int64(coeff) * int64(uint16(row[xi+6])<<8|uint16(row[xi+7])) + sum += int64(coeff) + } + } + + xo := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*8 + value := clampUint16(rgba[0] / sum) + out.Pix[xo+0] = uint8(value >> 8) + out.Pix[xo+1] = uint8(value) + value = clampUint16(rgba[1] / sum) + out.Pix[xo+2] = uint8(value >> 8) + out.Pix[xo+3] = uint8(value) + value = clampUint16(rgba[2] / sum) + out.Pix[xo+4] = uint8(value >> 8) + out.Pix[xo+5] = uint8(value) + value = clampUint16(rgba[3] / sum) + out.Pix[xo+6] = uint8(value >> 8) + out.Pix[xo+7] = uint8(value) + } + } +} + +func resizeNRGBA64(in *image.NRGBA64, out *image.NRGBA64, scale float64, coeffs []int32, offset []int, filterLength int) { newBounds := out.Bounds() maxX := in.Bounds().Dx() - 1 diff --git a/nearest.go b/nearest.go index 78ad3cb..888039d 100644 --- a/nearest.go +++ b/nearest.go @@ -118,6 +118,45 @@ func nearestRGBA(in *image.RGBA, out *image.RGBA, scale float64, coeffs []bool, } } +func nearestNRGBA(in *image.NRGBA, out *image.NRGBA, scale float64, coeffs []bool, offset []int, filterLength int) { + newBounds := out.Bounds() + maxX := in.Bounds().Dx() - 1 + + for x := newBounds.Min.X; x < newBounds.Max.X; x++ { + row := in.Pix[x*in.Stride:] + for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { + var rgba [4]float32 + var sum float32 + start := offset[y] + ci := y * filterLength + for i := 0; i < filterLength; i++ { + if coeffs[ci+i] { + xi := start + i + switch { + case uint(xi) < uint(maxX): + xi *= 4 + case xi >= maxX: + xi = 4 * maxX + default: + xi = 0 + } + rgba[0] += float32(row[xi+0]) + rgba[1] += float32(row[xi+1]) + rgba[2] += float32(row[xi+2]) + rgba[3] += float32(row[xi+3]) + sum++ + } + } + + xo := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*4 + out.Pix[xo+0] = floatToUint8(rgba[0] / sum) + out.Pix[xo+1] = floatToUint8(rgba[1] / sum) + out.Pix[xo+2] = floatToUint8(rgba[2] / sum) + out.Pix[xo+3] = floatToUint8(rgba[3] / sum) + } + } +} + func nearestRGBA64(in *image.RGBA64, out *image.RGBA64, scale float64, coeffs []bool, offset []int, filterLength int) { newBounds := out.Bounds() maxX := in.Bounds().Dx() - 1 @@ -165,6 +204,53 @@ func nearestRGBA64(in *image.RGBA64, out *image.RGBA64, scale float64, coeffs [] } } +func nearestNRGBA64(in *image.NRGBA64, out *image.NRGBA64, scale float64, coeffs []bool, offset []int, filterLength int) { + newBounds := out.Bounds() + maxX := in.Bounds().Dx() - 1 + + for x := newBounds.Min.X; x < newBounds.Max.X; x++ { + row := in.Pix[x*in.Stride:] + for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { + var rgba [4]float32 + var sum float32 + start := offset[y] + ci := y * filterLength + for i := 0; i < filterLength; i++ { + if coeffs[ci+i] { + xi := start + i + switch { + case uint(xi) < uint(maxX): + xi *= 8 + case xi >= maxX: + xi = 8 * maxX + default: + xi = 0 + } + rgba[0] += float32(uint16(row[xi+0])<<8 | uint16(row[xi+1])) + rgba[1] += float32(uint16(row[xi+2])<<8 | uint16(row[xi+3])) + rgba[2] += float32(uint16(row[xi+4])<<8 | uint16(row[xi+5])) + rgba[3] += float32(uint16(row[xi+6])<<8 | uint16(row[xi+7])) + sum++ + } + } + + xo := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*8 + value := floatToUint16(rgba[0] / sum) + out.Pix[xo+0] = uint8(value >> 8) + out.Pix[xo+1] = uint8(value) + value = floatToUint16(rgba[1] / sum) + out.Pix[xo+2] = uint8(value >> 8) + out.Pix[xo+3] = uint8(value) + value = floatToUint16(rgba[2] / sum) + out.Pix[xo+4] = uint8(value >> 8) + out.Pix[xo+5] = uint8(value) + value = floatToUint16(rgba[3] / sum) + out.Pix[xo+6] = uint8(value >> 8) + out.Pix[xo+7] = uint8(value) + } + } +} + func nearestGray(in *image.Gray, out *image.Gray, scale float64, coeffs []bool, offset []int, filterLength int) { newBounds := out.Bounds() maxX := in.Bounds().Dx() - 1 diff --git a/resize.go b/resize.go index a913c6f..4d4ff6e 100644 --- a/resize.go +++ b/resize.go @@ -105,14 +105,14 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i switch input := img.(type) { case *image.RGBA: // 8-bit precision - temp := image.NewRGBA(image.Rect(0, 0, input.Bounds().Dy(), int(width))) - result := image.NewRGBA(image.Rect(0, 0, int(width), int(height))) + temp := image.NewNRGBA(image.Rect(0, 0, input.Bounds().Dy(), int(width))) + result := image.NewNRGBA(image.Rect(0, 0, int(width), int(height))) // horizontal filter, results in transposed temporary image coeffs, offset, filterLength := createWeights8(temp.Bounds().Dy(), taps, blur, scaleX, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { - slice := makeSlice(temp, i, cpus).(*image.RGBA) + slice := makeSlice(temp, i, cpus).(*image.NRGBA) go func() { defer wg.Done() resizeRGBA(input, slice, scaleX, coeffs, offset, filterLength) @@ -124,14 +124,44 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i coeffs, offset, filterLength = createWeights8(result.Bounds().Dy(), taps, blur, scaleY, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { - slice := makeSlice(result, i, cpus).(*image.RGBA) + slice := makeSlice(result, i, cpus).(*image.NRGBA) go func() { defer wg.Done() - resizeRGBA(temp, slice, scaleY, coeffs, offset, filterLength) + resizeNRGBA(temp, slice, scaleY, coeffs, offset, filterLength) }() } wg.Wait() return result + case *image.NRGBA: + // 8-bit precision + temp := image.NewNRGBA(image.Rect(0, 0, input.Bounds().Dy(), int(width))) + result := image.NewNRGBA(image.Rect(0, 0, int(width), int(height))) + + // horizontal filter, results in transposed temporary image + coeffs, offset, filterLength := createWeights8(temp.Bounds().Dy(), taps, blur, scaleX, kernel) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(temp, i, cpus).(*image.NRGBA) + go func() { + defer wg.Done() + resizeNRGBA(input, slice, scaleX, coeffs, offset, filterLength) + }() + } + wg.Wait() + + // horizontal filter on transposed image, result is not transposed + coeffs, offset, filterLength = createWeights8(result.Bounds().Dy(), taps, blur, scaleY, kernel) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(result, i, cpus).(*image.NRGBA) + go func() { + defer wg.Done() + resizeNRGBA(temp, slice, scaleY, coeffs, offset, filterLength) + }() + } + wg.Wait() + return result + case *image.YCbCr: // 8-bit precision // accessing the YCbCr arrays in a tight loop is slow. @@ -164,14 +194,14 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i return result.YCbCr() case *image.RGBA64: // 16-bit precision - temp := image.NewRGBA64(image.Rect(0, 0, input.Bounds().Dy(), int(width))) - result := image.NewRGBA64(image.Rect(0, 0, int(width), int(height))) + temp := image.NewNRGBA64(image.Rect(0, 0, input.Bounds().Dy(), int(width))) + result := image.NewNRGBA64(image.Rect(0, 0, int(width), int(height))) // horizontal filter, results in transposed temporary image coeffs, offset, filterLength := createWeights16(temp.Bounds().Dy(), taps, blur, scaleX, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { - slice := makeSlice(temp, i, cpus).(*image.RGBA64) + slice := makeSlice(temp, i, cpus).(*image.NRGBA64) go func() { defer wg.Done() resizeRGBA64(input, slice, scaleX, coeffs, offset, filterLength) @@ -183,10 +213,39 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i coeffs, offset, filterLength = createWeights16(result.Bounds().Dy(), taps, blur, scaleY, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { - slice := makeSlice(result, i, cpus).(*image.RGBA64) + slice := makeSlice(result, i, cpus).(*image.NRGBA64) go func() { defer wg.Done() - resizeGeneric(temp, slice, scaleY, coeffs, offset, filterLength) + resizeNRGBA64(temp, slice, scaleY, coeffs, offset, filterLength) + }() + } + wg.Wait() + return result + case *image.NRGBA64: + // 16-bit precision + temp := image.NewNRGBA64(image.Rect(0, 0, input.Bounds().Dy(), int(width))) + result := image.NewNRGBA64(image.Rect(0, 0, int(width), int(height))) + + // horizontal filter, results in transposed temporary image + coeffs, offset, filterLength := createWeights16(temp.Bounds().Dy(), taps, blur, scaleX, kernel) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(temp, i, cpus).(*image.NRGBA64) + go func() { + defer wg.Done() + resizeNRGBA64(input, slice, scaleX, coeffs, offset, filterLength) + }() + } + wg.Wait() + + // horizontal filter on transposed image, result is not transposed + coeffs, offset, filterLength = createWeights16(result.Bounds().Dy(), taps, blur, scaleY, kernel) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(result, i, cpus).(*image.NRGBA64) + go func() { + defer wg.Done() + resizeNRGBA64(temp, slice, scaleY, coeffs, offset, filterLength) }() } wg.Wait() @@ -251,14 +310,14 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i return result default: // 16-bit precision - temp := image.NewRGBA64(image.Rect(0, 0, img.Bounds().Dy(), int(width))) - result := image.NewRGBA64(image.Rect(0, 0, int(width), int(height))) + temp := image.NewNRGBA64(image.Rect(0, 0, img.Bounds().Dy(), int(width))) + result := image.NewNRGBA64(image.Rect(0, 0, int(width), int(height))) // horizontal filter, results in transposed temporary image coeffs, offset, filterLength := createWeights16(temp.Bounds().Dy(), taps, blur, scaleX, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { - slice := makeSlice(temp, i, cpus).(*image.RGBA64) + slice := makeSlice(temp, i, cpus).(*image.NRGBA64) go func() { defer wg.Done() resizeGeneric(img, slice, scaleX, coeffs, offset, filterLength) @@ -270,10 +329,10 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i coeffs, offset, filterLength = createWeights16(result.Bounds().Dy(), taps, blur, scaleY, kernel) wg.Add(cpus) for i := 0; i < cpus; i++ { - slice := makeSlice(result, i, cpus).(*image.RGBA64) + slice := makeSlice(result, i, cpus).(*image.NRGBA64) go func() { defer wg.Done() - resizeRGBA64(temp, slice, scaleY, coeffs, offset, filterLength) + resizeNRGBA64(temp, slice, scaleY, coeffs, offset, filterLength) }() } wg.Wait() @@ -316,6 +375,35 @@ func resizeNearest(width, height uint, scaleX, scaleY float64, img image.Image, } wg.Wait() return result + case *image.NRGBA: + // 8-bit precision + temp := image.NewNRGBA(image.Rect(0, 0, input.Bounds().Dy(), int(width))) + result := image.NewNRGBA(image.Rect(0, 0, int(width), int(height))) + + // horizontal filter, results in transposed temporary image + coeffs, offset, filterLength := createWeightsNearest(temp.Bounds().Dy(), taps, blur, scaleX) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(temp, i, cpus).(*image.NRGBA) + go func() { + defer wg.Done() + nearestNRGBA(input, slice, scaleX, coeffs, offset, filterLength) + }() + } + wg.Wait() + + // horizontal filter on transposed image, result is not transposed + coeffs, offset, filterLength = createWeightsNearest(result.Bounds().Dy(), taps, blur, scaleY) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(result, i, cpus).(*image.NRGBA) + go func() { + defer wg.Done() + nearestNRGBA(temp, slice, scaleY, coeffs, offset, filterLength) + }() + } + wg.Wait() + return result case *image.YCbCr: // 8-bit precision // accessing the YCbCr arrays in a tight loop is slow. @@ -370,7 +458,36 @@ func resizeNearest(width, height uint, scaleX, scaleY float64, img image.Image, slice := makeSlice(result, i, cpus).(*image.RGBA64) go func() { defer wg.Done() - nearestGeneric(temp, slice, scaleY, coeffs, offset, filterLength) + nearestRGBA64(temp, slice, scaleY, coeffs, offset, filterLength) + }() + } + wg.Wait() + return result + case *image.NRGBA64: + // 16-bit precision + temp := image.NewNRGBA64(image.Rect(0, 0, input.Bounds().Dy(), int(width))) + result := image.NewNRGBA64(image.Rect(0, 0, int(width), int(height))) + + // horizontal filter, results in transposed temporary image + coeffs, offset, filterLength := createWeightsNearest(temp.Bounds().Dy(), taps, blur, scaleX) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(temp, i, cpus).(*image.NRGBA64) + go func() { + defer wg.Done() + nearestNRGBA64(input, slice, scaleX, coeffs, offset, filterLength) + }() + } + wg.Wait() + + // horizontal filter on transposed image, result is not transposed + coeffs, offset, filterLength = createWeightsNearest(result.Bounds().Dy(), taps, blur, scaleY) + wg.Add(cpus) + for i := 0; i < cpus; i++ { + slice := makeSlice(result, i, cpus).(*image.NRGBA64) + go func() { + defer wg.Done() + nearestNRGBA64(temp, slice, scaleY, coeffs, offset, filterLength) }() } wg.Wait()