Merge branch 'NRGBA_support'

This adds optimized access routines for the image.NRGBA and image.NRGBA64 types.
Internally, these image types are now preferred because they are easier to handle
when an alpha channel is present.
This commit is contained in:
nfnt 2015-05-12 15:50:27 +02:00
commit 3502bb8cd1
4 changed files with 311 additions and 20 deletions

View File

@ -90,7 +90,7 @@ func main() {
Caveats
-------
* Optimized access routines are used for `image.RGBA`, `image.RGBA64`, `image.YCbCr`, `image.Gray`, and `image.Gray16` types. All other image types are accessed in a generic way that will result in slow processing speed.
* Optimized access routines are used for `image.RGBA`, `image.NRGBA`, `image.RGBA64`, `image.NRGBA64`, `image.YCbCr`, `image.Gray`, and `image.Gray16` types. All other image types are accessed in a generic way that will result in slow processing speed.
* JPEG images are stored in `image.YCbCr`. This image format stores data in a way that will decrease processing speed. A resize may be up to 2 times slower than with `image.RGBA`.

View File

@ -43,7 +43,7 @@ func clampUint16(in int64) uint16 {
return 0
}
func resizeGeneric(in image.Image, out *image.RGBA64, scale float64, coeffs []int32, offset []int, filterLength int) {
func resizeGeneric(in image.Image, out *image.NRGBA64, scale float64, coeffs []int32, offset []int, filterLength int) {
newBounds := out.Bounds()
maxX := in.Bounds().Dx() - 1
@ -89,7 +89,7 @@ func resizeGeneric(in image.Image, out *image.RGBA64, scale float64, coeffs []in
}
}
func resizeRGBA(in *image.RGBA, out *image.RGBA, scale float64, coeffs []int16, offset []int, filterLength int) {
func resizeRGBA(in *image.RGBA, out *image.NRGBA, scale float64, coeffs []int16, offset []int, filterLength int) {
newBounds := out.Bounds()
maxX := in.Bounds().Dx() - 1
@ -129,7 +129,95 @@ func resizeRGBA(in *image.RGBA, out *image.RGBA, scale float64, coeffs []int16,
}
}
func resizeRGBA64(in *image.RGBA64, out *image.RGBA64, scale float64, coeffs []int32, offset []int, filterLength int) {
// resizeNRGBA filters the rows of in with precomputed integer weights and
// writes the result, transposed, into out: column x of out is computed from
// row x of in.
//
// For an output pixel (x, y) the filter taps are coeffs[y*filterLength+i] for
// i in [0, filterLength); tap i reads the source pixel at position offset[y]+i
// of the row. Positions before the first pixel use the first pixel, positions
// at or beyond the last pixel use the last pixel. Taps with a zero weight are
// skipped. The four 8-bit channels are accumulated independently, divided by
// the sum of the weights used, and passed through clampUint8 before being
// stored.
//
// scale is not used by this routine.
func resizeNRGBA(in *image.NRGBA, out *image.NRGBA, scale float64, coeffs []int16, offset []int, filterLength int) {
	dst := out.Bounds()
	lastX := in.Bounds().Dx() - 1

	for x := dst.Min.X; x < dst.Max.X; x++ {
		src := in.Pix[x*in.Stride:]

		for y := dst.Min.Y; y < dst.Max.Y; y++ {
			var (
				acc       [4]int32
				weightSum int32
			)
			first := offset[y]
			base := y * filterLength

			for i := 0; i < filterLength; i++ {
				w := int32(coeffs[base+i])
				if w == 0 {
					continue
				}

				// Turn the tap position into a byte offset within the row,
				// replicating the edge pixels outside [0, lastX).
				idx := first + i
				if uint(idx) < uint(lastX) {
					idx *= 4
				} else if idx >= lastX {
					idx = 4 * lastX
				} else {
					idx = 0
				}

				px := src[idx : idx+4 : idx+4]
				acc[0] += w * int32(px[0])
				acc[1] += w * int32(px[1])
				acc[2] += w * int32(px[2])
				acc[3] += w * int32(px[3])
				weightSum += w
			}

			// out may be a sub-image, so index relative to its own origin.
			o := (y-dst.Min.Y)*out.Stride + (x-dst.Min.X)*4
			r := clampUint8(acc[0] / weightSum)
			g := clampUint8(acc[1] / weightSum)
			b := clampUint8(acc[2] / weightSum)
			a := clampUint8(acc[3] / weightSum)
			out.Pix[o+0] = r
			out.Pix[o+1] = g
			out.Pix[o+2] = b
			out.Pix[o+3] = a
		}
	}
}
// resizeRGBA64 filters the rows of in with precomputed integer weights and
// writes the result, transposed, into out: column x of out is computed from
// row x of in.
//
// For an output pixel (x, y) the filter taps are coeffs[y*filterLength+i] for
// i in [0, filterLength); tap i reads the source pixel at position offset[y]+i
// of the row. Positions before the first pixel use the first pixel, positions
// at or beyond the last pixel use the last pixel. Taps with a zero weight are
// skipped. Samples are 16 bits per channel, stored big-endian.
//
// in is an image.RGBA64, whose colour samples are alpha-premultiplied, while
// out is an image.NRGBA64, whose colour samples are not. The filter is
// evaluated on the premultiplied samples and every filtered colour value is
// divided by the filtered alpha before it is stored, so that out really holds
// non-premultiplied data. Fully opaque pixels are unaffected by this
// conversion; pixels whose filtered alpha is zero get zero colour samples.
//
// scale is not used by this routine.
func resizeRGBA64(in *image.RGBA64, out *image.NRGBA64, scale float64, coeffs []int32, offset []int, filterLength int) {
	newBounds := out.Bounds()
	maxX := in.Bounds().Dx() - 1

	for x := newBounds.Min.X; x < newBounds.Max.X; x++ {
		row := in.Pix[x*in.Stride:]
		for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ {
			var rgba [4]int64
			var sum int64
			start := offset[y]
			ci := y * filterLength
			for i := 0; i < filterLength; i++ {
				coeff := coeffs[ci+i]
				if coeff != 0 {
					// Turn the tap position into a byte offset within the
					// row, replicating the edge pixels outside [0, maxX).
					xi := start + i
					switch {
					case uint(xi) < uint(maxX):
						xi *= 8
					case xi >= maxX:
						xi = 8 * maxX
					default:
						xi = 0
					}

					rgba[0] += int64(coeff) * int64(uint16(row[xi+0])<<8|uint16(row[xi+1]))
					rgba[1] += int64(coeff) * int64(uint16(row[xi+2])<<8|uint16(row[xi+3]))
					rgba[2] += int64(coeff) * int64(uint16(row[xi+4])<<8|uint16(row[xi+5]))
					rgba[3] += int64(coeff) * int64(uint16(row[xi+6])<<8|uint16(row[xi+7]))
					sum += int64(coeff)
				}
			}

			// out may be a sub-image, so index relative to its own origin.
			xo := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*8

			alpha := clampUint16(rgba[3] / sum)
			for c := 0; c < 3; c++ {
				// Undo the alpha premultiplication of the source. With
				// alpha == 0xffff this is the identity, so opaque pixels keep
				// exactly the filtered value.
				var value uint16
				if alpha != 0 {
					value = clampUint16((rgba[c] / sum) * 0xffff / int64(alpha))
				}
				out.Pix[xo+2*c+0] = uint8(value >> 8)
				out.Pix[xo+2*c+1] = uint8(value)
			}
			out.Pix[xo+6] = uint8(alpha >> 8)
			out.Pix[xo+7] = uint8(alpha)
		}
	}
}
func resizeNRGBA64(in *image.NRGBA64, out *image.NRGBA64, scale float64, coeffs []int32, offset []int, filterLength int) {
newBounds := out.Bounds()
maxX := in.Bounds().Dx() - 1

View File

@ -118,6 +118,45 @@ func nearestRGBA(in *image.RGBA, out *image.RGBA, scale float64, coeffs []bool,
}
}
// nearestNRGBA resamples the rows of in using a boolean tap mask and writes
// the result, transposed, into out: column x of out is computed from row x of
// in.
//
// For an output pixel (x, y) the taps are coeffs[y*filterLength+i] for i in
// [0, filterLength); every tap that is true contributes the source pixel at
// position offset[y]+i of the row with equal weight. Positions before the
// first pixel use the first pixel, positions at or beyond the last pixel use
// the last pixel. Each of the four 8-bit channels is averaged over the
// selected taps and converted with floatToUint8.
//
// scale is not used by this routine.
func nearestNRGBA(in *image.NRGBA, out *image.NRGBA, scale float64, coeffs []bool, offset []int, filterLength int) {
	dst := out.Bounds()
	lastX := in.Bounds().Dx() - 1

	for x := dst.Min.X; x < dst.Max.X; x++ {
		src := in.Pix[x*in.Stride:]

		for y := dst.Min.Y; y < dst.Max.Y; y++ {
			var (
				acc   [4]float32
				count float32
			)
			first := offset[y]
			base := y * filterLength

			for i := 0; i < filterLength; i++ {
				if !coeffs[base+i] {
					continue
				}

				// Turn the tap position into a byte offset within the row,
				// replicating the edge pixels outside [0, lastX).
				idx := first + i
				if uint(idx) < uint(lastX) {
					idx *= 4
				} else if idx >= lastX {
					idx = 4 * lastX
				} else {
					idx = 0
				}

				px := src[idx : idx+4 : idx+4]
				acc[0] += float32(px[0])
				acc[1] += float32(px[1])
				acc[2] += float32(px[2])
				acc[3] += float32(px[3])
				count++
			}

			// out may be a sub-image, so index relative to its own origin.
			o := (y-dst.Min.Y)*out.Stride + (x-dst.Min.X)*4
			r := floatToUint8(acc[0] / count)
			g := floatToUint8(acc[1] / count)
			b := floatToUint8(acc[2] / count)
			a := floatToUint8(acc[3] / count)
			out.Pix[o+0] = r
			out.Pix[o+1] = g
			out.Pix[o+2] = b
			out.Pix[o+3] = a
		}
	}
}
func nearestRGBA64(in *image.RGBA64, out *image.RGBA64, scale float64, coeffs []bool, offset []int, filterLength int) {
newBounds := out.Bounds()
maxX := in.Bounds().Dx() - 1
@ -165,6 +204,53 @@ func nearestRGBA64(in *image.RGBA64, out *image.RGBA64, scale float64, coeffs []
}
}
// nearestNRGBA64 resamples the rows of in using a boolean tap mask and writes
// the result, transposed, into out: column x of out is computed from row x of
// in.
//
// For an output pixel (x, y) the taps are coeffs[y*filterLength+i] for i in
// [0, filterLength); every tap that is true contributes the source pixel at
// position offset[y]+i of the row with equal weight. Positions before the
// first pixel use the first pixel, positions at or beyond the last pixel use
// the last pixel. Samples are 16 bits per channel, stored big-endian; each
// channel is averaged over the selected taps and converted with
// floatToUint16.
//
// scale is not used by this routine.
func nearestNRGBA64(in *image.NRGBA64, out *image.NRGBA64, scale float64, coeffs []bool, offset []int, filterLength int) {
	dst := out.Bounds()
	lastX := in.Bounds().Dx() - 1

	for x := dst.Min.X; x < dst.Max.X; x++ {
		src := in.Pix[x*in.Stride:]

		for y := dst.Min.Y; y < dst.Max.Y; y++ {
			var (
				acc   [4]float32
				count float32
			)
			first := offset[y]
			base := y * filterLength

			for i := 0; i < filterLength; i++ {
				if !coeffs[base+i] {
					continue
				}

				// Turn the tap position into a byte offset within the row,
				// replicating the edge pixels outside [0, lastX).
				idx := first + i
				if uint(idx) < uint(lastX) {
					idx *= 8
				} else if idx >= lastX {
					idx = 8 * lastX
				} else {
					idx = 0
				}

				px := src[idx : idx+8 : idx+8]
				acc[0] += float32(uint16(px[0])<<8 | uint16(px[1]))
				acc[1] += float32(uint16(px[2])<<8 | uint16(px[3]))
				acc[2] += float32(uint16(px[4])<<8 | uint16(px[5]))
				acc[3] += float32(uint16(px[6])<<8 | uint16(px[7]))
				count++
			}

			// out may be a sub-image, so index relative to its own origin.
			o := (y-dst.Min.Y)*out.Stride + (x-dst.Min.X)*8
			for c := 0; c < 4; c++ {
				v := floatToUint16(acc[c] / count)
				out.Pix[o+2*c+0] = uint8(v >> 8)
				out.Pix[o+2*c+1] = uint8(v)
			}
		}
	}
}
func nearestGray(in *image.Gray, out *image.Gray, scale float64, coeffs []bool, offset []int, filterLength int) {
newBounds := out.Bounds()
maxX := in.Bounds().Dx() - 1

149
resize.go
View File

@ -105,14 +105,14 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i
switch input := img.(type) {
case *image.RGBA:
// 8-bit precision
temp := image.NewRGBA(image.Rect(0, 0, input.Bounds().Dy(), int(width)))
result := image.NewRGBA(image.Rect(0, 0, int(width), int(height)))
temp := image.NewNRGBA(image.Rect(0, 0, input.Bounds().Dy(), int(width)))
result := image.NewNRGBA(image.Rect(0, 0, int(width), int(height)))
// horizontal filter, results in transposed temporary image
coeffs, offset, filterLength := createWeights8(temp.Bounds().Dy(), taps, blur, scaleX, kernel)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(temp, i, cpus).(*image.RGBA)
slice := makeSlice(temp, i, cpus).(*image.NRGBA)
go func() {
defer wg.Done()
resizeRGBA(input, slice, scaleX, coeffs, offset, filterLength)
@ -124,14 +124,44 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i
coeffs, offset, filterLength = createWeights8(result.Bounds().Dy(), taps, blur, scaleY, kernel)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(result, i, cpus).(*image.RGBA)
slice := makeSlice(result, i, cpus).(*image.NRGBA)
go func() {
defer wg.Done()
resizeRGBA(temp, slice, scaleY, coeffs, offset, filterLength)
resizeNRGBA(temp, slice, scaleY, coeffs, offset, filterLength)
}()
}
wg.Wait()
return result
case *image.NRGBA:
// 8-bit precision
temp := image.NewNRGBA(image.Rect(0, 0, input.Bounds().Dy(), int(width)))
result := image.NewNRGBA(image.Rect(0, 0, int(width), int(height)))
// horizontal filter, results in transposed temporary image
coeffs, offset, filterLength := createWeights8(temp.Bounds().Dy(), taps, blur, scaleX, kernel)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(temp, i, cpus).(*image.NRGBA)
go func() {
defer wg.Done()
resizeNRGBA(input, slice, scaleX, coeffs, offset, filterLength)
}()
}
wg.Wait()
// horizontal filter on transposed image, result is not transposed
coeffs, offset, filterLength = createWeights8(result.Bounds().Dy(), taps, blur, scaleY, kernel)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(result, i, cpus).(*image.NRGBA)
go func() {
defer wg.Done()
resizeNRGBA(temp, slice, scaleY, coeffs, offset, filterLength)
}()
}
wg.Wait()
return result
case *image.YCbCr:
// 8-bit precision
// accessing the YCbCr arrays in a tight loop is slow.
@ -164,14 +194,14 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i
return result.YCbCr()
case *image.RGBA64:
// 16-bit precision
temp := image.NewRGBA64(image.Rect(0, 0, input.Bounds().Dy(), int(width)))
result := image.NewRGBA64(image.Rect(0, 0, int(width), int(height)))
temp := image.NewNRGBA64(image.Rect(0, 0, input.Bounds().Dy(), int(width)))
result := image.NewNRGBA64(image.Rect(0, 0, int(width), int(height)))
// horizontal filter, results in transposed temporary image
coeffs, offset, filterLength := createWeights16(temp.Bounds().Dy(), taps, blur, scaleX, kernel)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(temp, i, cpus).(*image.RGBA64)
slice := makeSlice(temp, i, cpus).(*image.NRGBA64)
go func() {
defer wg.Done()
resizeRGBA64(input, slice, scaleX, coeffs, offset, filterLength)
@ -183,10 +213,39 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i
coeffs, offset, filterLength = createWeights16(result.Bounds().Dy(), taps, blur, scaleY, kernel)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(result, i, cpus).(*image.RGBA64)
slice := makeSlice(result, i, cpus).(*image.NRGBA64)
go func() {
defer wg.Done()
resizeGeneric(temp, slice, scaleY, coeffs, offset, filterLength)
resizeNRGBA64(temp, slice, scaleY, coeffs, offset, filterLength)
}()
}
wg.Wait()
return result
case *image.NRGBA64:
// 16-bit precision
temp := image.NewNRGBA64(image.Rect(0, 0, input.Bounds().Dy(), int(width)))
result := image.NewNRGBA64(image.Rect(0, 0, int(width), int(height)))
// horizontal filter, results in transposed temporary image
coeffs, offset, filterLength := createWeights16(temp.Bounds().Dy(), taps, blur, scaleX, kernel)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(temp, i, cpus).(*image.NRGBA64)
go func() {
defer wg.Done()
resizeNRGBA64(input, slice, scaleX, coeffs, offset, filterLength)
}()
}
wg.Wait()
// horizontal filter on transposed image, result is not transposed
coeffs, offset, filterLength = createWeights16(result.Bounds().Dy(), taps, blur, scaleY, kernel)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(result, i, cpus).(*image.NRGBA64)
go func() {
defer wg.Done()
resizeNRGBA64(temp, slice, scaleY, coeffs, offset, filterLength)
}()
}
wg.Wait()
@ -251,14 +310,14 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i
return result
default:
// 16-bit precision
temp := image.NewRGBA64(image.Rect(0, 0, img.Bounds().Dy(), int(width)))
result := image.NewRGBA64(image.Rect(0, 0, int(width), int(height)))
temp := image.NewNRGBA64(image.Rect(0, 0, img.Bounds().Dy(), int(width)))
result := image.NewNRGBA64(image.Rect(0, 0, int(width), int(height)))
// horizontal filter, results in transposed temporary image
coeffs, offset, filterLength := createWeights16(temp.Bounds().Dy(), taps, blur, scaleX, kernel)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(temp, i, cpus).(*image.RGBA64)
slice := makeSlice(temp, i, cpus).(*image.NRGBA64)
go func() {
defer wg.Done()
resizeGeneric(img, slice, scaleX, coeffs, offset, filterLength)
@ -270,10 +329,10 @@ func Resize(width, height uint, img image.Image, interp InterpolationFunction) i
coeffs, offset, filterLength = createWeights16(result.Bounds().Dy(), taps, blur, scaleY, kernel)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(result, i, cpus).(*image.RGBA64)
slice := makeSlice(result, i, cpus).(*image.NRGBA64)
go func() {
defer wg.Done()
resizeRGBA64(temp, slice, scaleY, coeffs, offset, filterLength)
resizeNRGBA64(temp, slice, scaleY, coeffs, offset, filterLength)
}()
}
wg.Wait()
@ -316,6 +375,35 @@ func resizeNearest(width, height uint, scaleX, scaleY float64, img image.Image,
}
wg.Wait()
return result
case *image.NRGBA:
// 8-bit precision
temp := image.NewNRGBA(image.Rect(0, 0, input.Bounds().Dy(), int(width)))
result := image.NewNRGBA(image.Rect(0, 0, int(width), int(height)))
// horizontal filter, results in transposed temporary image
coeffs, offset, filterLength := createWeightsNearest(temp.Bounds().Dy(), taps, blur, scaleX)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(temp, i, cpus).(*image.NRGBA)
go func() {
defer wg.Done()
nearestNRGBA(input, slice, scaleX, coeffs, offset, filterLength)
}()
}
wg.Wait()
// horizontal filter on transposed image, result is not transposed
coeffs, offset, filterLength = createWeightsNearest(result.Bounds().Dy(), taps, blur, scaleY)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(result, i, cpus).(*image.NRGBA)
go func() {
defer wg.Done()
nearestNRGBA(temp, slice, scaleY, coeffs, offset, filterLength)
}()
}
wg.Wait()
return result
case *image.YCbCr:
// 8-bit precision
// accessing the YCbCr arrays in a tight loop is slow.
@ -370,7 +458,36 @@ func resizeNearest(width, height uint, scaleX, scaleY float64, img image.Image,
slice := makeSlice(result, i, cpus).(*image.RGBA64)
go func() {
defer wg.Done()
nearestGeneric(temp, slice, scaleY, coeffs, offset, filterLength)
nearestRGBA64(temp, slice, scaleY, coeffs, offset, filterLength)
}()
}
wg.Wait()
return result
case *image.NRGBA64:
// 16-bit precision
temp := image.NewNRGBA64(image.Rect(0, 0, input.Bounds().Dy(), int(width)))
result := image.NewNRGBA64(image.Rect(0, 0, int(width), int(height)))
// horizontal filter, results in transposed temporary image
coeffs, offset, filterLength := createWeightsNearest(temp.Bounds().Dy(), taps, blur, scaleX)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(temp, i, cpus).(*image.NRGBA64)
go func() {
defer wg.Done()
nearestNRGBA64(input, slice, scaleX, coeffs, offset, filterLength)
}()
}
wg.Wait()
// horizontal filter on transposed image, result is not transposed
coeffs, offset, filterLength = createWeightsNearest(result.Bounds().Dy(), taps, blur, scaleY)
wg.Add(cpus)
for i := 0; i < cpus; i++ {
slice := makeSlice(result, i, cpus).(*image.NRGBA64)
go func() {
defer wg.Done()
nearestNRGBA64(temp, slice, scaleY, coeffs, offset, filterLength)
}()
}
wg.Wait()