整数双线性插值优化

如何进一步优化整数双线性插值

我的代码的性能严重受限于双线性插值，所以我编写了一个不使用浮点运算的版本 (ScaleBlerpI)。这个版本已经快了 ~~1.5~~ 1.85 倍，但我想知道如何让它更快。

感谢任何提示。

func ScaleBlerpI(src,dst *ValueFieldI) {
    mx := uint64((src.Width - 1) * math.MaxUint32 / dst.Width)
    my := uint64((src.Height - 1) * math.MaxUint32 / dst.Height)

    for y := uint64(0); y < uint64(dst.Height); y++ {
        for x := uint64(0); x < uint64(dst.Width); x++ {
            gx := (x * mx) >> 32            // eq. / math.MaxUint32
            tx := (x * mx) & math.MaxUint32 // eq. % (math.MaxUint32 + 1) or % 2^32
            gy := (y * my) >> 32
            ty := (y * my) & math.MaxUint32

            srcX,srcY := int(gx),int(gy)
            rgba00 := src.GetComponent(srcX,srcY)
            rgba10 := src.GetComponent(srcX+1,srcY)
            rgba01 := src.GetComponent(srcX,srcY+1)
            rgba11 := src.GetComponent(srcX+1,srcY+1)
            result := []uint32{
                blerpI(rgba00[0],rgba10[0],rgba01[0],rgba11[0],tx,ty),blerpI(rgba00[1],rgba10[1],rgba01[1],rgba11[1],blerpI(rgba00[2],rgba10[2],rgba01[2],rgba11[2],}
            dst.SetComponent(int(x),int(y),result)
        }
    }
}

// lerpI linearly interpolates between s and e using integer arithmetic.
//
// f is the blend weight expressed as a fraction of math.MaxUint32: f == 0
// yields s and f == math.MaxUint32 yields e. Callers must keep f within
// [0, math.MaxUint32]; larger values make the (math.MaxUint32 - f) term wrap.
func lerpI(s, e uint32, f uint64) uint32 {
	// Integer form of s*(1-f) + e*f. The two weights sum to MaxUint32, so the
	// weighted sum is at most MaxUint32*MaxUint32 and fits in a uint64.
	weighted := uint64(s)*(math.MaxUint32-f) + uint64(e)*f
	return uint32(weighted / math.MaxUint32)
}

// blerpI bilinearly interpolates between four corner values.
//
// c00 and c10 form the upper edge, c01 and c11 the lower edge. tx blends
// along each edge and ty blends between the two edge results; both weights
// use the same scale as the f parameter of lerpI.
func blerpI(c00, c10, c01, c11 uint32, tx, ty uint64) uint32 {
	return lerpI(
		lerpI(c00, c10, tx),
		lerpI(c01, c11, tx),
		ty,
	)
}

// ValueFieldI is a two-dimensional grid of samples, each made of
// ComponentSize consecutive uint32 values, stored row by row in Values.
type ValueFieldI struct {
	// Width is the number of samples per row.
	Width int
	// Height is the number of rows.
	Height int
	// ComponentSize is the number of uint32 values that make up one sample.
	ComponentSize int
	// Values holds all samples back to back; the sample at (x, y) starts at
	// index (x + y*Width) * ComponentSize.
	Values []uint32
}

// GetComponent returns the ComponentSize values of the sample at (x, y).
//
// The result is a sub-slice of vf.Values, not a copy, so writes through it
// modify the field. The coordinates are not validated.
func (vf *ValueFieldI) GetComponent(x, y int) []uint32 {
	start := (y*vf.Width + x) * vf.ComponentSize
	end := start + vf.ComponentSize
	return vf.Values[start:end]
}

// SetComponent overwrites the sample at (x, y) with the values in c. At most
// ComponentSize values are copied; a shorter c leaves the remaining values of
// the sample unchanged.
func (vf *ValueFieldI) SetComponent(x, y int, c []uint32) {
	target := vf.GetComponent(x, y)
	copy(target, c)
}

性能分析（profiling）显示，大部分时间花在 blerpI、src.GetComponent 和 dst.SetComponent 上。

编辑 1

已替换

    // basically s * (1 - f) + e * f
    return uint32(
        (uint64(s)*(math.MaxUint32-f) + uint64(e)*f) /
            math.MaxUint32)

改为

    // basically s + f*(e-s)
    return s + uint32((f*(uint64(e)-uint64(s)))>>32)

整数版本现在快 1.85 倍。

编辑 2

基准测试：

func BenchmarkBlerpIRand(b *testing.B) {
    src := &ValueFieldI{
        Width:         37,Height:        37,ComponentSize: 3,Values:        make([]uint32,37*37*3),}

    for i := range src.Values {
        src.Values[i] = rand.Uint32()
    }

    dst := &ValueFieldI{
        Width:         37 * 8,Height:        37 * 8,37*8*37*8*3),}

    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        ScaleBlerpI(src,dst)
    }
}