posts - 225, comments - 62, trackbacks - 0, articles - 0
   :: 首页 :: 新随笔 :: 联系 :: 聚合  :: 管理
以下两个函数要求传入的向量和矩阵的地址都按16字节边界对齐(AltiVec 的 lvx/stvx 指令会忽略地址的低4位,未对齐的地址会被静默地向下取整)。

void M4MulM4(Matrix4f& mout, const Matrix4f& m1, const Matrix4f& m2)
{
    MC_ASSERT( 
!(((int)(&mout))&0xF&& !(((int)(&m1))&0xF&& !(((int)(&m2))&0xF)  );
    asm(
        
"li         r0,0;"
        
"li         r10,0x10;"
        
"li         r11,0x20;"
        
"li         r12,0x30;"
        
"vxor       v0,v0,v0;"

        
"lvx        v1,r4,r0;"
        
"lvx        v5,r5,r0;"
        
"lvx        v2,r4,r10;"
        
"lvx        v6,r5,r10;"
        
"lvx        v3,r4,r11;"
        
"lvx        v7,r5,r11;"
        
"lvx        v4,r4,r12;"
        
"lvx        v8,r5,r12;"

        
"vspltw        v9,v1,0;"
        
"vmaddfp       v13,v9,v5,v0;"
        
"vspltw        v10,v1,1;"
        
"vmaddfp       v13,v10,v6,v13;"
        
"vspltw        v11,v1,2;"
        
"vmaddfp       v13,v11,v7,v13;"
        
"vspltw        v12,v1,3;"
        
"vmaddfp       v13,v12,v8,v13;"
        
"stvx          v13,r3,r0;"


        
"vspltw        v9,v2,0;"
        
"vmaddfp       v13,v9,v5,v0;"
        
"vspltw        v10,v2,1;"
        
"vmaddfp       v13,v10,v6,v13;"
        
"vspltw        v11,v2,2;"
        
"vmaddfp       v13,v11,v7,v13;"
        
"vspltw        v12,v2,3;"
        
"vmaddfp       v13,v12,v8,v13;"
        
"stvx          v13,r3,r10;"

        
"vspltw        v9,v3,0;"
        
"vmaddfp       v13,v9,v5,v0;"
        
"vspltw        v10,v3,1;"
        
"vmaddfp       v13,v10,v6,v13;"
        
"vspltw        v11,v3,2;"
        
"vmaddfp       v13,v11,v7,v13;"
        
"vspltw        v12,v3,3;"
        
"vmaddfp       v13,v12,v8,v13;"
        
"stvx          v13,r3,r11;"

        
"vspltw        v9,v4,0;"
        
"vmaddfp       v13,v9,v5,v0;"
        
"vspltw        v10,v4,1;"
        
"vmaddfp       v13,v10,v6,v13;"
        
"vspltw        v11,v4,2;"
        
"vmaddfp       v13,v11,v7,v13;"
        
"vspltw        v12,v4,3;"
        
"vmaddfp       v13,v12,v8,v13;"
        
"stvx          v13,r3,r12;"
        );
}

void V4MulM4(Vector4f& vout, const Vector4f& v, const Matrix4f& m)
{
    MC_ASSERT( 
!(((int)(&vout))&0xF&& !(((int)(&v))&0xF&& !(((int)(&m))&0xF)  );
    asm(
        
"li         r0,0;"
        
"li         r10,0x10;"
        
"li         r11,0x20;"
        
"li         r12,0x30;"
        
"vxor       v0,v0,v0;"

        
"lvx        v1,r4,r0;"
        
"lvx        v5,r5,r0;"
        
"lvx        v6,r5,r10;"
        
"lvx        v7,r5,r11;"
        
"lvx        v8,r5,r12;"

        
"vspltw        v9,v1,0;"
        
"vmaddfp       v13,v9,v5,v0;"
        
"vspltw        v10,v1,1;"
        
"vmaddfp       v13,v10,v6,v13;"
        
"vspltw        v11,v1,2;"
        
"vmaddfp       v13,v11,v7,v13;"
        
"vspltw        v12,v1,3;"
        
"vmaddfp       v13,v12,v8,v13;"
        
"stvx          v13,r3,r0;"
        );
}
只有注册用户登录后才能发表评论。