|
菜鸟现有一个 Windows 下使用内嵌汇编的函数,需要移植为 Linux 下的内嵌汇编:
void BilinearRun(unsigned char * src, unsigned char * dst,
long nSrcWidth, long nSrcHeight,
long nDstWidth, long nDstHeight,
long nSrcPitch, long nDstPitch )
{
// Simple bilinear resize, mmx optimised
// This function resize src, giving dst, bilinear algorithm used
// to avoid floating point computations, x and y resize ratios are multiplied by 16384
// so integer mmx can be used
static unsigned long rounder[2] = {0x2000, 0x2000};
static unsigned short init[4] = {0x4000, 0, 0x4000, 0};
static unsigned short et1[4] = {0xffff, 0x3fff, 0xffff, 0x3fff};
unsigned long SrcPtr;
unsigned long xint, yint;
unsigned long dstmod = nDstPitch - nDstWidth;
__asm {
mov esi, src
mov SrcPtr, esi
mov eax, nSrcHeight
mov ecx, nDstHeight
// compute xratio*16384
mov eax, nSrcWidth
dec eax ; nSrcWidth-1
xor edx, edx
mov ebx, nDstWidth
dec ebx ; nDstWidth-1
div ebx ; (nSrcWidth-1)/(nDstWidth-1)=xratio
mov xint, eax ; int part of xratio
xor eax, eax
div bx
shr ax, 2 ; ax=frac part of xratio*16384=xr
mov cx, ax
neg cx ; -xr
shl eax, 16
mov ax, cx
movd mm7, eax ; mm7=0,0,xr,-xr
punpckldq mm7, mm7 ; mm7=xr,-xr,xr,-xr
// compute yratio*16384
mov eax, nSrcHeight
dec eax ; nSrcHeight-1
xor edx, edx
mov ebx, nDstHeight
dec ebx ; nDstHeight-1
div ebx ; (nSrcHeight-1)/(nDstHeight-1)=yratio
mov yint, eax ; int part of yratio
xor eax, eax
div bx
shr ax, 2 ; ax=frac part of yratio*16384=yr
mov cx, ax
neg cx ; -yr
shl eax, 16
mov ax, cx
movd mm6, eax ; mm6=0,0,yr,-yr
mov eax, yint
mul nSrcPitch
mov ebx, eax ; yint*nSrcPitch
movq mm3, rounder
movq mm5, init
mov edi, dst ; destination
pxor mm1, mm1 ; mm1=0
movq mm3, rounder
resize_yloop:
mov esi, SrcPtr ; source
mov ecx, nDstWidth ; count
movq mm4, init ; current xr
resize_xloop:
mov edx, nSrcPitch
movzx eax, word ptr[esi] // eax=00BA
movzx edx, word ptr[esi+edx] // edx=00DC
shl edx, 16 ; edx=DC00
or edx, eax ; edx=DCBA
movd mm0, edx ; mm0=0000DCBA
punpcklbw mm0, mm1 ; mm0=0D0C0B0A
pmaddwd mm0, mm4 ; mm0= d*xr-c*(1-xr),b*xr-a*(1-xr)
paddd mm0, mm3 ; + rounder
psrld mm0, 14 ; shift (divide by 16384)
packssdw mm0, mm0
add esi, xint ; next x point (next loop)
pmaddwd mm0, mm5 ; mm0=(d*xr+c*(1-xr))*yr+(b*xr+a*(1-xr))*(1-yr)
paddw mm4, mm7 ; next x interpol (next loop)
movd eax, mm0
add eax, 8192 ; +rounder
shr eax, 14 ; shift (divide by 16384)
mov byte ptr [edi], al ; write new point
inc edi
dec ecx
je resize_end_xloop
movd eax, mm4
or ax, ax
jg resize_xloop ; if frac part <1
inc esi ; next source point
pand mm4, et1 ; frac part -1
paddw mm4, init
jmp resize_xloop
resize_end_xloop:
// go to next src line
add edi, dstmod
paddw mm5, mm6
add SrcPtr, ebx ; + yint*nSrcPitch
movd eax, mm5
or ax, ax
jg notbig
mov eax, nSrcPitch
add SrcPtr, eax
pand mm5, et1 ; frac part -1
paddw mm5, init
notbig:
dec nDstHeight ; 1 more line done
jg resize_yloop
// done
emms
} // __asm
}
上面贴出来的代码排版乱了,我把函数放在附件里了。
都快被老板逼死了,请各位前辈帮帮忙…… |
本帖子中包含更多资源
您需要 登录 才可以下载或查看,没有帐号?注册
x
|