i386 inline assembly
i386 inline assembly
- Subject: i386 inline assembly
- From: Steve Checkoway <email@hidden>
- Date: Sun, 15 Jan 2006 21:04:04 -0800
I've found two instances where inline assembly is failing that I
haven't been able to fix yet. Please forgive the massive amount of
spam in this e-mail. I don't know enough about i386 assembly to know
which parts are relevant.
0. .balign isn't recognized. (This is item 0 because I just checked, and
it looks like .balign n should be replaced with .align log2(n),
so .balign 8 becomes .align 3.)
1. Macros don't seem to be working. The particular bit of code is:
/*
 * Row pass written as GCC extended inline assembly (AT&T syntax, SSE2).
 *
 * Operands (all inputs, there are no outputs and no clobber list):
 *   %0 = in                      (register; source rows are read from here)
 *   %1 = tab_frw_01234567_sse2   (register; coefficient table base)
 *   %2 = fdct_r_row_sse2         (register; 16 bytes loaded once into xmm6)
 *   %3 = SHIFT_FRW_ROW           (immediate; arithmetic right-shift count)
 *   %4 = out                     (register; results are stored here)
 *
 * Three assembler-level macros are defined and then invoked for eight
 * byte offsets into in/out: 0, 64, 16, 112, 32, 96, 48, 80.
 *
 * NOTE(review): the statement writes xmm0-xmm7 and stores through %4 but
 * declares neither register clobbers nor a "memory" clobber - confirm the
 * surrounding code does not rely on the compiler knowing about either.
 * NOTE(review): the macros use GNU as syntax (named parameters, backslash
 * substitution, .endm); the accompanying message reports that the
 * assembler in use does not accept it - confirm the target assembler's
 * macro dialect.
 */
asm volatile(
/* H1 i t: load both 8-byte halves of the row at byte offset i of %0 into
 * xmm2/xmm0, and four 16-byte table entries at t+32, t+48, t, t+16 of %1
 * into xmm3, xmm7, xmm4, xmm5. */
".macro FDCT_ROW_SSE2_H1 i t \n\t"
"movq \\i(%0), %%xmm2 \n\t"
"movq \\i+8(%0), %%xmm0 \n\t"
"movdqa \\t+32(%1), %%xmm3 \n\t"
"movdqa \\t+48(%1), %%xmm7 \n\t"
"movdqa \\t(%1), %%xmm4 \n\t"
"movdqa \\t+16(%1), %%xmm5 \n\t"
".endm \n\t"
/* H2 i t: like H1 but reloads only xmm3/xmm7. FDCT_ROW_SSE2 overwrites
 * xmm3 and xmm7 but only reads xmm4 and xmm5, so those two are left as
 * loaded by the preceding H1 (every H2 below follows an H1 with the same
 * t argument). */
".macro FDCT_ROW_SSE2_H2 i t \n\t"
"movq \\i(%0), %%xmm2 \n\t"
"movq \\i+8(%0), %%xmm0 \n\t"
"movdqa \\t+32(%1), %%xmm3 \n\t"
"movdqa \\t+48(%1), %%xmm7 \n\t"
".endm \n\t"
/* FDCT_ROW_SSE2 i: combine the two row halves, multiply-accumulate
 * against the table registers, add xmm6, shift right by %3, pack to
 * 16-bit with signed saturation and store 16 bytes at offset i of %4. */
".macro FDCT_ROW_SSE2 i \n\t"
"movq %%xmm2, %%xmm1 \n\t"
/* $27 = 0b00011011: reverses the order of the four low words of xmm0. */
"pshuflw $27, %%xmm0, %%xmm0 \n\t"
/* xmm1 = first half + reversed second half, xmm2 = their difference
 * (both with signed saturation). */
"paddsw %%xmm0, %%xmm1 \n\t"
"psubsw %%xmm0, %%xmm2 \n\t"
"punpckldq %%xmm2, %%xmm1 \n\t"
/* $78 = 0b01001110: xmm2 = xmm1 with its two 64-bit halves swapped. */
"pshufd $78, %%xmm1, %%xmm2 \n\t"
"pmaddwd %%xmm2, %%xmm3 \n\t"
"pmaddwd %%xmm1, %%xmm7 \n\t"
"pmaddwd %%xmm5, %%xmm2 \n\t"
"pmaddwd %%xmm4, %%xmm1 \n\t"
"paddd %%xmm7, %%xmm3 \n\t"
"paddd %%xmm2, %%xmm1 \n\t"
/* xmm6 holds the 16 bytes loaded from (%2); presumably a rounding
 * constant applied before the shift - confirm against its definition. */
"paddd %%xmm6, %%xmm3 \n\t"
"paddd %%xmm6, %%xmm1 \n\t"
"psrad %3, %%xmm3 \n\t"
"psrad %3, %%xmm1 \n\t"
"packssdw %%xmm3, %%xmm1 \n\t"
/* movdqa requires the destination address to be 16-byte aligned. */
"movdqa %%xmm1, \\i(%4) \n\t"
".endm \n\t"
/* Executed code starts here: load xmm6 once, then process the rows in
 * pairs that share a table offset (0/64 -> 0, 16/112 -> 64,
 * 32/96 -> 128, 48/80 -> 192). */
"movdqa (%2), %%xmm6 \n\t"
"FDCT_ROW_SSE2_H1 0 0 \n\t"
"FDCT_ROW_SSE2 0 \n\t"
"FDCT_ROW_SSE2_H2 64 0 \n\t"
"FDCT_ROW_SSE2 64 \n\t"
"FDCT_ROW_SSE2_H1 16 64 \n\t"
"FDCT_ROW_SSE2 16 \n\t"
"FDCT_ROW_SSE2_H2 112 64 \n\t"
"FDCT_ROW_SSE2 112 \n\t"
"FDCT_ROW_SSE2_H1 32 128 \n\t"
"FDCT_ROW_SSE2 32 \n\t"
"FDCT_ROW_SSE2_H2 96 128 \n\t"
"FDCT_ROW_SSE2 96 \n\t"
"FDCT_ROW_SSE2_H1 48 192 \n\t"
"FDCT_ROW_SSE2 48 \n\t"
"FDCT_ROW_SSE2_H2 80 192 \n\t"
"FDCT_ROW_SSE2 80 \n\t"
:
: "r" (in), "r" (tab_frw_01234567_sse2.tab_frw_01234567_sse2),
"r" (fdct_r_row_sse2.fdct_r_row_sse2), "i" (SHIFT_FRW_ROW), "r" (out)
);
First, the macros aren't being terminated, but I can fix that by
replacing .endm with .endmacro. After I do that, it says that there
is an invalid character '_' in the opcode. If I remove all
underscores, it tells me that fdctrowsse2h1 (in lowercase) is not an
i386 instruction. I thought it might be because it ends in a number
but replacing all of the macros with unique names without numbers did
nothing.
2. I'm getting "error: can't find a register in class 'BREG' while
reloading 'asm'". I got an error similar to that before, but it was
GENERAL_REGS instead of BREG. Passing -fomit-frame-pointer freed up
the needed register. However, that doesn't seem to be working here.
The code is:
/*
 * Row loop written as GCC extended inline assembly (AT&T syntax, MMX).
 *
 * Operands:
 *   %0 = h          (in/out; row counter - memory under PIC, %ebx otherwise)
 *   %1 = src1       (in/out, %eax; advanced by %4 after each row)
 *   %2 = src2       (in/out, %ecx; advanced 8 bytes per row)
 *   %3 = dst        (in/out, %edx; advanced by %5 after each row)
 *   %4 = src1Stride (in, %esi)
 *   %5 = dstStride  (in, %edi)
 *
 * Flow: if bit 0 of h is set, one row is handled first and h is
 * decremented; the loop at label 1 then handles four rows per iteration
 * and exits only when h reaches exactly zero.
 *
 * PAVGBP is defined outside this excerpt. mm6 is passed to PAVGB as its
 * mask operand but is not loaded here - presumably set up by the caller;
 * confirm.
 *
 * NOTE(review): together the operands pin eax, ecx, edx, esi and edi, and
 * the non-PIC branch additionally requires ebx ("b"). When the compiler
 * reserves ebx (e.g. as the PIC base register) while PIC is not defined,
 * the "b" constraint cannot be satisfied - this looks like the source of
 * the reported 'BREG' reload error; confirm how PIC gets defined for the
 * target.
 */
__asm __volatile(
/* Odd row count: process a single row so the remainder suits the loop. */
"testl $1, %0 \n\t"
" jz 1f \n\t"
"movq (%1), %%mm0 \n\t"
"movq (%2), %%mm1 \n\t"
"addl %4, %1 \n\t"
"addl $8, %2 \n\t"
PAVGB(%%mm0, %%mm1, %%mm4, %%mm6)
"movq %%mm4, (%3) \n\t"
"addl %5, %3 \n\t"
"decl %0 \n\t"
".balign 8 \n\t"
/* Main loop: rows 0 and 1 of this iteration. */
"1: \n\t"
"movq (%1), %%mm0 \n\t"
"movq (%2), %%mm1 \n\t"
"addl %4, %1 \n\t"
"movq (%1), %%mm2 \n\t"
"movq 8(%2), %%mm3 \n\t"
"addl %4, %1 \n\t"
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
"movq %%mm4, (%3) \n\t"
"addl %5, %3 \n\t"
"movq %%mm5, (%3) \n\t"
"addl %5, %3 \n\t"
/* Rows 2 and 3; src2 is then advanced past all four 8-byte rows. */
"movq (%1), %%mm0 \n\t"
"movq 16(%2), %%mm1 \n\t"
"addl %4, %1 \n\t"
"movq (%1), %%mm2 \n\t"
"movq 24(%2), %%mm3 \n\t"
"addl %4, %1 \n\t"
"addl $32, %2 \n\t"
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
"movq %%mm4, (%3) \n\t"
"addl %5, %3 \n\t"
"movq %%mm5, (%3) \n\t"
"addl %5, %3 \n\t"
"subl $4, %0 \n\t"
"jnz 1b \n\t"
/* Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and
 * can't be used. */
#ifdef PIC
:"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
#else
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
#endif
:"S"(src1Stride), "D"(dstStride)
:"memory");
The macro in there is:
/*
 * PAVGB_MMX_NO_RND(src_a, src_b, dst, mask)
 *
 * Expands to six adjacent string literals, one MMX instruction each
 * (AT&T operand order), with the arguments stringified in place:
 *
 *   dst   = src_a
 *   dst  &= src_b
 *   src_b ^= src_a
 *   src_b &= mask
 *   src_b >>= 1          (64-bit logical shift)
 *   dst  += src_b        (byte-wise add)
 *
 * i.e. dst = (a & b) + (((a ^ b) & mask) >> 1). src_b is overwritten.
 * The mask parameter was originally named "regfe"; presumably the
 * register holds 0xFE in every byte - confirm against the caller.
 */
#define PAVGB_MMX_NO_RND(src_a, src_b, dst, mask) \
"movq " #src_a ", " #dst " \n\t"\
"pand " #src_b ", " #dst " \n\t"\
"pxor " #src_a ", " #src_b " \n\t"\
"pand " #mask "," #src_b " \n\t"\
"psrlq $1, " #src_b " \n\t"\
"paddb " #src_b ", " #dst " \n\t"

/* PAVGB forwards its four operands unchanged to the no-rounding variant. */
#define PAVGB(a, b, c, e) PAVGB_MMX_NO_RND(a, b, c, e)
- Steve
Attachment:
smime.p7s
Description: S/MIME cryptographic signature
_______________________________________________
Do not post admin requests to the list. They will be ignored.
Xcode-users mailing list (email@hidden)
Help/Unsubscribe/Update your Subscription:
This email sent to email@hidden