• Open Menu Close Menu
  • Apple
  • Shopping Bag
  • Apple
  • Mac
  • iPad
  • iPhone
  • Watch
  • TV
  • Music
  • Support
  • Search apple.com
  • Shopping Bag

Lists

Open Menu Close Menu
  • Terms and Conditions
  • Lists hosted on this site
  • Email the Postmaster
  • Tips for posting to public mailing lists
i386 inline assembly
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

i386 inline assembly


  • Subject: i386 inline assembly
  • From: Steve Checkoway <email@hidden>
  • Date: Sun, 15 Jan 2006 21:04:04 -0800

I've found two instances where inline assembly is failing that I haven't been able to fix yet. Please forgive the massive amount of spam in this e-mail. I don't know enough about i386 assembly to know which parts are relevant.

0. .balign isn't recognized. (This is item 0 because I just checked, and it looks like .balign n should be replaced with .align log2(n), so .balign 8 becomes .align 3.)

1. Macros don't seem to be working. The particular bit of code is:

    asm volatile(
        ".macro FDCT_ROW_SSE2_H1 i t   \n\t"
	"movq      \\i(%0), %%xmm2     \n\t"
	"movq      \\i+8(%0), %%xmm0   \n\t"
	"movdqa    \\t+32(%1), %%xmm3  \n\t"
	"movdqa    \\t+48(%1), %%xmm7  \n\t"
	"movdqa    \\t(%1), %%xmm4     \n\t"
	"movdqa    \\t+16(%1), %%xmm5  \n\t"
	".endm                         \n\t"
        ".macro FDCT_ROW_SSE2_H2 i t   \n\t"
	"movq      \\i(%0), %%xmm2     \n\t"
	"movq      \\i+8(%0), %%xmm0   \n\t"
	"movdqa    \\t+32(%1), %%xmm3  \n\t"
	"movdqa    \\t+48(%1), %%xmm7  \n\t"
	".endm                         \n\t"
	".macro FDCT_ROW_SSE2 i        \n\t"
	"movq      %%xmm2, %%xmm1      \n\t"
	"pshuflw   $27, %%xmm0, %%xmm0 \n\t"
	"paddsw    %%xmm0, %%xmm1      \n\t"
	"psubsw    %%xmm0, %%xmm2      \n\t"
	"punpckldq %%xmm2, %%xmm1      \n\t"
	"pshufd    $78, %%xmm1, %%xmm2 \n\t"
	"pmaddwd   %%xmm2, %%xmm3      \n\t"
	"pmaddwd   %%xmm1, %%xmm7      \n\t"
	"pmaddwd   %%xmm5, %%xmm2      \n\t"
	"pmaddwd   %%xmm4, %%xmm1      \n\t"
	"paddd     %%xmm7, %%xmm3      \n\t"
	"paddd     %%xmm2, %%xmm1      \n\t"
	"paddd     %%xmm6, %%xmm3      \n\t"
	"paddd     %%xmm6, %%xmm1      \n\t"
	"psrad     %3, %%xmm3          \n\t"
	"psrad     %3, %%xmm1          \n\t"
	"packssdw  %%xmm3, %%xmm1      \n\t"
	"movdqa    %%xmm1, \\i(%4)     \n\t"
	".endm                         \n\t"
	"movdqa    (%2), %%xmm6        \n\t"
	"FDCT_ROW_SSE2_H1 0 0 \n\t"
	"FDCT_ROW_SSE2 0 \n\t"
	"FDCT_ROW_SSE2_H2 64 0 \n\t"
	"FDCT_ROW_SSE2 64 \n\t"

	"FDCT_ROW_SSE2_H1 16 64 \n\t"
	"FDCT_ROW_SSE2 16 \n\t"
	"FDCT_ROW_SSE2_H2 112 64 \n\t"
	"FDCT_ROW_SSE2 112 \n\t"

	"FDCT_ROW_SSE2_H1 32 128 \n\t"
	"FDCT_ROW_SSE2 32 \n\t"
	"FDCT_ROW_SSE2_H2 96 128 \n\t"
	"FDCT_ROW_SSE2 96 \n\t"

"FDCT_ROW_SSE2_H1 48 192 \n\t"
"FDCT_ROW_SSE2 48 \n\t"
"FDCT_ROW_SSE2_H2 80 192 \n\t"
"FDCT_ROW_SSE2 80 \n\t"
:
: "r" (in), "r" (tab_frw_01234567_sse2.tab_frw_01234567_sse2), "r" (fdct_r_row_sse2.fdct_r_row_sse2), "i" (SHIFT_FRW_ROW), "r" (out)
);


First, the macros aren't being terminated, but I can fix that by replacing .endm with .endmacro. After I do that, it says that there is an invalid character '_' in the opcode. If I remove all underscores, it tells me that fdctrowsse2h1 (in lowercase) is not an i386 instruction. I thought it might be because the name ends in a number, but renaming all of the macros to unique names without numbers made no difference.

2. I'm getting "error: can't find a register in class 'BREG' while reloading 'asm'". I got a similar error before, but with GENERAL_REGS instead of BREG; in that case, passing -fomit-frame-pointer freed up the needed register. However, that doesn't seem to be working here.

The code is:

__asm __volatile(
"testl $1, %0 \n\t"
" jz 1f \n\t"
"movq (%1), %%mm0 \n\t"
"movq (%2), %%mm1 \n\t"
"addl %4, %1 \n\t"
"addl $8, %2 \n\t"
PAVGB(%%mm0, %%mm1, %%mm4, %%mm6)
"movq %%mm4, (%3) \n\t"
"addl %5, %3 \n\t"
"decl %0 \n\t"
".balign 8 \n\t"
"1: \n\t"
"movq (%1), %%mm0 \n\t"
"movq (%2), %%mm1 \n\t"
"addl %4, %1 \n\t"
"movq (%1), %%mm2 \n\t"
"movq 8(%2), %%mm3 \n\t"
"addl %4, %1 \n\t"
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
"movq %%mm4, (%3) \n\t"
"addl %5, %3 \n\t"
"movq %%mm5, (%3) \n\t"
"addl %5, %3 \n\t"
"movq (%1), %%mm0 \n\t"
"movq 16(%2), %%mm1 \n\t"
"addl %4, %1 \n\t"
"movq (%1), %%mm2 \n\t"
"movq 24(%2), %%mm3 \n\t"
"addl %4, %1 \n\t"
"addl $32, %2 \n\t"
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
"movq %%mm4, (%3) \n\t"
"addl %5, %3 \n\t"
"movq %%mm5, (%3) \n\t"
"addl %5, %3 \n\t"
"subl $4, %0 \n\t"
"jnz 1b \n\t"
#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used
:"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
#else
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
#endif
:"S"(src1Stride), "D"(dstStride)
:"memory");


The macro in there is:
/*
 * PAVGB(a, b, c, e) forwards its arguments unchanged to PAVGB_MMX_NO_RND.
 *
 * PAVGB_MMX_NO_RND(rega, regb, regr, regfe) stringifies its arguments and
 * expands to a run of adjacent string literals (AT&T operand order), so it
 * can be placed between the other string literals of an asm statement.
 * The emitted instruction sequence is:
 *
 *   movq  rega,  regr      regr  = rega
 *   pand  regb,  regr      regr &= regb   (regr = rega & regb)
 *   pxor  rega,  regb      regb ^= rega   (regb = rega ^ regb)
 *   pand  regfe, regb      regb &= regfe
 *   psrlq $1,    regb      regb >>= 1     (64-bit logical shift)
 *   paddb regb,  regr      regr += regb   (per-byte add)
 *
 * The result is left in regr; regb is overwritten; rega and regfe are only
 * read.
 * NOTE(review): regfe presumably holds 0xFE in every byte so the 64-bit
 * shift cannot move bits across byte lanes, yielding a per-byte average
 * that rounds down -- confirm against the code that loads regfe.
 */
#define PAVGB(a, b, c, e)		PAVGB_MMX_NO_RND(a, b, c, e)
#define PAVGB_MMX_NO_RND(rega, regb, regr, regfe) \
    "movq " #rega ", " #regr "	\n\t"\
    "pand " #regb ", " #regr "	\n\t"\
    "pxor " #rega ", " #regb "	\n\t"\
    "pand " #regfe "," #regb "	\n\t"\
    "psrlq $1, " #regb " 	\n\t"\
    "paddb " #regb ", " #regr "	\n\t"


- Steve

Attachment: smime.p7s
Description: S/MIME cryptographic signature

 _______________________________________________
Do not post admin requests to the list. They will be ignored.
Xcode-users mailing list      (email@hidden)
Help/Unsubscribe/Update your Subscription:

This email sent to email@hidden

  • Follow-Ups:
    • Re: i386 inline assembly
      • From: email@hidden
    • Re: i386 inline assembly
      • From: Eric Albert <email@hidden>
  • Prev by Date: Re: per-arch files
  • Next by Date: Re: i386 inline assembly
  • Previous by thread: Re: per-arch files
  • Next by thread: Re: i386 inline assembly
  • Index(es):
    • Date
    • Thread