|
- Fix PIC (position-independent code) issues in the MMX assembly routines by removing absolute references to static data
- --- video/mmxflags_asm.S
- +++ video/mmxflags_asm.S
- @@ -1,11 +1,6 @@
-
- #if defined(i386) && defined(USE_MMX)
-
- -.data
- - .align 16
- - .type flags,@object
- -flags: .long 0
- -
- .text
- .align 4
- .globl cpu_flags
- @@ -40,16 +35,13 @@ cpu_flags:
- xorl %ecx,%eax
- je cpu_flags.L1
-
- - pusha
- + pushl %ebx
-
- movl $1,%eax
- cpuid
- + movl %edx,%eax
-
- - movl %edx,flags
- -
- - popa
- -
- - movl flags,%eax
- + popl %ebx
-
- cpu_flags.L1:
- ret
- --- video/mmxidct_asm.S
- +++ video/mmxidct_asm.S
- @@ -31,11 +31,6 @@ preSC: .short 16384,22725,21407,19266,
- x0005000200010001:
- .long 0x00010001,0x00050002
- .align 8
- - .type x0040000000000000,@object
- - .size x0040000000000000,8
- -x0040000000000000:
- - .long 0, 0x00400000
- - .align 8
- .type x5a825a825a825a82,@object
- .size x5a825a825a825a82,8
- x5a825a825a825a82:
- @@ -80,8 +75,21 @@ scratch7:
- x0:
- .long 0,0
- .align 8
- +
- .text
- .align 4
- +
- +#ifdef __PIC__ /* PIC: MUNG(sym) addresses sym as sym@GOTOFF(%ebx); INIT_PIC() loads %ebx via the pc thunk, then adds $_GLOBAL_OFFSET_TABLE_ */
- +# undef __i686 /* gcc's __i686 define gets in our way: presumably it would be macro-expanded inside the __i686.get_pc_thunk.bx symbol name */
- +# define MUNG(sym) sym ## @GOTOFF(%ebx)
- +# define INIT_PIC() \
- + call __i686.get_pc_thunk.bx ; \
- + addl $_GLOBAL_OFFSET_TABLE_, %ebx
- +#else /* non-PIC: MUNG(sym) is the bare symbol and INIT_PIC() expands to nothing */
- +# define MUNG(sym) sym
- +# define INIT_PIC()
- +#endif /* __PIC__ */
- +
- .globl IDCT_mmx
- .type IDCT_mmx,@function
- IDCT_mmx:
- @@ -92,8 +100,9 @@ IDCT_mmx:
- pushl %edx
- pushl %esi
- pushl %edi
- + INIT_PIC()
- movl 8(%ebp),%esi /* source matrix */
- - leal preSC, %ecx
- + leal MUNG(preSC), %ecx
- /* column 0: even part
- * use V4, V12, V0, V8 to produce V22..V25
- */
- @@ -109,7 +118,7 @@ IDCT_mmx:
- movq %mm1, %mm2 /* added 11/1/96 */
- pmulhw 8*8(%esi),%mm5 /* V8 */
- psubsw %mm0, %mm1 /* V16 */
- - pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V18 */
- + pmulhw MUNG(x5a825a825a825a82), %mm1 /* 23170 ->V18 */
- paddsw %mm0, %mm2 /* V17 */
- movq %mm2, %mm0 /* duplicate V17 */
- psraw $1, %mm2 /* t75=t82 */
- @@ -150,7 +159,7 @@ IDCT_mmx:
- paddsw %mm0, %mm3 /* V29 ; free mm0 */
- movq %mm7, %mm1 /* duplicate V26 */
- psraw $1, %mm3 /* t91=t94 */
- - pmulhw x539f539f539f539f,%mm7 /* V33 */
- + pmulhw MUNG(x539f539f539f539f),%mm7 /* V33 */
- psraw $1, %mm1 /* t96 */
- movq %mm5, %mm0 /* duplicate V2 */
- psraw $2, %mm4 /* t85=t87 */
- @@ -158,15 +167,15 @@ IDCT_mmx:
- psubsw %mm4, %mm0 /* V28 ; free mm4 */
- movq %mm0, %mm2 /* duplicate V28 */
- psraw $1, %mm5 /* t90=t93 */
- - pmulhw x4546454645464546,%mm0 /* V35 */
- + pmulhw MUNG(x4546454645464546),%mm0 /* V35 */
- psraw $1, %mm2 /* t97 */
- movq %mm5, %mm4 /* duplicate t90=t93 */
- psubsw %mm2, %mm1 /* V32 ; free mm2 */
- - pmulhw x61f861f861f861f8,%mm1 /* V36 */
- + pmulhw MUNG(x61f861f861f861f8),%mm1 /* V36 */
- psllw $1, %mm7 /* t107 */
- paddsw %mm3, %mm5 /* V31 */
- psubsw %mm3, %mm4 /* V30 ; free mm3 */
- - pmulhw x5a825a825a825a82,%mm4 /* V34 */
- + pmulhw MUNG(x5a825a825a825a82),%mm4 /* V34 */
- nop
- psubsw %mm1, %mm0 /* V38 */
- psubsw %mm7, %mm1 /* V37 ; free mm7 */
- @@ -233,7 +242,7 @@ IDCT_mmx:
- psubsw %mm7, %mm1 /* V50 */
- pmulhw 8*9(%esi), %mm5 /* V9 */
- paddsw %mm7, %mm2 /* V51 */
- - pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V52 */
- + pmulhw MUNG(x5a825a825a825a82), %mm1 /* 23170 ->V52 */
- movq %mm2, %mm6 /* duplicate V51 */
- psraw $1, %mm2 /* t138=t144 */
- movq %mm3, %mm4 /* duplicate V1 */
- @@ -274,11 +283,11 @@ IDCT_mmx:
- * even more by doing the correction step in a later stage when the number
- * is actually multiplied by 16
- */
- - paddw x0005000200010001, %mm4
- + paddw MUNG(x0005000200010001), %mm4
- psubsw %mm6, %mm3 /* V60 ; free mm6 */
- psraw $1, %mm0 /* t154=t156 */
- movq %mm3, %mm1 /* duplicate V60 */
- - pmulhw x539f539f539f539f, %mm1 /* V67 */
- + pmulhw MUNG(x539f539f539f539f), %mm1 /* V67 */
- movq %mm5, %mm6 /* duplicate V3 */
- psraw $2, %mm4 /* t148=t150 */
- paddsw %mm4, %mm5 /* V61 */
- @@ -287,13 +296,13 @@ IDCT_mmx:
- psllw $1, %mm1 /* t169 */
- paddsw %mm0, %mm5 /* V65 -> result */
- psubsw %mm0, %mm4 /* V64 ; free mm0 */
- - pmulhw x5a825a825a825a82, %mm4 /* V68 */
- + pmulhw MUNG(x5a825a825a825a82), %mm4 /* V68 */
- psraw $1, %mm3 /* t158 */
- psubsw %mm6, %mm3 /* V66 */
- movq %mm5, %mm2 /* duplicate V65 */
- - pmulhw x61f861f861f861f8, %mm3 /* V70 */
- + pmulhw MUNG(x61f861f861f861f8), %mm3 /* V70 */
- psllw $1, %mm6 /* t165 */
- - pmulhw x4546454645464546, %mm6 /* V69 */
- + pmulhw MUNG(x4546454645464546), %mm6 /* V69 */
- psraw $1, %mm2 /* t172 */
- /* moved from next block */
- movq 8*5(%esi), %mm0 /* V56 */
- @@ -418,7 +427,7 @@ IDCT_mmx:
- * movq 8*13(%esi), %mm4 tmt13
- */
- psubsw %mm4, %mm3 /* V134 */
- - pmulhw x5a825a825a825a82, %mm3 /* 23170 ->V136 */
- + pmulhw MUNG(x5a825a825a825a82), %mm3 /* 23170 ->V136 */
- movq 8*9(%esi), %mm6 /* tmt9 */
- paddsw %mm4, %mm5 /* V135 ; mm4 free */
- movq %mm0, %mm4 /* duplicate tmt1 */
- @@ -447,17 +456,17 @@ IDCT_mmx:
- psubsw %mm7, %mm0 /* V144 */
- movq %mm0, %mm3 /* duplicate V144 */
- paddsw %mm7, %mm2 /* V147 ; free mm7 */
- - pmulhw x539f539f539f539f, %mm0 /* 21407-> V151 */
- + pmulhw MUNG(x539f539f539f539f), %mm0 /* 21407-> V151 */
- movq %mm1, %mm7 /* duplicate tmt3 */
- paddsw %mm5, %mm7 /* V145 */
- psubsw %mm5, %mm1 /* V146 ; free mm5 */
- psubsw %mm1, %mm3 /* V150 */
- movq %mm7, %mm5 /* duplicate V145 */
- - pmulhw x4546454645464546, %mm1 /* 17734-> V153 */
- + pmulhw MUNG(x4546454645464546), %mm1 /* 17734-> V153 */
- psubsw %mm2, %mm5 /* V148 */
- - pmulhw x61f861f861f861f8, %mm3 /* 25080-> V154 */
- + pmulhw MUNG(x61f861f861f861f8), %mm3 /* 25080-> V154 */
- psllw $2, %mm0 /* t311 */
- - pmulhw x5a825a825a825a82, %mm5 /* 23170-> V152 */
- + pmulhw MUNG(x5a825a825a825a82), %mm5 /* 23170-> V152 */
- paddsw %mm2, %mm7 /* V149 ; free mm2 */
- psllw $1, %mm1 /* t313 */
- nop /* without the nop - freeze here for one clock */
- @@ -483,7 +492,7 @@ IDCT_mmx:
- paddsw %mm3, %mm6 /* V164 ; free mm3 */
- movq %mm4, %mm3 /* duplicate V142 */
- psubsw %mm5, %mm4 /* V165 ; free mm5 */
- - movq %mm2, scratch7 /* out7 */
- + movq %mm2, MUNG(scratch7) /* out7 */
- psraw $4, %mm6
- psraw $4, %mm4
- paddsw %mm5, %mm3 /* V162 */
- @@ -494,11 +503,11 @@ IDCT_mmx:
- */
- movq %mm6, 8*9(%esi) /* out9 */
- paddsw %mm1, %mm0 /* V161 */
- - movq %mm3, scratch5 /* out5 */
- + movq %mm3, MUNG(scratch5) /* out5 */
- psubsw %mm1, %mm5 /* V166 ; free mm1 */
- movq %mm4, 8*11(%esi) /* out11 */
- psraw $4, %mm5
- - movq %mm0, scratch3 /* out3 */
- + movq %mm0, MUNG(scratch3) /* out3 */
- movq %mm2, %mm4 /* duplicate V140 */
- movq %mm5, 8*13(%esi) /* out13 */
- paddsw %mm7, %mm2 /* V160 */
- @@ -508,7 +517,7 @@ IDCT_mmx:
- /* moved from the next block */
- movq 8*3(%esi), %mm7
- psraw $4, %mm4
- - movq %mm2, scratch1 /* out1 */
- + movq %mm2, MUNG(scratch1) /* out1 */
- /* moved from the next block */
- movq %mm0, %mm1
- movq %mm4, 8*15(%esi) /* out15 */
- @@ -565,15 +574,15 @@ IDCT_mmx:
- paddsw %mm4, %mm3 /* V113 ; free mm4 */
- movq %mm0, %mm4 /* duplicate V110 */
- paddsw %mm1, %mm2 /* V111 */
- - pmulhw x539f539f539f539f, %mm0 /* 21407-> V117 */
- + pmulhw MUNG(x539f539f539f539f), %mm0 /* 21407-> V117 */
- psubsw %mm1, %mm5 /* V112 ; free mm1 */
- psubsw %mm5, %mm4 /* V116 */
- movq %mm2, %mm1 /* duplicate V111 */
- - pmulhw x4546454645464546, %mm5 /* 17734-> V119 */
- + pmulhw MUNG(x4546454645464546), %mm5 /* 17734-> V119 */
- psubsw %mm3, %mm2 /* V114 */
- - pmulhw x61f861f861f861f8, %mm4 /* 25080-> V120 */
- + pmulhw MUNG(x61f861f861f861f8), %mm4 /* 25080-> V120 */
- paddsw %mm3, %mm1 /* V115 ; free mm3 */
- - pmulhw x5a825a825a825a82, %mm2 /* 23170-> V118 */
- + pmulhw MUNG(x5a825a825a825a82), %mm2 /* 23170-> V118 */
- psllw $2, %mm0 /* t266 */
- movq %mm1, (%esi) /* save V115 */
- psllw $1, %mm5 /* t268 */
- @@ -591,7 +600,7 @@ IDCT_mmx:
- movq %mm6, %mm3 /* duplicate tmt4 */
- psubsw %mm0, %mm6 /* V100 */
- paddsw %mm0, %mm3 /* V101 ; free mm0 */
- - pmulhw x5a825a825a825a82, %mm6 /* 23170 ->V102 */
- + pmulhw MUNG(x5a825a825a825a82), %mm6 /* 23170 ->V102 */
- movq %mm7, %mm5 /* duplicate tmt0 */
- movq 8*8(%esi), %mm1 /* tmt8 */
- paddsw %mm1, %mm7 /* V103 */
- @@ -625,10 +634,10 @@ IDCT_mmx:
- movq 8*2(%esi), %mm3 /* V123 */
- paddsw %mm4, %mm7 /* out0 */
- /* moved up from next block */
- - movq scratch3, %mm0
- + movq MUNG(scratch3), %mm0
- psraw $4, %mm7
- /* moved up from next block */
- - movq scratch5, %mm6
- + movq MUNG(scratch5), %mm6
- psubsw %mm4, %mm1 /* out14 ; free mm4 */
- paddsw %mm3, %mm5 /* out2 */
- psraw $4, %mm1
- @@ -639,7 +648,7 @@ IDCT_mmx:
- movq %mm5, 8*2(%esi) /* out2 ; free mm5 */
- psraw $4, %mm2
- /* moved up to the prev block */
- - movq scratch7, %mm4
- + movq MUNG(scratch7), %mm4
- /* moved up to the prev block */
- psraw $4, %mm0
- movq %mm2, 8*12(%esi) /* out12 ; free mm2 */
- @@ -647,13 +656,13 @@ IDCT_mmx:
- psraw $4, %mm6
- /* move back the data to its correct place
- * moved up to the prev block
- - * movq scratch3, %mm0
- - * movq scratch5, %mm6
- - * movq scratch7, %mm4
- + * movq MUNG(scratch3), %mm0
- + * movq MUNG(scratch5), %mm6
- + * movq MUNG(scratch7), %mm4
- * psraw $4, %mm0
- * psraw $4, %mm6
- */
- - movq scratch1, %mm1
- + movq MUNG(scratch1), %mm1
- psraw $4, %mm4
- movq %mm0, 8*3(%esi) /* out3 */
- psraw $4, %mm1
- @@ -671,6 +680,15 @@ IDCT_mmx:
- .Lfe1:
- .size IDCT_mmx,.Lfe1-IDCT_mmx
-
- +#ifdef __PIC__ /* thunk called by INIT_PIC(); only assembled for PIC builds */
- + .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
- +.globl __i686.get_pc_thunk.bx
- + .hidden __i686.get_pc_thunk.bx
- + .type __i686.get_pc_thunk.bx,@function
- + __i686.get_pc_thunk.bx:
- + movl (%esp), %ebx /* %ebx = word at top of stack, i.e. the caller's return address */
- + ret
- +#endif /* __PIC__ */
-
- #endif /* i386 && USE_MMX */
-
|