smpeg-0.4.4-PIC.patch 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309
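  1. Fix PIC issues in the MMX routines: cpu_flags no longer goes through the static `flags` variable, and IDCT_mmx addresses its data tables via @GOTOFF(%ebx) when __PIC__ is defined.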
  2. --- video/mmxflags_asm.S
  3. +++ video/mmxflags_asm.S
  4. @@ -1,11 +1,6 @@
  5. #if defined(i386) && defined(USE_MMX)
  6. -.data
  7. - .align 16
  8. - .type flags,@object
  9. -flags: .long 0
  10. -
  11. .text
  12. .align 4
  13. .globl cpu_flags
  14. @@ -40,16 +35,13 @@ cpu_flags:
  15. xorl %ecx,%eax
  16. je cpu_flags.L1
  17. - pusha
  18. + pushl %ebx
  19. movl $1,%eax
  20. cpuid
  21. + movl %edx,%eax
  22. - movl %edx,flags
  23. -
  24. - popa
  25. -
  26. - movl flags,%eax
  27. + popl %ebx
  28. cpu_flags.L1:
  29. ret
  30. --- video/mmxidct_asm.S
  31. +++ video/mmxidct_asm.S
  32. @@ -31,11 +31,6 @@ preSC: .short 16384,22725,21407,19266,
  33. x0005000200010001:
  34. .long 0x00010001,0x00050002
  35. .align 8
  36. - .type x0040000000000000,@object
  37. - .size x0040000000000000,8
  38. -x0040000000000000:
  39. - .long 0, 0x00400000
  40. - .align 8
  41. .type x5a825a825a825a82,@object
  42. .size x5a825a825a825a82,8
  43. x5a825a825a825a82:
  44. @@ -80,8 +75,21 @@ scratch7:
  45. x0:
  46. .long 0,0
  47. .align 8
  48. +
  49. .text
  50. .align 4
  51. +
  52. +#ifdef __PIC__ /* PIC build: data symbols are addressed relative to %ebx */
  53. +# undef __i686 /* gcc may predefine __i686 as a macro, which would rewrite the thunk name used below */
  54. +# define MUNG(sym) sym ## @GOTOFF(%ebx) /* turn a data symbol into a sym@GOTOFF(%ebx) memory operand */
  55. +# define INIT_PIC() \
  56. + call __i686.get_pc_thunk.bx ; \
  57. + addl $_GLOBAL_OFFSET_TABLE_, %ebx /* thunk put the return address in %ebx; adding this symbol makes %ebx the GOT address */
  58. +#else
  59. +# define MUNG(sym) sym /* non-PIC build: plain absolute symbol reference */
  60. +# define INIT_PIC() /* non-PIC build: expands to nothing, %ebx is left untouched */
  61. +#endif
  62. +
  63. .globl IDCT_mmx
  64. .type IDCT_mmx,@function
  65. IDCT_mmx:
  66. @@ -92,8 +100,9 @@ IDCT_mmx:
  67. pushl %edx
  68. pushl %esi
  69. pushl %edi
  70. + INIT_PIC()
  71. movl 8(%ebp),%esi /* source matrix */
  72. - leal preSC, %ecx
  73. + leal MUNG(preSC), %ecx
  74. /* column 0: even part
  75. * use V4, V12, V0, V8 to produce V22..V25
  76. */
  77. @@ -109,7 +118,7 @@ IDCT_mmx:
  78. movq %mm1, %mm2 /* added 11/1/96 */
  79. pmulhw 8*8(%esi),%mm5 /* V8 */
  80. psubsw %mm0, %mm1 /* V16 */
  81. - pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V18 */
  82. + pmulhw MUNG(x5a825a825a825a82), %mm1 /* 23170 ->V18 */
  83. paddsw %mm0, %mm2 /* V17 */
  84. movq %mm2, %mm0 /* duplicate V17 */
  85. psraw $1, %mm2 /* t75=t82 */
  86. @@ -150,7 +159,7 @@ IDCT_mmx:
  87. paddsw %mm0, %mm3 /* V29 ; free mm0 */
  88. movq %mm7, %mm1 /* duplicate V26 */
  89. psraw $1, %mm3 /* t91=t94 */
  90. - pmulhw x539f539f539f539f,%mm7 /* V33 */
  91. + pmulhw MUNG(x539f539f539f539f),%mm7 /* V33 */
  92. psraw $1, %mm1 /* t96 */
  93. movq %mm5, %mm0 /* duplicate V2 */
  94. psraw $2, %mm4 /* t85=t87 */
  95. @@ -158,15 +167,15 @@ IDCT_mmx:
  96. psubsw %mm4, %mm0 /* V28 ; free mm4 */
  97. movq %mm0, %mm2 /* duplicate V28 */
  98. psraw $1, %mm5 /* t90=t93 */
  99. - pmulhw x4546454645464546,%mm0 /* V35 */
  100. + pmulhw MUNG(x4546454645464546),%mm0 /* V35 */
  101. psraw $1, %mm2 /* t97 */
  102. movq %mm5, %mm4 /* duplicate t90=t93 */
  103. psubsw %mm2, %mm1 /* V32 ; free mm2 */
  104. - pmulhw x61f861f861f861f8,%mm1 /* V36 */
  105. + pmulhw MUNG(x61f861f861f861f8),%mm1 /* V36 */
  106. psllw $1, %mm7 /* t107 */
  107. paddsw %mm3, %mm5 /* V31 */
  108. psubsw %mm3, %mm4 /* V30 ; free mm3 */
  109. - pmulhw x5a825a825a825a82,%mm4 /* V34 */
  110. + pmulhw MUNG(x5a825a825a825a82),%mm4 /* V34 */
  111. nop
  112. psubsw %mm1, %mm0 /* V38 */
  113. psubsw %mm7, %mm1 /* V37 ; free mm7 */
  114. @@ -233,7 +242,7 @@ IDCT_mmx:
  115. psubsw %mm7, %mm1 /* V50 */
  116. pmulhw 8*9(%esi), %mm5 /* V9 */
  117. paddsw %mm7, %mm2 /* V51 */
  118. - pmulhw x5a825a825a825a82, %mm1 /* 23170 ->V52 */
  119. + pmulhw MUNG(x5a825a825a825a82), %mm1 /* 23170 ->V52 */
  120. movq %mm2, %mm6 /* duplicate V51 */
  121. psraw $1, %mm2 /* t138=t144 */
  122. movq %mm3, %mm4 /* duplicate V1 */
  123. @@ -274,11 +283,11 @@ IDCT_mmx:
  124. * even more by doing the correction step in a later stage when the number
  125. * is actually multiplied by 16
  126. */
  127. - paddw x0005000200010001, %mm4
  128. + paddw MUNG(x0005000200010001), %mm4
  129. psubsw %mm6, %mm3 /* V60 ; free mm6 */
  130. psraw $1, %mm0 /* t154=t156 */
  131. movq %mm3, %mm1 /* duplicate V60 */
  132. - pmulhw x539f539f539f539f, %mm1 /* V67 */
  133. + pmulhw MUNG(x539f539f539f539f), %mm1 /* V67 */
  134. movq %mm5, %mm6 /* duplicate V3 */
  135. psraw $2, %mm4 /* t148=t150 */
  136. paddsw %mm4, %mm5 /* V61 */
  137. @@ -287,13 +296,13 @@ IDCT_mmx:
  138. psllw $1, %mm1 /* t169 */
  139. paddsw %mm0, %mm5 /* V65 -> result */
  140. psubsw %mm0, %mm4 /* V64 ; free mm0 */
  141. - pmulhw x5a825a825a825a82, %mm4 /* V68 */
  142. + pmulhw MUNG(x5a825a825a825a82), %mm4 /* V68 */
  143. psraw $1, %mm3 /* t158 */
  144. psubsw %mm6, %mm3 /* V66 */
  145. movq %mm5, %mm2 /* duplicate V65 */
  146. - pmulhw x61f861f861f861f8, %mm3 /* V70 */
  147. + pmulhw MUNG(x61f861f861f861f8), %mm3 /* V70 */
  148. psllw $1, %mm6 /* t165 */
  149. - pmulhw x4546454645464546, %mm6 /* V69 */
  150. + pmulhw MUNG(x4546454645464546), %mm6 /* V69 */
  151. psraw $1, %mm2 /* t172 */
  152. /* moved from next block */
  153. movq 8*5(%esi), %mm0 /* V56 */
  154. @@ -418,7 +427,7 @@ IDCT_mmx:
  155. * movq 8*13(%esi), %mm4 tmt13
  156. */
  157. psubsw %mm4, %mm3 /* V134 */
  158. - pmulhw x5a825a825a825a82, %mm3 /* 23170 ->V136 */
  159. + pmulhw MUNG(x5a825a825a825a82), %mm3 /* 23170 ->V136 */
  160. movq 8*9(%esi), %mm6 /* tmt9 */
  161. paddsw %mm4, %mm5 /* V135 ; mm4 free */
  162. movq %mm0, %mm4 /* duplicate tmt1 */
  163. @@ -447,17 +456,17 @@ IDCT_mmx:
  164. psubsw %mm7, %mm0 /* V144 */
  165. movq %mm0, %mm3 /* duplicate V144 */
  166. paddsw %mm7, %mm2 /* V147 ; free mm7 */
  167. - pmulhw x539f539f539f539f, %mm0 /* 21407-> V151 */
  168. + pmulhw MUNG(x539f539f539f539f), %mm0 /* 21407-> V151 */
  169. movq %mm1, %mm7 /* duplicate tmt3 */
  170. paddsw %mm5, %mm7 /* V145 */
  171. psubsw %mm5, %mm1 /* V146 ; free mm5 */
  172. psubsw %mm1, %mm3 /* V150 */
  173. movq %mm7, %mm5 /* duplicate V145 */
  174. - pmulhw x4546454645464546, %mm1 /* 17734-> V153 */
  175. + pmulhw MUNG(x4546454645464546), %mm1 /* 17734-> V153 */
  176. psubsw %mm2, %mm5 /* V148 */
  177. - pmulhw x61f861f861f861f8, %mm3 /* 25080-> V154 */
  178. + pmulhw MUNG(x61f861f861f861f8), %mm3 /* 25080-> V154 */
  179. psllw $2, %mm0 /* t311 */
  180. - pmulhw x5a825a825a825a82, %mm5 /* 23170-> V152 */
  181. + pmulhw MUNG(x5a825a825a825a82), %mm5 /* 23170-> V152 */
  182. paddsw %mm2, %mm7 /* V149 ; free mm2 */
  183. psllw $1, %mm1 /* t313 */
  184. nop /* without the nop - freeze here for one clock */
  185. @@ -483,7 +492,7 @@ IDCT_mmx:
  186. paddsw %mm3, %mm6 /* V164 ; free mm3 */
  187. movq %mm4, %mm3 /* duplicate V142 */
  188. psubsw %mm5, %mm4 /* V165 ; free mm5 */
  189. - movq %mm2, scratch7 /* out7 */
  190. + movq %mm2, MUNG(scratch7) /* out7 */
  191. psraw $4, %mm6
  192. psraw $4, %mm4
  193. paddsw %mm5, %mm3 /* V162 */
  194. @@ -494,11 +503,11 @@ IDCT_mmx:
  195. */
  196. movq %mm6, 8*9(%esi) /* out9 */
  197. paddsw %mm1, %mm0 /* V161 */
  198. - movq %mm3, scratch5 /* out5 */
  199. + movq %mm3, MUNG(scratch5) /* out5 */
  200. psubsw %mm1, %mm5 /* V166 ; free mm1 */
  201. movq %mm4, 8*11(%esi) /* out11 */
  202. psraw $4, %mm5
  203. - movq %mm0, scratch3 /* out3 */
  204. + movq %mm0, MUNG(scratch3) /* out3 */
  205. movq %mm2, %mm4 /* duplicate V140 */
  206. movq %mm5, 8*13(%esi) /* out13 */
  207. paddsw %mm7, %mm2 /* V160 */
  208. @@ -508,7 +517,7 @@ IDCT_mmx:
  209. /* moved from the next block */
  210. movq 8*3(%esi), %mm7
  211. psraw $4, %mm4
  212. - movq %mm2, scratch1 /* out1 */
  213. + movq %mm2, MUNG(scratch1) /* out1 */
  214. /* moved from the next block */
  215. movq %mm0, %mm1
  216. movq %mm4, 8*15(%esi) /* out15 */
  217. @@ -565,15 +574,15 @@ IDCT_mmx:
  218. paddsw %mm4, %mm3 /* V113 ; free mm4 */
  219. movq %mm0, %mm4 /* duplicate V110 */
  220. paddsw %mm1, %mm2 /* V111 */
  221. - pmulhw x539f539f539f539f, %mm0 /* 21407-> V117 */
  222. + pmulhw MUNG(x539f539f539f539f), %mm0 /* 21407-> V117 */
  223. psubsw %mm1, %mm5 /* V112 ; free mm1 */
  224. psubsw %mm5, %mm4 /* V116 */
  225. movq %mm2, %mm1 /* duplicate V111 */
  226. - pmulhw x4546454645464546, %mm5 /* 17734-> V119 */
  227. + pmulhw MUNG(x4546454645464546), %mm5 /* 17734-> V119 */
  228. psubsw %mm3, %mm2 /* V114 */
  229. - pmulhw x61f861f861f861f8, %mm4 /* 25080-> V120 */
  230. + pmulhw MUNG(x61f861f861f861f8), %mm4 /* 25080-> V120 */
  231. paddsw %mm3, %mm1 /* V115 ; free mm3 */
  232. - pmulhw x5a825a825a825a82, %mm2 /* 23170-> V118 */
  233. + pmulhw MUNG(x5a825a825a825a82), %mm2 /* 23170-> V118 */
  234. psllw $2, %mm0 /* t266 */
  235. movq %mm1, (%esi) /* save V115 */
  236. psllw $1, %mm5 /* t268 */
  237. @@ -591,7 +600,7 @@ IDCT_mmx:
  238. movq %mm6, %mm3 /* duplicate tmt4 */
  239. psubsw %mm0, %mm6 /* V100 */
  240. paddsw %mm0, %mm3 /* V101 ; free mm0 */
  241. - pmulhw x5a825a825a825a82, %mm6 /* 23170 ->V102 */
  242. + pmulhw MUNG(x5a825a825a825a82), %mm6 /* 23170 ->V102 */
  243. movq %mm7, %mm5 /* duplicate tmt0 */
  244. movq 8*8(%esi), %mm1 /* tmt8 */
  245. paddsw %mm1, %mm7 /* V103 */
  246. @@ -625,10 +634,10 @@ IDCT_mmx:
  247. movq 8*2(%esi), %mm3 /* V123 */
  248. paddsw %mm4, %mm7 /* out0 */
  249. /* moved up from next block */
  250. - movq scratch3, %mm0
  251. + movq MUNG(scratch3), %mm0
  252. psraw $4, %mm7
  253. /* moved up from next block */
  254. - movq scratch5, %mm6
  255. + movq MUNG(scratch5), %mm6
  256. psubsw %mm4, %mm1 /* out14 ; free mm4 */
  257. paddsw %mm3, %mm5 /* out2 */
  258. psraw $4, %mm1
  259. @@ -639,7 +648,7 @@ IDCT_mmx:
  260. movq %mm5, 8*2(%esi) /* out2 ; free mm5 */
  261. psraw $4, %mm2
  262. /* moved up to the prev block */
  263. - movq scratch7, %mm4
  264. + movq MUNG(scratch7), %mm4
  265. /* moved up to the prev block */
  266. psraw $4, %mm0
  267. movq %mm2, 8*12(%esi) /* out12 ; free mm2 */
  268. @@ -647,13 +656,13 @@ IDCT_mmx:
  269. psraw $4, %mm6
  270. /* move back the data to its correct place
  271. * moved up to the prev block
  272. - * movq scratch3, %mm0
  273. - * movq scratch5, %mm6
  274. - * movq scratch7, %mm4
  275. + * movq MUNG(scratch3), %mm0
  276. + * movq MUNG(scratch5), %mm6
  277. + * movq MUNG(scratch7), %mm4
  278. * psraw $4, %mm0
  279. * psraw $4, %mm6
  280. */
  281. - movq scratch1, %mm1
  282. + movq MUNG(scratch1), %mm1
  283. psraw $4, %mm4
  284. movq %mm0, 8*3(%esi) /* out3 */
  285. psraw $4, %mm1
  286. @@ -671,6 +680,15 @@ IDCT_mmx:
  287. .Lfe1:
  288. .size IDCT_mmx,.Lfe1-IDCT_mmx
  289. +#ifdef __PIC__ /* the thunk is only needed when INIT_PIC() expands to a call */
  290. + .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits /* own linkonce text section - presumably so duplicate thunks from other objects are merged at link time */
  291. +.globl __i686.get_pc_thunk.bx
  292. + .hidden __i686.get_pc_thunk.bx /* hidden visibility: global for linking, but not visible outside the component that contains it */
  293. + .type __i686.get_pc_thunk.bx,@function
  294. + __i686.get_pc_thunk.bx:
  295. + movl (%esp), %ebx /* %ebx = our return address, i.e. the address of the instruction following the call */
  296. + ret
  297. +#endif
  298. #endif /* i386 && USE_MMX */