Re: [MPlayer-dev-eng] [PATCH] compiling with Intel C
Hi Dmitry
Dmitry Antipov a écrit :
Hello all,
an attached patch proposes the bits needed for the first successful :-)
compilation
of svn-22004 with Intel C 9.1.046. It compiles and plays a few movies
found on my hard drive, but it's highly experimental.
You did it!!! Weeeh!
Were you able to measure any speed-up?
the option '-benchmark' is your friend.
[...]
A few things about your patch that look very much like cosmetics...
------------------------------------------------------------------------
diff -ur MPlayer-1.0svn22004/configure MPlayer-1.0svn22004-devel/configure
--- MPlayer-1.0svn22004/configure 2007-01-24 18:08:55.000000000 +0300
+++ MPlayer-1.0svn22004-devel/configure 2007-01-24 18:18:17.000000000 +0300
@@ -1597,7 +1597,7 @@
_stripbinaries=no
elif test -z "$CFLAGS" ; then
if test "$cc_vendor" = "intel" ; then
- CFLAGS="-O2 $_march $_mcpu $_pipe -fomit-frame-pointer"
+ CFLAGS="-w0 -O3 $_march $_mcpu $_pipe -fomit-frame-pointer -funroll-loops -fno-math-errno"
elif test "$cc_vendor" != "gnu" ; then
CFLAGS="-O2 $_march $_mcpu $_pipe"
else
@@ -1609,8 +1609,6 @@
if test -n "$LDFLAGS" ; then
_ld_extra="$_ld_extra $LDFLAGS"
_warn_CFLAGS=yes
-elif test "$cc_vendor" = "intel" ; then
- _ld_extra="$_ld_extra -i-static"
fi
if test -n "$CPPFLAGS" ; then
_inc_extra="$_inc_extra $CPPFLAGS"
@@ -7236,7 +7234,7 @@
echores "no"
fi
-if cc_check -Wdeclaration-after-statement ; then
+if test "$cc_vendor" != "intel" && cc_check -Wdeclaration-after-statement ; then
CFLAGS="-Wdeclaration-after-statement $CFLAGS"
fi
@@ -7721,7 +7719,11 @@
#endif
/* attribute(used) as needed by some compilers */
-#if (__GNUC__ * 100 + __GNUC_MINOR__ >= 300)
+#if __INTEL_COMPILER
+/* Since Intel C (as of 9.1.046) doesn't understand 'used' attribute, here is a hack
+ to force it to emit variable which is referenced only in inline assembly code */
+# define attribute_used __attribute__((section (".data")))
+#elif (__GNUC__ * 100 + __GNUC_MINOR__ >= 300)
# define attribute_used __attribute__((used))
#else
# define attribute_used
diff -ur MPlayer-1.0svn22004/libavcodec/h264.c MPlayer-1.0svn22004-devel/libavcodec/h264.c
--- MPlayer-1.0svn22004/libavcodec/h264.c 2007-01-24 18:08:55.000000000 +0300
+++ MPlayer-1.0svn22004-devel/libavcodec/h264.c 2007-01-24 17:20:49.000000000 +0300
@@ -5982,7 +5982,7 @@
return ctx + 4 * cat;
}
-static const __attribute((used)) uint8_t last_coeff_flag_offset_8x8[63] = {
+static const attribute_used uint8_t last_coeff_flag_offset_8x8[63] = {
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
diff -ur MPlayer-1.0svn22004/libavcodec/i386/dsputil_mmx.c MPlayer-1.0svn22004-devel/libavcodec/i386/dsputil_mmx.c
--- MPlayer-1.0svn22004/libavcodec/i386/dsputil_mmx.c 2007-01-24 18:08:55.000000000 +0300
+++ MPlayer-1.0svn22004-devel/libavcodec/i386/dsputil_mmx.c 2007-01-24 17:45:20.000000000 +0300
@@ -633,6 +633,32 @@
}
static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){
+#ifdef __INTEL_COMPILER
+ /* Ugly, but uses a minimal number of operands just to fit into Intel C limits */
+ asm volatile("movd (%1), %%mm0 \n\t"
+ "mov %3, %%eax \n\t"
+ "movd (%1, %%eax), %%mm1 \n\t"
+ "add %3, %%eax \n\t"
+ "movd (%1, %%eax), %%mm2 \n\t"
+ "add %3, %%eax \n\t"
+ "movd (%1, %%eax), %%mm3 \n\t"
+ "punpcklbw %%mm1, %%mm0 \n\t"
+ "punpcklbw %%mm3, %%mm2 \n\t"
+ "movq %%mm0, %%mm1 \n\t"
+ "punpcklwd %%mm2, %%mm0 \n\t"
+ "punpckhwd %%mm2, %%mm1 \n\t"
+ "movd %%mm0, (%0) \n\t"
+ "punpckhdq %%mm0, %%mm0 \n\t"
+ "mov %2, %%eax \n\t"
+ "movd %%mm0, (%0, %%eax) \n\t"
+ "add %2, %%eax \n\t"
+ "movd %%mm1, (%0, %%eax) \n\t"
+ "punpckhdq %%mm1, %%mm1 \n\t"
+ "add %2, %%eax \n\t"
+ "movd %%mm1, (%0, %%eax) \n\t"
+ :: "r" (dst), "r" (src), "r"(dst_stride), "r"(src_stride)
+ : "eax");
+#else
Is there a reason why this special version should only be used by ICC?
Can GCC compile it too? Is it supposed to be slower? By how much?
static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){
diff -ur MPlayer-1.0svn22004/libmpcodecs/vf_fspp.c MPlayer-1.0svn22004-devel/libmpcodecs/vf_fspp.c
--- MPlayer-1.0svn22004/libmpcodecs/vf_fspp.c 2007-01-24 18:08:55.000000000 +0300
+++ MPlayer-1.0svn22004-devel/libmpcodecs/vf_fspp.c 2007-01-24 17:21:20.000000000 +0300
@@ -717,7 +717,7 @@
#ifdef HAVE_MMX
-static uint64_t attribute_used __attribute__((aligned(8))) temps[4];//!!
+static uint64_t attribute_used __attribute__((aligned(8))) temps[4] = {0ULL};//!!
why 0ULL in particular?
static uint64_t attribute_used __attribute__((aligned(8))) MM_FIX_0_382683433=FIX64(0.382683433, 14);
static uint64_t attribute_used __attribute__((aligned(8))) MM_FIX_0_541196100=FIX64(0.541196100, 14);
diff -ur MPlayer-1.0svn22004/loader/dmo/DMO_AudioDecoder.c MPlayer-1.0svn22004-devel/loader/dmo/DMO_AudioDecoder.c
--- MPlayer-1.0svn22004/loader/dmo/DMO_AudioDecoder.c 2007-01-24 18:08:55.000000000 +0300
+++ MPlayer-1.0svn22004-devel/loader/dmo/DMO_AudioDecoder.c 2007-01-24 17:18:31.000000000 +0300
@@ -35,8 +35,6 @@
#include "../../mp_msg.h"
-#define __MODULE__ "DirectShow audio decoder"
-
Why ?
diff -ur MPlayer-1.0svn22004/loader/dmo/DMO_VideoDecoder.c MPlayer-1.0svn22004-devel/loader/dmo/DMO_VideoDecoder.c
--- MPlayer-1.0svn22004/loader/dmo/DMO_VideoDecoder.c 2007-01-24 18:08:55.000000000 +0300
+++ MPlayer-1.0svn22004-devel/loader/dmo/DMO_VideoDecoder.c 2007-01-24 17:18:34.000000000 +0300
@@ -58,8 +58,6 @@
// strcmp((const char*)info.dll,...) is used instead of (... == ...)
// so Arpi could use char* pointer in his simplified DMO_VideoDecoder class
-#define __MODULE__ "DirectShow_VideoDecoder"
-
same here
diff -ur MPlayer-1.0svn22004/loader/dshow/DS_AudioDecoder.c MPlayer-1.0svn22004-devel/loader/dshow/DS_AudioDecoder.c
--- MPlayer-1.0svn22004/loader/dshow/DS_AudioDecoder.c 2007-01-24 18:08:55.000000000 +0300
+++ MPlayer-1.0svn22004-devel/loader/dshow/DS_AudioDecoder.c 2007-01-24 17:18:39.000000000 +0300
@@ -33,8 +33,6 @@
#include <stdio.h>
#include <stdlib.h>
-#define __MODULE__ "DirectShow audio decoder"
here too.
diff -ur MPlayer-1.0svn22004/loader/dshow/DS_VideoDecoder.c MPlayer-1.0svn22004-devel/loader/dshow/DS_VideoDecoder.c
--- MPlayer-1.0svn22004/loader/dshow/DS_VideoDecoder.c 2007-01-24 18:08:55.000000000 +0300
+++ MPlayer-1.0svn22004-devel/loader/dshow/DS_VideoDecoder.c 2007-01-24 17:18:41.000000000 +0300
@@ -58,8 +58,6 @@
// strcmp((const char*)info.dll,...) is used instead of (... == ...)
// so Arpi could use char* pointer in his simplified DS_VideoDecoder class
-#define __MODULE__ "DirectShow_VideoDecoder"
here also
diff -ur MPlayer-1.0svn22004/mp3lib/dct64_sse.c MPlayer-1.0svn22004-devel/mp3lib/dct64_sse.c
--- MPlayer-1.0svn22004/mp3lib/dct64_sse.c 2007-01-24 18:08:55.000000000 +0300
+++ MPlayer-1.0svn22004-devel/mp3lib/dct64_sse.c 2007-01-24 18:14:00.000000000 +0300
@@ -70,30 +70,27 @@
for (i = 0; i < 0x20; i += 0x10)
{
- asm(
- "movaps %4, %%xmm1\n\t"
- "movaps %5, %%xmm3\n\t"
- "movaps %6, %%xmm4\n\t"
- "movaps %7, %%xmm6\n\t"
- "movaps %%xmm1, %%xmm7\n\t"
- "shufps $27, %%xmm7, %%xmm7\n\t"
- "movaps %%xmm3, %%xmm5\n\t"
- "shufps $27, %%xmm5, %%xmm5\n\t"
- "movaps %%xmm4, %%xmm2\n\t"
- "shufps $27, %%xmm2, %%xmm2\n\t"
- "movaps %%xmm6, %%xmm0\n\t"
- "shufps $27, %%xmm0, %%xmm0\n\t"
- "addps %%xmm0, %%xmm1\n\t"
- "movaps %%xmm1, %0\n\t"
- "addps %%xmm2, %%xmm3\n\t"
- "movaps %%xmm3, %1\n\t"
- "subps %%xmm4, %%xmm5\n\t"
- "movaps %%xmm5, %2\n\t"
- "subps %%xmm6, %%xmm7\n\t"
- "movaps %%xmm7, %3\n\t"
- :"=m"(*(b2 + i)), "=m"(*(b2 + i + 4)), "=m"(*(b2 + i + 8)), "=m"(*(b2 + i + 12))
- :"m"(*(b1 + i)), "m"(*(b1 + i + 4)), "m"(*(b1 + i + 8)), "m"(*(b1 + i + 12))
- );
+ asm("movaps (%1), %%xmm1\n\t"
+ "movaps 16(%1), %%xmm3\n\t"
+ "movaps 32(%1), %%xmm4\n\t"
+ "movaps 48(%1), %%xmm6\n\t"
+ "movaps %%xmm1, %%xmm7\n\t"
+ "shufps $27, %%xmm7, %%xmm7\n\t"
+ "movaps %%xmm3, %%xmm5\n\t"
+ "shufps $27, %%xmm5, %%xmm5\n\t"
+ "movaps %%xmm4, %%xmm2\n\t"
+ "shufps $27, %%xmm2, %%xmm2\n\t"
+ "movaps %%xmm6, %%xmm0\n\t"
+ "shufps $27, %%xmm0, %%xmm0\n\t"
+ "addps %%xmm0, %%xmm1\n\t"
+ "movaps %%xmm1, (%0)\n\t"
+ "addps %%xmm2, %%xmm3\n\t"
+ "movaps %%xmm3, 16(%0)\n\t"
+ "subps %%xmm4, %%xmm5\n\t"
+ "movaps %%xmm5, 32(%0)\n\t"
+ "subps %%xmm6, %%xmm7\n\t"
+ "movaps %%xmm7, 48(%0)\n\t"
+ : : "r"(b2 + i), "r"(b1 + i));
}
}
Having this re-indenting the code doesn't help tracking down changes.
Please split cosmetic and functional changes.
I don't know what others may think, but let me say that it's really a
huge achievement you've done here!
Thanks!
Guillaume
_______________________________________________
MPlayer-dev-eng mailing list
MPlayer-dev-eng@xxxxxxxxxxxx
http://lists.mplayerhq.hu/mailman/listinfo/mplayer-dev-eng