filter_LTLIBRARIES = libgstvideoscale.la
if HAVE_CPU_I386
-SCALER = gstscale_x86.c
+SCALER = gstscale_x86.c gstscale_x86_asm.s
else
SCALER =
endif
noinst_HEADERS = gstscale_x86.h
#CFLAGS += -S -O1 $(FOMIT_FRAME_POINTER) -funroll-all-loops -finline-functions -ffast-math
-CFLAGS = -g $(GLIB_CFLAGS) $(GST_CFLAGS) $(XML_CFLAGS) -O5 -fomit-frame-pointer -ffast-math
+CFLAGS = $(GLIB_CFLAGS) $(GST_CFLAGS) $(XML_CFLAGS) -O5 -fomit-frame-pointer -ffast-math
GST_DEBUG (0,"scaler start/end %p %p %p\n", copy_row, eip, (void*)(eip-copy_row));
}
-void
-gst_videoscale_scale_nearest_x86 (GstVideoScale *scale,
- unsigned char *src,
- unsigned char *dest,
- int sw, int sh, int dw, int dh)
-{
- int pos, inc, y;
- int u1, u2;
-
-
- scale->temp = scale->copy_row;
-
- GST_DEBUG (0,"videoscale: scaling nearest %p %p %p %d\n", scale->copy_row, src, dest, dw);
-
- pos = 0x10000;
- inc = (sh<<16)/dh;
-
- for (y = dh; y > 0; y--) {
-
- while (pos >0x10000) {
- src += sw;
- pos-=0x10000;
- }
-
- __asm__ __volatile__ ("
- movl %2, %%eax\n
- call *%%eax
- "
- : "=&D" (u1), "=&S" (u2)
- : "g" (scale->temp), "0" (dest), "1" (src)
- : "memory" );
-
- dest+= dw;
-
- pos += inc;
- }
- GST_DEBUG(0,"videoscale: scaling nearest done %p\n", scale->copy_row);
-}
--- /dev/null
+#-----------------------------------------------------------------------
+# void gst_videoscale_scale_nearest_x86 (GstVideoScale *scale,
+#                                        unsigned char *src,
+#                                        unsigned char *dest,
+#                                        int sw, int sh, int dw, int dh)
+#
+# Syntax: GNU as, AT&T operand order.  Target: 32-bit x86, all
+# arguments read from the stack.
+#
+# For each of the dh destination rows, advances src by whole source
+# rows (sw bytes each) while the 16.16 fixed-point accumulator "pos"
+# is above 1.0, then calls the code whose address is stored in the
+# scale structure with %edi = dest and %esi = src.  After the call
+# dest advances by dw bytes and pos by inc = (sh << 16) / dh.
+#
+# Returns immediately, without dividing, when dh <= 0.
+#
+# Frame after the prologue (offsets from %esp):
+#    0 saved %esi     4 saved %edi     8 saved %ebp
+#   12 pos           16 y (rows left) 20 return address
+#   24 scale         28 src           32 dest (advanced in place)
+#   36 sw            40 sh            44 dw          48 dh
+#
+# Saved and restored: %ebp, %edi, %esi.  Used as scratch: %eax,
+# %ecx, %edx, flags.
+#
+# NOTE(review): inc is kept in %ecx across the indirect call, so the
+# called code must leave %ecx and %ebp intact - confirm against the
+# row-copy code generator.
+#-----------------------------------------------------------------------
+
+	.equ	VS_COPY_ROW_OFS, 28	# offset of the row-copy code buffer in *scale
+	.equ	VS_TEMP_OFS, 8220	# offset of the pointer slot the call goes through
+	.equ	VS_FIXED_ONE, 65536	# 1.0 in 16.16 fixed point
+
+.text
+	.align 4
+.globl gst_videoscale_scale_nearest_x86
+	.type	 gst_videoscale_scale_nearest_x86,@function
+gst_videoscale_scale_nearest_x86:
+
+	subl $8,%esp			# room for pos and y
+	pushl %ebp
+	pushl %edi
+	pushl %esi
+
+	movl 28(%esp),%ebp		# ebp = src (current source row)
+
+	# scale->temp = scale->copy_row
+	movl 24(%esp),%eax
+	leal VS_COPY_ROW_OFS(%eax),%edx
+	movl %edx,VS_TEMP_OFS(%eax)
+
+	# Reject dh <= 0 before it is used as a divisor, so dh == 0
+	# leaves through the normal exit instead of faulting in idivl.
+	movl 48(%esp),%eax
+	testl %eax,%eax
+	jle .L92
+	movl %eax,16(%esp)		# y = dh
+
+	movl $VS_FIXED_ONE,12(%esp)	# pos = 1.0
+
+	# ecx = inc = (sh << 16) / dh
+	movl 40(%esp),%eax
+	sall $16,%eax
+	cltd				# sign-extend eax into edx:eax for idivl
+	idivl 48(%esp)
+	movl %eax,%ecx
+	jmp .L100
+
+	.p2align 4,,7
+.L97:
+	# while (pos > 1.0) { src += sw; pos -= 1.0; }
+	addl 36(%esp),%ebp
+	subl $VS_FIXED_ONE,12(%esp)
+.L100:
+	cmpl $VS_FIXED_ONE,12(%esp)
+	jg .L97
+
+	# Emit one destination row through the pointer stored above.
+	movl 32(%esp),%edi		# edi = dest
+	movl %ebp,%esi			# esi = src
+	movl 24(%esp),%edx		# edx = scale
+	movl VS_TEMP_OFS(%edx),%eax
+	call *%eax
+
+	movl 44(%esp),%eax
+	addl %eax,32(%esp)		# dest += dw
+	addl %ecx,12(%esp)		# pos += inc
+	decl 16(%esp)			# y--; decl sets ZF/SF/OF, so no cmpl needed
+	jg .L100			# loop while y > 0
+.L92:
+	popl %esi
+	popl %edi
+	popl %ebp
+	addl $8,%esp
+	ret