Web lists-archives.org

Re: [MPlayer-dev-eng] vo_gl PBO patch ..




version 4 .. reflecting Reimar's suggestions.

Cheers, Sven

-- 
health & wealth
mailto:sgothel@xxxxxxxxxxx ; www  : http://www.jausoft.ca ; pgp: http://www.jausoft.com/gpg/
land : +1 (780) 637 3842 ; cell: +1 (780) 952 4481
Timezone MST: EST-2, UTC-7, CET-8 ; MDT: EDT-2, UTC-6, CEDT-8
Index: libvo/vo_gl2.c
===================================================================
--- libvo/vo_gl2.c	(revision 26555)
+++ libvo/vo_gl2.c	(working copy)
@@ -385,7 +385,7 @@
       }
 
       if (texdirty) {
-        glUploadTex(GL_TEXTURE_2D, gl_bitmap_format,  gl_bitmap_type,
+        glUploadTex(NULL, GL_TEXTURE_2D, gl_bitmap_format,  gl_bitmap_type,
                     square->texture, image_width * image_bytes,
                     0, 0, thisw, thish, 0);
       }
@@ -756,17 +756,17 @@
         subtex_w = texture_width - subtex_x;
       ActiveTexture(GL_TEXTURE0);
       glBindTexture(GL_TEXTURE_2D, tsq->texobj);
-      glUploadTex(GL_TEXTURE_2D, gl_bitmap_format,  gl_bitmap_type,
+      glUploadTex(NULL, GL_TEXTURE_2D, gl_bitmap_format,  gl_bitmap_type,
                   yptr, ystride, subtex_x, subtex_y,
                   subtex_w, subtex_h, 0);
       ActiveTexture(GL_TEXTURE1);
       glBindTexture(GL_TEXTURE_2D, tsq->uvtexobjs[0]);
-      glUploadTex(GL_TEXTURE_2D, gl_bitmap_format,  gl_bitmap_type,
+      glUploadTex(NULL, GL_TEXTURE_2D, gl_bitmap_format,  gl_bitmap_type,
                   uptr, ustride, subtex_x / 2, subtex_y / 2,
                   subtex_w / 2, subtex_h / 2, 0);
       ActiveTexture(GL_TEXTURE2);
       glBindTexture(GL_TEXTURE_2D, tsq->uvtexobjs[1]);
-      glUploadTex(GL_TEXTURE_2D, gl_bitmap_format,  gl_bitmap_type,
+      glUploadTex(NULL, GL_TEXTURE_2D, gl_bitmap_format,  gl_bitmap_type,
                   vptr, vstride, subtex_x / 2, subtex_y / 2,
                   subtex_w / 2, subtex_h / 2, 0);
       subtex_x = 0;
Index: libvo/vo_gl.c
===================================================================
--- libvo/vo_gl.c	(revision 26555)
+++ libvo/vo_gl.c	(working copy)
@@ -2,6 +2,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <math.h>
+#include <assert.h>
 
 #include "config.h"
 #include "mp_msg.h"
@@ -82,13 +83,13 @@
 static int many_fmts;
 static int use_glFinish;
 static int swap_interval;
+static int use_pboDMA;
 static GLenum gl_target;
 static GLint gl_texfmt;
 static GLenum gl_format;
 static GLenum gl_type;
-static GLuint gl_buffer;
-static int gl_buffersize;
-static void *gl_bufferptr;
+static gl_pbo_info_t draw_mcpy_pbo[3];
+static gl_pbo_info_t get_image_pbo;
 static GLuint fragprog;
 static GLuint default_texs[22];
 static char *custom_prog;
@@ -317,7 +318,7 @@
       BindTexture(gl_target, *curtex++);
       glCreateClearTex(gl_target, GL_ALPHA, scale_type, sx, sy, 0);
     }
-    glUploadTex(gl_target, GL_ALPHA, GL_UNSIGNED_BYTE, i->bitmap, i->stride,
+    glUploadTex(NULL, gl_target, GL_ALPHA, GL_UNSIGNED_BYTE, i->bitmap, i->stride,
                 x, y, i->w, i->h, 0);
   }
   eosdDispList = glGenLists(1);
@@ -369,10 +370,10 @@
   if (largeeosdtex[0])
     glDeleteTextures(2, largeeosdtex);
   largeeosdtex[0] = 0;
-  if (DeleteBuffers && gl_buffer)
-    DeleteBuffers(1, &gl_buffer);
-  gl_buffer = 0; gl_buffersize = 0;
-  gl_bufferptr = NULL;
+  glDestroyPBO(&get_image_pbo);
+  glDestroyPBO(&(draw_mcpy_pbo[0]));
+  glDestroyPBO(&(draw_mcpy_pbo[1]));
+  glDestroyPBO(&(draw_mcpy_pbo[2]));
   err_shown = 0;
 }
 
@@ -381,8 +382,16 @@
  * set global gl-related variables to their default values
  */
 static int initGl(uint32_t d_width, uint32_t d_height) {
-  texSize(image_width, image_height, &texture_width, &texture_height);
+  uint32_t _image_width=image_width;
+  if (use_pboDMA==(int)PBO_XFER_SINGLE_MEMCPY && use_rectangle) {
+    // increase texture stride a bit, since video stride is sometimes a bit higher
+    _image_width+=128;
+  }
+  texSize(_image_width, image_height, &texture_width, &texture_height);
 
+  memset(&get_image_pbo, 0, sizeof(get_image_pbo));
+  memset(draw_mcpy_pbo, 0, sizeof(draw_mcpy_pbo));
+
   glDisable(GL_BLEND); 
   glDisable(GL_DEPTH_TEST);
   glDepthMask(GL_FALSE);
@@ -391,8 +400,8 @@
   glDrawBuffer(vo_doublebuffering?GL_BACK:GL_FRONT);
   glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE);
 
-  mp_msg(MSGT_VO, MSGL_V, "[gl] Creating %dx%d texture...\n",
-          texture_width, texture_height);
+  mp_msg(MSGT_VO, MSGL_V, "[gl] Creating %dx%d texture_sz, %dx%d image_sz, %ux%u d_sz...\n",
+          texture_width, texture_height, image_width, image_height, d_width, d_height);
 
   if (image_format == IMGFMT_YV12) {
     int i;
@@ -407,9 +416,16 @@
     ActiveTexture(GL_TEXTURE1);
     glCreateClearTex(gl_target, gl_texfmt, GL_LINEAR,
                      texture_width / 2, texture_height / 2, 128);
+
+    if(use_pboDMA) {
+        glCreatePBO((gl_pbo_xfer_t)use_pboDMA, &draw_mcpy_pbo[1], gl_format, gl_type, texture_width/2, texture_height/2, 0);
+    }
     ActiveTexture(GL_TEXTURE2);
     glCreateClearTex(gl_target, gl_texfmt, GL_LINEAR,
                      texture_width / 2, texture_height / 2, 128);
+    if(use_pboDMA) {
+        glCreatePBO((gl_pbo_xfer_t)use_pboDMA, &draw_mcpy_pbo[2], gl_format, gl_type, texture_width/2, texture_height/2, 0);
+    }
     switch (use_yuv) {
       case YUV_CONVERSION_FRAGMENT_LOOKUP:
       case YUV_CONVERSION_FRAGMENT_POW:
@@ -428,6 +444,10 @@
   }
   glCreateClearTex(gl_target, gl_texfmt, GL_LINEAR,
                    texture_width, texture_height, 0);
+  if(use_pboDMA) {
+      glCreatePBO((gl_pbo_xfer_t)use_pboDMA, &draw_mcpy_pbo[0], gl_format, gl_type, texture_width, texture_height, 0);
+  }
+  glCreatePBO(PBO_XFER_NO_MEMCPY, &get_image_pbo, gl_format, gl_type, texture_width, texture_height, 0);
 
   resize(d_width, d_height);
 
@@ -531,7 +551,7 @@
   glGenTextures(1, &osdtex[osdtexCnt]);
   BindTexture(gl_target, osdtex[osdtexCnt]);
   glCreateClearTex(gl_target, GL_LUMINANCE, scale_type, sx, sy, 0);
-  glUploadTex(gl_target, GL_LUMINANCE, GL_UNSIGNED_BYTE, src, stride,
+  glUploadTex(NULL, gl_target, GL_LUMINANCE, GL_UNSIGNED_BYTE, src, stride,
               0, 0, w, h, 0);
 
 #ifndef FAST_OSD
@@ -545,7 +565,7 @@
   // in-place is not possible since it is reused for future OSDs
   for (i = h * stride - 1; i >= 0; i--)
     tmp[i] = -srca[i];
-  glUploadTex(gl_target, GL_ALPHA, GL_UNSIGNED_BYTE, tmp, stride,
+  glUploadTex(NULL, gl_target, GL_ALPHA, GL_UNSIGNED_BYTE, tmp, stride,
               0, 0, w, h, 0);
   free(tmp);
   }
@@ -646,15 +666,15 @@
 static int draw_slice(uint8_t *src[], int stride[], int w,int h,int x,int y)
 {
   mpi_flipped = (stride[0] < 0);
-  glUploadTex(gl_target, gl_format, gl_type, src[0], stride[0],
-              x, y, w, h, slice_height);
+  glUploadTex(&draw_mcpy_pbo[0], gl_target, gl_format, gl_type,
+              src[0], stride[0], x, y, w, h, slice_height);
   if (image_format == IMGFMT_YV12) {
     ActiveTexture(GL_TEXTURE1);
-    glUploadTex(gl_target, gl_format, gl_type, src[1], stride[1],
-                x / 2, y / 2, w / 2, h / 2, slice_height);
+    glUploadTex(&draw_mcpy_pbo[1], gl_target, gl_format, gl_type,
+                src[1], stride[1], x / 2, y / 2, w / 2, h / 2, slice_height);
     ActiveTexture(GL_TEXTURE2);
-    glUploadTex(gl_target, gl_format, gl_type, src[2], stride[2],
-                x / 2, y / 2, w / 2, h / 2, slice_height);
+    glUploadTex(&draw_mcpy_pbo[2], gl_target, gl_format, gl_type,
+                src[2], stride[2], x / 2, y / 2, w / 2, h / 2, slice_height);
     ActiveTexture(GL_TEXTURE0);
   }
   return 0;
@@ -668,22 +688,34 @@
     err_shown = 1;
     return VO_FALSE;
   }
-  if (mpi->flags & MP_IMGFLAG_READABLE) return VO_FALSE;
-  if (mpi->type == MP_IMGTYPE_IP || mpi->type == MP_IMGTYPE_IPB)
+  if (mpi->flags & MP_IMGFLAG_READABLE) {
+    mp_msg (MSGT_VO, MSGL_V, "[gl] get_image failed: MP_IMGFLAG_READABLE\n");
+    return VO_FALSE;
+  }
+  if (mpi->type == MP_IMGTYPE_IP || mpi->type == MP_IMGTYPE_IPB) {
+    if (mpi->type == MP_IMGTYPE_IP) {
+        mp_msg (MSGT_VO, MSGL_V, "[gl] get_image failed: MP_IMGTYPE_IP\n");
+    } else {
+        mp_msg (MSGT_VO, MSGL_V, "[gl] get_image failed: MP_IMGTYPE_IPB\n");
+    }
     return VO_FALSE; // we can not provide readable buffers
-  if (!gl_buffer)
-    GenBuffers(1, &gl_buffer);
-  BindBuffer(GL_PIXEL_UNPACK_BUFFER, gl_buffer);
+  }
+
   mpi->stride[0] = mpi->width * mpi->bpp / 8;
-  if (mpi->stride[0] * mpi->h > gl_buffersize) {
-    BufferData(GL_PIXEL_UNPACK_BUFFER, mpi->stride[0] * mpi->h,
-               NULL, GL_DYNAMIC_DRAW);
-    gl_buffersize = mpi->stride[0] * mpi->h;
+  if (!get_image_pbo.name || mpi->stride[0] * mpi->h > get_image_pbo.sz) {
+    glDestroyPBO(&get_image_pbo);
+    if(!glCreatePBO(0, &get_image_pbo, gl_format, gl_type, mpi->stride[0], mpi->h, 1)) {
+        mp_msg (MSGT_VO, MSGL_V, "[gl] get_image failed: Could not create PBO\n");
+        return VO_FALSE;
+    }
+  } else {
+    glBindPBO(GL_PIXEL_UNPACK_BUFFER, &get_image_pbo);
   }
-  if (!gl_bufferptr)
-    gl_bufferptr = MapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY);
-  mpi->planes[0] = gl_bufferptr;
-  BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+
+  glMapPBO(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY, &get_image_pbo);
+  mpi->planes[0] = get_image_pbo.mem;
+
+  glUnbindPBO(GL_PIXEL_UNPACK_BUFFER, &get_image_pbo);
   if (mpi->planes[0] == NULL) {
     if (!err_shown)
       mp_msg(MSGT_VO, MSGL_ERR, "[gl] could not acquire buffer for dr\n"
@@ -701,10 +733,12 @@
     mpi->stride[2] = mpi->width >> 1;
   }
   mpi->flags |= MP_IMGFLAG_DIRECT;
+  mp_msg (MSGT_VO, MSGL_V, "[gl] get_image_pbo .. direct, \n");
   return VO_TRUE;
 }
 
 static uint32_t draw_image(mp_image_t *mpi) {
+  gl_pbo_info_t * pbo[3] = { NULL, NULL, NULL };
   int slice = slice_height;
   int stride[3] = {mpi->stride[0], mpi->stride[1], mpi->stride[2]};
   unsigned char *planes[3] = {mpi->planes[0], mpi->planes[1], mpi->planes[2]};
@@ -718,24 +752,33 @@
     planes[0] -= base;
     planes[1] -= base;
     planes[2] -= base;
-    BindBuffer(GL_PIXEL_UNPACK_BUFFER, gl_buffer);
-    UnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
-    gl_bufferptr = NULL;
+    glBindPBO(GL_PIXEL_UNPACK_BUFFER, &get_image_pbo);
+    glUnmapPBO(GL_PIXEL_UNPACK_BUFFER, &get_image_pbo);
     slice = 0; // always "upload" full texture
-  }
-  glUploadTex(gl_target, gl_format, gl_type, planes[0], stride[0],
+    mp_msg (MSGT_VO, MSGL_INFO, "[gl] draw_image: direct, \n");
+    pbo[0] = &get_image_pbo;
+    pbo[1] = &get_image_pbo;
+    pbo[2] = &get_image_pbo;
+  } else if(use_pboDMA) {
+    pbo[0] = &draw_mcpy_pbo[0];
+    pbo[1] = &draw_mcpy_pbo[1];
+    pbo[2] = &draw_mcpy_pbo[2];
+  } 
+
+  glUploadTex(pbo[0], gl_target, gl_format, gl_type, planes[0], stride[0],
               mpi->x, mpi->y, mpi->w, mpi->h, slice);
   if (mpi->imgfmt == IMGFMT_YV12) {
     ActiveTexture(GL_TEXTURE1);
-    glUploadTex(gl_target, gl_format, gl_type, planes[1], stride[1],
-                mpi->x / 2, mpi->y / 2, mpi->w / 2, mpi->h / 2, slice);
+    glUploadTex(pbo[1], gl_target, gl_format, gl_type, planes[1], stride[1],
+              mpi->x / 2, mpi->y / 2, mpi->w / 2, mpi->h / 2, slice);
     ActiveTexture(GL_TEXTURE2);
-    glUploadTex(gl_target, gl_format, gl_type, planes[2], stride[2],
-                mpi->x / 2, mpi->y / 2, mpi->w / 2, mpi->h / 2, slice);
+    glUploadTex(pbo[2], gl_target, gl_format, gl_type, planes[2], stride[2],
+              mpi->x / 2, mpi->y / 2, mpi->w / 2, mpi->h / 2, slice);
     ActiveTexture(GL_TEXTURE0);
   }
-  if (mpi->flags & MP_IMGFLAG_DIRECT)
-    BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+  if (mpi->flags & MP_IMGFLAG_DIRECT) {
+      glUnbindPBO(GL_PIXEL_UNPACK_BUFFER, &get_image_pbo);
+  }
   return VO_TRUE;
 }
 
@@ -798,6 +841,7 @@
   {"customtlin",   OPT_ARG_BOOL, &custom_tlin,  NULL},
   {"customtrect",  OPT_ARG_BOOL, &custom_trect, NULL},
   {"osdcolor",     OPT_ARG_INT,  &osd_color,    NULL},
+  {"pbodma",       OPT_ARG_INT,  &use_pboDMA,   NULL},
   {NULL}
 };
 
@@ -814,6 +858,10 @@
     use_rectangle = 0;
     use_glFinish = 0;
     swap_interval = 1;
+    use_pboDMA = (int)PBO_XFER_DISABLED;
+    assert(0==(int)PBO_XFER_DISABLED);
+    assert(1==(int)PBO_XFER_MULTIPLE_MEMCPY);
+    assert(2==(int)PBO_XFER_SINGLE_MEMCPY);
     slice_height = 0;
     custom_prog = NULL;
     custom_tex = NULL;
@@ -869,6 +917,13 @@
               "    use texture_rectangle for customtex texture\n"
               "  osdcolor=<0xAARRGGBB>\n"
               "    use the given color for the OSD\n"
+              "  pbodma=<0,1,2>\n"
+              "    use PBO DMA transfers for any texture stream upload,\n"
+              "    a safe setting, e.g. for AMD GPU's, would be: rectangle=0:pbodma=1\n"
+              "    a fast setting, e.g. for NVidia GPU's, would be: rectangle=2:pbodma=2\n"
+              "    0: disabled (default).\n"
+              "    1: use multiple memcpy video to texture, slower, but shall work with all GPU's.\n"
+              "    2: use single memcpy video to texture, faster, but may not work for some GPU's (ie. AMD's).\n"
               "\n" );
       return -1;
     }
@@ -876,12 +931,16 @@
       gl_target = GL_TEXTURE_RECTANGLE;
     else
       gl_target = GL_TEXTURE_2D;
+    if(0>use_pboDMA || use_pboDMA>(int)PBO_XFER_SINGLE_MEMCPY) {
+        use_pboDMA=(int)PBO_XFER_DISABLED;
+    }
     yuvconvtype = use_yuv | lscale << YUV_LUM_SCALER_SHIFT | cscale << YUV_CHROM_SCALER_SHIFT;
     if (many_fmts)
       mp_msg (MSGT_VO, MSGL_INFO, "[gl] using extended formats. "
                "Use -vo gl:nomanyfmts if playback fails.\n");
     mp_msg (MSGT_VO, MSGL_V, "[gl] Using %d as slice height "
              "(0 means image height).\n", slice_height);
+
     if( !vo_init() ) return -1; // Can't open X11
 
     return 0;
Index: libvo/gl_common.h
===================================================================
--- libvo/gl_common.h	(revision 26555)
+++ libvo/gl_common.h	(working copy)
@@ -203,6 +203,38 @@
 #endif
 /** \} */ // end of glextdefines group
 
+#ifndef BUFFER_OFFSET
+  #define BUFFER_OFFSET(i) ((char *)NULL + (i))   
+#endif
+
+/**
+ * Sad thing: the PBO implementation in AMD's latest fglrx driver
+ * cannot handle a GL_UNPACK_ROW_LENGTH
+ * other than the texture stride or POT stride.
+ * Therefore we need the 'PBO_XFER_MULTIPLE_MEMCPY' mode,
+ * in which we reshape the frame data to the texture size ourselves;
+ * this is still faster than glTexSubImage2D without any guaranteed DMA xfer.
+ */
+typedef enum {
+    PBO_XFER_DISABLED=0,
+    PBO_XFER_MULTIPLE_MEMCPY=1,
+    PBO_XFER_SINGLE_MEMCPY=2,
+    PBO_XFER_NO_MEMCPY=3
+} gl_pbo_xfer_t;
+
+typedef struct {
+  gl_pbo_xfer_t mode;
+  GLuint name;
+  int    tw;
+  int    th;
+  int    bytesPerPixel;
+  int    stride;
+  int    sz;
+  int    bound;
+  int    test;
+  uint8_t*  mem;
+} gl_pbo_info_t;
+
 void glAdjustAlignment(int stride);
 
 const char *glValName(GLint value);
@@ -214,9 +246,15 @@
                       int w, int h, unsigned char val);
 int glCreatePPMTex(GLenum target, GLenum fmt, GLint filter,
                    FILE *f, int *width, int *height, int *maxval);
-void glUploadTex(GLenum target, GLenum format, GLenum type,
-                 const void *dataptr, int stride,
-                 int x, int y, int w, int h, int slice);
+void glUploadTex(gl_pbo_info_t *pbo, GLenum target, GLenum format, GLenum type,
+                    const void *dataptr, int stride,
+                    int x, int y, int w, int h, int slice);
+void glDestroyPBO(gl_pbo_info_t * pbo);
+int glCreatePBO(gl_pbo_xfer_t mode, gl_pbo_info_t * pbo, GLenum format, GLenum type, int tw, int th, int keepBound);
+void glBindPBO(GLenum type, gl_pbo_info_t * pbo);
+void glUnbindPBO(GLenum type, gl_pbo_info_t * pbo);
+void glMapPBO(GLenum buff, GLenum mode, gl_pbo_info_t * pbo);
+void glUnmapPBO(GLenum buff, gl_pbo_info_t * pbo);
 void glDrawTex(GLfloat x, GLfloat y, GLfloat w, GLfloat h,
                GLfloat tx, GLfloat ty, GLfloat tw, GLfloat th,
                int sx, int sy, int rect_tex, int is_yv12, int flip);
Index: libvo/gl_common.c
===================================================================
--- libvo/gl_common.c	(revision 26555)
+++ libvo/gl_common.c	(working copy)
@@ -418,7 +418,7 @@
       fmt = GL_RGB16;
   }
   glCreateClearTex(target, fmt, filter, w, h, 0);
-  glUploadTex(target, GL_RGB, (m > 255) ? GL_UNSIGNED_SHORT : GL_UNSIGNED_BYTE,
+  glUploadTex(NULL, target, GL_RGB, (m > 255) ? GL_UNSIGNED_SHORT : GL_UNSIGNED_BYTE,
               data, w * bpp, 0, 0, w, h, 0);
   free(data);
   if (width) *width = w;
@@ -470,6 +470,7 @@
 
 /**
  * \brief upload a texture, handling things like stride and slices
+ * \param pbo optional PBO information, may be NULL
  * \param target texture target, usually GL_TEXTURE_2D
  * \param format OpenGL format of data
  * \param type OpenGL type of data
@@ -482,11 +483,17 @@
  * \param slice height of an upload slice, 0 for all at once
  * \ingroup gltexture
  */
-void glUploadTex(GLenum target, GLenum format, GLenum type,
+void glUploadTex(gl_pbo_info_t *pbo, 
+                 GLenum target, GLenum format, GLenum type,
                  const void *dataptr, int stride,
                  int x, int y, int w, int h, int slice) {
+  int bytesPerPixel=glFmt2bpp(format, type), i;
   const uint8_t *data = dataptr;
-  int y_max = y + h;
+  int ownBondBuffer=0;
+  int ownMemMap=0;
+  int rowlenBytes=0;
+  int usePBO = NULL!=pbo && 0!=pbo->name ;
+
   if (w <= 0 || h <= 0) return;
   if (slice <= 0)
     slice = h;
@@ -494,17 +501,128 @@
     data += (h - 1) * stride;
     stride = -stride;
   }
-  // this is not always correct, but should work for MPlayer
-  glAdjustAlignment(stride);
-  glPixelStorei(GL_UNPACK_ROW_LENGTH, stride / glFmt2bpp(format, type));
-  for (; y + slice <= y_max; y += slice) {
-    glTexSubImage2D(target, 0, x, y, w, slice, format, type, data);
-    data += stride * slice;
+
+  if ( usePBO && pbo->mode==PBO_XFER_SINGLE_MEMCPY && h * stride > pbo->sz ) {
+    pbo->mode=PBO_XFER_MULTIPLE_MEMCPY;
   }
-  if (y < y_max)
-    glTexSubImage2D(target, 0, x, y, w, y_max - y, format, type, data);
+
+  if (usePBO && h * w > pbo->sz )
+  {
+    usePBO = 0;
+  }
+
+  if(usePBO) {
+      if(!pbo->bound) {
+        glBindPBO(GL_PIXEL_UNPACK_BUFFER, pbo);
+        ownBondBuffer=1;
+      }
+
+      if(!pbo->mem) {
+        glMapPBO(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY, pbo);
+        ownMemMap=1;
+      }
+
+      if(pbo->mode==PBO_XFER_SINGLE_MEMCPY) {
+        memcpy(pbo->mem, data,  h*stride);
+        rowlenBytes = stride;
+      } else if(pbo->mode==PBO_XFER_MULTIPLE_MEMCPY) {
+        for(i=0;i<h;i++)
+        {
+            memcpy(pbo->mem+(i*pbo->stride), data+(i*stride), w*bytesPerPixel);
+        }
+        rowlenBytes = pbo->stride;
+      } else {
+        // PBO_XFER_NO_MEMCPY
+        rowlenBytes = stride;
+      }
+      
+      if(ownMemMap) {
+        glUnmapPBO(GL_PIXEL_UNPACK_BUFFER, pbo);
+      }
+  } else {
+      rowlenBytes = stride;
+  }
+  
+  glAdjustAlignment(rowlenBytes);
+  glPixelStorei(GL_UNPACK_ROW_LENGTH, rowlenBytes / bytesPerPixel);
+
+  if(usePBO) {
+      if(!pbo->mem) {
+        glTexSubImage2D(target, 0, x, y, w, h, format, type, BUFFER_OFFSET(0));
+      } else {
+        glTexSubImage2D(target, 0, x, y, w, h, format, type, BUFFER_OFFSET(data-pbo->mem));
+      }
+  } else {
+      glTexSubImage2D(target, 0, x, y, w, h, format, type, data);
+  }
+
+  if(usePBO && ownBondBuffer) {
+    glUnbindPBO(GL_PIXEL_UNPACK_BUFFER, pbo);
+  }
 }
 
+void glDestroyPBO(gl_pbo_info_t * pbo) {
+  if (!pbo->name) {
+    return;
+  }
+  mp_msg (MSGT_VO, MSGL_V, "[gl] glDestroyPBO %d: %dx%d, bytesPP %d, stride %d, sz %d\n", 
+      pbo->name, pbo->tw, pbo->th, pbo->bytesPerPixel, pbo->stride, pbo->sz);
+
+  glBindPBO(GL_PIXEL_UNPACK_BUFFER, pbo);
+  glUnmapPBO(GL_PIXEL_UNPACK_BUFFER, pbo);
+  glUnbindPBO(GL_PIXEL_UNPACK_BUFFER, pbo);
+  DeleteBuffers(1, &(pbo->name));
+  memset(pbo, 0, sizeof(gl_pbo_info_t));
+}
+
+int glCreatePBO(gl_pbo_xfer_t mode, gl_pbo_info_t * pbo, GLenum format, GLenum type, int tw, int th, int keepBound) {
+    int bytesPerPixel=glFmt2bpp(format, type);
+    memset(pbo, 0, sizeof(gl_pbo_info_t));
+    if (mode==PBO_XFER_DISABLED || !DeleteBuffers || !GenBuffers || !BindBuffer || !BufferData || !MapBuffer) {
+        return 0;
+    }
+    pbo->mode=mode;
+    GenBuffers(1, &(pbo->name));
+    pbo->tw=tw;
+    pbo->th=th;
+    pbo->bytesPerPixel=bytesPerPixel;
+    pbo->stride=tw*bytesPerPixel;
+    pbo->sz=th*pbo->stride;
+    pbo->mem=NULL;
+    glBindPBO(GL_PIXEL_UNPACK_BUFFER, pbo);
+    BufferData(GL_PIXEL_UNPACK_BUFFER, pbo->sz, NULL, GL_STREAM_DRAW);
+    mp_msg (MSGT_VO, MSGL_V, "[gl] glCreatePBO %d: %dx%d, bytesPP %d, stride %d, sz %d, mode %d\n", 
+      pbo->name, tw, th, bytesPerPixel, pbo->stride, pbo->sz, pbo->mode);
+    if(!keepBound) {
+        glUnbindPBO(GL_PIXEL_UNPACK_BUFFER, pbo);
+    }
+    return 1;
+}
+
+void glBindPBO(GLenum buff, gl_pbo_info_t * pbo) {
+    BindBuffer(buff, pbo->name);
+    pbo->bound=1;
+}
+
+void glUnbindPBO(GLenum buff, gl_pbo_info_t * pbo) {
+    BindBuffer(buff, 0);
+    pbo->bound=0;
+}
+
+void glMapPBO(GLenum buff, GLenum mode, gl_pbo_info_t * pbo) {
+    if(pbo->mem) {
+        glUnmapPBO(buff, pbo);
+    }
+    pbo->mem = MapBuffer(buff, mode);
+}
+
+void glUnmapPBO(GLenum buff, gl_pbo_info_t * pbo) {
+    if(pbo->mem) {
+        UnmapBuffer(buff);
+        pbo->mem = NULL;
+    }
+}
+
 static void fillUVcoeff(GLfloat *ucoef, GLfloat *vcoef,
                         float uvcos, float uvsin) {
   int i;
@@ -981,7 +1099,7 @@
       gen_gamma_map(&lookup_data[2 * LOOKUP_RES], LOOKUP_RES, bgamma);
       glCreateClearTex(GL_TEXTURE_2D, GL_LUMINANCE8, GL_LINEAR,
                        LOOKUP_RES, 4, 0);
-      glUploadTex(GL_TEXTURE_2D, GL_LUMINANCE, GL_UNSIGNED_BYTE, lookup_data,
+      glUploadTex(NULL, GL_TEXTURE_2D, GL_LUMINANCE, GL_UNSIGNED_BYTE, lookup_data,
                   LOOKUP_RES, 0, 0, LOOKUP_RES, 4, 0);
       ActiveTexture(GL_TEXTURE0);
       texs[0] += '0';
Index: DOCS/man/en/mplayer.1
===================================================================
--- DOCS/man/en/mplayer.1	(revision 26555)
+++ DOCS/man/en/mplayer.1	(working copy)
@@ -3622,6 +3622,20 @@
 2: Use the GL_ARB_texture_non_power_of_two extension.
 In some cases only supported in software and thus very slow.
 .RE
+.IPs pbodma=<0,1,2>
+Use PBO DMA transfers for any texture stream upload.
+.br
+A safe setting, e.g. for AMD GPUs, would be: rectangle=0:pbodma=1
+.br
+A fast setting, e.g. for NVidia GPUs, would be: rectangle=2:pbodma=2
+.br
+.RSss
+0: Disabled (default).
+.br
+1: Use multiple memcpy video to texture, slower, but should work with all GPUs
+.br
+2: Use single memcpy video to texture, faster, but may not work for some GPUs (e.g. AMD's)
+.RE
 .IPs swapinterval=<n>
 Minimum interval between two buffer swaps, counted in
 displayed frames (default: 1).
_______________________________________________
MPlayer-dev-eng mailing list
MPlayer-dev-eng@xxxxxxxxxxxx
https://lists.mplayerhq.hu/mailman/listinfo/mplayer-dev-eng