Web lists-archives.org

[MPlayer-dev-eng] vo_gl PBO patch ..




Hi Team,

this is regarding:
	- vo_gl
	- HD video material
	- AMD's low performance glTexSubImage2D()
          texture upload.

I have experienced very low texture upload performance with AMD GPUs
while _trying_ to watch HD content.

A little performance test using valgrind showed me that it's glTexSubImage2D() (again).

It looks like AMD still cannot do DMA transfers automatically, sad thing.

Therefore I have added a 'pbodma' option
to use PBOs for texture uploads.

It looks silly, since we do an extra transfer first (a memcpy into the PBO)
and then let glTexSubImage2D() do a DMA transfer using the PBO,
but it's still faster for AMD GPUs.

On an AMD 780G motherboard (IGP HD3xxx) the
performance hit drops from 82% to 24%,
which makes it usable.

Only a small improvement on NVidia, where the performance hit goes from 3% to 2%,
but still :)

Sad thing for AMD (again):
they cannot really handle GL_UNPACK_ROW_LENGTH,
therefore the only working combination for my AMD 780G is:
	'rectangle=0:pbodma=1'
However, this combination does make vo_gl usable.

Please submit and/or let me know your concerns.

Cheers, Sven

-- 
health & wealth
mailto:sgothel@xxxxxxxxxxx ; www  : http://www.jausoft.ca ; pgp: http://www.jausoft.com/gpg/
land : +1 (780) 637 3842 ; cell: +1 (780) 952 4481
Timezone MST: EST-2, UTC-7, CET-8 ; MDT: EDT-2, UTC-6, CEDT-8
Index: libvo/vo_gl.c
===================================================================
--- libvo/vo_gl.c	(revision 26300)
+++ libvo/vo_gl.c	(working copy)
@@ -82,13 +82,14 @@
 static int many_fmts;
 static int use_glFinish;
 static int swap_interval;
+static int use_pboDMA;
+static gl_pbo_xfer_t gl_pboDMA;
 static GLenum gl_target;
 static GLint gl_texfmt;
 static GLenum gl_format;
 static GLenum gl_type;
-static GLuint gl_buffer;
-static int gl_buffersize;
-static void *gl_bufferptr;
+static gl_pbo_info_t draw_mcpy_pbo[3];
+static gl_pbo_info_t get_image_pbo;
 static GLuint fragprog;
 static GLuint default_texs[22];
 static char *custom_prog;
@@ -369,10 +370,10 @@
   if (largeeosdtex[0])
     glDeleteTextures(2, largeeosdtex);
   largeeosdtex[0] = 0;
-  if (DeleteBuffers && gl_buffer)
-    DeleteBuffers(1, &gl_buffer);
-  gl_buffer = 0; gl_buffersize = 0;
-  gl_bufferptr = NULL;
+  glDestroyPBO(&get_image_pbo);
+  glDestroyPBO(&(draw_mcpy_pbo[0]));
+  glDestroyPBO(&(draw_mcpy_pbo[1]));
+  glDestroyPBO(&(draw_mcpy_pbo[2]));
   err_shown = 0;
 }
 
@@ -381,8 +382,16 @@
  * set global gl-related variables to their default values
  */
 static int initGl(uint32_t d_width, uint32_t d_height) {
-  texSize(image_width, image_height, &texture_width, &texture_height);
+  uint32_t _image_width=image_width;
+  if (gl_pboDMA==PBO_XFER_SINGLE_MEMCPY && use_rectangle) {
+    // increase texture stride a bit, since video stride is sometimes a bit higher
+    _image_width+=128;
+  }
+  texSize(_image_width, image_height, &texture_width, &texture_height);
 
+  memset(&get_image_pbo, 0, sizeof(get_image_pbo));
+  memset(draw_mcpy_pbo, 0, sizeof(draw_mcpy_pbo));
+
   glDisable(GL_BLEND); 
   glDisable(GL_DEPTH_TEST);
   glDepthMask(GL_FALSE);
@@ -391,8 +400,8 @@
   glDrawBuffer(vo_doublebuffering?GL_BACK:GL_FRONT);
   glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE);
 
-  mp_msg(MSGT_VO, MSGL_V, "[gl] Creating %dx%d texture...\n",
-          texture_width, texture_height);
+  mp_msg(MSGT_VO, MSGL_V, "[gl] Creating %dx%d texture_sz, %dx%d image_sz, %ux%u d_sz...\n",
+          texture_width, texture_height, image_width, image_height, d_width, d_height);
 
   if (image_format == IMGFMT_YV12) {
     int i;
@@ -407,9 +416,16 @@
     ActiveTexture(GL_TEXTURE1);
     glCreateClearTex(gl_target, gl_texfmt, GL_LINEAR,
                      texture_width / 2, texture_height / 2, 128);
+
+    if(use_pboDMA) {
+        glCreatePBO(gl_pboDMA, &draw_mcpy_pbo[1], gl_format, gl_type, texture_width/2, texture_height/2, 0);
+    }
     ActiveTexture(GL_TEXTURE2);
     glCreateClearTex(gl_target, gl_texfmt, GL_LINEAR,
                      texture_width / 2, texture_height / 2, 128);
+    if(use_pboDMA) {
+        glCreatePBO(gl_pboDMA, &draw_mcpy_pbo[2], gl_format, gl_type, texture_width/2, texture_height/2, 0);
+    }
     switch (use_yuv) {
       case YUV_CONVERSION_FRAGMENT_LOOKUP:
       case YUV_CONVERSION_FRAGMENT_POW:
@@ -428,6 +444,10 @@
   }
   glCreateClearTex(gl_target, gl_texfmt, GL_LINEAR,
                    texture_width, texture_height, 0);
+  if(use_pboDMA) {
+      glCreatePBO(gl_pboDMA, &draw_mcpy_pbo[0], gl_format, gl_type, texture_width, texture_height, 0);
+  }
+  glCreatePBO(PBO_XFER_NO_MEMCPY, &get_image_pbo, gl_format, gl_type, texture_width, texture_height, 0);
 
   resize(d_width, d_height);
 
@@ -646,16 +666,30 @@
 static int draw_slice(uint8_t *src[], int stride[], int w,int h,int x,int y)
 {
   mpi_flipped = (stride[0] < 0);
-  glUploadTex(gl_target, gl_format, gl_type, src[0], stride[0],
-              x, y, w, h, slice_height);
-  if (image_format == IMGFMT_YV12) {
-    ActiveTexture(GL_TEXTURE1);
-    glUploadTex(gl_target, gl_format, gl_type, src[1], stride[1],
-                x / 2, y / 2, w / 2, h / 2, slice_height);
-    ActiveTexture(GL_TEXTURE2);
-    glUploadTex(gl_target, gl_format, gl_type, src[2], stride[2],
-                x / 2, y / 2, w / 2, h / 2, slice_height);
-    ActiveTexture(GL_TEXTURE0);
+  if(!use_pboDMA) {
+      glUploadTex(gl_target, gl_format, gl_type, src[0], stride[0],
+                  x, y, w, h, slice_height);
+      if (image_format == IMGFMT_YV12) {
+        ActiveTexture(GL_TEXTURE1);
+        glUploadTex(gl_target, gl_format, gl_type, src[1], stride[1],
+                    x / 2, y / 2, w / 2, h / 2, slice_height);
+        ActiveTexture(GL_TEXTURE2);
+        glUploadTex(gl_target, gl_format, gl_type, src[2], stride[2],
+                    x / 2, y / 2, w / 2, h / 2, slice_height);
+        ActiveTexture(GL_TEXTURE0);
+      }
+  } else {
+      glUploadTexPBO(&draw_mcpy_pbo[0], gl_target, gl_format, gl_type,
+                  src[0], stride[0], x, y, w, h, slice_height);
+      if (image_format == IMGFMT_YV12) {
+        ActiveTexture(GL_TEXTURE1);
+        glUploadTexPBO(&draw_mcpy_pbo[1], gl_target, gl_format, gl_type,
+                    src[1], stride[1], x / 2, y / 2, w / 2, h / 2, slice_height);
+        ActiveTexture(GL_TEXTURE2);
+        glUploadTexPBO(&draw_mcpy_pbo[2], gl_target, gl_format, gl_type,
+                    src[2], stride[2], x / 2, y / 2, w / 2, h / 2, slice_height);
+        ActiveTexture(GL_TEXTURE0);
+      }
   }
   return 0;
 }
@@ -668,22 +702,34 @@
     err_shown = 1;
     return VO_FALSE;
   }
-  if (mpi->flags & MP_IMGFLAG_READABLE) return VO_FALSE;
-  if (mpi->type == MP_IMGTYPE_IP || mpi->type == MP_IMGTYPE_IPB)
+  if (mpi->flags & MP_IMGFLAG_READABLE) {
+    mp_msg (MSGT_VO, MSGL_V, "[gl] get_image failed: MP_IMGFLAG_READABLE\n");
+    return VO_FALSE;
+  }
+  if (mpi->type == MP_IMGTYPE_IP || mpi->type == MP_IMGTYPE_IPB) {
+    if (mpi->type == MP_IMGTYPE_IP) {
+        mp_msg (MSGT_VO, MSGL_V, "[gl] get_image failed: MP_IMGTYPE_IP\n");
+    } else {
+        mp_msg (MSGT_VO, MSGL_V, "[gl] get_image failed: MP_IMGTYPE_IPB\n");
+    }
     return VO_FALSE; // we can not provide readable buffers
-  if (!gl_buffer)
-    GenBuffers(1, &gl_buffer);
-  BindBuffer(GL_PIXEL_UNPACK_BUFFER, gl_buffer);
+  }
+
   mpi->stride[0] = mpi->width * mpi->bpp / 8;
-  if (mpi->stride[0] * mpi->h > gl_buffersize) {
-    BufferData(GL_PIXEL_UNPACK_BUFFER, mpi->stride[0] * mpi->h,
-               NULL, GL_DYNAMIC_DRAW);
-    gl_buffersize = mpi->stride[0] * mpi->h;
+  if (!get_image_pbo.name || mpi->stride[0] * mpi->h > get_image_pbo.sz) {
+    glDestroyPBO(&get_image_pbo);
+    if(!glCreatePBO(0, &get_image_pbo, gl_format, gl_type, mpi->stride[0], mpi->h, 1)) {
+        mp_msg (MSGT_VO, MSGL_V, "[gl] get_image failed: Could not create PBO\n");
+        return VO_FALSE;
+    }
+  } else {
+    glBindPBO(GL_PIXEL_UNPACK_BUFFER, &get_image_pbo);
   }
-  if (!gl_bufferptr)
-    gl_bufferptr = MapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY);
-  mpi->planes[0] = gl_bufferptr;
-  BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+
+  glMapPBO(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY, &get_image_pbo);
+  mpi->planes[0] = get_image_pbo.mem;
+
+  glUnbindPBO(GL_PIXEL_UNPACK_BUFFER, &get_image_pbo);
   if (mpi->planes[0] == NULL) {
     if (!err_shown)
       mp_msg(MSGT_VO, MSGL_ERR, "[gl] could not acquire buffer for dr\n"
@@ -701,10 +747,12 @@
     mpi->stride[2] = mpi->width >> 1;
   }
   mpi->flags |= MP_IMGFLAG_DIRECT;
+  mp_msg (MSGT_VO, MSGL_V, "[gl] get_image_pbo .. direct, \n");
   return VO_TRUE;
 }
 
 static uint32_t draw_image(mp_image_t *mpi) {
+  gl_pbo_info_t * pbo[3] = { NULL, NULL, NULL };
   int slice = slice_height;
   int stride[3] = {mpi->stride[0], mpi->stride[1], mpi->stride[2]};
   unsigned char *planes[3] = {mpi->planes[0], mpi->planes[1], mpi->planes[2]};
@@ -718,24 +766,33 @@
     planes[0] -= base;
     planes[1] -= base;
     planes[2] -= base;
-    BindBuffer(GL_PIXEL_UNPACK_BUFFER, gl_buffer);
-    UnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
-    gl_bufferptr = NULL;
+    glBindPBO(GL_PIXEL_UNPACK_BUFFER, &get_image_pbo);
+    glUnmapPBO(GL_PIXEL_UNPACK_BUFFER, &get_image_pbo);
     slice = 0; // always "upload" full texture
-  }
-  glUploadTex(gl_target, gl_format, gl_type, planes[0], stride[0],
+    mp_msg (MSGT_VO, MSGL_INFO, "[gl] draw_image: direct, \n");
+    pbo[0] = &get_image_pbo;
+    pbo[1] = &get_image_pbo;
+    pbo[2] = &get_image_pbo;
+  } else if(use_pboDMA) {
+    pbo[0] = &draw_mcpy_pbo[0];
+    pbo[1] = &draw_mcpy_pbo[1];
+    pbo[2] = &draw_mcpy_pbo[2];
+  } // else glUploadTexPBO() will fall back to use glUploadTex() if pbo==NULL
+
+  glUploadTexPBO(pbo[0], gl_target, gl_format, gl_type, planes[0], stride[0],
               mpi->x, mpi->y, mpi->w, mpi->h, slice);
   if (mpi->imgfmt == IMGFMT_YV12) {
     ActiveTexture(GL_TEXTURE1);
-    glUploadTex(gl_target, gl_format, gl_type, planes[1], stride[1],
-                mpi->x / 2, mpi->y / 2, mpi->w / 2, mpi->h / 2, slice);
+    glUploadTexPBO(pbo[1], gl_target, gl_format, gl_type, planes[1], stride[1],
+              mpi->x / 2, mpi->y / 2, mpi->w / 2, mpi->h / 2, slice);
     ActiveTexture(GL_TEXTURE2);
-    glUploadTex(gl_target, gl_format, gl_type, planes[2], stride[2],
-                mpi->x / 2, mpi->y / 2, mpi->w / 2, mpi->h / 2, slice);
+    glUploadTexPBO(pbo[2], gl_target, gl_format, gl_type, planes[2], stride[2],
+              mpi->x / 2, mpi->y / 2, mpi->w / 2, mpi->h / 2, slice);
     ActiveTexture(GL_TEXTURE0);
   }
-  if (mpi->flags & MP_IMGFLAG_DIRECT)
-    BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+  if (mpi->flags & MP_IMGFLAG_DIRECT) {
+      glUnbindPBO(GL_PIXEL_UNPACK_BUFFER, &get_image_pbo);
+  }
   return VO_TRUE;
 }
 
@@ -798,6 +855,7 @@
   {"customtlin",   OPT_ARG_BOOL, &custom_tlin,  NULL},
   {"customtrect",  OPT_ARG_BOOL, &custom_trect, NULL},
   {"osdcolor",     OPT_ARG_INT,  &osd_color,    NULL},
+  {"pbodma",       OPT_ARG_INT,  &use_pboDMA,   NULL},
   {NULL}
 };
 
@@ -814,6 +872,8 @@
     use_rectangle = 0;
     use_glFinish = 0;
     swap_interval = 1;
+    use_pboDMA = 0;
+    gl_pboDMA = PBO_XFER_DISABLED;
     slice_height = 0;
     custom_prog = NULL;
     custom_tex = NULL;
@@ -869,6 +929,13 @@
               "    use texture_rectangle for customtex texture\n"
               "  osdcolor=<0xAARRGGBB>\n"
               "    use the given color for the OSD\n"
+              "  pbodma=<0,1,2>\n"
+              "    use PBO DMA transfers for any texture stream upload,\n"
+              "    a safe setting, e.g. for AMD GPU's, would be: rectangle=0:pbodma=1\n"
+              "    a fast setting, e.g. for NVidia GPU's, would be: rectangle=2:pbodma=2\n"
+              "    0: disabled (default).\n"
+              "    1: use multiple memcpy video to texture, slower, but shall work with all GPU's.\n"
+              "    2: use single memcpy video to texture, faster, but may not work for some GPU's (ie. AMD's).\n"
               "\n" );
       return -1;
     }
@@ -882,6 +949,17 @@
                "Use -vo gl:nomanyfmts if playback fails.\n");
     mp_msg (MSGT_VO, MSGL_V, "[gl] Using %d as slice height "
              "(0 means image height).\n", slice_height);
+
+    switch(use_pboDMA) {
+        case  1:
+            gl_pboDMA = PBO_XFER_MULTIPLE_MEMCPY;
+            break;
+        case  2:
+            gl_pboDMA = PBO_XFER_SINGLE_MEMCPY;
+            break;
+        default:
+            gl_pboDMA = PBO_XFER_DISABLED;
+    }
     if( !vo_init() ) return -1; // Can't open X11
 
     return 0;
Index: libvo/gl_common.h
===================================================================
--- libvo/gl_common.h	(revision 26300)
+++ libvo/gl_common.h	(working copy)
@@ -203,6 +203,38 @@
 #endif
 /** \} */ // end of glextdefines group
 
+#ifndef BUFFER_OFFSET
+  #define BUFFER_OFFSET(i) ((char *)NULL + (i))   
+#endif
+
+/**
+ * Sad thing: the PBO implementation of AMD's latest fglrx driver
+ * cannot handle a GL_UNPACK_ROW_LENGTH other than
+ * the texture stride or the POT stride.
+ * Therefore we need the 'PBO_XFER_MULTIPLE_MEMCPY' mode,
+ * in which we reshape the frame data to the texture size ourselves;
+ * this is still faster than glTexSubImage2D without any guaranteed DMA xfer.
+ */
+typedef enum {
+    PBO_XFER_DISABLED=0,
+    PBO_XFER_NO_MEMCPY=1,
+    PBO_XFER_MULTIPLE_MEMCPY=2,
+    PBO_XFER_SINGLE_MEMCPY=3
+} gl_pbo_xfer_t;
+
+typedef struct {
+  gl_pbo_xfer_t mode;
+  GLuint name;
+  int    tw;
+  int    th;
+  int    bytesPerPixel;
+  int    stride;
+  int    sz;
+  int    bound;
+  int    test;
+  uint8_t*  mem;
+} gl_pbo_info_t;
+
 void glAdjustAlignment(int stride);
 
 const char *glValName(GLint value);
@@ -217,6 +249,16 @@
 void glUploadTex(GLenum target, GLenum format, GLenum type,
                  const void *dataptr, int stride,
                  int x, int y, int w, int h, int slice);
+
+void glDestroyPBO(gl_pbo_info_t * pbo);
+int glCreatePBO(gl_pbo_xfer_t mode, gl_pbo_info_t * pbo, GLenum format, GLenum type, int tw, int th, int keepBound);
+void glBindPBO(GLenum type, gl_pbo_info_t * pbo);
+void glUnbindPBO(GLenum type, gl_pbo_info_t * pbo);
+void glMapPBO(GLenum buff, GLenum mode, gl_pbo_info_t * pbo);
+void glUnmapPBO(GLenum buff, gl_pbo_info_t * pbo);
+void glUploadTexPBO(gl_pbo_info_t *pbo, GLenum target, GLenum format, GLenum type,
+                    const void *dataptr, int stride,
+                    int x, int y, int w, int h, int slice);
 void glDrawTex(GLfloat x, GLfloat y, GLfloat w, GLfloat h,
                GLfloat tx, GLfloat ty, GLfloat tw, GLfloat th,
                int sx, int sy, int rect_tex, int is_yv12, int flip);
Index: libvo/gl_common.c
===================================================================
--- libvo/gl_common.c	(revision 26300)
+++ libvo/gl_common.c	(working copy)
@@ -505,6 +505,155 @@
     glTexSubImage2D(target, 0, x, y, w, y_max - y, format, type, data);
 }
 
+void glUploadTexPBO(gl_pbo_info_t *pbo, 
+                 GLenum target, GLenum format, GLenum type,
+                 const void *dataptr, int stride,
+                 int x, int y, int w, int h, int slice) {
+  int bytesPerPixel=glFmt2bpp(format, type), i;
+  const uint8_t *data = dataptr;
+  int ownBondBuffer=0;
+  int ownMemMap=0;
+  int rowlenBytes=0;
+
+  if (!pbo || !pbo->name) {
+    // fallback ..
+    glUploadTex(target, format, type, dataptr, stride, x, y, w, h, slice);
+    return;
+  }
+
+  if (w <= 0 || h <= 0) return;
+  if (slice <= 0)
+    slice = h;
+  if (stride < 0) {
+    data += (h - 1) * stride;
+    stride = -stride;
+  }
+
+  if ( pbo->mode==PBO_XFER_SINGLE_MEMCPY && h * stride > pbo->sz ) {
+    pbo->mode=PBO_XFER_MULTIPLE_MEMCPY;
+    mp_msg (MSGT_VO, MSGL_INFO, "[gl] glUploadTexPBO %d: PBO single -> multiple memcpy (video stride)!\n", 
+        pbo->name);
+  }
+  if ( h * w > pbo->sz )
+  {
+    mp_msg (MSGT_VO, MSGL_INFO, "[gl] glUploadTexPBO %d: %d/%d %dx%d, sz %d, slic %d, strd %d, bpp %d -> PBO Disabled (video size)!\n", 
+      pbo->name, x,y, w,h,  pbo->sz, slice, stride, bytesPerPixel);
+    glDestroyPBO(pbo);
+
+    // fallback ..
+    glUploadTex(target, format, type, dataptr, stride, x, y, w, h, slice);
+    return;
+  }
+
+  if((pbo->test&1)==0) {
+      mp_msg (MSGT_VO, MSGL_V, "[gl] glUploadTexPBO %d: %d/%d %dx%d, sz %d, slic %d, strd %d, bpp %d - mode %d\n", 
+        pbo->name, x,y, w,h,  pbo->sz, slice, stride, bytesPerPixel, pbo->mode);
+      pbo->test|=1;
+  }
+
+  if(!pbo->bound) {
+    glBindPBO(GL_PIXEL_UNPACK_BUFFER, pbo);
+    ownBondBuffer=1;
+  }
+
+  if(!pbo->mem) {
+    glMapPBO(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY, pbo);
+    ownMemMap=1;
+  }
+
+  if(pbo->mode==PBO_XFER_SINGLE_MEMCPY) {
+      memcpy(pbo->mem, data,  h*stride);
+      rowlenBytes = stride;
+  } else if(pbo->mode==PBO_XFER_MULTIPLE_MEMCPY) {
+      for(i=0;i<h;i++) {
+        memcpy(pbo->mem+(i*pbo->stride), data+(i*stride), w);
+      }
+      rowlenBytes = pbo->stride;
+  } else {
+      // PBO_XFER_NO_MEMCPY
+      rowlenBytes = stride;
+  }
+
+  if(ownMemMap) {
+      glUnmapPBO(GL_PIXEL_UNPACK_BUFFER, pbo);
+  }
+
+  glAdjustAlignment(rowlenBytes);
+  glPixelStorei(GL_UNPACK_ROW_LENGTH, rowlenBytes / bytesPerPixel);
+
+  if(!pbo->mem) {
+      glTexSubImage2D(target, 0, x, y, w, h, format, type, BUFFER_OFFSET(0));
+  } else {
+      glTexSubImage2D(target, 0, x, y, w, h, format, type, BUFFER_OFFSET(data-pbo->mem));
+  }
+
+  if(ownBondBuffer) {
+      glUnbindPBO(GL_PIXEL_UNPACK_BUFFER, pbo);
+  }
+}
+
+void glDestroyPBO(gl_pbo_info_t * pbo) {
+  if (!pbo->name) {
+    return;
+  }
+  mp_msg (MSGT_VO, MSGL_V, "[gl] glDestroyPBO %d: %dx%d, bytesPP %d, stride %d, sz %d\n", 
+      pbo->name, pbo->tw, pbo->th, pbo->bytesPerPixel, pbo->stride, pbo->sz);
+
+  glBindPBO(GL_PIXEL_UNPACK_BUFFER, pbo);
+  glUnmapPBO(GL_PIXEL_UNPACK_BUFFER, pbo);
+  glUnbindPBO(GL_PIXEL_UNPACK_BUFFER, pbo);
+  DeleteBuffers(1, &(pbo->name));
+  memset(pbo, 0, sizeof(gl_pbo_info_t));
+}
+
+int glCreatePBO(gl_pbo_xfer_t mode, gl_pbo_info_t * pbo, GLenum format, GLenum type, int tw, int th, int keepBound) {
+    int bytesPerPixel=glFmt2bpp(format, type);
+    memset(pbo, 0, sizeof(gl_pbo_info_t));
+    if (mode==PBO_XFER_DISABLED || !DeleteBuffers || !GenBuffers || !BindBuffer || !BufferData || !MapBuffer) {
+        return 0;
+    }
+    pbo->mode=mode;
+    GenBuffers(1, &(pbo->name));
+    pbo->tw=tw;
+    pbo->th=th;
+    pbo->bytesPerPixel=bytesPerPixel;
+    pbo->stride=tw*bytesPerPixel;
+    pbo->sz=th*pbo->stride;
+    pbo->mem=NULL;
+    glBindPBO(GL_PIXEL_UNPACK_BUFFER, pbo);
+    BufferData(GL_PIXEL_UNPACK_BUFFER, pbo->sz, NULL, GL_STREAM_DRAW);
+    mp_msg (MSGT_VO, MSGL_V, "[gl] glCreatePBO %d: %dx%d, bytesPP %d, stride %d, sz %d, mode %d\n", 
+      pbo->name, tw, th, bytesPerPixel, pbo->stride, pbo->sz, pbo->mode);
+    if(!keepBound) {
+        glUnbindPBO(GL_PIXEL_UNPACK_BUFFER, pbo);
+    }
+    return 1;
+}
+
+void glBindPBO(GLenum buff, gl_pbo_info_t * pbo) {
+    BindBuffer(buff, pbo->name);
+    pbo->bound=1;
+}
+
+void glUnbindPBO(GLenum buff, gl_pbo_info_t * pbo) {
+    BindBuffer(buff, 0);
+    pbo->bound=0;
+}
+
+void glMapPBO(GLenum buff, GLenum mode, gl_pbo_info_t * pbo) {
+    if(pbo->mem) {
+        glUnmapPBO(buff, pbo);
+    }
+    pbo->mem = MapBuffer(buff, mode);
+}
+
+void glUnmapPBO(GLenum buff, gl_pbo_info_t * pbo) {
+    if(pbo->mem) {
+        UnmapBuffer(buff);
+        pbo->mem = NULL;
+    }
+}
+
 static void fillUVcoeff(GLfloat *ucoef, GLfloat *vcoef,
                         float uvcos, float uvsin) {
   int i;
Index: DOCS/man/en/mplayer.1
===================================================================
--- DOCS/man/en/mplayer.1	(revision 26300)
+++ DOCS/man/en/mplayer.1	(working copy)
@@ -3591,6 +3591,20 @@
 2: Use the GL_ARB_texture_non_power_of_two extension.
 In some cases only supported in software and thus very slow.
 .RE
+.IPs pbodma=<0,1,2>
+Use PBO DMA transfers for any texture stream upload.
+.br
+A safe setting, e.g. for AMD GPU's, would be: rectangle=0:pbodma=1
+.br
+A fast setting, e.g. for NVidia GPU's, would be: rectangle=2:pbodma=2
+.br
+.RSss
+0: Disabled (default).
+.br
+1: Use multiple memcpy video to texture, slower, but shall work with all GPUs.
+.br
+2: Use single memcpy video to texture, faster, but may not work for some GPUs (i.e. AMD's).
+.RE
 .IPs swapinterval=<n>
 Minimum interval between two buffer swaps, counted in
 displayed frames (default: 1).
_______________________________________________
MPlayer-dev-eng mailing list
MPlayer-dev-eng@xxxxxxxxxxxx
https://lists.mplayerhq.hu/mailman/listinfo/mplayer-dev-eng