Re: [MPlayer-dev-eng] vo_gl PBO patch ..
- Date: Tue, 29 Apr 2008 13:44:32 -0600
- From: Sven Gothel <sgothel@xxxxxxxxxxx>
- Subject: Re: [MPlayer-dev-eng] vo_gl PBO patch ..
version 4 .. reflecting Reimar's suggestions.
Cheers, Sven
--
health & wealth
mailto:sgothel@xxxxxxxxxxx ; www : http://www.jausoft.ca ; pgp: http://www.jausoft.com/gpg/
land : +1 (780) 637 3842 ; cell: +1 (780) 952 4481
Timezone MST: EST-2, UTC-7, CET-8 ; MDT: EDT-2, UTC-6, CEDT-8
Index: libvo/vo_gl2.c
===================================================================
--- libvo/vo_gl2.c (revision 26555)
+++ libvo/vo_gl2.c (working copy)
@@ -385,7 +385,7 @@
}
if (texdirty) {
- glUploadTex(GL_TEXTURE_2D, gl_bitmap_format, gl_bitmap_type,
+ glUploadTex(NULL, GL_TEXTURE_2D, gl_bitmap_format, gl_bitmap_type,
square->texture, image_width * image_bytes,
0, 0, thisw, thish, 0);
}
@@ -756,17 +756,17 @@
subtex_w = texture_width - subtex_x;
ActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, tsq->texobj);
- glUploadTex(GL_TEXTURE_2D, gl_bitmap_format, gl_bitmap_type,
+ glUploadTex(NULL, GL_TEXTURE_2D, gl_bitmap_format, gl_bitmap_type,
yptr, ystride, subtex_x, subtex_y,
subtex_w, subtex_h, 0);
ActiveTexture(GL_TEXTURE1);
glBindTexture(GL_TEXTURE_2D, tsq->uvtexobjs[0]);
- glUploadTex(GL_TEXTURE_2D, gl_bitmap_format, gl_bitmap_type,
+ glUploadTex(NULL, GL_TEXTURE_2D, gl_bitmap_format, gl_bitmap_type,
uptr, ustride, subtex_x / 2, subtex_y / 2,
subtex_w / 2, subtex_h / 2, 0);
ActiveTexture(GL_TEXTURE2);
glBindTexture(GL_TEXTURE_2D, tsq->uvtexobjs[1]);
- glUploadTex(GL_TEXTURE_2D, gl_bitmap_format, gl_bitmap_type,
+ glUploadTex(NULL, GL_TEXTURE_2D, gl_bitmap_format, gl_bitmap_type,
vptr, vstride, subtex_x / 2, subtex_y / 2,
subtex_w / 2, subtex_h / 2, 0);
subtex_x = 0;
Index: libvo/vo_gl.c
===================================================================
--- libvo/vo_gl.c (revision 26555)
+++ libvo/vo_gl.c (working copy)
@@ -2,6 +2,7 @@
#include <stdlib.h>
#include <string.h>
#include <math.h>
+#include <assert.h>
#include "config.h"
#include "mp_msg.h"
@@ -82,13 +83,13 @@
static int many_fmts;
static int use_glFinish;
static int swap_interval;
+static int use_pboDMA;
static GLenum gl_target;
static GLint gl_texfmt;
static GLenum gl_format;
static GLenum gl_type;
-static GLuint gl_buffer;
-static int gl_buffersize;
-static void *gl_bufferptr;
+static gl_pbo_info_t draw_mcpy_pbo[3];
+static gl_pbo_info_t get_image_pbo;
static GLuint fragprog;
static GLuint default_texs[22];
static char *custom_prog;
@@ -317,7 +318,7 @@
BindTexture(gl_target, *curtex++);
glCreateClearTex(gl_target, GL_ALPHA, scale_type, sx, sy, 0);
}
- glUploadTex(gl_target, GL_ALPHA, GL_UNSIGNED_BYTE, i->bitmap, i->stride,
+ glUploadTex(NULL, gl_target, GL_ALPHA, GL_UNSIGNED_BYTE, i->bitmap, i->stride,
x, y, i->w, i->h, 0);
}
eosdDispList = glGenLists(1);
@@ -369,10 +370,10 @@
if (largeeosdtex[0])
glDeleteTextures(2, largeeosdtex);
largeeosdtex[0] = 0;
- if (DeleteBuffers && gl_buffer)
- DeleteBuffers(1, &gl_buffer);
- gl_buffer = 0; gl_buffersize = 0;
- gl_bufferptr = NULL;
+ glDestroyPBO(&get_image_pbo);
+ glDestroyPBO(&(draw_mcpy_pbo[0]));
+ glDestroyPBO(&(draw_mcpy_pbo[1]));
+ glDestroyPBO(&(draw_mcpy_pbo[2]));
err_shown = 0;
}
@@ -381,8 +382,16 @@
* set global gl-related variables to their default values
*/
static int initGl(uint32_t d_width, uint32_t d_height) {
- texSize(image_width, image_height, &texture_width, &texture_height);
+ uint32_t _image_width=image_width;
+ if (use_pboDMA==(int)PBO_XFER_SINGLE_MEMCPY && use_rectangle) {
+ // increase texture stride a bit, since video stride is sometimes a bit higher
+ _image_width+=128;
+ }
+ texSize(_image_width, image_height, &texture_width, &texture_height);
+ memset(&get_image_pbo, 0, sizeof(get_image_pbo));
+ memset(draw_mcpy_pbo, 0, sizeof(draw_mcpy_pbo));
+
glDisable(GL_BLEND);
glDisable(GL_DEPTH_TEST);
glDepthMask(GL_FALSE);
@@ -391,8 +400,8 @@
glDrawBuffer(vo_doublebuffering?GL_BACK:GL_FRONT);
glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE);
- mp_msg(MSGT_VO, MSGL_V, "[gl] Creating %dx%d texture...\n",
- texture_width, texture_height);
+ mp_msg(MSGT_VO, MSGL_V, "[gl] Creating %dx%d texture_sz, %dx%d image_sz, %ux%u d_sz...\n",
+ texture_width, texture_height, image_width, image_height, d_width, d_height);
if (image_format == IMGFMT_YV12) {
int i;
@@ -407,9 +416,16 @@
ActiveTexture(GL_TEXTURE1);
glCreateClearTex(gl_target, gl_texfmt, GL_LINEAR,
texture_width / 2, texture_height / 2, 128);
+
+ if(use_pboDMA) {
+ glCreatePBO((gl_pbo_xfer_t)use_pboDMA, &draw_mcpy_pbo[1], gl_format, gl_type, texture_width/2, texture_height/2, 0);
+ }
ActiveTexture(GL_TEXTURE2);
glCreateClearTex(gl_target, gl_texfmt, GL_LINEAR,
texture_width / 2, texture_height / 2, 128);
+ if(use_pboDMA) {
+ glCreatePBO((gl_pbo_xfer_t)use_pboDMA, &draw_mcpy_pbo[2], gl_format, gl_type, texture_width/2, texture_height/2, 0);
+ }
switch (use_yuv) {
case YUV_CONVERSION_FRAGMENT_LOOKUP:
case YUV_CONVERSION_FRAGMENT_POW:
@@ -428,6 +444,10 @@
}
glCreateClearTex(gl_target, gl_texfmt, GL_LINEAR,
texture_width, texture_height, 0);
+ if(use_pboDMA) {
+ glCreatePBO((gl_pbo_xfer_t)use_pboDMA, &draw_mcpy_pbo[0], gl_format, gl_type, texture_width, texture_height, 0);
+ }
+ glCreatePBO(PBO_XFER_NO_MEMCPY, &get_image_pbo, gl_format, gl_type, texture_width, texture_height, 0);
resize(d_width, d_height);
@@ -531,7 +551,7 @@
glGenTextures(1, &osdtex[osdtexCnt]);
BindTexture(gl_target, osdtex[osdtexCnt]);
glCreateClearTex(gl_target, GL_LUMINANCE, scale_type, sx, sy, 0);
- glUploadTex(gl_target, GL_LUMINANCE, GL_UNSIGNED_BYTE, src, stride,
+ glUploadTex(NULL, gl_target, GL_LUMINANCE, GL_UNSIGNED_BYTE, src, stride,
0, 0, w, h, 0);
#ifndef FAST_OSD
@@ -545,7 +565,7 @@
// in-place is not possible since it is reused for future OSDs
for (i = h * stride - 1; i >= 0; i--)
tmp[i] = -srca[i];
- glUploadTex(gl_target, GL_ALPHA, GL_UNSIGNED_BYTE, tmp, stride,
+ glUploadTex(NULL, gl_target, GL_ALPHA, GL_UNSIGNED_BYTE, tmp, stride,
0, 0, w, h, 0);
free(tmp);
}
@@ -646,15 +666,15 @@
static int draw_slice(uint8_t *src[], int stride[], int w,int h,int x,int y)
{
mpi_flipped = (stride[0] < 0);
- glUploadTex(gl_target, gl_format, gl_type, src[0], stride[0],
- x, y, w, h, slice_height);
+ glUploadTex(&draw_mcpy_pbo[0], gl_target, gl_format, gl_type,
+ src[0], stride[0], x, y, w, h, slice_height);
if (image_format == IMGFMT_YV12) {
ActiveTexture(GL_TEXTURE1);
- glUploadTex(gl_target, gl_format, gl_type, src[1], stride[1],
- x / 2, y / 2, w / 2, h / 2, slice_height);
+ glUploadTex(&draw_mcpy_pbo[1], gl_target, gl_format, gl_type,
+ src[1], stride[1], x / 2, y / 2, w / 2, h / 2, slice_height);
ActiveTexture(GL_TEXTURE2);
- glUploadTex(gl_target, gl_format, gl_type, src[2], stride[2],
- x / 2, y / 2, w / 2, h / 2, slice_height);
+ glUploadTex(&draw_mcpy_pbo[2], gl_target, gl_format, gl_type,
+ src[2], stride[2], x / 2, y / 2, w / 2, h / 2, slice_height);
ActiveTexture(GL_TEXTURE0);
}
return 0;
@@ -668,22 +688,34 @@
err_shown = 1;
return VO_FALSE;
}
- if (mpi->flags & MP_IMGFLAG_READABLE) return VO_FALSE;
- if (mpi->type == MP_IMGTYPE_IP || mpi->type == MP_IMGTYPE_IPB)
+ if (mpi->flags & MP_IMGFLAG_READABLE) {
+ mp_msg (MSGT_VO, MSGL_V, "[gl] get_image failed: MP_IMGFLAG_READABLE\n");
+ return VO_FALSE;
+ }
+ if (mpi->type == MP_IMGTYPE_IP || mpi->type == MP_IMGTYPE_IPB) {
+ if (mpi->type == MP_IMGTYPE_IP) {
+ mp_msg (MSGT_VO, MSGL_V, "[gl] get_image failed: MP_IMGTYPE_IP\n");
+ } else {
+ mp_msg (MSGT_VO, MSGL_V, "[gl] get_image failed: MP_IMGTYPE_IPB\n");
+ }
return VO_FALSE; // we can not provide readable buffers
- if (!gl_buffer)
- GenBuffers(1, &gl_buffer);
- BindBuffer(GL_PIXEL_UNPACK_BUFFER, gl_buffer);
+ }
+
mpi->stride[0] = mpi->width * mpi->bpp / 8;
- if (mpi->stride[0] * mpi->h > gl_buffersize) {
- BufferData(GL_PIXEL_UNPACK_BUFFER, mpi->stride[0] * mpi->h,
- NULL, GL_DYNAMIC_DRAW);
- gl_buffersize = mpi->stride[0] * mpi->h;
+ if (!get_image_pbo.name || mpi->stride[0] * mpi->h > get_image_pbo.sz) {
+ glDestroyPBO(&get_image_pbo);
+ if(!glCreatePBO(0, &get_image_pbo, gl_format, gl_type, mpi->stride[0], mpi->h, 1)) {
+ mp_msg (MSGT_VO, MSGL_V, "[gl] get_image failed: Could not create PBO\n");
+ return VO_FALSE;
+ }
+ } else {
+ glBindPBO(GL_PIXEL_UNPACK_BUFFER, &get_image_pbo);
}
- if (!gl_bufferptr)
- gl_bufferptr = MapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY);
- mpi->planes[0] = gl_bufferptr;
- BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+
+ glMapPBO(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY, &get_image_pbo);
+ mpi->planes[0] = get_image_pbo.mem;
+
+ glUnbindPBO(GL_PIXEL_UNPACK_BUFFER, &get_image_pbo);
if (mpi->planes[0] == NULL) {
if (!err_shown)
mp_msg(MSGT_VO, MSGL_ERR, "[gl] could not acquire buffer for dr\n"
@@ -701,10 +733,12 @@
mpi->stride[2] = mpi->width >> 1;
}
mpi->flags |= MP_IMGFLAG_DIRECT;
+ mp_msg (MSGT_VO, MSGL_V, "[gl] get_image_pbo .. direct, \n");
return VO_TRUE;
}
static uint32_t draw_image(mp_image_t *mpi) {
+ gl_pbo_info_t * pbo[3] = { NULL, NULL, NULL };
int slice = slice_height;
int stride[3] = {mpi->stride[0], mpi->stride[1], mpi->stride[2]};
unsigned char *planes[3] = {mpi->planes[0], mpi->planes[1], mpi->planes[2]};
@@ -718,24 +752,33 @@
planes[0] -= base;
planes[1] -= base;
planes[2] -= base;
- BindBuffer(GL_PIXEL_UNPACK_BUFFER, gl_buffer);
- UnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
- gl_bufferptr = NULL;
+ glBindPBO(GL_PIXEL_UNPACK_BUFFER, &get_image_pbo);
+ glUnmapPBO(GL_PIXEL_UNPACK_BUFFER, &get_image_pbo);
slice = 0; // always "upload" full texture
- }
- glUploadTex(gl_target, gl_format, gl_type, planes[0], stride[0],
+ mp_msg (MSGT_VO, MSGL_INFO, "[gl] draw_image: direct, \n");
+ pbo[0] = &get_image_pbo;
+ pbo[1] = &get_image_pbo;
+ pbo[2] = &get_image_pbo;
+ } else if(use_pboDMA) {
+ pbo[0] = &draw_mcpy_pbo[0];
+ pbo[1] = &draw_mcpy_pbo[1];
+ pbo[2] = &draw_mcpy_pbo[2];
+ }
+
+ glUploadTex(pbo[0], gl_target, gl_format, gl_type, planes[0], stride[0],
mpi->x, mpi->y, mpi->w, mpi->h, slice);
if (mpi->imgfmt == IMGFMT_YV12) {
ActiveTexture(GL_TEXTURE1);
- glUploadTex(gl_target, gl_format, gl_type, planes[1], stride[1],
- mpi->x / 2, mpi->y / 2, mpi->w / 2, mpi->h / 2, slice);
+ glUploadTex(pbo[1], gl_target, gl_format, gl_type, planes[1], stride[1],
+ mpi->x / 2, mpi->y / 2, mpi->w / 2, mpi->h / 2, slice);
ActiveTexture(GL_TEXTURE2);
- glUploadTex(gl_target, gl_format, gl_type, planes[2], stride[2],
- mpi->x / 2, mpi->y / 2, mpi->w / 2, mpi->h / 2, slice);
+ glUploadTex(pbo[2], gl_target, gl_format, gl_type, planes[2], stride[2],
+ mpi->x / 2, mpi->y / 2, mpi->w / 2, mpi->h / 2, slice);
ActiveTexture(GL_TEXTURE0);
}
- if (mpi->flags & MP_IMGFLAG_DIRECT)
- BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+ if (mpi->flags & MP_IMGFLAG_DIRECT) {
+ glUnbindPBO(GL_PIXEL_UNPACK_BUFFER, &get_image_pbo);
+ }
return VO_TRUE;
}
@@ -798,6 +841,7 @@
{"customtlin", OPT_ARG_BOOL, &custom_tlin, NULL},
{"customtrect", OPT_ARG_BOOL, &custom_trect, NULL},
{"osdcolor", OPT_ARG_INT, &osd_color, NULL},
+ {"pbodma", OPT_ARG_INT, &use_pboDMA, NULL},
{NULL}
};
@@ -814,6 +858,10 @@
use_rectangle = 0;
use_glFinish = 0;
swap_interval = 1;
+ use_pboDMA = (int)PBO_XFER_DISABLED;
+ assert(0==(int)PBO_XFER_DISABLED);
+ assert(1==(int)PBO_XFER_MULTIPLE_MEMCPY);
+ assert(2==(int)PBO_XFER_SINGLE_MEMCPY);
slice_height = 0;
custom_prog = NULL;
custom_tex = NULL;
@@ -869,6 +917,13 @@
" use texture_rectangle for customtex texture\n"
" osdcolor=<0xAARRGGBB>\n"
" use the given color for the OSD\n"
+ " pbodma=<0,1,2>\n"
+ " use PBO DMA transfers for any texture stream upload,\n"
+ " a safe setting, e.g. for AMD GPU's, would be: rectangle=0:pbodma=1\n"
+ " a fast setting, e.g. for NVidia GPU's, would be: rectangle=2:pbodma=2\n"
+ " 0: disabled (default).\n"
+ " 1: use multiple memcpy video to texture, slower, but shall work with all GPU's.\n"
+ " 2: use single memcpy video to texture, faster, but may not work for some GPU's (ie. AMD's).\n"
"\n" );
return -1;
}
@@ -876,12 +931,16 @@
gl_target = GL_TEXTURE_RECTANGLE;
else
gl_target = GL_TEXTURE_2D;
+ if(0>use_pboDMA || use_pboDMA>(int)PBO_XFER_SINGLE_MEMCPY) {
+ use_pboDMA=(int)PBO_XFER_DISABLED;
+ }
yuvconvtype = use_yuv | lscale << YUV_LUM_SCALER_SHIFT | cscale << YUV_CHROM_SCALER_SHIFT;
if (many_fmts)
mp_msg (MSGT_VO, MSGL_INFO, "[gl] using extended formats. "
"Use -vo gl:nomanyfmts if playback fails.\n");
mp_msg (MSGT_VO, MSGL_V, "[gl] Using %d as slice height "
"(0 means image height).\n", slice_height);
+
if( !vo_init() ) return -1; // Can't open X11
return 0;
Index: libvo/gl_common.h
===================================================================
--- libvo/gl_common.h (revision 26555)
+++ libvo/gl_common.h (working copy)
@@ -203,6 +203,38 @@
#endif
/** \} */ // end of glextdefines group
+#ifndef BUFFER_OFFSET
+ #define BUFFER_OFFSET(i) ((char *)NULL + (i))
+#endif
+
+/**
+ * Unfortunately, the PBO implementation in AMD's latest fglrx driver
+ * cannot handle a GL_UNPACK_ROW_LENGTH
+ * other than the texture stride or the POT stride.
+ * Therefore we need the 'PBO_XFER_MULTIPLE_MEMCPY' mode,
+ * in which we reshape the frame data to the texture size ourselves;
+ * this is still faster than glTexSubImage2D without any guaranteed DMA transfer.
+ */
+typedef enum { /* how glUploadTex moves frame data into a PBO; values 0..2 are also the "pbodma" option values */
+ PBO_XFER_DISABLED=0, /* glCreatePBO creates no buffer in this mode and returns 0 */
+ PBO_XFER_MULTIPLE_MEMCPY=1, /* one memcpy per row into the buffer, rows laid out with the buffer's own stride */
+ PBO_XFER_SINGLE_MEMCPY=2, /* one memcpy of h*stride bytes, source stride kept as the row length */
+ PBO_XFER_NO_MEMCPY=3 /* glUploadTex copies nothing; row length is the caller's stride */
+} gl_pbo_xfer_t;
+
+typedef struct { /* bookkeeping for one pixel buffer object; all-zero means "no buffer" */
+ gl_pbo_xfer_t mode; /* transfer mode given to glCreatePBO; glUploadTex may downgrade SINGLE to MULTIPLE */
+ GLuint name; /* buffer name from GenBuffers; 0 is treated as "not created" */
+ int tw; /* width argument passed to glCreatePBO */
+ int th; /* height argument passed to glCreatePBO */
+ int bytesPerPixel; /* glFmt2bpp(format, type) at creation time */
+ int stride; /* tw * bytesPerPixel */
+ int sz; /* th * stride, the size handed to BufferData */
+ int bound; /* set to 1 by glBindPBO, to 0 by glUnbindPBO */
+ int test; /* NOTE(review): not referenced anywhere in the visible patch */
+ uint8_t* mem; /* pointer returned by MapBuffer; NULL while unmapped */
+} gl_pbo_info_t;
+
void glAdjustAlignment(int stride);
const char *glValName(GLint value);
@@ -214,9 +246,15 @@
int w, int h, unsigned char val);
int glCreatePPMTex(GLenum target, GLenum fmt, GLint filter,
FILE *f, int *width, int *height, int *maxval);
-void glUploadTex(GLenum target, GLenum format, GLenum type,
- const void *dataptr, int stride,
- int x, int y, int w, int h, int slice);
+void glUploadTex(gl_pbo_info_t *pbo, GLenum target, GLenum format, GLenum type,
+ const void *dataptr, int stride,
+ int x, int y, int w, int h, int slice);
+void glDestroyPBO(gl_pbo_info_t * pbo);
+int glCreatePBO(gl_pbo_xfer_t mode, gl_pbo_info_t * pbo, GLenum format, GLenum type, int tw, int th, int keepBound);
+void glBindPBO(GLenum type, gl_pbo_info_t * pbo);
+void glUnbindPBO(GLenum type, gl_pbo_info_t * pbo);
+void glMapPBO(GLenum buff, GLenum mode, gl_pbo_info_t * pbo);
+void glUnmapPBO(GLenum buff, gl_pbo_info_t * pbo);
void glDrawTex(GLfloat x, GLfloat y, GLfloat w, GLfloat h,
GLfloat tx, GLfloat ty, GLfloat tw, GLfloat th,
int sx, int sy, int rect_tex, int is_yv12, int flip);
Index: libvo/gl_common.c
===================================================================
--- libvo/gl_common.c (revision 26555)
+++ libvo/gl_common.c (working copy)
@@ -418,7 +418,7 @@
fmt = GL_RGB16;
}
glCreateClearTex(target, fmt, filter, w, h, 0);
- glUploadTex(target, GL_RGB, (m > 255) ? GL_UNSIGNED_SHORT : GL_UNSIGNED_BYTE,
+ glUploadTex(NULL, target, GL_RGB, (m > 255) ? GL_UNSIGNED_SHORT : GL_UNSIGNED_BYTE,
data, w * bpp, 0, 0, w, h, 0);
free(data);
if (width) *width = w;
@@ -470,6 +470,7 @@
/**
* \brief upload a texture, handling things like stride and slices
+ * \param pbo optional PBO information, may be NULL
* \param target texture target, usually GL_TEXTURE_2D
* \param format OpenGL format of data
* \param type OpenGL type of data
@@ -482,11 +483,17 @@
* \param slice height of an upload slice, 0 for all at once
* \ingroup gltexture
*/
-void glUploadTex(GLenum target, GLenum format, GLenum type,
+void glUploadTex(gl_pbo_info_t *pbo,
+ GLenum target, GLenum format, GLenum type,
const void *dataptr, int stride,
int x, int y, int w, int h, int slice) {
+ int bytesPerPixel=glFmt2bpp(format, type), i;
const uint8_t *data = dataptr;
- int y_max = y + h;
+ int ownBondBuffer=0;
+ int ownMemMap=0;
+ int rowlenBytes=0;
+ int usePBO = NULL!=pbo && 0!=pbo->name ;
+
if (w <= 0 || h <= 0) return;
if (slice <= 0)
slice = h;
@@ -494,17 +501,128 @@
data += (h - 1) * stride;
stride = -stride;
}
- // this is not always correct, but should work for MPlayer
- glAdjustAlignment(stride);
- glPixelStorei(GL_UNPACK_ROW_LENGTH, stride / glFmt2bpp(format, type));
- for (; y + slice <= y_max; y += slice) {
- glTexSubImage2D(target, 0, x, y, w, slice, format, type, data);
- data += stride * slice;
+
+ if ( usePBO && pbo->mode==PBO_XFER_SINGLE_MEMCPY && h * stride > pbo->sz ) {
+ pbo->mode=PBO_XFER_MULTIPLE_MEMCPY;
}
- if (y < y_max)
- glTexSubImage2D(target, 0, x, y, w, y_max - y, format, type, data);
+
+ if (usePBO && h * w > pbo->sz )
+ {
+ usePBO = 0;
+ }
+
+ if(usePBO) {
+ if(!pbo->bound) {
+ glBindPBO(GL_PIXEL_UNPACK_BUFFER, pbo);
+ ownBondBuffer=1;
+ }
+
+ if(!pbo->mem) {
+ glMapPBO(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY, pbo);
+ ownMemMap=1;
+ }
+
+ if(pbo->mode==PBO_XFER_SINGLE_MEMCPY) {
+ memcpy(pbo->mem, data, h*stride);
+ rowlenBytes = stride;
+ } else if(pbo->mode==PBO_XFER_MULTIPLE_MEMCPY) {
+ for(i=0;i<h;i++)
+ {
+ memcpy(pbo->mem+(i*pbo->stride), data+(i*stride), w*bytesPerPixel);
+ }
+ rowlenBytes = pbo->stride;
+ } else {
+ // PBO_XFER_NO_MEMCPY
+ rowlenBytes = stride;
+ }
+
+ if(ownMemMap) {
+ glUnmapPBO(GL_PIXEL_UNPACK_BUFFER, pbo);
+ }
+ } else {
+ rowlenBytes = stride;
+ }
+
+ glAdjustAlignment(rowlenBytes);
+ glPixelStorei(GL_UNPACK_ROW_LENGTH, rowlenBytes / bytesPerPixel);
+
+ if(usePBO) {
+ if(!pbo->mem) {
+ glTexSubImage2D(target, 0, x, y, w, h, format, type, BUFFER_OFFSET(0));
+ } else {
+ glTexSubImage2D(target, 0, x, y, w, h, format, type, BUFFER_OFFSET(data-pbo->mem));
+ }
+ } else {
+ glTexSubImage2D(target, 0, x, y, w, h, format, type, data);
+ }
+
+ if(usePBO && ownBondBuffer) {
+ glUnbindPBO(GL_PIXEL_UNPACK_BUFFER, pbo);
+ }
}
+void glDestroyPBO(gl_pbo_info_t * pbo) { /* delete the GL buffer described by pbo and zero the descriptor */
+ if (!pbo->name) { /* no buffer name recorded: nothing to delete, descriptor left untouched */
+ return;
+ }
+ mp_msg (MSGT_VO, MSGL_V, "[gl] glDestroyPBO %d: %dx%d, bytesPP %d, stride %d, sz %d\n",
+ pbo->name, pbo->tw, pbo->th, pbo->bytesPerPixel, pbo->stride, pbo->sz);
+
+ glBindPBO(GL_PIXEL_UNPACK_BUFFER, pbo); /* bind before unmapping: UnmapBuffer is addressed by target only */
+ glUnmapPBO(GL_PIXEL_UNPACK_BUFFER, pbo); /* does nothing unless pbo->mem is non-NULL */
+ glUnbindPBO(GL_PIXEL_UNPACK_BUFFER, pbo);
+ DeleteBuffers(1, &(pbo->name));
+ memset(pbo, 0, sizeof(gl_pbo_info_t)); /* name becomes 0, so a second call returns early */
+}
+
+int glCreatePBO(gl_pbo_xfer_t mode, gl_pbo_info_t * pbo, GLenum format, GLenum type, int tw, int th, int keepBound) { /* create a pixel-unpack buffer for tw x th pixels; returns 1 on success, 0 if refused */
+ int bytesPerPixel=glFmt2bpp(format, type);
+ memset(pbo, 0, sizeof(gl_pbo_info_t)); /* descriptor is zeroed even when creation is refused below */
+ if (mode==PBO_XFER_DISABLED || !DeleteBuffers || !GenBuffers || !BindBuffer || !BufferData || !MapBuffer) { /* NOTE(review): UnmapBuffer is used elsewhere but not checked here */
+ return 0; /* also reached for a literal mode of 0, since PBO_XFER_DISABLED is 0 */
+ }
+ pbo->mode=mode;
+ GenBuffers(1, &(pbo->name));
+ pbo->tw=tw;
+ pbo->th=th;
+ pbo->bytesPerPixel=bytesPerPixel;
+ pbo->stride=tw*bytesPerPixel; /* bytes per row, so tw is expected in pixels */
+ pbo->sz=th*pbo->stride; /* total buffer size in bytes */
+ pbo->mem=NULL;
+ glBindPBO(GL_PIXEL_UNPACK_BUFFER, pbo);
+ BufferData(GL_PIXEL_UNPACK_BUFFER, pbo->sz, NULL, GL_STREAM_DRAW); /* NULL data pointer: size the store without uploading anything */
+ mp_msg (MSGT_VO, MSGL_V, "[gl] glCreatePBO %d: %dx%d, bytesPP %d, stride %d, sz %d, mode %d\n",
+ pbo->name, tw, th, bytesPerPixel, pbo->stride, pbo->sz, pbo->mode);
+ if(!keepBound) { /* non-zero keepBound leaves the new buffer bound for the caller */
+ glUnbindPBO(GL_PIXEL_UNPACK_BUFFER, pbo);
+ }
+ return 1;
+}
+
+void glBindPBO(GLenum buff, gl_pbo_info_t * pbo) { /* bind pbo's buffer to target buff and record it as bound */
+ BindBuffer(buff, pbo->name);
+ pbo->bound=1; /* glUploadTex reads this flag to decide whether it has to bind the buffer itself */
+}
+
+void glUnbindPBO(GLenum buff, gl_pbo_info_t * pbo) { /* bind buffer 0 to target buff and record pbo as unbound */
+ BindBuffer(buff, 0); /* NOTE(review): resets the target regardless of which buffer was bound to it */
+ pbo->bound=0;
+}
+
+void glMapPBO(GLenum buff, GLenum mode, gl_pbo_info_t * pbo) { /* map the buffer on target buff and store the pointer in pbo->mem */
+ if(pbo->mem) { /* a mapping is already recorded: release it before mapping again */
+ glUnmapPBO(buff, pbo);
+ }
+ pbo->mem = MapBuffer(buff, mode); /* stored as returned; no NULL check here, callers inspect pbo->mem */
+}
+
+void glUnmapPBO(GLenum buff, gl_pbo_info_t * pbo) { /* release the mapping recorded in pbo->mem, if any */
+ if(pbo->mem) { /* no recorded mapping means no GL call is made */
+ UnmapBuffer(buff); /* NOTE(review): return value is ignored */
+ pbo->mem = NULL;
+ }
+}
+
static void fillUVcoeff(GLfloat *ucoef, GLfloat *vcoef,
float uvcos, float uvsin) {
int i;
@@ -981,7 +1099,7 @@
gen_gamma_map(&lookup_data[2 * LOOKUP_RES], LOOKUP_RES, bgamma);
glCreateClearTex(GL_TEXTURE_2D, GL_LUMINANCE8, GL_LINEAR,
LOOKUP_RES, 4, 0);
- glUploadTex(GL_TEXTURE_2D, GL_LUMINANCE, GL_UNSIGNED_BYTE, lookup_data,
+ glUploadTex(NULL, GL_TEXTURE_2D, GL_LUMINANCE, GL_UNSIGNED_BYTE, lookup_data,
LOOKUP_RES, 0, 0, LOOKUP_RES, 4, 0);
ActiveTexture(GL_TEXTURE0);
texs[0] += '0';
Index: DOCS/man/en/mplayer.1
===================================================================
--- DOCS/man/en/mplayer.1 (revision 26555)
+++ DOCS/man/en/mplayer.1 (working copy)
@@ -3622,6 +3622,20 @@
2: Use the GL_ARB_texture_non_power_of_two extension.
In some cases only supported in software and thus very slow.
.RE
+.IPs pbodma=<0,1,2>
+Use PBO DMA transfers for any texture stream upload.
+.br
+A safe setting, e.g.\& for AMD GPUs, would be: rectangle=0:pbodma=1
+.br
+A fast setting, e.g.\& for NVIDIA GPUs, would be: rectangle=2:pbodma=2
+.br
+.RSss
+0: Disabled (default).
+.br
+1: Use multiple memcpy calls from video to texture; slower, but should work with all GPUs.
+.br
+2: Use a single memcpy from video to texture; faster, but may not work with some GPUs (e.g.\& AMD's).
+.RE
.IPs swapinterval=<n>
Minimum interval between two buffer swaps, counted in
displayed frames (default: 1).
_______________________________________________
MPlayer-dev-eng mailing list
MPlayer-dev-eng@xxxxxxxxxxxx
https://lists.mplayerhq.hu/mailman/listinfo/mplayer-dev-eng