/* * TCX framebuffer - hardware acceleration. * * Copyright (C) 2009 Michael Lorenz * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * MICHAEL LORENZ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* $NetBSD: tcx_accel.c,v 1.10 2016/09/23 20:50:54 macallan Exp $ */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include #include "tcx.h" #ifdef DEBUG #define ENTER xf86Msg(X_ERROR, "%s\n", __func__) #define LEAVE xf86Msg(X_ERROR, "%s done\n", __func__) #else #define ENTER #define LEAVE #endif static void TcxWaitMarker(ScreenPtr pScreenInfo, int Marker) { ENTER; /* do nothing */ } static int TcxMarkSync(ScreenPtr pScreenInfo) { ENTER; return 0; } static Bool TcxPrepareCopy ( PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, int xdir, int ydir, int alu, Pixel planemask ) { ScrnInfoPtr pScreenInfo = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo); ENTER; /* weed out the cases we can't accelerate */ #ifdef DEBUG xf86Msg(X_ERROR, "alu: %d mask %08x\n", alu, planemask); #endif if (alu != GXcopy) return FALSE; if ((planemask != 0xffffffff) && (planemask != 0x00ffffff)) return FALSE; pTcx->xdir = xdir; pTcx->ydir = ydir; pTcx->srcoff = exaGetPixmapOffset(pSrcPixmap) >> pTcx->pitchshift; pTcx->srcpitch = exaGetPixmapPitch(pSrcPixmap) >> pTcx->pitchshift; LEAVE; return TRUE; } static void TcxCopy ( PixmapPtr pDstPixmap, int srcX, int srcY, int dstX, int dstY, int w, int h ) { ScrnInfoPtr pScreenInfo = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo); volatile uint64_t cmd, lcmd; int line, col, leftover, src, dst, xsteps, sstep, dstep, dpitch, x, xoff; int doff; ENTER; leftover = w & 0x1f; if (leftover > 0) lcmd = 0x3000000000000000LL | (leftover - 1) << 24; doff = exaGetPixmapOffset(pDstPixmap) >> pTcx->pitchshift; dpitch = exaGetPixmapPitch(pDstPixmap) >> pTcx->pitchshift; src = srcX + srcY * pTcx->srcpitch + pTcx->srcoff; dst = dstX + dstY * dpitch + doff; if (pTcx->ydir < 0) { src += (h - 1) * pTcx->srcpitch; dst += (h - 1) * dpitch; sstep = 0 - pTcx->srcpitch; dstep = 0 - dpitch; } else { sstep = pTcx->srcpitch; dstep = dpitch; } xsteps = w >> 5; if ((pTcx->xdir > 0) || (w < 33)) { for (line = 0; line < h; line++) { x = xsteps; xoff = 0; while (x > 0) { cmd = 0x300000001f000000LL | (uint64_t)(src + xoff); pTcx->rblit[dst + xoff] = cmd; xoff += 32; x--; } if (leftover > 0) { cmd = lcmd | (uint64_t)(src + xoff); pTcx->rblit[dst + xoff] = cmd; } src += sstep; dst += dstep; } } else { /* same thing but right to left */ for (line = 0; line < h; line++) { x = xsteps; xoff = xsteps << 5; if (leftover > 0) { cmd = lcmd | (uint64_t)(src + xoff); pTcx->rblit[dst + xoff] = cmd; } xoff -= 32; while (x > 0) { cmd = 0x300000001f000000LL | (uint64_t)(src + xoff); pTcx->rblit[dst + xoff] = cmd; xoff -= 32; x--; } src += sstep; dst += dstep; } } LEAVE; } static void TcxDoneCopy(PixmapPtr pDstPixmap) { ENTER; LEAVE; } static Bool TcxPrepareSolid( PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg) { ScrnInfoPtr pScreenInfo = xf86Screens[pPixmap->drawable.pScreen->myNum]; TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo); uint32_t hwfg; ENTER; /* weed out the cases we can't accelerate */ if (pTcx->HasStipROP) { hwfg = alu << 28; } else if (alu == GXcopy) { hwfg = 0x30000000; } else return FALSE; if ((planemask != 0xffffffff) && (planemask != 0x00ffffff)) return FALSE; if (exaGetPixmapOffset(pPixmap) != 0) return FALSE; pTcx->fg = (fg & 0x00ffffff); /* set colour space ID if we're in 24bit mode */ if (pTcx->pitchshift != 0) hwfg |= 0x03000000; pTcx->fg |= hwfg; #ifdef DEBUG xf86Msg(X_ERROR, "fg: %08x\n", hwfg); #endif LEAVE; return TRUE; } static void TcxSolid( PixmapPtr pPixmap, int x1, int y1, int x2, int y2) { ScrnInfoPtr pScreenInfo = xf86Screens[pPixmap->drawable.pScreen->myNum]; TcxPtr pTcx = GET_TCX_FROM_SCRN(pScreenInfo); int dpitch, dst, line, fullsteps, i; volatile uint64_t cmd, rcmd, lcmd, tmpl; uint32_t pmask; dpitch = exaGetPixmapPitch(pPixmap) >> pTcx->pitchshift; dst = x1 + y1 * dpitch; tmpl = ((uint64_t)pTcx->fg) << 32; /* * thanks to the funky architecture of the tcx's stipple 'engine' we have * to deal with two different cases: * - the whole width of the rectangle fits into a single 32 pixel aligned * unit of 32 pixels * - the first and the last 32bit unit may or may not contain less than * 32 pixels */ x2 -= 1; if ((x1 & 0xffe0) == (x2 & 0xffe0)) { /* the whole width fits in one 32 pixel write */ /* first zero out pixels on the right */ pmask = 0xffffffff << (31 - (x2 & 0x1f)); /* then mask out pixels on the left */ pmask &= (0xffffffff >> (x1 & 0x1f)); #ifdef DEBUG xf86Msg(X_ERROR, "%d %d %08x %d %d\n", x1, x2, pmask, y1, y2); #endif cmd = tmpl | (uint64_t)pmask; dst &= 0xffffffe0; for (line = y1; line < y2; line++) { pTcx->rstip[dst] = cmd; dst += dpitch; } } else { /* at least two writes per line */ pmask = 0xffffffff << (31 - (x2 & 0x1f)); rcmd = tmpl | (uint64_t)pmask; pmask = 0xffffffff >> (x1 & 0x1f); lcmd = tmpl | (uint64_t)pmask; cmd = tmpl | 0xffffffffLL; dst &= 0xffffffe0; fullsteps = ((x2 >> 5) - (x1 >> 5)); #ifdef DEBUG xf86Msg(X_ERROR, "%d %d %08x %d %d\n", x1, x2, pmask, y1, y2); xf86Msg(X_ERROR, "fullsteps: %d\n", fullsteps); #endif fullsteps = fullsteps << 5; for (line = y1; line < y2; line++) { pTcx->rstip[dst] = lcmd; for (i = 32; i < fullsteps; i+= 32) pTcx->rstip[dst + i] = cmd; pTcx->rstip[dst + i] = rcmd; dst += dpitch; } } } /* * Memcpy-based UTS. */ static Bool TcxUploadToScreen(PixmapPtr pDst, int x, int y, int w, int h, char *src, int src_pitch) { ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; TcxPtr pTcx = GET_TCX_FROM_SCRN(pScrn); char *dst = pTcx->fb + exaGetPixmapOffset(pDst); int dst_pitch = exaGetPixmapPitch(pDst); int bpp = pDst->drawable.bitsPerPixel; int cpp = (bpp + 7) / 8; int wBytes = w * cpp; ENTER; dst += (x * cpp) + (y * dst_pitch); while (h--) { memcpy(dst, src, wBytes); src += src_pitch; dst += dst_pitch; } LEAVE; return TRUE; } /* * Memcpy-based DFS. */ static Bool TcxDownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, char *dst, int dst_pitch) { ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum]; TcxPtr pTcx = GET_TCX_FROM_SCRN(pScrn); char *src = pTcx->fb + exaGetPixmapOffset(pSrc); int src_pitch = exaGetPixmapPitch(pSrc); int bpp = pSrc->drawable.bitsPerPixel; int cpp = (bpp + 7) / 8; int wBytes = w * cpp; ENTER; src += (x * cpp) + (y * src_pitch); while (h--) { memcpy(dst, src, wBytes); src += src_pitch; dst += dst_pitch; } LEAVE; return TRUE; } Bool TcxInitAccel(ScreenPtr pScreen) { ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; TcxPtr pTcx = GET_TCX_FROM_SCRN(pScrn); ExaDriverPtr pExa; pExa = exaDriverAlloc(); if (!pExa) return FALSE; pTcx->pExa = pExa; pExa->exa_major = EXA_VERSION_MAJOR; pExa->exa_minor = EXA_VERSION_MINOR; /* * The S24 can display both 8 and 24bit data at the same time, and in * 24bit we can choose between gamma corrected and direct. No idea how that * would map to EXA - we'd have to pick the right framebuffer to draw into * and Solid() would need to know what kind of pixels to write */ pExa->memoryBase = pTcx->fb; if (pScrn->depth == 8) { pExa->memorySize = pTcx->vramsize; pExa->offScreenBase = pTcx->psdp->width * pTcx->psdp->height; pExa->pixmapOffsetAlign = 1; pExa->pixmapPitchAlign = 1; } else { pExa->memorySize = 1024 * 1024 * 4; pExa->offScreenBase = pTcx->psdp->width * pTcx->psdp->height * 4; pExa->pixmapOffsetAlign = 4; pExa->pixmapPitchAlign = 4; } pExa->flags = EXA_OFFSCREEN_PIXMAPS; pExa->maxX = 2048; pExa->maxY = 2048; /* dummy, available VRAM is the limit */ pExa->MarkSync = TcxMarkSync; pExa->WaitMarker = TcxWaitMarker; pExa->PrepareSolid = TcxPrepareSolid; pExa->Solid = TcxSolid; pExa->DoneSolid = TcxDoneCopy; pExa->PrepareCopy = TcxPrepareCopy; pExa->Copy = TcxCopy; pExa->DoneCopy = TcxDoneCopy; /* EXA hits more optimized paths when it does not have to fallback because * of missing UTS/DFS, hook memcpy-based UTS/DFS. */ pExa->UploadToScreen = TcxUploadToScreen; pExa->DownloadFromScreen = TcxDownloadFromScreen; return exaDriverInit(pScreen, pExa); }