git.neil.brown.name Git - history.git/commitdiff
[TGAFB] Implement the fb_imageblit hook.
author: Richard Henderson <rth@are.twiddle.net>
Mon, 30 Dec 2002 16:23:48 +0000 (08:23 -0800)
committer: Richard Henderson <rth@are.twiddle.net>
Mon, 30 Dec 2002 16:23:48 +0000 (08:23 -0800)
Speeds up rendering of text by around 7x for 8bpp cards,
as you'd expect from the difference in the volume of data
passed across the bus.  Thus the win should be about 31x
for 32bpp cards.

drivers/video/tgafb.c
include/video/tgafb.h

index 230f0c971e664747661a79b69123dfee47da6f42..8cc9094311e6f4ac969f4e4d82582b4247ac50f1 100644 (file)
@@ -39,6 +39,9 @@ static int tgafb_setcolreg(unsigned, unsigned, unsigned, unsigned,
                           unsigned, struct fb_info *);
 static int tgafb_blank(int, struct fb_info *);
 static void tgafb_init_fix(struct fb_info *);
+
+static void tgafb_imageblit(struct fb_info *, struct fb_image *);
+
 static int tgafb_pci_register(struct pci_dev *, const struct pci_device_id *);
 #ifdef MODULE
 static void tgafb_pci_unregister(struct pci_dev *);
@@ -59,7 +62,7 @@ static struct fb_ops tgafb_ops = {
        .fb_blank               = tgafb_blank,
        .fb_fillrect            = cfb_fillrect,
        .fb_copyarea            = cfb_copyarea,
-       .fb_imageblit           = cfb_imageblit,
+       .fb_imageblit           = tgafb_imageblit,
        .fb_cursor              = soft_cursor,
 };
 
@@ -499,6 +502,256 @@ tgafb_blank(int blank, struct fb_info *info)
 }
 
 
+/*
+ *  Acceleration.
+ */
+
+/*
+ *  tgafb_imageblit - draw an image, accelerating 1-bit-deep bitmaps.
+ *
+ *  @info:  frame buffer to draw on; its virtual resolution, line length,
+ *          pseudo palette and driver private data (struct tga_par) are read.
+ *  @image: image to draw; dx, dy, width, height, depth, fg_color, bg_color
+ *          and data are read.
+ *
+ *  Images deeper than one bit per pixel are handed to cfb_imageblit()
+ *  unchanged.  For 1-bit images the foreground and background colours are
+ *  loaded into the chip, the mode register is switched to opaque stipple,
+ *  and the bitmap is written to the frame buffer one bit per pixel, with
+ *  the PIXELMASK register limiting which pixels of each written word are
+ *  affected.  On exit the mode register is set back to simple mode and
+ *  PIXELMASK to 0xffffffff.
+ *
+ *  The image is cropped to the virtual screen; the source row stride
+ *  always follows the uncropped width.
+ */
+static void
+tgafb_imageblit(struct fb_info *info, struct fb_image *image)
+{
+       /* bitrev[b] is byte b with its eight bits in reverse order.  */
+       static unsigned char const bitrev[256] = {
+               0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
+               0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
+               0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
+               0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
+               0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
+               0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
+               0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
+               0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
+               0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
+               0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
+               0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
+               0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
+               0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
+               0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
+               0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
+               0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
+               0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1,
+               0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
+               0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9,
+               0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
+               0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,
+               0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
+               0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed,
+               0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
+               0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3,
+               0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
+               0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,
+               0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
+               0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7,
+               0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
+               0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef,
+               0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff
+       };
+
+       struct tga_par *par = (struct tga_par *) info->par;
+       u32 fgcolor, bgcolor, dx, dy, width, height, vxres, vyres, pixelmask;
+       unsigned long rincr, line_length, shift, pos, is8bpp;
+       unsigned long i, j, k;
+       const unsigned char *data;
+       void *regs_base, *fb_base;
+
+       dx = image->dx;
+       dy = image->dy;
+       width = image->width;
+       height = image->height;
+       vxres = info->var.xres_virtual;
+       vyres = info->var.yres_virtual;
+       line_length = info->fix.line_length;
+
+       /* Source row stride in bytes.  Taken before cropping, because the
+          bitmap rows keep their full width even if we draw fewer pixels.  */
+       rincr = (width + 7) / 8;
+
+       /* Crop the image to the screen.  */
+       if (dx > vxres || dy > vyres)
+               return;
+       if (dx + width > vxres)
+               width = vxres - dx;
+       if (dy + height > vyres)
+               height = vyres - dy;
+
+       /* For copies that aren't pixel expansion, there's little we
+          can do better than the generic code.  */
+       /* ??? There is a DMA write mode; I wonder if that could be
+          made to pull the data from the image buffer...  */
+       if (image->depth > 1) {
+               cfb_imageblit(info, image);
+               return;
+       }
+
+       /* Nothing left to draw after cropping.  Bailing out here also
+          keeps the pixel mask computation below well defined.  */
+       if (width == 0 || height == 0)
+               return;
+
+       regs_base = par->tga_regs_base;
+       fb_base = par->tga_fb_base;
+       is8bpp = par->tga_type == TGA_TYPE_8PLANE;
+
+       /* Expand the color values to fill 32-bits.  */
+       /* ??? Would be nice to notice colour changes elsewhere, so
+          that we can do this only when necessary.  */
+       fgcolor = image->fg_color;
+       bgcolor = image->bg_color;
+       if (is8bpp) {
+               /* Replicate the 8-bit index into all four bytes.  */
+               fgcolor |= fgcolor << 8;
+               fgcolor |= fgcolor << 16;
+               bgcolor |= bgcolor << 8;
+               bgcolor |= bgcolor << 16;
+       } else {
+               fgcolor = ((u32 *)info->pseudo_palette)[fgcolor];
+               bgcolor = ((u32 *)info->pseudo_palette)[bgcolor];
+       }
+       __raw_writel(fgcolor, regs_base + TGA_FOREGROUND_REG);
+       __raw_writel(bgcolor, regs_base + TGA_BACKGROUND_REG);
+
+       /* Acquire proper alignment; set up the PIXELMASK register
+          so that we only write the proper character cell.  */
+       /* pos is a byte offset into the frame buffer, so dx must be
+          scaled by the pixel size: one byte per pixel at 8bpp, four
+          otherwise.  shift is the number of pixels by which the image
+          starts beyond the aligned address.  */
+       pos = dy * line_length;
+       if (is8bpp) {
+               pos += dx;
+               shift = pos & 3;
+               pos &= -4;
+       } else {
+               pos += (unsigned long) dx * 4;
+               shift = (pos & 7) >> 2;
+               pos &= -8;
+       }
+
+       data = (const unsigned char *) image->data;
+
+       /* Enable opaque stipple mode.  */
+       __raw_writel((is8bpp
+                     ? TGA_MODE_SBM_8BPP | TGA_MODE_OPAQUE_STIPPLE
+                     : TGA_MODE_SBM_24BPP | TGA_MODE_OPAQUE_STIPPLE),
+                    regs_base + TGA_MODE_REG);
+
+       if (width + shift <= 32) {
+               unsigned long bwidth;
+
+               /* Handle common case of imaging a single character, in
+                  a font less than 32 pixels wide.  */
+
+               /* width may be exactly 32 here (when shift is 0), for
+                  which "1 << width" would be undefined.  This form
+                  yields 0xffffffff in that case; width is nonzero.  */
+               pixelmask = (2ul << (width - 1)) - 1;
+               pixelmask <<= shift;
+               __raw_writel(pixelmask, regs_base + TGA_PIXELMASK_REG);
+               wmb();
+
+               bwidth = (width + 7) / 8;
+
+               for (i = 0; i < height; ++i) {
+                       u32 mask = 0;
+
+                       /* The image data is bit big endian; we need
+                          little endian.  */
+                       for (j = 0; j < bwidth; ++j)
+                               mask |= (u32) bitrev[data[j]] << (j * 8);
+
+                       __raw_writel(mask << shift, fb_base + pos);
+
+                       pos += line_length;
+                       data += rincr;
+               }
+               wmb();
+               __raw_writel(0xffffffff, regs_base + TGA_PIXELMASK_REG);
+       } else if (shift == 0) {
+               unsigned long pos0 = pos;
+               const unsigned char *data0 = data;
+               /* Frame buffer bytes covered by one byte (8 pixels)
+                  of bitmap data.  */
+               unsigned long bincr = (is8bpp ? 8 : 8*4);
+               unsigned long bwidth;
+
+               /* Handle another common case in which accel_putcs
+                  generates a large bitmap, which happens to be aligned.
+                  Allow the tail to be misaligned.  This case is
+                  interesting because we've not got to hold partial
+                  bytes across the words being written.  */
+
+               wmb();
+
+               /* First pass: the whole 32-pixel words of every row.  */
+               bwidth = (width / 8) & -4;
+               for (i = 0; i < height; ++i) {
+                       for (j = 0; j < bwidth; j += 4) {
+                               u32 mask = 0;
+                               for (k = 0; k < 4; ++k)
+                                       mask |= (u32) bitrev[data[j+k]] << (k * 8);
+                               __raw_writel(mask, fb_base + pos + j*bincr);
+                       }
+                       pos += line_length;
+                       data += rincr;
+               }
+               wmb();
+
+               /* Second pass: the remaining (width & 31) pixels of every
+                  row, if any, under a narrower pixel mask.  */
+               pixelmask = (1ul << (width & 31)) - 1;
+               if (pixelmask) {
+                       __raw_writel(pixelmask, regs_base + TGA_PIXELMASK_REG);
+                       wmb();
+
+                       pos = pos0 + bwidth*bincr;
+                       data = data0 + bwidth;
+                       bwidth = ((width & 31) + 7) / 8;
+
+                       for (i = 0; i < height; ++i) {
+                               u32 mask = 0;
+                               for (k = 0; k < bwidth; ++k)
+                                       mask |= (u32) bitrev[data[k]] << (k * 8);
+                               __raw_writel(mask, fb_base + pos);
+                               pos += line_length;
+                               data += rincr;
+                       }
+                       wmb();
+                       __raw_writel(0xffffffff, regs_base + TGA_PIXELMASK_REG);
+               }
+       } else {
+               unsigned long pos0 = pos;
+               const unsigned char *data0 = data;
+               /* Frame buffer bytes covered by one byte (8 pixels)
+                  of bitmap data.  */
+               unsigned long bincr = (is8bpp ? 8 : 8*4);
+               unsigned long bwidth;
+
+               /* Finally, handle the generic case of misaligned start.
+                  Here we split the write into 16-bit spans.  This allows
+                  us to use only one pixel mask, instead of four as would
+                  be required by writing 24-bit spans.  */
+
+               pixelmask = 0xffff << shift;
+               __raw_writel(pixelmask, regs_base + TGA_PIXELMASK_REG);
+               wmb();
+
+               /* First pass: the whole 16-pixel spans of every row.  */
+               bwidth = (width / 8) & -2;
+               for (i = 0; i < height; ++i) {
+                       for (j = 0; j < bwidth; j += 2) {
+                               u32 mask;
+                               mask = bitrev[data[j]];
+                               mask |= bitrev[data[j+1]] << 8;
+                               mask <<= shift;
+                               __raw_writel(mask, fb_base + pos + j*bincr);
+                       }
+                       pos += line_length;
+                       data += rincr;
+               }
+               wmb();
+
+               /* Second pass: the remaining (width & 15) pixels of every
+                  row, if any.  */
+               pixelmask = ((1ul << (width & 15)) - 1) << shift;
+               if (pixelmask) {
+                       __raw_writel(pixelmask, regs_base + TGA_PIXELMASK_REG);
+                       wmb();
+
+                       pos = pos0 + bwidth*bincr;
+                       data = data0 + bwidth;
+                       /* Nonzero when the tail needs a second source byte.  */
+                       bwidth = (width & 15) > 8;
+
+                       for (i = 0; i < height; ++i) {
+                               u32 mask = bitrev[data[0]];
+                               if (bwidth)
+                                       mask |= bitrev[data[1]] << 8;
+                               mask <<= shift;
+                               __raw_writel(mask, fb_base + pos);
+                               pos += line_length;
+                               data += rincr;
+                       }
+                       wmb();
+               }
+               __raw_writel(0xffffffff, regs_base + TGA_PIXELMASK_REG);
+       }
+
+       /* Disable opaque stipple mode.  */
+       __raw_writel((is8bpp
+                     ? TGA_MODE_SBM_8BPP | TGA_MODE_SIMPLE
+                     : TGA_MODE_SBM_24BPP | TGA_MODE_SIMPLE),
+                    regs_base + TGA_MODE_REG);
+}
+
+
 /*
  *  Initialisation
  */
index fbfae108a525fbf509a1744a3b780502120eb95d..0e76dda6fc4da6a6f18228c2340ed4d719667742 100644 (file)
 #define        TGA_24PLANE_FB_OFFSET           0x0804000
 #define        TGA_24PLUSZ_FB_OFFSET           0x1004000
 
+#define TGA_FOREGROUND_REG             0x0020
+#define TGA_BACKGROUND_REG             0x0024
 #define        TGA_PLANEMASK_REG               0x0028
+#define TGA_PIXELMASK_ONESHOT_REG      0x002c
 #define        TGA_MODE_REG                    0x0030
 #define        TGA_RASTEROP_REG                0x0034
 #define        TGA_PIXELSHIFT_REG              0x0038
 #define        TGA_RAMDAC_SETUP_REG            0x00c0
 #define        TGA_BLOCK_COLOR0_REG            0x0140
 #define        TGA_BLOCK_COLOR1_REG            0x0144
+#define        TGA_BLOCK_COLOR2_REG            0x0148
+#define        TGA_BLOCK_COLOR3_REG            0x014c
+#define        TGA_BLOCK_COLOR4_REG            0x0150
+#define        TGA_BLOCK_COLOR5_REG            0x0154
+#define        TGA_BLOCK_COLOR6_REG            0x0158
+#define        TGA_BLOCK_COLOR7_REG            0x015c
+#define TGA_COPY64_SRC                 0x0160
+#define TGA_COPY64_DST                 0x0164
 #define        TGA_CLOCK_REG                   0x01e8
 #define        TGA_RAMDAC_REG                  0x01f0
 #define        TGA_CMD_STAT_REG                0x01f8
 #define TGA_VALID_BLANK                        0x02
 #define TGA_VALID_CURSOR               0x04
 
+#define TGA_MODE_SBM_8BPP              0x000
+#define TGA_MODE_SBM_24BPP             0x300
+
+#define TGA_MODE_SIMPLE                        0x00
+#define TGA_MODE_SIMPLEZ               0x10
+#define TGA_MODE_OPAQUE_STIPPLE                0x01
+#define TGA_MODE_OPAQUE_FILL           0x21
+#define TGA_MODE_TRANSPARENT_STIPPLE   0x03
+#define TGA_MODE_TRANSPARENT_FILL      0x23
+#define TGA_MODE_BLOCK_STIPPLE         0x0d
+#define TGA_MODE_BLOCK_FILL            0x2d
+#define TGA_MODE_COPY                  0x07
+#define TGA_MODE_DMA_READ_COPY_ND      0x17
+#define TGA_MODE_DMA_READ_COPY_D       0x37
+#define TGA_MODE_DMA_WRITE_COPY                0x1f
+
 
 /*
  * Useful defines for managing the ICS1562 PLL clock