/*
 * Copyright 1992 by Kevin E. Martin, Chapel Hill, North Carolina.
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that
 * copyright notice and this permission notice appear in supporting
 * documentation, and that the name of Kevin E. Martin not be used in
 * advertising or publicity pertaining to distribution of the software without
 * specific, written prior permission.  Kevin E. Martin makes no
 * representations about the suitability of this software for any purpose.
 * It is provided "as is" without express or implied warranty.
 *
 * KEVIN E. MARTIN DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
 * EVENT SHALL KEVIN E. MARTIN BE LIABLE FOR ANY SPECIAL, INDIRECT OR
 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 * PERFORMANCE OF THIS SOFTWARE.
 *
 */
#include "misc.h"
#include "x386.h"
#include "reg8514.h"
#include "ibm8514im.h"
#include "scrnintstr.h"

#include "memcopy.h"
/*
 * File-local forward memcpy assembled from the memcopy.h copy primitives.
 * Copies len bytes from srcpp to dstpp, lowest address first, and returns
 * dstpp.
 */
extern inline void * memcpy (void * dstpp, const void * srcpp, size_t len)
{
  unsigned long int to = (long int) dstpp;
  unsigned long int from = (long int) srcpp;

  /* Large enough for the word-at-a-time path?  */
  if (len >= OP_T_THRES)
    {
      /* Number of leading bytes that must be moved one at a time so that
         the destination address becomes a multiple of OPSIZ.  Computed
         before the copy because BYTE_COPY_FWD advances its pointers.  */
      unsigned long int lead = (-to) % OPSIZ;

      len -= lead;
      BYTE_COPY_FWD (to, from, lead);

      /* Bulk copy with an aligned destination.  The macro stores the
         number of bytes it did not copy back into len.  */
      WORD_COPY_FWD (to, from, len, len);
    }

  /* Whatever is left (or everything, for a short copy) goes bytewise.  */
  BYTE_COPY_FWD (to, from, len);

  return dstpp;
}

/*
 * reorder(a, b): store into b the low byte of a with its bit order
 * mirrored (bit 7 <-> bit 0, bit 6 <-> bit 1, and so on).
 *
 * Both arguments are parenthesized so that expressions such as
 * reorder(x | y, dst) expand correctly, and the expansion is a single
 * expression without a trailing semicolon so it is safe inside an
 * unbraced if/else.  Note that a is evaluated eight times; do not pass an
 * expression with side effects.
 */
#define	reorder(a,b)	((b) = \
	(((a) & 0x80) >> 7) | \
	(((a) & 0x40) >> 5) | \
	(((a) & 0x20) >> 3) | \
	(((a) & 0x10) >> 1) | \
	(((a) & 0x08) << 1) | \
	(((a) & 0x04) << 3) | \
	(((a) & 0x02) << 5) | \
	(((a) & 0x01) << 7))

/* Bit-reversal lookup table: ibm8514ImageInit() fills entry i with i's
   eight bits in mirrored order.  Read through the SWPBIT() macro. */
/*static */unsigned char swapbits[256];
/* Base address the Image routines cast to char * and memcpy to/from. */
extern pointer vgaBase;
/* Value OR'd with the bank number whenever a bank is selected (written to
   port 0x3d5 after index 0x35 has been written to port 0x3d4).  It is not
   assigned anywhere in this file. */
unsigned char port35;
/* Compared against S3_801 to decide whether the extra 801 register
   sequences are run around each transfer. */
extern short chip_id;

/* Its virtualX member supplies VGAWIDTH when VGA256INFOREC_HOLDS_VGAWIDTH
   is defined. */
extern ScrnInfoRec vga256InfoRec;
/*
 * ibm8514ImageInit -- build the swapbits[] lookup table so that
 * swapbits[v] holds v with its eight bits in reverse order.
 */
void
ibm8514ImageInit()
{
    int value = 0;

    while (value < 256) {
        reorder(value,swapbits[value]);
        value++;
    }
}

/* Fast ImageWrite(), ImageRead(), and ImageFill() routines. */
/* There are two cases: (i) a bank switch can occur in the middle of a */
/* raster line, and (ii) a mid-line bank switch is guaranteed impossible. */
/* In theory, vga256InfoRec.virtualX should contain the number of bytes */
/* on the raster line; however, this is not necessarily true, and in */
/* many situations the S3 card will always use 1024. */
/*				Phil Richards <pgr@prg.ox.ac.uk> */
/*				26th November 1992 */

/* Number of bytes addressable through the window before the next bank
   must be selected. */
#define VGABANKSIZE 0x10000 /* 64k */
/* Bank number left selected by the most recent Image routine; the routines
   skip the initial bank select when it already matches.  Starts at -1.
   NOTE(review): plain char has implementation-defined signedness, so the
   -1 sentinel assumes a signed char -- confirm for the target compiler. */
static char old_bank = -1;

/* While this macro is defined, the variants that take the row pitch from
   vga256InfoRec.virtualX (and cope with a bank boundary in the middle of a
   row) are compiled; otherwise the fixed 1024-byte-pitch variants are. */
#define   VGA256INFOREC_HOLDS_VGAWIDTH 
#ifdef   VGA256INFOREC_HOLDS_VGAWIDTH
/* Row pitch used to turn (x, y) into a byte offset. */
#define VGAWIDTH (vga256InfoRec.virtualX)

/*
 * ibm8514ImageWrite -- copy a w-by-h block of bytes from a host image to
 * the screen at (x, y), using memcpy through the banked window at vgaBase.
 * A row that straddles a bank boundary is copied in two pieces with a
 * bank switch in between.
 *
 *   psrc, pwidth   source image base and its row pitch in bytes
 *   px, py         position of the block inside the source image
 *   alu            OR'd with FSS_PCDATA and written to FRGD_MIX
 *   planemask      written to WRT_MASK
 *
 * On return FRGD_MIX is set to FSS_FRGDCOL | MIX_SRC and old_bank holds
 * the bank that was selected last.
 */
void
ibm8514ImageWrite(x, y, w, h, psrc, pwidth, px, py, alu, planemask)
    int			x;
    int			y;
    int			w;
    int			h;
    unsigned char	*psrc;
    int			pwidth;
    int			px;
    int			py;
    short		alu;
    short		planemask;
{
    char *fb = (char *) vgaBase;
    unsigned short saved_crt40;
    char bank;
    int row, offset;

    WaitQueue(2);
    outpw(FRGD_MIX, FSS_PCDATA | alu);
    outpw(WRT_MASK, planemask);

    WaitQueue(8);

    /* First source byte, and the (bank, offset-in-bank) of the first
       destination byte. */
    psrc  += pwidth * py + px;
    offset = (y * VGAWIDTH) + x;
    bank   = offset / VGABANKSIZE;
    offset = offset % VGABANKSIZE;

    /* Spin until bit 0x200 of port 0x9ae8 reads as zero (presumably the
       engine-busy flag -- confirm against the chip documentation). */
    while ((inpw(0x9ae8) & 0x200) != 0)
        ;

    if (chip_id == S3_801) {
        int ctl;

        /* 801 only: enter linear mode.  Register 0x40 is saved so it can
           be restored afterwards; the value written enables the fast
           write buffer and disables 8514/A mode. */
        outp(0x3d4, 0x40);
        saved_crt40 = inp(0x3d5);
        ctl = (saved_crt40 & 0xf6) | 0x0a;
        outp(0x3d5, (unsigned char) ctl);
        outp(0x3d4,0x59);
        outp(0x3d5,0x0a);
        outp(0x3d4, 0x53);
        outp(0x3d5, 1);
        outp(0x3d4, 0x58);
        outp(0x3d5, 0x10);
    }

    /* Leave index 0x35 selected so the loop can switch banks with a single
       data write.  Because offset < VGABANKSIZE here, the loop cannot need
       another switch before it has copied some data. */
    outp(0x3d4, 0x35);
    if (old_bank != bank)
        outp(0x3d5, (port35 | bank));

    for (row = 0; row < h; row++, psrc += pwidth, offset += VGAWIDTH) {
        int head;

        /* Common case: the whole row lies inside the current bank. */
        if (offset + w <= VGABANKSIZE) {
            memcpy(&fb[offset], psrc, w);
            continue;
        }

        /* Bytes of this row that still belong to the current bank; zero or
           negative when the row starts in the next bank. */
        head = VGABANKSIZE - offset;
        if (head > 0)
            memcpy(&fb[offset], psrc, head);

        bank++;
        outp(0x3d5, (port35 | bank));
        offset -= VGABANKSIZE;

        if (head > 0)
            memcpy(fb, psrc + head, w - head);    /* rest of a split row */
        else
            memcpy(&fb[offset], psrc, w);         /* row entirely in new bank */
    }
    old_bank = bank;

    if (chip_id == S3_801) {
        /* 801 only: leave linear mode, back to enhanced mode. */
        outp(0x3d4, 0x53);
        outp(0x3d5, 0);
        outp(0x3d4, 0x58);
        outp(0x3d5, 0x00);
        outp(0x3d4, 0x40);
        outp(0x3d5, saved_crt40);
    }

    /* WaitQueue(1); */
    outpw(FRGD_MIX, FSS_FRGDCOL | MIX_SRC);
}

/*
 * ibm8514ImageRead -- copy a w-by-h block of bytes from the screen at
 * (x, y) into a host image, using memcpy through the banked window at
 * vgaBase.  A row that straddles a bank boundary is read in two pieces
 * with a bank switch in between.
 *
 *   psrc, pwidth   destination image base and its row pitch in bytes
 *   px, py         position of the block inside the destination image
 *
 * On return FRGD_MIX is set to FSS_FRGDCOL | MIX_SRC and old_bank holds
 * the bank that was selected last.
 */
void
ibm8514ImageRead(x, y, w, h, psrc, pwidth, px, py)
    int			x;
    int			y;
    int			w;
    int			h;
    unsigned char	*psrc;
    int			pwidth;
    int			px;
    int			py;
{
    char *fb = (char *) vgaBase;
    unsigned short saved_crt40;
    int bank;
    int offset;
    int row;

    WaitIdleEmpty();
    outpw(FRGD_MIX, FSS_PCDATA | MIX_SRC);

    WaitQueue(8);

    /* Spin until bit 0x200 of port 0x9ae8 reads as zero (presumably the
       engine-busy flag -- confirm against the chip documentation). */
    while ((inpw(0x9ae8) & 0x200) != 0)
        ;

    if (chip_id == S3_801) {
        int ctl;

        /* 801 only: enter linear mode.  Register 0x40 is saved so it can
           be restored afterwards; the value written enables the fast
           write buffer and disables 8514/A mode. */
        outp(0x3d4, 0x40);
        saved_crt40 = inp(0x3d5);
        ctl = (saved_crt40 & 0xf6) | 0x0a;
        outp(0x3d5, (unsigned char) ctl);
        outp(0x3d4,0x59);
        outp(0x3d5,0x0a);
        outp(0x3d4, 0x53);
        outp(0x3d5, 1);
        outp(0x3d4, 0x58);
        outp(0x3d5, 0x10);
    }

    /* First host byte, and the (bank, offset-in-bank) of the first
       screen byte. */
    psrc  += pwidth * py + px;
    offset = (y * VGAWIDTH) + x;
    bank   = offset / VGABANKSIZE;
    offset = offset % VGABANKSIZE;

    /* Leave index 0x35 selected so the loop can switch banks with a single
       data write. */
    outp(0x3d4, 0x35);
    if (old_bank != bank)
        outp(0x3d5, (port35 | bank));

    for (row = 0; row < h; row++, psrc += pwidth, offset += VGAWIDTH) {
        int head;

        /* Common case: the whole row lies inside the current bank. */
        if (offset + w <= VGABANKSIZE) {
            memcpy(psrc, &fb[offset], w);
            continue;
        }

        /* Bytes of this row that still belong to the current bank; zero or
           negative when the row starts in the next bank. */
        head = VGABANKSIZE - offset;
        if (head > 0)
            memcpy(psrc, &fb[offset], head);

        bank++;
        outp(0x3d5, (port35 | bank));
        offset -= VGABANKSIZE;

        if (head > 0)
            memcpy(psrc + head, fb, w - head);    /* rest of a split row */
        else
            memcpy(psrc, &fb[offset], w);         /* row entirely in new bank */
    }
    old_bank = bank;

    if (chip_id == S3_801) {
        /* 801 only: leave linear mode, back to enhanced mode. */
        outp(0x3d4, 0x53);
        outp(0x3d5, 0);
        outp(0x3d4, 0x58);
        outp(0x3d5, 0x00);
        outp(0x3d4, 0x40);
        outp(0x3d5, saved_crt40);
    }

    /* WaitQueue(1); */
    outpw(FRGD_MIX, FSS_FRGDCOL | MIX_SRC);
}

/*
 * ibm8514ImageFill -- tile a w-by-h screen rectangle at (x, y) with a
 * pw-by-ph host pattern, using memcpy through the banked window at vgaBase.
 *
 *   psrc, pwidth   pattern base and its row pitch in bytes
 *   pw, ph         pattern width and height
 *   pox, poy       pattern origin; the starting pattern column is
 *                  (x - pox) mod pw and the starting row (y + poy) mod ph
 *   alu            OR'd with FSS_PCDATA and written to FRGD_MIX
 *   planemask      written to WRT_MASK
 *
 * Each screen row is written in up to three stages: a leading partial tile
 * (from pattern column xpix0 to the end of the tile), zero or more whole
 * tiles, and a trailing partial tile starting at pattern column 0.  Any of
 * the three may straddle a bank boundary, in which case it is split around
 * a bank switch.
 *
 * offset0 is the offset of the current row's first byte relative to the
 * bank currently selected.  Every bank switch therefore subtracts
 * VGABANKSIZE from offset0 as well, including the switches done while
 * copying whole tiles or the trailing partial tile; otherwise the next row
 * would see a stale offset0 and advance the bank a second time.
 *
 * On return FRGD_MIX is set to FSS_FRGDCOL | MIX_SRC and old_bank holds
 * the bank that was selected last.
 */
void
ibm8514ImageFill(x, y, w, h, psrc, pwidth, pw, ph, pox, poy, alu, planemask)
    int			x;
    int			y;
    int			w;
    int			h;
    int			pw, ph, pox, poy;
    unsigned char	*psrc;
    int			pwidth;
    short		alu;
    short		planemask;
{
    int j, tmp;
    unsigned char *pline;
    int ypix, xpix0, offset0;
    int cxpix;
    char *videobuffer;
    char bank;
    unsigned short crt40;

    videobuffer = (char *)vgaBase;

    WaitQueue(2);
    outpw(FRGD_MIX, FSS_PCDATA | alu);
    outpw(WRT_MASK, planemask);

    WaitQueue(8);

    /* Starting pattern column, and how many bytes remain in that tile. */
    modulus(x-pox,pw,xpix0);
    cxpix = pw - xpix0;

    /* Starting pattern row. */
    modulus(y+poy,ph,ypix);
    pline  = psrc + pwidth*ypix;

    /* (bank, offset-in-bank) of the first destination byte. */
    offset0  = (y * VGAWIDTH) + x;
    bank     = offset0 / VGABANKSIZE;
    offset0 %= VGABANKSIZE;

    /* Spin until bit 0x200 of port 0x9ae8 reads as zero (presumably the
       engine-busy flag -- confirm against the chip documentation). */
    while ((inpw(0x9ae8) & 0x200) != 0)
        ;

    if (chip_id == S3_801) {
        int i;

        /* 801 only: enter linear mode.  Register 0x40 is saved so it can
           be restored afterwards; the value written enables the fast
           write buffer and disables 8514/A mode. */
        outp(0x3d4, 0x40);
        crt40 = inp(0x3d5);
        i = (crt40 & 0xf6) | 0x0a;
        outp(0x3d5, (unsigned char) i);
        outp(0x3d4,0x59);
        outp(0x3d5,0x0a);
        outp(0x3d4, 0x53);
        outp(0x3d5, 1);
        outp(0x3d4, 0x58);
        outp(0x3d5, 0x10);
    }

    /* Leave index 0x35 selected so the loop can switch banks with a single
       data write. */
    outp (0x3d4, 0x35);
    if (bank != old_bank)
        outp(0x3d5, (port35 | bank));

    for (j = 0; j < h; j++, offset0 += VGAWIDTH) {
        int offset, width, xpix;

        /* Stage 1: leading partial tile. */
        width  = (w <= cxpix)? w: cxpix;
        xpix   = xpix0;
        offset = offset0;

        if (offset + width >= VGABANKSIZE) {
            int partwidth;

            partwidth = VGABANKSIZE - offset;
            offset0  -= VGABANKSIZE;

            if (partwidth > 0) {
                memcpy(&videobuffer[offset], pline + xpix, partwidth);
                width  -= partwidth;
                xpix   += partwidth;
                offset  = 0;
            } else
                offset  = offset0;   /* row starts in the next bank */

            bank++;
            outp(0x3d5, (port35 | bank));
        }

        memcpy(&videobuffer[offset], pline + xpix, width);

        /* Stage 2: whole tiles. */
        offset += width;
        for (width = w - cxpix; width >= pw; width -= pw, offset += pw) {
            if (offset + pw > VGABANKSIZE) {
                int partwidth;

                partwidth = VGABANKSIZE - offset;
                if (partwidth > 0)
                    memcpy(&videobuffer[offset], pline, partwidth);

                bank++;
                outp(0x3d5, (port35 | bank));

                offset  -= VGABANKSIZE;
                /* Keep the row base relative to the newly selected bank. */
                offset0 -= VGABANKSIZE;

                if (partwidth > 0) {
                    memcpy(videobuffer, pline + partwidth, pw - partwidth);
                    continue;
                }
            }
            memcpy(&videobuffer[offset], pline, pw);
        }

        /* Stage 3: trailing partial tile; here 0 <= width < pw. */
        if (width > 0) {
            /* The trailing piece always starts at pattern column 0, not
               at the column used for the leading piece. */
            xpix = 0;

            if (offset + width > VGABANKSIZE) {
                int partwidth;

                partwidth = VGABANKSIZE - offset;

                if (partwidth > 0) {
                    memcpy(&videobuffer[offset], pline, partwidth);
                    width  -= partwidth;
                    xpix    = partwidth;
                    offset  = 0;
                } else {
                    offset -= VGABANKSIZE;
                }

                bank++;
                outp(0x3d5, (port35 | bank));
                /* Keep the row base relative to the newly selected bank. */
                offset0 -= VGABANKSIZE;
            }

            memcpy(&videobuffer[offset], pline + xpix, width);
        }

        /* Advance to the next pattern row, wrapping after the last one. */
        if ((++ypix) == ph) {
            ypix  = 0;
            pline = psrc;
        } else
            pline += pwidth;
    }
    old_bank = bank;

    if (chip_id == S3_801) {
        /* 801 only: leave linear mode, back to enhanced mode. */
        outp(0x3d4, 0x53);
        outp(0x3d5, 0);
        outp(0x3d4, 0x58);
        outp(0x3d5, 0x00);
        outp(0x3d4, 0x40);
        outp(0x3d5, crt40);
    }

    /* WaitQueue(1); */
    outpw(FRGD_MIX, FSS_FRGDCOL | MIX_SRC);
}
#else
#define VGAWIDTH 1024

/*
 * ibm8514ImageWrite (fixed-pitch variant) -- copy a w-by-h block of bytes
 * from a host image to the screen at (x, y) through the banked window at
 * vgaBase.  This variant only switches banks between rows; it never splits
 * a row across a bank boundary.
 *
 *   psrc, pwidth   source image base and its row pitch in bytes
 *   px, py         position of the block inside the source image
 *   alu            OR'd with FSS_PCDATA and written to FRGD_MIX
 *   planemask      written to WRT_MASK
 */
void
ibm8514ImageWrite(x, y, w, h, psrc, pwidth, px, py, alu, planemask)
    int			x;
    int			y;
    int			w;
    int			h;
    unsigned char	*psrc;
    int			pwidth;
    int			px;
    int			py;
    short		alu;
    short		planemask;
{
    char *fb = (char *) vgaBase;
    unsigned char saved_crt40, crt54;
    char bank;
    int row, ctl, offset;

    WaitQueue(2);
    outpw(FRGD_MIX, FSS_PCDATA | alu);
    outpw(WRT_MASK, planemask);

    /* Spin until bit 0x200 of port 0x9ae8 reads as zero (presumably the
       engine-busy flag -- confirm against the chip documentation). */
    while ((inpw(0x9ae8) & 0x200) != 0)
        ;

    if (chip_id == S3_801) {
        /* 801 only: enter linear mode.  Register 0x40 is saved so it can
           be restored afterwards; the value written enables the fast
           write buffer and disables 8514/A mode. */
        outp(0x3d4, 0x40);
        saved_crt40 = inp(0x3d5);
        ctl = (saved_crt40 & 0xf6) | 0x0a;
        outp(0x3d5, (unsigned char) ctl);
        outp(0x3d4,0x59);
        outp(0x3d5,0x0a);
        outp(0x3d4, 0x53);
        outp(0x3d5, 1);
        outp(0x3d4, 0x58);
        outp(0x3d5, 0x10);
    }

    /* First source byte, and the (bank, offset-in-bank) of the first
       destination byte. */
    psrc  += pwidth * py + px;
    offset = (y * VGAWIDTH) + x;
    bank   = offset / VGABANKSIZE;
    offset = offset % VGABANKSIZE;

    /* Leave index 0x35 selected so the loop can switch banks with a single
       data write.  Because offset < VGABANKSIZE here, the loop cannot need
       another switch before it has copied some data. */
    outp(0x3d4, 0x35);
    if (old_bank != bank)
        outp(0x3d5, (port35 | bank));

    row = 0;
    while (row < h) {
        if (offset >= VGABANKSIZE) {
            /* This row begins in the next bank. */
            bank++;
            outp(0x3d5, (port35 | bank));
            offset -= VGABANKSIZE;
        }
        memcpy(&fb[offset], psrc, w);

        row++;
        psrc   += pwidth;
        offset += VGAWIDTH;
    }
    old_bank = bank;

    if (chip_id == S3_801) {
        /* 801 only: leave linear mode, back to enhanced mode. */
        outp(0x3d4, 0x53);
        outp(0x3d5, 0);
        outp(0x3d4, 0x58);
        outp(0x3d5, 0x00);
        outp(0x3d4, 0x40);
        outp(0x3d5, saved_crt40);
    }

    /* WaitQueue(1); */
    outpw(FRGD_MIX, FSS_FRGDCOL | MIX_SRC);
}

/*
 * ibm8514ImageRead (fixed-pitch variant) -- copy a w-by-h block of bytes
 * from the screen at (x, y) into a host image through the banked window at
 * vgaBase.  This variant only switches banks between rows; it never splits
 * a row across a bank boundary.
 *
 *   psrc, pwidth   destination image base and its row pitch in bytes
 *   px, py         position of the block inside the destination image
 */
void
ibm8514ImageRead(x, y, w, h, psrc, pwidth, px, py)
    int			x;
    int			y;
    int			w;
    int			h;
    unsigned char	*psrc;
    int			pwidth;
    int			px;
    int			py;
{
    char *fb = (char *) vgaBase;
    unsigned char saved_crt40, crt54;
    int bank;
    int offset;
    int row, ctl;

    WaitIdleEmpty();
    outpw(FRGD_MIX, FSS_PCDATA | MIX_SRC);

    /* Spin until bit 0x200 of port 0x9ae8 reads as zero (presumably the
       engine-busy flag -- confirm against the chip documentation). */
    while ((inpw(0x9ae8) & 0x200) != 0)
        ;

    if (chip_id == S3_801) {
        /* 801 only: enter linear mode.  Register 0x40 is saved so it can
           be restored afterwards; the value written enables the fast
           write buffer and disables 8514/A mode. */
        outp(0x3d4, 0x40);
        saved_crt40 = inp(0x3d5);
        ctl = (saved_crt40 & 0xf6) | 0x0a;
        outp(0x3d5, (unsigned char) ctl);
        outp(0x3d4,0x59);
        outp(0x3d5,0x0a);
        outp(0x3d4, 0x53);
        outp(0x3d5, 1);
        outp(0x3d4, 0x58);
        outp(0x3d5, 0x10);
    }

    /* First host byte, and the (bank, offset-in-bank) of the first
       screen byte. */
    psrc  += pwidth * py + px;
    offset = (y * VGAWIDTH) + x;
    bank   = offset / VGABANKSIZE;
    offset = offset % VGABANKSIZE;

    /* Leave index 0x35 selected so the loop can switch banks with a single
       data write. */
    outp(0x3d4, 0x35);
    if (old_bank != bank)
        outp(0x3d5, (port35 | bank));

    row = 0;
    while (row < h) {
        if (offset >= VGABANKSIZE) {
            /* This row begins in the next bank. */
            bank++;
            outp(0x3d5, (port35 | bank));
            offset -= VGABANKSIZE;
        }
        memcpy(psrc, &fb[offset], w);

        row++;
        psrc   += pwidth;
        offset += VGAWIDTH;
    }
    old_bank = bank;

    if (chip_id == S3_801) {
        /* 801 only: leave linear mode, back to enhanced mode. */
        outp(0x3d4, 0x53);
        outp(0x3d5, 0);
        outp(0x3d4, 0x58);
        outp(0x3d5, 0x00);
        outp(0x3d4, 0x40);
        outp(0x3d5, saved_crt40);
    }

    /* WaitQueue(1); */
    outpw(FRGD_MIX, FSS_FRGDCOL | MIX_SRC);
}

/*
 * ibm8514ImageFill (fixed-pitch variant) -- tile a w-by-h screen rectangle
 * at (x, y) with a pw-by-ph host pattern through the banked window at
 * vgaBase.  This variant only switches banks between rows; it never splits
 * a row across a bank boundary.
 *
 *   psrc, pwidth   pattern base and its row pitch in bytes
 *   pw, ph         pattern width and height
 *   pox, poy       pattern origin; the starting pattern column is
 *                  (x - pox) mod pw and the starting row (y + poy) mod ph
 *   alu            OR'd with FSS_PCDATA and written to FRGD_MIX
 *   planemask      written to WRT_MASK
 */
void
ibm8514ImageFill(x, y, w, h, psrc, pwidth, pw, ph, pox, poy, alu, planemask)
    int			x;
    int			y;
    int			w;
    int			h;
    int			pw, ph, pox, poy;
    unsigned char	*psrc;
    int			pwidth;
    short		alu;
    short		planemask;
{
    char *fb = (char *)vgaBase;
    unsigned char saved_crt40, crt54;
    unsigned char *pline;
    char bank;
    int row, ctl, tmp;
    int ypix, xpix, rowbase;
    int cxpix;

    WaitQueue(2);
    outpw(FRGD_MIX, FSS_PCDATA | alu);
    outpw(WRT_MASK, planemask);

    /* Spin until bit 0x200 of port 0x9ae8 reads as zero (presumably the
       engine-busy flag -- confirm against the chip documentation). */
    while ((inpw(0x9ae8) & 0x200) != 0)
        ;

    if (chip_id == S3_801) {
        /* 801 only: enter linear mode.  Register 0x40 is saved so it can
           be restored afterwards; the value written enables the fast
           write buffer and disables 8514/A mode. */
        outp(0x3d4, 0x40);
        saved_crt40 = inp(0x3d5);
        ctl = (saved_crt40 & 0xf6) | 0x0a;
        outp(0x3d5, (unsigned char) ctl);
        outp(0x3d4,0x59);
        outp(0x3d5,0x0a);
        outp(0x3d4, 0x53);
        outp(0x3d5, 1);
        outp(0x3d4, 0x58);
        outp(0x3d5, 0x10);
    }

    /* Starting pattern column, and how many bytes remain in that tile. */
    modulus(x-pox,pw,xpix);
    cxpix = pw - xpix;

    /* Starting pattern row. */
    modulus(y+poy,ph,ypix);
    pline = psrc + pwidth*ypix;

    /* (bank, offset-in-bank) of the first destination byte. */
    rowbase = (y * VGAWIDTH) + x;
    bank    = rowbase / VGABANKSIZE;
    rowbase = rowbase % VGABANKSIZE;

    /* Leave index 0x35 selected so the loop can switch banks with a single
       data write. */
    outp (0x3d4, 0x35);
    if (bank != old_bank)
        outp(0x3d5, (port35 | bank));

    for (row = 0; row < h; row++, rowbase += VGAWIDTH) {
        if (rowbase >= VGABANKSIZE) {
            /* This row begins in the next bank. */
            bank++;
            outp(0x3d5, (port35 | bank));
            rowbase -= VGABANKSIZE;
        }

        if (w > cxpix) {
            int remaining = w - cxpix;
            int at = rowbase + cxpix;

            /* Leading partial tile. */
            memcpy(&fb[rowbase], pline + xpix, cxpix);

            /* Whole tiles. */
            while (remaining >= pw) {
                memcpy(&fb[at], pline, pw);
                remaining -= pw;
                at += pw;
            }

            /* Trailing partial tile; here 0 <= remaining < pw. */
            if (remaining > 0)
                memcpy(&fb[at], pline, remaining);
        } else {
            /* The rectangle is no wider than what is left of one tile. */
            memcpy(&fb[rowbase], pline + xpix, w);
        }

        /* Advance to the next pattern row, wrapping after the last one. */
        ypix++;
        if (ypix == ph) {
            ypix  = 0;
            pline = psrc;
        } else {
            pline += pwidth;
        }
    }
    old_bank = bank;

    if (chip_id == S3_801) {
        /* 801 only: leave linear mode, back to enhanced mode. */
        outp(0x3d4, 0x53);
        outp(0x3d5, 0);
        outp(0x3d4, 0x58);
        outp(0x3d5, 0x00);
        outp(0x3d4, 0x40);
        outp(0x3d5, saved_crt40);
    }

    /* WaitQueue(1); */
    outpw(FRGD_MIX, FSS_FRGDCOL | MIX_SRC);
}
#endif

/* Entry n is a mask with the n lowest bits set, for n = 0..8. */
static int _internal_ibm8514_mskbits[9] = {
	0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff
};

/* Arithmetic alternative to the table lookup, kept for reference: */
/* #define MSKBITS(n) ((n)? ((1<<(n))-1): 0) */
/* Mask of the n lowest bits; n is not range-checked and must be 0..8. */
#define MSKBIT(n) (_internal_ibm8514_mskbits[(n)])
/* Bit-reversed copy of byte s of the current stipple row.  Relies on a
   variable named pline being in scope at the point of use. */
#define SWPBIT(s) (swapbits[pline[(s)]])

/*
 * ibm8514ImageStipple -- draw a tiled 1-bit stipple into the w-by-h
 * rectangle at (x, y).  Stipple data is fed to PIX_TRANS 16 bits at a
 * time, one horizontal line command per row.  The foreground mix is
 * FSS_FRGDCOL | alu with fgPixel as foreground colour; the background mix
 * is BSS_BKGDCOL | MIX_DST.
 *
 *   psrc, pwidth   stipple bitmap base and its row pitch in bytes
 *   pw, ph         stipple width (in bits) and height (in rows)
 *   pox, poy       stipple origin
 *   planemask      written to WRT_MASK
 *
 * The span drawn is widened to multiples of 8 on both sides and the
 * scissors are set to [x, x+w-1] so that only the requested columns are
 * touched.  On exit the mixes are reset to MIX_SRC and the scissors to
 * 0..1023.  Returns without drawing if the temporary buffer for a narrow
 * stipple cannot be allocated.
 */
void
ibm8514ImageStipple(x, y, w, h, psrc, pwidth, pw,
		    ph, pox, poy, fgPixel, alu, planemask)
    int			x;
    int			y;
    int			w;
    int			h;
    unsigned char	*psrc;
    int pw, ph, pox, poy;
    int			pwidth;
    int			fgPixel;
    short		alu;
    short		planemask;
{
    unsigned char	*widened = NULL;
    int			left, right, top, bottom, span;

    left   = x & ~0x7;
    right  = (x+w+7) & ~0x7;
    top    = y;
    bottom = y+h;
    span   = right - left;

    /* A stipple at most 8 bits wide that has to repeat horizontally is
       first doubled in width, repeatedly, into a 2-byte-per-row scratch
       bitmap until it is wider than 8 bits.  From the second pass on the
       scratch bitmap is doubled in place. */
    if (pw <= 8 && pw < w) {
        widened = (unsigned char *)ALLOCATE_LOCAL(2*ph*sizeof(char));
        if (!widened) {
            return;
        }

        do {
            unsigned char *in = psrc;
            unsigned char *out = widened;
            int n;

            for (n = 0; n < ph; n++, in += pwidth, out += 2) {
                out[0] = (in[0] & (0xff >> (8-pw))) | in[0] << pw;
                if (pw > 4)
                    out[1] = in[0] >> (8-pw);
            }
            pw *= 2;
            pwidth = 2;
            psrc = widened;
        } while (pw <= 8);
    }

    WaitQueue(3);
    outpw(MULTIFUNC_CNTL, SCISSORS_L | x);
    outpw(MULTIFUNC_CNTL, SCISSORS_R | (x+w-1));
    outpw(WRT_MASK, planemask);

    WaitQueue(5);
    outpw(FRGD_MIX, FSS_FRGDCOL | alu);
    outpw(BKGD_MIX, BSS_BKGDCOL | MIX_DST);
    outpw(FRGD_COLOR, (short)fgPixel);
    outpw(MULTIFUNC_CNTL, PIX_CNTL | MIXSEL_EXPPC | COLCMPOP_F);
    outpw(MAJ_AXIS_PCNT, (short)(span-1));

  {
    /* NOTE: the row pointer must be called pline; SWPBIT() refers to it. */
    unsigned char *pline;
    int startbit, startbyte, ypix, j;
    int lobits, hibits, chibits, pw8;

    /* Bit position inside the stipple row where the span starts, split
       into a byte index and the number of bits used from that byte. */
    modulus(left-pox,pw,startbit);
    lobits    = 8 - (startbit % 8);
    startbyte = startbit / 8;

    /* Whole bytes per stipple row, plus the bits used in the last,
       partial byte (and their complement). */
    hibits  = pw % 8;
    chibits = 8 - hibits;
    pw8     = pw / 8;

    modulus(top-poy,ph,ypix);
    pline = psrc + (pwidth * ypix);

    for (j = top; j < bottom; j++) {
        unsigned long acc;      /* bit accumulator, newest bits lowest */
        int avail;              /* number of unsent bits held in acc */
        int sent, pix;

        WaitQueue(3);
        outpw(CUR_X, (short)left);
        outpw(CUR_Y, (short)j);
        outpw(CMD, CMD_LINE | PCDATA | _16BIT | LINETYPE
                            | DRAW | PLANAR | WRTDATA);

        avail = lobits;
        acc   = SWPBIT(startbyte) & MSKBIT(lobits);
        pix   = startbyte+1;

        for (sent = 0; sent < span; sent += 16) {
            /* Top the accumulator up to at least 16 bits. */
            while (avail < 16) {
                if (pix >= pw8) {
                    /* End of the stipple row: take the bits of the
                       partial last byte, if any, then wrap to byte 0. */
                    if (hibits > 0) {
                        acc = (acc << hibits)
                            | ((SWPBIT(pix) & ~MSKBIT(chibits)) >> chibits);
                        avail += hibits;
                    }
                    pix = 0;
                }
                acc = (acc << 8) | SWPBIT(pix);
                pix++;
                avail += 8;
            }
            avail -= 16;
            outpw(PIX_TRANS, (acc >> avail) & 0xffff);
        }

        /* Advance to the next stipple row, wrapping after the last one. */
        ypix++;
        if (ypix == ph) {
            ypix  = 0;
            pline = psrc;
        } else {
            pline += pwidth;
        }
    }
  }

    WaitQueue(5);
    outpw(FRGD_MIX, FSS_FRGDCOL | MIX_SRC);
    outpw(BKGD_MIX, BSS_BKGDCOL | MIX_SRC);
    outpw(MULTIFUNC_CNTL, SCISSORS_L | 0);
    outpw(MULTIFUNC_CNTL, SCISSORS_R | 1023);
    outpw(MULTIFUNC_CNTL, PIX_CNTL | MIXSEL_FRGDMIX | COLCMPOP_F);

    if (widened)
	DEALLOCATE_LOCAL(widened);
}

/*
 * ibm8514ImageOpStipple -- draw a tiled 1-bit stipple into the w-by-h
 * rectangle at (x, y) with both colours supplied.  Stipple data is fed to
 * PIX_TRANS 16 bits at a time, one horizontal line command per row.  Both
 * the foreground mix (FSS_FRGDCOL) and the background mix (BSS_BKGDCOL)
 * use alu; fgPixel is written to FRGD_COLOR once and bgPixel to BKGD_COLOR
 * before every row.
 *
 *   psrc, pwidth   stipple bitmap base and its row pitch in bytes
 *   pw, ph         stipple width (in bits) and height (in rows)
 *   pox, poy       stipple origin
 *   planemask      written to WRT_MASK
 *
 * The span drawn is widened to multiples of 8 on both sides and the
 * scissors are set to [x, x+w-1] so that only the requested columns are
 * touched.  On exit the mixes are reset to MIX_SRC and the scissors to
 * 0..1023.  Returns without drawing if the temporary buffer for a narrow
 * stipple cannot be allocated.
 */
void
ibm8514ImageOpStipple(x, y, w, h, psrc, pwidth, pw, ph, pox, poy, fgPixel, bgPixel, alu, planemask)
    int			x;
    int			y;
    int			w;
    int			h;
    unsigned char	*psrc;
    int pw, ph, pox, poy;
    int			pwidth;
    int			fgPixel;
    int			bgPixel;
    short		alu;
    short		planemask;
{
    unsigned char	*widened = NULL;
    int			left, right, top, bottom, span;

    left   = x & ~0x7;
    right  = (x+w+7) & ~0x7;
    top    = y;
    bottom = y+h;
    span   = right - left;

    /* A stipple at most 8 bits wide that has to repeat horizontally is
       first doubled in width, repeatedly, into a 2-byte-per-row scratch
       bitmap until it is wider than 8 bits.  From the second pass on the
       scratch bitmap is doubled in place. */
    if (pw <= 8 && pw < w) {
        widened = (unsigned char *)ALLOCATE_LOCAL(2*ph*sizeof(char));
        if (!widened) {
            return;
        }

        do {
            unsigned char *in = psrc;
            unsigned char *out = widened;
            int n;

            for (n = 0; n < ph; n++, in += pwidth, out += 2) {
                out[0] = (in[0] & (0xff >> (8-pw))) | in[0] << pw;
                if (pw > 4)
                    out[1] = in[0] >> (8-pw);
            }
            pw *= 2;
            pwidth = 2;
            psrc = widened;
        } while (pw <= 8);
    }

    WaitQueue(3);
    outpw(MULTIFUNC_CNTL, SCISSORS_L | x);
    outpw(MULTIFUNC_CNTL, SCISSORS_R | (x+w-1));
    outpw(WRT_MASK, planemask);

    WaitQueue(5);
    outpw(FRGD_MIX, FSS_FRGDCOL | alu);
    outpw(BKGD_MIX, BSS_BKGDCOL | alu);
    outpw(FRGD_COLOR, (short)fgPixel);
    outpw(MULTIFUNC_CNTL, PIX_CNTL | MIXSEL_EXPPC | COLCMPOP_F);
    outpw(MAJ_AXIS_PCNT, (short)(span-1));

  {
    /* NOTE: the row pointer must be called pline; SWPBIT() refers to it. */
    unsigned char *pline;
    int startbit, startbyte, ypix, j;
    int lobits, hibits, chibits, pw8;

    /* Bit position inside the stipple row where the span starts, split
       into a byte index and the number of bits used from that byte. */
    modulus(left-pox,pw,startbit);
    lobits    = 8 - (startbit % 8);
    startbyte = startbit / 8;

    /* Whole bytes per stipple row, plus the bits used in the last,
       partial byte (and their complement). */
    hibits  = pw % 8;
    chibits = 8 - hibits;
    pw8     = pw / 8;

    modulus(top-poy,ph,ypix);
    pline = psrc + (pwidth * ypix);

    for (j = top; j < bottom; j++) {
        unsigned long acc;      /* bit accumulator, newest bits lowest */
        int avail;              /* number of unsent bits held in acc */
        int sent, pix;

        WaitQueue(4);
        outpw(BKGD_COLOR, (short)bgPixel);
        outpw(CUR_X, (short)left);
        outpw(CUR_Y, (short)j);
        outpw(CMD, CMD_LINE  | _16BIT | PCDATA | LINETYPE | DRAW | PLANAR | WRTDATA );

        avail = lobits;
        acc   = SWPBIT(startbyte) & MSKBIT(lobits);
        pix   = startbyte+1;

        for (sent = 0; sent < span; sent += 16) {
            /* Top the accumulator up to at least 16 bits. */
            while (avail < 16) {
                if (pix >= pw8) {
                    /* End of the stipple row: take the bits of the
                       partial last byte, if any, then wrap to byte 0. */
                    if (hibits > 0) {
                        acc = (acc << hibits)
                            | ((SWPBIT(pix) & ~MSKBIT(chibits)) >> chibits);
                        avail += hibits;
                    }
                    pix = 0;
                }
                acc = (acc << 8) | SWPBIT(pix);
                pix++;
                avail += 8;
            }
            avail -= 16;
            outpw(PIX_TRANS, (acc >> avail) & 0xffff);
        }

        /* Advance to the next stipple row, wrapping after the last one. */
        ypix++;
        if (ypix == ph) {
            ypix  = 0;
            pline = psrc;
        } else {
            pline += pwidth;
        }
    }
  }

    WaitQueue(5);
    outpw(FRGD_MIX, FSS_FRGDCOL | MIX_SRC);
    outpw(BKGD_MIX, BSS_BKGDCOL | MIX_SRC);
    outpw(MULTIFUNC_CNTL, SCISSORS_L | 0);
    outpw(MULTIFUNC_CNTL, SCISSORS_R | 1023);
    outpw(MULTIFUNC_CNTL, PIX_CNTL | MIXSEL_FRGDMIX | COLCMPOP_F);

    if (widened)
	DEALLOCATE_LOCAL(widened);
}

