;
; FILLPOLY.ASM
;
; (c)Copyright 1995-1998 Steve Eschweiler. All Rights Reserved.
;

;
; This code assumes x clipping limits of 0 and 319!!!
; Coding it another way is simple; however, speed is all we really care about.

.386p
.MODEL FLAT

.DATA

; Constants shared by every routine in this file.
SCREEN_MEMORY   equ 0a0000h     ; 32-bit Video memory start address
ONE_PAGE_OFFSET equ 20480       ; bytes from one page to the next
				; (20480 = 320 x 256 / 4)
SC_INDEX        equ 03c4h       ; Sequence Controller Index
MAP_MASK        equ 02h         ; index in SC of Map Mask register
SCREEN_WIDTH    equ 80          ; width of screen in bytes from one scan line
				; to the next

	; Map Mask values for the partial byte at each end of a span,
	; indexed by (x AND 3).
	LMask           db      0fh,0eh,0ch,08h
	RMask           db      01h,03h,07h,0fh
	; Address of the page the fill routines draw into; rewritten by
	; se_setpage_.
	se_page_addr    dd      0a0000h
	; Bit 0 selects the dword/word fast path in the fill routines;
	; rewritten by se_settestresult_.
	se_result       db      01h     ; 0 is fail  1 is pass

; Plane masks for index (x AND 3) = 0, 1, 2, 3.  The E (even) rows are
; the solid masks ANDed with 0101b, the O (odd) rows with 1010b.
;LMASK  1111 1110 1100 1000
;MASK   1111
;RMASK  0001 0011 0111 1111
;LEMASK  0101 0100 0100 0000
;EMASK   0101
;REMASK  0001 0001 0101 0101
;LOMASK  1010 1010 1000 1000
;OMASK   1010
;ROMASK  0000 0010 0010 1010
; The fifth bit (bit 4, mask 10h) of a scan line's byte offset tells us
; whether the line is odd or even.  Offsets of the first eight lines:
;0      0000 0000 0000 0000
;80     0000 0000 0101 0000
;160    0000 0000 1010 0000
;240    0000 0000 1111 0000
;320    0000 0001 0100 0000
;400    0000 0001 1001 0000
;480    0000 0001 1110 0000
;560    0000 0010 0011 0000

	; Dither edge masks used by se_dfillpoly_, indexed by (x AND 3):
	; L/R = left/right edge, E/O = even/odd scan line.
	LEDithMask  db  00000101b, 00000100b, 00000100b, 00000000b
	REDithMask  db  00000001b, 00000001b, 00000101b, 00000101b
	LODithMask  db  00001010b, 00001010b, 00001000b, 00001000b
	RODithMask  db  00000000b, 00000010b, 00000010b, 00001010b

	; Inputs supplied by the caller through external variables.
	extrn   _se_color:dword   ; fill color              (int se_color)
	extrn   _se_lptr:dword    ; left x pixel array ptr  (int *se_lptr)
	extrn   _se_rptr:dword    ; right x pixel array ptr (int *se_rptr)
	extrn   _se_top:dword     ; starting y pixel        (int se_ystart)
	extrn   _se_bottom:dword  ; ending y pixel          (int se_yend)
	; Per-plane source buffers read by se_gfillpoly_.
	extrn   _se_plane0_ptr:dword
	extrn   _se_plane1_ptr:dword
	extrn   _se_plane2_ptr:dword
	extrn   _se_plane3_ptr:dword
	; Horizontal clip limits (inclusive) used by all fill routines.
	extrn   _se_gminx:dword
	extrn   _se_gmaxx:dword

.CODE
	; Entry points exported to the rest of the program.
	; NOTE(review): the trailing underscore and the use of eax/al for
	; arguments look like the Watcom register calling convention --
	; confirm against the C prototypes / #pragma aux declarations.
	public  se_fillpoly_
	public  se_dfillpoly_
	public  se_gfillpoly_
	public  se_setpage_
	public  se_inittest_
	public  se_settestresult_

;-----------------------------------------------------------------------
; se_fillpoly_ - solid-color span filler
;
; For every scan line from _se_top through _se_bottom (inclusive) the
; span between that line's entries in the _se_lptr and _se_rptr arrays
; (one dword x value per line) is written with the low byte of
; _se_color.  The two x values are swapped when they arrive in the
; wrong order; spans entirely outside [_se_gminx,_se_gmaxx] are skipped
; and the others are clipped to that range.  One video byte address
; covers four pixels, so the partial bytes at both ends of a span are
; written through the Map Mask register using LMask / RMask.
;
; In:    no register arguments (all inputs are the globals above plus
;        se_page_addr and se_result)
; Out:   nothing
; Clobb: eax, flags; DF is left clear.
;        ebx, ecx and edx are saved and restored here in addition to
;        esi/edi so the caller's registers survive the call.
;
; Scan-loop register roles:
;   eax  color replicated into all four bytes; al is also the OUT
;        scratch register and is reloaded from ah afterwards
;   ebx  left-edge array pointer  (bh/bl = left/right mask mid-line)
;   edx  right-edge array pointer (dx = port number mid-line)
;   esi  address of x=0 on the current scan line
;   ecx  address of x=0 on the last scan line (byte count mid-line)
;   edi  write pointer
;
; NOTE(review): x values are divided by 4 with a logical shift, so the
; clip limits are presumably expected to be non-negative -- confirm.
;-----------------------------------------------------------------------
se_fillpoly_    proc
	push    esi             ; preserve caller's registers
	push    edi
	push    ebx
	push    ecx
	push    edx

	cld                     ; make stos instructions inc pointers

	; esi = se_page_addr + _se_top * SCREEN_WIDTH (80 = 5 * 16)
	mov     esi,_se_top
	lea     esi,[esi+esi*4]
	shl     esi,4
	add     esi,se_page_addr

	; ecx = se_page_addr + _se_bottom * SCREEN_WIDTH
	mov     ecx,_se_bottom
	lea     ecx,[ecx+ecx*4]
	shl     ecx,4
	add     ecx,se_page_addr

	mov     dx,SC_INDEX     ; point SC Index to the Map Mask
	mov     al,MAP_MASK
	out     dx,al

	; replicate the color byte into all four bytes of eax
	mov     al,BYTE PTR [_se_color]
	mov     ah,al
	shl     eax,16
	mov     al,BYTE PTR [_se_color]
	mov     ah,al

	mov     ebx,_se_lptr
	mov     edx,_se_rptr

START_SCAN:
	push    ecx             ; save last y address offset
	push    edx             ; save right edge work array pointer
	push    ebx             ; save left edge work array pointer
	push    esi             ; save current y address offset

	mov     edi,[ebx]       ; edi = left pixel
	mov     ecx,[edx]       ; ecx = right pixel

	cmp     ecx,edi
	jg      SHORT NEXT_PHASE
	xchg    edi,ecx         ; make sure edi <= ecx

NEXT_PHASE:
	; Skip any span that lies completely outside the clip limits
	cmp     edi,_se_gmaxx
	jg      SHORT SCAN_LINE_COMPLETE  ; left > maxx: span is invisible
	cmp     ecx,_se_gminx
	jl      SHORT SCAN_LINE_COMPLETE  ; right < minx: span is invisible

	cmp     edi,_se_gminx
	jge     SHORT CLIP_AT_MAXX      ; left >= minx: nothing to clip
	mov     edi,_se_gminx
CLIP_AT_MAXX:
	cmp     ecx,_se_gmaxx
	jle     SHORT CALC_CLIP_MASKS   ; right <= maxx: nothing to clip
	mov     ecx,_se_gmaxx

CALC_CLIP_MASKS:
	mov     edx,edi         ; remember x pixel start
	shr     edi,2           ; edi = byte column of left edge

	and     edx,03h         ; look up clip mask of left edge
	mov     bh,LMask[edx]   ; and put in bh

	mov     edx,ecx
	and     edx,03h         ; look up clip mask of right edge
	mov     bl,RMask[edx]   ; and put in bl

	shr     ecx,2           ; ecx = byte column of right edge
	sub     ecx,edi         ; ecx = number of byte addresses - 1

	jnz     SHORT MASKS_ARE_ALL_SET ; more than 1 byte (4 pixels) to draw
	and     bh,bl           ; only one byte to draw so combine left and
				; right clip masks
MASKS_ARE_ALL_SET:
	add     edi,esi         ; edi = address of the first byte.
				; Note: this comes after the byte count
				; has been calculated

	mov     dx,SC_INDEX+1   ; SC Index already selects the Map Mask reg
	mov     al,bh           ; left edge mask (or right and left)
	out     dx,al

	mov     al,ah           ; restore color at byte al
	stosb                   ; draw left edge
	dec     ecx             ; count off the left edge byte
	js      SHORT SCAN_LINE_COMPLETE ; span was 1 byte: done
	jz      SHORT DO_RIGHT_EDGE      ; span was 2 bytes: only right edge left
	mov     al,00fh         ; all four planes for the middle bytes
	out     dx,al

	mov     al,ah           ; restore color at byte al

	test    se_result,01h   ; bit 0 clear: byte stores only
	jz      SHORT SLOWFILL

	; ecx = middle byte count.  Store count/4 dwords, then one word if
	; bit 1 was set, then one byte if bit 0 was set.  rep leaves ecx at
	; zero, so "adc ecx,ecx" turns the saved carry into a count of 0/1.
	shr     ecx,1           ; CF = bit 0
	pushf                   ; save it for the final stosb
	shr     ecx,1           ; CF = bit 1, ecx = count / 4
	rep     stosd           ; 16 pixels per store
	adc     ecx,ecx
	rep     stosw           ; 0 or 1 word
	popf
	adc     ecx,ecx
SLOWFILL:
	rep     stosb           ; 0 or 1 byte (or the whole middle run)

DO_RIGHT_EDGE:
	mov     al,bl           ; right edge mask
	out     dx,al
	mov     al,ah           ; restore color at byte al
	stosb                   ; draw right edge

SCAN_LINE_COMPLETE:
	pop     esi             ; restore current y address offset
	pop     ebx             ; restore left edge work array pointer
	pop     edx             ; restore right edge work array pointer
	pop     ecx             ; restore last y address offset
	add     esi,SCREEN_WIDTH        ; next scan line
	add     ebx,4                   ; next left x
	add     edx,4                   ; next right x
	cmp     ecx,esi
	jns     START_SCAN      ; loop while last line >= current line
	;jnz     START_SCAN     ; use this to not draw bottom edges

FILL_COMPLETE:
	pop     edx             ; restore caller's registers
	pop     ecx
	pop     ebx
	pop     edi
	pop     esi
	ret
se_fillpoly_    endp

;-----------------------------------------------------------------------
; se_dfillpoly_ - dithered span filler
;
; Same span walk as se_fillpoly_ (scan lines _se_top .. _se_bottom
; inclusive, x pairs taken from _se_lptr / _se_rptr, swapped if needed,
; rejected or clipped against [_se_gminx,_se_gmaxx]) but only every
; other pixel is written: the Map Mask is 0101b on lines whose address
; has bit 4 clear and 1010b on lines where it is set, and the edge
; masks come from the LE/RE/LO/RO dither tables.  Pixels excluded by
; the mask are not written.
;
; In:    no register arguments (inputs are the globals plus
;        se_page_addr and se_result)
; Out:   nothing
; Clobb: eax, flags; DF is left clear.
;        ebx, ecx and edx are saved and restored here in addition to
;        esi/edi so the caller's registers survive the call.
;
; Scan-loop register roles are the same as in se_fillpoly_:
;   eax color x4 (al = OUT scratch), ebx/edx edge array pointers
;   (bh/bl masks and dx port mid-line), esi current line address,
;   ecx last line address (byte count mid-line), edi write pointer.
;
; NOTE(review): x values are divided by 4 with a logical shift, so the
; clip limits are presumably expected to be non-negative -- confirm.
;-----------------------------------------------------------------------
DITHER_ODD_LINE equ 10h         ; bit of the line address that selects
				; the odd-line masks

se_dfillpoly_    proc
	push    esi             ; preserve caller's registers
	push    edi
	push    ebx
	push    ecx
	push    edx

	cld                     ; make stos instructions inc pointers

	; esi = se_page_addr + _se_top * SCREEN_WIDTH (80 = 5 * 16)
	mov     esi,_se_top
	lea     esi,[esi+esi*4]
	shl     esi,4
	add     esi,se_page_addr

	; ecx = se_page_addr + _se_bottom * SCREEN_WIDTH
	mov     ecx,_se_bottom
	lea     ecx,[ecx+ecx*4]
	shl     ecx,4
	add     ecx,se_page_addr

	mov     dx,SC_INDEX     ; point SC Index to the Map Mask
	mov     al,MAP_MASK
	out     dx,al

	; replicate the color byte into all four bytes of eax
	mov     al,BYTE PTR [_se_color]
	mov     ah,al
	shl     eax,16
	mov     al,BYTE PTR [_se_color]
	mov     ah,al

	mov     ebx,_se_lptr
	mov     edx,_se_rptr

DSTART_SCAN:
	push    ecx             ; save last y address offset
	push    edx             ; save right edge work array pointer
	push    ebx             ; save left edge work array pointer
	push    esi             ; save current y address offset

	mov     edi,[ebx]       ; edi = left pixel
	mov     ecx,[edx]       ; ecx = right pixel

	cmp     ecx,edi
	jg      SHORT DNEXT_PHASE
	xchg    edi,ecx         ; make sure edi <= ecx

DNEXT_PHASE:
	; Skip any span that lies completely outside the clip limits
	cmp     edi,_se_gmaxx
	jg      DSCAN_LINE_COMPLETE     ; left > maxx: span is invisible
	cmp     ecx,_se_gminx
	jl      DSCAN_LINE_COMPLETE     ; right < minx: span is invisible

	cmp     edi,_se_gminx
	jge     SHORT DCLIP_AT_MAXX     ; left >= minx: nothing to clip
	mov     edi,_se_gminx
DCLIP_AT_MAXX:
	cmp     ecx,_se_gmaxx
	jle     SHORT DCALC_CLIP_MASKS  ; right <= maxx: nothing to clip
	mov     ecx,_se_gmaxx

DCALC_CLIP_MASKS:
	mov     edx,edi         ; remember x pixel start
	shr     edi,2           ; edi = byte column of left edge

	; Left edge mask: even-line table unless the odd-line bit is set.
	; (jz replaces the original jp; after TEST with a single-bit mask
	; in the low byte the two conditions are equivalent.)
	and     edx,03h
	mov     bh,LEDithMask[edx]
	test    esi,DITHER_ODD_LINE
	jz      SHORT DONT_COMPLEMENT_1
	mov     bh,LODithMask[edx]
DONT_COMPLEMENT_1:

	; Right edge mask, chosen the same way
	mov     edx,ecx
	and     edx,03h
	mov     bl,REDithMask[edx]
	test    esi,DITHER_ODD_LINE
	jz      SHORT DONT_COMPLEMENT_2
	mov     bl,RODithMask[edx]
DONT_COMPLEMENT_2:

	shr     ecx,2           ; ecx = byte column of right edge
	sub     ecx,edi         ; ecx = number of byte addresses - 1

	jnz     SHORT DMASKS_ARE_ALL_SET ; more than 1 byte (4 pixels) to draw
	and     bh,bl           ; only one byte to draw so combine left and
				; right clip masks
DMASKS_ARE_ALL_SET:
	add     edi,esi         ; edi = address of the first byte.
				; Note: this comes after the byte count
				; has been calculated

	mov     dx,SC_INDEX+1   ; SC Index already selects the Map Mask reg
	mov     al,bh           ; left edge mask (or right and left)
	out     dx,al

	mov     al,ah           ; restore color at byte al
	stosb                   ; draw left edge
	dec     ecx             ; count off the left edge byte
	js      SHORT DSCAN_LINE_COMPLETE ; span was 1 byte: done
	jz      SHORT DDO_RIGHT_EDGE      ; span was 2 bytes: only right edge left

	; Middle bytes use the half-density mask instead of 0fh
	mov     al,00000101b    ; even-line mask
	test    esi,DITHER_ODD_LINE
	jz      SHORT DONT_COMPLEMENT_3
	mov     al,00001010b    ; odd-line mask (complement within 4 bits)
DONT_COMPLEMENT_3:
	out     dx,al

	mov     al,ah           ; restore color at byte al

	test    se_result,01h   ; bit 0 clear: byte stores only
	jz      SHORT DSLOWFILL

	; ecx = middle byte count.  Store count/4 dwords, then one word if
	; bit 1 was set, then one byte if bit 0 was set.  rep leaves ecx at
	; zero, so "adc ecx,ecx" turns the saved carry into a count of 0/1.
	shr     ecx,1           ; CF = bit 0
	pushf                   ; save it for the final stosb
	shr     ecx,1           ; CF = bit 1, ecx = count / 4
	rep     stosd
	adc     ecx,ecx
	rep     stosw           ; 0 or 1 word
	popf
	adc     ecx,ecx
DSLOWFILL:
	rep     stosb           ; 0 or 1 byte (or the whole middle run)

DDO_RIGHT_EDGE:
	mov     al,bl           ; right edge mask
	out     dx,al
	mov     al,ah           ; restore color at byte al
	stosb                   ; draw right edge

DSCAN_LINE_COMPLETE:
	pop     esi             ; restore current y address offset
	pop     ebx             ; restore left edge work array pointer
	pop     edx             ; restore right edge work array pointer
	pop     ecx             ; restore last y address offset
	add     esi,SCREEN_WIDTH        ; next scan line
	add     ebx,4                   ; next left x
	add     edx,4                   ; next right x
	cmp     ecx,esi
	jns     DSTART_SCAN     ; loop while last line >= current line
	;jnz     DSTART_SCAN     ; use this to not draw bottom edges

DFILL_COMPLETE:
	pop     edx             ; restore caller's registers
	pop     ecx
	pop     ebx
	pop     edi
	pop     esi
	ret
se_dfillpoly_    endp

;-----------------------------------------------------------------------
; se_gfillpoly_ - span filler that copies per-plane source buffers
;
; Makes four passes over the scan lines _se_top .. _se_bottom
; (inclusive), one per video plane.  Each pass selects its plane in the
; Map Mask register (01h, 02h, 04h, 08h) and, for every line, copies
; bytes from that plane's source buffer (_se_plane0_ptr ..
; _se_plane3_ptr) to video memory between the byte columns left/4 and
; right/4, where left/right come from the _se_lptr / _se_rptr arrays.
;
; Source layout as used by this code: the source pointer advances by
; SCREEN_WIDTH bytes per scan line and copying starts at the beginning
; of the line's source row, not at column left/4.  On planes 0-2 a zero
; first source byte makes the pass skip one byte in both source and
; destination.
; NOTE(review): presumably the buffers are pre-rendered per span by the
; caller -- confirm the expected buffer format.
;
; Byte count per plane (before the optional first-byte skip):
;   plane 0: right/4 - left/4 + 1
;   plane 1: right/4 - left/4, +1 if (right AND 3) <> 0
;   plane 2: right/4 - left/4, +1 if bit 1 of right is set
;   plane 3: right/4 - left/4, +1 if (right AND 3) = 3
; The "right" tested here is the value read from the _se_rptr array,
; before any left/right swap.
;
; NOTE(review): spans entirely outside [_se_gminx,_se_gmaxx] are
; skipped, but partly visible spans are NOT clipped in this routine.
;
; In:    no register arguments (inputs are the globals plus
;        se_page_addr and se_result)
; Out:   nothing
; Clobb: eax, flags; DF is left clear.
;        ebx, ecx and edx are saved and restored here in addition to
;        esi/edi so the caller's registers survive the call.
;
; Per-plane loop register roles:
;   eax  address of x=0 on the current scan line
;   ebx  left-edge array pointer
;   edx  right-edge array pointer
;   ecx  address of x=0 on the last scan line (byte count mid-line)
;   esi  source pointer for the current line
;   edi  destination pointer
;-----------------------------------------------------------------------
se_gfillpoly_   proc
	push    esi             ; preserve caller's registers
	push    edi
	push    ebx
	push    ecx
	push    edx

	cld                     ; make movs instructions inc pointers

	mov     dx,SC_INDEX     ; point SC Index to the Map Mask
	mov     al,MAP_MASK
	out     dx,al
	jmp     SHORT $+2       ; short delay between the two port writes

G0_SETUP:
	; pixel on plane 0 is 0001
	; pixel on plane 1 is 0010
	; pixel on plane 2 is 0100
	; pixel on plane 3 is 1000
	; set to plane 0
	mov     dx,SC_INDEX+1   ; SC Index already selects the Map Mask reg
	mov     al,01h          ; plane 0 mask
	out     dx,al

	; eax = se_page_addr + _se_top * SCREEN_WIDTH (80 = 5 * 16)
	mov     eax,_se_top
	lea     eax,[eax+eax*4]
	shl     eax,4
	add     eax,se_page_addr

	; ecx = se_page_addr + _se_bottom * SCREEN_WIDTH
	mov     ecx,_se_bottom
	lea     ecx,[ecx+ecx*4]
	shl     ecx,4
	add     ecx,se_page_addr

	mov     ebx,_se_lptr
	mov     edx,_se_rptr
	mov     esi,_se_plane0_ptr

	; Save everything except color ptr for next plane
	push    eax ebx ecx edx

G0_START_SCAN:
	push    ecx             ; save last y address offset
	push    edx             ; save right edge work array pointer
	push    esi             ; save our ptr to our colors
				; (ebx is not modified in this pass)

	mov     edi,[ebx]       ; edi = left pixel
	mov     ecx,[edx]       ; ecx = right pixel

	cmp     ecx,edi         ; there is a chance that l>r: swap then
	jg      SHORT G0_1
	xchg    edi,ecx

G0_1:
	; Skip any span that is completely off the screen
	cmp     edi,_se_gmaxx
	jg      SHORT G0_SCAN_LINE_COMPLETE
	cmp     ecx,_se_gminx
	jl      SHORT G0_SCAN_LINE_COMPLETE

	; Convert both x values to byte columns; the left one doubles as
	; the offset that is later added to eax
	shr     edi,2
	shr     ecx,2

	cmp     byte ptr [esi],00h
	jne     SHORT G0_2
	inc     esi             ; zero first byte: skip it in the source
	inc     edi             ; and in video memory
G0_2:
	sub     ecx,edi         ; ecx = right column - left column
	js      SHORT G0_SCAN_LINE_COMPLETE
	inc     ecx             ; plane 0 always includes the last column
	add     edi,eax         ; edi = destination address

	test    se_result,01h   ; bit 0 clear: byte moves only
	jz      SHORT G0_SLOWFILL

	; Move count/4 dwords, then a word if bit 1 was set, then a byte
	; if bit 0 was set (rep leaves ecx = 0, adc turns CF into 0/1)
	shr     ecx,1           ; CF = bit 0
	pushf                   ; save it for the final movsb
	shr     ecx,1           ; CF = bit 1, ecx = count / 4
	rep     movsd
	adc     ecx,ecx
	rep     movsw
	popf
	adc     ecx,ecx
G0_SLOWFILL:
	rep     movsb

G0_SCAN_LINE_COMPLETE:
	pop     esi             ; restore ptr to our colors
	pop     edx             ; restore right edge work array pointer
	pop     ecx             ; restore last y address offset
	add     ebx,4
	add     edx,4
	add     esi,SCREEN_WIDTH
	add     eax,SCREEN_WIDTH
	cmp     ecx,eax
	jns     SHORT G0_START_SCAN     ; loop while last line >= current

G1_SETUP:
	; set to plane 1
	mov     dx,SC_INDEX+1   ; SC Index already selects the Map Mask reg
	mov     al,02h          ; plane 1 mask
	out     dx,al
	mov     esi,_se_plane1_ptr

	; Restore everything that was saved
	pop     edx ecx ebx eax

	; Save everything again except color ptr for next plane
	push    eax ebx ecx edx

G1_START_SCAN:
	push    ecx             ; save last y address offset
	push    edx             ; save right edge work array pointer
	push    ebx             ; save left edge work array pointer
	push    esi             ; save our ptr to our colors

	mov     edi,[ebx]       ; edi = left pixel
	mov     ecx,[edx]       ; ecx = right pixel
	mov     ebx,ecx         ; ebx = right pixel as read (pre-swap)

	cmp     ecx,edi         ; there is a chance that l>r: swap then
	jg      SHORT G1_1
	xchg    edi,ecx

G1_1:
	; Skip any span that is completely off the screen
	cmp     edi,_se_gmaxx
	jg      SHORT G1_SCAN_LINE_COMPLETE
	cmp     ecx,_se_gminx
	jl      SHORT G1_SCAN_LINE_COMPLETE

	shr     edi,2           ; byte columns of left and right edge
	shr     ecx,2

	cmp     byte ptr [esi],00h
	jne     SHORT G1_2
	inc     esi             ; zero first byte: skip it in the source
	inc     edi             ; and in video memory
G1_2:
	sub     ecx,edi         ; ecx = right column - left column
	js      SHORT G1_SCAN_LINE_COMPLETE
	test    ebx,03h         ; last column only if (right AND 3) <> 0
	jz      SHORT G1_3
	inc     ecx
G1_3:
	add     edi,eax         ; edi = destination address

	test    se_result,01h   ; bit 0 clear: byte moves only
	jz      SHORT G1_SLOWFILL

	shr     ecx,1           ; CF = bit 0
	pushf                   ; save it for the final movsb
	shr     ecx,1           ; CF = bit 1, ecx = count / 4
	rep     movsd
	adc     ecx,ecx
	rep     movsw
	popf
	adc     ecx,ecx
G1_SLOWFILL:
	rep     movsb

G1_SCAN_LINE_COMPLETE:
	pop     esi             ; restore ptr to our colors
	pop     ebx             ; restore left edge work array pointer
	pop     edx             ; restore right edge work array pointer
	pop     ecx             ; restore last y address offset
	add     ebx,4
	add     edx,4
	add     esi,SCREEN_WIDTH
	add     eax,SCREEN_WIDTH
	cmp     ecx,eax
	jns     SHORT G1_START_SCAN     ; loop while last line >= current

G2_SETUP:
	; set to plane 2
	mov     dx,SC_INDEX+1   ; SC Index already selects the Map Mask reg
	mov     al,04h          ; plane 2 mask
	out     dx,al
	mov     esi,_se_plane2_ptr

	; Restore everything that was saved
	pop     edx ecx ebx eax

	; Save everything again except color ptr for next plane
	push    eax ebx ecx edx

G2_START_SCAN:
	push    ecx             ; save last y address offset
	push    edx             ; save right edge work array pointer
	push    esi             ; save our ptr to our colors
				; (ebx is not modified in this pass)

	mov     edi,[ebx]       ; edi = left pixel
	mov     ecx,[edx]       ; ecx = right pixel

	cmp     ecx,edi         ; there is a chance that l>r: swap then
	jg      SHORT G2_1
	xchg    edi,ecx

G2_1:
	; Skip any span that is completely off the screen
	cmp     edi,_se_gmaxx
	jg      SHORT G2_SCAN_LINE_COMPLETE
	cmp     ecx,_se_gminx
	jl      SHORT G2_SCAN_LINE_COMPLETE

	shr     edi,2           ; byte columns of left and right edge
	shr     ecx,2

	cmp     byte ptr [esi],00h
	jne     SHORT G2_2
	inc     esi             ; zero first byte: skip it in the source
	inc     edi             ; and in video memory
G2_2:
	sub     ecx,edi         ; ecx = right column - left column
	js      SHORT G2_SCAN_LINE_COMPLETE
	test    dword ptr [edx],02h     ; last column only if bit 1 of the
	jz      SHORT G2_3              ; right pixel (as read) is set
	inc     ecx
G2_3:
	add     edi,eax         ; edi = destination address

	test    se_result,01h   ; bit 0 clear: byte moves only
	jz      SHORT G2_SLOWFILL

	shr     ecx,1           ; CF = bit 0
	pushf                   ; save it for the final movsb
	shr     ecx,1           ; CF = bit 1, ecx = count / 4
	rep     movsd
	adc     ecx,ecx
	rep     movsw
	popf
	adc     ecx,ecx
G2_SLOWFILL:
	rep     movsb

G2_SCAN_LINE_COMPLETE:
	pop     esi             ; restore ptr to our colors
	pop     edx             ; restore right edge work array pointer
	pop     ecx             ; restore last y address offset
	add     ebx,4
	add     edx,4
	add     esi,SCREEN_WIDTH
	add     eax,SCREEN_WIDTH
	cmp     ecx,eax
	jns     SHORT G2_START_SCAN     ; loop while last line >= current

G3_SETUP:
	; set to plane 3
	mov     dx,SC_INDEX+1   ; SC Index already selects the Map Mask reg
	mov     al,08h          ; plane 3 mask
	out     dx,al
	mov     esi,_se_plane3_ptr

	; Restore everything that was saved (last pass: no re-save)
	pop     edx ecx ebx eax

G3_START_SCAN:
	push    ecx             ; save last y address offset
	push    edx             ; save right edge work array pointer
	push    ebx             ; save left edge work array pointer
	push    esi             ; save our ptr to our colors

	mov     edi,[ebx]       ; edi = left pixel
	mov     ecx,[edx]       ; ecx = right pixel
	mov     ebx,ecx         ; ebx = right pixel as read (pre-swap)

	cmp     ecx,edi         ; there is a chance that l>r: swap then
	jg      SHORT G3_1
	xchg    edi,ecx

G3_1:
	; Skip any span that is completely off the screen
	cmp     edi,_se_gmaxx
	jg      SHORT G3_SCAN_LINE_COMPLETE
	cmp     ecx,_se_gminx
	jl      SHORT G3_SCAN_LINE_COMPLETE

	shr     edi,2           ; byte columns of left and right edge
	shr     ecx,2
	; (this pass has no zero-first-byte skip)
	sub     ecx,edi         ; ecx = right column - left column
	js      SHORT G3_SCAN_LINE_COMPLETE

	and     ebx,03h         ; last column only if (right AND 3) = 3
	cmp     ebx,03h
	jne     SHORT G3_3
	inc     ecx
G3_3:
	add     edi,eax         ; edi = destination address

	test    se_result,01h   ; bit 0 clear: byte moves only
	jz      SHORT G3_SLOWFILL

	shr     ecx,1           ; CF = bit 0
	pushf                   ; save it for the final movsb
	shr     ecx,1           ; CF = bit 1, ecx = count / 4
	rep     movsd
	adc     ecx,ecx
	rep     movsw
	popf
	adc     ecx,ecx
G3_SLOWFILL:
	rep     movsb

G3_SCAN_LINE_COMPLETE:
	pop     esi             ; restore ptr to our colors
	pop     ebx             ; restore left edge work array pointer
	pop     edx             ; restore right edge work array pointer
	pop     ecx             ; restore last y address offset
	add     ebx,4
	add     edx,4
	add     esi,SCREEN_WIDTH
	add     eax,SCREEN_WIDTH
	cmp     ecx,eax
	jns     SHORT G3_START_SCAN     ; loop while last line >= current

GFILL_COMPLETE:
	pop     edx             ; restore caller's registers
	pop     ecx
	pop     ebx
	pop     edi
	pop     esi
	ret
se_gfillpoly_   endp

;-----------------------------------------------------------------------
; se_setpage_ - select the page the fill routines draw into
;
; Stores SCREEN_MEMORY + eax * ONE_PAGE_OFFSET in se_page_addr.
;
; In:    eax = page number
; Out:   eax = the address stored in se_page_addr
; Clobb: flags only.  The three-operand imul keeps the product in eax,
;        so edx is no longer overwritten (the original "mul ebx" wrote
;        the high half of the product to edx without saving it), and no
;        scratch register has to be pushed.
;-----------------------------------------------------------------------
se_setpage_    proc    near
	imul    eax,eax,ONE_PAGE_OFFSET ; eax = page * bytes per page
	add     eax,SCREEN_MEMORY
	mov     se_page_addr,eax
	ret
se_setpage_     endp

;-----------------------------------------------------------------------
; se_inittest_ - write one test dword to the start of video memory
;
; Selects plane 0 in the Map Mask register and copies the dword that
; eax points at to SCREEN_MEMORY.
; NOTE(review): presumably the caller reads the data back to decide
; what to pass to se_settestresult_ -- confirm.
;
; In:    eax = pointer to the source dword
; Out:   nothing
; Clobb: al (left holding the plane mask 01h), flags
;-----------------------------------------------------------------------
se_inittest_   proc    near
	push    esi
	push    edi
	push    edx

	mov     esi,eax         ; source pointer for the copy

	mov     dx,SC_INDEX     ; select the Map Mask register
	mov     al,MAP_MASK
	out     dx,al
	jmp     SHORT $+2       ; short delay between the two port writes
	inc     dx              ; dx = SC_INDEX+1, the SC data port
	mov     al,01h          ; plane 0 mask
	out     dx,al

	mov     edi,SCREEN_MEMORY
	movsd                   ; copy exactly one dword

	pop     edx
	pop     edi
	pop     esi
	ret
se_inittest_    endp

;-----------------------------------------------------------------------
; se_settestresult_ - record the result flag used by the fill routines
;
; Stores al in se_result.  The fill routines test bit 0 of se_result:
; set selects their dword/word fast path, clear selects byte-only
; transfers.
;
; In:    al = result (0 is fail, 1 is pass)
; Out:   nothing
; Clobb: nothing
;-----------------------------------------------------------------------
se_settestresult_       proc    near
	mov     se_result,al    ; no other register is touched
	ret
se_settestresult_       endp

	end