00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034 #include "windows.h"
00035 #include "softdrv.h"
00036 #include "basetype.h"
00037 #include "drawspan.h"
00038 #include "render.h"
00039
// Per-edge span parameters read by the DrawScanLine*_AsmX86FPU routines.
// Those routines address the members by name from inline assembly, so the
// member order and sizes below are left exactly as they are.
00040 typedef struct EdgeAsmFPUTag
00041 {
00042 int X, y, Height; // X: integer x of this span end (read by the span routines); y, Height: not referenced in the code visible here
00043 geFloat x, u, v, z, r, g, b; // u, v: texture coordinates; z: depth; r, g, b: float light values; x: not referenced in the code visible here
00044 geFloat xstep, ustep, vstep, zstep; // not referenced in the code visible here (presumably per-scanline edge steps - confirm)
00045 geFloat rstep, gstep, bstep; // not referenced in the code visible here
00046 uint32 R, G, B; // integer light values, loaded with fild by the span routines
00047 } EdgeAsmFPU;
00048
00049
00050 #pragma warning (disable:4410) //illegal size for operand
00051
// DrawScanLineGouraudNoZ_AsmX86FPU
// Draws one horizontal span of 16-bit pixels. Each texel is fetched through
// GBitPtrHalf using the interpolated u/v, and each colour field is scaled by a
// light value interpolated from pLeft to pRight. No depth buffer is read or written.
//   pLeft, pRight - edge records for the two ends of the span (X, u, v and the
//                   light values are read from them).
// Nothing is drawn when pRight->X - pLeft->X <= 0. When pLeft->X is odd, one pixel
// is drawn on its own first; the rest is drawn two pixels per pass, and an odd
// pixel left over at the end of the span is not drawn.
// The routine uses names that are not declared inside it (Dest, TDest, Red, Green,
// Blue, u16, v16, UStep, VStep, widTemp, Bucket, Bucket2, VShift, GHMaskShifted,
// GWMask, GBitPtrHalf, Real65536, Two, Magic and the *MASK constants); their
// declarations are outside this view.
00052 void DrawScanLineGouraudNoZ_AsmX86FPU(EdgeAsmFPU *pLeft, EdgeAsmFPU *pRight)
00053 {
00054 TDest =Dest;
00055 Red =Green =0;
00056 _asm
00057 {
00058 push ebx
00059 push ecx
00060 push esi
00061 push edi
00062 // ebx -> left edge, ecx -> right edge; return if pRight->X - pLeft->X <= 0 (signed).
00063 mov ebx,pLeft
00064 mov ecx,pRight
00065 mov eax,[ebx]EdgeAsmFPU.X
00066 mov edx,[ecx]EdgeAsmFPU.X
00067 sub edx,eax
00068 jle GouraudReturnNoZ
00069 // edx = pixel count (right X - left X + 1); TDest += left X * 2 bytes.
00070 mov esi,eax
00071 inc edx
00072 shl eax,1
00073 add TDest,eax
00074 test esi,1
00075 jz NoSinglePixie
00076 // Left X is odd: draw one pixel by itself before the two-at-a-time loop.
00077 // NOTE(review): this path loads the float r/g/b members, while the pair loop below loads the integer R/G/B members - confirm both hold the same light values.
00078 fld dword ptr [ebx]EdgeAsmFPU.u ; UL
00079 fmul [Real65536] ; UL16
00080 fld dword ptr [ebx]EdgeAsmFPU.v ; VL UL16
00081 fmul [Real65536] ; VL16 UL16
00082 fld dword ptr [ebx]EdgeAsmFPU.r ; RL VL16 UL16
00083 fld dword ptr [ebx]EdgeAsmFPU.g ; GL RL VL16 UL16
00084 fld dword ptr [ebx]EdgeAsmFPU.b ; BL GL RL VL16 UL16
00085 fxch st(4) ; UL16 GL RL VL16 BL
00086 fistp [u16] ; GL RL VL16 BL
00087 fxch st(2) ; VL16 RL GL BL
00088 fistp [v16] ; RL GL BL
00089 // Park the pixel count in widTemp; edx is needed for the texel lookup.
00090 mov [widTemp],edx
00091 // Texel index = ((v16 >> VShift) & GHMaskShifted) + ((u16 >> 16) & GWMask) + GBitPtrHalf;
00092 mov ecx,[VShift]
00093 mov ebx,[GHMaskShifted]
00094 // the 16-bit texel is then read from byte address index * 2.
00095 mov esi,[GWMask]
00096 mov edi,[v16]
00097
00098 mov edx,dword ptr[u16]
00099
00100 shr edi,cl
00101
00102 shr edx,16
00103 xor eax,eax
00104
00105 and edi,ebx
00106 and edx,esi
00107
00108 add edi,edx
00109
00110 add edi,GBitPtrHalf
00111 mov ax,word ptr[edi*2]
00112 // Split the texel into its red, green and blue fields (eax, ebx, ecx).
00113 mov ebx,eax
00114 and eax,REDMASK
00115
00116 mov ecx,ebx
00117 and ebx,GREENMASK
00118
00119 mov dword ptr[Red],eax
00120 and ecx,BLUEMASK
00121
00122 mov dword ptr[Green],ebx
00123 mov dword ptr[Blue],ecx
00124 // Scale each field by its light value. Magic is added before every store and only the low dword of the stored qword is read back (presumably a float-to-int bias constant - confirm).
00125 fimul dword ptr[Red] ; R GL BL
00126 fxch st(1) ; GL R BL
00127 fimul dword ptr[Green] ; G R BL
00128 fxch st(1) ; R G BL
00129 fadd qword ptr[Magic] ; Rk G BL
00130 fxch st(2) ; BL G Rk
00131 fimul [Blue] ; B G Rk
00132 fxch st(1) ; G B Rk
00133 fadd qword ptr[Magic] ; Gk B Rk
00134 fxch st(2) ; Rk B Gk
00135 fstp qword ptr[Bucket] ; B Gk
00136 fadd qword ptr[Magic] ; Bk Gk
00137 fxch st(1) ; Gk Bk
00138 fstp qword ptr[Bucket2] ; Bk
00139 // Read the scaled red and green back and mask them to their fields.
00140 mov edx,dword ptr[Bucket]
00141 mov eax,dword ptr[Bucket2]
00142
00143 and edx,REDMASK
00144 and eax,GREENMASK
00145
00146 fstp qword ptr[Bucket]
00147 // Same for blue, then merge the three fields into dx. The FPU stack is empty from here.
00148 mov ecx,TDest
00149 mov ebx,dword ptr[Bucket]
00150
00151 and ebx,BLUEMASK
00152 or edx,eax
00153
00154 add TDest,2
00155 or edx,ebx
00156 // Store the pixel, reload the edge pointers, then make the remaining count even (return if nothing is left).
00157 mov ebx,pLeft
00158 mov word ptr[ecx],dx
00159 mov edx,[widTemp]
00160 mov ecx,pRight
00161 dec edx
00162 jz GouraudReturnNoZ
00163 mov esi,edx
00164 and esi,1
00165 sub edx,esi
00166 jz GouraudReturnNoZ
00167 // On entry to the label below edx = number of pixels still to draw.
00168 NoSinglePixie:
00169 mov [widTemp],edx ; just for a temp
00170 shr edx,1
00171 // widTemp briefly holds the pixel count for the fild, then the pair count (count >> 1).
00172 ; try to keep fmul fxch pairs seperated to avoid stalling
00173 ; calc this scanlines steps ; FPU Stack
00174 ; st0 st1 st2 st3 st4 st5 st6 st7
00175 fild dword ptr [widTemp] ; WID
00176
00177 mov [widTemp],edx ; Color interps doubled
00178
00179 fld dword ptr [ecx]EdgeAsmFPU.u ; UR WID
00180 fld dword ptr [ebx]EdgeAsmFPU.u ; UL UR WID
00181 fsub st(1), st ; UL UD WID
00182 fld dword ptr [ecx]EdgeAsmFPU.v ; VR UL UD WID
00183 fxch st(1) ; UL VR UD WID
00184 fmul [Real65536] ; UL16 VR UD WID
00185 fld dword ptr [ebx]EdgeAsmFPU.v ; VL UL16 VR UD WID
00186 fsub st(2), st ; VL UL16 VD UD WID
00187 fild dword ptr [ecx]EdgeAsmFPU.R ; RR VL UL16 VD UD WID
00188 fxch st(3) ; VD VL UL16 RR UD WID
00189 fmul [Real65536] ; VD16 VL UL16 RR UD WID
00190 fild dword ptr [ebx]EdgeAsmFPU.R ; RL VD16 VL UL16 RR UD WID
00191 fsub st(4),st ; RL VD16 VL UL16 RD UD WID
00192 fxch st(5) ; UD VD16 VL UL16 RD RL WID
00193 fmul [Real65536] ; UD16 VD16 VL UL16 RD RL WID
00194 fxch st(2) ; VL VD16 UD16 UL16 RD RL WID
00195 fmul [Real65536] ; VL16 VD16 UD16 UL16 RD RL WID
00196 fxch st(3) ; UL16 VD16 UD16 VL16 RD RL WID
00197 fistp [u16] ; VD16 UD16 VL16 RD RL WID
00198 fxch st(2) ; VL16 UD16 VD16 RD RL WID
00199 fistp [v16] ; UD16 VD16 RD RL WID
00200 fld1 ; 1 UD16 VD16 RD RL WID
00201 fdivrp st(5),st ; UD16 VD16 RD RL DWID
00202 // 1/WID now sits at st(4). UStep and VStep are per pixel; the light steps below use 2/WID,
00203 // i.e. they are per pixel pair, and are rounded to integers with frndint.
00204
00205 fmul st,st(4) ; USTP VD16 RD RL DWID
00206 fild [ecx]EdgeAsmFPU.G ; RG USTP VD16 RD RL DWID
00207 fild [ebx]EdgeAsmFPU.G ; LG RG USTP VD16 RD RL DWID
00208 fsub st(1),st ; LG GD USTP VD16 RD RL DWID
00209 fxch st(2) ; USTP GD LG VD16 RD RL DWID
00210 fistp [UStep] ; GD LG VD16 RD RL DWID
00211 fild [ecx]EdgeAsmFPU.B ; RB GD LG VD16 RD RL DWID
00212 fild [ebx]EdgeAsmFPU.B ; LB RB GD LG VD16 RD RL DWID
00213 fsub st(1),st ; LB BD GD LG VD16 RD RL DWID
00214 fxch st(4) ; VD16 BD GD LG LB RD RL DWID
00215 fmul st,st(7) ; VSTP BD GD LG LB RD RL DWID
00216 fxch st(7) ; DWID BD GD LG LB RD RL VSTP
00217 fmul dword ptr[Two] ; DW2 BD GD LG LB RD RL VSTP
00218 fxch st(7) ; VSTP BD GD LG LB RD RL DW2
00219 fxch st(5) ; RD BD GD LG LB VSTP RL DW2
00220 fmul st,st(7) ; RI BD GD LG LB VSTP RL DW2
00221 frndint
00222 fxch st(2) ; GD BD RI LG LB VSTP RL DW2
00223 fmul st,st(7) ; GI BD RI LG LB VSTP RL DW2
00224 frndint
00225 fxch st(7) ; DW2 BD RI LG LB VSTP RL GI
00226 fmulp st(1),st ; BI RI LG LB VSTP RL GI
00227 frndint
00228 fxch st(4) ; VSTP RI LG LB BI RL GI
00229 fistp [VStep] ; RI LG LB BI RL GI
00230 // ebp becomes a scratch register until the pop below; the loop references no parameters.
00231 push ebp
00232 // Each pass draws two pixels. FPU stack at the loop top: RI LG LB BI RL GI.
00233 PixieLoop:
00234 mov ecx,[VShift]
00235 mov ebx,[GHMaskShifted]
00236
00237 mov ebp,[GWMask]
00238 mov edi,[v16]
00239
00240 mov edx,dword ptr[u16]
00241 mov esi,edi
00242
00243 mov eax,edx
00244
00245 shr edi,cl
00246 add esi,[VStep]
00247
00248 shr edx,16
00249 add eax,[UStep]
00250
00251 and edi,ebx
00252 and edx,ebp
00253
00254 add edi,edx
00255 mov edx,eax
00256
00257 add edi,GBitPtrHalf
00258 add eax,[UStep]
00259
00260 mov [u16],eax
00261 mov ax,word ptr[edi*2]
00262
00263 mov edi,esi
00264 add esi,[VStep]
00265
00266 shr edx,16
00267 mov [v16],esi
00268
00269 shr edi,cl
00270 and edx,ebp
00271
00272 mov esi,GBitPtrHalf
00273 and edi,ebx
00274
00275 shl eax,16
00276 add esi,edx
00277
00278 add esi,edi
00279
00280 mov ax,word ptr[esi*2]
00281 // eax now holds both texels: the first in the high half, the second in the low half.
00282 mov ebx,eax
00283 and eax,REDMASK
00284
00285 mov ecx,ebx
00286 and ebx,GREENMASK
00287
00288 mov dword ptr[Red],eax
00289 and ecx,BLUEMASK
00290
00291 mov dword ptr[Green],ebx
00292 mov dword ptr[Blue],ecx
00293 // Red and Green are loaded as 64-bit integers, Blue as 32-bit; one light value scales both texels of the pair.
00294 fild qword ptr[Red] ; r RI LG LB BI RL GI
00295 fmul st,st(5) ; R RI LG LB BI RL GI
00296 fild qword ptr[Green] ; g R RI LG LB BI RL GI
00297 fmul st,st(3) ; G R RI LG LB BI RL GI
00298 fxch st(6) ; RL R RI LG LB BI G GI
00299 fadd st,st(2) ; RL2 R RI LG LB BI G GI
00300 fxch st(3) ; LG R RI RL2 LB BI G GI
00301 fadd st,st(7) ; LG2 R RI RL2 LB BI G GI
00302 fxch st(6) ; G R RI RL2 LB BI LG2 GI
00303 fadd qword ptr[Magic] ; Gk R RI RL2 LB BI LG2 GI
00304 fxch st(1) ; R Gk RI RL2 LB BI LG2 GI
00305 fadd qword ptr[Magic] ; Rk Gk RI RL2 LB BI LG2 GI
00306 fxch st(1) ; Gk Rk RI RL2 LB BI LG2 GI
00307 fstp qword ptr[Bucket] ; Rk RI RL2 LB BI LG2 GI
00308 fstp qword ptr[Bucket2] ; RI RL2 LB BI LG2 GI
00309
00310 mov edx,dword ptr[Bucket]
00311 mov eax,dword ptr[Bucket2]
00312
00313 fild dword ptr[Blue] ; b RI RL2 LB BI LG2 GI
00314 fmul st,st(3) ; B RI RL2 LB BI LG2 GI
00315 fxch st(3) ; LB RI RL2 B BI LG2 GI
00316 fadd st,st(4) ; LB2 RI RL2 B BI LG2 GI
00317 fxch st(3) ; B RI RL2 LB2 BI LG2 GI
00318 fadd qword ptr[Magic] ; Bk RI RL2 LB2 BI LG2 GI
00319
00320 and edx,GREENMASK
00321 and eax,REDMASK
00322
00323 fstp qword ptr[Bucket] ; RI RL2 LB2 BI LG2 GI
00324 fstp qword ptr[Bucket2] ; RL2 LB2 BI LG2 GI
00325 // Merge the masked fields of both pixels into edi; RI is parked in Bucket2 meanwhile.
00326 mov edi,edx
00327 mov ebx,dword ptr[Bucket]
00328 or edi,eax
00329 mov ebp,TDest
00330 and ebx,BLUEMASK
00331 // Reloading RI below puts the stack back into the loop-top layout.
00332 fxch st(3) ; LG2 LB2 BI RL2 GI
00333 or edi,ebx
00334
00335 fld qword ptr[Bucket2] ; RI LG2 LB2 BI RL2 GI
00336 // Swap halves so the first texel is stored at the lower address.
00337 rol edi,16
00338 add TDest,4
00339
00340 mov [ebp],edi
00341 dec [widTemp]
00342
00343 jnz PixieLoop
00344
00345 pop ebp
00346 // Pop the six values left on the FPU stack; u16 and v16 only serve as dump targets here.
00347 fstp [u16]
00348 fstp [v16]
00349 fstp [u16]
00350 fstp [v16]
00351 fstp [u16]
00352 fstp [v16]
00353
00354 GouraudReturnNoZ:
00355 pop edi
00356 pop esi
00357 pop ecx
00358 pop ebx
00359 }
00360 }
00361
00362 void DrawScanLineGouraudNoZTrans_AsmX86FPU(EdgeAsmFPU *pLeft, EdgeAsmFPU *pRight)
00363 {
00364 TDest =Dest;
00365 Red =Green =0;
00366 _asm
00367 {
00368 push ebx
00369 push ecx
00370 push esi
00371 push edi
00372
00373 mov ebx,pLeft
00374 mov ecx,pRight
00375 mov eax,[ebx]EdgeAsmFPU.X
00376 mov edx,[ecx]EdgeAsmFPU.X
00377 sub edx,eax
00378 jle GouraudReturnNoZ
00379
00380 mov esi,eax
00381 inc edx
00382 shl eax,1
00383 add TDest,eax
00384 test esi,1
00385 jz NoSinglePixie
00386
00387
00388 fld dword ptr [ebx]EdgeAsmFPU.u ; UL
00389 fmul [Real65536] ; UL16
00390 fld dword ptr [ebx]EdgeAsmFPU.v ; VL UL16
00391 fmul [Real65536] ; VL16 UL16
00392 fild dword ptr [ebx]EdgeAsmFPU.R ; RL VL16 UL16
00393 fild dword ptr [ebx]EdgeAsmFPU.G ; GL RL VL16 UL16
00394 fild dword ptr [ebx]EdgeAsmFPU.B ; BL GL RL VL16 UL16
00395 fxch st(4) ; UL16 GL RL VL16 BL
00396 fistp [u16] ; GL RL VL16 BL
00397 fxch st(2) ; VL16 RL GL BL
00398 fistp [v16] ; RL GL BL
00399
00400 mov [widTemp],edx
00401
00402 mov ecx,[VShift]
00403 mov ebx,[GHMaskShifted]
00404
00405 mov esi,[GWMask]
00406 mov edi,[v16]
00407
00408 mov edx,dword ptr[u16]
00409
00410 shr edi,cl
00411
00412 shr edx,16
00413 xor eax,eax
00414
00415 and edi,ebx
00416 and edx,esi
00417
00418 add edi,edx
00419
00420 add edi,GBitPtrHalf
00421 mov ax,word ptr[edi*2]
00422
00423 mov word ptr[TempPix],ax
00424
00425 mov ebx,eax
00426 and eax,REDMASK
00427
00428 mov ecx,ebx
00429 and ebx,GREENMASK
00430
00431 mov dword ptr[Red],eax
00432 and ecx,BLUEMASK
00433
00434 mov dword ptr[Green],ebx
00435 mov dword ptr[Blue],ecx
00436
00437 fimul dword ptr[Red] ; R GL BL
00438 fxch st(1) ; GL R BL
00439 fimul dword ptr[Green] ; G R BL
00440 fxch st(1) ; R G BL
00441 fadd qword ptr[Magic] ; Rk G BL
00442 fxch st(2) ; BL G Rk
00443 fimul [Blue] ; B G Rk
00444 fxch st(1) ; G B Rk
00445 fadd qword ptr[Magic] ; Gk B Rk
00446 fxch st(2) ; Rk B Gk
00447 fstp qword ptr[Bucket] ; B Gk
00448 fadd qword ptr[Magic] ; Bk Gk
00449 fxch st(1) ; Gk Bk
00450 fstp qword ptr[Bucket2] ; Bk
00451
00452 mov edx,dword ptr[Bucket]
00453 mov eax,dword ptr[Bucket2]
00454
00455 and edx,REDMASK
00456 and eax,GREENMASK
00457
00458 fstp qword ptr[Bucket]
00459
00460 mov ecx,TDest
00461 mov ebx,dword ptr[Bucket]
00462
00463 and ebx,BLUEMASK
00464 or edx,eax
00465
00466 add TDest,2
00467 or edx,ebx
00468
00469 mov ebx,pLeft
00470 cmp word ptr[TempPix],01h
00471 je SkipSinglePixie
00472
00473 mov word ptr[ecx],dx
00474
00475 SkipSinglePixie:
00476 mov edx,[widTemp]
00477 mov ecx,pRight
00478 dec edx
00479 jz GouraudReturnNoZ
00480 mov esi,edx
00481 and esi,1
00482 sub edx,esi
00483 jz GouraudReturnNoZ
00484
00485 NoSinglePixie:
00486 mov [widTemp],edx ; just for a temp
00487 shr edx,1
00488
00489 ; try to keep fmul fxch pairs seperated to avoid stalling
00490 ; calc this scanlines steps ; FPU Stack
00491 ; st0 st1 st2 st3 st4 st5 st6 st7
00492 fild dword ptr [widTemp] ; WID
00493
00494 mov [widTemp],edx ; Color interps doubled
00495
00496 fld dword ptr [ecx]EdgeAsmFPU.u ; UR WID
00497 fld dword ptr [ebx]EdgeAsmFPU.u ; UL UR WID
00498 fsub st(1), st ; UL UD WID
00499 fld dword ptr [ecx]EdgeAsmFPU.v ; VR UL UD WID
00500 fxch st(1) ; UL VR UD WID
00501 fmul [Real65536] ; UL16 VR UD WID
00502 fld dword ptr [ebx]EdgeAsmFPU.v ; VL UL16 VR UD WID
00503 fsub st(2), st ; VL UL16 VD UD WID
00504 fild dword ptr [ecx]EdgeAsmFPU.R ; RR VL UL16 VD UD WID
00505 fxch st(3) ; VD VL UL16 RR UD WID
00506 fmul [Real65536] ; VD16 VL UL16 RR UD WID
00507 fild dword ptr [ebx]EdgeAsmFPU.R ; RL VD16 VL UL16 RR UD WID
00508 fsub st(4),st ; RL VD16 VL UL16 RD UD WID
00509 fxch st(5) ; UD VD16 VL UL16 RD RL WID
00510 fmul [Real65536] ; UD16 VD16 VL UL16 RD RL WID
00511 fxch st(2) ; VL VD16 UD16 UL16 RD RL WID
00512 fmul [Real65536] ; VL16 VD16 UD16 UL16 RD RL WID
00513 fxch st(3) ; UL16 VD16 UD16 VL16 RD RL WID
00514 fistp [u16] ; VD16 UD16 VL16 RD RL WID
00515 fxch st(2) ; VL16 UD16 VD16 RD RL WID
00516 fistp [v16] ; UD16 VD16 RD RL WID
00517 fld1 ; 1 UD16 VD16 RD RL WID
00518 fdivrp st(5),st ; UD16 VD16 RD RL DWID
00519
00520
00521
00522 fmul st,st(4) ; USTP VD16 RD RL DWID
00523 fild [ecx]EdgeAsmFPU.G ; RG USTP VD16 RD RL DWID
00524 fild [ebx]EdgeAsmFPU.G ; LG RG USTP VD16 RD RL DWID
00525 fsub st(1),st ; LG GD USTP VD16 RD RL DWID
00526 fxch st(2) ; USTP GD LG VD16 RD RL DWID
00527 fistp [UStep] ; GD LG VD16 RD RL DWID
00528 fild [ecx]EdgeAsmFPU.B ; RB GD LG VD16 RD RL DWID
00529 fild [ebx]EdgeAsmFPU.B ; LB RB GD LG VD16 RD RL DWID
00530 fsub st(1),st ; LB BD GD LG VD16 RD RL DWID
00531 fxch st(4) ; VD16 BD GD LG LB RD RL DWID
00532 fmul st,st(7) ; VSTP BD GD LG LB RD RL DWID
00533 fxch st(7) ; DWID BD GD LG LB RD RL VSTP
00534 fmul dword ptr[Two] ; DW2 BD GD LG LB RD RL VSTP
00535 fxch st(7) ; VSTP BD GD LG LB RD RL DW2
00536 fxch st(5) ; RD BD GD LG LB VSTP RL DW2
00537 fmul st,st(7) ; RI BD GD LG LB VSTP RL DW2
00538 frndint
00539 fxch st(2) ; GD BD RI LG LB VSTP RL DW2
00540 fmul st,st(7) ; GI BD RI LG LB VSTP RL DW2
00541 frndint
00542 fxch st(7) ; DW2 BD RI LG LB VSTP RL GI
00543 fmulp st(1),st ; BI RI LG LB VSTP RL GI
00544 frndint
00545 fxch st(4) ; VSTP RI LG LB BI RL GI
00546 fistp [VStep] ; RI LG LB BI RL GI
00547
00548 push ebp
00549
00550 PixieLoop:
00551 mov ecx,[VShift]
00552 mov ebx,[GHMaskShifted]
00553
00554 mov ebp,[GWMask]
00555 mov edi,[v16]
00556
00557 mov edx,dword ptr[u16]
00558 mov esi,edi
00559
00560 mov eax,edx
00561
00562 shr edi,cl
00563 add esi,[VStep]
00564
00565 shr edx,16
00566 add eax,[UStep]
00567
00568 and edi,ebx
00569 and edx,ebp
00570
00571 add edi,edx
00572 mov edx,eax
00573
00574 add edi,GBitPtrHalf
00575 add eax,[UStep]
00576
00577 mov [u16],eax
00578 mov ax,word ptr[edi*2]
00579
00580 mov edi,esi
00581 add esi,[VStep]
00582
00583 shr edx,16
00584 mov [v16],esi
00585
00586 shr edi,cl
00587 and edx,ebp
00588
00589 mov esi,GBitPtrHalf
00590 and edi,ebx
00591
00592 shl eax,16
00593 add esi,edx
00594
00595 add esi,edi
00596
00597 mov ax,word ptr[esi*2]
00598
00599 mov [TempPix],eax
00600
00601 mov ebx,eax
00602 and eax,REDMASK
00603
00604 mov ecx,ebx
00605 and ebx,GREENMASK
00606
00607 mov dword ptr[Red],eax
00608 and ecx,BLUEMASK
00609
00610 mov dword ptr[Green],ebx
00611 mov dword ptr[Blue],ecx
00612
00613 fild qword ptr[Red] ; r RI LG LB BI RL GI
00614 fmul st,st(5) ; R RI LG LB BI RL GI
00615 fild qword ptr[Green] ; g R RI LG LB BI RL GI
00616 fmul st,st(3) ; G R RI LG LB BI RL GI
00617 fxch st(6) ; RL R RI LG LB BI G GI
00618 fadd st,st(2) ; RL2 R RI LG LB BI G GI
00619 fxch st(3) ; LG R RI RL2 LB BI G GI
00620 fadd st,st(7) ; LG2 R RI RL2 LB BI G GI
00621 fxch st(6) ; G R RI RL2 LB BI LG2 GI
00622 fadd qword ptr[Magic] ; Gk R RI RL2 LB BI LG2 GI
00623 fxch st(1) ; R Gk RI RL2 LB BI LG2 GI
00624 fadd qword ptr[Magic] ; Rk Gk RI RL2 LB BI LG2 GI
00625 fxch st(1) ; Gk Rk RI RL2 LB BI LG2 GI
00626 fstp qword ptr[Bucket] ; Rk RI RL2 LB BI LG2 GI
00627 fstp qword ptr[Bucket2] ; RI RL2 LB BI LG2 GI
00628
00629 mov edx,dword ptr[Bucket]
00630 mov eax,dword ptr[Bucket2]
00631
00632 fild dword ptr[Blue] ; b RI RL2 LB BI LG2 GI
00633 fmul st,st(3) ; B RI RL2 LB BI LG2 GI
00634 fxch st(3) ; LB RI RL2 B BI LG2 GI
00635 fadd st,st(4) ; LB2 RI RL2 B BI LG2 GI
00636 fxch st(3) ; B RI RL2 LB2 BI LG2 GI
00637 fadd qword ptr[Magic] ; Bk RI RL2 LB2 BI LG2 GI
00638
00639 and edx,GREENMASK
00640 and eax,REDMASK
00641
00642 fstp qword ptr[Bucket] ; RI RL2 LB2 BI LG2 GI
00643 fstp qword ptr[Bucket2] ; RL2 LB2 BI LG2 GI
00644
00645 mov edi,edx
00646 mov ebx,dword ptr[Bucket]
00647 or edi,eax
00648 mov ebp,TDest
00649 and ebx,BLUEMASK
00650
00651 fxch st(3) ; LG2 LB2 BI RL2 GI
00652 fld qword ptr[Bucket2] ; RI LG2 LB2 BI RL2 GI
00653
00654 or edi,ebx
00655 add TDest,4
00656
00657 cmp [TempPix],010001h
00658 je SkipPixie
00659
00660 mov [ebp],edi
00661
00662 SkipPixie:
00663 dec [widTemp]
00664
00665 jnz PixieLoop
00666
00667 pop ebp
00668
00669 fstp [u16]
00670 fstp [v16]
00671 fstp [u16]
00672 fstp [v16]
00673 fstp [u16]
00674 fstp [v16]
00675
00676 GouraudReturnNoZ:
00677 pop edi
00678 pop esi
00679 pop ecx
00680 pop ebx
00681 }
00682 }
00683
// DrawScanLineGouraudZBuffer_AsmX86FPU
// Draws one horizontal span of 16-bit pixels with a 16-bit depth test and depth
// write. Each texel is fetched through GBitPtrHalf using the interpolated u/v, and
// each colour field is scaled by a light value interpolated from pLeft to pRight.
//   pLeft, pRight - edge records for the two ends of the span (X, u, v, z and the
//                   integer light values R, G, B are read from them).
// Depth test: a pixel is skipped when the stored 16-bit depth compares greater
// (signed jg) than the new depth; otherwise the new depth and the pixel are
// written. In the pair loop one compare against the first depth entry decides
// both pixels of the pair.
// NOTE(review): the compare is signed on 16-bit values - confirm the depth range
// never uses the top bit.
// Nothing is drawn when pRight->X - pLeft->X <= 0. When pLeft->X is odd, one pixel
// is handled on its own first; the rest is handled two pixels per pass, and an odd
// pixel left over at the end of the span is not drawn.
// pZBufferPtr is advanced in place and is not restored by this routine.
// The routine uses names that are not declared inside it (Dest, TDest, pZBufferPtr,
// z16, ZStep, Z32, Red, Green, Blue, u16, v16, UStep, VStep, widTemp, Bucket,
// Bucket2, VShift, GHMaskShifted, GWMask, GBitPtrHalf, Real65536, Two, Magic and
// the *MASK constants); their declarations are outside this view.
00684 void DrawScanLineGouraudZBuffer_AsmX86FPU(EdgeAsmFPU *pLeft, EdgeAsmFPU *pRight)
00685 {
00686 TDest =Dest;
00687 Red =Green =0;
00688 _asm
00689 {
00690 push ebx
00691 push ecx
00692 push esi
00693 push edi
00694 // ebx -> left edge, ecx -> right edge; return if pRight->X - pLeft->X <= 0 (signed).
00695 mov ebx,pLeft
00696 mov ecx,pRight
00697 mov eax,[ebx]EdgeAsmFPU.X
00698 mov edx,[ecx]EdgeAsmFPU.X
00699 sub edx,eax
00700 jle GouraudReturnNoZ
00701 // edx = pixel count (right X - left X + 1); TDest and pZBufferPtr both advance by left X * 2 bytes.
00702 mov esi,eax
00703 inc edx
00704 shl eax,1
00705 add TDest,eax
00706 add pZBufferPtr,eax
00707 test esi,1
00708 jz NoSinglePixie
00709 // Left X is odd: handle one pixel by itself before the two-at-a-time loop.
00710
00711 fld dword ptr [ebx]EdgeAsmFPU.u ; UL
00712 fmul [Real65536] ; UL16
00713 fld dword ptr [ebx]EdgeAsmFPU.v ; VL UL16
00714 fmul [Real65536] ; VL16 UL16
00715 fild dword ptr [ebx]EdgeAsmFPU.R ; RL VL16 UL16
00716 fild dword ptr [ebx]EdgeAsmFPU.G ; GL RL VL16 UL16
00717 fild dword ptr [ebx]EdgeAsmFPU.B ; BL GL RL VL16 UL16
00718 fxch st(4) ; UL16 GL RL VL16 BL
00719 fistp [u16] ; GL RL VL16 BL
00720 fxch st(2) ; VL16 RL GL BL
00721 fistp [v16] ; RL GL BL
00722 fld dword ptr[ebx]EdgeAsmFPU.z ; z RL GL BL
00723 fistp [z16] ; RL GL BL
00724 // z16 = left z converted to an integer (no Real65536 scaling here); its upper 16 bits are used as the depth below.
00725 mov [widTemp],edx
00726 // Texel index = ((v16 >> VShift) & GHMaskShifted) + ((u16 >> 16) & GWMask) + GBitPtrHalf;
00727 mov ecx,[VShift]
00728 mov ebx,[GHMaskShifted]
00729 // the 16-bit texel is then read from byte address index * 2.
00730 mov esi,[GWMask]
00731 mov edi,[v16]
00732
00733 mov edx,dword ptr[u16]
00734
00735 shr edi,cl
00736
00737 shr edx,16
00738 xor eax,eax
00739
00740 and edi,ebx
00741 and edx,esi
00742
00743 add edi,edx
00744
00745 add edi,GBitPtrHalf
00746 mov ax,word ptr[edi*2]
00747 // Split the texel into its red, green and blue fields (eax, ebx, ecx).
00748 mov ebx,eax
00749 and eax,REDMASK
00750
00751 mov ecx,ebx
00752 and ebx,GREENMASK
00753
00754 mov dword ptr[Red],eax
00755 and ecx,BLUEMASK
00756
00757 mov dword ptr[Green],ebx
00758 mov dword ptr[Blue],ecx
00759 // Scale each field by its light value. Magic is added before every store and only the low dword of the stored qword is read back (presumably a float-to-int bias constant - confirm).
00760 fimul dword ptr[Red] ; R GL BL
00761 fxch st(1) ; GL R BL
00762 fimul dword ptr[Green] ; G R BL
00763 fxch st(1) ; R G BL
00764 fadd qword ptr[Magic] ; Rk G BL
00765 fxch st(2) ; BL G Rk
00766 fimul [Blue] ; B G Rk
00767 fxch st(1) ; G B Rk
00768 fadd qword ptr[Magic] ; Gk B Rk
00769 fxch st(2) ; Rk B Gk
00770 fstp qword ptr[Bucket] ; B Gk
00771 fadd qword ptr[Magic] ; Bk Gk
00772 fxch st(1) ; Gk Bk
00773 fstp qword ptr[Bucket2] ; Bk
00774 // Read the scaled red and green back and mask them to their fields.
00775 mov edx,dword ptr[Bucket]
00776 mov eax,dword ptr[Bucket2]
00777
00778 and edx,REDMASK
00779 and eax,GREENMASK
00780
00781 fstp qword ptr[Bucket]
00782 // The FPU stack is empty from here. ebx = scaled blue, esi = upper 16 bits of z16.
00783 mov ecx,TDest
00784 mov ebx,dword ptr[Bucket]
00785
00786 mov esi,[z16]
00787 and ebx,BLUEMASK
00788
00789 shr esi,16
00790 or edx,eax
00791
00792 mov eax,pZBufferPtr
00793 add TDest,2
00794 // Depth test: skip the pixel when the stored 16-bit depth is greater (signed) than the new one.
00795 cmp word ptr[eax],si
00796 jg SkipSinglePixie
00797 // Passed: merge blue, then write the new depth and the pixel.
00798 or edx,ebx
00799 mov word ptr[eax],si
00800
00801 mov word ptr[ecx],dx
00802 // Reload the edge pointers, step the depth pointer, then make the remaining count even (return if nothing is left).
00803 SkipSinglePixie:
00804 mov ebx,pLeft
00805 mov edx,[widTemp]
00806 add pZBufferPtr,2
00807 mov ecx,pRight
00808 dec edx
00809 jz GouraudReturnNoZ
00810 mov esi,edx
00811 and esi,1
00812 sub edx,esi
00813 jz GouraudReturnNoZ
00814 // On entry to the label below edx = number of pixels still to draw.
00815 NoSinglePixie:
00816 mov [widTemp],edx ; just for a temp
00817 shr edx,1
00818 // widTemp briefly holds the pixel count for the fild, then the pair count (count >> 1).
00819 ; try to keep fmul fxch pairs seperated to avoid stalling
00820 ; calc this scanlines steps ; FPU Stack
00821 ; st0 st1 st2 st3 st4 st5 st6 st7
00822 fild dword ptr [widTemp] ; WID
00823
00824 mov [widTemp],edx ; Color interps doubled
00825
00826 fld dword ptr [ecx]EdgeAsmFPU.u ; UR WID
00827 fld dword ptr [ebx]EdgeAsmFPU.u ; UL UR WID
00828 fsub st(1), st ; UL UD WID
00829 fld dword ptr [ecx]EdgeAsmFPU.v ; VR UL UD WID
00830 fxch st(1) ; UL VR UD WID
00831 fmul [Real65536] ; UL16 VR UD WID
00832 fld dword ptr [ebx]EdgeAsmFPU.v ; VL UL16 VR UD WID
00833 fsub st(2), st ; VL UL16 VD UD WID
00834 fild dword ptr [ecx]EdgeAsmFPU.R ; RR VL UL16 VD UD WID
00835 fxch st(3) ; VD VL UL16 RR UD WID
00836 fmul [Real65536] ; VD16 VL UL16 RR UD WID
00837 fild dword ptr [ebx]EdgeAsmFPU.R ; RL VD16 VL UL16 RR UD WID
00838 fsub st(4),st ; RL VD16 VL UL16 RD UD WID
00839 fxch st(5) ; UD VD16 VL UL16 RD RL WID
00840 fmul [Real65536] ; UD16 VD16 VL UL16 RD RL WID
00841 fxch st(2) ; VL VD16 UD16 UL16 RD RL WID
00842 fmul [Real65536] ; VL16 VD16 UD16 UL16 RD RL WID
00843 fxch st(3) ; UL16 VD16 UD16 VL16 RD RL WID
00844 fistp [u16] ; VD16 UD16 VL16 RD RL WID
00845 fxch st(2) ; VL16 UD16 VD16 RD RL WID
00846 fistp [v16] ; UD16 VD16 RD RL WID
00847 fld1 ; 1 UD16 VD16 RD RL WID
00848 fdivrp st(5),st ; UD16 VD16 RD RL DWID
00849 // 1/WID now sits at st(4). UStep and VStep are per pixel; the light and depth steps below use 2/WID,
00850 // i.e. they are per pixel pair. The light steps are rounded to integers with frndint.
00851
00852 fmul st,st(4) ; USTP VD16 RD RL DWID
00853 fild [ecx]EdgeAsmFPU.G ; RG USTP VD16 RD RL DWID
00854 fild [ebx]EdgeAsmFPU.G ; LG RG USTP VD16 RD RL DWID
00855 fsub st(1),st ; LG GD USTP VD16 RD RL DWID
00856 fxch st(2) ; USTP GD LG VD16 RD RL DWID
00857 fistp [UStep] ; GD LG VD16 RD RL DWID
00858 fild [ecx]EdgeAsmFPU.B ; RB GD LG VD16 RD RL DWID
00859 fild [ebx]EdgeAsmFPU.B ; LB RB GD LG VD16 RD RL DWID
00860 fsub st(1),st ; LB BD GD LG VD16 RD RL DWID
00861 fxch st(4) ; VD16 BD GD LG LB RD RL DWID
00862 fmul st,st(7) ; VSTP BD GD LG LB RD RL DWID
00863 fxch st(7) ; DWID BD GD LG LB RD RL VSTP
00864 fmul dword ptr[Two] ; DW2 BD GD LG LB RD RL VSTP
00865 fxch st(7) ; VSTP BD GD LG LB RD RL DW2
00866 fxch st(5) ; RD BD GD LG LB VSTP RL DW2
00867 fmul st,st(7) ; RI BD GD LG LB VSTP RL DW2
00868 frndint
00869 fxch st(2) ; GD BD RI LG LB VSTP RL DW2
00870 fmul st,st(7) ; GI BD RI LG LB VSTP RL DW2
00871 frndint
00872 fxch st(1) ; BD GI RI LG LB VSTP RL DW2
00873 fmul st,st(7) ; BD GI RI LG LB VSTP RL DW2
00874 frndint
00875 fxch st(5) ; VSTP GI RI LG LB BD RL DW2
00876 fistp [VStep] ; GI RI LG LB BD RL DW2
00877 fld [ecx]EdgeAsmFPU.z ; rz GI RI LG LB BD RL DW2
00878 fsub [ebx]EdgeAsmFPU.z ; zd GI RI LG LB BD RL DW2
00879 fxch st(7) ; DW2 GI RI LG LB BD RL zd
00880 fmulp st(7),st ; GI RI LG LB BD RL zd
00881 fld [ebx]EdgeAsmFPU.z ; lz GI RI LG LB BD RL zd
00882 fxch st(7) ; zd GI RI LG LB BD RL lz
00883 fistp [ZStep] ; GI RI LG LB BD RL lz
00884 fxch st(6) ; lz RI LG LB BD RL GI
00885 fistp [z16] ; RI LG LB BD RL GI
00886 // ZStep = (right z - left z) * 2/WID, so it is per pixel pair; z16 restarts from the left z.
00887 push ebp
00888 // ebp is scratch until the pop below. Each pass handles two pixels; the stack holds the same six values as on the line above.
00889 PixieLoop:
00890 mov ebx,[ZStep]
00891 // z16 is advanced before it is used; Z32 receives its upper 16 bits as the depth for this pair.
00892 add [z16],ebx
00893 mov ecx,[VShift]
00894
00895 mov edi,[z16]
00896
00897 shr edi,16
00898 mov ebx,[GHMaskShifted]
00899
00900 mov ebp,[GWMask]
00901 mov edx,dword ptr[u16]
00902
00903 mov word ptr[Z32],di
00904 mov eax,edx
00905
00906 shr edx,16
00907 mov edi,[v16]
00908
00909 add eax,[UStep]
00910 mov esi,edi
00911
00912 shr edi,cl
00913 add esi,[VStep]
00914
00915 and edi,ebx
00916 and edx,ebp
00917
00918 add edi,edx
00919 mov edx,eax
00920
00921 add edi,GBitPtrHalf
00922 add eax,[UStep]
00923
00924 mov [u16],eax
00925 mov ax,word ptr[edi*2]
00926
00927 mov edi,esi
00928 add esi,[VStep]
00929
00930 shr edx,16
00931 mov [v16],esi
00932
00933 shr edi,cl
00934 and edx,ebp
00935
00936 mov esi,GBitPtrHalf
00937 and edi,ebx
00938
00939 shl eax,16
00940 add esi,edx
00941
00942 add esi,edi
00943
00944 mov ax,word ptr[esi*2]
00945 // eax now holds both texels: the first in the high half, the second in the low half.
00946 mov ebx,eax
00947 and eax,REDMASK
00948
00949 mov ecx,ebx
00950 and ebx,GREENMASK
00951
00952 mov dword ptr[Red],eax
00953 and ecx,BLUEMASK
00954
00955 mov dword ptr[Green],ebx
00956 mov dword ptr[Blue],ecx
00957 // Red and Green are loaded as 64-bit integers, Blue as 32-bit; one light value scales both texels of the pair.
00958 fild qword ptr[Red] ; r RI LG LB BI RL GI
00959 fmul st,st(5) ; R RI LG LB BI RL GI
00960 fild qword ptr[Green] ; g R RI LG LB BI RL GI
00961 fmul st,st(3) ; G R RI LG LB BI RL GI
00962 fxch st(6) ; RL R RI LG LB BI G GI
00963 fadd st,st(2) ; RL2 R RI LG LB BI G GI
00964 fxch st(3) ; LG R RI RL2 LB BI G GI
00965 fadd st,st(7) ; LG2 R RI RL2 LB BI G GI
00966 fxch st(6) ; G R RI RL2 LB BI LG2 GI
00967 fadd qword ptr[Magic] ; Gk R RI RL2 LB BI LG2 GI
00968 fxch st(1) ; R Gk RI RL2 LB BI LG2 GI
00969 fadd qword ptr[Magic] ; Rk Gk RI RL2 LB BI LG2 GI
00970 fxch st(1) ; Gk Rk RI RL2 LB BI LG2 GI
00971 fstp qword ptr[Bucket] ; Rk RI RL2 LB BI LG2 GI
00972 fstp qword ptr[Bucket2] ; RI RL2 LB BI LG2 GI
00973
00974 mov edx,dword ptr[Bucket]
00975 mov eax,dword ptr[Bucket2]
00976
00977 fild dword ptr[Blue] ; b RI RL2 LB BI LG2 GI
00978 fmul st,st(3) ; B RI RL2 LB BI LG2 GI
00979 fxch st(3) ; LB RI RL2 B BI LG2 GI
00980 fadd st,st(4) ; LB2 RI RL2 B BI LG2 GI
00981 fxch st(3) ; B RI RL2 LB2 BI LG2 GI
00982 fadd qword ptr[Magic] ; Bk RI RL2 LB2 BI LG2 GI
00983
00984 and edx,GREENMASK
00985 and eax,REDMASK
00986
00987 fstp qword ptr[Bucket] ; RI RL2 LB2 BI LG2 GI
00988 fstp qword ptr[Bucket2] ; RL2 LB2 BI LG2 GI
00989 // Merge the masked fields of both pixels into edi; RI is parked in Bucket2 meanwhile.
00990 mov edi,edx
00991 mov ebx,dword ptr[Bucket]
00992 or edi,eax
00993 mov ebp,TDest
00994 and ebx,BLUEMASK
00995 // Reloading RI below puts the stack back into the loop-top layout.
00996 fxch st(3) ; LG2 LB2 BI RL2 GI
00997 fld qword ptr[Bucket2] ; RI LG2 LB2 BI RL2 GI
00998
00999 mov si,word ptr[Z32]
01000 add TDest,4
01001
01002 mov ecx,pZBufferPtr
01003 or edi,ebx
01004 // One signed compare against the first depth entry decides both pixels of the pair.
01005 cmp word ptr[ecx],si
01006 jg SkipPixie
01007 // Passed: swap halves so the first texel goes to the lower address, then write both depths and both pixels.
01008 rol edi,16
01009
01010 mov [ecx],si
01011 mov dword ptr[ebp],edi
01012 mov [ecx+2],si
01013 SkipPixie:
01014 add pZBufferPtr,4
01015 dec [widTemp]
01016
01017 jnz PixieLoop
01018
01019 pop ebp
01020 // Pop the six values left on the FPU stack; u16 and v16 only serve as dump targets here.
01021 fstp [u16]
01022 fstp [v16]
01023 fstp [u16]
01024 fstp [v16]
01025 fstp [u16]
01026 fstp [v16]
01027
01028 GouraudReturnNoZ:
01029 pop edi
01030 pop esi
01031 pop ecx
01032 pop ebx
01033 }
01034 }
01035
01036 void DrawScanLineGouraudZBufferNoZWrite_AsmX86FPU(EdgeAsmFPU *pLeft, EdgeAsmFPU *pRight)
01037 {
01038 TDest =Dest;
01039 Red =Green =0;
01040 _asm
01041 {
01042 push ebx
01043 push ecx
01044 push esi
01045 push edi
01046
01047 mov ebx,pLeft
01048 mov ecx,pRight
01049 mov eax,[ebx]EdgeAsmFPU.X
01050 mov edx,[ecx]EdgeAsmFPU.X
01051 sub edx,eax
01052 jle GouraudReturnNoZ
01053
01054 mov esi,eax
01055 inc edx
01056 shl eax,1
01057 add TDest,eax
01058 add pZBufferPtr,eax
01059 test esi,1
01060 jz NoSinglePixie
01061
01062
01063 fld dword ptr [ebx]EdgeAsmFPU.u ; UL
01064 fmul [Real65536] ; UL16
01065 fld dword ptr [ebx]EdgeAsmFPU.v ; VL UL16
01066 fmul [Real65536] ; VL16 UL16
01067 fild dword ptr [ebx]EdgeAsmFPU.R ; RL VL16 UL16
01068 fild dword ptr [ebx]EdgeAsmFPU.G ; GL RL VL16 UL16
01069 fild dword ptr [ebx]EdgeAsmFPU.B ; BL GL RL VL16 UL16
01070 fxch st(4) ; UL16 GL RL VL16 BL
01071 fistp [u16] ; GL RL VL16 BL
01072 fxch st(2) ; VL16 RL GL BL
01073 fistp [v16] ; RL GL BL
01074 fld dword ptr[ebx]EdgeAsmFPU.z ; z RL GL BL
01075 fistp [z16] ; RL GL BL
01076
01077 mov [widTemp],edx
01078
01079 mov ecx,[VShift]
01080 mov ebx,[GHMaskShifted]
01081
01082 mov esi,[GWMask]
01083 mov edi,[v16]
01084
01085 mov edx,dword ptr[u16]
01086
01087 shr edi,cl
01088
01089 shr edx,16
01090 xor eax,eax
01091
01092 and edi,ebx
01093 and edx,esi
01094
01095 add edi,edx
01096
01097 add edi,GBitPtrHalf
01098 mov ax,word ptr[edi*2]
01099
01100 mov ebx,eax
01101 and eax,REDMASK
01102
01103 mov ecx,ebx
01104 and ebx,GREENMASK
01105
01106 mov dword ptr[Red],eax
01107 and ecx,BLUEMASK
01108
01109 mov dword ptr[Green],ebx
01110 mov dword ptr[Blue],ecx
01111
01112 fimul dword ptr[Red] ; R GL BL
01113 fxch st(1) ; GL R BL
01114 fimul dword ptr[Green] ; G R BL
01115 fxch st(1) ; R G BL
01116 fadd qword ptr[Magic] ; Rk G BL
01117 fxch st(2) ; BL G Rk
01118 fimul [Blue] ; B G Rk
01119 fxch st(1) ; G B Rk
01120 fadd qword ptr[Magic] ; Gk B Rk
01121 fxch st(2) ; Rk B Gk
01122 fstp qword ptr[Bucket] ; B Gk
01123 fadd qword ptr[Magic] ; Bk Gk
01124 fxch st(1) ; Gk Bk
01125 fstp qword ptr[Bucket2] ; Bk
01126
01127 mov edx,dword ptr[Bucket]
01128 mov eax,dword ptr[Bucket2]
01129
01130 and edx,REDMASK
01131 and eax,GREENMASK
01132
01133 fstp qword ptr[Bucket]
01134
01135 mov ecx,TDest
01136 mov ebx,dword ptr[Bucket]
01137
01138 mov esi,[z16]
01139 and ebx,BLUEMASK
01140
01141 shr esi,16
01142 or edx,eax
01143
01144 mov eax,pZBufferPtr
01145 add TDest,2
01146
01147 cmp word ptr[eax],si
01148 jg SkipSinglePixie
01149
01150 or edx,ebx
01151
01152 mov word ptr[ecx],dx
01153
01154 SkipSinglePixie:
01155 mov ebx,pLeft
01156 mov edx,[widTemp]
01157 add pZBufferPtr,2
01158 mov ecx,pRight
01159 dec edx
01160 jz GouraudReturnNoZ
01161 mov esi,edx
01162 and esi,1
01163 sub edx,esi
01164 jz GouraudReturnNoZ
01165
01166 NoSinglePixie:
01167 mov [widTemp],edx ; just for a temp
01168 shr edx,1
01169
01170 ; try to keep fmul fxch pairs seperated to avoid stalling
01171 ; calc this scanlines steps ; FPU Stack
01172 ; st0 st1 st2 st3 st4 st5 st6 st7
01173 fild dword ptr [widTemp] ; WID
01174
01175 mov [widTemp],edx ; Color interps doubled
01176
01177 fld dword ptr [ecx]EdgeAsmFPU.u ; UR WID
01178 fld dword ptr [ebx]EdgeAsmFPU.u ; UL UR WID
01179 fsub st(1), st ; UL UD WID
01180 fld dword ptr [ecx]EdgeAsmFPU.v ; VR UL UD WID
01181 fxch st(1) ; UL VR UD WID
01182 fmul [Real65536] ; UL16 VR UD WID
01183 fld dword ptr [ebx]EdgeAsmFPU.v ; VL UL16 VR UD WID
01184 fsub st(2), st ; VL UL16 VD UD WID
01185 fild dword ptr [ecx]EdgeAsmFPU.R ; RR VL UL16 VD UD WID
01186 fxch st(3) ; VD VL UL16 RR UD WID
01187 fmul [Real65536] ; VD16 VL UL16 RR UD WID
01188 fild dword ptr [ebx]EdgeAsmFPU.R ; RL VD16 VL UL16 RR UD WID
01189 fsub st(4),st ; RL VD16 VL UL16 RD UD WID
01190 fxch st(5) ; UD VD16 VL UL16 RD RL WID
01191 fmul [Real65536] ; UD16 VD16 VL UL16 RD RL WID
01192 fxch st(2) ; VL VD16 UD16 UL16 RD RL WID
01193 fmul [Real65536] ; VL16 VD16 UD16 UL16 RD RL WID
01194 fxch st(3) ; UL16 VD16 UD16 VL16 RD RL WID
01195 fistp [u16] ; VD16 UD16 VL16 RD RL WID
01196 fxch st(2) ; VL16 UD16 VD16 RD RL WID
01197 fistp [v16] ; UD16 VD16 RD RL WID
01198 fld1 ; 1 UD16 VD16 RD RL WID
01199 fdivrp st(5),st ; UD16 VD16 RD RL DWID
01200
01201
01202
01203 fmul st,st(4) ; USTP VD16 RD RL DWID
01204 fild [ecx]EdgeAsmFPU.G ; RG USTP VD16 RD RL DWID
01205 fild [ebx]EdgeAsmFPU.G ; LG RG USTP VD16 RD RL DWID
01206 fsub st(1),st ; LG GD USTP VD16 RD RL DWID
01207 fxch st(2) ; USTP GD LG VD16 RD RL DWID
01208 fistp [UStep] ; GD LG VD16 RD RL DWID
01209 fild [ecx]EdgeAsmFPU.B ; RB GD LG VD16 RD RL DWID
01210 fild [ebx]EdgeAsmFPU.B ; LB RB GD LG VD16 RD RL DWID
01211 fsub st(1),st ; LB BD GD LG VD16 RD RL DWID
01212 fxch st(4) ; VD16 BD GD LG LB RD RL DWID
01213 fmul st,st(7) ; VSTP BD GD LG LB RD RL DWID
01214 fxch st(7) ; DWID BD GD LG LB RD RL VSTP
01215 fmul dword ptr[Two] ; DW2 BD GD LG LB RD RL VSTP
01216 fxch st(7) ; VSTP BD GD LG LB RD RL DW2
01217 fxch st(5) ; RD BD GD LG LB VSTP RL DW2
01218 fmul st,st(7) ; RI BD GD LG LB VSTP RL DW2
01219 frndint
01220 fxch st(2) ; GD BD RI LG LB VSTP RL DW2
01221 fmul st,st(7) ; GI BD RI LG LB VSTP RL DW2
01222 frndint
01223 fxch st(1) ; BD GI RI LG LB VSTP RL DW2
01224 fmul st,st(7) ; BD GI RI LG LB VSTP RL DW2
01225 frndint
01226 fxch st(5) ; VSTP GI RI LG LB BD RL DW2
01227 fistp [VStep] ; GI RI LG LB BD RL DW2
01228 fld [ecx]EdgeAsmFPU.z ; rz GI RI LG LB BD RL DW2
01229 fsub [ebx]EdgeAsmFPU.z ; zd GI RI LG LB BD RL DW2
01230 fxch st(7) ; DW2 GI RI LG LB BD RL zd
01231 fmulp st(7),st ; GI RI LG LB BD RL zd
01232 fld [ebx]EdgeAsmFPU.z ; lz GI RI LG LB BD RL zd
01233 fxch st(7) ; zd GI RI LG LB BD RL lz
01234 fistp [ZStep] ; GI RI LG LB BD RL lz
01235 fxch st(6) ; lz RI LG LB BD RL GI
01236 fistp [z16] ; RI LG LB BD RL GI
01237
01238 push ebp
01239
01240 PixieLoop:
01241 mov ebx,[ZStep]
01242
01243 add [z16],ebx
01244 mov ecx,[VShift]
01245
01246 mov edi,[z16]
01247
01248 shr edi,16
01249 mov ebx,[GHMaskShifted]
01250
01251 mov ebp,[GWMask]
01252 mov edx,dword ptr[u16]
01253
01254 mov word ptr[Z32],di
01255 mov eax,edx
01256
01257 shr edx,16
01258 mov edi,[v16]
01259
01260 add eax,[UStep]
01261 mov esi,edi
01262
01263 shr edi,cl
01264 add esi,[VStep]
01265
01266 and edi,ebx
01267 and edx,ebp
01268
01269 add edi,edx
01270 mov edx,eax
01271
01272 add edi,GBitPtrHalf
01273 add eax,[UStep]
01274
01275 mov [u16],eax
01276 mov ax,word ptr[edi*2]
01277
01278 mov edi,esi
01279 add esi,[VStep]
01280
01281 shr edx,16
01282 mov [v16],esi
01283
01284 shr edi,cl
01285 and edx,ebp
01286
01287 mov esi,GBitPtrHalf
01288 and edi,ebx
01289
01290 shl eax,16
01291 add esi,edx
01292
01293 add esi,edi
01294
01295 mov ax,word ptr[esi*2]
01296
01297 mov ebx,eax
01298 and eax,REDMASK
01299
01300 mov ecx,ebx
01301 and ebx,GREENMASK
01302
01303 mov dword ptr[Red],eax
01304 and ecx,BLUEMASK
01305
01306 mov dword ptr[Green],ebx
01307 mov dword ptr[Blue],ecx
01308
01309 fild qword ptr[Red] ; r RI LG LB BI RL GI
01310 fmul st,st(5) ; R RI LG LB BI RL GI
01311 fild qword ptr[Green] ; g R RI LG LB BI RL GI
01312 fmul st,st(3) ; G R RI LG LB BI RL GI
01313 fxch st(6) ; RL R RI LG LB BI G GI
01314 fadd st,st(2) ; RL2 R RI LG LB BI G GI
01315 fxch st(3) ; LG R RI RL2 LB BI G GI
01316 fadd st,st(7) ; LG2 R RI RL2 LB BI G GI
01317 fxch st(6) ; G R RI RL2 LB BI LG2 GI
01318 fadd qword ptr[Magic] ; Gk R RI RL2 LB BI LG2 GI
01319 fxch st(1) ; R Gk RI RL2 LB BI LG2 GI
01320 fadd qword ptr[Magic] ; Rk Gk RI RL2 LB BI LG2 GI
01321 fxch st(1) ; Gk Rk RI RL2 LB BI LG2 GI
01322 fstp qword ptr[Bucket] ; Rk RI RL2 LB BI LG2 GI
01323 fstp qword ptr[Bucket2] ; RI RL2 LB BI LG2 GI
01324
01325 mov edx,dword ptr[Bucket]
01326 mov eax,dword ptr[Bucket2]
01327
01328 fild dword ptr[Blue] ; b RI RL2 LB BI LG2 GI
01329 fmul st,st(3) ; B RI RL2 LB BI LG2 GI
01330 fxch st(3) ; LB RI RL2 B BI LG2 GI
01331 fadd st,st(4) ; LB2 RI RL2 B BI LG2 GI
01332 fxch st(3) ; B RI RL2 LB2 BI LG2 GI
01333 fadd qword ptr[Magic] ; Bk RI RL2 LB2 BI LG2 GI
01334
01335 and edx,GREENMASK
01336 and eax,REDMASK
01337
01338 fstp qword ptr[Bucket] ; RI RL2 LB2 BI LG2 GI
01339 fstp qword ptr[Bucket2] ; RL2 LB2 BI LG2 GI
01340
01341 mov edi,edx
01342 mov ebx,dword ptr[Bucket]
01343 or edi,eax
01344 mov ebp,TDest
01345 and ebx,BLUEMASK
01346
01347 fxch st(3) ; LG2 LB2 BI RL2 GI
01348 fld qword ptr[Bucket2] ; RI LG2 LB2 BI RL2 GI
01349
01350 mov si,word ptr[Z32]
01351 add TDest,4
01352
01353 mov ecx,pZBufferPtr
01354 or edi,ebx
01355
01356 cmp word ptr[ecx],si
01357 jg SkipPixie
01358
01359 rol edi,16
01360
01361 mov dword ptr[ebp],edi
01362 SkipPixie:
01363 add pZBufferPtr,4
01364 dec [widTemp]
01365
01366 jnz PixieLoop
01367
01368 pop ebp
01369
01370 fstp [u16]
01371 fstp [v16]
01372 fstp [u16]
01373 fstp [v16]
01374 fstp [u16]
01375 fstp [v16]
01376
01377 GouraudReturnNoZ:
01378 pop edi
01379 pop esi
01380 pop ecx
01381 pop ebx
01382 }
01383 }
01384
// DrawScanLineGouraudNoZBufferZWrite_AsmX86FPU
//
// Draws one horizontal span of a textured, colour-modulated scanline into a
// 16-bit-per-pixel destination and writes a 16-bit depth value for every pixel
// it draws. The existing z-buffer contents are never read or compared.
//
// pLeft, pRight : left and right edge records. The code reads X, u, v, z and
//                 the integer R, G, B fields of both records.
//
// Globals read here   : Dest, GBitPtrHalf, GWMask, GHMaskShifted, VShift,
//                       Real65536, Two, Magic, REDMASK, GREENMASK, BLUEMASK.
// Globals written here: TDest, pZBufferPtr (advanced as pixels are consumed),
//                       widTemp, u16, v16, z16, Z32, UStep, VStep, ZStep,
//                       Red, Green, Blue, Bucket, Bucket2.
//
// Span handling:
//   * Nothing is drawn when pRight->X - pLeft->X <= 0.
//   * If pLeft->X is odd, one leading pixel is drawn on its own.
//   * The remaining count is rounded down to an even number and drawn two
//     pixels per loop iteration, so an odd trailing pixel is not drawn here.
01385 void DrawScanLineGouraudNoZBufferZWrite_AsmX86FPU(EdgeAsmFPU *pLeft, EdgeAsmFPU *pRight)
01386 {
01387 TDest =Dest;
// Red and Green are cleared in full here; the asm below stores only their low
// dwords but the pair loop loads them with a 64-bit fild.
01388 Red =Green =0;
01389 _asm
01390 {
// Saved here and restored at GouraudReturnNoZ.
01391 push ebx
01392 push ecx
01393 push esi
01394 push edi
01395
// ebx = pLeft, ecx = pRight, eax = left X, edx = right X - left X.
// Leave immediately when the difference is zero or negative.
01396 mov ebx,pLeft
01397 mov ecx,pRight
01398 mov eax,[ebx]EdgeAsmFPU.X
01399 mov edx,[ecx]EdgeAsmFPU.X
01400 sub edx,eax
01401 jle GouraudReturnNoZ
01402
// edx = pixel count (difference + 1). Both output pointers are advanced by
// left X * 2 bytes. An even left X skips the single-pixel section.
01403 mov esi,eax
01404 inc edx
01405 shl eax,1
01406 add TDest,eax
01407 add pZBufferPtr,eax
01408 test esi,1
01409 jz NoSinglePixie
01410
01411
// ---- Leading single pixel (left X is odd) ----
// u16 and v16 receive the left u and v multiplied by Real65536 and converted
// to integer; z16 receives the left z converted to integer. The left edge
// integer R, G, B stay on the FPU stack as the colour multipliers.
01412 fld dword ptr [ebx]EdgeAsmFPU.u ; UL
01413 fmul [Real65536] ; UL16
01414 fld dword ptr [ebx]EdgeAsmFPU.v ; VL UL16
01415 fmul [Real65536] ; VL16 UL16
01416 fild dword ptr [ebx]EdgeAsmFPU.R ; RL VL16 UL16
01417 fild dword ptr [ebx]EdgeAsmFPU.G ; GL RL VL16 UL16
01418 fild dword ptr [ebx]EdgeAsmFPU.B ; BL GL RL VL16 UL16
01419 fxch st(4) ; UL16 GL RL VL16 BL
01420 fistp [u16] ; GL RL VL16 BL
01421 fxch st(2) ; VL16 RL GL BL
01422 fistp [v16] ; RL GL BL
01423 fld dword ptr[ebx]EdgeAsmFPU.z ; z RL GL BL
01424 fistp [z16] ; RL GL BL
01425
// The pixel count is parked in widTemp because edx is reused below.
01426 mov [widTemp],edx
01427
// Texel index = ((v16 >> VShift) & GHMaskShifted) + ((u16 >> 16) & GWMask).
// The texel is the 16-bit word at (GBitPtrHalf + index) * 2.
01428 mov ecx,[VShift]
01429 mov ebx,[GHMaskShifted]
01430
01431 mov esi,[GWMask]
01432 mov edi,[v16]
01433
01434 mov edx,dword ptr[u16]
01435
01436 shr edi,cl
01437
01438 shr edx,16
01439 xor eax,eax
01440
01441 and edi,ebx
01442 and edx,esi
01443
01444 add edi,edx
01445
01446 add edi,GBitPtrHalf
01447 mov ax,word ptr[edi*2]
01448
// Split the texel into its three masked channels (still in place, not shifted).
01449 mov ebx,eax
01450 and eax,REDMASK
01451
01452 mov ecx,ebx
01453 and ebx,GREENMASK
01454
01455 mov dword ptr[Red],eax
01456 and ecx,BLUEMASK
01457
01458 mov dword ptr[Green],ebx
01459 mov dword ptr[Blue],ecx
01460
// Multiply each edge colour by the matching texel channel, add Magic, and
// store the result as a double; the low dword of each stored double is then
// masked back to its channel. Magic is defined elsewhere - presumably the
// usual large-constant float-to-integer conversion; confirm at its definition.
01461 fimul dword ptr[Red] ; R GL BL
01462 fxch st(1) ; GL R BL
01463 fimul dword ptr[Green] ; G R BL
01464 fxch st(1) ; R G BL
01465 fadd qword ptr[Magic] ; Rk G BL
01466 fxch st(2) ; BL G Rk
01467 fimul [Blue] ; B G Rk
01468 fxch st(1) ; G B Rk
01469 fadd qword ptr[Magic] ; Gk B Rk
01470 fxch st(2) ; Rk B Gk
01471 fstp qword ptr[Bucket] ; B Gk
01472 fadd qword ptr[Magic] ; Bk Gk
01473 fxch st(1) ; Gk Bk
01474 fstp qword ptr[Bucket2] ; Bk
01475
01476 mov edx,dword ptr[Bucket]
01477 mov eax,dword ptr[Bucket2]
01478
01479 and edx,REDMASK
01480 and eax,GREENMASK
01481
// Bucket is reused for blue; this store also empties the FPU stack.
01482 fstp qword ptr[Bucket]
01483
01484 mov ecx,TDest
01485 mov ebx,dword ptr[Bucket]
01486
01487 mov esi,[z16]
01488 and ebx,BLUEMASK
01489
// Depth written for this pixel is z16 >> 16 (low 16 bits).
01490 shr esi,16
01491 or edx,eax
01492
01493 mov eax,pZBufferPtr
01494 add TDest,2
01495
// Unconditional writes: depth word first, then the recombined colour word.
01496 or edx,ebx
01497 mov word ptr[eax],si
01498
01499 mov word ptr[ecx],dx
01500
// Reload the edge pointers and work out what is left: count - 1, rounded down
// to an even number. Return if nothing remains.
01501 mov ebx,pLeft
01502 mov edx,[widTemp]
01503 add pZBufferPtr,2
01504 mov ecx,pRight
01505 dec edx
01506 jz GouraudReturnNoZ
01507 mov esi,edx
01508 and esi,1
01509 sub edx,esi
01510 jz GouraudReturnNoZ
01511
// ---- Pair loop setup ----
// edx = pixels remaining. It is stored once so the FPU can load it as WID,
// then halved and stored again as the number of two-pixel iterations.
01512 NoSinglePixie:
01513 mov [widTemp],edx ; just for a temp
01514 shr edx,1
01515
01516 ; try to keep fmul fxch pairs seperated to avoid stalling
01517 ; calc this scanlines steps ; FPU Stack
01518 ; st0 st1 st2 st3 st4 st5 st6 st7
01519 fild dword ptr [widTemp] ; WID
01520
01521 mov [widTemp],edx ; Color interps doubled
01522
// u16/v16 = left u/v * Real65536. The u, v and R differences (right - left)
// are kept on the stack; WID is replaced by 1/WID (DWID).
01523 fld dword ptr [ecx]EdgeAsmFPU.u ; UR WID
01524 fld dword ptr [ebx]EdgeAsmFPU.u ; UL UR WID
01525 fsub st(1), st ; UL UD WID
01526 fld dword ptr [ecx]EdgeAsmFPU.v ; VR UL UD WID
01527 fxch st(1) ; UL VR UD WID
01528 fmul [Real65536] ; UL16 VR UD WID
01529 fld dword ptr [ebx]EdgeAsmFPU.v ; VL UL16 VR UD WID
01530 fsub st(2), st ; VL UL16 VD UD WID
01531 fild dword ptr [ecx]EdgeAsmFPU.R ; RR VL UL16 VD UD WID
01532 fxch st(3) ; VD VL UL16 RR UD WID
01533 fmul [Real65536] ; VD16 VL UL16 RR UD WID
01534 fild dword ptr [ebx]EdgeAsmFPU.R ; RL VD16 VL UL16 RR UD WID
01535 fsub st(4),st ; RL VD16 VL UL16 RD UD WID
01536 fxch st(5) ; UD VD16 VL UL16 RD RL WID
01537 fmul [Real65536] ; UD16 VD16 VL UL16 RD RL WID
01538 fxch st(2) ; VL VD16 UD16 UL16 RD RL WID
01539 fmul [Real65536] ; VL16 VD16 UD16 UL16 RD RL WID
01540 fxch st(3) ; UL16 VD16 UD16 VL16 RD RL WID
01541 fistp [u16] ; VD16 UD16 VL16 RD RL WID
01542 fxch st(2) ; VL16 UD16 VD16 RD RL WID
01543 fistp [v16] ; UD16 VD16 RD RL WID
01544 fld1 ; 1 UD16 VD16 RD RL WID
01545 fdivrp st(5),st ; UD16 VD16 RD RL DWID
01546
01547
01548
// UStep and VStep are per-pixel steps (delta * 1/WID). The colour increments
// are per pair: delta * (1/WID * Two), each rounded with frndint.
01549 fmul st,st(4) ; USTP VD16 RD RL DWID
01550 fild [ecx]EdgeAsmFPU.G ; RG USTP VD16 RD RL DWID
01551 fild [ebx]EdgeAsmFPU.G ; LG RG USTP VD16 RD RL DWID
01552 fsub st(1),st ; LG GD USTP VD16 RD RL DWID
01553 fxch st(2) ; USTP GD LG VD16 RD RL DWID
01554 fistp [UStep] ; GD LG VD16 RD RL DWID
01555 fild [ecx]EdgeAsmFPU.B ; RB GD LG VD16 RD RL DWID
01556 fild [ebx]EdgeAsmFPU.B ; LB RB GD LG VD16 RD RL DWID
01557 fsub st(1),st ; LB BD GD LG VD16 RD RL DWID
01558 fxch st(4) ; VD16 BD GD LG LB RD RL DWID
01559 fmul st,st(7) ; VSTP BD GD LG LB RD RL DWID
01560 fxch st(7) ; DWID BD GD LG LB RD RL VSTP
01561 fmul dword ptr[Two] ; DW2 BD GD LG LB RD RL VSTP
01562 fxch st(7) ; VSTP BD GD LG LB RD RL DW2
01563 fxch st(5) ; RD BD GD LG LB VSTP RL DW2
01564 fmul st,st(7) ; RI BD GD LG LB VSTP RL DW2
01565 frndint
01566 fxch st(2) ; GD BD RI LG LB VSTP RL DW2
01567 fmul st,st(7) ; GI BD RI LG LB VSTP RL DW2
01568 frndint
01569 fxch st(1) ; BD GI RI LG LB VSTP RL DW2
01570 fmul st,st(7) ; BD GI RI LG LB VSTP RL DW2
01571 frndint
01572 fxch st(5) ; VSTP GI RI LG LB BD RL DW2
01573 fistp [VStep] ; GI RI LG LB BD RL DW2
// ZStep = (right z - left z) * DW2, i.e. a per-pair depth step; z16 starts at
// the left z. Six values remain on the FPU stack for the loop.
01574 fld [ecx]EdgeAsmFPU.z ; rz GI RI LG LB BD RL DW2
01575 fsub [ebx]EdgeAsmFPU.z ; zd GI RI LG LB BD RL DW2
01576 fxch st(7) ; DW2 GI RI LG LB BD RL zd
01577 fmulp st(7),st ; GI RI LG LB BD RL zd
01578 fld [ebx]EdgeAsmFPU.z ; lz GI RI LG LB BD RL zd
01579 fxch st(7) ; zd GI RI LG LB BD RL lz
01580 fistp [ZStep] ; GI RI LG LB BD RL lz
01581 fxch st(6) ; lz RI LG LB BD RL GI
01582 fistp [z16] ; RI LG LB BD RL GI
01583
// ebp is used as a scratch register inside the loop, so it is saved here and
// restored right after the loop. The loop itself touches only globals.
01584 push ebp
01585
// ---- Pair loop: two pixels per iteration ----
01586 PixieLoop:
// z16 is stepped before use; Z32 receives the low 16 bits of z16 >> 16.
01587 mov ebx,[ZStep]
01588
01589 add [z16],ebx
01590 mov ecx,[VShift]
01591
01592 mov edi,[z16]
01593
01594 shr edi,16
01595 mov ebx,[GHMaskShifted]
01596
01597 mov ebp,[GWMask]
01598 mov edx,dword ptr[u16]
01599
01600 mov word ptr[Z32],di
01601 mov eax,edx
01602
// First texel is addressed from (u16, v16), the second from
// (u16 + UStep, v16 + VStep). u16 and v16 are advanced by two steps in total.
01603 shr edx,16
01604 mov edi,[v16]
01605
01606 add eax,[UStep]
01607 mov esi,edi
01608
01609 shr edi,cl
01610 add esi,[VStep]
01611
01612 and edi,ebx
01613 and edx,ebp
01614
01615 add edi,edx
01616 mov edx,eax
01617
01618 add edi,GBitPtrHalf
01619 add eax,[UStep]
01620
01621 mov [u16],eax
01622 mov ax,word ptr[edi*2]
01623
01624 mov edi,esi
01625 add esi,[VStep]
01626
01627 shr edx,16
01628 mov [v16],esi
01629
01630 shr edi,cl
01631 and edx,ebp
01632
01633 mov esi,GBitPtrHalf
01634 and edi,ebx
01635
// First texel moves to the high half of eax; the second is loaded into ax.
01636 shl eax,16
01637 add esi,edx
01638
01639 add esi,edi
01640
01641 mov ax,word ptr[esi*2]
01642
// Mask both packed texels at once into Red, Green and Blue.
01643 mov ebx,eax
01644 and eax,REDMASK
01645
01646 mov ecx,ebx
01647 and ebx,GREENMASK
01648
01649 mov dword ptr[Red],eax
01650 and ecx,BLUEMASK
01651
01652 mov dword ptr[Green],ebx
01653 mov dword ptr[Blue],ecx
01654
// Red and Green are loaded as 64-bit integers, Blue as a 32-bit integer.
// Each is multiplied by the current colour value, then that colour value is
// advanced by its per-pair increment for the next iteration.
01655 fild qword ptr[Red] ; r RI LG LB BI RL GI
01656 fmul st,st(5) ; R RI LG LB BI RL GI
01657 fild qword ptr[Green] ; g R RI LG LB BI RL GI
01658 fmul st,st(3) ; G R RI LG LB BI RL GI
01659 fxch st(6) ; RL R RI LG LB BI G GI
01660 fadd st,st(2) ; RL2 R RI LG LB BI G GI
01661 fxch st(3) ; LG R RI RL2 LB BI G GI
01662 fadd st,st(7) ; LG2 R RI RL2 LB BI G GI
01663 fxch st(6) ; G R RI RL2 LB BI LG2 GI
01664 fadd qword ptr[Magic] ; Gk R RI RL2 LB BI LG2 GI
01665 fxch st(1) ; R Gk RI RL2 LB BI LG2 GI
01666 fadd qword ptr[Magic] ; Rk Gk RI RL2 LB BI LG2 GI
01667 fxch st(1) ; Gk Rk RI RL2 LB BI LG2 GI
01668 fstp qword ptr[Bucket] ; Rk RI RL2 LB BI LG2 GI
01669 fstp qword ptr[Bucket2] ; RI RL2 LB BI LG2 GI
01670
// Here Bucket holds green and Bucket2 holds red (the reverse of the
// single-pixel section); the masks below are applied accordingly.
01671 mov edx,dword ptr[Bucket]
01672 mov eax,dword ptr[Bucket2]
01673
01674 fild dword ptr[Blue] ; b RI RL2 LB BI LG2 GI
01675 fmul st,st(3) ; B RI RL2 LB BI LG2 GI
01676 fxch st(3) ; LB RI RL2 B BI LG2 GI
01677 fadd st,st(4) ; LB2 RI RL2 B BI LG2 GI
01678 fxch st(3) ; B RI RL2 LB2 BI LG2 GI
01679 fadd qword ptr[Magic] ; Bk RI RL2 LB2 BI LG2 GI
01680
01681 and edx,GREENMASK
01682 and eax,REDMASK
01683
// The second fstp spills RI to Bucket2 to free a stack slot; it is reloaded
// below once the stack has been put back into loop-entry order.
01684 fstp qword ptr[Bucket] ; RI RL2 LB2 BI LG2 GI
01685 fstp qword ptr[Bucket2] ; RL2 LB2 BI LG2 GI
01686
01687 mov edi,edx
01688 mov ebx,dword ptr[Bucket]
01689 or edi,eax
01690 mov ebp,TDest
01691 and ebx,BLUEMASK
01692
01693 fxch st(3) ; LG2 LB2 BI RL2 GI
01694 fld qword ptr[Bucket2] ; RI LG2 LB2 BI RL2 GI
01695
// ebp = destination for this pair; TDest is advanced past it.
01696 mov si,word ptr[Z32]
01697 add TDest,4
01698
01699 mov ecx,pZBufferPtr
01700 or edi,ebx
01701
// The same depth word is written to both z entries. rol 16 swaps the halves
// so the first texel ends up in the low word of the stored dword.
01702 mov [ecx],si
01703 rol edi,16
01704
01705 mov dword ptr[ebp],edi
01706 mov [ecx+2],si
01707
01708 add pZBufferPtr,4
01709 dec [widTemp]
01710
01711 jnz PixieLoop
01712
01713 pop ebp
01714
// Pop the six values still on the FPU stack. u16 and v16 serve only as a
// place to store them; they are not read again in this function.
01715 fstp [u16]
01716 fstp [v16]
01717 fstp [u16]
01718 fstp [v16]
01719 fstp [u16]
01720 fstp [v16]
01721
// Common exit: restore the registers pushed at the top.
01722 GouraudReturnNoZ:
01723 pop edi
01724 pop esi
01725 pop ecx
01726 pop ebx
01727 }
01728 }
01729
// DrawScanLineGouraudZBufferTrans_AsmX86FPU
//
// Draws one horizontal span of a textured, colour-modulated scanline into a
// 16-bit-per-pixel destination with a depth test, a depth write, and a
// colour-key skip: texels whose 16-bit value is 1 are not drawn.
//
// pLeft, pRight : left and right edge records. The code reads X, u, v, z and
//                 the integer R, G, B fields of both records.
//
// Globals read here   : Dest, GBitPtrHalf, GWMask, GHMaskShifted, VShift,
//                       Real65536, Two, Magic, REDMASK, GREENMASK, BLUEMASK.
// Globals written here: TDest, pZBufferPtr (advanced as pixels are consumed),
//                       widTemp, u16, v16, z16, Z32, UStep, VStep, ZStep,
//                       TempPix, Red, Green, Blue, Bucket, Bucket2.
//
// Span handling:
//   * Nothing is drawn when pRight->X - pLeft->X <= 0.
//   * If pLeft->X is odd, one leading pixel is handled on its own.
//   * The remaining count is rounded down to an even number and handled two
//     pixels per loop iteration, so an odd trailing pixel is not drawn here.
//
// NOTE(review): in the pair loop the colour key and the depth test are applied
// per pair, not per pixel - see the comments at the end of the loop.
01730 void DrawScanLineGouraudZBufferTrans_AsmX86FPU(EdgeAsmFPU *pLeft, EdgeAsmFPU *pRight)
01731 {
01732 TDest =Dest;
// Red and Green are cleared in full here; the asm below stores only their low
// dwords but the pair loop loads them with a 64-bit fild.
01733 Red =Green =0;
01734 _asm
01735 {
// Saved here and restored at GouraudReturnNoZ.
01736 push ebx
01737 push ecx
01738 push esi
01739 push edi
01740
// ebx = pLeft, ecx = pRight, eax = left X, edx = right X - left X.
// Leave immediately when the difference is zero or negative.
01741 mov ebx,pLeft
01742 mov ecx,pRight
01743 mov eax,[ebx]EdgeAsmFPU.X
01744 mov edx,[ecx]EdgeAsmFPU.X
01745 sub edx,eax
01746 jle GouraudReturnNoZ
01747
// edx = pixel count (difference + 1). Both output pointers are advanced by
// left X * 2 bytes. An even left X skips the single-pixel section.
01748 mov esi,eax
01749 inc edx
01750 shl eax,1
01751 add TDest,eax
01752 add pZBufferPtr,eax
01753 test esi,1
01754 jz NoSinglePixie
01755
01756
// ---- Leading single pixel (left X is odd) ----
// u16 and v16 receive the left u and v multiplied by Real65536 and converted
// to integer; z16 receives the left z converted to integer. The left edge
// integer R, G, B stay on the FPU stack as the colour multipliers.
01757 fld dword ptr [ebx]EdgeAsmFPU.u ; UL
01758 fmul [Real65536] ; UL16
01759 fld dword ptr [ebx]EdgeAsmFPU.v ; VL UL16
01760 fmul [Real65536] ; VL16 UL16
01761 fild dword ptr [ebx]EdgeAsmFPU.R ; RL VL16 UL16
01762 fild dword ptr [ebx]EdgeAsmFPU.G ; GL RL VL16 UL16
01763 fild dword ptr [ebx]EdgeAsmFPU.B ; BL GL RL VL16 UL16
01764 fxch st(4) ; UL16 GL RL VL16 BL
01765 fistp [u16] ; GL RL VL16 BL
01766 fxch st(2) ; VL16 RL GL BL
01767 fistp [v16] ; RL GL BL
01768 fld dword ptr[ebx]EdgeAsmFPU.z ; z RL GL BL
01769 fistp [z16] ; RL GL BL
01770
// The pixel count is parked in widTemp because edx is reused below.
01771 mov [widTemp],edx
01772
// Texel index = ((v16 >> VShift) & GHMaskShifted) + ((u16 >> 16) & GWMask).
// The texel is the 16-bit word at (GBitPtrHalf + index) * 2.
01773 mov ecx,[VShift]
01774 mov ebx,[GHMaskShifted]
01775
01776 mov esi,[GWMask]
01777 mov edi,[v16]
01778
01779 mov edx,dword ptr[u16]
01780
01781 shr edi,cl
01782
01783 shr edx,16
01784 xor eax,eax
01785
01786 and edi,ebx
01787 and edx,esi
01788
01789 add edi,edx
01790
01791 add edi,GBitPtrHalf
01792 mov ax,word ptr[edi*2]
01793
// Keep the raw texel so it can be compared against the colour key below.
01794 mov word ptr[TempPix],ax
01795
// Split the texel into its three masked channels (still in place, not shifted).
01796 mov ebx,eax
01797 and eax,REDMASK
01798
01799 mov ecx,ebx
01800 and ebx,GREENMASK
01801
01802 mov dword ptr[Red],eax
01803 and ecx,BLUEMASK
01804
01805 mov dword ptr[Green],ebx
01806 mov dword ptr[Blue],ecx
01807
// Multiply each edge colour by the matching texel channel, add Magic, and
// store the result as a double; the low dword of each stored double is then
// masked back to its channel. Magic is defined elsewhere - presumably the
// usual large-constant float-to-integer conversion; confirm at its definition.
// The colour math runs even if the pixel ends up skipped, which keeps the FPU
// stack balanced on both paths.
01808 fimul dword ptr[Red] ; R GL BL
01809 fxch st(1) ; GL R BL
01810 fimul dword ptr[Green] ; G R BL
01811 fxch st(1) ; R G BL
01812 fadd qword ptr[Magic] ; Rk G BL
01813 fxch st(2) ; BL G Rk
01814 fimul [Blue] ; B G Rk
01815 fxch st(1) ; G B Rk
01816 fadd qword ptr[Magic] ; Gk B Rk
01817 fxch st(2) ; Rk B Gk
01818 fstp qword ptr[Bucket] ; B Gk
01819 fadd qword ptr[Magic] ; Bk Gk
01820 fxch st(1) ; Gk Bk
01821 fstp qword ptr[Bucket2] ; Bk
01822
01823 mov edx,dword ptr[Bucket]
01824 mov eax,dword ptr[Bucket2]
01825
01826 and edx,REDMASK
01827 and eax,GREENMASK
01828
// Bucket is reused for blue; this store also empties the FPU stack.
01829 fstp qword ptr[Bucket]
01830
01831 mov ecx,TDest
01832 mov ebx,dword ptr[Bucket]
01833
01834 mov esi,[z16]
01835 and ebx,BLUEMASK
01836
// Depth candidate for this pixel is z16 >> 16 (low 16 bits).
01837 shr esi,16
01838 or edx,eax
01839
01840 mov eax,pZBufferPtr
01841 add TDest,2
01842
// Colour key: a texel value of 1 is not drawn.
01843 cmp word ptr[TempPix],01h
01844 je SkipSinglePixie
01845
// Depth test: skip when the stored depth word is greater than the new one.
// NOTE(review): jg is a signed comparison on 16-bit words - confirm depth
// values never use the top bit.
01846 cmp word ptr[eax],si
01847 jg SkipSinglePixie
01848
// Passed both tests: write depth, then the recombined colour word.
01849 or edx,ebx
01850 mov word ptr[eax],si
01851
01852 mov word ptr[ecx],dx
01853
// Reload the edge pointers and work out what is left: count - 1, rounded down
// to an even number. Return if nothing remains.
01854 SkipSinglePixie:
01855 mov ebx,pLeft
01856 mov edx,[widTemp]
01857 add pZBufferPtr,2
01858 mov ecx,pRight
01859 dec edx
01860 jz GouraudReturnNoZ
01861 mov esi,edx
01862 and esi,1
01863 sub edx,esi
01864 jz GouraudReturnNoZ
01865
// ---- Pair loop setup ----
// edx = pixels remaining. It is stored once so the FPU can load it as WID,
// then halved and stored again as the number of two-pixel iterations.
01866 NoSinglePixie:
01867 mov [widTemp],edx ; just for a temp
01868 shr edx,1
01869
01870 ; try to keep fmul fxch pairs seperated to avoid stalling
01871 ; calc this scanlines steps ; FPU Stack
01872 ; st0 st1 st2 st3 st4 st5 st6 st7
01873 fild dword ptr [widTemp] ; WID
01874
01875 mov [widTemp],edx ; Color interps doubled
01876
// u16/v16 = left u/v * Real65536. The u, v and R differences (right - left)
// are kept on the stack; WID is replaced by 1/WID (DWID).
01877 fld dword ptr [ecx]EdgeAsmFPU.u ; UR WID
01878 fld dword ptr [ebx]EdgeAsmFPU.u ; UL UR WID
01879 fsub st(1), st ; UL UD WID
01880 fld dword ptr [ecx]EdgeAsmFPU.v ; VR UL UD WID
01881 fxch st(1) ; UL VR UD WID
01882 fmul [Real65536] ; UL16 VR UD WID
01883 fld dword ptr [ebx]EdgeAsmFPU.v ; VL UL16 VR UD WID
01884 fsub st(2), st ; VL UL16 VD UD WID
01885 fild dword ptr [ecx]EdgeAsmFPU.R ; RR VL UL16 VD UD WID
01886 fxch st(3) ; VD VL UL16 RR UD WID
01887 fmul [Real65536] ; VD16 VL UL16 RR UD WID
01888 fild dword ptr [ebx]EdgeAsmFPU.R ; RL VD16 VL UL16 RR UD WID
01889 fsub st(4),st ; RL VD16 VL UL16 RD UD WID
01890 fxch st(5) ; UD VD16 VL UL16 RD RL WID
01891 fmul [Real65536] ; UD16 VD16 VL UL16 RD RL WID
01892 fxch st(2) ; VL VD16 UD16 UL16 RD RL WID
01893 fmul [Real65536] ; VL16 VD16 UD16 UL16 RD RL WID
01894 fxch st(3) ; UL16 VD16 UD16 VL16 RD RL WID
01895 fistp [u16] ; VD16 UD16 VL16 RD RL WID
01896 fxch st(2) ; VL16 UD16 VD16 RD RL WID
01897 fistp [v16] ; UD16 VD16 RD RL WID
01898 fld1 ; 1 UD16 VD16 RD RL WID
01899 fdivrp st(5),st ; UD16 VD16 RD RL DWID
01900
01901
01902
// UStep and VStep are per-pixel steps (delta * 1/WID). The colour increments
// are per pair: delta * (1/WID * Two), each rounded with frndint.
01903 fmul st,st(4) ; USTP VD16 RD RL DWID
01904 fild [ecx]EdgeAsmFPU.G ; RG USTP VD16 RD RL DWID
01905 fild [ebx]EdgeAsmFPU.G ; LG RG USTP VD16 RD RL DWID
01906 fsub st(1),st ; LG GD USTP VD16 RD RL DWID
01907 fxch st(2) ; USTP GD LG VD16 RD RL DWID
01908 fistp [UStep] ; GD LG VD16 RD RL DWID
01909 fild [ecx]EdgeAsmFPU.B ; RB GD LG VD16 RD RL DWID
01910 fild [ebx]EdgeAsmFPU.B ; LB RB GD LG VD16 RD RL DWID
01911 fsub st(1),st ; LB BD GD LG VD16 RD RL DWID
01912 fxch st(4) ; VD16 BD GD LG LB RD RL DWID
01913 fmul st,st(7) ; VSTP BD GD LG LB RD RL DWID
01914 fxch st(7) ; DWID BD GD LG LB RD RL VSTP
01915 fmul dword ptr[Two] ; DW2 BD GD LG LB RD RL VSTP
01916 fxch st(7) ; VSTP BD GD LG LB RD RL DW2
01917 fxch st(5) ; RD BD GD LG LB VSTP RL DW2
01918 fmul st,st(7) ; RI BD GD LG LB VSTP RL DW2
01919 frndint
01920 fxch st(2) ; GD BD RI LG LB VSTP RL DW2
01921 fmul st,st(7) ; GI BD RI LG LB VSTP RL DW2
01922 frndint
01923 fxch st(1) ; BD GI RI LG LB VSTP RL DW2
01924 fmul st,st(7) ; BD GI RI LG LB VSTP RL DW2
01925 frndint
01926 fxch st(5) ; VSTP GI RI LG LB BD RL DW2
01927 fistp [VStep] ; GI RI LG LB BD RL DW2
// ZStep = (right z - left z) * DW2, i.e. a per-pair depth step; z16 starts at
// the left z. Six values remain on the FPU stack for the loop.
01928 fld [ecx]EdgeAsmFPU.z ; rz GI RI LG LB BD RL DW2
01929 fsub [ebx]EdgeAsmFPU.z ; zd GI RI LG LB BD RL DW2
01930 fxch st(7) ; DW2 GI RI LG LB BD RL zd
01931 fmulp st(7),st ; GI RI LG LB BD RL zd
01932 fld [ebx]EdgeAsmFPU.z ; lz GI RI LG LB BD RL zd
01933 fxch st(7) ; zd GI RI LG LB BD RL lz
01934 fistp [ZStep] ; GI RI LG LB BD RL lz
01935 fxch st(6) ; lz RI LG LB BD RL GI
01936 fistp [z16] ; RI LG LB BD RL GI
01937
// ebp is used as a scratch register inside the loop, so it is saved here and
// restored right after the loop. The loop itself touches only globals.
01938 push ebp
01939
// ---- Pair loop: two pixels per iteration ----
01940 PixieLoop:
// z16 is stepped before use; Z32 receives the low 16 bits of z16 >> 16.
01941 mov ebx,[ZStep]
01942
01943 add [z16],ebx
01944 mov ecx,[VShift]
01945
01946 mov edi,[z16]
01947
01948 shr edi,16
01949 mov ebx,[GHMaskShifted]
01950
01951 mov ebp,[GWMask]
01952 mov edx,dword ptr[u16]
01953
01954 mov word ptr[Z32],di
01955 mov eax,edx
01956
// First texel is addressed from (u16, v16), the second from
// (u16 + UStep, v16 + VStep). u16 and v16 are advanced by two steps in total.
01957 shr edx,16
01958 mov edi,[v16]
01959
01960 add eax,[UStep]
01961 mov esi,edi
01962
01963 shr edi,cl
01964 add esi,[VStep]
01965
01966 and edi,ebx
01967 and edx,ebp
01968
01969 add edi,edx
01970 mov edx,eax
01971
01972 add edi,GBitPtrHalf
01973 add eax,[UStep]
01974
01975 mov [u16],eax
01976 mov ax,word ptr[edi*2]
01977
01978 mov edi,esi
01979 add esi,[VStep]
01980
01981 shr edx,16
01982 mov [v16],esi
01983
01984 shr edi,cl
01985 and edx,ebp
01986
01987 mov esi,GBitPtrHalf
01988 and edi,ebx
01989
// First texel moves to the high half of eax; the second is loaded into ax.
01990 shl eax,16
01991 add esi,edx
01992
01993 add esi,edi
01994
01995 mov ax,word ptr[esi*2]
01996
// Keep both raw texels (packed) for the colour-key test at the end.
01997 mov [TempPix],eax
01998
// Mask both packed texels at once into Red, Green and Blue.
01999 mov ebx,eax
02000 and eax,REDMASK
02001
02002 mov ecx,ebx
02003 and ebx,GREENMASK
02004
02005 mov dword ptr[Red],eax
02006 and ecx,BLUEMASK
02007
02008 mov dword ptr[Green],ebx
02009 mov dword ptr[Blue],ecx
02010
// Red and Green are loaded as 64-bit integers, Blue as a 32-bit integer.
// Each is multiplied by the current colour value, then that colour value is
// advanced by its per-pair increment for the next iteration.
02011 fild qword ptr[Red] ; r RI LG LB BI RL GI
02012 fmul st,st(5) ; R RI LG LB BI RL GI
02013 fild qword ptr[Green] ; g R RI LG LB BI RL GI
02014 fmul st,st(3) ; G R RI LG LB BI RL GI
02015 fxch st(6) ; RL R RI LG LB BI G GI
02016 fadd st,st(2) ; RL2 R RI LG LB BI G GI
02017 fxch st(3) ; LG R RI RL2 LB BI G GI
02018 fadd st,st(7) ; LG2 R RI RL2 LB BI G GI
02019 fxch st(6) ; G R RI RL2 LB BI LG2 GI
02020 fadd qword ptr[Magic] ; Gk R RI RL2 LB BI LG2 GI
02021 fxch st(1) ; R Gk RI RL2 LB BI LG2 GI
02022 fadd qword ptr[Magic] ; Rk Gk RI RL2 LB BI LG2 GI
02023 fxch st(1) ; Gk Rk RI RL2 LB BI LG2 GI
02024 fstp qword ptr[Bucket] ; Rk RI RL2 LB BI LG2 GI
02025 fstp qword ptr[Bucket2] ; RI RL2 LB BI LG2 GI
02026
// Here Bucket holds green and Bucket2 holds red (the reverse of the
// single-pixel section); the masks below are applied accordingly.
02027 mov edx,dword ptr[Bucket]
02028 mov eax,dword ptr[Bucket2]
02029
02030 fild dword ptr[Blue] ; b RI RL2 LB BI LG2 GI
02031 fmul st,st(3) ; B RI RL2 LB BI LG2 GI
02032 fxch st(3) ; LB RI RL2 B BI LG2 GI
02033 fadd st,st(4) ; LB2 RI RL2 B BI LG2 GI
02034 fxch st(3) ; B RI RL2 LB2 BI LG2 GI
02035 fadd qword ptr[Magic] ; Bk RI RL2 LB2 BI LG2 GI
02036
02037 and edx,GREENMASK
02038 and eax,REDMASK
02039
// The second fstp spills RI to Bucket2 to free a stack slot; it is reloaded
// below once the stack has been put back into loop-entry order.
02040 fstp qword ptr[Bucket] ; RI RL2 LB2 BI LG2 GI
02041 fstp qword ptr[Bucket2] ; RL2 LB2 BI LG2 GI
02042
02043 mov edi,edx
02044 mov ebx,dword ptr[Bucket]
02045 or edi,eax
02046 mov ebp,TDest
02047 and ebx,BLUEMASK
02048
02049 fxch st(3) ; LG2 LB2 BI RL2 GI
02050 fld qword ptr[Bucket2] ; RI LG2 LB2 BI RL2 GI
02051
// ebp = destination for this pair; TDest is advanced past it whether or not
// the pair is drawn.
02052 mov si,word ptr[Z32]
02053 add TDest,4
02054
02055 mov ecx,pZBufferPtr
02056 or edi,ebx
02057
// Colour key: the pair is skipped only when BOTH packed texels equal 1.
// NOTE(review): a pair with exactly one key texel is drawn in full.
02058 cmp [TempPix],010001h
02059 je SkipPixie
02060
// rol 16 swaps the halves so the first texel ends up in the low word.
02061 rol edi,16
02062
// Depth test uses only the first z word of the pair, compared as a signed
// 16-bit value against Z32. NOTE(review): confirm both are intended.
02063 cmp word ptr[ecx],si
02064 jg SkipPixie
02065
// Passed: the same depth word goes to both z entries, and both pixels are
// written with one dword store.
02066 mov [ecx],si
02067 mov dword ptr[ebp],edi
02068 mov [ecx+2],si
02069 SkipPixie:
02070 add pZBufferPtr,4
02071 dec [widTemp]
02072
02073 jnz PixieLoop
02074
02075 pop ebp
02076
// Pop the six values still on the FPU stack. u16 and v16 serve only as a
// place to store them; they are not read again in this function.
02077 fstp [u16]
02078 fstp [v16]
02079 fstp [u16]
02080 fstp [v16]
02081 fstp [u16]
02082 fstp [v16]
02083
// Common exit: restore the registers pushed at the top.
02084 GouraudReturnNoZ:
02085 pop edi
02086 pop esi
02087 pop ecx
02088 pop ebx
02089 }
02090 }
02091
// DrawScanLineGouraudZBufferNoZWriteTrans_AsmX86FPU
//
// Draws one horizontal span of a textured, colour-modulated scanline into a
// 16-bit-per-pixel destination with a depth test and a colour-key skip
// (texels whose 16-bit value is 1 are not drawn). The z-buffer is only read;
// this routine never stores to it.
//
// pLeft, pRight : left and right edge records. The code reads X, u, v, z and
//                 the integer R, G, B fields of both records.
//
// Globals read here   : Dest, GBitPtrHalf, GWMask, GHMaskShifted, VShift,
//                       Real65536, Two, Magic, REDMASK, GREENMASK, BLUEMASK.
// Globals written here: TDest, pZBufferPtr (advanced as pixels are consumed),
//                       widTemp, u16, v16, z16, Z32, UStep, VStep, ZStep,
//                       TempPix, Red, Green, Blue, Bucket, Bucket2.
//
// Span handling:
//   * Nothing is drawn when pRight->X - pLeft->X <= 0.
//   * If pLeft->X is odd, one leading pixel is handled on its own.
//   * The remaining count is rounded down to an even number and handled two
//     pixels per loop iteration, so an odd trailing pixel is not drawn here.
//
// NOTE(review): in the pair loop the colour key and the depth test are applied
// per pair, not per pixel - see the comments at the end of the loop.
02092 void DrawScanLineGouraudZBufferNoZWriteTrans_AsmX86FPU(EdgeAsmFPU *pLeft, EdgeAsmFPU *pRight)
02093 {
02094 TDest =Dest;
// Red and Green are cleared in full here; the asm below stores only their low
// dwords but the pair loop loads them with a 64-bit fild.
02095 Red =Green =0;
02096 _asm
02097 {
// Saved here and restored at GouraudReturnNoZ.
02098 push ebx
02099 push ecx
02100 push esi
02101 push edi
02102
// ebx = pLeft, ecx = pRight, eax = left X, edx = right X - left X.
// Leave immediately when the difference is zero or negative.
02103 mov ebx,pLeft
02104 mov ecx,pRight
02105 mov eax,[ebx]EdgeAsmFPU.X
02106 mov edx,[ecx]EdgeAsmFPU.X
02107 sub edx,eax
02108 jle GouraudReturnNoZ
02109
// edx = pixel count (difference + 1). Both pointers are advanced by
// left X * 2 bytes. An even left X skips the single-pixel section.
02110 mov esi,eax
02111 inc edx
02112 shl eax,1
02113 add TDest,eax
02114 add pZBufferPtr,eax
02115 test esi,1
02116 jz NoSinglePixie
02117
02118
// ---- Leading single pixel (left X is odd) ----
// u16 and v16 receive the left u and v multiplied by Real65536 and converted
// to integer; z16 receives the left z converted to integer. The left edge
// integer R, G, B stay on the FPU stack as the colour multipliers.
02119 fld dword ptr [ebx]EdgeAsmFPU.u ; UL
02120 fmul [Real65536] ; UL16
02121 fld dword ptr [ebx]EdgeAsmFPU.v ; VL UL16
02122 fmul [Real65536] ; VL16 UL16
02123 fild dword ptr [ebx]EdgeAsmFPU.R ; RL VL16 UL16
02124 fild dword ptr [ebx]EdgeAsmFPU.G ; GL RL VL16 UL16
02125 fild dword ptr [ebx]EdgeAsmFPU.B ; BL GL RL VL16 UL16
02126 fxch st(4) ; UL16 GL RL VL16 BL
02127 fistp [u16] ; GL RL VL16 BL
02128 fxch st(2) ; VL16 RL GL BL
02129 fistp [v16] ; RL GL BL
02130 fld dword ptr[ebx]EdgeAsmFPU.z ; z RL GL BL
02131 fistp [z16] ; RL GL BL
02132
// The pixel count is parked in widTemp because edx is reused below.
02133 mov [widTemp],edx
02134
// Texel index = ((v16 >> VShift) & GHMaskShifted) + ((u16 >> 16) & GWMask).
// The texel is the 16-bit word at (GBitPtrHalf + index) * 2.
02135 mov ecx,[VShift]
02136 mov ebx,[GHMaskShifted]
02137
02138 mov esi,[GWMask]
02139 mov edi,[v16]
02140
02141 mov edx,dword ptr[u16]
02142
02143 shr edi,cl
02144
02145 shr edx,16
02146 xor eax,eax
02147
02148 and edi,ebx
02149 and edx,esi
02150
02151 add edi,edx
02152
02153 add edi,GBitPtrHalf
02154 mov ax,word ptr[edi*2]
02155
// Keep the raw texel so it can be compared against the colour key below.
02156 mov word ptr[TempPix],ax
02157
// Split the texel into its three masked channels (still in place, not shifted).
02158 mov ebx,eax
02159 and eax,REDMASK
02160
02161 mov ecx,ebx
02162 and ebx,GREENMASK
02163
02164 mov dword ptr[Red],eax
02165 and ecx,BLUEMASK
02166
02167 mov dword ptr[Green],ebx
02168 mov dword ptr[Blue],ecx
02169
// Multiply each edge colour by the matching texel channel, add Magic, and
// store the result as a double; the low dword of each stored double is then
// masked back to its channel. Magic is defined elsewhere - presumably the
// usual large-constant float-to-integer conversion; confirm at its definition.
// The colour math runs even if the pixel ends up skipped, which keeps the FPU
// stack balanced on both paths.
02170 fimul dword ptr[Red] ; R GL BL
02171 fxch st(1) ; GL R BL
02172 fimul dword ptr[Green] ; G R BL
02173 fxch st(1) ; R G BL
02174 fadd qword ptr[Magic] ; Rk G BL
02175 fxch st(2) ; BL G Rk
02176 fimul [Blue] ; B G Rk
02177 fxch st(1) ; G B Rk
02178 fadd qword ptr[Magic] ; Gk B Rk
02179 fxch st(2) ; Rk B Gk
02180 fstp qword ptr[Bucket] ; B Gk
02181 fadd qword ptr[Magic] ; Bk Gk
02182 fxch st(1) ; Gk Bk
02183 fstp qword ptr[Bucket2] ; Bk
02184
02185 mov edx,dword ptr[Bucket]
02186 mov eax,dword ptr[Bucket2]
02187
02188 and edx,REDMASK
02189 and eax,GREENMASK
02190
// Bucket is reused for blue; this store also empties the FPU stack.
02191 fstp qword ptr[Bucket]
02192
02193 mov ecx,TDest
02194 mov ebx,dword ptr[Bucket]
02195
02196 mov esi,[z16]
02197 and ebx,BLUEMASK
02198
// Depth candidate for this pixel is z16 >> 16 (low 16 bits).
02199 shr esi,16
02200 or edx,eax
02201
02202 mov eax,pZBufferPtr
02203 add TDest,2
02204
// Colour key: a texel value of 1 is not drawn.
02205 cmp word ptr[TempPix],01h
02206 je SkipSinglePixie
02207
// Depth test: skip when the stored depth word is greater than the new one.
// NOTE(review): jg is a signed comparison on 16-bit words - confirm depth
// values never use the top bit.
02208 cmp word ptr[eax],si
02209 jg SkipSinglePixie
02210
// Passed both tests: write the recombined colour word only (no depth store).
02211 or edx,ebx
02212
02213 mov word ptr[ecx],dx
02214
// Reload the edge pointers and work out what is left: count - 1, rounded down
// to an even number. Return if nothing remains.
02215 SkipSinglePixie:
02216 mov ebx,pLeft
02217 mov edx,[widTemp]
02218 add pZBufferPtr,2
02219 mov ecx,pRight
02220 dec edx
02221 jz GouraudReturnNoZ
02222 mov esi,edx
02223 and esi,1
02224 sub edx,esi
02225 jz GouraudReturnNoZ
02226
// ---- Pair loop setup ----
// edx = pixels remaining. It is stored once so the FPU can load it as WID,
// then halved and stored again as the number of two-pixel iterations.
02227 NoSinglePixie:
02228 mov [widTemp],edx ; just for a temp
02229 shr edx,1
02230
02231 ; try to keep fmul fxch pairs seperated to avoid stalling
02232 ; calc this scanlines steps ; FPU Stack
02233 ; st0 st1 st2 st3 st4 st5 st6 st7
02234 fild dword ptr [widTemp] ; WID
02235
02236 mov [widTemp],edx ; Color interps doubled
02237
// u16/v16 = left u/v * Real65536. The u, v and R differences (right - left)
// are kept on the stack; WID is replaced by 1/WID (DWID).
02238 fld dword ptr [ecx]EdgeAsmFPU.u ; UR WID
02239 fld dword ptr [ebx]EdgeAsmFPU.u ; UL UR WID
02240 fsub st(1), st ; UL UD WID
02241 fld dword ptr [ecx]EdgeAsmFPU.v ; VR UL UD WID
02242 fxch st(1) ; UL VR UD WID
02243 fmul [Real65536] ; UL16 VR UD WID
02244 fld dword ptr [ebx]EdgeAsmFPU.v ; VL UL16 VR UD WID
02245 fsub st(2), st ; VL UL16 VD UD WID
02246 fild dword ptr [ecx]EdgeAsmFPU.R ; RR VL UL16 VD UD WID
02247 fxch st(3) ; VD VL UL16 RR UD WID
02248 fmul [Real65536] ; VD16 VL UL16 RR UD WID
02249 fild dword ptr [ebx]EdgeAsmFPU.R ; RL VD16 VL UL16 RR UD WID
02250 fsub st(4),st ; RL VD16 VL UL16 RD UD WID
02251 fxch st(5) ; UD VD16 VL UL16 RD RL WID
02252 fmul [Real65536] ; UD16 VD16 VL UL16 RD RL WID
02253 fxch st(2) ; VL VD16 UD16 UL16 RD RL WID
02254 fmul [Real65536] ; VL16 VD16 UD16 UL16 RD RL WID
02255 fxch st(3) ; UL16 VD16 UD16 VL16 RD RL WID
02256 fistp [u16] ; VD16 UD16 VL16 RD RL WID
02257 fxch st(2) ; VL16 UD16 VD16 RD RL WID
02258 fistp [v16] ; UD16 VD16 RD RL WID
02259 fld1 ; 1 UD16 VD16 RD RL WID
02260 fdivrp st(5),st ; UD16 VD16 RD RL DWID
02261
02262
02263
// UStep and VStep are per-pixel steps (delta * 1/WID). The colour increments
// are per pair: delta * (1/WID * Two), each rounded with frndint.
02264 fmul st,st(4) ; USTP VD16 RD RL DWID
02265 fild [ecx]EdgeAsmFPU.G ; RG USTP VD16 RD RL DWID
02266 fild [ebx]EdgeAsmFPU.G ; LG RG USTP VD16 RD RL DWID
02267 fsub st(1),st ; LG GD USTP VD16 RD RL DWID
02268 fxch st(2) ; USTP GD LG VD16 RD RL DWID
02269 fistp [UStep] ; GD LG VD16 RD RL DWID
02270 fild [ecx]EdgeAsmFPU.B ; RB GD LG VD16 RD RL DWID
02271 fild [ebx]EdgeAsmFPU.B ; LB RB GD LG VD16 RD RL DWID
02272 fsub st(1),st ; LB BD GD LG VD16 RD RL DWID
02273 fxch st(4) ; VD16 BD GD LG LB RD RL DWID
02274 fmul st,st(7) ; VSTP BD GD LG LB RD RL DWID
02275 fxch st(7) ; DWID BD GD LG LB RD RL VSTP
02276 fmul dword ptr[Two] ; DW2 BD GD LG LB RD RL VSTP
02277 fxch st(7) ; VSTP BD GD LG LB RD RL DW2
02278 fxch st(5) ; RD BD GD LG LB VSTP RL DW2
02279 fmul st,st(7) ; RI BD GD LG LB VSTP RL DW2
02280 frndint
02281 fxch st(2) ; GD BD RI LG LB VSTP RL DW2
02282 fmul st,st(7) ; GI BD RI LG LB VSTP RL DW2
02283 frndint
02284 fxch st(1) ; BD GI RI LG LB VSTP RL DW2
02285 fmul st,st(7) ; BD GI RI LG LB VSTP RL DW2
02286 frndint
02287 fxch st(5) ; VSTP GI RI LG LB BD RL DW2
02288 fistp [VStep] ; GI RI LG LB BD RL DW2
// ZStep = (right z - left z) * DW2, i.e. a per-pair depth step; z16 starts at
// the left z. Six values remain on the FPU stack for the loop.
02289 fld [ecx]EdgeAsmFPU.z ; rz GI RI LG LB BD RL DW2
02290 fsub [ebx]EdgeAsmFPU.z ; zd GI RI LG LB BD RL DW2
02291 fxch st(7) ; DW2 GI RI LG LB BD RL zd
02292 fmulp st(7),st ; GI RI LG LB BD RL zd
02293 fld [ebx]EdgeAsmFPU.z ; lz GI RI LG LB BD RL zd
02294 fxch st(7) ; zd GI RI LG LB BD RL lz
02295 fistp [ZStep] ; GI RI LG LB BD RL lz
02296 fxch st(6) ; lz RI LG LB BD RL GI
02297 fistp [z16] ; RI LG LB BD RL GI
02298
// ebp is used as a scratch register inside the loop, so it is saved here and
// restored right after the loop. The loop itself touches only globals.
02299 push ebp
02300
// ---- Pair loop: two pixels per iteration ----
02301 PixieLoop:
// z16 is stepped before use; Z32 receives the low 16 bits of z16 >> 16.
02302 mov ebx,[ZStep]
02303
02304 add [z16],ebx
02305 mov ecx,[VShift]
02306
02307 mov edi,[z16]
02308
02309 shr edi,16
02310 mov ebx,[GHMaskShifted]
02311
02312 mov ebp,[GWMask]
02313 mov edx,dword ptr[u16]
02314
02315 mov word ptr[Z32],di
02316 mov eax,edx
02317
// First texel is addressed from (u16, v16), the second from
// (u16 + UStep, v16 + VStep). u16 and v16 are advanced by two steps in total.
02318 shr edx,16
02319 mov edi,[v16]
02320
02321 add eax,[UStep]
02322 mov esi,edi
02323
02324 shr edi,cl
02325 add esi,[VStep]
02326
02327 and edi,ebx
02328 and edx,ebp
02329
02330 add edi,edx
02331 mov edx,eax
02332
02333 add edi,GBitPtrHalf
02334 add eax,[UStep]
02335
02336 mov [u16],eax
02337 mov ax,word ptr[edi*2]
02338
02339 mov edi,esi
02340 add esi,[VStep]
02341
02342 shr edx,16
02343 mov [v16],esi
02344
02345 shr edi,cl
02346 and edx,ebp
02347
02348 mov esi,GBitPtrHalf
02349 and edi,ebx
02350
// First texel moves to the high half of eax; the second is loaded into ax.
02351 shl eax,16
02352 add esi,edx
02353
02354 add esi,edi
02355
02356 mov ax,word ptr[esi*2]
02357
// Keep both raw texels (packed) for the colour-key test at the end.
02358 mov [TempPix],eax
02359
// Mask both packed texels at once into Red, Green and Blue.
02360 mov ebx,eax
02361 and eax,REDMASK
02362
02363 mov ecx,ebx
02364 and ebx,GREENMASK
02365
02366 mov dword ptr[Red],eax
02367 and ecx,BLUEMASK
02368
02369 mov dword ptr[Green],ebx
02370 mov dword ptr[Blue],ecx
02371
// Red and Green are loaded as 64-bit integers, Blue as a 32-bit integer.
// Each is multiplied by the current colour value, then that colour value is
// advanced by its per-pair increment for the next iteration.
02372 fild qword ptr[Red] ; r RI LG LB BI RL GI
02373 fmul st,st(5) ; R RI LG LB BI RL GI
02374 fild qword ptr[Green] ; g R RI LG LB BI RL GI
02375 fmul st,st(3) ; G R RI LG LB BI RL GI
02376 fxch st(6) ; RL R RI LG LB BI G GI
02377 fadd st,st(2) ; RL2 R RI LG LB BI G GI
02378 fxch st(3) ; LG R RI RL2 LB BI G GI
02379 fadd st,st(7) ; LG2 R RI RL2 LB BI G GI
02380 fxch st(6) ; G R RI RL2 LB BI LG2 GI
02381 fadd qword ptr[Magic] ; Gk R RI RL2 LB BI LG2 GI
02382 fxch st(1) ; R Gk RI RL2 LB BI LG2 GI
02383 fadd qword ptr[Magic] ; Rk Gk RI RL2 LB BI LG2 GI
02384 fxch st(1) ; Gk Rk RI RL2 LB BI LG2 GI
02385 fstp qword ptr[Bucket] ; Rk RI RL2 LB BI LG2 GI
02386 fstp qword ptr[Bucket2] ; RI RL2 LB BI LG2 GI
02387
// Here Bucket holds green and Bucket2 holds red (the reverse of the
// single-pixel section); the masks below are applied accordingly.
02388 mov edx,dword ptr[Bucket]
02389 mov eax,dword ptr[Bucket2]
02390
02391 fild dword ptr[Blue] ; b RI RL2 LB BI LG2 GI
02392 fmul st,st(3) ; B RI RL2 LB BI LG2 GI
02393 fxch st(3) ; LB RI RL2 B BI LG2 GI
02394 fadd st,st(4) ; LB2 RI RL2 B BI LG2 GI
02395 fxch st(3) ; B RI RL2 LB2 BI LG2 GI
02396 fadd qword ptr[Magic] ; Bk RI RL2 LB2 BI LG2 GI
02397
02398 and edx,GREENMASK
02399 and eax,REDMASK
02400
// The second fstp spills RI to Bucket2 to free a stack slot; it is reloaded
// below once the stack has been put back into loop-entry order.
02401 fstp qword ptr[Bucket] ; RI RL2 LB2 BI LG2 GI
02402 fstp qword ptr[Bucket2] ; RL2 LB2 BI LG2 GI
02403
02404 mov edi,edx
02405 mov ebx,dword ptr[Bucket]
02406 or edi,eax
02407 mov ebp,TDest
02408 and ebx,BLUEMASK
02409
02410 fxch st(3) ; LG2 LB2 BI RL2 GI
02411 fld qword ptr[Bucket2] ; RI LG2 LB2 BI RL2 GI
02412
// ebp = destination for this pair; TDest is advanced past it whether or not
// the pair is drawn.
02413 mov si,word ptr[Z32]
02414 add TDest,4
02415
02416 mov ecx,pZBufferPtr
02417 or edi,ebx
02418
// Colour key: the pair is skipped only when BOTH packed texels equal 1.
// NOTE(review): a pair with exactly one key texel is drawn in full.
02419 cmp [TempPix],010001h
02420 je SkipPixie
02421
// rol 16 swaps the halves so the first texel ends up in the low word.
02422 rol edi,16
02423
// Depth test uses only the first z word of the pair, compared as a signed
// 16-bit value against Z32. NOTE(review): confirm both are intended.
02424 cmp word ptr[ecx],si
02425 jg SkipPixie
02426
// Passed: both pixels are written with one dword store; no depth store.
02427 mov dword ptr[ebp],edi
02428 SkipPixie:
02429 add pZBufferPtr,4
02430 dec [widTemp]
02431
02432 jnz PixieLoop
02433
02434 pop ebp
02435
// Pop the six values still on the FPU stack. u16 and v16 serve only as a
// place to store them; they are not read again in this function.
02436 fstp [u16]
02437 fstp [v16]
02438 fstp [u16]
02439 fstp [v16]
02440 fstp [u16]
02441 fstp [v16]
02442
// Common exit: restore the registers pushed at the top.
02443 GouraudReturnNoZ:
02444 pop edi
02445 pop esi
02446 pop ecx
02447 pop ebx
02448 }
02449 }
02450
02451 void DrawScanLineGouraudNoZBufferZWriteTrans_AsmX86FPU(EdgeAsmFPU *pLeft, EdgeAsmFPU *pRight)
02452 {
02453 TDest =Dest;
02454 Red =Green =0;
02455 _asm
02456 {
02457 push ebx
02458 push ecx
02459 push esi
02460 push edi
02461
02462 mov ebx,pLeft
02463 mov ecx,pRight
02464 mov eax,[ebx]EdgeAsmFPU.X
02465 mov edx,[ecx]EdgeAsmFPU.X
02466 sub edx,eax
02467 jle GouraudReturnNoZ
02468
02469 mov esi,eax
02470 inc edx
02471 shl eax,1
02472 add TDest,eax
02473 add pZBufferPtr,eax
02474 test esi,1
02475 jz NoSinglePixie
02476
02477
02478 fld dword ptr [ebx]EdgeAsmFPU.u ; UL
02479 fmul [Real65536] ; UL16
02480 fld dword ptr [ebx]EdgeAsmFPU.v ; VL UL16
02481 fmul [Real65536] ; VL16 UL16
02482 fild dword ptr [ebx]EdgeAsmFPU.R ; RL VL16 UL16
02483 fild dword ptr [ebx]EdgeAsmFPU.G ; GL RL VL16 UL16
02484 fild dword ptr [ebx]EdgeAsmFPU.B ; BL GL RL VL16 UL16
02485 fxch st(4) ; UL16 GL RL VL16 BL
02486 fistp [u16] ; GL RL VL16 BL
02487 fxch st(2) ; VL16 RL GL BL
02488 fistp [v16] ; RL GL BL
02489 fld dword ptr[ebx]EdgeAsmFPU.z ; z RL GL BL
02490 fistp [z16] ; RL GL BL
02491
02492 mov [widTemp],edx
02493
02494 mov ecx,[VShift]
02495 mov ebx,[GHMaskShifted]
02496
02497 mov esi,[GWMask]
02498 mov edi,[v16]
02499
02500 mov edx,dword ptr[u16]
02501
02502 shr edi,cl
02503
02504 shr edx,16
02505 xor eax,eax
02506
02507 and edi,ebx
02508 and edx,esi
02509
02510 add edi,edx
02511
02512 add edi,GBitPtrHalf
02513 mov ax,word ptr[edi*2]
02514
02515 mov word ptr[TempPix],ax
02516
02517 mov ebx,eax
02518 and eax,REDMASK
02519
02520 mov ecx,ebx
02521 and ebx,GREENMASK
02522
02523 mov dword ptr[Red],eax
02524 and ecx,BLUEMASK
02525
02526 mov dword ptr[Green],ebx
02527 mov dword ptr[Blue],ecx
02528
02529 fimul dword ptr[Red] ; R GL BL
02530 fxch st(1) ; GL R BL
02531 fimul dword ptr[Green] ; G R BL
02532 fxch st(1) ; R G BL
02533 fadd qword ptr[Magic] ; Rk G BL
02534 fxch st(2) ; BL G Rk
02535 fimul [Blue] ; B G Rk
02536 fxch st(1) ; G B Rk
02537 fadd qword ptr[Magic] ; Gk B Rk
02538 fxch st(2) ; Rk B Gk
02539 fstp qword ptr[Bucket] ; B Gk
02540 fadd qword ptr[Magic] ; Bk Gk
02541 fxch st(1) ; Gk Bk
02542 fstp qword ptr[Bucket2] ; Bk
02543
02544 mov edx,dword ptr[Bucket]
02545 mov eax,dword ptr[Bucket2]
02546
02547 and edx,REDMASK
02548 and eax,GREENMASK
02549
02550 fstp qword ptr[Bucket]
02551
02552 mov ecx,TDest
02553 mov ebx,dword ptr[Bucket]
02554
02555 mov esi,[z16]
02556 and ebx,BLUEMASK
02557
02558 shr esi,16
02559 or edx,eax
02560
02561 mov eax,pZBufferPtr
02562 add TDest,2
02563
02564 cmp word ptr[TempPix],01h
02565 je SkipSinglePixie
02566
02567 or edx,ebx
02568 mov word ptr[eax],si
02569
02570 mov word ptr[ecx],dx
02571
02572 SkipSinglePixie:
02573 mov ebx,pLeft
02574 mov edx,[widTemp]
02575 add pZBufferPtr,2
02576 mov ecx,pRight
02577 dec edx
02578 jz GouraudReturnNoZ
02579 mov esi,edx
02580 and esi,1
02581 sub edx,esi
02582 jz GouraudReturnNoZ
02583
02584 NoSinglePixie:
02585 mov [widTemp],edx ; just for a temp
02586 shr edx,1
02587
02588 ; try to keep fmul fxch pairs seperated to avoid stalling
02589 ; calc this scanlines steps ; FPU Stack
02590 ; st0 st1 st2 st3 st4 st5 st6 st7
02591 fild dword ptr [widTemp] ; WID
02592
02593 mov [widTemp],edx ; Color interps doubled
02594
02595 fld dword ptr [ecx]EdgeAsmFPU.u ; UR WID
02596 fld dword ptr [ebx]EdgeAsmFPU.u ; UL UR WID
02597 fsub st(1), st ; UL UD WID
02598 fld dword ptr [ecx]EdgeAsmFPU.v ; VR UL UD WID
02599 fxch st(1) ; UL VR UD WID
02600 fmul [Real65536] ; UL16 VR UD WID
02601 fld dword ptr [ebx]EdgeAsmFPU.v ; VL UL16 VR UD WID
02602 fsub st(2), st ; VL UL16 VD UD WID
02603 fild dword ptr [ecx]EdgeAsmFPU.R ; RR VL UL16 VD UD WID
02604 fxch st(3) ; VD VL UL16 RR UD WID
02605 fmul [Real65536] ; VD16 VL UL16 RR UD WID
02606 fild dword ptr [ebx]EdgeAsmFPU.R ; RL VD16 VL UL16 RR UD WID
02607 fsub st(4),st ; RL VD16 VL UL16 RD UD WID
02608 fxch st(5) ; UD VD16 VL UL16 RD RL WID
02609 fmul [Real65536] ; UD16 VD16 VL UL16 RD RL WID
02610 fxch st(2) ; VL VD16 UD16 UL16 RD RL WID
02611 fmul [Real65536] ; VL16 VD16 UD16 UL16 RD RL WID
02612 fxch st(3) ; UL16 VD16 UD16 VL16 RD RL WID
02613 fistp [u16] ; VD16 UD16 VL16 RD RL WID
02614 fxch st(2) ; VL16 UD16 VD16 RD RL WID
02615 fistp [v16] ; UD16 VD16 RD RL WID
02616 fld1 ; 1 UD16 VD16 RD RL WID
02617 fdivrp st(5),st ; UD16 VD16 RD RL DWID
02618
02619
02620
02621 fmul st,st(4) ; USTP VD16 RD RL DWID
02622 fild [ecx]EdgeAsmFPU.G ; RG USTP VD16 RD RL DWID
02623 fild [ebx]EdgeAsmFPU.G ; LG RG USTP VD16 RD RL DWID
02624 fsub st(1),st ; LG GD USTP VD16 RD RL DWID
02625 fxch st(2) ; USTP GD LG VD16 RD RL DWID
02626 fistp [UStep] ; GD LG VD16 RD RL DWID
02627 fild [ecx]EdgeAsmFPU.B ; RB GD LG VD16 RD RL DWID
02628 fild [ebx]EdgeAsmFPU.B ; LB RB GD LG VD16 RD RL DWID
02629 fsub st(1),st ; LB BD GD LG VD16 RD RL DWID
02630 fxch st(4) ; VD16 BD GD LG LB RD RL DWID
02631 fmul st,st(7) ; VSTP BD GD LG LB RD RL DWID
02632 fxch st(7) ; DWID BD GD LG LB RD RL VSTP
02633 fmul dword ptr[Two] ; DW2 BD GD LG LB RD RL VSTP
02634 fxch st(7) ; VSTP BD GD LG LB RD RL DW2
02635 fxch st(5) ; RD BD GD LG LB VSTP RL DW2
02636 fmul st,st(7) ; RI BD GD LG LB VSTP RL DW2
02637 frndint
02638 fxch st(2) ; GD BD RI LG LB VSTP RL DW2
02639 fmul st,st(7) ; GI BD RI LG LB VSTP RL DW2
02640 frndint
02641 fxch st(1) ; BD GI RI LG LB VSTP RL DW2
02642 fmul st,st(7) ; BD GI RI LG LB VSTP RL DW2
02643 frndint
02644 fxch st(5) ; VSTP GI RI LG LB BD RL DW2
02645 fistp [VStep] ; GI RI LG LB BD RL DW2
02646 fld [ecx]EdgeAsmFPU.z ; rz GI RI LG LB BD RL DW2
02647 fsub [ebx]EdgeAsmFPU.z ; zd GI RI LG LB BD RL DW2
02648 fxch st(7) ; DW2 GI RI LG LB BD RL zd
02649 fmulp st(7),st ; GI RI LG LB BD RL zd
02650 fld [ebx]EdgeAsmFPU.z ; lz GI RI LG LB BD RL zd
02651 fxch st(7) ; zd GI RI LG LB BD RL lz
02652 fistp [ZStep] ; GI RI LG LB BD RL lz
02653 fxch st(6) ; lz RI LG LB BD RL GI
02654 fistp [z16] ; RI LG LB BD RL GI
02655
02656 push ebp
02657
02658 PixieLoop:
02659 mov ebx,[ZStep]
02660
02661 add [z16],ebx
02662 mov ecx,[VShift]
02663
02664 mov edi,[z16]
02665
02666 shr edi,16
02667 mov ebx,[GHMaskShifted]
02668
02669 mov ebp,[GWMask]
02670 mov edx,dword ptr[u16]
02671
02672 mov word ptr[Z32],di
02673 mov eax,edx
02674
02675 shr edx,16
02676 mov edi,[v16]
02677
02678 add eax,[UStep]
02679 mov esi,edi
02680
02681 shr edi,cl
02682 add esi,[VStep]
02683
02684 and edi,ebx
02685 and edx,ebp
02686
02687 add edi,edx
02688 mov edx,eax
02689
02690 add edi,GBitPtrHalf
02691 add eax,[UStep]
02692
02693 mov [u16],eax
02694 mov ax,word ptr[edi*2]
02695
02696 mov edi,esi
02697 add esi,[VStep]
02698
02699 shr edx,16
02700 mov [v16],esi
02701
02702 shr edi,cl
02703 and edx,ebp
02704
02705 mov esi,GBitPtrHalf
02706 and edi,ebx
02707
02708 shl eax,16
02709 add esi,edx
02710
02711 add esi,edi
02712
02713 mov ax,word ptr[esi*2]
02714
02715 mov [TempPix],eax
02716
02717 mov ebx,eax
02718 and eax,REDMASK
02719
02720 mov ecx,ebx
02721 and ebx,GREENMASK
02722
02723 mov dword ptr[Red],eax
02724 and ecx,BLUEMASK
02725
02726 mov dword ptr[Green],ebx
02727 mov dword ptr[Blue],ecx
02728
02729 fild qword ptr[Red] ; r RI LG LB BI RL GI
02730 fmul st,st(5) ; R RI LG LB BI RL GI
02731 fild qword ptr[Green] ; g R RI LG LB BI RL GI
02732 fmul st,st(3) ; G R RI LG LB BI RL GI
02733 fxch st(6) ; RL R RI LG LB BI G GI
02734 fadd st,st(2) ; RL2 R RI LG LB BI G GI
02735 fxch st(3) ; LG R RI RL2 LB BI G GI
02736 fadd st,st(7) ; LG2 R RI RL2 LB BI G GI
02737 fxch st(6) ; G R RI RL2 LB BI LG2 GI
02738 fadd qword ptr[Magic] ; Gk R RI RL2 LB BI LG2 GI
02739 fxch st(1) ; R Gk RI RL2 LB BI LG2 GI
02740 fadd qword ptr[Magic] ; Rk Gk RI RL2 LB BI LG2 GI
02741 fxch st(1) ; Gk Rk RI RL2 LB BI LG2 GI
02742 fstp qword ptr[Bucket] ; Rk RI RL2 LB BI LG2 GI
02743 fstp qword ptr[Bucket2] ; RI RL2 LB BI LG2 GI
02744
02745 mov edx,dword ptr[Bucket]
02746 mov eax,dword ptr[Bucket2]
02747
02748 fild dword ptr[Blue] ; b RI RL2 LB BI LG2 GI
02749 fmul st,st(3) ; B RI RL2 LB BI LG2 GI
02750 fxch st(3) ; LB RI RL2 B BI LG2 GI
02751 fadd st,st(4) ; LB2 RI RL2 B BI LG2 GI
02752 fxch st(3) ; B RI RL2 LB2 BI LG2 GI
02753 fadd qword ptr[Magic] ; Bk RI RL2 LB2 BI LG2 GI
02754
02755 and edx,GREENMASK
02756 and eax,REDMASK
02757
02758 fstp qword ptr[Bucket] ; RI RL2 LB2 BI LG2 GI
02759 fstp qword ptr[Bucket2] ; RL2 LB2 BI LG2 GI
02760
02761 mov edi,edx
02762 mov ebx,dword ptr[Bucket]
02763 or edi,eax
02764 mov ebp,TDest
02765 and ebx,BLUEMASK
02766
02767 fxch st(3) ; LG2 LB2 BI RL2 GI
02768 fld qword ptr[Bucket2] ; RI LG2 LB2 BI RL2 GI
02769
02770 mov si,word ptr[Z32]
02771 add TDest,4
02772
02773 mov ecx,pZBufferPtr
02774 or edi,ebx
02775
02776 cmp [TempPix],010001h
02777 je SkipPixie
02778
02779 mov [ecx],si
02780 rol edi,16
02781
02782 mov dword ptr[ebp],edi
02783 mov [ecx+2],si
02784 SkipPixie:
02785 add pZBufferPtr,4
02786 dec [widTemp]
02787
02788 jnz PixieLoop
02789
02790 pop ebp
02791
02792 fstp [u16]
02793 fstp [v16]
02794 fstp [u16]
02795 fstp [v16]
02796 fstp [u16]
02797 fstp [v16]
02798
02799 GouraudReturnNoZ:
02800 pop edi
02801 pop esi
02802 pop ecx
02803 pop ebx
02804 }
02805 }
02806
02807 void DrawScanLineGouraudNoZSolid_AsmX86FPU(EdgeAsmFPU *pLeft, EdgeAsmFPU *pRight)
02808 {
02809 TDest =Dest;
02810 Red =Green =0;
02811 _asm
02812 {
02813 push ebx
02814 push ecx
02815 push esi
02816 push edi
02817
02818 mov ebx,pLeft
02819 mov ecx,pRight
02820 mov eax,[ebx]EdgeAsmFPU.X
02821 mov edx,[ecx]EdgeAsmFPU.X
02822 sub edx,eax
02823 jle GouraudReturnNoZ
02824
02825 mov esi,eax
02826 inc edx
02827 shl eax,1
02828 add TDest,eax
02829 test esi,1
02830 jz NoSinglePixie
02831
02832
02833 fild dword ptr [ebx]EdgeAsmFPU.B ; BL
02834 fmul dword ptr[BlueMask]
02835 fadd qword ptr[Magic] ; Bk
02836 fild dword ptr [ebx]EdgeAsmFPU.G ; GL Bk
02837 fmul dword ptr[GreenMask]
02838 fadd qword ptr[Magic] ; Gk Bk
02839 fild dword ptr [ebx]EdgeAsmFPU.R ; RL Gk Bk
02840 fmul dword ptr[MiniRedMask]
02841 fadd qword ptr[Magic] ; Rk Gk Bk
02842 fxch st(2) ; Bk Gk Rk
02843 fstp qword ptr[Bucket] ; Gk Rk
02844 fstp qword ptr[Bucket2] ; Rk
02845
02846 mov esi,dword ptr[Bucket]
02847 mov eax,dword ptr[Bucket2]
02848
02849 and esi,BLUEMASK
02850 and eax,GREENMASK
02851
02852 fstp qword ptr[Bucket]
02853
02854 mov ecx,TDest
02855 mov ebx,dword ptr[Bucket]
02856
02857 and ebx,REDMASK
02858 or esi,eax
02859
02860 add TDest,2
02861 or esi,ebx
02862
02863 mov ebx,pLeft
02864 mov word ptr[ecx],si
02865
02866 mov ecx,pRight
02867 dec edx
02868
02869 jz GouraudReturnNoZ
02870 mov esi,edx
02871 and esi,1
02872 sub edx,esi
02873 jz GouraudReturnNoZ
02874
02875 NoSinglePixie:
02876 shr edx,1
02877 fld1
02878 mov [widTemp],edx ; just for a temp
02879
02880
02881 ; try to keep fmul fxch pairs seperated to avoid stalling
02882 ; calc this scanlines steps ; FPU Stack
02883 ; st0 st1 st2 st3 st4 st5 st6 st7
02884 fidiv dword ptr [widTemp] ; WID
02885 fild dword ptr [ecx]EdgeAsmFPU.R ; RR WID
02886 fild dword ptr [ebx]EdgeAsmFPU.R ; RL RR WID
02887 fsub st(1),st ; RL RD WID
02888 fild [ecx]EdgeAsmFPU.G ; GR RL RD WID
02889 fild [ebx]EdgeAsmFPU.G ; GL GR RL RD WID
02890 fsub st(1),st ; GL GD RL RD WID
02891 fild [ecx]EdgeAsmFPU.B ; BR GL GD RL RD WID
02892 fild [ebx]EdgeAsmFPU.B ; BL BR GL GD RL RD WID
02893 fsub st(1),st ; BL BD GL GD RL RD WID
02894 fxch st(5) ; RD BD GL GD RL BL WID
02895 fmul st,st(6) ; RI BD GL GD RL BL WID
02896 frndint
02897 fxch st(3) ; GD BD GL RI RL BL WID
02898 fmul st,st(6) ; GI BD GL RI RL BL WID
02899 frndint
02900 fxch st(6) ; WID BD GL RI RL BL GI
02901 fmulp st(1),st ; BI GL RI RL BL GI
02902 frndint
02903 fld qword ptr[RedMask] ; rm BI GL RI RL BL GI
02904 fmul st(3),st ; rm BI GL RI RL BL GI
02905 fmulp st(4),st ; BI GL RI RL BL GI
02906 fld dword ptr[GreenMask] ; gm BI GL RI RL BL GI
02907 fmul st(2),st ; gm BI GL RI RL BL GI
02908 fmulp st(6),st ; BI GL RI RL BL GI
02909 fld dword ptr[BlueMask] ; bm BI GL RI RL BL GI
02910 fmul st(1),st ; bm BI GL RI RL BL GI
02911 fmulp st(5),st ; BI GL RI RL BL GI
02912
02913 PixieLoop:
02914
02915 fld st(3) ; r BI GL RI RL BL GI
02916 fadd qword ptr[Magic] ; rk BI GL RI RL BL GI
02917 fld st(2) ; g rk BI GL RI RL BL GI
02918 fadd qword ptr[Magic] ; gk rk BI GL RI RL BL GI
02919 fxch st(1) ; rk gk BI GL RI RL BL GI
02920 fstp qword ptr[Bucket] ; gk BI GL RI RL BL GI
02921 fstp qword ptr[Bucket2] ; BI GL RI RL BL GI
02922 fld st(4) ; b BI GL RI RL BL GI
02923 fadd qword ptr[Magic] ; bk BI GL RI RL BL GI
02924
02925 mov ecx,dword ptr[Bucket]
02926 mov eax,dword ptr[Bucket2]
02927
02928 fstp qword ptr[Bucket] ; BI GL RI RL BL GI
02929 fadd st(4),st ; BI GL RI RL BL2 GI
02930
02931 and ecx,REDMASK
02932 and eax,GREENMASK
02933
02934 mov ebx,dword ptr[Bucket]
02935 or ecx,eax
02936
02937 fstp qword ptr[Bucket] ; GL RI RL BL2 GI
02938 fadd st,st(4) ; GL2 RI RL BL2 GI
02939 fstp qword ptr[Bucket2] ; RI RL BL2 GI
02940 fadd st(1),st ; RI RL2 BL2 GI
02941 fld qword ptr[Bucket2] ; GL2 RI RL2 BL2 GI
02942 fld qword ptr[Bucket] ; BI GL2 RI RL2 BL2 GI
02943
02944 and ebx,BLUEMASK
02945 mov edi,TDest
02946
02947 or ecx,ebx
02948 add TDest,4
02949
02950 rol ecx,16
02951
02952 mov [edi],ecx
02953 dec edx
02954
02955 jnz PixieLoop
02956
02957
02958 fstp [u16]
02959 fstp [v16]
02960 fstp [u16]
02961 fstp [v16]
02962 fstp [u16]
02963 fstp [v16]
02964
02965 GouraudReturnNoZ:
02966 pop edi
02967 pop esi
02968 pop ecx
02969 pop ebx
02970 }
02971 }
02972
02973 void DrawScanLineGouraudNoZBufferZWriteSolid_AsmX86FPU(EdgeAsmFPU *pLeft, EdgeAsmFPU *pRight)
02974 {
02975 TDest =Dest;
02976 Red =Green =0;
02977 _asm
02978 {
02979 push ebx
02980 push ecx
02981 push esi
02982 push edi
02983
02984 mov ebx,pLeft
02985 mov ecx,pRight
02986 mov eax,[ebx]EdgeAsmFPU.X
02987 mov edx,[ecx]EdgeAsmFPU.X
02988 sub edx,eax
02989 jle GouraudReturnNoZ
02990
02991 mov esi,eax
02992 inc edx
02993 shl eax,1
02994 add TDest,eax
02995 add pZBufferPtr,eax
02996 test esi,1
02997 jz NoSinglePixie
02998
02999
03000 fild dword ptr [ebx]EdgeAsmFPU.B ; BL
03001 fmul dword ptr[BlueMask]
03002 fadd qword ptr[Magic] ; Bk
03003 fild dword ptr [ebx]EdgeAsmFPU.G ; GL Bk
03004 fmul dword ptr[GreenMask]
03005 fadd qword ptr[Magic] ; Gk Bk
03006 fild dword ptr [ebx]EdgeAsmFPU.R ; RL Gk Bk
03007 fmul dword ptr[MiniRedMask]
03008 fadd qword ptr[Magic] ; Rk Gk Bk
03009 fxch st(2) ; Bk Gk Rk
03010 fstp qword ptr[Bucket] ; Gk Rk
03011 fstp qword ptr[Bucket2] ; Rk
03012 fld dword ptr[ebx]EdgeAsmFPU.z ; z
03013 fistp [z16] ;
03014
03015 mov esi,dword ptr[Bucket]
03016 mov eax,dword ptr[Bucket2]
03017
03018 and esi,BLUEMASK
03019 and eax,GREENMASK
03020
03021 fstp qword ptr[Bucket]
03022
03023 mov ecx,TDest
03024 mov ebx,dword ptr[Bucket]
03025
03026 and ebx,REDMASK
03027 or esi,eax
03028
03029 mov edi,[z16]
03030 mov eax,pZBufferPtr
03031
03032 add TDest,2
03033 or esi,ebx
03034
03035 shr edi,16
03036 mov ebx,pLeft
03037
03038 mov word ptr[eax],di
03039 mov word ptr[ecx],si
03040
03041 mov ecx,pRight
03042 dec edx
03043
03044 jz GouraudReturnNoZ
03045 mov esi,edx
03046 add pZBufferPtr,2
03047 and esi,1
03048 sub edx,esi
03049 jz GouraudReturnNoZ
03050
03051 NoSinglePixie:
03052 shr edx,1
03053 fld1
03054 mov [widTemp],edx ; just for a temp
03055
03056
03057 ; try to keep fmul fxch pairs seperated to avoid stalling
03058 ; calc this scanlines steps ; FPU Stack
03059 ; st0 st1 st2 st3 st4 st5 st6 st7
03060 fidiv dword ptr [widTemp] ; WID
03061 fild dword ptr [ecx]EdgeAsmFPU.R ; RR WID
03062 fild dword ptr [ebx]EdgeAsmFPU.R ; RL RR WID
03063 fsub st(1),st ; RL RD WID
03064 fild [ecx]EdgeAsmFPU.G ; GR RL RD WID
03065 fild [ebx]EdgeAsmFPU.G ; GL GR RL RD WID
03066 fsub st(1),st ; GL GD RL RD WID
03067 fild [ecx]EdgeAsmFPU.B ; BR GL GD RL RD WID
03068 fild [ebx]EdgeAsmFPU.B ; BL BR GL GD RL RD WID
03069 fsub st(1),st ; BL BD GL GD RL RD WID
03070 fxch st(5) ; RD BD GL GD RL BL WID
03071 fmul st,st(6) ; RI BD GL GD RL BL WID
03072 frndint
03073 fxch st(3) ; GD BD GL RI RL BL WID
03074 fmul st,st(6) ; GI BD GL RI RL BL WID
03075 frndint
03076 fxch st(6) ; WID BD GL RI RL BL GI
03077 fmulp st(1),st ; BI GL RI RL BL GI
03078 frndint
03079 fld qword ptr[RedMask] ; rm BI GL RI RL BL GI
03080 fmul st(3),st ; rm BI GL RI RL BL GI
03081 fmulp st(4),st ; BI GL RI RL BL GI
03082 fld dword ptr[GreenMask] ; gm BI GL RI RL BL GI
03083 fmul st(2),st ; gm BI GL RI RL BL GI
03084 fmulp st(6),st ; BI GL RI RL BL GI
03085 fld dword ptr[BlueMask] ; bm BI GL RI RL BL GI
03086 fmul st(1),st ; bm BI GL RI RL BL GI
03087 fmulp st(5),st ; BI GL RI RL BL GI
03088 fld [ecx]EdgeAsmFPU.z ; rz BI GL RI RL BL GI
03089 fsub [ebx]EdgeAsmFPU.z ; zd BI GL RI RL BL GI
03090 fld [ebx]EdgeAsmFPU.z ; lz zd BI GL RI RL BL GI
03091 fistp [z16] ; zd BI GL RI RL BL GI
03092 fistp [ZStep] ; BI GL RI RL BL GI
03093
03094 PixieLoop:
03095
03096 fld st(3) ; r BI GL RI RL BL GI
03097 fadd qword ptr[Magic] ; rk BI GL RI RL BL GI
03098 fld st(2) ; g rk BI GL RI RL BL GI
03099 fadd qword ptr[Magic] ; gk rk BI GL RI RL BL GI
03100 fxch st(1) ; rk gk BI GL RI RL BL GI
03101 fstp qword ptr[Bucket] ; gk BI GL RI RL BL GI
03102
03103 mov edi,[z16]
03104 mov esi,pZBufferPtr
03105
03106 shr edi,16
03107 mov ebx,[ZStep]
03108
03109 mov word ptr[esi],di
03110 add [z16],ebx
03111
03112 fstp qword ptr[Bucket2] ; BI GL RI RL BL GI
03113 fld st(4) ; b BI GL RI RL BL GI
03114 fadd qword ptr[Magic] ; bk BI GL RI RL BL GI
03115
03116 mov word ptr[esi+2],di
03117
03118 mov ecx,dword ptr[Bucket]
03119 mov eax,dword ptr[Bucket2]
03120
03121 fstp qword ptr[Bucket] ; BI GL RI RL BL GI
03122 fadd st(4),st ; BI GL RI RL BL2 GI
03123
03124 and ecx,REDMASK
03125 and eax,GREENMASK
03126
03127 mov ebx,dword ptr[Bucket]
03128 or ecx,eax
03129
03130 fstp qword ptr[Bucket] ; GL RI RL BL2 GI
03131 fadd st,st(4) ; GL2 RI RL BL2 GI
03132 fstp qword ptr[Bucket2] ; RI RL BL2 GI
03133 fadd st(1),st ; RI RL2 BL2 GI
03134 fld qword ptr[Bucket2] ; GL2 RI RL2 BL2 GI
03135 fld qword ptr[Bucket] ; BI GL2 RI RL2 BL2 GI
03136
03137 and ebx,BLUEMASK
03138 mov edi,TDest
03139
03140 or ecx,ebx
03141 add TDest,4
03142
03143 rol ecx,16
03144
03145 mov [edi],ecx
03146
03147 add pZBufferPtr,4
03148
03149 dec edx
03150
03151 jnz PixieLoop
03152
03153
03154 fstp [u16]
03155 fstp [v16]
03156 fstp [u16]
03157 fstp [v16]
03158 fstp [u16]
03159 fstp [v16]
03160
03161 GouraudReturnNoZ:
03162 pop edi
03163 pop esi
03164 pop ecx
03165 pop ebx
03166 }
03167 }
03168
03169 void DrawScanLineGouraudZBufferSolid_AsmX86FPU(EdgeAsmFPU *pLeft, EdgeAsmFPU *pRight)
03170 {
03171 TDest =Dest;
03172 Red =Green =0;
03173 _asm
03174 {
03175 push ebx
03176 push ecx
03177 push esi
03178 push edi
03179
03180 mov ebx,pLeft
03181 mov ecx,pRight
03182 mov eax,[ebx]EdgeAsmFPU.X
03183 mov edx,[ecx]EdgeAsmFPU.X
03184 sub edx,eax
03185 jle GouraudReturnNoZ
03186
03187 mov esi,eax
03188 inc edx
03189 shl eax,1
03190 add TDest,eax
03191 add pZBufferPtr,eax
03192 test esi,1
03193 jz NoSinglePixie
03194
03195
03196 fild dword ptr [ebx]EdgeAsmFPU.B ; BL
03197 fmul dword ptr[BlueMask]
03198 fadd qword ptr[Magic] ; Bk
03199 fild dword ptr [ebx]EdgeAsmFPU.G ; GL Bk
03200 fmul dword ptr[GreenMask]
03201 fadd qword ptr[Magic] ; Gk Bk
03202 fild dword ptr [ebx]EdgeAsmFPU.R ; RL Gk Bk
03203 fmul dword ptr[MiniRedMask]
03204 fadd qword ptr[Magic] ; Rk Gk Bk
03205 fxch st(2) ; Bk Gk Rk
03206 fstp qword ptr[Bucket] ; Gk Rk
03207 fstp qword ptr[Bucket2] ; Rk
03208 fld dword ptr[ebx]EdgeAsmFPU.z ; z
03209 fistp [z16] ;
03210
03211 mov esi,dword ptr[Bucket]
03212 mov eax,dword ptr[Bucket2]
03213
03214 and esi,BLUEMASK
03215 and eax,GREENMASK
03216
03217 fstp qword ptr[Bucket]
03218
03219 mov ecx,TDest
03220 mov ebx,dword ptr[Bucket]
03221
03222 and ebx,REDMASK
03223 or esi,eax
03224
03225 mov edi,[z16]
03226 mov eax,pZBufferPtr
03227
03228 add TDest,2
03229 or esi,ebx
03230
03231 shr edi,16
03232 mov ebx,pLeft
03233
03234 cmp word ptr[eax],di
03235 jg SkipSinglePixie
03236
03237 mov word ptr[eax],di
03238 mov word ptr[ecx],si
03239
03240 SkipSinglePixie:
03241 mov ecx,pRight
03242 add pZBufferPtr,2
03243 dec edx
03244
03245 jz GouraudReturnNoZ
03246 mov esi,edx
03247 and esi,1
03248 sub edx,esi
03249 jz GouraudReturnNoZ
03250
03251 NoSinglePixie:
03252 shr edx,1
03253 fld1
03254 mov [widTemp],edx ; just for a temp
03255
03256
03257 ; try to keep fmul fxch pairs seperated to avoid stalling
03258 ; calc this scanlines steps ; FPU Stack
03259 ; st0 st1 st2 st3 st4 st5 st6 st7
03260 fidiv dword ptr [widTemp] ; WID
03261 fild dword ptr [ecx]EdgeAsmFPU.R ; RR WID
03262 fild dword ptr [ebx]EdgeAsmFPU.R ; RL RR WID
03263 fsub st(1),st ; RL RD WID
03264 fild [ecx]EdgeAsmFPU.G ; GR RL RD WID
03265 fild [ebx]EdgeAsmFPU.G ; GL GR RL RD WID
03266 fsub st(1),st ; GL GD RL RD WID
03267 fild [ecx]EdgeAsmFPU.B ; BR GL GD RL RD WID
03268 fild [ebx]EdgeAsmFPU.B ; BL BR GL GD RL RD WID
03269 fsub st(1),st ; BL BD GL GD RL RD WID
03270 fxch st(5) ; RD BD GL GD RL BL WID
03271 fmul st,st(6) ; RI BD GL GD RL BL WID
03272 frndint
03273 fxch st(3) ; GD BD GL RI RL BL WID
03274 fmul st,st(6) ; GI BD GL RI RL BL WID
03275 frndint
03276 fxch st(6) ; WID BD GL RI RL BL GI
03277 fmulp st(1),st ; BI GL RI RL BL GI
03278 frndint
03279 fld qword ptr[RedMask] ; rm BI GL RI RL BL GI
03280 fmul st(3),st ; rm BI GL RI RL BL GI
03281 fmulp st(4),st ; BI GL RI RL BL GI
03282 fld dword ptr[GreenMask] ; gm BI GL RI RL BL GI
03283 fmul st(2),st ; gm BI GL RI RL BL GI
03284 fmulp st(6),st ; BI GL RI RL BL GI
03285 fld dword ptr[BlueMask] ; bm BI GL RI RL BL GI
03286 fmul st(1),st ; bm BI GL RI RL BL GI
03287 fmulp st(5),st ; BI GL RI RL BL GI
03288 fld [ecx]EdgeAsmFPU.z ; rz BI GL RI RL BL GI
03289 fsub [ebx]EdgeAsmFPU.z ; zd BI GL RI RL BL GI
03290 fld [ebx]EdgeAsmFPU.z ; lz zd BI GL RI RL BL GI
03291 fistp [z16] ; zd BI GL RI RL BL GI
03292 fistp [ZStep] ; BI GL RI RL BL GI
03293
03294 PixieLoop:
03295
03296 fld st(3) ; r BI GL RI RL BL GI
03297 fadd qword ptr[Magic] ; rk BI GL RI RL BL GI
03298 fld st(2) ; g rk BI GL RI RL BL GI
03299 fadd qword ptr[Magic] ; gk rk BI GL RI RL BL GI
03300 fxch st(1) ; rk gk BI GL RI RL BL GI
03301 fstp qword ptr[Bucket] ; gk BI GL RI RL BL GI
03302 fstp qword ptr[Bucket2] ; BI GL RI RL BL GI
03303 fld st(4) ; b BI GL RI RL BL GI
03304 fadd qword ptr[Magic] ; bk BI GL RI RL BL GI
03305
03306 mov ecx,dword ptr[Bucket]
03307 mov eax,dword ptr[Bucket2]
03308
03309 fstp qword ptr[Bucket] ; BI GL RI RL BL GI
03310 fadd st(4),st ; BI GL RI RL BL2 GI
03311
03312 and ecx,REDMASK
03313 and eax,GREENMASK
03314
03315 mov ebx,dword ptr[Bucket]
03316 or ecx,eax
03317
03318 fstp qword ptr[Bucket] ; GL RI RL BL2 GI
03319 fadd st,st(4) ; GL2 RI RL BL2 GI
03320 fstp qword ptr[Bucket2] ; RI RL BL2 GI
03321 fadd st(1),st ; RI RL2 BL2 GI
03322 fld qword ptr[Bucket2] ; GL2 RI RL2 BL2 GI
03323 fld qword ptr[Bucket] ; BI GL2 RI RL2 BL2 GI
03324
03325 and ebx,BLUEMASK
03326 mov edi,TDest
03327
03328 or ecx,ebx
03329 add TDest,4
03330
03331 mov eax,[z16]
03332 mov esi,pZBufferPtr
03333
03334 shr eax,16
03335 mov ebx,[ZStep]
03336
03337 cmp word ptr[esi],ax
03338 jg SkipPixie
03339
03340 mov word ptr[esi],ax
03341 rol ecx,16
03342
03343 add [z16],ebx
03344 mov word ptr[esi+2],ax
03345
03346 mov [edi],ecx
03347 SkipPixie:
03348 add pZBufferPtr,4
03349 dec edx
03350
03351 jnz PixieLoop
03352
03353
03354 fstp [u16]
03355 fstp [v16]
03356 fstp [u16]
03357 fstp [v16]
03358 fstp [u16]
03359 fstp [v16]
03360
03361 GouraudReturnNoZ:
03362 pop edi
03363 pop esi
03364 pop ecx
03365 pop ebx
03366 }
03367 }
03368
03369 void DrawScanLineGouraudZBufferNoZWriteSolid_AsmX86FPU(EdgeAsmFPU *pLeft, EdgeAsmFPU *pRight)
03370 {
03371 TDest =Dest;
03372 Red =Green =0;
03373 _asm
03374 {
03375 push ebx
03376 push ecx
03377 push esi
03378 push edi
03379
03380 mov ebx,pLeft
03381 mov ecx,pRight
03382 mov eax,[ebx]EdgeAsmFPU.X
03383 mov edx,[ecx]EdgeAsmFPU.X
03384 sub edx,eax
03385 jle GouraudReturnNoZ
03386
03387 mov esi,eax
03388 inc edx
03389 shl eax,1
03390 add TDest,eax
03391 add pZBufferPtr,eax
03392 test esi,1
03393 jz NoSinglePixie
03394
03395
03396 fild dword ptr [ebx]EdgeAsmFPU.B ; BL
03397 fmul dword ptr[BlueMask]
03398 fadd qword ptr[Magic] ; Bk
03399 fild dword ptr [ebx]EdgeAsmFPU.G ; GL Bk
03400 fmul dword ptr[GreenMask]
03401 fadd qword ptr[Magic] ; Gk Bk
03402 fild dword ptr [ebx]EdgeAsmFPU.R ; RL Gk Bk
03403 fmul dword ptr[MiniRedMask]
03404 fadd qword ptr[Magic] ; Rk Gk Bk
03405 fxch st(2) ; Bk Gk Rk
03406 fstp qword ptr[Bucket] ; Gk Rk
03407 fstp qword ptr[Bucket2] ; Rk
03408 fld dword ptr[ebx]EdgeAsmFPU.z ; z
03409 fistp [z16] ;
03410
03411 mov esi,dword ptr[Bucket]
03412 mov eax,dword ptr[Bucket2]
03413
03414 and esi,BLUEMASK
03415 and eax,GREENMASK
03416
03417 fstp qword ptr[Bucket]
03418
03419 mov ecx,TDest
03420 mov ebx,dword ptr[Bucket]
03421
03422 and ebx,REDMASK
03423 or esi,eax
03424
03425 mov edi,[z16]
03426 mov eax,pZBufferPtr
03427
03428 add TDest,2
03429 or esi,ebx
03430
03431 shr edi,16
03432 mov ebx,pLeft
03433
03434 cmp word ptr[eax],di
03435 jg SkipSinglePixie
03436
03437 mov word ptr[ecx],si
03438
03439 SkipSinglePixie:
03440 mov ecx,pRight
03441 add pZBufferPtr,2
03442 dec edx
03443
03444 jz GouraudReturnNoZ
03445 mov esi,edx
03446 and esi,1
03447 sub edx,esi
03448 jz GouraudReturnNoZ
03449
03450 NoSinglePixie:
03451 shr edx,1
03452 fld1
03453 mov [widTemp],edx ; just for a temp
03454
03455
03456 ; try to keep fmul fxch pairs seperated to avoid stalling
03457 ; calc this scanlines steps ; FPU Stack
03458 ; st0 st1 st2 st3 st4 st5 st6 st7
03459 fidiv dword ptr [widTemp] ; WID
03460 fild dword ptr [ecx]EdgeAsmFPU.R ; RR WID
03461 fild dword ptr [ebx]EdgeAsmFPU.R ; RL RR WID
03462 fsub st(1),st ; RL RD WID
03463 fild [ecx]EdgeAsmFPU.G ; GR RL RD WID
03464 fild [ebx]EdgeAsmFPU.G ; GL GR RL RD WID
03465 fsub st(1),st ; GL GD RL RD WID
03466 fild [ecx]EdgeAsmFPU.B ; BR GL GD RL RD WID
03467 fild [ebx]EdgeAsmFPU.B ; BL BR GL GD RL RD WID
03468 fsub st(1),st ; BL BD GL GD RL RD WID
03469 fxch st(5) ; RD BD GL GD RL BL WID
03470 fmul st,st(6) ; RI BD GL GD RL BL WID
03471 frndint
03472 fxch st(3) ; GD BD GL RI RL BL WID
03473 fmul st,st(6) ; GI BD GL RI RL BL WID
03474 frndint
03475 fxch st(6) ; WID BD GL RI RL BL GI
03476 fmulp st(1),st ; BI GL RI RL BL GI
03477 frndint
03478 fld qword ptr[RedMask] ; rm BI GL RI RL BL GI
03479 fmul st(3),st ; rm BI GL RI RL BL GI
03480 fmulp st(4),st ; BI GL RI RL BL GI
03481 fld dword ptr[GreenMask] ; gm BI GL RI RL BL GI
03482 fmul st(2),st ; gm BI GL RI RL BL GI
03483 fmulp st(6),st ; BI GL RI RL BL GI
03484 fld dword ptr[BlueMask] ; bm BI GL RI RL BL GI
03485 fmul st(1),st ; bm BI GL RI RL BL GI
03486 fmulp st(5),st ; BI GL RI RL BL GI
03487 fld [ecx]EdgeAsmFPU.z ; rz BI GL RI RL BL GI
03488 fsub [ebx]EdgeAsmFPU.z ; zd BI GL RI RL BL GI
03489 fld [ebx]EdgeAsmFPU.z ; lz zd BI GL RI RL BL GI
03490 fistp [z16] ; zd BI GL RI RL BL GI
03491 fistp [ZStep] ; BI GL RI RL BL GI
03492
03493 PixieLoop:
03494
03495 fld st(3) ; r BI GL RI RL BL GI
03496 fadd qword ptr[Magic] ; rk BI GL RI RL BL GI
03497 fld st(2) ; g rk BI GL RI RL BL GI
03498 fadd qword ptr[Magic] ; gk rk BI GL RI RL BL GI
03499 fxch st(1) ; rk gk BI GL RI RL BL GI
03500 fstp qword ptr[Bucket] ; gk BI GL RI RL BL GI
03501 fstp qword ptr[Bucket2] ; BI GL RI RL BL GI
03502 fld st(4) ; b BI GL RI RL BL GI
03503 fadd qword ptr[Magic] ; bk BI GL RI RL BL GI
03504
03505 mov ecx,dword ptr[Bucket]
03506 mov eax,dword ptr[Bucket2]
03507
03508 fstp qword ptr[Bucket] ; BI GL RI RL BL GI
03509 fadd st(4),st ; BI GL RI RL BL2 GI
03510
03511 and ecx,REDMASK
03512 and eax,GREENMASK
03513
03514 mov ebx,dword ptr[Bucket]
03515 or ecx,eax
03516
03517 fstp qword ptr[Bucket] ; GL RI RL BL2 GI
03518 fadd st,st(4) ; GL2 RI RL BL2 GI
03519 fstp qword ptr[Bucket2] ; RI RL BL2 GI
03520 fadd st(1),st ; RI RL2 BL2 GI
03521 fld qword ptr[Bucket2] ; GL2 RI RL2 BL2 GI
03522 fld qword ptr[Bucket] ; BI GL2 RI RL2 BL2 GI
03523
03524 and ebx,BLUEMASK
03525 mov edi,TDest
03526
03527 or ecx,ebx
03528 add TDest,4
03529
03530 mov eax,[z16]
03531 mov esi,pZBufferPtr
03532
03533 shr eax,16
03534 mov ebx,[ZStep]
03535
03536 cmp word ptr[esi],ax
03537 jg SkipPixie
03538
03539 rol ecx,16
03540 add [z16],ebx
03541
03542 mov [edi],ecx
03543 SkipPixie:
03544 add pZBufferPtr,4
03545 dec edx
03546
03547 jnz PixieLoop
03548
03549
03550 fstp [u16]
03551 fstp [v16]
03552 fstp [u16]
03553 fstp [v16]
03554 fstp [u16]
03555 fstp [v16]
03556
03557 GouraudReturnNoZ:
03558 pop edi
03559 pop esi
03560 pop ecx
03561 pop ebx
03562 }
03563 }
03564
03565 void DrawSpan16_AsmLitX86FPU(int32 x1, int32 x2, int32 y)
03566 {
03567 _asm
03568 {
03569 push ebx
03570 push ecx
03571 push esi
03572 push edi
03573
03574 mov eax,x1
03575 mov ecx,x2
03576 sub ecx,eax
03577 jle Return16
03578
03579 mov edi,[GBitPtr16]
03580 mov pTex,edi
03581
03582 fild [y] ; y
03583
03584 mov edi, ClientWindow.Buffer
03585 mov eax, y
03586 imul eax, ClientWindow.Width
03587 add eax, x1
03588 shl eax, 1
03589 add edi, eax
03590 mov eax,ecx
03591 shr ecx,4
03592 and eax,15
03593 _emit 75h
03594 _emit 06h
03595 dec ecx
03596 mov eax,16
03597
03598 mov [NumASpans],ecx
03599 mov [RemainingCount],eax
03600
03601 fild [x1] ; x y
03602
03603
03604
03605 fld [UDivZStepY] ; UZdY x y
03606 fld [UDivZStepX] ; UZdX UZdY x y
03607 fmul st,st(2) ; UZX UZdY x y
03608 fld [VDivZStepY] ; VZdY UZX UZdY x y
03609 fld [VDivZStepX] ; VZdX VZdY UZX UZdY x y
03610 fxch st(3) ; UZdy VZdY UZX VZdX x y
03611 fmul st,st(5) ; UZY VZdY UZX VZdX x y
03612 fxch st(2) ; UZX VZdY UZY VZdX x y
03613 fadd [UDivZOrigin] ; UZXS VZdY UZY VZdX x y
03614 fxch st(3) ; VZdX VZdY UZY UZXS x y
03615 fmul st,st(4) ; VZX VZdY UZY UZXS x y
03616 fxch st(2) ; UZY VZdY VZX UZXS x y
03617 faddp st(3),st ; VZdY VZX UZ x y
03618 fmul st,st(4) ; VZY VZX UZ x y
03619 fxch st(1) ; VZX VZY UZ x y
03620 fadd [VDivZOrigin] ; VZXS VZY UZ x y
03621 fld [ZiStepX] ; ZdX VZXS VZY UZ x y
03622 fmulp st(4),st ; VZXS VZY UZ ZX y
03623 faddp st(1),st ; VZ UZ ZX y
03624 fld [ZiStepY] ; ZdY VZ UZ ZX y
03625 fmulp st(4),st ; VZ UZ ZX ZY
03626 fxch st(2) ; ZX UZ VZ ZY
03627 fadd [ZiOrigin] ; ZXS UZ VZ ZY
03628
03629
03630
03631 faddp st(3),st ; UZ VZ Zi
03632 fld1 ; 1 UZ VZ Zi
03633 fdiv st,st(3) ; ZL UZ VZ Zi
03634
03635
03636
03637 fld st ; ZL ZL UZ VZ Zi
03638 fmul st,st(3) ; VL ZL UZ VZ Zi
03639 fxch st(4) ; Zi ZL UZ VZ VL
03640 fadd [Zi16StepX] ; ZRi ZL UZ VZ VL
03641 fxch st(1) ; ZL ZRi UZ VZ VL
03642 fmul st,st(2) ; UL ZRi UZ VZ VL
03643 fxch st(3) ; VZ ZRi UZ UL VL
03644 fadd [VDivZ16StepX] ; VZR ZRi UZ UL VL
03645 fxch st(2) ; UZ ZRi VZR UL VL
03646 fadd [UDivZ16StepX] ; UZR ZRi VZR UL VL
03647 fld1 ; 1 UZR ZRi VZR UL VL
03648 fdiv st,st(2) ; ZR UZR ZRi VZR UL VL
03649
03650
03651
03652 fld st ; ZR ZR UZR ZRi VZR UL VL
03653 fmul st,st(4) ; VR ZR UZR ZRi VZR UL VL
03654 fxch st(1) ; ZR VR UZR ZRi VZR UL VL
03655
03656 fmul st,st(2) ; UR VR UZR ZRi VZR UL VL
03657
03658 test ecx,ecx
03659 jz HandleLeftoverPixels16
03660
03661 SpanLoop16:
03662
03663 fstp dword ptr[FloatTemp] ; VR UZR ZRi VZR UL VL
03664 fld st(4) ; UL VR UZR ZRi VZR UL VL
03665 fmul [GLMapMulU] ; ULL VR UZR ZRi VZR UL VL
03666 fld st(5) ; UL ULL VR UZR ZRi VZR UL VL
03667 fadd qword ptr[MipMagic] ; ULk ULL VR UZR ZRi VZR UL VL
03668 fxch st(1) ; ULL ULk VR UZR ZRi VZR UL VL
03669 fadd qword ptr[MipMagic] ; ULLk ULk VR UZR ZRi VZR UL VL
03670 fxch st(1) ; ULk ULLk VR UZR ZRi VZR UL VL
03671 fstp qword ptr[Bucket] ; ULLk VR UZR ZRi VZR UL VL
03672 fstp qword ptr[Bucket2] ; VR UZR ZRi VZR UL VL
03673
03674 mov ebx,dword ptr[Bucket]
03675 mov eax,dword ptr[Bucket2]
03676
03677 fld st(5) ; VL VR UZR ZRi VZR UL VL
03678 fmul [GLMapMulV] ; VLL VR UZR ZRi VZR UL VL
03679
03680 add ebx,dword ptr[UAdjust]
03681 add eax,dword ptr[UAdjustL]
03682
03683 mov [U1],ebx
03684 mov [UFixed],eax
03685
03686 fld st(6) ; VL VLL VR UZR ZRi VZR UL VL
03687 fadd qword ptr[MipMagic] ; VLk VLL VR UZR ZRi VZR UL VL
03688 fxch st(1) ; VLL VLk VR UZR ZRi VZR UL VL
03689 fadd qword ptr[MipMagic] ; VLLk VLk VR UZR ZRi VZR UL VL
03690 fxch st(1) ; VLk VLLk VR UZR ZRi VZR UL VL
03691 fstp qword ptr[Bucket] ; VLLk VR UZR ZRi VZR UL VL
03692 fstp qword ptr[Bucket2] ; VR UZR ZRi VZR UL VL
03693 fsubr st(5),st ; VR UZR ZRi VZR UL dV
03694
03695 mov ebx,dword ptr[Bucket]
03696 mov eax,dword ptr[Bucket2]
03697
03698 fld dword ptr[FloatTemp] ; UR VR UZR ZRi VZR UL dV
03699
03700 add ebx,dword ptr[VAdjust]
03701 add eax,dword ptr[VAdjustL]
03702
03703 mov [V1],ebx
03704 mov [VFixed],eax
03705
03706 fsubr st(5),st ; UR VR UZR ZRi VZR dU dV
03707 fxch st(6) ; dV VR UZR ZRi VZR dU UR
03708 fadd qword ptr[MipMagic2] ; dVk VR UZR ZRi VZR dU UR
03709 fxch st(5) ; dU VR UZR ZRi VZR dVk UR
03710 fadd qword ptr[MipMagic2] ; dUk VR UZR ZRi VZR dVk UR
03711 fxch st(5) ; dVk VR UZR ZRi VZR dUk UR
03712 fstp qword ptr[DeltaV] ; VR UZR ZRi VZR dUk UR
03713 fxch st(5) ; UR UZR ZRi VZR dUk VR
03714
03715 fxch st(4) ; dUk UZR ZRi VZR UR VR
03716 fstp qword ptr[DeltaU] ; UZR ZRi VZR UR VR
03717
03718
03719 fadd [UDivZ16StepX] ; UZR ZLi VZL UL VL
03720 fxch st(1) ; ZLi UZR VZL UL VL
03721 fadd [Zi16StepX] ; ZRi UZR VZL UL VL
03722 fxch st(2) ; VZL UZR ZRi UL VL
03723 fadd [VDivZ16StepX] ; VZR UZR ZRi UL VL
03724 fxch st(2) ; ZRi UZR VZR UL VL
03725 fxch st(1) ; UZR ZRi VZR UL VL
03726 fld1 ; 1 UZR ZRi VZR UL VL
03727 fdiv st,st(2) ; ZR UZR ZRi VZR UL VL
03728
03729
03730 mov ebx,[UFixed]
03731 cmp ebx,MaxU
03732 jle TryClampU016
03733 mov ecx,MaxU
03734 mov dword ptr[UFixed],ecx
03735 jmp NoClampU016
03736
03737 TryClampU016:
03738 cmp ebx,0
03739 jge NoClampU016
03740 mov dword ptr[UFixed],0
03741 NoClampU016:
03742 mov eax,[VFixed]
03743 cmp eax,MaxV
03744 jle TryClampV016
03745 mov ecx,MaxV
03746 mov dword ptr[VFixed],ecx
03747 jmp NoClampV016
03748
03749 TryClampV016:
03750 cmp eax,0
03751 jge NoClampV016
03752 mov dword ptr[VFixed],0
03753
03754 NoClampV016:
03755
03756
03757 mov esi,dword ptr[UFixed]
03758 mov eax,dword ptr[VFixed]
03759
03760
03761 mov ecx, GMipLevel4_8
03762 sar esi, cl
03763 sar eax, cl
03764 and esi, 0ffh
03765 and eax, 0ffh
03766 mov UDist, esi
03767 mov VDist, eax
03768
03769 mov esi,dword ptr[UFixed]
03770 mov eax,dword ptr[VFixed]
03771 mov ecx, GMipLevel20
03772 shr esi, cl
03773 shr eax, cl
03774
03775 imul eax, GLightWidth
03776 add esi, eax
03777
03778 mov edx, esi
03779 shl esi, 1
03780 add edx, esi
03781
03782 add edx, GLightData
03783
03784
03785 xor ecx, ecx
03786 mov cl, [edx+3]
03787 mov eax, ecx
03788 mov cl, [edx+0]
03789 sub eax, ecx
03790 imul eax, UDist
03791 shl ecx, 8
03792 add eax, ecx
03793 mov [R1], eax
03794
03795 xor ecx, ecx
03796 mov cl, [edx+4]
03797 mov eax, ecx
03798 mov cl, [edx+1]
03799 sub eax, ecx
03800 imul eax, UDist
03801 shl ecx, 8
03802 add eax, ecx
03803 mov [G1], eax
03804
03805 xor ecx, ecx
03806 mov cl, [edx+5]
03807 mov eax, ecx
03808 mov cl, [edx+2]
03809 sub eax, ecx
03810 imul eax, UDist
03811 shl ecx, 8
03812 add eax, ecx
03813 mov [B1], eax
03814
03815 add edx, GLightWidth
03816 add edx, GLightWidth
03817 add edx, GLightWidth
03818
03819
03820 xor ecx, ecx
03821 mov cl, [edx+3]
03822 mov eax, ecx
03823 mov cl, [edx+0]
03824 sub eax, ecx
03825 imul eax, UDist
03826 shl ecx, 8
03827 add eax, ecx
03828 mov [R2], eax
03829
03830 xor ecx, ecx
03831 mov cl, [edx+4]
03832 mov eax, ecx
03833 mov cl, [edx+1]
03834 sub eax, ecx
03835 imul eax, UDist
03836 shl ecx, 8
03837 add eax, ecx
03838 mov [G2], eax
03839
03840 xor ecx, ecx
03841 mov cl, [edx+5]
03842 mov eax, ecx
03843 mov cl, [edx+2]
03844 sub eax, ecx
03845 imul eax, UDist
03846 shl ecx, 8
03847 add eax, ecx
03848 mov [B2], eax
03849
03850
03851 mov eax, [R2]
03852 sub eax, [R1]
03853 imul eax, VDist
03854 sar eax, 8
03855 add eax, [R1]
03856 shr eax, 8
03857 and eax,0feh
03858
03859 mov [RR1], eax
03860
03861 mov eax, [G2]
03862 sub eax, [G1]
03863 imul eax, VDist
03864 sar eax, 8
03865 add eax, [G1]
03866 shr eax, 8
03867 and eax,0feh
03868
03869 mov [GG1], eax
03870
03871 mov eax, [B2]
03872 sub eax, [B1]
03873 imul eax, VDist
03874 sar eax, 8
03875 add eax, [B1]
03876 shr eax, 8
03877 and eax,0feh
03878
03879 mov [BB1], eax
03880
03881 fstp [FTemp0] ; UZR ZRi VZR UL VL
03882 fstp [FTemp1] ; ZRi VZR UL VL
03883 fstp [FTemp2] ; VZR UL VL
03884 fstp [FTemp3] ; UL VL
03885 fstp [FTemp4] ; VL
03886 fstp [FTemp5] ;
03887
03888 fild [RR1] ; LR
03889 fild [GG1] ; LG LR
03890 fild [BB1] ; LB LG LR
03891
03892
03893 mov ebx,dword ptr[U1]
03894 mov eax,dword ptr[V1]
03895
03896 add ebx,dword ptr[UAdjust2]
03897 add eax,dword ptr[VAdjust2]
03898
03899 mov ecx,[VShift]
03900 mov dword ptr[Bucket],ebx
03901
03902 shl eax,cl
03903
03904 push ebp
03905
03906 mov dword ptr[Bucket2],eax
03907 mov ebp,dword ptr[DeltaV]
03908
03909 and eax,[GHMaskShifted16]
03910 and ebx,[GWMaskShifted]
03911
03912 shl ebp,cl
03913 add eax,ebx
03914
03915 mov edx,dword ptr[Bucket2]
03916 mov esi,pTex
03917
03918 shr eax,16
03919 mov dword ptr[DeltaV],ebp
03920
03921 mov ebx,dword ptr[Bucket]
03922
03923 mov ax,word ptr[2*eax+esi]
03924 add edx,dword ptr[DeltaV]
03925
03926 xor eax,0
03927 mov ecx,edx
03928
03929 add ebx,dword ptr[DeltaU]
03930 and ecx,[GHMaskShifted16]
03931
03932 rol eax,16
03933 and ebx,[GWMaskShifted]
03934
03935 xor eax,0
03936 add ecx,ebx
03937
03938 add edx,dword ptr[DeltaV]
03939 mov esi,pTex
03940
03941 shr ecx,16
03942 add ebx,dword ptr[DeltaU]
03943
03944 mov ax,word ptr[2*ecx+esi]
03945 mov ecx,edx
03946
03947 xor eax,0
03948 and ecx,[GHMaskShifted16]
03949
03950 mov esi,eax
03951 mov ebp,eax
03952
03953 and esi,REDMASK
03954 and ebp,GREENMASK
03955
03956 mov dword ptr[Red],esi
03957 mov dword ptr[Green],ebp
03958
03959 fild qword ptr[Red] ; r LB LG LR
03960
03961 mov ebp,eax
03962
03963 fmul st,st(3) ; R LB LG LR
03964 fild qword ptr[Green] ; g R LB LG LR
03965
03966 and ebp,BLUEMASK
03967 and ebx,[GWMaskShifted]
03968
03969 mov dword ptr[Blue],ebp
03970
03971 fmul st,st(3) ; G R LB LG LR
03972 fild [Blue] ; b G R LB LG LR
03973 fmul st,st(3) ; B G R LB LG LR
03974 fxch st(2) ; R G B LB LG LR
03975 fadd qword ptr[Magic] ; Rk G B LB LG LR
03976 fxch st(1) ; G Rk B LB LG LR
03977 fadd qword ptr[Magic] ; Gk Rk B LB LG LR
03978 fxch st(2) ; B Rk Gk LB LG LR
03979 fadd qword ptr[Magic] ; Bk Rk Gk LB LG LR
03980 fxch st(1) ; Rk Bk Gk LB LG LR
03981 fstp qword ptr[Bucket] ; Bk Gk LB LG LR
03982 fstp qword ptr[Bucket2] ; Gk LB LG LR
03983
03984 mov eax,dword ptr[Bucket]
03985 mov ebp,dword ptr[Bucket2]
03986
03987 fstp qword ptr[Bucket] ; LB LG LR
03988
03989 and eax,REDMASK
03990 and ebp,BLUEMASK
03991
03992 mov esi,dword ptr[Bucket]
03993 or ebp,eax
03994
03995 add ecx,ebx
03996 and esi,GREENMASK
03997
03998 shr ecx,16
03999 or ebp,esi
04000
04001 xor eax,0
04002 mov esi,pTex
04003
04004 rol ebp,16
04005 add edx,dword ptr[DeltaV]
04006
04007 mov ax,word ptr[2*ecx+esi]
04008 mov ecx,edx
04009
04010 add ebx,dword ptr[DeltaU]
04011 and ecx,[GHMaskShifted16]
04012
04013 and ebx,[GWMaskShifted]
04014 xor eax,0
04015
04016 add ecx,ebx
04017 mov [edi],ebp
04018
04019 rol eax,16
04020 add edx,dword ptr[DeltaV]
04021
04022 shr ecx,16
04023 add ebx,dword ptr[DeltaU]
04024
04025 mov ax,word ptr[2*ecx+esi]
04026
04027 mov ecx,edx
04028 xor eax,0
04029
04030 and ecx,[GHMaskShifted16]
04031 mov esi,eax
04032
04033 mov ebp,eax
04034 and esi,REDMASK
04035
04036 and ebp,GREENMASK
04037 mov dword ptr[Red],esi
04038
04039 mov dword ptr[Green],ebp
04041
04042
04043 fild qword ptr[Red] ; r LB LG LR
04044
04045 mov ebp,eax
04046
04047 fmul st,st(3) ; R LB LG LR
04048 fild qword ptr[Green] ; g R LB LG LR
04049
04050 and ebp,BLUEMASK
04051 and ebx,[GWMaskShifted]
04052
04053 mov dword ptr[Blue],ebp
04054
04055 fmul st,st(3) ; G R LB LG LR
04056 fild [Blue] ; b G R LB LG LR
04057 fmul st,st(3) ; B G R LB LG LR
04058 fxch st(2) ; R G B LB LG LR
04059 fadd qword ptr[Magic] ; Rk G B LB LG LR
04060 fxch st(1) ; G Rk B LB LG LR
04061 fadd qword ptr[Magic] ; Gk Rk B LB LG LR
04062 fxch st(2) ; B Rk Gk LB LG LR
04063 fadd qword ptr[Magic] ; Bk Rk Gk LB LG LR
04064 fxch st(1) ; Rk Bk Gk LB LG LR
04065 fstp qword ptr[Bucket] ; Bk Gk LB LG LR
04066 fstp qword ptr[Bucket2] ; Gk LB LG LR
04067
04068 mov eax,dword ptr[Bucket]
04069 mov ebp,dword ptr[Bucket2]
04070
04071 fstp qword ptr[Bucket] ; LB LG LR
04072
04073 and eax,REDMASK
04074 and ebp,BLUEMASK
04075
04076 mov esi,dword ptr[Bucket]
04077 or ebp,eax
04078
04079 add ecx,ebx
04080 and esi,GREENMASK
04081
04082 shr ecx,16
04083 or ebp,esi
04084
04085 xor eax,0
04086 mov esi,pTex
04087
04088 rol ebp,16
04089 add edx,dword ptr[DeltaV]
04090
04091 mov ax,word ptr[2*ecx+esi]
04092 mov ecx,edx
04093
04094 add ebx,dword ptr[DeltaU]
04095 and ecx,[GHMaskShifted16]
04096
04097 and ebx,[GWMaskShifted]
04098 xor eax,0
04099
04100 add ecx,ebx
04101 mov [edi+4],ebp
04102
04103 rol eax,16
04104 add edx,dword ptr[DeltaV]
04105
04106 shr ecx,16
04107 add ebx,dword ptr[DeltaU]
04108
04109 mov ax,word ptr[2*ecx+esi]
04110
04111 mov ecx,edx
04112 xor eax,0
04113
04114 and ecx,[GHMaskShifted16]
04115 mov esi,eax
04116
04117 mov ebp,eax
04118 and esi,REDMASK
04119
04120 and ebp,GREENMASK
04121 mov dword ptr[Red],esi
04122
04123 mov dword ptr[Green],ebp
04125
04126
04127 fild qword ptr[Red] ; r LB LG LR
04128
04129 mov ebp,eax
04130
04131 fmul st,st(3) ; R LB LG LR
04132 fild qword ptr[Green] ; g R LB LG LR
04133
04134 and ebp,BLUEMASK
04135 and ebx,[GWMaskShifted]
04136
04137 mov dword ptr[Blue],ebp
04138
04139 fmul st,st(3) ; G R LB LG LR
04140 fild [Blue] ; b G R LB LG LR
04141 fmul st,st(3) ; B G R LB LG LR
04142 fxch st(2) ; R G B LB LG LR
04143 fadd qword ptr[Magic] ; Rk G B LB LG LR
04144 fxch st(1) ; G Rk B LB LG LR
04145 fadd qword ptr[Magic] ; Gk Rk B LB LG LR
04146 fxch st(2) ; B Rk Gk LB LG LR
04147 fadd qword ptr[Magic] ; Bk Rk Gk LB LG LR
04148 fxch st(1) ; Rk Bk Gk LB LG LR
04149 fstp qword ptr[Bucket] ; Bk Gk LB LG LR
04150 fstp qword ptr[Bucket2] ; Gk LB LG LR
04151
04152 mov eax,dword ptr[Bucket]
04153 mov ebp,dword ptr[Bucket2]
04154
04155 fstp qword ptr[Bucket] ; LB LG LR
04156
04157 and eax,REDMASK
04158 and ebp,BLUEMASK
04159
04160 mov esi,dword ptr[Bucket]
04161 or ebp,eax
04162
04163 add ecx,ebx
04164 and esi,GREENMASK
04165
04166 shr ecx,16
04167 or ebp,esi
04168
04169 xor eax,0
04170 mov esi,pTex
04171
04172 rol ebp,16
04173 add edx,dword ptr[DeltaV]
04174
04175 mov ax,word ptr[2*ecx+esi]
04176 mov ecx,edx
04177
04178 add ebx,dword ptr[DeltaU]
04179 and ecx,[GHMaskShifted16]
04180
04181 and ebx,[GWMaskShifted]
04182 xor eax,0
04183
04184 add ecx,ebx
04185 mov [edi+8],ebp
04186
04187 shr ecx,16
04188 add edx,dword ptr[DeltaV]
04189
04190 rol eax,16
04191 mov ax,word ptr[2*ecx+esi]
04192
04193 mov ecx,edx
04194 xor eax,0
04195
04196 and ecx,[GHMaskShifted16]
04197 mov esi,eax
04198
04199 mov ebp,eax
04200 and esi,REDMASK
04201
04202 and ebp,GREENMASK
04203 mov dword ptr[Red],esi
04204
04205 mov dword ptr[Green],ebp
04206 add ebx,dword ptr[DeltaU]
04208
04209
04210 fild qword ptr[Red] ; r LB LG LR
04211
04212 mov ebp,eax
04213
04214 fmul st,st(3) ; R LB LG LR
04215 fild qword ptr[Green] ; g R LB LG LR
04216
04217 and ebp,BLUEMASK
04218 and ebx,[GWMaskShifted]
04219
04220 mov dword ptr[Blue],ebp
04221
04222 fmul st,st(3) ; G R LB LG LR
04223 fild [Blue] ; b G R LB LG LR
04224 fmul st,st(3) ; B G R LB LG LR
04225 fxch st(2) ; R G B LB LG LR
04226 fadd qword ptr[Magic] ; Rk G B LB LG LR
04227 fxch st(1) ; G Rk B LB LG LR
04228 fadd qword ptr[Magic] ; Gk Rk B LB LG LR
04229 fxch st(2) ; B Rk Gk LB LG LR
04230 fadd qword ptr[Magic] ; Bk Rk Gk LB LG LR
04231 fxch st(1) ; Rk Bk Gk LB LG LR
04232 fstp qword ptr[Bucket] ; Bk Gk LB LG LR
04233 fstp qword ptr[Bucket2] ; Gk LB LG LR
04234
04235 mov eax,dword ptr[Bucket]
04236 mov ebp,dword ptr[Bucket2]
04237
04238 fstp qword ptr[Bucket] ; LB LG LR
04239
04240 and eax,REDMASK
04241 and ebp,BLUEMASK
04242
04243 mov esi,dword ptr[Bucket]
04244 or ebp,eax
04245
04246 add ecx,ebx
04247 and esi,GREENMASK
04248
04249 shr ecx,16
04250 or ebp,esi
04251
04252 xor eax,0
04253 mov esi,pTex
04254
04255 rol ebp,16
04256 add edx,dword ptr[DeltaV]
04257
04258 mov ax,word ptr[2*ecx+esi]
04259 mov ecx,edx
04260
04261 add ebx,dword ptr[DeltaU]
04262 and ecx,[GHMaskShifted16]
04263
04264 and ebx,[GWMaskShifted]
04265 xor eax,0
04266
04267 add ecx,ebx
04268 mov [edi+12],ebp
04269
04270 shr ecx,16
04271 add edx,dword ptr[DeltaV]
04272
04273 rol eax,16
04274 mov ax,word ptr[2*ecx+esi]
04275
04276 mov ecx,edx
04277 xor eax,0
04278
04279 and ecx,[GHMaskShifted16]
04280 mov esi,eax
04281
04282 mov ebp,eax
04283 and esi,REDMASK
04284
04285 and ebp,GREENMASK
04286 mov dword ptr[Red],esi
04287
04288 mov dword ptr[Green],ebp
04289 add ebx,dword ptr[DeltaU]
04291
04292
04293 fild qword ptr[Red] ; r LB LG LR
04294
04295 mov ebp,eax
04296
04297 fmul st,st(3) ; R LB LG LR
04298 fild qword ptr[Green] ; g R LB LG LR
04299
04300 and ebp,BLUEMASK
04301 and ebx,[GWMaskShifted]
04302
04303 mov dword ptr[Blue],ebp
04304
04305 fmul st,st(3) ; G R LB LG LR
04306 fild [Blue] ; b G R LB LG LR
04307 fmul st,st(3) ; B G R LB LG LR
04308 fxch st(2) ; R G B LB LG LR
04309 fadd qword ptr[Magic] ; Rk G B LB LG LR
04310 fxch st(1) ; G Rk B LB LG LR
04311 fadd qword ptr[Magic] ; Gk Rk B LB LG LR
04312 fxch st(2) ; B Rk Gk LB LG LR
04313 fadd qword ptr[Magic] ; Bk Rk Gk LB LG LR
04314 fxch st(1) ; Rk Bk Gk LB LG LR
04315 fstp qword ptr[Bucket] ; Bk Gk LB LG LR
04316 fstp qword ptr[Bucket2] ; Gk LB LG LR
04317
04318 mov eax,dword ptr[Bucket]
04319 mov ebp,dword ptr[Bucket2]
04320
04321 fstp qword ptr[Bucket] ; LB LG LR
04322
04323 and eax,REDMASK
04324 and ebp,BLUEMASK
04325
04326 mov esi,dword ptr[Bucket]
04327 or ebp,eax
04328
04329 add ecx,ebx
04330 and esi,GREENMASK
04331
04332 shr ecx,16
04333 or ebp,esi
04334
04335 xor eax,0
04336 mov esi,pTex
04337
04338 rol ebp,16
04339 add edx,dword ptr[DeltaV]
04340
04341 mov ax,word ptr[2*ecx+esi]
04342 mov ecx,edx
04343
04344 add ebx,dword ptr[DeltaU]
04345 and ecx,[GHMaskShifted16]
04346
04347 and ebx,[GWMaskShifted]
04348 xor eax,0
04349
04350 add ecx,ebx
04351 mov [edi+16],ebp
04352
04353 shr ecx,16
04354 add edx,dword ptr[DeltaV]
04355
04356 rol eax,16
04357 mov ax,word ptr[2*ecx+esi]
04358
04359 mov ecx,edx
04360 xor eax,0
04361
04362 and ecx,[GHMaskShifted16]
04363 mov esi,eax
04364
04365 mov ebp,eax
04366 and esi,REDMASK
04367
04368 and ebp,GREENMASK
04369 mov dword ptr[Red],esi
04370
04371 mov dword ptr[Green],ebp
04372 add ebx,dword ptr[DeltaU]
04374
04375
04376 fild qword ptr[Red] ; r LB LG LR
04377
04378 mov ebp,eax
04379
04380 fmul st,st(3) ; R LB LG LR
04381 fild qword ptr[Green] ; g R LB LG LR
04382
04383 and ebp,BLUEMASK
04384 and ebx,[GWMaskShifted]
04385
04386 mov dword ptr[Blue],ebp
04387
04388 fmul st,st(3) ; G R LB LG LR
04389 fild [Blue] ; b G R LB LG LR
04390 fmul st,st(3) ; B G R LB LG LR
04391 fxch st(2) ; R G B LB LG LR
04392 fadd qword ptr[Magic] ; Rk G B LB LG LR
04393 fxch st(1) ; G Rk B LB LG LR
04394 fadd qword ptr[Magic] ; Gk Rk B LB LG LR
04395 fxch st(2) ; B Rk Gk LB LG LR
04396 fadd qword ptr[Magic] ; Bk Rk Gk LB LG LR
04397 fxch st(1) ; Rk Bk Gk LB LG LR
04398 fstp qword ptr[Bucket] ; Bk Gk LB LG LR
04399 fstp qword ptr[Bucket2] ; Gk LB LG LR
04400
04401 mov eax,dword ptr[Bucket]
04402 mov ebp,dword ptr[Bucket2]
04403
04404 fstp qword ptr[Bucket] ; LB LG LR
04405
04406 and eax,REDMASK
04407 and ebp,BLUEMASK
04408
04409 mov esi,dword ptr[Bucket]
04410 or ebp,eax
04411
04412 add ecx,ebx
04413 and esi,GREENMASK
04414
04415 shr ecx,16
04416 or ebp,esi
04417
04418 xor eax,0
04419 mov esi,pTex
04420
04421 rol ebp,16
04422 add edx,dword ptr[DeltaV]
04423
04424 mov ax,word ptr[2*ecx+esi]
04425 mov ecx,edx
04426
04427 add ebx,dword ptr[DeltaU]
04428 and ecx,[GHMaskShifted16]
04429
04430 and ebx,[GWMaskShifted]
04431 xor eax,0
04432
04433 add ecx,ebx
04434 mov [edi+20],ebp
04435
04436 shr ecx,16
04437 add edx,dword ptr[DeltaV]
04438
04439 rol eax,16
04440 mov ax,word ptr[2*ecx+esi]
04441
04442 mov ecx,edx
04443 xor eax,0
04444
04445 and ecx,[GHMaskShifted16]
04446 mov esi,eax
04447
04448 mov ebp,eax
04449 and esi,REDMASK
04450
04451 and ebp,GREENMASK
04452 mov dword ptr[Red],esi
04453
04454 mov dword ptr[Green],ebp
04455 add ebx,dword ptr[DeltaU]
04457
04458
04459 fild qword ptr[Red] ; r LB LG LR
04460
04461 mov ebp,eax
04462
04463 fmul st,st(3) ; R LB LG LR
04464 fild qword ptr[Green] ; g R LB LG LR
04465
04466 and ebp,BLUEMASK
04467 and ebx,[GWMaskShifted]
04468
04469 mov dword ptr[Blue],ebp
04470
04471 fmul st,st(3) ; G R LB LG LR
04472 fild [Blue] ; b G R LB LG LR
04473 fmul st,st(3) ; B G R LB LG LR
04474 fxch st(2) ; R G B LB LG LR
04475 fadd qword ptr[Magic] ; Rk G B LB LG LR
04476 fxch st(1) ; G Rk B LB LG LR
04477 fadd qword ptr[Magic] ; Gk Rk B LB LG LR
04478 fxch st(2) ; B Rk Gk LB LG LR
04479 fadd qword ptr[Magic] ; Bk Rk Gk LB LG LR
04480 fxch st(1) ; Rk Bk Gk LB LG LR
04481 fstp qword ptr[Bucket] ; Bk Gk LB LG LR
04482 fstp qword ptr[Bucket2] ; Gk LB LG LR
04483
04484 mov eax,dword ptr[Bucket]
04485 mov ebp,dword ptr[Bucket2]
04486
04487 fstp qword ptr[Bucket] ; LB LG LR
04488
04489 and eax,REDMASK
04490 and ebp,BLUEMASK
04491
04492 mov esi,dword ptr[Bucket]
04493 or ebp,eax
04494
04495 add ecx,ebx
04496 and esi,GREENMASK
04497
04498 shr ecx,16
04499 or ebp,esi
04500
04501 xor eax,0
04502 mov esi,pTex
04503
04504 rol ebp,16
04505 add edx,dword ptr[DeltaV]
04506
04507 mov ax,word ptr[2*ecx+esi]
04508 mov ecx,edx
04509
04510 add ebx,dword ptr[DeltaU]
04511 and ecx,[GHMaskShifted16]
04512
04513 and ebx,[GWMaskShifted]
04514 xor eax,0
04515
04516 add ecx,ebx
04517 mov [edi+24],ebp
04518
04519 shr ecx,16
04520 add edx,dword ptr[DeltaV]
04521
04522 rol eax,16
04523 mov ax,word ptr[2*ecx+esi]
04524
04525 mov ecx,edx
04526 xor eax,0
04527
04528 and ecx,[GHMaskShifted16]
04529 mov esi,eax
04530
04531 mov ebp,eax
04532 and esi,REDMASK
04533
04534 and ebp,GREENMASK
04535 mov dword ptr[Red],esi
04536
04537 mov dword ptr[Green],ebp
04538 add ebx,dword ptr[DeltaU]
04540 fild qword ptr[Red] ; r LB LG LR
04541
04542 mov ebp,eax
04543
04544 fmul st,st(3) ; R LB LG LR
04545 fild qword ptr[Green] ; g R LB LG LR
04546
04547 and ebp,BLUEMASK
04548 and ebx,[GWMaskShifted]
04549
04550 mov dword ptr[Blue],ebp
04551
04552 fmul st,st(3) ; G R LB LG LR
04553 fild [Blue] ; b G R LB LG LR
04554 fmul st,st(3) ; B G R LB LG LR
04555 fxch st(2) ; R G B LB LG LR
04556 fadd qword ptr[Magic] ; Rk G B LB LG LR
04557 fxch st(1) ; G Rk B LB LG LR
04558 fadd qword ptr[Magic] ; Gk Rk B LB LG LR
04559 fxch st(2) ; B Rk Gk LB LG LR
04560 fadd qword ptr[Magic] ; Bk Rk Gk LB LG LR
04561 fxch st(1) ; Rk Bk Gk LB LG LR
04562 fstp qword ptr[Bucket] ; Bk Gk LB LG LR
04563 fstp qword ptr[Bucket2] ; Gk LB LG LR
04564
04565 mov eax,dword ptr[Bucket]
04566 mov ebp,dword ptr[Bucket2]
04567
04568 fstp qword ptr[Bucket] ; LB LG LR
04569
04570 and eax,REDMASK
04571 and ebp,BLUEMASK
04572
04573 mov esi,dword ptr[Bucket]
04574 or ebp,eax
04575
04576 fstp dword ptr[Bucket]
04577 fstp dword ptr[Bucket2]
04578 fstp dword ptr[Bucket]
04579
04580 and esi,GREENMASK
04581
04582 fld dword ptr[FTemp5]
04583 fld dword ptr[FTemp4]
04584
04585 or ebp,esi
04586
04587 fld dword ptr[FTemp3]
04588 fld dword ptr[FTemp2]
04589
04590 rol ebp,16
04591
04592 fld dword ptr[FTemp1]
04593 fld dword ptr[FTemp0]
04594
04595 mov [edi+28],ebp ; store pixel 0
04596 pop ebp
04597
04598
04599 ; get corrected right side deltas ; st0 st1 st2 st3 st4 st5 st6 st7
04600 ; ZR UZR ZRi VZR UL VL
04601 fld st ; ZR ZR UZR ZRi VZR UL VL
04602 fmul st,st(4) ; VR ZR UZR ZRi VZR UL VL
04603 fxch st(1) ; ZR VR UZR ZRi VZR UL VL
04604 fmul st,st(2) ; UR VR UZR ZRi VZR UL VL
04605
04606 add edi,32 ; move screen pointer to start of next aspan
04607 dec [NumASpans] ; dec num affine spans
04608 jnz SpanLoop16
04609
04610 HandleLeftoverPixels16:
04611
04612 mov esi,[pTex]
04613
04614
04615 cmp [RemainingCount],0
04616 jz FPUReturn16
04617
04618
04619 fstp dword ptr[FloatTemp] ; VR UZR ZRi VZR UL VL
04620 fld st(4) ; UL VR UZR ZRi VZR UL VL
04621 fmul [GLMapMulU] ; ULL VR UZR ZRi VZR UL VL
04622 fld st(5) ; UL ULL VR UZR ZRi VZR UL VL
04623 fadd qword ptr[MipMagic] ; ULk ULL VR UZR ZRi VZR UL VL
04624 fxch st(1) ; ULL ULk VR UZR ZRi VZR UL VL
04625 fadd qword ptr[MipMagic] ; ULLk ULk VR UZR ZRi VZR UL VL
04626 fxch st(1) ; ULk ULLk VR UZR ZRi VZR UL VL
04627 fstp qword ptr[Bucket] ; ULLk VR UZR ZRi VZR UL VL
04628 fstp qword ptr[Bucket2] ; VR UZR ZRi VZR UL VL
04629
04630 mov ebx,dword ptr[Bucket]
04631 mov eax,dword ptr[Bucket2]
04632
04633 fld st(5) ; VL VR UZR ZRi VZR UL VL
04634 fmul [GLMapMulV] ; VLL VR UZR ZRi VZR UL VL
04635
04636 add ebx,dword ptr[UAdjust]
04637 add eax,dword ptr[UAdjustL]
04638
04639 mov [U1],ebx
04640 mov [UFixed],eax
04641
04642 fld st(6) ; VL VLL VR UZR ZRi VZR UL VL
04643 fadd qword ptr[MipMagic] ; VLk VLL VR UZR ZRi VZR UL VL
04644 fxch st(1) ; VLL VLk VR UZR ZRi VZR UL VL
04645 fadd qword ptr[MipMagic] ; VLLk VLk VR UZR ZRi VZR UL VL
04646 fxch st(1) ; VLk VLLk VR UZR ZRi VZR UL VL
04647 fstp qword ptr[Bucket] ; VLLk VR UZR ZRi VZR UL VL
04648 fstp qword ptr[Bucket2] ; VR UZR ZRi VZR UL VL
04649
04650 mov ebx,dword ptr[Bucket]
04651 mov eax,dword ptr[Bucket2]
04652
04653 fld dword ptr[FloatTemp] ; UR VR UZR ZRi VZR UL dV
04654
04655 add ebx,dword ptr[VAdjust]
04656 add eax,dword ptr[VAdjustL]
04657
04658 mov [V1],ebx
04659 mov [VFixed],eax
04660
04661 dec [RemainingCount]
04662 jz OnePixelSpan16
04663
04664
04665
04666 fstp [FloatTemp] ; inv. inv. inv. inv. UL VL
04667 fstp [FloatTemp] ; inv. inv. inv. UL VL
04668 fstp [FloatTemp] ; inv. inv. UL VL
04669 fstp [FloatTemp] ; inv. UL VL
04670 fstp [FloatTemp] ; UL VL
04671 fild [y] ; y UL VL
04672 fild [x2] ; xr y UL VL
04673
04674 fld [UDivZStepY] ; UZdY xr y UL VL
04675 fld [UDivZStepX] ; UZdX UZdY xr y UL VL
04676 fmul st,st(2) ; UZX UZdY xr y UL VL
04677 fld [VDivZStepY] ; VZdY UZX UZdY xr y UL VL
04678 fld [VDivZStepX] ; VZdX VZdY UZX UZdY xr y UL VL
04679 fxch st(3) ; UZdy VZdY UZX VZdX xr y UL VL
04680 fmul st,st(5) ; UZY VZdY UZX VZdX xr y UL VL
04681 fxch st(2) ; UZX VZdY UZY VZdX xr y UL VL
04682 fadd [UDivZOrigin] ; UZXS VZdY UZY VZdX xr y UL VL
04683 fxch st(3) ; VZdX VZdY UZY UZXS xr y UL VL
04684 fmul st,st(4) ; VZX VZdY UZY UZXS xr y UL VL
04685 fxch st(2) ; UZY VZdY VZX UZXS xr y UL VL
04686 faddp st(3),st ; VZdY VZX UZ xr y UL VL
04687 fmul st,st(4) ; VZY VZX UZ xr y UL VL
04688 fxch st(1) ; VZX VZY UZ xr y UL VL
04689 fadd [VDivZOrigin] ; VZXS VZY UZ xr y UL VL
04690 fld [ZiStepX] ; ZdX VZXS VZY UZ xr y UL VL
04691 fmulp st(4),st ; VZXS VZY UZ ZX y UL VL
04692 faddp st(1),st ; VZ UZ ZX y UL VL
04693 fld [ZiStepY] ; ZdY VZ UZ ZX y UL VL
04694 fmulp st(4),st ; VZ UZ ZX ZY UL VL
04695 fxch st(2) ; ZX UZ VZ ZY UL VL
04696 fadd [ZiOrigin] ; ZXS UZ VZ ZY UL VL
04697
04698 faddp st(3),st ; UZ VZ Zi UL VL
04699 fld1 ; 1 UZ VZ Zi UL VL
04700 fdiv st,st(3) ; ZR UZ VZ Zi UL VL
04701
04702 fld st ; ZR ZR UZ VZ Zi UL VL
04703 fmul st,st(3) ; VR ZR UZ VZ Zi UL VL
04704 fxch st(1) ; ZR VR UZ VZ Zi UL VL
04705 fmul st,st(2) ; UR VR UZ VZ Zi UL VL
04706
04707
04708
04709 ; calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
04710 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
04711 fxch st(1) ; VR UR inv. inv. inv. dU VL
04712 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
04713 fxch st(6) ; dV UR inv. inv. inv. dU VR
04714 fidiv dword ptr[RemainingCount];dv UR inv. inv. inv. dU VR
04715 fadd qword ptr[MipMagic] ; dvk UR inv. inv. inv. dU VR
04716 fxch st(5) ; dU UR inv. inv. inv. dvk VR
04717 fidiv dword ptr[RemainingCount];du UR inv. inv. inv. dvk VR
04718 fadd qword ptr[MipMagic] ; duk UR inv. inv. inv. dvk VR
04719 fxch st(5) ; dvk UR inv. inv. inv. duk VR
04720 fstp qword ptr[DeltaV] ; UR inv. inv. inv. duk VR
04721 fxch st(4) ; duk inv. inv. inv. UR VR
04722 fstp qword ptr[DeltaU] ; inv. inv. inv. UR VR
04723 fld st(1) ; inv. inv. inv. inv. UR VR
04724 fld st(2) ; inv. inv. inv. inv. inv. UR VR
04725
04726 OnePixelSpan16:
04727
04728 mov ebx,[UFixed]
04729 cmp ebx,MaxU
04730 jle TryClampU116
04731 mov ecx,MaxU
04732 mov dword ptr[UFixed],ecx
04733 jmp NoClampU116
04734
04735 TryClampU116:
04736 cmp ebx,0
04737 jge NoClampU116
04738 mov dword ptr[UFixed],0
04739 NoClampU116:
04740 mov eax,[VFixed]
04741 cmp eax,MaxV
04742 jle TryClampV116
04743 mov ecx,MaxV
04744 mov dword ptr[VFixed],ecx
04745 jmp NoClampV116
04746
04747 TryClampV116:
04748 cmp eax,0
04749 jge NoClampV116
04750 mov dword ptr[VFixed],0
04751
04752 NoClampV116:
04753 mov esi,dword ptr[UFixed]
04754 mov eax,dword ptr[VFixed]
04755
04756 mov ecx, GMipLevel4_8
04757 sar esi, cl
04758 sar eax, cl
04759 and esi, 0ffh
04760 and eax, 0ffh
04761 mov UDist, esi
04762 mov VDist, eax
04763
04764 mov esi,dword ptr[UFixed]
04765 mov eax,dword ptr[VFixed]
04766 mov ecx, GMipLevel20
04767 shr esi, cl
04768 shr eax, cl
04769
04770 imul eax, GLightWidth
04771 add esi, eax
04772
04773 mov edx, esi
04774 shl esi, 1
04775 add edx, esi
04776
04777 add edx, GLightData
04778
04779
04780 xor ecx, ecx
04781 mov cl, [edx+3]
04782 mov eax, ecx
04783 mov cl, [edx+0]
04784 sub eax, ecx
04785 imul eax, UDist
04786 shl ecx, 8
04787 add eax, ecx
04788 mov [R1], eax
04789
04790 xor ecx, ecx
04791 mov cl, [edx+4]
04792 mov eax, ecx
04793 mov cl, [edx+1]
04794 sub eax, ecx
04795 imul eax, UDist
04796 shl ecx, 8
04797 add eax, ecx
04798 mov [G1], eax
04799
04800 xor ecx, ecx
04801 mov cl, [edx+5]
04802 mov eax, ecx
04803 mov cl, [edx+2]
04804 sub eax, ecx
04805 imul eax, UDist
04806 shl ecx, 8
04807 add eax, ecx
04808 mov [B1], eax
04809
04810 add edx, GLightWidth
04811 add edx, GLightWidth
04812 add edx, GLightWidth
04813
04814
04815 xor ecx, ecx
04816 mov cl, [edx+3]
04817 mov eax, ecx
04818 mov cl, [edx+0]
04819 sub eax, ecx
04820 imul eax, UDist
04821 shl ecx, 8
04822 add eax, ecx
04823 mov [R2], eax
04824
04825 xor ecx, ecx
04826 mov cl, [edx+4]
04827 mov eax, ecx
04828 mov cl, [edx+1]
04829 sub eax, ecx
04830 imul eax, UDist
04831 shl ecx, 8
04832 add eax, ecx
04833 mov [G2], eax
04834
04835 xor ecx, ecx
04836 mov cl, [edx+5]
04837 mov eax, ecx
04838 mov cl, [edx+2]
04839 sub eax, ecx
04840 imul eax, UDist
04841 shl ecx, 8
04842 add eax, ecx
04843 mov [B2], eax
04844
04845
04846 mov eax, [R2]
04847 sub eax, [R1]
04848 imul eax, VDist
04849 sar eax, 8
04850 add eax, [R1]
04851 shr eax, 8
04852 and eax,0feh
04853
04854 mov [RR1], eax
04855
04856 mov eax, [G2]
04857 sub eax, [G1]
04858 imul eax, VDist
04859 sar eax, 8
04860 add eax, [G1]
04861 shr eax, 8
04862 and eax,0feh
04863
04864 mov [GG1], eax
04865
04866 mov eax, [B2]
04867 sub eax, [B1]
04868 imul eax, VDist
04869 sar eax, 8
04870 add eax, [B1]
04871 shr eax, 8
04872 and eax,0feh
04873
04874 mov [BB1], eax
04875
04876 fstp [FTemp0]
04877 fstp [FTemp1]
04878 fstp [FTemp2]
04879 fstp [FTemp3]
04880 fstp [FTemp4]
04881 fstp [FTemp5]
04882
04883 mov ebx,dword ptr[U1]
04884 mov edx,dword ptr[V1]
04885
04886 fild [RR1] ; LR
04887 fild [GG1] ; LG LR
04888 fild [BB1] ; LB LG LR
04889
04890 mov ecx,[VShift]
04891 add edx,dword ptr[VAdjust2]
04892
04893 add ebx,dword ptr[UAdjust2]
04894 mov eax,dword ptr[DeltaV]
04895
04896 shl eax,cl
04897 mov esi,pTex
04898
04899 shl edx,cl
04900 mov dword ptr[DeltaV],eax
04901
04902
04903 push ebp
04904
04905 LeftoverLoop16:
04906 mov eax,edx
04907 and ebx,[GWMaskShifted]
04908
04909 and eax,[GHMaskShifted16]
04910
04911 add eax,ebx
04912 add ebx,dword ptr[DeltaU]
04913
04914 shr eax,16
04915 add edi,2
04916
04917 mov ax,word ptr[2*eax+esi]
04918 add edx,dword ptr[DeltaV]
04919
04920 xor eax,0
04921
04922 mov esi,eax
04923 mov ebp,eax
04924
04925 and esi,REDMASK
04926 and ebp,GREENMASK
04927
04928 mov dword ptr[Red],esi
04929 mov dword ptr[Green],ebp
04930
04931 fild qword ptr[Red] ; r LB LG LR
04932
04933 mov ebp,eax
04934
04935 fmul st,st(3) ; R LB LG LR
04936 fild qword ptr[Green] ; g R LB LG LR
04937
04938 and ebp,BLUEMASK
04939
04940 mov dword ptr[Blue],ebp
04941
04942 fmul st,st(3) ; G R LB LG LR
04943 fild [Blue] ; b G R LB LG LR
04944 fmul st,st(3) ; B G R LB LG LR
04945 fxch st(2) ; R G B LB LG LR
04946 fadd qword ptr[Magic] ; Rk G B LB LG LR
04947 fxch st(1) ; G Rk B LB LG LR
04948 fadd qword ptr[Magic] ; Gk Rk B LB LG LR
04949 fxch st(2) ; B Rk Gk LB LG LR
04950 fadd qword ptr[Magic] ; Bk Rk Gk LB LG LR
04951 fxch st(1) ; Rk Bk Gk LB LG LR
04952 fstp qword ptr[Bucket] ; Bk Gk LB LG LR
04953 fstp qword ptr[Bucket2] ; Gk LB LG LR
04954
04955 mov eax,dword ptr[Bucket]
04956 mov ebp,dword ptr[Bucket2]
04957
04958 fstp qword ptr[Bucket] ; LB LG LR
04959
04960 and eax,REDMASK
04961 and ebp,BLUEMASK
04962
04963 mov esi,dword ptr[Bucket]
04964 or ebp,eax
04965
04966 and esi,GREENMASK
04967
04968 or ebp,esi
04969
04970 mov esi,pTex
04971
04972 mov word ptr[edi-2],bp
04973
04974 dec [RemainingCount]
04975 jge LeftoverLoop16
04976
04977 pop ebp
04978
04979
04980 FPUReturn16:
04981 ffree st(0)
04982 ffree st(1)
04983 ffree st(2)
04984 ffree st(3)
04985 ffree st(4)
04986 ffree st(5)
04987 ffree st(6)
04988
04989 Return16:
04990 pop edi
04991 pop esi
04992 pop ecx
04993 pop ebx
04994 }
04995 }
04996
04997 void DrawSpan16_AsmLitZBufferX86FPU(int32 x1, int32 x2, int32 y)
04998 {
04999 TDest =Dest;
05000 _asm
05001 {
05002 push ebx
05003 push ecx
05004 push esi
05005 push edi
05006
05007 mov eax,x1
05008 mov ecx,x2
05009 sub ecx,eax
05010 jle Return16
05011
05012 mov edi,[GBitPtr16]
05013 mov pTex,edi
05014
05015 fild [y] ; y
05016
05017 mov esi,x1
05018 mov edi,[TDest]
05019
05020 shl esi,1
05021 mov eax,ecx
05022
05023 add edi,esi
05024 add pZBufferPtr,esi
05025
05026 shr ecx,4
05027 and eax,15
05028 _emit 75h
05029 _emit 06h
05030 dec ecx
05031 mov eax,16
05032
05033 mov [NumASpans],ecx
05034 mov [RemainingCount],eax
05035
05036 fild [x1] ; x y
05037
05038
05039
05040 fld [UDivZStepY] ; UZdY x y
05041 fld [UDivZStepX] ; UZdX UZdY x y
05042 fmul st,st(2) ; UZX UZdY x y
05043 fld [VDivZStepY] ; VZdY UZX UZdY x y
05044 fld [VDivZStepX] ; VZdX VZdY UZX UZdY x y
05045 fxch st(3) ; UZdy VZdY UZX VZdX x y
05046 fmul st,st(5) ; UZY VZdY UZX VZdX x y
05047 fxch st(2) ; UZX VZdY UZY VZdX x y
05048 fadd [UDivZOrigin] ; UZXS VZdY UZY VZdX x y
05049 fxch st(3) ; VZdX VZdY UZY UZXS x y
05050 fmul st,st(4) ; VZX VZdY UZY UZXS x y
05051 fxch st(2) ; UZY VZdY VZX UZXS x y
05052 faddp st(3),st ; VZdY VZX UZ x y
05053 fmul st,st(4) ; VZY VZX UZ x y
05054 fxch st(1) ; VZX VZY UZ x y
05055 fadd [VDivZOrigin] ; VZXS VZY UZ x y
05056 fld [ZiStepX] ; ZdX VZXS VZY UZ x y
05057
05058
05059 fld [ZiStepX]
05060 fmul dword ptr[ZBufferPrec]
05061 fmul dword ptr[Two]
05062 fistp dword ptr[ZDelta]
05063
05064 fmulp st(4),st ; VZXS VZY UZ ZX y
05065 faddp st(1),st ; VZ UZ ZX y
05066 fld [ZiStepY] ; ZdY VZ UZ ZX y
05067 fmulp st(4),st ; VZ UZ ZX ZY
05068 fxch st(2) ; ZX UZ VZ ZY
05069 fadd [ZiOrigin] ; ZXS UZ VZ ZY
05070
05071
05072
05073 faddp st(3),st ; UZ VZ Zi
05074 fld1 ; 1 UZ VZ Zi
05075 fdiv st,st(3) ; ZL UZ VZ Zi
05076
05077
05078
05079 fld st ; ZL ZL UZ VZ Zi
05080 fmul st,st(3) ; VL ZL UZ VZ Zi
05081 fxch st(4) ; Zi ZL UZ VZ VL
05082
05083
05084 fld st
05085 fmul dword ptr[ZBufferPrec]
05086 fistp dword ptr[ZVal]
05087
05088 fadd [Zi16StepX] ; ZRi ZL UZ VZ VL
05089 fxch st(1) ; ZL ZRi UZ VZ VL
05090 fmul st,st(2) ; UL ZRi UZ VZ VL
05091 fxch st(3) ; VZ ZRi UZ UL VL
05092 fadd [VDivZ16StepX] ; VZR ZRi UZ UL VL
05093 fxch st(2) ; UZ ZRi VZR UL VL
05094 fadd [UDivZ16StepX] ; UZR ZRi VZR UL VL
05095 fld1 ; 1 UZR ZRi VZR UL VL
05096 fdiv st,st(2) ; ZR UZR ZRi VZR UL VL
05097
05098
05099
05100 fld st ; ZR ZR UZR ZRi VZR UL VL
05101 fmul st,st(4) ; VR ZR UZR ZRi VZR UL VL
05102 fxch st(1) ; ZR VR UZR ZRi VZR UL VL
05103
05104 fmul st,st(2) ; UR VR UZR ZRi VZR UL VL
05105
05106 test ecx,ecx
05107 jz HandleLeftoverPixels16
05108
05109 SpanLoop16:
05110
05111 fstp dword ptr[FloatTemp] ; VR UZR ZRi VZR UL VL
05112 fld st(4) ; UL VR UZR ZRi VZR UL VL
05113 fmul [GLMapMulU] ; ULL VR UZR ZRi VZR UL VL
05114 fld st(5) ; UL ULL VR UZR ZRi VZR UL VL
05115 fadd qword ptr[MipMagic] ; ULk ULL VR UZR ZRi VZR UL VL
05116 fxch st(1) ; ULL ULk VR UZR ZRi VZR UL VL
05117 fadd qword ptr[MipMagic] ; ULLk ULk VR UZR ZRi VZR UL VL
05118 fxch st(1) ; ULk ULLk VR UZR ZRi VZR UL VL
05119 fstp qword ptr[Bucket] ; ULLk VR UZR ZRi VZR UL VL
05120 fstp qword ptr[Bucket2] ; VR UZR ZRi VZR UL VL
05121
05122 mov ebx,dword ptr[Bucket]
05123 mov eax,dword ptr[Bucket2]
05124
05125 fld st(5) ; VL VR UZR ZRi VZR UL VL
05126 fmul [GLMapMulV] ; VLL VR UZR ZRi VZR UL VL
05127
05128 add ebx,dword ptr[UAdjust]
05129 add eax,dword ptr[UAdjustL]
05130
05131 mov [U1],ebx
05132 mov [UFixed],eax
05133
05134 fld st(6) ; VL VLL VR UZR ZRi VZR UL VL
05135 fadd qword ptr[MipMagic] ; VLk VLL VR UZR ZRi VZR UL VL
05136 fxch st(1) ; VLL VLk VR UZR ZRi VZR UL VL
05137 fadd qword ptr[MipMagic] ; VLLk VLk VR UZR ZRi VZR UL VL
05138 fxch st(1) ; VLk VLLk VR UZR ZRi VZR UL VL
05139 fstp qword ptr[Bucket] ; VLLk VR UZR ZRi VZR UL VL
05140 fstp qword ptr[Bucket2] ; VR UZR ZRi VZR UL VL
05141 fsubr st(5),st ; VR UZR ZRi VZR UL dV
05142
05143 mov ebx,dword ptr[Bucket]
05144 mov eax,dword ptr[Bucket2]
05145
05146 fld dword ptr[FloatTemp] ; UR VR UZR ZRi VZR UL dV
05147
05148 add ebx,dword ptr[VAdjust]
05149 add eax,dword ptr[VAdjustL]
05150
05151 mov [V1],ebx
05152 mov [VFixed],eax
05153
05154 fsubr st(5),st ; UR VR UZR ZRi VZR dU dV
05155 fxch st(6) ; dV VR UZR ZRi VZR dU UR
05156 fadd qword ptr[MipMagic2] ; dVk VR UZR ZRi VZR dU UR
05157 fxch st(5) ; dU VR UZR ZRi VZR dVk UR
05158 fadd qword ptr[MipMagic2] ; dUk VR UZR ZRi VZR dVk UR
05159 fxch st(5) ; dVk VR UZR ZRi VZR dUk UR
05160 fstp qword ptr[DeltaV] ; VR UZR ZRi VZR dUk UR
05161 fxch st(5) ; UR UZR ZRi VZR dUk VR
05162
05163 fxch st(4) ; dUk UZR ZRi VZR UR VR
05164 fstp qword ptr[DeltaU] ; UZR ZRi VZR UR VR
05165
05166
05167 fadd [UDivZ16StepX] ; UZR ZLi VZL UL VL
05168 fxch st(1) ; ZLi UZR VZL UL VL
05169 fadd [Zi16StepX] ; ZRi UZR VZL UL VL
05170 fxch st(2) ; VZL UZR ZRi UL VL
05171 fadd [VDivZ16StepX] ; VZR UZR ZRi UL VL
05172 fxch st(2) ; ZRi UZR VZR UL VL
05173 fxch st(1) ; UZR ZRi VZR UL VL
05174 fld1 ; 1 UZR ZRi VZR UL VL
05175 fdiv st,st(2) ; ZR UZR ZRi VZR UL VL
05176
05177
05178 mov ebx,[UFixed]
05179 cmp ebx,MaxU
05180 jle TryClampU016
05181 mov ecx,MaxU
05182 mov dword ptr[UFixed],ecx
05183 jmp NoClampU016
05184
05185 TryClampU016:
05186 cmp ebx,0
05187 jge NoClampU016
05188 mov dword ptr[UFixed],0
05189 NoClampU016:
05190 mov eax,[VFixed]
05191 cmp eax,MaxV
05192 jle TryClampV016
05193 mov ecx,MaxV
05194 mov dword ptr[VFixed],ecx
05195 jmp NoClampV016
05196
05197 TryClampV016:
05198 cmp eax,0
05199 jge NoClampV016
05200 mov dword ptr[VFixed],0
05201
05202 NoClampV016:
05203
05204
05205 mov esi,dword ptr[UFixed]
05206 mov eax,dword ptr[VFixed]
05207
05208
05209 mov ecx, GMipLevel4_8
05210 sar esi, cl
05211 sar eax, cl
05212 and esi, 0ffh
05213 and eax, 0ffh
05214 mov UDist, esi
05215 mov VDist, eax
05216
05217 mov esi,dword ptr[UFixed]
05218 mov eax,dword ptr[VFixed]
05219 mov ecx, GMipLevel20
05220 shr esi, cl
05221 shr eax, cl
05222
05223 imul eax, GLightWidth
05224 add esi, eax
05225
05226 mov edx, esi
05227 shl esi, 1
05228 add edx, esi
05229
05230 add edx, GLightData
05231
05232
05233 xor ecx, ecx
05234 mov cl, [edx+3]
05235 mov eax, ecx
05236 mov cl, [edx+0]
05237 sub eax, ecx
05238 imul eax, UDist
05239 shl ecx, 8
05240 add eax, ecx
05241 mov [R1], eax
05242
05243 xor ecx, ecx
05244 mov cl, [edx+4]
05245 mov eax, ecx
05246 mov cl, [edx+1]
05247 sub eax, ecx
05248 imul eax, UDist
05249 shl ecx, 8
05250 add eax, ecx
05251 mov [G1], eax
05252
05253 xor ecx, ecx
05254 mov cl, [edx+5]
05255 mov eax, ecx
05256 mov cl, [edx+2]
05257 sub eax, ecx
05258 imul eax, UDist
05259 shl ecx, 8
05260 add eax, ecx
05261 mov [B1], eax
05262
05263 add edx, GLightWidth
05264 add edx, GLightWidth
05265 add edx, GLightWidth
05266
05267
05268 xor ecx, ecx
05269 mov cl, [edx+3]
05270 mov eax, ecx
05271 mov cl, [edx+0]
05272 sub eax, ecx
05273 imul eax, UDist
05274 shl ecx, 8
05275 add eax, ecx
05276 mov [R2], eax
05277
05278 xor ecx, ecx
05279 mov cl, [edx+4]
05280 mov eax, ecx
05281 mov cl, [edx+1]
05282 sub eax, ecx
05283 imul eax, UDist
05284 shl ecx, 8
05285 add eax, ecx
05286 mov [G2], eax
05287
05288 xor ecx, ecx
05289 mov cl, [edx+5]
05290 mov eax, ecx
05291 mov cl, [edx+2]
05292 sub eax, ecx
05293 imul eax, UDist
05294 shl ecx, 8
05295 add eax, ecx
05296 mov [B2], eax
05297
05298
05299 mov eax, [R2]
05300 sub eax, [R1]
05301 imul eax, VDist
05302 sar eax, 8
05303 add eax, [R1]
05304 shr eax, 8
05305 and eax,0feh
05306
05307 mov [RR1], eax
05308
05309 mov eax, [G2]
05310 sub eax, [G1]
05311 imul eax, VDist
05312 sar eax, 8
05313 add eax, [G1]
05314 shr eax, 8
05315 and eax,0feh
05316
05317 mov [GG1], eax
05318
05319 mov eax, [B2]
05320 sub eax, [B1]
05321 imul eax, VDist
05322 sar eax, 8
05323 add eax, [B1]
05324 shr eax, 8
05325 and eax,0feh
05326
05327 mov [BB1], eax
05328
05329 fstp [FTemp0] ; UZR ZRi VZR UL VL
05330 fstp [FTemp1] ; ZRi VZR UL VL
05331 fstp [FTemp2] ; VZR UL VL
05332 fstp [FTemp3] ; UL VL
05333 fstp [FTemp4] ; VL
05334 fstp [FTemp5] ;
05335
05336 fild [RR1] ; LR
05337 fild [GG1] ; LG LR
05338 fild [BB1] ; LB LG LR
05339
05340
05341 mov ebx,dword ptr[U1]
05342 mov eax,dword ptr[V1]
05343
05344 add ebx,dword ptr[UAdjust2]
05345 add eax,dword ptr[VAdjust2]
05346
05347 mov ecx,[VShift]
05348 mov dword ptr[Bucket],ebx
05349
05350 shl eax,cl
05351
05352 push ebp
05353
05354 mov dword ptr[Bucket2],eax
05355 mov ebp,dword ptr[DeltaV]
05356
05357 and eax,[GHMaskShifted16]
05358 and ebx,[GWMaskShifted]
05359
05360 shl ebp,cl
05361 add eax,ebx
05362
05363 mov edx,dword ptr[Bucket2]
05364 mov esi,pTex
05365
05366 shr eax,16
05367 mov dword ptr[DeltaV],ebp
05368
05369 mov ebx,dword ptr[Bucket]
05370
05371 mov ax,word ptr[2*eax+esi]
05372 add edx,dword ptr[DeltaV]
05373
05374 xor eax,0
05375 mov ecx,edx
05376
05377 add ebx,dword ptr[DeltaU]
05378 and ecx,[GHMaskShifted16]
05379
05380 rol eax,16
05381 and ebx,[GWMaskShifted]
05382
05383 xor eax,0
05384 add ecx,ebx
05385
05386 add edx,dword ptr[DeltaV]
05387 mov esi,pTex
05388
05389 shr ecx,16
05390 add ebx,dword ptr[DeltaU]
05391
05392 mov ax,word ptr[2*ecx+esi]
05393 mov ecx,edx
05394
05395 xor eax,0
05396 and ecx,[GHMaskShifted16]
05397
05398 mov esi,eax
05399 mov ebp,eax
05400
05401 and esi,REDMASK
05402 and ebp,GREENMASK
05403
05404 mov dword ptr[Red],esi
05405 mov dword ptr[Green],ebp
05406
05407 fild qword ptr[Red] ; r LB LG LR
05408
05409 mov ebp,eax
05410
05411 fmul st,st(3) ; R LB LG LR
05412 fild qword ptr[Green] ; g R LB LG LR
05413
05414 and ebp,BLUEMASK
05415 and ebx,[GWMaskShifted]
05416
05417 mov dword ptr[Blue],ebp
05418
05419 fmul st,st(3) ; G R LB LG LR
05420 fild [Blue] ; b G R LB LG LR
05421 fmul st,st(3) ; B G R LB LG LR
05422 fxch st(2) ; R G B LB LG LR
05423 fadd qword ptr[Magic] ; Rk G B LB LG LR
05424 fxch st(1) ; G Rk B LB LG LR
05425 fadd qword ptr[Magic] ; Gk Rk B LB LG LR
05426 fxch st(2) ; B Rk Gk LB LG LR
05427 fadd qword ptr[Magic] ; Bk Rk Gk LB LG LR
05428 fxch st(1) ; Rk Bk Gk LB LG LR
05429 fstp qword ptr[Bucket] ; Bk Gk LB LG LR
05430 fstp qword ptr[Bucket2] ; Gk LB LG LR
05431
05432 mov eax,dword ptr[Bucket]
05433 mov ebp,dword ptr[Bucket2]
05434
05435 fstp qword ptr[Bucket] ; LB LG LR
05436
05437 and eax,REDMASK
05438 and ebp,BLUEMASK
05439
05440 mov esi,dword ptr[Bucket]
05441 or ebp,eax
05442
05443 add ecx,ebx
05444 and esi,GREENMASK
05445
05446 shr ecx,16
05447 or ebp,esi
05448
05449 xor eax,0
05450 mov esi,pTex
05451
05452 rol ebp,16
05453 add edx,dword ptr[DeltaV]
05454
05455 mov ax,word ptr[2*ecx+esi]
05456 mov ecx,[ZVal]
05457
05458 mov esi,[ZVal]
05459 add ecx,[ZDelta]
05460
05461 shr esi,16
05462 mov [ZVal],ecx
05463
05464 mov ecx,pZBufferPtr
05465
05466 cmp si,word ptr[ecx+0]
05467 jle Skip0
05468
05469 mov dword ptr[edi],ebp
05470 mov word ptr[ecx+0],si
05471 mov word ptr[ecx+2],si
05472
05473 Skip0:
05474 mov ecx,edx
05475
05476 add ebx,dword ptr[DeltaU]
05477 and ecx,[GHMaskShifted16]
05478
05479 and ebx,[GWMaskShifted]
05480 xor eax,0
05481
05482 add ecx,ebx
05483 mov esi,pTex
05484
05485 rol eax,16
05486 add edx,dword ptr[DeltaV]
05487
05488 shr ecx,16
05489 add ebx,dword ptr[DeltaU]
05490
05491 mov ax,word ptr[2*ecx+esi]
05492
05493 mov ecx,edx
05494 xor eax,0
05495
05496 and ecx,[GHMaskShifted16]
05497 mov esi,eax
05498
05499 mov ebp,eax
05500 and esi,REDMASK
05501
05502 and ebp,GREENMASK
05503 mov dword ptr[Red],esi
05504
05505 mov dword ptr[Green],ebp
05507
05508
05509 fild qword ptr[Red] ; r LB LG LR
05510
05511 mov ebp,eax
05512
05513 fmul st,st(3) ; R LB LG LR
05514 fild qword ptr[Green] ; g R LB LG LR
05515
05516 and ebp,BLUEMASK
05517 and ebx,[GWMaskShifted]
05518
05519 mov dword ptr[Blue],ebp
05520
05521 fmul st,st(3) ; G R LB LG LR
05522 fild [Blue] ; b G R LB LG LR
05523 fmul st,st(3) ; B G R LB LG LR
05524 fxch st(2) ; R G B LB LG LR
05525 fadd qword ptr[Magic] ; Rk G B LB LG LR
05526 fxch st(1) ; G Rk B LB LG LR
05527 fadd qword ptr[Magic] ; Gk Rk B LB LG LR
05528 fxch st(2) ; B Rk Gk LB LG LR
05529 fadd qword ptr[Magic] ; Bk Rk Gk LB LG LR
05530 fxch st(1) ; Rk Bk Gk LB LG LR
05531 fstp qword ptr[Bucket] ; Bk Gk LB LG LR
05532 fstp qword ptr[Bucket2] ; Gk LB LG LR
05533
05534 mov eax,dword ptr[Bucket]
05535 mov ebp,dword ptr[Bucket2]
05536
05537 fstp qword ptr[Bucket] ; LB LG LR
05538
05539 and eax,REDMASK
05540 and ebp,BLUEMASK
05541
05542 mov esi,dword ptr[Bucket]
05543 or ebp,eax
05544
05545 add ecx,ebx
05546 and esi,GREENMASK
05547
05548 shr ecx,16
05549 or ebp,esi
05550
05551 xor eax,0
05552 mov esi,pTex
05553
05554 rol ebp,16
05555 add edx,dword ptr[DeltaV]
05556
05557 mov ax,word ptr[2*ecx+esi]
05558 mov ecx,[ZVal]
05559
05560 mov esi,[ZVal]
05561 add ecx,[ZDelta]
05562
05563 shr esi,16
05564 mov [ZVal],ecx
05565
05566 mov ecx,pZBufferPtr
05567
05568 cmp si,word ptr[ecx+4]
05569 jle Skip1
05570
05571 mov [edi+4],ebp
05572 mov word ptr[ecx+4],si
05573 mov word ptr[ecx+6],si
05574
05575 Skip1:
05576 mov ecx,edx
05577
05578 add ebx,dword ptr[DeltaU]
05579 and ecx,[GHMaskShifted16]
05580
05581 and ebx,[GWMaskShifted]
05582 xor eax,0
05583
05584 add ecx,ebx
05585 mov esi,pTex
05586
05587 rol eax,16
05588 add edx,dword ptr[DeltaV]
05589
05590 shr ecx,16
05591 add ebx,dword ptr[DeltaU]
05592
05593 mov ax,word ptr[2*ecx+esi]
05594
05595 mov ecx,edx
05596 xor eax,0
05597
05598 and ecx,[GHMaskShifted16]
05599 mov esi,eax
05600
05601 mov ebp,eax
05602 and esi,REDMASK
05603
05604 and ebp,GREENMASK
05605 mov dword ptr[Red],esi
05606
05607 mov dword ptr[Green],ebp
05609
05610
05611 fild qword ptr[Red] ; r LB LG LR
05612
05613 mov ebp,eax
05614
05615 fmul st,st(3) ; R LB LG LR
05616 fild qword ptr[Green] ; g R LB LG LR
05617
05618 and ebp,BLUEMASK
05619 and ebx,[GWMaskShifted]
05620
05621 mov dword ptr[Blue],ebp
05622
05623 fmul st,st(3) ; G R LB LG LR
05624 fild [Blue] ; b G R LB LG LR
05625 fmul st,st(3) ; B G R LB LG LR
05626 fxch st(2) ; R G B LB LG LR
05627 fadd qword ptr[Magic] ; Rk G B LB LG LR
05628 fxch st(1) ; G Rk B LB LG LR
05629 fadd qword ptr[Magic] ; Gk Rk B LB LG LR
05630 fxch st(2) ; B Rk Gk LB LG LR
05631 fadd qword ptr[Magic] ; Bk Rk Gk LB LG LR
05632 fxch st(1) ; Rk Bk Gk LB LG LR
05633 fstp qword ptr[Bucket] ; Bk Gk LB LG LR
05634 fstp qword ptr[Bucket2] ; Gk LB LG LR
05635
05636 mov eax,dword ptr[Bucket]
05637 mov ebp,dword ptr[Bucket2]
05638
05639 fstp qword ptr[Bucket] ; LB LG LR
05640
05641 and eax,REDMASK
05642 and ebp,BLUEMASK
05643
05644 mov esi,dword ptr[Bucket]
05645 or ebp,eax
05646
05647 add ecx,ebx
05648 and esi,GREENMASK
05649
05650 shr ecx,16
05651 or ebp,esi
05652
05653 xor eax,0
05654 mov esi,pTex
05655
05656 rol ebp,16
05657 add edx,dword ptr[DeltaV]
05658
05659 mov ax,word ptr[2*ecx+esi]
05660 mov ecx,[ZVal]
05661
05662 mov esi,[ZVal]
05663 add ecx,[ZDelta]
05664
05665 shr esi,16
05666 mov [ZVal],ecx
05667
05668 mov ecx,pZBufferPtr
05669
05670 cmp si,word ptr[ecx+8]
05671 jle Skip2
05672
05673 mov [edi+8],ebp
05674 mov word ptr[ecx+8],si
05675 mov word ptr[ecx+10],si
05676
05677 Skip2:
05678 mov ecx,edx
05679
05680 add ebx,dword ptr[DeltaU]
05681 and ecx,[GHMaskShifted16]
05682
05683 and ebx,[GWMaskShifted]
05684 xor eax,0
05685
05686 add ecx,ebx
05687 mov esi,pTex
05688
05689 shr ecx,16
05690 add edx,dword ptr[DeltaV]
05691
05692 rol eax,16
05693 mov ax,word ptr[2*ecx+esi]
05694
05695 mov ecx,edx
05696 xor eax,0
05697
05698 and ecx,[GHMaskShifted16]
05699 mov esi,eax
05700
05701 mov ebp,eax
05702 and esi,REDMASK
05703
05704 and ebp,GREENMASK
05705 mov dword ptr[Red],esi
05706
05707 mov dword ptr[Green],ebp
05708 add ebx,dword ptr[DeltaU]
05710
05711
05712 fild qword ptr[Red] ; r LB LG LR
05713
05714 mov ebp,eax
05715
05716 fmul st,st(3) ; R LB LG LR
05717 fild qword ptr[Green] ; g R LB LG LR
05718
05719 and ebp,BLUEMASK
05720 and ebx,[GWMaskShifted]
05721
05722 mov dword ptr[Blue],ebp
05723
05724 fmul st,st(3) ; G R LB LG LR
05725 fild [Blue] ; b G R LB LG LR
05726 fmul st,st(3) ; B G R LB LG LR
05727 fxch st(2) ; R G B LB LG LR
05728 fadd qword ptr[Magic] ; Rk G B LB LG LR
05729 fxch st(1) ; G Rk B LB LG LR
05730 fadd qword ptr[Magic] ; Gk Rk B LB LG LR
05731 fxch st(2) ; B Rk Gk LB LG LR
05732 fadd qword ptr[Magic] ; Bk Rk Gk LB LG LR
05733 fxch st(1) ; Rk Bk Gk LB LG LR
05734 fstp qword ptr[Bucket] ; Bk Gk LB LG LR
05735 fstp qword ptr[Bucket2] ; Gk LB LG LR
05736
05737 mov eax,dword ptr[Bucket]
05738 mov ebp,dword ptr[Bucket2]
05739
05740 fstp qword ptr[Bucket] ; LB LG LR
05741
05742 and eax,REDMASK
05743 and ebp,BLUEMASK
05744
05745 mov esi,dword ptr[Bucket]
05746 or ebp,eax
05747
05748 add ecx,ebx
05749 and esi,GREENMASK
05750
05751 shr ecx,16
05752 or ebp,esi
05753
05754 xor eax,0
05755 mov esi,pTex
05756
05757 rol ebp,16
05758 add edx,dword ptr[DeltaV]
05759
05760 mov ax,word ptr[2*ecx+esi]
05761 mov ecx,[ZVal]
05762
05763 mov esi,[ZVal]
05764 add ecx,[ZDelta]
05765
05766 shr esi,16
05767 mov [ZVal],ecx
05768
05769 mov ecx,pZBufferPtr
05770
05771 cmp si,word ptr[ecx+12]
05772 jle Skip3
05773
05774 mov [edi+12],ebp
05775 mov word ptr[ecx+12],si
05776 mov word ptr[ecx+14],si
05777
05778 Skip3:
05779 mov ecx,edx
05780
05781 add ebx,dword ptr[DeltaU]
05782 and ecx,[GHMaskShifted16]
05783
05784 and ebx,[GWMaskShifted]
05785 xor eax,0
05786
05787 add ecx,ebx
05788 mov esi,pTex
05789
05790 shr ecx,16
05791 add edx,dword ptr[DeltaV]
05792
05793 rol eax,16
05794 mov ax,word ptr[2*ecx+esi]
05795
05796 mov ecx,edx
05797 xor eax,0
05798
05799 and ecx,[GHMaskShifted16]
05800 mov esi,eax
05801
05802 mov ebp,eax
05803 and esi,REDMASK
05804
05805 and ebp,GREENMASK
05806 mov dword ptr[Red],esi
05807
05808 mov dword ptr[Green],ebp
05809 add ebx,dword ptr[DeltaU]
05811
05812
05813 fild qword ptr[Red] ; r LB LG LR
05814
05815 mov ebp,eax
05816
05817 fmul st,st(3) ; R LB LG LR
05818 fild qword ptr[Green] ; g R LB LG LR
05819
05820 and ebp,BLUEMASK
05821 and ebx,[GWMaskShifted]
05822
05823 mov dword ptr[Blue],ebp
05824
05825 fmul st,st(3) ; G R LB LG LR
05826 fild [Blue] ; b G R LB LG LR
05827 fmul st,st(3) ; B G R LB LG LR
05828 fxch st(2) ; R G B LB LG LR
05829 fadd qword ptr[Magic] ; Rk G B LB LG LR
05830 fxch st(1) ; G Rk B LB LG LR
05831 fadd qword ptr[Magic] ; Gk Rk B LB LG LR
05832 fxch st(2) ; B Rk Gk LB LG LR
05833 fadd qword ptr[Magic] ; Bk Rk Gk LB LG LR
05834 fxch st(1) ; Rk Bk Gk LB LG LR
05835 fstp qword ptr[Bucket] ; Bk Gk LB LG LR
05836 fstp qword ptr[Bucket2] ; Gk LB LG LR
05837
05838 mov eax,dword ptr[Bucket]
05839 mov ebp,dword ptr[Bucket2]
05840
05841 fstp qword ptr[Bucket] ; LB LG LR
05842
05843 and eax,REDMASK
05844 and ebp,BLUEMASK
05845
05846 mov esi,dword ptr[Bucket]
05847 or ebp,eax
05848
05849 add ecx,ebx
05850 and esi,GREENMASK
05851
05852 shr ecx,16
05853 or ebp,esi
05854
05855 xor eax,0
05856 mov esi,pTex
05857
05858 rol ebp,16
05859 add edx,dword ptr[DeltaV]
05860
05861 mov ax,word ptr[2*ecx+esi]
05862 mov ecx,[ZVal]
05863
05864 mov esi,[ZVal]
05865 add ecx,[ZDelta]
05866
05867 shr esi,16
05868 mov [ZVal],ecx
05869
05870 mov ecx,pZBufferPtr
05871
05872 cmp si,word ptr[ecx+16]
05873 jle Skip4
05874
05875 mov [edi+16],ebp
05876 mov word ptr[ecx+16],si
05877 mov word ptr[ecx+18],si
05878
05879 Skip4:
05880 mov ecx,edx
05881
05882 add ebx,dword ptr[DeltaU]
05883 and ecx,[GHMaskShifted16]
05884
05885 and ebx,[GWMaskShifted]
05886 xor eax,0
05887
05888 add ecx,ebx
05889 mov esi,pTex
05890
05891 shr ecx,16
05892 add edx,dword ptr[DeltaV]
05893
05894 rol eax,16
05895 mov ax,word ptr[2*ecx+esi]
05896
05897 mov ecx,edx
05898 xor eax,0
05899
05900 and ecx,[GHMaskShifted16]
05901 mov esi,eax
05902
05903 mov ebp,eax
05904 and esi,REDMASK
05905
05906 and ebp,GREENMASK
05907 mov dword ptr[Red],esi
05908
05909 mov dword ptr[Green],ebp
05910 add ebx,dword ptr[DeltaU]
05912
05913
05914 fild qword ptr[Red] ; r LB LG LR
05915
05916 mov ebp,eax
05917
05918 fmul st,st(3) ; R LB LG LR
05919 fild qword ptr[Green] ; g R LB LG LR
05920
05921 and ebp,BLUEMASK
05922 and ebx,[GWMaskShifted]
05923
05924 mov dword ptr[Blue],ebp
05925
05926 fmul st,st(3) ; G R LB LG LR
05927 fild [Blue] ; b G R LB LG LR
05928 fmul st,st(3) ; B G R LB LG LR
05929 fxch st(2) ; R G B LB LG LR
05930 fadd qword ptr[Magic] ; Rk G B LB LG LR
05931 fxch st(1) ; G Rk B LB LG LR
05932 fadd qword ptr[Magic] ; Gk Rk B LB LG LR
05933 fxch st(2) ; B Rk Gk LB LG LR
05934 fadd qword ptr[Magic] ; Bk Rk Gk LB LG LR
05935 fxch st(1) ; Rk Bk Gk LB LG LR
05936 fstp qword ptr[Bucket] ; Bk Gk LB LG LR
05937 fstp qword ptr[Bucket2] ; Gk LB LG LR
05938
05939 mov eax,dword ptr[Bucket]
05940 mov ebp,dword ptr[Bucket2]
05941
05942 fstp qword ptr[Bucket] ; LB LG LR
05943
05944 and eax,REDMASK
05945 and ebp,BLUEMASK
05946
05947 mov esi,dword ptr[Bucket]
05948 or ebp,eax
05949
05950 add ecx,ebx
05951 and esi,GREENMASK
05952
05953 shr ecx,16
05954 or ebp,esi
05955
05956 xor eax,0
05957 mov esi,pTex
05958
05959 rol ebp,16
05960 add edx,dword ptr[DeltaV]
05961
05962 mov ax,word ptr[2*ecx+esi]
05963 mov ecx,[ZVal]
05964
05965 mov esi,[ZVal]
05966 add ecx,[ZDelta]
05967
05968 shr esi,16
05969 mov [ZVal],ecx
05970
05971 mov ecx,pZBufferPtr
05972
05973 cmp si,word ptr[ecx+20]
05974 jle Skip5
05975
05976 mov [edi+20],ebp
05977 mov word ptr[ecx+20],si
05978 mov word ptr[ecx+22],si
05979
05980 Skip5:
05981 mov ecx,edx
05982
05983 add ebx,dword ptr[DeltaU]
05984 and ecx,[GHMaskShifted16]
05985
05986 and ebx,[GWMaskShifted]
05987 xor eax,0
05988
05989 add ecx,ebx
05990 mov esi,pTex
05991
05992 shr ecx,16
05993 add edx,dword ptr[DeltaV]
05994
05995 rol eax,16
05996 mov ax,word ptr[2*ecx+esi]
05997
05998 mov ecx,edx
05999 xor eax,0
06000
06001 and ecx,[GHMaskShifted16]
06002 mov esi,eax
06003
06004 mov ebp,eax
06005 and esi,REDMASK
06006
06007 and ebp,GREENMASK
06008 mov dword ptr[Red],esi
06009
06010 mov dword ptr[Green],ebp
06011 add ebx,dword ptr[DeltaU]
06013
06014
06015 fild qword ptr[Red] ; r LB LG LR
06016
06017 mov ebp,eax
06018
06019 fmul st,st(3) ; R LB LG LR
06020 fild qword ptr[Green] ; g R LB LG LR
06021
06022 and ebp,BLUEMASK
06023 and ebx,[GWMaskShifted]
06024
06025 mov dword ptr[Blue],ebp
06026
06027 fmul st,st(3) ; G R LB LG LR
06028 fild [Blue] ; b G R LB LG LR
06029 fmul st,st(3) ; B G R LB LG LR
06030 fxch st(2) ; R G B LB LG LR
06031 fadd qword ptr[Magic] ; Rk G B LB LG LR
06032 fxch st(1) ; G Rk B LB LG LR
06033 fadd qword ptr[Magic] ; Gk Rk B LB LG LR
06034 fxch st(2) ; B Rk Gk LB LG LR
06035 fadd qword ptr[Magic] ; Bk Rk Gk LB LG LR
06036 fxch st(1) ; Rk Bk Gk LB LG LR
06037 fstp qword ptr[Bucket] ; Bk Gk LB LG LR
06038 fstp qword ptr[Bucket2] ; Gk LB LG LR
06039
06040 mov eax,dword ptr[Bucket]
06041 mov ebp,dword ptr[Bucket2]
06042
06043 fstp qword ptr[Bucket] ; LB LG LR
06044
06045 and eax,REDMASK
06046 and ebp,BLUEMASK
06047
06048 mov esi,dword ptr[Bucket]
06049 or ebp,eax
06050
06051 add ecx,ebx
06052 and esi,GREENMASK
06053
06054 shr ecx,16
06055 or ebp,esi
06056
06057 xor eax,0
06058 mov esi,pTex
06059
06060 rol ebp,16
06061 add edx,dword ptr[DeltaV]
06062
06063 mov ax,word ptr[2*ecx+esi]
06064 mov ecx,[ZVal]
06065
06066 mov esi,[ZVal]
06067 add ecx,[ZDelta]
06068
06069 shr esi,16
06070 mov [ZVal],ecx
06071
06072 mov ecx,pZBufferPtr
06073
06074 cmp si,word ptr[ecx+24]
06075 jle Skip6
06076
06077 mov [edi+24],ebp
06078 mov word ptr[ecx+24],si
06079 mov word ptr[ecx+26],si
06080
06081 Skip6:
06082 mov ecx,edx
06083
06084 add ebx,dword ptr[DeltaU]
06085 and ecx,[GHMaskShifted16]
06086
06087 and ebx,[GWMaskShifted]
06088 xor eax,0
06089
06090 add ecx,ebx
06091 mov esi,pTex
06092
06093 shr ecx,16
06094 add edx,dword ptr[DeltaV]
06095
06096 rol eax,16
06097 mov ax,word ptr[2*ecx+esi]
06098
06099 mov ecx,edx
06100 xor eax,0
06101
06102 and ecx,[GHMaskShifted16]
06103 mov esi,eax
06104
06105 mov ebp,eax
06106 and esi,REDMASK
06107
06108 and ebp,GREENMASK
06109 mov dword ptr[Red],esi
06110
06111 mov dword ptr[Green],ebp
06112 add ebx,dword ptr[DeltaU]
06114 fild qword ptr[Red] ; r LB LG LR
06115
06116 mov ebp,eax
06117
06118 fmul st,st(3) ; R LB LG LR
06119 fild qword ptr[Green] ; g R LB LG LR
06120
06121 and ebp,BLUEMASK
06122 and ebx,[GWMaskShifted]
06123
06124 mov dword ptr[Blue],ebp
06125
06126 fmul st,st(3) ; G R LB LG LR
06127 fild [Blue] ; b G R LB LG LR
06128 fmul st,st(3) ; B G R LB LG LR
06129 fxch st(2) ; R G B LB LG LR
06130 fadd qword ptr[Magic] ; Rk G B LB LG LR
06131 fxch st(1) ; G Rk B LB LG LR
06132 fadd qword ptr[Magic] ; Gk Rk B LB LG LR
06133 fxch st(2) ; B Rk Gk LB LG LR
06134 fadd qword ptr[Magic] ; Bk Rk Gk LB LG LR
06135 fxch st(1) ; Rk Bk Gk LB LG LR
06136 fstp qword ptr[Bucket] ; Bk Gk LB LG LR
06137 fstp qword ptr[Bucket2] ; Gk LB LG LR
06138
06139 mov eax,dword ptr[Bucket]
06140 mov ebp,dword ptr[Bucket2]
06141
06142 fstp qword ptr[Bucket] ; LB LG LR
06143
06144 and eax,REDMASK
06145 and ebp,BLUEMASK
06146
06147 mov esi,dword ptr[Bucket]
06148 or ebp,eax
06149
06150 fstp dword ptr[Bucket]
06151 fstp dword ptr[Bucket2]
06152 fstp dword ptr[Bucket]
06153
06154 and esi,GREENMASK
06155
06156 fld dword ptr[FTemp5]
06157 fld dword ptr[FTemp4]
06158
06159 or ebp,esi
06160
06161 fld dword ptr[FTemp3]
06162 fld dword ptr[FTemp2]
06163
06164 rol ebp,16
06165
06166 fld dword ptr[FTemp1]
06167 fld dword ptr[FTemp0]
06168 mov ecx,[ZVal]
06169
06170 mov esi,[ZVal]
06171 add ecx,[ZDelta]
06172
06173 shr esi,16
06174 mov [ZVal],ecx
06175
06176 mov ecx,pZBufferPtr
06177
06178 cmp si,word ptr[ecx+28]
06179 jle Skip7
06180
06181 mov dword ptr[edi+28],ebp
06182 mov word ptr[ecx+28],si
06183 mov word ptr[ecx+30],si
06184
06185 Skip7:
06186 pop ebp
06187
06188
06189 ; get corrected right side deltas ; st0 st1 st2 st3 st4 st5 st6 st7
06190 ; ZR UZR ZRi VZR UL VL
06191 fld st ; ZR ZR UZR ZRi VZR UL VL
06192 fmul st,st(4) ; VR ZR UZR ZRi VZR UL VL
06193 fxch st(1) ; ZR VR UZR ZRi VZR UL VL
06194 fmul st,st(2) ; UR VR UZR ZRi VZR UL VL
06195
06196 add edi,32 ; move screen pointer to start of next aspan
06197 add [pZBufferPtr],32
06198 dec [NumASpans] ; dec num affine spans
06199 jnz SpanLoop16
06200
06201 HandleLeftoverPixels16:
06202
06203 mov esi,[pTex]
06204
06205
06206 cmp [RemainingCount],0
06207 jz FPUReturn16
06208
06209
06210 fstp dword ptr[FloatTemp] ; VR UZR ZRi VZR UL VL
06211 fld st(4) ; UL VR UZR ZRi VZR UL VL
06212 fmul [GLMapMulU] ; ULL VR UZR ZRi VZR UL VL
06213 fld st(5) ; UL ULL VR UZR ZRi VZR UL VL
06214 fadd qword ptr[MipMagic] ; ULk ULL VR UZR ZRi VZR UL VL
06215 fxch st(1) ; ULL ULk VR UZR ZRi VZR UL VL
06216 fadd qword ptr[MipMagic] ; ULLk ULk VR UZR ZRi VZR UL VL
06217 fxch st(1) ; ULk ULLk VR UZR ZRi VZR UL VL
06218 fstp qword ptr[Bucket] ; ULLk VR UZR ZRi VZR UL VL
06219 fstp qword ptr[Bucket2] ; VR UZR ZRi VZR UL VL
06220
06221 mov ebx,dword ptr[Bucket]
06222 mov eax,dword ptr[Bucket2]
06223
06224 fld st(5) ; VL VR UZR ZRi VZR UL VL
06225 fmul [GLMapMulV] ; VLL VR UZR ZRi VZR UL VL
06226
06227 add ebx,dword ptr[UAdjust]
06228 add eax,dword ptr[UAdjustL]
06229
06230 mov [U1],ebx
06231 mov [UFixed],eax
06232
06233 fld st(6) ; VL VLL VR UZR ZRi VZR UL VL
06234 fadd qword ptr[MipMagic] ; VLk VLL VR UZR ZRi VZR UL VL
06235 fxch st(1) ; VLL VLk VR UZR ZRi VZR UL VL
06236 fadd qword ptr[MipMagic] ; VLLk VLk VR UZR ZRi VZR UL VL
06237 fxch st(1) ; VLk VLLk VR UZR ZRi VZR UL VL
06238 fstp qword ptr[Bucket] ; VLLk VR UZR ZRi VZR UL VL
06239 fstp qword ptr[Bucket2] ; VR UZR ZRi VZR UL VL
06240
06241 mov ebx,dword ptr[Bucket]
06242 mov eax,dword ptr[Bucket2]
06243
06244 fld dword ptr[FloatTemp] ; UR VR UZR ZRi VZR UL dV
06245
06246 add ebx,dword ptr[VAdjust]
06247 add eax,dword ptr[VAdjustL]
06248
06249 mov [V1],ebx
06250 mov [VFixed],eax
06251
06252 dec [RemainingCount]
06253 jz OnePixelSpan16
06254
06255
06256
06257 fstp [FloatTemp] ; inv. inv. inv. inv. UL VL
06258 fstp [FloatTemp] ; inv. inv. inv. UL VL
06259 fstp [FloatTemp] ; inv. inv. UL VL
06260 fstp [FloatTemp] ; inv. UL VL
06261 fstp [FloatTemp] ; UL VL
06262 fild [y] ; y UL VL
06263 fild [x2] ; xr y UL VL
06264
06265 fld [UDivZStepY] ; UZdY xr y UL VL
06266 fld [UDivZStepX] ; UZdX UZdY xr y UL VL
06267 fmul st,st(2) ; UZX UZdY xr y UL VL
06268 fld [VDivZStepY] ; VZdY UZX UZdY xr y UL VL
06269 fld [VDivZStepX] ; VZdX VZdY UZX UZdY xr y UL VL
06270 fxch st(3) ; UZdy VZdY UZX VZdX xr y UL VL
06271 fmul st,st(5) ; UZY VZdY UZX VZdX xr y UL VL
06272 fxch st(2) ; UZX VZdY UZY VZdX xr y UL VL
06273 fadd [UDivZOrigin] ; UZXS VZdY UZY VZdX xr y UL VL
06274 fxch st(3) ; VZdX VZdY UZY UZXS xr y UL VL
06275 fmul st,st(4) ; VZX VZdY UZY UZXS xr y UL VL
06276 fxch st(2) ; UZY VZdY VZX UZXS xr y UL VL
06277 faddp st(3),st ; VZdY VZX UZ xr y UL VL
06278 fmul st,st(4) ; VZY VZX UZ xr y UL VL
06279 fxch st(1) ; VZX VZY UZ xr y UL VL
06280 fadd [VDivZOrigin] ; VZXS VZY UZ xr y UL VL
06281 fld [ZiStepX] ; ZdX VZXS VZY UZ xr y UL VL
06282 fmulp st(4),st ; VZXS VZY UZ ZX y UL VL
06283 faddp st(1),st ; VZ UZ ZX y UL VL
06284 fld [ZiStepY] ; ZdY VZ UZ ZX y UL VL
06285 fmulp st(4),st ; VZ UZ ZX ZY UL VL
06286 fxch st(2) ; ZX UZ VZ ZY UL VL
06287 fadd [ZiOrigin] ; ZXS UZ VZ ZY UL VL
06288
06289 faddp st(3),st ; UZ VZ Zi UL VL
06290 fld1 ; 1 UZ VZ Zi UL VL
06291 fdiv st,st(3) ; ZR UZ VZ Zi UL VL
06292
06293 fld st ; ZR ZR UZ VZ Zi UL VL
06294 fmul st,st(3) ; VR ZR UZ VZ Zi UL VL
06295 fxch st(1) ; ZR VR UZ VZ Zi UL VL
06296 fmul st,st(2) ; UR VR UZ VZ Zi UL VL
06297
06298
06299
06300 ; calculate deltas ; st0 st1 st2 st3 st4 st5 st6 st7
06301 fsubr st(5),st ; UR VR inv. inv. inv. dU VL
06302 fxch st(1) ; VR UR inv. inv. inv. dU VL
06303 fsubr st(6),st ; VR UR inv. inv. inv. dU dV
06304 fxch st(6) ; dV UR inv. inv. inv. dU VR
06305 fidiv dword ptr[RemainingCount];dv UR inv. inv. inv. dU VR
06306 fadd qword ptr[MipMagic] ; dvk UR inv. inv. inv. dU VR
06307 fxch st(5) ; dU UR inv. inv. inv. dvk VR
06308 fidiv dword ptr[RemainingCount];du UR inv. inv. inv. dvk VR
06309 fadd qword ptr[MipMagic] ; duk UR inv. inv. inv. dvk VR
06310 fxch st(5) ; dvk UR inv. inv. inv. duk VR
06311 fstp qword ptr[DeltaV] ; UR inv. inv. inv. duk VR
06312 fxch st(4) ; duk inv. inv. inv. UR VR
06313 fstp qword ptr[DeltaU] ; inv. inv. inv. UR VR
06314 fld st(1) ; inv. inv. inv. inv. UR VR
06315 fld st(2) ; inv. inv. inv. inv. inv. UR VR
06316
06317 OnePixelSpan16:
06318
06319 mov ebx,[UFixed]
06320 cmp ebx,MaxU
06321 jle TryClampU116
06322 mov ecx,MaxU
06323 mov dword ptr[UFixed],ecx
06324 jmp NoClampU116
06325
06326 TryClampU116:
06327 cmp ebx,0
06328 jge NoClampU116
06329 mov dword ptr[UFixed],0
06330 NoClampU116:
06331 mov eax,[VFixed]
06332 cmp eax,MaxV
06333 jle TryClampV116
06334 mov ecx,MaxV
06335 mov dword ptr[VFixed],ecx
06336 jmp NoClampV116
06337
06338 TryClampV116:
06339 cmp eax,0
06340 jge NoClampV116
06341 mov dword ptr[VFixed],0
06342
06343 NoClampV116:
06344 mov esi,dword ptr[UFixed]
06345 mov eax,dword ptr[VFixed]
06346
06347 mov ecx, GMipLevel4_8
06348 sar esi, cl
06349 sar eax, cl
06350 and esi, 0ffh
06351 and eax, 0ffh
06352 mov UDist, esi
06353 mov VDist, eax
06354
06355 mov esi,dword ptr[UFixed]
06356 mov eax,dword ptr[VFixed]
06357 mov ecx, GMipLevel20
06358 shr esi, cl
06359 shr eax, cl
06360
06361 imul eax, GLightWidth
06362 add esi, eax
06363
06364 mov edx, esi
06365 shl esi, 1
06366 add edx, esi
06367
06368 add edx, GLightData
06369
06370
06371 xor ecx, ecx
06372 mov cl, [edx+3]
06373 mov eax, ecx
06374 mov cl, [edx+0]
06375 sub eax, ecx
06376 imul eax, UDist
06377 shl ecx, 8
06378 add eax, ecx
06379 mov [R1], eax
06380
06381 xor ecx, ecx
06382 mov cl, [edx+4]
06383 mov eax, ecx
06384 mov cl, [edx+1]
06385 sub eax, ecx
06386 imul eax, UDist
06387 shl ecx, 8
06388 add eax, ecx
06389 mov [G1], eax
06390
06391 xor ecx, ecx
06392 mov cl, [edx+5]
06393 mov eax, ecx
06394 mov cl, [edx+2]
06395 sub eax, ecx
06396 imul eax, UDist
06397 shl ecx, 8
06398 add eax, ecx
06399 mov [B1], eax
06400
06401 add edx, GLightWidth
06402 add edx, GLightWidth
06403 add edx, GLightWidth
06404
06405
06406 xor ecx, ecx
06407 mov cl, [edx+3]
06408 mov eax, ecx
06409 mov cl, [edx+0]
06410 sub eax, ecx
06411 imul eax, UDist
06412 shl ecx, 8
06413 add eax, ecx
06414 mov [R2], eax
06415
06416 xor ecx, ecx
06417 mov cl, [edx+4]
06418 mov eax, ecx
06419 mov cl, [edx+1]
06420 sub eax, ecx
06421 imul eax, UDist
06422 shl ecx, 8
06423 add eax, ecx
06424 mov [G2], eax
06425
06426 xor ecx, ecx
06427 mov cl, [edx+5]
06428 mov eax, ecx
06429 mov cl, [edx+2]
06430 sub eax, ecx
06431 imul eax, UDist
06432 shl ecx, 8
06433 add eax, ecx
06434 mov [B2], eax
06435
06436
06437 mov eax, [R2]
06438 sub eax, [R1]
06439 imul eax, VDist
06440 sar eax, 8
06441 add eax, [R1]
06442 shr eax, 8
06443 and eax,0feh
06444
06445 mov [RR1], eax
06446
06447 mov eax, [G2]
06448 sub eax, [G1]
06449 imul eax, VDist
06450 sar eax, 8
06451 add eax, [G1]
06452 shr eax, 8
06453 and eax,0feh
06454
06455 mov [GG1], eax
06456
06457 mov eax, [B2]
06458 sub eax, [B1]
06459 imul eax, VDist
06460 sar eax, 8
06461 add eax, [B1]
06462 shr eax, 8
06463 and eax,0feh
06464
06465 mov [BB1], eax
06466
06467 fstp [FTemp0]
06468 fstp [FTemp1]
06469 fstp [FTemp2]
06470 fstp [FTemp3]
06471 fstp [FTemp4]
06472 fstp [FTemp5]
06473
06474 mov ebx,dword ptr[U1]
06475 mov edx,dword ptr[V1]
06476
06477 fild [RR1] ; LR
06478 fild [GG1] ; LG LR
06479 fild [BB1] ; LB LG LR
06480
06481 mov ecx,[VShift]
06482 add edx,dword ptr[VAdjust2]
06483
06484 add ebx,dword ptr[UAdjust2]
06485 mov eax,dword ptr[DeltaV]
06486
06487 shl eax,cl
06488 mov esi,pTex
06489
06490 shl edx,cl
06491 mov dword ptr[DeltaV],eax
06492
06493 mov eax,[ZDelta]
06494 mov ecx,[ZVal]
06495
06496 sar eax,1
06497 push ebp
06498
06499 mov [ZDelta],eax
06500
06501
06502 LeftoverLoop16:
06503 mov eax,edx
06504 and ebx,[GWMaskShifted]
06505
06506 and eax,[GHMaskShifted16]
06507
06508 add eax,ebx
06509 add ebx,dword ptr[DeltaU]
06510
06511 shr eax,16
06512 add edi,2
06513
06514 mov ax,word ptr[2*eax+esi]
06515 add edx,dword ptr[DeltaV]
06516
06517 xor eax,0
06518
06519 mov esi,eax
06520 mov ebp,eax
06521
06522 and esi,REDMASK
06523 and ebp,GREENMASK
06524
06525 mov dword ptr[Red],esi
06526 mov dword ptr[Green],ebp
06527
06528 fild qword ptr[Red] ; r LB LG LR
06529
06530 mov ebp,eax
06531
06532 fmul st,st(3) ; R LB LG LR
06533 fild qword ptr[Green] ; g R LB LG LR
06534
06535 and ebp,BLUEMASK
06536
06537 mov dword ptr[Blue],ebp
06538
06539 fmul st,st(3) ; G R LB LG LR
06540 fild [Blue] ; b G R LB LG LR
06541 fmul st,st(3) ; B G R LB LG LR
06542 fxch st(2) ; R G B LB LG LR
06543 fadd qword ptr[Magic] ; Rk G B LB LG LR
06544 fxch st(1) ; G Rk B LB LG LR
06545 fadd qword ptr[Magic] ; Gk Rk B LB LG LR
06546 fxch st(2) ; B Rk Gk LB LG LR
06547 fadd qword ptr[Magic] ; Bk Rk Gk LB LG LR
06548 fxch st(1) ; Rk Bk Gk LB LG LR
06549 fstp qword ptr[Bucket] ; Bk Gk LB LG LR
06550 fstp qword ptr[Bucket2] ; Gk LB LG LR
06551
06552 mov eax,dword ptr[Bucket]
06553 mov ebp,dword ptr[Bucket2]
06554
06555 fstp qword ptr[Bucket] ; LB LG LR
06556
06557 and eax,REDMASK
06558 and ebp,BLUEMASK
06559
06560 mov esi,dword ptr[Bucket]
06561 or ebp,eax
06562
06563 and esi,GREENMASK
06564 mov eax,ecx
06565
06566 or ebp,esi
06567
06568 shr eax,16
06569 mov esi,pZBufferPtr
06570
06571 cmp ax,word ptr[esi]
06572 jle SkipLeftOver
06573
06574 mov word ptr[edi-2],bp
06575 mov word ptr[esi],ax
06576
06577 SkipLeftOver:
06578 add ecx,[ZDelta]
06579 mov esi,pTex
06580
06581 add pZBufferPtr,2
06582
06583 dec [RemainingCount]
06584 jge LeftoverLoop16
06585
06586 pop ebp
06587
06588
06589 FPUReturn16:
06590 ffree st(0)
06591 ffree st(1)
06592 ffree st(2)
06593 ffree st(3)
06594 ffree st(4)
06595 ffree st(5)
06596 ffree st(6)
06597
06598 Return16:
06599 pop edi
06600 pop esi
06601 pop ecx
06602 pop ebx
06603 }
06604 }
06605
06606 void DrawSpan16_AsmLitZWriteX86FPU(int32 x1, int32 x2, int32 y)
06607 {
06608 TDest =Dest;
06609 _asm
06610 {
06611 push ebx
06612 push ecx
06613 push esi
06614 push edi
06615
06616 mov eax,x1
06617 mov ecx,x2
06618 sub ecx,eax
06619 jle Return16
06620
06621 mov edi,[GBitPtr16]
06622 mov pTex,edi
06623
06624 fild [y] ; y
06625
06626 mov esi,x1
06627 mov edi,[TDest]
06628
06629 shl esi,1
06630 mov eax,ecx
06631
06632 add edi,esi
06633 add pZBufferPtr,esi
06634
06635 shr ecx,4
06636 and eax,15
06637 _emit 75h
06638 _emit 06h
06639 dec ecx
06640 mov eax,16
06641
06642 mov [NumASpans],ecx
06643 mov [RemainingCount],eax
06644
06645 fild [x1] ; x y
06646
06647
06648
06649 fld [UDivZStepY] ; UZdY x y
06650 fld [UDivZStepX] ; UZdX UZdY x y
06651 fmul st,st(2) ; UZX UZdY x y
06652 fld [VDivZStepY] ; VZdY UZX UZdY x y
06653 fld [VDivZStepX] ; VZdX VZdY UZX UZdY x y
06654 fxch st(3) ; UZdy VZdY UZX VZdX x y
06655 fmul st,st(5) ; UZY VZdY UZX VZdX x y
06656 fxch st(2) ; UZX VZdY UZY VZdX x y
06657 fadd [UDivZOrigin] ; UZXS VZdY UZY VZdX x y
06658 fxch st(3) ; VZdX VZdY UZY UZXS x y
06659 fmul st,st(4) ; VZX VZdY UZY UZXS x y
06660 fxch st(2) ; UZY VZdY VZX UZXS x y
06661 faddp st(3),st ; VZdY VZX UZ x y
06662 fmul st,st(4) ; VZY VZX UZ x y
06663 fxch st(1) ; VZX VZY UZ x y
06664 fadd [VDivZOrigin] ; VZXS VZY UZ x y
06665 fld [ZiStepX] ; ZdX VZXS VZY UZ x y
06666
06667
06668 fld [ZiStepX]
06669 fmul dword ptr[ZBufferPrec]
06670 fmul dword ptr[Two]
06671 fistp dword ptr[ZDelta]
06672
06673 fmulp st(4),st ; VZXS VZY UZ ZX y
06674 faddp st(1),st ; VZ UZ ZX y
06675 fld [ZiStepY] ; ZdY VZ UZ ZX y
06676 fmulp st(4),st ; VZ UZ ZX ZY
06677 fxch st(2) ; ZX UZ VZ ZY
06678 fadd [ZiOrigin] ; ZXS UZ VZ ZY
06679
06680
06681
06682 faddp st(3),st ; UZ VZ Zi
06683 fld1 ; 1 UZ VZ Zi
06684 fdiv st,st(3) ; ZL UZ VZ Zi
06685
06686
06687
06688 fld st ; ZL ZL UZ VZ Zi
06689 fmul st,st(3) ; VL ZL UZ VZ Zi
06690 fxch st(4) ; Zi ZL UZ VZ VL
06691
06692
06693 fld st
06694 fmul dword ptr[ZBufferPrec]
06695 fistp dword ptr[ZVal]
06696
06697 fadd [Zi16StepX] ; ZRi ZL UZ VZ VL
06698 fxch st(1) ; ZL ZRi UZ VZ VL
06699 fmul st,st(2) ; UL ZRi UZ VZ VL
06700 fxch st(3) ; VZ ZRi UZ UL VL
06701 fadd [VDivZ16StepX] ; VZR ZRi UZ UL VL
06702 fxch st(2) ; UZ ZRi VZR UL VL
06703 fadd [UDivZ16StepX] ; UZR ZRi VZR UL VL
06704 fld1 ; 1 UZR ZRi VZR UL VL
06705 fdiv st,st(2) ; ZR UZR ZRi VZR UL VL
06706
06707
06708
06709 fld st ; ZR ZR UZR ZRi VZR UL VL
06710 fmul st,st(4) ; VR ZR UZR ZRi VZR UL VL
06711 fxch st(1) ; ZR VR UZR ZRi VZR UL VL
06712
06713 fmul st,st(2) ; UR VR UZR ZRi VZR UL VL
06714
06715 test ecx,ecx
06716 jz HandleLeftoverPixels16
06717
06718 SpanLoop16:
06719
06720 fstp dword ptr[FloatTemp] ; VR UZR ZRi VZR UL VL
06721 fld st(4) ; UL VR UZR ZRi VZR UL VL
06722 fmul [GLMapMulU] ; ULL VR UZR ZRi VZR UL VL
06723 fld st(5) ; UL ULL VR UZR ZRi VZR UL VL
06724 fadd qword ptr[MipMagic] ; ULk ULL VR UZR ZRi VZR UL VL
06725 fxch st(1) ; ULL ULk VR UZR ZRi VZR UL VL
06726 fadd qword ptr[MipMagic] ; ULLk ULk VR UZR ZRi VZR UL VL
06727 fxch st(1) ; ULk ULLk VR UZR ZRi VZR UL VL
06728 fstp qword ptr[Bucket] ; ULLk VR UZR ZRi VZR UL VL
06729 fstp qword ptr[Bucket2] ; VR UZR ZRi VZR UL VL
06730
06731 mov ebx,dword ptr[Bucket]
06732 mov eax,dword ptr[Bucket2]
06733
06734 fld st(5) ; VL VR UZR ZRi VZR UL VL
06735 fmul [GLMapMulV] ; VLL VR UZR ZRi VZR UL VL
06736
06737 add ebx,dword ptr[UAdjust]
06738 add eax,dword ptr[UAdjustL]
06739
06740 mov [U1],ebx
06741 mov [UFixed],eax
06742
06743 fld st(6) ; VL VLL VR UZR ZRi VZR UL VL
06744 fadd qword ptr[MipMagic] ; VLk VLL VR UZR ZRi VZR UL VL
06745 fxch st(1) ; VLL VLk VR UZR ZRi VZR UL VL
06746 fadd qword ptr[MipMagic] ; VLLk VLk VR UZR ZRi VZR UL VL
06747 fxch st(1) ; VLk VLLk VR UZR ZRi VZR UL VL
06748 fstp qword ptr[Bucket] ; VLLk VR UZR ZRi VZR UL VL
06749 fstp qword ptr[Bucket2] ; VR UZR ZRi VZR UL VL
06750 fsubr st(5),st ; VR UZR ZRi VZR UL dV
06751
06752 mov ebx,dword ptr[Bucket]
06753 mov eax,dword ptr[Bucket2]
06754
06755 fld dword ptr[FloatTemp] ; UR VR UZR ZRi VZR UL dV
06756
06757 add ebx,dword ptr[VAdjust]
06758 add eax,dword ptr[VAdjustL]
06759
06760 mov [V1],ebx
06761 mov [VFixed],eax
06762
06763 fsubr st(5),st ; UR VR UZR ZRi VZR dU dV
06764 fxch st(6) ; dV VR UZR ZRi VZR dU UR
06765 fadd qword ptr[MipMagic2] ; dVk VR UZR ZRi VZR dU UR
06766 fxch st(5) ; dU VR UZR ZRi VZR dVk UR
06767 fadd qword ptr[MipMagic2] ; dUk VR UZR ZRi VZR dVk UR
06768 fxch st(5) ; dVk VR UZR ZRi VZR dUk UR
06769 fstp qword ptr[DeltaV] ; VR UZR ZRi VZR dUk UR
06770 fxch st(5) ; UR UZR ZRi VZR dUk VR
06771
06772 fxch st(4) ; dUk UZR ZRi VZR UR VR
06773 fstp qword ptr[DeltaU] ; UZR ZRi VZR UR VR
06774
06775
06776 fadd [UDivZ16StepX] ; UZR ZLi VZL UL VL
06777 fxch st(1) ; ZLi UZR VZL UL VL
06778 fadd [Zi16StepX] ; ZRi UZR VZL UL VL
06779 fxch st(2) ; VZL UZR ZRi UL VL
06780 fadd [VDivZ16StepX] ; VZR UZR ZRi UL VL
06781 fxch st(2) ; ZRi UZR VZR UL VL
06782 fxch st(1) ; UZR ZRi VZR UL VL
06783 fld1 ; 1 UZR ZRi VZR UL VL
06784 fdiv st,st(2) ; ZR UZR ZRi VZR UL VL
06785
06786
06787 mov ebx,[UFixed]
06788 cmp ebx,MaxU
06789 jle TryClampU016
06790 mov ecx,MaxU
06791 mov dword ptr[UFixed],ecx
06792 jmp NoClampU016
06793
06794 TryClampU016:
06795 cmp ebx,0
06796 jge NoClampU016
06797 mov dword ptr[UFixed],0
06798 NoClampU016:
06799 mov eax,[VFixed]
06800 cmp eax,MaxV
06801 jle TryClampV016
06802 mov ecx,MaxV
06803 mov dword ptr[VFixed],ecx
06804 jmp NoClampV016
06805
06806 TryClampV016:
06807 cmp eax,0
06808 jge NoClampV016
06809 mov dword ptr[VFixed],0
06810
06811 NoClampV016:
06812
06813
06814 mov esi,dword ptr[UFixed]
06815 mov eax,dword ptr[VFixed]
06816
06817
06818 mov ecx, GMipLevel4_8
06819 sar esi, cl
06820 sar eax, cl
06821 and esi, 0ffh
06822 and eax, 0ffh
06823 mov UDist, esi
06824 mov VDist, eax
06825
06826 mov esi,dword ptr[UFixed]
06827 mov eax,dword ptr[VFixed]
06828 mov ecx, GMipLevel20
06829 shr esi, cl
06830 shr eax, cl
06831
06832 imul eax, GLightWidth
06833 add esi, eax
06834
06835 mov edx, esi
06836 shl esi, 1
06837 add edx, esi
06838
06839 add edx, GLightData
06840
06841
06842 xor ecx, ecx
06843 mov cl, [edx+3]
06844 mov eax, ecx
06845 mov cl, [edx+0]
06846 sub eax, ecx
06847 imul eax, UDist
06848 shl ecx, 8
06849 add eax, ecx
06850 mov [R1], eax
06851
06852 xor ecx, ecx
06853 mov cl, [edx+4]
06854 mov eax, ecx
06855 mov cl, [edx+1]
06856 sub eax, ecx
06857 imul eax, UDist
06858 shl ecx, 8
06859 add eax, ecx
06860 mov [G1], eax
06861
06862 xor ecx, ecx
06863 mov cl, [edx+5]
06864 mov eax, ecx
06865 mov cl, [edx+2]
06866 sub eax, ecx
06867 imul eax, UDist
06868 shl ecx, 8
06869 add eax, ecx
06870 mov [B1], eax
06871
06872 add edx, GLightWidth
06873 add edx, GLightWidth
06874 add edx, GLightWidth
06875
06876
06877 xor ecx, ecx
06878 mov cl, [edx+3]
06879 mov eax, ecx
06880 mov cl, [edx+0]
06881 sub eax, ecx
06882 imul eax, UDist
06883 shl ecx, 8
06884 add eax, ecx
06885 mov [R2], eax
06886
06887 xor ecx, ecx
06888 mov cl, [edx+4]
06889 mov eax, ecx
06890 mov cl, [edx+1]
06891 sub eax, ecx
06892 imul eax, UDist
06893 shl ecx, 8
06894 add eax, ecx
06895 mov [G2], eax
06896
06897 xor ecx, ecx
06898 mov cl, [edx+5]
06899 mov eax, ecx
06900 mov cl, [edx+2]
06901 sub eax, ecx
06902 imul eax, UDist
06903 shl ecx, 8
06904 add eax, ecx
06905 mov [B2], eax
06906
06907
06908 mov eax, [R2]
06909 sub eax, [R1]
06910 imul eax, VDist
06911