Main Page | Alphabetical List | Compound List | File List | Compound Members | File Members

drawspan.c

Go to the documentation of this file.
00001 /****************************************************************************************/
00002 /*  drawspan.c                                                                          */
00003 /*                                                                                      */
00004 /*  Author:       Ken Baird                                                             */
00005 /*  Description:  Mostly unused code, a few needed vars, some renderstates that can     */
00006 /*                be used for post poly zfill, and some z correctors with more accuracy */
00007 /*                                                                                      */
00008 /*  The contents of this file are subject to the Genesis3D Public License               */
00009 /*  Version 1.01 (the "License"); you may not use this file except in                   */
00010 /*  compliance with the License. You may obtain a copy of the License at                */
00011 /*  http://www.genesis3d.com                                                            */
00012 /*                                                                                      */
00013 /*  Software distributed under the License is distributed on an "AS IS"                 */
00014 /*  basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.  See                */
00015 /*  the License for the specific language governing rights and limitations              */
00016 /*  under the License.                                                                  */
00017 /*                                                                                      */
00018 /*  The Original Code is Genesis3D, released March 25, 1999.                            */
00019 /*Genesis3D Version 1.1 released November 15, 1999                            */
00020 /*  Copyright (C) 1999 WildTangent, Inc. All Rights Reserved           */
00021 /*                                                                                      */
00022 /****************************************************************************************/
00023 
00024 /*
00025 Code fragments from Chris Hecker's texture mapping articles used with
00026 permission.  http://www.d6.com/users/checker 
00027 */
00028 
00029 #include <Windows.h>
00030 #include <Assert.h>
00031 #include <math.h>
00032 
00033 #include "BaseType.h"
00034 #include "Render.h"
00035 #include "SoftDrv.h"
00036 #include "drawspan.h"
00037 
00038 
00039 double  MipMagic, MipMagic2;	// biases DrawSpan16_AsmX86FPU adds (fadd qword) to FPU values just before storing them as qwords and reading the low dword back as an integer; not assigned in the visible code
00040 
00041 int32   R1, B1, G1, R2, G2, B2;	// not referenced in the visible code; presumably colour endpoints - TODO confirm
00042 int32   RR1, RR2, GG1, GG2, BB1, BB2;	// not referenced in the visible code
00043 int32   StepR, StepG, StepB;	// not referenced in the visible code
00044 int32   UDist, VDist;	// not referenced in the visible code
00045 int32   U1=0, V1=0, NumSpans=0;	// U1/V1: integer left-edge U/V (converted value + UAdjust/VAdjust) written and re-read by DrawSpan16_AsmX86FPU; NumSpans is not referenced in the visible code
00046 int32   CKeyTest=0;	// not referenced in the visible code
00047 
00048 geFloat                 FloatTemp, FTemp0, FTemp1, FTemp2;	// FloatTemp: scratch slot DrawSpan16_AsmX86FPU uses to spill st(0) and as a dump target when popping FPU stack entries; FTemp0-2 not referenced in the visible code
00049 geFloat                 FTemp3, FTemp4, FTemp5, FTemp6, FTemp7, FTemp8;	// not referenced in the visible code
00050 geFloat const           One                     =1.0f;
00051 geFloat const           Two                     =2.0f;
00052 
00053 extern  U32 UMask, VShift, VMask;	// defined elsewhere; the span loop uses VShift as a shift count (both 16-VShift and VShift)
00054 int32   ZDelta, ZVal;	// not referenced in the visible code
00055 geFloat ZBufferPrec = (geFloat)-ZBUFFER_PREC;	// negated ZBUFFER_PREC; ZBUFFER_PREC itself is declared outside the visible code
00056 __int64 RedDelta, GreenDelta, BlueDelta;	// not referenced in the visible code
00057 uint32  NumASpans, RemainingCount;	// NumASpans: full 16-pixel spans still to draw; RemainingCount: pixels left for the leftover path (set to 16, not 0, when the pixel count is a multiple of 16)
00058 double  DeltaU, DeltaV;	// per-pixel U/V steps: stored from the FPU as qwords, then their low dwords are added as integer increments in the span loop
00059 uint32  UFixed, VFixed;	// written by DrawSpan16_AsmX86FPU (converted value + UAdjustL/VAdjustL); not read in the visible code
00060 uint8   *pTex;	// DrawSpan16_AsmX86FPU stores GBitPtr16>>1 here and addresses texels through it with [2*esi] scaling
00061 
00062 
00063 
00064 void DrawSpan16_AsmX86FPU(int32 x1, int32 x2, int32 y)
00065 {
00066         _asm
00067         {
00068                 mov             eax,x1
00069                 mov             ecx,x2
00070                 sub             ecx,eax
00071                 jle             Return16
00072 
00073                 mov             edi,[GBitPtr16]
00074                 inc             ecx
00075                 shr             edi, 1                                  ; keep texture >>1
00076                 mov             pTex,edi
00077 
00078                 fild    [y]                                             ; y
00079 
00080                 mov             edi, ClientWindow.Buffer
00081                 mov             eax, y
00082                 imul    eax, ClientWindow.Width
00083                 add             eax, x1
00084                 shl             eax, 1
00085                 add             edi, eax
00086                 mov             eax,ecx
00087                 shr             ecx,4
00088                 and             eax,15
00089                 _emit 75h
00090                 _emit 06h
00091                 dec             ecx
00092                 mov             eax,16
00093 
00094                 mov             [NumASpans],ecx
00095                 mov             [RemainingCount],eax
00096 
00097                 fild [x1]                                               ; x    y
00098 
00099                 //decoder won't keep up with these huge instructions
00100                 //need to find some int instructions to cram in here somewhere
00101                 fld             [UDivZStepY]                    ; UZdY x    y
00102                 fld             [UDivZStepX]                    ; UZdX UZdY x    y
00103                 fmul    st,st(2)                                ; UZX  UZdY x    y
00104                 fld             [VDivZStepY]                    ; VZdY UZX  UZdY x    y
00105                 fld             [VDivZStepX]                    ; VZdX VZdY UZX  UZdY x    y
00106                 fxch    st(3)                                   ; UZdy VZdY UZX  VZdX x    y
00107                 fmul    st,st(5)                                ; UZY  VZdY UZX  VZdX x    y
00108                 fxch    st(2)                                   ; UZX  VZdY UZY  VZdX x    y
00109                 fadd    [UDivZOrigin]                   ; UZXS VZdY UZY  VZdX x    y
00110                 fxch    st(3)                                   ; VZdX VZdY UZY  UZXS x    y
00111                 fmul    st,st(4)                                ; VZX  VZdY UZY  UZXS x    y
00112                 fxch    st(2)                                   ; UZY  VZdY VZX  UZXS x    y
00113                 faddp   st(3),st                                ; VZdY VZX  UZ   x    y
00114                 fmul    st,st(4)                                ; VZY  VZX  UZ   x    y
00115                 fxch    st(1)                                   ; VZX  VZY  UZ   x    y
00116                 fadd    [VDivZOrigin]                   ; VZXS VZY  UZ   x    y
00117                 fld             [ZiStepX]                               ; ZdX  VZXS VZY  UZ   x    y
00118                 fmulp   st(4),st                                ; VZXS VZY  UZ   ZX   y
00119                 faddp   st(1),st                                ; VZ   UZ   ZX   y
00120                 fld             [ZiStepY]                               ; ZdY  VZ   UZ   ZX   y
00121                 fmulp   st(4),st                                ; VZ   UZ   ZX   ZY
00122                 fxch    st(2)                                   ; ZX   UZ   VZ   ZY
00123                 fadd    [ZiOrigin]                              ; ZXS  UZ   VZ   ZY
00124 
00125                 //room for two cycles of int instructions here
00126 
00127                 faddp   st(3),st                                ; UZ   VZ   Zi
00128                 fld1                                                    ; 1    UZ   VZ   Zi
00129                 fdiv    st,st(3)                                ; ZL   UZ   VZ   Zi
00130 
00131                 //room for 18 cycles of int instructions here
00132 
00133                 fld             st                                              ; ZL   ZL   UZ   VZ   Zi
00134                 fmul    st,st(3)                                ; VL   ZL   UZ   VZ   Zi
00135                 fxch    st(4)                                   ; Zi   ZL   UZ   VZ   VL
00136                 fadd    [Zi16StepX]                             ; ZRi  ZL   UZ   VZ   VL
00137                 fxch    st(1)                                   ; ZL   ZRi  UZ   VZ   VL
00138                 fmul    st,st(2)                                ; UL   ZRi  UZ   VZ   VL
00139                 fxch    st(3)                                   ; VZ   ZRi  UZ   UL   VL
00140                 fadd    [VDivZ16StepX]                  ; VZR  ZRi  UZ   UL   VL
00141                 fxch    st(2)                                   ; UZ   ZRi  VZR  UL   VL
00142                 fadd    [UDivZ16StepX]                  ; UZR  ZRi  VZR  UL   VL
00143                 fld1                                                    ; 1    UZR  ZRi  VZR  UL   VL
00144                 fdiv    st,st(2)                                ; ZR   UZR  ZRi  VZR  UL   VL
00145 
00146                 //room for 18 cycles of int stuff here
00147 
00148                 fld             st                                              ; ZR   ZR   UZR  ZRi  VZR  UL   VL
00149                 fmul    st,st(4)                                ; VR   ZR   UZR  ZRi  VZR  UL   VL
00150                 fxch    st(1)                                   ; ZR   VR   UZR  ZRi  VZR  UL   VL
00151                 //fmul stall one cycle
00152                 fmul    st,st(2)                                ; UR   VR   UZR  ZRi  VZR  UL   VL
00153 
00154                 test    ecx,ecx
00155                 jz              HandleLeftoverPixels16
00156 
00157 SpanLoop16:
00158                 //need one more stack spot
00159                 fstp    dword ptr[FloatTemp]    ; VR   UZR  ZRi  VZR  UL   VL
00160                 fld             st(4)                                   ; UL   VR   UZR  ZRi  VZR  UL   VL
00161                 fmul    [GLMapMulU]                             ; ULL  VR   UZR  ZRi  VZR  UL   VL
00162                 fld             st(5)                                   ; UL   ULL  VR   UZR  ZRi  VZR  UL   VL
00163                 fadd    qword ptr[MipMagic]             ; ULk  ULL  VR   UZR  ZRi  VZR  UL   VL
00164                 fxch    st(1)                                   ; ULL  ULk  VR   UZR  ZRi  VZR  UL   VL
00165                 fadd    qword ptr[MipMagic]             ; ULLk ULk  VR   UZR  ZRi  VZR  UL   VL
00166                 fxch    st(1)                                   ; ULk  ULLk VR   UZR  ZRi  VZR  UL   VL
00167                 fstp    qword ptr[Bucket]               ; ULLk VR   UZR  ZRi  VZR  UL   VL
00168                 fstp    qword ptr[Bucket2]              ; VR   UZR  ZRi  VZR  UL   VL
00169 
00170                 mov             ebx,dword ptr[Bucket]
00171                 mov             eax,dword ptr[Bucket2]
00172 
00173                 fld             st(5)                                   ; VL   VR   UZR  ZRi  VZR  UL   VL
00174                 fmul    [GLMapMulV]                             ; VLL  VR   UZR  ZRi  VZR  UL   VL
00175 
00176                 add             ebx,dword ptr[UAdjust]
00177                 add             eax,dword ptr[UAdjustL]
00178 
00179                 mov             [U1],ebx
00180                 mov             [UFixed],eax
00181 
00182                 fld             st(6)                                   ; VL   VLL  VR   UZR  ZRi  VZR  UL   VL
00183                 fadd    qword ptr[MipMagic]             ; VLk  VLL  VR   UZR  ZRi  VZR  UL   VL
00184                 fxch    st(1)                                   ; VLL  VLk  VR   UZR  ZRi  VZR  UL   VL
00185                 fadd    qword ptr[MipMagic]             ; VLLk VLk  VR   UZR  ZRi  VZR  UL   VL
00186                 fxch    st(1)                                   ; VLk  VLLk VR   UZR  ZRi  VZR  UL   VL
00187                 fstp    qword ptr[Bucket]               ; VLLk VR   UZR  ZRi  VZR  UL   VL
00188                 fstp    qword ptr[Bucket2]              ; VR   UZR  ZRi  VZR  UL   VL
00189                 fsubr   st(5),st                                ; VR   UZR  ZRi  VZR  UL   dV
00190 
00191                 mov             ebx,dword ptr[Bucket]
00192                 mov             eax,dword ptr[Bucket2]
00193 
00194                 fld             dword ptr[FloatTemp]    ; UR   VR   UZR  ZRi  VZR  UL   dV
00195 
00196                 add             ebx,dword ptr[VAdjust]
00197                 add             eax,dword ptr[VAdjustL]
00198 
00199                 mov             [V1],ebx
00200                 mov             [VFixed],eax
00201 
00202                 fsubr   st(5),st                                ; UR   VR   UZR  ZRi  VZR  dU   dV
00203                 fxch    st(6)                                   ; dV   VR   UZR  ZRi  VZR  dU   UR
00204                 fadd    qword ptr[MipMagic2]    ; dVk  VR   UZR  ZRi  VZR  dU   UR
00205                 fxch    st(5)                                   ; dU   VR   UZR  ZRi  VZR  dVk  UR
00206                 fadd    qword ptr[MipMagic2]    ; dUk  VR   UZR  ZRi  VZR  dVk  UR
00207                 fxch    st(5)                                   ; dVk  VR   UZR  ZRi  VZR  dUk  UR
00208                 fstp    qword ptr[DeltaV]               ; VR   UZR  ZRi  VZR  dUk  UR
00209                 fxch    st(5)                                   ; UR   UZR  ZRi  VZR  dUk  VR
00210                 //gotta do this to get em lined back up right
00211                 fxch    st(4)                                   ; dUk  UZR  ZRi  VZR  UR   VR
00212                 fstp    qword ptr[DeltaU]               ; UZR  ZRi  VZR  UR   VR
00213 
00214                 //right becomes left                    ; UZL  ZLi  VZL  UL   VL
00215                 fadd    [UDivZ16StepX]                  ; UZR  ZLi  VZL  UL   VL
00216                 fxch    st(1)                                   ; ZLi  UZR  VZL  UL   VL
00217                 fadd    [Zi16StepX]                             ; ZRi  UZR  VZL  UL   VL
00218                 fxch    st(2)                                   ; VZL  UZR  ZRi  UL   VL
00219                 fadd    [VDivZ16StepX]                  ; VZR  UZR  ZRi  UL   VL
00220                 fxch    st(2)                                   ; ZRi  UZR  VZR  UL   VL
00221                 fxch    st(1)                                   ; UZR  ZRi  VZR  UL   VL
00222                 //need those fxch to line things up for loops (bad)
00223 
00224                 fld1                                                    ; 1    UZR  ZRi  VZR  UL   VL
00225 
00226 
00227                 fdiv    st,st(2)                        ; ZR   UZR  ZRi  VZR  UL   VL
00228 
00229                 mov             ebx,dword ptr[U1]
00230                 mov             eax,dword ptr[V1]
00231 
00232                 add             ebx,dword ptr[UAdjust2]
00233                 add             eax,dword ptr[VAdjust2]
00234 
00235                 mov             ecx,16
00236                 mov             dword ptr[Bucket],ebx
00237 
00238                 sub             ecx,[VShift]
00239                 mov             dword ptr[Bucket2],eax
00240 
00241                 shr             eax,cl
00242 
00243                 push    ebp
00244                 shr             ebx,16
00245 
00246                 and             eax,[GHMaskShifted]
00247                 mov             esi,[pTex]
00248 
00249                 and             ebx,[GWMask]
00250                 add             esi,eax
00251 
00252                 mov             ecx,[VShift]
00253                 add             esi,ebx
00254 
00255                 mov             edx,dword ptr[Bucket2]
00256                 mov             ebp,dword ptr[DeltaV]
00257 
00258                 mov             ebx,dword ptr[Bucket]
00259 
00260                 //do 16 pixels
00261 
00262                 add             edx,ebp
00263                 mov             ax,[2*esi]
00264 
00265                 mov             esi,edx
00266                 add             ebx,dword ptr[DeltaU]
00267 
00268                 shl             esi,cl
00269                 and             ebx,[GWMaskShifted]
00270 
00271                 and             esi,[GHMaskShifted16]
00272 
00273                 add             esi,ebx
00274                 mov             [edi+0],ax
00275 
00276                 shr             esi,16
00277                 add             edx,ebp
00278 
00279                 add             esi,pTex
00280                 add             ebx,dword ptr[DeltaU]
00281 
00282                 mov             ax,[2*esi]
00283                 mov             esi,edx
00284 
00285                 and             ebx,[GWMaskShifted]
00286                 mov             [edi+2],ax
00287 
00288                 shl             esi,cl
00289                 add             edx,ebp
00290 
00291                 and             esi,[GHMaskShifted16]
00292 
00293                 add             esi,ebx
00294                 shr             esi,16
00295 
00296                 add             ebx,dword ptr[DeltaU]
00297                 add             esi,pTex
00298 
00299                 mov             ax,[2*esi]
00300                 mov             esi,edx
00301 
00302                 and             ebx,[GWMaskShifted]
00303                 mov             [edi+4],ax
00304 
00305                 shl             esi,cl
00306                 add             edx,ebp
00307 
00308                 and             esi,[GHMaskShifted16]
00309 
00310                 add             esi,ebx
00311                 shr             esi,16
00312 
00313                 add             ebx,dword ptr[DeltaU]
00314                 add             esi,pTex
00315 
00316                 mov             ax,[2*esi]
00317                 mov             esi,edx
00318 
00319                 and             ebx,[GWMaskShifted]
00320                 mov             [edi+6],ax
00321 
00322                 shl             esi,cl
00323                 add             edx,ebp
00324 
00325                 and             esi,[GHMaskShifted16]
00326 
00327                 add             esi,ebx
00328                 shr             esi,16
00329 
00330                 add             ebx,dword ptr[DeltaU]
00331                 add             esi,pTex
00332 
00333                 mov             ax,[2*esi]
00334                 mov             esi,edx
00335 
00336                 and             ebx,[GWMaskShifted]
00337                 mov             [edi+8],ax
00338 
00339                 shl             esi,cl
00340                 add             edx,ebp
00341 
00342                 and             esi,[GHMaskShifted16]
00343 
00344                 add             esi,ebx
00345                 shr             esi,16
00346 
00347                 add             ebx,dword ptr[DeltaU]
00348                 add             esi,pTex
00349 
00350                 mov             ax,[2*esi]
00351                 mov             esi,edx
00352 
00353                 and             ebx,[GWMaskShifted]
00354                 mov             [edi+10],ax
00355 
00356                 shl             esi,cl
00357                 add             edx,ebp
00358 
00359                 and             esi,[GHMaskShifted16]
00360 
00361                 add             esi,ebx
00362                 shr             esi,16
00363 
00364                 add             ebx,dword ptr[DeltaU]
00365                 add             esi,pTex
00366 
00367                 mov             ax,[2*esi]
00368                 mov             esi,edx
00369 
00370                 and             ebx,[GWMaskShifted]
00371                 mov             [edi+12],ax
00372 
00373                 shl             esi,cl
00374                 add             edx,ebp
00375 
00376                 and             esi,[GHMaskShifted16]
00377 
00378                 add             esi,ebx
00379                 shr             esi,16
00380 
00381                 add             ebx,dword ptr[DeltaU]
00382                 add             esi,pTex
00383 
00384                 mov             ax,[2*esi]
00385                 mov             esi,edx
00386 
00387                 and             ebx,[GWMaskShifted]
00388                 mov             [edi+14],ax
00389 
00390                 shl             esi,cl
00391                 add             edx,ebp
00392 
00393                 and             esi,[GHMaskShifted16]
00394 
00395                 add             esi,ebx
00396                 shr             esi,16
00397 
00398                 add             ebx,dword ptr[DeltaU]
00399                 add             esi,pTex
00400 
00401                 mov             ax,[2*esi]
00402                 mov             esi,edx
00403 
00404                 and             ebx,[GWMaskShifted]
00405                 mov             [edi+16],ax
00406 
00407                 shl             esi,cl
00408                 add             edx,ebp
00409 
00410                 and             esi,[GHMaskShifted16]
00411 
00412                 add             esi,ebx
00413                 shr             esi,16
00414 
00415                 add             ebx,dword ptr[DeltaU]
00416                 add             esi,pTex
00417 
00418                 mov             ax,[2*esi]
00419                 mov             esi,edx
00420 
00421                 and             ebx,[GWMaskShifted]
00422                 mov             [edi+18],ax
00423 
00424                 shl             esi,cl
00425                 add             edx,ebp
00426 
00427                 and             esi,[GHMaskShifted16]
00428 
00429                 add             esi,ebx
00430                 shr             esi,16
00431 
00432                 add             ebx,dword ptr[DeltaU]
00433                 add             esi,pTex
00434 
00435                 mov             ax,[2*esi]
00436                 mov             esi,edx
00437 
00438                 and             ebx,[GWMaskShifted]
00439                 mov             [edi+20],ax
00440 
00441                 shl             esi,cl
00442                 add             edx,ebp
00443 
00444                 and             esi,[GHMaskShifted16]
00445 
00446                 add             esi,ebx
00447                 shr             esi,16
00448 
00449                 add             ebx,dword ptr[DeltaU]
00450                 add             esi,pTex
00451 
00452                 mov             ax,[2*esi]
00453                 mov             esi,edx
00454 
00455                 and             ebx,[GWMaskShifted]
00456                 mov             [edi+22],ax
00457 
00458                 shl             esi,cl
00459                 add             edx,ebp
00460 
00461                 and             esi,[GHMaskShifted16]
00462 
00463                 add             esi,ebx
00464                 shr             esi,16
00465 
00466                 add             ebx,dword ptr[DeltaU]
00467                 add             esi,pTex
00468 
00469                 mov             ax,[2*esi]
00470                 mov             esi,edx
00471 
00472                 and             ebx,[GWMaskShifted]
00473                 mov             [edi+24],ax
00474 
00475                 shl             esi,cl
00476                 add             edx,ebp
00477 
00478                 and             esi,[GHMaskShifted16]
00479 
00480                 add             esi,ebx
00481                 shr             esi,16
00482 
00483                 add             ebx,dword ptr[DeltaU]
00484                 add             esi,pTex
00485 
00486                 mov             ax,[2*esi]
00487                 mov             esi,edx
00488 
00489                 and             ebx,[GWMaskShifted]
00490                 mov             [edi+26],ax
00491 
00492                 shl             esi,cl
00493                 add             edx,ebp
00494 
00495                 and             esi,[GHMaskShifted16]
00496 
00497                 add             esi,ebx
00498                 shr             esi,16
00499 
00500                 add             ebx,dword ptr[DeltaU]
00501                 add             esi,pTex
00502 
00503                 mov             ax,[2*esi]
00504                 mov             esi,edx
00505 
00506                 and             ebx,[GWMaskShifted]
00507                 mov             [edi+28],ax
00508 
00509                 shl             esi,cl
00510                 add             edx,ebp
00511 
00512                 and             esi,[GHMaskShifted16]
00513 
00514                 add             esi,ebx
00515                 shr             esi,16
00516 
00517                 add             ebx,dword ptr[DeltaU]
00518                 add             esi,pTex
00519 
00520                 mov             ax,[2*esi]
00521                 mov             esi,edx
00522 
00523                 pop     ebp
00524                 mov             [edi+30],ax
00525 
00526                 ;get corrected right side deltas; st0  st1  st2  st3  st4  st5  st6  st7
00527                                                                                 ; ZR   UZR  ZRi  VZR  UL   VL
00528                 fld             st                                              ; ZR   ZR   UZR  ZRi  VZR  UL   VL
00529                 fmul    st,st(4)                                ; VR   ZR   UZR  ZRi  VZR  UL   VL
00530                 fxch    st(1)                                   ; ZR   VR   UZR  ZRi  VZR  UL   VL
00531                 fmul    st,st(2)                                ; UR   VR   UZR  ZRi  VZR  UL   VL
00532 
00533                 add             edi,32
00534                 dec             [NumASpans]                     ; dec num affine spans
00535                 jnz             SpanLoop16                              ; loop back
00536 
00537 HandleLeftoverPixels16:
00538 
00539                 mov             esi,[pTex]
00540 
00541 
00542                 cmp             [RemainingCount],0
00543                 jz              FPUReturn16
00544 
00545                 //need one more stack spot
00546                 fstp    dword ptr[FloatTemp]    ; VR   UZR  ZRi  VZR  UL   VL
00547                 fld             st(4)                                   ; UL   VR   UZR  ZRi  VZR  UL   VL
00548                 fmul    [GLMapMulU]                             ; ULL  VR   UZR  ZRi  VZR  UL   VL
00549                 fld             st(5)                                   ; UL   ULL  VR   UZR  ZRi  VZR  UL   VL
00550                 fadd    qword ptr[MipMagic]             ; ULk  ULL  VR   UZR  ZRi  VZR  UL   VL
00551                 fxch    st(1)                                   ; ULL  ULk  VR   UZR  ZRi  VZR  UL   VL
00552                 fadd    qword ptr[MipMagic]             ; ULLk ULk  VR   UZR  ZRi  VZR  UL   VL
00553                 fxch    st(1)                                   ; ULk  ULLk VR   UZR  ZRi  VZR  UL   VL
00554                 fstp    qword ptr[Bucket]               ; ULLk VR   UZR  ZRi  VZR  UL   VL
00555                 fstp    qword ptr[Bucket2]              ; VR   UZR  ZRi  VZR  UL   VL
00556 
00557                 mov             ebx,dword ptr[Bucket]
00558                 mov             eax,dword ptr[Bucket2]
00559 
00560                 fld             st(5)                                   ; VL   VR   UZR  ZRi  VZR  UL   VL
00561                 fmul    [GLMapMulV]                             ; VLL  VR   UZR  ZRi  VZR  UL   VL
00562 
00563                 add             ebx,dword ptr[UAdjust]
00564                 add             eax,dword ptr[UAdjustL]
00565 
00566                 mov             [U1],ebx
00567                 mov             [UFixed],eax
00568 
00569                 fld             st(6)                                   ; VL   VLL  VR   UZR  ZRi  VZR  UL   VL
00570                 fadd    qword ptr[MipMagic]             ; VLk  VLL  VR   UZR  ZRi  VZR  UL   VL
00571                 fxch    st(1)                                   ; VLL  VLk  VR   UZR  ZRi  VZR  UL   VL
00572                 fadd    qword ptr[MipMagic]             ; VLLk VLk  VR   UZR  ZRi  VZR  UL   VL
00573                 fxch    st(1)                                   ; VLk  VLLk VR   UZR  ZRi  VZR  UL   VL
00574                 fstp    qword ptr[Bucket]               ; VLLk VR   UZR  ZRi  VZR  UL   VL
00575                 fstp    qword ptr[Bucket2]              ; VR   UZR  ZRi  VZR  UL   VL
00576 
00577                 mov             ebx,dword ptr[Bucket]
00578                 mov             eax,dword ptr[Bucket2]
00579 
00580                 fld             dword ptr[FloatTemp]    ; UR   VR   UZR  ZRi  VZR  UL   VL
00581 
00582                 add             ebx,dword ptr[VAdjust]
00583                 add             eax,dword ptr[VAdjustL]
00584 
00585                 mov             [V1],ebx
00586                 mov             [VFixed],eax
00587 
00588                 dec             [RemainingCount]
00589                 jz              OnePixelSpan16
00590 
00591 
00592                 //must get rid of this wasted time
00593                 fstp    [FloatTemp]                             ; inv. inv. inv. inv. UL   VL
00594                 fstp    [FloatTemp]                             ; inv. inv. inv. UL   VL
00595                 fstp    [FloatTemp]                             ; inv. inv. UL   VL
00596                 fstp    [FloatTemp]                             ; inv. UL   VL
00597                 fstp    [FloatTemp]                             ; UL   VL
00598                 fild    [y]                                             ; y    UL   VL
00599                 fild    [x2]                                    ; xr   y    UL   VL
00600 
00601                 fld             [UDivZStepY]                    ; UZdY xr   y    UL   VL
00602                 fld             [UDivZStepX]                    ; UZdX UZdY xr   y    UL   VL
00603                 fmul    st,st(2)                                ; UZX  UZdY xr   y    UL   VL
00604                 fld             [VDivZStepY]                    ; VZdY UZX  UZdY xr   y    UL   VL
00605                 fld             [VDivZStepX]                    ; VZdX VZdY UZX  UZdY xr   y    UL   VL
00606                 fxch    st(3)                                   ; UZdy VZdY UZX  VZdX xr   y    UL   VL
00607                 fmul    st,st(5)                                ; UZY  VZdY UZX  VZdX xr   y    UL   VL
00608                 fxch    st(2)                                   ; UZX  VZdY UZY  VZdX xr   y    UL   VL
00609                 fadd    [UDivZOrigin]                   ; UZXS VZdY UZY  VZdX xr   y    UL   VL
00610                 fxch    st(3)                                   ; VZdX VZdY UZY  UZXS xr   y    UL   VL
00611                 fmul    st,st(4)                                ; VZX  VZdY UZY  UZXS xr   y    UL   VL
00612                 fxch    st(2)                                   ; UZY  VZdY VZX  UZXS xr   y    UL   VL
00613                 faddp   st(3),st                                ; VZdY VZX  UZ   xr   y    UL   VL
00614                 fmul    st,st(4)                                ; VZY  VZX  UZ   xr   y    UL   VL
00615                 fxch    st(1)                                   ; VZX  VZY  UZ   xr   y    UL   VL
00616                 fadd    [VDivZOrigin]                   ; VZXS VZY  UZ   xr   y    UL   VL
00617                 fld             [ZiStepX]                               ; ZdX  VZXS VZY  UZ   xr   y    UL   VL
00618                 fmulp   st(4),st                                ; VZXS VZY  UZ   ZX   y    UL   VL
00619                 faddp   st(1),st                                ; VZ   UZ   ZX   y    UL   VL
00620                 fld             [ZiStepY]                               ; ZdY  VZ   UZ   ZX   y    UL   VL
00621                 fmulp   st(4),st                                ; VZ   UZ   ZX   ZY   UL   VL
00622                 fxch    st(2)                                   ; ZX   UZ   VZ   ZY   UL   VL
00623                 fadd    [ZiOrigin]                              ; ZXS  UZ   VZ   ZY   UL   VL
00624 
00625                 faddp   st(3),st                                ; UZ   VZ   Zi   UL   VL
00626                 fld1                                                    ; 1    UZ   VZ   Zi   UL   VL
00627                 fdiv    st,st(3)                                ; ZR   UZ   VZ   Zi   UL   VL
00628 
00629                 fld             st                                              ; ZR   ZR   UZ   VZ   Zi   UL   VL
00630                 fmul    st,st(3)                                ; VR   ZR   UZ   VZ   Zi   UL   VL
00631                 fxch    st(1)                                   ; ZR   VR   UZ   VZ   Zi   UL   VL
00632                 fmul    st,st(2)                                ; UR   VR   UZ   VZ   Zi   UL   VL
00633 
00634                 //lazy fidiv below (one per delta)... should compute 1/RemainingCount once and fmul both deltas by it
00635 
00636                 ; calculate deltas                              ; st0  st1  st2  st3  st4  st5  st6  st7
00637                 fsubr   st(5),st                                ; UR   VR   inv. inv. inv. dU   VL
00638                 fxch    st(1)                                   ; VR   UR   inv. inv. inv. dU   VL
00639                 fsubr   st(6),st                                ; VR   UR   inv. inv. inv. dU   dV
00640                 fxch    st(6)                                   ; dV   UR   inv. inv. inv. dU   VR
00641                 fidiv   dword ptr[RemainingCount];dv   UR   inv. inv. inv. dU   VR
00642                 fadd    qword ptr[MipMagic]             ; dvk  UR   inv. inv. inv. dU   VR
00643                 fxch    st(5)                                   ; dU   UR   inv. inv. inv. dvk  VR
00644                 fidiv   dword ptr[RemainingCount];du   UR   inv. inv. inv. dvk  VR
00645                 fadd    qword ptr[MipMagic]             ; duk  UR   inv. inv. inv. dvk  VR
00646                 fxch    st(5)                                   ; dvk  UR   inv. inv. inv. duk  VR
00647                 fstp    qword ptr[DeltaV]               ; UR   inv. inv. inv. duk  VR
00648                 fxch    st(4)                                   ; duk  inv. inv. inv. UR   VR
00649                 fstp    qword ptr[DeltaU]               ; inv. inv. inv. UR   VR
00650                 fld             st(1)                                   ; inv. inv. inv. inv. UR   VR
00651                 fld             st(2)                                   ; inv. inv. inv. inv. inv. UR   VR
00652 
00653                 mov             ebx,dword ptr[U1]
00654                 mov             edx,dword ptr[V1]
00655 
00656                 mov             ecx,16
00657                 add             ebx,dword ptr[UAdjust2]
00658 
00659                 sub             ecx,[VShift]
00660                 add             edx,dword ptr[VAdjust2]
00661 
00662 OnePixelSpan16:
00663                 push    ebp
00664 
00665 LeftoverLoop16:
00666                 mov             eax,edx
00667                 shr             eax,cl
00668                 mov             ebp,ebx
00669                 and             eax,[GHMaskShifted]
00670                 shr             ebp,16
00671                 and             ebp,[GWMask]
00672                 add             eax,ebp
00673                 add             eax,esi
00674                 mov             ax,[2*eax]
00675                 mov             [edi],ax
00676                 add             ebx,dword ptr[DeltaU]
00677                 add             edi,2
00678                 add             edx,dword ptr[DeltaV]
00679 
00680                 dec             [RemainingCount]
00681                 jge             LeftoverLoop16
00682 
00683                 pop             ebp
00684 
00685 FPUReturn16:
00686                 ffree   st(0)
00687                 ffree   st(1)
00688                 ffree   st(2)
00689                 ffree   st(3)
00690                 ffree   st(4)
00691                 ffree   st(5)
00692                 ffree   st(6)
00693 
00694 Return16:
00695         }
00696 }
00697 
00698 void DrawSpan16_8AsmLitX86FPU(int32 x1, int32 x2, int32 y)
00699 {
00700         _asm
00701         {
00702                 mov             eax,x1
00703                 mov             ecx,x2
00704                 sub             ecx,eax
00705                 jle             Return16
00706 
00707                 mov             edi,[GBitPtr16]
00708                 inc             ecx
00709                 shr             edi, 1                                  ; keep texture >>1
00710                 mov             pTex,edi
00711 
00712                 fild    [y]                                             ; y
00713 
00714                 mov             edi, ClientWindow.Buffer
00715                 mov             eax, y
00716                 imul    eax, ClientWindow.Width
00717                 add             eax, x1
00718                 shl             eax, 1
00719                 add             edi, eax
00720                 mov             eax,ecx
00721                 shr             ecx,3
00722                 and             eax,7
00723                 _emit 75h
00724                 _emit 06h
00725                 dec             ecx
00726                 mov             eax,8
00727 
00728                 mov             [NumASpans],ecx
00729                 mov             [RemainingCount],eax
00730 
00731                 fild [x1]                                               ; x    y
00732 
00733                 //decoder won't keep up with these huge instructions
00734                 //need to find some int instructions to cram in here somewhere
00735                 fld             [UDivZStepY]                    ; UZdY x    y
00736                 fld             [UDivZStepX]                    ; UZdX UZdY x    y
00737                 fmul    st,st(2)                                ; UZX  UZdY x    y
00738                 fld             [VDivZStepY]                    ; VZdY UZX  UZdY x    y
00739                 fld             [VDivZStepX]                    ; VZdX VZdY UZX  UZdY x    y
00740                 fxch    st(3)                                   ; UZdy VZdY UZX  VZdX x    y
00741                 fmul    st,st(5)                                ; UZY  VZdY UZX  VZdX x    y
00742                 fxch    st(2)                                   ; UZX  VZdY UZY  VZdX x    y
00743                 fadd    [UDivZOrigin]                   ; UZXS VZdY UZY  VZdX x    y
00744                 fxch    st(3)                                   ; VZdX VZdY UZY  UZXS x    y
00745                 fmul    st,st(4)                                ; VZX  VZdY UZY  UZXS x    y
00746                 fxch    st(2)                                   ; UZY  VZdY VZX  UZXS x    y
00747                 faddp   st(3),st                                ; VZdY VZX  UZ   x    y
00748                 fmul    st,st(4)                                ; VZY  VZX  UZ   x    y
00749                 fxch    st(1)                                   ; VZX  VZY  UZ   x    y
00750                 fadd    [VDivZOrigin]                   ; VZXS VZY  UZ   x    y
00751                 fld             [ZiStepX]                               ; ZdX  VZXS VZY  UZ   x    y
00752                 fmulp   st(4),st                                ; VZXS VZY  UZ   ZX   y
00753                 faddp   st(1),st                                ; VZ   UZ   ZX   y
00754                 fld             [ZiStepY]                               ; ZdY  VZ   UZ   ZX   y
00755                 fmulp   st(4),st                                ; VZ   UZ   ZX   ZY
00756                 fxch    st(2)                                   ; ZX   UZ   VZ   ZY
00757                 fadd    [ZiOrigin]                              ; ZXS  UZ   VZ   ZY
00758 
00759                 //room for two cycles of int instructions here
00760 
00761                 faddp   st(3),st                                ; UZ   VZ   Zi
00762                 fld1                                                    ; 1    UZ   VZ   Zi
00763                 fdiv    st,st(3)                                ; ZL   UZ   VZ   Zi
00764 
00765                 //room for 18 cycles of int instructions here
00766 
00767                 fld             st                                              ; ZL   ZL   UZ   VZ   Zi
00768                 fmul    st,st(3)                                ; VL   ZL   UZ   VZ   Zi
00769                 fxch    st(4)                                   ; Zi   ZL   UZ   VZ   VL
00770                 fadd    [Zi16StepX]                             ; ZRi  ZL   UZ   VZ   VL
00771                 fxch    st(1)                                   ; ZL   ZRi  UZ   VZ   VL
00772                 fmul    st,st(2)                                ; UL   ZRi  UZ   VZ   VL
00773                 fxch    st(3)                                   ; VZ   ZRi  UZ   UL   VL
00774                 fadd    [VDivZ16StepX]                  ; VZR  ZRi  UZ   UL   VL
00775                 fxch    st(2)                                   ; UZ   ZRi  VZR  UL   VL
00776                 fadd    [UDivZ16StepX]                  ; UZR  ZRi  VZR  UL   VL
00777                 fld1                                                    ; 1    UZR  ZRi  VZR  UL   VL
00778                 fdiv    st,st(2)                                ; ZR   UZR  ZRi  VZR  UL   VL
00779 
00780                 //room for 18 cycles of int stuff here
00781 
00782                 fld             st                                              ; ZR   ZR   UZR  ZRi  VZR  UL   VL
00783                 fmul    st,st(4)                                ; VR   ZR   UZR  ZRi  VZR  UL   VL
00784                 fxch    st(1)                                   ; ZR   VR   UZR  ZRi  VZR  UL   VL
00785                 //fmul stall one cycle
00786                 fmul    st,st(2)                                ; UR   VR   UZR  ZRi  VZR  UL   VL
00787 
00788                 test    ecx,ecx
00789                 jz              HandleLeftoverPixels16
00790 
00791 SpanLoop16:
00792                 //need one more stack spot
00793                 fstp    dword ptr[FloatTemp]    ; VR   UZR  ZRi  VZR  UL   VL
00794                 fld             st(4)                                   ; UL   VR   UZR  ZRi  VZR  UL   VL
00795                 fmul    [GLMapMulU]                             ; ULL  VR   UZR  ZRi  VZR  UL   VL
00796                 fld             st(5)                                   ; UL   ULL  VR   UZR  ZRi  VZR  UL   VL
00797                 fadd    qword ptr[MipMagic]             ; ULk  ULL  VR   UZR  ZRi  VZR  UL   VL
00798                 fxch    st(1)                                   ; ULL  ULk  VR   UZR  ZRi  VZR  UL   VL
00799                 fadd    qword ptr[MipMagic]             ; ULLk ULk  VR   UZR  ZRi  VZR  UL   VL
00800                 fxch    st(1)                                   ; ULk  ULLk VR   UZR  ZRi  VZR  UL   VL
00801                 fstp    qword ptr[Bucket]               ; ULLk VR   UZR  ZRi  VZR  UL   VL
00802                 fstp    qword ptr[Bucket2]              ; VR   UZR  ZRi  VZR  UL   VL
00803 
00804                 mov             ebx,dword ptr[Bucket]
00805                 mov             eax,dword ptr[Bucket2]
00806 
00807                 fld             st(5)                                   ; VL   VR   UZR  ZRi  VZR  UL   VL
00808                 fmul    [GLMapMulV]                             ; VLL  VR   UZR  ZRi  VZR  UL   VL
00809 
00810                 add             ebx,dword ptr[UAdjust]
00811                 add             eax,dword ptr[UAdjustL]
00812 
00813                 mov             [U1],ebx
00814                 mov             [UFixed],eax
00815 
00816                 fld             st(6)                                   ; VL   VLL  VR   UZR  ZRi  VZR  UL   VL
00817                 fadd    qword ptr[MipMagic]             ; VLk  VLL  VR   UZR  ZRi  VZR  UL   VL
00818                 fxch    st(1)                                   ; VLL  VLk  VR   UZR  ZRi  VZR  UL   VL
00819                 fadd    qword ptr[MipMagic]             ; VLLk VLk  VR   UZR  ZRi  VZR  UL   VL
00820                 fxch    st(1)                                   ; VLk  VLLk VR   UZR  ZRi  VZR  UL   VL
00821                 fstp    qword ptr[Bucket]               ; VLLk VR   UZR  ZRi  VZR  UL   VL
00822                 fstp    qword ptr[Bucket2]              ; VR   UZR  ZRi  VZR  UL   VL
00823                 fsubr   st(5),st                                ; VR   UZR  ZRi  VZR  UL   dV
00824 
00825                 mov             ebx,dword ptr[Bucket]
00826                 mov             eax,dword ptr[Bucket2]
00827 
00828                 fld             dword ptr[FloatTemp]    ; UR   VR   UZR  ZRi  VZR  UL   dV
00829 
00830                 add             ebx,dword ptr[VAdjust]
00831                 add             eax,dword ptr[VAdjustL]
00832 
00833                 mov             [V1],ebx
00834                 mov             [VFixed],eax
00835 
00836                 fsubr   st(5),st                                ; UR   VR   UZR  ZRi  VZR  dU   dV
00837                 fxch    st(6)                                   ; dV   VR   UZR  ZRi  VZR  dU   UR
00838                 fadd    qword ptr[MipMagic2]    ; dVk  VR   UZR  ZRi  VZR  dU   UR
00839                 fxch    st(5)                                   ; dU   VR   UZR  ZRi  VZR  dVk  UR
00840                 fadd    qword ptr[MipMagic2]    ; dUk  VR   UZR  ZRi  VZR  dVk  UR
00841                 fxch    st(5)                                   ; dVk  VR   UZR  ZRi  VZR  dUk  UR
00842                 fstp    qword ptr[DeltaV]               ; VR   UZR  ZRi  VZR  dUk  UR
00843                 fxch    st(5)                                   ; UR   UZR  ZRi  VZR  dUk  VR
00844                 //gotta do this to get em lined back up right
00845                 fxch    st(4)                                   ; dUk  UZR  ZRi  VZR  UR   VR
00846                 fstp    qword ptr[DeltaU]               ; UZR  ZRi  VZR  UR   VR
00847 
00848                 //right becomes left                    ; UZL  ZLi  VZL  UL   VL
00849                 fadd    [UDivZ16StepX]                  ; UZR  ZLi  VZL  UL   VL
00850                 fxch    st(1)                                   ; ZLi  UZR  VZL  UL   VL
00851                 fadd    [Zi16StepX]                             ; ZRi  UZR  VZL  UL   VL
00852                 fxch    st(2)                                   ; VZL  UZR  ZRi  UL   VL
00853                 fadd    [VDivZ16StepX]                  ; VZR  UZR  ZRi  UL   VL
00854                 fxch    st(2)                                   ; ZRi  UZR  VZR  UL   VL
00855                 fxch    st(1)                                   ; UZR  ZRi  VZR  UL   VL
00856                 //need those fxch to line things up for loops (bad)
00857 
00858                 // Clamp U/V
00859                 mov             ebx,[UFixed]
00860                 cmp             ebx,MaxU
00861                 jle             TryClampU016
00862                 mov             ecx,MaxU
00863                 mov             dword ptr[UFixed],ecx
00864                 jmp             NoClampU016
00865 
00866 TryClampU016:
00867                 cmp             ebx,0
00868                 jge             NoClampU016
00869                 mov             dword ptr[UFixed],0
00870 NoClampU016:
00871                 mov             eax,[VFixed]
00872                 cmp             eax,MaxV
00873                 jle             TryClampV016
00874                 mov             ecx,MaxV
00875                 mov             dword ptr[VFixed],ecx
00876                 jmp             NoClampV016
00877 
00878 TryClampV016:
00879                 cmp             eax,0
00880                 jge             NoClampV016
00881                 mov             dword ptr[VFixed],0
00882 
00883 NoClampV016:
00884 
00885                 fld1                                                    ; 1    UZR  ZRi  VZR  UL   VL
00886 
00887                 // Cache U1/V1
00888                 mov             ebx,dword ptr[UFixed]
00889 
00890                 fdiv    st,st(2)                        ; ZR   UZR  ZRi  VZR  UL   VL
00891 
00892 
00893                 mov             eax,dword ptr[VFixed]
00894 
00895                 mov             ebx,dword ptr[U1]
00896                 mov             eax,dword ptr[V1]
00897 
00898                 add             ebx,dword ptr[UAdjust2]
00899                 add             eax,dword ptr[VAdjust2]
00900 
00901                 mov             ecx,16
00902                 mov             dword ptr[Bucket],ebx
00903 
00904                 sub             ecx,[VShift]
00905                 mov             dword ptr[Bucket2],eax
00906 
00907                 shr             eax,cl
00908 
00909                 push    ebp
00910                 shr             ebx,16
00911 
00912                 and             eax,[GHMaskShifted]
00913                 mov             esi,[pTex]
00914 
00915                 and             ebx,[GWMask]
00916                 add             esi,eax
00917 
00918                 mov             ecx,[VShift]
00919                 add             esi,ebx
00920 
00921                 mov             edx,dword ptr[Bucket2]
00922                 mov             ebp,dword ptr[DeltaV]
00923 
00924                 mov             ebx,dword ptr[Bucket]
00925 
00926                 //do 8 pixels
00927 
00928                 add             edx,ebp
00929                 mov             ax,[2*esi]
00930 
00931                 mov             esi,edx
00932                 add             ebx,dword ptr[DeltaU]
00933 
00934                 shl             esi,cl
00935                 and             ebx,[GWMaskShifted]
00936 
00937                 and             esi,[GHMaskShifted16]
00938 
00939                 add             esi,ebx
00940                 mov             [edi+0],ax
00941 
00942                 shr             esi,16
00943                 add             edx,ebp
00944 
00945                 add             esi,pTex
00946                 add             ebx,dword ptr[DeltaU]
00947 
00948                 mov             ax,[2*esi]
00949                 mov             esi,edx
00950 
00951                 and             ebx,[GWMaskShifted]
00952                 mov             [edi+2],ax
00953 
00954                 shl             esi,cl
00955                 add             edx,ebp
00956 
00957                 and             esi,[GHMaskShifted16]
00958 
00959                 add             esi,ebx
00960                 shr             esi,16
00961 
00962                 add             ebx,dword ptr[DeltaU]
00963                 add             esi,pTex
00964 
00965                 mov             ax,[2*esi]
00966                 mov             esi,edx
00967 
00968                 and             ebx,[GWMaskShifted]
00969                 mov             [edi+4],ax
00970 
00971                 shl             esi,cl
00972                 add             edx,ebp
00973 
00974                 and             esi,[GHMaskShifted16]
00975 
00976                 add             esi,ebx
00977                 shr             esi,16
00978 
00979                 add             ebx,dword ptr[DeltaU]
00980                 add             esi,pTex
00981 
00982                 mov             ax,[2*esi]
00983                 mov             esi,edx
00984 
00985                 and             ebx,[GWMaskShifted]
00986                 mov             [edi+6],ax
00987 
00988                 shl             esi,cl
00989                 add             edx,ebp
00990 
00991                 and             esi,[GHMaskShifted16]
00992 
00993                 add             esi,ebx
00994                 shr             esi,16
00995 
00996                 add             ebx,dword ptr[DeltaU]
00997                 add             esi,pTex
00998 
00999                 mov             ax,[2*esi]
01000                 mov             esi,edx
01001 
01002                 and             ebx,[GWMaskShifted]
01003                 mov             [edi+8],ax
01004 
01005                 shl             esi,cl
01006                 add             edx,ebp
01007 
01008                 and             esi,[GHMaskShifted16]
01009 
01010                 add             esi,ebx
01011                 shr             esi,16
01012 
01013                 add             ebx,dword ptr[DeltaU]
01014                 add             esi,pTex
01015 
01016                 mov             ax,[2*esi]
01017                 mov             esi,edx
01018 
01019                 and             ebx,[GWMaskShifted]
01020                 mov             [edi+10],ax
01021 
01022                 shl             esi,cl
01023                 add             edx,ebp
01024 
01025                 and             esi,[GHMaskShifted16]
01026 
01027                 add             esi,ebx
01028                 shr             esi,16
01029 
01030                 add             ebx,dword ptr[DeltaU]
01031                 add             esi,pTex
01032 
01033                 mov             ax,[2*esi]
01034                 mov             esi,edx
01035 
01036                 and             ebx,[GWMaskShifted]
01037                 mov             [edi+12],ax
01038 
01039                 shl             esi,cl
01040                 add             edx,ebp
01041 
01042                 and             esi,[GHMaskShifted16]
01043 
01044                 add             esi,ebx
01045                 shr             esi,16
01046 
01047                 add             ebx,dword ptr[DeltaU]
01048                 add             esi,pTex
01049 
01050                 mov             ax,[2*esi]
01051                 mov             esi,edx
01052 
01053                 pop     ebp
01054                 mov             [edi+14],ax
01055 
01056                 ;get corrected right side deltas; st0  st1  st2  st3  st4  st5  st6  st7
01057                                                                                 ; ZR   UZR  ZRi  VZR  UL   VL
01058                 fld             st                                              ; ZR   ZR   UZR  ZRi  VZR  UL   VL
01059                 fmul    st,st(4)                                ; VR   ZR   UZR  ZRi  VZR  UL   VL
01060                 fxch    st(1)                                   ; ZR   VR   UZR  ZRi  VZR  UL   VL
01061                 fmul    st,st(2)                                ; UR   VR   UZR  ZRi  VZR  UL   VL
01062 
01063                 add             edi,16
01064                 dec             [NumASpans]                     ; dec num affine spans
01065                 jnz             SpanLoop16                              ; loop back
01066 
01067 HandleLeftoverPixels16:
01068 
01069                 mov             esi,[pTex]
01070 
01071 
01072                 cmp             [RemainingCount],0
01073                 jz              FPUReturn16
01074 
01075                 //need one more stack spot
01076                 fstp    dword ptr[FloatTemp]    ; VR   UZR  ZRi  VZR  UL   VL
01077                 fld             st(4)                                   ; UL   VR   UZR  ZRi  VZR  UL   VL
01078                 fmul    [GLMapMulU]                             ; ULL  VR   UZR  ZRi  VZR  UL   VL
01079                 fld             st(5)                                   ; UL   ULL  VR   UZR  ZRi  VZR  UL   VL
01080                 fadd    qword ptr[MipMagic]             ; ULk  ULL  VR   UZR  ZRi  VZR  UL   VL
01081                 fxch    st(1)                                   ; ULL  ULk  VR   UZR  ZRi  VZR  UL   VL
01082                 fadd    qword ptr[MipMagic]             ; ULLk ULk  VR   UZR  ZRi  VZR  UL   VL
01083                 fxch    st(1)                                   ; ULk  ULLk VR   UZR  ZRi  VZR  UL   VL
01084                 fstp    qword ptr[Bucket]               ; ULLk VR   UZR  ZRi  VZR  UL   VL
01085                 fstp    qword ptr[Bucket2]              ; VR   UZR  ZRi  VZR  UL   VL
01086 
01087                 mov             ebx,dword ptr[Bucket]
01088                 mov             eax,dword ptr[Bucket2]
01089 
01090                 fld             st(5)                                   ; VL   VR   UZR  ZRi  VZR  UL   VL
01091                 fmul    [GLMapMulV]                             ; VLL  VR   UZR  ZRi  VZR  UL   VL
01092 
01093                 add             ebx,dword ptr[UAdjust]
01094                 add             eax,dword ptr[UAdjustL]
01095 
01096                 mov             [U1],ebx
01097                 mov             [UFixed],eax
01098 
01099                 fld             st(6)                                   ; VL   VLL  VR   UZR  ZRi  VZR  UL   VL
01100                 fadd    qword ptr[MipMagic]             ; VLk  VLL  VR   UZR  ZRi  VZR  UL   VL
01101                 fxch    st(1)                                   ; VLL  VLk  VR   UZR  ZRi  VZR  UL   VL
01102                 fadd    qword ptr[MipMagic]             ; VLLk VLk  VR   UZR  ZRi  VZR  UL   VL
01103                 fxch    st(1)                                   ; VLk  VLLk VR   UZR  ZRi  VZR  UL   VL
01104                 fstp    qword ptr[Bucket]               ; VLLk VR   UZR  ZRi  VZR  UL   VL
01105                 fstp    qword ptr[Bucket2]              ; VR   UZR  ZRi  VZR  UL   VL
01106 
01107                 mov             ebx,dword ptr[Bucket]
01108                 mov             eax,dword ptr[Bucket2]
01109 
01110                 fld             dword ptr[FloatTemp]    ; UR   VR   UZR  ZRi  VZR  UL   VL
01111 
01112                 add             ebx,dword ptr[VAdjust]
01113                 add             eax,dword ptr[VAdjustL]
01114 
01115                 mov             [V1],ebx
01116                 mov             [VFixed],eax
01117 
01118                 dec             [RemainingCount]
01119                 jz              OnePixelSpan16
01120 
01121 
01122                 //pop the five stale values (UR VR UZR ZRi VZR) before recomputing the right side at x2; must get rid of this wasted time
01123                 fstp    [FloatTemp]                             ; inv. inv. inv. inv. UL   VL
01124                 fstp    [FloatTemp]                             ; inv. inv. inv. UL   VL
01125                 fstp    [FloatTemp]                             ; inv. inv. UL   VL
01126                 fstp    [FloatTemp]                             ; inv. UL   VL
01127                 fstp    [FloatTemp]                             ; UL   VL
01128                 fild    [y]                                             ; y    UL   VL
01129                 fild    [x2]                                    ; xr   y    UL   VL
01130 
01131                 fld             [UDivZStepY]                    ; UZdY xr   y    UL   VL
01132                 fld             [UDivZStepX]                    ; UZdX UZdY xr   y    UL   VL
01133                 fmul    st,st(2)                                ; UZX  UZdY xr   y    UL   VL
01134                 fld             [VDivZStepY]                    ; VZdY UZX  UZdY xr   y    UL   VL
01135                 fld             [VDivZStepX]                    ; VZdX VZdY UZX  UZdY xr   y    UL   VL
01136                 fxch    st(3)                                   ; UZdy VZdY UZX  VZdX xr   y    UL   VL
01137                 fmul    st,st(5)                                ; UZY  VZdY UZX  VZdX xr   y    UL   VL
01138                 fxch    st(2)                                   ; UZX  VZdY UZY  VZdX xr   y    UL   VL
01139                 fadd    [UDivZOrigin]                   ; UZXS VZdY UZY  VZdX xr   y    UL   VL
01140                 fxch    st(3)                                   ; VZdX VZdY UZY  UZXS xr   y    UL   VL
01141                 fmul    st,st(4)                                ; VZX  VZdY UZY  UZXS xr   y    UL   VL
01142                 fxch    st(2)                                   ; UZY  VZdY VZX  UZXS xr   y    UL   VL
01143                 faddp   st(3),st                                ; VZdY VZX  UZ   xr   y    UL   VL
01144                 fmul    st,st(4)                                ; VZY  VZX  UZ   xr   y    UL   VL
01145                 fxch    st(1)                                   ; VZX  VZY  UZ   xr   y    UL   VL
01146                 fadd    [VDivZOrigin]                   ; VZXS VZY  UZ   xr   y    UL   VL
01147                 fld             [ZiStepX]                               ; ZdX  VZXS VZY  UZ   xr   y    UL   VL
01148                 fmulp   st(4),st                                ; VZXS VZY  UZ   ZX   y    UL   VL
01149                 faddp   st(1),st                                ; VZ   UZ   ZX   y    UL   VL
01150                 fld             [ZiStepY]                               ; ZdY  VZ   UZ   ZX   y    UL   VL
01151                 fmulp   st(4),st                                ; VZ   UZ   ZX   ZY   UL   VL
01152                 fxch    st(2)                                   ; ZX   UZ   VZ   ZY   UL   VL
01153                 fadd    [ZiOrigin]                              ; ZXS  UZ   VZ   ZY   UL   VL
01154 
01155                 faddp   st(3),st                                ; UZ   VZ   Zi   UL   VL
01156                 fld1                                                    ; 1    UZ   VZ   Zi   UL   VL
01157                 fdiv    st,st(3)                                ; ZR   UZ   VZ   Zi   UL   VL
01158 
01159                 fld             st                                              ; ZR   ZR   UZ   VZ   Zi   UL   VL
01160                 fmul    st,st(3)                                ; VR   ZR   UZ   VZ   Zi   UL   VL
01161                 fxch    st(1)                                   ; ZR   VR   UZ   VZ   Zi   UL   VL
01162                 fmul    st,st(2)                                ; UR   VR   UZ   VZ   Zi   UL   VL
01163 
01164                 //lazy fidiv below (one per delta)... should compute 1/RemainingCount once and fmul both deltas by it
01165 
01166                 ; calculate deltas                              ; st0  st1  st2  st3  st4  st5  st6  st7
01167                 fsubr   st(5),st                                ; UR   VR   inv. inv. inv. dU   VL
01168                 fxch    st(1)                                   ; VR   UR   inv. inv. inv. dU   VL
01169                 fsubr   st(6),st                                ; VR   UR   inv. inv. inv. dU   dV
01170                 fxch    st(6)                                   ; dV   UR   inv. inv. inv. dU   VR
01171                 fidiv   dword ptr[RemainingCount];dv   UR   inv. inv. inv. dU   VR
01172                 fadd    qword ptr[MipMagic]             ; dvk  UR   inv. inv. inv. dU   VR
01173                 fxch    st(5)                                   ; dU   UR   inv. inv. inv. dvk  VR
01174                 fidiv   dword ptr[RemainingCount];du   UR   inv. inv. inv. dvk  VR
01175                 fadd    qword ptr[MipMagic]             ; duk  UR   inv. inv. inv. dvk  VR
01176                 fxch    st(5)                                   ; dvk  UR   inv. inv. inv. duk  VR
01177                 fstp    qword ptr[DeltaV]               ; UR   inv. inv. inv. duk  VR
01178                 fxch    st(4)                                   ; duk  inv. inv. inv. UR   VR
01179                 fstp    qword ptr[DeltaU]               ; inv. inv. inv. UR   VR
01180                 fld             st(1)                                   ; inv. inv. inv. inv. UR   VR
01181                 fld             st(2)                                   ; inv. inv. inv. inv. inv. UR   VR
01182 
01183                 // Clamp U/V
01184                 mov             ebx,[UFixed]
01185                 cmp             ebx,MaxU
01186                 jle             TryClampU116
01187                 mov             ecx,MaxU
01188                 mov             dword ptr[UFixed],ecx
01189                 jmp             NoClampU116
01190 
01191 TryClampU116:
01192                 cmp             ebx,0
01193                 jge             NoClampU116
01194                 mov             dword ptr[UFixed],0
01195 NoClampU116:
01196                 mov             eax,[VFixed]
01197                 cmp             eax,MaxV
01198                 jle             TryClampV116
01199                 mov             ecx,MaxV
01200                 mov             dword ptr[VFixed],ecx
01201                 jmp             NoClampV116
01202 
01203 TryClampV116:
01204                 cmp             eax,0
01205                 jge             NoClampV116
01206                 mov             dword ptr[VFixed],0
01207 
01208 NoClampV116:
01209                 // Cache U1/V1
01210                 mov             ebx,dword ptr[UFixed]
01211                 mov             eax,dword ptr[VFixed]
01212 
01213                 mov             ebx,dword ptr[U1]
01214                 mov             edx,dword ptr[V1]
01215 
01216                 mov             ecx,16
01217                 add             ebx,dword ptr[UAdjust2]
01218 
01219                 sub             ecx,[VShift]
01220                 add             edx,dword ptr[VAdjust2]
01221 
01222 //              mov             dword ptr[Bucket],ebx
01223 //              mov             dword ptr[Bucket2],eax
01224 
01225 OnePixelSpan16:
01226                 push    ebp
01227 
01228 LeftoverLoop16:
01229                 mov             eax,edx
01230                 shr             eax,cl
01231                 mov             ebp,ebx
01232                 and             eax,[GHMaskShifted]
01233                 shr             ebp,16
01234                 and             ebp,[GWMask]
01235                 add             eax,ebp
01236                 add             eax,esi
01237                 mov             ax,[2*eax]
01238                 mov             [edi],ax
01239                 add             ebx,dword ptr[DeltaU]
01240                 add             edi,2
01241                 add             edx,dword ptr[DeltaV]
01242 
01243                 dec             [RemainingCount]
01244                 jge             LeftoverLoop16
01245 
01246                 pop             ebp
01247 
01248 FPUReturn16:
01249                 ffree   st(0)
01250                 ffree   st(1)
01251                 ffree   st(2)
01252                 ffree   st(3)
01253                 ffree   st(4)
01254                 ffree   st(5)
01255                 ffree   st(6)
01256 
01257 Return16:
01258         }
01259 //      LightCachedSpan16_AsmLerpLUT(x1, x2, y);
01260 //      LightCachedSpan16_AsmLerpFPU(x1, x2, y);
01261 }
01262 
01263 

Generated on Tue Sep 30 12:35:39 2003 for GTestAndEngine by doxygen 1.3.2