;Skein-256-256 and ThreeFish-256 for '51
; (C) 2008 by Jan Waclawek http://www.efton.sk
;
;free for personal/academic use,
; for commercial use please contact author: wek at efton dot sk
;
;Skein-256-256 implementation parameters:
; -- uses the same resources as ThreeFish, plus 3 bytes of stack
;    (1 byte can be exchanged for a register)
; -- no fractional bits
; -- no tree mode etc., plain hash
; -- max input = 64kBytes
; -- if other output size is desired, exchange Skein256256HashInitTab
;    for the other initial vector, and take only the appropriate number
;    of bytes out of the output
; -- not optimised for size nor speed
; -- can be sped up by unrolling, and taking into account zero bytes
;    of most of tweak in initial part of ThreeFish
;
;
;
;ThreeFish-256 implementation parameters:
;encryption only
;uses:
; -- 32+24=56 bytes of direct addressable RAM(*)
; -- 40 bytes of indirect addressable RAM
; -- 0 bytes of stack
; -- for register usage see comments at the beginning of code
; -- depending on application, all of these variables are overlayable
; -- (*) up to 16 bytes of direct addressable memory has to be freed
;    if Tweak is moved to indirect, but then 8 bytes have to be
;    used for initial key injection, or the key injection has to
;    be rewritten in a slightly less optimal way not to use those
;    8 bytes out of Tweak
;
;cycles (for "classical" 12/6/2-clocker):
; -- 12760 including call and ret, if all optimisations are on
;
;code memory (FLASH/ROM) size:
; -- 2543 bytes
;
;
;
;Skein hash, and ThreeFish block cipher by
; Niels Ferguson - Stefan Lucks - Bruce Schneier - Doug Whiting -
; Mihir Bellare - Tadayoshi Kohno - Jon Callas - Jesse Walker
;Homepage:
; http://www.schneier.com/skein.html
;
;
;
;Implementation notes (see also comments in code):
;
;internal memory used only (*)
; -- deliberately leaving some of the direct addressable space free,
;    even at a cost of a few extra cycles
;unrolled 8 loops as per recommendation in specs
;aggressively unrolled everything possible, trading code memory for clocks
;full tweak/key input - not optimised for SKEIN
;
;(*) using internal memory is fast, on the other hand it prevents us from using
;    the trick with pre-computed key+tweaks, as it would consume additional
;    3*4=12 words = 96 bytes
; -- think about it when eventually moving some data to external memory
;
;There are several conditional branches TFISH_SLOWx, which if set to nonzero,
;would use the multiplication version rather than the shifting/swapping
;This might be faster on certain single-clock (non-cycle-compatible)
;'51 derivatives - try.

;set the following conditionals to 1 to build a small test application with which
; ThreeFish and Skein can be tested against published test vectors in a simulator
TEST_SKEIN      SET     1
TEST_THREEFISH  SET     1

; $NOMOD51
; $MOD52
$INCLUDE(MOD52)

WORDSIZE        EQU     8               ;bytes per word (4 words = 32 bytes, see X below)
DATASIZE        EQU     4               ;words per data block
ROUNDS          EQU     72              ;presumably the ThreeFish round count - used outside this part of the file

        DSEG    AT 30h
X:      ds      WORDSIZE*DATASIZE       ;plain&ciphertext  4 words = 32 bytes
Tweak:  ds      WORDSIZE*(2+1)          ;2+1 words = 16+8 = 24 bytes
I_placeholder:                          ;address just past Tweak; the ISEG below continues from here

        ISEG    AT I_placeholder
;the following can be entirely in the indirect RAM (i.e. above 080h)
; -- we could possibly spare some cycles by forcing it into direct ram
;    and unrolling loops, but that might be too limiting for the rest of
;    application, even if this might be overlayable area, interrupts etc.
; should have some space to live
;
Key:    ds      WORDSIZE*(DATASIZE+1)   ;4+1 words = 32+8 = 40 bytes
Stack:                                  ;placeholder for stack

        CSEG

IF (TEST_THREEFISH = 1) OR (TEST_SKEIN = 1)
;Entry point of the test application.
;The '51 increments SP before storing, so loading Stack-1 makes the first
;pushed byte land exactly at Stack.
Reset:
        mov     sp,#Stack-1
ENDIF

IF (TEST_SKEIN = 1)
;------------------------------------- Tests ------------------------------------
;TestSkein: runs Skein over every record of TestSkeinTab and compares the
;hash left in X with the expected value stored in the table.
;
;Record layout in TestSkeinTab (CODE memory):
;   dw  message length in bytes   (first byte -> r7, second byte -> r6)
;   db  message bytes             (may be empty)
;   db  32 bytes of expected hash
;
;The compare loop reads the expected hash through DPTR right after the call,
;i.e. it relies on Skein returning with DPTR pointing just past the message.
;
;Outcome:
;   all records match -> continues with TestThreeFish if that test is built,
;                        otherwise spins at SkeinOK
;   any mismatch      -> jumps to Error if TestThreeFish is built (the label
;                        lives there), otherwise spins at SkeinError
;
;NOTE(review): SKEIN_XDATA is defined only inside this conditional - confirm
;that builds with TEST_SKEIN = 0 define it elsewhere before Skein uses it.
SKEIN_XDATA     SET     0               ;0 = Skein reads its input from CODE memory

TestSkein:
        mov     dptr,#TestSkeinTab
TSLoop:
        clr     a                       ;fetch message length into r7:r6
        movc    a,@a+dptr
        inc     dptr
        mov     r7,a
        clr     a
        movc    a,@a+dptr
        inc     dptr
        mov     r6,a
        call    Skein                   ;DPTR -> message, r7:r6 = length; hash returned in X
        mov     r0,#X
TSLoopX1:
        clr     a                       ;compare X with the expected hash, byte by byte
        movc    a,@a+dptr
        inc     dptr
        xrl     a,@r0
        jnz     SkeinError              ;any differing bit -> test failed
        inc     r0
        cjne    r0,#X+WORDSIZE*DATASIZE,TSLoopX1
        mov     a,dpl                   ;more records until DPTR reaches TestSkeinTabEnd
        cjne    a,#LOW(TestSkeinTabEnd),TSLoop
        mov     a,dph
        cjne    a,#HIGH(TestSkeinTabEnd),TSLoop
SkeinOK:                                ;all records matched (reached by fallthrough)
IF (TEST_THREEFISH = 1)
        jmp     TestThreeFish
SkeinError:
        jmp     Error
ELSE
;TestThreeFish and Error do not exist in this configuration - park locally
;so that the Skein test can be built on its own
        sjmp    SkeinOK
SkeinError:
        sjmp    SkeinError
ENDIF

TestSkeinTab:
        dw      TestSkeinTabResult1-TestSkeinTabMessagedata1
TestSkeinTabMessagedata1:
        db      0FFh
TestSkeinTabResult1:
        db      0A4h,07Bh,0E7h,01Ah,018h,05Bh,0A0h,0AFh,082h,00Bh,03Ch,0E8h,045h,0A3h,0D3h,05Ah
        db      080h,0ECh,064h,0F9h,06Ah,00Dh,06Ah,036h,0E3h,0F5h,036h,036h,024h,0D8h,0A0h,091h

        dw      TestSkeinTabResult2-TestSkeinTabMessagedata2
TestSkeinTabMessagedata2:
        db      0FFh,0FEh,0FDh,0FCh,0FBh,0FAh,0F9h,0F8h,0F7h,0F6h,0F5h,0F4h,0F3h,0F2h,0F1h,0F0h
        db      0EFh,0EEh,0EDh,0ECh,0EBh,0EAh,0E9h,0E8h,0E7h,0E6h,0E5h,0E4h,0E3h,0E2h,0E1h,0E0h
TestSkeinTabResult2:
        db      0CCh,02Dh,0A8h,02Fh,039h,073h,0C2h,0F7h,0A8h,0CEh,0D0h,0BBh,0B5h,04Ah,0A0h,028h
        db      0ECh,0AFh,06Bh,059h,011h,062h,08Dh,00Fh,0FAh,0BBh,020h,008h,0E4h,011h,0D1h,071h

        dw      TestSkeinTabResult3-TestSkeinTabMessagedata3
TestSkeinTabMessagedata3:
        db      0FFh,0FEh,0FDh,0FCh,0FBh,0FAh,0F9h,0F8h,0F7h,0F6h,0F5h,0F4h,0F3h,0F2h,0F1h,0F0h
        db      0EFh,0EEh,0EDh,0ECh,0EBh,0EAh,0E9h,0E8h,0E7h,0E6h,0E5h,0E4h,0E3h,0E2h,0E1h,0E0h
        db      0DFh,0DEh,0DDh,0DCh,0DBh,0DAh,0D9h,0D8h,0D7h,0D6h,0D5h,0D4h,0D3h,0D2h,0D1h,0D0h
        db      0CFh,0CEh,0CDh,0CCh,0CBh,0CAh,0C9h,0C8h,0C7h,0C6h,0C5h,0C4h,0C3h,0C2h,0C1h,0C0h
TestSkeinTabResult3:
        db      0FAh,01Ah,076h,02Bh,06Bh,01Ch,072h,0B7h,00Dh,052h,092h,063h,053h,0E1h,00Eh,0B8h
        db 
0FBh,00Eh,0DDh,073h,013h,0DAh,020h,0A2h,041h,031h,080h,0B8h,0E2h,089h,0B8h,072h ;Skein-256: 256-bit hash, msgLen = 0 bits, data = 'zero' dw TestSkeinTabResult4-TestSkeinTabMessagedata4 TestSkeinTabMessagedata4: TestSkeinTabResult4: db 0BCh,027h,063h,0F7h,007h,0E2h,062h,0B8h,00Eh,003h,013h,079h,015h,043h,0A7h,0ABh db 00Ah,04Bh,06Ch,0D0h,083h,027h,00Ah,0FBh,02Fh,0CEh,042h,072h,0E1h,0BBh,00Ah,0A9h ;Skein-256: 256-bit hash, msgLen = 8 bits, data = 'zero' dw TestSkeinTabResult5-TestSkeinTabMessagedata5 TestSkeinTabMessagedata5: db 000h TestSkeinTabResult5: db 0A6h,0CAh,0BFh,02Dh,0B0h,01Ah,06Fh,027h,042h,040h,042h,093h,064h,039h,092h,08Ch db 0A8h,038h,054h,093h,052h,094h,0DFh,098h,019h,0E7h,0C4h,0C6h,061h,0FCh,031h,09Fh ;Skein-256: 256-bit hash, msgLen = 32 bits, data = 'zero' dw TestSkeinTabResult6-TestSkeinTabMessagedata6 TestSkeinTabMessagedata6: db 000h,000h,000h,000h TestSkeinTabResult6: db 0A8h,017h,045h,0A0h,0D1h,05Dh,010h,032h,04Ah,044h,050h,069h,041h,023h,0EDh,063h db 02Ah,0E3h,06Ah,069h,030h,08Bh,085h,012h,0F5h,06Eh,0A5h,03Ch,04Bh,074h,0D1h,018h ;Skein-256: 256-bit hash, msgLen = 64 bits, data = 'zero' dw TestSkeinTabResult7-TestSkeinTabMessagedata7 TestSkeinTabMessagedata7: db 000h,000h,000h,000h,000h,000h,000h,000h TestSkeinTabResult7: db 070h,003h,05Eh,0DFh,023h,05Ah,044h,09Ch,043h,036h,0B8h,022h,02Dh,0F8h,089h,0FBh db 001h,07Ch,0FAh,070h,035h,077h,00Dh,00Fh,0D9h,08Fh,067h,087h,0ACh,0C9h,07Bh,06Ch ;Skein-256: 256-bit hash, msgLen = 128 bits, data = 'zero' dw TestSkeinTabResult8-TestSkeinTabMessagedata8 TestSkeinTabMessagedata8: db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h TestSkeinTabResult8: db 0F6h,0C8h,039h,070h,0CFh,027h,0D7h,0C1h,0C6h,0FBh,0A8h,08Bh,0D8h,0DBh,0DCh,06Eh db 073h,09Eh,0ECh,04Fh,074h,054h,0AEh,0F0h,068h,03Ah,0C5h,02Ah,02Ah,0E2h,08Dh,04Eh ;Skein-256: 256-bit hash, msgLen = 192 bits, data = 'zero' dw TestSkeinTabResult9-TestSkeinTabMessagedata9 TestSkeinTabMessagedata9: db 
000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h TestSkeinTabResult9: db 05Ah,0CEh,0BDh,060h,0D8h,08Ch,056h,0EFh,03Eh,022h,04Ah,053h,05Eh,0B1h,0B8h,083h db 014h,0CCh,0A8h,055h,0F4h,0AEh,011h,04Ah,071h,033h,086h,049h,0FAh,01Ch,023h,0BDh ;Skein-256: 256-bit hash, msgLen = 256 bits, data = 'zero' dw TestSkeinTabResult10-TestSkeinTabMessagedata10 TestSkeinTabMessagedata10: db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h TestSkeinTabResult10: db 0BDh,026h,091h,0B6h,08Fh,09Bh,0E3h,03Eh,060h,089h,0DAh,056h,00Dh,07Ch,051h,09Dh db 0C2h,044h,053h,03Dh,0A7h,01Eh,07Bh,03Bh,011h,0E2h,094h,0A1h,05Eh,06Ah,06Ch,030h ;Skein-256: 256-bit hash, msgLen = 384 bits, data = 'zero' dw TestSkeinTabResult11-TestSkeinTabMessagedata11 TestSkeinTabMessagedata11: db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h TestSkeinTabResult11: db 04Eh,00Eh,02Eh,006h,0C1h,0DCh,0AFh,0B5h,093h,0EAh,00Eh,038h,0DFh,044h,0E2h,0E0h db 08Fh,014h,005h,0A4h,0C8h,03Bh,0B7h,0E3h,060h,0B0h,097h,015h,0DAh,09Dh,033h,0B0h ;Skein-256: 256-bit hash, msgLen = 512 bits, data = 'zero' dw TestSkeinTabResult12-TestSkeinTabMessagedata12 TestSkeinTabMessagedata12: db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h TestSkeinTabResult12: db 0B7h,0CCh,086h,069h,07Dh,0C3h,072h,00Ch,0B6h,0D0h,038h,006h,077h,00Bh,038h,086h db 
05Ch,05Fh,09Ch,0FEh,05Ah,027h,0FDh,0D3h,0F9h,0A7h,0E6h,0D4h,0E0h,0A4h,035h,08Bh ;Skein-256: 256-bit hash, msgLen = 768 bits, data = 'zero' dw TestSkeinTabResult13-TestSkeinTabMessagedata13 TestSkeinTabMessagedata13: db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h TestSkeinTabResult13: db 006h,0F8h,074h,086h,064h,02Ah,0F0h,093h,041h,0A1h,0A8h,019h,006h,0E0h,08Eh,06Ah db 040h,07Ch,05Ah,050h,014h,0F1h,021h,061h,094h,022h,06Fh,0D4h,0E9h,029h,09Dh,0D7h ;Skein-256: 256-bit hash, msgLen = 1024 bits, data = 'zero' dw TestSkeinTabResult14-TestSkeinTabMessagedata14 TestSkeinTabMessagedata14: db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h TestSkeinTabResult14: db 0DBh,065h,089h,05Eh,0B2h,011h,0FBh,09Ch,0E3h,0B6h,00Bh,002h,030h,0C2h,0FDh,0A1h db 0EFh,0BAh,03Bh,06Eh,0DCh,023h,0E9h,027h,0C6h,00Bh,024h,074h,01Eh,042h,039h,0C9h ;Skein-256: 256-bit hash, msgLen = 2048 bits, data = 'zero' dw 
TestSkeinTabResult15-TestSkeinTabMessagedata15 TestSkeinTabMessagedata15: db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h TestSkeinTabResult15: db 0C2h,0B3h,000h,031h,04Ch,00Eh,066h,0DCh,03Eh,02Ah,044h,029h,0B1h,0F6h,03Dh,0F3h db 0C2h,0E6h,05Ch,088h,065h,0E7h,0C3h,085h,0BAh,0FEh,0A4h,02Ch,09Eh,02Ch,06Fh,042h ;Skein-256: 256-bit hash, msgLen = 0 bits, data = 'incrementing' dw TestSkeinTabResult16-TestSkeinTabMessagedata16 TestSkeinTabMessagedata16: TestSkeinTabResult16: db 0BCh,027h,063h,0F7h,007h,0E2h,062h,0B8h,00Eh,003h,013h,079h,015h,043h,0A7h,0ABh db 00Ah,04Bh,06Ch,0D0h,083h,027h,00Ah,0FBh,02Fh,0CEh,042h,072h,0E1h,0BBh,00Ah,0A9h ;Skein-256: 256-bit hash, msgLen = 8 bits, data = 'incrementing' dw 
TestSkeinTabResult17-TestSkeinTabMessagedata17 TestSkeinTabMessagedata17: db 0FFh TestSkeinTabResult17: db 0A4h,07Bh,0E7h,01Ah,018h,05Bh,0A0h,0AFh,082h,00Bh,03Ch,0E8h,045h,0A3h,0D3h,05Ah db 080h,0ECh,064h,0F9h,06Ah,00Dh,06Ah,036h,0E3h,0F5h,036h,036h,024h,0D8h,0A0h,091h ;Skein-256: 256-bit hash, msgLen = 32 bits, data = 'incrementing' dw TestSkeinTabResult18-TestSkeinTabMessagedata18 TestSkeinTabMessagedata18: db 0FFh,0FEh,0FDh,0FCh TestSkeinTabResult18: db 0A6h,01Eh,060h,021h,0BFh,0E2h,047h,0D7h,05Ah,0A6h,0C9h,002h,070h,021h,075h,0CFh db 0D4h,0DEh,005h,0B4h,092h,0AAh,02Dh,039h,09Dh,0ACh,03Fh,066h,0B0h,02Eh,0A9h,03Bh ;Skein-256: 256-bit hash, msgLen = 64 bits, data = 'incrementing' dw TestSkeinTabResult19-TestSkeinTabMessagedata19 TestSkeinTabMessagedata19: db 0FFh,0FEh,0FDh,0FCh,0FBh,0FAh,0F9h,0F8h TestSkeinTabResult19: db 097h,015h,051h,07Ch,0AFh,0ACh,081h,00Bh,0C7h,04Ah,00Fh,0CAh,037h,09Eh,0DBh,011h db 095h,0EFh,0B0h,0EDh,050h,0E8h,089h,053h,0ACh,0D1h,039h,00Fh,050h,08Bh,094h,0ABh ;Skein-256: 256-bit hash, msgLen = 128 bits, data = 'incrementing' dw TestSkeinTabResult20-TestSkeinTabMessagedata20 TestSkeinTabMessagedata20: db 0FFh,0FEh,0FDh,0FCh,0FBh,0FAh,0F9h,0F8h,0F7h,0F6h,0F5h,0F4h,0F3h,0F2h,0F1h,0F0h TestSkeinTabResult20: db 034h,060h,0D8h,00Fh,01Bh,0E5h,043h,0D0h,07Fh,02Eh,075h,0C2h,05Bh,0E2h,073h,03Ch db 0CDh,075h,0F5h,066h,0B1h,098h,0F0h,02Fh,0DDh,0C9h,01Ch,0E4h,0F2h,0D4h,08Bh,0D2h ;Skein-256: 256-bit hash, msgLen = 192 bits, data = 'incrementing' dw TestSkeinTabResult21-TestSkeinTabMessagedata21 TestSkeinTabMessagedata21: db 0FFh,0FEh,0FDh,0FCh,0FBh,0FAh,0F9h,0F8h,0F7h,0F6h,0F5h,0F4h,0F3h,0F2h,0F1h,0F0h db 0EFh,0EEh,0EDh,0ECh,0EBh,0EAh,0E9h,0E8h TestSkeinTabResult21: db 0A6h,0B2h,018h,017h,0BAh,050h,0C8h,0E6h,075h,05Bh,024h,08Dh,076h,0AAh,031h,00Fh db 0B8h,06Ah,0AEh,069h,035h,057h,079h,09Ah,079h,05Ah,054h,091h,012h,084h,0FDh,0C0h ;Skein-256: 256-bit hash, msgLen = 256 bits, data = 'incrementing' dw TestSkeinTabResult22-TestSkeinTabMessagedata22 
TestSkeinTabMessagedata22: db 0FFh,0FEh,0FDh,0FCh,0FBh,0FAh,0F9h,0F8h,0F7h,0F6h,0F5h,0F4h,0F3h,0F2h,0F1h,0F0h db 0EFh,0EEh,0EDh,0ECh,0EBh,0EAh,0E9h,0E8h,0E7h,0E6h,0E5h,0E4h,0E3h,0E2h,0E1h,0E0h TestSkeinTabResult22: db 0CCh,02Dh,0A8h,02Fh,039h,073h,0C2h,0F7h,0A8h,0CEh,0D0h,0BBh,0B5h,04Ah,0A0h,028h db 0ECh,0AFh,06Bh,059h,011h,062h,08Dh,00Fh,0FAh,0BBh,020h,008h,0E4h,011h,0D1h,071h ;Skein-256: 256-bit hash, msgLen = 384 bits, data = 'incrementing' dw TestSkeinTabResult23-TestSkeinTabMessagedata23 TestSkeinTabMessagedata23: db 0FFh,0FEh,0FDh,0FCh,0FBh,0FAh,0F9h,0F8h,0F7h,0F6h,0F5h,0F4h,0F3h,0F2h,0F1h,0F0h db 0EFh,0EEh,0EDh,0ECh,0EBh,0EAh,0E9h,0E8h,0E7h,0E6h,0E5h,0E4h,0E3h,0E2h,0E1h,0E0h db 0DFh,0DEh,0DDh,0DCh,0DBh,0DAh,0D9h,0D8h,0D7h,0D6h,0D5h,0D4h,0D3h,0D2h,0D1h,0D0h TestSkeinTabResult23: db 0AFh,011h,02Fh,0AEh,04Ah,0A8h,040h,0E2h,07Eh,062h,081h,08Dh,006h,061h,004h,075h db 0D9h,056h,08Ah,084h,034h,059h,05Dh,049h,0D6h,02Fh,086h,047h,0AAh,06Ch,019h,027h ;Skein-256: 256-bit hash, msgLen = 512 bits, data = 'incrementing' dw TestSkeinTabResult24-TestSkeinTabMessagedata24 TestSkeinTabMessagedata24: db 0FFh,0FEh,0FDh,0FCh,0FBh,0FAh,0F9h,0F8h,0F7h,0F6h,0F5h,0F4h,0F3h,0F2h,0F1h,0F0h db 0EFh,0EEh,0EDh,0ECh,0EBh,0EAh,0E9h,0E8h,0E7h,0E6h,0E5h,0E4h,0E3h,0E2h,0E1h,0E0h db 0DFh,0DEh,0DDh,0DCh,0DBh,0DAh,0D9h,0D8h,0D7h,0D6h,0D5h,0D4h,0D3h,0D2h,0D1h,0D0h db 0CFh,0CEh,0CDh,0CCh,0CBh,0CAh,0C9h,0C8h,0C7h,0C6h,0C5h,0C4h,0C3h,0C2h,0C1h,0C0h TestSkeinTabResult24: db 0FAh,01Ah,076h,02Bh,06Bh,01Ch,072h,0B7h,00Dh,052h,092h,063h,053h,0E1h,00Eh,0B8h db 0FBh,00Eh,0DDh,073h,013h,0DAh,020h,0A2h,041h,031h,080h,0B8h,0E2h,089h,0B8h,072h ;Skein-256: 256-bit hash, msgLen = 768 bits, data = 'incrementing' dw TestSkeinTabResult25-TestSkeinTabMessagedata25 TestSkeinTabMessagedata25: db 0FFh,0FEh,0FDh,0FCh,0FBh,0FAh,0F9h,0F8h,0F7h,0F6h,0F5h,0F4h,0F3h,0F2h,0F1h,0F0h db 0EFh,0EEh,0EDh,0ECh,0EBh,0EAh,0E9h,0E8h,0E7h,0E6h,0E5h,0E4h,0E3h,0E2h,0E1h,0E0h db 
0DFh,0DEh,0DDh,0DCh,0DBh,0DAh,0D9h,0D8h,0D7h,0D6h,0D5h,0D4h,0D3h,0D2h,0D1h,0D0h db 0CFh,0CEh,0CDh,0CCh,0CBh,0CAh,0C9h,0C8h,0C7h,0C6h,0C5h,0C4h,0C3h,0C2h,0C1h,0C0h db 0BFh,0BEh,0BDh,0BCh,0BBh,0BAh,0B9h,0B8h,0B7h,0B6h,0B5h,0B4h,0B3h,0B2h,0B1h,0B0h db 0AFh,0AEh,0ADh,0ACh,0ABh,0AAh,0A9h,0A8h,0A7h,0A6h,0A5h,0A4h,0A3h,0A2h,0A1h,0A0h TestSkeinTabResult25: db 030h,070h,095h,0DFh,04Ah,00Dh,0F7h,092h,0E1h,0ABh,068h,06Fh,0F6h,05Ch,016h,0E1h db 0F6h,02Bh,0B3h,060h,041h,0DEh,088h,0ECh,026h,0D4h,083h,05Fh,036h,004h,07Fh,04Fh ;Skein-256: 256-bit hash, msgLen = 1024 bits, data = 'incrementing' dw TestSkeinTabResult26-TestSkeinTabMessagedata26 TestSkeinTabMessagedata26: db 0FFh,0FEh,0FDh,0FCh,0FBh,0FAh,0F9h,0F8h,0F7h,0F6h,0F5h,0F4h,0F3h,0F2h,0F1h,0F0h db 0EFh,0EEh,0EDh,0ECh,0EBh,0EAh,0E9h,0E8h,0E7h,0E6h,0E5h,0E4h,0E3h,0E2h,0E1h,0E0h db 0DFh,0DEh,0DDh,0DCh,0DBh,0DAh,0D9h,0D8h,0D7h,0D6h,0D5h,0D4h,0D3h,0D2h,0D1h,0D0h db 0CFh,0CEh,0CDh,0CCh,0CBh,0CAh,0C9h,0C8h,0C7h,0C6h,0C5h,0C4h,0C3h,0C2h,0C1h,0C0h db 0BFh,0BEh,0BDh,0BCh,0BBh,0BAh,0B9h,0B8h,0B7h,0B6h,0B5h,0B4h,0B3h,0B2h,0B1h,0B0h db 0AFh,0AEh,0ADh,0ACh,0ABh,0AAh,0A9h,0A8h,0A7h,0A6h,0A5h,0A4h,0A3h,0A2h,0A1h,0A0h db 09Fh,09Eh,09Dh,09Ch,09Bh,09Ah,099h,098h,097h,096h,095h,094h,093h,092h,091h,090h db 08Fh,08Eh,08Dh,08Ch,08Bh,08Ah,089h,088h,087h,086h,085h,084h,083h,082h,081h,080h TestSkeinTabResult26: db 0D5h,00Eh,055h,0EBh,001h,0B8h,0B5h,0B6h,04Ah,0AEh,0BAh,067h,03Ch,027h,06Ah,00Dh db 049h,081h,060h,07Dh,0DBh,020h,09Bh,083h,0A5h,0E0h,0E2h,061h,0CCh,098h,030h,034h ;Skein-256: 256-bit hash, msgLen = 2048 bits, data = 'incrementing' dw TestSkeinTabResult27-TestSkeinTabMessagedata27 TestSkeinTabMessagedata27: db 0FFh,0FEh,0FDh,0FCh,0FBh,0FAh,0F9h,0F8h,0F7h,0F6h,0F5h,0F4h,0F3h,0F2h,0F1h,0F0h db 0EFh,0EEh,0EDh,0ECh,0EBh,0EAh,0E9h,0E8h,0E7h,0E6h,0E5h,0E4h,0E3h,0E2h,0E1h,0E0h db 0DFh,0DEh,0DDh,0DCh,0DBh,0DAh,0D9h,0D8h,0D7h,0D6h,0D5h,0D4h,0D3h,0D2h,0D1h,0D0h db 0CFh,0CEh,0CDh,0CCh,0CBh,0CAh,0C9h,0C8h,0C7h,0C6h,0C5h,0C4h,0C3h,0C2h,0C1h,0C0h db 
0BFh,0BEh,0BDh,0BCh,0BBh,0BAh,0B9h,0B8h,0B7h,0B6h,0B5h,0B4h,0B3h,0B2h,0B1h,0B0h db 0AFh,0AEh,0ADh,0ACh,0ABh,0AAh,0A9h,0A8h,0A7h,0A6h,0A5h,0A4h,0A3h,0A2h,0A1h,0A0h db 09Fh,09Eh,09Dh,09Ch,09Bh,09Ah,099h,098h,097h,096h,095h,094h,093h,092h,091h,090h db 08Fh,08Eh,08Dh,08Ch,08Bh,08Ah,089h,088h,087h,086h,085h,084h,083h,082h,081h,080h db 07Fh,07Eh,07Dh,07Ch,07Bh,07Ah,079h,078h,077h,076h,075h,074h,073h,072h,071h,070h db 06Fh,06Eh,06Dh,06Ch,06Bh,06Ah,069h,068h,067h,066h,065h,064h,063h,062h,061h,060h db 05Fh,05Eh,05Dh,05Ch,05Bh,05Ah,059h,058h,057h,056h,055h,054h,053h,052h,051h,050h db 04Fh,04Eh,04Dh,04Ch,04Bh,04Ah,049h,048h,047h,046h,045h,044h,043h,042h,041h,040h db 03Fh,03Eh,03Dh,03Ch,03Bh,03Ah,039h,038h,037h,036h,035h,034h,033h,032h,031h,030h db 02Fh,02Eh,02Dh,02Ch,02Bh,02Ah,029h,028h,027h,026h,025h,024h,023h,022h,021h,020h db 01Fh,01Eh,01Dh,01Ch,01Bh,01Ah,019h,018h,017h,016h,015h,014h,013h,012h,011h,010h db 00Fh,00Eh,00Dh,00Ch,00Bh,00Ah,009h,008h,007h,006h,005h,004h,003h,002h,001h,000h TestSkeinTabResult27: db 012h,0A3h,024h,05Fh,02Eh,0F8h,087h,099h,0B9h,0E2h,004h,080h,021h,065h,0DCh,075h db 0FFh,036h,05Bh,0E4h,0F8h,064h,0FEh,01Dh,0ACh,00Dh,047h,0B6h,0F2h,05Bh,0D5h,046h ;Skein-256: 256-bit hash, msgLen = 0 bits, data = 'random' dw TestSkeinTabResult28-TestSkeinTabMessagedata28 TestSkeinTabMessagedata28: TestSkeinTabResult28: db 0BCh,027h,063h,0F7h,007h,0E2h,062h,0B8h,00Eh,003h,013h,079h,015h,043h,0A7h,0ABh db 00Ah,04Bh,06Ch,0D0h,083h,027h,00Ah,0FBh,02Fh,0CEh,042h,072h,0E1h,0BBh,00Ah,0A9h ;Skein-256: 256-bit hash, msgLen = 8 bits, data = 'random' dw TestSkeinTabResult29-TestSkeinTabMessagedata29 TestSkeinTabMessagedata29: db 0FBh TestSkeinTabResult29: db 05Bh,0E9h,0A2h,010h,0ACh,091h,07Fh,049h,0A5h,080h,054h,045h,0DDh,097h,0BBh,056h db 0F0h,04Fh,036h,063h,099h,0CFh,00Ah,0CDh,0E4h,022h,0FFh,025h,03Ah,03Fh,049h,0FBh ;Skein-256: 256-bit hash, msgLen = 32 bits, data = 'random' dw TestSkeinTabResult30-TestSkeinTabMessagedata30 TestSkeinTabMessagedata30: db 0FBh,0D1h,07Ch,026h 
TestSkeinTabResult30: db 0BAh,04Dh,044h,062h,035h,018h,0D9h,094h,0ADh,001h,09Eh,0BDh,0B5h,099h,044h,0A9h db 0FBh,046h,017h,0D8h,059h,0D9h,02Eh,033h,082h,088h,09Dh,0A4h,04Ch,0B8h,005h,0A2h ;Skein-256: 256-bit hash, msgLen = 64 bits, data = 'random' dw TestSkeinTabResult31-TestSkeinTabMessagedata31 TestSkeinTabMessagedata31: db 0FBh,0D1h,07Ch,026h,0B6h,01Ah,082h,0E1h TestSkeinTabResult31: db 0D5h,086h,02Bh,0B1h,046h,024h,02Fh,0EBh,069h,0DAh,042h,073h,0CFh,020h,034h,0EFh db 0E2h,02Bh,0DBh,07Bh,061h,02Eh,04Dh,0E7h,02Dh,071h,087h,084h,097h,037h,06Bh,0F9h ;Skein-256: 256-bit hash, msgLen = 128 bits, data = 'random' dw TestSkeinTabResult32-TestSkeinTabMessagedata32 TestSkeinTabMessagedata32: db 0FBh,0D1h,07Ch,026h,0B6h,01Ah,082h,0E1h,02Eh,012h,05Fh,00Dh,045h,09Bh,096h,0C9h TestSkeinTabResult32: db 0BBh,0E5h,0C3h,057h,0ECh,09Dh,042h,00Eh,0E6h,023h,076h,0E7h,09Dh,03Ch,0D4h,056h db 06Bh,07Ah,059h,097h,029h,021h,0AFh,0C5h,0B6h,05Ah,099h,08Eh,071h,054h,0EDh,0CDh ;Skein-256: 256-bit hash, msgLen = 192 bits, data = 'random' dw TestSkeinTabResult33-TestSkeinTabMessagedata33 TestSkeinTabMessagedata33: db 0FBh,0D1h,07Ch,026h,0B6h,01Ah,082h,0E1h,02Eh,012h,05Fh,00Dh,045h,09Bh,096h,0C9h db 01Ah,0B4h,083h,07Dh,0FFh,022h,0B3h,09Bh TestSkeinTabResult33: db 0B4h,0ABh,0EBh,04Ch,0B4h,08Bh,0DDh,025h,065h,0A6h,05Bh,02Eh,061h,014h,0BAh,0EDh db 024h,0CFh,005h,0B8h,024h,02Ah,0B0h,091h,00Eh,01Fh,0FEh,00Bh,0ECh,0F0h,03Dh,0C6h ;Skein-256: 256-bit hash, msgLen = 256 bits, data = 'random' dw TestSkeinTabResult34-TestSkeinTabMessagedata34 TestSkeinTabMessagedata34: db 0FBh,0D1h,07Ch,026h,0B6h,01Ah,082h,0E1h,02Eh,012h,05Fh,00Dh,045h,09Bh,096h,0C9h db 01Ah,0B4h,083h,07Dh,0FFh,022h,0B3h,09Bh,078h,043h,094h,030h,0CDh,0FCh,05Dh,0C8h TestSkeinTabResult34: db 0FCh,0AEh,0CDh,067h,037h,0C0h,05Ch,029h,006h,09Dh,06Fh,069h,098h,063h,097h,0C9h db 03Dh,00Dh,094h,082h,058h,027h,07Ch,0D0h,047h,024h,027h,02Bh,07Ch,0A9h,071h,034h ;Skein-256: 256-bit hash, msgLen = 384 bits, data = 'random' dw 
TestSkeinTabResult35-TestSkeinTabMessagedata35 TestSkeinTabMessagedata35: db 0FBh,0D1h,07Ch,026h,0B6h,01Ah,082h,0E1h,02Eh,012h,05Fh,00Dh,045h,09Bh,096h,0C9h db 01Ah,0B4h,083h,07Dh,0FFh,022h,0B3h,09Bh,078h,043h,094h,030h,0CDh,0FCh,05Dh,0C8h db 078h,0BBh,039h,03Ah,01Ah,05Fh,079h,0BEh,0F3h,009h,095h,0A8h,05Ah,012h,092h,033h TestSkeinTabResult35: db 088h,033h,039h,0A8h,011h,028h,064h,0F6h,08Fh,016h,0F7h,035h,064h,052h,00Fh,097h db 0DDh,0BBh,097h,0DBh,056h,08Dh,082h,034h,0F3h,0BCh,058h,053h,001h,089h,04Dh,0A5h ;Skein-256: 256-bit hash, msgLen = 512 bits, data = 'random' dw TestSkeinTabResult36-TestSkeinTabMessagedata36 TestSkeinTabMessagedata36: db 0FBh,0D1h,07Ch,026h,0B6h,01Ah,082h,0E1h,02Eh,012h,05Fh,00Dh,045h,09Bh,096h,0C9h db 01Ah,0B4h,083h,07Dh,0FFh,022h,0B3h,09Bh,078h,043h,094h,030h,0CDh,0FCh,05Dh,0C8h db 078h,0BBh,039h,03Ah,01Ah,05Fh,079h,0BEh,0F3h,009h,095h,0A8h,05Ah,012h,092h,033h db 039h,0BAh,08Ah,0B7h,0D8h,0FCh,06Dh,0C5h,0FEh,0C6h,0F4h,0EDh,022h,0C1h,022h,0BBh TestSkeinTabResult36: db 0B7h,0ACh,080h,0EBh,0B7h,0A3h,067h,017h,0C7h,019h,0C2h,0A2h,0F3h,008h,0C8h,08Ah db 032h,066h,0FCh,0FBh,088h,093h,0C4h,096h,015h,0ACh,098h,0BDh,0D5h,092h,0F0h,0ECh ;Skein-256: 256-bit hash, msgLen = 768 bits, data = 'random' dw TestSkeinTabResult37-TestSkeinTabMessagedata37 TestSkeinTabMessagedata37: db 0FBh,0D1h,07Ch,026h,0B6h,01Ah,082h,0E1h,02Eh,012h,05Fh,00Dh,045h,09Bh,096h,0C9h db 01Ah,0B4h,083h,07Dh,0FFh,022h,0B3h,09Bh,078h,043h,094h,030h,0CDh,0FCh,05Dh,0C8h db 078h,0BBh,039h,03Ah,01Ah,05Fh,079h,0BEh,0F3h,009h,095h,0A8h,05Ah,012h,092h,033h db 039h,0BAh,08Ah,0B7h,0D8h,0FCh,06Dh,0C5h,0FEh,0C6h,0F4h,0EDh,022h,0C1h,022h,0BBh db 0E7h,0EBh,061h,098h,018h,092h,096h,06Dh,0E5h,0CEh,0F5h,076h,0F7h,01Fh,0C7h,0A8h db 00Dh,014h,0DAh,0B2h,0D0h,0C0h,039h,040h,0B9h,05Bh,09Fh,0B3h,0A7h,027h,0C6h,06Ah TestSkeinTabResult37: db 0A9h,08Ch,0FEh,01Fh,0C4h,071h,0F0h,0E7h,0BAh,028h,008h,046h,067h,038h,0DFh,024h db 08Ch,0B1h,09Eh,062h,015h,0BFh,0D3h,006h,050h,0FCh,029h,0EFh,040h,0FAh,032h,01Dh 
;Skein-256: 256-bit hash, msgLen = 1024 bits, data = 'random' dw TestSkeinTabResult38-TestSkeinTabMessagedata38 TestSkeinTabMessagedata38: db 0FBh,0D1h,07Ch,026h,0B6h,01Ah,082h,0E1h,02Eh,012h,05Fh,00Dh,045h,09Bh,096h,0C9h db 01Ah,0B4h,083h,07Dh,0FFh,022h,0B3h,09Bh,078h,043h,094h,030h,0CDh,0FCh,05Dh,0C8h db 078h,0BBh,039h,03Ah,01Ah,05Fh,079h,0BEh,0F3h,009h,095h,0A8h,05Ah,012h,092h,033h db 039h,0BAh,08Ah,0B7h,0D8h,0FCh,06Dh,0C5h,0FEh,0C6h,0F4h,0EDh,022h,0C1h,022h,0BBh db 0E7h,0EBh,061h,098h,018h,092h,096h,06Dh,0E5h,0CEh,0F5h,076h,0F7h,01Fh,0C7h,0A8h db 00Dh,014h,0DAh,0B2h,0D0h,0C0h,039h,040h,0B9h,05Bh,09Fh,0B3h,0A7h,027h,0C6h,06Ah db 06Eh,01Fh,0F0h,0DCh,031h,01Bh,09Ah,0A2h,01Ah,030h,054h,048h,048h,002h,015h,04Ch db 018h,026h,0C2h,0A2h,07Ah,009h,014h,015h,02Ah,0EBh,076h,0F1h,016h,08Dh,044h,010h TestSkeinTabResult38: db 023h,0EEh,03Dh,0AAh,0F7h,06Fh,075h,0DAh,0FDh,0E5h,0EBh,07Ch,080h,01Fh,03Fh,077h db 023h,027h,029h,00Dh,0BEh,018h,046h,0E9h,078h,0ADh,0ACh,097h,0F5h,0FAh,02Bh,06Dh ;Skein-256: 256-bit hash, msgLen = 2048 bits, data = 'random' dw TestSkeinTabResult39-TestSkeinTabMessagedata39 TestSkeinTabMessagedata39: db 0FBh,0D1h,07Ch,026h,0B6h,01Ah,082h,0E1h,02Eh,012h,05Fh,00Dh,045h,09Bh,096h,0C9h db 01Ah,0B4h,083h,07Dh,0FFh,022h,0B3h,09Bh,078h,043h,094h,030h,0CDh,0FCh,05Dh,0C8h db 078h,0BBh,039h,03Ah,01Ah,05Fh,079h,0BEh,0F3h,009h,095h,0A8h,05Ah,012h,092h,033h db 039h,0BAh,08Ah,0B7h,0D8h,0FCh,06Dh,0C5h,0FEh,0C6h,0F4h,0EDh,022h,0C1h,022h,0BBh db 0E7h,0EBh,061h,098h,018h,092h,096h,06Dh,0E5h,0CEh,0F5h,076h,0F7h,01Fh,0C7h,0A8h db 00Dh,014h,0DAh,0B2h,0D0h,0C0h,039h,040h,0B9h,05Bh,09Fh,0B3h,0A7h,027h,0C6h,06Ah db 06Eh,01Fh,0F0h,0DCh,031h,01Bh,09Ah,0A2h,01Ah,030h,054h,048h,048h,002h,015h,04Ch db 018h,026h,0C2h,0A2h,07Ah,009h,014h,015h,02Ah,0EBh,076h,0F1h,016h,08Dh,044h,010h db 0E1h,014h,0AAh,047h,0F7h,0C5h,0C6h,015h,043h,0C4h,0D9h,059h,018h,082h,034h,0F7h db 097h,0F4h,05Ah,01Dh,016h,065h,0E3h,076h,046h,0D8h,012h,09Ah,045h,0EEh,070h,078h db 
009h,091h,0BBh,06Bh,010h,002h,039h,0E4h,066h,0D5h,08Dh,04Ch,0DDh,09Dh,09Dh,001h
        db      090h,0ABh,064h,047h,00Dh,0DCh,087h,0F5h,0E5h,009h,0E9h,0A8h,0CFh,082h,04Fh,058h
        db      0EFh,004h,073h,02Eh,0ABh,028h,009h,02Dh,018h,0A5h,0ADh,0A4h,05Bh,06Dh,049h,0FBh
        db      00Fh,033h,0F4h,0CCh,007h,0E3h,09Eh,0C6h,044h,09Eh,08Ch,00Ah,0BBh,017h,0C6h,058h
        db      066h,000h,09Ah,03Dh,09Ch,031h,0C0h,0D7h,065h,0E4h,0AFh,088h,0B8h,060h,023h,0E9h
        db      0A0h,067h,0E3h,032h,00Ch,009h,024h,06Ah,03Fh,0AEh,08Ah,03Fh,0D9h,07Ch,048h,07Eh
TestSkeinTabResult39:
        db      0A8h,05Eh,0E5h,03Dh,06Dh,07Bh,0A5h,02Eh,06Eh,0E8h,042h,024h,093h,013h,07Fh,0F3h
        db      0E7h,078h,013h,0BFh,0BFh,0F1h,0FBh,0A5h,09Dh,0ECh,042h,050h,042h,07Dh,082h,034h
TestSkeinTabEnd:
ENDIF

IF (TEST_THREEFISH = 1)
;TestThreeFish: runs ThreeFish over every record of TestThreeFishTab and
;compares the ciphertext left in X with the expected value from the table.
;
;Record layout in TestThreeFishTab (CODE memory), 112 bytes per record:
;   2*WORDSIZE        = 16 bytes  tweak      -> Tweak
;   DATASIZE*WORDSIZE = 32 bytes  key        -> Key
;   DATASIZE*WORDSIZE = 32 bytes  plaintext  -> X
;   DATASIZE*WORDSIZE = 32 bytes  expected ciphertext
;Only the first 2 tweak words and the first 4 key words are loaded; the extra
;word reserved in each buffer is presumably filled in by ThreeFish - confirm.
;
;Uses: a, r0, dptr (plus whatever ThreeFish uses)
;Outcome: all records match -> spins at Stop; any mismatch -> spins at Error
TestThreeFish:
        mov     dptr,#TestThreeFishTab
TTFLoop:
        mov     r0,#Tweak               ;reloaded for every record, so the loop does not
                                        ; depend on Tweak being placed right behind X
TTFX1:
        clr     a                       ;copy tweak
        movc    a,@a+dptr
        mov     @r0,a
        inc     dptr
        inc     r0
        cjne    r0,#Tweak+2*WORDSIZE,TTFX1
        mov     r0,#Key
TTFX2:
        clr     a                       ;copy key
        movc    a,@a+dptr
        mov     @r0,a
        inc     dptr
        inc     r0
        cjne    r0,#Key+DATASIZE*WORDSIZE,TTFX2
        mov     r0,#X
TTFX3:
        clr     a                       ;copy plaintext
        movc    a,@a+dptr
        mov     @r0,a
        inc     dptr
        inc     r0
        cjne    r0,#X+DATASIZE*WORDSIZE,TTFX3
        call    ThreeFish               ;does not modify dptr
        mov     r0,#X                   ;now check result
TTFX4:
        clr     a
        movc    a,@a+dptr
        xrl     a,@r0
        jnz     Error                   ;any differing bit -> test failed
        inc     dptr
        inc     r0
        cjne    r0,#X+DATASIZE*WORDSIZE,TTFX4
        mov     a,dpl                   ;more records until DPTR reaches TestThreeFishTabEnd
        cjne    a,#LOW(TestThreeFishTabEnd),TTFLoop
        mov     a,dph
        cjne    a,#HIGH(TestThreeFishTabEnd),TTFLoop
TestThreeFishOK:                        ;intentional fallthrough
Stop:
        sjmp    Stop                    ;all tests passed - park here
Error:
        ljmp    Error                   ;a test failed - park here

;"reference" values taken from skein_golden_kat_short_internals.txt
TestThreeFishTab:
;-------
;tweak
        db      000h,001h,002h,003h,004h,005h,006h,007h,008h,009h,00Ah,00Bh,00Ch,00Dh,00Eh,00Fh ;tweak
;key
        db      010h,011h,012h,013h,014h,015h,016h,017h,018h,019h,01Ah,01Bh,01Ch,01Dh,01Eh,01Fh ;key
        db      020h,021h,022h,023h,024h,025h,026h,027h,028h,029h,02Ah,02Bh,02Ch,02Dh,02Eh,02Fh
;plaintext
        db      0FFh,0FEh,0FDh,0FCh,0FBh,0FAh,0F9h,0F8h,0F7h,0F6h,0F5h,0F4h,0F3h,0F2h,0F1h,0F0h ;plaintext
        db 
0EFh,0EEh,0EDh,0ECh,0EBh,0EAh,0E9h,0E8h,0E7h,0E6h,0E5h,0E4h,0E3h,0E2h,0E1h,0E0h ;ciphertext db 01Eh,09Bh,08Fh,064h,01Bh,0EDh,095h,011h,0BEh,04Fh,040h,0DFh,057h,0C3h,0D7h,0A1h db 0BCh,042h,071h,08Eh,0DDh,07Ah,0F7h,013h,09Bh,03Dh,04Ch,052h,0B2h,0A9h,020h,0F8h ;----- ;tweak db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h ;key db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h ;plaintext db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h ;ciphertext db 0E3h,097h,056h,0F9h,0F3h,0B6h,0CFh,03Fh,0F9h,01Dh,02Bh,0C3h,0D3h,024h,0CEh,061h db 085h,074h,0EAh,016h,023h,0B2h,036h,07Fh,088h,038h,02Eh,02Ah,093h,0AFh,0A8h,058h ;test vectors created from the outputs of configuration UBI, ; given in appendix B of the Skein paper ; ;the following is the "plain" 256-128 Skein configuration UBI ;tweak db 020h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h ;position in bytes, incl. this message db 000h,000h ;reserved db 000h ;tree level, bit pad db 0C4h ;type=cfg, First, Final ;key db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h ;plaintext db 'S','H','A','3' ;schema identifier db 001h,000h ;version db 000h,000h ;reserved db 080h,000h,000h,000h,000h,000h,000h,000h ;output length db 000h,000h,000h ;tree parameters db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h ;reserve ;ciphertext db 0B2h,0AAh,03Eh,00Eh,0A3h,07Eh,02Fh,030h,0ABh,075h,013h,069h,03Ah,068h,0E4h,0ADh db 00Ah,0ABh,008h,0F2h,0BEh,0FAh,05Ch,097h,05Bh,0F5h,031h,0F8h,095h,0BAh,0F4h,02Ah ; after xor with plaintext. i.e. 
the UBI output = constant input to rest of "plain" SKEIN-256-128 hash ; db 0E1h,0E2h,07Fh,03Dh,0A2h,07Eh,02Fh,030h,02Bh,075h,013h,069h,03Ah,068h,0E4h,0ADh ; db 00Ah,0ABh,008h,0F2h,0BEh,0FAh,05Ch,097h,05Bh,0F5h,031h,0F8h,095h,0BAh,0F4h,02Ah ;the following is the "plain" 256-160 Skein configuration UBI ;tweak db 020h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h ;position in bytes, incl. this message db 000h,000h ;reserved db 000h ;tree level, bit pad db 0C4h ;type=cfg, First, Final ;key db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h ;plaintext db 'S','H','A','3' ;schema identifier db 001h,000h ;version db 000h,000h ;reserved db 0A0h,000h,000h,000h,000h,000h,000h,000h ;output length db 000h,000h,000h ;tree parameters db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h ;reserve ;ciphertext db 070h,03Fh,029h,090h,081h,00Dh,08Ah,0A3h,069h,0FFh,063h,059h,06Ah,0DBh,03Ch,0B7h db 047h,0B4h,0A1h,007h,0EAh,0E8h,033h,096h,022h,09Ch,052h,0C9h,09Eh,0D0h,00Eh,0CAh ; after xor with plaintext. i.e. the UBI output = constant input to rest of "plain" SKEIN-256-160 hash ; db 023h,077h,068h,0A3h,080h,00Dh,08Ah,0A3h,0C9h,0FFh,063h,059h,06Ah,0DBh,03Ch,0B7h ; db 047h,0B4h,0A1h,007h,0EAh,0E8h,033h,096h,022h,09Ch,052h,0C9h,09Eh,0D0h,00Eh,0CAh ;the following is the "plain" 256-224 Skein configuration UBI ;tweak db 020h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h ;position in bytes, incl. 
this message db 000h,000h ;reserved db 000h ;tree level, bit pad db 0C4h ;type=cfg, First, Final ;key db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h ;plaintext db 'S','H','A','3' ;schema identifier db 001h,000h ;version db 000h,000h ;reserved db 0E0h,000h,000h,000h,000h,000h,000h,000h ;output length db 000h,000h,000h ;tree parameters db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h ;reserve ;ciphertext db 062h,0BCh,0A1h,0A9h,068h,029h,009h,0B8h,03Ch,029h,069h,0A0h,014h,0DCh,040h,0D3h db 05Ah,0DCh,0E4h,0BDh,094h,065h,086h,0AEh,01Dh,0EAh,060h,05Ah,0C2h,067h,097h,033h ; after xor with plaintext. i.e. the UBI output = constant input to rest of "plain" SKEIN-256-224 hash ; db 031h,0F4h,0E0h,09Ah,069h,029h,009h,0B8h,0DCh,029h,069h,0A0h,014h,0DCh,040h,0D3h ; db 05Ah,0DCh,0E4h,0BDh,094h,065h,086h,0AEh,01Dh,0EAh,060h,05Ah,0C2h,067h,097h,033h ;the following is the "plain" 256-256 Skein configuration UBI ;tweak db 020h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h ;position in bytes, incl. this message db 000h,000h ;reserved db 000h ;tree level, bit pad db 0C4h ;type=cfg, First, Final ;key db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h ;plaintext db 'S','H','A','3' ;schema identifier db 001h,000h ;version db 000h,000h ;reserved db 000h,001h,000h,000h,000h,000h,000h,000h ;output length db 000h,000h,000h ;tree parameters db 000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h,000h ;reserve ;ciphertext db 015h,048h,027h,03Dh,069h,012h,085h,038h,001h,0FEh,0A8h,0C5h,0DEh,0D5h,072h,04Bh db 0A5h,0B3h,05Eh,0CAh,098h,092h,01Ah,028h,0C4h,070h,060h,0F4h,049h,052h,0CAh,054h ; after xor with plaintext. i.e. 
; the UBI output = constant input to rest of "plain" SKEIN-256-256 hash
;       db      046h,000h,066h,00Eh,068h,012h,085h,038h,001h,0FFh,0A8h,0C5h,0DEh,0D5h,072h,04Bh
;       db      0A5h,0B3h,05Eh,0CAh,098h,092h,01Ah,028h,0C4h,070h,060h,0F4h,049h,052h,0CAh,054h
TestThreeFishTabEnd:
ENDIF


;------------------------------- Skein-256-256 -------------------------------
; Plain Skein-256-256 hash (no tree mode, whole bytes only) of one contiguous
; message.
;
; Input:   DPTR  = address of the first message byte
;                  (read from XDATA if SKEIN_XDATA = 1, from CODE otherwise)
;          r7:r6 = message length in bytes, r7 = high byte (0..65535; anything
;                  larger needs an application-specific paging scheme anyway,
;                  as the '51 has only 64k of CODE/XDATA address space)
; Output:  X     = 32-byte hash
; Uses:    a, r0, r1, r2, flags; r6, r7 (counted down to 0); dptr (advanced past
;          the message); Tweak and Key; 1+2 bytes of stack (saved block length
;          plus the call to ThreeFish)
;          - see also ThreeFish-256 memory and register usage
;
; Method:  Key holds the UBI chaining value. It starts as the precomputed
;          configuration-UBI output (Skein256256HashInitTab). For every block of
;          up to 32 message bytes: X := block, zero padded; X := ThreeFish(X);
;          Key := X xor block. Finally the output transform encrypts an all-zero
;          block under the last chaining value.
;
Skein:
        mov     r0,#Tweak                       ;zero Tweak+0..Tweak+14:
        clr     a                               ; position = 0, no tree level, no bit padding
SkeinClrTweak:
        mov     @r0,a
        inc     r0
        cjne    r0,#Tweak+15,SkeinClrTweak
        mov     Tweak+15,#48 + 040h             ;type = Message (48) + First-block flag (40h)

        mov     r0,#Key                         ;chaining value := configuration UBI output
SkeinLoadIV:
        mov     a,r0
        add     a,#Skein256256HashInitTab-SkeinIVBase-Key
        movc    a,@a+pc                         ;pc = SkeinIVBase here, so this fetches
SkeinIVBase:                                    ; Skein256256HashInitTab[r0-Key]
        mov     @r0,a
        inc     r0
        cjne    r0,#Key+WORDSIZE*DATASIZE,SkeinLoadIV

SkeinLoop:                                      ;one pass per message block
        mov     r2,#WORDSIZE*DATASIZE           ;r2 = min(r7:r6, 32) = length of this block
        cjne    r7,#0,SkeinLenSet               ;256 or more bytes left: full block, not the last
        mov     a,r6
        cjne    a,#WORDSIZE*DATASIZE+1,SkeinLenCmp ;used only for its carry: C = (a <= 32)
SkeinLenCmp:
        jnc     SkeinLenSet                     ;more than 32 bytes left: full block, not the last
        orl     Tweak+15,#080h                  ;32 or fewer left: this is the last block, set Final flag
        mov     r2,a
SkeinLenSet:
        mov     a,r2
        push    acc                             ;keep the block length for the feed-forward XOR below
        add     a,Tweak+0                       ;tweak position += block length
        mov     Tweak+0,a
        clr     a
        addc    a,Tweak+1
        mov     Tweak+1,a
                                                ;higher position bytes are never touched, as in this
                                                ; incarnation the input is limited to < 64k bytes
        clr     c                               ;r7:r6 -= block length (bytes still to be processed)
        mov     a,r6
        subb    a,r2
        mov     r6,a
        mov     a,r7
        subb    a,#0
        mov     r7,a

        inc     r2                              ;count+1 and entering the loop at its djnz copies exactly
        mov     r0,#X                           ; "count" bytes - also for the "pathological" case of
        sjmp    SkeinFetchChk                   ; zero bytes input to Skein/UBI (see definition of UBI)

;the space skipped by the jump above is (ab)used to hold this pc-relative table
Skein256256HashInitTab:
;constant input to rest of "plain" SKEIN-256-256 hash
        db      046h,000h,066h,00Eh,068h,012h,085h,038h,001h,0FFh,0A8h,0C5h,0DEh,0D5h,072h,04Bh
        db      0A5h,0B3h,05Eh,0CAh,098h,092h,01Ah,028h,0C4h,070h,060h,0F4h,049h,052h,0CAh,054h
;constant input to rest of "plain" SKEIN-256-128 hash
;       db      0E1h,0E2h,07Fh,03Dh,0A2h,07Eh,02Fh,030h,02Bh,075h,013h,069h,03Ah,068h,0E4h,0ADh
;       db      00Ah,0ABh,008h,0F2h,0BEh,0FAh,05Ch,097h,05Bh,0F5h,031h,0F8h,095h,0BAh,0F4h,02Ah
;constant input to rest of "plain" SKEIN-256-160 hash
;       db      023h,077h,068h,0A3h,080h,00Dh,08Ah,0A3h,0C9h,0FFh,063h,059h,06Ah,0DBh,03Ch,0B7h
;       db      047h,0B4h,0A1h,007h,0EAh,0E8h,033h,096h,022h,09Ch,052h,0C9h,09Eh,0D0h,00Eh,0CAh
;constant input to rest of "plain" SKEIN-256-224 hash
;       db      031h,0F4h,0E0h,09Ah,069h,029h,009h,0B8h,0DCh,029h,069h,0A0h,014h,0DCh,040h,0D3h
;       db      05Ah,0DCh,0E4h,0BDh,094h,065h,086h,0AEh,01Dh,0EAh,060h,05Ah,0C2h,067h,097h,033h
;constant input to rest of "plain" SKEIN-256-256 hash
;       db      046h,000h,066h,00Eh,068h,012h,085h,038h,001h,0FFh,0A8h,0C5h,0DEh,0D5h,072h,04Bh
;       db      0A5h,0B3h,05Eh,0CAh,098h,092h,01Ah,028h,0C4h,070h,060h,0F4h,049h,052h,0CAh,054h

SkeinFetch:                                     ;X[0..len-1] := next message bytes
    IF (SKEIN_XDATA = 1)
        movx    a,@dptr
    ELSE
        clr     a
        movc    a,@a+dptr
    ENDIF
        inc     dptr
        mov     @r0,a
        inc     r0
SkeinFetchChk:
        djnz    r2,SkeinFetch

        clr     a                               ;X[len..31] := 0 (padding of a short block)
        sjmp    SkeinPadChk
SkeinPad:
        mov     @r0,a
        inc     r0
SkeinPadChk:
        cjne    r0,#X+WORDSIZE*DATASIZE,SkeinPad

        call    ThreeFish                       ;X := ThreeFish(X) under Key and Tweak

        pop     acc                             ;a = length of the block just encrypted
        mov     r1,#Key
        mov     r0,#X
        jz      SkeinCopy                       ;empty block (the "pathological" case): nothing to XOR
        mov     r2,a
        clr     c                               ;rewind dptr to the start of this block
        mov     a,dpl
        subb    a,r2
        mov     dpl,a
        mov     a,dph
        subb    a,#0
        mov     dph,a
SkeinFeedFwd:                                   ;final XOR of UBI: Key[i] := X[i] xor message[i]
    IF (SKEIN_XDATA = 1)
        movx    a,@dptr
    ELSE
        clr     a
        movc    a,@a+dptr
    ENDIF
        inc     dptr
        xrl     a,@r0
        mov     @r1,a
        inc     r1
        inc     r0
        djnz    r2,SkeinFeedFwd
        sjmp    SkeinCopyChk
SkeinCopy:                                      ;the padding was zero, so the rest is a plain copy:
        mov     a,@r0                           ; Key[i] := X[i]
        mov     @r1,a
        inc     r1
        inc     r0
SkeinCopyChk:
        cjne    r0,#X+WORDSIZE*DATASIZE,SkeinCopy

        anl     Tweak+15,#0BFh                  ;clear the First-block flag for whatever follows
        mov     a,r6
        orl     a,r7
        jz      SkeinOutput                     ;no bytes left -> go to the output function
        jmp     SkeinLoop

SkeinOutput:                                    ;and now, the output function
        clr     a
        mov     Tweak+15,#63+40h+80h            ;type = Output (63) + First + Final
        mov     Tweak+0,#8                      ;position = 8 bytes of input
        mov     Tweak+1,a                       ;the higher bytes of the length field were never used,
                                                ; so they are still zero
        mov     r0,#X
SkeinClrX:
        mov     @r0,a                           ;the output block input is all zero
        inc     r0
        cjne    r0,#X+WORDSIZE*DATASIZE,SkeinClrX
        jmp     ThreeFish                       ;tail call: as the input data were all 0, the feed-forward
                                                ; XOR would change nothing, so ThreeFish leaves the final
                                                ; hash in X and returns to our caller
                                                ; (we could even fall through intentionally ;-) )


;------------------------------- ThreeFish-256 -------------------------------
;Input: X, Tweak, Key
;Output: X
;uses: a, b, r0, r1, r2, r3
;optimisation: set all (except _SLOW2x) to zero
;once any of them are nonzero, all "higher" must be nonzero too, as we have not
;implemented all options in the final "undo all renames" - see at the end
;
;
;   d          j = 0                 j = 1
;        rota  div8  mod8      rota  div8  mod8
;   0      5     0     5        56     7     0
;   1     36     4     4        28     3     4
;   2     13     1     5        46     5     6
;   3     58     7     2        44     5     4
;   4     26     3     2        20     2     4
;   5     53     6     5        35     4     3
;   6     11     1     3        42     5     2
;   7     59     7     3        50     6     2
;
;The "original" version is based on mul. This turned out to be non-optimal
; in all cases, at least for the "classical" 12/6/2-clocker.
; ;the first optimisation is trivial - there is no need for rotation if ; (rota mod 8 = 0) - this is in (d,j)=(0,1) TFISH_SLOW SET 0 ; ;the idea behind the following optimisations is, that if (rota mod 8) = 4, ; swap can efficiently replace mul, and xchd can efficiently replace ; the costly moving-and-masking operations ; (that cost can be demonstrated by setting SLOW2x to zero) ;however, this optimisation changes the byte-shift values as it performs ; "in-place", so conditional changes are also in the subsequent shift defines, ; and, more importantly, in the final key injection, where all the byte-shifts ; are supposed to be undone ; ; (rota mod 8 = 4) TFISH_SLOW2 SET 0 TFISH_SLOW2x SET 1 ; ;optimisations SLOW3 and SLOW3a are shifts by 2 ; (and 6, which is 2 in the other direction), ; these are not swap/xchd-based, but true rrc/rlc ;this optimisation is not allowed (i.e. must not be set to 0) if TFISH_SLOW2 is not zero ; as we don't have all options for output unshifting implemented ; (rota mod 8 = 2) TFISH_SLOW3 SET 0 ;this optimisation - similarly to SLOW3 - is not allowed (i.e. 
must not be set to 0) if TFISH_SLOW2 or TFISH_SLOW3 is not zero ; as we don't have all options for output unshifting implemented ; (rota mod 8 = 6) TFISH_SLOW3a SET 0 ; ;optimisations _SLOW3 and _SLOW5 are a combination of swap/xchd and rlc/rrc ;set TFISH_SLOW4 to zero only if TFISH_SLOW3 and TFISH_SLOW3a is zero (which means also others are zero) ; (rota mod 8 = 5) TFISH_SLOW4 SET 0 ; (rota mod 8 = 3) TFISH_SLOW5 SET 0 ; IF ((TFISH_SLOW = 0) AND (TFISH_SLOW2 = 0) AND (TFISH_SLOW3 = 0) AND (TFISH_SLOW3a = 0) AND (TFISH_SLOW4 = 0) AND (TFISH_SLOW5 = 0)) TFISH_OPTIM_ALL SET 1 ELSE TFISH_OPTIM_ALL SET 0 ENDIF ;if none of the rounds use mul, b is free - and we use it to store ; this might be non-optimal for some applications, they might store ; r0 between key application in some other memory location ; - however, note that b is used also in the key application, ; so to free it up completely also that should be modified ;in this case (TFISH_OPTIM_ALL = 1), r3 is unused ThreeFish: ;first, create key checksum ;simultaneously, do the first full key injection mov r0,#Key ;xor all keys together to chsum mov a,@r0 mov Tweak+2*WORDSIZE+0,a ;we will use this as a direct buffer xrl Tweak+2*WORDSIZE+0,#055h ;init value 2^64/3 add a,X+0*WORDSIZE+0 mov X+0*WORDSIZE+0,a inc r0 mov a,@r0 mov Tweak+2*WORDSIZE+1,a xrl Tweak+2*WORDSIZE+1,#055h addc a,X+0*WORDSIZE+1 mov X+0*WORDSIZE+1,a inc r0 mov a,@r0 mov Tweak+2*WORDSIZE+2,a xrl Tweak+2*WORDSIZE+2,#055h addc a,X+0*WORDSIZE+2 mov X+0*WORDSIZE+2,a inc r0 mov a,@r0 mov Tweak+2*WORDSIZE+3,a xrl Tweak+2*WORDSIZE+3,#055h addc a,X+0*WORDSIZE+3 mov X+0*WORDSIZE+3,a inc r0 mov a,@r0 mov Tweak+2*WORDSIZE+4,a xrl Tweak+2*WORDSIZE+4,#055h addc a,X+0*WORDSIZE+4 mov X+0*WORDSIZE+4,a inc r0 mov a,@r0 mov Tweak+2*WORDSIZE+5,a xrl Tweak+2*WORDSIZE+5,#055h addc a,X+0*WORDSIZE+5 mov X+0*WORDSIZE+5,a inc r0 mov a,@r0 mov Tweak+2*WORDSIZE+6,a xrl Tweak+2*WORDSIZE+6,#055h addc a,X+0*WORDSIZE+6 mov X+0*WORDSIZE+6,a inc r0 mov a,@r0 mov Tweak+2*WORDSIZE+7,a 
xrl Tweak+2*WORDSIZE+7,#055h addc a,X+0*WORDSIZE+7 mov X+0*WORDSIZE+7,a inc r0 mov a,@r0 xrl Tweak+2*WORDSIZE+0,a add a,X+1*WORDSIZE+0 mov X+1*WORDSIZE+0,a inc r0 mov a,@r0 xrl Tweak+2*WORDSIZE+1,a addc a,X+1*WORDSIZE+1 mov X+1*WORDSIZE+1,a inc r0 mov a,@r0 xrl Tweak+2*WORDSIZE+2,a addc a,X+1*WORDSIZE+2 mov X+1*WORDSIZE+2,a inc r0 mov a,@r0 xrl Tweak+2*WORDSIZE+3,a addc a,X+1*WORDSIZE+3 mov X+1*WORDSIZE+3,a inc r0 mov a,@r0 xrl Tweak+2*WORDSIZE+4,a addc a,X+1*WORDSIZE+4 mov X+1*WORDSIZE+4,a inc r0 mov a,@r0 xrl Tweak+2*WORDSIZE+5,a addc a,X+1*WORDSIZE+5 mov X+1*WORDSIZE+5,a inc r0 mov a,@r0 xrl Tweak+2*WORDSIZE+6,a addc a,X+1*WORDSIZE+6 mov X+1*WORDSIZE+6,a inc r0 mov a,@r0 xrl Tweak+2*WORDSIZE+7,a addc a,X+1*WORDSIZE+7 mov X+1*WORDSIZE+7,a inc r0 mov a,@r0 xrl Tweak+2*WORDSIZE+0,a add a,X+2*WORDSIZE+0 mov X+2*WORDSIZE+0,a inc r0 mov a,@r0 xrl Tweak+2*WORDSIZE+1,a addc a,X+2*WORDSIZE+1 mov X+2*WORDSIZE+1,a inc r0 mov a,@r0 xrl Tweak+2*WORDSIZE+2,a addc a,X+2*WORDSIZE+2 mov X+2*WORDSIZE+2,a inc r0 mov a,@r0 xrl Tweak+2*WORDSIZE+3,a addc a,X+2*WORDSIZE+3 mov X+2*WORDSIZE+3,a inc r0 mov a,@r0 xrl Tweak+2*WORDSIZE+4,a addc a,X+2*WORDSIZE+4 mov X+2*WORDSIZE+4,a inc r0 mov a,@r0 xrl Tweak+2*WORDSIZE+5,a addc a,X+2*WORDSIZE+5 mov X+2*WORDSIZE+5,a inc r0 mov a,@r0 xrl Tweak+2*WORDSIZE+6,a addc a,X+2*WORDSIZE+6 mov X+2*WORDSIZE+6,a inc r0 mov a,@r0 xrl Tweak+2*WORDSIZE+7,a addc a,X+2*WORDSIZE+7 mov X+2*WORDSIZE+7,a inc r0 mov r1,#Key+WORDSIZE*DATASIZE mov a,@r0 xrl Tweak+2*WORDSIZE+0,a mov @r1,Tweak+2*WORDSIZE+0 add a,X+3*WORDSIZE+0 mov X+3*WORDSIZE+0,a inc r0 inc r1 mov a,@r0 xrl Tweak+2*WORDSIZE+1,a mov @r1,Tweak+2*WORDSIZE+1 addc a,X+3*WORDSIZE+1 mov X+3*WORDSIZE+1,a inc r0 inc r1 mov a,@r0 xrl Tweak+2*WORDSIZE+2,a mov @r1,Tweak+2*WORDSIZE+2 addc a,X+3*WORDSIZE+2 mov X+3*WORDSIZE+2,a inc r0 inc r1 mov a,@r0 xrl Tweak+2*WORDSIZE+3,a mov @r1,Tweak+2*WORDSIZE+3 addc a,X+3*WORDSIZE+3 mov X+3*WORDSIZE+3,a inc r0 inc r1 mov a,@r0 xrl Tweak+2*WORDSIZE+4,a mov 
@r1,Tweak+2*WORDSIZE+4 addc a,X+3*WORDSIZE+4 mov X+3*WORDSIZE+4,a inc r0 inc r1 mov a,@r0 xrl Tweak+2*WORDSIZE+5,a mov @r1,Tweak+2*WORDSIZE+5 addc a,X+3*WORDSIZE+5 mov X+3*WORDSIZE+5,a inc r0 inc r1 mov a,@r0 xrl Tweak+2*WORDSIZE+6,a mov @r1,Tweak+2*WORDSIZE+6 addc a,X+3*WORDSIZE+6 mov X+3*WORDSIZE+6,a inc r0 inc r1 mov a,@r0 xrl Tweak+2*WORDSIZE+7,a mov @r1,Tweak+2*WORDSIZE+7 addc a,X+3*WORDSIZE+7 mov X+3*WORDSIZE+7,a ;now the tweak checksum ;- fully unrolled and taking advantage of direct memory ;- and, again the initial key injection continues mov a,Tweak+0 mov Tweak+2*WORDSIZE+0,a add a,X+1*WORDSIZE+0 mov X+1*WORDSIZE+0,a mov a,Tweak+1 mov Tweak+2*WORDSIZE+1,a addc a,X+1*WORDSIZE+1 mov X+1*WORDSIZE+1,a mov a,Tweak+2 mov Tweak+2*WORDSIZE+2,a addc a,X+1*WORDSIZE+2 mov X+1*WORDSIZE+2,a mov a,Tweak+3 mov Tweak+2*WORDSIZE+3,a addc a,X+1*WORDSIZE+3 mov X+1*WORDSIZE+3,a mov a,Tweak+4 mov Tweak+2*WORDSIZE+4,a addc a,X+1*WORDSIZE+4 mov X+1*WORDSIZE+4,a mov a,Tweak+5 mov Tweak+2*WORDSIZE+5,a addc a,X+1*WORDSIZE+5 mov X+1*WORDSIZE+5,a mov a,Tweak+6 mov Tweak+2*WORDSIZE+6,a addc a,X+1*WORDSIZE+6 mov X+1*WORDSIZE+6,a mov a,Tweak+7 mov Tweak+2*WORDSIZE+7,a addc a,X+1*WORDSIZE+7 mov X+1*WORDSIZE+7,a mov a,Tweak+WORDSIZE+0 xrl Tweak+2*WORDSIZE+0,a add a,X+2*WORDSIZE+0 mov X+2*WORDSIZE+0,a mov a,Tweak+WORDSIZE+1 xrl Tweak+2*WORDSIZE+1,a addc a,X+2*WORDSIZE+1 mov X+2*WORDSIZE+1,a mov a,Tweak+WORDSIZE+2 xrl Tweak+2*WORDSIZE+2,a addc a,X+2*WORDSIZE+2 mov X+2*WORDSIZE+2,a mov a,Tweak+WORDSIZE+3 xrl Tweak+2*WORDSIZE+3,a addc a,X+2*WORDSIZE+3 mov X+2*WORDSIZE+3,a mov a,Tweak+WORDSIZE+4 xrl Tweak+2*WORDSIZE+4,a addc a,X+2*WORDSIZE+4 mov X+2*WORDSIZE+4,a mov a,Tweak+WORDSIZE+5 xrl Tweak+2*WORDSIZE+5,a addc a,X+2*WORDSIZE+5 mov X+2*WORDSIZE+5,a mov a,Tweak+WORDSIZE+6 xrl Tweak+2*WORDSIZE+6,a addc a,X+2*WORDSIZE+6 mov X+2*WORDSIZE+6,a mov a,Tweak+WORDSIZE+7 xrl Tweak+2*WORDSIZE+7,a addc a,X+2*WORDSIZE+7 mov X+2*WORDSIZE+7,a mov r2,#0 ; 72/8 ;now the main loop, with 8 unrolled mixes mov 
r0,#Key+WORDSIZE ;initial key pointer mov r1,#Tweak+WORDSIZE ;initial tweak pointer1 TFishLoop: ;------------------- d = 0 X0R0 EQU X+0*WORDSIZE ;there is no renaming now X1R0 EQU X+1*WORDSIZE X2R0 EQU X+2*WORDSIZE X3R0 EQU X+3*WORDSIZE X0S0 EQU 0 ;and there is also no shift within the words themselves X1S0 EQU 0 X2S0 EQU 0 X3S0 EQU 0 X00ROTA EQU 5 ;j=0 --> rot=5 X00RMUL EQU (1 SHL (X00ROTA MOD 8)) X00RB EQU X00ROTA/8 X01ROTA EQU 56 ;j=1 --> rot=56 X01RMUL EQU (1 SHL (X01ROTA MOD 8)) X01RB EQU X01ROTA/8 IF (TFISH_OPTIM_ALL=1) mov b, r0 ;store, as we need r0 in the optimisations ENDIF mov a,X0R0+((0+X0S0) MOD WORDSIZE) ;first, the addition, then the rota and xor add a,X1R0+((0+X1S0) MOD WORDSIZE) ; - no point in mixing them together xch a,X0R0+((1+X0S0) MOD WORDSIZE) ; - however, note, that using xch we effectively shift the word left by 8 bits addc a,X1R0+((1+X1S0) MOD WORDSIZE) xch a,X0R0+((2+X0S0) MOD WORDSIZE) addc a,X1R0+((2+X1S0) MOD WORDSIZE) xch a,X0R0+((3+X0S0) MOD WORDSIZE) addc a,X1R0+((3+X1S0) MOD WORDSIZE) xch a,X0R0+((4+X0S0) MOD WORDSIZE) addc a,X1R0+((4+X1S0) MOD WORDSIZE) xch a,X0R0+((5+X0S0) MOD WORDSIZE) addc a,X1R0+((5+X1S0) MOD WORDSIZE) xch a,X0R0+((6+X0S0) MOD WORDSIZE) addc a,X1R0+((6+X1S0) MOD WORDSIZE) xch a,X0R0+((7+X0S0) MOD WORDSIZE) addc a,X1R0+((7+X1S0) MOD WORDSIZE) mov X0R0+((0+X0S0) MOD WORDSIZE),a IF (TFISH_SLOW4) ;now rota and xor - first prepare rota mov a,X1R0+((7+X1S0) MOD WORDSIZE) mov b,#X00RMUL mul ab ;now the rota itself mov r3,b ;-- this also uses xch hence shifts 8 bits to the left xch a,X1R0+((0+X1S0) MOD WORDSIZE) mov b,#X00RMUL mul ab orl a,r3 xrl a,X0R0+((0+X0S0+1+X00RB) MOD WORDSIZE) mov r3,b xch a,X1R0+((1+X1S0) MOD WORDSIZE) mov b,#X00RMUL mul ab orl a,r3 xrl a,X0R0+((1+X0S0+1+X00RB) MOD WORDSIZE) mov r3,b xch a,X1R0+((2+X1S0) MOD WORDSIZE) mov b,#X00RMUL mul ab orl a,r3 xrl a,X0R0+((2+X0S0+1+X00RB) MOD WORDSIZE) mov r3,b xch a,X1R0+((3+X1S0) MOD WORDSIZE) mov b,#X00RMUL mul ab orl a,r3 xrl a,X0R0+((3+X0S0+1+X00RB) 
MOD WORDSIZE) mov r3,b xch a,X1R0+((4+X1S0) MOD WORDSIZE) mov b,#X00RMUL mul ab orl a,r3 xrl a,X0R0+((4+X0S0+1+X00RB) MOD WORDSIZE) mov r3,b xch a,X1R0+((5+X1S0) MOD WORDSIZE) mov b,#X00RMUL mul ab orl a,r3 xrl a,X0R0+((5+X0S0+1+X00RB) MOD WORDSIZE) mov r3,b xch a,X1R0+((6+X1S0) MOD WORDSIZE) mov b,#X00RMUL mul ab orl a,r3 xrl a,X0R0+((6+X0S0+1+X00RB) MOD WORDSIZE) mov X1R0+((7+X1S0) MOD WORDSIZE),a mov a,X1R0+((0+X1S0) MOD WORDSIZE) orl a,b xrl a,X0R0+((7+X0S0+1+X00RB) MOD WORDSIZE) mov X1R0+((0+X1S0) MOD WORDSIZE),a ELSE IF (TFISH_OPTIM_ALL=0) mov b,r0 ;we will need r0 here ENDIF mov a,X1R0+((7+X1S0) MOD WORDSIZE) mov c,acc.3 mov r0,#X1R0+((0+X1S0) MOD WORDSIZE) xchd a,@r0 swap a rlc a xrl a,X0R0+((0+X0S0+1+X00RB) MOD WORDSIZE) xch a,@r0 IF (((1+X1S0) MOD WORDSIZE) = 0) mov r0,#X1R0+((1+X1S0) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a rlc a xrl a,X0R0+((1+X0S0+1+X00RB) MOD WORDSIZE) xch a,@r0 IF (((2+X1S0) MOD WORDSIZE) = 0) mov r0,#X1R0+((2+X1S0) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a rlc a xrl a,X0R0+((2+X0S0+1+X00RB) MOD WORDSIZE) xch a,@r0 IF (((3+X1S0) MOD WORDSIZE) = 0) mov r0,#X1R0+((3+X1S0) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a rlc a xrl a,X0R0+((3+X0S0+1+X00RB) MOD WORDSIZE) xch a,@r0 IF (((4+X1S0) MOD WORDSIZE) = 0) mov r0,#X1R0+((4+X1S0) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a rlc a xrl a,X0R0+((4+X0S0+1+X00RB) MOD WORDSIZE) xch a,@r0 IF (((5+X1S0) MOD WORDSIZE) = 0) mov r0,#X1R0+((5+X1S0) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a rlc a xrl a,X0R0+((5+X0S0+1+X00RB) MOD WORDSIZE) xch a,@r0 IF (((6+X1S0) MOD WORDSIZE) = 0) mov r0,#X1R0+((6+X1S0) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a rlc a xrl a,X0R0+((6+X0S0+1+X00RB) MOD WORDSIZE) xch a,@r0 swap a rlc a xrl a,X0R0+((7+X0S0+1+X00RB) MOD WORDSIZE) mov X1R0+((7+X1S0) MOD WORDSIZE),a IF (TFISH_OPTIM_ALL=0) mov r0,b ;restore r0 ENDIF ENDIF ;now, we are going just to repeat the whole pattern for the remaining 2 words mov a,X2R0+((0+X2S0) MOD WORDSIZE) 
;first, the addition, then the rota and xor add a,X3R0+((0+X3S0) MOD WORDSIZE) ; - no point in mixing them together xch a,X2R0+((1+X2S0) MOD WORDSIZE) ; - however, note, that using xch we effectively shift the word left by 8 bits addc a,X3R0+((1+X3S0) MOD WORDSIZE) xch a,X2R0+((2+X2S0) MOD WORDSIZE) addc a,X3R0+((2+X3S0) MOD WORDSIZE) xch a,X2R0+((3+X2S0) MOD WORDSIZE) addc a,X3R0+((3+X3S0) MOD WORDSIZE) xch a,X2R0+((4+X2S0) MOD WORDSIZE) addc a,X3R0+((4+X3S0) MOD WORDSIZE) xch a,X2R0+((5+X2S0) MOD WORDSIZE) addc a,X3R0+((5+X3S0) MOD WORDSIZE) xch a,X2R0+((6+X2S0) MOD WORDSIZE) addc a,X3R0+((6+X3S0) MOD WORDSIZE) xch a,X2R0+((7+X2S0) MOD WORDSIZE) addc a,X3R0+((7+X3S0) MOD WORDSIZE) mov X2R0+((0+X2S0) MOD WORDSIZE),a IF(TFISH_SLOW) ;explanation see below at ELSE ;now rota and xor - first prepare rota mov a,X3R0+((7+X3S0) MOD WORDSIZE) mov b,#X01RMUL mul ab ;now the rota itself mov r3,b ;-- this also uses xch hence shifts 8 bits to the left xch a,X3R0+((0+X3S0) MOD WORDSIZE) mov b,#X01RMUL mul ab orl a,r3 xrl a,X2R0+((0+X2S0+1+X01RB) MOD WORDSIZE) mov r3,b xch a,X3R0+((1+X3S0) MOD WORDSIZE) mov b,#X01RMUL mul ab orl a,r3 xrl a,X2R0+((1+X2S0+1+X01RB) MOD WORDSIZE) mov r3,b xch a,X3R0+((2+X3S0) MOD WORDSIZE) mov b,#X01RMUL mul ab orl a,r3 xrl a,X2R0+((2+X2S0+1+X01RB) MOD WORDSIZE) mov r3,b xch a,X3R0+((3+X3S0) MOD WORDSIZE) mov b,#X01RMUL mul ab orl a,r3 xrl a,X2R0+((3+X2S0+1+X01RB) MOD WORDSIZE) mov r3,b xch a,X3R0+((4+X3S0) MOD WORDSIZE) mov b,#X01RMUL mul ab orl a,r3 xrl a,X2R0+((4+X2S0+1+X01RB) MOD WORDSIZE) mov r3,b xch a,X3R0+((5+X3S0) MOD WORDSIZE) mov b,#X01RMUL mul ab orl a,r3 xrl a,X2R0+((5+X2S0+1+X01RB) MOD WORDSIZE) mov r3,b xch a,X3R0+((6+X3S0) MOD WORDSIZE) mov b,#X01RMUL mul ab orl a,r3 xrl a,X2R0+((6+X2S0+1+X01RB) MOD WORDSIZE) mov X3R0+((7+X3S0) MOD WORDSIZE),a mov a,X3R0+((0+X3S0) MOD WORDSIZE) orl a,b xrl a,X2R0+((7+X2S0+1+X01RB) MOD WORDSIZE) mov X3R0+((0+X3S0) MOD WORDSIZE),a ELSE ;exploiting the fact, that X01RMUL is here 0 mov a,X3R0+((0+X3S0) 
MOD WORDSIZE) xrl a,X2R0+((0+X2S0+1+X01RB) MOD WORDSIZE) xch a,X3R0+((1+X3S0) MOD WORDSIZE) xrl a,X2R0+((1+X2S0+1+X01RB) MOD WORDSIZE) xch a,X3R0+((2+X3S0) MOD WORDSIZE) xrl a,X2R0+((2+X2S0+1+X01RB) MOD WORDSIZE) xch a,X3R0+((3+X3S0) MOD WORDSIZE) xrl a,X2R0+((3+X2S0+1+X01RB) MOD WORDSIZE) xch a,X3R0+((4+X3S0) MOD WORDSIZE) xrl a,X2R0+((4+X2S0+1+X01RB) MOD WORDSIZE) xch a,X3R0+((5+X3S0) MOD WORDSIZE) xrl a,X2R0+((5+X2S0+1+X01RB) MOD WORDSIZE) xch a,X3R0+((6+X3S0) MOD WORDSIZE) xrl a,X2R0+((6+X2S0+1+X01RB) MOD WORDSIZE) xch a,X3R0+((7+X3S0) MOD WORDSIZE) xrl a,X2R0+((7+X2S0+1+X01RB) MOD WORDSIZE) mov X3R0+((0+X3S0) MOD WORDSIZE),a ENDIF ;--------------- d = 1 X0R1 EQU X+0*WORDSIZE ;in all odd rounds, X1 and X3 are swapped (the permutation is 0123->0321) X1R1 EQU X+3*WORDSIZE X2R1 EQU X+2*WORDSIZE X3R1 EQU X+1*WORDSIZE X0S1 EQU 1 ;1 is due to shift because of use of xch IF (TFISH_SLOW4) X00RBS EQU 1 ELSE X00RBS EQU 0 ENDIF X3S1 EQU WORDSIZE - X00RB + X00RBS ;1 is due to shift because of use of xch - see below ;and this is X3, because of the word permutation X2S1 EQU 1 ;1 is due to shift because of use of xch X1S1 EQU WORDSIZE - X01RB + 1 ;1 is due to shift because of use of xch - see below X10ROTA EQU 36 ;j=0 --> rot=36 X10RMUL EQU (1 SHL (X10ROTA MOD 8)) X10RB EQU X10ROTA/8 X11ROTA EQU 28 ;j=1 --> rot=28 X11RMUL EQU (1 SHL (X11ROTA MOD 8)) X11RB EQU X11ROTA/8 ;j = 0 mov a,X0R1+((0+X0S1) MOD WORDSIZE) ;first, the addition, then the rota and xor add a,X1R1+((0+X1S1) MOD WORDSIZE) ; - no point in mixing them together xch a,X0R1+((1+X0S1) MOD WORDSIZE) ; - however, note, that using xch we effectively shift the word left by 8 bits addc a,X1R1+((1+X1S1) MOD WORDSIZE) xch a,X0R1+((2+X0S1) MOD WORDSIZE) addc a,X1R1+((2+X1S1) MOD WORDSIZE) xch a,X0R1+((3+X0S1) MOD WORDSIZE) addc a,X1R1+((3+X1S1) MOD WORDSIZE) xch a,X0R1+((4+X0S1) MOD WORDSIZE) addc a,X1R1+((4+X1S1) MOD WORDSIZE) xch a,X0R1+((5+X0S1) MOD WORDSIZE) addc a,X1R1+((5+X1S1) MOD WORDSIZE) xch a,X0R1+((6+X0S1) MOD 
WORDSIZE) addc a,X1R1+((6+X1S1) MOD WORDSIZE) xch a,X0R1+((7+X0S1) MOD WORDSIZE) addc a,X1R1+((7+X1S1) MOD WORDSIZE) mov X0R1+((0+X0S1) MOD WORDSIZE),a IF(TFISH_SLOW2) ;now rota and xor - first prepare rota mov a,X1R1+((7+X1S1) MOD WORDSIZE) mov b,#X10RMUL mul ab ;now the rota itself mov r3,b ;-- this also uses xch hence shifts 8 bits to the left xch a,X1R1+((0+X1S1) MOD WORDSIZE) mov b,#X10RMUL mul ab orl a,r3 xrl a,X0R1+((0+X0S1+1+X10RB) MOD WORDSIZE) mov r3,b xch a,X1R1+((1+X1S1) MOD WORDSIZE) mov b,#X10RMUL mul ab orl a,r3 xrl a,X0R1+((1+X0S1+1+X10RB) MOD WORDSIZE) mov r3,b xch a,X1R1+((2+X1S1) MOD WORDSIZE) mov b,#X10RMUL mul ab orl a,r3 xrl a,X0R1+((2+X0S1+1+X10RB) MOD WORDSIZE) mov r3,b xch a,X1R1+((3+X1S1) MOD WORDSIZE) mov b,#X10RMUL mul ab orl a,r3 xrl a,X0R1+((3+X0S1+1+X10RB) MOD WORDSIZE) mov r3,b xch a,X1R1+((4+X1S1) MOD WORDSIZE) mov b,#X10RMUL mul ab orl a,r3 xrl a,X0R1+((4+X0S1+1+X10RB) MOD WORDSIZE) mov r3,b xch a,X1R1+((5+X1S1) MOD WORDSIZE) mov b,#X10RMUL mul ab orl a,r3 xrl a,X0R1+((5+X0S1+1+X10RB) MOD WORDSIZE) mov r3,b xch a,X1R1+((6+X1S1) MOD WORDSIZE) mov b,#X10RMUL mul ab orl a,r3 xrl a,X0R1+((6+X0S1+1+X10RB) MOD WORDSIZE) mov X1R1+((7+X1S1) MOD WORDSIZE),a mov a,X1R1+((0+X1S1) MOD WORDSIZE) orl a,b xrl a,X0R1+((7+X0S1+1+X10RB) MOD WORDSIZE) mov X1R1+((0+X1S1) MOD WORDSIZE),a ELSE IF (TFISH_SLOW2x) ;this is the optimal version - see above IF (TFISH_OPTIM_ALL=0) mov b,r0 ;we will need r0 here ENDIF mov a,X1R1+((7+X1S1) MOD WORDSIZE) mov r0,#X1R1+((0+X1S1) MOD WORDSIZE) xchd a,@r0 swap a xrl a,X0R1+((0+X0S1+1+X10RB) MOD WORDSIZE) xch a,@r0 ;speedup idea for 1-clockers: here, inc r0 can be used in all of the following, except 1 when it wraps around ; -- similarly, xrl a,direct could be replaced by xrl a,@r1; however, this would require storing r1 temporarily, too; ; -- [later] NO this is a stupid idea: inc r1; xrl a,@r1 is no more effective than xrl a,direct ; [...] 
; or a "global" variable (best in direct memory) to keep r0 and r1 (key and tweak positions) between the key injections ; (of course at each key injection the pointers can be calculated or looked up in table, but both options sound quite costly) ; (an even more brutal option would be to implement all combinations of key and data word additions, and then use some ; sophisticated indirect jump system to use those appropriate at the given point ; - but the code size penalty for this just sounds too big) IF (((1+X1S1) MOD WORDSIZE) = 0) mov r0,#X1R1+((1+X1S1) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a xrl a,X0R1+((1+X0S1+1+X10RB) MOD WORDSIZE) xch a,@r0 IF (((2+X1S1) MOD WORDSIZE) = 0) mov r0,#X1R1+((2+X1S1) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a xrl a,X0R1+((2+X0S1+1+X10RB) MOD WORDSIZE) xch a,@r0 IF (((3+X1S1) MOD WORDSIZE) = 0) mov r0,#X1R1+((3+X1S1) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a xrl a,X0R1+((3+X0S1+1+X10RB) MOD WORDSIZE) xch a,@r0 IF (((4+X1S1) MOD WORDSIZE) = 0) mov r0,#X1R1+((4+X1S1) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a xrl a,X0R1+((4+X0S1+1+X10RB) MOD WORDSIZE) xch a,@r0 IF (((5+X1S1) MOD WORDSIZE) = 0) mov r0,#X1R1+((5+X1S1) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a xrl a,X0R1+((5+X0S1+1+X10RB) MOD WORDSIZE) xch a,@r0 IF (((6+X1S1) MOD WORDSIZE) = 0) mov r0,#X1R1+((6+X1S1) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a xrl a,X0R1+((6+X0S1+1+X10RB) MOD WORDSIZE) xch a,@r0 swap a xrl a,X0R1+((7+X0S1+1+X10RB) MOD WORDSIZE) mov X1R1+((7+X1S1) MOD WORDSIZE),a ELSE ;other way of optimising this particular shift - but LESS optimal! 
(72 cycles more) mov a,X0R1+((0+X0S1+1+X10RB) MOD WORDSIZE) xch a,X1R1+((0+X1S1) MOD WORDSIZE) swap a xrl X1R1+((0+X1S1) MOD WORDSIZE),a anl a,#00Fh xrl X1R1+((0+X1S1) MOD WORDSIZE),a xrl a,X0R1+((1+X0S1+1+X10RB) MOD WORDSIZE) xch a,X1R1+((1+X1S1) MOD WORDSIZE) swap a xrl X1R1+((1+X1S1) MOD WORDSIZE),a anl a,#00Fh xrl X1R1+((1+X1S1) MOD WORDSIZE),a xrl a,X0R1+((2+X0S1+1+X10RB) MOD WORDSIZE) xch a,X1R1+((2+X1S1) MOD WORDSIZE) swap a xrl X1R1+((2+X1S1) MOD WORDSIZE),a anl a,#00Fh xrl X1R1+((2+X1S1) MOD WORDSIZE),a xrl a,X0R1+((3+X0S1+1+X10RB) MOD WORDSIZE) xch a,X1R1+((3+X1S1) MOD WORDSIZE) swap a xrl X1R1+((3+X1S1) MOD WORDSIZE),a anl a,#00Fh xrl X1R1+((3+X1S1) MOD WORDSIZE),a xrl a,X0R1+((4+X0S1+1+X10RB) MOD WORDSIZE) xch a,X1R1+((4+X1S1) MOD WORDSIZE) swap a xrl X1R1+((4+X1S1) MOD WORDSIZE),a anl a,#00Fh xrl X1R1+((4+X1S1) MOD WORDSIZE),a xrl a,X0R1+((5+X0S1+1+X10RB) MOD WORDSIZE) xch a,X1R1+((5+X1S1) MOD WORDSIZE) swap a xrl X1R1+((5+X1S1) MOD WORDSIZE),a anl a,#00Fh xrl X1R1+((5+X1S1) MOD WORDSIZE),a xrl a,X0R1+((6+X0S1+1+X10RB) MOD WORDSIZE) xch a,X1R1+((6+X1S1) MOD WORDSIZE) swap a xrl X1R1+((6+X1S1) MOD WORDSIZE),a anl a,#00Fh xrl X1R1+((6+X1S1) MOD WORDSIZE),a xrl a,X0R1+((7+X0S1+1+X10RB) MOD WORDSIZE) xch a,X1R1+((7+X1S1) MOD WORDSIZE) swap a xrl X1R1+((7+X1S1) MOD WORDSIZE),a anl a,#00Fh xrl X1R1+((7+X1S1) MOD WORDSIZE),a xrl X1R1+((0+X1S1) MOD WORDSIZE),a ENDIF IF (TFISH_OPTIM_ALL=0) mov r0,b ;restore r0 ENDIF ENDIF ;j = 1 mov a,X2R1+((0+X2S1) MOD WORDSIZE) ;first, the addition, then the rota and xor add a,X3R1+((0+X3S1) MOD WORDSIZE) ; - no point in mixing them together xch a,X2R1+((1+X2S1) MOD WORDSIZE) ; - however, note, that using xch we effectively shift the word left by 8 bits addc a,X3R1+((1+X3S1) MOD WORDSIZE) xch a,X2R1+((2+X2S1) MOD WORDSIZE) addc a,X3R1+((2+X3S1) MOD WORDSIZE) xch a,X2R1+((3+X2S1) MOD WORDSIZE) addc a,X3R1+((3+X3S1) MOD WORDSIZE) xch a,X2R1+((4+X2S1) MOD WORDSIZE) addc a,X3R1+((4+X3S1) MOD WORDSIZE) xch a,X2R1+((5+X2S1) MOD 
WORDSIZE) addc a,X3R1+((5+X3S1) MOD WORDSIZE) xch a,X2R1+((6+X2S1) MOD WORDSIZE) addc a,X3R1+((6+X3S1) MOD WORDSIZE) xch a,X2R1+((7+X2S1) MOD WORDSIZE) addc a,X3R1+((7+X3S1) MOD WORDSIZE) mov X2R1+((0+X2S1) MOD WORDSIZE),a ;now rota and xor - first prepare rota IF(TFISH_SLOW2) mov a,X3R1+((7+X3S1) MOD WORDSIZE) mov b,#X11RMUL mul ab ;now the rota itself mov r3,b ;-- this also uses xch hence shifts 8 bits to the left xch a,X3R1+((0+X3S1) MOD WORDSIZE) mov b,#X11RMUL mul ab orl a,r3 xrl a,X2R1+((0+X2S1+1+X11RB) MOD WORDSIZE) mov r3,b xch a,X3R1+((1+X3S1) MOD WORDSIZE) mov b,#X11RMUL mul ab orl a,r3 xrl a,X2R1+((1+X2S1+1+X11RB) MOD WORDSIZE) mov r3,b xch a,X3R1+((2+X3S1) MOD WORDSIZE) mov b,#X11RMUL mul ab orl a,r3 xrl a,X2R1+((2+X2S1+1+X11RB) MOD WORDSIZE) mov r3,b xch a,X3R1+((3+X3S1) MOD WORDSIZE) mov b,#X11RMUL mul ab orl a,r3 xrl a,X2R1+((3+X2S1+1+X11RB) MOD WORDSIZE) mov r3,b xch a,X3R1+((4+X3S1) MOD WORDSIZE) mov b,#X11RMUL mul ab orl a,r3 xrl a,X2R1+((4+X2S1+1+X11RB) MOD WORDSIZE) mov r3,b xch a,X3R1+((5+X3S1) MOD WORDSIZE) mov b,#X11RMUL mul ab orl a,r3 xrl a,X2R1+((5+X2S1+1+X11RB) MOD WORDSIZE) mov r3,b xch a,X3R1+((6+X3S1) MOD WORDSIZE) mov b,#X11RMUL mul ab orl a,r3 xrl a,X2R1+((6+X2S1+1+X11RB) MOD WORDSIZE) mov X3R1+((7+X3S1) MOD WORDSIZE),a mov a,X3R1+((0+X3S1) MOD WORDSIZE) orl a,b xrl a,X2R1+((7+X2S1+1+X11RB) MOD WORDSIZE) mov X3R1+((0+X3S1) MOD WORDSIZE),a ELSE ;this is the optimal version - see above IF (TFISH_OPTIM_ALL=0) mov b,r0 ;we will need r0 here ENDIF mov a,X3R1+((7+X3S1) MOD WORDSIZE) mov r0,#X3R1+((0+X3S1) MOD WORDSIZE) xchd a,@r0 swap a xrl a,X2R1+((0+X2S1+1+X11RB) MOD WORDSIZE) xch a,@r0 IF (((1+X3S1) MOD WORDSIZE) = 0) mov r0,#X3R1+((1+X3S1) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a xrl a,X2R1+((1+X2S1+1+X11RB) MOD WORDSIZE) xch a,@r0 IF (((2+X3S1) MOD WORDSIZE) = 0) mov r0,#X3R1+((2+X3S1) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a xrl a,X2R1+((2+X2S1+1+X11RB) MOD WORDSIZE) xch a,@r0 IF (((3+X3S1) MOD WORDSIZE) = 0) mov 
r0,#X3R1+((3+X3S1) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a xrl a,X2R1+((3+X2S1+1+X11RB) MOD WORDSIZE) xch a,@r0 IF (((4+X3S1) MOD WORDSIZE) = 0) mov r0,#X3R1+((4+X3S1) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a xrl a,X2R1+((4+X2S1+1+X11RB) MOD WORDSIZE) xch a,@r0 IF (((5+X3S1) MOD WORDSIZE) = 0) mov r0,#X3R1+((5+X3S1) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a xrl a,X2R1+((5+X2S1+1+X11RB) MOD WORDSIZE) xch a,@r0 IF (((6+X3S1) MOD WORDSIZE) = 0) mov r0,#X3R1+((6+X3S1) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a xrl a,X2R1+((6+X2S1+1+X11RB) MOD WORDSIZE) xch a,@r0 swap a xrl a,X2R1+((7+X2S1+1+X11RB) MOD WORDSIZE) mov X3R1+((7+X3S1) MOD WORDSIZE),a IF (TFISH_OPTIM_ALL=0) mov r0,b ;restore r0 ENDIF ENDIF ;--------------- d = 2 X0R2 EQU X+0*WORDSIZE ;in all even rounds, X1 and X3 are swapped again, so now it's the straight way again (the permutation is 0123->0321) X1R2 EQU X+1*WORDSIZE X2R2 EQU X+2*WORDSIZE X3R2 EQU X+3*WORDSIZE X0S2 EQU X0S1+1 ;1 is due to shift because of use of xch IF (TFISH_SLOW2) X11RBS EQU 1 ELSE X11RBS EQU 0 ENDIF X1S2 EQU X3S1 + WORDSIZE - X11RB + X11RBS ;X11RBS is due to shift if xch used - see below X2S2 EQU X2S1+1 ;1 is due to shift because of use of xch IF (TFISH_SLOW2) X10RBS EQU 1 ELSE X10RBS EQU 0 ENDIF X3S2 EQU X1S1 + WORDSIZE - X10RB + X10RBS ;X10RBS: xch is not used in enhanced version X20ROTA EQU 13 ;j=0 --> rot=13 X20RMUL EQU (1 SHL (X20ROTA MOD 8)) X20RB EQU X20ROTA/8 X21ROTA EQU 46 ;j=1 --> rot=46 X21RMUL EQU (1 SHL (X21ROTA MOD 8)) X21RB EQU X21ROTA/8 ;j = 0 mov a,X0R2+((0+X0S2) MOD WORDSIZE) ;first, the addition, then the rota and xor add a,X1R2+((0+X1S2) MOD WORDSIZE) ; - no point in mixing them together xch a,X0R2+((1+X0S2) MOD WORDSIZE) ; - however, note, that using xch we effectively shift the word left by 8 bits addc a,X1R2+((1+X1S2) MOD WORDSIZE) xch a,X0R2+((2+X0S2) MOD WORDSIZE) addc a,X1R2+((2+X1S2) MOD WORDSIZE) xch a,X0R2+((3+X0S2) MOD WORDSIZE) addc a,X1R2+((3+X1S2) MOD WORDSIZE) xch 
a,X0R2+((4+X0S2) MOD WORDSIZE) addc a,X1R2+((4+X1S2) MOD WORDSIZE) xch a,X0R2+((5+X0S2) MOD WORDSIZE) addc a,X1R2+((5+X1S2) MOD WORDSIZE) xch a,X0R2+((6+X0S2) MOD WORDSIZE) addc a,X1R2+((6+X1S2) MOD WORDSIZE) xch a,X0R2+((7+X0S2) MOD WORDSIZE) addc a,X1R2+((7+X1S2) MOD WORDSIZE) mov X0R2+((0+X0S2) MOD WORDSIZE),a IF (TFISH_SLOW4) ;now rota and xor - first prepare rota mov a,X1R2+((7+X1S2) MOD WORDSIZE) mov b,#X20RMUL mul ab ;now the rota itself mov r3,b ;-- this also uses xch hence shifts 8 bits to the left xch a,X1R2+((0+X1S2) MOD WORDSIZE) mov b,#X20RMUL mul ab orl a,r3 xrl a,X0R2+((0+X0S2+1+X20RB) MOD WORDSIZE) mov r3,b xch a,X1R2+((1+X1S2) MOD WORDSIZE) mov b,#X20RMUL mul ab orl a,r3 xrl a,X0R2+((1+X0S2+1+X20RB) MOD WORDSIZE) mov r3,b xch a,X1R2+((2+X1S2) MOD WORDSIZE) mov b,#X20RMUL mul ab orl a,r3 xrl a,X0R2+((2+X0S2+1+X20RB) MOD WORDSIZE) mov r3,b xch a,X1R2+((3+X1S2) MOD WORDSIZE) mov b,#X20RMUL mul ab orl a,r3 xrl a,X0R2+((3+X0S2+1+X20RB) MOD WORDSIZE) mov r3,b xch a,X1R2+((4+X1S2) MOD WORDSIZE) mov b,#X20RMUL mul ab orl a,r3 xrl a,X0R2+((4+X0S2+1+X20RB) MOD WORDSIZE) mov r3,b xch a,X1R2+((5+X1S2) MOD WORDSIZE) mov b,#X20RMUL mul ab orl a,r3 xrl a,X0R2+((5+X0S2+1+X20RB) MOD WORDSIZE) mov r3,b xch a,X1R2+((6+X1S2) MOD WORDSIZE) mov b,#X20RMUL mul ab orl a,r3 xrl a,X0R2+((6+X0S2+1+X20RB) MOD WORDSIZE) mov X1R2+((7+X1S2) MOD WORDSIZE),a mov a,X1R2+((0+X1S2) MOD WORDSIZE) orl a,b xrl a,X0R2+((7+X0S2+1+X20RB) MOD WORDSIZE) mov X1R2+((0+X1S2) MOD WORDSIZE),a ELSE IF (TFISH_OPTIM_ALL=0) mov b,r0 ;we will need r0 here ENDIF mov a,X1R2+((7+X1S2) MOD WORDSIZE) mov c,acc.3 mov r0,#X1R2+((0+X1S2) MOD WORDSIZE) xchd a,@r0 swap a rlc a xrl a,X0R2+((0+X0S2+1+X20RB) MOD WORDSIZE) xch a,@r0 IF (((1+X1S2) MOD WORDSIZE) = 0) mov r0,#X1R2+((1+X1S2) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a rlc a xrl a,X0R2+((1+X0S2+1+X20RB) MOD WORDSIZE) xch a,@r0 IF (((2+X1S2) MOD WORDSIZE) = 0) mov r0,#X1R2+((2+X1S2) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a rlc a xrl 
a,X0R2+((2+X0S2+1+X20RB) MOD WORDSIZE) xch a,@r0 IF (((3+X1S2) MOD WORDSIZE) = 0) mov r0,#X1R2+((3+X1S2) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a rlc a xrl a,X0R2+((3+X0S2+1+X20RB) MOD WORDSIZE) xch a,@r0 IF (((4+X1S2) MOD WORDSIZE) = 0) mov r0,#X1R2+((4+X1S2) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a rlc a xrl a,X0R2+((4+X0S2+1+X20RB) MOD WORDSIZE) xch a,@r0 IF (((5+X1S2) MOD WORDSIZE) = 0) mov r0,#X1R2+((5+X1S2) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a rlc a xrl a,X0R2+((5+X0S2+1+X20RB) MOD WORDSIZE) xch a,@r0 IF (((6+X1S2) MOD WORDSIZE) = 0) mov r0,#X1R2+((6+X1S2) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a rlc a xrl a,X0R2+((6+X0S2+1+X20RB) MOD WORDSIZE) xch a,@r0 swap a rlc a xrl a,X0R2+((7+X0S2+1+X20RB) MOD WORDSIZE) mov X1R2+((7+X1S2) MOD WORDSIZE),a IF (TFISH_OPTIM_ALL=0) mov r0,b ;restore r0 ENDIF ENDIF ;j = 1 mov a,X2R2+((0+X2S2) MOD WORDSIZE) ;first, the addition, then the rota and xor add a,X3R2+((0+X3S2) MOD WORDSIZE) ; - no point in mixing them together xch a,X2R2+((1+X2S2) MOD WORDSIZE) ; - however, note, that using xch we effectively shift the word left by 8 bits addc a,X3R2+((1+X3S2) MOD WORDSIZE) xch a,X2R2+((2+X2S2) MOD WORDSIZE) addc a,X3R2+((2+X3S2) MOD WORDSIZE) xch a,X2R2+((3+X2S2) MOD WORDSIZE) addc a,X3R2+((3+X3S2) MOD WORDSIZE) xch a,X2R2+((4+X2S2) MOD WORDSIZE) addc a,X3R2+((4+X3S2) MOD WORDSIZE) xch a,X2R2+((5+X2S2) MOD WORDSIZE) addc a,X3R2+((5+X3S2) MOD WORDSIZE) xch a,X2R2+((6+X2S2) MOD WORDSIZE) addc a,X3R2+((6+X3S2) MOD WORDSIZE) xch a,X2R2+((7+X2S2) MOD WORDSIZE) addc a,X3R2+((7+X3S2) MOD WORDSIZE) mov X2R2+((0+X2S2) MOD WORDSIZE),a IF (TFISH_SLOW3a) mov a,X3R2+((7+X3S2) MOD WORDSIZE) mov b,#X21RMUL mul ab ;now the rota itself mov r3,b ;-- this also uses xch hence shifts 8 bits to the left xch a,X3R2+((0+X3S2) MOD WORDSIZE) mov b,#X21RMUL mul ab orl a,r3 xrl a,X2R2+((0+X2S2+1+X21RB) MOD WORDSIZE) mov r3,b xch a,X3R2+((1+X3S2) MOD WORDSIZE) mov b,#X21RMUL mul ab orl a,r3 xrl a,X2R2+((1+X2S2+1+X21RB) 
MOD WORDSIZE) mov r3,b xch a,X3R2+((2+X3S2) MOD WORDSIZE) mov b,#X21RMUL mul ab orl a,r3 xrl a,X2R2+((2+X2S2+1+X21RB) MOD WORDSIZE) mov r3,b xch a,X3R2+((3+X3S2) MOD WORDSIZE) mov b,#X21RMUL mul ab orl a,r3 xrl a,X2R2+((3+X2S2+1+X21RB) MOD WORDSIZE) mov r3,b xch a,X3R2+((4+X3S2) MOD WORDSIZE) mov b,#X21RMUL mul ab orl a,r3 xrl a,X2R2+((4+X2S2+1+X21RB) MOD WORDSIZE) mov r3,b xch a,X3R2+((5+X3S2) MOD WORDSIZE) mov b,#X21RMUL mul ab orl a,r3 xrl a,X2R2+((5+X2S2+1+X21RB) MOD WORDSIZE) mov r3,b xch a,X3R2+((6+X3S2) MOD WORDSIZE) mov b,#X21RMUL mul ab orl a,r3 xrl a,X2R2+((6+X2S2+1+X21RB) MOD WORDSIZE) mov X3R2+((7+X3S2) MOD WORDSIZE),a mov a,X3R2+((0+X3S2) MOD WORDSIZE) orl a,b xrl a,X2R2+((7+X2S2+1+X21RB) MOD WORDSIZE) mov X3R2+((0+X3S2) MOD WORDSIZE),a ELSE mov a,X3R2+((0+X3S2) MOD WORDSIZE) rrc a mov a,X3R2+((7+X3S2) MOD WORDSIZE) rrc a xch a,X3R2+((6+X3S2) MOD WORDSIZE) rrc a xch a,X3R2+((5+X3S2) MOD WORDSIZE) rrc a xch a,X3R2+((4+X3S2) MOD WORDSIZE) rrc a xch a,X3R2+((3+X3S2) MOD WORDSIZE) rrc a xch a,X3R2+((2+X3S2) MOD WORDSIZE) rrc a xch a,X3R2+((1+X3S2) MOD WORDSIZE) rrc a xch a,X3R2+((0+X3S2) MOD WORDSIZE) rrc a mov X3R2+((7+X3S2) MOD WORDSIZE),a rrc a mov a,X3R2+((6+X3S2) MOD WORDSIZE) rrc a xrl a,X2R2+((7+1+X2S2+1+X21RB) MOD WORDSIZE) ;extra +1 because instead of rl 6 we are performing rr 2 xch a,X3R2+((5+X3S2) MOD WORDSIZE) rrc a xrl a,X2R2+((6+1+X2S2+1+X21RB) MOD WORDSIZE) ;extra +1 because instead of rl 6 we are performing rr 2 xch a,X3R2+((4+X3S2) MOD WORDSIZE) rrc a xrl a,X2R2+((5+1+X2S2+1+X21RB) MOD WORDSIZE) ;extra +1 because instead of rl 6 we are performing rr 2 xch a,X3R2+((3+X3S2) MOD WORDSIZE) rrc a xrl a,X2R2+((4+1+X2S2+1+X21RB) MOD WORDSIZE) ;extra +1 because instead of rl 6 we are performing rr 2 xch a,X3R2+((2+X3S2) MOD WORDSIZE) rrc a xrl a,X2R2+((3+1+X2S2+1+X21RB) MOD WORDSIZE) ;extra +1 because instead of rl 6 we are performing rr 2 xch a,X3R2+((1+X3S2) MOD WORDSIZE) rrc a xrl a,X2R2+((2+1+X2S2+1+X21RB) MOD WORDSIZE) ;extra +1 because 
instead of rl 6 we are performing rr 2 xch a,X3R2+((0+X3S2) MOD WORDSIZE) rrc a xrl a,X2R2+((1+1+X2S2+1+X21RB) MOD WORDSIZE) ;extra +1 because instead of rl 6 we are performing rr 2 xch a,X3R2+((7+X3S2) MOD WORDSIZE) rrc a xrl a,X2R2+((0+1+X2S2+1+X21RB) MOD WORDSIZE) ;extra +1 because instead of rl 6 we are performing rr 2 mov X3R2+((6+X3S2) MOD WORDSIZE),a ENDIF ;--------------- d = 3 X0R3 EQU X+0*WORDSIZE ;odd round: X1 and X3 are swapped (the permutation is 0123->0321), so X1R3 is physical word 3 and X3R3 is physical word 1 X1R3 EQU X+3*WORDSIZE X2R3 EQU X+2*WORDSIZE X3R3 EQU X+1*WORDSIZE X0S3 EQU X0S2+1 ;1 is due to shift because of use of xch IF (TFISH_SLOW4) X20RBS EQU 1 ELSE X20RBS EQU 0 ENDIF X3S3 EQU X1S2 + WORDSIZE - X20RB + X20RBS ;X20RBS is the extra byte shift of the d=2, j=0 rotation: 1 for the xch-based mul variant, 0 otherwise X2S3 EQU X2S2+1 ;1 is due to shift because of use of xch IF (TFISH_SLOW3a) X21RBS EQU 1 ELSE X21RBS EQU -2-1 ;2 is because of the two xch passes, 1 is because of shift right by 2 rather than to left by 6 ENDIF X1S3 EQU X3S2 + WORDSIZE - X21RB + X21RBS ;X21RBS compensates the byte shift left behind by the d=2, j=1 rotation code (see above) X30ROTA EQU 58 ;j=0 --> rot=58 X30RMUL EQU (1 SHL (X30ROTA MOD 8)) X30RB EQU X30ROTA/8 X31ROTA EQU 44 ;j=1 --> rot=44 X31RMUL EQU (1 SHL (X31ROTA MOD 8)) X31RB EQU X31ROTA/8 ;j = 0 mov a,X0R3+((0+X0S3) MOD WORDSIZE) ;first, the addition, then the rota and xor add a,X1R3+((0+X1S3) MOD WORDSIZE) ; - no point in mixing them together xch a,X0R3+((1+X0S3) MOD WORDSIZE) ; - however, note, that using xch we effectively shift the word left by 8 bits addc a,X1R3+((1+X1S3) MOD WORDSIZE) xch a,X0R3+((2+X0S3) MOD WORDSIZE) addc a,X1R3+((2+X1S3) MOD WORDSIZE) xch a,X0R3+((3+X0S3) MOD WORDSIZE) addc a,X1R3+((3+X1S3) MOD WORDSIZE) xch a,X0R3+((4+X0S3) MOD WORDSIZE) addc a,X1R3+((4+X1S3) MOD WORDSIZE) xch a,X0R3+((5+X0S3) MOD WORDSIZE) addc a,X1R3+((5+X1S3) MOD WORDSIZE) xch a,X0R3+((6+X0S3) MOD WORDSIZE) addc a,X1R3+((6+X1S3) MOD WORDSIZE) xch a,X0R3+((7+X0S3) MOD WORDSIZE) addc a,X1R3+((7+X1S3) MOD WORDSIZE) mov X0R3+((0+X0S3) MOD WORDSIZE),a IF (TFISH_SLOW3)
;now rota and xor - first prepare rota mov a,X1R3+((7+X1S3) MOD WORDSIZE) mov b,#X30RMUL mul ab ;now the rota itself mov r3,b ;-- this also uses xch hence shifts 8 bits to the left xch a,X1R3+((0+X1S3) MOD WORDSIZE) mov b,#X30RMUL mul ab orl a,r3 xrl a,X0R3+((0+X0S3+1+X30RB) MOD WORDSIZE) mov r3,b xch a,X1R3+((1+X1S3) MOD WORDSIZE) mov b,#X30RMUL mul ab orl a,r3 xrl a,X0R3+((1+X0S3+1+X30RB) MOD WORDSIZE) mov r3,b xch a,X1R3+((2+X1S3) MOD WORDSIZE) mov b,#X30RMUL mul ab orl a,r3 xrl a,X0R3+((2+X0S3+1+X30RB) MOD WORDSIZE) mov r3,b xch a,X1R3+((3+X1S3) MOD WORDSIZE) mov b,#X30RMUL mul ab orl a,r3 xrl a,X0R3+((3+X0S3+1+X30RB) MOD WORDSIZE) mov r3,b xch a,X1R3+((4+X1S3) MOD WORDSIZE) mov b,#X30RMUL mul ab orl a,r3 xrl a,X0R3+((4+X0S3+1+X30RB) MOD WORDSIZE) mov r3,b xch a,X1R3+((5+X1S3) MOD WORDSIZE) mov b,#X30RMUL mul ab orl a,r3 xrl a,X0R3+((5+X0S3+1+X30RB) MOD WORDSIZE) mov r3,b xch a,X1R3+((6+X1S3) MOD WORDSIZE) mov b,#X30RMUL mul ab orl a,r3 xrl a,X0R3+((6+X0S3+1+X30RB) MOD WORDSIZE) mov X1R3+((7+X1S3) MOD WORDSIZE),a mov a,X1R3+((0+X1S3) MOD WORDSIZE) orl a,b xrl a,X0R3+((7+X0S3+1+X30RB) MOD WORDSIZE) mov X1R3+((0+X1S3) MOD WORDSIZE),a ELSE mov a,X1R3+((7+X1S3) MOD WORDSIZE) rlc a mov a,X1R3+((0+X1S3) MOD WORDSIZE) rlc a xch a,X1R3+((1+X1S3) MOD WORDSIZE) rlc a xch a,X1R3+((2+X1S3) MOD WORDSIZE) rlc a xch a,X1R3+((3+X1S3) MOD WORDSIZE) rlc a xch a,X1R3+((4+X1S3) MOD WORDSIZE) rlc a xch a,X1R3+((5+X1S3) MOD WORDSIZE) rlc a xch a,X1R3+((6+X1S3) MOD WORDSIZE) rlc a xch a,X1R3+((7+X1S3) MOD WORDSIZE) rlc a mov X1R3+((0+X1S3) MOD WORDSIZE),a rlc a mov a,X1R3+((1+X1S3) MOD WORDSIZE) rlc a xrl a,X0R3+((0+X0S3+1+X30RB) MOD WORDSIZE) xch a,X1R3+((2+X1S3) MOD WORDSIZE) rlc a xrl a,X0R3+((1+X0S3+1+X30RB) MOD WORDSIZE) xch a,X1R3+((3+X1S3) MOD WORDSIZE) rlc a xrl a,X0R3+((2+X0S3+1+X30RB) MOD WORDSIZE) xch a,X1R3+((4+X1S3) MOD WORDSIZE) rlc a xrl a,X0R3+((3+X0S3+1+X30RB) MOD WORDSIZE) xch a,X1R3+((5+X1S3) MOD WORDSIZE) rlc a xrl a,X0R3+((4+X0S3+1+X30RB) MOD WORDSIZE) xch 
a,X1R3+((6+X1S3) MOD WORDSIZE) rlc a xrl a,X0R3+((5+X0S3+1+X30RB) MOD WORDSIZE) xch a,X1R3+((7+X1S3) MOD WORDSIZE) rlc a xrl a,X0R3+((6+X0S3+1+X30RB) MOD WORDSIZE) xch a,X1R3+((0+X1S3) MOD WORDSIZE) rlc a xrl a,X0R3+((7+X0S3+1+X30RB) MOD WORDSIZE) mov X1R3+((1+X1S3) MOD WORDSIZE),a ENDIF ;j = 1 mov a,X2R3+((0+X2S3) MOD WORDSIZE) ;first, the addition, then the rota and xor add a,X3R3+((0+X3S3) MOD WORDSIZE) ; - no point in mixing them together xch a,X2R3+((1+X2S3) MOD WORDSIZE) ; - however, note, that using xch we effectively shift the word left by 8 bits addc a,X3R3+((1+X3S3) MOD WORDSIZE) xch a,X2R3+((2+X2S3) MOD WORDSIZE) addc a,X3R3+((2+X3S3) MOD WORDSIZE) xch a,X2R3+((3+X2S3) MOD WORDSIZE) addc a,X3R3+((3+X3S3) MOD WORDSIZE) xch a,X2R3+((4+X2S3) MOD WORDSIZE) addc a,X3R3+((4+X3S3) MOD WORDSIZE) xch a,X2R3+((5+X2S3) MOD WORDSIZE) addc a,X3R3+((5+X3S3) MOD WORDSIZE) xch a,X2R3+((6+X2S3) MOD WORDSIZE) addc a,X3R3+((6+X3S3) MOD WORDSIZE) xch a,X2R3+((7+X2S3) MOD WORDSIZE) addc a,X3R3+((7+X3S3) MOD WORDSIZE) mov X2R3+((0+X2S3) MOD WORDSIZE),a IF (TFISH_SLOW2) ;now rota and xor - first prepare rota mov a,X3R3+((7+X3S3) MOD WORDSIZE) mov b,#X31RMUL mul ab ;now the rota itself mov r3,b ;-- this also uses xch hence shifts 8 bits to the left xch a,X3R3+((0+X3S3) MOD WORDSIZE) mov b,#X31RMUL mul ab orl a,r3 xrl a,X2R3+((0+X2S3+1+X31RB) MOD WORDSIZE) mov r3,b xch a,X3R3+((1+X3S3) MOD WORDSIZE) mov b,#X31RMUL mul ab orl a,r3 xrl a,X2R3+((1+X2S3+1+X31RB) MOD WORDSIZE) mov r3,b xch a,X3R3+((2+X3S3) MOD WORDSIZE) mov b,#X31RMUL mul ab orl a,r3 xrl a,X2R3+((2+X2S3+1+X31RB) MOD WORDSIZE) mov r3,b xch a,X3R3+((3+X3S3) MOD WORDSIZE) mov b,#X31RMUL mul ab orl a,r3 xrl a,X2R3+((3+X2S3+1+X31RB) MOD WORDSIZE) mov r3,b xch a,X3R3+((4+X3S3) MOD WORDSIZE) mov b,#X31RMUL mul ab orl a,r3 xrl a,X2R3+((4+X2S3+1+X31RB) MOD WORDSIZE) mov r3,b xch a,X3R3+((5+X3S3) MOD WORDSIZE) mov b,#X31RMUL mul ab orl a,r3 xrl a,X2R3+((5+X2S3+1+X31RB) MOD WORDSIZE) mov r3,b xch a,X3R3+((6+X3S3) MOD WORDSIZE) 
mov b,#X31RMUL mul ab orl a,r3 xrl a,X2R3+((6+X2S3+1+X31RB) MOD WORDSIZE) mov X3R3+((7+X3S3) MOD WORDSIZE),a mov a,X3R3+((0+X3S3) MOD WORDSIZE) orl a,b xrl a,X2R3+((7+X2S3+1+X31RB) MOD WORDSIZE) mov X3R3+((0+X3S3) MOD WORDSIZE),a ELSE ;this is the optimal version - see above IF (TFISH_OPTIM_ALL=0) mov b,r0 ;we will need r0 here ENDIF mov a,X3R3+((7+X3S3) MOD WORDSIZE) mov r0,#X3R3+((0+X3S3) MOD WORDSIZE) xchd a,@r0 swap a xrl a,X2R3+((0+X2S3+1+X31RB) MOD WORDSIZE) xch a,@r0 IF (((1+X3S3) MOD WORDSIZE) = 0) mov r0,#X3R3+((1+X3S3) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a xrl a,X2R3+((1+X2S3+1+X31RB) MOD WORDSIZE) xch a,@r0 IF (((2+X3S3) MOD WORDSIZE) = 0) mov r0,#X3R3+((2+X3S3) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a xrl a,X2R3+((2+X2S3+1+X31RB) MOD WORDSIZE) xch a,@r0 IF (((3+X3S3) MOD WORDSIZE) = 0) mov r0,#X3R3+((3+X3S3) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a xrl a,X2R3+((3+X2S3+1+X31RB) MOD WORDSIZE) xch a,@r0 IF (((4+X3S3) MOD WORDSIZE) = 0) mov r0,#X3R3+((4+X3S3) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a xrl a,X2R3+((4+X2S3+1+X31RB) MOD WORDSIZE) xch a,@r0 IF (((5+X3S3) MOD WORDSIZE) = 0) mov r0,#X3R3+((5+X3S3) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a xrl a,X2R3+((5+X2S3+1+X31RB) MOD WORDSIZE) xch a,@r0 IF (((6+X3S3) MOD WORDSIZE) = 0) mov r0,#X3R3+((6+X3S3) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a xrl a,X2R3+((6+X2S3+1+X31RB) MOD WORDSIZE) xch a,@r0 swap a xrl a,X2R3+((7+X2S3+1+X31RB) MOD WORDSIZE) mov X3R3+((7+X3S3) MOD WORDSIZE),a IF (TFISH_OPTIM_ALL=0) mov r0,b ;restore r0 ENDIF ENDIF ;----------- key injection #1 ;adds four consecutive key words to X0..X3, two consecutive tweak words to X1 and X2, and the incremented counter r2 to X3 ;(the key pointer wraps from Key+(DATASIZE+1)*WORDSIZE back to Key, the tweak pointer from Tweak+(2+1)*WORDSIZE back to Tweak) ;r0 points to the current word of key, r1 points to the current word of tweak ;(with TFISH_OPTIM_ALL=1 the key pointer is parked in b between injections and reloaded into r0 below) X0RK0 EQU X+0*WORDSIZE ;in all even rounds, X1 and X3 are swapped again, so now it's the straight way again (the permutation is 0123->0321) X1RK0 EQU X+1*WORDSIZE X2RK0 EQU X+2*WORDSIZE X3RK0 EQU X+3*WORDSIZE X0SK0 EQU X0S3+1 ;1 is due to shift because of use of xch IF (TFISH_SLOW2) X31RBS EQU 1 ELSE X31RBS EQU 0
ENDIF X1SK0 EQU X3S3 + WORDSIZE - X31RB + X31RBS ;X31RBS is 1 if xch used in TFISH_SLOW2 version X2SK0 EQU X2S3+1 ;1 is due to shift because of use of xch IF (TFISH_SLOW3) X30RBS EQU 1 ELSE X30RBS EQU 2 ENDIF X3SK0 EQU X1S3 + WORDSIZE - X30RB + X30RBS IF (TFISH_OPTIM_ALL=1) mov r0,b ;fetch the key pointer parked in b ENDIF mov a,X0RK0+((0+X0SK0) MOD WORDSIZE) add a,@r0 ;X0 += key word; inc and xch do not alter CY, so the carry ripples through all 8 bytes inc r0 xch a,X0RK0+((1+X0SK0) MOD WORDSIZE) addc a,@r0 inc r0 xch a,X0RK0+((2+X0SK0) MOD WORDSIZE) addc a,@r0 inc r0 xch a,X0RK0+((3+X0SK0) MOD WORDSIZE) addc a,@r0 inc r0 xch a,X0RK0+((4+X0SK0) MOD WORDSIZE) addc a,@r0 inc r0 xch a,X0RK0+((5+X0SK0) MOD WORDSIZE) addc a,@r0 inc r0 xch a,X0RK0+((6+X0SK0) MOD WORDSIZE) addc a,@r0 inc r0 xch a,X0RK0+((7+X0SK0) MOD WORDSIZE) addc a,@r0 inc r0 mov X0RK0+((0+X0SK0) MOD WORDSIZE),a cjne r0,#Key+(DATASIZE+1)*WORDSIZE,TFishK0X1 mov r0,#Key ;wrap: past the last of the DATASIZE+1 key words, start over at Key (cjne alters CY, harmless because the next word starts with add) TFishK0X1: mov b,r0 ;backup key pointer for the next key injection (which thus starts one key word further) mov a,X1RK0+((0+X1SK0) MOD WORDSIZE) add a,@r0 inc r0 xch a,X1RK0+((1+X1SK0) MOD WORDSIZE) addc a,@r0 inc r0 xch a,X1RK0+((2+X1SK0) MOD WORDSIZE) addc a,@r0 inc r0 xch a,X1RK0+((3+X1SK0) MOD WORDSIZE) addc a,@r0 inc r0 xch a,X1RK0+((4+X1SK0) MOD WORDSIZE) addc a,@r0 inc r0 xch a,X1RK0+((5+X1SK0) MOD WORDSIZE) addc a,@r0 inc r0 xch a,X1RK0+((6+X1SK0) MOD WORDSIZE) addc a,@r0 inc r0 xch a,X1RK0+((7+X1SK0) MOD WORDSIZE) addc a,@r0 inc r0 mov X1RK0+((0+X1SK0) MOD WORDSIZE),a cjne r0,#Key+(DATASIZE+1)*WORDSIZE,TFishK0X2 mov r0,#Key TFishK0X2: mov a,X2RK0+((0+X2SK0) MOD WORDSIZE) add a,@r0 inc r0 xch a,X2RK0+((1+X2SK0) MOD WORDSIZE) addc a,@r0 inc r0 xch a,X2RK0+((2+X2SK0) MOD WORDSIZE) addc a,@r0 inc r0 xch a,X2RK0+((3+X2SK0) MOD WORDSIZE) addc a,@r0 inc r0 xch a,X2RK0+((4+X2SK0) MOD WORDSIZE) addc a,@r0 inc r0 xch a,X2RK0+((5+X2SK0) MOD WORDSIZE) addc a,@r0 inc r0 xch a,X2RK0+((6+X2SK0) MOD WORDSIZE) addc a,@r0 inc r0 xch a,X2RK0+((7+X2SK0) MOD WORDSIZE) addc a,@r0 inc r0 mov X2RK0+((0+X2SK0) MOD WORDSIZE),a cjne r0,#Key+(DATASIZE+1)*WORDSIZE,TFishK0X3 mov r0,#Key TFishK0X3: mov
a,X3RK0+((0+X3SK0) MOD WORDSIZE) add a,@r0 inc r0 xch a,X3RK0+((1+X3SK0) MOD WORDSIZE) addc a,@r0 inc r0 xch a,X3RK0+((2+X3SK0) MOD WORDSIZE) addc a,@r0 inc r0 xch a,X3RK0+((3+X3SK0) MOD WORDSIZE) addc a,@r0 inc r0 xch a,X3RK0+((4+X3SK0) MOD WORDSIZE) addc a,@r0 inc r0 xch a,X3RK0+((5+X3SK0) MOD WORDSIZE) addc a,@r0 inc r0 xch a,X3RK0+((6+X3SK0) MOD WORDSIZE) addc a,@r0 inc r0 xch a,X3RK0+((7+X3SK0) MOD WORDSIZE) addc a,@r0 mov X3RK0+((0+X3SK0) MOD WORDSIZE),a mov r0,b ;restore the key pointer saved at TFishK0X1 (used by the next key injection) ;now add tweak and round nr. Don't forget, that due to xch, there is already one byte-shift on these words mov a,X1RK0+((0+X1SK0+1) MOD WORDSIZE) add a,@r1 inc r1 xch a,X1RK0+((1+X1SK0+1) MOD WORDSIZE) addc a,@r1 inc r1 xch a,X1RK0+((2+X1SK0+1) MOD WORDSIZE) addc a,@r1 inc r1 xch a,X1RK0+((3+X1SK0+1) MOD WORDSIZE) addc a,@r1 inc r1 xch a,X1RK0+((4+X1SK0+1) MOD WORDSIZE) addc a,@r1 inc r1 xch a,X1RK0+((5+X1SK0+1) MOD WORDSIZE) addc a,@r1 inc r1 xch a,X1RK0+((6+X1SK0+1) MOD WORDSIZE) addc a,@r1 inc r1 xch a,X1RK0+((7+X1SK0+1) MOD WORDSIZE) addc a,@r1 inc r1 mov X1RK0+((0+X1SK0+1) MOD WORDSIZE),a cjne r1,#Tweak+(2+1)*WORDSIZE,TFishK0X4 mov r1,#Tweak TFishK0X4: mov b,r1 ;save the tweak pointer (advanced by one word) for the next key injection mov a,X2RK0+((0+X2SK0+1) MOD WORDSIZE) add a,@r1 inc r1 xch a,X2RK0+((1+X2SK0+1) MOD WORDSIZE) addc a,@r1 inc r1 xch a,X2RK0+((2+X2SK0+1) MOD WORDSIZE) addc a,@r1 inc r1 xch a,X2RK0+((3+X2SK0+1) MOD WORDSIZE) addc a,@r1 inc r1 xch a,X2RK0+((4+X2SK0+1) MOD WORDSIZE) addc a,@r1 inc r1 xch a,X2RK0+((5+X2SK0+1) MOD WORDSIZE) addc a,@r1 inc r1 xch a,X2RK0+((6+X2SK0+1) MOD WORDSIZE) addc a,@r1 inc r1 xch a,X2RK0+((7+X2SK0+1) MOD WORDSIZE) addc a,@r1 mov X2RK0+((0+X2SK0+1) MOD WORDSIZE),a mov r1,#0 ;r1 = 0: stands for the upper bytes of the round number, added only to propagate the carry inc r2 ;increment round counter (its new value is added to X3 below) mov a,X3RK0+((0+X3SK0+1) MOD WORDSIZE) add a,r2 xch a,X3RK0+((1+X3SK0+1) MOD WORDSIZE) addc a,r1 xch a,X3RK0+((2+X3SK0+1) MOD WORDSIZE) addc a,r1 xch a,X3RK0+((3+X3SK0+1) MOD WORDSIZE) addc a,r1 xch a,X3RK0+((4+X3SK0+1) MOD WORDSIZE)
addc a,r1 xch a,X3RK0+((5+X3SK0+1) MOD WORDSIZE) addc a,r1 xch a,X3RK0+((6+X3SK0+1) MOD WORDSIZE) addc a,r1 xch a,X3RK0+((7+X3SK0+1) MOD WORDSIZE) addc a,r1 mov X3RK0+((0+X3SK0+1) MOD WORDSIZE),a mov r1,b ;restore the tweak pointer saved at TFishK0X4 IF (TFISH_OPTIM_ALL=1) mov b,r0 ;park the key pointer in b: with TFISH_OPTIM_ALL=1 the round code uses r0 without saving it ENDIF ;--------------- d = 4 X0R4 EQU X+0*WORDSIZE ;in all even rounds, X1 and X3 are swapped again, so now it's the straight way again (the permutation is 0123->0321) X1R4 EQU X+1*WORDSIZE X2R4 EQU X+2*WORDSIZE X3R4 EQU X+3*WORDSIZE X0S4 EQU X0SK0+1 ;1 is due to shift because of use of xch X1S4 EQU X1SK0+1+1 ;1+1: one xch shift from the key addition, one from the tweak addition X2S4 EQU X2SK0+1+1 ;1+1: one xch shift from the key addition, one from the tweak addition X3S4 EQU X3SK0+1+1 ;1+1: one xch shift from the key addition, one from the round number addition X40ROTA EQU 26 ;j=0 --> rot=26 X40RMUL EQU (1 SHL (X40ROTA MOD 8)) X40RB EQU X40ROTA/8 X41ROTA EQU 20 ;j=1 --> rot=20 X41RMUL EQU (1 SHL (X41ROTA MOD 8)) X41RB EQU X41ROTA/8 ;j = 0 mov a,X0R4+((0+X0S4) MOD WORDSIZE) ;first, the addition, then the rota and xor add a,X1R4+((0+X1S4) MOD WORDSIZE) ; - no point in mixing them together xch a,X0R4+((1+X0S4) MOD WORDSIZE) ; - however, note, that using xch we effectively shift the word left by 8 bits addc a,X1R4+((1+X1S4) MOD WORDSIZE) xch a,X0R4+((2+X0S4) MOD WORDSIZE) addc a,X1R4+((2+X1S4) MOD WORDSIZE) xch a,X0R4+((3+X0S4) MOD WORDSIZE) addc a,X1R4+((3+X1S4) MOD WORDSIZE) xch a,X0R4+((4+X0S4) MOD WORDSIZE) addc a,X1R4+((4+X1S4) MOD WORDSIZE) xch a,X0R4+((5+X0S4) MOD WORDSIZE) addc a,X1R4+((5+X1S4) MOD WORDSIZE) xch a,X0R4+((6+X0S4) MOD WORDSIZE) addc a,X1R4+((6+X1S4) MOD WORDSIZE) xch a,X0R4+((7+X0S4) MOD WORDSIZE) addc a,X1R4+((7+X1S4) MOD WORDSIZE) mov X0R4+((0+X0S4) MOD WORDSIZE),a IF (TFISH_SLOW3) ;now rota and xor - first prepare rota mov a,X1R4+((7+X1S4) MOD WORDSIZE) mov b,#X40RMUL mul ab ;now the rota itself mov R3,b ;-- this also uses xch hence shifts 8 bits to the left xch a,X1R4+((0+X1S4) MOD WORDSIZE) mov b,#X40RMUL mul ab orl a,R3 xrl a,X0R4+((0+X0S4+1+X40RB) MOD WORDSIZE) mov R3,b xch
a,X1R4+((1+X1S4) MOD WORDSIZE) mov b,#X40RMUL mul ab orl a,R3 xrl a,X0R4+((1+X0S4+1+X40RB) MOD WORDSIZE) mov R3,b xch a,X1R4+((2+X1S4) MOD WORDSIZE) mov b,#X40RMUL mul ab orl a,R3 xrl a,X0R4+((2+X0S4+1+X40RB) MOD WORDSIZE) mov R3,b xch a,X1R4+((3+X1S4) MOD WORDSIZE) mov b,#X40RMUL mul ab orl a,R3 xrl a,X0R4+((3+X0S4+1+X40RB) MOD WORDSIZE) mov R3,b xch a,X1R4+((4+X1S4) MOD WORDSIZE) mov b,#X40RMUL mul ab orl a,R3 xrl a,X0R4+((4+X0S4+1+X40RB) MOD WORDSIZE) mov R3,b xch a,X1R4+((5+X1S4) MOD WORDSIZE) mov b,#X40RMUL mul ab orl a,R3 xrl a,X0R4+((5+X0S4+1+X40RB) MOD WORDSIZE) mov R3,b xch a,X1R4+((6+X1S4) MOD WORDSIZE) mov b,#X40RMUL mul ab orl a,R3 xrl a,X0R4+((6+X0S4+1+X40RB) MOD WORDSIZE) mov X1R4+((7+X1S4) MOD WORDSIZE),a mov a,X1R4+((0+X1S4) MOD WORDSIZE) orl a,b xrl a,X0R4+((7+X0S4+1+X40RB) MOD WORDSIZE) mov X1R4+((0+X1S4) MOD WORDSIZE),a ELSE mov a,X1R4+((7+X1S4) MOD WORDSIZE) rlc a mov a,X1R4+((0+X1S4) MOD WORDSIZE) rlc a xch a,X1R4+((1+X1S4) MOD WORDSIZE) rlc a xch a,X1R4+((2+X1S4) MOD WORDSIZE) rlc a xch a,X1R4+((3+X1S4) MOD WORDSIZE) rlc a xch a,X1R4+((4+X1S4) MOD WORDSIZE) rlc a xch a,X1R4+((5+X1S4) MOD WORDSIZE) rlc a xch a,X1R4+((6+X1S4) MOD WORDSIZE) rlc a xch a,X1R4+((7+X1S4) MOD WORDSIZE) rlc a mov X1R4+((0+X1S4) MOD WORDSIZE),a rlc a mov a,X1R4+((1+X1S4) MOD WORDSIZE) rlc a xrl a,X0R4+((0+X0S4+1+X40RB) MOD WORDSIZE) xch a,X1R4+((2+X1S4) MOD WORDSIZE) rlc a xrl a,X0R4+((1+X0S4+1+X40RB) MOD WORDSIZE) xch a,X1R4+((3+X1S4) MOD WORDSIZE) rlc a xrl a,X0R4+((2+X0S4+1+X40RB) MOD WORDSIZE) xch a,X1R4+((4+X1S4) MOD WORDSIZE) rlc a xrl a,X0R4+((3+X0S4+1+X40RB) MOD WORDSIZE) xch a,X1R4+((5+X1S4) MOD WORDSIZE) rlc a xrl a,X0R4+((4+X0S4+1+X40RB) MOD WORDSIZE) xch a,X1R4+((6+X1S4) MOD WORDSIZE) rlc a xrl a,X0R4+((5+X0S4+1+X40RB) MOD WORDSIZE) xch a,X1R4+((7+X1S4) MOD WORDSIZE) rlc a xrl a,X0R4+((6+X0S4+1+X40RB) MOD WORDSIZE) xch a,X1R4+((0+X1S4) MOD WORDSIZE) rlc a xrl a,X0R4+((7+X0S4+1+X40RB) MOD WORDSIZE) mov X1R4+((1+X1S4) MOD WORDSIZE),a ENDIF ;j = 1 mov 
a,X2R4+((0+X2S4) MOD WORDSIZE) ;first, the addition, then the rota and xor add a,X3R4+((0+X3S4) MOD WORDSIZE) ; - no point in mixing them together xch a,X2R4+((1+X2S4) MOD WORDSIZE) ; - however, note, that using xch we effectively shift the word left by 8 bits addc a,X3R4+((1+X3S4) MOD WORDSIZE) xch a,X2R4+((2+X2S4) MOD WORDSIZE) addc a,X3R4+((2+X3S4) MOD WORDSIZE) xch a,X2R4+((3+X2S4) MOD WORDSIZE) addc a,X3R4+((3+X3S4) MOD WORDSIZE) xch a,X2R4+((4+X2S4) MOD WORDSIZE) addc a,X3R4+((4+X3S4) MOD WORDSIZE) xch a,X2R4+((5+X2S4) MOD WORDSIZE) addc a,X3R4+((5+X3S4) MOD WORDSIZE) xch a,X2R4+((6+X2S4) MOD WORDSIZE) addc a,X3R4+((6+X3S4) MOD WORDSIZE) xch a,X2R4+((7+X2S4) MOD WORDSIZE) addc a,X3R4+((7+X3S4) MOD WORDSIZE) mov X2R4+((0+X2S4) MOD WORDSIZE),a IF (TFISH_SLOW2) ;now rota and xor - first prepare rota mov a,X3R4+((7+X3S4) MOD WORDSIZE) mov b,#X41RMUL mul ab ;now the rota itself mov R3,b ;-- this also uses xch hence shifts 8 bits to the left xch a,X3R4+((0+X3S4) MOD WORDSIZE) mov b,#X41RMUL mul ab orl a,R3 xrl a,X2R4+((0+X2S4+1+X41RB) MOD WORDSIZE) mov R3,b xch a,X3R4+((1+X3S4) MOD WORDSIZE) mov b,#X41RMUL mul ab orl a,R3 xrl a,X2R4+((1+X2S4+1+X41RB) MOD WORDSIZE) mov R3,b xch a,X3R4+((2+X3S4) MOD WORDSIZE) mov b,#X41RMUL mul ab orl a,R3 xrl a,X2R4+((2+X2S4+1+X41RB) MOD WORDSIZE) mov R3,b xch a,X3R4+((3+X3S4) MOD WORDSIZE) mov b,#X41RMUL mul ab orl a,R3 xrl a,X2R4+((3+X2S4+1+X41RB) MOD WORDSIZE) mov R3,b xch a,X3R4+((4+X3S4) MOD WORDSIZE) mov b,#X41RMUL mul ab orl a,R3 xrl a,X2R4+((4+X2S4+1+X41RB) MOD WORDSIZE) mov R3,b xch a,X3R4+((5+X3S4) MOD WORDSIZE) mov b,#X41RMUL mul ab orl a,R3 xrl a,X2R4+((5+X2S4+1+X41RB) MOD WORDSIZE) mov R3,b xch a,X3R4+((6+X3S4) MOD WORDSIZE) mov b,#X41RMUL mul ab orl a,R3 xrl a,X2R4+((6+X2S4+1+X41RB) MOD WORDSIZE) mov X3R4+((7+X3S4) MOD WORDSIZE),a mov a,X3R4+((0+X3S4) MOD WORDSIZE) orl a,b xrl a,X2R4+((7+X2S4+1+X41RB) MOD WORDSIZE) mov X3R4+((0+X3S4) MOD WORDSIZE),a ELSE ;this is the optimal version - see above IF (TFISH_OPTIM_ALL=0) 
mov b,r0 ;we will need r0 here ENDIF ;rot=20 is 2 whole bytes + 4 bits: the 4-bit part is done nibble-wise with xchd/swap, walking the word byte by byte through @r0 mov a,X3R4+((7+X3S4) MOD WORDSIZE) mov r0,#X3R4+((0+X3S4) MOD WORDSIZE) xchd a,@r0 swap a xrl a,X2R4+((0+X2S4+1+X41RB) MOD WORDSIZE) xch a,@r0 IF (((1+X3S4) MOD WORDSIZE) = 0) mov r0,#X3R4+((1+X3S4) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a xrl a,X2R4+((1+X2S4+1+X41RB) MOD WORDSIZE) xch a,@r0 IF (((2+X3S4) MOD WORDSIZE) = 0) mov r0,#X3R4+((2+X3S4) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a xrl a,X2R4+((2+X2S4+1+X41RB) MOD WORDSIZE) xch a,@r0 IF (((3+X3S4) MOD WORDSIZE) = 0) mov r0,#X3R4+((3+X3S4) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a xrl a,X2R4+((3+X2S4+1+X41RB) MOD WORDSIZE) xch a,@r0 IF (((4+X3S4) MOD WORDSIZE) = 0) mov r0,#X3R4+((4+X3S4) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a xrl a,X2R4+((4+X2S4+1+X41RB) MOD WORDSIZE) xch a,@r0 IF (((5+X3S4) MOD WORDSIZE) = 0) mov r0,#X3R4+((5+X3S4) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a xrl a,X2R4+((5+X2S4+1+X41RB) MOD WORDSIZE) xch a,@r0 IF (((6+X3S4) MOD WORDSIZE) = 0) mov r0,#X3R4+((6+X3S4) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a xrl a,X2R4+((6+X2S4+1+X41RB) MOD WORDSIZE) xch a,@r0 swap a xrl a,X2R4+((7+X2S4+1+X41RB) MOD WORDSIZE) mov X3R4+((7+X3S4) MOD WORDSIZE),a IF (TFISH_OPTIM_ALL=0) mov r0,b ;restore r0 ENDIF ENDIF ;--------------- d = 5 X0R5 EQU X+0*WORDSIZE ;odd round: X1 and X3 are swapped (the permutation is 0123->0321), so X1R5 is physical word 3 and X3R5 is physical word 1 X1R5 EQU X+3*WORDSIZE X2R5 EQU X+2*WORDSIZE X3R5 EQU X+1*WORDSIZE X0S5 EQU X0S4+1 ;1 is due to shift because of use of xch IF (TFISH_SLOW3) X40RBS EQU 1 ELSE X40RBS EQU 2 ;the rlc-based d=4, j=0 rotation makes two passes over the word, each shifting it by one byte through xch ENDIF X3S5 EQU X1S4 + WORDSIZE - X40RB + X40RBS X2S5 EQU X2S4+1 ;1 is due to shift because of use of xch IF (TFISH_SLOW2) X41RBS EQU 1 ELSE X41RBS EQU 0 ENDIF X1S5 EQU X3S4 + WORDSIZE - X41RB + X41RBS ;X41RBS is 1 if xch used X50ROTA EQU 53 ;j=0 --> rot=53 X50RMUL EQU (1 SHL (X50ROTA MOD 8)) X50RB EQU X50ROTA/8 X51ROTA EQU 35 ;j=1 --> rot=35 X51RMUL EQU (1 SHL (X51ROTA MOD 8)) X51RB EQU
X51ROTA/8 ;j = 0 mov a,X0R5+((0+X0S5) MOD WORDSIZE) ;first, the addition, then the rota and xor add a,X1R5+((0+X1S5) MOD WORDSIZE) ; - no point in mixing them together xch a,X0R5+((1+X0S5) MOD WORDSIZE) ; - however, note, that using xch we effectively shift the word left by 8 bits addc a,X1R5+((1+X1S5) MOD WORDSIZE) xch a,X0R5+((2+X0S5) MOD WORDSIZE) addc a,X1R5+((2+X1S5) MOD WORDSIZE) xch a,X0R5+((3+X0S5) MOD WORDSIZE) addc a,X1R5+((3+X1S5) MOD WORDSIZE) xch a,X0R5+((4+X0S5) MOD WORDSIZE) addc a,X1R5+((4+X1S5) MOD WORDSIZE) xch a,X0R5+((5+X0S5) MOD WORDSIZE) addc a,X1R5+((5+X1S5) MOD WORDSIZE) xch a,X0R5+((6+X0S5) MOD WORDSIZE) addc a,X1R5+((6+X1S5) MOD WORDSIZE) xch a,X0R5+((7+X0S5) MOD WORDSIZE) addc a,X1R5+((7+X1S5) MOD WORDSIZE) mov X0R5+((0+X0S5) MOD WORDSIZE),a IF (TFISH_SLOW4) ;now rota and xor - first prepare rota mov a,X1R5+((7+X1S5) MOD WORDSIZE) mov b,#X50RMUL mul ab ;now the rota itself mov r3,b ;-- this also uses xch hence shifts 8 bits to the left xch a,X1R5+((0+X1S5) MOD WORDSIZE) mov b,#X50RMUL mul ab orl a,r3 xrl a,X0R5+((0+X0S5+1+X50RB) MOD WORDSIZE) mov r3,b xch a,X1R5+((1+X1S5) MOD WORDSIZE) mov b,#X50RMUL mul ab orl a,r3 xrl a,X0R5+((1+X0S5+1+X50RB) MOD WORDSIZE) mov r3,b xch a,X1R5+((2+X1S5) MOD WORDSIZE) mov b,#X50RMUL mul ab orl a,r3 xrl a,X0R5+((2+X0S5+1+X50RB) MOD WORDSIZE) mov r3,b xch a,X1R5+((3+X1S5) MOD WORDSIZE) mov b,#X50RMUL mul ab orl a,r3 xrl a,X0R5+((3+X0S5+1+X50RB) MOD WORDSIZE) mov r3,b xch a,X1R5+((4+X1S5) MOD WORDSIZE) mov b,#X50RMUL mul ab orl a,r3 xrl a,X0R5+((4+X0S5+1+X50RB) MOD WORDSIZE) mov r3,b xch a,X1R5+((5+X1S5) MOD WORDSIZE) mov b,#X50RMUL mul ab orl a,r3 xrl a,X0R5+((5+X0S5+1+X50RB) MOD WORDSIZE) mov r3,b xch a,X1R5+((6+X1S5) MOD WORDSIZE) mov b,#X50RMUL mul ab orl a,r3 xrl a,X0R5+((6+X0S5+1+X50RB) MOD WORDSIZE) mov X1R5+((7+X1S5) MOD WORDSIZE),a mov a,X1R5+((0+X1S5) MOD WORDSIZE) orl a,b xrl a,X0R5+((7+X0S5+1+X50RB) MOD WORDSIZE) mov X1R5+((0+X1S5) MOD WORDSIZE),a ELSE IF (TFISH_OPTIM_ALL=0) mov b,r0 ;we will 
need r0 here ENDIF mov a,X1R5+((7+X1S5) MOD WORDSIZE) mov c,acc.3 mov r0,#X1R5+((0+X1S5) MOD WORDSIZE) xchd a,@r0 swap a rlc a xrl a,X0R5+((0+X0S5+1+X50RB) MOD WORDSIZE) xch a,@r0 IF (((1+X1S5) MOD WORDSIZE) = 0) mov r0,#X1R5+((1+X1S5) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a rlc a xrl a,X0R5+((1+X0S5+1+X50RB) MOD WORDSIZE) xch a,@r0 IF (((2+X1S5) MOD WORDSIZE) = 0) mov r0,#X1R5+((2+X1S5) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a rlc a xrl a,X0R5+((2+X0S5+1+X50RB) MOD WORDSIZE) xch a,@r0 IF (((3+X1S5) MOD WORDSIZE) = 0) mov r0,#X1R5+((3+X1S5) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a rlc a xrl a,X0R5+((3+X0S5+1+X50RB) MOD WORDSIZE) xch a,@r0 IF (((4+X1S5) MOD WORDSIZE) = 0) mov r0,#X1R5+((4+X1S5) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a rlc a xrl a,X0R5+((4+X0S5+1+X50RB) MOD WORDSIZE) xch a,@r0 IF (((5+X1S5) MOD WORDSIZE) = 0) mov r0,#X1R5+((5+X1S5) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a rlc a xrl a,X0R5+((5+X0S5+1+X50RB) MOD WORDSIZE) xch a,@r0 IF (((6+X1S5) MOD WORDSIZE) = 0) mov r0,#X1R5+((6+X1S5) MOD WORDSIZE) ELSE inc r0 ENDIF xchd a,@r0 swap a rlc a xrl a,X0R5+((6+X0S5+1+X50RB) MOD WORDSIZE) xch a,@r0 swap a rlc a xrl a,X0R5+((7+X0S5+1+X50RB) MOD WORDSIZE) mov X1R5+((7+X1S5) MOD WORDSIZE),a IF (TFISH_OPTIM_ALL=0) mov r0,b ;restore r0 ENDIF ENDIF ;j = 1 mov a,X2R5+((0+X2S5) MOD WORDSIZE) ;first, the addition, then the rota and xor add a,X3R5+((0+X3S5) MOD WORDSIZE) ; - no point in mixing them together xch a,X2R5+((1+X2S5) MOD WORDSIZE) ; - however, note, that using xch we effectively shift the word left by 8 bits addc a,X3R5+((1+X3S5) MOD WORDSIZE) xch a,X2R5+((2+X2S5) MOD WORDSIZE) addc a,X3R5+((2+X3S5) MOD WORDSIZE) xch a,X2R5+((3+X2S5) MOD WORDSIZE) addc a,X3R5+((3+X3S5) MOD WORDSIZE) xch a,X2R5+((4+X2S5) MOD WORDSIZE) addc a,X3R5+((4+X3S5) MOD WORDSIZE) xch a,X2R5+((5+X2S5) MOD WORDSIZE) addc a,X3R5+((5+X3S5) MOD WORDSIZE) xch a,X2R5+((6+X2S5) MOD WORDSIZE) addc a,X3R5+((6+X3S5) MOD WORDSIZE) xch 
a,X2R5+((7+X2S5) MOD WORDSIZE) addc a,X3R5+((7+X3S5) MOD WORDSIZE) mov X2R5+((0+X2S5) MOD WORDSIZE),a IF (TFISH_SLOW5) ;now rota and xor - first prepare rota mov a,X3R5+((7+X3S5) MOD WORDSIZE) mov b,#X51RMUL mul ab ;now the rota itself mov r3,b ;-- this also uses xch hence shifts 8 bits to the left xch a,X3R5+((0+X3S5) MOD WORDSIZE) mov b,#X51RMUL mul ab orl a,r3 xrl a,X2R5+((0+X2S5+1+X51RB) MOD WORDSIZE) mov r3,b xch a,X3R5+((1+X3S5) MOD WORDSIZE) mov b,#X51RMUL mul ab orl a,r3 xrl a,X2R5+((1+X2S5+1+X51RB) MOD WORDSIZE) mov r3,b xch a,X3R5+((2+X3S5) MOD WORDSIZE) mov b,#X51RMUL mul ab orl a,r3 xrl a,X2R5+((2+X2S5+1+X51RB) MOD WORDSIZE) mov r3,b xch a,X3R5+((3+X3S5) MOD WORDSIZE) mov b,#X51RMUL mul ab orl a,r3 xrl a,X2R5+((3+X2S5+1+X51RB) MOD WORDSIZE) mov r3,b xch a,X3R5+((4+X3S5) MOD WORDSIZE) mov b,#X51RMUL mul ab orl a,r3 xrl a,X2R5+((4+X2S5+1+X51RB) MOD WORDSIZE) mov r3,b xch a,X3R5+((5+X3S5) MOD WORDSIZE) mov b,#X51RMUL mul ab orl a,r3 xrl a,X2R5+((5+X2S5+1+X51RB) MOD WORDSIZE) mov r3,b xch a,X3R5+((6+X3S5) MOD WORDSIZE) mov b,#X51RMUL mul ab orl a,r3 xrl a,X2R5+((6+X2S5+1+X51RB) MOD WORDSIZE) mov X3R5+((7+X3S5) MOD WORDSIZE),a mov a,X3R5+((0+X3S5) MOD WORDSIZE) orl a,b xrl a,X2R5+((7+X2S5+1+X51RB) MOD WORDSIZE) mov X3R5+((0+X3S5) MOD WORDSIZE),a ELSE IF (TFISH_OPTIM_ALL=0) mov b,r0 ;we will need r0 here ENDIF mov a,X3R5+((7+X3S5) MOD WORDSIZE) mov c,acc.4 mov r0,#X3R5+((6+X3S5) MOD WORDSIZE) xch a,@r0 xchd a,@r0 swap a rrc a xrl a,X2R5+((7+X2S5+1+X51RB) MOD WORDSIZE) xch a,@r0 IF (((5+X3S5) MOD WORDSIZE) = (WORDSIZE - 1)) mov r0,#X3R5+((5+X3S5) MOD WORDSIZE) ELSE dec r0 ENDIF xch a,@r0 xchd a,@r0 swap a rrc a xrl a,X2R5+((6+X2S5+1+X51RB) MOD WORDSIZE) xch a,@r0 IF (((4+X3S5) MOD WORDSIZE) = (WORDSIZE - 1)) mov r0,#X3R5+((4+X3S5) MOD WORDSIZE) ELSE dec r0 ENDIF xch a,@r0 xchd a,@r0 swap a rrc a xrl a,X2R5+((5+X2S5+1+X51RB) MOD WORDSIZE) xch a,@r0 IF (((3+X3S5) MOD WORDSIZE) = (WORDSIZE - 1)) mov r0,#X3R5+((3+X3S5) MOD WORDSIZE) ELSE dec r0 ENDIF xch a,@r0 
;NOTE(review): this span opens part-way through the nibble/rrc rotate-and-xor
;of the preceding round (symbols X2R5/X3R5/X51RB - apparently d = 5, j = 1).
;The IF/ELSE it belongs to starts above this point; the statements up to the
;first lone ENDIF are reproduced unchanged.
        xchd a,@r0
        swap a
        rrc a
        xrl a,X2R5+((4+X2S5+1+X51RB) MOD WORDSIZE)
        xch a,@r0
  IF (((2+X3S5) MOD WORDSIZE) = (WORDSIZE - 1))
        mov r0,#X3R5+((2+X3S5) MOD WORDSIZE)
  ELSE
        dec r0
  ENDIF
        xch a,@r0
        xchd a,@r0
        swap a
        rrc a
        xrl a,X2R5+((3+X2S5+1+X51RB) MOD WORDSIZE)
        xch a,@r0
  IF (((1+X3S5) MOD WORDSIZE) = (WORDSIZE - 1))
        mov r0,#X3R5+((1+X3S5) MOD WORDSIZE)
  ELSE
        dec r0
  ENDIF
        xch a,@r0
        xchd a,@r0
        swap a
        rrc a
        xrl a,X2R5+((2+X2S5+1+X51RB) MOD WORDSIZE)
        xch a,@r0
  IF (((0+X3S5) MOD WORDSIZE) = (WORDSIZE - 1))
        mov r0,#X3R5+((0+X3S5) MOD WORDSIZE)
  ELSE
        dec r0
  ENDIF
        xch a,@r0
        xchd a,@r0
        swap a
        rrc a
        xrl a,X2R5+((1+X2S5+1+X51RB) MOD WORDSIZE)
        xch a,@r0
        swap a
        rrc a
        xrl a,X2R5+((0+X2S5+1+X51RB) MOD WORDSIZE)
        mov X3R5+((7+X3S5) MOD WORDSIZE),a
  IF (TFISH_OPTIM_ALL=0)
        mov r0,b ;restore r0
  ENDIF
ENDIF

;--------------- d = 6
;Naming, as the code below uses it:
; XnRd  = base address of word n in round d
; XnSd  = byte offset of the word's least significant byte, i.e. byte i of the
;         word is found at XnRd+((i+XnSd) MOD WORDSIZE)
; XdjRB = whole-byte part of the rotation (ROTA/8), folded into the index of
;         the xor operand instead of moving any data
; XdjRMUL = 2^(ROTA MOD 8), multiplier for the mul-based bit rotation
; XdjRBS  = byte offset added by the rotation code variant that was assembled
X0R6 EQU X+0*WORDSIZE ;in all even rounds, X1 and X3 are swapped again, so now it's the straight way again (the permutation is 0123->0321)
X1R6 EQU X+1*WORDSIZE
X2R6 EQU X+2*WORDSIZE
X3R6 EQU X+3*WORDSIZE
X0S6 EQU X0S5+1 ;1 is due to shift because of use of xch
;presumably these two switches mirror the variants chosen in the previous
;round's rotations (not visible here) - TODO confirm
IF (TFISH_SLOW5)
X51RBS EQU 1
ELSE
X51RBS EQU -1
ENDIF
X1S6 EQU X3S5 + WORDSIZE - X51RB + X51RBS
X2S6 EQU X2S5+1 ;1 is due to shift because of use of xch
IF (TFISH_SLOW4)
X50RBS EQU 1
ELSE
X50RBS EQU 0
ENDIF
X3S6 EQU X1S5 + WORDSIZE - X50RB + X50RBS
X60ROTA EQU 11 ;j=0 --> rot=11
X60RMUL EQU (1 SHL (X60ROTA MOD 8))
X60RB EQU X60ROTA/8
X61ROTA EQU 42 ;j=1 --> rot=42
X61RMUL EQU (1 SHL (X61ROTA MOD 8))
X61RB EQU X61ROTA/8

;j = 0
;64-bit X0 := X0 + X1, least significant byte first. CY carries from one addc
;to the next (xch does not touch it). Every sum byte is stored one position
;higher than the byte it came from, which is the +1 in X0S7.
        mov a,X0R6+((0+X0S6) MOD WORDSIZE) ;first, the addition, then the rota and xor
        add a,X1R6+((0+X1S6) MOD WORDSIZE) ; - no point in mixing them together
        xch a,X0R6+((1+X0S6) MOD WORDSIZE) ; - however, note, that using xch we effectively shift the word left by 8 bits
        addc a,X1R6+((1+X1S6) MOD WORDSIZE)
        xch a,X0R6+((2+X0S6) MOD WORDSIZE)
        addc a,X1R6+((2+X1S6) MOD WORDSIZE)
        xch a,X0R6+((3+X0S6) MOD WORDSIZE)
        addc a,X1R6+((3+X1S6) MOD WORDSIZE)
        xch a,X0R6+((4+X0S6) MOD WORDSIZE)
        addc a,X1R6+((4+X1S6) MOD WORDSIZE)
        xch a,X0R6+((5+X0S6) MOD WORDSIZE)
        addc a,X1R6+((5+X1S6) MOD WORDSIZE)
        xch a,X0R6+((6+X0S6) MOD WORDSIZE)
        addc a,X1R6+((6+X1S6) MOD WORDSIZE)
        xch a,X0R6+((7+X0S6) MOD WORDSIZE)
        addc a,X1R6+((7+X1S6) MOD WORDSIZE)
        mov X0R6+((0+X0S6) MOD WORDSIZE),a
IF (TFISH_SLOW5)
;now rota and xor - first prepare rota
;(mul variant: a = byte shifted left by ROTA MOD 8 bits, b = the bits pushed
; out at the top; r3 carries those bits over into the next byte)
        mov a,X1R6+((7+X1S6) MOD WORDSIZE)
        mov b,#X60RMUL
        mul ab
;now the rota itself
;-- this also uses xch hence shifts 8 bits to the left
        mov r3,b
        xch a,X1R6+((0+X1S6) MOD WORDSIZE)
        mov b,#X60RMUL
        mul ab
        orl a,r3
        xrl a,X0R6+((0+X0S6+1+X60RB) MOD WORDSIZE)
        mov r3,b
        xch a,X1R6+((1+X1S6) MOD WORDSIZE)
        mov b,#X60RMUL
        mul ab
        orl a,r3
        xrl a,X0R6+((1+X0S6+1+X60RB) MOD WORDSIZE)
        mov r3,b
        xch a,X1R6+((2+X1S6) MOD WORDSIZE)
        mov b,#X60RMUL
        mul ab
        orl a,r3
        xrl a,X0R6+((2+X0S6+1+X60RB) MOD WORDSIZE)
        mov r3,b
        xch a,X1R6+((3+X1S6) MOD WORDSIZE)
        mov b,#X60RMUL
        mul ab
        orl a,r3
        xrl a,X0R6+((3+X0S6+1+X60RB) MOD WORDSIZE)
        mov r3,b
        xch a,X1R6+((4+X1S6) MOD WORDSIZE)
        mov b,#X60RMUL
        mul ab
        orl a,r3
        xrl a,X0R6+((4+X0S6+1+X60RB) MOD WORDSIZE)
        mov r3,b
        xch a,X1R6+((5+X1S6) MOD WORDSIZE)
        mov b,#X60RMUL
        mul ab
        orl a,r3
        xrl a,X0R6+((5+X0S6+1+X60RB) MOD WORDSIZE)
        mov r3,b
        xch a,X1R6+((6+X1S6) MOD WORDSIZE)
        mov b,#X60RMUL
        mul ab
        orl a,r3
        xrl a,X0R6+((6+X0S6+1+X60RB) MOD WORDSIZE)
        mov X1R6+((7+X1S6) MOD WORDSIZE),a
;top byte: low part was parked in slot 0 by the first xch, high part of the
;previous byte is still in b
        mov a,X1R6+((0+X1S6) MOD WORDSIZE)
        orl a,b
        xrl a,X0R6+((7+X0S6+1+X60RB) MOD WORDSIZE)
        mov X1R6+((0+X1S6) MOD WORDSIZE),a
ELSE
;fast variant for ROTA MOD 8 = 3: rotate left by a nibble (xchd + swap), then
;right by one bit through CY (rrc). Works from the most significant byte down
;via @r0; each result lands one slot lower than its source byte (X60RBS = -1).
  IF (TFISH_OPTIM_ALL=0)
        mov b,r0 ;we will need r0 here
  ENDIF
        mov a,X1R6+((7+X1S6) MOD WORDSIZE)
        mov c,acc.4 ;prime CY with bit 4 of the top byte for the first rrc
        mov r0,#X1R6+((6+X1S6) MOD WORDSIZE)
        xch a,@r0
        xchd a,@r0
        swap a
        rrc a
        xrl a,X0R6+((7+X0S6+1+X60RB) MOD WORDSIZE)
        xch a,@r0
;each IF below: if the next lower byte index wraps to WORDSIZE-1, r0 has to
;be reloaded instead of decremented
  IF (((5+X1S6) MOD WORDSIZE) = (WORDSIZE - 1))
        mov r0,#X1R6+((5+X1S6) MOD WORDSIZE)
  ELSE
        dec r0
  ENDIF
        xch a,@r0
        xchd a,@r0
        swap a
        rrc a
        xrl a,X0R6+((6+X0S6+1+X60RB) MOD WORDSIZE)
        xch a,@r0
  IF (((4+X1S6) MOD WORDSIZE) = (WORDSIZE - 1))
        mov r0,#X1R6+((4+X1S6) MOD WORDSIZE)
  ELSE
        dec r0
  ENDIF
        xch a,@r0
        xchd a,@r0
        swap a
        rrc a
        xrl a,X0R6+((5+X0S6+1+X60RB) MOD WORDSIZE)
        xch a,@r0
  IF (((3+X1S6) MOD WORDSIZE) = (WORDSIZE - 1))
        mov r0,#X1R6+((3+X1S6) MOD WORDSIZE)
  ELSE
        dec r0
  ENDIF
        xch a,@r0
        xchd a,@r0
        swap a
        rrc a
        xrl a,X0R6+((4+X0S6+1+X60RB) MOD WORDSIZE)
        xch a,@r0
  IF (((2+X1S6) MOD WORDSIZE) = (WORDSIZE - 1))
        mov r0,#X1R6+((2+X1S6) MOD WORDSIZE)
  ELSE
        dec r0
  ENDIF
        xch a,@r0
        xchd a,@r0
        swap a
        rrc a
        xrl a,X0R6+((3+X0S6+1+X60RB) MOD WORDSIZE)
        xch a,@r0
  IF (((1+X1S6) MOD WORDSIZE) = (WORDSIZE - 1))
        mov r0,#X1R6+((1+X1S6) MOD WORDSIZE)
  ELSE
        dec r0
  ENDIF
        xch a,@r0
        xchd a,@r0
        swap a
        rrc a
        xrl a,X0R6+((2+X0S6+1+X60RB) MOD WORDSIZE)
        xch a,@r0
  IF (((0+X1S6) MOD WORDSIZE) = (WORDSIZE - 1))
        mov r0,#X1R6+((0+X1S6) MOD WORDSIZE)
  ELSE
        dec r0
  ENDIF
        xch a,@r0
        xchd a,@r0
        swap a
        rrc a
        xrl a,X0R6+((1+X0S6+1+X60RB) MOD WORDSIZE)
        xch a,@r0
;last (least significant) result byte goes directly into slot 7
        swap a
        rrc a
        xrl a,X0R6+((0+X0S6+1+X60RB) MOD WORDSIZE)
        mov X1R6+((7+X1S6) MOD WORDSIZE),a
  IF (TFISH_OPTIM_ALL=0)
        mov r0,b ;restore r0
  ENDIF
ENDIF

;j = 1
;64-bit X2 := X2 + X3, same xch/addc chain as for j = 0
        mov a,X2R6+((0+X2S6) MOD WORDSIZE) ;first, the addition, then the rota and xor
        add a,X3R6+((0+X3S6) MOD WORDSIZE) ; - no point in mixing them together
        xch a,X2R6+((1+X2S6) MOD WORDSIZE) ; - however, note, that using xch we effectively shift the word left by 8 bits
        addc a,X3R6+((1+X3S6) MOD WORDSIZE)
        xch a,X2R6+((2+X2S6) MOD WORDSIZE)
        addc a,X3R6+((2+X3S6) MOD WORDSIZE)
        xch a,X2R6+((3+X2S6) MOD WORDSIZE)
        addc a,X3R6+((3+X3S6) MOD WORDSIZE)
        xch a,X2R6+((4+X2S6) MOD WORDSIZE)
        addc a,X3R6+((4+X3S6) MOD WORDSIZE)
        xch a,X2R6+((5+X2S6) MOD WORDSIZE)
        addc a,X3R6+((5+X3S6) MOD WORDSIZE)
        xch a,X2R6+((6+X2S6) MOD WORDSIZE)
        addc a,X3R6+((6+X3S6) MOD WORDSIZE)
        xch a,X2R6+((7+X2S6) MOD WORDSIZE)
        addc a,X3R6+((7+X3S6) MOD WORDSIZE)
        mov X2R6+((0+X2S6) MOD WORDSIZE),a
IF (TFISH_SLOW3)
;now rota and xor - first prepare rota
        mov a,X3R6+((7+X3S6) MOD WORDSIZE)
        mov b,#X61RMUL
        mul ab
;now the rota itself
;-- this also uses xch hence shifts 8 bits to the left
        mov r3,b
        xch a,X3R6+((0+X3S6) MOD WORDSIZE)
        mov b,#X61RMUL
        mul ab
        orl a,r3
        xrl a,X2R6+((0+X2S6+1+X61RB) MOD WORDSIZE)
        mov r3,b
        xch a,X3R6+((1+X3S6) MOD WORDSIZE)
        mov b,#X61RMUL
        mul ab
        orl a,r3
        xrl a,X2R6+((1+X2S6+1+X61RB) MOD WORDSIZE)
        mov r3,b
        xch a,X3R6+((2+X3S6) MOD WORDSIZE)
        mov b,#X61RMUL
        mul ab
        orl a,r3
        xrl a,X2R6+((2+X2S6+1+X61RB) MOD WORDSIZE)
        mov r3,b
        xch a,X3R6+((3+X3S6) MOD WORDSIZE)
        mov b,#X61RMUL
        mul ab
        orl a,r3
        xrl a,X2R6+((3+X2S6+1+X61RB) MOD WORDSIZE)
        mov r3,b
        xch a,X3R6+((4+X3S6) MOD WORDSIZE)
        mov b,#X61RMUL
        mul ab
        orl a,r3
        xrl a,X2R6+((4+X2S6+1+X61RB) MOD WORDSIZE)
        mov r3,b
        xch a,X3R6+((5+X3S6) MOD WORDSIZE)
        mov b,#X61RMUL
        mul ab
        orl a,r3
        xrl a,X2R6+((5+X2S6+1+X61RB) MOD WORDSIZE)
        mov r3,b
        xch a,X3R6+((6+X3S6) MOD WORDSIZE)
        mov b,#X61RMUL
        mul ab
        orl a,r3
        xrl a,X2R6+((6+X2S6+1+X61RB) MOD WORDSIZE)
        mov X3R6+((7+X3S6) MOD WORDSIZE),a
        mov a,X3R6+((0+X3S6) MOD WORDSIZE)
        orl a,b
        xrl a,X2R6+((7+X2S6+1+X61RB) MOD WORDSIZE)
        mov X3R6+((0+X3S6) MOD WORDSIZE),a
ELSE
;fast variant for ROTA MOD 8 = 2: two passes of a 1-bit rotate left through
;CY (rlc). Each pass uses the xch chain and therefore moves the word up by one
;byte slot (X61RBS = 2); the xor is merged into the second pass.
;pass 1 - the first rlc only fetches the top bit of the word into CY
        mov a,X3R6+((7+X3S6) MOD WORDSIZE)
        rlc a
        mov a,X3R6+((0+X3S6) MOD WORDSIZE)
        rlc a
        xch a,X3R6+((1+X3S6) MOD WORDSIZE)
        rlc a
        xch a,X3R6+((2+X3S6) MOD WORDSIZE)
        rlc a
        xch a,X3R6+((3+X3S6) MOD WORDSIZE)
        rlc a
        xch a,X3R6+((4+X3S6) MOD WORDSIZE)
        rlc a
        xch a,X3R6+((5+X3S6) MOD WORDSIZE)
        rlc a
        xch a,X3R6+((6+X3S6) MOD WORDSIZE)
        rlc a
        xch a,X3R6+((7+X3S6) MOD WORDSIZE)
        rlc a
        mov X3R6+((0+X3S6) MOD WORDSIZE),a
;pass 2 - a still holds the new top byte; rlc moves its top bit into CY
        rlc a
        mov a,X3R6+((1+X3S6) MOD WORDSIZE)
        rlc a
        xrl a,X2R6+((0+X2S6+1+X61RB) MOD WORDSIZE)
        xch a,X3R6+((2+X3S6) MOD WORDSIZE)
        rlc a
        xrl a,X2R6+((1+X2S6+1+X61RB) MOD WORDSIZE)
        xch a,X3R6+((3+X3S6) MOD WORDSIZE)
        rlc a
        xrl a,X2R6+((2+X2S6+1+X61RB) MOD WORDSIZE)
        xch a,X3R6+((4+X3S6) MOD WORDSIZE)
        rlc a
        xrl a,X2R6+((3+X2S6+1+X61RB) MOD WORDSIZE)
        xch a,X3R6+((5+X3S6) MOD WORDSIZE)
        rlc a
        xrl a,X2R6+((4+X2S6+1+X61RB) MOD WORDSIZE)
        xch a,X3R6+((6+X3S6) MOD WORDSIZE)
        rlc a
        xrl a,X2R6+((5+X2S6+1+X61RB) MOD WORDSIZE)
        xch a,X3R6+((7+X3S6) MOD WORDSIZE)
        rlc a
        xrl a,X2R6+((6+X2S6+1+X61RB) MOD WORDSIZE)
        xch a,X3R6+((0+X3S6) MOD WORDSIZE)
        rlc a
        xrl a,X2R6+((7+X2S6+1+X61RB) MOD WORDSIZE)
        mov X3R6+((1+X3S6) MOD WORDSIZE),a
ENDIF

;--------------- d = 7
;in this (odd) round X1 and X3 are swapped (the permutation is 0123->0321):
;X1R7 uses the storage at X+3*WORDSIZE and X3R7 the storage at X+1*WORDSIZE,
;and the offsets X1S7/X3S7 are derived from X3S6/X1S6 accordingly
X0R7 EQU X+0*WORDSIZE
X1R7 EQU X+3*WORDSIZE
X2R7 EQU X+2*WORDSIZE
X3R7 EQU X+1*WORDSIZE
X0S7 EQU X0S6+1 ;1 is due to shift because of use of xch
;X60RBS/X61RBS: byte offset added by the d = 6 rotation variant assembled above
;(mul variant +1, nibble/rrc variant -1, double-rlc variant +2)
IF (TFISH_SLOW5)
X60RBS EQU 1
ELSE
X60RBS EQU -1
ENDIF
X3S7 EQU X1S6 + WORDSIZE - X60RB + X60RBS
X2S7 EQU X2S6+1 ;1 is due to shift because of use of xch
IF (TFISH_SLOW3)
X61RBS EQU 1
ELSE
X61RBS EQU 2
ENDIF
X1S7 EQU X3S6 + WORDSIZE - X61RB + X61RBS
X70ROTA EQU 59 ;j=0 --> rot=59
X70RMUL EQU (1 SHL (X70ROTA MOD 8))
X70RB EQU X70ROTA/8
X71ROTA EQU 50 ;j=1 --> rot=50
X71RMUL EQU (1 SHL (X71ROTA MOD 8))
X71RB EQU X71ROTA/8

;j = 0
;64-bit X0 := X0 + X1 (xch/addc chain, result moves up by one byte slot)
        mov a,X0R7+((0+X0S7) MOD WORDSIZE) ;first, the addition, then the rota and xor
        add a,X1R7+((0+X1S7) MOD WORDSIZE) ; - no point in mixing them together
        xch a,X0R7+((1+X0S7) MOD WORDSIZE) ; - however, note, that using xch we effectively shift the word left by 8 bits
        addc a,X1R7+((1+X1S7) MOD WORDSIZE)
        xch a,X0R7+((2+X0S7) MOD WORDSIZE)
        addc a,X1R7+((2+X1S7) MOD WORDSIZE)
        xch a,X0R7+((3+X0S7) MOD WORDSIZE)
        addc a,X1R7+((3+X1S7) MOD WORDSIZE)
        xch a,X0R7+((4+X0S7) MOD WORDSIZE)
        addc a,X1R7+((4+X1S7) MOD WORDSIZE)
        xch a,X0R7+((5+X0S7) MOD WORDSIZE)
        addc a,X1R7+((5+X1S7) MOD WORDSIZE)
        xch a,X0R7+((6+X0S7) MOD WORDSIZE)
        addc a,X1R7+((6+X1S7) MOD WORDSIZE)
        xch a,X0R7+((7+X0S7) MOD WORDSIZE)
        addc a,X1R7+((7+X1S7) MOD WORDSIZE)
        mov X0R7+((0+X0S7) MOD WORDSIZE),a
IF (TFISH_SLOW5)
;now rota and xor - first prepare rota
        mov a,X1R7+((7+X1S7) MOD WORDSIZE)
        mov b,#X70RMUL
        mul ab
;now the rota itself
;-- this also uses xch hence shifts 8 bits to the left
        mov r3,b
        xch a,X1R7+((0+X1S7) MOD WORDSIZE)
        mov b,#X70RMUL
        mul ab
        orl a,r3
        xrl a,X0R7+((0+X0S7+1+X70RB) MOD WORDSIZE)
        mov r3,b
        xch a,X1R7+((1+X1S7) MOD WORDSIZE)
        mov b,#X70RMUL
        mul ab
        orl a,r3
        xrl a,X0R7+((1+X0S7+1+X70RB) MOD WORDSIZE)
        mov r3,b
        xch a,X1R7+((2+X1S7) MOD WORDSIZE)
        mov b,#X70RMUL
        mul ab
        orl a,r3
        xrl a,X0R7+((2+X0S7+1+X70RB) MOD WORDSIZE)
        mov r3,b
        xch a,X1R7+((3+X1S7) MOD WORDSIZE)
        mov b,#X70RMUL
        mul ab
        orl a,r3
        xrl a,X0R7+((3+X0S7+1+X70RB) MOD WORDSIZE)
        mov r3,b
        xch a,X1R7+((4+X1S7) MOD WORDSIZE)
        mov b,#X70RMUL
        mul ab
        orl a,r3
        xrl a,X0R7+((4+X0S7+1+X70RB) MOD WORDSIZE)
        mov r3,b
        xch a,X1R7+((5+X1S7) MOD WORDSIZE)
        mov b,#X70RMUL
        mul ab
        orl a,r3
        xrl a,X0R7+((5+X0S7+1+X70RB) MOD WORDSIZE)
        mov r3,b
        xch a,X1R7+((6+X1S7) MOD WORDSIZE)
        mov b,#X70RMUL
        mul ab
        orl a,r3
        xrl a,X0R7+((6+X0S7+1+X70RB) MOD WORDSIZE)
        mov X1R7+((7+X1S7) MOD WORDSIZE),a
        mov a,X1R7+((0+X1S7) MOD WORDSIZE)
        orl a,b
        xrl a,X0R7+((7+X0S7+1+X70RB) MOD WORDSIZE)
        mov X1R7+((0+X1S7) MOD WORDSIZE),a
ELSE
;fast variant for ROTA MOD 8 = 3 (nibble rotate + rrc), as in d = 6, j = 0
  IF (TFISH_OPTIM_ALL=0)
        mov b,r0 ;we will need r0 here
  ENDIF
        mov a,X1R7+((7+X1S7) MOD WORDSIZE)
        mov c,acc.4 ;prime CY with bit 4 of the top byte for the first rrc
        mov r0,#X1R7+((6+X1S7) MOD WORDSIZE)
        xch a,@r0
        xchd a,@r0
        swap a
        rrc a
        xrl a,X0R7+((7+X0S7+1+X70RB) MOD WORDSIZE)
        xch a,@r0
  IF (((5+X1S7) MOD WORDSIZE) = (WORDSIZE - 1))
        mov r0,#X1R7+((5+X1S7) MOD WORDSIZE)
  ELSE
        dec r0
  ENDIF
        xch a,@r0
        xchd a,@r0
        swap a
        rrc a
        xrl a,X0R7+((6+X0S7+1+X70RB) MOD WORDSIZE)
        xch a,@r0
  IF (((4+X1S7) MOD WORDSIZE) = (WORDSIZE - 1))
        mov r0,#X1R7+((4+X1S7) MOD WORDSIZE)
  ELSE
        dec r0
  ENDIF
        xch a,@r0
        xchd a,@r0
        swap a
        rrc a
        xrl a,X0R7+((5+X0S7+1+X70RB) MOD WORDSIZE)
        xch a,@r0
  IF (((3+X1S7) MOD WORDSIZE) = (WORDSIZE - 1))
        mov r0,#X1R7+((3+X1S7) MOD WORDSIZE)
  ELSE
        dec r0
  ENDIF
        xch a,@r0
        xchd a,@r0
        swap a
        rrc a
        xrl a,X0R7+((4+X0S7+1+X70RB) MOD WORDSIZE)
        xch a,@r0
  IF (((2+X1S7) MOD WORDSIZE) = (WORDSIZE - 1))
        mov r0,#X1R7+((2+X1S7) MOD WORDSIZE)
  ELSE
        dec r0
  ENDIF
        xch a,@r0
        xchd a,@r0
        swap a
        rrc a
        xrl a,X0R7+((3+X0S7+1+X70RB) MOD WORDSIZE)
        xch a,@r0
  IF (((1+X1S7) MOD WORDSIZE) = (WORDSIZE - 1))
        mov r0,#X1R7+((1+X1S7) MOD WORDSIZE)
  ELSE
        dec r0
  ENDIF
        xch a,@r0
        xchd a,@r0
        swap a
        rrc a
        xrl a,X0R7+((2+X0S7+1+X70RB) MOD WORDSIZE)
        xch a,@r0
  IF (((0+X1S7) MOD WORDSIZE) = (WORDSIZE - 1))
        mov r0,#X1R7+((0+X1S7) MOD WORDSIZE)
  ELSE
        dec r0
  ENDIF
        xch a,@r0
        xchd a,@r0
        swap a
        rrc a
        xrl a,X0R7+((1+X0S7+1+X70RB) MOD WORDSIZE)
        xch a,@r0
        swap a
        rrc a
        xrl a,X0R7+((0+X0S7+1+X70RB) MOD WORDSIZE)
        mov X1R7+((7+X1S7) MOD WORDSIZE),a
  IF (TFISH_OPTIM_ALL=0)
        mov r0,b ;restore r0
  ENDIF
ENDIF

;j = 1
;64-bit X2 := X2 + X3 (xch/addc chain, result moves up by one byte slot)
        mov a,X2R7+((0+X2S7) MOD WORDSIZE) ;first, the addition, then the rota and xor
        add a,X3R7+((0+X3S7) MOD WORDSIZE) ; - no point in mixing them together
        xch a,X2R7+((1+X2S7) MOD WORDSIZE) ; - however, note, that using xch we effectively shift the word left by 8 bits
        addc a,X3R7+((1+X3S7) MOD WORDSIZE)
        xch a,X2R7+((2+X2S7) MOD WORDSIZE)
        addc a,X3R7+((2+X3S7) MOD WORDSIZE)
        xch a,X2R7+((3+X2S7) MOD WORDSIZE)
        addc a,X3R7+((3+X3S7) MOD WORDSIZE)
        xch a,X2R7+((4+X2S7) MOD WORDSIZE)
        addc a,X3R7+((4+X3S7) MOD WORDSIZE)
        xch a,X2R7+((5+X2S7) MOD WORDSIZE)
        addc a,X3R7+((5+X3S7) MOD WORDSIZE)
        xch a,X2R7+((6+X2S7) MOD WORDSIZE)
        addc a,X3R7+((6+X3S7) MOD WORDSIZE)
        xch a,X2R7+((7+X2S7) MOD WORDSIZE)
        addc a,X3R7+((7+X3S7) MOD WORDSIZE)
        mov X2R7+((0+X2S7) MOD WORDSIZE),a
IF (TFISH_SLOW3)
;mul variant of rota and xor, same scheme as in the other rounds
        mov a,X3R7+((7+X3S7) MOD WORDSIZE)
        mov b,#X71RMUL
        mul ab
;now the rota itself
;-- this also uses xch hence shifts 8 bits to the left
        mov r3,b
        xch a,X3R7+((0+X3S7) MOD WORDSIZE)
        mov b,#X71RMUL
        mul ab
        orl a,r3
        xrl a,X2R7+((0+X2S7+1+X71RB) MOD WORDSIZE)
        mov r3,b
        xch a,X3R7+((1+X3S7) MOD WORDSIZE)
        mov b,#X71RMUL
        mul ab
        orl a,r3
        xrl a,X2R7+((1+X2S7+1+X71RB) MOD WORDSIZE)
        mov r3,b
        xch a,X3R7+((2+X3S7) MOD WORDSIZE)
        mov b,#X71RMUL
        mul ab
        orl a,r3
        xrl a,X2R7+((2+X2S7+1+X71RB) MOD WORDSIZE)
        mov r3,b
        xch a,X3R7+((3+X3S7) MOD WORDSIZE)
        mov b,#X71RMUL
        mul ab
        orl a,r3
        xrl a,X2R7+((3+X2S7+1+X71RB) MOD WORDSIZE)
        mov r3,b
        xch a,X3R7+((4+X3S7) MOD WORDSIZE)
        mov b,#X71RMUL
        mul ab
        orl a,r3
        xrl a,X2R7+((4+X2S7+1+X71RB) MOD WORDSIZE)
        mov r3,b
        xch a,X3R7+((5+X3S7) MOD WORDSIZE)
        mov b,#X71RMUL
        mul ab
        orl a,r3
        xrl a,X2R7+((5+X2S7+1+X71RB) MOD WORDSIZE)
        mov r3,b
        xch a,X3R7+((6+X3S7) MOD WORDSIZE)
        mov b,#X71RMUL
        mul ab
        orl a,r3
        xrl a,X2R7+((6+X2S7+1+X71RB) MOD WORDSIZE)
        mov X3R7+((7+X3S7) MOD WORDSIZE),a
        mov a,X3R7+((0+X3S7) MOD WORDSIZE)
        orl a,b
        xrl a,X2R7+((7+X2S7+1+X71RB) MOD WORDSIZE)
        mov X3R7+((0+X3S7) MOD WORDSIZE),a
ELSE
;fast variant for ROTA MOD 8 = 2 (two rlc passes), as in d = 6, j = 1
;pass 1
        mov a,X3R7+((7+X3S7) MOD WORDSIZE)
        rlc a
        mov a,X3R7+((0+X3S7) MOD WORDSIZE)
        rlc a
        xch a,X3R7+((1+X3S7) MOD WORDSIZE)
        rlc a
        xch a,X3R7+((2+X3S7) MOD WORDSIZE)
        rlc a
        xch a,X3R7+((3+X3S7) MOD WORDSIZE)
        rlc a
        xch a,X3R7+((4+X3S7) MOD WORDSIZE)
        rlc a
        xch a,X3R7+((5+X3S7) MOD WORDSIZE)
        rlc a
        xch a,X3R7+((6+X3S7) MOD WORDSIZE)
        rlc a
        xch a,X3R7+((7+X3S7) MOD WORDSIZE)
        rlc a
        mov X3R7+((0+X3S7) MOD WORDSIZE),a
;pass 2, merged with the xor
        rlc a
        mov a,X3R7+((1+X3S7) MOD WORDSIZE)
        rlc a
        xrl a,X2R7+((0+X2S7+1+X71RB) MOD WORDSIZE)
        xch a,X3R7+((2+X3S7) MOD WORDSIZE)
        rlc a
        xrl a,X2R7+((1+X2S7+1+X71RB) MOD WORDSIZE)
        xch a,X3R7+((3+X3S7) MOD WORDSIZE)
        rlc a
        xrl a,X2R7+((2+X2S7+1+X71RB) MOD WORDSIZE)
        xch a,X3R7+((4+X3S7) MOD WORDSIZE)
        rlc a
        xrl a,X2R7+((3+X2S7+1+X71RB) MOD WORDSIZE)
        xch a,X3R7+((5+X3S7) MOD WORDSIZE)
        rlc a
        xrl a,X2R7+((4+X2S7+1+X71RB) MOD WORDSIZE)
        xch a,X3R7+((6+X3S7) MOD WORDSIZE)
        rlc a
        xrl a,X2R7+((5+X2S7+1+X71RB) MOD WORDSIZE)
        xch a,X3R7+((7+X3S7) MOD WORDSIZE)
        rlc a
        xrl a,X2R7+((6+X2S7+1+X71RB) MOD WORDSIZE)
        xch a,X3R7+((0+X3S7) MOD WORDSIZE)
        rlc a
        xrl a,X2R7+((7+X2S7+1+X71RB) MOD WORDSIZE)
        mov X3R7+((1+X3S7) MOD WORDSIZE),a
ENDIF

;----------- key injection #1
;r0 points to the current word of key, r1 points to the current word of tweak
;
;We are now also going to pay the price of our pride:
;we need to undo all the rotation and renaming, which comes out
;differently in all of 4 words. We try to do this in-line, but
;this cannot be done just by defining various constants, the code must be
;written ad-hoc. So, the XnSK1 constants here are only a guideline;
;once their values were known after translation, the code was written by hand.
;
;Key injection #1 - statements. In order:
; 1. add four consecutive key words (read via @r0) to X0..X3
; 2. add two consecutive tweak words (read via @r1) to X1 and X2
; 3. increment r2 and add it to X3
; While adding, every word is moved back to byte offset 0 (least significant
; byte at XnRK1+0).
;Registers: r0 key pointer, r1 tweak pointer (later the constant 0),
; r2 injection counter, r3 scratch in some variants, b pointer backup.
;The additions rely on inc/xch/mov leaving CY untouched between addc's.
X0RK1 EQU X+0*WORDSIZE
X1RK1 EQU X+1*WORDSIZE
X2RK1 EQU X+2*WORDSIZE
X3RK1 EQU X+3*WORDSIZE
X0SK1 EQU (X0S7+1) MOD WORDSIZE ;1
IF(TFISH_SLOW3)
X71RBS EQU 1
ELSE
X71RBS EQU 2
ENDIF
X1SK1 EQU (X3S7 + WORDSIZE - X71RB + X71RBS) MOD WORDSIZE
X2SK1 EQU (X2S7+1) MOD WORDSIZE ;2
IF(TFISH_SLOW5)
X70RBS EQU 1
ELSE
X70RBS EQU -1
ENDIF
X3SK1 EQU (X1S7 + WORDSIZE - X70RB + X70RBS) MOD WORDSIZE

;With TFISH_OPTIM_ALL=1 the rounds overwrite r0 without saving it, so it is
;reloaded from b here.
;NOTE(review): presumably b was loaded with the key pointer earlier in the
;loop (not visible in this section) - confirm.
IF (TFISH_OPTIM_ALL=1)
        mov r0,b
ENDIF

;X0 += key word
;here, undo shift of 1, i.e. should be 0, is 1
;(bytes 0..6 sit in slots 1..7, byte 7 in slot 0; the old byte 7 is passed
; along slots 2, 4, 6 by the double xch until it is added last)
        mov a,X0RK1+1
        add a,@r0
        inc r0
        xch a,X0RK1+0
        xch a,X0RK1+2
        addc a,@r0
        inc r0
        mov X0RK1+1,a
        mov a,X0RK1+3
        addc a,@r0
        inc r0
        xch a,X0RK1+2
        xch a,X0RK1+4
        addc a,@r0
        inc r0
        mov X0RK1+3,a
        mov a,X0RK1+5
        addc a,@r0
        inc r0
        xch a,X0RK1+4
        xch a,X0RK1+6
        addc a,@r0
        inc r0
        mov X0RK1+5,a
        mov a,X0RK1+7
        addc a,@r0
        inc r0
        xch a,X0RK1+6
        addc a,@r0
        inc r0
        mov X0RK1+7,a
;wrap the key pointer after the last of the DATASIZE+1 key words
        cjne r0,#Key+(DATASIZE+1)*WORDSIZE,TFishK1X1
        mov r0,#Key
TFishK1X1:
        mov b,r0 ;backup key pointer for the next key injection

;X1 += key word; generic xch/addc chain, moves the word up by one more byte
;slot (undone by the tweak addition below)
        mov a,X1RK1+((0+X1SK1) MOD WORDSIZE)
        add a,@r0
        inc r0
        xch a,X1RK1+((1+X1SK1) MOD WORDSIZE)
        addc a,@r0
        inc r0
        xch a,X1RK1+((2+X1SK1) MOD WORDSIZE)
        addc a,@r0
        inc r0
        xch a,X1RK1+((3+X1SK1) MOD WORDSIZE)
        addc a,@r0
        inc r0
        xch a,X1RK1+((4+X1SK1) MOD WORDSIZE)
        addc a,@r0
        inc r0
        xch a,X1RK1+((5+X1SK1) MOD WORDSIZE)
        addc a,@r0
        inc r0
        xch a,X1RK1+((6+X1SK1) MOD WORDSIZE)
        addc a,@r0
        inc r0
        xch a,X1RK1+((7+X1SK1) MOD WORDSIZE)
        addc a,@r0
        inc r0
        mov X1RK1+((0+X1SK1) MOD WORDSIZE),a
        cjne r0,#Key+(DATASIZE+1)*WORDSIZE,TFishK1X2
        mov r0,#Key
TFishK1X2:

;X2 += key word (same chain)
        mov a,X2RK1+((0+X2SK1) MOD WORDSIZE)
        add a,@r0
        inc r0
        xch a,X2RK1+((1+X2SK1) MOD WORDSIZE)
        addc a,@r0
        inc r0
        xch a,X2RK1+((2+X2SK1) MOD WORDSIZE)
        addc a,@r0
        inc r0
        xch a,X2RK1+((3+X2SK1) MOD WORDSIZE)
        addc a,@r0
        inc r0
        xch a,X2RK1+((4+X2SK1) MOD WORDSIZE)
        addc a,@r0
        inc r0
        xch a,X2RK1+((5+X2SK1) MOD WORDSIZE)
        addc a,@r0
        inc r0
        xch a,X2RK1+((6+X2SK1) MOD WORDSIZE)
        addc a,@r0
        inc r0
        xch a,X2RK1+((7+X2SK1) MOD WORDSIZE)
        addc a,@r0
        inc r0
        mov X2RK1+((0+X2SK1) MOD WORDSIZE),a
        cjne r0,#Key+(DATASIZE+1)*WORDSIZE,TFishK1X3
        mov r0,#Key
TFishK1X3:

;X3 += key word (same chain; no inc r0 after the last byte, r0 is reloaded)
        mov a,X3RK1+((0+X3SK1) MOD WORDSIZE)
        add a,@r0
        inc r0
        xch a,X3RK1+((1+X3SK1) MOD WORDSIZE)
        addc a,@r0
        inc r0
        xch a,X3RK1+((2+X3SK1) MOD WORDSIZE)
        addc a,@r0
        inc r0
        xch a,X3RK1+((3+X3SK1) MOD WORDSIZE)
        addc a,@r0
        inc r0
        xch a,X3RK1+((4+X3SK1) MOD WORDSIZE)
        addc a,@r0
        inc r0
        xch a,X3RK1+((5+X3SK1) MOD WORDSIZE)
        addc a,@r0
        inc r0
        xch a,X3RK1+((6+X3SK1) MOD WORDSIZE)
        addc a,@r0
        inc r0
        xch a,X3RK1+((7+X3SK1) MOD WORDSIZE)
        addc a,@r0
        mov X3RK1+((0+X3SK1) MOD WORDSIZE),a
        mov r0,b ;restore the key pointer saved at TFishK1X1 - it is the start for the next key injection

;TEMP = byte offset of X1 at the start of this key injection for the selected
;build variant; it picks the matching hand-written un-shuffle below
IF (TFISH_SLOW2)
TEMP SET 3
ELSE
  IF (TFISH_SLOW3)
TEMP SET 1
  ELSE
    IF (TFISH_SLOW4)
TEMP SET 3
    ELSE
      IF (TFISH_SLOW5)
TEMP SET 1
      ELSE
TEMP SET 5
      ENDIF
    ENDIF
  ENDIF
ENDIF

;X1 += tweak word
IF (TEMP = 3)
;Now add tweak and round nr.
;Here, we are going to undo a shift of (3+1)=4
;(at the beginning of key injection it is 3,
;but the first addition adds 1 more due to xch),
;i.e. take from 4 put to 0
        mov a,X1RK1+1
        xch a,X1RK1+4
        add a,@r1
        inc r1
        xch a,X1RK1+0
        mov r3,a ;park the old byte 4 until it is needed
        mov a,X1RK1+2
        xch a,X1RK1+5
        addc a,@r1
        inc r1
        mov X1RK1+1,a
        mov a,X1RK1+3
        xch a,X1RK1+6
        addc a,@r1
        inc r1
        mov X1RK1+2,a
        mov a,X1RK1+7
        addc a,@r1
        inc r1
        mov X1RK1+3,a
        mov a,r3
        addc a,@r1
        inc r1
        xch a,X1RK1+4
        addc a,@r1
        inc r1
        xch a,X1RK1+5
        addc a,@r1
        inc r1
        xch a,X1RK1+6
        addc a,@r1
        inc r1
        mov X1RK1+7,a
ELSE
  IF (TEMP = 5)
;the shift here is now (5+1), i.e. 6
        mov a,X1RK1+6
        add a,@r1
        inc r1
;the xch run moves the old bytes 2..7 up by one slot each
        xch a,X1RK1+0
        xch a,X1RK1+1
        xch a,X1RK1+2
        xch a,X1RK1+3
        xch a,X1RK1+4
        xch a,X1RK1+5
        mov X1RK1+6,a
        mov a,X1RK1+7
        addc a,@r1
        inc r1
        xch a,X1RK1+1
        addc a,@r1
        inc r1
        xch a,X1RK1+2
        addc a,@r1
        inc r1
        xch a,X1RK1+3
        addc a,@r1
        inc r1
        xch a,X1RK1+4
        addc a,@r1
        inc r1
        xch a,X1RK1+5
        addc a,@r1
        inc r1
        xch a,X1RK1+6
        addc a,@r1
        inc r1
        mov X1RK1+7,a
  ELSE
;Here, we are going to undo a shift of (1+1)=2
        mov a,X1RK1+2
        add a,@r1
        inc r1
        xch a,X1RK1+0
        xch a,X1RK1+5
        mov X1RK1+2,a
        mov a,X1RK1+3
        addc a,@r1
        inc r1
        xch a,X1RK1+1
        xch a,X1RK1+6
        mov X1RK1+3,a
        mov a,X1RK1+4
        addc a,@r1
        inc r1
        xch a,X1RK1+2
        addc a,@r1
        inc r1
        xch a,X1RK1+3
        addc a,@r1
        inc r1
        mov X1RK1+4,a
        mov a,X1RK1+7
        addc a,@r1
        inc r1
        xch a,X1RK1+5
        addc a,@r1
        inc r1
        xch a,X1RK1+6
        addc a,@r1
        inc r1
        mov X1RK1+7,a
  ENDIF
ENDIF
;wrap the tweak pointer after the last of the 2+1 tweak words
        cjne r1,#Tweak+(2+1)*WORDSIZE,TFishK1X4
        mov r1,#Tweak
TFishK1X4:
        mov b,r1 ;store for next round

;X2 += next tweak word
;Here, we are going to undo a shift of (2+1)=3
        mov a,X2RK1+3
        add a,@r1
        inc r1
        xch a,X2RK1+0
        xch a,X2RK1+4
        addc a,@r1
        inc r1
        xch a,X2RK1+1
        xch a,X2RK1+5
        addc a,@r1
        inc r1
        xch a,X2RK1+2
        xch a,X2RK1+6
        addc a,@r1
        inc r1
        mov X2RK1+3,a
        mov a,X2RK1+7
        addc a,@r1
        inc r1
        xch a,X2RK1+4
        addc a,@r1
        inc r1
        xch a,X2RK1+5
        addc a,@r1
        inc r1
        xch a,X2RK1+6
        addc a,@r1
        mov X2RK1+7,a

;X3 += r2, with r1 = 0 supplying the upper bytes of the addend
        mov r1,#0 ;constant for further addition
        inc r2 ;increment round counter (one count per key injection, compared with ROUNDS/4 below)

;TEMP = byte offset of X3 before its key word was added, per build variant
IF (TFISH_SLOW2)
TEMP SET 7
ELSE
  IF (TFISH_SLOW3)
TEMP SET 5
  ELSE
    IF (TFISH_SLOW3a)
TEMP SET 7
    ELSE
      IF (TFISH_SLOW4)
TEMP SET 3
      ELSE
        IF (TFISH_SLOW5)
TEMP SET 2
        ELSE
TEMP SET 0
        ENDIF
      ENDIF
    ENDIF
  ENDIF
ENDIF

IF (TEMP = 7)
;the shift here is now (7+1), i.e. 0 - hence we need to perform the addition "in place"
        mov a,X3RK1+0
        add a,r2
        mov X3RK1+0,a
        mov a,X3RK1+1
        addc a,r1
        mov X3RK1+1,a
        mov a,X3RK1+2
        addc a,r1
        mov X3RK1+2,a
        mov a,X3RK1+3
        addc a,r1
        mov X3RK1+3,a
        mov a,X3RK1+4
        addc a,r1
        mov X3RK1+4,a
        mov a,X3RK1+5
        addc a,r1
        mov X3RK1+5,a
        mov a,X3RK1+6
        addc a,r1
        mov X3RK1+6,a
        mov a,X3RK1+7
        addc a,r1
        mov X3RK1+7,a
ELSE
  IF (TEMP = 5)
;the shift here is now (5+1), i.e. 6
        mov a,X3RK1+6
        add a,r2
        xch a,X3RK1+0
        xch a,X3RK1+1
        xch a,X3RK1+2
        xch a,X3RK1+3
        xch a,X3RK1+4
        xch a,X3RK1+5
        mov X3RK1+6,a
        mov a,X3RK1+7
        addc a,r1
        xch a,X3RK1+1
        addc a,r1
        xch a,X3RK1+2
        addc a,r1
        xch a,X3RK1+3
        addc a,r1
        xch a,X3RK1+4
        addc a,r1
        xch a,X3RK1+5
        addc a,r1
        xch a,X3RK1+6
        addc a,r1
        mov X3RK1+7,a
  ELSE
    IF (TEMP = 3)
;undo shift of 3+1=4
        mov a,X3RK1+1
        xch a,X3RK1+4
        add a,r2
        xch a,X3RK1+0
        mov r3,a ;park the old byte 4 until it is needed
        mov a,X3RK1+2
        xch a,X3RK1+5
        addc a,r1
        mov X3RK1+1,a
        mov a,X3RK1+3
        xch a,X3RK1+6
        addc a,r1
        mov X3RK1+2,a
        mov a,X3RK1+7
        addc a,r1
        mov X3RK1+3,a
        mov a,r3
        addc a,r1
        xch a,X3RK1+4
        addc a,r1
        xch a,X3RK1+5
        addc a,r1
        xch a,X3RK1+6
        addc a,r1
        mov X3RK1+7,a
    ELSE
      IF (TEMP = 2)
;undo shift of 2+1=3
        mov a,X3RK1+3
        add a,r2
        xch a,X3RK1+0
        xch a,X3RK1+4
        addc a,r1
        xch a,X3RK1+1
        xch a,X3RK1+5
        addc a,r1
        xch a,X3RK1+2
        xch a,X3RK1+6
        addc a,r1
        mov X3RK1+3,a
        mov a,X3RK1+7
        addc a,r1
        xch a,X3RK1+4
        addc a,r1
        xch a,X3RK1+5
        addc a,r1
        xch a,X3RK1+6
        addc a,r1
        mov X3RK1+7,a
      ELSE
;undo shift of 0+1=1
        mov a,X3RK1+1
        add a,r2
        xch a,X3RK1+0
        xch a,X3RK1+2
        addc a,r1
        mov X3RK1+1,a
        mov a,X3RK1+3
        addc a,r1
        xch a,X3RK1+2
        xch a,X3RK1+4
        addc a,r1
        mov X3RK1+3,a
        mov a,X3RK1+5
        addc a,r1
        xch a,X3RK1+4
        xch a,X3RK1+6
        addc a,r1
        mov X3RK1+5,a
        mov a,X3RK1+7
        addc a,r1
        xch a,X3RK1+6
        addc a,r1
        mov X3RK1+7,a
      ENDIF
    ENDIF
  ENDIF
ENDIF

        mov r1,b ;restore the tweak pointer saved at TFishK1X4 - it is the start for the next key injection
;finished once r2 has reached ROUNDS/4; otherwise take the long jump back
;to TFishLoop
        cjne r2,#ROUNDS/4,TFishLoopA
        ret
TFishLoopA:
        jmp TFishLoop

        end