// Minor adaptation for packets with 0 payload (although payload indicated)
// See !!!!
unit UCSAEmu;

interface
uses sysutils,windows;

type
PKBarray=^TKBarray;
TKBarray=array[0..56] of cardinal;

PBlock=^TBlock;
TBlock=Array[0..7] of byte; {optimization}

TKey=Record
Complete:Boolean;
odd_kk,even_kk:TKBArray;
odd_ck,even_ck:TBlock;
end;
var CsaKey:TKey;
gCK: PBlock;
gKK: PKBarray;

function CSAGetVersion:PChar;cdecl;
procedure CSASetKeys(Oddkey,evenKey: TBlock);cdecl;
procedure CSADecrypt(data:PByteArray);register;

exports CSAGetVersion name 'CSAGetVersion';
exports CSASetKeys name 'CSASetKeys';
exports CSADecrypt name 'CSADecrypt';

implementation

function CSAGetVersion:PChar;cdecl;
begin
  Result:='SoftCSA/Pikachu/0.001';
end;

type TSbox = Array[0..31] of cardinal;
const
sbox1:TSBox = (2,0,1,1,2,3,3,0, 3,2,2,0,1,1,0,3, 0,3,3,0,2,2,1,1, 2,2,0,3,1,1,3,0);
sbox2:TSBox = (3,1,0,2,2,3,3,0, 1,3,2,1,0,0,1,2, 3,1,0,3,3,2,0,2, 0,0,1,2,2,1,3,1); 
sbox3:TSBox = (2,0,1,2,2,3,3,1, 1,1,0,3,3,0,2,0, 1,3,0,1,3,0,2,2, 2,0,1,2,0,3,3,1); 
sbox4:TSBox = (3,1,2,3,0,2,1,2, 1,2,0,1,3,0,0,3, 1,0,3,1,2,3,0,3, 0,3,2,0,1,2,2,1); 
sbox5:TSBox = (2,0,0,1,3,2,3,2, 0,1,3,3,1,0,2,1, 2,3,2,0,0,3,1,1, 1,0,3,2,3,1,0,2); 
sbox6:TSBox = (0,1,2,3,1,2,2,0, 0,1,3,0,2,3,1,3, 2,3,0,2,3,0,1,1, 2,1,1,2,0,3,3,0); 
sbox7:TSBox = (0,3,2,2,3,0,0,1, 3,0,1,3,1,2,2,1, 1,0,3,3,0,1,1,2, 2,3,1,0,2,3,0,2);

// --- block cypher ------------------------------------------------------------
const
// key preparation
key_perm:Array[0..$3f] of cardinal=(
$12,$24,$09,$07,$2A,$31,$1D,$15,$1C,$36,$3E,$32,$13,$21,$3B,$40,
$18,$14,$25,$27,$02,$35,$1B,$01,$22,$04,$0D,$0E,$39,$28,$1A,$29,
$33,$23,$34,$0C,$16,$30,$1E,$3A,$2D,$1F,$08,$19,$17,$2F,$3D,$11,
$3C,$05,$38,$2B,$0B,$06,$0A,$2C,$20,$3F,$2E,$0F,$03,$26,$10,$37);
BitValueTable:Array[0..7] of byte=($80,$40,$20,$10,$08,$04,$02,$01);

// block - sbox
block_sbox:Array[0..$FF] of cardinal=( 
$3A,$EA,$68,$FE,$33,$E9,$88,$1A,$83,$CF,$E1,$7F,$BA,$E2,$38,$12, 
$E8,$27,$61,$95,$0C,$36,$E5,$70,$A2,$06,$82,$7C,$17,$A3,$26,$49, 
$BE,$7A,$6D,$47,$C1,$51,$8F,$F3,$CC,$5B,$67,$BD,$CD,$18,$08,$C9, 
$FF,$69,$EF,$03,$4E,$48,$4A,$84,$3F,$B4,$10,$04,$DC,$F5,$5C,$C6, 
$16,$AB,$AC,$4C,$F1,$6A,$2F,$3C,$3B,$D4,$D5,$94,$D0,$C4,$63,$62, 
$71,$A1,$F9,$4F,$2E,$AA,$C5,$56,$E3,$39,$93,$CE,$65,$64,$E4,$58, 
$6C,$19,$42,$79,$DD,$EE,$96,$F6,$8A,$EC,$1E,$85,$53,$45,$DE,$BB,
$7E,$0A,$9A,$13,$2A,$9D,$C2,$5E,$5A,$1F,$32,$35,$9C,$A8,$73,$30, 

$29,$3D,$E7,$92,$87,$1B,$2B,$4B,$A5,$57,$97,$40,$15,$E6,$BC,$0E, 
$EB,$C3,$34,$2D,$B8,$44,$25,$A4,$1C,$C7,$23,$ED,$90,$6E,$50,$00, 
$99,$9E,$4D,$D9,$DA,$8D,$6F,$5F,$3E,$D7,$21,$74,$86,$DF,$6B,$05, 
$8E,$5D,$37,$11,$D2,$28,$75,$D6,$A7,$77,$24,$BF,$F0,$B0,$02,$B7, 
$F8,$FC,$81,$09,$B1,$01,$76,$91,$7D,$0F,$C8,$A0,$F2,$CB,$78,$60, 
$D1,$F7,$E0,$B5,$98,$22,$B3,$20,$1D,$A6,$DB,$7B,$59,$9F,$AE,$31, 
$FB,$D3,$B6,$CA,$43,$72,$07,$F4,$D8,$41,$14,$55,$0D,$54,$8B,$B9, 
$AD,$46,$0B,$AF,$80,$52,$2C,$FA,$8C,$89,$66,$FD,$B2,$A9,$9B,$C0);

// block - perm 
block_perm:Array[0..$FF] of cardinal=( 
$00,$02,$80,$82,$20,$22,$A0,$A2, $10,$12,$90,$92,$30,$32,$B0,$B2, 
$04,$06,$84,$86,$24,$26,$A4,$A6, $14,$16,$94,$96,$34,$36,$B4,$B6, 
$40,$42,$C0,$C2,$60,$62,$E0,$E2, $50,$52,$D0,$D2,$70,$72,$F0,$F2,
$44,$46,$C4,$C6,$64,$66,$E4,$E6, $54,$56,$D4,$D6,$74,$76,$F4,$F6, 
$01,$03,$81,$83,$21,$23,$A1,$A3, $11,$13,$91,$93,$31,$33,$B1,$B3, 
$05,$07,$85,$87,$25,$27,$A5,$A7, $15,$17,$95,$97,$35,$37,$B5,$B7,
$41,$43,$C1,$C3,$61,$63,$E1,$E3, $51,$53,$D1,$D3,$71,$73,$F1,$F3, 
$45,$47,$C5,$C7,$65,$67,$E5,$E7, $55,$57,$D5,$D7,$75,$77,$F5,$F7, 

$08,$0A,$88,$8A,$28,$2A,$A8,$AA, $18,$1A,$98,$9A,$38,$3A,$B8,$BA,
$0C,$0E,$8C,$8E,$2C,$2E,$AC,$AE, $1C,$1E,$9C,$9E,$3C,$3E,$BC,$BE, 
$48,$4A,$C8,$CA,$68,$6A,$E8,$EA, $58,$5A,$D8,$DA,$78,$7A,$F8,$FA, 
$4C,$4E,$CC,$CE,$6C,$6E,$EC,$EE, $5C,$5E,$DC,$DE,$7C,$7E,$FC,$FE, 
$09,$0B,$89,$8B,$29,$2B,$A9,$AB, $19,$1B,$99,$9B,$39,$3B,$B9,$BB, 
$0D,$0F,$8D,$8F,$2D,$2F,$AD,$AF, $1D,$1F,$9D,$9F,$3D,$3F,$BD,$BF, 
$49,$4B,$C9,$CB,$69,$6B,$E9,$EB, $59,$5B,$D9,$DB,$79,$7B,$F9,$FB, 
$4D,$4F,$CD,$CF,$6D,$6F,$ED,$EF, $5D,$5F,$DD,$DF,$7D,$7F,$FD,$FF); 

// -- Registers -- 
var 
A,B:Array[0..10] of cardinal; 
next_A1: cardinal absolute A[0];
next_B1: cardinal absolute B[0];
X,Y,Z,D,E,F,p,q,r: cardinal;

procedure block_decypher(R:TBlock;Var bd: TBlock);
// Ressource Eater No.2
var r8buf,SBout,SBin,i:cardinal;
begin
for i:=56 downto 1 do
begin
SBin:=gKK[i] xor R[6]; 
SBout:=block_sbox[SBin]; 
r8buf:=SBout xor R[7]; 
R[7]:=R[6]; 
R[6]:=R[5] xor block_perm[SBout]; 
R[5]:=R[4]; 
R[4]:=R[3] xor r8buf; 
R[3]:=R[2] xor r8buf;
R[2]:=R[1] xor r8buf; 
R[1]:=R[0]; 
R[0]:=R8buf; 
end;
BD:=R;
end; 

procedure stream_cypher(sb: PByteArray; Var cb:TBlock);
// Ressource Eater No.1 
var i,j: cardinal; 
in1: cardinal; // most significant nibble of input byte 
in2: cardinal; // least significant nibble of input byte 
SB2,SB1,SC2Out: TBlock; 

op : cardinal; 
extra_B : cardinal; 
next_E : cardinal; 
s1,s2,s3,s4,s5,s6,s7: cardinal;
begin 
if sb<>nil then begin 
// load first 32 bits of CK into A[1]..A[8]
// load last 32 bits of CK into B[1]..B[8] 
// all other regs = 0 
A[1]:= (gCK[0] shr 4);
A[2]:= (gCK[0] ) and $f; 
A[3]:= (gCK[1] shr 4); 
A[4]:= (gCK[1] ) and $f; 
A[5]:= (gCK[2] shr 4); 
A[6]:= (gCK[2] ) and $f; 
A[7]:= (gCK[3] shr 4); 
A[8]:= (gCK[3] ) and $f; 
A[9]:= 0; 
A[10]:= 0;
B[1]:= (gCK[4] shr 4); 
B[2]:= (gCK[4] ) and $f;
B[3]:= (gCK[5] shr 4); 
B[4]:= (gCK[5] ) and $f; 
B[5]:= (gCK[6] shr 4); 
B[6]:= (gCK[6] ) and $f; 
B[7]:= (gCK[7] shr 4); 
B[8]:= (gCK[7] ) and $f;
B[9]:= 0;
B[10]:= 0;

asm 
xor eax,eax; 
mov X,eax; 
mov Y,eax; 
mov Z,eax; 
mov D,eax; 
mov E,eax; 
mov F,eax; 
mov p,eax; 
mov q,eax; 
mov r,eax; 
end; 

end;(* init *) 
// 8 bytes per operation 
for i:=0 to 7 do begin 
if sb<>nil then begin 
in1:= (sb[i] shr 4);
in2:= (sb[i] ) and $0f;
end; (* init *) 
op:= 0; 
// 2 bits per iteration
for j:=0 to 3 do begin 
// from A[1]..A[10], 35 bits are selected as inputs to 7 s-boxes 
// 5 bits input per s-box, 2 bits output per s-box 
s1:= sbox1[ (((A[4] ) and 1) shl 4) or (((A[1] shr 2) and 1) shl 3) or (((A[6] shr 1) and 1) shl 2) or (((A[7] shr 3) and 1) shl 1) or (((A[9] ) and 1) ) ]; 
s2:= sbox2[ (((A[2] shr 1) and 1) shl 4) or (((A[3] shr 2) and 1) shl 3) or (((A[6] shr 3) and 1) shl 2) or (((A[7] ) and 1) shl 1) or (((A[9] shr 1) and 1) ) ]; 
s3:= sbox3[ (((A[1] shr 3) and 1) shl 4) or (((A[2] ) and 1) shl 3) or (((A[5] shr 1) and 1) shl 2) or (((A[5] shr 3) and 1) shl 1) or (((A[6] shr 2) and 1) ) ]; 
s4:= sbox4[ (((A[3] shr 3) and 1) shl 4) or (((A[1] shr 1) and 1) shl 3) or (((A[2] shr 3) and 1) shl 2) or (((A[4] shr 2) and 1) shl 1) or (((A[8] ) and 1) ) ]; 
s5:= sbox5[ (((A[5] shr 2) and 1) shl 4) or (((A[4] shr 3) and 1) shl 3) or (((A[6] ) and 1) shl 2) or (((A[8] shr 1) and 1) shl 1) or (((A[9] shr 2) and 1) ) ];
s6:= sbox6[ (((A[3] shr 1) and 1) shl 4) or (((A[4] shr 1) and 1) shl 3) or (((A[5] ) and 1) shl 2) or (((A[7] shr 2) and 1) shl 1) or (((A[9] shr 3) and 1) ) ]; 
s7:= sbox7[ (((A[2] shr 2) and 1) shl 4) or (((A[3] ) and 1) shl 3) or (((A[7] shr 1) and 1) shl 2) or (((A[8] shr 2) and 1) shl 1) or (((A[8] shr 3) and 1) ) ]; 

// use 4x4 xor to produce extra nibble for T3 
extra_B:= ( ((B[3] and 1) shl 3) xor ((B[6] and 2) shl 2) xor ((B[7] and 4) shl 1) xor ((B[9] and 8) ) ) or
( ((B[6] and 1) shl 2) xor ((B[8] and 2) shl 1) xor ((B[3] and 8) shr 1) xor ((B[4] and 4) ) ) or
( ((B[5] and 8) shr 2) xor ((B[8] and 4) shr 1) xor ((B[4] and 1) shl 1) xor ((B[5] and 2) ) ) or
( ((B[9] and 4) shr 2) xor ((B[6] and 8) shr 3) xor ((B[3] and 2) shr 1) xor ((B[8] and 1) ) ) ;

// T1 = xor all inputs
// in1,in2, D are only used in T1 during initialisation, not generation
next_A1:= A[10] xor X; 
if sb<>nil then begin 
//next_A1 = next_A1 xor D xor ((j % 2) ? in2 : in1); 
if ((j mod 2))>0 then 
next_A1:= next_A1 xor D xor in2 else next_A1:=next_A1 xor D xor in1; 
end; 

// T2 = xor all inputs 
// in1,in2 are only used in T1 during initialisation, not generation 
// if p=0, use this, if p=1, rotate the result left
next_B1:= B[7] xor B[10] xor Y; 
if sb<>nil then begin 
//next_B1 = next_B1 xor ((j % 2) ? in1 : in2); 
if ((j mod 2))>0 then
next_B1:=next_B1 xor in1 else next_B1:=next_B1 xor in2;
end; 

// if p=1, rotate left 
if p>0 then 
next_B1:= ( (next_B1 shl 1) or ((next_B1 shr 3) and 1) ) and $f;

// T3 = xor all inputs 
D:= E xor Z xor extra_B; 

// T4 = sum, carry of Z + E + r 
next_E:= F; 
if q>0 then begin 
F:= Z + E + r; 
// r is the carry 
r:= (F shr 4) and 1; 
F:= F and $0f; 
end (* q *) 
else F:= E; 
E:= next_E;

asm 
push ECX 
push EDI 
push ESI 
pushfd 
std
lea ESI,BYTE PTR A[9*4] 
lea EDI,BYTE PTR A[10*4] 
mov ECX,10 
rep movsd 

lea ESI,BYTE PTR B[9*4]
lea EDI,BYTE PTR B[10*4] 
mov ECX,10 
rep movsd 

popfd 
pop ESI
pop EDI 
pop ECX 
end; 

X:= ((s4 and 1) shl 3) or ((s3 and 1) shl 2) or (s2 and 2) or ((s1 and 2) shr 1); 
Y:= ((s6 and 1) shl 3) or ((s5 and 1) shl 2) or (s4 and 2) or ((s3 and 2) shr 1); 
Z:= ((s2 and 1) shl 3) or ((s1 and 1) shl 2) or (s6 and 2) or ((s5 and 2) shr 1); 
p:= (s7 and 2) shr 1; 
q:= (s7 and 1);

// require 4 loops per output byte 
// 2 output bits are a function of the 4 bits of D 
// xor 2 by 2 
op:= (op shl 2) xor ( (((D xor (D shr 1)) shr 1) and 2) or ((D xor (D shr 1)) and 1) ); 
end; 
// return input data during init 
//cb[i] = (init) ? sb[i] : op; 
if sb<>nil then cb[i]:= sb[i] else cb[i]:=op; 
end;
end;


procedure key_schedule(CK:TBlock;Var KK:TKBArray);
var i,j,k,L:Byte;
bit,newbit:Array[0..63] of byte;
kb:Array[0..7] of TBlock;
begin
// 56 steps
// 56 key bytes kk(56)..kk(1) by key schedule from CK

// kb(7,1) .. kb(7,8) = CK(1) .. CK(8)
kb[7]:=CK;

// calculate kb[6] .. kb[1]
for i:=0 to 6 do begin
// 64 bit perm on kb
for j:=0 to 7 do begin
L:=j shl 3;
for k:=0 to 7 do begin
bit[L+k]:= (kb[7-i][j] shr (7-k)) and 1;
newbit[key_perm[l+k]-1]:= bit[L+k];
end;
end;
for j:=0 to 7 do begin
kb[6-i][j]:= 0;
L:=j shl 3; 
for k:=0 to 7 do begin 
kb[6-i][j]:=kb[6-i][j] or (newbit[L+k] shl (7-k)); 
end; 
end; 
end; 

// xor to give kk
for i:=0 to 6 do begin
L:=i shl 3;
for j:=0 to 7 do begin
kk[1+l+j]:= kb[1+i][j] xor i;
end;
end;
end;


procedure CSASetKeys(oddkey,evenkey:TBlock);
begin
csaKey.Complete:=true;
csakey.odd_ck:=oddkey;
csaKey.even_ck:=evenKey;
key_schedule(csakey.odd_ck,csakey.odd_kk);
key_schedule(csakey.even_ck,csakey.even_kk);
end;

procedure CSADecrypt(data:PByteArray);
var i,j,k,offset,N:cardinal;
stream,ib,block:TBlock;
residue: cardinal;
begin
  //if not csakey.complete then exit;
  offset:=4;

  if(data[3] and $40>0) then begin // ODD
    gKK:=@csakey.odd_kk;
    gCK:=@csakey.odd_ck;
  end else begin // EVEN
    gKK:=@csakey.even_kk;
    gCK:=@csakey.even_ck;
  end;

  data[3]:=data[3] and $3f; (* remove scrambling bits *)
  if ((data[3] and $20) = $20) then begin
    offset:=offset + data[4] +1; (* skip adaption field *)
    N:= (188 - offset) div 8;
    residue:= (188 - offset) mod 8;
  end else begin (* no adaption field *)
    N:=23;
    residue:=0;
  end;
  // !!!!
  if n=0 then Exit;
  
  stream_cypher(@data[offset], ib);


  for j:=1 to N do begin
    block_decypher(ib, block);

    if (j <> N) then begin
      stream_cypher(nil, stream);
      // xor sb x stream
      k:=j shl 3;
      for i:=0 to 7 do
      ib[i]:= data[offset+k+i] xor stream[i];
    end
    else begin
      // last block - sb[N+1] = IV(initialisation vetor)(=0)
      zeromemory(@ib,8);
    end;

    // xor ib x block
    k:=(j-1) shl 3;
    for i:=0 to 7 do data[offset+k+i]:= ib[i] xor block[i];
  end; (* for(j=1; j<(N+1); j++) *)

  if residue>0 then begin
    stream_cypher(nil, stream);
    for i:=0 to residue-1 do
    data[188-residue+i]:= data[188-residue+i] xor stream[i];
  end;
end;

end.
