Order-0 Adaptive Arithmetic Encoding
by herm1t

See also the project folder

; By using this file, you agree to the terms and conditions set
; forth in the COPYING file which can be found at the top level
; of this distribution.
;
; Order-0 Adaptive Arithmetic Encoding (c) herm1t@vx.netlux.org, 2006-04-16
;
; extern int ari_compress(uchar *src, uchar *dst, int src_len, uchar *temp);
; extern int ari_expand(uchar *src, uchar *dst, uchar *temp);
;
; * src is data to be encoded(ari_cpmpress) or decoded (ari_decode)
; * dst is encoded data (ari_compress) or decoded (ari_decode)
; * src_len is the length of input data
; * temp is a working buffer 3374 bytes long
;
; if the size of compressed data in ari_compress will exceed the
; size of the source buffer function returns zero, size of the
; compressed data otherwise.
;
%define First_qtr       0x4000
%define Half            0x8000
%define Third_qtr       0xc000
%define Top_value       0xffff
%define No_of_chars     0x0100
%define No_of_symbols   0x0101
%define Max_frequency   0x3fff
%define EOF_symbol      0x0101
%define Code_value_bits 16

%define high            [ebp + 0]       ;4
%define low             [ebp + 4]       ;4
%define bits_to_follow  [ebp + 8]       ;4
%define pos             [ebp + 12]      ;4
%define buf             [ebp + 16]      ;4
%define buf_len         [ebp + 20]      ;4
%define save_esp        [ebp + 24]      ;4
%define char_to_index   [ebp + 28]      ;1024
%define index_to_char   [ebp + 1052]    ;258
%define freq            [ebp + 1310]    ;1032
%define cum_freq        [ebp + 2342]    ;1032
; 3374
%define value           [ebp + 8]       ;4

        BITS    32
        CPU     386

        global  ari_compress, ari_expand
        
ari_compress:
        pusha
        cld

        mov     esi, [esp + 36]
        mov     eax, [esp + 40]
        mov     ecx, [esp + 44]
        mov     ebp, [esp + 48]
        mov     save_esp, esp
        mov     buf_len, ecx
        
        mov     buf, eax
        xor     eax, eax
        mov     pos, eax
        
        call    start_model
        call    start_encoding
        
        lea     ebx, char_to_index
.for:   xor     eax, eax
        lodsb
        mov     eax, [ebx + eax * 4]

        push    eax
        call    encode_symbol
        call    update_model
        add     esp, 4
        loop    .for
        
        push    EOF_symbol
        call    encode_symbol
        add     esp, 4

        call    done_encoding
        
        mov     eax, pos
        cdq
        push    byte 8
        pop     ebx
        div     ebx
        or      edx, edx
        jz      .L1
        inc     eax
.L1:    mov     [esp + 28], eax
        popa
        ret

        
bit_plus_follow:
        pusha

        mov     edi, buf
        mov     edx, pos
        mov     ecx, bits_to_follow     
        mov     eax, [esp + 36]
        
        call    output_bit
        xor     al, 1
.L0:    jcxz    .L1
        call    output_bit
        loop    .L0
        mov     bits_to_follow, ecx
.L1:    mov     pos, edx

        push    edx
        shr     edx, 3
        cmp     edx, buf_len
        jb      .ok
        mov     esp, save_esp
        xor     eax, eax
        jmp     ari_compress.L1
.ok:    pop     edx

        popa
        retn    4

output_bit:
        or      eax, eax
        jz      .L0
        bts     [edi], edx
        jmp     .L1
.L0:    btr     [edi], edx
.L1:    inc     edx
        ret

ari_expand:
        pusha
        cld
        
        mov     eax, [esp + 36]
        mov     edi, [esp + 40]
        mov     ebp, [esp + 44]
        
        mov     buf, eax
        xor     eax, eax
        mov     pos, eax
        
        lea     esi, index_to_char
        
        call    start_model
        call    start_decoding

.for:   call    decode_symbol
        mov     ebx, eax
        cmp     eax, EOF_symbol
        je      .end_for
        mov     al, [esi + ebx]
        stosb
        push    ebx
        call    update_model
        add     esp, 4
        jmp     .for
.end_for:

        sub     edi, [esp + 40]
        mov     [esp + 28], edi
        popa
        ret

start_encoding:
        xor     eax, eax
        mov     bits_to_follow, eax

.common:xor     eax, eax
        mov     low, eax
        dec     ax
        mov     high, eax
        ret

start_decoding:
        pusha
        push    byte Code_value_bits
        pop     ecx
        xor     ebx, ebx
.for:   call    input_bit
        shl     ebx, 1
        or      ebx, eax
        loop    .for
        mov     value, ebx
        popa
        jmp     start_encoding.common

done_encoding:
        inc     dword bits_to_follow
        cmp     dword low, First_qtr
        jae     .L0
        push    0
        jmp     .bits
.L0:    push    1
.bits:  call    bit_plus_follow
        ret

start_model:
        pusha

        lea     edi, char_to_index
        lea     esi, index_to_char
        
        xor     ecx, ecx
.for1:  mov     eax, ecx
        inc     eax
        stosd
        mov     [esi + eax], cl
        inc     ecx
        cmp     ecx, No_of_chars
        jb      .for1
                
        lea     esi, freq
        lea     edi, cum_freq
        
        xor     ecx, ecx
        mov     edx, No_of_symbols
.for2:  xor     eax, eax
        inc     eax     
        mov     [esi + ecx * 4], eax
        mov     eax, edx
        sub     eax, ecx
        stosd
        inc     ecx
        cmp     ecx, edx
        jbe     .for2

        xor     eax, eax
        mov     [esi], eax

        popa
        ret

update_model:
        pusha
        mov     edx, [esp + 36]
        
        lea     esi, cum_freq
        lea     edi, freq       

        cmp     dword [esi], Max_frequency
        jne     .end_if1
            xor         eax, eax
            mov         ecx, No_of_symbols
.for1:      mov         ebx, [edi + ecx * 4]
            inc         ebx
            shr         ebx, 1
            mov         [edi + ecx * 4], ebx
            mov         [esi + ecx * 4], eax
            add         eax, ebx
            dec         ecx
            jns         .for1
.end_if1:

        mov     ecx, edx
.for2:  mov     eax, [edi + ecx * 4]
        cmp     eax, [edi + ecx * 4 - 4]
        jne     .end_for2
        dec     ecx
        jmp     .for2
.end_for2:
        inc     dword [edi + ecx * 4]

        cmp     ecx, edx
        jae     .while
            lea         edi, index_to_char
            movzx       eax, byte [edi + ecx]
            movzx       ebx, byte [edi + edx]
            mov         [edi + ecx], bl
            mov         [edi + edx], al
            lea         edi, char_to_index
            mov         [edi + eax * 4], edx
            mov         [edi + ebx * 4], ecx
.while: or      ecx, ecx
        jz      .end_while
        dec     ecx
        inc     dword [esi + ecx * 4]
        jmp     .while
.end_while:
        popa
        ret

encode_symbol:
        pusha
        lea     edi, cum_freq
        mov     esi, [esp + 36]
        mov     ebx, low
        mov     ecx, high

        mov     eax, ecx
        sub     eax, ebx
        inc     eax
        push    eax

        mul     dword [edi + esi * 4 - 4]
        div     dword [edi]
        dec     eax
        mov     ecx, eax
        add     ecx, ebx

        pop     eax
        mul     dword [edi + esi * 4]
        div     dword [edi]
        add     ebx, eax

.for:
        mov     eax, Half
        cmp     ecx, eax
        jae     .L0
            push        0
            call        bit_plus_follow
            jmp         .end_if 

.L0:    cmp     ebx, eax
        jb      .L1
            push        1
            call        bit_plus_follow
            sub         ebx, eax
            sub         ecx, eax
            jmp         .end_if
            
.L1:    mov     eax, First_qtr
        cmp     ebx, eax
        jb      .end_for
        cmp     ecx, Third_qtr
        jae     .end_for
                inc     dword bits_to_follow
                sub     ebx, eax
                sub     ecx, eax
.end_if:
        shl     ebx, 1
        shl     ecx, 1
        inc     ecx
        jmp     .for
.end_for:
        mov     low, ebx
        mov     high, ecx

        popa
        ret

decode_symbol:
        pusha
        
        lea     esi, cum_freq
        
        mov     ebx, high
        sub     ebx, low
        inc     ebx

        mov     eax, value
        sub     eax, low
        inc     eax
        mul     dword [esi]
        dec     eax
        div     ebx

        xor     ecx, ecx
        inc     ecx
.for:   cmp     [esi + ecx * 4], eax
        jbe     .end_for
        inc     ecx
        jmp     .for
.end_for:

        mov     eax, ebx
        mul     dword [esi + ecx * 4 - 4]
        div     dword [esi]
        add     eax, low
        dec     eax
        mov     edi, eax

        mov     eax, ebx
        mul     dword [esi + ecx * 4]
        div     dword [esi]
        add     eax, low
        mov     edx, eax

        mov     ebx, value
.main:
        cmp     edi, Half
        jb      .end_if

        mov     eax, Half
        cmp     edx, eax
        jb      .L0
                sub     ebx, eax
                sub     edx, eax
                sub     edi, eax
                jmp     .end_if
.L0:
        mov     eax, First_qtr
        cmp     edx, eax
        jb      .end_main
        cmp     edi, Third_qtr
        jae     .end_main
                sub     ebx, eax
                sub     edx, eax
                sub     edi, eax
.end_if:
        shl     edx, 1
        shl     edi, 1
        inc     edi
        call    input_bit
        shl     ebx, 1
        or      ebx, eax
        jmp     .main
.end_main:
        mov     low, edx
        mov     high, edi
        mov     value, ebx

        mov     [esp + 28], ecx
        popa
        ret

input_bit:
        pusha
        xor     eax, eax
        mov     edi, buf
        mov     ecx, pos
        bt      [edi], ecx
        jnc     .L0
        inc     eax
.L0:    inc     dword pos
        mov     [esp + 28], eax
        popa
        ret