unnrv2b.S revision 677833bc953b6cb418c701facbdcf4aa18d6c44e
/*
* Copyright (C) 1996-2002 Markus Franz Xaver Johannes Oberhumer
* Copyright (C) 2002 Eric Biederman
*
* This file is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of
* the License, or (at your option) any later version.
*
* Originally this code was part of ucl the data compression library
* for upx the ``Ultimate Packer of eXecutables''.
*
* - Converted to gas assembly, and refitted to work with etherboot.
* Eric Biederman 20 Aug 2002
*
* - Converted to functional ia64 assembly (Can this get smaller?)
* Eric Biederman 5 Dec 2002
*/
.text
.globl _start
/*
* _start - entry point of the decompressor stub.
*
* Initialises the registers listed under "Input" in the DECOMPRESSION
* comment below and then branches into the decode loop at
* decompr_loop_n2b.  No register stack frame is allocated here.
*/
_start:
/* See where I am running, and compute gp */
{
/* Do not call alloc here as I do not know how many argument
* registers are being passed through the decompressor, and if I report
* too few the unreported registers may get stomped.
*
* Instead just explicitly get the value of ar.pfs.
*/
mov r17=0 /* empty bit buffer, so the first getbit refills from the source */
mov r8=ar.pfs /* kept in r8 and written back at decompr_end_n2b */
mov gp = ip /* The linker script sets gp at _start */
}
{.mlx
/* NOTE(review): the immediate looks like a placeholder that is patched
* with the real offset after assembly - confirm against the build tools.
*/
movl r9=0x123456789abcdef0 /* Get uncompressed_offset into r9 */
}
;;
{
/* The source pointer starts 4 bytes into the payload; presumably the
* first 4 bytes are a header written by the compressor - TODO confirm.
*/
add r14 = @gprel(payload + 4),gp /* r14 = source */
add r15 = r9,gp /* r15 = dest = gp + uncompressed_offset */
mov r16=1 /* last_m_off = 1 */
}
{
mov r20 = 0xd00 /* constant compared against last_m_off in the decode loop */
add r21 = r9,gp /* r21 = start address (same value as the initial dest) */
br.sptk.few decompr_loop_n2b
}
/* ------------- DECOMPRESSION -------------
Input:
r8 - ar.pfs
r14 - source
r15 - dest
r16 - 1
r17 - (buffer) 0
r20 - 0xd00 (constant)
r21 - start address
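Usage:
r2 - m_off, the match offset being decoded
r3 - m_len, the match length being decoded
r9 - scratch register for memory copies
r18 - scratch register for getbit
r19 - scratch register for loads and stores
b6 - return address for calls to getbit
p6/p7 - bit returned by getbit (p6 = 1, p7 = 0); also used for other compares
p8 - scratch predicate inside getbit
Output:
r2 - the end-of-stream marker (low 32 bits are 0xffffffff)
r3 - 0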
*/
/*
* getbit - fetch the next bit of the compressed stream.
*
* In:   r17 = bit buffer, r14 = source pointer, b6 = return address
* Out:  p6 set if the bit is 1, p7 set if the bit is 0
*       r17 = updated bit buffer, r14 advanced by 8 when a refill happened
* Uses: r18, p8
*
* Bits are taken from the top of r17.  A 1 bit shifted in at the bottom
* on every refill acts as an end marker: once shifting leaves r17 equal
* to zero the buffer is exhausted and the next 8 source bytes are loaded.
*/
getbit:
add r18 = r17,r17 /* shift the buffer left by one */
;;
cmp.ne p8,p0 = r0,r18 /* p8 = data left in the buffer after the shift */
cmp.leu p6,p7 = r18,r17 /* the sum wrapped, i.e. the bit shifted out was 1 */
;;
mov r17 = r18
(p8) br.cond.sptk.few getbit_end
/* Do an unaligned 64-bit load, one byte at a time, lowest byte first */
;;
ld1 r17 = [r14],1 /* byte 0 -> bits 0..7, upper bits zero */
;;
ld1 r18 = [r14],1
;;
dep r17 = r18,r17,8,8 /* byte 1 -> bits 8..15 */
ld1 r18 = [r14],1
;;
dep r17 = r18,r17,16,8 /* byte 2 -> bits 16..23 */
ld1 r18 = [r14],1
;;
dep r17 = r18,r17,24,8 /* byte 3 -> bits 24..31 */
ld1 r18 = [r14],1
;;
dep r17 = r18,r17,32,8 /* byte 4 -> bits 32..39 */
ld1 r18 = [r14],1
;;
dep r17 = r18,r17,40,8 /* byte 5 -> bits 40..47 */
ld1 r18 = [r14],1
;;
dep r17 = r18,r17,48,8 /* byte 6 -> bits 48..55 */
ld1 r18 = [r14],1
;;
dep r17 = r18,r17,56,8 /* byte 7 -> bits 56..63 */
;;
add r18 = r17,r17,1 /* shift out the first bit and shift in the end marker */
;;
/* leu rather than ltu: with the +1 an all-ones buffer yields r18 == r17 */
cmp.leu p6,p7=r18,r17
;;
mov r17=r18
;;
getbit_end:
br.ret.sptk.few b6
/*
* NRV2B decode loop.
*
* Entered at decompr_loop_n2b with the registers set up by _start and
* left through decompr_end_n2b when the end-of-stream marker is decoded.
*
*   r14 = source pointer          r15 = destination pointer
*   r16 = last_m_off              r20 = 0xd00
*   r2  = m_off                   r3  = m_len
*   r9  = m_pos (copy source)     r19 = byte being copied
*
* Every bit is fetched with getbit, which returns it in p6 (1) / p7 (0).
*
* r16 holds last_m_off as the reference decoder defines it: the match
* distance, which is the decoded offset plus one.  Keeping the distance
* itself (rather than the raw decoded offset) is what makes both the
* initial value 1 loaded by _start and the "last_m_off > 0xd00" test
* agree with the stream format; with the raw offset the test was off by
* one for a distance of exactly 0xd01 and an initial repeat-offset match
* copied from two bytes back instead of one.
*/
decompr_literals_n2b:
ld1 r19 = [r14],1
;;
st1 [r15] = r19,1 /* dst[olen++] = src[ilen++] */
;;
decompr_loop_n2b:
br.call.sptk.few b6 = getbit
;;
(p6) br.cond.sptk.few decompr_literals_n2b /* while (getbit()) copy a literal */
(p7) add r2 = 1,r0 /* m_off = 1 */
;;
loop1_n2b:
br.call.sptk.few b6 = getbit
;;
(p6) add r2 = r2,r2,1 /* m_off = m_off*2 + getbit() */
(p7) add r2 = r2,r2
br.call.sptk.few b6 = getbit
;;
(p7) br.cond.sptk.few loop1_n2b /* while(!getbit()) */
;;
mov r3 = r0 /* m_len = 0 */
cmp.eq p6,p0 = 2,r2 /* reads r2 before the add below changes it */
add r2 = -3,r2
(p6) br.cond.sptk.few decompr_ebpeax_n2b /* if (m_off == 2) reuse last_m_off */
;;
ld1 r19 = [r14],1
shl r2 = r2,8
;;
dep r2 = r19,r2,0,8 /* m_off = (m_off - 3)*256 + src[ilen++] */
;;
cmp4.eq p6,p0 = -1,r2 /* if (m_off == 0xffffffff) goto decompr_end_n2b */
;;
(p6) br.cond.sptk.few decompr_end_n2b
add r16 = 1,r2 /* last_m_off = m_off + 1 (the match distance) */
;;
decompr_ebpeax_n2b:
br.call.sptk.few b6 = getbit
;;
(p6) add r3 = r3,r3,1 /* m_len = getbit() */
(p7) add r3 = r3,r3
br.call.sptk.few b6 = getbit
;;
(p6) add r3 = r3,r3,1 /* m_len = m_len*2 + getbit() */
(p7) add r3 = r3,r3
;;
cmp.ne p6,p0 = r0,r3
(p6) br.cond.sptk.few decompr_got_mlen_n2b /* if (m_len != 0) goto decompr_got_mlen_n2b */
add r3 = 1,r3 /* m_len++ (only reached when the branch above is not taken) */
;;
loop2_n2b:
br.call.sptk.few b6 = getbit
;;
(p6) add r3 = r3,r3,1 /* m_len = m_len*2 + getbit() */
(p7) add r3 = r3,r3
br.call.sptk.few b6 = getbit
;;
(p7) br.cond.sptk.few loop2_n2b /* while(!getbit()) */
add r3 = 2, r3 /* m_len += 2 */
;;
decompr_got_mlen_n2b:
cmp.gtu p6,p7 = r16, r20
;;
(p6) add r3 = 2, r3 /* m_len = m_len + 1 + (last_m_off > 0xd00) */
(p7) add r3 = 1, r3
sub r9 = r15, r16 /* m_pos = dst + olen - last_m_off */
;;
1:
ld1 r19 = [r9],1
add r3 = -1,r3
;;
st1 [r15] = r19,1 /* dst[olen++] = *m_pos++ while(m_len > 0) */
cmp.ne p6,p0 = r0,r3
(p6) br.cond.sptk.few 1b
;;
br.cond.sptk.few decompr_loop_n2b
/*
* decompr_end_n2b - reached when the end-of-stream marker has been decoded.
* Writes the ar.pfs value captured at _start back and jumps to the start
* address held in r21.
*/
decompr_end_n2b:
/* Branch to the start address */
mov ar.pfs=r8 /* r8 was loaded from ar.pfs at _start */
;;
mov b6 = r21 /* r21 = gp + uncompressed_offset, computed at _start */
;;
br.sptk.few b6
/* _start reads the compressed stream starting at payload + 4. */
payload: