tstDisasm-2.cpp revision 83dc9ca94cd3c31dabc33a35b945de124d43aaea
/* $Id$ */
/** @file
* Testcase - Generic Disassembler Tool.
*/
/*
* Copyright (C) 2008 Sun Microsystems, Inc.
*
* This file is part of VirtualBox Open Source Edition (OSE), as
* available from http://www.virtualbox.org. This file is free software;
* you can redistribute it and/or modify it under the terms of the GNU
* General Public License (GPL) as published by the Free Software
* Foundation, in version 2 as it comes in the "COPYING" file of the
* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
* Clara, CA 95054 USA or visit http://www.sun.com if you need
* additional information or have any questions.
*/
/*******************************************************************************
* Header Files *
*******************************************************************************/
#include <iprt/initterm.h>
/*******************************************************************************
* Structures and Typedefs *
*******************************************************************************/
/**
 * The assembly output style.
 */
typedef enum
{
    /** The default style. */
    kAsmStyle_Default,
    /** Yasm style. */
    kAsmStyle_yasm,
    /** Masm style. */
    kAsmStyle_masm,
    /** Gas style (NOTE(review): not listed in the --style usage text - confirm whether it is selectable). */
    kAsmStyle_gas,
    /** Presumably the end-of-valid-values marker - TODO confirm against the option parsing. */
    kAsmStyle_invalid
} ASMSTYLE;
/**
 * The disassembler state used by this testcase, with a pointer typedef.
 */
typedef struct MYDISSTATE
{
    /** Whether the current instruction is really an undefined opcode. */
    bool fUndefOp;
    /** Set if we hit EOF. */
    int rc;
} MYDISSTATE, *PMYDISSTATE;
/**
* Default style.
*
* @param pState The disassembler state.
*/
{
}
/**
* Yasm style.
*
* @param pState The disassembler state.
*/
{
char szTmp[256];
#if 0
/* a very quick hack. */
*psz = '\0';
if (psz)
*pszEnd++ = ' ';
*pszEnd = '\0';
#else
while (cch < 71)
#endif
}
/**
* Checks if the encoding of the current instruction is something
* we can never get the assembler to produce.
*
* @returns true if it's odd, false if it isn't.
* @param pCpu The disassembler output.
*/
{
/*
* Mod rm + SIB: Check for duplicate EBP encodings that yasm won't use for very good reasons.
*/
{
/* No scaled index SIB (index=4), except for ESP. */
return true;
/* EBP + displacement */
return true;
}
/*
* Seems to be an instruction alias here, but I cannot find any docs on it... hrmpf!
*/
return true;
/*
* Check for multiple prefixes of the same kind.
*/
{
uint32_t f;
switch (*pu8)
{
case 0xf0:
f = PREFIX_LOCK;
break;
case 0xf2:
case 0xf3:
f = PREFIX_REP; /* yes, both */
break;
case 0x2e:
case 0x3e:
case 0x26:
case 0x36:
case 0x64:
case 0x65:
f = PREFIX_SEG;
break;
case 0x66:
f = PREFIX_OPSIZE;
break;
case 0x67:
f = PREFIX_ADDRSIZE;
break;
case 0x40: case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47:
case 0x48: case 0x49: case 0x4a: case 0x4b: case 0x4c: case 0x4d: case 0x4e: case 0x4f:
break;
default:
f = 0;
break;
}
if (!f)
break; /* done */
if (fPrefixes & f)
return true;
fPrefixes |= f;
}
/* segment overrides are fun */
if (fPrefixes & PREFIX_SEG)
{
/* no effective address which it may apply to. */
return true;
}
/* fixed register + addr override doesn't go down all that well. */
if (fPrefixes & PREFIX_ADDRSIZE)
{
return true;
}
/* Almost all prefixes are bad. */
if (fPrefixes)
{
{
/* nop w/ prefix(es). */
case OP_NOP:
return true;
case OP_JMP:
break;
/* fall thru */
case OP_JO:
case OP_JNO:
case OP_JC:
case OP_JNC:
case OP_JE:
case OP_JNE:
case OP_JBE:
case OP_JNBE:
case OP_JS:
case OP_JNS:
case OP_JP:
case OP_JNP:
case OP_JL:
case OP_JNL:
case OP_JLE:
case OP_JNLE:
/** @todo branch hinting 0x2e/0x3e... */
return true;
}
}
/* All but the segment prefix is bad news. */
if (fPrefixes & ~PREFIX_SEG)
{
{
case OP_POP:
case OP_PUSH:
return true;
if ( (fPrefixes & ~PREFIX_OPSIZE)
return true;
break;
case OP_POPA:
case OP_POPF:
case OP_PUSHA:
case OP_PUSHF:
if (fPrefixes & ~PREFIX_OPSIZE)
return true;
break;
}
}
/* Implicit 8-bit register instructions don't mix with operand size. */
if ( (fPrefixes & PREFIX_OPSIZE)
)
{
{
case OP_ADD:
case OP_OR:
case OP_ADC:
case OP_SBB:
case OP_AND:
case OP_SUB:
case OP_XOR:
case OP_CMP:
return true;
default:
break;
}
}
/*
* Check for the version of xyz reg,reg instruction that the assembler doesn't use.
*
* For example:
*/
{
{
case OP_ADD:
case OP_OR:
case OP_ADC:
case OP_SBB:
case OP_AND:
case OP_SUB:
case OP_XOR:
case OP_CMP:
return true;
/* 82 (see table A-6). */
return true;
break;
/* ff /0, fe /0, ff /1, fe /1 */
case OP_DEC:
case OP_INC:
return true;
case OP_POP:
case OP_PUSH:
return true;
default:
break;
}
}
/* shl eax,1 will be assembled to the form without the immediate byte. */
{
{
case OP_SHL:
case OP_SHR:
case OP_SAR:
case OP_RCL:
case OP_RCR:
case OP_ROL:
case OP_ROR:
return true;
}
}
/* And some more - see table A-6. */
{
{
case OP_ADD:
case OP_OR:
case OP_ADC:
case OP_SBB:
case OP_AND:
case OP_SUB:
case OP_XOR:
case OP_CMP:
return true;
break;
}
}
/* check for REX.X = 1 without SIB. */
/* Yasm encodes setnbe al with /2 instead of /0 like the AMD manual
says (intel doesn't appear to care). */
{
case OP_SETO:
case OP_SETNO:
case OP_SETC:
case OP_SETNC:
case OP_SETE:
case OP_SETNE:
case OP_SETBE:
case OP_SETNBE:
case OP_SETS:
case OP_SETNS:
case OP_SETP:
case OP_SETNP:
case OP_SETL:
case OP_SETNL:
case OP_SETLE:
case OP_SETNLE:
return true;
break;
}
/*
* The MOVZX reg32,mem16 instruction without an operand size prefix
* doesn't quite make sense...
*/
return true;
return false;
}
/**
* Masm style.
*
* @param pState The disassembler state.
*/
{
}
/**
* This is a temporary workaround for catching a few illegal opcodes
* that the disassembler is currently letting thru, just enough to make
* the assemblers happy.
*
* We're too close to a release to dare mess with these things now as
* they may have consequences for performance, let alone introduce bugs.
*
* @returns true if it's valid. false if it isn't.
*
* @param pCpu The disassembler output.
*/
{
{
/* These don't take memory operands. */
case OP_MOV_CR:
case OP_MOV_DR:
case OP_MOV_TR:
return false;
break;
/* The 0x8f /0 variant of this instruction doesn't get its /r value verified. */
case OP_POP:
return false;
break;
/* The 0xc6 /0 and 0xc7 /0 variants of this instruction don't get their /r values verified. */
case OP_MOV:
return false;
break;
default:
break;
}
return true;
}
/**
 * Callback for reading bytes.
 *
 * Two cases are distinguished: straightforward reading, and "jumping up the
 * stream" (a request that goes back over earlier bytes).
 *
 * NOTE(review): the conditions selecting between the two cases and the actual
 * copy statements are not visible in this view of the file; the notes below
 * only describe what the visible return statements show.
 *
 * @returns VINF_SUCCESS when the end of the function is reached.
 * @returns VERR_EOF from within the jumping-up-the-stream case.
 * @returns VERR_INTERNAL_ERROR in the jumping-up-the-stream case when
 *          offReq64 is not below 32.
 *
 * @param   uSrcAddr    Presumably the address the bytes are requested from - TODO confirm.
 * @param   pbDst       Presumably the buffer receiving the bytes - TODO confirm.
 * @param   cbRead      The number of bytes requested; a single byte request (1) is
 *                      handled separately from larger ones in the straightforward case.
 * @param   pvDisCpu    Presumably the disassembler CPU state / user context - TODO confirm.
 *
 * @todo This should check that the disassembler doesn't do unnecessary reads,
 *       however the current code doesn't do this and is just complicated...
 */
static DECLCALLBACK(int) MyDisasInstrRead(RTUINTPTR uSrcAddr, uint8_t *pbDst, uint32_t cbRead, void *pvDisCpu)
{
{
/*
 * Straightforward reading.
 */
if (cbRead == 1)
{
}
else
{
}
}
else
{
/*
 * Jumping up the stream.
 * This occurs when the byte sequence is added to the output string.
 */
if (offReq64 < 32)
{
{
return VERR_EOF;
}
/* reset the stream. */
/* skip ahead. */
/* do the reading. */
{
}
else
{
{
}
return VERR_EOF;
}
}
else
{
/* The request reaches back 32 or more (in offReq64 terms); treated as an internal error. */
return VERR_INTERNAL_ERROR;
}
}
return VINF_SUCCESS;
}
/**
* Disassembles a block of memory.
*
* @returns VBox status code.
* @param argv0 Program name (for errors and warnings).
* @param enmCpuMode The cpu mode to disassemble in.
* @param uAddress The address we're starting to disassemble at.
* @param pbFile Where to start disassembling.
* @param cbFile How much to disassemble.
* @param enmStyle The assembly output style.
* @param fListing Whether to print in a listing-like mode.
* @param enmUndefOp How to deal with undefined opcodes.
*/
static int MyDisasmBlock(const char *argv0, DISCPUMODE enmCpuMode, uint64_t uAddress, uint8_t *pbFile, size_t cbFile,
{
/*
* Initialize the CPU context.
*/
switch (enmStyle)
{
case kAsmStyle_Default:
break;
case kAsmStyle_yasm:
break;
case kAsmStyle_masm:
break;
default:
}
/*
* The loop.
*/
int rcRet = VINF_SUCCESS;
{
/*
* Disassemble it.
*/
if ( RT_SUCCESS(rc)
|| ( ( rc == VERR_DIS_INVALID_OPCODE
|| rc == VERR_DIS_GEN_FAILURE)
{
|| rc == VERR_DIS_GEN_FAILURE
{
RTPrintf(" db");
{
uint8_t b;
}
}
{
RTPrintf("%s: error at %#RX64: unexpected valid instruction (op=%d)\n", argv0, State.uAddress, State.Cpu.pCurInstr->opcode);
}
{
RTPrintf("%s: error at %#RX64: undefined opcode (op=%d)\n", argv0, State.uAddress, State.Cpu.pCurInstr->opcode);
}
else
{
/* Use db for odd encodings that we can't make the assembler use. */
{
RTPrintf(" db");
{
uint8_t b;
}
RTPrintf(" ; ");
}
}
}
else
{
else
{
if (rcRet == VINF_SUCCESS)
break;
}
}
/* next */
}
return rcRet;
}
/**
 * Translates a single hexadecimal digit character into its numeric value.
 *
 * The letters for the values 10 thru 15 are accepted in both upper and
 * lower case.
 *
 * @returns The digit value (0..15) on success, -1 if @a ch isn't a hex digit.
 * @param   ch      The character to translate.
 */
static int HexDigitToNum(char ch)
{
    /* The position of a digit in these tables is its numeric value. */
    static const char s_szLowerDigits[] = "0123456789abcdef";
    static const char s_szUpperDigits[] = "0123456789ABCDEF";
    int iValue;

    /* Only the 16 digit positions are scanned, so a '\0' input never matches the terminators. */
    for (iValue = 0; iValue < 16; iValue++)
    {
        if (   ch == s_szLowerDigits[iValue]
            || ch == s_szUpperDigits[iValue])
            return iValue;
    }

    return -1;
}
/**
* Prints usage info.
*
* @returns 1.
* @param argv0 The program name.
*/
{
"usage: %s [options] <file1> [file2..fileN]\n"
" or: %s [options] <-x|--hex-bytes> <hex byte> [more hex..]\n"
" or: %s <--help|-h>\n"
"\n"
"Options:\n"
" --address|-a <address>\n"
" The base address. Default: 0\n"
" --max-bytes|-b <bytes>\n"
" The maximum number of bytes to disassemble. Default: 1GB\n"
" --cpumode|-c <16|32|64>\n"
" The cpu mode. Default: 32\n"
" --listing|-l, --no-listing|-L\n"
" Enables or disables listing mode. Default: --no-listing\n"
" --offset|-o <offset>\n"
" The file offset at which to start disassembling. Default: 0\n"
" --style|-s <default|yasm|masm>\n"
" The assembly output style. Default: default\n"
" --undef-op|-u <fail|all|db>\n"
" How to treat undefined opcodes. Default: fail\n"
return 1;
}
{
RTR3Init();
/* options */
bool fListing = true;
bool fHexBytes = false;
/*
* Parse arguments.
*/
static const RTGETOPTDEF g_aOptions[] =
{
};
int ch;
{
switch (ch)
{
case 'a':
break;
case 'b':
cbMax = ValueUnion.i;
break;
case 'c':
else
{
return 1;
}
break;
case 'h':
case 'l':
fListing = true;
break;
case 'L':
fListing = false;
break;
case 'o':
off = ValueUnion.i;
break;
case 's':
{
return 1;
}
else
{
return 1;
}
break;
case 'u':
else
{
RTStrmPrintf(g_pStdErr, "%s: unknown undefined opcode handling method: %s\n", argv0, ValueUnion.psz);
return 1;
}
break;
case 'x':
fHexBytes = true;
break;
case VINF_GETOPT_NOT_OPTION:
break;
default:
return 1;
}
}
int rc = VINF_SUCCESS;
if (fHexBytes)
{
/*
* Convert the remaining arguments from a hex byte string into
* a buffer that we disassemble.
*/
{
while (*psz)
{
/** @todo this stuff belongs in IPRT, same stuff as mac address reading. Could be reused for IPv6 with a different item size.*/
/* skip white space */
psz++;
if (!*psz)
break;
/* one digit followed by a space or EOS, or two digits. */
if (iNum == -1)
return 1;
{
if (iDigit == -1)
return 1;
}
/* add the byte */
{
if (!pb)
{
return 1;
}
}
}
}
/*
* Disassemble it.
*/
}
else
{
/*
* Process the files.
*/
{
/*
* Read the file into memory.
*/
void *pvFile;
if (RT_FAILURE(rc))
{
break;
}
/*
* Disassemble it.
*/
rc = MyDisasmBlock(argv0, enmCpuMode, uAddress, (uint8_t *)pvFile, cbFile, enmStyle, fListing, enmUndefOp);
if (RT_FAILURE(rc))
break;
}
}
}