DBGPlugInLinux.cpp revision f2ffd08143645ae9fe9c2a8d74aebc5bc54ad127
/* $Id$ */
/** @file
* DBGPlugInLinux - Debugger and Guest OS Digger Plugin For Linux.
*/
/*
* Copyright (C) 2008-2013 Oracle Corporation
*
* This file is part of VirtualBox Open Source Edition (OSE), as
* available from http://www.virtualbox.org. This file is free software;
* you can redistribute it and/or modify it under the terms of the GNU
* General Public License (GPL) as published by the Free Software
* Foundation, in version 2 as it comes in the "COPYING" file of the
* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
*/
/*******************************************************************************
* Header Files *
*******************************************************************************/
#include "DBGPlugIns.h"
#include "DBGPlugInCommonELF.h"
/*******************************************************************************
* Structures and Typedefs *
*******************************************************************************/
/** @name Internal Linux structures
* @{ */
/** @} */
/**
* Linux guest OS digger instance data.
*/
typedef struct DBGDIGGERLINUX
{
/** Whether the information is valid or not.
* (For fending off illegal interface method calls.) */
bool fValid;
/** Set if 64-bit, clear if 32-bit. */
bool f64Bit;
/** The address of the linux banner.
* This is set during probing. */
/** Kernel base address.
* This is set during probing, refined during kallsyms parsing. */
/** The kernel size. */
/** The number of kernel symbols (kallsyms_num_syms).
* This is set during init. */
/** The size of the kernel name table (sizeof(kallsyms_names)). */
/** Number of entries in the kernel_markers table. */
/** The size of the kernel symbol token table. */
/** The address of the encoded kernel symbol names (kallsyms_names). */
/** The address of the kernel symbol addresses (kallsyms_addresses). */
/** The address of the kernel symbol name markers (kallsyms_markers). */
/** The address of the kernel symbol token table (kallsyms_token_table). */
/** The address of the kernel symbol token index table (kallsyms_token_index). */
/** Pointer to the linux guest OS digger instance data. */
typedef DBGDIGGERLINUX *PDBGDIGGERLINUX;
/*******************************************************************************
* Defined Constants And Macros *
*******************************************************************************/
/** Validates a 32-bit linux kernel address */
/** Checks whether @a Addr is a plausible 64-bit linux kernel address, i.e.
* strictly above 0xffff800000000000 and strictly below 0xfffffffffffff000.
* Note that @a Addr is evaluated twice. */
#define LNX64_VALID_ADDRESS(Addr) (UINT64_C(0xffff800000000000) < (Addr) && (Addr) < UINT64_C(0xfffffffffffff000))
/** The max kernel size. */
/** The maximum size we expect for kallsyms_names. */
/** The maximum size we expect for kallsyms_token_table. */
/** The minimum number of symbols we expect in kallsyms_num_syms. */
/** The maximum number of symbols we expect in kallsyms_num_syms. */
/** The min length an encoded symbol in kallsyms_names is expected to have. */
/** The max length an encoded symbol in kallsyms_names is expected to have.
* @todo check real life here. */
/** The approximate maximum length of a string token. */
/** Module tag for linux ('linuxmod' on little endian ASCII systems). */
/*******************************************************************************
* Internal Functions *
*******************************************************************************/
/*******************************************************************************
* Global Variables *
*******************************************************************************/
/**
 * Table of common linux kernel addresses.
 *
 * The probe code walks this table from the first to the last entry.  The
 * first two values fit in 32 bits, the last one is a 64-bit address
 * (NOTE(review): presumably typical 32-bit and 64-bit kernel load addresses -
 * confirm).  The table is never modified, hence const.
 */
static const uint64_t g_au64LnxKernelAddresses[] =
{
    UINT64_C(0xc0100000),
    UINT64_C(0x90100000),
    UINT64_C(0xffffffff80200000)
};
/**
 * @copydoc DBGFOSREG::pfnQueryInterface
 *
 * This digger hands out no interfaces: the answer is NULL whatever the
 * arguments are.
 */
static DECLCALLBACK(void *) dbgDiggerLinuxQueryInterface(PUVM pUVM, void *pvData, DBGFOSINTERFACE enmIf)
{
    /* None of the arguments are consulted. */
    (void)pUVM;
    (void)pvData;
    (void)enmIf;

    return NULL;
}
/**
* @copydoc DBGFOSREG::pfnQueryVersion
*/
static DECLCALLBACK(int) dbgDiggerLinuxQueryVersion(PUVM pUVM, void *pvData, char *pszVersion, size_t cchVersion)
{
/*
* It's all in the linux banner.
*/
if (RT_SUCCESS(rc))
{
/* Step pszEnd back towards the start of pszVersion (never past it) and
* terminate the string where the loop stops. */
while ( pszEnd > pszVersion
pszEnd--;
*pszEnd = '\0';
}
else
return rc;
}
/**
* @copydoc DBGFOSREG::pfnTerm
*/
{
}
/**
* @copydoc DBGFOSREG::pfnRefresh
*/
{
/*
* For now we'll flush and reload everything.
*/
}
/**
* Worker for dbgDiggerLinuxFindStartOfNamesAndSymbolCount that updates the
* digger data.
*
* @returns VINF_SUCCESS.
* @param pThis The Linux digger data to update.
* @param pAddrKernelNames The kallsyms_names address.
* @param cKernelSymbols The number of kernel symbols.
* @param cbAddress The guest address size.
*/
{
Log(("dbgDiggerLinuxFoundStartOfNames: AddrKernelAddresses=%RGv\n"
"dbgDiggerLinuxFoundStartOfNames: cKernelSymbols=%#x (at %RGv)\n"
"dbgDiggerLinuxFoundStartOfNames: AddrKernelName=%RGv\n",
return VINF_SUCCESS;
}
/**
* Tries to find the address of the kallsyms_names, kallsyms_num_syms and
* kallsyms_addresses symbols.
*
* The kallsyms_num_syms is read and stored in pThis->cKernelSymbols, while the
* addresses of the other two are stored as pThis->AddrKernelNames and
* pThis->AddrKernelAddresses.
*
* @returns VBox status code, success indicating that all three variables have
* been found and taken down. VERR_NOT_FOUND if the search gives up
* without a hit; a failure status seen while searching is returned
* unchanged.
* @param pUVM The user mode VM handle.
* @param pThis The Linux digger data.
* @param pHitAddr An address we think is inside kallsyms_names.
*/
static int dbgDiggerLinuxFindStartOfNamesAndSymbolCount(PUVM pUVM, PDBGDIGGERLINUX pThis, PCDBGFADDRESS pHitAddr)
{
/*
* Search backwards in chunks.
*/
union
{
} uBuf;
for (;;)
{
if (RT_FAILURE(rc))
return rc;
/*
* We assume that the three symbols are aligned on a guest pointer boundary.
*
* The boundary between the two tables should be noticeable: as the number
* is unlikely to be more than 16 million, there will be at least one zero
* byte where it is, 64-bit will have 5 zero bytes. Zero bytes aren't all
* that common in the kallsyms_names table.
*
* Also the kallsyms_names table starts with a length byte, which means
* we're likely to see a byte in the range 1..31.
*
* The kallsyms_addresses are mostly sorted (except for the start where the
* absolute symbols are), so we'll spot a bunch of kernel addresses
* immediately preceding the kallsyms_num_syms field.
*
* Lazy bird: If kallsyms_num_syms is on a buffer boundary, we skip
* the check for kernel addresses preceding it.
*/
{
while (i-- > 0)
{
/* The first byte must be a plausible encoded name length. */
if ( pb[0] <= LNX_MAX_KALLSYMS_ENC_LENGTH
&& pb[0] >= LNX_MIN_KALLSYMS_ENC_LENGTH)
{
/* Let the worker record the hit. */
return dbgDiggerLinuxFoundStartOfNames(pThis,
}
}
}
else
{
while (i-- > 0)
{
/* The first byte must be a plausible encoded name length. */
if ( pb[0] <= LNX_MAX_KALLSYMS_ENC_LENGTH
&& pb[0] >= LNX_MIN_KALLSYMS_ENC_LENGTH)
{
/* Let the worker record the hit. */
return dbgDiggerLinuxFoundStartOfNames(pThis,
}
}
}
/*
* Advance
*/
{
return VERR_NOT_FOUND;
}
}
}
/**
* Worker for dbgDiggerLinuxFindEndOfNamesAndMore that records the findings.
*
* @returns VINF_SUCCESS
* @param pThis The linux digger data to update.
* @param pAddrMarkers The address of the marker table (kallsyms_markers).
* @param cbMarkerEntry The size of a marker entry (32-bit or 64-bit).
*/
static int dbgDiggerLinuxFoundMarkers(PDBGDIGGERLINUX pThis, PCDBGFADDRESS pAddrMarkers, uint32_t cbMarkerEntry)
{
Log(("dbgDiggerLinuxFoundMarkers: AddrKernelNames=%RGv cbKernelNames=%#x\n"
"dbgDiggerLinuxFoundMarkers: AddrKernelNameMarkers=%RGv cKernelNameMarkers=%#x\n"
"dbgDiggerLinuxFoundMarkers: AddrKernelTokenTable=%RGv\n",
return VINF_SUCCESS;
}
/**
* Tries to find the end of kallsyms_names and thereby the start of
* kallsyms_markers and kallsyms_token_table.
*
* The kallsyms_names size is stored in pThis->cbKernelNames, the addresses of
* the two other symbols in pThis->AddrKernelNameMarkers and
* pThis->AddrKernelTokenTable. The number of marker entries is stored in
* pThis->cKernelNameMarkers.
*
* @returns VBox status code, success indicating that all three variables have
* been found and taken down. VERR_NOT_FOUND if the search gives up
* without a hit; a failure status seen while searching is returned
* unchanged.
* @param pUVM The user mode VM handle.
* @param pThis The Linux digger data.
* @param pHitAddr An address we think is inside kallsyms_names.
*/
static int dbgDiggerLinuxFindEndOfNamesAndMore(PUVM pUVM, PDBGDIGGERLINUX pThis, PCDBGFADDRESS pHitAddr)
{
/*
* Search forward in chunks.
*/
union
{
} uBuf;
/* Carries a candidate over to the next round of the loop; when it is set the
* marker table is reported as starting one entry before CurAddr. */
bool fPendingZeroHit = false;
for (;;)
{
if (RT_FAILURE(rc))
return rc;
/*
* We assume the kallsyms_names table is followed by kallsyms_markers,
* using sizeof(unsigned long) alignment like the preceding symbols.
*
* The kallsyms_markers table has entries of sizeof(unsigned long) and
* contains offsets into kallsyms_names. The kallsyms_markers are used to
* index kallsyms_names and reduce seek time when looking up symbol
* names.
*
* Because of this, the first entry is always zero and all the entries
* are ascending. It also follows that the size of the table can be
* calculated from kallsyms_num_syms.
*
* Note! We could also have walked kallsyms_names by skipping
* kallsyms_num_syms names, but this is faster and we will
* validate the encoded names later.
*/
{
/* 64-bit marker entries. */
if ( RT_UNLIKELY(fPendingZeroHit)
return dbgDiggerLinuxFoundMarkers(pThis, DBGFR3AddrSub(&CurAddr, sizeof(uint64_t)), sizeof(uint64_t));
{
{
fPendingZeroHit = true;
break;
}
return dbgDiggerLinuxFoundMarkers(pThis, DBGFR3AddrAdd(&CurAddr, i * sizeof(uint64_t)), sizeof(uint64_t));
}
}
else
{
/* 32-bit marker entries. */
if ( RT_UNLIKELY(fPendingZeroHit)
return dbgDiggerLinuxFoundMarkers(pThis, DBGFR3AddrSub(&CurAddr, sizeof(uint32_t)), sizeof(uint32_t));
{
{
fPendingZeroHit = true;
break;
}
return dbgDiggerLinuxFoundMarkers(pThis, DBGFR3AddrAdd(&CurAddr, i * sizeof(uint32_t)), sizeof(uint32_t));
}
}
/*
* Advance
*/
{
return VERR_NOT_FOUND;
}
offBuf = 0;
}
}
/**
* Locates the kallsyms_token_index table.
*
* Stores the address in pThis->AddrKernelTokenIndex and the size of the token
* table in pThis->cbKernelTokenTable.
*
* @returns VBox status code.
* @param pUVM The user mode VM handle.
* @param pThis The Linux digger data.
*/
{
/*
* The kallsyms_token_table is very much like a string table. Due to the
* nature of the compression algorithm it is reasonably short (one example
* here is 853 bytes), so we'll not be reading it in chunks but in full.
* To be on the safe side, we read 8KB, ASSUMING we won't run into unmapped
* memory or any other nasty stuff...
*/
union
{
} uBuf;
if (RT_FAILURE(rc))
return rc;
/*
* We've got two choices here, either walk the string table or look for
* the next structure, kallsyms_token_index.
*
* The token index is a table of 256 uint16_t entries (indexed by bytes
* from kallsyms_names) that gives offsets in kallsyms_token_table. It
* starts with a zero entry and the following entries are sorted in
* ascending order. The range of the entries is reasonably small since
* kallsyms_token_table is small.
*
* The alignment seems to be sizeof(unsigned long), just like
* kallsyms_token_table.
*
* So, we start by looking for a zero 16-bit entry.
*/
)
{
return VINF_SUCCESS;
}
Log(("dbgDiggerLinuxFindTokenIndex: Failed (%RGv..%RGv)\n", CurAddr.FlatPtr, CurAddr.FlatPtr + (RTGCUINTPTR)sizeof(uBuf)));
return VERR_NOT_FOUND;
}
/**
* Loads the kernel symbols from the kallsyms tables.
*
* @returns VBox status code.
* @param pUVM The user mode VM handle.
* @param pThis The Linux digger data.
*/
{
/*
* Allocate memory for temporary table copies, reading the tables as we go.
*/
int rc = DBGFR3MemRead(pUVM, 0 /*idCpu*/, &pThis->AddrKernelAddresses, pvAddresses, pThis->cKernelSymbols * cbGuestAddr);
if (RT_SUCCESS(rc))
{
if (RT_SUCCESS(rc))
{
rc = DBGFR3MemRead(pUVM, 0 /*idCpu*/, &pThis->AddrKernelTokenTable, pszzTokens, pThis->cbKernelTokenTable);
if (RT_SUCCESS(rc))
{
rc = DBGFR3MemRead(pUVM, 0 /*idCpu*/, &pThis->AddrKernelTokenIndex, paoffTokens, 256 * sizeof(uint16_t));
if (RT_SUCCESS(rc))
{
/*
* Figure out the kernel start and end.
*/
uint32_t i;
if (cbGuestAddr == sizeof(uint64_t))
{
for (i = 0; i < pThis->cKernelSymbols; i++)
if ( pauAddrs[i] < uKernelStart
&& LNX64_VALID_ADDRESS(pauAddrs[i])
uKernelStart = pauAddrs[i];
if ( pauAddrs[i] > uKernelEnd
&& LNX64_VALID_ADDRESS(pauAddrs[i])
uKernelEnd = pauAddrs[i];
}
else
{
for (i = 0; i < pThis->cKernelSymbols; i++)
if ( pauAddrs[i] < uKernelStart
&& LNX32_VALID_ADDRESS(pauAddrs[i])
uKernelStart = pauAddrs[i];
if ( pauAddrs[i] > uKernelEnd
&& LNX32_VALID_ADDRESS(pauAddrs[i])
uKernelEnd = pauAddrs[i];
}
/*
* Create a module for the kernel.
*/
if (RT_SUCCESS(rc))
{
rc = VINF_SUCCESS;
/*
* Enumerate the symbols.
*/
{
/* Decode the symbol name first. */
{
{
char szSymbol[4096];
while (cbName-- > 0)
{
{
char ch;
}
else
{
break;
}
}
/* The address. */
pbCurAddr += cbGuestAddr;
/* Add it without the type char. */
{
if (RT_FAILURE(rc))
{
if ( rc == VERR_DBG_SYMBOL_NAME_OUT_OF_RANGE
|| rc == VERR_DBG_INVALID_RVA
|| rc == VERR_DBG_ADDRESS_CONFLICT
|| rc == VERR_DBG_DUPLICATE_SYMBOL)
{
Log2(("dbgDiggerLinuxLoadKernelSymbols: RTDbgModSymbolAdd(,%s,) failed %Rrc (ignored)\n", szSymbol, rc));
rc = VINF_SUCCESS;
}
else
}
}
}
else
{
Log(("dbgDiggerLinuxLoadKernelSymbols: offName=%#x cLeft=%#x cbName=%#x cbKernelNames=%#x\n",
}
}
else
{
Log(("dbgDiggerLinuxLoadKernelSymbols: offName=%#x cLeft=%#x cbKernelNames=%#x\n",
}
}
/*
* Link the module into the address space.
*/
if (RT_SUCCESS(rc))
{
if (hAs != NIL_RTDBGAS)
else
}
else
}
else
}
else
Log(("dbgDiggerLinuxFindTokenIndex: Reading token index at %RGv failed: %Rrc\n",
}
else
Log(("dbgDiggerLinuxFindTokenIndex: Reading token table at %RGv failed: %Rrc\n",
}
else
Log(("dbgDiggerLinuxFindTokenIndex: Reading encoded names at %RGv failed: %Rrc\n",
}
else
Log(("dbgDiggerLinuxFindTokenIndex: Reading symbol addresses at %RGv failed: %Rrc\n",
return rc;
}
/**
* Checks if there is a likely kallsyms_names fragment at pHitAddr.
*
* @returns true if it's a likely fragment, false if not.
* @param pUVM The user mode VM handle.
* @param pHitAddr The address where pabNeedle was found.
* @param pabNeedle The fragment we've been searching for.
* @param cbNeedle The length of the fragment.
*/
static bool dbgDiggerLinuxIsLikelyNameFragment(PUVM pUVM, PCDBGFADDRESS pHitAddr, uint8_t const *pabNeedle, uint8_t cbNeedle)
{
/*
* Examples of lead and tail bytes of our chosen needle in a randomly
* picked kernel:
* k o b j
* 22 6b 6f 62 6a aa
* fc 6b 6f 62 6a aa
* 82 6b 6f 62 6a 5f - ascii trail byte (_).
* ee 6b 6f 62 6a aa
* fc 6b 6f 62 6a 5f - ascii trail byte (_).
* 0a 74 6b 6f 62 6a 5f ea - ascii lead (t) and trail (_) bytes.
* 0b 54 6b 6f 62 6a aa - ascii lead byte (T).
* ... omitting 29 samples similar to the last two ...
* d8 6b 6f 62 6a aa
* d8 6b 6f 62 6a aa
* d8 6b 6f 62 6a aa
* d8 6b 6f 62 6a aa
* f9 5f 6b 6f 62 6a 5f 94 - ascii lead and trail bytes (_)
* f9 5f 6b 6f 62 6a 0c - ascii lead byte (_).
* fd 6b 6f 62 6a 0f
* ... enough.
*/
if (RT_SUCCESS(rc))
{
{
return true;
Log(("dbgDiggerLinuxIsLikelyNameFragment: failed at %RGv: bLead=%#x bTail=%#x (offTail=%#x)\n",
}
else
}
else
return false;
}
/**
* @copydoc DBGFOSREG::pfnInit
*/
{
/*
* Assume 64-bit kernels all live way beyond 32-bit address space.
*/
/*
* Go looking for the kallsyms table. If it's there, it will be somewhere
* after the linux_banner symbol, so use it for starting the search.
*/
while (cbLeft > 4096)
{
if (RT_FAILURE(rc))
break;
{
/* There will be another hit near by. */
if ( RT_SUCCESS(rc)
{
/*
* We've got a very likely candidate for a location inside kallsyms_names.
* Try to find the start of it, that is to say, try to find kallsyms_num_syms.
* kallsyms_num_syms is aligned on a sizeof(unsigned long) boundary.
*/
if (RT_SUCCESS(rc))
if (RT_SUCCESS(rc))
if (RT_SUCCESS(rc))
if (RT_SUCCESS(rc))
break;
}
}
/*
* Advance.
*/
{
Log(("dbgDiggerLinuxInit: Failed to find kallsyms\n"));
break;
}
cbLeft -= cbDistance;
}
return VINF_SUCCESS;
}
/**
* @copydoc DBGFOSREG::pfnProbe
*/
{
/*
* Look for "Linux version " at the start of the rodata segment.
* Hope that this comes before any message buffer or other similar string.
*
* Note! Only Linux version 2.x.y, where x in {0..6}.
*/
for (unsigned i = 0; i < RT_ELEMENTS(g_au64LnxKernelAddresses); i++)
{
if (RT_SUCCESS(rc))
{
char szTmp[128];
if ( RT_SUCCESS(rc)
&& *pszY >= '0'
&& *pszY <= '6')
{
return true;
}
}
if (RT_SUCCESS(rc))
{
char szTmp[128];
if ( RT_SUCCESS(rc)
&& *pszY >= '0'
&& *pszY <= '9')
{
return true;
}
}
}
return false;
}
/**
* @copydoc DBGFOSREG::pfnDestruct
*/
{
}
/**
* @copydoc DBGFOSREG::pfnConstruct
*/
{
return VINF_SUCCESS;
}
/**
* The Linux guest OS digger registration record.
*
* Bundles the digger name ("Linux"), the size of the per-instance data
* (DBGDIGGERLINUX) and the dbgDiggerLinux* callbacks into one DBGFOSREG
* structure. The initializers are positional, so their order must follow
* the member order noted in the inline comments.
*/
const DBGFOSREG g_DBGDiggerLinux =
{
/* .u32Magic = */ DBGFOSREG_MAGIC,
/* .fFlags = */ 0,
/* .cbData = */ sizeof(DBGDIGGERLINUX),
/* .szName = */ "Linux",
/* .pfnConstruct = */ dbgDiggerLinuxConstruct,
/* .pfnDestruct = */ dbgDiggerLinuxDestruct,
/* .pfnProbe = */ dbgDiggerLinuxProbe,
/* .pfnInit = */ dbgDiggerLinuxInit,
/* .pfnRefresh = */ dbgDiggerLinuxRefresh,
/* .pfnTerm = */ dbgDiggerLinuxTerm,
/* .pfnQueryVersion = */ dbgDiggerLinuxQueryVersion,
/* .pfnQueryInterface = */ dbgDiggerLinuxQueryInterface,
/* .u32EndMagic = */ DBGFOSREG_MAGIC
};