xml.cpp revision 6ed89c8f32109ca25f62bc7edbc8fb5a422ac44e
/* $Id$ */
/** @file
* IPRT - XML Manipulation API.
*/
/*
* Copyright (C) 2007-2010 Oracle Corporation
*
* This file is part of VirtualBox Open Source Edition (OSE), as
* available from http://www.virtualbox.org. This file is free software;
* you can redistribute it and/or modify it under the terms of the GNU
* General Public License (GPL) as published by the Free Software
* Foundation, in version 2 as it comes in the "COPYING" file of the
* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
*
* The contents of this file may alternatively be used under the terms
* of the Common Development and Distribution License Version 1.0
* (CDDL) only, as it comes in the "COPYING.CDDL" file of the
* VirtualBox OSE distribution, in which case the provisions of the
* CDDL are applicable instead of those of the GPL.
*
* You may elect to license modified versions of this file under the
* terms and conditions of either the GPL or the CDDL or both.
*/
#include <libxml/xmlschemas.h>
#include <map>
#include <boost/shared_ptr.hpp>
////////////////////////////////////////////////////////////////////////////////
//
// globals
//
////////////////////////////////////////////////////////////////////////////////
/**
* Global module initialization structure. This is to wrap non-reentrant bits
* of libxml, among other things.
*
* The constructor and destructor of this structure are used to perform global
* module initialization and cleanup. There must be only one global variable of
* this structure.
*/
/* NOTE(review): the declarator that would name the single global instance of
 * this class (and the terminating semicolon) is not visible after the closing
 * brace in this view -- confirm against the full file. */
static
class Global
{
public:
/* Performs the module-wide libxml setup steps listed in the comments below.
 * NOTE(review): the statements implementing these steps are not visible in
 * this view; only the step descriptions are. */
Global()
{
/* Check the parser version. The docs say it will kill the app if
 * there is a serious version mismatch, but I couldn't find that in the
 * code, so let's leave it as is for informational purposes. */
/* Init libxml */
/* Save the default entity resolver before someone has replaced it */
}
/* Counterpart of the constructor: module-wide libxml teardown. */
~Global()
{
/* Shutdown libxml */
}
/* Holder for the libxml-related global state; accessed through the 'sxml'
 * member below. NOTE(review): its fields are not visible in this view. */
struct
{
/** Used to provide some thread safety missing in libxml2 (see e.g.
 * XmlTreeBackend::read()) */
}
sxml; /* XXX naming this xml will break with gcc-3.3 */
}
namespace xml
{
////////////////////////////////////////////////////////////////////////////////
//
// Exceptions
//
////////////////////////////////////////////////////////////////////////////////
{
}
{
if (!aErr)
throw EInvalidArg(RT_SRC_POS);
}
/**
* Composes a single message for the given error. The caller must free the
* returned string using RTStrFree() when no more necessary.
*/
// static
{
/* strip spaces, trailing EOLs and dot-like char */
--msgLen;
return finalMsg;
}
: RuntimeError(NULL),
{
char *pszContext2;
char *newMsg;
}
////////////////////////////////////////////////////////////////////////////////
//
// File Class
//
//////////////////////////////////////////////////////////////////////////////
{
Data()
{ }
bool opened : 1;
bool flushOnClose : 1;
};
: m(new Data())
{
m->strFileName = aFileName;
m->flushOnClose = aFlushIt;
switch (aMode)
{
/** @todo change to RTFILE_O_DENY_WRITE where appropriate. */
case Mode_Read:
break;
case Mode_WriteCreate: // fail if file exists
break;
case Mode_Overwrite: // overwrite if file exists
break;
case Mode_ReadWrite:
}
if (RT_FAILURE(vrc))
m->opened = true;
}
: m(new Data())
{
if (aHandle == NIL_RTFILE)
throw EInvalidArg(RT_SRC_POS);
if (aFileName)
m->strFileName = aFileName;
m->flushOnClose = aFlushIt;
setPos(0);
}
{
if (m->flushOnClose)
{
RTFileFlush(m->handle);
if (!m->strFileName.isEmpty())
}
if (m->opened)
RTFileClose(m->handle);
delete m;
}
{
return m->strFileName.c_str();
}
{
uint64_t p = 0;
if (RT_SUCCESS(vrc))
return p;
}
{
uint64_t p = 0;
unsigned method = RTFILE_SEEK_BEGIN;
int vrc = VINF_SUCCESS;
/* check if we overflow int64_t and move to INT64_MAX first */
{
}
/* seek the rest */
if (RT_SUCCESS(vrc))
if (RT_SUCCESS(vrc))
return;
}
{
if (RT_SUCCESS(vrc))
return (int)len;
}
{
if (RT_SUCCESS (vrc))
return (int)len;
return -1 /* failure */;
}
{
if (RT_SUCCESS (vrc))
return;
}
////////////////////////////////////////////////////////////////////////////////
//
// MemoryBuf Class
//
//////////////////////////////////////////////////////////////////////////////
{
Data()
const char *buf;
char *uri;
};
: m (new Data())
{
throw EInvalidArg (RT_SRC_POS);
}
{
}
{
return m->uri;
}
{
return m->pos;
}
{
throw EInvalidArg();
throw EInvalidArg();
}
{
return 0 /* nothing to read */;
return (int)len;
}
////////////////////////////////////////////////////////////////////////////////
//
// GlobalLock class
//
////////////////////////////////////////////////////////////////////////////////
/**
 * Private data of GlobalLock.
 *
 * NOTE(review): the member declarations are not visible in this view, and the
 * constructor's initialiser list ends in a trailing comma -- confirm against
 * the full file that nothing is missing here.
 */
struct GlobalLock::Data
{
/* A fresh lock has no saved entity loader yet (pOldLoader is NULL). */
Data()
: pOldLoader(NULL),
{
}
};
: m(new Data())
{
}
/**
 * Puts back the external entity loader that was saved in m->pOldLoader (if
 * one was saved) and then releases the private data.
 */
GlobalLock::~GlobalLock()
{
    /* Only undo the replacement if a previous loader was actually recorded. */
    if (m->pOldLoader)
        xmlSetExternalEntityLoader(m->pOldLoader);

    /* The private data must be freed unconditionally, not only when a loader
     * had been saved. */
    delete m;
    m = NULL;
}
{
m->pOldLoader = xmlGetExternalEntityLoader();
}
// static
const char *aID,
{
}
////////////////////////////////////////////////////////////////////////////////
//
// Node class
//
////////////////////////////////////////////////////////////////////////////////
{
struct compare_const_char
{
{
}
};
// attributes, if this is an element; can be empty
// child elements, if this is an element; can be empty
};
m(new Data)
{
}
{
delete m;
}
/**
* Private implementation.
* @param elmRoot
*/
{
// go thru this element's attributes
while (plibAttr)
{
const char *pcszKey;
// store
}
// go thru this element's child elements
while (plibNode)
{
if (pNew)
{
// store
// recurse for this child element to get its own children
}
}
}
/**
* Returns the name of the node, which is either the element name or
* the attribute name. For other node types it probably returns NULL.
* @return
*/
{
return m_pcszName;
}
/**
* Variant of nameEquals that checks the namespace as well.
* @param pcszNamespace
* @param pcsz
* @return
*/
{
if (m_pcszName == pcsz)
return true;
if (m_pcszName == NULL)
return false;
return false;
return false;
// name matches: then check namespaces as well
if (!pcszNamespace)
return true;
// caller wants namespace:
if (!m_pcszNamespacePrefix)
// but node has no namespace:
return false;
}
/**
* Returns the value of a node. If this node is an attribute, returns
* the attribute value; if this node is an element, then this returns
* the element text content.
* @return
*/
{
if ( (m_plibAttr)
&& (m_plibAttr->children)
)
// libxml hides attribute values in another node created as a
// single child of the attribute node, and it's in the content field
if ( (m_plibNode)
&& (m_plibNode->children)
)
return NULL;
}
/**
* Copies the value of a node into the given integer variable.
* Returns TRUE only if a value was found and was actually an
* integer of the given type.
* @return
*/
{
const char *pcsz;
)
return true;
return false;
}
/**
* Copies the value of a node into the given integer variable.
* Returns TRUE only if a value was found and was actually an
* integer of the given type.
* @return
*/
{
const char *pcsz;
)
return true;
return false;
}
/**
* Copies the value of a node into the given integer variable.
* Returns TRUE only if a value was found and was actually an
* integer of the given type.
* @return
*/
{
const char *pcsz;
)
return true;
return false;
}
/**
* Copies the value of a node into the given integer variable.
* Returns TRUE only if a value was found and was actually an
* integer of the given type.
* @return
*/
{
const char *pcsz;
)
return true;
return false;
}
/**
* Returns the line number of the current node in the source XML file.
* Useful for error messages.
* @return
*/
int Node::getLineNumber() const
{
    /* libxml2 attribute nodes carry no line number of their own, so report
     * the line of the element that owns the attribute instead. */
    if (m_plibAttr)
        return m_plibAttr->parent ? m_plibAttr->parent->line : 0;

    /* Element (or other tree) node: the line is stored on the node itself.
     * Return 0 rather than dereferencing a missing libxml node. */
    return m_plibNode ? m_plibNode->line : 0;
}
/**
* Private element constructor.
* @param pelmRoot
* @param pParent
* @param plibNode
*/
NULL)
{
if (!(m_pelmRoot = pelmRoot))
// NULL passed, then this is the root element
m_pelmRoot = this;
{
}
}
/**
* Builds a list of direct child elements of the current element that
* match the given string; if pcszMatch is NULL, all direct child
* elements are returned.
* @param children out: list of nodes to which children will be appended.
* @param pcszMatch in: match string, or NULL to return all children.
* @return Number of items appended to the list (0 if none).
*/
const char *pcszMatch /*= NULL*/)
const
{
int i = 0;
++it)
{
// export this child node if ...
if (p->isElement())
if ( (!pcszMatch) // the caller wants all nodes or
)
{
++i;
}
}
return i;
}
/**
* Returns the first child element whose name matches pcszMatch.
*
* @param pcszNamespace Namespace prefix (e.g. "vbox") or NULL to match any namespace.
* @param pcszMatch Element name to match.
* @return
*/
const char *pcszMatch)
const
{
it,
++it)
{
{
return pelm;
}
}
return NULL;
}
/**
* Returns the first child element whose "id" attribute matches pcszId.
* @param pcszId identifier to look for.
* @return child element or NULL if not found.
*/
{
it,
++it)
{
{
const AttributeNode *pAttr;
)
return pelm;
}
}
return NULL;
}
/**
* Looks up the given attribute node in this element's attribute map.
*
* With respect to namespaces, the internal attributes map stores namespace
* prefixes with attribute names only if the attribute uses a non-default
* namespace. As a result, the following rules apply:
*
* -- To find attributes from a non-default namespace, pcszMatch must be
* prefixed with the namespace prefix and a colon.
*
* -- To find attributes from the default namespace (or if the document does
* not use namespaces), pcszMatch must not be prefixed with a namespace.
*
* For example, if the document uses the "vbox:" namespace by default, you
* must omit "vbox:" from pcszMatch to find such attributes, whether they
* are specified in the xml or not.
*
* @param pcszMatch
* @return
*/
{
return NULL;
}
/**
* Convenience method which attempts to find the attribute with the given
* name and returns its value as a string.
*
* @param pcszMatch name of attribute to find (see findAttribute() for namespace remarks)
* @param ppcsz out: attribute value
* @return TRUE if attribute was found and *ppcsz was thus updated.
*/
{
{
return true;
}
return false;
}
/**
* Convenience method which attempts to find the attribute with the given
* name and returns its value as a string.
*
* @param pcszMatch name of attribute to find (see findAttribute() for namespace remarks)
* @param str out: attribute value; overwritten only if attribute was found
* @return TRUE if attribute was found and str was thus updated.
*/
{
{
return true;
}
return false;
}
/**
* Convenience method which attempts to find the attribute with the given
* name and returns its value as a signed integer. This calls
* RTStrToInt32Ex internally and will only output the integer if that
* function returns no error.
*
* @param pcszMatch name of attribute to find (see findAttribute() for namespace remarks)
* @param i out: attribute value; overwritten only if attribute was found
* @return TRUE if attribute was found and i was thus updated.
*/
{
const char *pcsz;
)
return true;
return false;
}
/**
* Convenience method which attempts to find the attribute with the given
* name and returns its value as an unsigned integer. This calls
* RTStrToUInt32Ex internally and will only output the integer if that
* function returns no error.
*
* @param pcszMatch name of attribute to find (see findAttribute() for namespace remarks)
* @param i out: attribute value; overwritten only if attribute was found
* @return TRUE if attribute was found and i was thus updated.
*/
{
const char *pcsz;
)
return true;
return false;
}
/**
* Convenience method which attempts to find the attribute with the given
* name and returns its value as a signed long integer. This calls
* RTStrToInt64Ex internally and will only output the integer if that
* function returns no error.
*
* @param pcszMatch name of attribute to find (see findAttribute() for namespace remarks)
* @param i out: attribute value
* @return TRUE if attribute was found and i was thus updated.
*/
{
const char *pcsz;
)
return true;
return false;
}
/**
* Convenience method which attempts to find the attribute with the given
* name and returns its value as an unsigned long integer. This calls
* RTStrToUInt64Ex internally and will only output the integer if that
* function returns no error.
*
* @param pcszMatch name of attribute to find (see findAttribute() for namespace remarks)
* @param i out: attribute value; overwritten only if attribute was found
* @return TRUE if attribute was found and i was thus updated.
*/
{
const char *pcsz;
)
return true;
return false;
}
/**
* Convenience method which attempts to find the attribute with the given
* name and returns its value as a boolean. This accepts "true", "false",
* "yes", "no", "1" or "0" as valid values.
*
* @param pcszMatch name of attribute to find (see findAttribute() for namespace remarks)
* @param f out: attribute value; overwritten only if attribute was found
* @return TRUE if attribute was found and f was thus updated.
*/
{
const char *pcsz;
{
)
{
f = true;
return true;
}
)
{
f = false;
return true;
}
}
return false;
}
/**
* Creates a new child element node and appends it to the list
* of children in "this".
*
* @param pcszElementName
* @return
*/
{
// we must be an element, not an attribute
if (!m_plibNode)
throw ENodeIsNotElement(RT_SRC_POS);
// libxml side: create new node
(const xmlChar*)pcszElementName)))
// now wrap this in C++
return p;
}
/**
* Creates a content node and appends it to the list of children
* in "this".
*
* @param pcszContent
* @return
*/
{
// libxml side: create new node
// now wrap this in C++
return p;
}
/**
* Sets the given attribute; overloaded version for const char *.
*
* If an attribute with the given name exists, it is overwritten,
* otherwise a new attribute is created. Returns the attribute node
* that was either created or changed.
*
* @param pcszName
* @param pcszValue
* @return
*/
{
{
// libxml side: xmlNewProp creates an attribute
// C++ side: create an attribute node around it
const char *pcszKey;
// store
}
else
{
// overwrite existing libxml attribute node
// and fix our existing C++ side around it
}
return pattrReturn;
}
/**
* Sets the given attribute; overloaded version for int32_t.
*
* If an attribute with the given name exists, it is overwritten,
* otherwise a new attribute is created. Returns the attribute node
* that was either created or changed.
*
* @param pcszName
* @param i
* @return
*/
{
    /* The longest int32_t rendering, "-2147483648", is 11 characters plus the
     * terminator; the former 10-byte buffer could not hold every value.
     * NOTE(review): the formatting call itself is not visible in this view. */
    char szValue[12];
return p;
}
/**
* Sets the given attribute; overloaded version for uint32_t.
*
* If an attribute with the given name exists, it is overwritten,
* otherwise a new attribute is created. Returns the attribute node
* that was either created or changed.
*
* @param pcszName
* @param u
* @return
*/
{
    /* The longest uint32_t rendering, "4294967295", is 10 characters plus the
     * terminator; the former 10-byte buffer was one byte short.
     * NOTE(review): the formatting call itself is not visible in this view. */
    char szValue[11];
return p;
}
/**
* Sets the given attribute; overloaded version for int64_t.
*
* If an attribute with the given name exists, it is overwritten,
* otherwise a new attribute is created. Returns the attribute node
* that was either created or changed.
*
* @param pcszName
* @param i
* @return
*/
{
    /* The longest int64_t rendering, "-9223372036854775808", is 20 characters
     * plus the terminator; the former 20-byte buffer was one byte short.
     * NOTE(review): the formatting call itself is not visible in this view. */
    char szValue[21];
return p;
}
/**
* Sets the given attribute; overloaded version for uint64_t.
*
* If an attribute with the given name exists, it is overwritten,
* otherwise a new attribute is created. Returns the attribute node
* that was either created or changed.
*
* @param pcszName
* @param u
* @return
*/
{
    /* The longest uint64_t rendering, "18446744073709551615", is 20
     * characters plus the terminator; the former 20-byte buffer was one byte
     * short.
     * NOTE(review): the formatting call itself is not visible in this view. */
    char szValue[21];
return p;
}
/**
* Sets the given attribute to the given uint32_t, outputs a hexadecimal string.
*
* If an attribute with the given name exists, it is overwritten,
* otherwise a new attribute is created. Returns the attribute node
* that was either created or changed.
*
* @param pcszName
* @param u
* @return
*/
{
    /* A uint32_t in hexadecimal needs up to 8 digits; with a "0x" prefix that
     * is 10 characters plus the terminator, which the former 10-byte buffer
     * could not hold.
     * NOTE(review): the formatting call (and whether it emits a prefix) is
     * not visible in this view -- the larger buffer is safe either way. */
    char szValue[11];
return p;
}
/**
* Sets the given attribute; overloaded version for bool.
*
* If an attribute with the given name exists, it is overwritten,
* otherwise a new attribute is created. Returns the attribute node
* that was either created or changed.
*
* @param pcszName
* @param i
* @return
*/
{
}
/**
* Private constructor for a new attribute node. This one is special:
* in ppcszKey, it returns a pointer to a string buffer that should be
* used to index the attribute correctly with namespaces.
*
* @param pParent
* @param elmRoot
* @param plibAttr
* @param ppcszKey
*/
const char **ppcszKey)
: Node(IsAttribute,
NULL,
{
*ppcszKey = m_pcszName;
)
{
if ( !elmRoot.m_pcszNamespaceHref
)
{
// not default namespace:
}
}
}
NULL)
{
}
/*
* NodesLoop
*
*/
{
};
{
m = new Data;
}
{
delete m;
}
/**
* Handy convenience helper for looping over all child elements. Create an
* instance of NodesLoop on the stack and call this method until it returns
* NULL, like this:
* <code>
* xml::ElementNode node; // should point to an element
* xml::NodesLoop loop(node, "child"); // find all "child" elements under node
* const xml::ElementNode *pChild = NULL;
* while (pChild = loop.forAllNodes())
* ...;
* </code>
* @return
*/
{
{
++(m->it);
}
return pNode;
}
////////////////////////////////////////////////////////////////////////////////
//
// Document class
//
////////////////////////////////////////////////////////////////////////////////
{
Data()
{
plibDocument = NULL;
pRootElement = NULL;
}
/** Releases whatever the document still holds by delegating to reset(). */
~Data()
{
reset();
}
void reset()
{
if (plibDocument)
{
plibDocument = NULL;
}
if (pRootElement)
{
delete pRootElement;
pRootElement = NULL;
}
}
{
if (p->plibDocument)
{
1); // recursive == copy all
}
}
};
: m(new Data)
{
}
: m(new Data)
{
m->copyFrom(x.m);
}
{
m->reset();
m->copyFrom(x.m);
return *this;
}
{
delete m;
}
/**
* private method to refresh all internal structures after the internal pDocument
* has changed. Called from XmlFileParser::read(). m->reset() must have been
* called before to make sure all members except the internal pDocument are clean.
*/
{
}
/**
* Returns the root element of the document, or NULL if the document is empty.
* Const variant.
* @return
*/
{
return m->pRootElement;
}
/**
* Returns the root element of the document, or NULL if the document is empty.
* Non-const variant.
* @return
*/
{
return m->pRootElement;
}
/**
* Creates a new element node and sets it as the root element. This will
* only work if the document is empty; otherwise EDocumentNotEmpty is thrown.
*/
{
if (m->pRootElement || m->plibDocument)
throw EDocumentNotEmpty(RT_SRC_POS);
// libxml side: create document, create root node
(const xmlChar*)pcszRootElementName)))
// now wrap this in C++
return m->pRootElement;
}
////////////////////////////////////////////////////////////////////////////////
//
// XmlParserBase class
//
////////////////////////////////////////////////////////////////////////////////
{
m_ctxt = xmlNewParserCtxt();
}
{
}
////////////////////////////////////////////////////////////////////////////////
//
// XmlMemParser class
//
////////////////////////////////////////////////////////////////////////////////
: XmlParserBase()
{
}
{
}
/**
* Parse the given buffer and fills the given Document object with its contents.
* Throws XmlError on parsing errors.
*
* The document that is passed in will be reset before being filled if not empty.
*
* @param pvBuf in: memory buffer to parse.
* @param cbSize in: size of the memory buffer.
* @param strFilename in: name of file to parse.
* @param doc out: document to be reset and filled with data according to file contents.
*/
{
// global.setExternalEntityLoader(ExternalEntityLoader);
(const char*)pvBuf,
NULL, // encoding = auto
}
////////////////////////////////////////////////////////////////////////////////
//
// XmlMemWriter class
//
////////////////////////////////////////////////////////////////////////////////
{
}
{
}
{
int size;
}
////////////////////////////////////////////////////////////////////////////////
//
// XmlFileParser class
//
////////////////////////////////////////////////////////////////////////////////
/**
 * Private data of XmlFileParser.
 *
 * NOTE(review): no member declarations are visible in this view, although the
 * parser code assigns m->strXmlFilename -- confirm the members against the
 * full file.
 */
struct XmlFileParser::Data
{
/* Nothing to set up in the visible code. */
Data()
{
}
/* Nothing to tear down in the visible code. */
~Data()
{
}
};
: XmlParserBase(),
m(new Data())
{
}
{
delete m;
m = NULL;
}
/**
 * Common base of ReadContext and WriteContext.
 *
 * NOTE(review): the members and the signatures belonging to the three bodies
 * below are not visible in this view; only their (empty) braces are. Consult
 * the full file before relying on anything about this type.
 */
struct IOContext
{
{
}
{
}
{
}
};
/**
 * IOContext specialisation constructed from a file name.
 * Presumably the context used when reading a file -- TODO confirm against the
 * callers, which are not fully visible here.
 */
struct ReadContext : IOContext
{
/* NOTE(review): the initialiser list / body is empty in this view, so
 * pcszFilename is unused as far as the visible code shows. */
ReadContext(const char *pcszFilename)
{
}
};
/**
 * IOContext specialisation; presumably the writing-side counterpart of
 * ReadContext -- TODO confirm.
 *
 * NOTE(review): the signature belonging to the body below is not visible in
 * this view.
 */
struct WriteContext : IOContext
{
{
}
};
/**
* Reads the given file and fills the given Document object with its contents.
* Throws XmlError on parsing errors.
*
* The document that is passed in will be reset before being filled if not empty.
*
* @param strFilename in: name of file to parse.
* @param doc out: document to be reset and filled with data according to file contents.
*/
{
// global.setExternalEntityLoader(ExternalEntityLoader);
m->strXmlFilename = strFilename;
&context,
NULL, // encoding = auto
}
// static
{
/* To prevent throwing exceptions while inside libxml2 code, we catch
* them and forward to our level using a couple of variables. */
try
{
}
return -1 /* failure */;
}
{
/// @todo to be written
return -1;
}
////////////////////////////////////////////////////////////////////////////////
//
// XmlFileWriter class
//
////////////////////////////////////////////////////////////////////////////////
/**
 * Private data of XmlFileWriter.
 * NOTE(review): no members are visible in this view.
 */
struct XmlFileWriter::Data
{
};
{
m = new Data();
}
{
delete m;
}
{
/* serialize to the stream */
xmlIndentTreeOutput = 1;
xmlTreeIndentString = " ";
xmlSaveNoEmptyTags = 0;
&context,
NULL,
if (rc == -1)
{
/* look if there was a forwarded exception from the lower level */
// if (m->trappedErr.get() != NULL)
// m->trappedErr->rethrow();
/* there must be an exception from the Output implementation,
* otherwise the save operation must always succeed. */
}
}
{
if (!fSafe)
else
{
/* Empty string and directory spec must be avoided. */
/* Construct both filenames first to ease error handling. */
char szTmpFilename[RTPATH_MAX];
if (RT_FAILURE(rc))
char szPrevFilename[RTPATH_MAX];
if (RT_FAILURE(rc))
/* Write the XML document to the temporary file. */
/* Make a backup of any existing file (ignore failure). */
/* Commit the temporary file. Just leave the tmp file behind on failure. */
if (RT_FAILURE(rc))
/* Flush the directory changes (required on linux at least). */
AssertMsg(RT_SUCCESS(rc) || rc == VERR_NOT_SUPPORTED || rc == VERR_NOT_IMPLEMENTED, ("%Rrc\n", rc));
}
}
{
/* To prevent throwing exceptions while inside libxml2 code, we catch
* them and forward to our level using a couple of variables. */
try
{
}
return -1 /* failure */;
}
{
/// @todo to be written
return -1;
}
} // end namespace xml