/* ministring.h revision bbc36e83409c0609b551ce09d272eb30a894e612 */
/** @file
* IPRT - Mini C++ string class.
*/
/*
* Copyright (C) 2007-2009 Oracle Corporation
*
* This file is part of VirtualBox Open Source Edition (OSE), as
* available from http://www.virtualbox.org. This file is free software;
* you can redistribute it and/or modify it under the terms of the GNU
* General Public License (GPL) as published by the Free Software
* Foundation, in version 2 as it comes in the "COPYING" file of the
* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
*
* The contents of this file may alternatively be used under the terms
* of the Common Development and Distribution License Version 1.0
* (CDDL) only, as it comes in the "COPYING.CDDL" file of the
* VirtualBox OSE distribution, in which case the provisions of the
* CDDL are applicable instead of those of the GPL.
*
* You may elect to license modified versions of this file under the
* terms and conditions of either the GPL or the CDDL or both.
*/
#ifndef ___VBox_ministring_h
#define ___VBox_ministring_h
#include <iprt/mem.h>
#include <iprt/string.h>
#include <new>
namespace iprt
{
/**
* @brief Mini C++ string class.
*
* "MiniString" is a small C++ string class that does not depend on anything
* else except IPRT memory management functions. Semantics are like in
* std::string, except it can do a lot less.
*
* Note that MiniString does not differentiate between NULL strings and
* empty strings. In other words, MiniString("") and MiniString(NULL)
* behave the same. In both cases, MiniString allocates no memory, reports
* a zero length and zero allocated bytes for both, and returns an empty
* C string from c_str().
*/
#ifdef VBOX
/** @remarks Much of the code in here used to be in com::Utf8Str so that
* com::Utf8Str can now derive from MiniString and only contain code
* that is COM-specific, such as com::Bstr conversions. Compared to
* the old Utf8Str though, MiniString always knows the length of its
* member string and the size of the buffer so it can use memcpy()
* instead of strdup().
*/
#endif
class RT_DECL_CLASS MiniString
{
public:
/**
* Creates an empty string that has no memory allocated.
*/
MiniString()
: m_psz(NULL),
m_cbLength(0),
m_cbAllocated(0)
{
}
/**
* Creates a copy of another MiniString.
*
* This allocates s.length() + 1 bytes for the new instance, unless s is empty.
*
* @param s The source string.
*
* @throws std::bad_alloc
*/
MiniString(const MiniString &s)
{
copyFrom(s);
}
/**
* Creates a copy of a C string.
*
* This allocates strlen(pcsz) + 1 bytes for the new instance, unless s is empty.
*
* @param pcsz The source string.
*
* @throws std::bad_alloc
*/
MiniString(const char *pcsz)
{
copyFrom(pcsz);
}
/**
* Destructor.
*/
virtual ~MiniString()
{
cleanup();
}
/**
* String length in bytes.
*
* Returns the length of the member string, which is equal to strlen(c_str()).
* In other words, this does not count unicode codepoints but returns the number
* of bytes. This is always cached so calling this is cheap and requires no
* strlen() invocation.
*
* @returns m_cbLength.
*/
size_t length() const
{
return m_cbLength;
}
/**
* The allocated buffer size (in bytes).
*
* Returns the number of bytes allocated in the internal string buffer, which is
* at least length() + 1 if length() > 0; for an empty string, this returns 0.
*
* @returns m_cbAllocated.
*/
size_t capacity() const
{
return m_cbAllocated;
}
/**
* Make sure at that least cb of buffer space is reserved.
*
* Requests that the contained memory buffer have at least cb bytes allocated.
* This may expand or shrink the string's storage, but will never truncate the
* contained string. In other words, cb will be ignored if it's smaller than
* length() + 1.
*
* @param cb New minimum size (in bytes) of member memory buffer.
*
* @throws std::bad_alloc On allocation error. The object is left unchanged.
*/
void reserve(size_t cb)
{
if ( cb != m_cbAllocated
&& cb > m_cbLength + 1
)
{
int vrc = RTStrRealloc(&m_psz, cb);
if (RT_SUCCESS(vrc))
m_cbAllocated = cb;
#ifdef RT_EXCEPTIONS_ENABLED
else
throw std::bad_alloc();
#endif
}
}
/**
* Deallocates all memory.
*/
inline void setNull()
{
cleanup();
}
/**
* Assigns a copy of pcsz to "this".
*
* @param pcsz The source string.
*
* @throws std::bad_alloc On allocation failure. The object is left describing
* a NULL string.
*
* @returns Reference to the object.
*/
MiniString &operator=(const char *pcsz)
{
if (m_psz != pcsz)
{
cleanup();
copyFrom(pcsz);
}
return *this;
}
/**
* Assigns a copy of s to "this".
*
* @param s The source string.
*
* @throws std::bad_alloc On allocation failure. The object is left describing
* a NULL string.
*
* @returns Reference to the object.
*/
MiniString &operator=(const MiniString &s)
{
if (this != &s)
{
cleanup();
copyFrom(s);
}
return *this;
}
/**
* Appends the string "that" to "this".
*
* @param that The string to append.
*
* @throws std::bad_alloc On allocation error. The object is left unchanged.
*
* @returns Reference to the object.
*/
MiniString &append(const MiniString &that);
/**
* Appends the string "that" to "this".
*
* @param pszThat The C string to append.
*
* @throws std::bad_alloc On allocation error. The object is left unchanged.
*
* @returns Reference to the object.
*/
MiniString &append(const char *pszThat);
/**
* Appends the given character to "this".
*
* @param c The character to append.
*
* @throws std::bad_alloc On allocation error. The object is left unchanged.
*
* @returns Reference to the object.
*/
MiniString &append(char c);
/**
* Index operator.
*
* Returns the byte at the given index, or a null byte if the index is not
* smaller than length(). This does _not_ count codepoints but simply points
* into the member C string.
*
* @param i The index into the string buffer.
* @returns char at the index or null.
*/
inline char operator[](size_t i) const
{
if (i < length())
return m_psz[i];
return '\0';
}
/**
* Returns the contained string as a C-style const char* pointer.
* This never returns NULL; if the string is empty, this returns a
* pointer to static null byte.
*
* @returns const pointer to C-style string.
*/
inline const char *c_str() const
{
return (m_psz) ? m_psz : "";
}
/**
* Like c_str(), for compatibility with lots of VirtualBox Main code.
*
* @returns const pointer to C-style string.
*/
inline const char *raw() const
{
return (m_psz) ? m_psz : "";
}
/**
* Returns a non-const raw pointer that allows to modify the string directly.
* As opposed to c_str() and raw(), this DOES return NULL for an empty string
* because we cannot return a non-const pointer to a static "" global.
*
* @warning
* -# Be sure not to modify data beyond the allocated memory! Call
* capacity() to find out how large that buffer is.
* -# After any operation that modifies the length of the string,
* you _must_ call MiniString::jolt(), or subsequent copy operations
* may go nowhere. Better not use mutableRaw() at all.
*/
char *mutableRaw()
{
return m_psz;
}
/**
* Clean up after using mutableRaw.
*
* Intended to be called after something has messed with the internal string
* buffer (e.g. after using mutableRaw() or Utf8Str::asOutParam()). Resets the
* internal lengths correctly. Otherwise subsequent copy operations may go
* nowhere.
*/
void jolt()
{
if (m_psz)
{
m_cbLength = strlen(m_psz);
m_cbAllocated = m_cbLength + 1; /* (Required for the Utf8Str::asOutParam case) */
}
else
{
m_cbLength = 0;
m_cbAllocated = 0;
}
}
/**
* Returns @c true if the member string has no length.
*
* This is @c true for instances created from both NULL and "" input
* strings.
*
* This states nothing about how much memory might be allocated.
*
* @returns @c true if empty, @c false if not.
*/
bool isEmpty() const
{
return length() == 0;
}
/**
* Returns @c false if the member string has no length.
*
* This is @c false for instances created from both NULL and "" input
* strings.
*
* This states nothing about how much memory might be allocated.
*
* @returns @c false if empty, @c true if not.
*/
bool isNotEmpty() const
{
return length() != 0;
}
/** Case sensitivity selector. */
enum CaseSensitivity
{
CaseSensitive,
CaseInsensitive
};
/**
* Compares the member string to pcsz.
* @param pcsz
* @param cs Whether comparison should be case-sensitive.
* @return
*/
int compare(const char *pcsz, CaseSensitivity cs = CaseSensitive) const
{
if (m_psz == pcsz)
return 0;
if (m_psz == NULL)
return -1;
if (pcsz == NULL)
return 1;
if (cs == CaseSensitive)
return ::RTStrCmp(m_psz, pcsz);
else
return ::RTStrICmp(m_psz, pcsz);
}
int compare(const MiniString &that, CaseSensitivity cs = CaseSensitive) const
{
return compare(that.m_psz, cs);
}
/** @name Comparison operators.
* @{ */
bool operator==(const MiniString &that) const { return !compare(that); }
bool operator!=(const MiniString &that) const { return !!compare(that); }
bool operator<( const MiniString &that) const { return compare(that) < 0; }
bool operator>( const MiniString &that) const { return compare(that) > 0; }
bool operator==(const char *that) const { return !compare(that); }
bool operator!=(const char *that) const { return !!compare(that); }
bool operator<( const char *that) const { return compare(that) < 0; }
bool operator>( const char *that) const { return compare(that) > 0; }
/** @} */
/** Max string offset value.
*
* When returned by a method, this indicates failure. When taken as input,
* typically a default, it means all the way to the string terminator.
*/
static const size_t npos;
/**
* Find the given substring.
*
* Looks for pcszFind in "this" starting at "pos" and returns its position,
* counting from the beginning of "this" at 0.
*
* @param pcszFind The substring to find.
* @param pos The (byte) offset into the string buffer to start
* searching.
*
* @returns 0 based position of pcszFind. npos if not found.
*/
size_t find(const char *pcszFind, size_t pos = 0) const;
/**
* Returns a substring of "this" as a new Utf8Str.
*
* Works exactly like its equivalent in std::string except that this interprets
* pos and n as unicode codepoints instead of bytes. With the default
* parameters "0" and "npos", this always copies the entire string.
*
* @param pos Index of first unicode codepoint to copy from
* "this", counting from 0.
* @param n Number of unicode codepoints to copy, starting with
* the one at "pos". The copying will stop if the null
* terminator is encountered before n codepoints have
* been copied.
*
* @remarks This works on code points, not bytes!
*/
iprt::MiniString substr(size_t pos = 0, size_t n = npos) const;
/**
* Returns true if "this" ends with "that".
*
* @param that Suffix to test for.
* @param cs Case sensitivity selector.
* @returns true if match, false if mismatch.
*/
bool endsWith(const iprt::MiniString &that, CaseSensitivity cs = CaseSensitive) const;
/**
* Returns true if "this" begins with "that".
* @param that Prefix to test for.
* @param cs Case sensitivity selector.
* @returns true if match, false if mismatch.
*/
bool startsWith(const iprt::MiniString &that, CaseSensitivity cs = CaseSensitive) const;
/**
* Returns true if "this" contains "that" (strstr).
*
* @param that Substring to look for.
* @param cs Case sensitivity selector.
* @returns true if match, false if mismatch.
*/
bool contains(const iprt::MiniString &that, CaseSensitivity cs = CaseSensitive) const;
/**
* Attempts to convert the member string into an 64-bit integer.
*
* @returns 64-bit unsigned number on success.
* @returns 0 on failure.
*/
int64_t toInt64() const
{
return RTStrToInt64(m_psz);
}
/**
* Attempts to convert the member string into an unsigned 64-bit integer.
*
* @returns 64-bit unsigned number on success.
* @returns 0 on failure.
*/
uint64_t toUInt64() const
{
return RTStrToUInt64(m_psz);
}
/**
* Attempts to convert the member string into an unsigned 64-bit integer.
*
* @param i Where to return the value on success.
* @returns IPRT error code, see RTStrToInt64.
*/
int toInt(uint64_t &i) const;
/**
* Attempts to convert the member string into an unsigned 32-bit integer.
*
* @param i Where to return the value on success.
* @returns IPRT error code, see RTStrToInt32.
*/
int toInt(uint32_t &i) const;
protected:
/**
* Hide operator bool() to force people to use isEmpty() explicitly.
*/
operator bool() const;
/**
* Destructor implementation, also used to clean up in operator=() before
* assigning a new string.
*/
void cleanup()
{
if (m_psz)
{
RTStrFree(m_psz);
m_psz = NULL;
m_cbLength = 0;
m_cbAllocated = 0;
}
}
/**
* Protected internal helper to copy a string. This ignores the previous object
* state, so either call this from a constructor or call cleanup() first.
*
* copyFrom() unconditionally sets the members to a copy of the given other
* strings and makes no assumptions about previous contents. Can therefore be
* used both in copy constructors, when member variables have no defined value,
* and in assignments after having called cleanup().
*
* This variant copies from another MiniString and is fast since
* the length of the source string is known.
*
* @param s The source string.
*
* @throws std::bad_alloc On allocation failure. The object is left describing
* a NULL string.
*/
void copyFrom(const MiniString &s)
{
if ((m_cbLength = s.m_cbLength))
{
m_cbAllocated = m_cbLength + 1;
m_psz = (char *)RTStrAlloc(m_cbAllocated);
if (RT_LIKELY(m_psz))
memcpy(m_psz, s.m_psz, m_cbAllocated); // include 0 terminator
else
{
m_cbLength = 0;
m_cbAllocated = 0;
#ifdef RT_EXCEPTIONS_ENABLED
throw std::bad_alloc();
#endif
}
}
else
{
m_cbAllocated = 0;
m_psz = NULL;
}
}
/**
* Protected internal helper to copy a string. This ignores the previous object
* state, so either call this from a constructor or call cleanup() first.
*
* See copyFrom() above.
*
* This variant copies from a C string and needs to call strlen()
* on it. It's therefore slower than the one above.
*
* @param pcsz The source string.
*
* @throws std::bad_alloc On allocation failure. The object is left describing
* a NULL string.
*/
void copyFrom(const char *pcsz)
{
if (pcsz && *pcsz)
{
m_cbLength = strlen(pcsz);
m_cbAllocated = m_cbLength + 1;
m_psz = (char *)RTStrAlloc(m_cbAllocated);
if (RT_LIKELY(m_psz))
memcpy(m_psz, pcsz, m_cbAllocated); // include 0 terminator
else
{
m_cbLength = 0;
m_cbAllocated = 0;
#ifdef RT_EXCEPTIONS_ENABLED
throw std::bad_alloc();
#endif
}
}
else
{
m_cbLength = 0;
m_cbAllocated = 0;
m_psz = NULL;
}
}
char *m_psz; /**< The string buffer. */
size_t m_cbLength; /**< strlen(m_psz) - i.e. no terminator included. */
size_t m_cbAllocated; /**< Size of buffer that m_psz points to; at least m_cbLength + 1. */
};
} // namespace iprt
#endif