nsReadableUtils.cpp revision 677833bc953b6cb418c701facbdcf4aa18d6c44e
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is mozilla.org code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2000
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Scott Collins <scc@mozilla.org> (original author)
*
* Alternatively, the contents of this file may be used under the terms of
* either of the GNU General Public License Version 2 or later (the "GPL"),
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include "nsReadableUtils.h"
#include "nsMemory.h"
#include "nsString.h"
#include "nsUTF8Utils.h"
void
{
}
void
{
}
void
{
if (aSource) {
}
}
void
{
if (aSource) {
}
}
void
{
}
void
{
}
void
{
}
void
{
}
void
{
// right now, this won't work on multi-fragment destinations
}
void
{
// right now, this won't work on multi-fragment destinations
}
void
{
if (aSource) {
}
}
void
{
if (aSource) {
}
}
void
{
if (count)
{
// Grow the buffer if we need to.
{
// aDest has enough room in the fragment just past the end
// of its old data that it can hold what we're about to
// append. Append using copy_string().
// All ready? Time to convert
{
NS_ERROR("Input invalid or incorrect length was calculated");
}
}
else
{
// This isn't the fastest way to do this, but it gets
// complicated to convert UTF16 into a fragmented UTF8
// string, so we'll take the easy way out here in this
// rare situation.
}
}
}
void
{
if (count)
{
// Grow the buffer if we need to.
{
// aDest has enough room in the fragment just past the end
// of its old data that it can hold what we're about to
// append. Append using copy_string().
// All ready? Time to convert
{
NS_ERROR("Input wasn't UTF8 or incorrect length was calculated");
}
}
else
{
// This isn't the fastest way to do this, but it gets
// complicated to convert parts of a UTF8 string into a
// UTF16 string, so we'll take the easy way out here in
// this rare situation.
}
}
}
void
{
if (aSource) {
}
}
void
{
if (aSource) {
}
}
/**
* A helper function that allocates a buffer of the desired character type big enough to hold a copy of the supplied string (plus a zero terminator).
*
* @param aSource a string you will eventually be making a copy of
* @return a new buffer (of the type specified by the second parameter) which you must free with |nsMemory::Free|.
*
*/
template <class FromStringT, class ToCharT>
inline
{
}
char*
{
copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter).write_terminator();
return result;
}
char*
{
if (aUTF8Count)
char *result = NS_STATIC_CAST(char*,
return result;
}
char*
{
// no conversion needed, just allocate a buffer of the correct length and copy into it
return result;
}
{
// no conversion needed, just allocate a buffer of the correct length and copy into it
return result;
}
{
copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter).write_terminator();
return result;
}
{
if (aUTF16Count)
return result;
}
{
copy_string(aSource.BeginReading(fromBegin).advance( PRInt32(aSrcOffset) ), aSource.BeginReading(fromEnd).advance( PRInt32(aSrcOffset+aLength) ), toBegin);
return aDest;
}
void
{
}
void
{
}
{
// Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character
// for each chunk of |aString|...
PRUint32 fragmentLength = 0;
{
// for each character in this chunk...
while ( c < fragmentEnd )
if ( *c++ & NOT_ASCII )
return PR_FALSE;
}
return PR_TRUE;
}
{
static const char NOT_ASCII = char(~0x7F);
// Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character
// for each chunk of |aString|...
PRUint32 fragmentLength = 0;
{
const char* fragmentEnd = c + fragmentLength;
// for each character in this chunk...
while ( c < fragmentEnd )
if ( *c++ & NOT_ASCII )
return PR_FALSE;
}
return PR_TRUE;
}
{
// for each chunk of |aString|...
PRUint32 fragmentLength = 0;
{
// for each character in this chunk...
while ( ptr < fragmentEnd )
{
PRUint8 c;
if (0 == state)
{
c = *ptr++;
if ( UTF8traits::isASCII(c) )
continue;
if ( c <= 0xC1 ) // [80-BF] where not expected, [C0-C1] for overlong.
return PR_FALSE;
else if ( UTF8traits::is2byte(c) )
state = 1;
else if ( UTF8traits::is3byte(c) )
{
state = 2;
if ( c == 0xE0 ) // to exclude E0[80-9F][80-BF]
{
olupper = 0x9F;
}
else if ( c == 0xED ) // ED[A0-BF][80-BF] : surrogate codepoint
{
slower = 0xA0;
}
else if ( c == 0xEF ) // EF BF [BE-BF] : non-character
}
else if ( c <= 0xF4 ) // XXX replace /w UTF8traits::is4byte when it's updated to exclude [F5-F7].(bug 199090)
{
state = 3;
if ( c == 0xF0 ) // to exclude F0[80-8F][80-BF]{2}
{
olupper = 0x8F;
}
else if ( c == 0xF4 ) // to exclude F4[90-BF][80-BF]
{
// actually not surrogates but codepoints beyond 0x10FFFF
slower = 0x90;
}
}
else
return PR_FALSE; // Not UTF-8 string
}
{
c = *ptr++;
--state;
// non-character : EF BF [BE-BF] or F[0-7] [89AB]F BF [BE-BF]
return PR_FALSE; // Not UTF-8 string
}
}
}
return !state; // state != 0 at the end indicates an invalid UTF-8 seq.
}
/**
* A character sink for in-place case conversion.
*
* NOTE(review): the method signature and the statements that actually change
* a character's case are not visible in this view; only the cursor advance
* and the return value are. Confirm the details against the full file.
*/
class ConvertToUpperCase
{
public:
// the sink works on plain |char| data
typedef char value_type;
{
// step the cursor to the next character
++cp;
}
// hands |aSourceLength| back to the caller
return aSourceLength;
}
};
void
{
}
void
{
char* start;
}
/**
* A character sink for copying with case conversion.
*
* NOTE(review): the constructor, the method signature, the condition guarding
* the |else| branch and the member declarations are not visible in this view;
* confirm the details against the full file.
*/
class CopyToUpperCase
{
public:
// the sink works on plain |char| data
typedef char value_type;
{
}
{
else
++aSource;
// step the cursor |cp| along with the source position
++cp;
}
// hands |len| back to the caller
return len;
}
protected:
};
void
{
}
/**
* A character sink for case conversion; presumably the lower-case counterpart
* of |ConvertToUpperCase|, judging by the class name and the matching shape
* of the visible code.
*
* NOTE(review): the method signature and the statements that actually change
* a character's case are not visible in this view; confirm against the full file.
*/
class ConvertToLowerCase
{
public:
// the sink works on plain |char| data
typedef char value_type;
{
// step the cursor to the next character
++cp;
}
// hands |aSourceLength| back to the caller
return aSourceLength;
}
};
void
{
}
void
{
char* start;
}
/**
* A character sink for copying with case conversion; presumably to lower
* case, judging by the class name.
*
* NOTE(review): the constructor, the method signature, the condition guarding
* the |else| branch and the member declarations are not visible in this view;
* confirm the details against the full file.
*/
class CopyToLowerCase
{
public:
// the sink works on plain |char| data
typedef char value_type;
{
}
{
else
++aSource;
// step the cursor |cp| along with the source position
++cp;
}
// hands |len| back to the caller
return len;
}
protected:
};
void
{
}
/**
* Forward search for |aPattern| inside the range delimited by |aSearchStart| and |aSearchEnd|.
*
* The search alternates between a scan that skips ahead to a candidate
* position and a verification loop that checks the candidate against the
* rest of the pattern. |CaseInsensitiveFindInReadable| in this file
* forwards to this routine.
*
* @param aPattern the pattern being looked for
* @param aSearchStart start of the range to search; advanced by this routine as candidate positions are rejected
* @param aSearchEnd end of the range to search
* @param compare comparator object supplied by the caller
* @return the value of |found_it|
*
* NOTE(review): the declarations of |found_it|, |testPattern|, |testSearch|
* and |aPatternEnd|, the comparisons themselves, and the statements that
* record a successful match are not visible in this view -- confirm how the
* two iterators are left on success before relying on them.
*/
FindInReadable_Impl( const StringT& aPattern, IteratorT& aSearchStart, IteratorT& aSearchEnd, const Comparator& compare )
{
// only bother searching at all if we're given a non-empty range to search
if ( aSearchStart != aSearchEnd )
{
// outer loop keeps searching till we find it or run out of string to search
while ( !found_it )
{
// fast inner loop (that's what it's called, not what it is) looks for a potential match
while ( aSearchStart != aSearchEnd &&
++aSearchStart;
// if we broke out of the `fast' loop because we're out of string ... we're done: no match
if ( aSearchStart == aSearchEnd )
break;
// otherwise, we're at a potential match, let's see if we really hit one
// slow inner loop verifies the potential match (found by the `fast' loop) at the current position
for(;;)
{
// we already compared the first character in the outer loop,
// so we'll advance before the next comparison
++testPattern;
++testSearch;
// if we verified all the way to the end of the pattern, then we found it!
if ( testPattern == aPatternEnd )
{
break;
}
// if we got to end of the string we're searching before we hit the end of the
// pattern, we'll never find what we're looking for
if ( testSearch == aSearchEnd )
{
break;
}
// else if we mismatched ... it's time to advance to the next search position
// and get back into the `fast' loop
{
// move one position past the rejected candidate, then leave the verification loop
++aSearchStart;
break;
}
}
}
}
// report the outcome recorded in |found_it|
return found_it;
}
FindInReadable( const nsAString& aPattern, nsAString::const_iterator& aSearchStart, nsAString::const_iterator& aSearchEnd, const nsStringComparator& aComparator )
{
}
FindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd, const nsCStringComparator& aComparator)
{
}
/**
* Searches for |aPattern| in the range delimited by |aSearchStart| and
* |aSearchEnd|, comparing characters with |nsCaseInsensitiveCStringComparator|.
*
* This is a thin wrapper: all work is done by |FindInReadable_Impl|, and its
* result is returned unchanged. Both iterators are passed on by non-const
* reference, so the shared implementation may move them.
*
* @param aPattern the pattern being looked for
* @param aSearchStart start of the range to search
* @param aSearchEnd end of the range to search
* @return whatever |FindInReadable_Impl| returns for these arguments
*/
CaseInsensitiveFindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd )
{
// a temporary comparator object is constructed for the duration of the call
return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, nsCaseInsensitiveCStringComparator());
}
/**
* Finds the last occurrence of |aPattern| in a range of an |nsAString|.
*
* This implementation is simple, but does too much work.
* It searches the entire string from left to right, and returns the last match found, if any.
* This implementation will be replaced when I get |reverse_iterator|s working.
*
* @param aPattern the pattern being looked for
* @param aSearchStart start of the range to search (passed by non-const reference)
* @param aSearchEnd end of the range to search (passed by non-const reference)
* @param aComparator comparator supplied by the caller
* @return the value of |found_it|
*
* NOTE(review): the declarations of |found_it|, |searchStart| and |searchEnd|,
* the inner search call, and the statements that store the result range are
* not visible in this view; confirm against the full file.
*/
RFindInReadable( const nsAString& aPattern, nsAString::const_iterator& aSearchStart, nsAString::const_iterator& aSearchEnd, const nsStringComparator& aComparator)
{
// keep going until the working range is exhausted
while ( searchStart != searchEnd )
{
{
// this is the best match so far, so remember it
// ...and get ready to search some more
// (it's tempting to set |searchStart=searchEnd| ... but that misses overlapping patterns)
++searchStart;
}
}
// if we never found it, return an empty range
if ( !found_it )
return found_it;
}
/**
* Finds the last occurrence of |aPattern| in a range of an |nsACString|.
*
* As the comments in the body describe, the range is walked from left to
* right, the most recent match is remembered, and the walk resumes one
* position later so that overlapping occurrences are not missed.
*
* @param aPattern the pattern being looked for
* @param aSearchStart start of the range to search (passed by non-const reference)
* @param aSearchEnd end of the range to search (passed by non-const reference)
* @param aComparator comparator supplied by the caller
* @return the value of |found_it|
*
* NOTE(review): the declarations of |found_it|, |searchStart| and |searchEnd|,
* the inner search call, and the statements that store the result range are
* not visible in this view; confirm against the full file.
*/
RFindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd, const nsCStringComparator& aComparator)
{
// keep going until the working range is exhausted
while ( searchStart != searchEnd )
{
{
// this is the best match so far, so remember it
// ...and get ready to search some more
// (it's tempting to set |searchStart=searchEnd| ... but that misses overlapping patterns)
++searchStart;
}
}
// if we never found it, return an empty range
if ( !found_it )
return found_it;
}
/**
* Looks for the character |aChar| starting at |aSearchStart|.
*
* The lookup is delegated to |nsCharTraits<PRUnichar>::find|, which is given
* the raw pointer behind |aSearchStart|, a length of |fragmentLength|
* characters, and |aChar|.
*
* @param aChar the character being looked for
* @param aSearchStart where to begin looking (passed by non-const reference)
* @param aSearchEnd end of the range (const; not modified here)
* @return |PR_TRUE| when |find| yields a non-null pointer, |PR_FALSE| otherwise
*
* NOTE(review): the computation of |fragmentLength| and whatever happens to
* |aSearchStart| when the character is found are not visible in this view;
* confirm against the full file.
*/
FindCharInReadable( PRUnichar aChar, nsAString::const_iterator& aSearchStart, const nsAString::const_iterator& aSearchEnd )
{
const PRUnichar* charFoundAt = nsCharTraits<PRUnichar>::find(aSearchStart.get(), fragmentLength, aChar);
// a non-null pointer means the character was located
if ( charFoundAt ) {
return PR_TRUE;
}
// the character was not found
return PR_FALSE;
}
/**
* Looks for the character |aChar| in a range of an |nsACString|.
*
* @param aChar the character being looked for
* @param aSearchStart where to begin looking (passed by non-const reference)
* @param aSearchEnd end of the range (const; not modified here)
* @return |PR_TRUE| when |charFoundAt| is non-null, |PR_FALSE| otherwise
*
* NOTE(review): the statement that computes |charFoundAt| and whatever happens
* to |aSearchStart| when the character is found are not visible in this view;
* confirm against the full file.
*/
FindCharInReadable( char aChar, nsACString::const_iterator& aSearchStart, const nsACString::const_iterator& aSearchEnd )
{
// a non-null pointer means the character was located
if ( charFoundAt ) {
return PR_TRUE;
}
// the character was not found
return PR_FALSE;
}
{
++count;
}
++begin;
}
return count;
}
char aChar )
{
++count;
}
++begin;
}
return count;
}
const nsStringComparator& aComparator )
{
return PR_FALSE;
}
const nsCStringComparator& aComparator )
{
return PR_FALSE;
}
const nsStringComparator& aComparator )
{
return PR_FALSE;
}
const nsCStringComparator& aComparator )
{
return PR_FALSE;
}
/**
* A sink class, parameterised on the character type |CharT|, whose hash code
* has type |PRUint32| and starts out as 0.
*
* NOTE(review): the method signature, the hashing statements and the member
* declarations are not visible in this view; presumably the hash is
* accumulated into |mHashCode| as characters are fed in -- confirm against
* the full file.
*/
template <class CharT>
class CalculateHashCode
{
public:
// type of the resulting hash code
typedef PRUint32 hashcode_type;
// type of the characters fed to the sink
typedef CharT value_type;
// start from a zero hash code
CalculateHashCode() : mHashCode(0) { }
{
// hands |N| back to the caller
return N;
}
private:
};
{
return sink.GetHashCode();
}
{
return sink.GetHashCode();
}
{
return sEmpty;
}
{
return sEmpty;
}