nsEscape.cpp revision 677833bc953b6cb418c701facbdcf4aa18d6c44e
0N/A/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
5556N/A/* ***** BEGIN LICENSE BLOCK *****
0N/A * Version: MPL 1.1/GPL 2.0/LGPL 2.1
0N/A *
0N/A * The contents of this file are subject to the Mozilla Public License Version
0N/A * 1.1 (the "License"); you may not use this file except in compliance with
2362N/A * the License. You may obtain a copy of the License at
0N/A * http://www.mozilla.org/MPL/
2362N/A *
0N/A * Software distributed under the License is distributed on an "AS IS" basis,
0N/A * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
0N/A * for the specific language governing rights and limitations under the
0N/A * License.
0N/A *
0N/A * The Original Code is mozilla.org code.
0N/A *
0N/A * The Initial Developer of the Original Code is
0N/A * Netscape Communications Corporation.
0N/A * Portions created by the Initial Developer are Copyright (C) 1998
0N/A * the Initial Developer. All Rights Reserved.
2362N/A *
2362N/A * Contributor(s):
2362N/A *
0N/A * Alternatively, the contents of this file may be used under the terms of
0N/A * either of the GNU General Public License Version 2 or later (the "GPL"),
0N/A * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
0N/A * in which case the provisions of the GPL or the LGPL are applicable instead
0N/A * of those above. If you wish to allow use of your version of this file only
0N/A * under the terms of either the GPL or the LGPL, and not to allow others to
0N/A * use your version of this file under the terms of the MPL, indicate your
0N/A * decision by deleting the provisions above and replace them with the notice
0N/A * and other provisions required by the GPL or the LGPL. If you do not delete
5556N/A * the provisions above, a recipient may use your version of this file under
5556N/A * the terms of any one of the MPL, the GPL or the LGPL.
0N/A *
0N/A * ***** END LICENSE BLOCK ***** */
0N/A
853N/A// First checked in on 98/12/03 by John R. McMullen, derived from net.h/mkparse.c.
4151N/A
0N/A#include "nsEscape.h"
0N/A#include "nsMemory.h"
0N/A#include "nsCRT.h"
0N/A#include "nsReadableUtils.h"
0N/A
/* Character-class table consulted through IS_OK(): a byte may be copied
** unescaped when its entry shares at least one bit with the escape mask.
**
**   value 1   xalpha   -- the alphas
**   value 2   xpalpha  -- as xalpha, but converts spaces to plus and
**                         plus to %2B
**   value 4   path     -- as xalpha, but doesn't escape '/'
**                         (an older note labelled this "Bit 3"; the table
**                         itself stores it as value 4, see the '/' entry)
**
** '@' is 0 rather than 7 so that '@' can be escaped in usernames and
** passwords in publishing.
** Entries after the last explicit initializer are zero-initialized, so no
** byte >= 0x80 is ever allowed through unescaped.
*/
const int netCharType[256] =
{
/*  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F                          */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0x                     */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 1x                     */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 4, 0, 7, 7, 4,  /* 2x   !"#$%&'()*+,-./   */
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, 0,  /* 3x  0123456789:;<=>?   */
    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,  /* 4x  @ABCDEFGHIJKLMNO   */
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 7,  /* 5x  PQRSTUVWXYZ[\]^_   */
    0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,  /* 6x  `abcdefghijklmno   */
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0,  /* 7x  pqrstuvwxyz{|}~ DEL */
    0                                                /* 0x80; the rest are 0   */
};
0N/A
/* Decode one hexadecimal digit character ('0'-'9', 'A'-'F', 'a'-'f') into
 * its numeric value (0-15). Any other character yields 0.
 *
 * The parameter is parenthesized at every use so that an expression argument
 * (for example a conditional expression) expands correctly. It is still
 * evaluated more than once, so the argument must not have side effects.
 */
#define UNHEX(C) \
    (((C) >= '0' && (C) <= '9') ? (C) - '0' : \
    (((C) >= 'A' && (C) <= 'F') ? (C) - 'A' + 10 : \
    (((C) >= 'a' && (C) <= 'f') ? (C) - 'a' + 10 : 0)))


/* Non-zero when netCharType permits C to be copied unescaped for the current
 * escape mask. Relies on a variable named `mask` being in scope at the point
 * of use; C must already be in the range 0..255.
 */
#define IS_OK(C) (netCharType[((unsigned int) (C))] & (mask))

/* The character that introduces a two-digit hexadecimal escape. */
#define HEX_ESCAPE '%'
0N/A
0N/A//----------------------------------------------------------------------------------------
0N/Astatic char* nsEscapeCount(
0N/A const char * str,
0N/A PRInt32 len,
0N/A nsEscapeMask mask,
0N/A PRInt32* out_len)
0N/A//----------------------------------------------------------------------------------------
581N/A{
0N/A if (!str)
0N/A return 0;
4566N/A
4566N/A int i, extra = 0;
4566N/A static const char hexChars[] = "0123456789ABCDEF";
4566N/A
0N/A register const unsigned char* src = (const unsigned char *) str;
0N/A for (i = 0; i < len; i++)
0N/A {
0N/A if (!IS_OK(*src++))
0N/A extra += 2; /* the escape, plus an extra byte for each nibble */
0N/A }
0N/A
0N/A char* result = (char *)nsMemory::Alloc(len + extra + 1);
0N/A if (!result)
0N/A return 0;
0N/A
0N/A register unsigned char* dst = (unsigned char *) result;
0N/A src = (const unsigned char *) str;
0N/A if (mask == url_XPAlphas)
0N/A {
257N/A for (i = 0; i < len; i++)
257N/A {
0N/A unsigned char c = *src++;
257N/A if (IS_OK(c))
257N/A *dst++ = c;
257N/A else if (c == ' ')
5556N/A *dst++ = '+'; /* convert spaces to pluses */
5556N/A else
5556N/A {
5556N/A *dst++ = HEX_ESCAPE;
257N/A *dst++ = hexChars[c >> 4]; /* high nibble */
0N/A *dst++ = hexChars[c & 0x0f]; /* low nibble */
0N/A }
0N/A }
0N/A }
0N/A else
0N/A {
0N/A for (i = 0; i < len; i++)
0N/A {
0N/A unsigned char c = *src++;
0N/A if (IS_OK(c))
0N/A *dst++ = c;
0N/A else
0N/A {
0N/A *dst++ = HEX_ESCAPE;
0N/A *dst++ = hexChars[c >> 4]; /* high nibble */
0N/A *dst++ = hexChars[c & 0x0f]; /* low nibble */
0N/A }
0N/A }
0N/A }
0N/A
0N/A *dst = '\0'; /* tack on eos */
0N/A if(out_len)
0N/A *out_len = dst - (unsigned char *) result;
0N/A return result;
0N/A}
0N/A
0N/A//----------------------------------------------------------------------------------------
0N/ANS_COM char* nsEscape(const char * str, nsEscapeMask mask)
0N/A//----------------------------------------------------------------------------------------
0N/A{
0N/A if(!str)
0N/A return NULL;
0N/A return nsEscapeCount(str, (PRInt32)strlen(str), mask, NULL);
0N/A}
0N/A
0N/A//----------------------------------------------------------------------------------------
0N/ANS_COM char* nsUnescape(char * str)
0N/A//----------------------------------------------------------------------------------------
0N/A{
0N/A nsUnescapeCount(str);
0N/A return str;
0N/A}
4566N/A
4566N/A//----------------------------------------------------------------------------------------
4566N/ANS_COM PRInt32 nsUnescapeCount(char * str)
4566N/A//----------------------------------------------------------------------------------------
0N/A{
5218N/A register char *src = str;
0N/A register char *dst = str;
0N/A static const char hexChars[] = "0123456789ABCDEFabcdef";
0N/A
0N/A char c1[] = " ";
0N/A char c2[] = " ";
0N/A char* const pc1 = c1;
0N/A char* const pc2 = c2;
0N/A
0N/A while (*src)
257N/A {
4566N/A c1[0] = *(src+1);
0N/A if (*(src+1) == '\0')
0N/A c2[0] = '\0';
0N/A else
0N/A c2[0] = *(src+2);
0N/A
0N/A if (*src != HEX_ESCAPE || PL_strpbrk(pc1, hexChars) == 0 ||
0N/A PL_strpbrk(pc2, hexChars) == 0 )
0N/A *dst++ = *src++;
0N/A else
0N/A {
0N/A src++; /* walk over escape */
0N/A if (*src)
0N/A {
0N/A *dst = UNHEX(*src) << 4;
0N/A src++;
0N/A }
0N/A if (*src)
0N/A {
0N/A *dst = (*dst + UNHEX(*src));
0N/A src++;
0N/A }
4566N/A dst++;
4566N/A }
4566N/A }
4566N/A
4566N/A *dst = 0;
4566N/A return (int)(dst - str);
4566N/A
4566N/A} /* NET_UnEscapeCnt */
4566N/A
0N/A
0N/ANS_COM char *
0N/AnsEscapeHTML(const char * string)
0N/A{
0N/A /* XXX Hardcoded max entity len. The +1 is for the trailing null. */
0N/A char *rv = (char *) nsMemory::Alloc(strlen(string) * 6 + 1);
0N/A char *ptr = rv;
0N/A
0N/A if(rv)
0N/A {
0N/A for(; *string != '\0'; string++)
0N/A {
0N/A if(*string == '<')
0N/A {
0N/A *ptr++ = '&';
0N/A *ptr++ = 'l';
4566N/A *ptr++ = 't';
0N/A *ptr++ = ';';
0N/A }
0N/A else if(*string == '>')
0N/A {
0N/A *ptr++ = '&';
0N/A *ptr++ = 'g';
0N/A *ptr++ = 't';
0N/A *ptr++ = ';';
4566N/A }
0N/A else if(*string == '&')
0N/A {
0N/A *ptr++ = '&';
0N/A *ptr++ = 'a';
0N/A *ptr++ = 'm';
0N/A *ptr++ = 'p';
0N/A *ptr++ = ';';
0N/A }
0N/A else if (*string == '"')
0N/A {
0N/A *ptr++ = '&';
0N/A *ptr++ = 'q';
0N/A *ptr++ = 'u';
0N/A *ptr++ = 'o';
0N/A *ptr++ = 't';
0N/A *ptr++ = ';';
0N/A }
0N/A else if (*string == '\'')
0N/A {
0N/A *ptr++ = '&';
0N/A *ptr++ = '#';
0N/A *ptr++ = '3';
0N/A *ptr++ = '9';
0N/A *ptr++ = ';';
0N/A }
0N/A else
0N/A {
0N/A *ptr++ = *string;
0N/A }
0N/A }
0N/A *ptr = '\0';
0N/A }
0N/A
0N/A return(rv);
0N/A}
0N/A
0N/ANS_COM PRUnichar *
0N/AnsEscapeHTML2(const PRUnichar *aSourceBuffer, PRInt32 aSourceBufferLen)
0N/A{
0N/A // if the caller didn't calculate the length
0N/A if (aSourceBufferLen == -1) {
0N/A aSourceBufferLen = nsCRT::strlen(aSourceBuffer); // ...then I will
0N/A }
0N/A
0N/A /* XXX Hardcoded max entity len. */
0N/A PRUnichar *resultBuffer = (PRUnichar *)nsMemory::Alloc(aSourceBufferLen *
0N/A 6 * sizeof(PRUnichar) + sizeof(PRUnichar('\0')));
0N/A PRUnichar *ptr = resultBuffer;
0N/A
0N/A if (resultBuffer) {
0N/A PRInt32 i;
0N/A
0N/A for(i = 0; i < aSourceBufferLen; i++) {
0N/A if(aSourceBuffer[i] == '<') {
0N/A *ptr++ = '&';
0N/A *ptr++ = 'l';
0N/A *ptr++ = 't';
0N/A *ptr++ = ';';
0N/A } else if(aSourceBuffer[i] == '>') {
0N/A *ptr++ = '&';
0N/A *ptr++ = 'g';
0N/A *ptr++ = 't';
0N/A *ptr++ = ';';
0N/A } else if(aSourceBuffer[i] == '&') {
0N/A *ptr++ = '&';
0N/A *ptr++ = 'a';
0N/A *ptr++ = 'm';
0N/A *ptr++ = 'p';
0N/A *ptr++ = ';';
0N/A } else if (aSourceBuffer[i] == '"') {
0N/A *ptr++ = '&';
0N/A *ptr++ = 'q';
0N/A *ptr++ = 'u';
0N/A *ptr++ = 'o';
0N/A *ptr++ = 't';
0N/A *ptr++ = ';';
0N/A } else if (aSourceBuffer[i] == '\'') {
0N/A *ptr++ = '&';
0N/A *ptr++ = '#';
0N/A *ptr++ = '3';
0N/A *ptr++ = '9';
0N/A *ptr++ = ';';
0N/A } else {
0N/A *ptr++ = aSourceBuffer[i];
0N/A }
0N/A }
0N/A *ptr = 0;
0N/A }
0N/A
0N/A return resultBuffer;
0N/A}
0N/A
0N/A//----------------------------------------------------------------------------------------
0N/A
/* Per-character table used by NO_NEED_ESC(). Each entry is a set of URL-part
 * mask bits; a character may be left unescaped in a given part when its entry
 * has that part's bit set. 1023 means "allowed in all ten parts", 0 means
 * "always escape". Entries after the last explicit initializer are
 * zero-initialized, so every byte >= 0x80 is 0.
 */
const int EscapeChars[256] =
{
/*     0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F */
       0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,  /* 0x                    */
       0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,  /* 1x                    */
       0, 1023,    0,  512, 1023,    0, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023,  953,  784,  /* 2x   !"#$%&'()*+,-./  */
    1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1008,  912,    0, 1008,    0,  768,  /* 3x  0123456789:;<=>?  */
    1008, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023,  /* 4x  @ABCDEFGHIJKLMNO  */
    1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023,  896,  896,  896,  896, 1023,  /* 5x  PQRSTUVWXYZ[\]^_  */
       0, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023,  /* 6x  `abcdefghijklmno  */
    1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023, 1023,  896, 1012,  896, 1023,    0,  /* 7x  pqrstuvwxyz{|}~ DEL */
       0                                                                                             /* 0x80; the rest are 0  */
};

/* Non-zero when C may stay unescaped for the part(s) selected by the variable
 * named `mask` that must be in scope at the point of use. */
#define NO_NEED_ESC(C) (EscapeChars[((unsigned int) (C))] & (mask))
0N/A
0N/A//----------------------------------------------------------------------------------------
0N/A
0N/A/* returns an escaped string */
0N/A
0N/A/* use the following masks to specify which
0N/A part of an URL you want to escape:
0N/A
0N/A esc_Scheme = 1
0N/A esc_Username = 2
0N/A esc_Password = 4
0N/A esc_Host = 8
0N/A esc_Directory = 16
0N/A esc_FileBaseName = 32
0N/A esc_FileExtension = 64
0N/A esc_Param = 128
0N/A esc_Query = 256
0N/A esc_Ref = 512
0N/A*/
0N/A
/* By default this function does not escape a '%' that is already present in
   the string, so that a string which has already been escaped is not escaped
   a second time. Use the following mask to force '%' to be escaped as well:

   esc_Forced        =  1024
*/
0N/A
0N/ANS_COM PRBool NS_EscapeURL(const char *part,
0N/A PRInt32 partLen,
0N/A PRInt16 mask,
0N/A nsACString &result)
0N/A{
0N/A if (!part) {
0N/A NS_NOTREACHED("null pointer");
0N/A return PR_FALSE;
0N/A }
0N/A
0N/A int i = 0;
0N/A static const char hexChars[] = "0123456789ABCDEF";
0N/A if (partLen < 0)
0N/A partLen = strlen(part);
0N/A PRBool forced = (mask & esc_Forced);
0N/A PRBool ignoreNonAscii = (mask & esc_OnlyASCII);
0N/A PRBool ignoreAscii = (mask & esc_OnlyNonASCII);
0N/A PRBool writing = (mask & esc_AlwaysCopy);
0N/A PRBool colon = (mask & esc_Colon);
0N/A
0N/A register const unsigned char* src = (const unsigned char *) part;
0N/A
0N/A char tempBuffer[100];
0N/A unsigned int tempBufferPos = 0;
0N/A
0N/A for (i = 0; i < partLen; i++)
0N/A {
0N/A unsigned char c = *src++;
0N/A
0N/A // if the char has not to be escaped or whatever follows % is
0N/A // a valid escaped string, just copy the char.
0N/A //
0N/A // Also the % will not be escaped until forced
0N/A // See bugzilla bug 61269 for details why we changed this
0N/A //
0N/A // And, we will not escape non-ascii characters if requested.
0N/A // On special request we will also escape the colon even when
0N/A // not covered by the matrix.
0N/A // ignoreAscii is not honored for control characters (C0 and DEL)
0N/A if ((NO_NEED_ESC(c) || (c == HEX_ESCAPE && !forced)
0N/A || (c > 0x7f && ignoreNonAscii)
0N/A || (c > 0x1f && c < 0x7f && ignoreAscii))
0N/A && !(c == ':' && colon))
0N/A {
0N/A if (writing)
0N/A tempBuffer[tempBufferPos++] = c;
0N/A }
0N/A else /* do the escape magic */
0N/A {
0N/A if (!writing)
0N/A {
0N/A result.Append(part, i);
0N/A writing = PR_TRUE;
0N/A }
0N/A tempBuffer[tempBufferPos++] = HEX_ESCAPE;
0N/A tempBuffer[tempBufferPos++] = hexChars[c >> 4]; /* high nibble */
0N/A tempBuffer[tempBufferPos++] = hexChars[c & 0x0f]; /* low nibble */
0N/A }
0N/A
0N/A if (tempBufferPos >= sizeof(tempBuffer) - 4)
0N/A {
0N/A NS_ASSERTION(writing, "should be writing");
0N/A tempBuffer[tempBufferPos] = '\0';
0N/A result += tempBuffer;
0N/A tempBufferPos = 0;
0N/A }
0N/A }
0N/A if (writing) {
0N/A tempBuffer[tempBufferPos] = '\0';
0N/A result += tempBuffer;
0N/A }
0N/A return writing;
0N/A}
0N/A
0N/A#define ISHEX(c) memchr(hexChars, c, sizeof(hexChars)-1)
0N/A
0N/ANS_COM PRBool NS_UnescapeURL(const char *str, PRInt32 len, PRInt16 flags, nsACString &result)
0N/A{
0N/A if (!str) {
0N/A NS_NOTREACHED("null pointer");
0N/A return PR_FALSE;
0N/A }
0N/A
0N/A if (len < 0)
0N/A len = strlen(str);
0N/A
0N/A PRBool ignoreNonAscii = (flags & esc_OnlyASCII);
0N/A PRBool writing = (flags & esc_AlwaysCopy);
0N/A PRBool skipControl = (flags & esc_SkipControl);
0N/A
0N/A static const char hexChars[] = "0123456789ABCDEFabcdef";
0N/A
0N/A const char *last = str;
0N/A const char *p = str;
0N/A
0N/A for (int i=0; i<len; ++i, ++p) {
0N/A //printf("%c [i=%d of len=%d]\n", *p, i, len);
0N/A if (*p == HEX_ESCAPE && i < len-2) {
0N/A unsigned char *p1 = ((unsigned char *) p) + 1;
0N/A unsigned char *p2 = ((unsigned char *) p) + 2;
0N/A if (ISHEX(*p1) && ISHEX(*p2) && !(ignoreNonAscii && *p1 >= '8') &&
0N/A !(skipControl &&
0N/A (*p1 < '2' || (*p1 == '7' && (*p2 == 'f' || *p2 == 'F'))))) {
0N/A //printf("- p1=%c p2=%c\n", *p1, *p2);
0N/A writing = PR_TRUE;
0N/A if (p > last) {
0N/A //printf("- p=%p, last=%p\n", p, last);
0N/A result.Append(last, p - last);
0N/A last = p;
0N/A }
0N/A char u = (UNHEX(*p1) << 4) + UNHEX(*p2);
0N/A //printf("- u=%c\n", u);
0N/A result.Append(u);
0N/A i += 2;
0N/A p += 2;
0N/A last += 3;
0N/A }
0N/A }
0N/A }
0N/A if (writing && last < str + len)
0N/A result.Append(last, str + len - last);
0N/A
0N/A return writing;
0N/A}
0N/A