/*
* Copyright (c) 2001, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* @test
* @bug 4396708
* @summary Test URL encoder and decoder on a string that contains
* surrogate pairs.
*
*/
import java.io.*;
import java.net.*;
/*
* Surrogate pairs are two-character Unicode sequences where the first
* character lies in the range [d800, dbff] and the second character lies
* in the range [dc00, dfff]. They are used as an escaping mechanism to add
* 1M more characters to Unicode.
*/
public class SurrogatePairs {

    /*
     * Inputs under test. Each entry contains exactly one surrogate pair whose
     * halves are taken from the boundary values d800/dbff and dc00/dfff.
     * The later entries additionally place '1', '@' or \u0101 directly
     * before or after the pair.
     */
    static String[] testStrings = {
        "\uD800\uDC00",
        "\uD800\uDFFF",
        "\uDBFF\uDC00",
        "\uDBFF\uDFFF",
        "1\uDBFF\uDC00",
        "@\uDBFF\uDC00",
        "\uDBFF\uDC001",
        "\uDBFF\uDC00@",
        "\u0101\uDBFF\uDC00",
        "\uDBFF\uDC00\u0101"
    };

    /*
     * Expected result of URLEncoder.encode(..., "UTF-8") for the entry at
     * the same index in testStrings.
     */
    static String[] correctEncodings = {
        "%F0%90%80%80",
        "%F0%90%8F%BF",
        "%F4%8F%B0%80",
        "%F4%8F%BF%BF",
        "1%F4%8F%B0%80",
        "%40%F4%8F%B0%80",
        "%F4%8F%B0%801",
        "%F4%8F%B0%80%40",
        "%C4%81%F4%8F%B0%80",
        "%F4%8F%B0%80%C4%81"
    };

    /*
     * Runs the encode/decode check on every test string, pairing it with
     * the expected encoding stored at the same index. The first failing
     * pair aborts the run with an Exception.
     */
    public static void main(String[] args) throws Exception {
        int index = 0;
        while (index < testStrings.length) {
            test(testStrings[index], correctEncodings[index]);
            index++;
        }
    }

    /*
     * Encodes str as UTF-8 with URLEncoder and compares the result with
     * correctEncoding, then decodes it again with URLDecoder and compares
     * the result with str. Progress is printed to System.out; an Exception
     * is thrown as soon as either comparison fails.
     */
    private static void test(String str, String correctEncoding)
            throws Exception {
        System.out.println("Unicode bytes of test string are: "
                           + getHexBytes(str));

        String urlForm = URLEncoder.encode(str, "UTF-8");
        System.out.println("URLEncoding is: " + urlForm);
        if (!urlForm.equals(correctEncoding)) {
            throw new Exception("The encoding is incorrect!" +
                                " It should be " + correctEncoding);
        }
        System.out.println("The encoding is correct!");

        String roundTripped = URLDecoder.decode(urlForm, "UTF-8");
        System.out.println("Unicode bytes for URLDecoding are: "
                           + getHexBytes(roundTripped));
        if (!str.equals(roundTripped)) {
            throw new Exception("The decoded is not equal to the original");
        }
        System.out.println("The decoding is correct");

        System.out.println("---");
    }

    /*
     * Renders every char of s as four lowercase hex digits followed by a
     * single space, e.g. "\uD800\uDC00" becomes "d800 dc00 ".
     */
    private static String getHexBytes(String s) throws Exception {
        StringBuilder hex = new StringBuilder();
        for (int pos = 0; pos < s.length(); pos++) {
            char unit = s.charAt(pos);
            // Emit the four nibbles of the 16-bit value, most significant first.
            for (int shift = 12; shift >= 0; shift -= 4) {
                hex.append(Character.forDigit((unit >> shift) & 0x0f, 16));
            }
            hex.append(' ');
        }
        return hex.toString();
    }
}