/*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/**
* @test
* @summary tests RegExp framework
* @author Mike McCloskey
* @bug 4481568 4482696 4495089 4504687 4527731 4599621 4631553 4619345
* 4630911 4672616 4711773 4727935 4750573 4792284 4803197 4757029 4808962
* 4872664 4803179 4892980 4900747 4945394 4938995 4979006 4994840 4997476
* 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
* 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
* 6350801 6676425 6878475 6919132 6931676 6948903 7014645 7039066
*/
/**
* This is a test class created to check the operation of
* the Pattern and Matcher classes.
*/
public class RegExTest {
private static boolean failure = false;
/**
* Main to interpret arguments and run several tests.
*
*/
// Most of the tests are in a file
//processFile("PerlCases.txt");
// These test many randomly generated char patterns
bm();
slice();
// These are hard to put into the file
escapes();
blankInput();
// Substitution tests on randomly generated sequences
// Canonical Equivalence
ceTest();
// Anchors
anchorTest();
// boolean match calls
matchesTest();
// Pattern API
// Misc
backRefTest();
caretTest();
findIntTest();
group0Test();
octalTest();
negationTest();
splitTest();
appendTest();
commentsTest();
gTest();
zTest();
// This RFE rejected in Tiger numOccurrencesTest();
regionTest();
toStringTest();
findFromTest();
boundsTest();
hitEndTest();
if (failure)
throw new RuntimeException("Failure in the RE handling.");
else
}
// Utility functions
for (int i=0; i<length; i++) {
}
}
m.find();
failCount++;
}
m.find();
failCount++;
}
failCount++;
}
failCount++;
}
failCount++;
}
failCount++;
}
boolean expected)
{
if (expected)
else
}
for (int i=0; i<spacesToAdd; i++)
if (failCount > 0)
failure = true;
failCount = 0;
}
/**
* Converts ASCII alphabet characters [A-Za-z] in the given 's' to
* supplementary characters. This method does NOT fully take care
* of the regex syntax.
*/
for (int i = 0; i < length; ) {
char c = s.charAt(i++);
if (c == '\\') {
if (i < length) {
c = s.charAt(i++);
if (c == 'u') {
// assume no syntax error
}
}
} else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
} else {
}
}
}
// Regular expression tests
// This is for bug 6178785
// Test if an expected NPE gets thrown when passing in a null argument
try {
failCount++;
return false;
} catch (NullPointerException npe) {
return true;
}
}
/**
 * Test case whose outcome is reported under the label "Null Argument".
 * <p>
 * NOTE(review): in this view the declaration of {@code m} and the
 * individual null-argument checks are not visible; only a call to
 * {@code m.matches()}, one commented-out check and the final
 * {@code report} call remain. Confirm against the complete file.
 */
private static void nullArgumentTest() {
m.matches();
// The usePattern(null) check below is commented out and therefore not run.
//check(new Runnable() { public void run() { m.usePattern(null);}});
report("Null Argument");
}
// This is for bug 6635133
// Test if surrogate pair in Unicode escapes can be handled correctly.
failCount++;
}
// This is for bug 4988891
// Test toMatchResult to see that it is a copy of the Matcher
// that is not affected by subsequent operations on the original
"agiantsquidofdestinyasmallsquidoffate");
failCount++;
if (matcherStart1 != resultStart1)
failCount++;
if (matcherStart2 == resultStart2)
failCount++;
if (resultStart1 != resultStart2)
failCount++;
failCount++;
failCount++;
report("toMatchResult is a copy");
}
// This is for bug 5013885
// Must test a slice to see if it reports hitEnd correctly
// Basic test of Slice node
m.find();
if (m.hitEnd())
failCount++;
m.reset("squid");
m.find();
if (!m.hitEnd())
failCount++;
// Test Slice, SliceA and SliceU nodes
for (int i=0; i<3; i++) {
int flags = 0;
m = p.matcher("ad");
m.find();
if (m.hitEnd())
failCount++;
m.reset("ab");
m.find();
if (!m.hitEnd())
failCount++;
}
// Test Boyer-Moore node
m = p.matcher("attack");
m.find();
if (!m.hitEnd())
failCount++;
m = p.matcher("attackattackattackcatatta");
m.find();
if (!m.hitEnd())
failCount++;
report("hitEnd from a Slice");
}
// This is for bug 4997476
// It is weird code submitted by customer demonstrating a regression
int position = 0;
int start = 0;
break;
} else {
}
continue;
failCount++;
}
report("Customer word search");
}
// This is for bug 4994840
// Problem only occurs with multiline patterns
// containing a beginning-of-line caret "^" followed
// by an expression that also matches the empty string.
report("Caret at end");
}
// This test is for 4979006
// Check to see if word boundary construct properly handles unicode
// non spacing marks
// S=other B=word character N=non spacing mark .=word boundary
// SS.BB.SS
// SS.BBN.SS
// SS.BN.SS
// SS.BNN.SS
// SSN.BB.SS
// SS.BNB.SS
// SSNNSS
failCount++;
// SSN.BBN.SS
report("Unicode word boundary");
}
int b) throws Exception
{
failCount++;
failCount++;
}
// This test is for 6284152
while (m.find()) {
}
failCount++;
}
//Positive
check("(?<=%.{0,5})foo\\d",
"%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5",
//boundary at end of the lookbehind sub-regex should work consistently
//with the boundary just after the lookbehind sub-regex
//Negative
check("(?<!%.{0,5})foo\\d",
"%foo1\n%bar foo2\n%bar foo3\n%blahblah foo4\nfoo5",
//Positive greedy
//Positive reluctant
//supplementary
new String[] {"fo\ud800\udc00o"});
new String[] {"fo\ud800\udc00o"});
new String[] {"fo\ud800\udc00o"});
new String[] {"fo\ud800\udc00o"});
report("Lookbehind");
}
// This test is for 4938995
// Check to see if weak region boundaries are transparent to
// lookahead and lookbehind constructs
matcher.useTransparentBounds(true);
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
matcher.useTransparentBounds(false);
failCount++;
// Negative lookahead/lookbehind
matcher.useTransparentBounds(true);
failCount++;
failCount++;
matcher.useTransparentBounds(false);
failCount++;
failCount++;
report("Region bounds transparency");
}
// This test is for 4945394
failCount++;
failCount++;
failCount++;
report("Check for alternating find");
}
// This test is for 4872664 and 4892980
failCount++;
failCount++;
failCount++;
String s = "for";
failCount++;
failCount++;
s = "f\u203Ar";
failCount++;
failCount++;
// Test adding to bits, subtracting a node, then adding to bits again
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
// Test subtracting a node, adding to bits, subtracting again
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
report("Negated Character Class");
}
// This test is for 4628291
failCount++;
report("toString");
}
// This test is for 4808962
// Supplementary character test
// note: this is case-sensitive.
report("Literal pattern");
}
// This test is for 4803179
// This test is also for 4808962, replacement parts
failCount++;
failCount++;
replaceTest = "\\t$\\$";
failCount++;
// Supplementary character test
replaceTest = "$0";
failCount++;
failCount++;
replaceTest = "\\t$\\$";
failCount++;
report("Literal replacement");
}
// This test is for 4757029
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
matcher.useAnchoringBounds(false);
failCount++;
// Supplementary character test
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
matcher.useAnchoringBounds(false);
failCount++;
report("Regions");
}
int index2)
{
try {
failCount++;
} catch (IndexOutOfBoundsException ioobe) {
// Correct result
} catch (IllegalStateException ise) {
// Correct result
}
}
// This test is for 4803197
// Supplementary character test
report("Escaped segment");
}
// This test is for 4792284
"(?:\\w{4})+;",
"(?:\\w{8})*;",
"(?:\\w{2}){2,4};",
"(?:\\w{4}){2,};", // only matches the
".*?(?:\\w{5})+;", // specified minimum
".*?(?:\\w{9})*;", // number of reps - OK
"(?:\\w{4})+?;", // lazy repetition - OK
"(?:\\w{4})++;", // possessive repetition - OK
"(?:\\w{2,}?)+;", // non-deterministic - OK
"(\\w{4})+;", // capturing group - OK
};
// Check find()
// Check matches()
if (m.matches()) {
failCount++;
} else {
failCount++;
}
}
report("Non capturing repetition");
}
// This test is for 6358731
failCount++;
}
report("Not captured GroupCurly");
}
// This test is for 4706545
for (int i=0; i<1000; i++) {
check("{javaJavaIdentifierStart}", c,
check("{javaJavaIdentifierPart}", c,
check("{javaUnicodeIdentifierStart}", c,
check("{javaUnicodeIdentifierPart}", c,
check("{javaIdentifierIgnorable}", c,
}
// Supplementary character test
for (int i=0; i<1000; i++) {
check("{javaJavaIdentifierStart}", c,
check("{javaJavaIdentifierPart}", c,
check("{javaUnicodeIdentifierStart}", c,
check("{javaUnicodeIdentifierPart}", c,
check("{javaIdentifierIgnorable}", c,
}
report("Java character classes");
}
// This test is for 4523620
/*
private static void numOccurrencesTest() throws Exception {
Pattern pattern = Pattern.compile("aaa");
if (pattern.numOccurrences("aaaaaa", false) != 2)
failCount++;
if (pattern.numOccurrences("aaaaaa", true) != 4)
failCount++;
pattern = Pattern.compile("^");
if (pattern.numOccurrences("aaaaaa", false) != 1)
failCount++;
if (pattern.numOccurrences("aaaaaa", true) != 1)
failCount++;
report("Number of Occurrences");
}
*/
// This test is for 4776374
// Supplementary character test
report("Caret between terminators");
}
// This test is for 4727935
// Supplementary character test
report("Dollar at End");
}
// This test is for 4711773
failCount++;
failCount++;
// Supplementary character test
failCount++;
failCount++;
report("Multiline Dollar");
}
check(p, "1 word word word 2", true);
check(p, "1 wor wo w 2", true);
check(p, "1 word word 2", true);
check(p, "1 word 2", true);
check(p, "1 wo w w 2", true);
check(p, "1 wo w 2", true);
check(p, "1 wor w 2", true);
check(m, "ababc");
// Supplementary character test
report("Reluctant Repetition");
}
failCount++;
failCount++;
report("Serialization");
}
/**
 * Test case whose outcome is reported under the label passed to
 * {@code report("\\G")}.
 * <p>
 * NOTE(review): in this view the body consists solely of four
 * unconditional {@code failCount} increments; the checks that presumably
 * guard each increment are not visible here. Confirm against the complete
 * file before relying on this method.
 */
private static void gTest() {
failCount++;
failCount++;
failCount++;
failCount++;
report("\\G");
}
/**
 * Test case whose outcome is reported under the label passed to
 * {@code report("\\Z")}.
 * <p>
 * The body is organised as two rounds of "Positives" / "Negatives"
 * sections. NOTE(review): the statements belonging to those sections are
 * not visible in this view; only the section markers and the final
 * {@code report} call remain.
 */
private static void zTest() {
// Positives
// Negatives
// Positives
// Negatives
report("\\Z");
}
/**
 * Test case whose outcome is reported under the label "Replace First".
 * <p>
 * The body has two halves of six {@code failCount} increments each; the
 * second half is marked as the supplementary-character variant.
 * NOTE(review): the conditions that presumably guard each increment are
 * not visible in this view. Confirm against the complete file.
 */
private static void replaceFirstTest() {
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
// Supplementary character test
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
report("Replace First");
}
/**
 * Test case whose outcome is reported under the label "Unix Lines".
 * <p>
 * The body has two halves of two {@code failCount} increments each; the
 * second half is marked as the supplementary-character variant.
 * NOTE(review): the conditions that presumably guard each increment are
 * not visible in this view. Confirm against the complete file.
 */
private static void unixLinesTest() {
failCount++;
failCount++;
// Supplementary character test
failCount++;
failCount++;
report("Unix Lines");
}
/**
 * Test case whose outcome is reported under the label "Comments".
 * <p>
 * The body has two halves of eight {@code failCount} increments each; the
 * second half is marked as the supplementary-character variant.
 * NOTE(review): the conditions that presumably guard each increment are
 * not visible in this view. Confirm against the complete file.
 */
private static void commentsTest() {
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
// Supplementary character test
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
report("Comments");
}
failCount++;
failCount++;
failCount++;
failCount++;
// ASCII "a"
// Latin-1 Supplement "a" + grave
// Cyrillic "a"
//single
"a", "\u00e0", "\u0430",
//slice
"ab", "\u00e0\u00e1", "\u0430\u0431",
//class single
"[a]", "[\u00e0]", "[\u0430]",
//class range
"[a-b]", "[\u00e0-\u00e5]", "[\u0430-\u0431]",
//back reference
"(a)\\1", "(\u00e0)\\1", "(\u0430)\\1"
};
"A", "\u00c0", "\u0410",
"AB", "\u00c0\u00c1", "\u0410\u0411",
"A", "\u00c0", "\u0410",
"B", "\u00c2", "\u0411",
"aA", "\u00e0\u00c0", "\u0430\u0410"
};
boolean[] expected = new boolean[] {
true, false, false,
true, false, false,
true, false, false,
true, false, false,
true, false, false
};
failCount++;
}
}
failCount++;
}
}
// flag unicode_case alone should do nothing
failCount++;
}
}
// Special cases: i, I, u+0131 and u+0130
failCount++;
report("Case Folding");
}
private static void appendTest() {
failCount++;
String r = "$3$2$1";
failCount++;
failCount++;
}
// Supplementary character test
failCount++;
r = toSupplementaries("$3$2$1");
failCount++;
failCount++;
}
report("Append");
}
private static void splitTest() {
failCount++;
failCount++;
// Supplementary character test
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
// Supplementary character test
failCount++;
failCount++;
failCount++;
for (int x=0; x<10; x++) {
// expected dropping of ""
failCount++;
failCount++;
}
} else {
failCount++;
}
if (limit != 1) {
failCount++;
} else {
failCount++;
}
}
}
failCount++;
}
}
}
}
// Check the case for no match found
failCount++;
failCount++;
}
// Check the case for limit == 0, source = "";
source = "";
failCount++;
failCount++;
report("Split");
}
/**
 * Test case whose outcome is reported under the label "Negation".
 * <p>
 * NOTE(review): in this view the body consists solely of seven
 * unconditional {@code failCount} increments; the checks that presumably
 * guard each increment are not visible here. Confirm against the complete
 * file before relying on this method.
 */
private static void negationTest() {
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
report("Negation");
}
/**
 * Test case whose outcome is reported under the label "Ampersand".
 * <p>
 * NOTE(review): no checks are visible in this view; the method only calls
 * {@code report}. Confirm against the complete file.
 */
private static void ampersandTest() {
report("Ampersand");
}
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
report("Octal");
}
try {
"a 32-character-long pattern xxxx");
for (int i=0; i<100; i++)
} catch (PatternSyntaxException e) {
failCount++;
}
// Supplementary character test
try {
toSupplementaries("a 32-character-long pattern xxxx"));
for (int i=0; i<100; i++)
+ 97 + i%26));
} catch (PatternSyntaxException e) {
failCount++;
}
report("LongPattern");
}
failCount++;
} else {
failCount++;
}
failCount++;
} else {
failCount++;
}
failCount++;
} else {
failCount++;
}
failCount++;
} else {
failCount++;
}
// Supplementary character test
failCount++;
} else {
failCount++;
}
failCount++;
} else {
failCount++;
}
failCount++;
} else {
failCount++;
}
failCount++;
} else {
failCount++;
}
report("Group0");
}
if (!result)
failCount++;
m = p.matcher("1234567890");
if (!result)
failCount++;
try {
failCount++;
} catch (IndexOutOfBoundsException e) {
// correct result
}
// Supplementary character test
if (!result)
failCount++;
report("FindInt");
}
// Should find empty pattern at beginning of input
if (result != true)
failCount++;
if (m.start() != 0)
failCount++;
// Should not match entire input if input is not empty
m.reset();
if (result == true)
failCount++;
try {
m.start(0);
failCount++;
} catch (IllegalStateException e) {
// Correct result
}
// Should match entire input if input is empty
m.reset("");
if (result != true)
failCount++;
if (result != true)
failCount++;
if (result == true)
failCount++;
report("EmptyPattern");
}
// Supplementary character tests
try {
// u00ff when UNICODE_CASE
// u00b5 when UNICODE_CASE
/* Special cases
(1)LatinSmallLetterLongS u+017f
(2)LatinSmallLetterDotlessI u+0131
(3)LatinCapitalLetterIWithDotAbove u+0130
(4)KelvinSign u+212a
(5)AngstromSign u+212b
*/
failCount++;
report("CharClass");
}
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
failCount++;
report("Caret");
}
// Independent group
try {
failCount++;
} catch (IndexOutOfBoundsException ioobe) {
// Good result
}
// Pure group
try {
failCount++;
} catch (IndexOutOfBoundsException ioobe) {
// Good result
}
// Supplementary character tests
// Independent group
try {
failCount++;
} catch (IndexOutOfBoundsException ioobe) {
// Good result
}
// Pure group
try {
failCount++;
} catch (IndexOutOfBoundsException ioobe) {
// Good result
}
report("GroupCapture");
}
try {
for (int i = 1; i < 10; i++) {
// Make sure backref 1-9 are always accepted
// and fail to match if the target group does not exist
}
} catch(PatternSyntaxException e) {
failCount++;
}
// Supplementary character tests
report("BackRef");
}
/**
* Unicode Technical Report #18, section 2.6 End of Line
* There is no empty line to be matched in the sequence \u000D\u000A
* but there is an empty line in the sequence \u000A\u000D.
*/
m.find();
m.find();
failCount++;
m.reset("blah1\n\rblah2");
m.find();
m.find();
m.find();
failCount++;
// Test behavior of $ with \r\n at end of input
m = p.matcher("blah1\r\n");
if (!m.find())
failCount++;
failCount++;
if (m.find())
failCount++;
// Test behavior of $ with \r\n at end of input in multiline
m = p.matcher("blah1\r\n");
if (!m.find())
failCount++;
if (m.find())
failCount++;
// Test for $ recognition of \u0085 for bug 4527731
m = p.matcher("blah1\u0085");
if (!m.find())
failCount++;
// Supplementary character test
m.find();
m.find();
failCount++;
m.find();
m.find();
m.find();
failCount++;
// Test behavior of $ with \r\n at end of input
if (!m.find())
failCount++;
failCount++;
if (m.find())
failCount++;
// Test behavior of $ with \r\n at end of input in multiline
if (!m.find())
failCount++;
if (m.find())
failCount++;
// Test for $ recognition of \u0085 for bug 4527731
if (!m.find())
failCount++;
report("Anchors");
}
/**
* A basic sanity test of Matcher.lookingAt().
*/
if (!m.lookingAt())
failCount++;
failCount++;
m = p.matcher("zzzabccczzzabcczzzabccczzz");
if (m.lookingAt())
failCount++;
// Supplementary character test
if (!m.lookingAt())
failCount++;
failCount++;
if (m.lookingAt())
failCount++;
report("Looking At");
}
/**
* A basic sanity test of Matcher.matches().
*/
// matches()
if (!m.matches())
failCount++;
// find() but not matches()
m.reset("zzzulbcccccc");
if (m.matches())
failCount++;
// lookingAt() but not matches()
m.reset("ulbccccccdef");
if (m.matches())
failCount++;
// matches()
m = p.matcher("ad");
if (!m.matches())
failCount++;
// Supplementary character test
// matches()
if (!m.matches())
failCount++;
// find() but not matches()
if (m.matches())
failCount++;
// lookingAt() but not matches()
if (m.matches())
failCount++;
// matches()
if (!m.matches())
failCount++;
report("Matches");
}
/**
* A basic sanity test of Pattern.matches().
*/
// matches()
toSupplementaries("ulbcccccc")))
failCount++;
// find() but not matches()
toSupplementaries("zzzulbcccccc")))
failCount++;
// lookingAt() but not matches()
toSupplementaries("ulbccccccdef")))
failCount++;
// Supplementary character test
// matches()
toSupplementaries("ulbcccccc")))
failCount++;
// find() but not matches()
toSupplementaries("zzzulbcccccc")))
failCount++;
// lookingAt() but not matches()
toSupplementaries("ulbccccccdef")))
failCount++;
report("Pattern Matches");
}
/**
* Canonical equivalence testing. Tests the ability of the engine
* to match sequences that are not explicitly specified in the
* pattern when they are considered equivalent by the Unicode Standard.
*/
// Decomposed char outside char classes
if (!m.matches())
failCount++;
m.reset("testa\u030a");
if (!m.matches())
failCount++;
// Composed char outside char classes
m = p.matcher("test\u00e5");
if (!m.matches())
failCount++;
m.reset("testa\u030a");
if (!m.find())
failCount++;
// Decomposed char inside a char class
m = p.matcher("test\u00e5");
if (!m.find())
failCount++;
m.reset("testa\u030a");
if (!m.find())
failCount++;
// Composed char inside a char class
m = p.matcher("test\u00e5");
if (!m.find())
failCount++;
m.reset("testa\u0300");
if (!m.find())
failCount++;
m.reset("testa\u030a");
if (!m.find())
failCount++;
// Marks that cannot legally change order and be equivalent
check(p, "testa\u0308\u0300", true);
check(p, "testa\u0300\u0308", false);
// Marks that can legally change order and be equivalent
check(p, "testa\u0308\u0323", true);
check(p, "testa\u0323\u0308", true);
// Test all equivalences of the sequence a\u0308\u0323\u0300
check(p, "testa\u0308\u0323\u0300", true);
check(p, "testa\u0323\u0308\u0300", true);
check(p, "testa\u0308\u0300\u0323", true);
check(p, "test\u00e4\u0323\u0300", true);
check(p, "test\u00e4\u0300\u0323", true);
/*
* The following canonical equivalence tests don't work. Bug id: 4916384.
*
// Decomposed hangul (jamos)
p = Pattern.compile("\u1100\u1161", Pattern.CANON_EQ);
m = p.matcher("\u1100\u1161");
if (!m.matches())
failCount++;
m.reset("\uac00");
if (!m.matches())
failCount++;
// Composed hangul
p = Pattern.compile("\uac00", Pattern.CANON_EQ);
m = p.matcher("\u1100\u1161");
if (!m.matches())
failCount++;
m.reset("\uac00");
if (!m.matches())
failCount++;
// Decomposed supplementary outside char classes
p = Pattern.compile("test\ud834\uddbc\ud834\udd6f", Pattern.CANON_EQ);
m = p.matcher("test\ud834\uddc0");
if (!m.matches())
failCount++;
m.reset("test\ud834\uddbc\ud834\udd6f");
if (!m.matches())
failCount++;
// Composed supplementary outside char classes
p = Pattern.compile("test\ud834\uddc0", Pattern.CANON_EQ);
m.reset("test\ud834\uddbc\ud834\udd6f");
if (!m.matches())
failCount++;
m = p.matcher("test\ud834\uddc0");
if (!m.matches())
failCount++;
*/
report("Canonical Equivalence");
}
/**
* A basic sanity test of Matcher.replaceAll().
*/
// Global substitution with a literal
failCount++;
m.reset("zzzabccczzzabcczzzabccczzz");
failCount++;
// Global substitution with groups
m.reset("zzzabccczzzabcczzzabccczzz");
failCount++;
// Supplementary character test
// Global substitution with a literal
failCount++;
failCount++;
// Global substitution with groups
failCount++;
report("Global Substitution");
}
/**
* Tests the usage of Matcher.appendReplacement() with literal
* and group substitutions.
*/
// SB substitution with literal
try {
failCount++;
} catch (IllegalStateException e) {
}
m.find();
failCount++;
m.appendTail(result);
failCount++;
// SB substitution with groups
blah = "zzzabcdzzz";
result = new StringBuffer();
try {
failCount++;
} catch (IllegalStateException e) {
}
m.find();
failCount++;
m.appendTail(result);
failCount++;
// SB substitution with 3 groups
blah = "zzzabcdcdefzzz";
result = new StringBuffer();
try {
failCount++;
} catch (IllegalStateException e) {
}
m.find();
failCount++;
m.appendTail(result);
failCount++;
// SB substitution with groups and three matches
// skipping middle match
blah = "zzzabcdzzzabcddzzzabcdzzz";
result = new StringBuffer();
try {
failCount++;
} catch (IllegalStateException e) {
}
m.find();
failCount++;
m.find();
m.find();
failCount++;
m.appendTail(result);
failCount++;
// Check to make sure escaped $ is ignored
blah = "zzzabcdcdefzzz";
result = new StringBuffer();
m.find();
failCount++;
m.appendTail(result);
failCount++;
// Check to make sure a reference to nonexistent group causes error
blah = "zzzabcdcdefzzz";
result = new StringBuffer();
m.find();
try {
failCount++;
} catch (IndexOutOfBoundsException ioobe) {
// Correct result
}
// Check double digit group references
blah = "zzz123456789101112zzz";
result = new StringBuffer();
m.find();
failCount++;
// Check to make sure it backs off $15 to $1 if only three groups
blah = "zzzabcdcdefzzz";
result = new StringBuffer();
m.find();
failCount++;
// Supplementary character test
// SB substitution with literal
result = new StringBuffer();
try {
failCount++;
} catch (IllegalStateException e) {
}
m.find();
failCount++;
m.appendTail(result);
failCount++;
// SB substitution with groups
result = new StringBuffer();
try {
failCount++;
} catch (IllegalStateException e) {
}
m.find();
failCount++;
m.appendTail(result);
failCount++;
// SB substitution with 3 groups
result = new StringBuffer();
try {
failCount++;
} catch (IllegalStateException e) {
}
m.find();
failCount++;
m.appendTail(result);
failCount++;
// SB substitution with groups and three matches
// skipping middle match
result = new StringBuffer();
try {
failCount++;
} catch (IllegalStateException e) {
}
m.find();
failCount++;
m.find();
m.find();
failCount++;
m.appendTail(result);
failCount++;
// Check to make sure escaped $ is ignored
result = new StringBuffer();
m.find();
failCount++;
m.appendTail(result);
failCount++;
// Check to make sure a reference to nonexistent group causes error
result = new StringBuffer();
m.find();
try {
failCount++;
} catch (IndexOutOfBoundsException ioobe) {
// Correct result
}
// Check double digit group references
result = new StringBuffer();
m.find();
failCount++;
// Check to make sure it backs off $15 to $1 if only three groups
result = new StringBuffer();
m.find();
failCount++;
// Check nothing has been appended into the output buffer if
// the replacement string triggers IllegalArgumentException.
m = p.matcher("abcd");
result = new StringBuffer();
m.find();
try {
failCount++;
} catch (IllegalArgumentException iae) {
failCount++;
}
report("SB Substitution");
}
/*
* 5 groups of characters are created to make a substitution string.
* A base string will be created including random lead chars, the
* substitution string, and random trailing chars.
* A pattern containing the 5 groups is searched for and replaced with:
* random group + random string + random group.
* The results are checked for correctness.
*/
private static void substitutionBasher() {
// Create a base string to work in
// Create 5 groups of random number of random chars
// Create the string to substitute
// Create the pattern string to search for
for(int i=0; i<5; i++) {
}
// Place sub string into working string at random index
// Append random chars to end
// Create test pattern and matcher
// Reject candidate if pattern happens to start early
m.find();
if (m.start() < leadingChars)
continue;
// Reject candidate if more than one match
if (m.find())
continue;
// Construct a replacement string with :
// random group + random string + random group
// Do the replacement
// Construct expected result
// Check results
failCount++;
}
report("Substitution Basher");
}
/**
* Checks the handling of some escape sequences that the Pattern
* class should process instead of the java compiler. These are
* not in the file because the escapes should be processed
* by the Pattern class when the regex is compiled.
*/
if (!m.find())
failCount++;
m = p.matcher("#");
if (!m.find())
failCount++;
m = p.matcher("#");
if (!m.find())
failCount++;
report("Escape sequences");
}
/**
* Checks the handling of blank input situations. These
* tests are incompatible with my test file format.
*/
if (m.find())
failCount++;
m = p.matcher("");
if (!m.find())
failCount++;
m = p.matcher("");
if (m.find())
failCount++;
m = p.matcher("");
if (!m.find())
failCount++;
report("Blank input");
}
/**
* Tests the Boyer-Moore pattern matching of a character sequence
* on randomly generated patterns.
*/
doBnM('a');
report("Boyer Moore (ASCII)");
report("Boyer Moore (Supplementary)");
}
int achar=0;
for (int i=0; i<100; i++) {
// Create a short pattern to search for
for (int x=0; x<patternLength; x++) {
} else {
}
}
// Create a buffer with random ASCII chars that does
// not match the sample
StringBuffer s = null;
do {
s = new StringBuffer(100);
for (int x=0; x<100; x++) {
} else {
}
}
} while (m.find());
// Insert the pattern at a random spot
insertIndex++;
// Make sure that the pattern is found
if (!m.find())
failCount++;
// Make sure that the match text is the pattern
failCount++;
// Make sure match occurred at insertion point
if (m.start() != insertIndex)
failCount++;
}
}
/**
* Tests the matching of slices on randomly generated patterns.
* The Boyer-Moore optimization is not done on these patterns
* because it uses unicode case folding.
*/
report("Slice");
report("Slice (Supplementary)");
}
int achar=0;
for (int i=0; i<100; i++) {
// Create a short pattern to search for
for (int x=0; x<patternLength; x++) {
int randomChar = 0;
} else {
}
}
// Create a buffer with random chars that does not match the sample
StringBuffer s = null;
do {
s = new StringBuffer(100);
for (int x=0; x<100; x++) {
int randomChar = 0;
} else {
s.append((char) randomChar);
}
}
} while (m.find());
// Insert the pattern at a random spot
insertIndex++;
// Make sure that the pattern is found
if (!m.find())
failCount++;
// Make sure that the match text is the pattern
failCount++;
// Make sure match occurred at insertion point
if (m.start() != insertIndex)
failCount++;
}
}
}
Throwable t) {
}
// Testing examples from a file
/**
* Goes through the file "TestCases.txt" and creates many patterns
* described in the file, matching the patterns against input lines in
* the file, and comparing the results against the correct results
* also found in the file. The file format is described in comments
* at the head of the file.
*/
fileName);
// Process next test case.
// Read a line for pattern
try {
} catch (PatternSyntaxException e) {
continue;
failCount++;
continue;
}
// Read a line for input string
// Check for IllegalStateExceptions before a match
failCount += preMatchInvariants(m);
if (found)
failCount += postTrueMatchInvariants(m);
else
if (found) {
} else {
}
if (found) {
}
// Read a line for the expected result
failCount++;
}
}
}
int failCount = 0;
try {
m.start();
failCount++;
} catch (IllegalStateException ise) {}
try {
m.end();
failCount++;
} catch (IllegalStateException ise) {}
try {
m.group();
failCount++;
} catch (IllegalStateException ise) {}
return failCount;
}
int failCount = 0;
try {
m.group();
failCount++;
} catch (IllegalStateException ise) {}
try {
m.start();
failCount++;
} catch (IllegalStateException ise) {}
try {
m.end();
failCount++;
} catch (IllegalStateException ise) {}
return failCount;
}
int failCount = 0;
// assert m.start() == m.start(0)
failCount++;
// assert m.end() == m.end(0)
failCount++;
// assert m.group() equals m.group(0)
failCount++;
try {
m.group(50);
failCount++;
} catch (IndexOutOfBoundsException ise) {}
return failCount;
}
}
}
/**
* Reads a line from the input file. Keeps reading lines until a non
* empty non comment line is read. If the line contains a \n then
* these two characters are replaced by a newline char. If a \\uxxxx
* sequence is read then the sequence is replaced by the unicode char.
*/
int index = 0;
}
}
return line;
}
m.find();
failCount++;
}
{
.matcher(s)
.replaceFirst(r)))
failCount++;
}
{
.matcher(s)
.replaceAll(r)))
failCount++;
}
try {
} catch (PatternSyntaxException pse) {
//pse.printStackTrace();
return;
}
failCount++;
}
m.find();
try {
m.group(g);
} catch (IllegalArgumentException iae) {
//iae.printStackTrace();
return;
} catch (NullPointerException npe) {
return;
}
failCount++;
}
"xxxyyyzzz",
"gname",
"yyy");
"xxxyyyzzz",
"gname8",
"yyy");
//backref
"zzzaabcaazzz", true);
"abcdefabc", true);
"abcdefghijkk", true);
// Supplementary character tests
toSupplementaries("zzzaabcazzz"), true);
toSupplementaries("zzzaabcaazzz"), true);
toSupplementaries("abcdefabc"), true);
"(?<gname>" +
toSupplementaries("abcdefghijkk"), true);
"xxxyyyzzzyyy",
"gname",
"yyy");
checkReplaceFirst("(?<gn>ab)(c*)",
"abccczzzabcczzzabccc",
"${gn}",
"abzzzabcczzzabccc");
checkReplaceAll("(?<gn>ab)(c*)",
"abccczzzabcczzzabccc",
"${gn}",
"abzzzabzzzab");
checkReplaceFirst("(?<gn>ab)(c*)",
"zzzabccczzzabcczzzabccczzz",
"${gn}",
"zzzabzzzabcczzzabccczzz");
checkReplaceAll("(?<gn>ab)(c*)",
"zzzabccczzzabcczzzabccczzz",
"${gn}",
"zzzabzzzabzzzabzzz");
checkReplaceFirst("(?<gn1>ab)(?<gn2>c*)",
"zzzabccczzzabcczzzabccczzz",
"${gn2}",
"zzzccczzzabcczzzabccczzz");
checkReplaceAll("(?<gn1>ab)(?<gn2>c*)",
"zzzabccczzzabcczzzabccczzz",
"${gn2}",
"zzzccczzzcczzzccczzz");
//toSupplementaries("(ab)(c*)"));
toSupplementaries("abccczzzabcczzzabccc"),
"${gn1}",
toSupplementaries("abzzzabcczzzabccc"));
toSupplementaries("abccczzzabcczzzabccc"),
"${gn1}",
toSupplementaries("abzzzabzzzab"));
toSupplementaries("abccczzzabcczzzabccc"),
"${gn2}",
toSupplementaries("ccczzzabcczzzabccc"));
toSupplementaries("abccczzzabcczzzabccc"),
"${gn2}",
toSupplementaries("ccczzzcczzzccc"));
checkReplaceFirst("(?<dog>Dog)AndCat",
"zzzDogAndCatzzzDogAndCatzzz",
"${dog}",
"zzzDogzzzDogAndCatzzz");
checkReplaceAll("(?<dog>Dog)AndCat",
"zzzDogAndCatzzzDogAndCatzzz",
"${dog}",
"zzzDogzzzDogzzz");
// backref in Matcher & String
failCount++;
// negative
checkExpectedFail("(?<groupnamehasnoascii.in>abc)(def)");
checkExpectedFail("(?<groupnamehasnoascii_in>abc)(def)");
checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
"gnameX");
null);
report("NamedGroupCapture");
}
// This is for bug 6969132
failCount++;
// from a unicode category
if (m.find())
failCount++;
if (!m.hitEnd())
failCount++;
// block
failCount++;
report("NonBmpClassComplement");
}
// different forms
failCount++;
continue; // only pick couple code points, they are the same
}
// Unicode Script
Matcher m;
if (script == lastScript) {
m = lastSM;
} else {
}
if (!m.matches()) {
failCount++;
}
failCount++;
}
lastSM = m;
lastScript = script;
// Unicode Block
//System.out.printf("Not a Block: cp=%x%n", cp);
continue;
}
m = lastBM;
} else {
}
if (!m.matches()) {
failCount++;
}
failCount++;
}
lastBM = m;
}
report("unicodeProperties");
}
// negative
checkExpectedFail("\\x{-23}");
checkExpectedFail("\\x{110000}");
checkExpectedFail("\\x{}");
checkExpectedFail("\\x{AB[ef]");
// codepoint
// in class
failCount++;
failCount++;
failCount++;
failCount++;
}
report("unicodeHexNotation");
}
// UNICODE_CHARACTER_CLASS
// embedded flag (?U)
// properties
// javaMethod
if (// lower
// upper
// alpha
// digit
// alnum
// punct
// graph
// blank
// print
// cntrl
// hexdigit
// space
// word
// bwordb
// properties
failCount++;
}
failCount++;
failCount++;
failCount++;
failCount++;
report("unicodePredefinedClasses");
}
}