286N/A/*
286N/A * reserved comment block
286N/A * DO NOT REMOVE OR ALTER!
286N/A */
286N/A/*
286N/A * Copyright 1999-2002,2004,2005 The Apache Software Foundation.
286N/A *
286N/A * Licensed under the Apache License, Version 2.0 (the "License");
286N/A * you may not use this file except in compliance with the License.
286N/A * You may obtain a copy of the License at
286N/A *
286N/A * http://www.apache.org/licenses/LICENSE-2.0
286N/A *
286N/A * Unless required by applicable law or agreed to in writing, software
286N/A * distributed under the License is distributed on an "AS IS" BASIS,
286N/A * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
286N/A * See the License for the specific language governing permissions and
286N/A * limitations under the License.
286N/A */
286N/A
286N/Apackage com.sun.org.apache.xerces.internal.impl.xpath.regex;
286N/A
286N/A/**
286N/A * This class represents a character class such as [a-z] or a period.
286N/A *
286N/A * @xerces.internal
286N/A *
286N/A */
286N/Afinal class RangeToken extends Token implements java.io.Serializable {
286N/A
286N/A private static final long serialVersionUID = 3257568399592010545L;
286N/A
286N/A int[] ranges;
286N/A boolean sorted;
286N/A boolean compacted;
286N/A RangeToken icaseCache = null;
286N/A int[] map = null;
286N/A int nonMapIndex;
286N/A
286N/A RangeToken(int type) {
286N/A super(type);
286N/A this.setSorted(false);
286N/A }
286N/A
286N/A // for RANGE or NRANGE
286N/A protected void addRange(int start, int end) {
286N/A this.icaseCache = null;
286N/A //System.err.println("Token#addRange(): "+start+" "+end);
286N/A int r1, r2;
286N/A if (start <= end) {
286N/A r1 = start;
286N/A r2 = end;
286N/A } else {
286N/A r1 = end;
286N/A r2 = start;
286N/A }
286N/A
286N/A int pos = 0;
286N/A if (this.ranges == null) {
286N/A this.ranges = new int[2];
286N/A this.ranges[0] = r1;
286N/A this.ranges[1] = r2;
286N/A this.setSorted(true);
286N/A } else {
286N/A pos = this.ranges.length;
286N/A if (this.ranges[pos-1]+1 == r1) {
286N/A this.ranges[pos-1] = r2;
286N/A return;
286N/A }
286N/A int[] temp = new int[pos+2];
286N/A System.arraycopy(this.ranges, 0, temp, 0, pos);
286N/A this.ranges = temp;
286N/A if (this.ranges[pos-1] >= r1)
286N/A this.setSorted(false);
286N/A this.ranges[pos++] = r1;
286N/A this.ranges[pos] = r2;
286N/A if (!this.sorted)
286N/A this.sortRanges();
286N/A }
286N/A }
286N/A
286N/A private final boolean isSorted() {
286N/A return this.sorted;
286N/A }
286N/A private final void setSorted(boolean sort) {
286N/A this.sorted = sort;
286N/A if (!sort) this.compacted = false;
286N/A }
286N/A private final boolean isCompacted() {
286N/A return this.compacted;
286N/A }
286N/A private final void setCompacted() {
286N/A this.compacted = true;
286N/A }
286N/A
286N/A protected void sortRanges() {
286N/A if (this.isSorted())
286N/A return;
286N/A if (this.ranges == null)
286N/A return;
286N/A //System.err.println("Do sorting: "+this.ranges.length);
286N/A
286N/A // Bubble sort
286N/A // Why? -- In many cases,
286N/A // this.ranges has few elements.
286N/A for (int i = this.ranges.length-4; i >= 0; i -= 2) {
286N/A for (int j = 0; j <= i; j += 2) {
286N/A if (this.ranges[j] > this.ranges[j+2]
286N/A || this.ranges[j] == this.ranges[j+2] && this.ranges[j+1] > this.ranges[j+3]) {
286N/A int tmp;
286N/A tmp = this.ranges[j+2];
286N/A this.ranges[j+2] = this.ranges[j];
286N/A this.ranges[j] = tmp;
286N/A tmp = this.ranges[j+3];
286N/A this.ranges[j+3] = this.ranges[j+1];
286N/A this.ranges[j+1] = tmp;
286N/A }
286N/A }
286N/A }
286N/A this.setSorted(true);
286N/A }
286N/A
286N/A /**
286N/A * this.ranges is sorted.
286N/A */
286N/A protected void compactRanges() {
286N/A boolean DEBUG = false;
286N/A if (this.ranges == null || this.ranges.length <= 2)
286N/A return;
286N/A if (this.isCompacted())
286N/A return;
286N/A int base = 0; // Index of writing point
286N/A int target = 0; // Index of processing point
286N/A
286N/A while (target < this.ranges.length) {
286N/A if (base != target) {
286N/A this.ranges[base] = this.ranges[target++];
286N/A this.ranges[base+1] = this.ranges[target++];
286N/A } else
286N/A target += 2;
286N/A int baseend = this.ranges[base+1];
286N/A while (target < this.ranges.length) {
286N/A if (baseend+1 < this.ranges[target])
286N/A break;
286N/A if (baseend+1 == this.ranges[target]) {
286N/A if (DEBUG)
286N/A System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base]
286N/A +", "+this.ranges[base+1]
286N/A +"], ["+this.ranges[target]
286N/A +", "+this.ranges[target+1]
286N/A +"] -> ["+this.ranges[base]
286N/A +", "+this.ranges[target+1]
286N/A +"]");
286N/A this.ranges[base+1] = this.ranges[target+1];
286N/A baseend = this.ranges[base+1];
286N/A target += 2;
286N/A } else if (baseend >= this.ranges[target+1]) {
286N/A if (DEBUG)
286N/A System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base]
286N/A +", "+this.ranges[base+1]
286N/A +"], ["+this.ranges[target]
286N/A +", "+this.ranges[target+1]
286N/A +"] -> ["+this.ranges[base]
286N/A +", "+this.ranges[base+1]
286N/A +"]");
286N/A target += 2;
286N/A } else if (baseend < this.ranges[target+1]) {
286N/A if (DEBUG)
286N/A System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base]
286N/A +", "+this.ranges[base+1]
286N/A +"], ["+this.ranges[target]
286N/A +", "+this.ranges[target+1]
286N/A +"] -> ["+this.ranges[base]
286N/A +", "+this.ranges[target+1]
286N/A +"]");
286N/A this.ranges[base+1] = this.ranges[target+1];
286N/A baseend = this.ranges[base+1];
286N/A target += 2;
286N/A } else {
286N/A throw new RuntimeException("Token#compactRanges(): Internel Error: ["
286N/A +this.ranges[base]
286N/A +","+this.ranges[base+1]
286N/A +"] ["+this.ranges[target]
286N/A +","+this.ranges[target+1]+"]");
286N/A }
286N/A } // while
286N/A base += 2;
286N/A }
286N/A
286N/A if (base != this.ranges.length) {
286N/A int[] result = new int[base];
286N/A System.arraycopy(this.ranges, 0, result, 0, base);
286N/A this.ranges = result;
286N/A }
286N/A this.setCompacted();
286N/A }
286N/A
286N/A protected void mergeRanges(Token token) {
286N/A RangeToken tok = (RangeToken)token;
286N/A this.sortRanges();
286N/A tok.sortRanges();
286N/A if (tok.ranges == null)
286N/A return;
286N/A this.icaseCache = null;
286N/A this.setSorted(true);
286N/A if (this.ranges == null) {
286N/A this.ranges = new int[tok.ranges.length];
286N/A System.arraycopy(tok.ranges, 0, this.ranges, 0, tok.ranges.length);
286N/A return;
286N/A }
286N/A int[] result = new int[this.ranges.length+tok.ranges.length];
286N/A for (int i = 0, j = 0, k = 0; i < this.ranges.length || j < tok.ranges.length;) {
286N/A if (i >= this.ranges.length) {
286N/A result[k++] = tok.ranges[j++];
286N/A result[k++] = tok.ranges[j++];
286N/A } else if (j >= tok.ranges.length) {
286N/A result[k++] = this.ranges[i++];
286N/A result[k++] = this.ranges[i++];
286N/A } else if (tok.ranges[j] < this.ranges[i]
286N/A || tok.ranges[j] == this.ranges[i] && tok.ranges[j+1] < this.ranges[i+1]) {
286N/A result[k++] = tok.ranges[j++];
286N/A result[k++] = tok.ranges[j++];
286N/A } else {
286N/A result[k++] = this.ranges[i++];
286N/A result[k++] = this.ranges[i++];
286N/A }
286N/A }
286N/A this.ranges = result;
286N/A }
286N/A
286N/A protected void subtractRanges(Token token) {
286N/A if (token.type == NRANGE) {
286N/A this.intersectRanges(token);
286N/A return;
286N/A }
286N/A RangeToken tok = (RangeToken)token;
286N/A if (tok.ranges == null || this.ranges == null)
286N/A return;
286N/A this.icaseCache = null;
286N/A this.sortRanges();
286N/A this.compactRanges();
286N/A tok.sortRanges();
286N/A tok.compactRanges();
286N/A
286N/A //System.err.println("Token#substractRanges(): Entry: "+this.ranges.length+", "+tok.ranges.length);
286N/A
286N/A int[] result = new int[this.ranges.length+tok.ranges.length];
286N/A int wp = 0, src = 0, sub = 0;
286N/A while (src < this.ranges.length && sub < tok.ranges.length) {
286N/A int srcbegin = this.ranges[src];
286N/A int srcend = this.ranges[src+1];
286N/A int subbegin = tok.ranges[sub];
286N/A int subend = tok.ranges[sub+1];
286N/A if (srcend < subbegin) { // Not overlapped
286N/A // src: o-----o
286N/A // sub: o-----o
286N/A // res: o-----o
286N/A // Reuse sub
286N/A result[wp++] = this.ranges[src++];
286N/A result[wp++] = this.ranges[src++];
286N/A } else if (srcend >= subbegin
286N/A && srcbegin <= subend) { // Overlapped
286N/A // src: o--------o
286N/A // sub: o----o
286N/A // sub: o----o
286N/A // sub: o----o
286N/A // sub: o------------o
286N/A if (subbegin <= srcbegin && srcend <= subend) {
286N/A // src: o--------o
286N/A // sub: o------------o
286N/A // res: empty
286N/A // Reuse sub
286N/A src += 2;
286N/A } else if (subbegin <= srcbegin) {
286N/A // src: o--------o
286N/A // sub: o----o
286N/A // res: o-----o
286N/A // Reuse src(=res)
286N/A this.ranges[src] = subend+1;
286N/A sub += 2;
286N/A } else if (srcend <= subend) {
286N/A // src: o--------o
286N/A // sub: o----o
286N/A // res: o-----o
286N/A // Reuse sub
286N/A result[wp++] = srcbegin;
286N/A result[wp++] = subbegin-1;
286N/A src += 2;
286N/A } else {
286N/A // src: o--------o
286N/A // sub: o----o
286N/A // res: o-o o-o
286N/A // Reuse src(=right res)
286N/A result[wp++] = srcbegin;
286N/A result[wp++] = subbegin-1;
286N/A this.ranges[src] = subend+1;
286N/A sub += 2;
286N/A }
286N/A } else if (subend < srcbegin) {
286N/A // Not overlapped
286N/A // src: o-----o
286N/A // sub: o----o
286N/A sub += 2;
286N/A } else {
286N/A throw new RuntimeException("Token#subtractRanges(): Internal Error: ["+this.ranges[src]
286N/A +","+this.ranges[src+1]
286N/A +"] - ["+tok.ranges[sub]
286N/A +","+tok.ranges[sub+1]
286N/A +"]");
286N/A }
286N/A }
286N/A while (src < this.ranges.length) {
286N/A result[wp++] = this.ranges[src++];
286N/A result[wp++] = this.ranges[src++];
286N/A }
286N/A this.ranges = new int[wp];
286N/A System.arraycopy(result, 0, this.ranges, 0, wp);
286N/A // this.ranges is sorted and compacted.
286N/A }
286N/A
286N/A /**
286N/A * @param tok Ignore whether it is NRANGE or not.
286N/A */
286N/A protected void intersectRanges(Token token) {
286N/A RangeToken tok = (RangeToken)token;
286N/A if (tok.ranges == null || this.ranges == null)
286N/A return;
286N/A this.icaseCache = null;
286N/A this.sortRanges();
286N/A this.compactRanges();
286N/A tok.sortRanges();
286N/A tok.compactRanges();
286N/A
286N/A int[] result = new int[this.ranges.length+tok.ranges.length];
286N/A int wp = 0, src1 = 0, src2 = 0;
286N/A while (src1 < this.ranges.length && src2 < tok.ranges.length) {
286N/A int src1begin = this.ranges[src1];
286N/A int src1end = this.ranges[src1+1];
286N/A int src2begin = tok.ranges[src2];
286N/A int src2end = tok.ranges[src2+1];
286N/A if (src1end < src2begin) { // Not overlapped
286N/A // src1: o-----o
286N/A // src2: o-----o
286N/A // res: empty
286N/A // Reuse src2
286N/A src1 += 2;
286N/A } else if (src1end >= src2begin
286N/A && src1begin <= src2end) { // Overlapped
286N/A // src1: o--------o
286N/A // src2: o----o
286N/A // src2: o----o
286N/A // src2: o----o
286N/A // src2: o------------o
286N/A if (src2begin <= src2begin && src1end <= src2end) {
286N/A // src1: o--------o
286N/A // src2: o------------o
286N/A // res: o--------o
286N/A // Reuse src2
286N/A result[wp++] = src1begin;
286N/A result[wp++] = src1end;
286N/A src1 += 2;
286N/A } else if (src2begin <= src1begin) {
286N/A // src1: o--------o
286N/A // src2: o----o
286N/A // res: o--o
286N/A // Reuse the rest of src1
286N/A result[wp++] = src1begin;
286N/A result[wp++] = src2end;
286N/A this.ranges[src1] = src2end+1;
286N/A src2 += 2;
286N/A } else if (src1end <= src2end) {
286N/A // src1: o--------o
286N/A // src2: o----o
286N/A // res: o--o
286N/A // Reuse src2
286N/A result[wp++] = src2begin;
286N/A result[wp++] = src1end;
286N/A src1 += 2;
286N/A } else {
286N/A // src1: o--------o
286N/A // src2: o----o
286N/A // res: o----o
286N/A // Reuse the rest of src1
286N/A result[wp++] = src2begin;
286N/A result[wp++] = src2end;
286N/A this.ranges[src1] = src2end+1;
286N/A }
286N/A } else if (src2end < src1begin) {
286N/A // Not overlapped
286N/A // src1: o-----o
286N/A // src2: o----o
286N/A src2 += 2;
286N/A } else {
286N/A throw new RuntimeException("Token#intersectRanges(): Internal Error: ["
286N/A +this.ranges[src1]
286N/A +","+this.ranges[src1+1]
286N/A +"] & ["+tok.ranges[src2]
286N/A +","+tok.ranges[src2+1]
286N/A +"]");
286N/A }
286N/A }
286N/A while (src1 < this.ranges.length) {
286N/A result[wp++] = this.ranges[src1++];
286N/A result[wp++] = this.ranges[src1++];
286N/A }
286N/A this.ranges = new int[wp];
286N/A System.arraycopy(result, 0, this.ranges, 0, wp);
286N/A // this.ranges is sorted and compacted.
286N/A }
286N/A
286N/A /**
286N/A * for RANGE: Creates complement.
286N/A * for NRANGE: Creates the same meaning RANGE.
286N/A */
286N/A static Token complementRanges(Token token) {
286N/A if (token.type != RANGE && token.type != NRANGE)
286N/A throw new IllegalArgumentException("Token#complementRanges(): must be RANGE: "+token.type);
286N/A RangeToken tok = (RangeToken)token;
286N/A tok.sortRanges();
286N/A tok.compactRanges();
286N/A int len = tok.ranges.length+2;
286N/A if (tok.ranges[0] == 0)
286N/A len -= 2;
286N/A int last = tok.ranges[tok.ranges.length-1];
286N/A if (last == UTF16_MAX)
286N/A len -= 2;
286N/A RangeToken ret = Token.createRange();
286N/A ret.ranges = new int[len];
286N/A int wp = 0;
286N/A if (tok.ranges[0] > 0) {
286N/A ret.ranges[wp++] = 0;
286N/A ret.ranges[wp++] = tok.ranges[0]-1;
286N/A }
286N/A for (int i = 1; i < tok.ranges.length-2; i += 2) {
286N/A ret.ranges[wp++] = tok.ranges[i]+1;
286N/A ret.ranges[wp++] = tok.ranges[i+1]-1;
286N/A }
286N/A if (last != UTF16_MAX) {
286N/A ret.ranges[wp++] = last+1;
286N/A ret.ranges[wp] = UTF16_MAX;
286N/A }
286N/A ret.setCompacted();
286N/A return ret;
286N/A }
286N/A
286N/A synchronized RangeToken getCaseInsensitiveToken() {
286N/A if (this.icaseCache != null)
286N/A return this.icaseCache;
286N/A
286N/A RangeToken uppers = this.type == Token.RANGE ? Token.createRange() : Token.createNRange();
286N/A for (int i = 0; i < this.ranges.length; i += 2) {
286N/A for (int ch = this.ranges[i]; ch <= this.ranges[i+1]; ch ++) {
286N/A if (ch > 0xffff)
286N/A uppers.addRange(ch, ch);
286N/A else {
286N/A char uch = Character.toUpperCase((char)ch);
286N/A uppers.addRange(uch, uch);
286N/A }
286N/A }
286N/A }
286N/A RangeToken lowers = this.type == Token.RANGE ? Token.createRange() : Token.createNRange();
286N/A for (int i = 0; i < uppers.ranges.length; i += 2) {
286N/A for (int ch = uppers.ranges[i]; ch <= uppers.ranges[i+1]; ch ++) {
286N/A if (ch > 0xffff)
286N/A lowers.addRange(ch, ch);
286N/A else {
286N/A char uch = Character.toUpperCase((char)ch);
286N/A lowers.addRange(uch, uch);
286N/A }
286N/A }
286N/A }
286N/A lowers.mergeRanges(uppers);
286N/A lowers.mergeRanges(this);
286N/A lowers.compactRanges();
286N/A
286N/A this.icaseCache = lowers;
286N/A return lowers;
286N/A }
286N/A
286N/A void dumpRanges() {
286N/A System.err.print("RANGE: ");
286N/A if (this.ranges == null)
286N/A System.err.println(" NULL");
286N/A for (int i = 0; i < this.ranges.length; i += 2) {
286N/A System.err.print("["+this.ranges[i]+","+this.ranges[i+1]+"] ");
286N/A }
286N/A System.err.println("");
286N/A }
286N/A
286N/A boolean match(int ch) {
286N/A if (this.map == null) this.createMap();
286N/A boolean ret;
286N/A if (this.type == RANGE) {
286N/A if (ch < MAPSIZE)
286N/A return (this.map[ch/32] & (1<<(ch&0x1f))) != 0;
286N/A ret = false;
286N/A for (int i = this.nonMapIndex; i < this.ranges.length; i += 2) {
286N/A if (this.ranges[i] <= ch && ch <= this.ranges[i+1])
286N/A return true;
286N/A }
286N/A } else {
286N/A if (ch < MAPSIZE)
286N/A return (this.map[ch/32] & (1<<(ch&0x1f))) == 0;
286N/A ret = true;
286N/A for (int i = this.nonMapIndex; i < this.ranges.length; i += 2) {
286N/A if (this.ranges[i] <= ch && ch <= this.ranges[i+1])
286N/A return false;
286N/A }
286N/A }
286N/A return ret;
286N/A }
286N/A
286N/A private static final int MAPSIZE = 256;
286N/A private void createMap() {
286N/A int asize = MAPSIZE/32; // 32 is the number of bits in `int'.
286N/A int [] map = new int[asize];
286N/A int nonMapIndex = this.ranges.length;
286N/A for (int i = 0; i < asize; ++i) {
286N/A map[i] = 0;
286N/A }
286N/A for (int i = 0; i < this.ranges.length; i += 2) {
286N/A int s = this.ranges[i];
286N/A int e = this.ranges[i+1];
286N/A if (s < MAPSIZE) {
286N/A for (int j = s; j <= e && j < MAPSIZE; j++) {
286N/A map[j/32] |= 1<<(j&0x1f); // s&0x1f : 0-31
286N/A }
286N/A }
286N/A else {
286N/A nonMapIndex = i;
286N/A break;
286N/A }
286N/A if (e >= MAPSIZE) {
286N/A nonMapIndex = i;
286N/A break;
286N/A }
286N/A }
286N/A this.map = map;
286N/A this.nonMapIndex = nonMapIndex;
286N/A //for (int i = 0; i < asize; i ++) System.err.println("Map: "+Integer.toString(this.map[i], 16));
286N/A }
286N/A
286N/A public String toString(int options) {
286N/A String ret;
286N/A if (this.type == RANGE) {
286N/A if (this == Token.token_dot)
286N/A ret = ".";
286N/A else if (this == Token.token_0to9)
286N/A ret = "\\d";
286N/A else if (this == Token.token_wordchars)
286N/A ret = "\\w";
286N/A else if (this == Token.token_spaces)
286N/A ret = "\\s";
286N/A else {
286N/A StringBuffer sb = new StringBuffer();
286N/A sb.append("[");
286N/A for (int i = 0; i < this.ranges.length; i += 2) {
286N/A if ((options & RegularExpression.SPECIAL_COMMA) != 0 && i > 0) sb.append(",");
286N/A if (this.ranges[i] == this.ranges[i+1]) {
286N/A sb.append(escapeCharInCharClass(this.ranges[i]));
286N/A } else {
286N/A sb.append(escapeCharInCharClass(this.ranges[i]));
286N/A sb.append((char)'-');
286N/A sb.append(escapeCharInCharClass(this.ranges[i+1]));
286N/A }
286N/A }
286N/A sb.append("]");
286N/A ret = sb.toString();
286N/A }
286N/A } else {
286N/A if (this == Token.token_not_0to9)
286N/A ret = "\\D";
286N/A else if (this == Token.token_not_wordchars)
286N/A ret = "\\W";
286N/A else if (this == Token.token_not_spaces)
286N/A ret = "\\S";
286N/A else {
286N/A StringBuffer sb = new StringBuffer();
286N/A sb.append("[^");
286N/A for (int i = 0; i < this.ranges.length; i += 2) {
286N/A if ((options & RegularExpression.SPECIAL_COMMA) != 0 && i > 0) sb.append(",");
286N/A if (this.ranges[i] == this.ranges[i+1]) {
286N/A sb.append(escapeCharInCharClass(this.ranges[i]));
286N/A } else {
286N/A sb.append(escapeCharInCharClass(this.ranges[i]));
286N/A sb.append('-');
286N/A sb.append(escapeCharInCharClass(this.ranges[i+1]));
286N/A }
286N/A }
286N/A sb.append("]");
286N/A ret = sb.toString();
286N/A }
286N/A }
286N/A return ret;
286N/A }
286N/A
286N/A private static String escapeCharInCharClass(int ch) {
286N/A String ret;
286N/A switch (ch) {
286N/A case '[': case ']': case '-': case '^':
286N/A case ',': case '\\':
286N/A ret = "\\"+(char)ch;
286N/A break;
286N/A case '\f': ret = "\\f"; break;
286N/A case '\n': ret = "\\n"; break;
286N/A case '\r': ret = "\\r"; break;
286N/A case '\t': ret = "\\t"; break;
286N/A case 0x1b: ret = "\\e"; break;
286N/A //case 0x0b: ret = "\\v"; break;
286N/A default:
286N/A if (ch < 0x20) {
286N/A String pre = "0"+Integer.toHexString(ch);
286N/A ret = "\\x"+pre.substring(pre.length()-2, pre.length());
286N/A } else if (ch >= 0x10000) {
286N/A String pre = "0"+Integer.toHexString(ch);
286N/A ret = "\\v"+pre.substring(pre.length()-6, pre.length());
286N/A } else
286N/A ret = ""+(char)ch;
286N/A }
286N/A return ret;
286N/A }
286N/A
286N/A}