/*
Copyright (c) 2001, Dr Martin Porter
Copyright (c) 2002, Richard Boulton
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holders nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* This is the rev 502 of the Snowball SVN trunk,
* but modified:
* made abstract and introduced abstract method stem to avoid expensive reflection in filter class.
* refactored StringBuffers to StringBuilder
* uses char[] as buffer instead of StringBuffer/StringBuilder
* eq_s,eq_s_b,insert,replace_s take CharSequence like eq_v and eq_v_b
* reflection calls (Lovins, etc) use EMPTY_ARGS/EMPTY_PARAMS
*/
public abstract class SnowballProgram {
/**
 * Creates a program with a small initial working buffer and then installs
 * the empty string as the current string.
 */
protected SnowballProgram()
{
  // The buffer starts at 8 chars; it is allocated before setCurrent runs.
  this.current = new char[8];
  this.setCurrent("");
}
/**
 * Entry point of the stemming program, supplied by each concrete subclass.
 * It is declared abstract here so callers can invoke it directly instead of
 * looking it up through reflection.
 *
 * @return NOTE(review): the meaning of the boolean result is not shown in
 *         this file; confirm against the concrete stemmers.
 */
public abstract boolean stem();
/**
* Set the current string.
* <p>
* NOTE(review): the method header that should follow this comment is not
* present in the file, so the body below is a bare block. The statements
* that remain only reset {@code cursor} and {@code limit_backward} to 0;
* nothing visible here stores the new string or updates {@code limit}.
* </p>
*/
{
// rewind the cursor to position 0
cursor = 0;
// backward processing may go all the way down to position 0
limit_backward = 0;
}
/**
* Get the current string.
* <p>
* NOTE(review): the method header is missing and the block below is empty,
* so as written nothing is returned. Restore from the upstream source.
* </p>
*/
{
}
/**
* Set the current string.
* <p>
* NOTE(review): the method header and opening brace are missing below; only
* the resets of {@code cursor} and {@code limit_backward} remain. Nothing
* visible here uses {@code text} or {@code length}.
* </p>
* @param text character array containing input
* @param length valid length of text.
*/
// rewind the cursor to position 0
cursor = 0;
// backward processing may go all the way down to position 0
limit_backward = 0;
}
/**
* Returns the character buffer that holds the current (stemmed) text.
* <p>
* NOTE: the returned array is not necessarily the one that was handed to
* setCurrent. In the exceptional case that stemming produced a longer
* intermediate or final string, the buffer may be a different array.
* </p>
* <p>
* Only the leading {@link #getCurrentBufferLength()} characters are valid,
* so always pair this call with that length. For example, many words are
* stemmed simply by reducing the length to remove a suffix.
* </p>
* @return the internal working buffer itself (not a copy)
* @see #getCurrentBufferLength()
*/
public char[] getCurrentBuffer() {
  return this.current;
}
/**
* Reports how many leading characters of the array returned by
* {@link #getCurrentBuffer()} are valid.
* @return the valid length of the buffer, i.e. the current {@code limit}.
*/
public int getCurrentBufferLength() {
  return this.limit;
}
// current string
private char current[];
protected int cursor;
protected int limit;
protected int limit_backward;
protected int bra;
protected int ket;
// NOTE(review): every body in this run has lost its method header, and several
// have also lost their opening tests or loop headers (see the orphaned
// closing braces). The comments below describe only what remains visible.

// Empty body; header and contents are missing.
{
}
// Forward step: advances the cursor by one and reports success.
{
cursor++;
return true;
}
// Backward step: refuses to move at or below limit_backward, otherwise
// retreats the cursor by one and reports success.
{
if (cursor <= limit_backward) return false;
cursor--;
return true;
}
// Forward step, same shape as above.
{
cursor++;
return true;
}
// NOTE(review): orphaned tail; the enclosing statement(s) that opened these
// braces are missing. What remains advances the cursor and returns true,
// then falls through to "return false".
cursor ++;
return true;
}
return false;
}
// Backward step guarded by limit_backward.
{
if (cursor <= limit_backward) return false;
cursor--;
return true;
}
// NOTE(review): orphaned tail of a backward variant; the opening statements
// are missing.
cursor--;
return true;
}
return false;
}
// Forward step.
{
cursor++;
return true;
}
// Backward step guarded by limit_backward.
{
if (cursor <= limit_backward) return false;
cursor--;
return true;
}
// Forward step.
{
cursor++;
return true;
}
// Backward step guarded by limit_backward.
{
if (cursor <= limit_backward) return false;
cursor--;
return true;
}
// NOTE(review): headers are missing for every body in this run. The class
// comment lists eq_s, eq_s_b, eq_v and eq_v_b; these bodies are presumably
// their remains. Confirm against the upstream source.

// Loops s_size times with an empty loop body (the per-character check is
// missing) and then returns true unconditionally.
{
int i;
for (i = 0; i != s_size; i++) {
}
return true;
}
/** @deprecated for binary back compat. Will be removed in Lucene 4.0 */
// NOTE(review): deprecated overload; header and delegating call are missing.
{
}
// Same shape as the first body: empty loop over s_size, then true.
{
int i;
for (i = 0; i != s_size; i++) {
}
return true;
}
/** @deprecated for binary back compat. Will be removed in Lucene 4.0 */
// NOTE(review): deprecated overload; header and delegating call are missing.
{
}
// Empty body; header and contents are missing.
{
}
/** @deprecated for binary back compat. Will be removed in Lucene 4.0 */
// NOTE(review): deprecated overload; header and delegating call are missing.
{
}
// NOTE(review): orphaned closing brace; the body it closed is missing.
}
/** @deprecated for binary back compat. Will be removed in Lucene 4.0 */
// NOTE(review): orphaned closing brace; the deprecated overload it closed is missing.
}
// Forward search of the Among table v (v_size entries) starting at the cursor.
// NOTE(review): the method header is missing, as are the declaration of
// `common`, the inner comparison loop header, and the statements that
// compute `diff` and `res`. Presumably this is find_among, the forward
// counterpart of find_among_b; confirm against the upstream source.
{
// [i, j) is the window of candidate entries still under consideration.
int i = 0;
int j = v_size;
int c = cursor;
int l = limit;
int common_i = 0;
int common_j = 0;
boolean first_key_inspected = false;
// Phase 1: binary search, halving the window around the midpoint k.
while(true) {
int k = i + ((j - i) >> 1);
int diff = 0;
Among w = v[k];
int i2;
// Ran out of input at the limit: treat as "less than" this entry.
if (c + common == l) {
diff = -1;
break;
}
if (diff != 0) break;
common++;
}
// Narrow the window according to the sign of the comparison.
if (diff < 0) {
j = k;
} else {
i = k;
}
if (j - i <= 1) {
if (i > 0) break; // v->s has been inspected
if (j == i) break; // only one item in v
// - but now we need to go round once more to get
// v->s inspected. This looks messy, but is actually
// the optimal approach.
if (first_key_inspected) break;
first_key_inspected = true;
}
}
// Phase 2: starting from entry i, follow substring_i links until a result
// is returned or the chain ends (negative link => return 0).
while(true) {
Among w = v[i];
boolean res;
// NOTE(review): the try body (the reflective call that sets res) is missing.
// Both reflection failures are treated as "no match" (res = false).
try {
} catch (InvocationTargetException e) {
res = false;
// FIXME - debug message
} catch (IllegalAccessException e) {
res = false;
// FIXME - debug message
}
}
i = w.substring_i;
if (i < 0) return 0;
}
}
// find_among_b is for backwards processing. Same comments apply
// NOTE(review): the method header is missing, as are the declaration of
// `common`, the inner comparison loop header, the test that guards the
// "diff = -1; break;" pair, and the statements that compute `diff` and `res`.
{
// [i, j) is the window of candidate entries still under consideration.
int i = 0;
int j = v_size;
int c = cursor;
// Backward processing is bounded by limit_backward rather than limit.
int lb = limit_backward;
int common_i = 0;
int common_j = 0;
boolean first_key_inspected = false;
// Phase 1: binary search, halving the window around the midpoint k.
while(true) {
int k = i + ((j - i) >> 1);
int diff = 0;
Among w = v[k];
int i2;
// NOTE(review): the condition that should guard the next two lines is missing.
diff = -1;
break;
}
if (diff != 0) break;
common++;
}
// Narrow the window according to the sign of the comparison.
if (diff < 0) {
j = k;
} else {
i = k;
}
if (j - i <= 1) {
if (i > 0) break;
if (j == i) break;
if (first_key_inspected) break;
first_key_inspected = true;
}
}
// Phase 2: starting from entry i, follow substring_i links until a result
// is returned or the chain ends (negative link => return 0).
while(true) {
Among w = v[i];
boolean res;
// NOTE(review): the try body (the reflective call that sets res) is missing.
// Both reflection failures are treated as "no match" (res = false).
try {
} catch (InvocationTargetException e) {
res = false;
// FIXME - debug message
} catch (IllegalAccessException e) {
res = false;
// FIXME - debug message
}
}
i = w.substring_i;
if (i < 0) return 0;
}
}
/* to replace chars between c_bra and c_ket in current by the
* chars in s.
*
* NOTE(review): the method header is missing, and so are the statements
* that compute `adjustment`, resize the buffer, shift the tail and copy
* the replacement characters. Presumably this is the replace_s named in
* the class comment; confirm against the upstream source.
*/
{
//resize if necessary
}
// if the substring being replaced is longer or shorter than the
// replacement, need to shift things around
}
// insert the replacement text
// Note, faster is s.getChars(0, s.length(), current, c_bra);
// but would have to duplicate this method for both String and StringBuilder
// NOTE(review): the loop body (the per-character copy) is missing, so as
// written the for statement binds to "limit += adjustment;" and would apply
// the adjustment once per character of s instead of once overall.
for (int i = 0; i < s.length(); i++)
limit += adjustment;
// The caller receives the change in length caused by the replacement.
return adjustment;
}
/** @deprecated for binary back compat. Will be removed in Lucene 4.0 */
// NOTE(review): orphaned closing brace; the deprecated overload it closed is missing.
}
/**
* Sanity-checks the slice bounds before a slice operation.
* <p>
* NOTE(review): the condition is truncated after {@code bra < 0 ||}; the
* remaining clauses and the closing parenthesis are missing, so this does
* not compile as written. Even when the check fails, the visible body does
* nothing: error reporting is still an open FIXME.
* </p>
*/
protected void slice_check()
{
if (bra < 0 ||
{
// FIXME: report error somehow.
// The block below is the original C error path, kept for reference only.
/*
fprintf(stderr, "faulty slice operation:\n");
debug(z, -1, 0);
exit(1);
*/
}
}
// NOTE(review): the method header is missing. Given the calls below, this is
// presumably slice_from(CharSequence). Only the bounds check remains; the
// statement that performs the replacement is missing.
{
slice_check();
}
/** @deprecated for binary back compat. Will be removed in Lucene 4.0 */
// NOTE(review): header missing. The body forwards s to the CharSequence
// overload; the cast selects that overload explicitly.
{
slice_from((CharSequence)s);
}
/** @deprecated for binary back compat. Will be removed in Lucene 4.0 */
// NOTE(review): header missing. Same forwarding as the overload above.
{
slice_from((CharSequence)s);
}
/**
* Deletes the current slice.
* <p>
* NOTE(review): the body is empty, so as written this is a no-op. It
* presumably should replace the slice with an empty string via
* {@code slice_from}; confirm against the upstream source.
* </p>
*/
protected void slice_del()
{
}
// NOTE(review): every body in this run has lost its method header. The class
// comment mentions an insert method taking a CharSequence; the first three
// empty bodies are presumably insert and its deprecated overloads. Confirm.

// Empty body; header and contents are missing.
{
}
/** @deprecated for binary back compat. Will be removed in Lucene 4.0 */
// NOTE(review): deprecated overload; header and delegating call are missing.
{
}
/** @deprecated for binary back compat. Will be removed in Lucene 4.0 */
// NOTE(review): deprecated overload; header and delegating call are missing.
{
}
/* Copy the slice into the supplied StringBuffer */
// Checks the slice bounds, clears s and returns it.
// NOTE(review): the statement that appends the slice characters to s is
// missing, so as written s always comes back empty.
{
slice_check();
s.setLength(0);
return s;
}
// Clears s and returns it; no bounds check here.
// NOTE(review): the header and the append statement are missing.
{
s.setLength(0);
return s;
}
/*
extern void debug(struct SN_env * z, int number, int line_count)
{ int i;
int limit = SIZE(z->p);
//if (number >= 0) printf("%3d (line %4d): '", number, line_count);
if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
for (i = 0; i <= limit; i++)
{ if (z->lb == i) printf("{");
if (z->bra == i) printf("[");
if (z->c == i) printf("|");
if (z->ket == i) printf("]");
if (z->l == i) printf("}");
if (i < limit)
{ int ch = z->p[i];
if (ch == 0) ch = '#';
printf("%c", ch);
}
}
printf("'\n");
}
*/
};