package org.opensolaris.opengrok.analysis.php;
import static org.junit.Assert.assertArrayEquals;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.util.LinkedList;
import java.util.List;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.junit.Test;
import org.opensolaris.opengrok.analysis.FileAnalyzer;
import org.opensolaris.opengrok.analysis.JFlexTokenizer;
import org.opensolaris.opengrok.util.IOUtils;
/**
 * Tests the {@link PhpSymbolTokenizer} class.
 * <p>
 * Each test feeds PHP source text to the "refs" token stream obtained from
 * the PHP analyzer and compares the emitted terms against an expected list.
 *
 * @author Gustavo Lopes
 */
public class PhpSymbolTokenizerTest {

    /** Analyzer obtained from {@link PhpAnalyzerFactory}; source of the token stream. */
    private final FileAnalyzer analyzer;

    /**
     * Create new test, obtaining the analyzer from a fresh
     * {@link PhpAnalyzerFactory}.
     */
    public PhpSymbolTokenizerTest() {
        PhpAnalyzerFactory analFact = new PhpAnalyzerFactory();
        this.analyzer = analFact.getAnalyzer();
    }

    /**
     * Tokenizes the given string.
     *
     * @param s the text to tokenize
     * @return the terms emitted by the tokenizer, in order
     */
    private String[] getTermsFor(String s) {
        return getTermsFor(new StringReader(s));
    }

    /**
     * Tokenizes the characters supplied by the given reader using the
     * analyzer's "refs" token stream and collects every emitted term.
     *
     * @param r the reader supplying the text to tokenize
     * @return the terms emitted by the tokenizer, in order
     * @throws RuntimeException wrapping any {@link IOException} raised while
     * tokenizing
     */
    @SuppressWarnings("resource")
    private String[] getTermsFor(Reader r) {
        List<String> l = new LinkedList<String>();
        JFlexTokenizer ts = (JFlexTokenizer)
                this.analyzer.overridableTokenStream("refs", null);
        ts.yyreset(r);
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        try {
            // Each successful yylex() call leaves the current term in the attribute.
            while (ts.yylex()) {
                l.add(term.toString());
            }
        } catch (IOException ex) {
            throw new RuntimeException(ex);
        } finally {
            IOUtils.close(ts);
        }
        return l.toArray(new String[l.size()]);
    }

    /**
     * Simple tokenizer test.
     */
    @Test
    public void basicTest() {
        String s = "<?php foobar eval $eval 0sdf _ds˙d";
        String[] termsFor = getTermsFor(s);
        assertArrayEquals(new String[]{"foobar", "eval", "sdf", "_ds˙d"},
                termsFor);
    }

    /**
     * More complex tokenizer test using {@code sample.php}.
     *
     * @throws IOException if {@code sample.php} cannot be found next to this
     * class, or if opening or closing the reader fails
     */
    @Test
    public void sampleTest() throws IOException {
        InputStream res = getClass().getResourceAsStream("sample.php");
        if (res == null) {
            // Fail with a clear message instead of an NPE inside InputStreamReader.
            throw new IOException("Test resource sample.php not found");
        }
        InputStreamReader r = new InputStreamReader(res, "UTF-8");
        try {
            String[] termsFor = getTermsFor(r);
            // The trailing comments give the line of sample.php each term comes from.
            assertArrayEquals(new String[]
                {
                    "a", //line 3
                    "foo", "bar", //line 5
                    "g", "a", "c", //line 6
                    "b", "c", "a", "a", //line 7
                    "doo", //line 9
                    "a", //line 10
                    "foo", "bar", //line 12
                    "name", //line 13
                    "foo", "bar", //line 14
                    "foo", //line 15
                    "ff", //line 20
                    "foo", //line 21
                    "FooException", //line 28
                    "used",
                    "Foo", "Bar", //line 30
                    "Foo", "Foo", "param", //line 31
                    "gata", //line 37
                    "gata", //line 38
                    "foo", "_SERVER", "_SERVER", "_SERVER", //line 39
                    "foo", "bar", "foo", "bar", "foo", "a", //line 40
                }, termsFor);
        } finally {
            // Release the reader even when the assertion above fails.
            r.close();
        }
    }
}