package it.unimi.dsi.util;
import it.unimi.dsi.fastutil.chars.CharArrayList;
import it.unimi.dsi.util.TextPattern;
import java.nio.charset.StandardCharsets;
import junit.framework.TestCase;
/**
 * JUnit 3 tests for {@link TextPattern#search}, run against the four input
 * flavours used below: {@code byte[]}, {@link String}, {@code char[]} and
 * {@link CharArrayList}.
 */
public class TextPatternTest extends TestCase {

    /**
     * Small HTML fixture containing a lower-case {@code <meta} tag, used by the
     * case-insensitive searches in {@link #testSearch()}.
     */
    private static final String DOCUMENT_META_IS_UTF_8 =
        "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Strict//EN\" \"http://www.w3.org/TR/REC-html40/strict.dtd\">\n" +
        "\n" +
        "<html>\n" +
        "<head>\n" +
        "<style type=\"text/css\">\n" +
        "@import \"/css/content.php\";\n" +
        "@import \"/css/layout.php\";\n" +
        "</style>" +
        "<meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\" >" +
        "<title id=\"mamma\" special-type=\"li turchi\">Sebastiano Vigna</title>\n" +
        "</HEAD>\n" +
        "<boDY>\n" +
        "<div id=header>:::Sebastiano Vigna</div>" +
        "<div id=left>\n" +
        "<ul id=\"left-nav\">" +
        "<br>Bye bye baby\n" +
        "<img SRc=\"but I'm ignoring this one\"> and not this one\n" +
        "\n\n even whitespace counts \n\n" +
        "<frame SRC=\"http://www.GOOGLE.com/\">The frame source counts</frame>\n" +
        "<iframe SRC=\"http://www.GOOGLE.com/\">And so does the iframe source</iframe>\n" +
        "</body>\n" +
        "</html>";

    /**
     * Searches for the one-character pattern {@code "A"} located at index 1 of
     * each input. The unrestricted search must return 1; the search called with
     * the extra arguments {@code (0, 1)} must return -1.
     */
    public void testSingleCharacterSearch() {
        final byte[] bytes = new byte[] { 1, (byte)'A', 2 };
        final String text = " A ";
        final TextPattern pattern = new TextPattern( "A" );

        // NOTE(review): the two int arguments presumably delimit the searched
        // range so that index 1 is excluded -- confirm against TextPattern.
        assertEquals( -1, pattern.search( bytes, 0, 1 ) );
        assertEquals( -1, pattern.search( text, 0, 1 ) );
        assertEquals( -1, pattern.search( text.toCharArray(), 0, 1 ) );
        assertEquals( -1, pattern.search( CharArrayList.wrap( text.toCharArray() ), 0, 1 ) );

        assertEquals( 1, pattern.search( bytes ) );
        assertEquals( 1, pattern.search( text ) );
        assertEquals( 1, pattern.search( text.toCharArray() ) );
        assertEquals( 1, pattern.search( CharArrayList.wrap( text.toCharArray() ) ) );
    }

    /**
     * Searches for the two-character pattern {@code "AB"} starting at index 1 of
     * each input, then checks that case-insensitive patterns {@code "<meta"} and
     * {@code "<META"} are both found in the bytes of the HTML fixture.
     */
    public void testSearch() {
        final byte[] bytes = new byte[] { 1, (byte)'A', 'B', 2 };
        final String text = " AB ";
        final TextPattern pattern = new TextPattern( "AB" );

        // NOTE(review): the two int arguments presumably delimit the searched
        // range so that the full match does not fit -- confirm against TextPattern.
        assertEquals( -1, pattern.search( bytes, 0, 2 ) );
        assertEquals( -1, pattern.search( text, 0, 2 ) );
        assertEquals( -1, pattern.search( text.toCharArray(), 0, 2 ) );
        assertEquals( -1, pattern.search( CharArrayList.wrap( text.toCharArray() ), 0, 2 ) );

        assertEquals( 1, pattern.search( bytes ) );
        assertEquals( 1, pattern.search( text ) );
        assertEquals( 1, pattern.search( text.toCharArray() ) );
        assertEquals( 1, pattern.search( CharArrayList.wrap( text.toCharArray() ) ) );

        // Encode with an explicit charset: the no-argument getBytes() uses the
        // platform default, which would make this test depend on the JVM's
        // configuration. The fixture is pure ASCII, so UTF-8 yields one byte
        // per character.
        final byte[] documentBytes = DOCUMENT_META_IS_UTF_8.getBytes( StandardCharsets.UTF_8 );

        TextPattern patternMeta = new TextPattern( "<meta", TextPattern.CASE_INSENSITIVE );
        assertTrue( patternMeta.search( documentBytes ) != -1 );

        patternMeta = new TextPattern( "<META", TextPattern.CASE_INSENSITIVE );
        assertTrue( patternMeta.search( documentBytes ) != -1 );
    }
}