/**
* Copyright 2014 National University of Ireland, Galway.
*
* This file is part of the SIREn project. Project and contact information:
*
* https://github.com/rdelbru/SIREn
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sindice.siren.analysis;
import static org.sindice.siren.analysis.MockSirenToken.node;
import java.io.StringReader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.IntsRef;
import org.junit.Test;
import org.sindice.siren.util.XSDDatatype;
/**
 * Tests for {@link TupleTokenizer}.
 *
 * <p>Each test feeds a tuple string to the tokenizer through the
 * {@code assertTokenizesTo} helpers inherited from
 * {@link NodeTokenizerTestCase} and compares the emitted tokens against
 * parallel arrays of expected values (token images, token types, and,
 * depending on the overload, datatypes or node paths). The expectation arrays
 * of one assertion must therefore all have the same length.
 */
public class TestTupleTokenizer extends NodeTokenizerTestCase {

  /**
   * Tokenizer under test, shared by every assertion. It is created over an
   * empty reader; the real input is supplied per assertion through
   * {@code assertTokenizesTo}.
   */
  private final Tokenizer _t = new TupleTokenizer(new StringReader(""));

  /**
   * Checks that a URI enclosed in angle brackets yields a single
   * {@code <URI>} token whose image is the URI without the brackets, for a
   * variety of URI shapes (user info, port, path, fragment, query string,
   * non-http schemes).
   *
   * @throws Exception if tokenization fails
   */
  @Test
  public void testURI()
  throws Exception {
    this.assertTokenizesTo(_t, "<http://renaud.delbru.fr/>",
      new String[] { "http://renaud.delbru.fr/" },
      new String[] { "<URI>" });
    this.assertTokenizesTo(_t, "<http://renaud.delbru.fr>",
      new String[] { "http://renaud.delbru.fr" },
      new String[] { "<URI>" });
    this.assertTokenizesTo(_t, "<http://user@renaud.delbru.fr>",
      new String[] { "http://user@renaud.delbru.fr" },
      new String[] { "<URI>" });
    this.assertTokenizesTo(_t, "<http://user:passwd@renaud.delbru.fr>",
      new String[] { "http://user:passwd@renaud.delbru.fr" },
      new String[] { "<URI>" });
    this.assertTokenizesTo(_t, "<http://user:passwd@renaud.delbru.fr:8080>",
      new String[] { "http://user:passwd@renaud.delbru.fr:8080" },
      new String[] { "<URI>" });
    this.assertTokenizesTo(_t, "<http://renaud.delbru.fr:8080>",
      new String[] { "http://renaud.delbru.fr:8080" },
      new String[] { "<URI>" });
    this.assertTokenizesTo(_t, "<http://renaud.delbru.fr/subdir/page.html>",
      new String[] { "http://renaud.delbru.fr/subdir/page.html" },
      new String[] { "<URI>" });
    this.assertTokenizesTo(_t, "<http://renaud.delbru.fr/page.html#fragment>",
      new String[] { "http://renaud.delbru.fr/page.html#fragment" },
      new String[] { "<URI>" });
    this.assertTokenizesTo(
      _t,
      "<http://renaud.delbru.fr/page.html?query=a+query&hl=en&start=20&sa=N>",
      new String[] { "http://renaud.delbru.fr/page.html?query=a+query&hl=en&start=20&sa=N" },
      new String[] { "<URI>" });
    this.assertTokenizesTo(_t, "<ftp://renaud.delbru.fr/>",
      new String[] { "ftp://renaud.delbru.fr/" },
      new String[] { "<URI>" });
    this.assertTokenizesTo(_t, "<mailto:renaud@delbru.fr>",
      new String[] { "mailto:renaud@delbru.fr" },
      new String[] { "<URI>" });
  }

  /**
   * Checks that a blank node written as {@code _:label} yields a single
   * {@code <BNODE>} token whose image is the label without the {@code _:}
   * prefix.
   *
   * @throws Exception if tokenization fails
   */
  @Test
  public void testBNode()
  throws Exception {
    this.assertTokenizesTo(_t, "_:x74562",
      new String[] { "x74562" },
      new String[] { "<BNODE>" });
    this.assertTokenizesTo(_t, "_:node1",
      new String[] { "node1" },
      new String[] { "<BNODE>" });
    // A label starting with "http" must still be reported as a blank node.
    this.assertTokenizesTo(_t, "_:httpsaojfsd",
      new String[] { "httpsaojfsd" },
      new String[] { "<BNODE>" });
    this.assertTokenizesTo(_t, "_:asd",
      new String[] { "asd" },
      new String[] { "<BNODE>" });
  }

  /**
   * Checks that a double-quoted literal yields a single {@code <LITERAL>}
   * token whose image is the quoted content, with inner whitespace, case and
   * embedded URIs kept as-is, and with escaped Unicode code points decoded.
   *
   * @throws Exception if tokenization fails
   */
  @Test
  public void testLiteral()
  throws Exception {
    this.assertTokenizesTo(_t, "\"Renaud\"",
      new String[] { "Renaud" },
      new String[] { "<LITERAL>" });
    this.assertTokenizesTo(_t, "\"1 and 2\"",
      new String[] { "1 and 2" },
      new String[] { "<LITERAL>" });
    this.assertTokenizesTo(_t, "\"renaud http://test/ \"",
      new String[] { "renaud http://test/ " },
      new String[] { "<LITERAL>" });
    this.assertTokenizesTo(_t, "\"foo bar FOO BAR\"",
      new String[] { "foo bar FOO BAR" },
      new String[] { "<LITERAL>" });
    // The input holds literal backslash escapes (not Java-level escapes);
    // the expected image holds the decoded characters.
    this.assertTokenizesTo(_t, "\"ABC\\u0061\\u0062\\u0063\\u00E9\\u00e9ABC\"",
      new String[] { "ABCabcééABC" },
      new String[] { "<LITERAL>" });
  }

  /**
   * Checks that a dot outside of a literal yields a {@code <DOT>} token,
   * whether or not it is separated from the previous token by whitespace,
   * and that a dot inside a quoted literal stays part of the literal image.
   *
   * @throws Exception if tokenization fails
   */
  @Test
  public void testDot()
  throws Exception {
    this.assertTokenizesTo(_t, "<http://te.st> . \"ren . aud\" . ",
      new String[] { "http://te.st", ".", "ren . aud", "." },
      new String[] { "<URI>", "<DOT>", "<LITERAL>", "<DOT>" });
    this.assertTokenizesTo(_t, "<aaa> \"bbb\". <bbb> <aaa>. <ccc> .",
      new String[] { "aaa", "bbb", ".", "bbb", "aaa", ".", "ccc", "." },
      new String[] { "<URI>", "<LITERAL>", "<DOT>", "<URI>", "<URI>", "<DOT>",
                     "<URI>", "<DOT>" });
  }

  /**
   * Checks that a language tag appended to a literal ({@code "..."@lang})
   * does not produce a token of its own and is not included in the literal
   * image, even when the literal content itself contains an {@code @}.
   *
   * <p>TODO: once a LanguageTagAttribute exists, also assert that the
   * language tag is assigned correctly.
   *
   * @throws Exception if tokenization fails
   */
  @Test
  public void testLanguage()
  throws Exception {
    this.assertTokenizesTo(_t, "\"test\"@en",
      new String[] { "test" },
      new String[] { "<LITERAL>" });
    // Exactly one token is expected, so exactly one expected type is given:
    // the expectation arrays are parallel and must have equal lengths.
    this.assertTokenizesTo(_t, "\"toto@titi.fr \"@fr",
      new String[] { "toto@titi.fr " },
      new String[] { "<LITERAL>" });
  }

  /**
   * Checks the datatype reported for each kind of token: the XSD anyURI
   * constant for a URI, the XSD string constant for a plain literal, the
   * empty string for a blank node, and the URI following {@code ^^} for an
   * explicitly typed literal.
   *
   * @throws Exception if tokenization fails
   */
  @Test
  public void testDatatype()
  throws Exception {
    this.assertTokenizesTo(_t, "<http://test>",
      new String[] { "http://test" },
      new String[] { "<URI>" },
      new String[] { XSDDatatype.XSD_ANY_URI });
    this.assertTokenizesTo(_t, "\"test\"",
      new String[] { "test" },
      new String[] { "<LITERAL>" },
      new String[] { XSDDatatype.XSD_STRING });
    this.assertTokenizesTo(_t, "_:bnode1",
      new String[] { "bnode1" },
      new String[] { "<BNODE>" },
      new String[] { "" });
    this.assertTokenizesTo(_t, "\"test\"^^<http://type/test>",
      new String[] { "test" },
      new String[] { "<LITERAL>" },
      new String[] { "http://type/test" });
    // A "^^" inside the quotes belongs to the literal image; only the one
    // after the closing quote introduces the datatype.
    this.assertTokenizesTo(_t, "\"te^^st\"^^<" + XSDDatatype.XSD_NAME + ">",
      new String[] { "te^^st" },
      new String[] { "<LITERAL>" },
      new String[] { XSDDatatype.XSD_NAME });
  }

  /**
   * Checks the node path attached to each token. In the expected values the
   * first path component increases after every dot and the second component
   * counts the tokens since the previous dot (the dot itself included).
   *
   * <p>NOTE(review): the {@code int[]} argument is presumably the expected
   * position increment of each token; confirm against
   * {@code NodeTokenizerTestCase}.
   *
   * @throws Exception if tokenization fails
   */
  @Test
  public void testStructuralNode()
  throws Exception {
    this.assertTokenizesTo(_t, "<http://renaud.delbru.fr/>",
      new String[] { "http://renaud.delbru.fr/" },
      new String[] { "<URI>" },
      new int[] { 1 },
      new IntsRef[] { node(0,0) });
    this.assertTokenizesTo(_t,
      "<http://renaud.delbru.fr/> <http://renaud.delbru.fr/>",
      new String[] { "http://renaud.delbru.fr/", "http://renaud.delbru.fr/" },
      new String[] { "<URI>", "<URI>" },
      new int[] { 1, 1 },
      new IntsRef[] { node(0,0), node(0,1) });
    this.assertTokenizesTo(_t, "_:a1 _:a2 . _:a3 _:a4 . _:a5 _:a6 ",
      new String[] { "a1", "a2", ".", "a3", "a4", ".", "a5", "a6" },
      new String[] { "<BNODE>", "<BNODE>", "<DOT>", "<BNODE>", "<BNODE>",
                     "<DOT>", "<BNODE>", "<BNODE>" },
      new int[] { 1, 1, 1, 1, 1, 1, 1, 1 },
      new IntsRef[] { node(0,0), node(0,1), node(0,2),
                      node(1,0), node(1,1), node(1,2),
                      node(2,0), node(2,1) });
    this.assertTokenizesTo(_t, "<http://te.st> . \"ren . aud\" . ",
      new String[] { "http://te.st", ".", "ren . aud", "." },
      new String[] { "<URI>", "<DOT>", "<LITERAL>", "<DOT>" },
      new int[] { 1, 1, 1, 1 },
      new IntsRef[] { node(0,0), node(0,1), node(1,0), node(1,1) });
  }

}