/**
 * Copyright 2014 National University of Ireland, Galway.
 *
 * This file is part of the SIREn project. Project and contact information:
 *
 *  https://github.com/rdelbru/SIREn
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.sindice.siren.analysis.filter;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.junit.Test;
import org.sindice.siren.analysis.TupleTokenizer;

/**
 * Unit tests for {@link MailtoFilter} stacked on top of a
 * {@link TupleTokenizer}.
 *
 * <p>Each test feeds a tuple string to the tokenizer, wraps it in a
 * {@link MailtoFilter} and compares the leading tokens of the filtered stream
 * (term text, token type and, optionally, position increment) with the
 * expected values.
 */
public class TestMailtoFilter {

  /** Token type string that {@link TupleTokenizer} associates with URIs. */
  private final String uritype = TupleTokenizer.getTokenTypes()[TupleTokenizer.URI];

  /** Tokenizer reused by the helpers; its reader is replaced on every call. */
  private final Tokenizer _t = new TupleTokenizer(new StringReader(""));

  /*
   * Helpers
   */

  /**
   * Checks the term text of the filtered tokens, expecting every token to
   * carry the URI token type.
   *
   * @param t the tokenizer to feed and wrap in a {@link MailtoFilter}
   * @param uri the input text handed to the tokenizer
   * @param expectedStems the expected term text of each token, in order
   * @throws IOException if the token stream fails
   */
  private void assertURLDecodedTo(final Tokenizer t,
                                  final String uri,
                                  final String[] expectedStems)
  throws IOException {
    this.assertURLDecodedTo(t, uri, expectedStems, null);
  }

  /**
   * Checks the term text and the token type of the filtered tokens.
   *
   * @param t the tokenizer to feed and wrap in a {@link MailtoFilter}
   * @param uri the input text handed to the tokenizer
   * @param expectedStems the expected term text of each token, in order
   * @param expectedTypes the expected type of each token, or {@code null} to
   *        expect the URI type for every token
   * @throws IOException if the token stream fails
   */
  private void assertURLDecodedTo(final Tokenizer t,
                                  final String uri,
                                  final String[] expectedStems,
                                  final String[] expectedTypes)
  throws IOException {
    this.assertURLDecodedTo(t, uri, expectedStems, expectedTypes, null);
  }

  /**
   * Checks the term text, the token type and the position increment of the
   * filtered tokens.
   *
   * <p>Only the first {@code expectedStems.length} tokens are consumed and
   * verified; the method does not check whether the stream holds further
   * tokens.
   *
   * @param t the tokenizer to feed and wrap in a {@link MailtoFilter}
   * @param uri the input text handed to the tokenizer
   * @param expectedStems the expected term text of each token, in order
   * @param expectedTypes the expected type of each token, or {@code null} to
   *        expect the URI type for every token
   * @param expectedPosIncr the expected position increment of each token, or
   *        {@code null} to skip that check
   * @throws IOException if the token stream fails
   */
  private void assertURLDecodedTo(final Tokenizer t,
                                  final String uri,
                                  final String[] expectedStems,
                                  final String[] expectedTypes,
                                  final int[] expectedPosIncr)
  throws IOException {
    assertTrue("has CharTermAttribute", t.hasAttribute(CharTermAttribute.class));
    final CharTermAttribute termAtt = t.getAttribute(CharTermAttribute.class);

    assertTrue("has TypeAttribute", t.hasAttribute(TypeAttribute.class));
    final TypeAttribute typeAtt = t.getAttribute(TypeAttribute.class);

    assertTrue("has PositionIncrementAttribute",
      t.hasAttribute(PositionIncrementAttribute.class));
    final PositionIncrementAttribute posIncrAtt =
      t.getAttribute(PositionIncrementAttribute.class);

    t.setReader(new StringReader(uri));
    t.reset();

    final TokenFilter filter = new MailtoFilter(t);
    try {
      for (int i = 0; i < expectedStems.length; i++) {
        assertTrue("token " + i + " exists", filter.incrementToken());
        assertEquals(expectedStems[i], termAtt.toString());

        if (expectedTypes == null) {
          assertEquals(uritype, typeAtt.type());
        } else {
          assertEquals(expectedTypes[i], typeAtt.type());
        }

        if (expectedPosIncr != null) {
          assertEquals(expectedPosIncr[i], posIncrAtt.getPositionIncrement());
        }
      }
      filter.end();
    } finally {
      // Release the stream even when an assertion above fails, so that a
      // failing check does not leave the filter and its tokenizer open.
      filter.close();
    }
  }

  /**
   * A URI that does not use the mailto scheme is expected to come out of the
   * filter as a single, unchanged token.
   */
  @Test
  public void testNoMailto() throws Exception {
    this.assertURLDecodedTo(_t, "<http://stephane.net>",
      new String[] { "http://stephane.net" });
  }

  /**
   * A mailto URI is expected to yield the address without the scheme,
   * followed by the original URI.
   */
  @Test
  public void testMailto() throws Exception {
    this.assertURLDecodedTo(_t, "<mailto:stephane.campinas@deri.org>",
      new String[] { "stephane.campinas@deri.org",
                     "mailto:stephane.campinas@deri.org" });
  }

  /**
   * URIs that merely resemble the mailto scheme (missing colon, different
   * letter case) are expected to be left untouched.
   */
  @Test
  public void testBadMailto() throws Exception {
    this.assertURLDecodedTo(_t, "<mailto//stephane.net>",
      new String[] { "mailto//stephane.net" });
    this.assertURLDecodedTo(_t, "<mailTo:stephane.net>",
      new String[] { "mailTo:stephane.net" });
  }

  /**
   * Mixes mailto URIs with a literal and checks token types and position
   * increments: the original URI is expected at the same position as the
   * stripped address (increment 0), and the literal keeps its own type.
   */
  @Test
  public void testDifferentTypes() throws Exception {
    this.assertURLDecodedTo(_t,
      "<mailto:stephane.net> \"literal\" <mailto:stephane.campinas@deri.org>",
      new String[] { "stephane.net", "mailto:stephane.net", "literal",
                     "stephane.campinas@deri.org",
                     "mailto:stephane.campinas@deri.org" },
      new String[] { uritype, uritype,
                     TupleTokenizer.getTokenTypes()[TupleTokenizer.LITERAL],
                     uritype, uritype },
      new int[] { 1, 0, 1, 1, 0 });
  }

  /**
   * A URI shorter than the mailto prefix is expected to pass through
   * unchanged.
   */
  @Test
  public void testShortURI() throws Exception {
    this.assertURLDecodedTo(_t, "<steph>", new String[] { "steph" });
  }

}