/*
* (C) Copyright 2011 Nuxeo SA (http://nuxeo.com/) and others.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Contributors:
* Florent Guillaume
*/
package org.nuxeo.ecm.core.storage;
import static org.junit.Assert.assertEquals;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang.StringUtils;
import org.junit.Test;
import org.nuxeo.runtime.test.NXRuntimeTestCase;
public class TestDefaultFulltextParser extends NXRuntimeTestCase {
protected void check(String expected, String s, String mimeType) {
FulltextParser parser = new DefaultFulltextParser();
List<String> strings = new ArrayList<String>();
parser.parse(s, "fakepath", mimeType, null, strings);
assertEquals(expected, StringUtils.join(strings, "|"));
}
@Test
public void testDefaultParser() throws Exception {
check("abc", "abc", null);
check("abc|def", "abc def", null);
check("abc|def", " abc def ", null);
check("abc|def", " -,abc DEF?? !", null);
// accents left alone
check("hot|caf\u00e9", "hot CAF\u00c9", null);
// check html removal and entities unescape
check("test|é|test", "test é test", null);
check("test|é|test", "test é test", "text/html");
check("test|é|test", "<html>test é test</html>", null);
check("test|p|style|something|é|p|test", "test <p style=\"something\">é</p> test", null);
check("test|é|test", "test <p style=\"something\">é</p> test", "text/html");
check("test|é|test", "<html>test <p style=\"something\">é</p> test</html>", null);
}
}