/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.hunspell;
import java.io.ByteArrayInputStream;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.CharSequenceOutputs;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Outputs;
import org.apache.lucene.util.fst.Util;
public class TestDictionary extends LuceneTestCase {
public void testSimpleDictionary() throws Exception {
InputStream affixStream = getClass().getResourceAsStream("simple.aff");
InputStream dictStream = getClass().getResourceAsStream("simple.dic");
Directory tempDir = getDirectory();
Dictionary dictionary = new Dictionary(tempDir, "dictionary", affixStream, dictStream);
assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).length);
assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).length);
IntsRef ordList = dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3);
assertNotNull(ordList);
assertEquals(1, ordList.length);
BytesRef ref = new BytesRef();
dictionary.flagLookup.get(ordList.ints[0], ref);
char flags[] = Dictionary.decodeFlags(ref);
assertEquals(1, flags.length);
ordList = dictionary.lookupWord(new char[]{'l', 'u', 'c', 'e', 'n'}, 0, 5);
assertNotNull(ordList);
assertEquals(1, ordList.length);
dictionary.flagLookup.get(ordList.ints[0], ref);
flags = Dictionary.decodeFlags(ref);
assertEquals(1, flags.length);
affixStream.close();
dictStream.close();
tempDir.close();
}
public void testCompressedDictionary() throws Exception {
InputStream affixStream = getClass().getResourceAsStream("compressed.aff");
InputStream dictStream = getClass().getResourceAsStream("compressed.dic");
Directory tempDir = getDirectory();
Dictionary dictionary = new Dictionary(tempDir, "dictionary", affixStream, dictStream);
assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).length);
assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).length);
IntsRef ordList = dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3);
BytesRef ref = new BytesRef();
dictionary.flagLookup.get(ordList.ints[0], ref);
char flags[] = Dictionary.decodeFlags(ref);
assertEquals(1, flags.length);
affixStream.close();
dictStream.close();
tempDir.close();
}
public void testCompressedBeforeSetDictionary() throws Exception {
InputStream affixStream = getClass().getResourceAsStream("compressed-before-set.aff");
InputStream dictStream = getClass().getResourceAsStream("compressed.dic");
Directory tempDir = getDirectory();
Dictionary dictionary = new Dictionary(tempDir, "dictionary", affixStream, dictStream);
assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).length);
assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).length);
IntsRef ordList = dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3);
BytesRef ref = new BytesRef();
dictionary.flagLookup.get(ordList.ints[0], ref);
char flags[] = Dictionary.decodeFlags(ref);
assertEquals(1, flags.length);
affixStream.close();
dictStream.close();
tempDir.close();
}
public void testCompressedEmptyAliasDictionary() throws Exception {
InputStream affixStream = getClass().getResourceAsStream("compressed-empty-alias.aff");
InputStream dictStream = getClass().getResourceAsStream("compressed.dic");
Directory tempDir = getDirectory();
Dictionary dictionary = new Dictionary(tempDir, "dictionary", affixStream, dictStream);
assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).length);
assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).length);
IntsRef ordList = dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3);
BytesRef ref = new BytesRef();
dictionary.flagLookup.get(ordList.ints[0], ref);
char flags[] = Dictionary.decodeFlags(ref);
assertEquals(1, flags.length);
affixStream.close();
dictStream.close();
tempDir.close();
}
// malformed rule causes ParseException
public void testInvalidData() throws Exception {
InputStream affixStream = getClass().getResourceAsStream("broken.aff");
InputStream dictStream = getClass().getResourceAsStream("simple.dic");
Directory tempDir = getDirectory();
ParseException expected = expectThrows(ParseException.class, () -> {
new Dictionary(tempDir, "dictionary", affixStream, dictStream);
});
assertTrue(expected.getMessage().startsWith("The affix file contains a rule with less than four elements"));
assertEquals(24, expected.getErrorOffset());
affixStream.close();
dictStream.close();
tempDir.close();
}
// malformed flags causes ParseException
public void testInvalidFlags() throws Exception {
InputStream affixStream = getClass().getResourceAsStream("broken-flags.aff");
InputStream dictStream = getClass().getResourceAsStream("simple.dic");
Directory tempDir = getDirectory();
Exception expected = expectThrows(Exception.class, () -> {
new Dictionary(tempDir, "dictionary", affixStream, dictStream);
});
assertTrue(expected.getMessage().startsWith("expected only one flag"));
affixStream.close();
dictStream.close();
tempDir.close();
}
private static class CloseCheckInputStream extends FilterInputStream {
private boolean closed = false;
public CloseCheckInputStream(InputStream delegate) {
super(delegate);
}
@Override
public void close() throws IOException {
this.closed = true;
super.close();
}
public boolean isClosed() {
return this.closed;
}
}
public void testResourceCleanup() throws Exception {
CloseCheckInputStream affixStream = new CloseCheckInputStream(getClass().getResourceAsStream("compressed.aff"));
CloseCheckInputStream dictStream = new CloseCheckInputStream(getClass().getResourceAsStream("compressed.dic"));
Directory tempDir = getDirectory();
new Dictionary(tempDir, "dictionary", affixStream, dictStream);
assertFalse(affixStream.isClosed());
assertFalse(dictStream.isClosed());
affixStream.close();
dictStream.close();
tempDir.close();
assertTrue(affixStream.isClosed());
assertTrue(dictStream.isClosed());
}
public void testReplacements() throws Exception {
Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
Builder<CharsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
IntsRefBuilder scratchInts = new IntsRefBuilder();
// a -> b
Util.toUTF16("a", scratchInts);
builder.add(scratchInts.get(), new CharsRef("b"));
// ab -> c
Util.toUTF16("ab", scratchInts);
builder.add(scratchInts.get(), new CharsRef("c"));
// c -> de
Util.toUTF16("c", scratchInts);
builder.add(scratchInts.get(), new CharsRef("de"));
// def -> gh
Util.toUTF16("def", scratchInts);
builder.add(scratchInts.get(), new CharsRef("gh"));
FST<CharsRef> fst = builder.finish();
StringBuilder sb = new StringBuilder("atestanother");
Dictionary.applyMappings(fst, sb);
assertEquals("btestbnother", sb.toString());
sb = new StringBuilder("abtestanother");
Dictionary.applyMappings(fst, sb);
assertEquals("ctestbnother", sb.toString());
sb = new StringBuilder("atestabnother");
Dictionary.applyMappings(fst, sb);
assertEquals("btestcnother", sb.toString());
sb = new StringBuilder("abtestabnother");
Dictionary.applyMappings(fst, sb);
assertEquals("ctestcnother", sb.toString());
sb = new StringBuilder("abtestabcnother");
Dictionary.applyMappings(fst, sb);
assertEquals("ctestcdenother", sb.toString());
sb = new StringBuilder("defdefdefc");
Dictionary.applyMappings(fst, sb);
assertEquals("ghghghde", sb.toString());
}
public void testSetWithCrazyWhitespaceAndBOMs() throws Exception {
assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("SET\tUTF-8\n".getBytes(StandardCharsets.UTF_8))));
assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("SET\t UTF-8\n".getBytes(StandardCharsets.UTF_8))));
assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("\uFEFFSET\tUTF-8\n".getBytes(StandardCharsets.UTF_8))));
assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("\uFEFFSET\tUTF-8\r\n".getBytes(StandardCharsets.UTF_8))));
}
public void testFlagWithCrazyWhitespace() throws Exception {
assertNotNull(Dictionary.getFlagParsingStrategy("FLAG\tUTF-8"));
assertNotNull(Dictionary.getFlagParsingStrategy("FLAG UTF-8"));
}
private Directory getDirectory() {
return newDirectory();
}
}