/**
* Copyright (C) 2012 cogroo <cogroo@cogroo.org>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.cogroo.dictionary.impl;
import java.io.ByteArrayInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Scanner;
import morfologik.stemming.Dictionary;
import morfologik.stemming.DictionaryLookup;
import morfologik.stemming.WordData;
import opennlp.tools.util.Cache;
import org.cogroo.dictionary.FeatureDictionary;
import org.cogroo.tools.featurizer.WordTag;
import static org.cogroo.util.ByteArrayUtil.toByteArray;
/**
 * A {@link FeatureDictionary} backed by a morfologik {@link DictionaryLookup}.
 *
 * <p>Each dictionary entry carries a tag that this class treats as
 * <code>postag#features</code>: the text before the first <code>#</code> is
 * matched against the requested POS tag and the text after it is returned as
 * the feature string.
 *
 * <p>All lookups (and the small result cache) are guarded by the monitor of the
 * wrapped {@link DictionaryLookup}. NOTE(review): the iterator returned by
 * {@link #iterator()} is not guarded by that monitor; confirm whether callers
 * iterate concurrently with lookups.
 */
public class FSAFeatureDictionary implements FeatureDictionary, Iterable<WordTag> {

  /** Separates the POS tag from the features inside a dictionary tag. */
  private static final String TAG_SEPARATOR = "#";

  private final DictionaryLookup dictLookup;

  /** Recently computed results, keyed by {@link WordTag}; built with a size argument of 500. */
  private final Cache cache = new Cache(500);

  /**
   * Creates a feature dictionary on top of an already loaded lookup.
   *
   * @param dictLookup the morfologik lookup to query
   */
  public FSAFeatureDictionary(DictionaryLookup dictLookup) {
    this.dictLookup = dictLookup;
  }

  /**
   * Resolves the features for a word/POS pair, consulting the cache first.
   *
   * @param key the word and (optionally null) POS tag to look up
   * @return a fresh array with the matching feature strings (possibly empty),
   *         or <code>null</code> if the key is null or the word has no entries
   */
  private String[] lookup(WordTag key) {
    if (key == null) {
      return null;
    }

    // The cache is read and written under the same lock as the dictionary so
    // that it is never accessed concurrently.
    synchronized (dictLookup) {
      String[] cached = (String[]) cache.get(key);
      if (cached != null) {
        // Hand out a copy so a caller modifying the result cannot corrupt the cache.
        return cached.clone();
      }

      List<WordData> data = dictLookup.lookup(key.getWord());
      if (data.isEmpty()) {
        return null;
      }

      // A null POS tag means "accept every entry"; no prefix is needed then.
      String postag = key.getPostag();
      String prefix = postag == null ? null : postag + TAG_SEPARATOR;

      List<String> tags = new ArrayList<String>(data.size());
      for (WordData entry : data) {
        // Converted to String while still holding the lock, as the original did.
        String completeTag = entry.getTag().toString();
        if (prefix == null || completeTag.startsWith(prefix)) {
          tags.add(completeTag.substring(completeTag.indexOf(TAG_SEPARATOR) + 1));
        }
      }

      String[] features = tags.toArray(new String[tags.size()]);
      // Previously the cache was only ever read, so it never produced a hit.
      cache.put(key, features);
      return features.clone();
    }
  }

  /**
   * Returns the features registered for a word, restricted to a POS tag.
   *
   * @param word the word to look up
   * @param pos the POS tag the entries must start with; if <code>null</code>,
   *        the features of all entries of the word are returned
   * @return the feature strings (empty if no entry matches the POS tag), or
   *         <code>null</code> if the word has no entries in the dictionary
   */
  public String[] getFeatures(String word, String pos) {
    return lookup(new WordTag(word, pos));
  }

  /**
   * Loads a dictionary from the file system. The features (metadata) file is
   * located with {@link Dictionary#getExpectedFeaturesName(String)}.
   *
   * @param path path of the FSA dictionary file
   * @return the loaded dictionary
   * @throws IllegalArgumentException propagated from the dictionary loader
   * @throws IOException if either file cannot be opened or read
   */
  public static FeatureDictionary create(String path)
      throws IllegalArgumentException, IOException {
    FileInputStream fsaData = new FileInputStream(path);
    FileInputStream featuresData = null;
    boolean opened = false;
    try {
      featuresData = new FileInputStream(
          Dictionary.getExpectedFeaturesName(path));
      opened = true;
    } finally {
      if (!opened) {
        // Do not leak the first stream when the second one cannot be opened.
        closeQuietly(fsaData);
      }
    }
    // From here on the streams are handed to Dictionary.readAndClose.
    return create(fsaData, featuresData);
  }

  /**
   * Reads the features (metadata) file that belongs to a dictionary.
   *
   * @param path path of the FSA dictionary file
   * @return the raw bytes of the associated features file
   * @throws IOException if the file cannot be opened or read
   */
  public static byte[] getFSADictionaryInfo(String path) throws IOException {
    return readFile(Dictionary.getExpectedFeaturesName(path));
  }

  /**
   * Reads the FSA dictionary file itself.
   *
   * @param path path of the FSA dictionary file
   * @return the raw bytes of the file
   * @throws IOException if the file cannot be opened or read
   */
  public static byte[] getFSADictionaryData(String path) throws IOException {
    return readFile(path);
  }

  /**
   * Creates a dictionary from two streams. The streams are passed to
   * {@link Dictionary#readAndClose(InputStream, InputStream)}.
   *
   * @param fsaData stream with the FSA dictionary data
   * @param featuresData stream with the dictionary features (metadata)
   * @return the loaded dictionary
   * @throws IllegalArgumentException propagated from the dictionary loader
   * @throws IOException if reading fails
   */
  public static FeatureDictionary create(InputStream fsaData,
      InputStream featuresData) throws IllegalArgumentException, IOException {
    DictionaryLookup dictLookup = new DictionaryLookup(Dictionary.readAndClose(
        fsaData, featuresData));
    return new FSAFeatureDictionary(dictLookup);
  }

  /**
   * Creates a dictionary from in-memory copies of the two dictionary files.
   *
   * @param dictData bytes of the FSA dictionary
   * @param dictInfo bytes of the dictionary features (metadata)
   * @return the loaded dictionary
   * @throws IllegalArgumentException propagated from the dictionary loader
   * @throws IOException if reading fails
   */
  public static FeatureDictionary create(byte[] dictData, byte[] dictInfo)
      throws IllegalArgumentException, IOException {
    return create(new ByteArrayInputStream(dictData), new ByteArrayInputStream(
        dictInfo));
  }

  /**
   * Small interactive console for querying the dictionary. Reads
   * "word postag" pairs from standard input until "q" or end of input.
   */
  public static void main(String[] args) throws IllegalArgumentException,
      IOException {
    long start = System.nanoTime();
    FSAFeatureDictionary td = (FSAFeatureDictionary) create("../lang/pt_br/cogroo-res/fsa_dictionaries/featurizer/pt_br_feats.dict");
    System.out.println("Loading time ["
        + ((System.nanoTime() - start) / 1000000) + "ms]");

    Scanner kb = new Scanner(System.in);
    try {
      System.out.print("Enter a query: ");
      // hasNextLine() lets the loop end cleanly on end of input instead of
      // failing with NoSuchElementException.
      while (kb.hasNextLine()) {
        String input = kb.nextLine();
        if (input.equals("q")) {
          break;
        }
        if (input.equals("0")) {
          input = "casa";
        }
        String[] parts = input.split("\\s+");
        if (parts.length == 2) {
          System.out.println(Arrays.toString(td.getFeatures(parts[0], parts[1])));
        } else {
          System.out.println("invalid... enter a space separated word + postag");
        }
        System.out.print("Enter a query: ");
      }
    } finally {
      kb.close();
    }
  }

  /**
   * Adapts an iterator over {@link WordData} to an iterator over
   * {@link WordTag}, keeping only the part of the tag after the first
   * <code>#</code>.
   */
  private static class IteratorWrapper implements Iterator<WordTag> {
    private final Iterator<WordData> innerIterator;

    public IteratorWrapper(Iterator<WordData> iterator) {
      this.innerIterator = iterator;
    }

    public boolean hasNext() {
      return innerIterator.hasNext();
    }

    /**
     * @return the next entry as a {@link WordTag}, or <code>null</code> if the
     *         underlying iterator produced a null element
     */
    public WordTag next() {
      WordData wd = innerIterator.next();
      if (wd == null) {
        return null;
      }
      String completeTag = wd.getTag().toString();
      String features = completeTag.substring(completeTag.indexOf(TAG_SEPARATOR) + 1);
      return new WordTag(wd.getWord().toString(), features);
    }

    public void remove() {
      innerIterator.remove();
    }
  }

  /**
   * Iterates over the entries of the underlying dictionary.
   *
   * @return an iterator producing one {@link WordTag} per dictionary entry
   */
  public Iterator<WordTag> iterator() {
    return new IteratorWrapper(this.dictLookup.iterator());
  }

  /**
   * Loads a dictionary from class path resources, resolved relative to this
   * class. The features resource name is derived with
   * {@link Dictionary#getExpectedFeaturesName(String)}.
   *
   * @param path resource name of the FSA dictionary
   * @return the loaded dictionary
   * @throws IllegalArgumentException propagated from the dictionary loader
   * @throws IOException if a resource is missing or cannot be read
   */
  public static FeatureDictionary createFromResources(String path)
      throws IllegalArgumentException, IOException {
    InputStream dic = null;
    InputStream info = null;
    try {
      dic = openResource(path);
      info = openResource(Dictionary.getExpectedFeaturesName(path));
      return create(dic, info);
    } finally {
      // Closing again after the loader is harmless and covers the failure paths.
      closeQuietly(dic);
      closeQuietly(info);
    }
  }

  /** Opens a class path resource, failing with an IOException if it is absent. */
  private static InputStream openResource(String name) throws IOException {
    InputStream in = FSAFeatureDictionary.class.getResourceAsStream(name);
    if (in == null) {
      throw new IOException("Dictionary resource not found: " + name);
    }
    return in;
  }

  /** Reads a whole file into memory and always closes the stream. */
  private static byte[] readFile(String path) throws IOException {
    FileInputStream in = new FileInputStream(path);
    try {
      return toByteArray(in);
    } finally {
      closeQuietly(in);
    }
  }

  /** Closes a stream, tolerating null and ignoring failures during close. */
  private static void closeQuietly(InputStream in) {
    if (in == null) {
      return;
    }
    try {
      in.close();
    } catch (IOException ignored) {
      // Nothing useful can be done here; the primary result or exception
      // of the caller must not be masked by a failing close.
    }
  }
}