/**
* Copyright (C) 2012 cogroo <cogroo@cogroo.org>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.cogroo.tools.featurizer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import opennlp.tools.ml.model.Event;
import opennlp.tools.util.AbstractEventStream;
import opennlp.tools.util.ObjectStream;
/**
* Class for creating an event stream out of {@link FeatureSample} data for
* training a featurizer model.
*/
public class FeaturizerEventStream extends AbstractEventStream<FeatureSample> {

  /** Produces the context handed to every event generated by this stream. */
  private final FeaturizerContextGenerator contextGenerator;

  /**
   * Builds an event stream that reads samples from the given data stream and
   * derives event contexts with the given context generator.
   *
   * @param d
   *          The stream of samples the events are created from.
   * @param cg
   *          The context generator used to build the context of each event.
   */
  public FeaturizerEventStream(ObjectStream<FeatureSample> d,
      FeaturizerContextGenerator cg) {
    super(d);
    this.contextGenerator = cg;
  }

  /**
   * Converts one sample into its events: one event per position of the
   * sample's sentence, with the feature at that position as the outcome and
   * the context computed by the context generator for that position.
   *
   * @param sample
   *          The sample to convert; may be {@code null}.
   * @return An iterator over the created events, in sentence order, or an
   *         empty iterator when {@code sample} is {@code null}.
   */
  @Override
  protected Iterator<Event> createEvents(FeatureSample sample) {
    // Nothing to convert: hand back an empty iterator instead of null.
    if (sample == null) {
      return Collections.emptyListIterator();
    }

    final String[] tokens = sample.getSentence();
    final String[] tags = sample.getTags();
    final String[] features = sample.getFeatures();

    final int tokenCount = tokens.length;
    final List<Event> result = new ArrayList<>(tokenCount);
    for (int i = 0; i < tokenCount; i++) {
      // Read the outcome before computing the context, as the event
      // constructor arguments are evaluated left to right.
      final String outcome = features[i];
      final String[] context =
          contextGenerator.getContext(i, tokens, tags, features);
      result.add(new Event(outcome, context));
    }
    return result.iterator();
  }
}