/**
* Copyright 2013-2015 Pierre Merienne
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package stormy.pythian.sandbox;
import static stormy.pythian.model.annotation.ComponentType.ANALYTICS;
import static stormy.pythian.model.instance.FeatureType.INTEGER;
import static stormy.pythian.model.instance.FeatureType.TEXT;
import static stormy.pythian.model.instance.Instance.INSTANCE_FIELD;
import storm.trident.Stream;
import storm.trident.TridentState;
import storm.trident.operation.BaseFunction;
import storm.trident.operation.TridentCollector;
import storm.trident.operation.builtin.Count;
import storm.trident.operation.builtin.MapGet;
import storm.trident.state.StateFactory;
import storm.trident.tuple.TridentTuple;
import stormy.pythian.model.annotation.Documentation;
import stormy.pythian.model.annotation.ExpectedFeature;
import stormy.pythian.model.annotation.InputStream;
import stormy.pythian.model.annotation.NameMapper;
import stormy.pythian.model.annotation.OutputStream;
import stormy.pythian.model.annotation.State;
import stormy.pythian.model.component.Component;
import stormy.pythian.model.instance.Feature;
import stormy.pythian.model.instance.Instance;
import stormy.pythian.model.instance.IntegerFeature;
import stormy.pythian.model.instance.NamedFeaturesMapper;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
@Documentation(name = "Word count", type = ANALYTICS)
public class WordCount implements Component {
private static final long serialVersionUID = 1822765078810762926L;
public static final String WORD_FEATURE = "word";
public static final String COUNT_FEATURE = "count";
@InputStream(name = "in")
private Stream in;
@OutputStream(name = "out", from = "in")
private Stream out;
@NameMapper(stream = "in", expectedFeatures = { @ExpectedFeature(name = WORD_FEATURE, type = TEXT) })
private NamedFeaturesMapper inputMapper;
@NameMapper(stream = "out", expectedFeatures = { @ExpectedFeature(name = COUNT_FEATURE, type = INTEGER) })
private NamedFeaturesMapper outputMapper;
@State(name = "count state")
private StateFactory stateFactory;
@Override
public void init() {
TridentState wordCounts = in//
.each(new Fields(INSTANCE_FIELD), new ExtractFeature(WORD_FEATURE, inputMapper), new Fields(WORD_FEATURE)) //
.groupBy(new Fields(WORD_FEATURE)) //
.persistentAggregate(stateFactory, new Fields(WORD_FEATURE), new Count(), new Fields(COUNT_FEATURE)); //
out = in //
.each(new Fields(INSTANCE_FIELD), new ExtractFeature(WORD_FEATURE, inputMapper), new Fields(WORD_FEATURE)) //
.stateQuery(wordCounts, new Fields(WORD_FEATURE), new MapGet(), new Fields(COUNT_FEATURE)) //
.each(new Fields(INSTANCE_FIELD, COUNT_FEATURE), new AddCountFeature(outputMapper), new Fields(Instance.NEW_INSTANCE_FIELD));
}
@SuppressWarnings("serial")
private static class ExtractFeature extends BaseFunction {
private final String featureName;
private final NamedFeaturesMapper inputMapper;
public ExtractFeature(String featureName, NamedFeaturesMapper mapper) {
this.featureName = featureName;
this.inputMapper = mapper;
}
@Override
public void execute(TridentTuple tuple, TridentCollector collector) {
Instance instance = Instance.get(tuple, inputMapper);
Feature<?> feature = instance.getFeature(featureName);
collector.emit(new Values(feature.textValue()));
}
}
@SuppressWarnings("serial")
private static class AddCountFeature extends BaseFunction {
private final NamedFeaturesMapper outMappings;
public AddCountFeature(NamedFeaturesMapper outMapper) {
this.outMappings = outMapper;
}
@Override
public void execute(TridentTuple tuple, TridentCollector collector) {
Long count = tuple.getLongByField(COUNT_FEATURE);
Instance instance = Instance.get(tuple, (NamedFeaturesMapper) null, outMappings);
instance.setFeature(COUNT_FEATURE, new IntegerFeature(count));
collector.emit(new Values(instance));
}
}
}