/**
* AnalyzerBeans
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.eobjects.analyzer.beans.stringpattern;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.atomic.AtomicInteger;
import org.eobjects.analyzer.data.InputColumn;
import org.eobjects.analyzer.result.AnalyzerResultReducer;
import org.eobjects.analyzer.result.Crosstab;
import org.eobjects.analyzer.result.CrosstabDimension;
import org.eobjects.analyzer.result.CrosstabNavigator;
/**
* Result reducer for {@link PatternFinderResult}s
*/
public class PatternFinderResultReducer implements AnalyzerResultReducer<PatternFinderResult> {
@Override
public PatternFinderResult reduce(Collection<? extends PatternFinderResult> results) {
final PatternFinderResult firstResult = results.iterator().next();
final InputColumn<String> column = firstResult.getColumn();
final TokenizerConfiguration tokenizerConfiguration = firstResult.getTokenizerConfiguration();
if (!firstResult.isGroupingEnabled()) {
// a single list of patterns
final List<Crosstab<?>> crosstabs = new ArrayList<Crosstab<?>>(results.size());
for (PatternFinderResult result : results) {
Crosstab<?> crosstab = result.getSingleCrosstab();
crosstabs.add(crosstab);
}
final Crosstab<?> crosstab = reduce(crosstabs, tokenizerConfiguration);
return new PatternFinderResult(column, crosstab, tokenizerConfiguration);
} else {
// groups of lists of patterns
final Map<String, List<Crosstab<?>>> groupedCrosstabs = new HashMap<String, List<Crosstab<?>>>();
for (PatternFinderResult result : results) {
final Set<Entry<String, Crosstab<?>>> entries = result.getGroupedCrosstabs().entrySet();
for (Entry<String, Crosstab<?>> entry : entries) {
final String group = entry.getKey();
List<Crosstab<?>> crosstabsInGroup = groupedCrosstabs.get(group);
if (crosstabsInGroup == null) {
crosstabsInGroup = new ArrayList<Crosstab<?>>();
groupedCrosstabs.put(group, crosstabsInGroup);
}
crosstabsInGroup.add(entry.getValue());
}
}
final Map<String, Crosstab<?>> crosstabs = new TreeMap<String, Crosstab<?>>();
final Set<Entry<String, List<Crosstab<?>>>> entries = groupedCrosstabs.entrySet();
for (Entry<String, List<Crosstab<?>>> entry : entries) {
final String group = entry.getKey();
final List<Crosstab<?>> crosstabInGroup = entry.getValue();
final Crosstab<?> crosstab = reduce(crosstabInGroup, tokenizerConfiguration);
crosstabs.put(group, crosstab);
}
final InputColumn<String> groupColumn = firstResult.getGroupColumn();
return new PatternFinderResult(column, groupColumn, crosstabs, tokenizerConfiguration);
}
}
private Crosstab<?> reduce(List<Crosstab<?>> crosstabs, TokenizerConfiguration tokenizerConfiguration) {
if (crosstabs.size() == 1) {
return crosstabs.get(0);
}
final ReversePatternFinder patternFinder = new ReversePatternFinder(tokenizerConfiguration);
for (Crosstab<?> crosstab : crosstabs) {
final CrosstabDimension patternDimension = crosstab
.getDimension(PatternFinderAnalyzer.DIMENSION_NAME_PATTERN);
final List<String> patterns = patternDimension.getCategories();
for (String pattern : patterns) {
final CrosstabNavigator<?> navigator = crosstab.where(PatternFinderAnalyzer.DIMENSION_NAME_PATTERN,
pattern);
final Number matchCount = (Number) navigator.where(PatternFinderAnalyzer.DIMENSION_NAME_MEASURES,
PatternFinderAnalyzer.MEASURE_MATCH_COUNT).get();
final String sample = (String) navigator.where(PatternFinderAnalyzer.DIMENSION_NAME_MEASURES,
PatternFinderAnalyzer.MEASURE_SAMPLE).get();
patternFinder.run(sample, pattern, matchCount.intValue());
}
}
final Set<Entry<TokenPattern, AtomicInteger>> entries = patternFinder.getPatternCounts().entrySet();
// sort the entries so that the ones with the highest amount of
// matches are at the top
final Set<Entry<TokenPattern, AtomicInteger>> sortedEntrySet = new TreeSet<Entry<TokenPattern, AtomicInteger>>(
new Comparator<Entry<TokenPattern, AtomicInteger>>() {
public int compare(Entry<TokenPattern, AtomicInteger> o1, Entry<TokenPattern, AtomicInteger> o2) {
int result = o2.getValue().get() - o1.getValue().get();
if (result == 0) {
result = o1.getKey().toSymbolicString().compareTo(o2.getKey().toSymbolicString());
}
return result;
}
});
sortedEntrySet.addAll(entries);
final Crosstab<Serializable> crosstab = PatternFinderAnalyzer.createCrosstab();
for (Entry<TokenPattern, AtomicInteger> entry : sortedEntrySet) {
final CrosstabNavigator<Serializable> nav = crosstab.navigate();
final TokenPattern pattern = entry.getKey();
nav.where(PatternFinderAnalyzer.DIMENSION_NAME_PATTERN, pattern.toSymbolicString());
nav.where(PatternFinderAnalyzer.DIMENSION_NAME_MEASURES, PatternFinderAnalyzer.MEASURE_MATCH_COUNT);
final AtomicInteger count = entry.getValue();
nav.put(count, true);
nav.where(PatternFinderAnalyzer.DIMENSION_NAME_MEASURES, PatternFinderAnalyzer.MEASURE_SAMPLE);
final String sample = patternFinder.getSample(pattern);
nav.put(sample, true);
}
return crosstab;
}
}