package org.activityinfo.core.shared.importing.match;
/*
* #%L
* ActivityInfo Server
* %%
* Copyright (C) 2009 - 2013 UNICEF
* %%
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program. If not, see
* <http://www.gnu.org/licenses/gpl-3.0.html>.
* #L%
*/
import com.google.common.collect.Maps;
import org.activityinfo.core.shared.importing.model.ImportModel;
import org.activityinfo.core.shared.importing.model.MapExistingAction;
import org.activityinfo.core.shared.importing.source.SourceColumn;
import org.activityinfo.core.shared.importing.strategy.ImportTarget;
import org.activityinfo.model.expr.StringUtil;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
/**
 * Guesses which {@link ImportTarget} each {@link SourceColumn} should be bound to by
 * comparing the column header with every target label using the distance reported by
 * {@link StringUtil#getLevenshteinDistance}.
 *
 * <p>A target is handed to at most one source column: when several columns select the
 * same target as their closest match, only the column with the smallest distance keeps it
 * and the other columns are left unmapped.
 *
 * @author yuriyz on 5/7/14.
 */
public class ColumnMappingGuesser {

    private final ImportModel importModel;
    private final List<ImportTarget> importTargets;

    /**
     * @param importModel   model that supplies the source columns and receives the guessed bindings
     * @param importTargets candidate targets whose labels are compared with the column headers
     */
    public ColumnMappingGuesser(ImportModel importModel, List<ImportTarget> importTargets) {
        this.importModel = importModel;
        this.importTargets = importTargets;
    }

    /**
     * Computes the guessed mapping (see {@link #getMapping()}) and binds each guessed column
     * on the import model with a {@link MapExistingAction}. Columns that already have an entry
     * in {@code importModel.getColumnActions()} are skipped so existing bindings are never
     * overridden.
     */
    public void guess() {
        final Map<SourceColumn, ImportTarget> mapping = getMapping();

        // drop guesses for columns that are already bound (we don't want to override existing bindings)
        for (SourceColumn sourceColumn : importModel.getColumnActions().keySet()) {
            mapping.remove(sourceColumn);
        }

        // apply the remaining guesses
        for (Map.Entry<SourceColumn, ImportTarget> entry : mapping.entrySet()) {
            importModel.setColumnBinding(new MapExistingAction(entry.getValue()), entry.getKey());
        }
    }

    /**
     * Builds the guessed source-column-to-target mapping.
     *
     * <p>Every source column first selects its closest target. If several columns select the
     * same target, the column with the smallest distance wins; on an equal distance the column
     * that comes later in {@code importModel.getSource().getColumns()} wins. Columns that lose
     * are not offered their second-best target and do not appear in the result.
     *
     * <p>Insertion-ordered maps are used throughout so that tie-breaking and the iteration
     * order of the result follow the source column order instead of hash ordering.
     *
     * @return a new, mutable map from source column to its guessed target
     */
    public Map<SourceColumn, ImportTarget> getMapping() {
        final Map<ImportTarget, TreeMap<Integer, SourceColumn>> candidatesByTarget =
                groupByTarget(findClosestTargets());

        final Map<SourceColumn, ImportTarget> mapping = Maps.newLinkedHashMap();
        for (Map.Entry<ImportTarget, TreeMap<Integer, SourceColumn>> entry : candidatesByTarget.entrySet()) {
            final TreeMap<Integer, SourceColumn> candidates = entry.getValue();
            if (!candidates.isEmpty()) {
                // the first entry holds the source column with the lowest distance to this target
                mapping.put(firstEntry(candidates).getValue(), entry.getKey());
            }
        }
        return mapping;
    }

    /**
     * Computes the distance from the given label to the label of every import target.
     *
     * <p>The map is keyed by distance, so targets that share the same distance collapse into
     * a single entry; the target that appears later in the target list replaces the earlier one.
     *
     * @param sourceLabel header of the source column
     * @return targets keyed by their distance to {@code sourceLabel}, lowest distance first;
     *         empty when there are no import targets
     */
    public TreeMap<Integer, ImportTarget> getDistanceMap(String sourceLabel) {
        final TreeMap<Integer, ImportTarget> distanceMap = Maps.newTreeMap();
        for (ImportTarget target : importTargets) {
            final String targetLabel = target.getLabel();
            final int distance = StringUtil.getLevenshteinDistance(sourceLabel, targetLabel);
            distanceMap.put(distance, target);
        }
        return distanceMap;
    }

    /** Records, per source column and in source column order, its closest target keyed by distance. */
    private Map<SourceColumn, TreeMap<Integer, ImportTarget>> findClosestTargets() {
        final Map<SourceColumn, TreeMap<Integer, ImportTarget>> closestBySource = Maps.newLinkedHashMap();
        for (SourceColumn sourceColumn : importModel.getSource().getColumns()) {
            final TreeMap<Integer, ImportTarget> distanceMap = getDistanceMap(sourceColumn.getHeader());
            if (distanceMap.isEmpty()) {
                continue;
            }
            final Map.Entry<Integer, ImportTarget> closest = firstEntry(distanceMap);

            // NOTE: an earlier revision ignored matches whose distance was not smaller than both
            // the header length and the target label length. That filter is disabled, so every
            // column keeps its closest target no matter how large the distance is.
            TreeMap<Integer, ImportTarget> bestForSource = closestBySource.get(sourceColumn);
            if (bestForSource == null) {
                bestForSource = Maps.newTreeMap();
                closestBySource.put(sourceColumn, bestForSource);
            }
            bestForSource.put(closest.getKey(), closest.getValue());
        }
        return closestBySource;
    }

    /**
     * Inverts the per-source result: for each target, collects the source columns that selected
     * it as their closest match, keyed by distance (different source columns may select the
     * same target).
     */
    private static Map<ImportTarget, TreeMap<Integer, SourceColumn>> groupByTarget(
            Map<SourceColumn, TreeMap<Integer, ImportTarget>> closestBySource) {
        final Map<ImportTarget, TreeMap<Integer, SourceColumn>> candidatesByTarget = Maps.newLinkedHashMap();
        for (Map.Entry<SourceColumn, TreeMap<Integer, ImportTarget>> entry : closestBySource.entrySet()) {
            final TreeMap<Integer, ImportTarget> bestForSource = entry.getValue();
            if (bestForSource.isEmpty()) {
                continue;
            }
            final Map.Entry<Integer, ImportTarget> best = firstEntry(bestForSource); // lowest distance
            TreeMap<Integer, SourceColumn> candidates = candidatesByTarget.get(best.getValue());
            if (candidates == null) {
                candidates = Maps.newTreeMap();
                candidatesByTarget.put(best.getValue(), candidates);
            }
            // on equal distance the column processed later replaces the earlier one
            candidates.put(best.getKey(), entry.getKey());
        }
        return candidatesByTarget;
    }

    /** Returns the entry with the lowest key; the map must not be empty. */
    private static <V> Map.Entry<Integer, V> firstEntry(TreeMap<Integer, V> map) {
        return map.entrySet().iterator().next();
    }
}