/*******************************************************************************
 * Copyright (c) 2014 Open Door Logistics (www.opendoorlogistics.com)
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the GNU Lesser Public License v3
 * which accompanies this distribution, and is available at http://www.gnu.org/licenses/lgpl.txt
 ******************************************************************************/
package com.opendoorlogistics.core.scripts.wizard;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import com.opendoorlogistics.api.components.PredefinedTags;
import com.opendoorlogistics.api.tables.ODLColumnType;
import com.opendoorlogistics.api.tables.ODLTableDefinition;
import com.opendoorlogistics.codefromweb.LevenshteinDistance;
import com.opendoorlogistics.codefromweb.LongestCommonSubstring;
import com.opendoorlogistics.core.geometry.functions.FmLatitude;
import com.opendoorlogistics.core.geometry.functions.FmLongitude;
import com.opendoorlogistics.core.scripts.ScriptConstants;
import com.opendoorlogistics.core.scripts.elements.AdaptedTableConfig;
import com.opendoorlogistics.core.scripts.elements.AdapterColumnConfig;
import com.opendoorlogistics.core.scripts.formulae.FmLocalElement;
import com.opendoorlogistics.core.scripts.formulae.FmRowId;
import com.opendoorlogistics.core.tables.ColumnValueProcessor;
import com.opendoorlogistics.core.tables.utils.DatastoreCopier;
import com.opendoorlogistics.core.utils.Pair;
import com.opendoorlogistics.core.utils.strings.Strings;

/**
 * Builds a best-guess {@link AdaptedTableConfig} that maps the columns of a source table
 * definition onto the columns of a target table definition.
 *
 * <p>Each target column is filled by the first rule that applies:
 * <ol>
 * <li>a source column whose name matches the target column name;</li>
 * <li>for geometry, image and colour target columns, the first source column of the same type;</li>
 * <li>for latitude / longitude tagged target columns, a formula extracting the coordinate from the
 * first source geometry column (only when no geometry column was matched by rules 1 or 2);</li>
 * <li>for location-key tagged target columns, a row id formula (only when
 * {@link #FLAG_USE_ROWID_FOR_LOCATION_KEY} is set);</li>
 * <li>a formula holding the target column's default value.</li>
 * </ol>
 * Fuzzy field-name matching is deliberately not used as a fallback; it often created more problems
 * than it solved.
 */
final public class TableLinkerWizard {
	/** When set, target columns tagged as the location key are filled with a row id formula. */
	public static final long FLAG_USE_ROWID_FOR_LOCATION_KEY = 1L << 0;

	/**
	 * Creates the best-guess adapter configuration with no flags set.
	 *
	 * @param source the table definition to read from; may be null, in which case no source columns are matched
	 * @param target the table definition the adapter must produce; must not be null
	 * @return the adapter configuration
	 */
	public static AdaptedTableConfig createBestGuess(ODLTableDefinition source, ODLTableDefinition target) {
		return createBestGuessWithScores(source, target, 0).getFirst();
	}

	/**
	 * Creates the best-guess adapter configuration.
	 *
	 * @param source the table definition to read from; may be null, in which case no source columns are matched
	 * @param target the table definition the adapter must produce; must not be null
	 * @param flags bitwise combination of the FLAG_ constants declared on this class
	 * @return the adapter configuration
	 */
	public static AdaptedTableConfig createBestGuess(ODLTableDefinition source, ODLTableDefinition target, long flags) {
		return createBestGuessWithScores(source, target, flags).getFirst();
	}

	/**
	 * Creates the adapter configuration together with the per-target-column match scores used to build it.
	 *
	 * @return a pair of (adapter configuration, scores); the scores list has one entry per target column,
	 *         each sorted best match first
	 */
	private static Pair<AdaptedTableConfig, List<List<MatchScore>>> createBestGuessWithScores(ODLTableDefinition source,
			ODLTableDefinition target, long flags) {
		List<List<MatchScore>> allScores = scoreMatches(source, target);

		// Start from an unassigned adapter which has the same structure as the target table
		AdaptedTableConfig tc = new AdaptedTableConfig();
		tc.setFromTable(source != null ? source.getName() : "");
		tc.setName(target.getName());
		tc.setFlags(target.getFlags());
		DatastoreCopier.copyTableDefinition(target, tc);

		// First pass links columns directly to source columns, second pass fills the rest with formulae
		boolean matchedGeom = assignDirectMatches(source, tc, allScores);
		assignFormulae(source, target, tc, flags, matchedGeom);

		// set the datastore to external by default
		tc.setFromDatastore(ScriptConstants.EXTERNAL_DS_NAME);

		return new Pair<AdaptedTableConfig, List<List<MatchScore>>>(tc, allScores);
	}

	/**
	 * First pass (priorities 1 and 2): links adapter columns directly to source columns.
	 *
	 * @return true if a geometry-typed adapter column was given a source column
	 */
	private static boolean assignDirectMatches(ODLTableDefinition source, AdaptedTableConfig tc, List<List<MatchScore>> allScores) {
		boolean matchedGeom = false;

		for (int col = 0; col < tc.getColumnCount(); col++) {
			List<MatchScore> scores = allScores.get(col);
			MatchScore best = scores.isEmpty() ? null : scores.get(0);
			AdapterColumnConfig config = tc.getColumn(col);
			ODLColumnType targetType = tc.getColumnType(col);

			// Priority 1 - use the top match if its field name matches (tag matching is not used here
			// as it was creating problems). Scores only exist when source is non-null.
			if (best != null && best.isFieldNameMatch()) {
				config.setFrom(source.getColumnName(best.getSourceColumnIndex()));
				if (targetType == ODLColumnType.GEOM) {
					// A geometry column filled by name counts as a matched geometry as well;
					// without this, latitude / longitude formulae were still generated from the
					// geometry even though the geometry column itself was already linked.
					matchedGeom = true;
				}
				continue;
			}

			// Priority 2 - colours, images and geometries are unusual types, so link to the first
			// source column found with the same type
			if (source == null || !isMatchedByTypeAlone(targetType)) {
				continue;
			}
			for (int srcCol = 0; srcCol < source.getColumnCount(); srcCol++) {
				if (source.getColumnType(srcCol) == targetType) {
					config.setFrom(source.getColumnName(srcCol));
					if (targetType == ODLColumnType.GEOM) {
						matchedGeom = true;
					}
					break;
				}
			}
		}

		return matchedGeom;
	}

	/** Returns true for the column types which are linked purely on type (geometry, image, colour). */
	private static boolean isMatchedByTypeAlone(ODLColumnType type) {
		return type == ODLColumnType.GEOM || type == ODLColumnType.IMAGE || type == ODLColumnType.COLOUR;
	}

	/**
	 * Second pass (priorities 3 to 5): fills every adapter column which still has neither a source
	 * column nor a formula.
	 */
	private static void assignFormulae(ODLTableDefinition source, ODLTableDefinition target, AdaptedTableConfig tc, long flags,
			boolean matchedGeom) {
		boolean rowIdForLocationKey = (flags & FLAG_USE_ROWID_FOR_LOCATION_KEY) == FLAG_USE_ROWID_FOR_LOCATION_KEY;

		for (int col = 0; col < tc.getColumnCount(); col++) {
			AdapterColumnConfig config = tc.getColumn(col);

			// Skip the column if it is already filled
			if (!Strings.isEmpty(config.getFrom()) || !Strings.isEmpty(config.getFormula())) {
				continue;
			}

			// Priority 3 - we have a geometry and want a lat or long, unless a geometry
			// has already been matched to another field
			if (!matchedGeom && assignLatLongFromGeometry(source, target, col, config)) {
				continue;
			}

			// Priority 4 - if flagged use the row id formula for the location key
			if (rowIdForLocationKey && TagUtils.hasTag(PredefinedTags.LOCATION_KEY, target, col)) {
				config.setUseFormula(true);
				config.setFormula(new FmRowId().toString());
				continue;
			}

			// Priority 5 - fall back to a formula holding the default value
			assignDefaultValueFormula(target, col, config);
		}
	}

	/**
	 * If the target column is tagged as latitude or longitude, sets a formula on the adapter column
	 * which extracts that coordinate from the first geometry-typed source column.
	 *
	 * @return true if a formula was set
	 */
	private static boolean assignLatLongFromGeometry(ODLTableDefinition source, ODLTableDefinition target, int col,
			AdapterColumnConfig config) {
		boolean targetIsLat = TagUtils.hasTag(PredefinedTags.LATITUDE, target, col);
		boolean targetIsLng = TagUtils.hasTag(PredefinedTags.LONGITUDE, target, col);
		if (source == null || !(targetIsLat || targetIsLng)) {
			return false;
		}

		for (int srcCol = 0; srcCol < source.getColumnCount(); srcCol++) {
			if (source.getColumnType(srcCol) != ODLColumnType.GEOM) {
				continue;
			}

			// NOTE(review): -1 is passed as the first constructor argument unchanged from the original
			// code; presumably it marks the element as referenced by name only - confirm.
			FmLocalElement local = new FmLocalElement(-1, source.getColumnName(srcCol));

			// Latitude wins if the column carries both tags
			String formula = targetIsLat ? new FmLatitude(local).toString() : new FmLongitude(local).toString();
			config.setUseFormula(true);
			config.setFormula(formula);
			return true;
		}

		return false;
	}

	/**
	 * Sets a formula holding the target column's default value, using its string representation.
	 * Nothing is set when the column has no default value, is an image column, or the default
	 * cannot be converted to a string.
	 */
	private static void assignDefaultValueFormula(ODLTableDefinition target, int col, AdapterColumnConfig config) {
		Object defaultValue = target.getColumnDefaultValue(col);
		ODLColumnType type = target.getColumnType(col);
		if (defaultValue == null || type == ODLColumnType.IMAGE) {
			return;
		}

		Object converted = ColumnValueProcessor.convertToMe(ODLColumnType.STRING, defaultValue, type);
		if (converted == null) {
			return;
		}

		// Non-numeric values are written as a quoted string constant
		String formula = converted.toString();
		if (!ColumnValueProcessor.isNumeric(type)) {
			formula = "\"" + formula + "\"";
		}
		config.setUseFormula(true);
		config.setFormula(formula);
	}

	/**
	 * Scores every source column against every target column.
	 *
	 * @return one list per target column, in target column order, each holding one score per source
	 *         column sorted best match first; the inner lists are empty when source is null
	 */
	private static List<List<MatchScore>> scoreMatches(ODLTableDefinition source, ODLTableDefinition target) {
		List<List<MatchScore>> allScores = new ArrayList<>();

		for (int targetCol = 0; targetCol < target.getColumnCount(); targetCol++) {
			List<MatchScore> scores = new ArrayList<>();

			if (source != null) {
				String targetName = target.getColumnName(targetCol);

				for (int sourceCol = 0; sourceCol < source.getColumnCount(); sourceCol++) {
					String sourceName = source.getColumnName(sourceCol);
					MatchScore score = new MatchScore(sourceName, targetName, sourceCol);

					// field name match, as defined by Strings.equalsStd
					score.fieldNameMatch = Strings.equalsStd(targetName, sourceName);

					// matched tags
					score.nbMatchedTags = TagUtils.countCommonColumnTags(source, sourceCol, target, targetCol);

					// longest common substring
					score.fieldNameLongestCommonSubstring = LongestCommonSubstring.longestSubstr(sourceName, targetName);

					// edit distance
					score.editDistance = LevenshteinDistance.getLevenshteinDistance(sourceName, targetName);

					scores.add(score);
				}

				Collections.sort(scores);
			}

			allScores.add(scores);
		}

		return allScores;
	}

	/**
	 * How well a single source column matches a single target column. The natural ordering places
	 * better matches first.
	 */
	public static class MatchScore implements Comparable<MatchScore> {
		final String sourceFieldName;
		final String destinationFieldName;
		final int sourceColumnIndex;

		private boolean fieldNameMatch;
		private int nbMatchedTags;
		private int fieldNameLongestCommonSubstring;
		private int editDistance;

		/**
		 * @param sourceFieldName name of the source column
		 * @param destinationFieldName name of the target column
		 * @param sourceColumnIndex index of the source column within the source table
		 */
		public MatchScore(String sourceFieldName, String destinationFieldName, int sourceColumnIndex) {
			this.sourceFieldName = sourceFieldName;
			this.destinationFieldName = destinationFieldName;
			this.sourceColumnIndex = sourceColumnIndex;
		}

		/** @return true if the source and target field names were found to match */
		public boolean isFieldNameMatch() {
			return fieldNameMatch;
		}

		/** @return the number of tags the source and target columns have in common */
		public int getNbMatchedTags() {
			return nbMatchedTags;
		}

		/** @return the longest common substring score of the two field names */
		public int getFieldNameLongestCommonSubstring() {
			return fieldNameLongestCommonSubstring;
		}

		/** @return index of the source column within the source table */
		public int getSourceColumnIndex() {
			return sourceColumnIndex;
		}

		@Override
		public String toString() {
			return sourceFieldName + "->" + destinationFieldName + ", srcCol=" + sourceColumnIndex + ", fieldNameMatch?=" + fieldNameMatch
					+ ", nbMatchedTags=" + nbMatchedTags + ", longestSubstring=" + fieldNameLongestCommonSubstring + ", editDistance="
					+ editDistance;
		}

		/**
		 * Orders better matches first: a field name match beats no match, then more matched tags,
		 * then a longer common substring, then a smaller edit distance.
		 */
		@Override
		public int compareTo(MatchScore o) {
			// name match first (true sorts before false, hence the swapped arguments)
			int diff = Boolean.compare(o.fieldNameMatch, fieldNameMatch);
			if (diff != 0) {
				return diff;
			}

			// then tags, highest first
			diff = Integer.compare(o.nbMatchedTags, nbMatchedTags);
			if (diff != 0) {
				return diff;
			}

			// then common substring, highest first
			diff = Integer.compare(o.fieldNameLongestCommonSubstring, fieldNameLongestCommonSubstring);
			if (diff != 0) {
				return diff;
			}

			// then edit distance, lowest first
			return Integer.compare(editDistance, o.editDistance);
		}
	}
}