/* uDig - User Friendly Desktop Internet GIS client
 * http://udig.refractions.net
 * (C) 2004, Refractions Research Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation;
 * version 2.1 of the License.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 */
package net.refractions.linecleaner.cleansing;

import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import net.refractions.linecleaner.FeatureUtil;
import net.refractions.linecleaner.GeometryUtil;
import net.refractions.linecleaner.LoggingSystem;
import net.refractions.linecleaner.SimilarityMetric;

import org.eclipse.core.runtime.IProgressMonitor;
import org.eclipse.core.runtime.NullProgressMonitor;
import org.eclipse.core.runtime.SubProgressMonitor;
import org.geotools.data.FeatureStore;
import org.geotools.data.Query;
import org.geotools.feature.Feature;
import org.geotools.feature.FeatureCollection;
import org.geotools.feature.FeatureIterator;
import org.geotools.filter.FidFilter;
import org.geotools.filter.FilterFactory;
import org.geotools.filter.FilterFactoryFinder;

import com.vividsolutions.jts.geom.Geometry;
import com.vividsolutions.jts.geom.LineString;
import com.vividsolutions.jts.operation.linemerge.LineMerger;

/**
 * <p>
 * A line cleaner class that attempts to clean out duplicate and similar sets of features
 * in a given feature source.
 * </p>
 * <p>
 * Processing happens in two passes: {@link #cleanFeatures} compares every feature with the
 * features near it, and {@link #cleanFeaturesByAggregation} compares every indexed feature
 * with the merged linework of the features recorded as similar to it. Deleted and flagged
 * features are recorded and reported through the {@link LoggingSystem} at the end of the run.
 * </p>
 */
public class SimilarLinesProcessor extends AbstractProcessor {

    /** Default sampling distance. Units are presumably those of the data's CRS (TODO confirm). */
    public static final double DEFAULT_SAMPLING_DISTANCE = 1;
    /** Default upper bound (exclusive) on the similarity metric for the VERY_SIMILAR category. */
    public static final double DEFAULT_VERY_SIMILAR_TOLERANCE = 5;
    /** Default upper bound (exclusive) on the similarity metric for the SIMILAR category. */
    public static final double DEFAULT_SIMILAR_TOLERANCE = 30;

    /** The category a deleted or flagged feature was sieved into. */
    public enum SieveCategory {
        IDENTICAL, VERY_SIMILAR, SIMILAR
    }

    LoggingSystem loggingSystem;

    private FeatureStore store;
    private double samplingDistance;
    private double verySimilarTolerance;
    private double similarTolerance;

    /** Merge-source names used by {@link PrecedenceLengthPredicate}; null when no list was given. */
    private List<String> featureStorePriorityList = null;

    /** Flagged features keyed by feature id; kept for reporting only. */
    private Map<String, SievedFeature> flaggedFeatures = new HashMap<String, SievedFeature>();
    /** Deleted features keyed by feature id; kept for reporting only. */
    private Map<String, SievedFeature> deletedFeatures = new HashMap<String, SievedFeature>();

    SimilarityMetric similarityMetric = new SimilarityMetric();
    SimilarityCache similarityCache = new SimilarityCache();
    SimilarityIndex similarityIndex = new SimilarityIndex();

    /**
     * Creates a processor that decides which of two similar features to remove by length only.
     *
     * @param map the map handed to the superclass constructor
     * @param source the feature store to clean; features are deleted from it in place
     * @param samplingDistance sampling distance handed to the similarity metric
     * @param verySimilarTolerance both directional similarities must be below this value for
     *        two geometries to be VERY_SIMILAR
     * @param similarTolerance both directional similarities must be below this value for
     *        two geometries to be SIMILAR
     * @throws IOException
     */
    public SimilarLinesProcessor(net.refractions.udig.project.internal.Map map, FeatureStore source,
            double samplingDistance, double verySimilarTolerance, double similarTolerance)
            throws IOException {
        super(map, source);
        this.store = source;
        this.samplingDistance = samplingDistance;
        this.verySimilarTolerance = verySimilarTolerance;
        this.similarTolerance = similarTolerance;
        this.loggingSystem = LoggingSystem.getInstance();
    }

    /**
     * Creates a processor that additionally consults a priority list of merge sources when
     * deciding which of two similar features to remove (see {@link PrecedenceLengthPredicate}).
     *
     * @param map the map handed to the superclass constructor
     * @param source the feature store to clean; features are deleted from it in place
     * @param featureStorePriorityList merge-source names; may be null, in which case only
     *        length is used
     * @param samplingDistance sampling distance handed to the similarity metric
     * @param verySimilarTolerance upper bound (exclusive) for the VERY_SIMILAR category
     * @param similarTolerance upper bound (exclusive) for the SIMILAR category
     * @throws IOException
     */
    public SimilarLinesProcessor(net.refractions.udig.project.internal.Map map, FeatureStore source,
            List<String> featureStorePriorityList, double samplingDistance,
            double verySimilarTolerance, double similarTolerance) throws IOException {
        this(map, source, samplingDistance, verySimilarTolerance, similarTolerance);
        this.featureStorePriorityList = featureStorePriorityList;
    }

    /**
     * Runs both cleaning passes and then writes the histogram and per-feature reports to the
     * logging system. Cancellation is checked between the steps.
     *
     * @param monitor progress monitor; a {@link NullProgressMonitor} is used when null
     * @param pauseMonitor handed to pauseIfNecessary between the steps
     * @throws IOException
     */
    public void runInternal(IProgressMonitor monitor, PauseMonitor pauseMonitor) throws IOException {
        if (monitor == null) {
            monitor = new NullProgressMonitor();
        }
        monitor.beginTask("", 100);
        this.loggingSystem.setCurrentAction(LoggingSystem.SIMILAR_FEATURES);
        this.loggingSystem.begin();
        try {
            if (monitor.isCanceled()) {
                return;
            }
            pauseIfNecessary(pauseMonitor);
            cleanFeatures(new SubProgressMonitor(monitor, 87,
                    SubProgressMonitor.PREPEND_MAIN_LABEL_TO_SUBTASK), pauseMonitor);

            if (monitor.isCanceled()) {
                return;
            }
            pauseIfNecessary(pauseMonitor);
            cleanFeaturesByAggregation(new SubProgressMonitor(monitor, 13,
                    SubProgressMonitor.PREPEND_MAIN_LABEL_TO_SUBTASK), pauseMonitor);

            if (monitor.isCanceled()) {
                return;
            }
            pauseIfNecessary(pauseMonitor);
            outputHistograms();

            if (monitor.isCanceled()) {
                return;
            }
            pauseIfNecessary(pauseMonitor);
            outputTSV();
        } finally {
            // Always pair beginTask()/begin() with done()/finish(), including when the run is
            // cancelled or fails part-way through.
            monitor.done();
            this.loggingSystem.finish();
        }
    }

    /**
     * First pass: sieves every feature against the features near it.
     *
     * @param monitor progress monitor; one unit of work is reported per feature
     * @param pauseMonitor handed to pauseIfNecessary after each feature
     * @throws IOException
     */
    protected void cleanFeatures(IProgressMonitor monitor, PauseMonitor pauseMonitor) throws IOException {
        monitor.beginTask("", this.store.getCount(Query.FIDS));
        monitor.subTask("Cleaning Similar Features");

        MemoryFeatureIterator i = MemoryFeatureIterator.createDefault(this.featureStore, map);
        try {
            // The choice of predicate does not depend on the feature, so make it once.
            FeaturePredicate removalPredicate = createRemovalPredicate();
            while (i.hasNext()) {
                Feature f = i.next();
                FeatureCollection nearbyFeatures = FeatureUtil.nearbyFeatureFids(this.store, f);
                sieve(f, nearbyFeatures, removalPredicate);

                monitor.worked(1);
                if (monitor.isCanceled()) {
                    break;
                }
                pauseIfNecessary(pauseMonitor);
            }
        } finally {
            i.close();
            monitor.done();
        }
    }

    /**
     * Picks the predicate deciding which of two similar features is removed: length only when
     * there is no priority list or the store has no merge-source attribute, otherwise priority
     * first and length as the tie-breaker.
     */
    private FeaturePredicate createRemovalPredicate() throws IOException {
        if (this.featureStorePriorityList == null || !FeatureUtil.hasMergeSourceAttribute(this.store)) {
            return new LengthPredicate();
        }
        return new PrecedenceLengthPredicate();
    }

    /**
     * Second pass: for every feature in the similarity index, merges the linework of the
     * connected features recorded as similar to it and sieves the feature against the result.
     *
     * @param monitor progress monitor; one unit of work is reported per indexed feature
     * @param pauseMonitor handed to pauseIfNecessary after each feature
     * @throws IOException
     */
    protected void cleanFeaturesByAggregation(IProgressMonitor monitor, PauseMonitor pauseMonitor)
            throws IOException {
        // Copy the keys: the index is modified while features are deleted below.
        List<String> keys = new LinkedList<String>(this.similarityIndex.getKeys());
        final Map<String, Double> lengthIndex = this.similarityIndex.lengthIndex;

        monitor.beginTask("", keys.size());
        monitor.subTask("Cleaning Similar Features By Aggregation");
        try {
            // Sort the keys by length in descending order so that we sieve long features
            // first. This ensures that in cycles of features involving linestrings shorter
            // than the tolerance, we pick the right combination of lines to merge together.
            Collections.sort(keys, new Comparator<String>() {
                public int compare(String s, String r) {
                    // Arguments swapped on purpose: longest first.
                    return Double.compare(lengthIndex.get(r), lengthIndex.get(s));
                }
            });

            for (String ffid : keys) {
                Feature f = getFeature(ffid);
                Collection<Feature> similarFeatures = this.similarityIndex.getFeaturesSimilarTo(f);

                // Filter the similar features by connectivity to avoid sending
                // useless extraneous linestrings to the merger.
                // Note: This suffices for now, but would be more correct if it
                // was a cycle finder.
                similarFeatures = FeatureUtil.filterByConnectivity(f, similarFeatures);

                LineMerger merger = new LineMerger();
                merger.add(FeatureUtil.extractGeometries(similarFeatures));
                Collection<LineString> mergedLines = merger.getMergedLineStrings();

                aggregatedSieve(f, mergedLines);

                monitor.worked(1);
                if (monitor.isCanceled()) {
                    break;
                }
                pauseIfNecessary(pauseMonitor);
            }
        } finally {
            monitor.done();
        }
    }

    /**
     * Compare f against a collection of linestrings, which should be a
     * collection of linestrings merged by LineMerger. We delete f if it
     * has the same end points as any of the linestrings and is within
     * distance under the given tolerances; if it is within tolerance but the
     * end points differ, f is only flagged. Once f has been deleted the
     * remaining linestrings are not examined.
     *
     * @param f The feature in question.
     * @param lines Collection of linestrings to compare f against.
     * @throws IOException
     */
    protected void aggregatedSieve(Feature f, Collection<LineString> lines) throws IOException {
        Geometry fgeom = f.getDefaultGeometry();
        for (LineString line : lines) {
            double similarity = similarityMetric.similarityF(fgeom, line, this.samplingDistance);
            double reverseSimilarity = similarityMetric.similarityF(line, fgeom, this.samplingDistance);

            SieveCategory category = classify(similarity, reverseSimilarity);
            if (category == null) {
                continue;
            }
            if (GeometryUtil.identicalEndPoints(fgeom, line)) {
                deleteFeature(f, category, similarity);
                // f is gone; comparing it with further lines would only delete or flag it again.
                return;
            }
            flagFeature(f, category, similarity);
        }
    }

    /**
     * Compare f against each feature g in candidate set. We remove f or
     * g provided g and f have identical end points. Which one is removed
     * is determined by whether or not removalPredicate.test(f,g) returns
     * true (remove f) or false (remove g). A candidate whose geometry equals
     * that of f is always removed as IDENTICAL. Candidates within tolerance
     * whose end points differ are flagged instead of removed.
     *
     * @param f the feature being sieved
     * @param candidateSet features to compare f against
     * @param removalPredicate decides which feature of a similar pair is removed
     * @throws IOException
     */
    protected void sieve(Feature f, Collection<Feature> candidateSet, FeaturePredicate removalPredicate)
            throws IOException {
        Geometry fgeom = f.getDefaultGeometry();
        for (Feature g : candidateSet) {
            // Defensive: a feature compared with itself would count as IDENTICAL and be deleted.
            if (f.getID().equals(g.getID())) {
                continue;
            }
            Geometry ggeom = g.getDefaultGeometry();
            if (fgeom.equals(ggeom)) {
                deleteFeature(g, SieveCategory.IDENTICAL, 0.0);
                continue;
            }

            double similarity = getSimilarity(f, g);
            double reverseSimilarity = getSimilarity(g, f);

            // run the features through a simple sieve by similarity metric
            SieveCategory category = classify(similarity, reverseSimilarity);
            if (category == null) {
                continue;
            }
            if (!GeometryUtil.identicalEndPoints(fgeom, ggeom)) {
                flagFeature(g, category, similarity);
                continue;
            }
            if (removalPredicate.test(f, g)) {
                deleteFeature(f, category, reverseSimilarity);
                break; // this feature's now out of consideration
            }
            deleteFeature(g, category, similarity);
        }
    }

    /**
     * Maps a pair of directional similarity values to a category.
     *
     * @return VERY_SIMILAR when both values are below the very-similar tolerance, SIMILAR when
     *         both are below the similar tolerance, otherwise null
     */
    private SieveCategory classify(double similarity, double reverseSimilarity) {
        if (similarity < this.verySimilarTolerance && reverseSimilarity < this.verySimilarTolerance) {
            return SieveCategory.VERY_SIMILAR;
        }
        if (similarity < this.similarTolerance && reverseSimilarity < this.similarTolerance) {
            return SieveCategory.SIMILAR;
        }
        return null;
    }

    /** Returns the (cached) one-way similarity of f to g. */
    private double getSimilarity(Feature f, Feature g) {
        return this.similarityCache.getSimilarity(f, g);
    }

    /**
     * Logs, at info level, the number of deleted and flagged features per category. An empty
     * message is logged when nothing was deleted or flagged.
     */
    public void outputHistograms() {
        StringBuilder report = new StringBuilder();
        if (this.deletedFeatures.size() > 0) {
            report.append("Deleted features:\n");
            report.append(formatHistogram(buildHistogram(this.deletedFeatures))).append("\n");
        }
        if (this.flaggedFeatures.size() > 0) {
            report.append("Flagged features:\n");
            report.append(formatHistogram(buildHistogram(this.flaggedFeatures))).append("\n");
        }
        this.loggingSystem.info(report.toString());
    }

    /**
     * Renders a histogram as one "category:&lt;tab&gt;count" line per entry, followed by a
     * blank line.
     *
     * @param histogram count per category, in the order to be printed
     * @return the formatted text
     */
    protected String formatHistogram(Map<SieveCategory, Integer> histogram) {
        StringBuilder text = new StringBuilder();
        for (Map.Entry<SieveCategory, Integer> e : histogram.entrySet()) {
            text.append(e.getKey()).append(":\t").append(e.getValue()).append("\n");
        }
        return text.append("\n").toString();
    }

    /**
     * Logs, at info level, the id and similarity value of every deleted and flagged feature,
     * grouped by category. An empty message is logged when nothing was deleted or flagged.
     */
    public void outputTSV() {
        StringBuilder report = new StringBuilder();
        if (deletedFeatures.size() > 0) {
            report.append("Deleted Features: \n");
            report.append(histogramTSV(deletedFeatures)).append("\n");
        }
        if (flaggedFeatures.size() > 0) {
            report.append("Flagged Features: \n");
            report.append(histogramTSV(flaggedFeatures)).append("\n");
        }
        loggingSystem.info(report.toString());
    }

    /**
     * Renders the given features grouped by category: the category name on its own line,
     * then one "fid,similarity" line per feature, then a blank line.
     * <p>
     * NOTE(review): despite the method name the two fields are separated by a comma, not a
     * tab. Left unchanged so that existing consumers of the log keep working.
     * </p>
     *
     * @param map sieved features keyed by feature id
     * @return the formatted text
     */
    public String histogramTSV(Map<String, SievedFeature> map) {
        StringBuilder text = new StringBuilder();
        for (Map.Entry<SieveCategory, List<SievedFeature>> e : transpose(map).entrySet()) {
            text.append(e.getKey()).append("\n");
            for (SievedFeature sf : e.getValue()) {
                text.append(sf.fid).append(",").append(sf.similarityMetric).append("\n");
            }
            text.append("\n");
        }
        return text.toString();
    }

    /**
     * Groups sieved features by category.
     *
     * @param map sieved features keyed by feature id
     * @return the features per category; categories appear in the order they are first met
     */
    protected Map<SieveCategory, List<SievedFeature>> transpose(Map<String, SievedFeature> map) {
        Map<SieveCategory, List<SievedFeature>> result =
                new LinkedHashMap<SieveCategory, List<SievedFeature>>();
        for (SievedFeature sf : map.values()) {
            List<SievedFeature> bucket = result.get(sf.category);
            if (bucket == null) {
                bucket = new LinkedList<SievedFeature>();
                result.put(sf.category, bucket);
            }
            bucket.add(sf);
        }
        return result;
    }

    /**
     * Counts sieved features per category.
     *
     * @param map sieved features keyed by feature id
     * @return the count per category; categories appear in the order they are first met
     */
    protected Map<SieveCategory, Integer> buildHistogram(Map<String, SievedFeature> map) {
        int numCategories = SieveCategory.values().length;
        Map<SieveCategory, Integer> histogram = new LinkedHashMap<SieveCategory, Integer>(numCategories);
        for (SievedFeature sf : map.values()) {
            Integer soFar = histogram.get(sf.category);
            histogram.put(sf.category, soFar == null ? 1 : soFar + 1);
        }
        return histogram;
    }

    /**
     * Records f as deleted, drops it from the similarity index and cache, removes it from the
     * store and logs the deletion.
     *
     * @param f the feature to delete
     * @param category the category recorded for the report
     * @param metric the similarity value recorded for the report
     * @throws IOException
     */
    protected void deleteFeature(Feature f, SieveCategory category, double metric) throws IOException {
        String fid = f.getID();

        // keep track of deleted features and the category they fall under
        // for logging purposes
        deletedFeatures.put(fid, new SievedFeature(fid, category, metric));

        this.similarityIndex.removeFeature(f);
        this.similarityCache.removeFeature(f);
        FeatureUtil.removeFeature(this.store, f);
        loggingSystem.delete(f);
    }

    /**
     * Records f as flagged; the feature itself is left untouched.
     *
     * @param f the feature to flag
     * @param category the category recorded for the report
     * @param metric the similarity value recorded for the report
     */
    protected void flagFeature(Feature f, SieveCategory category, double metric) {
        String fid = f.getID();
        flaggedFeatures.put(fid, new SievedFeature(fid, category, metric));
    }

    /**
     * Fetches a feature from the store by its id.
     *
     * @param fid the feature id
     * @return the first feature the store returns for a fid filter on the given id
     * @throws RuntimeException wrapping the IOException if the store cannot be read
     */
    protected Feature getFeature(String fid) {
        FilterFactory ff = FilterFactoryFinder.createFilterFactory();
        FidFilter filter = ff.createFidFilter(fid);
        try {
            FeatureIterator i = this.store.getFeatures(filter).features();
            try {
                return i.next();
            } finally {
                i.close();
            }
        } catch (IOException e) {
            throw new RuntimeException("Unable to fetch feature " + fid, e);
        }
    }

    /**
     * Fetches the features with the given ids from the store.
     *
     * @param fids the feature ids
     * @return the features, in the iteration order of fids
     */
    protected Collection<Feature> getFeatures(Collection<String> fids) {
        List<Feature> features = new LinkedList<Feature>();
        for (String fid : fids) {
            features.add(getFeature(fid));
        }
        return features;
    }

    /**
     * Re-fetches from the store every feature of the given collection, by id.
     *
     * @param fids collection whose members supply the ids to look up
     * @return the features as returned by the store
     */
    protected Collection<Feature> getFeatures(FeatureCollection fids) {
        List<Feature> features = new LinkedList<Feature>();
        FeatureIterator i = fids.features();
        try {
            while (i.hasNext()) {
                features.add(getFeature(i.next().getID()));
            }
        } finally {
            i.close();
        }
        return features;
    }

    /** Decides which feature of a similar pair is removed. */
    private interface FeaturePredicate {
        /** Given f and g, determine whether or not to remove f (true) rather than g (false). */
        boolean test(Feature f, Feature g);
    }

    /** Removes f when its default geometry is strictly longer than that of g. */
    private static class LengthPredicate implements FeaturePredicate {
        public boolean test(Feature f, Feature g) {
            return f.getDefaultGeometry().getLength() > g.getDefaultGeometry().getLength();
        }
    }

    /**
     * Compares the positions of the two features' merge-source attributes in the priority
     * list: f is removed when its position is lower than that of g, kept when it is higher,
     * and on a tie f is removed when it is strictly longer than g.
     */
    private class PrecedenceLengthPredicate implements FeaturePredicate {
        public boolean test(Feature f, Feature g) {
            // fetch the full feature g, instead of just g with fid and geom
            Feature fullG = getFeature(g.getID());

            int fpriority = featureStorePriorityList.indexOf(f.getAttribute(FeatureUtil.MERGE_SOURCE_NAME));
            int gpriority = featureStorePriorityList.indexOf(fullG.getAttribute(FeatureUtil.MERGE_SOURCE_NAME));

            if (fpriority == gpriority) {
                return f.getDefaultGeometry().getLength() > fullG.getDefaultGeometry().getLength();
            }
            return fpriority < gpriority;
        }
    }

    /**
     * Caches one-way similarity values per ordered pair of feature ids. A newly computed value
     * below the similar tolerance is also recorded in the similarity index.
     */
    private class SimilarityCache {
        /** Feature id of f, then feature id of g, to the similarity of f to g. */
        Map<String, Map<String, Double>> cache = new HashMap<String, Map<String, Double>>();

        /**
         * Returns the similarity of f to g, computing, caching and indexing it on first use.
         */
        public double getSimilarity(Feature f, Feature g) {
            Map<String, Double> row = getNearbyFeatures(f);
            String gid = g.getID();

            Double known = row.get(gid);
            if (known != null) {
                return known.doubleValue();
            }

            double similarity = calculateSimilarity(f, g);
            row.put(gid, similarity);
            indexSimilarity(f, g, similarity);
            return similarity;
        }

        /** Records "f is similar to g" in the index when the value is below the similar tolerance. */
        private void indexSimilarity(Feature f, Feature g, double similarity) {
            if (similarity < similarTolerance) {
                similarityIndex.addSimilarFeatures(f, g);
            }
        }

        /** Returns the cache row of f, creating an empty one when f has none yet. */
        private Map<String, Double> getNearbyFeatures(Feature f) {
            String fid = f.getID();
            Map<String, Double> row = cache.get(fid);
            if (row == null) {
                row = new HashMap<String, Double>();
                cache.put(fid, row);
            }
            return row;
        }

        private double calculateSimilarity(Feature f, Feature g) {
            Geometry fgeom = f.getDefaultGeometry();
            Geometry ggeom = g.getDefaultGeometry();
            return similarityMetric.similarityF(fgeom, ggeom, samplingDistance);
        }

        /**
         * Drops the cache row of f. Values that other features cached against f are left in
         * place.
         */
        public void removeFeature(Feature f) {
            cache.remove(f.getID());
        }
    }

    /**
     * <p>
     * A class that keeps track of similar features. Keep in mind similarity
     * is one-way, ie if A is similar to B, B is not necessarily similar to A.
     * </p>
     */
    private class SimilarityIndex {
        /** Feature id of f to the ids of the features f is similar to. */
        Map<String, Set<String>> similarityIndex = new HashMap<String, Set<String>>();
        /** Feature id of g to the ids of the features that are similar to g. */
        Map<String, Set<String>> transposeIndex = new HashMap<String, Set<String>>();
        /** Feature id to the length of the feature's default geometry when it was first indexed. */
        Map<String, Double> lengthIndex = new HashMap<String, Double>();

        /**
         * Add f is similar to g to the index.
         *
         * @param f
         * @param g
         */
        public void addSimilarFeatures(Feature f, Feature g) {
            getSimilarFeatures(f).add(g.getID());
            getFidsOfFeaturesSimilarTo(g).add(f.getID());
            indexLength(f);
            indexLength(g);
        }

        /**
         * Remove feature f, together with every reference to it held by other entries.
         *
         * @param f
         */
        public void removeFeature(Feature f) {
            String fid = f.getID();
            Set<String> similar = this.similarityIndex.remove(fid);
            Set<String> transpose = this.transposeIndex.remove(fid);

            if (similar != null) {
                for (String sfid : similar) {
                    Set<String> referrers = this.transposeIndex.get(sfid);
                    if (referrers != null) {
                        referrers.remove(fid);
                    }
                }
            }
            if (transpose != null) {
                for (String tfid : transpose) {
                    Set<String> targets = this.similarityIndex.get(tfid);
                    if (targets != null) {
                        targets.remove(fid);
                    }
                }
            }
            this.lengthIndex.remove(fid);
        }

        /** Returns a live view of the keys of the transpose index. */
        public Set<String> getKeys() {
            return transposeIndex.keySet();
        }

        /** Returns the ids of the features f is similar to; an empty entry is created if absent. */
        public Set<String> getSimilarFeatures(Feature f) {
            return getValue(similarityIndex, f);
        }

        /** Returns the ids of the features similar to f; an empty entry is created if absent. */
        public Set<String> getFidsOfFeaturesSimilarTo(Feature f) {
            return getValue(transposeIndex, f);
        }

        /** Fetches from the store the features that are similar to f. */
        public Collection<Feature> getFeaturesSimilarTo(Feature f) {
            return getFeatures(getFidsOfFeaturesSimilarTo(f));
        }

        /** Returns the entry of f in the given index, inserting an empty set when it is missing. */
        private Set<String> getValue(Map<String, Set<String>> index, Feature f) {
            String fid = f.getID();
            Set<String> fids = index.get(fid);
            if (fids == null) {
                fids = new HashSet<String>();
                index.put(fid, fids);
            }
            return fids;
        }

        /** Stores the length of g's default geometry unless one is already recorded. */
        private void indexLength(Feature g) {
            String gid = g.getID();
            if (!this.lengthIndex.containsKey(gid)) {
                this.lengthIndex.put(gid, g.getDefaultGeometry().getLength());
            }
        }
    }

    /** Report record for a deleted or flagged feature. */
    private static class SievedFeature {
        /** SievedFeature fid field */
        public final String fid;
        /** SievedFeature similarityMetric field */
        public final double similarityMetric;
        /** SievedFeature category field */
        public final SieveCategory category;

        /**
         * @param fid
         * @param category
         * @param similarityMetric
         */
        public SievedFeature(String fid, SieveCategory category, double similarityMetric) {
            this.fid = fid;
            this.similarityMetric = similarityMetric;
            this.category = category;
        }
    }
}