/* Copyright (c) 2013 Boundless and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* https://www.eclipse.org/org/documents/edl-v10.html
*
* Contributors:
* Victor Olaya (Boundless) - initial implementation
*/
package org.locationtech.geogig.api.plumbing.diff;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import javax.annotation.Nullable;
import org.locationtech.geogig.api.plumbing.diff.DiffMatchPatch.Diff;
import org.locationtech.geogig.api.plumbing.diff.DiffMatchPatch.LinesToCharsResult;
import org.locationtech.geogig.api.plumbing.diff.DiffMatchPatch.Operation;
import org.locationtech.geogig.api.plumbing.diff.DiffMatchPatch.Patch;
import org.locationtech.geogig.storage.FieldType;
import org.locationtech.geogig.storage.text.TextValueSerializer;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.base.Splitter;
import com.google.common.collect.Iterators;
import com.vividsolutions.jts.geom.Coordinate;
import com.vividsolutions.jts.geom.Geometry;
import com.vividsolutions.jts.geom.Polygon;
/**
* An class that computes differences between geometries using a Longest-Common-Subsequence
* algorithm on string representations of them
*
*/
public class LCSGeometryDiffImpl {
public static final String SUBGEOM_SEPARATOR = "/";
public static final String INNER_RING_SEPARATOR = "@";
private LinkedList<Patch> patches;
private DiffMatchPatch diffMatchPatch;
private int totalInsertions;
private int totalDeletions;
private int replacings;
private String diffText;
public LCSGeometryDiffImpl(Optional<Geometry> oldGeom, Optional<Geometry> newGeom) {
String oldText = oldGeom.isPresent() ? oldGeom.get().toText() : "";
String newText = newGeom.isPresent() ? newGeom.get().toText() : "";
diffMatchPatch = new DiffMatchPatch();
LinkedList<Diff> diffs = diffMatchPatch.diff_main(oldText, newText);
patches = diffMatchPatch.patch_make(diffs);
// to calculate number of edits in the geometry, we do a diffing based on a string
// representation of the coordinates of the geometry, instead of the WKT.
// This is more more practical for counting added/removed/edited points and generating a
// human-readable and easy-to-parse string representation of the diff.
// NOTE! This is limited to geometries with less than 65535 different points, and might
// yield wrong results for geometries over that limit.
// This is a temporary hack, until a better solution is developed.
oldText = geomToStringOfCoordinates(oldGeom);
newText = geomToStringOfCoordinates(newGeom);
LinesToCharsResult chars = coordsToChars(oldText, newText);
diffs = diffMatchPatch.diff_main(chars.chars1, chars.chars2);
charsToCoords(diffs, chars.lineArray);
processDiffs(diffs);
}
private LCSGeometryDiffImpl(LinkedList<Patch> patches) {
diffMatchPatch = new DiffMatchPatch();
this.patches = patches;
}
public LCSGeometryDiffImpl(String s) {
String[] tokens = s.split("\t");
Preconditions.checkArgument(tokens.length == 2);
String[] countings = tokens[0].split("/");
Preconditions.checkArgument(countings.length == 3);
totalDeletions = Integer.parseInt(countings[0]);
totalInsertions = Integer.parseInt(countings[1]);
replacings = Integer.parseInt(countings[2]);
diffMatchPatch = new DiffMatchPatch();
String unescaped = tokens[1].replace("\\n", "\n");
patches = (LinkedList<Patch>) diffMatchPatch.patch_fromText(unescaped);
}
private void processDiffs(List<Diff> diffs) {
totalInsertions = 0;
totalDeletions = 0;
replacings = 0;
int insertions = 0;
int deletions = 0;
StringBuilder sb = new StringBuilder();
for (Diff diff : diffs) {
String text = diff.text;
int nCoords = 0;
String[] tokens = diff.text.split(" ");
for (String token : tokens) {
if (token.contains(",")) {
nCoords++;
}
}
switch (diff.operation) {
case INSERT:
text = text.replace(" " + SUBGEOM_SEPARATOR, ")" + SUBGEOM_SEPARATOR + "(");
sb.append('(');
sb.append(text);
sb.append(") ");
insertions += nCoords;
break;
case DELETE:
sb.append('[');
sb.append(text);
sb.append("] ");
deletions += nCoords;
break;
case EQUAL:
sb.append(text.trim());
sb.append(' ');
replacings += Math.min(deletions, insertions);
totalDeletions += Math.max(deletions - insertions, 0);
totalInsertions += Math.max(insertions - deletions, 0);
insertions = 0;
deletions = 0;
break;
}
}
replacings += Math.min(deletions, insertions);
totalDeletions += Math.max(deletions - insertions, 0);
totalInsertions += Math.max(insertions - deletions, 0);
diffText = sb.toString();
// some final dirty minor corrections
diffText = diffText.replace("(" + SUBGEOM_SEPARATOR, SUBGEOM_SEPARATOR + "(");
diffText = diffText.replace("(" + INNER_RING_SEPARATOR, INNER_RING_SEPARATOR + "(");
diffText = diffText.replace("[" + SUBGEOM_SEPARATOR, SUBGEOM_SEPARATOR + "[");
diffText = diffText.replace("[" + INNER_RING_SEPARATOR, INNER_RING_SEPARATOR + "[");
diffText = diffText.replace(" )", ")");
diffText = diffText.replace(" ]", "]");
}
private String geomToStringOfCoordinates(Optional<Geometry> opt) {
if (!opt.isPresent()) {
return "";
}
Function<Coordinate, String> printCoords = new Function<Coordinate, String>() {
@Override
@Nullable
public String apply(@Nullable Coordinate coord) {
return Double.toString(coord.x) + "," + Double.toString(coord.y);
}
};
StringBuilder sb = new StringBuilder();
Geometry geom = opt.get();
sb.append(geom.getGeometryType() + " ");
int n = geom.getNumGeometries();
for (int i = 0; i < n; i++) {
Geometry subgeom = geom.getGeometryN(i);
if (subgeom instanceof Polygon) {
Polygon polyg = (Polygon) subgeom;
Coordinate[] coords = polyg.getExteriorRing().getCoordinates();
Iterator<String> iter = Iterators
.transform(Iterators.forArray(coords), printCoords);
sb.append(Joiner.on(' ').join(iter));
for (int j = 0; j < polyg.getNumInteriorRing(); j++) {
coords = polyg.getInteriorRingN(j).getCoordinates();
iter = Iterators.transform(Iterators.forArray(coords), printCoords);
sb.append(" " + INNER_RING_SEPARATOR + " ");
sb.append(Joiner.on(' ').join(iter));
}
if (i < n - 1) {
sb.append(" " + SUBGEOM_SEPARATOR + " ");
}
} else {
Coordinate[] coords = subgeom.getCoordinates();
Iterator<String> iter = Iterators
.transform(Iterators.forArray(coords), printCoords);
sb.append(Joiner.on(' ').join(iter));
sb.append(" " + SUBGEOM_SEPARATOR + " ");
}
}
String s = sb.toString().trim();
return s;
}
public LCSGeometryDiffImpl reversed() {
LinkedList<Patch> reversedPatches = diffMatchPatch.patch_deepCopy(patches);
for (Patch patch : reversedPatches) {
LinkedList<Diff> diffs = patch.diffs;
for (Diff diff : diffs) {
if (diff.operation == Operation.DELETE) {
diff.operation = Operation.INSERT;
} else if (diff.operation == Operation.INSERT) {
diff.operation = Operation.DELETE;
}
}
}
return new LCSGeometryDiffImpl(reversedPatches);
}
public boolean canBeAppliedOn(Optional<Geometry> obj) {
String wkt = obj.isPresent() ? obj.get().toText() : "";
Object[] res = diffMatchPatch.patch_apply(patches, wkt);
boolean[] bool = (boolean[]) res[1];
for (int i = 0; i < bool.length; i++) {
if (!bool[i]) {
return false;
}
}
return true;
}
public Optional<Geometry> applyOn(Optional<Geometry> obj) {
Preconditions.checkState(canBeAppliedOn(obj));
String wkt = obj.isPresent() ? obj.get().toText() : "";
String res = (String) diffMatchPatch.patch_apply(patches, wkt)[0];
if (!res.isEmpty()) {
return Optional.fromNullable((Geometry) TextValueSerializer.fromString(
FieldType.forBinding(Geometry.class), res));
} else {
return Optional.absent();
}
}
/**
* Returns a human-readable representation of the difference
*/
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append(Integer.toString(totalDeletions) + " point(s) deleted, ");
sb.append(Integer.toString(totalInsertions) + " new point(s) added, ");
sb.append(Integer.toString(replacings) + " point(s) moved");
return sb.toString();
}
/**
* Returns a serialized text version of the difference
*/
public String asText() {
StringBuilder sb = new StringBuilder();
sb.append(Integer.toString(totalDeletions));
sb.append('/');
sb.append(Integer.toString(totalInsertions));
sb.append('/');
sb.append(Integer.toString(replacings));
sb.append('\t');
sb.append(diffMatchPatch.patch_toText(patches).replace("\n", "\\n"));
return sb.toString();
}
@Override
public boolean equals(Object o) {
if (!(o instanceof LCSGeometryDiffImpl)) {
return false;
}
LCSGeometryDiffImpl d = (LCSGeometryDiffImpl) o;
for (int i = 0; i < d.patches.size(); i++) {
Patch patchA = patches.get(i);
Patch patchB = d.patches.get(i);
if (!patchA.equals(patchB)) {
return false;
}
}
return true;
}
// ========================================================================================
// These 2 methods are meant to be used to split a text containing a representation of
// coordinate in a geometry into chunks representing points that can be then hashed
// as characters.
// They are just an adapted version of the diff_lineToChars method in diff_match_patch, using a
// different split character and some extra behaviour
protected LinesToCharsResult coordsToChars(String text1, String text2) {
List<String> lineArray = new ArrayList<String>();
Map<String, Integer> lineHash = new HashMap<String, Integer>();
lineArray.add("");
String chars1 = splitAndHash(text1, lineArray, lineHash, ' ');
String chars2 = splitAndHash(text2, lineArray, lineHash, ' ');
return new LinesToCharsResult(chars1, chars2, lineArray);
}
private String splitAndHash(String text, List<String> lineArray, Map<String, Integer> lineHash,
char splitChar) {
StringBuilder chars = new StringBuilder();
Iterable<String> tokens = Splitter.on(" ").split(text);
for (String token : tokens) {
if (lineHash.containsKey(token)) {
chars.append(String.valueOf((char) (int) lineHash.get(token)));
} else {
lineArray.add(token);
lineHash.put(token, lineArray.size() - 1);
chars.append(String.valueOf((char) (lineArray.size() - 1)));
}
}
return chars.toString();
}
protected void charsToCoords(LinkedList<Diff> diffs, List<String> lineArray) {
StringBuilder text;
for (Diff diff : diffs) {
text = new StringBuilder();
for (int y = 0; y < diff.text.length(); y++) {
String coordText = lineArray.get(diff.text.charAt(y));
text.append(coordText);
if (coordText.length() > 2) {
text.append(' ');
}
}
diff.text = text.toString();
}
}
/**
* Returns a string with a human-readable version of this geometry diff. It is basically a
* collection of coordinates, using the following syntax:
*
* - Coordinates added are shown between brackets, while removed coordinates are shown between
* square brackets.
*
* - The structure of the text representing the geometry is as follows:
*
* - It starts with the type name of the geometry, followed by the list of coordinates -
* Coordinates are x,y pairs, separated by a whitespace - In the case of multi-geometries,
* sub-geometries are separated by the slash (`/`) sign. For instance, `MultiLineString 0,10
* 0,20 0,30 / 10,10 50,65`` represents a multi-line with two lines
*
* - In the case of polygons, the first string of coordinates represents the outer ring, and
* inner rings are added next, delimited by the ``@`` sign. For instance, ``MultiPolygon
* 40.0,40.0 20.0,45.0 45.0,30.0 40.0,40.0 / 20.0,35.0 45.0,20.0 30.0,5.0 10.0,10.0 10.0,30.0
* 20.0,35.0 @ 30.0,20.0 20.0,25.0 20.0,15.0 30.0,20.0`` represents a geometry with two
* polygons, the last one of them with an inner ring.
*
* @return
*/
public String getDiffCoordsString() {
return diffText;
}
}