/*******************************************************************************
* Copyright (c) 2015 ElasticSearch and MITRE, and others
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Apache License, Version 2.0 which
* accompanies this distribution and is available at
* http://www.apache.org/licenses/LICENSE-2.0.txt
******************************************************************************/
// A derivative of commit 14bc4dee08355048d6a94e33834b919a3999a06e
// at https://github.com/chrismale/elasticsearch
package org.locationtech.spatial4j.io;
import org.locationtech.spatial4j.context.SpatialContext;
import org.locationtech.spatial4j.context.SpatialContextFactory;
import org.locationtech.spatial4j.exception.InvalidShapeException;
import org.locationtech.spatial4j.shape.Shape;
import org.locationtech.spatial4j.shape.ShapeFactory;
import java.io.IOException;
import java.io.Reader;
import java.text.ParseException;
/**
* An extensible parser for <a href="http://en.wikipedia.org/wiki/Well-known_text"> Well Known Text
* (WKT)</a>. The shapes supported by this class are:
* <ul>
* <li>POINT</li>
* <li>MULTIPOINT</li>
* <li>ENVELOPE (strictly isn't WKT but is defined by OGC's <a
* href="http://docs.geoserver.org/stable/en/user/tutorials/cql/cql_tutorial.html">Common Query
* Language (CQL)</a>)</li>
* <li>LINESTRING</li>
* <li>MULTILINESTRING</li>
* <LI>POLYGON</LI>
* <LI>MULTIPOLYGON</LI>
* <li>GEOMETRYCOLLECTION</li>
* <li>BUFFER (non-standard Spatial4j operation)</li>
* </ul>
* 'EMPTY' is supported. Specifying 'Z', 'M', or any other dimensionality in the WKT is effectively
* ignored. Thus, you can specify any number of numbers in the coordinate points but only the first
* two take effect. The javadocs for the <code>parse___Shape</code> methods further describe these
* shapes, or you
*
* <p>
* Most users of this class will call just one method: {@link #parse(String)}, or
* {@link #parseIfSupported(String)} to not fail if it isn't parse-able.
*
* <p>
* To support more shapes, extend this class and override
* {@link #parseShapeByType(WKTReader.State, String)}. It's also possible to delegate to a WKTParser
* by also delegating {@link #newState(String)}.
*
* <p>
* Note, instances of this base class are threadsafe.
*/
public class WKTReader implements ShapeReader {
protected final SpatialContext ctx;
protected final ShapeFactory shapeFactory;
// TODO support SRID: "SRID=4326;POINT(1,2)
/**
* This constructor is required by
* {@link org.locationtech.spatial4j.context.SpatialContextFactory#makeFormats(SpatialContext)}.
*/
public WKTReader(SpatialContext ctx, SpatialContextFactory factory) {
this.ctx = ctx;
this.shapeFactory = ctx.getShapeFactory();
}
/**
* Parses the wktString, returning the defined Shape.
*
* @return Non-null Shape defined in the String
* @throws ParseException Thrown if there is an error in the Shape definition
*/
public Shape parse(String wktString) throws ParseException, InvalidShapeException {
Shape shape = parseIfSupported(wktString);// sets rawString & offset
if (shape != null)
return shape;
String shortenedString =
(wktString.length() <= 128 ? wktString : wktString.substring(0, 128 - 3) + "...");
throw new ParseException("Unknown Shape definition [" + shortenedString + "]", 0);
}
/**
* Parses the wktString, returning the defined Shape. If it can't because the shape name is
* unknown or an empty or blank string was passed, then it returns null. If the WKT starts with a
* supported shape but contains an inner unsupported shape then it will result in a
* {@link ParseException}.
*
* @param wktString non-null, can be empty or have surrounding whitespace
* @return Shape, null if unknown / unsupported shape.
* @throws ParseException Thrown if there is an error in the Shape definition
*/
public Shape parseIfSupported(String wktString) throws ParseException, InvalidShapeException {
State state = newState(wktString);
state.nextIfWhitespace();// leading
if (state.eof())
return null;
// shape types must start with a letter
if (!Character.isLetter(state.rawString.charAt(state.offset)))
return null;
String shapeType = state.nextWord();
Shape result = null;
try {
result = parseShapeByType(state, shapeType);
} catch (ParseException | InvalidShapeException e) {
throw e;
} catch (IllegalArgumentException e) { // NOTE: JTS Throws IAE for bad WKT
throw new InvalidShapeException(e.getMessage(), e);
} catch (Exception e) {
ParseException pe = new ParseException(e.toString(), state.offset);
pe.initCause(e);
throw pe;
}
if (result != null && !state.eof())
throw new ParseException("end of shape expected", state.offset);
return result;
}
/**
* (internal) Creates a new State with the given String. It's only called by
* {@link #parseIfSupported(String)}. This is an extension point for subclassing.
*/
protected State newState(String wktString) {
// NOTE: if we wanted to re-use old States to reduce object allocation, we might do that
// here. But in the scheme of things, it doesn't seem worth the bother as it complicates the
// thread-safety story of the API for too little of a gain.
return new State(wktString);
}
/**
* (internal) Parses the remainder of a shape definition following the shape's name given as
* {@code shapeType} already consumed via {@link State#nextWord()}. If it's able to parse the
* shape, {@link WKTReader.State#offset} should be advanced beyond it (e.g. to the ',' or ')' or
* EOF in general). The default implementation checks the name against some predefined names and
* calls corresponding parse methods to handle the rest. Overriding this method is an excellent
* extension point for additional shape types. Or, use this class by delegation to this method.
* <p>
* When writing a parse method that reacts to a specific shape type, remember to handle the
* dimension and EMPTY token via
* {@link org.locationtech.spatial4j.io.WKTReader.State#nextIfEmptyAndSkipZM()}.
*
* @param state
* @param shapeType Non-Null string; could have mixed case. The first character is a letter.
* @return The shape or null if not supported / unknown.
*/
protected Shape parseShapeByType(State state, String shapeType) throws ParseException {
assert Character.isLetter(shapeType.charAt(0)) : "Shape must start with letter: " + shapeType;
if (shapeType.equalsIgnoreCase("POINT")) {
return parsePointShape(state);
} else if (shapeType.equalsIgnoreCase("MULTIPOINT")) {
return parseMultiPointShape(state);
} else if (shapeType.equalsIgnoreCase("ENVELOPE")) {
return parseEnvelopeShape(state);
} else if (shapeType.equalsIgnoreCase("LINESTRING")) {
return parseLineStringShape(state);
} else if (shapeType.equalsIgnoreCase("POLYGON")) {
return parsePolygonShape(state);
} else if (shapeType.equalsIgnoreCase("GEOMETRYCOLLECTION")) {
return parseGeometryCollectionShape(state);
} else if (shapeType.equalsIgnoreCase("MULTILINESTRING")) {
return parseMultiLineStringShape(state);
} else if (shapeType.equalsIgnoreCase("MULTIPOLYGON")) {
return parseMulitPolygonShape(state);
}
// extension
if (shapeType.equalsIgnoreCase("BUFFER")) {
return parseBufferShape(state);
}
// HEY! Update class Javadocs if add more shapes
return null;
}
/**
* Parses the BUFFER operation applied to a parsed shape.
*
* <pre>
* '(' shape ',' number ')'
* </pre>
*
* Whereas 'number' is the distance to buffer the shape by.
*/
protected Shape parseBufferShape(State state) throws ParseException {
state.nextExpect('(');
Shape shape = shape(state);
state.nextExpect(',');
double distance = shapeFactory.normDist(state.nextDouble());
state.nextExpect(')');
return shape.getBuffered(distance, ctx);
}
/**
* Parses a POINT shape from the raw string.
*
* <pre>
* '(' coordinate ')'
* </pre>
*
* @see #point(State, ShapeFactory.PointsBuilder)
*/
protected Shape parsePointShape(State state) throws ParseException {
if (state.nextIfEmptyAndSkipZM())
return shapeFactory.pointXY(Double.NaN, Double.NaN);
state.nextExpect('(');
OnePointsBuilder onePointsBuilder = new OnePointsBuilder(shapeFactory);
point(state, onePointsBuilder);
state.nextExpect(')');
return onePointsBuilder.getPoint();
}
/**
* Parses a MULTIPOINT shape from the raw string -- a collection of points.
*
* <pre>
* '(' coordinate (',' coordinate )* ')'
* </pre>
*
* Furthermore, coordinate can optionally be wrapped in parenthesis.
*
* @see #point(State, ShapeFactory.PointsBuilder)
*/
protected Shape parseMultiPointShape(State state) throws ParseException {
ShapeFactory.MultiPointBuilder builder = shapeFactory.multiPoint();
if (state.nextIfEmptyAndSkipZM())
return builder.build();
state.nextExpect('(');
do {
boolean openParen = state.nextIf('(');
point(state, builder);
if (openParen)
state.nextExpect(')');
} while (state.nextIf(','));
state.nextExpect(')');
return builder.build();
}
/**
* Parses an ENVELOPE (aka Rectangle) shape from the raw string. The values are normalized.
* <p>
* Source: OGC "Catalogue Services Specification", the "CQL" (Common Query Language) sub-spec.
* <em>Note the inconsistent order of the min & max values between x & y!</em>
*
* <pre>
* '(' x1 ',' x2 ',' y2 ',' y1 ')'
* </pre>
*/
protected Shape parseEnvelopeShape(State state) throws ParseException {
// FYI no dimension or EMPTY
state.nextExpect('(');
double x1 = state.nextDouble();
state.nextExpect(',');
double x2 = state.nextDouble();
state.nextExpect(',');
double y2 = state.nextDouble();
state.nextExpect(',');
double y1 = state.nextDouble();
state.nextExpect(')');
return shapeFactory.rect(shapeFactory.normX(x1), shapeFactory.normX(x2),
shapeFactory.normY(y1), shapeFactory.normY(y2));
}
/**
* Parses a LINESTRING shape from the raw string -- an ordered sequence of points.
*
* <pre>
* coordinateSequence
* </pre>
*
* @see #pointList(State, ShapeFactory.PointsBuilder)
*/
protected Shape parseLineStringShape(State state) throws ParseException {
ShapeFactory.LineStringBuilder lineStringBuilder = shapeFactory.lineString();
if (state.nextIfEmptyAndSkipZM())
return lineStringBuilder.build();
return pointList(state, lineStringBuilder).build();
}
/**
* Parses a MULTILINESTRING shape from the raw string -- a collection of line strings.
*
* <pre>
* '(' coordinateSequence (',' coordinateSequence )* ')'
* </pre>
*
* @see #parseLineStringShape(org.locationtech.spatial4j.io.WKTReader.State)
*/
protected Shape parseMultiLineStringShape(State state) throws ParseException {
ShapeFactory.MultiLineStringBuilder multiLineStringBuilder = shapeFactory.multiLineString();
if (!state.nextIfEmptyAndSkipZM()) {
state.nextExpect('(');
do {
multiLineStringBuilder.add(pointList(state, multiLineStringBuilder.lineString()));
} while (state.nextIf(','));
state.nextExpect(')');
}
return multiLineStringBuilder.build();
}
/**
* Parses a POLYGON shape from the raw string. It might return a
* {@link org.locationtech.spatial4j.shape.Rectangle} if the polygon is one.
*
* <pre>
* coordinateSequenceList
* </pre>
*/
protected Shape parsePolygonShape(WKTReader.State state) throws ParseException {
ShapeFactory.PolygonBuilder polygonBuilder = shapeFactory.polygon();
if (!state.nextIfEmptyAndSkipZM()) {
polygonBuilder = polygon(state, polygonBuilder);
}
return polygonBuilder.buildOrRect();
}
/**
* Parses a MULTIPOLYGON shape from the raw string.
*
* <pre>
* '(' polygon (',' polygon )* ')'
* </pre>
*/
protected Shape parseMulitPolygonShape(WKTReader.State state) throws ParseException {
ShapeFactory.MultiPolygonBuilder multiPolygonBuilder = shapeFactory.multiPolygon();
if (!state.nextIfEmptyAndSkipZM()) {
state.nextExpect('(');
do {
multiPolygonBuilder.add(polygon(state, multiPolygonBuilder.polygon()));
} while (state.nextIf(','));
state.nextExpect(')');
}
return multiPolygonBuilder.build();
}
/**
* Parses a GEOMETRYCOLLECTION shape from the raw string.
*
* <pre>
* '(' shape (',' shape )* ')'
* </pre>
*/
protected Shape parseGeometryCollectionShape(State state) throws ParseException {
ShapeFactory.MultiShapeBuilder<Shape> multiShapeBuilder = shapeFactory.multiShape(Shape.class);
if (state.nextIfEmptyAndSkipZM())
return multiShapeBuilder.build();
state.nextExpect('(');
do {
multiShapeBuilder.add(shape(state));
} while (state.nextIf(','));
state.nextExpect(')');
return multiShapeBuilder.build();
}
/**
* Reads a shape from the current position, starting with the name of the shape. It calls
* {@link #parseShapeByType(org.locationtech.spatial4j.io.WKTReader.State, String)} and throws an
* exception if the shape wasn't supported.
*/
protected Shape shape(State state) throws ParseException {
String type = state.nextWord();
Shape shape = parseShapeByType(state, type);
if (shape == null)
throw new ParseException("Shape of type " + type + " is unknown", state.offset);
return shape;
}
/**
* Reads a list of Points (AKA CoordinateSequence) from the current position.
*
* <pre>
* '(' coordinate (',' coordinate )* ')'
* </pre>
*
* @see #point(State, ShapeFactory.PointsBuilder)
*/
protected <B extends ShapeFactory.PointsBuilder> B pointList(State state, B pointsBuilder) throws ParseException {
state.nextExpect('(');
do {
point(state, pointsBuilder);
} while (state.nextIf(','));
state.nextExpect(')');
return pointsBuilder;
}
/**
* Reads a raw Point (AKA Coordinate) from the current position. Only the first 2 numbers are
* used. The values are normalized.
*
* <pre>
* number number number*
* </pre>
*/
protected ShapeFactory.PointsBuilder point(State state, ShapeFactory.PointsBuilder pointsBuilder) throws ParseException {
double x = state.nextDouble();
double y = state.nextDouble();
state.skipNextDoubles();//TODO capture to call pointXYZ
pointsBuilder.pointXY(shapeFactory.normX(x), shapeFactory.normY(y));
return pointsBuilder;
}
/**
* Reads a polygon
*/
protected ShapeFactory.PolygonBuilder polygon(WKTReader.State state, ShapeFactory.PolygonBuilder polygonBuilder) throws ParseException {
state.nextExpect('(');
pointList(state, polygonBuilder); // outer ring
while (state.nextIf(',')) {
ShapeFactory.PolygonBuilder.HoleBuilder holeBuilder = polygonBuilder.hole();
pointList(state, holeBuilder);
holeBuilder.endHole();
}
state.nextExpect(')');
return polygonBuilder;
}
/** The parse state. */
public class State {
/** Set in {@link #parseIfSupported(String)}. */
public String rawString;
/** Offset of the next char in {@link #rawString} to be read. */
public int offset;
/** Dimensionality specifier (e.g. 'Z', or 'M') following a shape type name. */
public String dimension;
public State(String rawString) {
this.rawString = rawString;
}
public SpatialContext getCtx() {
return ctx;
}
public WKTReader getParser() {
return WKTReader.this;
}
/**
* Reads the word starting at the current character position. The word terminates once
* {@link Character#isJavaIdentifierPart(char)} returns false (or EOF). {@link #offset} is
* advanced past whitespace.
*
* @return Non-null non-empty String.
*/
public String nextWord() throws ParseException {
int startOffset = offset;
while (offset < rawString.length()
&& Character.isJavaIdentifierPart(rawString.charAt(offset))) {
offset++;
}
if (startOffset == offset)
throw new ParseException("Word expected", startOffset);
String result = rawString.substring(startOffset, offset);
nextIfWhitespace();
return result;
}
/**
* Skips over a dimensionality token (e.g. 'Z' or 'M') if found, storing in {@link #dimension},
* and then looks for EMPTY, consuming that and whitespace.
*
* <pre>
* dimensionToken? 'EMPTY'?
* </pre>
*
* @return True if EMPTY was found.
*/
public boolean nextIfEmptyAndSkipZM() throws ParseException {
if (eof())
return false;
char c = rawString.charAt(offset);
if (c == '(' || !Character.isJavaIdentifierPart(c))
return false;
String word = nextWord();
if (word.equalsIgnoreCase("EMPTY"))
return true;
// we figure this word is Z or ZM or some other dimensionality signifier. We skip it.
this.dimension = word;
if (eof())
return false;
c = rawString.charAt(offset);
if (c == '(' || !Character.isJavaIdentifierPart(c))
return false;
word = nextWord();
if (word.equalsIgnoreCase("EMPTY"))
return true;
throw new ParseException("Expected EMPTY because found dimension; but got [" + word + "]",
offset);
}
/**
* Reads in a double from the String. Parses digits with an optional decimal, sign, or exponent.
* NaN and Infinity are not supported. {@link #offset} is advanced past whitespace.
*
* @return Double value
*/
public double nextDouble() throws ParseException {
int startOffset = offset;
skipDouble();
if (startOffset == offset)
throw new ParseException("Expected a number", offset);
double result;
try {
result = Double.parseDouble(rawString.substring(startOffset, offset));
} catch (Exception e) {
throw new ParseException(e.toString(), offset);
}
nextIfWhitespace();
return result;
}
/** Advances offset forward until it points to a character that isn't part of a number. */
public void skipDouble() {
int startOffset = offset;
for (; offset < rawString.length(); offset++) {
char c = rawString.charAt(offset);
if (!(Character.isDigit(c) || c == '.' || c == '-' || c == '+')) {
// 'e' is okay as long as it isn't first
if (offset != startOffset && (c == 'e' || c == 'E'))
continue;
break;
}
}
}
/** Advances past as many doubles as there are, with intervening whitespace. */
public void skipNextDoubles() {
while (!eof()) {
int startOffset = offset;
skipDouble();
if (startOffset == offset)
return;
nextIfWhitespace();
}
}
/**
* Verifies that the current character is of the expected value. If the character is the
* expected value, then it is consumed and {@link #offset} is advanced past whitespace.
*
* @param expected The expected char.
*/
public void nextExpect(char expected) throws ParseException {
if (eof())
throw new ParseException("Expected [" + expected + "] found EOF", offset);
char c = rawString.charAt(offset);
if (c != expected)
throw new ParseException("Expected [" + expected + "] found [" + c + "]", offset);
offset++;
nextIfWhitespace();
}
/** If the string is consumed, i.e. at end-of-file. */
public final boolean eof() {
return offset >= rawString.length();
}
/**
* If the current character is {@code expected}, then offset is advanced after it and any
* subsequent whitespace. Otherwise, false is returned.
*
* @param expected The expected char
* @return true if consumed
*/
public boolean nextIf(char expected) {
if (!eof() && rawString.charAt(offset) == expected) {
offset++;
nextIfWhitespace();
return true;
}
return false;
}
/**
* Moves offset to next non-whitespace character. Doesn't move if the offset is already at
* non-whitespace. <em>There is very little reason for subclasses to call this because
* most other parsing methods call it.</em>
*/
public void nextIfWhitespace() {
for (; offset < rawString.length(); offset++) {
if (!Character.isWhitespace(rawString.charAt(offset))) {
return;
}
}
}
/**
* Returns the next chunk of text till the next ',' or ')' (non-inclusive) or EOF. If a '(' is
* encountered, then it looks past its matching ')', taking care to handle nested matching
* parenthesis too. It's designed to be of use to subclasses that wish to get the entire
* subshape at the current position as a string so that it might be passed to other software
* that will parse it.
* <p>
* Example:
*
* <pre>
* OUTER(INNER(3, 5))
* </pre>
*
* If this is called when offset is at the first character, then it will return this whole
* string. If called at the "I" then it will return "INNER(3, 5)". If called at "3", then it
* will return "3". In all cases, offset will be positioned at the next position following the
* returned substring.
*
* @return non-null substring.
*/
public String nextSubShapeString() throws ParseException {
int startOffset = offset;
int parenStack = 0;// how many parenthesis levels are we in?
for (; offset < rawString.length(); offset++) {
char c = rawString.charAt(offset);
if (c == ',') {
if (parenStack == 0)
break;
} else if (c == ')') {
if (parenStack == 0)
break;
parenStack--;
} else if (c == '(') {
parenStack++;
}
}
if (parenStack != 0)
throw new ParseException("Unbalanced parenthesis", startOffset);
return rawString.substring(startOffset, offset);
}
}// class State
@Override
public String getFormatName() {
return ShapeIO.WKT;
}
static String readString(Reader reader) throws IOException {
char[] arr = new char[1024];
StringBuilder buffer = new StringBuilder();
int numCharsRead;
while ((numCharsRead = reader.read(arr, 0, arr.length)) != -1) {
buffer.append(arr, 0, numCharsRead);
}
return buffer.toString();
}
@Override
public Shape read(Reader reader) throws IOException, ParseException {
return parse(readString(reader));
}
@Override
public Shape read(Object value) throws IOException, ParseException, InvalidShapeException {
return parse(value.toString());
}
@Override
public Shape readIfSupported(Object value) throws InvalidShapeException {
try {
return parseIfSupported(value.toString());
} catch (ParseException e) {
}
return null;
}
}