/******************************************************************************* * Copyright (c) 2015 ElasticSearch and MITRE, and others * All rights reserved. This program and the accompanying materials * are made available under the terms of the Apache License, Version 2.0 which * accompanies this distribution and is available at * http://www.apache.org/licenses/LICENSE-2.0.txt ******************************************************************************/ // A derivative of commit 14bc4dee08355048d6a94e33834b919a3999a06e // at https://github.com/chrismale/elasticsearch package org.locationtech.spatial4j.io; import org.locationtech.spatial4j.context.SpatialContext; import org.locationtech.spatial4j.context.SpatialContextFactory; import org.locationtech.spatial4j.exception.InvalidShapeException; import org.locationtech.spatial4j.shape.Shape; import org.locationtech.spatial4j.shape.ShapeFactory; import java.io.IOException; import java.io.Reader; import java.text.ParseException; /** * An extensible parser for <a href="http://en.wikipedia.org/wiki/Well-known_text"> Well Known Text * (WKT)</a>. The shapes supported by this class are: * <ul> * <li>POINT</li> * <li>MULTIPOINT</li> * <li>ENVELOPE (strictly isn't WKT but is defined by OGC's <a * href="http://docs.geoserver.org/stable/en/user/tutorials/cql/cql_tutorial.html">Common Query * Language (CQL)</a>)</li> * <li>LINESTRING</li> * <li>MULTILINESTRING</li> * <LI>POLYGON</LI> * <LI>MULTIPOLYGON</LI> * <li>GEOMETRYCOLLECTION</li> * <li>BUFFER (non-standard Spatial4j operation)</li> * </ul> * 'EMPTY' is supported. Specifying 'Z', 'M', or any other dimensionality in the WKT is effectively * ignored. Thus, you can specify any number of numbers in the coordinate points but only the first * two take effect. The javadocs for the <code>parse___Shape</code> methods further describe these * shapes, or you * * <p> * Most users of this class will call just one method: {@link #parse(String)}, or * {@link #parseIfSupported(String)} to not fail if it isn't parse-able. * * <p> * To support more shapes, extend this class and override * {@link #parseShapeByType(WKTReader.State, String)}. It's also possible to delegate to a WKTParser * by also delegating {@link #newState(String)}. * * <p> * Note, instances of this base class are threadsafe. */ public class WKTReader implements ShapeReader { protected final SpatialContext ctx; protected final ShapeFactory shapeFactory; // TODO support SRID: "SRID=4326;POINT(1,2) /** * This constructor is required by * {@link org.locationtech.spatial4j.context.SpatialContextFactory#makeFormats(SpatialContext)}. */ public WKTReader(SpatialContext ctx, SpatialContextFactory factory) { this.ctx = ctx; this.shapeFactory = ctx.getShapeFactory(); } /** * Parses the wktString, returning the defined Shape. * * @return Non-null Shape defined in the String * @throws ParseException Thrown if there is an error in the Shape definition */ public Shape parse(String wktString) throws ParseException, InvalidShapeException { Shape shape = parseIfSupported(wktString);// sets rawString & offset if (shape != null) return shape; String shortenedString = (wktString.length() <= 128 ? wktString : wktString.substring(0, 128 - 3) + "..."); throw new ParseException("Unknown Shape definition [" + shortenedString + "]", 0); } /** * Parses the wktString, returning the defined Shape. If it can't because the shape name is * unknown or an empty or blank string was passed, then it returns null. If the WKT starts with a * supported shape but contains an inner unsupported shape then it will result in a * {@link ParseException}. * * @param wktString non-null, can be empty or have surrounding whitespace * @return Shape, null if unknown / unsupported shape. * @throws ParseException Thrown if there is an error in the Shape definition */ public Shape parseIfSupported(String wktString) throws ParseException, InvalidShapeException { State state = newState(wktString); state.nextIfWhitespace();// leading if (state.eof()) return null; // shape types must start with a letter if (!Character.isLetter(state.rawString.charAt(state.offset))) return null; String shapeType = state.nextWord(); Shape result = null; try { result = parseShapeByType(state, shapeType); } catch (ParseException | InvalidShapeException e) { throw e; } catch (IllegalArgumentException e) { // NOTE: JTS Throws IAE for bad WKT throw new InvalidShapeException(e.getMessage(), e); } catch (Exception e) { ParseException pe = new ParseException(e.toString(), state.offset); pe.initCause(e); throw pe; } if (result != null && !state.eof()) throw new ParseException("end of shape expected", state.offset); return result; } /** * (internal) Creates a new State with the given String. It's only called by * {@link #parseIfSupported(String)}. This is an extension point for subclassing. */ protected State newState(String wktString) { // NOTE: if we wanted to re-use old States to reduce object allocation, we might do that // here. But in the scheme of things, it doesn't seem worth the bother as it complicates the // thread-safety story of the API for too little of a gain. return new State(wktString); } /** * (internal) Parses the remainder of a shape definition following the shape's name given as * {@code shapeType} already consumed via {@link State#nextWord()}. If it's able to parse the * shape, {@link WKTReader.State#offset} should be advanced beyond it (e.g. to the ',' or ')' or * EOF in general). The default implementation checks the name against some predefined names and * calls corresponding parse methods to handle the rest. Overriding this method is an excellent * extension point for additional shape types. Or, use this class by delegation to this method. * <p> * When writing a parse method that reacts to a specific shape type, remember to handle the * dimension and EMPTY token via * {@link org.locationtech.spatial4j.io.WKTReader.State#nextIfEmptyAndSkipZM()}. * * @param state * @param shapeType Non-Null string; could have mixed case. The first character is a letter. * @return The shape or null if not supported / unknown. */ protected Shape parseShapeByType(State state, String shapeType) throws ParseException { assert Character.isLetter(shapeType.charAt(0)) : "Shape must start with letter: " + shapeType; if (shapeType.equalsIgnoreCase("POINT")) { return parsePointShape(state); } else if (shapeType.equalsIgnoreCase("MULTIPOINT")) { return parseMultiPointShape(state); } else if (shapeType.equalsIgnoreCase("ENVELOPE")) { return parseEnvelopeShape(state); } else if (shapeType.equalsIgnoreCase("LINESTRING")) { return parseLineStringShape(state); } else if (shapeType.equalsIgnoreCase("POLYGON")) { return parsePolygonShape(state); } else if (shapeType.equalsIgnoreCase("GEOMETRYCOLLECTION")) { return parseGeometryCollectionShape(state); } else if (shapeType.equalsIgnoreCase("MULTILINESTRING")) { return parseMultiLineStringShape(state); } else if (shapeType.equalsIgnoreCase("MULTIPOLYGON")) { return parseMulitPolygonShape(state); } // extension if (shapeType.equalsIgnoreCase("BUFFER")) { return parseBufferShape(state); } // HEY! Update class Javadocs if add more shapes return null; } /** * Parses the BUFFER operation applied to a parsed shape. * * <pre> * '(' shape ',' number ')' * </pre> * * Whereas 'number' is the distance to buffer the shape by. */ protected Shape parseBufferShape(State state) throws ParseException { state.nextExpect('('); Shape shape = shape(state); state.nextExpect(','); double distance = shapeFactory.normDist(state.nextDouble()); state.nextExpect(')'); return shape.getBuffered(distance, ctx); } /** * Parses a POINT shape from the raw string. * * <pre> * '(' coordinate ')' * </pre> * * @see #point(State, ShapeFactory.PointsBuilder) */ protected Shape parsePointShape(State state) throws ParseException { if (state.nextIfEmptyAndSkipZM()) return shapeFactory.pointXY(Double.NaN, Double.NaN); state.nextExpect('('); OnePointsBuilder onePointsBuilder = new OnePointsBuilder(shapeFactory); point(state, onePointsBuilder); state.nextExpect(')'); return onePointsBuilder.getPoint(); } /** * Parses a MULTIPOINT shape from the raw string -- a collection of points. * * <pre> * '(' coordinate (',' coordinate )* ')' * </pre> * * Furthermore, coordinate can optionally be wrapped in parenthesis. * * @see #point(State, ShapeFactory.PointsBuilder) */ protected Shape parseMultiPointShape(State state) throws ParseException { ShapeFactory.MultiPointBuilder builder = shapeFactory.multiPoint(); if (state.nextIfEmptyAndSkipZM()) return builder.build(); state.nextExpect('('); do { boolean openParen = state.nextIf('('); point(state, builder); if (openParen) state.nextExpect(')'); } while (state.nextIf(',')); state.nextExpect(')'); return builder.build(); } /** * Parses an ENVELOPE (aka Rectangle) shape from the raw string. The values are normalized. * <p> * Source: OGC "Catalogue Services Specification", the "CQL" (Common Query Language) sub-spec. * <em>Note the inconsistent order of the min & max values between x & y!</em> * * <pre> * '(' x1 ',' x2 ',' y2 ',' y1 ')' * </pre> */ protected Shape parseEnvelopeShape(State state) throws ParseException { // FYI no dimension or EMPTY state.nextExpect('('); double x1 = state.nextDouble(); state.nextExpect(','); double x2 = state.nextDouble(); state.nextExpect(','); double y2 = state.nextDouble(); state.nextExpect(','); double y1 = state.nextDouble(); state.nextExpect(')'); return shapeFactory.rect(shapeFactory.normX(x1), shapeFactory.normX(x2), shapeFactory.normY(y1), shapeFactory.normY(y2)); } /** * Parses a LINESTRING shape from the raw string -- an ordered sequence of points. * * <pre> * coordinateSequence * </pre> * * @see #pointList(State, ShapeFactory.PointsBuilder) */ protected Shape parseLineStringShape(State state) throws ParseException { ShapeFactory.LineStringBuilder lineStringBuilder = shapeFactory.lineString(); if (state.nextIfEmptyAndSkipZM()) return lineStringBuilder.build(); return pointList(state, lineStringBuilder).build(); } /** * Parses a MULTILINESTRING shape from the raw string -- a collection of line strings. * * <pre> * '(' coordinateSequence (',' coordinateSequence )* ')' * </pre> * * @see #parseLineStringShape(org.locationtech.spatial4j.io.WKTReader.State) */ protected Shape parseMultiLineStringShape(State state) throws ParseException { ShapeFactory.MultiLineStringBuilder multiLineStringBuilder = shapeFactory.multiLineString(); if (!state.nextIfEmptyAndSkipZM()) { state.nextExpect('('); do { multiLineStringBuilder.add(pointList(state, multiLineStringBuilder.lineString())); } while (state.nextIf(',')); state.nextExpect(')'); } return multiLineStringBuilder.build(); } /** * Parses a POLYGON shape from the raw string. It might return a * {@link org.locationtech.spatial4j.shape.Rectangle} if the polygon is one. * * <pre> * coordinateSequenceList * </pre> */ protected Shape parsePolygonShape(WKTReader.State state) throws ParseException { ShapeFactory.PolygonBuilder polygonBuilder = shapeFactory.polygon(); if (!state.nextIfEmptyAndSkipZM()) { polygonBuilder = polygon(state, polygonBuilder); } return polygonBuilder.buildOrRect(); } /** * Parses a MULTIPOLYGON shape from the raw string. * * <pre> * '(' polygon (',' polygon )* ')' * </pre> */ protected Shape parseMulitPolygonShape(WKTReader.State state) throws ParseException { ShapeFactory.MultiPolygonBuilder multiPolygonBuilder = shapeFactory.multiPolygon(); if (!state.nextIfEmptyAndSkipZM()) { state.nextExpect('('); do { multiPolygonBuilder.add(polygon(state, multiPolygonBuilder.polygon())); } while (state.nextIf(',')); state.nextExpect(')'); } return multiPolygonBuilder.build(); } /** * Parses a GEOMETRYCOLLECTION shape from the raw string. * * <pre> * '(' shape (',' shape )* ')' * </pre> */ protected Shape parseGeometryCollectionShape(State state) throws ParseException { ShapeFactory.MultiShapeBuilder<Shape> multiShapeBuilder = shapeFactory.multiShape(Shape.class); if (state.nextIfEmptyAndSkipZM()) return multiShapeBuilder.build(); state.nextExpect('('); do { multiShapeBuilder.add(shape(state)); } while (state.nextIf(',')); state.nextExpect(')'); return multiShapeBuilder.build(); } /** * Reads a shape from the current position, starting with the name of the shape. It calls * {@link #parseShapeByType(org.locationtech.spatial4j.io.WKTReader.State, String)} and throws an * exception if the shape wasn't supported. */ protected Shape shape(State state) throws ParseException { String type = state.nextWord(); Shape shape = parseShapeByType(state, type); if (shape == null) throw new ParseException("Shape of type " + type + " is unknown", state.offset); return shape; } /** * Reads a list of Points (AKA CoordinateSequence) from the current position. * * <pre> * '(' coordinate (',' coordinate )* ')' * </pre> * * @see #point(State, ShapeFactory.PointsBuilder) */ protected <B extends ShapeFactory.PointsBuilder> B pointList(State state, B pointsBuilder) throws ParseException { state.nextExpect('('); do { point(state, pointsBuilder); } while (state.nextIf(',')); state.nextExpect(')'); return pointsBuilder; } /** * Reads a raw Point (AKA Coordinate) from the current position. Only the first 2 numbers are * used. The values are normalized. * * <pre> * number number number* * </pre> */ protected ShapeFactory.PointsBuilder point(State state, ShapeFactory.PointsBuilder pointsBuilder) throws ParseException { double x = state.nextDouble(); double y = state.nextDouble(); state.skipNextDoubles();//TODO capture to call pointXYZ pointsBuilder.pointXY(shapeFactory.normX(x), shapeFactory.normY(y)); return pointsBuilder; } /** * Reads a polygon */ protected ShapeFactory.PolygonBuilder polygon(WKTReader.State state, ShapeFactory.PolygonBuilder polygonBuilder) throws ParseException { state.nextExpect('('); pointList(state, polygonBuilder); // outer ring while (state.nextIf(',')) { ShapeFactory.PolygonBuilder.HoleBuilder holeBuilder = polygonBuilder.hole(); pointList(state, holeBuilder); holeBuilder.endHole(); } state.nextExpect(')'); return polygonBuilder; } /** The parse state. */ public class State { /** Set in {@link #parseIfSupported(String)}. */ public String rawString; /** Offset of the next char in {@link #rawString} to be read. */ public int offset; /** Dimensionality specifier (e.g. 'Z', or 'M') following a shape type name. */ public String dimension; public State(String rawString) { this.rawString = rawString; } public SpatialContext getCtx() { return ctx; } public WKTReader getParser() { return WKTReader.this; } /** * Reads the word starting at the current character position. The word terminates once * {@link Character#isJavaIdentifierPart(char)} returns false (or EOF). {@link #offset} is * advanced past whitespace. * * @return Non-null non-empty String. */ public String nextWord() throws ParseException { int startOffset = offset; while (offset < rawString.length() && Character.isJavaIdentifierPart(rawString.charAt(offset))) { offset++; } if (startOffset == offset) throw new ParseException("Word expected", startOffset); String result = rawString.substring(startOffset, offset); nextIfWhitespace(); return result; } /** * Skips over a dimensionality token (e.g. 'Z' or 'M') if found, storing in {@link #dimension}, * and then looks for EMPTY, consuming that and whitespace. * * <pre> * dimensionToken? 'EMPTY'? * </pre> * * @return True if EMPTY was found. */ public boolean nextIfEmptyAndSkipZM() throws ParseException { if (eof()) return false; char c = rawString.charAt(offset); if (c == '(' || !Character.isJavaIdentifierPart(c)) return false; String word = nextWord(); if (word.equalsIgnoreCase("EMPTY")) return true; // we figure this word is Z or ZM or some other dimensionality signifier. We skip it. this.dimension = word; if (eof()) return false; c = rawString.charAt(offset); if (c == '(' || !Character.isJavaIdentifierPart(c)) return false; word = nextWord(); if (word.equalsIgnoreCase("EMPTY")) return true; throw new ParseException("Expected EMPTY because found dimension; but got [" + word + "]", offset); } /** * Reads in a double from the String. Parses digits with an optional decimal, sign, or exponent. * NaN and Infinity are not supported. {@link #offset} is advanced past whitespace. * * @return Double value */ public double nextDouble() throws ParseException { int startOffset = offset; skipDouble(); if (startOffset == offset) throw new ParseException("Expected a number", offset); double result; try { result = Double.parseDouble(rawString.substring(startOffset, offset)); } catch (Exception e) { throw new ParseException(e.toString(), offset); } nextIfWhitespace(); return result; } /** Advances offset forward until it points to a character that isn't part of a number. */ public void skipDouble() { int startOffset = offset; for (; offset < rawString.length(); offset++) { char c = rawString.charAt(offset); if (!(Character.isDigit(c) || c == '.' || c == '-' || c == '+')) { // 'e' is okay as long as it isn't first if (offset != startOffset && (c == 'e' || c == 'E')) continue; break; } } } /** Advances past as many doubles as there are, with intervening whitespace. */ public void skipNextDoubles() { while (!eof()) { int startOffset = offset; skipDouble(); if (startOffset == offset) return; nextIfWhitespace(); } } /** * Verifies that the current character is of the expected value. If the character is the * expected value, then it is consumed and {@link #offset} is advanced past whitespace. * * @param expected The expected char. */ public void nextExpect(char expected) throws ParseException { if (eof()) throw new ParseException("Expected [" + expected + "] found EOF", offset); char c = rawString.charAt(offset); if (c != expected) throw new ParseException("Expected [" + expected + "] found [" + c + "]", offset); offset++; nextIfWhitespace(); } /** If the string is consumed, i.e. at end-of-file. */ public final boolean eof() { return offset >= rawString.length(); } /** * If the current character is {@code expected}, then offset is advanced after it and any * subsequent whitespace. Otherwise, false is returned. * * @param expected The expected char * @return true if consumed */ public boolean nextIf(char expected) { if (!eof() && rawString.charAt(offset) == expected) { offset++; nextIfWhitespace(); return true; } return false; } /** * Moves offset to next non-whitespace character. Doesn't move if the offset is already at * non-whitespace. <em>There is very little reason for subclasses to call this because * most other parsing methods call it.</em> */ public void nextIfWhitespace() { for (; offset < rawString.length(); offset++) { if (!Character.isWhitespace(rawString.charAt(offset))) { return; } } } /** * Returns the next chunk of text till the next ',' or ')' (non-inclusive) or EOF. If a '(' is * encountered, then it looks past its matching ')', taking care to handle nested matching * parenthesis too. It's designed to be of use to subclasses that wish to get the entire * subshape at the current position as a string so that it might be passed to other software * that will parse it. * <p> * Example: * * <pre> * OUTER(INNER(3, 5)) * </pre> * * If this is called when offset is at the first character, then it will return this whole * string. If called at the "I" then it will return "INNER(3, 5)". If called at "3", then it * will return "3". In all cases, offset will be positioned at the next position following the * returned substring. * * @return non-null substring. */ public String nextSubShapeString() throws ParseException { int startOffset = offset; int parenStack = 0;// how many parenthesis levels are we in? for (; offset < rawString.length(); offset++) { char c = rawString.charAt(offset); if (c == ',') { if (parenStack == 0) break; } else if (c == ')') { if (parenStack == 0) break; parenStack--; } else if (c == '(') { parenStack++; } } if (parenStack != 0) throw new ParseException("Unbalanced parenthesis", startOffset); return rawString.substring(startOffset, offset); } }// class State @Override public String getFormatName() { return ShapeIO.WKT; } static String readString(Reader reader) throws IOException { char[] arr = new char[1024]; StringBuilder buffer = new StringBuilder(); int numCharsRead; while ((numCharsRead = reader.read(arr, 0, arr.length)) != -1) { buffer.append(arr, 0, numCharsRead); } return buffer.toString(); } @Override public Shape read(Reader reader) throws IOException, ParseException { return parse(readString(reader)); } @Override public Shape read(Object value) throws IOException, ParseException, InvalidShapeException { return parse(value.toString()); } @Override public Shape readIfSupported(Object value) throws InvalidShapeException { try { return parseIfSupported(value.toString()); } catch (ParseException e) { } return null; } }