UnenclosedEsriJsonRecordReader.java example

Explorer

spatial-framework-for-hadoop-master
- hive
  - src
    - main
      - java
        com
        esri
        hadoop
        hive
        BinUtils.java
        GeometryUtils.java
        HiveGeometry.java
        HiveGeometryOIHelper.java
        LogUtils.java
        ST_Aggr_ConvexHull.java
        ST_Aggr_Intersection.java
        ST_Aggr_Union.java
        ST_Area.java
        ST_AsBinary.java
        ST_AsGeoJson.java
        ST_AsJson.java
        ST_AsShape.java
        ST_AsText.java
        ST_Bin.java
        ST_BinEnvelope.java
        ST_Boundary.java
        ST_Buffer.java
        ST_Centroid.java
        ST_Contains.java
        ST_ConvexHull.java
        ST_CoordDim.java
        ST_Crosses.java
        ST_Difference.java
        ST_Dimension.java
        ST_Disjoint.java
        ST_Distance.java
        ST_EndPoint.java
        ST_EnvIntersects.java
        ST_Envelope.java
        ST_Equals.java
        ST_ExteriorRing.java
        ST_GeodesicLengthWGS84.java
        ST_GeomCollection.java
        ST_GeomFromGeoJson.java
        ST_GeomFromJson.java
        ST_GeomFromShape.java
        ST_GeomFromText.java
        ST_GeomFromWKB.java
        ST_Geometry.java
        ST_GeometryAccessor.java
        ST_GeometryN.java
        ST_GeometryProcessing.java
        ST_GeometryRelational.java
        ST_GeometryType.java
        ST_InteriorRingN.java
        ST_Intersection.java
        ST_Intersects.java
        ST_Is3D.java
        ST_IsClosed.java
        ST_IsEmpty.java
        ST_IsMeasured.java
        ST_IsRing.java
        ST_IsSimple.java
        ST_Length.java
        ST_LineFromWKB.java
        ST_LineString.java
        ST_M.java
        ST_MLineFromWKB.java
        ST_MPointFromWKB.java
        ST_MPolyFromWKB.java
        ST_MaxM.java
        ST_MaxX.java
        ST_MaxY.java
        ST_MaxZ.java
        ST_MinM.java
        ST_MinX.java
        ST_MinY.java
        ST_MinZ.java
        ST_MultiLineString.java
        ST_MultiPoint.java
        ST_MultiPolygon.java
        ST_NumGeometries.java
        ST_NumInteriorRing.java
        ST_NumPoints.java
        ST_Overlaps.java
        ST_Point.java
        ST_PointFromWKB.java
        ST_PointN.java
        ST_PointZ.java
        ST_PolyFromWKB.java
        ST_Polygon.java
        ST_Relate.java
        ST_SRID.java
        ST_SetSRID.java
        ST_StartPoint.java
        ST_SymmetricDiff.java
        ST_Touches.java
        ST_Union.java
        ST_Within.java
        ST_X.java
        ST_Y.java
        ST_Z.java
        serde
        BaseJsonSerDe.java
        EsriJsonSerDe.java
        GeoJsonSerDe.java
        JsonSerde.java
        shims
        HiveShims.java
    - test
      - java
        com
        esri
        hadoop
        hive
        TestStAsShape.java
        TestStGeomFromShape.java
        TestStGeometryType.java
        TestStLineString.java
        TestStMinX.java
        TestStMinY.java
        TestStMultiPoint.java
        TestStMultiPolygon.java
        TestStPoint.java
        TestStX.java
        TestStY.java
        serde
        JsonSerDeTestingBase.java
        TestEsriJsonSerDe.java
        TestGeoJsonSerDe.java
- json
  - src
    - main
      - java
        com
        esri
        json
        EsriFeature.java
        EsriFeatureClass.java
        EsriField.java
        EsriFieldType.java
        EsriJsonFactory.java
        deserializer
        GeometryJsonDeserializer.java
        GeometryTypeJsonDeserializer.java
        SpatialReferenceJsonDeserializer.java
        hadoop
        EnclosedBaseJsonRecordReader.java
        EnclosedEsriJsonInputFormat.java
        EnclosedEsriJsonRecordReader.java
        EnclosedGeoJsonInputFormat.java
        EnclosedGeoJsonRecordReader.java
        EnclosedJsonInputFormat.java
        EnclosedJsonRecordReader.java
        UnenclosedBaseJsonRecordReader.java
        UnenclosedEsriJsonInputFormat.java
        UnenclosedEsriJsonRecordReader.java
        UnenclosedGeoJsonInputFormat.java
        UnenclosedGeoJsonRecordReader.java
        UnenclosedJsonInputFormat.java
        UnenclosedJsonRecordReader.java
        serializer
        GeometryJsonSerializer.java
        GeometryTypeJsonSerializer.java
        SpatialReferenceJsonSerializer.java
    - test
      - java
        com
        esri
        json
        hadoop
        TestEnclosedEsriJsonRecordReader.java
        TestEnclosedGeoJsonRecordReader.java
        TestUnenclosedEsriJsonRecordMrv1.java
        TestUnenclosedEsriJsonRecordReader.java
        TestUnenclosedGeoJsonRecordReader.java

package com.esri.json.hadoop;

import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;


/**
 * 
 * Enumerates records from an Esri Unenclosed JSON file
 * 
 */
/*
 * The JSON will look like this (white-space ignored)
 * 
 * { // start record 1
 *   "attributes" : {},
 *   "geometry" : {}
 * } // end record 1
 * { // start record 2
 *   "attributes" : {},
 *   "geometry" : {}
 * } // end record 2
 */
public class UnenclosedEsriJsonRecordReader extends UnenclosedBaseJsonRecordReader {
	static final Log LOG = LogFactory.getLog(UnenclosedEsriJsonRecordReader.class.getName());

	/**
	 * Creates a reader with no split attached; delegates to the base class.
	 *
	 * @throws IOException declared explicitly because the superclass constructor is
	 *         declared to throw it
	 */
	public UnenclosedEsriJsonRecordReader() throws IOException {  // explicit just to declare exception
		super();
	}

	/**
	 * Creates a reader over the given split; delegates to the base class.
	 *
	 * @param split the input split to read records from
	 * @param conf  the Hadoop configuration passed through to the base class
	 * @throws IOException if the base class fails to set up the reader
	 */
	public UnenclosedEsriJsonRecordReader(org.apache.hadoop.mapred.InputSplit split,
									  Configuration conf) throws IOException {
		//attrLabel = "attributes";
		super(split, conf);
	}


	/**
	 * Given an arbitrary byte offset into an unenclosed JSON document,
	 * find the start of the next record in the document.  Discard trailing
	 * bytes from the previous record if we happened to seek to the middle
	 * of it.
	 * 
	 * Record boundary defined as : \{\s*"(attributes|geometry)"\s*:\s*\{
	 * 
	 * On success the reader is rewound to the mark taken right after the
	 * record's opening brace was read, readerPosition is restored to the
	 * value it had at that mark, and firstBraceConsumed is set to true.
	 * 
	 * @return true if a record boundary was found; false if the end of the
	 *         stream was reached first
	 * @throws IOException if reading from, or resetting, the underlying reader fails
	 */
	protected boolean moveToRecordStart() throws IOException {
		int next = 0;
		long resetPosition = readerPosition;

		// The case of split point exactly at whitespace between records, is
		// handled by forcing it to the split following, in the interest of
		// better balancing the splits, by consuming the whitespace in next().
		// The alternative of forcing it to the split preceding, could be
		// done like what is commented here.
		//   while (next != '{' || skipDup > 0) {  // skipDup>0 => record already consumed
		// 	  next = getChar();
		// 	  if (next < 0)  return false;   // end of stream, no good
		// 	  if (next == '}')  skipDup = -1;  // Definitely not
		// 	  else if (skipDup == 0) skipDup = 1;  // no info - Maybe so until refuted by '}'
		//   }

		while (true) {

			// scan until we reach a {
			// (skipped when a previous attempt already ended on a '{', which then
			// becomes the new candidate for the record's opening brace)
			while (next != '{') {
				next = getChar();

				// end of stream, no good
				if (next < 0) {
					return false;
				}
			}

			// Remember where the candidate record starts so we can rewind to it.
			// NOTE(review): the read-ahead limit of 100 presumably has to cover all
			// white-space between the brace and the second '{' - confirm against the
			// reader type used by the base class.
			resetPosition = readerPosition;
			inputReader.mark(100);

			// ok last char was '{', skip till we get to a '"'
			next = getNonWhite();
			if (next < 0) {   // end of stream, no good
				return false;
			}
			if (next != '"') {
				continue;
			}

			// Next should be a field name of  attributes  or  geometry .

			// If we see another opening brace, the previous one must have been inside
			// a quoted string literal (after which the double quote we found, was a
			// closing quote mark rather than the opening quote mark) - start over.
			boolean inEscape = false;
			StringBuilder fieldName = new StringBuilder();

			while (next != '{') {
				next = getChar();
				if (next < 0) {  // end of stream, no good
					return false;
				}

				if (inEscape) {
					// previous char was an unescaped backslash, so this char is
					// escaped and cannot be the closing quote
					inEscape = false;
				} else if (next == '\\') {
					inEscape = true;
				} else if (next == '"') {
					break;
				}

				fieldName.append((char) next);
			}

			String name = fieldName.toString();
			if (!(name.equals("attributes") || name.equals("geometry"))) {
				// not the field name we were expecting, start over
				continue;
			}

			// ok last char was '"', skip till we get to a ':'
			next = getNonWhite();
			if (next < 0) {   // end of stream, no good
				return false;
			}
			if (next != ':') {
				continue;
			}

			// and finally, if the next char is a {, we know for sure that this is a valid record
			next = getNonWhite();
			if (next < 0) {   // end of stream, no good
				return false;
			}

			if (next == '{') {
				// at this point we can be sure that we have found the record boundary
				break;
			}
		}

		// rewind to just after the record's opening brace
		inputReader.reset();
		readerPosition = resetPosition;

		firstBraceConsumed = true;

		return true;
	}

}