/*
* GeoTools - The Open Source Java GIS Toolkit
* http://geotools.org
*
* (C) 2015, Open Source Geospatial Foundation (OSGeo)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation;
* version 2.1 of the License.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*/
package org.geotools.data.shapefile;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintWriter;
import java.net.MalformedURLException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.geotools.data.DataSourceException;
import org.geotools.data.DataUtilities;
import org.geotools.data.FeatureWriter;
import org.geotools.data.Transaction;
import org.geotools.data.simple.SimpleFeatureCollection;
import org.geotools.data.simple.SimpleFeatureIterator;
import org.geotools.feature.simple.SimpleFeatureTypeBuilder;
import org.geotools.util.logging.Logging;
import org.opengis.feature.simple.SimpleFeature;
import org.opengis.feature.simple.SimpleFeatureType;
import org.opengis.feature.type.AttributeDescriptor;
import org.opengis.feature.type.GeometryDescriptor;
import com.vividsolutions.jts.geom.Geometry;
import com.vividsolutions.jts.geom.GeometryCollection;
import com.vividsolutions.jts.geom.LineString;
import com.vividsolutions.jts.geom.MultiLineString;
import com.vividsolutions.jts.geom.MultiPoint;
import com.vividsolutions.jts.geom.MultiPolygon;
import com.vividsolutions.jts.geom.Point;
import com.vividsolutions.jts.geom.Polygon;
/**
* Class specializing in dumping a feature collection onto one or more shapefiles into a target directory.
* <p>
* The collection will be distributed among different shapefiles if needed to respect certain limitations:
* <ul>
* <li>Only a single geometry type per shapefile: in case the source feature collection contains more than one, parallel shapefiles will be generated,
* by default appending the type of geometry at the end of the file name</li>
* <li>Maximum file size, by default 2GB for the shp file and 4GB for the dbf file. In case the maximum size is exceeded the code will create a new
* shapefile appending a counter at the end of the file name</li>
* </ul>
*
* @author Andrea Aime - GeoSolutions
*/
public class ShapefileDumper {
/**
 * Pairs a shapefile data store with the feature writer appending to it, and knows how to
 * roll over to a new, numbered shapefile via {@link #nextWriter()}.
 * <p>
 * NOTE(review): {@link #nextWriter()} does not call
 * {@code shapefileDumped} for the file it closes; only the store that is current when the
 * dump ends gets notified by the enclosing class. Confirm whether that is intended.
 */
private class StoreWriter {
    /** Counter appended to the type name of roll-over files; stays 0 while on the first file. */
    int currentFileId = 0;

    /** The store backing the shapefile currently being written. */
    ShapefileDataStore dstore;

    /** The writer appending features to {@link #dstore}. */
    FeatureWriter<SimpleFeatureType, SimpleFeature> writer;

    /** Schema the first file was created with; roll-over type names are derived from it. */
    SimpleFeatureType schema;

    /**
     * Creates the data store and the writer for the given schema.
     *
     * @param schema the feature type of the shapefile to create; its type name is used as the
     *        file name
     * @throws MalformedURLException if the target file cannot be turned into a URL
     * @throws FileNotFoundException if one of the target files cannot be created
     * @throws IOException if the store or the writer cannot be set up
     */
    public StoreWriter(SimpleFeatureType schema) throws MalformedURLException, FileNotFoundException, IOException {
        // create the datastore for the current geom type
        this.schema = schema;
        createStoreAndWriter(schema);
    }

    /**
     * Builds a new store for the given schema and opens an auto-commit writer on it, making
     * both the current ones.
     */
    private void createStoreAndWriter(SimpleFeatureType schema)
            throws MalformedURLException, FileNotFoundException, IOException {
        this.dstore = buildStore(schema);
        this.writer = dstore.getFeatureWriter(schema.getTypeName(),
                Transaction.AUTO_COMMIT);
    }

    /**
     * Closes the current shapefile and switches to a new one whose type name is the original
     * type name followed by an increasing counter.
     *
     * @throws IOException if the old file cannot be closed or the new one cannot be created
     */
    public void nextWriter() throws IOException {
        // close the old shapefile; the store is disposed even when closing the writer fails,
        // otherwise it would never be released
        try {
            this.writer.close();
        } finally {
            this.dstore.dispose();
        }

        // prepare the new one, always deriving the name from the original schema
        currentFileId++;
        SimpleFeatureTypeBuilder tb = new SimpleFeatureTypeBuilder();
        tb.init(schema);
        tb.setName(schema.getTypeName() + String.valueOf(currentFileId));
        SimpleFeatureType ft = tb.buildFeatureType();

        // set it up at the current store and writer
        createStoreAndWriter(ft);
    }
}
/** Logger used to report problems met while dumping. */
static final Logger LOGGER = Logging.getLogger(ShapefileDumper.class);
/** Directory in which the .shp and .cst files are created. */
File targetDirectory;
/** Maximum .shp size passed on to every data store created by the dumper. */
long maxShpSize = ShapefileFeatureWriter.DEFAULT_MAX_SHAPE_SIZE;
/** Maximum .dbf size passed on to every data store created by the dumper. */
long maxDbfSize = ShapefileFeatureWriter.DEFAULT_MAX_DBF_SIZE;
/** Whether a shapefile is still created when the collection contains no features. */
boolean emptyShapefileAllowed = true;
/** Charset set on the data stores and recorded in the .cst file; starts as the DBFCHARSET parameter default. */
Charset charset = (Charset) ShapefileDataStoreFactory.DBFCHARSET.getDefaultValue();
/**
 * Creates a dumper writing its output files into the given directory.
 *
 * @param targetDirectory the directory the shapefiles will be created in
 */
public ShapefileDumper(File targetDirectory) {
    // stored as is, the directory is only accessed once a store gets built
    this.targetDirectory = targetDirectory;
}
/**
 * Returns the maximum size of the shp files being generated.
 *
 * @return the size limit currently configured for shp files
 */
public long getMaxShpSize() {
    return this.maxShpSize;
}
/**
 * Sets the maximum size of the shp files the dumper will generate. The default is 2GB. Once
 * the threshold is reached, a new shapefile with a progressive number at the end of its name
 * is written to continue dumping features.
 *
 * @param maxShapeSize the new size limit for shp files
 */
public void setMaxShpSize(long maxShapeSize) {
    maxShpSize = maxShapeSize;
}
/**
 * Returns the maximum size of the DBF files being generated.
 *
 * @return the size limit currently configured for DBF files
 */
public long getMaxDbfSize() {
    return this.maxDbfSize;
}
/**
 * Sets the maximum size of the DBF files the dumper will generate. The default is 4GB, but
 * some systems might only be able to read DBF files up to 2GB. Once the threshold is reached,
 * a new shapefile with a progressive number at the end of its name is written to continue
 * dumping features.
 *
 * @param maxDbfSize the new size limit for DBF files
 */
public void setMaxDbfSize(long maxDbfSize) {
    // the parameter shadows the field, hence the explicit qualification
    this.maxDbfSize = maxDbfSize;
}
/**
 * Returns the charset used in the DBF files. Unless changed, it is the default value of the
 * {@code ShapefileDataStoreFactory.DBFCHARSET} parameter (ISO-8859-1, per DBF spec).
 *
 * @return the charset used to encode the DBF files
 */
public Charset getCharset() {
    return this.charset;
}
/**
 * Sets the charset used to dump the DBF files. Its name is also what gets written in the
 * .cst side file.
 *
 * @param charset the charset to encode the DBF files with
 */
public void setCharset(Charset charset) {
    // the parameter shadows the field, hence the explicit qualification
    this.charset = charset;
}
/**
 * Tells whether dumping an empty shapefile is allowed (it is by default).
 *
 * @return true if a shapefile gets created even when there are no features to write
 */
public boolean isEmptyShapefileAllowed() {
    return this.emptyShapefileAllowed;
}
/**
 * Setting this flag to false prevents empty shapefiles from being created.
 *
 * @param emptyShapefileAllowed false to skip the creation of a shapefile when no feature is
 *        written, true to create it anyway
 */
public void setEmptyShapefileAllowed(boolean emptyShapefileAllowed) {
    // the parameter shadows the field, hence the explicit qualification
    this.emptyShapefileAllowed = emptyShapefileAllowed;
}
/**
 * Dumps the collection into one or more shapefiles. Multiple files will be generated when
 * the input collection contains multiple geometry types, or as the size limit for output files
 * gets reached.
 * <p>
 * When the geometry type of the collection is generic ({@code Geometry} or
 * {@code GeometryCollection}) features are split by actual geometry type, and features with a
 * null geometry are all written to a dedicated file with the "_NULL" suffix.
 * <p>
 * All writers and stores are closed before returning, also on failure. A failure met while
 * closing is thrown only if the dump itself succeeded, otherwise it is attached as a
 * suppressed exception to the failure already being propagated.
 *
 * @param fc The input feature collection
 * @return True if at least one feature got written, false otherwise
 * @throws IOException if the collection has no geometry attribute, or if writing or closing
 *         any of the shapefiles fails
 */
public boolean dump(SimpleFeatureCollection fc) throws IOException {
    // make sure we are not trying to write out a geometryless data set
    if (fc.getSchema().getGeometryDescriptor() == null) {
        throw new DataSourceException("Cannot write geometryless shapefiles, yet "
                + fc.getSchema() + " has no geometry field");
    }

    // Takes a feature collection with a generic schema and remaps it to one whose schema
    // respects the limitations of the shapefile format
    fc = RemappingFeatureCollection.getShapefileCompatibleCollection(fc);
    SimpleFeatureType schema = fc.getSchema();

    Map<Class, StoreWriter> writers = new HashMap<Class, StoreWriter>();
    boolean featuresWritten = false;
    Class<?> geomType = schema.getGeometryDescriptor().getType().getBinding();

    // let's see if we will need to write multiple geometry types
    boolean multiWriter = GeometryCollection.class.equals(geomType)
            || Geometry.class.equals(geomType);

    // we write all the features with no geometry type defined and NULL geometries to the same file
    StoreWriter nullStoreWriter = null;

    // the checked failure leaving the write loop, if any, so that closing problems do not hide it
    Throwable primaryFailure = null;
    try (SimpleFeatureIterator it = fc.features()) {
        while (it.hasNext()) {
            SimpleFeature f = it.next();

            // if the geometry type is not defined and the geometry value is NULL we write it
            // to the NULL geometries file otherwise we write it to the correspondent geometry file
            StoreWriter storeWriter;
            if (multiWriter && f.getDefaultGeometry() == null) {
                // lazy instantiation of NULL geometries writer (kept out of the writers cache)
                if (nullStoreWriter == null) {
                    nullStoreWriter = getStoreWriter(schema, null, multiWriter, Point.class, "_NULL");
                }
                storeWriter = nullStoreWriter;
            } else {
                storeWriter = getStoreWriter(f, writers, multiWriter);
            }

            // try to write, the shapefile size limits could be reached
            try {
                writeToShapefile(f, storeWriter.writer);
            } catch (ShapefileSizeException e) {
                // make one attempt to move to the next file (just one, since
                // we could be trying to write a feature that won't fit the size limits)
                storeWriter.nextWriter();
                writeToShapefile(f, storeWriter.writer);
            }
            featuresWritten = true;
        }

        // force writing out a empty shapefile if required
        if (!featuresWritten && emptyShapefileAllowed) {
            if (multiWriter) {
                // force the dump of a point file
                getStoreWriter(fc.getSchema(), writers, true, Point.class, null);
            } else {
                getStoreWriter(fc.getSchema(), writers, false, geomType, null);
            }
        }
    } catch (ShapefileSizeException e) {
        primaryFailure = e;
        throw e;
    } catch (IOException ioe) {
        LOGGER.log(Level.WARNING,
                "Error while writing featuretype '" + schema.getTypeName() + "' to shapefile.",
                ioe);
        IOException wrapped = new IOException(ioe);
        primaryFailure = wrapped;
        throw wrapped;
    } finally {
        // close all writers, dispose all datastores, even if an exception occurs
        // during closeup (shapefile datastore will have to copy the shapefiles, that might
        // fail in many ways)
        IOException closeFailure = null;

        // add the not defined and NULL geometries store writer if defined
        List<StoreWriter> writersValues = new ArrayList<>(writers.values());
        if (nullStoreWriter != null) {
            writersValues.add(nullStoreWriter);
        }
        for (StoreWriter sw : writersValues) {
            try {
                closeStoreWriter(sw);
            } catch (IOException e) {
                // keep the first failure and attach the following ones to it
                if (closeFailure == null) {
                    closeFailure = e;
                } else {
                    closeFailure.addSuppressed(e);
                }
            }
        }

        // if an exception occurred make the world aware of it, without replacing the
        // failure that is already on its way out
        if (closeFailure != null) {
            if (primaryFailure != null) {
                primaryFailure.addSuppressed(closeFailure);
            } else {
                throw new IOException(closeFailure);
            }
        }
    }
    return featuresWritten;
}

/**
 * Closes the writer and disposes the store of the given store writer, then notifies
 * subclasses that the file has been completed. The store is disposed even if reading its
 * schema or closing the writer fails; the notification happens only if everything succeeded.
 */
private void closeStoreWriter(StoreWriter sw) throws IOException {
    SimpleFeatureType writerSchema;
    try {
        writerSchema = sw.dstore.getSchema();
    } finally {
        try {
            sw.writer.close();
        } finally {
            sw.dstore.dispose();
        }
    }
    // notify subclasses that the file has been completed
    shapefileDumped(writerSchema.getTypeName(), writerSchema);
}
/**
 * Appends a copy of the given feature to the shapefile behind the writer.
 *
 * @param f the feature to copy values from
 * @param writer the writer providing the feature to fill in and write out
 * @throws IOException if the writer cannot provide or write the feature
 */
private void writeToShapefile(SimpleFeature f,
        FeatureWriter<SimpleFeatureType, SimpleFeature> writer) throws IOException {
    final SimpleFeature target = writer.next();

    // values are copied by name rather than by position, since the shapefile store changes
    // the location and name of the geometry attribute
    for (AttributeDescriptor descriptor : target.getFeatureType().getAttributeDescriptors()) {
        final String attributeName = descriptor.getLocalName();
        target.setAttribute(attributeName, f.getAttribute(attributeName));
    }

    // the geometry is set through the default geometry accessor, whatever its name is
    target.setDefaultGeometry(f.getDefaultGeometry());
    writer.write();
}
/**
 * Extension point allowing subclasses to perform extra actions against a shapefile that has
 * been completely written. The default implementation is empty.
 *
 * @param fileName the type name of the store that was just closed
 * @param remappedSchema the schema of the store that was just closed
 * @throws IOException if the extra processing fails
 */
protected void shapefileDumped(String fileName, SimpleFeatureType remappedSchema) throws IOException {
    // nothing to do here, subclasses override this method when they need to
}
/**
 * Creates a shapefile data store for the specified schema inside the target directory. The
 * file name is the type name of the schema. The configured charset and size limits are
 * applied to the store, and the charset name is also written to a side file with the .cst
 * extension.
 *
 * @param schema the feature type to create the shapefile for
 * @return the data store, with the schema already created
 * @throws MalformedURLException if the target file cannot be turned into a URL
 * @throws FileNotFoundException if the .cst file cannot be created
 * @throws IOException if the schema cannot be created
 */
private ShapefileDataStore buildStore(SimpleFeatureType schema)
        throws MalformedURLException, FileNotFoundException, IOException {
    File file = new File(targetDirectory, schema.getTypeName() + ".shp");
    ShapefileDataStore sfds = new ShapefileDataStore(DataUtilities.fileToURL(file));

    // handle shapefile encoding
    // and dump the charset into a .cst file, for debugging and control purposes
    // (.cst is not a standard extension)
    sfds.setCharset(charset);
    File charsetFile = new File(targetDirectory, schema.getTypeName() + ".cst");
    try (PrintWriter pw = new PrintWriter(charsetFile)) {
        pw.write(charset.name());
    }

    // create the shapefile
    try {
        sfds.createSchema(schema);
    } catch (NullPointerException e) {
        LOGGER.warning(
                "Error in shapefile schema. It is possible you don't have a geometry set in the output. \n"
                        + "Please specify a <wfs:PropertyName>geom_column_name</wfs:PropertyName> in the request");
        // keep the original exception as the cause, it is the only pointer to the actual problem
        throw new IOException(
                "Error in shapefile schema. It is possible you don't have a geometry set in the output.",
                e);
    }

    // create the prj file, on a best effort basis: a failure here is logged but does not
    // prevent the dump
    try {
        if (schema.getCoordinateReferenceSystem() != null) {
            sfds.forceSchemaCRS(schema.getCoordinateReferenceSystem());
        }
    } catch (Exception e) {
        LOGGER.log(Level.WARNING, "Could not properly create the .prj file", e);
    }

    // enforce the limits
    sfds.setMaxShpSize(this.maxShpSize);
    sfds.setMaxDbfSize(this.maxDbfSize);

    return sfds;
}
/**
 * Classifies the default geometry of the feature into one of the geometry types a shapefile
 * can hold.
 * <p>
 * Polygons are mapped to the MultiPolygon type and line strings to the MultiLineString type,
 * so that single and multi geometries end up in the same file.
 *
 * @param f the feature whose default geometry is inspected
 * @return a map with two entries: "target", the geometry class to use in the shapefile schema,
 *         and "geometryType", the suffix identifying the geometry type in the file name
 * @throws RuntimeException if the geometry is null or of a type not listed above
 */
private Map<String, Object> getGeometryType(SimpleFeature f) {
    Class<?> target;
    String geometryType = null;

    Geometry g = (Geometry) f.getDefaultGeometry();
    if (g instanceof Point) {
        target = Point.class;
        geometryType = "Point";
    } else if (g instanceof MultiPoint) {
        target = MultiPoint.class;
        geometryType = "MPoint";
    } else if (g instanceof MultiPolygon || g instanceof Polygon) {
        target = MultiPolygon.class;
        geometryType = "Polygon";
    } else if (g instanceof LineString || g instanceof MultiLineString) {
        target = MultiLineString.class;
        geometryType = "Line";
    } else {
        // a null geometry fails every instanceof check and lands here too: describe it
        // instead of calling getClass() on it, which would replace this message with a
        // bare NullPointerException
        throw new RuntimeException("This should never happen, "
                + "there's a bug in the SHAPE-ZIP output format. I got a geometry of type "
                + (g == null ? null : g.getClass()));
    }

    Map<String, Object> map = new HashMap<String, Object>();
    map.put("target", target);
    map.put("geometryType", geometryType);
    return map;
}
/**
 * Returns the store writer suitable for the given feature, creating a new data store and a
 * new writer if there is none so far for its geometry type.
 *
 * @param f the feature about to be written
 * @param writers the cache of store writers, keyed by geometry class
 * @param multiWriter true if features are being split by geometry type
 * @return the cached or newly created store writer
 * @throws IOException if a new store or writer cannot be created
 */
private StoreWriter getStoreWriter(SimpleFeature f,
        Map<Class, StoreWriter> writers, boolean multiWriter) throws IOException {
    if (!multiWriter) {
        // the schema declares a specific geometry type (Point, LineString, etc ...): a single
        // writer is shared by all features, cached under the generic Geometry class
        return getStoreWriter(f.getFeatureType(), writers, multiWriter, Geometry.class, "Geometry");
    }

    // the schema only declares a generic geometry, so the actual type is taken from the
    // geometry object carried by the feature
    Map<String, Object> typeInfo = getGeometryType(f);
    Class<?> target = (Class<?>) typeInfo.get("target");
    String geometryType = (String) typeInfo.get("geometryType");
    return getStoreWriter(f.getFeatureType(), writers, multiWriter, target, geometryType);
}
/**
 * Returns the store writer registered for the target geometry class, building and (when a
 * cache is given) registering a new one if it is missing.
 *
 * @param original the schema the shapefile schema is derived from
 * @param writers the cache of store writers keyed by geometry class, or null to skip caching
 * @param multiWriter true if features are being split by geometry type; geometry attributes
 *        are then retyped to the target class and the geometry type name is handed to
 *        {@link #getShapeName(SimpleFeatureType, String)}
 * @param target the geometry class used as cache key and, when retyping, as geometry binding
 * @param geometryType the name of the geometry type, only used if multiWriter is true
 * @return the cached or newly created store writer
 * @throws IOException if the new store or writer cannot be created
 */
private StoreWriter getStoreWriter(SimpleFeatureType original, Map<Class, StoreWriter> writers,
        boolean multiWriter, Class<?> target, String geometryType) throws IOException {
    // reuse the cached writer, if any
    StoreWriter cached = null;
    if (writers != null) {
        cached = writers.get(target);
    }
    if (cached != null) {
        return cached;
    }

    // no writer yet: retype the schema
    SimpleFeatureTypeBuilder typeBuilder = new SimpleFeatureTypeBuilder();
    for (AttributeDescriptor descriptor : original.getAttributeDescriptors()) {
        boolean retypeGeometry = Geometry.class.isAssignableFrom(descriptor.getType().getBinding())
                && multiWriter;
        if (!retypeGeometry) {
            typeBuilder.add(descriptor);
            continue;
        }
        // replace the generic geometry binding with the specific target class
        GeometryDescriptor geometryDescriptor = (GeometryDescriptor) descriptor;
        String geometryName = geometryDescriptor.getLocalName();
        typeBuilder.add(geometryName, target, geometryDescriptor.getCoordinateReferenceSystem());
        typeBuilder.setDefaultGeometry(geometryName);
    }
    typeBuilder.setNamespaceURI(original.getName().getURI());

    // the geometry type takes part in the file name only if there can be multiple types
    typeBuilder.setName(getShapeName(original, multiWriter ? geometryType : null));
    SimpleFeatureType retyped = typeBuilder.buildFeatureType();

    // create the writer, and remember it if a cache map was provided
    StoreWriter created = new StoreWriter(retyped);
    if (writers != null) {
        writers.put(target, created);
    }
    return created;
}
/**
 * Returns the shapefile name for the given schema and geometry type. By default it is simply
 * the type name followed by the geometry type; subclasses can override this behavior.
 *
 * @param schema the schema providing the type name
 * @param geometryType The name of the geometry type, will be null if there is no need for a
 *        geometry type suffix
 * @return the type name, with the geometry type appended when one is given
 */
protected String getShapeName(SimpleFeatureType schema, String geometryType) {
    final String typeName = schema.getTypeName();
    return geometryType != null ? typeName + geometryType : typeName;
}
}