/*
* Copyright (c) 2017 wetransform GmbH
*
* All rights reserved. This program and the accompanying materials are made
* available under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation, either version 3 of the License,
* or (at your option) any later version.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution. If not, see <http://www.gnu.org/licenses/>.
*
* Contributors:
* wetransform GmbH <http://www.wetransform.to>
*/
package eu.esdihumboldt.hale.io.gml.reader.internal.wfs;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.text.MessageFormat;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.NoSuchElementException;
import org.apache.http.NameValuePair;
import org.apache.http.client.utils.URIBuilder;
import de.fhg.igd.slf4jplus.ALogger;
import de.fhg.igd.slf4jplus.ALoggerFactory;
import eu.esdihumboldt.hale.common.core.io.IOProvider;
import eu.esdihumboldt.hale.common.core.io.supplier.DefaultInputSupplier;
import eu.esdihumboldt.hale.common.core.io.supplier.LocatableInputSupplier;
import eu.esdihumboldt.hale.common.instance.geometry.CRSProvider;
import eu.esdihumboldt.hale.common.instance.model.Filter;
import eu.esdihumboldt.hale.common.instance.model.Instance;
import eu.esdihumboldt.hale.common.instance.model.InstanceCollection;
import eu.esdihumboldt.hale.common.instance.model.InstanceReference;
import eu.esdihumboldt.hale.common.instance.model.InstanceResolver;
import eu.esdihumboldt.hale.common.instance.model.ext.InstanceIterator;
import eu.esdihumboldt.hale.common.instance.model.impl.FilteredInstanceCollection;
import eu.esdihumboldt.hale.common.instance.model.impl.IndexInstanceReference;
import eu.esdihumboldt.hale.common.schema.model.TypeDefinition;
import eu.esdihumboldt.hale.common.schema.model.TypeIndex;
import eu.esdihumboldt.hale.io.gml.reader.internal.GmlInstanceCollection;
import eu.esdihumboldt.hale.io.gml.reader.internal.GmlInstanceCollection.GmlInstanceIterator;
import eu.esdihumboldt.hale.io.gml.reader.internal.instance.StreamGmlInstance;
/**
* Instance collection based on a GML input stream from WFS requests.<br>
* <br>
* The {@link InstanceIterator} created by {@link #iterator()} will
* transparently issue multiple requests to the WFS. When all features that
* match the request have been retrieved from the WFS (possibly via multiple
* requests), the iterator will be closed and subsequent calls to hasNext() will
* return false.<br>
* <br>
* The number of features to retrieve per request must be provided when creating
* the instance collection.
*
* For WFS 2.0.0/2.0.2 a starting offset can be provided by adding a
* <code>STARTINDEX</code> parameter to the source location. The maximum overall
* number of features to retrieve can be set by adding a
* <code>MAXFEATURES</code> (WFS 1.1.0) or <code>COUNT</code> (WFS 2.0.0/2.0.2)
* parameter to the source location or programmatically via
* {@link #setMaxNumberOfFeatures(int)}.
*
* @author Florian Esser
*/
public class WfsBackedGmlInstanceCollection implements InstanceCollection {
/**
* Constant indicating unlimited feature retrieval. It is used both as the
* "no absolute feature limit" value and as the "pagination disabled" value
* for the number of features per request.
*/
public static final int UNLIMITED = -1;
// Logger shared by the collection and the iterators it creates
private final ALogger log = ALoggerFactory.getLogger(WfsBackedGmlInstanceCollection.class);
// Original source location as reported by the input supplier
private final URI primordialUri;
// Key/value pairs of the query part of the primordial URI. Keys are stored
// in uppercase by convention.
private final Map<String, String> primordialQueryParams = new HashMap<>();
// URI used by iterator to create follow-up requests (primordial URI without
// the STARTINDEX and MAXFEATURES/COUNT parameters)
private final URI baseUri;
// XXX Use WFSVersion instead. To resolve dependency cycle, create
// e.e.h.common.wfs project?
// Value of the VERSION parameter of the primordial URI
private final String wfsVersion;
// Absolute limit for the amount of features to retrieve (default:
// unlimited)
private int maxNumberOfFeatures = UNLIMITED;
// Number of features to retrieve at most with one WFS GetFeature request,
// or UNLIMITED if pagination is disabled
private final int featuresPerRequest;
// Size determined once in the constructor from the "hits" request, or
// UNKNOWN_SIZE if it could not be determined reliably
private final int size;
// Parameters needed for instantiation of GmlInstanceCollection
private final TypeIndex sourceSchema;
private final boolean restrictToFeatures;
private final boolean ignoreRoot;
private final boolean strict;
private final CRSProvider crsProvider;
private final boolean ignoreNamespaces;
private final IOProvider ioProvider;
/**
 * Create a GML instance collection based on the given WFS source.
 *
 * The query parameters of the source location are inspected: a
 * <code>VERSION</code> parameter is mandatory, <code>STARTINDEX</code> and
 * <code>MAXFEATURES</code>/<code>COUNT</code> are stripped from the URI used
 * for follow-up requests and are applied as offset and absolute feature
 * limit instead. Finally a "hits" request is issued against the original
 * location to determine the size of the collection where possible.
 *
 * @param source the source
 * @param sourceSchema the source schema
 * @param restrictToFeatures if only instances that are GML features shall
 *            be loaded
 * @param ignoreRoot if the root element should be ignored for creating
 *            instances even if it is recognized as an allowed instance type
 * @param strict if associating elements with properties should be done
 *            strictly according to the schema, otherwise a fall-back is
 *            used trying to populate values also on invalid property paths
 * @param ignoreNamespaces if parsing of the XML instances should allow
 *            types and properties with namespaces that differ from those
 *            defined in the schema
 * @param crsProvider CRS provider in case no CRS is specified, may be
 *            <code>null</code>
 * @param provider the I/O provider to get values
 * @param featuresPerRequest Number of features to retrieve at most with one
 *            WFS GetFeature request, or {@value #UNLIMITED} to disable
 *            pagination
 * @throws URISyntaxException thrown if the WFS request URL cannot be
 *             generated from the source location URI
 * @throws IllegalArgumentException if the source location has no
 *             <code>VERSION</code> parameter, the WFS version is not
 *             supported, the feature limit in the URL is negative, or
 *             <code>featuresPerRequest</code> is neither positive nor
 *             {@value #UNLIMITED}
 * @throws NumberFormatException if the feature limit in the URL is not an
 *             integer
 */
public WfsBackedGmlInstanceCollection(LocatableInputSupplier<? extends InputStream> source,
        TypeIndex sourceSchema, boolean restrictToFeatures, boolean ignoreRoot, boolean strict,
        boolean ignoreNamespaces, CRSProvider crsProvider, IOProvider provider,
        int featuresPerRequest) throws URISyntaxException {
    this.sourceSchema = sourceSchema;
    this.restrictToFeatures = restrictToFeatures;
    this.ignoreRoot = ignoreRoot;
    this.strict = strict;
    this.crsProvider = crsProvider;
    this.ignoreNamespaces = ignoreNamespaces;
    this.ioProvider = provider;
    this.primordialUri = source.getLocation();

    // Index the query parameters by upper-case name. Locale.ROOT keeps the
    // keys stable regardless of the default locale (e.g. the Turkish
    // dotted/dotless "i" would otherwise break the VERSION/STARTINDEX
    // look-ups).
    URIBuilder builder = new URIBuilder(primordialUri);
    builder.getQueryParams().forEach(qp -> primordialQueryParams
            .put(qp.getName().toUpperCase(Locale.ROOT), qp.getValue()));

    wfsVersion = primordialQueryParams.get("VERSION");
    if (wfsVersion == null || wfsVersion.isEmpty()) {
        throw new IllegalArgumentException("WFS URL must contain VERSION parameter");
    }

    // Throws IllegalArgumentException for unsupported WFS versions
    final String maxFeaturesParam = getMaxFeaturesParameterName(wfsVersion);

    // Build base URI from original location by removing STARTINDEX and
    // MAXFEATURES/COUNT parameters if present
    List<NameValuePair> params = builder.getQueryParams();
    params.removeIf(nvp -> nvp.getName().equalsIgnoreCase("STARTINDEX")
            || nvp.getName().equalsIgnoreCase(maxFeaturesParam));
    builder.clearParameters();
    builder.addParameters(params);
    this.baseUri = builder.build();

    // If a MAXFEATURES/COUNT parameter is present in the primordial URI,
    // set maxNumberOfFeatures accordingly
    if (primordialQueryParams.containsKey(maxFeaturesParam)) {
        // Allow possible NumberFormatException to be thrown up to prevent
        // unintended retrieval of too many features
        maxNumberOfFeatures = Integer.parseInt(primordialQueryParams.get(maxFeaturesParam));
        if (maxNumberOfFeatures < 0) {
            throw new IllegalArgumentException(MessageFormat.format(
                    "Parameter \"{0}\" must be a non-negative integer.", maxFeaturesParam));
        }
    }

    // Validate the page size before any network access so that invalid
    // arguments fail fast
    if (featuresPerRequest != UNLIMITED && featuresPerRequest <= 0) {
        throw new IllegalArgumentException(MessageFormat.format(
                "featuresPerRequest must be a positive integer or {0} to disable pagination",
                UNLIMITED));
    }
    this.featuresPerRequest = featuresPerRequest;

    // Use primordial URI and issue "hits" request to check if the WFS will
    // return anything at all
    int hits;
    try {
        hits = requestHits(primordialUri);
    } catch (WFSException e) {
        log.debug(MessageFormat.format("Failed to perform hits query (RESULTTYPE=hits): {0}",
                e.getMessage()), e);
        hits = UNKNOWN_SIZE;
    }

    final int determinedSize;
    switch (wfsVersion) {
    case "2.0.0":
    case "2.0.2":
        // The "numberMatched" reported by a 2.0.0/2.0.2 WFS should be the
        // number of features matched by the query. An unknown number of
        // hits results in an unknown size, otherwise the size is capped
        // by the absolute feature limit (if any).
        if (hits == UNKNOWN_SIZE) {
            determinedSize = UNKNOWN_SIZE;
        }
        else if (maxNumberOfFeatures != UNLIMITED) {
            determinedSize = Math.min(maxNumberOfFeatures, hits);
        }
        else {
            determinedSize = hits;
        }
        break;
    case "1.1.0":
        // The "numberOfFeatures" reported by a 1.1.0 WFS may be smaller
        // than the actual number of features matched by the query if the
        // number of features returned per query is limited on the server
        // side. Therefore do not rely on it as a size information here.
    default:
        determinedSize = UNKNOWN_SIZE;
    }
    this.size = determinedSize;
}
/**
 * Impose (or lift) the absolute limit on the number of features that will
 * be retrieved.
 *
 * @param maxNumberOfFeatures {@value #UNLIMITED} for unlimited retrieval,
 *            or a non-negative integer to use as absolute feature limit
 * @throws IllegalArgumentException if the value is smaller than
 *             {@value #UNLIMITED}
 */
public void setMaxNumberOfFeatures(int maxNumberOfFeatures) {
    final boolean acceptable = maxNumberOfFeatures >= UNLIMITED;
    if (!acceptable) {
        throw new IllegalArgumentException(
                "Invalid maximum: must be either -1 (unlimited) or a non-negative integer.");
    }

    this.maxNumberOfFeatures = maxNumberOfFeatures;
}
/**
 * States whether the size of the collection could be determined when the
 * collection was created.
 *
 * @see InstanceCollection#hasSize()
 */
@Override
public boolean hasSize() {
    final boolean sizeUnknown = (size == UNKNOWN_SIZE);
    return !sizeUnknown;
}
/**
 * Get the number of instances in the collection.
 *
 * The size determined at construction time is capped by the current
 * absolute feature limit, because the iterator never returns more instances
 * than that limit, even if the limit was changed via
 * {@link #setMaxNumberOfFeatures(int)} after the collection was created.
 *
 * @return the number of instances, or <code>UNKNOWN_SIZE</code> if it could
 *         not be determined
 * @see InstanceCollection#size()
 */
@Override
public int size() {
    if (size == UNKNOWN_SIZE || maxNumberOfFeatures == UNLIMITED) {
        return size;
    }
    return Math.min(size, maxNumberOfFeatures);
}
/**
 * Create a new iterator over the instances provided by the WFS. Creating
 * the iterator already prepares the request for the first batch of
 * features. Callers should close the iterator when done.
 *
 * @see Iterable#iterator()
 */
@Override
public WfsBackedGmlInstanceIterator iterator() {
    final WfsBackedGmlInstanceIterator wfsIterator = new WfsBackedGmlInstanceIterator();
    return wfsIterator;
}
/**
 * States whether the collection is known to contain no instances.
 *
 * This is the case if the size determined at construction time is zero or
 * if the absolute feature limit is zero (the iterator does not return any
 * instance then). If the size is unknown and no such limit is set, the
 * collection is reported as not empty without contacting the WFS.
 *
 * @see InstanceCollection#isEmpty()
 */
@Override
public boolean isEmpty() {
    return size == 0 || maxNumberOfFeatures == 0;
}
/**
 * States whether the features are retrieved in batches.
 *
 * @return true if pagination is enabled, i.e. the number of features per
 *         request is not {@value #UNLIMITED}
 */
public boolean isPaged() {
    final boolean paginationDisabled = (featuresPerRequest == UNLIMITED);
    return !paginationDisabled;
}
/**
 * States whether the overall number of features is restricted.
 *
 * @return true if an absolute limit of features to be retrieved is set
 */
public boolean isLimited() {
    final boolean unlimited = (maxNumberOfFeatures == UNLIMITED);
    return !unlimited;
}
/**
 * Create a view on this collection that is restricted by the given filter.
 * The filtering is delegated to {@link FilteredInstanceCollection}.
 *
 * @see InstanceCollection#select(Filter)
 */
@Override
public InstanceCollection select(Filter filter) {
    final InstanceCollection filtered = FilteredInstanceCollection.applyFilter(this, filter);
    return filtered;
}
/**
 * Create a reference to the given instance based on its data set and its
 * index in the stream of instances.
 *
 * @throws IllegalArgumentException if the instance is not a
 *             {@link StreamGmlInstance}
 * @see InstanceResolver#getReference(Instance)
 */
@Override
public InstanceReference getReference(Instance instance) {
    if (!(instance instanceof StreamGmlInstance)) {
        throw new IllegalArgumentException(
                "Reference can only be determined based on a StreamGmlInstance");
    }

    // XXX Possible improvement: return reference based on feature ID if
    // source WFS supports a GetFeatureById query
    final StreamGmlInstance streamInstance = (StreamGmlInstance) instance;
    return new IndexInstanceReference(instance.getDataSet(),
            streamInstance.getIndexInStream());
}
/**
 * Resolve an index based reference by iterating over the collection from
 * the start: all instances before the referenced index are skipped, then
 * the next instance is returned. The iterator used for this is always
 * closed again.
 *
 * @see InstanceResolver#getInstance(InstanceReference)
 */
@Override
public Instance getInstance(InstanceReference reference) {
    final IndexInstanceReference indexReference = (IndexInstanceReference) reference;
    final WfsBackedGmlInstanceIterator instances = iterator();
    try {
        // skip all instances before the referenced instance
        int skipped = 0;
        while (skipped < indexReference.getIndex()) {
            instances.skip();
            skipped++;
        }
        // the iterator now points at the referenced instance
        return instances.next();
    } finally {
        instances.close();
    }
}
/**
 * Issue the given request with <code>RESULTTYPE=hits</code> and extract the
 * number of features from the response.
 *
 * @param requestUri the WFS request URI, the <code>RESULTTYPE</code>
 *            parameter is added to it
 * @return the number of features as determined by
 *         {@link FeatureCollectionHelper#getNumberOfFeatures(InputStream)}
 * @throws WFSException if the request cannot be built or executed, or if
 *             the response cannot be evaluated
 */
private int requestHits(URI requestUri) throws WFSException {
    URIBuilder builder = new URIBuilder(requestUri);
    builder.addParameter("RESULTTYPE", "hits");

    final InputStream in;
    try {
        in = builder.build().toURL().openStream();
    } catch (IOException | URISyntaxException e) {
        throw new WFSException(
                MessageFormat.format("Unable to execute WFS request: {0}", e.getMessage()), e);
    }

    try {
        return FeatureCollectionHelper.getNumberOfFeatures(in);
    } finally {
        // Always release the connection; a failure to close must not mask
        // the result (or the exception) of evaluating the response
        try {
            in.close();
        } catch (IOException e) {
            log.debug("Failed to close the response stream of the hits request", e);
        }
    }
}
/**
 * Determine the name of the query parameter that limits the number of
 * returned features for the given WFS version.
 *
 * @param version the WFS version string
 * @return <code>MAXFEATURES</code> for 1.1.0, <code>COUNT</code> for 2.0.0
 *         and 2.0.2
 * @throws IllegalArgumentException if the version is not supported
 */
private String getMaxFeaturesParameterName(String version) {
    // XXX Use WFSVersion
    if (version.equals("1.1.0")) {
        return "MAXFEATURES";
    }
    if (version.equals("2.0.0") || version.equals("2.0.2")) {
        return "COUNT";
    }
    throw new IllegalArgumentException("Unsupported WFS version");
}
/**
* Iterates over {@link Instance}s in the GML stream retrieved from the WFS.
*
* The iterator will not load more features per WFS GetFeature request than
* specified in the {@link WfsBackedGmlInstanceCollection}. If the WFS query
* yields more results, multiple GetFeature requests will be issued
* transparently to the WFS until all results have been retrieved or the
* maximum number of features as specified in the
* {@link WfsBackedGmlInstanceCollection} was reached.
*
* @author Florian Esser
*/
public class WfsBackedGmlInstanceIterator implements InstanceIterator {
// Collection backing the batch of features that is currently being read
private GmlInstanceCollection currentCollection;
// Iterator over the current batch; null once this iterator has been closed
private GmlInstanceIterator iterator;
// Number of features consumed so far; also added to the start offset when
// the URI of a follow-up request is calculated
private int totalFeaturesProcessed;
/**
* Create the iterator and set up the iterator for the first batch of
* features.
*/
public WfsBackedGmlInstanceIterator() {
createNextIterator();
}
/**
 * Finish the current batch: the iterator of the currently active
 * GmlInstanceCollection is closed. If pagination is enabled and the
 * feature limit has not been reached, an iterator for the next WFS request
 * is created. This {@link WfsBackedGmlInstanceIterator} is closed if no
 * further request is made or if the new request yields no result.
 */
private void proceedOrClose() {
    iterator.close();

    final boolean requestNextBatch = isPaged() && !isFeatureLimitReached();
    if (requestNextBatch) {
        createNextIterator();
        if (iterator.hasNext()) {
            // the follow-up request delivered more features
            return;
        }
    }

    close();
}
/**
 * Replace the current collection and iterator with ones that are based on
 * the URI of the next request.
 *
 * @throws IllegalStateException if the URI of the next request cannot be
 *             built
 */
private void createNextIterator() {
    final URI requestUri;
    try {
        requestUri = calculateNextUri();
    } catch (URISyntaxException e) {
        throw new IllegalStateException(e.getMessage(), e);
    }

    final String message = MessageFormat.format("Creating new iterator for URL \"{0}\"",
            requestUri.toString());
    log.debug(message);

    final DefaultInputSupplier requestSupplier = new DefaultInputSupplier(requestUri);
    currentCollection = new GmlInstanceCollection(requestSupplier, sourceSchema,
            restrictToFeatures, ignoreRoot, strict, ignoreNamespaces, crsProvider,
            ioProvider);
    iterator = currentCollection.iterator();

    // Make sure root element is processed by the iterator
    iterator.hasNext();
}
/**
 * Build the URI of the next request from the base URI.
 *
 * The <code>STARTINDEX</code> is the offset given in the primordial URI
 * (zero if missing, invalid or negative) plus the number of features
 * processed so far. The feature count parameter is set to the number of
 * features per request if pagination is enabled, otherwise to the absolute
 * feature limit; it is omitted if the respective value is negative.
 *
 * @return the URI of the next request
 * @throws URISyntaxException if the URI cannot be built
 */
private URI calculateNextUri() throws URISyntaxException {
    final URIBuilder nextRequest = new URIBuilder(baseUri);

    // Use STARTINDEX value in primordial URI as offset
    int startOffset = 0;
    final String requestedStart = primordialQueryParams.get("STARTINDEX");
    if (requestedStart != null) {
        try {
            startOffset = Math.max(0, Integer.parseInt(requestedStart));
        } catch (NumberFormatException e) {
            // Ignore if invalid
        }
    }

    // STARTINDEX is 0-based
    final int nextStartIndex = startOffset + totalFeaturesProcessed;
    nextRequest.addParameter("STARTINDEX", Integer.toString(nextStartIndex));

    final int maxFeatures = isPaged() ? WfsBackedGmlInstanceCollection.this.featuresPerRequest
            : WfsBackedGmlInstanceCollection.this.maxNumberOfFeatures;
    if (maxFeatures >= 0) {
        final String parameterName = getMaxFeaturesParameterName(wfsVersion);
        nextRequest.addParameter(parameterName, Integer.toString(maxFeatures));
    }

    return nextRequest.build();
}
/**
 * Check if another instance is available. The iterator is closed as soon
 * as the feature limit is reached or a follow-up request yields no further
 * results.
 *
 * @see java.util.Iterator#hasNext()
 */
@Override
public boolean hasNext() {
    if (isClosed()) {
        return false;
    }

    if (isFeatureLimitReached()) {
        close();
        return false;
    }

    // If all features from the current request have been read, try to
    // retrieve a new batch from the WFS and continue. If the new request
    // does not yield new results this iterator will be closed.
    final boolean batchExhausted = !iterator.hasNext();
    if (batchExhausted) {
        proceedOrClose();
    }

    return !isClosed() && iterator.hasNext();
}
/**
 * Check whether no further features should be processed.
 *
 * @return true if the number of features processed is equal to (or
 *         exceeds) the maximum number of features to be processed or the
 *         size of the collection, as far as these are known
 */
protected boolean isFeatureLimitReached() {
    if (maxNumberOfFeatures != UNLIMITED && totalFeaturesProcessed >= maxNumberOfFeatures) {
        // absolute feature limit exhausted
        return true;
    }

    return size != UNKNOWN_SIZE && totalFeaturesProcessed >= size;
}
/**
 * Return the next instance, wrapped in a {@link StreamGmlInstance} that
 * carries the number of features processed before it as its index.
 *
 * @throws NoSuchElementException if there is no further instance
 * @see java.util.Iterator#next()
 */
@Override
public Instance next() {
    if (!hasNext()) {
        throw new NoSuchElementException();
    }

    final Instance delegate = iterator.next();
    final int indexInStream = totalFeaturesProcessed;
    totalFeaturesProcessed = indexInStream + 1;
    return new StreamGmlInstance(delegate, indexInStream);
}
/**
 * Close the iterator of the current batch and release the references to it
 * and to its collection. Calling this method on an already closed iterator
 * has no effect.
 *
 * @see eu.esdihumboldt.hale.common.instance.model.ResourceIterator#close()
 */
@Override
public void close() {
    if (iterator == null) {
        // already closed
        return;
    }

    iterator.close();
    iterator = null;
    currentCollection = null;
}
/**
 * States whether this iterator has been closed, which happens explicitly
 * via {@link #close()} or when iteration finds no further results.
 *
 * @return true if the iterator is closed
 */
public boolean isClosed() {
    final boolean hasActiveIterator = (iterator != null);
    return !hasActiveIterator;
}
/**
 * Peek at the type of the next instance by delegating to the iterator of
 * the current batch.
 *
 * @return the type as reported by the current iterator, or
 *         <code>null</code> if there is no further instance
 * @see eu.esdihumboldt.hale.common.instance.model.ext.InstanceIterator#typePeek()
 */
@Override
public TypeDefinition typePeek() {
    if (!hasNext()) {
        return null;
    }

    return iterator.typePeek();
}
/**
 * Type peeking is always reported as supported.
 *
 * @see eu.esdihumboldt.hale.common.instance.model.ext.InstanceIterator#supportsTypePeek()
 */
@Override
public boolean supportsTypePeek() {
    final boolean typePeekSupported = true;
    return typePeekSupported;
}
/**
 * Skip the next instance, if there is one.
 *
 * A skipped instance counts as processed: the counter is used both as the
 * offset of follow-up requests and as the stream index of the instances
 * returned by {@link #next()}, so it has to advance for skipped instances
 * as well. Otherwise the request following a batch with skipped instances
 * would return instances that have already been seen.
 *
 * Nothing happens if the iterator is closed, the feature limit is reached
 * or the WFS has no further results.
 *
 * @see eu.esdihumboldt.hale.common.instance.model.ext.InstanceIterator#skip()
 */
@Override
public void skip() {
    // hasNext() takes care of the closed state, of the feature limit and of
    // moving on to the next batch if the current one is exhausted
    if (hasNext()) {
        iterator.skip();
        totalFeaturesProcessed++;
    }
}
}
}