package mil.nga.giat.geowave.format.landsat8;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.net.MalformedURLException;
import java.net.URL;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;

import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.ArrayUtils;
import org.geotools.data.DataUtilities;
import org.geotools.data.simple.SimpleFeatureIterator;
import org.geotools.feature.simple.SimpleFeatureBuilder;
import org.geotools.feature.simple.SimpleFeatureTypeBuilder;
import org.opengis.feature.simple.SimpleFeature;
import org.opengis.feature.simple.SimpleFeatureType;
import org.opengis.filter.Filter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Function;
import com.google.common.base.Predicate;
import com.google.common.collect.Iterators;
import com.google.common.collect.MinMaxPriorityQueue;
import com.google.common.io.LineReader;
import com.vividsolutions.jts.geom.MultiPolygon;

import mil.nga.giat.geowave.core.index.StringUtils;

/**
 * Streams Landsat 8 scene metadata from the public AWS scene list as
 * SimpleFeatures. The gzipped scene list is downloaded (or reused from a
 * local cache), unzipped to CSV, optionally filtered with CQL, and
 * optionally reduced to the n least-cloudy scenes, either overall or per
 * WRS-2 path/row.
 */
public class SceneFeatureIterator implements SimpleFeatureIterator {

    /** Orders scenes by ascending cloud cover so the clearest scenes come first. */
    protected static class BestCloudCoverComparator implements Comparator<SimpleFeature>, Serializable {
        private static final long serialVersionUID = -5294130929073387335L;

        @Override
        public int compare(final SimpleFeature first, final SimpleFeature second) {
            return Float.compare(
                    (Float) first.getAttribute(CLOUD_COVER_ATTRIBUTE_NAME),
                    (Float) second.getAttribute(CLOUD_COVER_ATTRIBUTE_NAME));
        }
    }

    private final static Logger LOGGER = LoggerFactory.getLogger(SceneFeatureIterator.class);
    private static final String SCENES_GZ_URL = "http://landsat-pds.s3.amazonaws.com/scene_list.gz";

    protected static final String SCENES_TYPE_NAME = "scene";
    public static final String SHAPE_ATTRIBUTE_NAME = "shape";
    public static final String ACQUISITION_DATE_ATTRIBUTE_NAME = "acquisitionDate";
    public static final String CLOUD_COVER_ATTRIBUTE_NAME = "cloudCover";
    public static final String PROCESSING_LEVEL_ATTRIBUTE_NAME = "processingLevel";
    public static final String PATH_ATTRIBUTE_NAME = "path";
    public static final String ROW_ATTRIBUTE_NAME = "row";
    public static final String SCENE_DOWNLOAD_ATTRIBUTE_NAME = "sceneDownloadUrl";
    public static final String ENTITY_ID_ATTRIBUTE_NAME = "entityId";

    protected static final String[] SCENE_ATTRIBUTES = new String[] {
        SHAPE_ATTRIBUTE_NAME,
        ACQUISITION_DATE_ATTRIBUTE_NAME,
        CLOUD_COVER_ATTRIBUTE_NAME,
        PROCESSING_LEVEL_ATTRIBUTE_NAME,
        PATH_ATTRIBUTE_NAME,
        ROW_ATTRIBUTE_NAME,
        ENTITY_ID_ATTRIBUTE_NAME,
        SCENE_DOWNLOAD_ATTRIBUTE_NAME
    };

    protected static String AQUISITION_DATE_FORMAT = "yyyy-MM-dd HH:mm:ss.SSS";

    private final String SCENES_DIR = "scenes";
    private final String COMPRESSED_FILE_NAME = "scene_list.gz";
    private final String CSV_FILE_NAME = "scene_list";
    private final String TEMP_CSV_FILE_NAME = "scene_list.tmp";

    private CSVParser parser;
    private FileInputStream parserFis;
    private InputStreamReader parserIsr;
    private Iterator<SimpleFeature> iterator;
    private SimpleFeatureType type;

    public SceneFeatureIterator(
            final boolean onlyScenesSinceLastRun,
            final boolean useCachedScenes,
            final boolean nBestScenesByPathRow,
            final int nBestScenes,
            final Filter cqlFilter,
            final String workspaceDir)
            throws MalformedURLException, IOException {
        init(
                new File(workspaceDir, SCENES_DIR),
                onlyScenesSinceLastRun,
                useCachedScenes,
                nBestScenesByPathRow,
                nBestScenes,
                new WRS2GeometryStore(workspaceDir),
                cqlFilter);
    }

    private void init(
            final File scenesDir,
            final boolean onlyScenesSinceLastRun,
            final boolean useCachedScenes,
            final boolean nBestScenesByPathRow,
            final int nBestScenes,
            final WRS2GeometryStore geometryStore,
            final Filter cqlFilter)
            throws IOException {
        if (!scenesDir.exists() && !scenesDir.mkdirs()) {
            LOGGER.warn("Unable to create directory '" + scenesDir.getAbsolutePath() + "'");
        }
        final File csvFile = new File(scenesDir, CSV_FILE_NAME);
        long startLine = 0;
        if (!csvFile.exists() || !useCachedScenes) {
            final File compressedFile = new File(scenesDir, COMPRESSED_FILE_NAME);
            final File tempCsvFile = new File(scenesDir, TEMP_CSV_FILE_NAME);
            if (compressedFile.exists() && !compressedFile.delete()) {
                LOGGER.warn("Unable to delete '" + compressedFile.getAbsolutePath() + "'");
            }
            if (tempCsvFile.exists() && !tempCsvFile.delete()) {
                LOGGER.warn("Unable to delete '" + tempCsvFile.getAbsolutePath() + "'");
            }

            // first download the gzipped scene list
            InputStream in = null;
            FileOutputStream outStream = null;
            try {
                in = new URL(SCENES_GZ_URL).openStream();
                outStream = new FileOutputStream(compressedFile);
                IOUtils.copyLarge(in, outStream);
            }
            catch (final IOException e) {
                LOGGER.warn("Unable to read scenes from public S3", e);
                throw e;
            }
            finally {
                // close quietly so a failed copy still releases both streams
                IOUtils.closeQuietly(outStream);
                IOUtils.closeQuietly(in);
            }

            // next unzip the scene list to a temporary CSV
            GzipCompressorInputStream gzIn = null;
            FileOutputStream out = null;
            FileInputStream fin = null;
            BufferedInputStream bin = null;
            try {
                fin = new FileInputStream(compressedFile);
                bin = new BufferedInputStream(fin);
                out = new FileOutputStream(tempCsvFile);
                gzIn = new GzipCompressorInputStream(bin);
                final byte[] buffer = new byte[1024];
                int n = 0;
                while (-1 != (n = gzIn.read(buffer))) {
                    out.write(buffer, 0, n);
                }
                fin.close();
                // once we have a CSV we can clean up the compressed file
                if (!compressedFile.delete()) {
                    LOGGER.warn("Unable to delete '" + compressedFile.getAbsolutePath() + "'");
                }
                out.close();
            }
            catch (final IOException e) {
                LOGGER.warn("Unable to extract scenes file", e);
                throw e;
            }
            finally {
                // HP Fortify "Unreleased Resource" false positive
                // These streams are closed if not null, in this "finally" block
                if (out != null) {
                    IOUtils.closeQuietly(out);
                }
                if (gzIn != null) {
                    IOUtils.closeQuietly(gzIn);
                }
                if (fin != null) {
                    IOUtils.closeQuietly(fin);
                }
                if (bin != null) {
                    IOUtils.closeQuietly(bin);
                }
            }

            if (onlyScenesSinceLastRun && csvFile.exists()) {
                // count the lines in the existing file so this run can skip
                // the scenes that were already seen
                try (final FileInputStream is = new FileInputStream(csvFile)) {
                    final LineReader lines = new LineReader(new InputStreamReader(is, StringUtils.UTF8_CHAR_SET));
                    while (lines.readLine() != null) {
                        startLine++;
                    }
                }
            }
            if (csvFile.exists() && !csvFile.delete()) {
                LOGGER.warn("Unable to delete '" + csvFile.getAbsolutePath() + "'");
            }
            if (!tempCsvFile.renameTo(csvFile)) {
                LOGGER.warn("Unable to rename '" + tempCsvFile.getAbsolutePath() + "' to '" + csvFile.getAbsolutePath() + "'");
            }
        }
        type = createFeatureType();
        setupCsvToFeatureIterator(csvFile, startLine, geometryStore, cqlFilter);
        if (nBestScenes > 0) {
            nBestScenes(nBestScenesByPathRow, nBestScenes);
        }
    }

    public static SimpleFeatureType createFeatureType() {
        // initialize the feature type
        final SimpleFeatureTypeBuilder typeBuilder = new SimpleFeatureTypeBuilder();
        typeBuilder.setName(SCENES_TYPE_NAME);
        typeBuilder.add(SHAPE_ATTRIBUTE_NAME, MultiPolygon.class);
        typeBuilder.add(ENTITY_ID_ATTRIBUTE_NAME, String.class);
        typeBuilder.add(ACQUISITION_DATE_ATTRIBUTE_NAME, Date.class);
        typeBuilder.add(CLOUD_COVER_ATTRIBUTE_NAME, Float.class);
        typeBuilder.add(PROCESSING_LEVEL_ATTRIBUTE_NAME, String.class);
        typeBuilder.add(PATH_ATTRIBUTE_NAME, Integer.class);
        typeBuilder.add(ROW_ATTRIBUTE_NAME, Integer.class);
        typeBuilder.add(SCENE_DOWNLOAD_ATTRIBUTE_NAME, String.class);
        return typeBuilder.buildFeatureType();
    }

    private boolean hasOtherProperties(final Filter cqlFilter) {
        final String[] attributes = DataUtilities.attributeNames(cqlFilter, type);
        for (final String attr : attributes) {
            if (!ArrayUtils.contains(SCENE_ATTRIBUTES, attr)) {
                return true;
            }
        }
        return false;
    }

    private void nBestScenes(final boolean byPathRow, final int n) {
        iterator = nBestScenes(this, byPathRow, n);
    }

    /** A WRS-2 path/row key used to group scenes when selecting the best per cell. */
    private static class PathRowPair {
        private final int path;
        private final int row;

        public PathRowPair(final int path, final int row) {
            this.path = path;
            this.row = row;
        }

        @Override
        public int hashCode() {
            final int prime = 31;
            int result = 1;
            result = (prime * result) + path;
            result = (prime * result) + row;
            return result;
        }

        @Override
        public boolean equals(final Object obj) {
            if (this == obj) {
                return true;
            }
            if (obj == null) {
                return false;
            }
            if (getClass() != obj.getClass()) {
                return false;
            }
            final PathRowPair other = (PathRowPair) obj;
            return (path == other.path) && (row == other.row);
        }
    }

    protected static Iterator<SimpleFeature> nBestScenes(
            final SimpleFeatureIterator iterator,
            final boolean byPathRow,
            final int n) {
        if (byPathRow) {
            // keep a bounded priority queue of the n clearest scenes per path/row
            final Map<PathRowPair, MinMaxPriorityQueue<SimpleFeature>> bestScenes = new HashMap<>();
            while (iterator.hasNext()) {
                final SimpleFeature feature = iterator.next();
                final Integer path = (Integer) feature.getAttribute(PATH_ATTRIBUTE_NAME);
                final Integer row = (Integer) feature.getAttribute(ROW_ATTRIBUTE_NAME);
                final PathRowPair pr = new PathRowPair(path, row);
                MinMaxPriorityQueue<SimpleFeature> queue = bestScenes.get(pr);
                if (queue == null) {
                    queue = MinMaxPriorityQueue.orderedBy(new BestCloudCoverComparator()).maximumSize(n).create();
                    bestScenes.put(pr, queue);
                }
                queue.offer(feature);
            }
            iterator.close();
            final List<Iterator<SimpleFeature>> iterators = new ArrayList<Iterator<SimpleFeature>>();
            for (final MinMaxPriorityQueue<SimpleFeature> queue : bestScenes.values()) {
                iterators.add(queue.iterator());
            }
            return Iterators.concat(iterators.iterator());
        }
        final MinMaxPriorityQueue<SimpleFeature> bestScenes =
                MinMaxPriorityQueue.orderedBy(new BestCloudCoverComparator()).maximumSize(n).create();
        // iterate once through the scenes, keeping the best scenes based on
        // cloud cover
        while (iterator.hasNext()) {
            bestScenes.offer(iterator.next());
        }
        iterator.close();
        return bestScenes.iterator();
    }

    private void setupCsvToFeatureIterator(
            final File csvFile,
            final long startLine,
            final WRS2GeometryStore geometryStore,
            final Filter cqlFilter)
            throws FileNotFoundException, IOException {
        parserFis = new FileInputStream(csvFile);
        parserIsr = new InputStreamReader(parserFis, StringUtils.UTF8_CHAR_SET);
        parser = new CSVParser(parserIsr, CSVFormat.DEFAULT.withHeader().withSkipHeaderRecord());
        final Iterator<CSVRecord> csvIterator = parser.iterator();
        long startLineDecrementor = startLine;
        // the parser already consumes the header row, so only skip to start line 1
        while ((startLineDecrementor > 1) && csvIterator.hasNext()) {
            startLineDecrementor--;
            csvIterator.next();
        }
        // wrap the iterator with a feature conversion and a filter (if provided)
        iterator = Iterators.transform(csvIterator, new CSVToFeatureTransform(geometryStore, type));
        if (cqlFilter != null) {
            Filter actualFilter;
            if (hasOtherProperties(cqlFilter)) {
                // strip predicates that reference attributes the scene schema lacks
                final PropertyIgnoringFilterVisitor visitor = new PropertyIgnoringFilterVisitor(SCENE_ATTRIBUTES, type);
                actualFilter = (Filter) cqlFilter.accept(visitor, null);
            }
            else {
                actualFilter = cqlFilter;
            }
            final CqlFilterPredicate filterPredicate = new CqlFilterPredicate(actualFilter);
            iterator = Iterators.filter(iterator, filterPredicate);
        }
    }

    public SimpleFeatureType getFeatureType() {
        return type;
    }

    @Override
    public void close() {
        if (parser != null) {
            try {
                parser.close();
                parser = null;
                parserFis.close();
                parserFis = null;
                parserIsr.close();
                parserIsr = null;
            }
            catch (final IOException e) {
                LOGGER.warn("Unable to close CSV parser", e);
            }
        }
    }

    @Override
    public boolean hasNext() {
        if (iterator != null) {
            return iterator.hasNext();
        }
        return false;
    }

    @Override
    public SimpleFeature next() throws NoSuchElementException {
        if (iterator != null) {
            return iterator.next();
        }
        throw new NoSuchElementException();
    }

    private static class CSVToFeatureTransform implements Function<CSVRecord, SimpleFeature> {
        // shape (Geometry), entityId (String), acquisitionDate (Date),
        // cloudCover (double), processingLevel (String), path (int), row (int)
        private final WRS2GeometryStore wrs2Geometry;
        private final SimpleFeatureBuilder featureBuilder;

        public CSVToFeatureTransform(final WRS2GeometryStore wrs2Geometry, final SimpleFeatureType type) {
            this.wrs2Geometry = wrs2Geometry;
            featureBuilder = new SimpleFeatureBuilder(type);
        }

        // entityId,acquisitionDate,cloudCover,processingLevel,path,row,min_lat,min_lon,max_lat,max_lon,download_url
        @Override
        public SimpleFeature apply(final CSVRecord input) {
            final String entityId = input.get("entityId");
            final double cloudCover = Double.parseDouble(input.get("cloudCover"));
            final String processingLevel = input.get("processingLevel");
            final int path = Integer.parseInt(input.get("path"));
            final int row = Integer.parseInt(input.get("row"));
            final String downloadUrl = input.get("download_url");
            // the scene footprint comes from the WRS-2 grid rather than the
            // bounding-box columns in the CSV
            final MultiPolygon shape = wrs2Geometry.getGeometry(path, row);
            featureBuilder.add(shape);
            featureBuilder.add(entityId);
            Date acquisitionDate;
            final SimpleDateFormat sdf = new SimpleDateFormat(AQUISITION_DATE_FORMAT);
            try {
                acquisitionDate = sdf.parse(input.get("acquisitionDate"));
                featureBuilder.add(acquisitionDate);
            }
            catch (final ParseException e) {
                LOGGER.warn("Unable to parse acquisition date", e);
                featureBuilder.add(null);
            }
            featureBuilder.add(cloudCover);
            featureBuilder.add(processingLevel);
            featureBuilder.add(path);
            featureBuilder.add(row);
            featureBuilder.add(downloadUrl);
            return featureBuilder.buildFeature(entityId);
        }
    }

    private static class CqlFilterPredicate implements Predicate<SimpleFeature> {
        private final Filter cqlFilter;

        public CqlFilterPredicate(final Filter cqlFilter) {
            this.cqlFilter = cqlFilter;
        }

        @Override
        public boolean apply(final SimpleFeature input) {
            return cqlFilter.evaluate(input);
        }
    }
}
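// A minimal usage sketch (kept as a comment so this file stays a single
// compilation unit): it shows how a caller might iterate the ten
// least-cloudy scenes per WRS-2 path/row with no CQL filter. The workspace
// path below is purely illustrative, and the try/finally mirrors the
// close() contract of SimpleFeatureIterator; this is an assumption-laden
// example, not code taken from the surrounding project.
//
// final SceneFeatureIterator scenes = new SceneFeatureIterator(
//         false, // onlyScenesSinceLastRun
//         true, // useCachedScenes
//         true, // nBestScenesByPathRow
//         10, // nBestScenes
//         null, // cqlFilter
//         "/tmp/landsat-workspace"); // hypothetical workspace directory
// try {
//     while (scenes.hasNext()) {
//         final SimpleFeature scene = scenes.next();
//         System.out.println(scene.getAttribute(SceneFeatureIterator.ENTITY_ID_ATTRIBUTE_NAME));
//     }
// }
// finally {
//     scenes.close();
// }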