package org.opentripplanner.analyst.cluster;
import com.amazonaws.AmazonServiceException;
import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.profile.ProfileCredentialsProvider;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.amazonaws.services.s3.model.S3Object;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.io.ByteStreams;
import org.apache.commons.io.FileUtils;
import org.opentripplanner.analyst.PointSet;
import org.opentripplanner.analyst.PointSetCache;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.List;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
/**
* TODO what does this do? Does it really need to be a subclass?
*/
public class PointSetDatastore extends PointSetCache {

    /** Directory on the local filesystem where point set JSON files are cached. */
    static private File POINT_DIR = new File("cache", "pointsets");

    /** Name of the S3 bucket holding gzipped point set JSON; unused when working offline. */
    private String pointsetBucket;

    /** S3 client; remains null when working offline. */
    private AmazonS3Client s3;

    /** When true, skip S3 entirely and serve only locally cached files. */
    private final Boolean workOffline;

    /**
     * @param maxCacheSize maximum number of deserialized point sets held in memory
     * @param s3CredentialsFilename AWS credentials profile file, or null to use the
     *        default credentials provider chain (e.g. an IAM role)
     * @param workOffline if true, never contact S3 and rely on the local disk cache only
     * @param pointsetBucket S3 bucket from which point sets are fetched / to which they are uploaded
     */
    public PointSetDatastore(Integer maxCacheSize, String s3CredentialsFilename,
            Boolean workOffline, String pointsetBucket) {
        super();
        // allow the data store to work offline with cached data and skip S3 connection
        this.workOffline = workOffline;
        this.pointsetBucket = pointsetBucket;
        if (!this.workOffline) {
            if (s3CredentialsFilename != null) {
                // explicit credentials profile was supplied
                AWSCredentials creds =
                        new ProfileCredentialsProvider(s3CredentialsFilename, "default").getCredentials();
                s3 = new AmazonS3Client(creds);
            }
            else {
                // default credentials providers, e.g. IAM role
                s3 = new AmazonS3Client();
            }
        }
        // set up the in-memory cache of deserialized point sets, loading through disk/S3 on miss
        this.pointSets = CacheBuilder.newBuilder()
                .maximumSize(maxCacheSize)
                .build(new S3PointSetLoader(workOffline, s3, pointsetBucket));
    }

    /**
     * Adds the given file to the local disk cache under the given ID and, when online,
     * uploads it to S3 gzip-compressed if it is not already present there.
     *
     * @param pointSetFile the point set JSON file to store
     * @param pointSetId the ID under which to store it
     * @return the point set ID that was passed in
     * @throws IOException if copying or compressing the file fails
     * @throws NullPointerException if pointSetId is null
     */
    public String addPointSet(File pointSetFile, String pointSetId) throws IOException {
        if (pointSetId == null)
            throw new NullPointerException("null point set id");

        File renamedPointSetFile = new File(POINT_DIR, pointSetId + ".json");
        // already cached locally: assume it was previously uploaded as well
        if (renamedPointSetFile.exists())
            return pointSetId;

        FileUtils.copyFile(pointSetFile, renamedPointSetFile);

        if (!this.workOffline) {
            // only upload if it doesn't exist
            try {
                s3.getObjectMetadata(pointsetBucket, pointSetId + ".json.gz");
            } catch (AmazonServiceException e) {
                // metadata fetch failed, so the object is presumed absent: upload it.
                // gzip compression in storage, not because we're worried about file size but to speed file transfer
                File tempFile = File.createTempFile(pointSetId, ".json.gz");
                // FIX: open the input stream only after the temp file and output streams are
                // safely created, and close it in its own finally, so it can never leak if
                // temp-file creation or the gzip stream constructor throws.
                FileInputStream fis = new FileInputStream(pointSetFile);
                try {
                    GZIPOutputStream gos = new GZIPOutputStream(new FileOutputStream(tempFile));
                    try {
                        ByteStreams.copy(fis, gos);
                    } finally {
                        gos.close(); // also closes the underlying FileOutputStream
                    }
                } finally {
                    fis.close();
                }
                s3.putObject(pointsetBucket, pointSetId + ".json.gz", tempFile);
                tempFile.delete();
            }
        }
        return pointSetId;
    }

    /** does this pointset exist in local cache? */
    public boolean isCached (String pointsetId) {
        return new File(POINT_DIR, pointsetId + ".json").exists();
    }

    /**
     * Load pointsets from the local disk cache, downloading from S3 on a cache miss
     * (unless working offline, in which case only the local cache is consulted).
     */
    protected static class S3PointSetLoader extends CacheLoader<String, PointSet> {

        private Boolean workOffline;
        private AmazonS3Client s3;
        private String pointsetBucket;

        /**
         * Construct a new point set loader. S3 clients are generally threadsafe, so it's fine to share them.
         */
        public S3PointSetLoader(Boolean workOffline, AmazonS3Client s3, String pointsetBucket) {
            this.workOffline = workOffline;
            this.s3 = s3;
            this.pointsetBucket = pointsetBucket;
        }

        /**
         * Fetch the point set with the given ID, downloading and un-gzipping it from S3
         * into the local disk cache first if it is not already there.
         */
        @Override
        public PointSet load (String pointSetId) throws Exception {
            File cachedFile = new File(POINT_DIR, pointSetId + ".json");
            if (!workOffline && !cachedFile.exists()) {
                POINT_DIR.mkdirs();
                // FIX: download into a temporary file and rename it into place only on success,
                // so a failed or interrupted download cannot leave a truncated file that the
                // exists() check above would treat as a valid cache entry on the next load.
                File tempFile = File.createTempFile(pointSetId, ".json", POINT_DIR);
                S3Object obj = s3.getObject(pointsetBucket, pointSetId + ".json.gz");
                GZIPInputStream gis = new GZIPInputStream(obj.getObjectContent());
                try {
                    FileOutputStream fos = new FileOutputStream(tempFile);
                    try {
                        ByteStreams.copy(gis, fos);
                    } finally {
                        fos.close();
                    }
                } finally {
                    gis.close(); // also closes the S3 object's underlying HTTP stream
                }
                if (!tempFile.renameTo(cachedFile)) {
                    tempFile.delete();
                    throw new IOException("could not move downloaded point set into cache: " + cachedFile);
                }
            }
            // grab it from the local disk cache
            return PointSet.fromGeoJson(cachedFile);
        }
    }

    /**
     * @throws UnsupportedOperationException always; enumerating an S3 bucket is not supported here.
     */
    @Override
    public List<String> getPointSetIds() {
        // we have no clue what is in the S3 bucket.
        throw new UnsupportedOperationException("S3-backed point set datastore does not know what pointsets are available.");
    }
}