/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.streams.s3;
import com.amazonaws.services.s3.model.S3Object;
import com.amazonaws.services.s3.model.S3ObjectInputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
/**
* There is a nuance associated with reading portions of files in S3. Everything occurs over
* an Apache HTTP client object. Apache, and therefore Amazon, defaults to re-using the stream.
* As a result, if you only intend to read a small portion of the file, you must first "abort" the
* stream, then close the 'inputStream'. Otherwise, Apache will exhaust the entire stream
* and transfer the entire file. If you are only reading the first 50 lines of a 5,000,000 line file,
* this becomes problematic.
*
* <p>
* This class operates as a wrapper to fix the aforementioned nuances.
*
* <p>
* Reference:
* http://stackoverflow.com/questions/17782937/connectionpooltimeoutexception-when-iterating-objects-in-s3
*/
public class S3ObjectInputStreamWrapper extends InputStream {

  private static final Logger LOGGER = LoggerFactory.getLogger(S3ObjectInputStreamWrapper.class);

  /** The S3 object whose content is being read; closed when this wrapper is released. */
  private final S3Object s3Object;

  /** The content stream of {@link #s3Object}; every read operation is delegated to it. */
  private final S3ObjectInputStream is;

  /**
   * Set once the underlying resources have been released. Declared volatile because
   * {@link #finalize()} reads it from the finalizer thread, not the thread that called close.
   */
  private volatile boolean isClosed = false;

  /**
   * Create an input stream safely.
   *
   * @param s3Object the S3 object whose content stream is wrapped
   * @throws NullPointerException if {@code s3Object} is null
   */
  public S3ObjectInputStreamWrapper(S3Object s3Object) {
    if (s3Object == null) {
      throw new NullPointerException("s3Object must not be null");
    }
    this.s3Object = s3Object;
    this.is = s3Object.getObjectContent();
  }

  /**
   * Returns the hash code of the wrapped content stream, or 0 when there is none.
   */
  @Override
  public int hashCode() {
    return this.is == null ? 0 : this.is.hashCode();
  }

  /**
   * Two wrappers are equal when they wrap equal content streams.
   *
   * <p>Comparing against the wrapped stream itself (the previous behaviour) made
   * {@code wrapper.equals(wrapper)} false, which violates the reflexivity requirement of
   * {@link Object#equals(Object)} and was inconsistent with {@link #hashCode()}.
   *
   * @param obj the object to compare with
   * @return true if {@code obj} is a wrapper around an equal content stream
   */
  @Override
  public boolean equals(Object obj) {
    if (this == obj) {
      return true;
    }
    if (!(obj instanceof S3ObjectInputStreamWrapper)) {
      return false;
    }
    S3ObjectInputStream otherStream = ((S3ObjectInputStreamWrapper) obj).is;
    return this.is == otherStream || (this.is != null && this.is.equals(otherStream));
  }

  /**
   * Returns the string representation of the wrapped content stream.
   */
  @Override
  public String toString() {
    return this.is.toString();
  }

  /** Delegates to the wrapped content stream. */
  @Override
  public int read() throws IOException {
    return this.is.read();
  }

  /** Delegates to the wrapped content stream. */
  @Override
  public int read(byte[] byt) throws IOException {
    return this.is.read(byt);
  }

  /** Delegates to the wrapped content stream. */
  @Override
  public int read(byte[] byt, int off, int len) throws IOException {
    return this.is.read(byt, off, len);
  }

  /** Delegates to the wrapped content stream. */
  @Override
  public long skip(long skip) throws IOException {
    return this.is.skip(skip);
  }

  /** Delegates to the wrapped content stream. */
  @Override
  public int available() throws IOException {
    return this.is.available();
  }

  /** Delegates to the wrapped content stream. */
  @Override
  public boolean markSupported() {
    return this.is.markSupported();
  }

  /** Delegates to the wrapped content stream. */
  @Override
  public synchronized void mark(int readlimit) {
    this.is.mark(readlimit);
  }

  /** Delegates to the wrapped content stream. */
  @Override
  public synchronized void reset() throws IOException {
    this.is.reset();
  }

  /**
   * Aborts the HTTP transfer and releases the content stream and the S3 object.
   * Calling it more than once has no further effect.
   */
  @Override
  public void close() throws IOException {
    ensureEverythingIsReleased();
  }

  /**
   * ensureEverythingIsReleased as part of close process.
   *
   * <p>The stream is aborted <em>before</em> anything is closed: closing first lets the HTTP
   * client read the remainder of the object in order to re-use the connection, which is the
   * very cost this wrapper exists to avoid. Failures are logged and never propagated, and the
   * close calls run even if the abort fails.
   */
  public void ensureEverythingIsReleased() {
    if (this.isClosed) {
      return;
    }
    try {
      // Abort the stream first so the remaining content is not transferred on close.
      if (this.is != null) {
        this.is.abort();
      }
    } catch (RuntimeException ex) {
      // Trailing throwable argument makes SLF4J log the stack trace as well.
      LOGGER.warn("Problem Aborting S3Object[{}]: {}", s3Object.getKey(), ex.getMessage(), ex);
    } finally {
      // close the input Stream Safely
      closeSafely(this.is);
      // This corrects the issue with Open HTTP connections
      closeSafely(this.s3Object);
      this.isClosed = true;
    }
  }

  /**
   * Closes the given resource if it is non-null, logging (not throwing) any failure.
   *
   * @param closeable the resource to close; may be null
   */
  private static void closeSafely(Closeable closeable) {
    try {
      if (closeable != null) {
        closeable.close();
      }
    } catch (Exception ex) {
      LOGGER.warn("S3InputStreamWrapper: Issue Closing Closeable - {}", ex.getMessage(), ex);
    }
  }

  /**
   * Safety net for callers that forgot to close the stream: releases the underlying resources
   * when the wrapper is finalized. {@code super.finalize()} runs in a {@code finally} block so
   * it is invoked even if the release fails.
   */
  @Override
  protected void finalize() throws Throwable {
    try {
      // If there is an accidental leak where the user did not close, release here.
      ensureEverythingIsReleased();
    } catch (Exception ex) {
      // this should never be called, just being very cautious
      LOGGER.warn("S3InputStreamWrapper: Issue Releasing Connections on Finalize - {}",
          ex.getMessage(), ex);
    } finally {
      super.finalize();
    }
  }
}