/* $Id: TempFileCharacterInput.java 988245 2010-08-23 18:39:35Z kwright $ */
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.manifoldcf.core.interfaces;
import java.io.*;
import java.nio.charset.StandardCharsets;
import org.apache.manifoldcf.core.system.ManifoldCF;
/** This class represents a temporary file character input
* stream. Call the "discard" method to clean up the
* file when done.
* NOTE: The implied flow of this class is to be handed
* a file that has already been created by some means. The
* file must be a dedicated temporary file, which can be
* destroyed when the data has been used. However, this class can also
* buffer data in memory if the data is not too large (that is, no larger than a
* supplied cutoff value).
*/
public class TempFileCharacterInput extends CharacterInput
{
/** Revision identifier string for this source file. */
public static final String _rcsid = "@(#)$Id: TempFileCharacterInput.java 988245 2010-08-23 18:39:35Z kwright $";
/** Backing temporary file holding the UTF-8 encoded data, or null when no file backs this object. */
protected File file;
/** UTF-8 encoded data held in memory, or null when the data is not buffered in memory. */
protected byte[] inMemoryBuffer;
/** Size, in characters, of the buffer used when reading character data. */
protected final static int CHUNK_SIZE = 65536;
/** In-memory size cutoff, in UTF-8 encoded bytes, used when a constructor is not given one explicitly. */
protected final static int DEFAULT_MAX_MEM_SIZE = 8192;
/** Construct from a reader that is consumed all the way to the end of its data.
* No length limit is applied, and the default in-memory size cutoff (DEFAULT_MAX_MEM_SIZE) is used.
*@param is is the reader to transfer from, to the end of the data. The character length
* and hash value for the data are calculated as a side effect of the transfer.
*/
public TempFileCharacterInput(Reader is)
  throws ManifoldCFException, IOException
{
  // A length of -1 means "no limit"; go straight to the full constructor with the default cutoff.
  this(is,-1L,DEFAULT_MAX_MEM_SIZE);
}
/** Construct from a length-delimited reader, using the default in-memory size cutoff
* (DEFAULT_MAX_MEM_SIZE).
*@param is is a reader to transfer from, up to the end of the data or the length limit. The character length
* and hash value for the data are calculated as a side effect of the transfer.
*@param length is the length limit to transfer, or -1 if no limit
*/
public TempFileCharacterInput(Reader is, long length)
throws ManifoldCFException, IOException
{
// Delegate to the full constructor, supplying the default in-memory cutoff.
this(is,length,DEFAULT_MAX_MEM_SIZE);
}
/** Construct from a length-delimited reader.
* Up to CHUNK_SIZE characters are read first. If the end of the data (or the length limit) is
* reached within that first chunk, and the UTF-8 encoding of what was read is no larger than
* maxInMemoryLength bytes, the data is kept in memory. Otherwise everything is written, UTF-8
* encoded, to a newly created temporary file. The character length and hash value for the data
* are calculated as a side effect in both cases.
*@param is is a reader to transfer from, up to the end of the data or the length limit.
*@param length is the length limit to transfer, or -1 if no limit
*@param maxInMemoryLength is the maximum size, in UTF-8 encoded bytes, to keep in memory before using a backing File object.
* Data held in memory never exceeds this size.
*@throws ManifoldCFException when creating, opening, writing or closing the backing file fails; those failures are
* passed to handleIOException (defined outside this file; presumably it always throws -- confirm).
*@throws IOException if reading from the supplied reader fails; reader errors are not translated.
*/
public TempFileCharacterInput(Reader is, long length, int maxInMemoryLength)
throws ManifoldCFException, IOException
{
super();
// Before we do anything else, we read the first chunk. This will allow
// us to determine if we're going to buffer the data in memory or not. However,
// it may need to be read in pieces, since there's no guarantee it will come in
// in the size requested.
int chunkSize = CHUNK_SIZE;
char[] buffer = new char[chunkSize];
int chunkTotal = 0;
boolean eofSeen = false;
while (true)
{
// Work out how much is still wanted for the first chunk: either the rest of the
// buffer (no limit, or a limit larger than one chunk), or the rest of the limited length.
int chunkAmount;
if (length == -1L || length > chunkSize)
chunkAmount = chunkSize-chunkTotal;
else
{
chunkAmount = (int)(length-chunkTotal);
// The whole limited length fits in the first chunk, so nothing will remain to be read after it.
eofSeen = true;
}
if (chunkAmount == 0)
break;
int readsize = is.read(buffer,chunkTotal,chunkAmount);
if (readsize == -1)
{
// The reader ran out of data before the first chunk was filled.
eofSeen = true;
break;
}
chunkTotal += readsize;
}
// Set up hash digest, and calculate the initial hash.
java.security.MessageDigest md = ManifoldCF.startHash();
String chunkString = new String(buffer,0,chunkTotal);
ManifoldCF.addToHash(md,chunkString);
// In order to compute the byte length, we need to convert to a byte array, which is
// also our final form for in-memory storage. But we don't want to do the work
// unless there's a chance it will be needed.
byte[] byteBuffer;
if (eofSeen)
byteBuffer = chunkString.getBytes(StandardCharsets.UTF_8);
else
byteBuffer = null;
// byteBuffer is non-null whenever eofSeen is true, so the length test below is safe.
if (eofSeen && byteBuffer.length <= maxInMemoryLength)
{
// Buffer locally; don't create a temp file
file = null;
inMemoryBuffer = byteBuffer;
charLength = chunkTotal;
hashValue = ManifoldCF.getHashValue(md);
}
else
{
inMemoryBuffer = null;
// Running count of characters written to the backing file.
long totalMoved = 0;
// Create a temporary file to put the stuff in
File outfile;
try
{
outfile = File.createTempFile("_MC_","");
}
catch (IOException e)
{
handleIOException(e,"creating backing file");
// This assignment satisfies the compiler's definite-assignment check.
// NOTE(review): presumably unreachable because handleIOException always throws -- confirm.
outfile = null;
}
try
{
// Register the file for autodeletion, using our infrastructure.
ManifoldCF.addFile(outfile);
// deleteOnExit() causes memory leakage!
// outfile.deleteOnExit();
FileOutputStream outStream;
OutputStreamWriter outWriter;
try
{
outStream = new FileOutputStream(outfile);
// Create a Writer corresponding to the file output stream, and encode using utf-8
outWriter = new OutputStreamWriter(outStream,StandardCharsets.UTF_8);
}
catch (IOException e)
{
handleIOException(e,"opening backing file");
// As above: assignments only needed for definite assignment.
outStream = null;
outWriter = null;
}
try
{
// Transfer what we've already read.
try
{
outWriter.write(buffer,0,chunkTotal);
}
catch (IOException e)
{
handleIOException(e,"writing backing file");
}
totalMoved += chunkTotal;
// Now, transfer the remainder
while (true)
{
// Ask for a full chunk unless the remaining limited length is smaller.
int moveAmount;
if (length == -1L || length-totalMoved > chunkSize)
moveAmount = chunkSize;
else
moveAmount = (int)(length-totalMoved);
if (moveAmount == 0)
break;
// Read character data in 64K chunks. A failure here is a reader error and
// propagates as a plain IOException (it is outside the write try/catch).
int readsize = is.read(buffer,0,moveAmount);
if (readsize == -1)
break;
try
{
outWriter.write(buffer,0,readsize);
}
catch (IOException e)
{
handleIOException(e,"writing backing file");
}
// Keep the hash in step with what has been written.
ManifoldCF.addToHash(md,new String(buffer,0,readsize));
totalMoved += readsize;
}
}
finally
{
// Always close the writer, whether or not the transfer succeeded.
try
{
outWriter.close();
}
catch (IOException e)
{
handleIOException(e,"closing backing file");
}
}
// The transfer succeeded: record the backing file, character count and hash.
file = outfile;
charLength = totalMoved;
hashValue = ManifoldCF.getHashValue(md);
}
catch (Throwable e)
{
// Delete the temp file we created on any error condition
// outfile.delete();
ManifoldCF.deleteFile(outfile);
// Rethrow with the original type preserved; anything that is not one of the
// expected types is wrapped in a RuntimeException.
if (e instanceof Error)
throw (Error)e;
if (e instanceof RuntimeException)
throw (RuntimeException)e;
if (e instanceof ManifoldCFException)
throw (ManifoldCFException)e;
if (e instanceof IOException)
throw (IOException)e;
throw new RuntimeException("Unexpected throwable of type "+e.getClass().getName()+": "+e.getMessage(),e);
}
}
}
/** Construct from an existing temporary file.
* The file is registered through ManifoldCF.addFile(). The character length and hash value
* are not computed by this constructor.
*@param tempFile is the existing temporary file, encoded in utf-8.
*/
public TempFileCharacterInput(File tempFile)
{
  super();
  // File-backed from the start, so there is no in-memory copy.
  this.file = tempFile;
  this.inMemoryBuffer = null;
  // Track the file with our own infrastructure rather than File.deleteOnExit(), which
  // causes memory leakage; better to leak files on hard shutdown than memory.
  ManifoldCF.addFile(tempFile);
}
protected TempFileCharacterInput()
{
super();
}
/** Open a binary stream over the UTF-8 encoded data.
*@return a stream reading the backing file if there is one, otherwise a stream over the
* in-memory buffer, or null if this object holds neither.
*@throws ManifoldCFException if the backing file cannot be found.
*/
@Override
public InputStream getUtf8Stream()
  throws ManifoldCFException
{
  if (file == null)
  {
    // Not file-backed: serve the in-memory copy, if there is one.
    return (inMemoryBuffer == null) ? null : new ByteArrayInputStream(inMemoryBuffer);
  }
  try
  {
    return new FileInputStream(file);
  }
  catch (FileNotFoundException e)
  {
    throw new ManifoldCFException("No such file: "+e.getMessage(),e,ManifoldCFException.GENERAL_ERROR);
  }
}
/** Get the length, in bytes, of the UTF-8 encoded data.
*@return the backing file's length if there is a backing file, otherwise the in-memory
* buffer's length, or 0 if this object holds neither.
*/
@Override
public long getUtf8StreamLength()
  throws ManifoldCFException
{
  if (file == null)
  {
    // Not file-backed: the in-memory buffer, when present, is already the UTF-8 form.
    return (inMemoryBuffer == null) ? 0L : inMemoryBuffer.length;
  }
  return file.length();
}
/** Open a character stream over the data and store it in the stream field.
* The data is read from the backing file if there is one, otherwise from the in-memory
* buffer; in both cases it is decoded as UTF-8. If this object holds neither, the stream
* field is left untouched.
*@throws ManifoldCFException if the backing file cannot be found.
*/
@Override
protected void openStream()
  throws ManifoldCFException
{
  // First pick the binary source, then wrap it in a UTF-8 decoding reader.
  InputStream utf8Source;
  if (file != null)
  {
    try
    {
      utf8Source = new FileInputStream(file);
    }
    catch (FileNotFoundException e)
    {
      throw new ManifoldCFException("Can't create stream: "+e.getMessage(),e,ManifoldCFException.GENERAL_ERROR);
    }
  }
  else if (inMemoryBuffer != null)
  {
    utf8Source = new ByteArrayInputStream(inMemoryBuffer);
  }
  else
  {
    // Nothing to open.
    return;
  }
  stream = new InputStreamReader(utf8Source,StandardCharsets.UTF_8);
}
/** Transfer to a new object; this causes the current object to become "already discarded".
* The backing file, in-memory buffer, open stream, character length and hash value all move
* to the returned object, and this object's copies are reset.
*@return the new object that now owns the data.
*/
@Override
public CharacterInput transfer()
{
  TempFileCharacterInput successor = new TempFileCharacterInput();

  // Hand over each piece of state, blanking our own copy as we go.
  successor.file = file;
  file = null;

  successor.inMemoryBuffer = inMemoryBuffer;
  inMemoryBuffer = null;

  successor.stream = stream;
  stream = null;

  successor.charLength = charLength;
  charLength = -1L;

  successor.hashValue = hashValue;
  hashValue = null;

  return successor;
}
/** Discard this object: run the superclass discard, then delete the backing file, if any,
* through ManifoldCF.deleteFile() and forget it.
*/
@Override
public void discard()
  throws ManifoldCFException
{
  super.discard();
  if (file == null)
  {
    // No backing file, so there is nothing to delete.
    return;
  }
  ManifoldCF.deleteFile(file);
  file = null;
}
/** Calculate the datum's length in characters.
* This delegates to scanFile(), which reads the data once and sets both the character
* length and the hash value.
*/
@Override
protected void calculateLength()
throws ManifoldCFException
{
scanFile();
}
/** Calculate the datum's hash value.
* This delegates to scanFile(), which reads the data once and sets both the hash value
* and the character length.
*/
@Override
protected void calculateHashValue()
throws ManifoldCFException
{
scanFile();
}
/** Scan the data in order to figure out both the hash value and the character length.
* The data is read from the backing file if there is one, otherwise from the in-memory
* buffer, and is decoded as UTF-8. An object that holds neither (for example, one whose
* contents were handed off by transfer()) is treated as empty data, consistent with
* getUtf8StreamLength() returning 0, rather than failing with a NullPointerException.
* On success, charLength and hashValue are both set.
*@throws ManifoldCFException as raised by handleIOException when the scan hits an I/O error.
*/
private void scanFile()
  throws ManifoldCFException
{
  try
  {
    // Choose the binary source.
    final InputStream binaryStream;
    if (file != null)
      binaryStream = new FileInputStream(file);
    else if (inMemoryBuffer != null)
      binaryStream = new ByteArrayInputStream(inMemoryBuffer);
    else
      // No data at all: scan an empty stream instead of passing null to InputStreamReader.
      binaryStream = new ByteArrayInputStream(new byte[0]);

    // try-with-resources closes the reader (and with it the underlying stream) on every path.
    try (Reader reader = new InputStreamReader(binaryStream,StandardCharsets.UTF_8))
    {
      // Set up hash digest and character length counter before we start anything.
      java.security.MessageDigest md = ManifoldCF.startHash();
      char[] buffer = new char[CHUNK_SIZE];
      long totalMoved = 0;
      while (true)
      {
        // Read character data in 64K chunks
        int readsize = reader.read(buffer,0,CHUNK_SIZE);
        if (readsize == -1)
          break;
        ManifoldCF.addToHash(md,new String(buffer,0,readsize));
        totalMoved += readsize;
      }
      charLength = totalMoved;
      hashValue = ManifoldCF.getHashValue(md);
    }
  }
  catch (IOException e)
  {
    handleIOException(e,"scanning file");
  }
}
}