/*
* Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.fhcrc.cpl.toolbox.proteomics.filehandler;
import org.fhcrc.cpl.toolbox.proteomics.Protein;
import java.io.*;
import java.util.*;
/**
 * Loads protein records from a FASTA file.
 * <p>
 * The file is read lazily through {@link #iterator()}: each line beginning with
 * {@code '>'} starts a new record, and the upper-case letters on the following
 * lines make up that record's sequence.
 * <p>
 * User: migra
 * Date: Jun 16, 2004
 * Time: 2:40:58 PM
 */
public class FastaLoader
{
    private final File _fastaFile;

    /**
     * @param fastaFile the FASTA file to read; it is not opened until iteration starts
     */
    public FastaLoader(File fastaFile)
    {
        _fastaFile = fastaFile;
    }

    /**
     * Creates a new iterator positioned before the first protein of the file.
     *
     * @return a fresh iterator; every call returns an independent instance
     */
    public ProteinIterator iterator()
    {
        return new ProteinIterator();
    }

    /**
     * Iterator over the proteins of the enclosing loader's FASTA file.
     * <p>
     * The file is opened on the first call to {@link #hasNext()} or {@link #next()}
     * and is closed when the last protein has been returned, when {@link #close()}
     * is called, or when a read fails inside {@link #next()}.
     */
    public class ProteinIterator implements Iterator<Protein>, Closeable
    {
        // Header (without the leading '>') of the protein that next() will return,
        // or null when there is nothing left to return.
        String _proteinHeader = null;
        BufferedReader _reader = null;
        // True until the file has been opened (or the iterator has been closed).
        private boolean _beforeFirst = true;
        private long _fileLength;
        // Number of lines read so far.
        private long _currentLine = 0;
        private long _currentHeaderLine = 0;
        private long _lastHeaderLine = 0;
        // Approximate read position: line lengths plus one per line terminator.
        private long _currentPosition = 0;
        private Integer _previous = null;

        /**
         * Opens the file and reads the first header line.
         * <p>
         * An {@link IOException} while opening or reading the first line is not
         * propagated: the reader is closed and the iterator simply reports no
         * proteins. This keeps the long-standing behavior callers may rely on.
         *
         * @throws IllegalArgumentException if the file is empty or its first line
         *                                  does not start with {@code '>'}
         */
        private void init()
        {
            try
            {
                _reader = new BufferedReader(new FileReader(_fastaFile));
                String line = getLine();
                // startsWith is safe for an empty first line, unlike charAt(0)
                if (null == line || !line.startsWith(">"))
                {
                    // Close quietly so a failure to close cannot mask the format error
                    closeReaderQuietly();
                    throw new IllegalArgumentException("Fasta File did not start with a >");
                }
                //Iterator expects _proteinHeader to be initialized...
                _proteinHeader = line.substring(1);
                _currentHeaderLine = _currentLine;
            }
            catch (IOException x)
            {
                // Unreadable file: leave _proteinHeader null so hasNext() is false
                closeReaderQuietly();
            }
            _beforeFirst = false;
            _fileLength = _fastaFile.length();
        }

        /**
         * Reads one line and updates the line and position counters.
         *
         * @return the line without its terminator, or null at end of file
         * @throws IOException if the underlying reader fails
         */
        private String getLine() throws IOException
        {
            String line = _reader.readLine();
            if (null != line)
            {
                // TODO: Temporary tracking of input position by counting length of each line... should
                // switch to a different input stream/reader that tracks bytes instead
                _currentPosition += line.length() + 1;
                _currentLine++;
            }
            return line;
        }

        /**
         * Opens the file on first use.
         *
         * @return are there any more proteins left in the file
         * @throws IllegalArgumentException if the file does not start with a header line
         */
        public boolean hasNext()
        {
            if (_beforeFirst)
                init();
            return null != _proteinHeader;
        }

        /**
         * Closes file just in case.
         *
         * @throws Throwable if the file is not closeable or the superclass finalizer fails
         */
        @Override
        protected void finalize() throws Throwable
        {
            try
            {
                //If iteration is not complete, still close the file...
                if (null != _reader)
                    _reader.close();
            }
            finally
            {
                // Always run the superclass finalizer, even if closing failed
                super.finalize();
            }
        }

        /**
         * Get next protein object in file.
         * <p>
         * The sequence consists of every character in {@code 'A'..'Z'} found on the
         * lines between this protein's header and the next header (or end of file);
         * all other characters are dropped.
         *
         * @return Protein or null if end of file
         * @throws IllegalArgumentException if the file does not start with a header line
         * @throws RuntimeException if reading fails; the file is closed first
         */
        public Protein next()
        {
            if (_beforeFirst)
                init();
            if (null == _proteinHeader)
                return null;

            ByteArrayOutputStream aaStream = new ByteArrayOutputStream(2048);
            try
            {
                String line;
                while ((line = getLine()) != null)
                {
                    if (line.startsWith(">"))
                    {
                        // Next header reached: emit the protein collected so far and
                        // remember the new header for the following call
                        Protein p = createProtein(_proteinHeader, aaStream);
                        _proteinHeader = line.substring(1);
                        return p;
                    }
                    appendResidues(line, aaStream);
                }
                // End of file -- last protein
                Protein p = createProtein(_proteinHeader, aaStream);
                close();
                return p;
            }
            catch (IOException x)
            {
                // Do not leak the open file when a read fails
                close();
                throw new RuntimeException("Failed to read next protein", x);
            }
        }

        /**
         * Appends the upper-case letters of a sequence line to the residue buffer.
         * Works on characters rather than platform-encoded bytes so that bytes of
         * multi-byte characters can never be mistaken for residues.
         */
        private void appendResidues(String line, ByteArrayOutputStream aaStream)
        {
            for (int i = 0; i < line.length(); i++)
            {
                char c = line.charAt(i);
                if (c >= 'A' && c <= 'Z')
                    aaStream.write(c);
            }
        }

        /**
         * Builds the protein and advances the header line bookkeeping.
         */
        private Protein createProtein(String header, ByteArrayOutputStream aaStream)
        {
            Protein p = new Protein(header, aaStream.toByteArray());
            _lastHeaderLine = _currentHeaderLine;
            _currentHeaderLine = _currentLine;
            return p;
        }

        /**
         * Unsupported
         *
         * @throws UnsupportedOperationException always
         */
        public void remove()
        {
            throw new UnsupportedOperationException();
        }

        /**
         * Closes the file. No more items will be returned from the iterator,
         * including when this is called before iteration has started.
         * Errors while closing are ignored.
         */
        public void close()
        {
            closeReaderQuietly();
            _proteinHeader = null;
            // Prevent hasNext()/next() from reopening the file after an early close
            _beforeFirst = false;
        }

        /**
         * Closes the reader if it is open, ignoring any failure, and forgets it.
         */
        private void closeReaderQuietly()
        {
            if (null != _reader)
            {
                try
                {
                    _reader.close();
                }
                catch (IOException ignored)
                {
                    // Nothing useful can be done about a failed close
                }
            }
            _reader = null;
        }

        /**
         * Reports progress only when it differs from the previously reported value.
         *
         * @return the current percentage, or null if it equals the value returned
         *         by the previous non-null call
         */
        public Integer getPercentCompleteIfChanged()
        {
            int current = getPercentComplete();
            if (null != _previous && current == _previous.intValue())
                return null;
            _previous = Integer.valueOf(current);
            return _previous;
        }

        /**
         * Estimates how much of the file has been read, as a percentage in 0..100.
         * The position is approximate (see {@link #getLine()}), so the raw ratio is
         * clamped; an unknown or zero file length yields 0.
         */
        private int getPercentComplete()
        {
            if (_fileLength <= 0)
                return 0;
            int percent = Math.round((float) _currentPosition * 100 / _fileLength);
            return Math.max(0, Math.min(100, percent));
        }

        /**
         * @return the 1-based line number of the header of the protein most
         *         recently returned by {@link #next()}, or 0 if none has been returned
         */
        public long getLastHeaderLine()
        {
            return _lastHeaderLine;
        }
    }
}