/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
*/
package org.biojava.nbio.sequencing.io.fastq;
import com.google.common.io.CharStreams;
import com.google.common.io.LineProcessor;
import java.io.IOException;
/**
 * Low-level event based parser for FASTQ formatted sequences.
 *
 * <p>
 * Input is consumed one line at a time and driven through a small state
 * machine (description, sequence, repeat description, quality).  Sequence and
 * quality text may each span several lines.  A line following quality text is
 * treated as the next description only once the accumulated quality length
 * equals the accumulated sequence length; until then it is appended as
 * quality, whatever character it begins with.
 * </p>
 *
 * @since 3.0.3
 */
final class FastqParser
{

    /**
     * Parse the specified readable.
     *
     * <p>
     * Every line is forwarded to the specified listener as a parse event.  When
     * the input ends, the last record is reported as complete only if its
     * quality length matches its sequence length; any other unfinished record
     * is reported as truncated.
     * </p>
     *
     * @param readable readable, must not be null
     * @param listener low-level event based parser callback, must not be null
     * @throws IllegalArgumentException if <code>readable</code> or <code>listener</code> is null
     * @throws IOException if an I/O error occurs, or if the input is malformed or truncated
     */
    static void parse(final Readable readable, final ParseListener listener)
        throws IOException
    {
        if (readable == null)
        {
            throw new IllegalArgumentException("readable must not be null");
        }
        // the line processor constructor rejects a null listener
        FastqParserLineProcessor lineProcessor = new FastqParserLineProcessor(listener);
        CharStreams.readLines(readable, lineProcessor);

        // The last record has no following description line to trigger its
        // completion, so complete it here -- but only when it is whole, the
        // same condition processLine applies before calling complete().
        if (lineProcessor.getState() == State.COMPLETE && lineProcessor.isQualityComplete())
        {
            listener.complete();
            lineProcessor.setState(State.DESCRIPTION);
        }
        // anything other than "waiting for the next description" means the
        // input stopped part way through a record
        if (lineProcessor.getState() != State.DESCRIPTION)
        {
            throw new IOException("truncated sequence");
        }
    }

    /**
     * FASTQ formatted sequence parser line processor.
     */
    private static final class FastqParserLineProcessor implements LineProcessor<Object>
    {
        /** Parser state. */
        private State state = State.DESCRIPTION;

        /** Length of the sequence text seen so far for the current record. */
        private int sequenceLength = 0;

        /** Length of the quality text seen so far for the current record. */
        private int qualityLength = 0;

        /** Parse listener. */
        private final ParseListener listener;


        /**
         * Create a new FASTQ formatted sequence parser line processor with the specified parse listener.
         *
         * @param listener parse listener, must not be null
         * @throws IllegalArgumentException if <code>listener</code> is null
         */
        private FastqParserLineProcessor(final ParseListener listener)
        {
            if (listener == null)
            {
                throw new IllegalArgumentException("listener must not be null");
            }
            this.listener = listener;
        }


        /**
         * Return the parser state.
         *
         * @return the parser state
         */
        private State getState()
        {
            return state;
        }

        /**
         * Set the parser state to <code>state</code>.
         *
         * @param state parser state
         */
        private void setState(final State state)
        {
            this.state = state;
        }

        /**
         * Return true if the quality length seen so far equals the sequence length seen so far.
         *
         * @return true if the quality length seen so far equals the sequence length seen so far
         */
        private boolean isQualityComplete()
        {
            return sequenceLength == qualityLength;
        }

        /**
         * Start a new record from the specified description line.
         *
         * @param line description line, expected to begin with a '@' character
         * @throws IOException if <code>line</code> does not begin with a '@' character,
         *    or if the listener throws one
         */
        private void startRecord(final String line) throws IOException
        {
            if (!line.startsWith("@"))
            {
                throw new IOException("description must begin with a '@' character");
            }
            listener.description(line.substring(1).trim());
            state = State.SEQUENCE;
        }

        /**
         * Fail if more quality text than sequence text has been read for the current record.
         *
         * <p>
         * Once quality is longer than the sequence the two lengths can never
         * become equal again, so without this check every remaining line of
         * input would be appended to this record's quality.
         * </p>
         *
         * @throws IOException if the quality length exceeds the sequence length
         */
        private void checkQualityLength() throws IOException
        {
            if (qualityLength > sequenceLength)
            {
                throw new IOException("quality length " + qualityLength
                                      + " exceeds sequence length " + sequenceLength);
            }
        }

        /**
         * Always returns null; results are delivered through the parse listener.
         *
         * @return null
         */
        @Override
        public Object getResult()
        {
            return null;
        }

        /**
         * Process one line of input according to the current parser state.
         *
         * @param line line of input, without its line terminator
         * @return true, so that reading always continues to the end of the input
         * @throws IOException if the line is not valid in the current state,
         *    or if the listener throws one
         */
        @Override
        public boolean processLine(final String line) throws IOException
        {
            switch (state)
            {
                case DESCRIPTION:
                    startRecord(line);
                    break;

                case SEQUENCE:
                    final String sequence = line.trim();
                    listener.sequence(sequence);
                    sequenceLength = sequence.length();
                    state = State.REPEAT_DESCRIPTION;
                    break;

                case REPEAT_DESCRIPTION:
                    if (line.startsWith("+"))
                    {
                        listener.repeatDescription(line.substring(1).trim());
                        state = State.QUALITY;
                    }
                    else
                    {
                        // not the '+' separator yet, so the sequence continues on this line
                        final String moreSequence = line.trim();
                        listener.appendSequence(moreSequence);
                        sequenceLength += moreSequence.length();
                    }
                    break;

                case QUALITY:
                    final String quality = line.trim();
                    listener.quality(quality);
                    qualityLength = quality.length();
                    checkQualityLength();
                    state = State.COMPLETE;
                    break;

                case COMPLETE:
                    if (isQualityComplete())
                    {
                        // the previous record is whole; this line starts the next one
                        listener.complete();
                        startRecord(line);
                    }
                    else
                    {
                        // quality is still shorter than the sequence, so it continues on this line
                        final String moreQuality = line.trim();
                        listener.appendQuality(moreQuality);
                        qualityLength += moreQuality.length();
                        checkQualityLength();
                    }
                    break;

                default:
                    break;
            }
            return true;
        }
    }

    /** Parser state. */
    private enum State
    {
        /** Description parser state; the next line is expected to be a description line. */
        DESCRIPTION,

        /** Sequence parser state; the next line is the first line of sequence. */
        SEQUENCE,

        /** Repeat description parser state; the next line is either the '+' line or more sequence. */
        REPEAT_DESCRIPTION,

        /** Quality score parser state; the next line is the first line of quality. */
        QUALITY,

        /** Complete parser state; the next line is either more quality or the next description. */
        COMPLETE
    }
}