/*
The MIT License (MIT)

Copyright (c) 2016 Pierre Lindenbaum

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


History:
* 2016 creation

*/
package com.github.lindenb.jvarkit.util.bio.fasta;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.PushbackReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import java.util.function.BiFunction;

import com.github.lindenb.jvarkit.io.IOUtils;
import com.github.lindenb.jvarkit.lang.AbstractCharSequence;

import htsjdk.samtools.util.AbstractIterator;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.RuntimeIOException;

/**
 * Reads FASTA records from a {@link Reader} or a {@link File}.
 *
 * <p>Each record starts with a line beginning with {@code '>'}; the rest of
 * that line is the record name and the following lines, up to the next
 * {@code '>'} at the start of a line or the end of input, are the sequence.
 * Sequence characters are stored as single bytes (the low 8 bits of each
 * character), and whitespace is not part of the sequence.
 *
 * <p>The object used to build each {@link FastaSequence} can be replaced with
 * {@link #setFastaSequenceCreator(BiFunction)}.
 */
public class FastaSequenceReader {
    /** Initial size of the byte buffer allocated for each sequence. */
    private int sequenceCapacity = 100;

    /** Factory turning a (name, bases) pair into a {@link FastaSequence}. */
    private BiFunction<String, byte[], FastaSequence> fastaSequenceCreator = SequenceImpl::new;

    public FastaSequenceReader() {
    }

    /**
     * Sets the initial capacity of the buffer allocated for each sequence.
     *
     * @param sequenceCapacity initial buffer size, in bytes; it is passed as-is
     *        to {@link ByteArrayOutputStream#ByteArrayOutputStream(int)} when a
     *        record is read
     */
    public void setSequenceCapacity(final int sequenceCapacity) {
        this.sequenceCapacity = sequenceCapacity;
    }

    /** @return the initial capacity of the buffer allocated for each sequence */
    public int getSequenceCapacity() {
        return sequenceCapacity;
    }

    /**
     * Replaces the factory used to build every {@link FastaSequence}.
     *
     * @param fastaSequenceCreator function receiving the record name and the
     *        sequence bytes
     */
    public void setFastaSequenceCreator(BiFunction<String, byte[], FastaSequence> fastaSequenceCreator) {
        this.fastaSequenceCreator = fastaSequenceCreator;
    }

    /**
     * Iterates over the records available from a reader.
     *
     * <p>The returned iterator closes the reader when it is exhausted or when
     * {@link CloseableIterator#close()} is called.
     *
     * @param r the character source
     * @return an iterator over the FASTA records
     * @throws IOException declared for API compatibility
     * @throws RuntimeIOException if {@code r} is null
     */
    public CloseableIterator<FastaSequence> iterator(final Reader r) throws IOException {
        return new MyIterator(r);
    }

    /**
     * Iterates over the records of a file opened with
     * {@link IOUtils#openFileForReader(File)}.
     *
     * @param file the FASTA file
     * @return an iterator over the FASTA records; the caller should close it
     * @throws IOException if the file cannot be opened
     */
    public CloseableIterator<FastaSequence> iterator(final File file) throws IOException {
        return iterator(IOUtils.openFileForReader(file));
    }

    /**
     * Returns an {@link Iterable} that opens {@code file} each time its
     * {@code iterator()} method is called.
     *
     * @param file the FASTA file
     * @return an iterable view over the records of the file
     */
    public Iterable<FastaSequence> getSequencesIn(final File file) {
        return new FastaIterable(file);
    }

    /**
     * Reads a file that must contain exactly one record.
     *
     * @param file the FASTA file
     * @return the only sequence of the file
     * @throws IOException if the file contains no record or more than one
     */
    public FastaSequence readOne(final File file) throws IOException {
        final CloseableIterator<FastaSequence> iter = iterator(file);
        try {
            if (!iter.hasNext()) {
                throw new IOException("Expected one sequence in " + file + " but got none");
            }
            final FastaSequence seq = iter.next();
            if (iter.hasNext()) {
                throw new IOException(
                        "Expected only one sequence in " + file + " but got more after " + seq.getName());
            }
            return seq;
        } finally {
            // Always release the underlying reader, on success as well as on failure.
            iter.close();
        }
    }

    /**
     * Reads every record of a file into memory.
     *
     * @param file the FASTA file
     * @return the sequences, in file order
     * @throws IOException if the file cannot be opened
     */
    public List<FastaSequence> readAll(final File file) throws IOException {
        final List<FastaSequence> seqs = new ArrayList<>();
        final CloseableIterator<FastaSequence> iter = iterator(file);
        try {
            while (iter.hasNext()) {
                seqs.add(iter.next());
            }
        } finally {
            // Close even if iteration fails half-way through the file.
            iter.close();
        }
        return seqs;
    }

    /**
     * Builds the default {@link FastaSequence} implementation.
     *
     * <p>This method is not called by {@link #read(PushbackReader)}, which uses
     * the factory set with {@link #setFastaSequenceCreator(BiFunction)}.
     *
     * @param name the record name
     * @param seq the sequence bytes
     * @return a new sequence backed by {@code seq}
     */
    protected FastaSequence createFastaSequence(final String name, byte seq[]) {
        return new SequenceImpl(name, seq);
    }

    /**
     * Reads the next record from {@code reader}.
     *
     * <p>When the {@code '>'} of the following record is met it is pushed back
     * so that the next call starts on it. Whitespace is skipped in sequence
     * lines and a trailing carriage return is removed from the header, so
     * files with CRLF line endings yield the same result as LF files.
     *
     * @param reader the source; it must accept at least one pushed-back char
     * @return the next sequence, or {@code null} at end of input
     * @throws IOException declared for API compatibility; I/O failures raised
     *         while parsing are rethrown as {@link RuntimeIOException}
     * @throws RuntimeIOException on I/O error, or when a non-whitespace
     *         character is found before the first header
     */
    protected FastaSequence read(final PushbackReader reader) throws IOException {
        boolean atLineStart = true;
        StringBuilder name = null;
        ByteArrayOutputStream sequence = null;
        try {
            int c;
            while ((c = reader.read()) != -1) {
                if (atLineStart && c == '>') {
                    if (name != null) {
                        // Start of the next record: leave it for the next call.
                        reader.unread(c);
                        return this.fastaSequenceCreator.apply(name.toString(), sequence.toByteArray());
                    }
                    name = new StringBuilder();
                    sequence = new ByteArrayOutputStream(this.sequenceCapacity);
                    /* consume header */
                    while ((c = reader.read()) != -1 && c != '\n') {
                        name.append((char) c);
                    }
                    // Drop the '\r' left by a CRLF line ending.
                    final int last = name.length() - 1;
                    if (last >= 0 && name.charAt(last) == '\r') {
                        name.setLength(last);
                    }
                    atLineStart = true;
                } else if (Character.isWhitespace(c)) {
                    atLineStart = (c == '\n');
                } else if (sequence == null) {
                    throw new IOException("Illegal character " + (char) c);
                } else {
                    sequence.write(c);
                    /* consume the rest of the sequence line */
                    while ((c = reader.read()) != -1 && c != '\n') {
                        // Whitespace (including the '\r' of CRLF) is not sequence data.
                        if (!Character.isWhitespace(c)) {
                            sequence.write(c);
                        }
                    }
                    atLineStart = true;
                }
            }
            /* eof met */
            if (name != null) {
                return this.fastaSequenceCreator.apply(name.toString(), sequence.toByteArray());
            }
            return null;
        } catch (final IOException e) {
            throw new RuntimeIOException(e);
        }
    }

    /** Iterator pulling records one at a time from a reader. */
    private class MyIterator extends AbstractIterator<FastaSequence> implements CloseableIterator<FastaSequence> {
        /** Source of characters; set to {@code null} once closed. */
        private PushbackReader r;

        MyIterator(final Reader r) {
            if (r == null) {
                throw new RuntimeIOException("reader is null");
            }
            this.r = new PushbackReader(r);
        }

        @Override
        protected FastaSequence advance() {
            if (r == null) {
                return null;
            }
            try {
                final FastaSequence s = FastaSequenceReader.this.read(this.r);
                if (s == null) {
                    // End of input: release the reader right away.
                    close();
                }
                return s;
            } catch (final IOException e) {
                // Keep the cause so the failure can be diagnosed.
                throw new RuntimeIOException(e);
            }
        }

        @Override
        public void close() {
            CloserUtil.close(r);
            r = null;
        }
    }

    /** Default {@link FastaSequence}: a name and an array of one-byte characters. */
    private static class SequenceImpl extends AbstractCharSequence implements FastaSequence {
        final String name;
        final byte seq[];

        SequenceImpl(final String name, final byte seq[]) {
            this.name = name;
            this.seq = seq;
        }

        @Override
        public String getName() {
            return name;
        }

        @Override
        public int length() {
            return seq.length;
        }

        @Override
        public char charAt(final int index) {
            // Mask to avoid sign extension of bytes >= 0x80.
            return (char) (seq[index] & 0xFF);
        }
    }

    /** {@link Iterable} re-opening a FASTA file on every call to {@link #iterator()}. */
    public class FastaIterable implements Iterable<FastaSequence> {
        final File fastaFile;

        FastaIterable(final File fastaFile) {
            this.fastaFile = fastaFile;
        }

        /**
         * Opens the file and iterates over its records.
         *
         * @return a new iterator; it closes itself once exhausted, otherwise
         *         the caller should close it
         * @throws RuntimeIOException if the file cannot be opened
         */
        @Override
        public CloseableIterator<FastaSequence> iterator() {
            try {
                return FastaSequenceReader.this.iterator(this.fastaFile);
            } catch (final IOException e) {
                throw new RuntimeIOException(e);
            }
        }
    }
}