package org.genedb.crawl.elasticsearch.index.gff;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;

import org.apache.log4j.Logger;
import org.genedb.crawl.elasticsearch.index.NonDatabaseDataSourceIndexBuilder;

/**
 * Extracts the residues of a single named sequence from a file whose
 * sequence records are introduced by header lines starting with {@code >}.
 */
public class GFFSequenceExtractor {

    private static final Logger logger = Logger.getLogger(GFFSequenceExtractor.class);

    /**
     * Scans the given file line by line and returns the concatenation of all
     * lines that follow a header whose name equals the requested name.
     *
     * <p>A header is any line starting with {@code >}. Its name is the text
     * after the {@code >} up to (not including) the first space, or the whole
     * remainder of the line when there is no space. Lines that appear before
     * the first header, or under a header with a different name, are skipped.
     * Matching lines are appended as-is, with no separator between them. The
     * whole file is always scanned, so if several headers carry the requested
     * name their lines are all concatenated in file order.</p>
     *
     * <p>Every header name encountered, and the final sequence length, is
     * logged at info level.</p>
     *
     * @param regionFilePath path of the file to scan; it is opened through
     *        {@code NonDatabaseDataSourceIndexBuilder.getReader}
     * @param sequenceNameRequested the header name to look for (compared with
     *        {@code equals}, so the match is case-sensitive)
     * @return the concatenated sequence lines, or an empty string when no
     *         header matches
     * @throws IOException if reading from or closing the file fails
     */
    public synchronized String read(String regionFilePath, String sequenceNameRequested)
            throws IOException {

        File regionFile = new File(regionFilePath);
        BufferedReader fileReader = NonDatabaseDataSourceIndexBuilder.getReader(regionFile);

        // The accumulator never leaves this method, so the unsynchronized
        // StringBuilder is sufficient.
        StringBuilder sequence = new StringBuilder();

        try {
            String line;
            boolean currentSequenceMatches = false;

            while ((line = fileReader.readLine()) != null) {
                if (line.startsWith(">")) {
                    String sequenceName = line.substring(1);

                    // we ignore everything after a space
                    int spacePos = sequenceName.indexOf(" ");
                    if (spacePos != -1) {
                        sequenceName = sequenceName.substring(0, spacePos);
                    }

                    currentSequenceMatches = sequenceName.equals(sequenceNameRequested);
                    logger.info(String.format("Found %s", sequenceName));
                } else if (currentSequenceMatches) {
                    sequence.append(line);
                }
            }
        } finally {
            fileReader.close();
        }

        logger.info(String.format("Sequence length %s", sequence.length()));
        return sequence.toString();
    }
}