/*******************************************************************************
* GenPlay, Einstein Genome Analyzer
* Copyright (C) 2009, 2014 Albert Einstein College of Medicine
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
* Authors: Julien Lajugie <julien.lajugie@einstein.yu.edu>
* Nicolas Fourel <nicolas.fourel@einstein.yu.edu>
* Eric Bouhassira <eric.bouhassira@einstein.yu.edu>
*
* Website: <http://genplay.einstein.yu.edu>
******************************************************************************/
package edu.yu.einstein.genplay.core.IO.extractor;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.TreeSet;
import edu.yu.einstein.genplay.core.IO.dataReader.DataReader;
import edu.yu.einstein.genplay.core.IO.utils.Extractors;
import edu.yu.einstein.genplay.core.IO.utils.TrackLineHeader;
import edu.yu.einstein.genplay.exception.ExceptionManager;
import edu.yu.einstein.genplay.exception.exceptions.DataLineException;
import edu.yu.einstein.genplay.gui.statusBar.Stoppable;
/**
 * Base class that must be extended by every {@link Extractor} reading a text file.
 * <p>
 * The class owns a {@link BufferedReader} on the data file, reads the header lines once
 * (see {@link #extractHeaderLine(String)}) and then feeds the remaining lines one by one to
 * {@link #extractDataLine(String)} each time {@link #readItem()} is called.
 * It also keeps track of the number of lines extracted and skipped.
 * @author Julien Lajugie
 */
public abstract class TextFileExtractor extends Extractor implements Stoppable, DataReader {

	/** Size of the buffer of the reader */
	private static final int BUFFER_LENGTH = 8192;

	/** Return code when the extraction of a file is done */
	protected static final int EXTRACTION_DONE = 0;

	/** Return code when a line was skipped */
	protected static final int LINE_SKIPPED = 1;

	/** Return code when a line was extracted but not an item */
	protected static final int LINE_EXTRACTED = 2;

	/** Return code when an item was extracted */
	protected static final int ITEM_EXTRACTED = 3;

	private final BufferedReader	reader;						// buffered reader to read the data
	private boolean					isInitialized = false;		// true when the file has been initialized and is ready to be extracted
	private int						lineExtracted;				// number of lines extracted
	private int						lineSkipped;				// number of lines skipped
	private int						currentLineNumber;			// current line number
	private Integer					randomLineCount = null;		// number of random lines to extract in the text file. Extract the entire file if null
	private TreeSet<Integer>		randomLineNumbers;			// TreeSet containing the numbers of the lines to extract (the line numbers are randomly generated)
	private final TrackLineHeader	trackLineHeader;			// header of the track extracted from the track line


	/**
	 * Creates an instance of {@link TextFileExtractor} and opens a buffered reader on the specified file
	 * @param dataFile text file containing the data to extract
	 * @throws FileNotFoundException if the specified file cannot be opened for reading
	 */
	public TextFileExtractor(File dataFile) throws FileNotFoundException {
		super(dataFile);
		reader = new BufferedReader(new FileReader(dataFile), BUFFER_LENGTH);
		lineExtracted = 0;
		lineSkipped = 0;
		trackLineHeader = new TrackLineHeader();
	}


	/**
	 * Method defining how to extract the data
	 * @param currentLine a data line (trimmed and not empty)
	 * @return
	 * <ul>
	 * <li> {@link #EXTRACTION_DONE} if the extraction is finished (the line was not extracted)
	 * <li> {@link #LINE_SKIPPED} if the line was skipped but the extraction is not done
	 * <li> {@link #LINE_EXTRACTED} if the line was extracted but no items were extracted (multi-line items)
	 * <li> {@link #ITEM_EXTRACTED} if the line was extracted and an item was extracted
	 * </ul>
	 * @throws DataLineException if the line is not valid. The line is then reported to the data event listeners and counted as skipped
	 */
	protected abstract int extractDataLine(String currentLine) throws DataLineException;


	/**
	 * Method defining how to extract the header.
	 * The default implementation gives the line to the {@link TrackLineHeader} of this extractor.
	 * @param currentLine a header line
	 */
	protected void extractHeaderLine(String currentLine) {
		trackLineHeader.parseTrackLine(currentLine);
	}


	/**
	 * Finalizes the extraction:
	 * <ul>
	 * <li> Calls the finalization of the super class
	 * <li> Closes the reader
	 * </ul>
	 * An {@link IOException} occurring while closing the reader is handed to the {@link ExceptionManager}.
	 */
	@Override
	protected final void finalizeExtraction() {
		super.finalizeExtraction();
		if (reader != null) {
			try {
				reader.close();
			} catch (IOException e) {
				ExceptionManager.getInstance().caughtException(e);
			}
		}
	}


	/**
	 * @return the number of the current line being extracted
	 */
	public int getCurrentLineNumber() {
		return currentLineNumber;
	}


	/**
	 * @return the number of lines extracted
	 */
	public int getLineExtracted() {
		return lineExtracted;
	}


	/**
	 * @return the number of lines skipped
	 */
	public int getLineSkipped() {
		return lineSkipped;
	}


	/**
	 * @return the number of random lines to extract in the text file. The entire file will be extracted if null
	 */
	public Integer getRandomLineCount() {
		return randomLineCount;
	}


	/**
	 * @return a {@link TrackLineHeader} object containing the parameters extracted from the "Track" line
	 */
	public TrackLineHeader getTrackLineHeader() {
		return trackLineHeader;
	}


	/**
	 * Initializes the extraction and extracts the header of the data text file
	 * @throws IOException if an I/O error occurs while reading the header
	 */
	@Override
	protected final void initializeExtraction() throws IOException {
		super.initializeExtraction();
		isInitialized = true;
		readHeader();
	}


	/**
	 * Reads the header and calls the {@link #extractHeaderLine(String)} method for each header line.
	 * When the method returns, the reader is positioned on the first line that is not a header line.
	 * @throws IOException if an I/O error occurs, or if the reader cannot be rolled back to the first data line
	 */
	private void readHeader() throws IOException {
		String currentLine = null;
		reader.mark(BUFFER_LENGTH);
		// loop for the header. The mark is moved after each header line so it always
		// points to the beginning of the line that is going to be read next
		while (!isStopped()
				&& ((currentLine = reader.readLine()) != null)
				&& Extractors.isHeaderLine(currentLine)) {
			currentLineNumber++;
			reader.mark(BUFFER_LENGTH);
			extractHeaderLine(currentLine);
		}
		// if the loop ended because a line that is not a header line was read, this line has
		// already been consumed: we roll back to the mark so it is read again as a data line.
		// We test the line read by the loop and we don't read any additional line, otherwise
		// the first data line would be lost when it is the last line of the file.
		// NOTE(review): the reset may fail if this first data line is longer than BUFFER_LENGTH characters
		if ((currentLine != null) && !Extractors.isHeaderLine(currentLine)) {
			reader.reset();
		}
	}


	/**
	 * Reads the file until an item is extracted, the extraction is done or the end of the file is reached.
	 * The extractor is initialized (and the header is read) the first time this method is called.
	 * The extraction is finalized when the method returns false.
	 * @return true if an item was extracted, false if the extraction was stopped or if there is no more item to extract
	 * @throws IOException if an I/O error occurs while reading the file
	 */
	@Override
	public boolean readItem() throws IOException {
		// case where the extraction was stopped
		if (isStopped()) {
			finalizeExtraction();
			return false;
		}
		// case where we need to initialize the extractor
		if (!isInitialized) {
			initializeExtraction();
		}
		String currentLine = null;
		int extractionStatus = LINE_SKIPPED;
		// the status is checked first so that no additional line is read once the extraction is done
		while ((extractionStatus != EXTRACTION_DONE) && ((currentLine = reader.readLine()) != null)) {
			currentLineNumber++;
			currentLine = currentLine.trim();
			if (!currentLine.isEmpty()) {
				// we extract a line if either way:
				// 1. the whole file needs to be extracted (ie: the randomLineNumbers variable is not set)
				// 2. we extract a random part of the file and the current line was selected as one of the random line to extract
				// (ie the current line number is present in the randomLineNumbers set)
				if ((randomLineNumbers == null) || (randomLineNumbers.contains(currentLineNumber))) {
					try {
						extractionStatus = extractDataLine(currentLine);
					} catch (DataLineException e) {
						notifyDataEventListeners(e, currentLineNumber, currentLine);
						// an invalid line is a skipped line. The status is set here (instead of incrementing
						// the counter) so the line is counted exactly once by the switch below and the
						// status of the previous line is never reused
						extractionStatus = LINE_SKIPPED;
					}
					switch (extractionStatus) {
					case LINE_EXTRACTED:
						lineExtracted++;
						break;
					case LINE_SKIPPED:
						lineSkipped++;
						break;
					case ITEM_EXTRACTED:
						lineExtracted++;
						itemExtractedCount++;
						return true;
					default:
						// EXTRACTION_DONE: the line was not extracted, the loop ends
						break;
					}
				}
			}
		}
		// case where the extraction is done
		finalizeExtraction();
		return false;
	}


	/**
	 * @param dataFile file containing the data
	 * @return the name of the data. This is the name returned by {@link Extractors#retrieveDataName(File)},
	 * or the name of the file if that method returns null
	 */
	@Override
	protected String retrieveDataName(File dataFile) {
		String dataName = Extractors.retrieveDataName(dataFile);
		if (dataName == null) {
			dataName = dataFile.getName();
		}
		return dataName;
	}


	/**
	 * Sets the number of random lines to extract in the text file
	 * @param randomLineCount number of random lines to extract in the text file. Extract the entire file if null
	 * @throws UnsupportedOperationException if the extractor doesn't support this operation (eg: Wiggle Extractors)
	 * @throws IOException if an I/O error occurs while generating the random line numbers
	 */
	public void setRandomLineCount(Integer randomLineCount) throws UnsupportedOperationException, IOException {
		this.randomLineCount = randomLineCount;
		// if the randomLineCount variable is not null we generate a tree set of random line numbers to extract
		randomLineNumbers = null;
		if (randomLineCount != null) {
			randomLineNumbers = Extractors.generateRandomLineNumbers(randomLineCount, getDataFile());
		}
	}
}