/*******************************************************************************
 * GenPlay, Einstein Genome Analyzer
 * Copyright (C) 2009, 2014 Albert Einstein College of Medicine
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 * Authors: Julien Lajugie <julien.lajugie@einstein.yu.edu>
 * Nicolas Fourel <nicolas.fourel@einstein.yu.edu>
 * Eric Bouhassira <eric.bouhassira@einstein.yu.edu>
 *
 * Website: <http://genplay.einstein.yu.edu>
 ******************************************************************************/
package edu.yu.einstein.genplay.core.IO.extractor;

import java.io.File;
import java.io.FileNotFoundException;

import edu.yu.einstein.genplay.core.IO.dataReader.SCWReader;
import edu.yu.einstein.genplay.core.IO.utils.DataLineValidator;
import edu.yu.einstein.genplay.core.IO.utils.Extractors;
import edu.yu.einstein.genplay.dataStructure.chromosome.Chromosome;
import edu.yu.einstein.genplay.exception.exceptions.DataLineException;
import edu.yu.einstein.genplay.exception.exceptions.InvalidChromosomeException;


/**
 * A Wiggle file extractor.
 * <p>
 * The extractor is stateful: declaration lines (<code>variableStep</code> / <code>fixedStep</code>
 * with their <code>chrom=</code>, <code>start=</code>, <code>step=</code> and <code>span=</code>
 * parameters) set the context that is applied to the data lines that follow them.
 * @author Julien Lajugie
 */
public final class WiggleExtractor extends TextFileExtractor implements SCWReader {

    /** Default first base position of wiggle files. Wiggle files are 1-based */
    public static final int DEFAULT_FIRST_BASE_POSITION = 1;

    private static final String CHROM_PREFIX = "chrom=";    // prefix of the chromosome parameter
    private static final String START_PREFIX = "start=";    // prefix of the start parameter
    private static final String STEP_PREFIX = "step=";      // prefix of the step parameter
    private static final String SPAN_PREFIX = "span=";      // prefix of the span parameter

    // NOTE(review): firstBasePosition is stored and exposed through its accessors but is
    // not applied to the positions computed in extractDataLine -- confirm this is intended
    private int         firstBasePosition = DEFAULT_FIRST_BASE_POSITION;    // position of the first base
    private Chromosome  chromosome;             // chromosome of the last item read
    private Integer     start;                  // start position of the last item read
    private Integer     stop;                   // stop position of the last item read
    private Float       score;                  // score of the last item read
    private int         currentSpan;            // last span specified
    private int         currentStep;            // last step specified
    private int         currentPosition;        // current position
    private boolean     isFixedStep = false;    // true if we are extracting a fixedStep line


    /**
     * Creates an instance of {@link WiggleExtractor}
     * @param dataFile file containing the data
     * @throws FileNotFoundException if the specified file is not found
     */
    public WiggleExtractor(File dataFile) throws FileNotFoundException {
        super(dataFile);
    }


    /**
     * Extracts the data from a line of a wiggle file.
     * Declaration lines update the current context (mode, chromosome, position, step, span).
     * Data lines set the start, stop and score of the current item.
     * @param line a line of the wiggle file
     * @return EXTRACTION_DONE if the selector reports that the extraction is done for the declared chromosome,
     * LINE_SKIPPED if the line doesn't produce an item (unknown or unselected chromosome, score equal to zero),
     * ITEM_EXTRACTED if an item was extracted from a data line,
     * LINE_EXTRACTED if the line was a declaration line that was entirely processed
     * @throws DataLineException if the line has an invalid number of parameters or invalid positions
     */
    @Override
    protected int extractDataLine(String line) throws DataLineException {
        start = null;
        stop = null;
        score = null;
        String[] splittedLine = Extractors.parseLineTabAndSpace(line);

        int i = 0;
        while (i < splittedLine.length) {
            String currentField = splittedLine[i].trim();
            if (currentField.equalsIgnoreCase("variableStep")) {
                // a variableStep must at least contain 2 elements
                if (splittedLine.length < 2) {
                    throw new DataLineException(DataLineException.INVALID_PARAMETER_NUMBER);
                }
                isFixedStep = false;
                currentSpan = 1; // the span is 1 unless a span parameter overrides it
            } else if (currentField.equalsIgnoreCase("fixedStep")) {
                // a fixedStep must at least contain 4 elements
                if (splittedLine.length < 4) {
                    throw new DataLineException(DataLineException.INVALID_PARAMETER_NUMBER);
                }
                isFixedStep = true;
                currentSpan = 1; // the span is 1 unless a span parameter overrides it
            } else if (hasPrefix(currentField, CHROM_PREFIX)) {
                // retrieve chromosome
                String chromosomeName = currentField.substring(CHROM_PREFIX.length()).trim();
                // case where last chromosome already extracted, no more data to extract
                if ((getChromosomeSelector() != null) && getChromosomeSelector().isExtractionDone(chromosomeName)) {
                    return EXTRACTION_DONE;
                }
                // forget the previous chromosome so that the data lines following a declaration
                // with an unknown chromosome are skipped instead of being attributed to it
                chromosome = null;
                try {
                    chromosome = getProjectChromosome().get(chromosomeName);
                } catch (InvalidChromosomeException e) {
                    // unknown chromosome
                    return LINE_SKIPPED;
                }
                // chromosome was not selected for extraction
                if ((getChromosomeSelector() != null) && !getChromosomeSelector().isSelected(chromosomeName)) {
                    return LINE_SKIPPED;
                }
            } else if (hasPrefix(currentField, START_PREFIX)) {
                // retrieve start position
                currentPosition = Extractors.getInt(currentField.substring(START_PREFIX.length()));
            } else if (hasPrefix(currentField, STEP_PREFIX)) {
                // retrieve step
                currentStep = Extractors.getInt(currentField.substring(STEP_PREFIX.length()));
            } else if (hasPrefix(currentField, SPAN_PREFIX)) {
                // retrieve span
                currentSpan = Extractors.getInt(currentField.substring(SPAN_PREFIX.length()));
            } else {
                // the field is not a keyword or a parameter: the line is a data line
                return extractItem(line, splittedLine, i);
            }
            i++;
        }
        return LINE_EXTRACTED;
    }


    /**
     * Extracts the item of a data line using the context set by the last declaration line.
     * @param line the line being extracted (used to notify the listeners)
     * @param splittedLine fields of the line
     * @param index index of the first data field in the array of fields
     * @return LINE_SKIPPED if no item is extracted from the line, ITEM_EXTRACTED otherwise
     * @throws DataLineException if the line has an invalid number of parameters or invalid positions
     */
    private int extractItem(String line, String[] splittedLine, int index) throws DataLineException {
        if (chromosome == null) {
            // no valid chromosome declared before the data line
            return LINE_SKIPPED;
        }
        if (isFixedStep) {
            // fixedStep data lines contain the score only
            score = Extractors.getFloat(splittedLine[index]);
            if ((score == 0) || !isCurrentChromosomeSelected()) {
                // the position still needs to move forward for the next data line
                currentPosition += currentStep;
                return LINE_SKIPPED;
            }
        } else {
            // variableStep data lines contain a position followed by a score
            if ((index + 1) >= splittedLine.length) {
                throw new DataLineException(DataLineException.INVALID_PARAMETER_NUMBER);
            }
            currentPosition = Extractors.getInt(splittedLine[index].trim());
            // the score must be stored in the field so that getScore() returns it
            score = Extractors.getFloat(splittedLine[index + 1]);
            if ((score == 0) || !isCurrentChromosomeSelected()) {
                return LINE_SKIPPED;
            }
        }
        start = currentPosition;
        stop = currentPosition + currentSpan;

        // check for data line errors
        String errors = DataLineValidator.getErrors(chromosome, start, stop);
        if (!errors.isEmpty()) {
            throw new DataLineException(errors);
        }

        // Stop position checking, must not be greater than the chromosome length
        String stopEndErrorMessage = DataLineValidator.getErrors(chromosome, stop);
        if (!stopEndErrorMessage.isEmpty()) {
            DataLineException stopEndException = new DataLineException(stopEndErrorMessage, DataLineException.SHRINK_STOP_PROCESS);
            // notify the listeners that the stop position needed to be shrunk
            notifyDataEventListeners(stopEndException, getCurrentLineNumber(), line);
            stop = chromosome.getLength();
        }

        // if we are in a multi-genome project, we compute the position on the meta genome
        start = getRealGenomePosition(chromosome, start);
        stop = getRealGenomePosition(chromosome, stop);
        // only meaningful for fixedStep lines: variableStep lines overwrite the position
        currentPosition += currentStep;
        return ITEM_EXTRACTED;
    }


    @Override
    public Chromosome getChromosome() {
        return chromosome;
    }


    @Override
    public int getFirstBasePosition() {
        return firstBasePosition;
    }


    @Override
    public Float getScore() {
        return score;
    }


    @Override
    public Integer getStart() {
        return start;
    }


    @Override
    public Integer getStop() {
        return stop;
    }


    /**
     * @param field a field of a line
     * @param prefix a parameter prefix
     * @return true if the field starts with the specified prefix (case insensitive) and
     * contains at least one character after it
     */
    private static boolean hasPrefix(String field, String prefix) {
        return (field.length() > prefix.length()) && field.regionMatches(true, 0, prefix, 0, prefix.length());
    }


    /**
     * @return true if the current chromosome needs to be extracted.
     * Every chromosome is considered selected when there is no chromosome selector
     */
    private boolean isCurrentChromosomeSelected() {
        return (getChromosomeSelector() == null) || getChromosomeSelector().isSelected(chromosome.getName());
    }


    @Override
    public void setFirstBasePosition(int firstBasePosition) {
        this.firstBasePosition = firstBasePosition;
    }


    /**
     * We raise a new UnsupportedOperationException because it's not possible to load
     * a random fraction of a wiggle file
     */
    @Override
    public void setRandomLineCount(Integer randomLineCount) throws UnsupportedOperationException {
        throw new UnsupportedOperationException("Wiggle files need to be entirely extracted");
    }
}