/******************************************************************************* * GenPlay, Einstein Genome Analyzer * Copyright (C) 2009, 2014 Albert Einstein College of Medicine * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * Authors: Julien Lajugie <julien.lajugie@einstein.yu.edu> * Nicolas Fourel <nicolas.fourel@einstein.yu.edu> * Eric Bouhassira <eric.bouhassira@einstein.yu.edu> * * Website: <http://genplay.einstein.yu.edu> ******************************************************************************/ package edu.yu.einstein.genplay.core.IO.extractor; import java.io.File; import java.io.FileNotFoundException; import edu.yu.einstein.genplay.core.IO.dataReader.SCWReader; import edu.yu.einstein.genplay.core.IO.utils.DataLineValidator; import edu.yu.einstein.genplay.core.IO.utils.Extractors; import edu.yu.einstein.genplay.dataStructure.chromosome.Chromosome; import edu.yu.einstein.genplay.exception.exceptions.DataLineException; import edu.yu.einstein.genplay.exception.exceptions.InvalidChromosomeException; import edu.yu.einstein.genplay.util.Utils; /** * An Affymetrix pair file extractor * @author Julien Lajugie */ public final class PairExtractor extends TextFileExtractor implements SCWReader { /** Default first base position of bed files. Affymetrix PAIR files are 1-based (to be verified) */ public static final int DEFAULT_FIRST_BASE_POSITION = 0; private int firstBasePosition = DEFAULT_FIRST_BASE_POSITION;// position of the first base private Chromosome chromosome; // chromosome of the last item read private Integer position; // position of the last item read private Float score; // score of the last item read /** * Creates an instance of {@link PairExtractor} * @param dataFile file containing the data * @throws FileNotFoundException if the specified file is not found */ public PairExtractor(File dataFile) throws FileNotFoundException { super(dataFile); } @Override protected int extractDataLine(String line) throws DataLineException { chromosome = null; position = null; score = null; if (line.trim().isEmpty()) { return LINE_SKIPPED; } // We don't want to extract the header lines // So we extract only if the line starts with a number try { Extractors.getInt(line.substring(0, 1)); } catch (Exception e){ return LINE_SKIPPED; } String[] splitedLine = Extractors.parseLineTabOnly(line); if (splitedLine.length < 10) { throw new DataLineException(DataLineException.INVALID_PARAMETER_NUMBER); } String chromosomeField[] = Utils.split(splitedLine[2], ':'); if (chromosomeField.length != 2) { throw new DataLineException(DataLineException.INVALID_PARAMETER_NUMBER); } String chromosomeName = chromosomeField[0]; if (getChromosomeSelector() != null) { // case where last chromosome already extracted, no more data to extract if (getChromosomeSelector().isExtractionDone(chromosomeName)) { return EXTRACTION_DONE; } // chromosome was not selected for extraction if (!getChromosomeSelector().isSelected(chromosomeName)) { return LINE_SKIPPED; } } try { chromosome = getProjectChromosome().get(chromosomeName) ; } catch (InvalidChromosomeException e) { // unknown chromosome return LINE_SKIPPED; } position = Extractors.getInt(splitedLine[4]); score = Extractors.getFloat(splitedLine[9]); // Checks errors String errors = DataLineValidator.getErrors(chromosome, position, position, score); if (errors.length() == 0) { position = getRealGenomePosition(chromosome, position); return ITEM_EXTRACTED; } else { throw new DataLineException(errors); } } @Override public Chromosome getChromosome() { return chromosome; } @Override public int getFirstBasePosition() { return firstBasePosition; } @Override public Float getScore() { return score; } @Override public Integer getStart() { return position; } @Override public Integer getStop() { return position; } @Override public void setFirstBasePosition(int firstBasePosition) { this.firstBasePosition = firstBasePosition; } }