/******************************************************************************* * GenPlay, Einstein Genome Analyzer * Copyright (C) 2009, 2014 Albert Einstein College of Medicine * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * Authors: Julien Lajugie <julien.lajugie@einstein.yu.edu> * Nicolas Fourel <nicolas.fourel@einstein.yu.edu> * Eric Bouhassira <eric.bouhassira@einstein.yu.edu> * * Website: <http://genplay.einstein.yu.edu> ******************************************************************************/ package edu.yu.einstein.genplay.core.IO.extractor; import java.io.BufferedReader; import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import edu.yu.einstein.genplay.exception.exceptions.InvalidFileTypeException; import edu.yu.einstein.genplay.util.Utils; /** * Factory that tries to create and to return a subclass of {@link Extractor} depending on a file. * @author Julien Lajugie * @version 0.1 */ public final class ExtractorFactory { /** * @param fileToExtract file containing the data to extract * @return an {@link Extractor} if the extension of the file is known. null otherwise * @throws FileNotFoundException */ public static Extractor checkFileExtension(File fileToExtract) throws FileNotFoundException { String fileExtension = Utils.getExtension(fileToExtract); if (fileExtension == null) { return null; } if (fileExtension.equalsIgnoreCase("gptf")) { return new TransferableTrackExtractor(fileToExtract); } else if (fileExtension.equalsIgnoreCase("gff")) { return new GFFExtractor(fileToExtract); } else if (fileExtension.equalsIgnoreCase("gtf")) { return new GTFExtractor(fileToExtract); } else if (fileExtension.equalsIgnoreCase("gff3")) { // TODO return gff3 extractor return null; } else if (fileExtension.equalsIgnoreCase("gr")) { return new BedGraphExtractor(fileToExtract); } else if (fileExtension.equalsIgnoreCase("bed")) { return new BedExtractor(fileToExtract); } else if (fileExtension.equalsIgnoreCase("wig")) { return new WiggleExtractor(fileToExtract); } else if (fileExtension.equalsIgnoreCase("bgr")) { return new BedGraphExtractor(fileToExtract); } else if (fileExtension.equalsIgnoreCase("pair")) { return new PairExtractor(fileToExtract); } else if (fileExtension.equalsIgnoreCase("elx")) { return new ElandExtendedExtractor(fileToExtract); } else if (fileExtension.equalsIgnoreCase("psl")) { return new PSLExtractor(fileToExtract); } else if (fileExtension.equalsIgnoreCase("sam")) { return new SAMExtractor(fileToExtract); } else if (fileExtension.equalsIgnoreCase("bam")) { return new SAMExtractor(fileToExtract); } else if (fileExtension.equalsIgnoreCase("2bit")) { return new TwoBitExtractor(fileToExtract); } else { return null; } } /** * @param fileToExtract * @return an {@link Extractor} if there is some information about * the type of {@link Extractor} in the header. Null otherwise * @throws IOException */ public static Extractor checkHeader(File fileToExtract) throws IOException { BufferedReader reader = null; try { reader = new BufferedReader(new FileReader(fileToExtract)); boolean isHeader = true; String line = null; while (((line = reader.readLine()) != null) && isHeader) { isHeader = false; line = line.trim(); if (line.length() == 0) { isHeader = true; } // comment line if (line.charAt(0) == '#') { isHeader = true; } // browser line if (line.substring(0, 7).equalsIgnoreCase("browser")) { isHeader = true; } // track line if (line.substring(0, 5).equalsIgnoreCase("track")) { isHeader = true; } if (isHeader) { if (line.substring(0, 5).equalsIgnoreCase("##GFF")) { return new GFFExtractor(fileToExtract); } else if (line.substring(0, 5).equalsIgnoreCase("##GTF")) { return new GTFExtractor(fileToExtract); } else if (line.substring(0, 5).equalsIgnoreCase("track")) { String lineTmp = line.toLowerCase(); if (lineTmp.contains("type")) { String type = null; int indexStart = lineTmp.indexOf("type") + 4; line = line.substring(indexStart); line = line.trim(); if (line.charAt(0) == '=') { // remove the '=' from the line line = line.substring(1); line = line.trim(); if (line.charAt(0) == '\"') { // remove the first " line = line.substring(1); //type = line.split("\"")[0]; type = Utils.split(line, '"')[0]; } else { line = line.trim(); //type = line.split(" ")[0].trim(); type = Utils.split(line, ' ')[0]; } reader.close(); if (type.equalsIgnoreCase("bedgraph")) { return new BedGraphExtractor(fileToExtract); } else if (type.equalsIgnoreCase("bed")) { return new BedExtractor(fileToExtract); } else if (type.equalsIgnoreCase("wiggle")) { return new WiggleExtractor(fileToExtract); } else if (type.equalsIgnoreCase("eland_extended")) { return new ElandExtendedExtractor(fileToExtract); } else if (type.equalsIgnoreCase("psl")) { return new PSLExtractor(fileToExtract); } else if (type.equalsIgnoreCase("sam")) { return new SAMExtractor(fileToExtract); } else { return null; } } } } } } } finally { if (reader != null) { reader.close(); } } return null; } /** * @param fileToExtract file to extract * @return an instance of a subclass of {@link Extractor} if the type has been found. * Otherwise throw a {@link InvalidFileTypeException} * @throws IOException * @throws InvalidFileTypeException */ public static Extractor getExtractor(File fileToExtract) throws IOException, InvalidFileTypeException { Extractor extractor = null; extractor = checkHeader(fileToExtract); if (extractor != null) { return extractor; } extractor = checkFileExtension(fileToExtract); if (extractor != null) { return extractor; } // if we can't figure out the type of Extractor throw new InvalidFileTypeException(); } }