/*
* The MIT License (MIT)
*
* Copyright (c) 2007-2015 Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package org.broad.igv.feature.genome;
import org.apache.log4j.Logger;
import org.broad.igv.Globals;
import org.broad.igv.util.FileUtils;
import org.broad.igv.util.HttpUtils;
import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
/**
* /**
*
* @author jrobinso
*/
public class GenomeImporter {
public static final int MAX_CONTIGS = 1500000;
static Logger log = Logger.getLogger(GenomeImporter.class);
public static final Pattern SEQUENCE_NAME_SPLITTER = Pattern.compile("\\s+");
/**
* Create a zip containing all the information and data required to load a
* genome. All file/directory validation is assume to have been done by validation
* outside of this method.
*
* @param genomeFile
* @param genomeId Id of the genome.
* @param genomeDisplayName The genome name that is user-friendly.
* @param fastaFile The location of a fasta file, or directory of fasta files
* @param geneAnnotFile
* @param cytobandFile
* @return The newly created genome archive file.
*/
public File createGenomeArchive(File genomeFile,
String genomeId,
String genomeDisplayName,
String fastaFile,
File geneAnnotFile,
File cytobandFile,
File chrAliasFile) throws IOException {
if ((genomeFile == null) || (genomeId == null) || (genomeDisplayName == null)) {
log.error("Invalid input for genome creation: ");
log.error("\tGenome file=" + genomeFile.getAbsolutePath());
log.error("\tGenome Id=" + genomeId);
log.error("\tGenome Name" + genomeDisplayName);
return null;
}
File propertyFile = null;
FileWriter propertyFileWriter = null;
try {
boolean fastaDirectory = false;
List<String> fastaFileNames = new ArrayList<String>();
if (!FileUtils.resourceExists(fastaFile)) {
String msg = "File not found: " + fastaFile;
throw new GenomeException(msg);
}
if (fastaFile.toLowerCase().endsWith(Globals.ZIP_EXTENSION)) {
String msg = "Error. Zip archives are not supported. Please select a fasta file.";
throw new GenomeException(msg);
}
if (fastaFile.toLowerCase().endsWith(Globals.GZIP_FILE_EXTENSION)) {
String msg = "Error. GZipped files are not supported. Please select a non-gzipped fasta file.";
throw new GenomeException(msg);
}
List<String> fastaIndexPathList = new ArrayList<String>();
String fastaIndexPath = fastaFile + ".fai";
File sequenceInputFile = new File(fastaFile);
if (sequenceInputFile.exists()) {
// Local file
if (sequenceInputFile.isDirectory()) {
fastaDirectory = true;
List<File> files = getSequenceFiles(sequenceInputFile);
for (File file : files) {
if (file.getName().toLowerCase().endsWith(Globals.GZIP_FILE_EXTENSION)) {
String msg = "<html>Error. One or more fasta files are gzipped: " + file.getName() +
"<br>All fasta files must be gunzipped prior to importing.";
throw new GenomeException(msg);
}
File indexFile = new File(sequenceInputFile, file.getName() + ".fai");
if (!indexFile.exists()) {
FastaUtils.createIndexFile(file.getAbsolutePath(), indexFile.getAbsolutePath());
}
fastaIndexPathList.add(fastaIndexPath);
fastaFileNames.add(file.getName());
}
} else {
// Index if neccessary
File indexFile = new File(fastaIndexPath);
if (!indexFile.exists()) {
FastaUtils.createIndexFile(fastaFile, fastaIndexPath);
}
fastaIndexPathList.add(fastaIndexPath);
}
} else {
if (!FileUtils.resourceExists(fastaIndexPath)) {
String msg = "<html>Index file " + fastaIndexPath + " Not found. " +
"<br>Remote fasta files must be indexed prior to importing.";
throw new GenomeException(msg);
}
}
fastaFile = FileUtils.getRelativePath(genomeFile.getParent(), fastaFile);
// Create "in memory" property file
byte[] propertyBytes = createGenomePropertyFile(genomeId, genomeDisplayName, fastaFile, geneAnnotFile,
cytobandFile, chrAliasFile, fastaDirectory, fastaFileNames);
File[] inputFiles = {geneAnnotFile, cytobandFile, chrAliasFile};
// Create archive
createGenomeArchive(genomeFile, inputFiles, propertyBytes);
} finally {
if (propertyFileWriter != null) {
try {
propertyFileWriter.close();
} catch (IOException ex) {
log.error("Failed to close genome archive: +" + genomeFile.getAbsolutePath(), ex);
}
}
if (propertyFile != null) propertyFile.delete();
}
return genomeFile;
}
private List<File> getSequenceFiles(File sequenceDir) {
ArrayList<File> files = new ArrayList();
for (File f : sequenceDir.listFiles()) {
if (f.getName().startsWith(".") || f.isDirectory() || f.getName().endsWith(".fai")) {
continue;
} else {
files.add(f);
}
}
return files;
}
/**
* This method creates the property.txt file that is stored in each
* .genome file. This is not the user-defined genome property file
* created by storeUserDefinedGenomeListToFile(...)
*
* @param genomeId
* @param genomeDisplayName
* @param sequenceLocation Path to nucleotide sequence. Can be absolute or relative, also local or remote
* @param geneAnnotFile
* @param cytobandFile
* @param fastaFileNames
* @return
*/
public byte[] createGenomePropertyFile(String genomeId,
String genomeDisplayName,
String sequenceLocation,
File geneAnnotFile,
File cytobandFile,
File chrAliasFile,
boolean fastaDirectory,
List<String> fastaFileNames) throws IOException {
PrintWriter propertyFileWriter = null;
try {
ByteArrayOutputStream propertyBytes = new ByteArrayOutputStream();
// Add the new property file to the archive
propertyFileWriter = new PrintWriter(new OutputStreamWriter(propertyBytes));
propertyFileWriter.println("fasta=true"); // Fasta is the only format supported now
propertyFileWriter.println("fastaDirectory=" + fastaDirectory);
if (fastaDirectory) {
propertyFileWriter.print("fastaFiles=");
for (String fif : fastaFileNames) {
propertyFileWriter.print(fif + ",");
}
propertyFileWriter.println();
}
propertyFileWriter.println("ordered=" + !fastaDirectory);
if (genomeId != null) {
propertyFileWriter.println(Globals.GENOME_ARCHIVE_ID_KEY + "=" + genomeId);
}
if (genomeDisplayName != null) {
propertyFileWriter.println(Globals.GENOME_ARCHIVE_NAME_KEY + "=" + genomeDisplayName);
}
if (cytobandFile != null) {
propertyFileWriter.println(Globals.GENOME_ARCHIVE_CYTOBAND_FILE_KEY + "=" + cytobandFile.getName());
}
if (geneAnnotFile != null) {
propertyFileWriter.println(Globals.GENOME_ARCHIVE_GENE_FILE_KEY + "=" + geneAnnotFile.getName());
}
if (chrAliasFile != null) {
propertyFileWriter.println(Globals.GENOME_CHR_ALIAS_FILE_KEY + "=" + chrAliasFile.getName());
}
if (sequenceLocation != null) {
if (!HttpUtils.isRemoteURL(sequenceLocation)) {
sequenceLocation = sequenceLocation.replace('\\', '/');
}
propertyFileWriter.println(Globals.GENOME_ARCHIVE_SEQUENCE_FILE_LOCATION_KEY + "=" + sequenceLocation);
}
propertyFileWriter.flush();
return propertyBytes.toByteArray();
} finally {
if (propertyFileWriter != null) {
propertyFileWriter.close();
}
}
}
final static int ZIP_ENTRY_CHUNK_SIZE = 64000;
static public void createGenomeArchive(File zipOutputFile, File[] inputFiles, byte[] propertyBytes)
throws FileNotFoundException, IOException {
if (zipOutputFile == null) {
return;
}
if ((inputFiles == null) || (inputFiles.length == 0)) {
return;
}
ZipOutputStream zipOutputStream = null;
try {
zipOutputStream = new ZipOutputStream(new FileOutputStream(zipOutputFile));
ZipEntry propertiesEntry = new ZipEntry("property.txt");
propertiesEntry.setSize(propertyBytes.length);
zipOutputStream.putNextEntry(propertiesEntry);
zipOutputStream.write(propertyBytes);
for (File file : inputFiles) {
if (file == null) {
continue;
}
long fileLength = file.length();
ZipEntry zipEntry = new ZipEntry(file.getName());
zipEntry.setSize(fileLength);
zipOutputStream.putNextEntry(zipEntry);
BufferedInputStream bufferedInputstream = null;
try {
InputStream inputStream = new FileInputStream(file);
bufferedInputstream = new BufferedInputStream(inputStream);
int bytesRead = 0;
byte[] data = new byte[ZIP_ENTRY_CHUNK_SIZE];
while ((bytesRead = bufferedInputstream.read(data)) != -1) {
zipOutputStream.write(data, 0, bytesRead);
}
} finally {
if (bufferedInputstream != null) {
bufferedInputstream.close();
}
}
}
} finally {
if (zipOutputStream != null) {
zipOutputStream.flush();
zipOutputStream.close();
}
}
}
}