/*
* Eoulsan development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public License version 2.1 or
* later and CeCILL-C. This should be distributed with the code.
* If you do not have a copy, see:
*
* http://www.gnu.org/licenses/lgpl-2.1.txt
* http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.txt
*
* Copyright for this code is held jointly by the Genomic platform
* of the Institut de Biologie de l'École normale supérieure and
* the individual authors. These should be listed in @author doc
* comments.
*
* For more information on the Eoulsan project and its aims,
* or to join the Eoulsan Google group, visit the home page
* at:
*
* http://outils.genomique.biologie.ens.fr/eoulsan
*
*/
package fr.ens.biologie.genomique.eoulsan.design.io;
import static com.google.common.base.Preconditions.checkNotNull;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import com.google.common.base.Splitter;
import fr.ens.biologie.genomique.eoulsan.Globals;
import fr.ens.biologie.genomique.eoulsan.data.DataFile;
import fr.ens.biologie.genomique.eoulsan.design.Design;
import fr.ens.biologie.genomique.eoulsan.design.DesignFactory;
import fr.ens.biologie.genomique.eoulsan.design.Experiment;
import fr.ens.biologie.genomique.eoulsan.design.ExperimentSample;
import fr.ens.biologie.genomique.eoulsan.design.ExperimentSampleMetadata;
import fr.ens.biologie.genomique.eoulsan.design.Sample;
import fr.ens.biologie.genomique.eoulsan.util.GuavaCompatibility;
/**
* This class defines a design reader for Eoulsan 2 design files.
* @since 2.0
* @author Xavier Bauquet
*/
public class Eoulsan2DesignReader implements DesignReader {

  // Names of the two mandatory leading columns of the sample table
  static final String SAMPLE_ID_FIELDNAME = "SampleId";
  static final String SAMPLE_NAME_FIELDNAME = "SampleName";

  // Experiment related keys have the form "Exp.<experimentId>.<key>"
  static final String EXPERIMENT_FIELD_PREFIX = "Exp.";
  static final String EXPERIMENT_NAME_SUFFIX = "name";

  static final String DESIGN_FORMAT_VERSION_METADATA_KEY =
      "DesignFormatVersion";
  static final String FORMAT_VERSION = "2";

  static final char EQUAL_SEPARATOR = '=';
  static final char TAB_SEPARATOR = '\t';
  static final char DOT_SEPARATOR = '.';

  /** The stream from which the design is read. */
  private final InputStream is;

  /** Split on tabulations and drop the empty fields. */
  private final Splitter trimTabSplitter =
      Splitter.on(TAB_SEPARATOR).omitEmptyStrings();

  /** Split on tabulations and trim each field (empty fields are kept). */
  private final Splitter tabSplitter = Splitter.on(TAB_SEPARATOR).trimResults();

  /** Split on dots and trim each field. */
  private final Splitter dotSplitter = Splitter.on(DOT_SEPARATOR).trimResults();

  //
  // Parsing Format header
  //

  /**
   * Parse one "key=value" line of the header of the design file. Keys that
   * start with the experiment prefix are handled as experiment metadata, all
   * the other keys as design metadata.
   * @param design the design object
   * @param line the line read from the design file
   * @throws IOException if the line has no '=' separator, or if the key or
   *           the value is empty, or if the metadata is invalid
   */
  private void parseHeader(final Design design, final String line)
      throws IOException {

    // Only the first '=' is a separator, the value may contain other '='
    final int equalPos = line.indexOf(EQUAL_SEPARATOR);

    if (equalPos == -1) {
      throw new IOException("Invalid design file header line, no \""
          + EQUAL_SEPARATOR + "\" separator found: " + line);
    }

    final String key = line.substring(0, equalPos).trim();
    final String value = line.substring(equalPos + 1).trim();

    // if the key is empty
    if ("".equals(key)) {
      throw new IOException("Found an empty field name in design file header.");
    }

    // if the value is empty
    if ("".equals(value)) {
      throw new IOException(
          "Found an empty field value in design file header.");
    }

    // If it is an experiment field or a design field
    if (key.startsWith(EXPERIMENT_FIELD_PREFIX)) {
      readExpMetadata(key, value, design);
    } else {
      readDesignMetadata(key, value, design);
    }
  }

  /**
   * Read an experiment metadata entry from the header part. The experiment is
   * created in the design if it does not exist yet.
   * @param key the full key of the experiment metadata
   *          ("Exp.&lt;experimentId&gt;.&lt;key&gt;")
   * @param value the value of the experiment metadata read
   * @param design the design object
   * @throws IOException if the key has not exactly three dot separated parts
   *           or if the metadata is already defined for the experiment
   */
  private void readExpMetadata(String key, String value, Design design)
      throws IOException {

    // Split the experiment key to extract the experiment id and the metadata
    // key
    final List<String> expField =
        GuavaCompatibility.splitToList(this.dotSplitter, key);

    if (expField.size() != 3) {
      throw new IOException("The experiment key is invalid.");
    }

    final String expId = expField.get(1);
    final String expKey = expField.get(2);

    // Create the experiment if doesn't exist
    if (!design.containsExperiment(expId)) {
      design.addExperiment(expId);
    }

    final Experiment experiment = design.getExperiment(expId);

    if (EXPERIMENT_NAME_SUFFIX.equals(expKey)) {

      // Set for experiment name
      experiment.setName(value);
    } else {

      // The metadata is stored under the short key, so the duplicate check
      // must use the short key too (not the full "Exp.<id>.<key>" string)
      if (experiment.getMetadata().contains(expKey)) {
        throw new IOException(
            "There is two or more metadata with the same key \""
                + key + "\" in the experiment: " + expId + " file header.");
      }

      // Set for other experiment metadata
      experiment.getMetadata().set(expKey, value);
    }
  }

  /**
   * Read a design metadata entry from the header. The design format version
   * entry is only checked, it is not stored in the design metadata.
   * @param key the key of the design metadata
   * @param value the value of the design metadata
   * @param design the design object
   * @throws IOException if the format version is not supported or if the key
   *           is already defined in the design metadata
   */
  private void readDesignMetadata(String key, String value, Design design)
      throws IOException {

    if (DESIGN_FORMAT_VERSION_METADATA_KEY.equals(key)) {

      if (!FORMAT_VERSION.equals(value.trim())) {
        throw new IOException("Unsupported design format version: " + value);
      }
      return;
    }

    // If the field name already exist
    if (design.getMetadata().contains(key)) {
      throw new IOException("There is two or more metadata with the same key \""
          + key + "\" in design file header.");
    }

    design.getMetadata().set(key, value);
  }

  //
  // Columns parsing
  //

  /**
   * Parse one line of the sample table. The first line of the table holds the
   * column names, each following line describes one sample.
   * @param design the design object
   * @param columnNames the names of the columns, filled by this method when
   *          the first line is parsed
   * @param line the line read from the design file
   * @param firstLine true if the line is the first line of the table (the
   *          line with the column names)
   * @throws IOException if the data read is incorrect
   */
  private void parseColumns(final Design design, final List<String> columnNames,
      final String line, final boolean firstLine) throws IOException {

    final List<String> splitLine =
        GuavaCompatibility.splitToList(this.tabSplitter, line);

    if (firstLine) {

      // Save the column names
      columnNames.addAll(splitLine);

      final int sampleIdPos = columnNames.indexOf(SAMPLE_ID_FIELDNAME);

      if (sampleIdPos == -1) {
        throw new IOException("Invalid file format: "
            + "No \"" + SAMPLE_ID_FIELDNAME + "\" field found.");
      }

      // Check if the SampleId column is the first one
      if (sampleIdPos != 0) {
        throw new IOException("Invalid file format: "
            + "The \"" + SAMPLE_ID_FIELDNAME
            + "\" field is not the first field.");
      }

      final int sampleNamePos = columnNames.indexOf(SAMPLE_NAME_FIELDNAME);

      if (sampleNamePos == -1) {
        throw new IOException("Invalid file format: "
            + "No \"" + SAMPLE_NAME_FIELDNAME + "\" field found.");
      }

      // Check if the SampleName column is the second one
      if (sampleNamePos != 1) {
        throw new IOException("Invalid file format: "
            + "The \"" + SAMPLE_NAME_FIELDNAME
            + "\" field is not the second field.");
      }

      return;
    }

    // Check if the line has the same number of fields as the header line
    if (splitLine.size() != columnNames.size()) {
      throw new IOException("Invalid file format: "
          + "Found " + splitLine.size() + " fields whereas "
          + columnNames.size() + " are required in line: " + line);
    }

    // The two first columns are always the sample id and the sample name
    final String sampleId = splitLine.get(0);
    final String sampleName = splitLine.get(1);

    final Sample sample = design.addSample(sampleId);
    sample.setName(sampleName);

    // Add the sample to all the experiments
    for (Experiment e : design.getExperiments()) {
      e.addSample(sample);
    }

    final Iterator<String> nameIterator = columnNames.iterator();
    final Iterator<String> valueIterator = splitLine.iterator();

    // Iterate over the line fields
    while (nameIterator.hasNext() && valueIterator.hasNext()) {

      final String columnName = nameIterator.next();
      final String columnValue = valueIterator.next();

      // The sample id and name have already been handled
      if (SAMPLE_ID_FIELDNAME.equals(columnName)
          || SAMPLE_NAME_FIELDNAME.equals(columnName)) {
        continue;
      }

      if (columnName.startsWith(EXPERIMENT_FIELD_PREFIX)) {

        // It is an ExperimentSampleMetadata
        readExperimentSampleMetadata(columnName, columnValue, design, sample);
      } else {

        // Or a SampleMetadata
        readSampleMetadata(columnName, columnValue, design, sample);
      }
    }
  }

  /**
   * Read a sample metadata entry.
   * @param columnName the column name, used as metadata key
   * @param columnValue the value
   * @param design the design object (not used by this method)
   * @param sample the sample
   * @throws IOException if the sample already has a metadata with this key
   */
  private void readSampleMetadata(String columnName, String columnValue,
      Design design, Sample sample) throws IOException {

    // If the field name already exist
    if (sample.getMetadata().contains(columnName)) {
      throw new IOException("There is two or more metadata with the same key \""
          + columnName + "\" in design file header.");
    }

    sample.getMetadata().set(columnName, columnValue);
  }

  /**
   * Read a sample metadata entry that refers to a specific experiment.
   * @param columnName the column name
   *          ("Exp.&lt;experimentId&gt;.&lt;key&gt;")
   * @param columnValue the value
   * @param design the design object
   * @param sample the sample
   * @throws IOException if the column name has not exactly three dot
   *           separated parts, if the experiment does not exist in the design
   *           or if the metadata is already defined
   */
  private void readExperimentSampleMetadata(String columnName,
      String columnValue, Design design, Sample sample) throws IOException {

    // Split the column name
    final List<String> expField =
        GuavaCompatibility.splitToList(this.dotSplitter, columnName);

    // Check that the experiment key contains exactly 3 entries
    if (expField.size() != 3) {
      throw new IOException("The experiment key is invalid.");
    }

    final String expId = expField.get(1);
    final String expKey = expField.get(2);

    // Check if the experiment exists
    if (!design.containsExperiment(expId)) {
      throw new IOException("The experiment " + expId + " doesn't exist.");
    }

    final Experiment experiment = design.getExperiment(expId);

    // Add the sample to the experiment if not in yet
    if (!experiment.containsSample(sample)) {
      experiment.addSample(sample);
    }

    final ExperimentSample experimentSample =
        experiment.getExperimentSample(sample);
    final ExperimentSampleMetadata experimentSampleMetadata =
        experimentSample.getMetadata();

    // If the field name already exist
    if (experimentSampleMetadata.contains(expKey)) {
      throw new IOException("There is two or more metadata with the same key \""
          + expKey + "\" in design file header.");
    }

    // Add the experiment sample metadata
    experimentSampleMetadata.set(expKey, columnValue);
  }

  //
  // Read method
  //

  /**
   * Read the design. The file starts with a header made of "key=value" lines
   * and the first line that contains a tabulation starts the sample table.
   * Empty lines and lines that start with '#' or '[' are ignored. The
   * underlying stream is closed when this method returns, even if the parsing
   * fails.
   * @return a new Design object filled with the content of the stream
   * @throws IOException if an error occurs while reading or parsing the stream
   */
  @Override
  public Design read() throws IOException {

    final Design design = DesignFactory.createEmptyDesign();
    final List<String> columnNames = new ArrayList<>();

    // Buffer for the header lines that are continued on the next line
    final StringBuilder lineBuffer = new StringBuilder();

    boolean header = true;
    boolean firstLine = true;

    // try-with-resources: the reader (and so the stream) is also closed when
    // a parsing error occurs
    try (BufferedReader br = new BufferedReader(
        new InputStreamReader(this.is, Globals.DEFAULT_CHARSET))) {

      String line;

      while ((line = br.readLine()) != null) {

        // In the header, remove leading and trailing tabulations of the lines
        // that contain only one non empty field
        if (header) {
          final List<String> fields =
              GuavaCompatibility.splitToList(this.trimTabSplitter, line);
          if (fields.size() == 1) {
            line = fields.get(0);
          }
        }

        // Concatenate header lines that end with "\"
        // NOTE: a continuation on the very last line of the stream has no
        // following line, so the buffered content is never parsed
        if (header && line.endsWith("\\")) {
          lineBuffer.append(line.substring(0, line.length() - 1));
          continue;
        }

        // Get the full logical line and reset the buffer
        lineBuffer.append(line);
        line = lineBuffer.toString();
        lineBuffer.setLength(0);

        final String trimmedLine = line.trim();

        // If the line is empty or begins with # or [ this line is ignored
        if ("".equals(trimmedLine)
            || trimmedLine.startsWith("#") || trimmedLine.startsWith("[")) {
          continue;
        }

        // The first line with a tabulation is the start of the sample table
        if (header && line.indexOf('\t') != -1) {
          header = false;
        }

        if (header) {

          // Read the header
          parseHeader(design, line);
        } else {

          // Read the columns
          parseColumns(design, columnNames, line, firstLine);
          firstLine = false;
        }
      }
    }

    return design;
  }

  //
  // Constructors
  //

  /**
   * Public constructor.
   * @param file file to read
   * @throws FileNotFoundException if the file cannot be found
   */
  public Eoulsan2DesignReader(final File file) throws FileNotFoundException {

    checkNotNull(file, "the file argument cannot be null");

    this.is = new FileInputStream(file);
  }

  /**
   * Public constructor.
   * @param file file to read
   * @throws IOException if the stream cannot be opened
   */
  public Eoulsan2DesignReader(final DataFile file) throws IOException {

    checkNotNull(file, "the file argument cannot be null");

    this.is = file.open();
  }

  /**
   * Public constructor. The stream is only stored by this constructor, it is
   * read and closed by the read() method.
   * @param is Input stream to read
   * @throws IOException declared in the signature, but this constructor opens
   *           no resource
   */
  public Eoulsan2DesignReader(final InputStream is) throws IOException {

    checkNotNull(is, "the is argument cannot be null");

    this.is = is;
  }

  /**
   * Public constructor.
   * @param filename File to read
   * @throws FileNotFoundException if the file doesn't exist
   */
  public Eoulsan2DesignReader(final String filename)
      throws FileNotFoundException {

    checkNotNull(filename, "the filename argument cannot be null");

    this.is = new FileInputStream(filename);
  }
}