package fr.ens.biologie.genomique.eoulsan.splitermergers;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;
import fr.ens.biologie.genomique.eoulsan.EoulsanException;
import fr.ens.biologie.genomique.eoulsan.EoulsanLogger;
import fr.ens.biologie.genomique.eoulsan.core.Parameter;
import fr.ens.biologie.genomique.eoulsan.data.DataFile;
import fr.ens.biologie.genomique.eoulsan.data.DataFormat;
import fr.ens.biologie.genomique.eoulsan.data.DataFormats;
/**
 * This class defines a merger for expression files.
 * <p>
 * Each input file is read as tab-separated text: the first line is treated as
 * a header and ignored, and every following line is expected to contain an
 * identifier, a tabulation and an integer count. The counts of all the input
 * files are summed per identifier and written to a single output file.
 * Identifiers that were only seen with a count of zero are kept in the output
 * with a count of zero.
 * @author Laurent Jourdren
 * @since 2.0
 */
public class ExpressionMerger implements Merger {

  @Override
  public DataFormat getFormat() {

    return DataFormats.EXPRESSION_RESULTS_TSV;
  }

  /**
   * Configure the merger. This merger does not accept any parameter.
   * @param conf the parameters of the merger
   * @throws EoulsanException if at least one parameter is provided
   */
  @Override
  public void configure(final Set<Parameter> conf) throws EoulsanException {

    // The merge does not need any parameter: reject the first one found
    if (!conf.isEmpty()) {

      final Parameter p = conf.iterator().next();

      throw new EoulsanException("Unknown parameter for "
          + getFormat().getName() + " merger: " + p.getName());
    }
  }

  /**
   * Merge several expression files into one file.
   * @param inFileIterator iterator over the files to merge
   * @param outFile the file to create with the summed counts
   * @throws IOException if an error occurs while reading an input file or
   *           while writing the output file
   */
  @Override
  public void merge(final Iterator<DataFile> inFileIterator, DataFile outFile)
      throws IOException {

    // Sum of the counts for each identifier
    final Multiset<String> counts = HashMultiset.create();

    // Identifiers seen at least once with a count of zero. A Multiset cannot
    // hold an element with zero occurrences, so they are tracked separately
    final Set<String> emptyCounts = new HashSet<>();

    while (inFileIterator.hasNext()) {

      // Get input file
      final DataFile inFile = inFileIterator.next();

      EoulsanLogger.getLogger()
          .info("Merge " + inFile.getName() + " to " + outFile.getName());

      final int skipped = readCounts(inFile, counts, emptyCounts);

      // Report the ignored lines once per file instead of dropping them
      // silently
      if (skipped > 0) {
        EoulsanLogger.getLogger().warning("Skipped "
            + skipped + " line(s) with an invalid count in "
            + inFile.getName());
      }
    }

    // Write the result file
    try (Writer writer =
        new OutputStreamWriter(outFile.create(), StandardCharsets.UTF_8)) {

      writer.write(ExpressionSplitter.EXPRESSION_FILE_HEADER);

      // Write the non empty counts
      for (Multiset.Entry<String> e : counts.entrySet()) {

        final String id = e.getElement();

        // The identifier has a non zero total: it must not be written again
        // as an empty count
        emptyCounts.remove(id);

        // Write the entry
        writer.write(id + '\t' + e.getCount() + '\n');
      }

      // Write the identifiers that only have zero counts
      for (String id : emptyCounts) {
        writer.write(id + "\t0\n");
      }
    }
  }

  /**
   * Read an expression file and add its counts to the accumulators.
   * @param inFile the file to read
   * @param counts the multiset where the counts are summed
   * @param emptyCounts the set where identifiers with a zero count are stored
   * @return the number of lines ignored because their count was not a
   *         non-negative integer
   * @throws IOException if an error occurs while reading the file
   */
  private static int readCounts(final DataFile inFile,
      final Multiset<String> counts, final Set<String> emptyCounts)
      throws IOException {

    int skipped = 0;
    boolean first = true;

    // Use an explicit charset so that the result does not depend on the
    // platform default encoding
    try (BufferedReader reader = new BufferedReader(
        new InputStreamReader(inFile.open(), StandardCharsets.UTF_8))) {

      String line;

      while ((line = reader.readLine()) != null) {

        // Do not handle the header
        if (first) {
          first = false;
          continue;
        }

        final int tabPos = line.indexOf('\t');

        // Do not handle empty lines or lines without a count column
        if (tabPos == -1) {
          continue;
        }

        final String id = line.substring(0, tabPos).trim();
        final int count;

        try {
          count = Integer.parseInt(line.substring(tabPos + 1).trim());
        } catch (NumberFormatException e) {
          // The count is not an integer: ignore the line but keep track of it
          skipped++;
          continue;
        }

        // Multiset.add() rejects negative occurrences with an
        // IllegalArgumentException, so handle a negative count as any other
        // invalid line instead of aborting the whole merge
        if (count < 0) {
          skipped++;
          continue;
        }

        if (count == 0) {
          emptyCounts.add(id);
        }

        // Adding zero occurrences leaves the multiset unchanged
        counts.add(id, count);
      }
    }

    return skipped;
  }
}