/*************************************************************************
* *
* This file is part of the 20n/act project. *
* 20n/act enables DNA prediction for synthetic biology/bioengineering. *
* Copyright (C) 2017 20n Labs, Inc. *
* *
* Please direct all queries to act@20n.com. *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* *
*************************************************************************/
package com.act.lcms.db.io.parser;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Parses a tab-separated plate composition file into plate-level properties and per-well composition tables.
 *
 * <p>Lines are classified in this order:
 * <ul>
 *   <li>a line starting with {@code >>} opens a composition table; its first field (minus the {@code >>}) is the
 *       table name and the remaining fields are the column labels;</li>
 *   <li>a line starting with {@code >} is a plate attribute of the form {@code >name<TAB>value};</li>
 *   <li>a blank (whitespace-only) line closes the current composition table;</li>
 *   <li>any other line, while a table is open, is a table row whose first field is the row label.</li>
 * </ul>
 * Any other line encountered outside of a table is ignored.
 *
 * <p>Parsed data is accumulated in instance-level maps, so results from successive calls to
 * {@link #processFile(File)} on the same instance are merged.
 */
public class PlateCompositionParser {
  // TODO: factor out the well composition tables into a common parser that can be refined by type after some practice.

  /** Plate attribute name (without the leading {@code >}) to attribute value. */
  private final Map<String, String> plateProperties = new HashMap<>();
  /** Table name to a map of (row label, column label) coordinates to cell content. */
  private final Map<String, Map<Pair<String, String>, String>> compositionTables = new HashMap<>();
  /** (row label, column label) coordinates to zero-based (row index, column index) within a table. */
  private final Map<Pair<String, String>, Pair<Integer, Integer>> coordinatesToIndices = new HashMap<>();

  /**
   * Reads a plate composition file and adds its attributes and composition tables to this parser's maps.
   *
   * @param inFile the tab-separated plate composition file to read
   * @throws IOException if the file cannot be opened or read
   * @throws RuntimeException if a table header, plate attribute, or table row is malformed
   */
  public void processFile(File inFile) throws IOException {
    // Decode explicitly as UTF-8.  FileReader(File) uses the platform default charset, which makes the parse result
    // depend on the machine it runs on.  InputStreamReader keeps FileReader's lenient handling of malformed bytes.
    try (BufferedReader br = new BufferedReader(
        new InputStreamReader(new FileInputStream(inFile), StandardCharsets.UTF_8))) {
      String compositionTableName = null;
      // Non-null exactly while we are inside a composition table; doubles as the "reading a table" flag.
      List<String> compositionTableColumns = null;
      int rowIndexInCompositionTable = 0;

      String line;
      while ((line = br.readLine()) != null) {
        if (line.startsWith(">>")) {
          // TODO: add max table width based on plate type.
          String[] fields = StringUtils.splitByWholeSeparatorPreserveAllTokens(line, "\t");
          if (fields.length < 2) {
            throw new RuntimeException(String.format("Found malformed composition table header: %s", line));
          }
          // Index 0 of the column list is the table-name field, so column i lines up with field i of each row.
          compositionTableColumns = Arrays.asList(fields);
          compositionTableName = fields[0].replaceFirst("^>>", "");
          rowIndexInCompositionTable = 0;
        } else if (line.startsWith(">")) {
          storePlateAttribute(line);
        } else if (line.trim().isEmpty()) {
          // Assume a blank line terminates a composition table.
          compositionTableName = null;
          compositionTableColumns = null;
          rowIndexInCompositionTable = 0;
        } else if (compositionTableColumns != null) {
          storeCompositionTableRow(compositionTableName, compositionTableColumns, rowIndexInCompositionTable, line);
          rowIndexInCompositionTable++;
        }
      }
    }
  }

  /**
   * Parses a {@code >name<TAB>value} line and records it in the plate properties.
   *
   * @param line a line starting with a single {@code >}
   * @throws RuntimeException if the line does not split into exactly a name and a value
   */
  private void storePlateAttribute(String line) {
    // Split into at most two parts so that the value keeps any further tabs it contains.
    String[] fields = StringUtils.split(line, "\t", 2);
    if (fields.length != 2) {
      System.err.format("Too few fields: %s\n", StringUtils.join(fields, ", "));
      System.err.flush();
      throw new RuntimeException(String.format("Found malformed plate attribute line: %s", line));
    }
    plateProperties.put(fields[0].replaceFirst("^>", ""), fields[1]);
  }

  /**
   * Parses one row of a composition table and records its cells and their indices.
   *
   * @param tableName name of the table the row belongs to
   * @param columns the header fields of the table; index 0 is the table-name field
   * @param rowIndex zero-based index of this row within the table
   * @param line the raw row text
   * @throws RuntimeException if the row does not have exactly as many fields as the header
   */
  private void storeCompositionTableRow(String tableName, List<String> columns, int rowIndex, String line) {
    // This split method with a very long name preserves blanks and doesn't merge consecutive delimiters.
    String[] fields = StringUtils.splitByWholeSeparatorPreserveAllTokens(line, "\t");
    // The split ^^ preserves blanks, so we can exactly compare the lengths.
    if (fields.length != columns.size()) {
      throw new RuntimeException(
          String.format("Found %d fields where %d were expected in composition table line:\n '%s'\n",
              fields.length, columns.size(), line));
    }

    // Looked up (and created if missing) lazily, so a row with no labeled columns never creates an empty table.
    Map<Pair<String, String>, String> thisTable = null;
    for (int i = 1; i < fields.length; i++) {
      String columnLabel = columns.get(i);
      // Skip cells whose column header is empty; note that empty cell values are still stored.
      if (columnLabel == null || columnLabel.isEmpty()) {
        continue;
      }

      Pair<String, String> coordinates = Pair.of(fields[0], columnLabel);
      // Note: assumes every row appears in each composition table (even empty ones).
      coordinatesToIndices.put(coordinates, Pair.of(rowIndex, i - 1));

      if (thisTable == null) {
        thisTable = compositionTables.get(tableName);
        if (thisTable == null) {
          thisTable = new HashMap<>();
          compositionTables.put(tableName, thisTable);
        }
      }
      // TODO: add paranoid check for repeated keys? Shouldn't be possible unless tables are repeated.
      thisTable.put(coordinates, fields[i]);
    }
  }

  /**
   * @return the live (not copied) map of plate attribute names to values parsed so far
   */
  public Map<String, String> getPlateProperties() {
    return plateProperties;
  }

  /**
   * @return the live (not copied) map of table name to (row label, column label) to cell content
   */
  public Map<String, Map<Pair<String, String>, String>> getCompositionTables() {
    return compositionTables;
  }

  /**
   * @return the live (not copied) map of (row label, column label) to zero-based (row index, column index)
   */
  public Map<Pair<String, String>, Pair<Integer, Integer>> getCoordinatesToIndices() {
    return coordinatesToIndices;
  }
}