package nl.tudelft.lifetiles.sequence.model;

import java.io.File;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.util.AbstractMap;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.stream.Stream;

import nl.tudelft.lifetiles.core.util.IteratorUtils;

/**
 * The parser for the sequence meta data.
 *
 * <p>
 * The input is a tab-separated text file. Its first line is a header whose
 * first cell must be <code>##ID</code>; the remaining header cells are the
 * column names. Every following line holds a sequence identifier followed
 * by the values for those columns.
 * </p>
 *
 * @author Joren Hammudoglu
 *
 */
public class SequenceMetaParser {

    /**
     * The name of the sequence ID column.
     */
    private static final String ID_COLUMN = "##ID";

    /**
     * The separator between the cells of a line.
     */
    private static final String SEPARATOR = "\t";

    /**
     * The column names, excluding the <code>##ID</code> column.
     */
    private List<String> columns;

    /**
     * The parsed data: sequence identifier to a map of column name to value.
     */
    private Map<String, Map<String, String>> data;

    /**
     * Parse the data.
     *
     * <p>
     * The parser state is only replaced once the whole file has been parsed
     * successfully. When this method throws, the result of an earlier
     * successful parse (if any) is left untouched. When the same identifier
     * occurs on several lines, the last line wins.
     * </p>
     *
     * @param file
     *            the file to parse
     * @throws IOException
     *             when the file cannot be read, is empty, has an invalid
     *             header, or contains a line that does not fit the header
     */
    public void parse(final File file) throws IOException {
        // Files.lines keeps the file open until the stream is closed, so it
        // has to be managed by try-with-resources.
        try (Stream<String> lines = Files.lines(file.toPath())) {
            Iterator<String> lineStream = lines.iterator();
            if (!lineStream.hasNext()) {
                throw new IOException("Missing column header in " + file);
            }

            List<String> parsedColumns = parseHeader(lineStream.next());
            Map<String, Map<String, String>> parsedData = new HashMap<>();
            for (String line : IteratorUtils.toIterable(lineStream)) {
                Entry<String, Map<String, String>> parsed = parseLine(line,
                        parsedColumns);
                parsedData.put(parsed.getKey(), parsed.getValue());
            }

            // Publish both fields together, and only after success.
            columns = parsedColumns;
            data = parsedData;
        } catch (UncheckedIOException exception) {
            // Read errors during iteration are reported unchecked by the
            // stream; rethrow them as the IOException this method declares.
            throw exception.getCause();
        }
    }

    /**
     * Get the column names.
     *
     * @return a list of column names, excluding the <code>##ID</code>
     *         column. This is the parser's own list, not a copy.
     * @throws IllegalStateException
     *             when no file has been parsed yet
     */
    public List<String> getColumns() {
        maybeThrowNotParsed();
        return columns;
    }

    /**
     * Get the parsed data.
     *
     * @return the data, mapping each sequence identifier to a map of column
     *         name to value. This is the parser's own map, not a copy.
     * @throws IllegalStateException
     *             when no file has been parsed yet
     */
    public Map<String, Map<String, String>> getData() {
        maybeThrowNotParsed();
        return data;
    }

    /**
     * @return true iff the data is parsed, else false.
     */
    public boolean isParsed() {
        return data != null;
    }

    /**
     * Throw a {@link IllegalStateException} when the data is not parsed yet.
     */
    private void maybeThrowNotParsed() {
        if (!isParsed()) {
            throw new IllegalStateException("Data not parsed.");
        }
    }

    /**
     * Parse the header line into column names.
     *
     * @param headerLine
     *            the first line of the file
     * @return the column names, excluding the <code>##ID</code> column
     * @throws IOException
     *             when the header cannot be parsed
     */
    private List<String> parseHeader(final String headerLine)
            throws IOException {
        List<String> allColumns = Arrays.asList(headerLine.split(SEPARATOR));
        // split() drops trailing empty cells, so a header that consists of
        // separators only yields no cells at all.
        String firstColumn = "";
        if (!allColumns.isEmpty()) {
            firstColumn = allColumns.get(0);
        }
        if (!firstColumn.equals(ID_COLUMN)) {
            throw new IOException("Invalid column header :" + firstColumn);
        }
        return allColumns.subList(1, allColumns.size());
    }

    /**
     * Parse a single line.
     *
     * <p>
     * A line may hold fewer values than there are columns (split() drops
     * trailing empty cells); the columns without a value are then absent
     * from the value map. A line with more values than columns is rejected.
     * </p>
     *
     * @param line
     *            the line.
     * @param columnNames
     *            the column names from the header, excluding the
     *            <code>##ID</code> column
     * @return an entry of identifier and a value map of columns to values.
     * @throws IOException
     *             when the line has no identifier or has more values than
     *             there are columns
     */
    private Entry<String, Map<String, String>> parseLine(final String line,
            final List<String> columnNames) throws IOException {
        List<String> values = Arrays.asList(line.split(SEPARATOR));
        if (values.isEmpty()) {
            throw new IOException("Missing sequence identifier in line: "
                    + line);
        }

        // The first cell is the identifier, the rest are column values.
        int valueCount = values.size() - 1;
        if (valueCount > columnNames.size()) {
            throw new IOException("Expected at most " + columnNames.size()
                    + " values but found " + valueCount + " in line: "
                    + line);
        }

        String identifier = values.get(0);
        Map<String, String> result = new HashMap<>(columnNames.size());
        for (int index = 0; index < valueCount; index++) {
            result.put(columnNames.get(index), values.get(index + 1));
        }
        return new AbstractMap.SimpleEntry<String, Map<String, String>>(
                identifier, result);
    }
}