package nl.tudelft.lifetiles.graph.model; import java.io.File; import java.io.IOException; import java.nio.file.Files; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.Map; import java.util.Set; import nl.tudelft.lifetiles.core.util.Timer; import nl.tudelft.lifetiles.sequence.model.DefaultSequence; import nl.tudelft.lifetiles.sequence.model.SegmentString; import nl.tudelft.lifetiles.sequence.model.Sequence; import nl.tudelft.lifetiles.sequence.model.SequenceSegment; /** * @author Rutger van den Berg * */ public class DefaultGraphParser implements GraphParser { /** * Index of end position in the vertex descriptor. */ private static final int END_POS = 3; /** * Index of starting position in the vertex descriptor. */ private static final int START_POS = 2; /** * Index of sources in the vertex descriptor. */ private static final int SOURCES_POS = 1; /** * Map containing all sequences. */ private final Map<String, Sequence> sequences; /** * Creates a new graph parser. */ public DefaultGraphParser() { sequences = new HashMap<>(); } /** * @param descriptor * Description line in the vertex file. * @param content * Content line in the vertex file. * @return a new SequenceSegment */ private SequenceSegment createSegment(final String descriptor, final String content) { if (descriptor.charAt(0) != '>') { throw new IllegalArgumentException(); } String[] desc = descriptor.split("\\|"); String[] sources = desc[SOURCES_POS].split(","); Set<Sequence> currentSequences = new HashSet<>(); for (String sequencename : sources) { sequencename = sequencename.trim(); if (!sequences.containsKey(sequencename)) { // This is a parser. We do actually need to instantiate here. @SuppressWarnings("PMD.AvoidInstantiatingObjectsInLoops") Sequence newSequence = new DefaultSequence(sequencename); sequences.put(sequencename, newSequence); } currentSequences.add(sequences.get(sequencename)); } SequenceSegment segment = new SequenceSegment(currentSequences, Integer.parseInt(desc[START_POS].trim()), Integer.parseInt(desc[END_POS].trim()), new SegmentString( content.trim())); for (Sequence s : currentSequences) { s.appendSegment(segment); } return segment; } /** * @return A map of sequences. */ @Override public Map<String, Sequence> getSequences() { if (sequences.isEmpty()) { throw new UnsupportedOperationException("Graph not parsed yet."); } return sequences; } /** * @param edgefile * The file to parse * @param graph * The graph to which the edges will be added. * @throws IOException * When there is an error reading the specified file. */ private void parseEdges(final File edgefile, final Graph<SequenceSegment> graph) throws IOException { Iterator<String> iterator = Files.lines(edgefile.toPath()).iterator(); String line; while (iterator.hasNext()) { line = iterator.next(); String[] edge = line.split(" "); graph.addEdge(Integer.parseInt(edge[0]), Integer.parseInt(edge[1])); } } /** * @param filename * The basename of the file to parse. * @param gfact * The graph factory to use to produce the graph. * @return a new graph containing the parsed information. * @throws IOException * when there is an error while reading the file. * @throws IllegalArgumentException * when one of the files does not exist or cannot be read. * */ @Override public Graph<SequenceSegment> parseGraph(final File vertexfile, final File edgefile, final GraphFactory<SequenceSegment> gfact) throws IOException { Timer timer = Timer.getAndStart(); Graph<SequenceSegment> graph = gfact.getGraph(); parseVertices(vertexfile, graph); parseEdges(edgefile, graph); timer.stopAndLog("Graph parsing"); return graph; } /** * @param vertexfile * The file to parse. * @param graph * The graph to which the edges will be added. * @throws IOException * When there is an error reading the file. */ private void parseVertices(final File vertexfile, final Graph<SequenceSegment> graph) throws IOException { Iterator<String> iterator = Files.lines(vertexfile.toPath()).iterator(); while (iterator.hasNext()) { graph.addVertex(createSegment(iterator.next(), iterator.next())); } } }