package uk.ac.rhul.cs.cl1.seeding; import java.io.BufferedReader; import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.io.LineNumberReader; import java.util.StringTokenizer; import java.util.TreeSet; import uk.ac.rhul.cs.cl1.MutableNodeSet; import uk.ac.rhul.cs.graph.Graph; import com.sosnoski.util.hashmap.StringIntHashMap; /** * Seed generator where seeds will be generated according to the contents of * a file. * * The file must contain one line for each seed to be used. Lines must contain * node names separated by spaces. If a line containing a single asterisk (*) is * found in the seed file, this means that after all the predefined seeds have * been processed, the remaining nodes will also be considered as singleton * seeds. * * @author tamas * */ public class FileBasedSeedGenerator extends SeedGenerator { /** The name of the file that will be used */ private String filename; /** The number of lines in the file */ private int size; /** Whether to generate unused nodes as seeds after we finished processing the file */ private boolean generateUnusedNodesAsSeeds = false; /** Whether to ignore disconnected seeds */ private boolean disconnectedSeedsIgnored = true; /** * Constructs a seed generator backed by the given file */ public FileBasedSeedGenerator(Graph graph, String filename) throws IOException { super(graph); this.filename = filename; /* Count the number of seeds */ File f = new File(this.filename); LineNumberReader reader = new LineNumberReader(new FileReader(f)); String nextLine; while ((nextLine = reader.readLine()) != null) { if ("*".equals(nextLine)) generateUnusedNodesAsSeeds = true; } size = reader.getLineNumber(); reader.close(); } /** * Internal iterator class that will be used when calling iterator() */ private class IteratorImpl extends SeedIterator { /** Reader to read the file */ BufferedReader reader = null; /** Line that was read the last time */ String line = null; /** The current nodeset that will be returned with the next call to next() */ MutableNodeSet currentNodeSet = null; /** A mutable node set that contains no nodes */ MutableNodeSet emptyNodeSet; /** A map mapping node names to indices */ StringIntHashMap namesToIndices = new StringIntHashMap(); /** Nodes that have not been used so far */ TreeSet<Integer> unusedNodes; /** Constructs the iterator */ public IteratorImpl(String filename) { emptyNodeSet = new MutableNodeSet(graph); unusedNodes = new TreeSet<Integer>(); File f = new File(filename); /* Populate the mapping from node names to node indices */ int n = graph.getNodeCount(); for (int i = 0; i < n; i++) { namesToIndices.add(graph.getNodeName(i), i); unusedNodes.add(i); } try { reader = new BufferedReader(new FileReader(f)); } catch (FileNotFoundException ex) { ex.printStackTrace(); return; } readNextLine(); processLine(); } private void readNextLine() { try { line = reader.readLine(); } catch (IOException ex) { ex.printStackTrace(); line = null; } } private void processLine() { currentNodeSet = emptyNodeSet.clone(); if (line == null) { if (generateUnusedNodesAsSeeds) { /* No more lines in file, return the unused nodes */ Integer id = unusedNodes.pollFirst(); if (id == null) currentNodeSet = null; else currentNodeSet.add(id); } else { currentNodeSet = null; } return; } boolean isConnected = false; while (!isConnected) { StringTokenizer st = new StringTokenizer(line); /* Process current line */ while (st.hasMoreTokens()) { String name = st.nextToken(); int idx = namesToIndices.get(name); if (idx >= 0) { currentNodeSet.add(idx); unusedNodes.remove(idx); } // TODO: error reporting here } /* Read next line */ readNextLine(); /* Check whether the nodeset is non-empty and connected */ if (areDisconnectedSeedsIgnored()) { isConnected = currentNodeSet.size() > 0 && currentNodeSet.isConnected(); } else { isConnected = true; } if (!isConnected) { currentNodeSet.clear(); if (line == null) { if (generateUnusedNodesAsSeeds) { Integer id = unusedNodes.pollFirst(); currentNodeSet.add(id); } else currentNodeSet = null; return; } } } if (isConnected) return; currentNodeSet = null; } public boolean hasNext() { return (currentNodeSet != null); } public Seed next() { Seed result = new Seed(currentNodeSet); processLine(); return result; } } /** * Returns whether disconnected seeds will be ignored or not */ public boolean areDisconnectedSeedsIgnored() { return disconnectedSeedsIgnored; } public SeedIterator iterator() { return new IteratorImpl(filename); } /** * Sets whether disconnected seeds should be ignored or not */ public void setDisconnectedSeedsIgnored(boolean disconnectedSeedsIgnored) { this.disconnectedSeedsIgnored = disconnectedSeedsIgnored; } public int size() { if (generateUnusedNodesAsSeeds) return this.graph.getNodeCount(); return size; } }