/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package annis.corpuspathsearch;
import annis.utils.ANNISFormatHelper;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Searches for ANNIS corpora in file system locations.
 *
 * <p>Each configured root path is walked recursively. Every readable regular
 * file named {@code corpus.tab} is handed to
 * {@link ANNISFormatHelper#extractToplevelCorpusNames} and the returned name is
 * recorded together with the file it came from. If the same name is found more
 * than once, the file visited last replaces the earlier one.
 *
 * <p>NOTE(review): the directory walk has no protection against cyclic
 * directory structures (e.g. symbolic link loops) - confirm that the root
 * paths used by callers cannot contain them.
 *
 * @author Thomas Krause <krauseto@hu-berlin.de>
 */
public class Search
{
  private static final Logger log = LoggerFactory.getLogger(Search.class);

  /** File name that identifies a corpus definition file during the walk. */
  private static final String CORPUS_FILE_NAME = "corpus.tab";

  /** Locations to start searching from; the caller's list is kept by reference. */
  private final List<File> rootPaths;

  /** Found corpora: name reported by the helper mapped to its corpus.tab file, sorted by name. */
  private final Map<String, File> corpusPaths;

  /** Set to {@code true} once {@link #startSearch()} has completed. */
  private boolean wasSearched;

  /**
   * Creates a search over the given locations. Nothing is read from disk
   * until {@link #startSearch()} is called.
   *
   * @param rootPaths files or directories to search; a {@code null} list is
   *                  treated like an empty one, {@code null} entries are skipped
   */
  public Search(List<File> rootPaths)
  {
    this.rootPaths = rootPaths;
    this.corpusPaths = new TreeMap<>();
    this.wasSearched = false;
  }

  /**
   * Discards any previous result, searches all root paths again and marks
   * this instance as searched.
   */
  public void startSearch()
  {
    corpusPaths.clear();
    // Tolerate a null list so a misconfigured caller gets an empty result
    // instead of a NullPointerException.
    if (rootPaths != null)
    {
      for (File f : rootPaths)
      {
        searchPath(f);
      }
    }
    wasSearched = true;
  }

  /**
   * Recursively visits {@code path}: directories are descended into, regular
   * files named {@code corpus.tab} are registered as corpora. Unreadable or
   * {@code null} paths are ignored.
   *
   * @param path file or directory to inspect, may be {@code null}
   */
  private void searchPath(File path)
  {
    if (path == null || !path.canRead())
    {
      return;
    }

    if (path.isDirectory())
    {
      // listFiles() returns null when the directory cannot be listed (I/O
      // error, or it vanished between the checks above and this call).
      File[] children = path.listFiles();
      if (children == null)
      {
        log.warn("could not list contents of {}", path.getPath());
        return;
      }
      // search all subdirectories
      for (File f : children)
      {
        log.debug("searching in {} for corpora", f.getPath());
        searchPath(f);
      }
    }
    else if (path.isFile() && CORPUS_FILE_NAME.equals(path.getName()))
    {
      registerCorpus(path);
    }
  }

  /**
   * Reads the corpus name from a {@code corpus.tab} file and stores it in the
   * result map. Read failures are logged and the file is skipped.
   *
   * @param corpusTab the {@code corpus.tab} file to read
   */
  private void registerCorpus(File corpusTab)
  {
    // try-with-resources guarantees the file handle is released even if the
    // helper throws; closing again is harmless should the helper close it too.
    try (FileInputStream in = new FileInputStream(corpusTab))
    {
      String toplevel = ANNISFormatHelper.extractToplevelCorpusNames(in);
      if (toplevel == null)
      {
        // A TreeMap with natural ordering rejects null keys, so skip the
        // file rather than abort the whole search.
        log.warn("no toplevel corpus name found in {}", corpusTab.getPath());
        return;
      }
      corpusPaths.put(toplevel, corpusTab);
    }
    catch (IOException ex)
    {
      log.error("could not read corpus file " + corpusTab.getPath(), ex);
    }
  }

  /**
   * Returns the corpora found by the last {@link #startSearch()}.
   *
   * @return the internal, mutable map from corpus name to its
   *         {@code corpus.tab} file; it is cleared and refilled by every
   *         call to {@link #startSearch()}
   */
  public Map<String, File> getCorpusPaths()
  {
    return corpusPaths;
  }

  /**
   * @return {@code true} if {@link #startSearch()} has completed, or the flag
   *         was set explicitly via {@link #setWasSearched(boolean)}
   */
  public boolean isWasSearched()
  {
    return wasSearched;
  }

  /**
   * Overrides the "was searched" flag without touching the stored result.
   *
   * @param wasSearched new value of the flag
   */
  public void setWasSearched(boolean wasSearched)
  {
    this.wasSearched = wasSearched;
  }

  /**
   * Renders the result as one line per corpus, in key order: corpus name, a
   * tab, the absolute path of the directory containing its
   * {@code corpus.tab}, and a newline.
   *
   * @return the formatted result, empty if no corpus is stored
   */
  @Override
  public String toString()
  {
    StringBuilder sb = new StringBuilder();
    for (Map.Entry<String, File> e : corpusPaths.entrySet())
    {
      // Make the file absolute before asking for its parent: a relative
      // path without a directory component has no parent and would
      // otherwise cause a NullPointerException.
      File directory = e.getValue().getAbsoluteFile().getParentFile();
      sb.append(e.getKey());
      sb.append("\t");
      sb.append(directory.getAbsolutePath());
      sb.append("\n");
    }
    return sb.toString();
  }
}