package org.myrobotlab.service; import java.io.IOException; import java.nio.file.FileVisitResult; import java.nio.file.FileVisitor; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.nio.file.attribute.BasicFileAttributes; import org.myrobotlab.document.Document; import org.myrobotlab.document.connector.AbstractConnector; import org.myrobotlab.document.connector.ConnectorState; import org.myrobotlab.document.transformer.ConnectorConfig; import org.myrobotlab.framework.ServiceType; import org.myrobotlab.logging.LoggerFactory; import org.myrobotlab.service.interfaces.DocumentPublisher; import org.slf4j.Logger; public class FileConnector extends AbstractConnector implements DocumentPublisher, FileVisitor<Path> { public final static Logger log = LoggerFactory.getLogger(FileConnector.class.getCanonicalName()); private static final long serialVersionUID = 1L; private String directory; // TODO: add wildcard includes/excludes // TODO: add file path includes/excludes private boolean interrupted = false; public FileConnector(String name) { super(name); } @Override public void setConfig(ConnectorConfig config) { // TODO Auto-generated method stub log.info("Set Config not yet implemented"); } @Override public void startCrawling() { state = ConnectorState.RUNNING; Path startPath = Paths.get(directory); try { Files.walkFileTree(startPath, this); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } log.info("File Connector finished walking the tree."); // TODO: should we flush here immediately? state = ConnectorState.STOPPED; } @Override public void stopCrawling() { interrupted = true; state = ConnectorState.INTERRUPTED; } @Override public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) throws IOException { return FileVisitResult.CONTINUE; } @Override public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { System.out.println(file); if (interrupted) { state = ConnectorState.INTERRUPTED; return FileVisitResult.TERMINATE; } String docId = getDocIdPrefix() + file.toFile().getAbsolutePath(); Document doc = new Document(docId); doc.setField("last_modified", attrs.lastModifiedTime()); doc.setField("created_date", attrs.creationTime()); doc.setField("filename", file.toFile().getAbsolutePath()); doc.setField("size", attrs.size()); // TODO: potentially add a byte array of the file // or maybe an input stream or other handle to the file. feed(doc); return FileVisitResult.CONTINUE; } @Override public FileVisitResult visitFileFailed(Path file, IOException exc) throws IOException { throw exc; } @Override public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException { if (exc != null) { throw exc; } return FileVisitResult.CONTINUE; } public String getDirectory() { return directory; } public void setDirectory(String directory) { this.directory = directory; } /** * This static method returns all the details of the class without it having * to be constructed. It has description, categories, dependencies, and peer * definitions. * * @return ServiceType - returns all the data * */ static public ServiceType getMetaData() { ServiceType meta = new ServiceType(FileConnector.class.getCanonicalName()); meta.addDescription("This connector will scan all the files in a directory and production documents"); meta.addCategory("ingest"); return meta; } }