/**
* villemos solutions [space^] (http://www.villemos.com)
* Probe. Send. Act. Emergent solution.
* Copyright 2011 Gert Villemos
* All Rights Reserved.
*
* Released under the Apache license, version 2.0 (do whatever
* you want, just don't claim ownership).
*
* NOTICE: All information contained herein is, and remains
* the property of villemos solutions, and its suppliers
* if any. The intellectual and technical concepts contained
* herein are proprietary to villemos solutions
* and its suppliers and may be covered by European and Foreign Patents,
* patents in process, and are protected by trade secret or copyright law.
*
* Dissemination of this information or reproduction of this material
* is strictly forbidden unless prior written permission is obtained
* from villemos solutions.
*
* And it wouldn't be nice either.
*
*/
package com.villemos.ispace.aperture;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import org.apache.camel.Body;
import org.apache.camel.Handler;
import org.apache.camel.Message;
import org.semanticdesktop.aperture.accessor.impl.DefaultDataAccessorRegistry;
import org.semanticdesktop.aperture.crawler.CrawlerHandler;
import org.semanticdesktop.aperture.crawler.filesystem.FileSystemCrawler;
import org.semanticdesktop.aperture.datasource.filesystem.FileSystemDataSource;
import org.semanticdesktop.aperture.rdf.RDFContainer;
import org.semanticdesktop.aperture.rdf.impl.RDFContainerFactoryImpl;
/**
 * Camel bean that runs an Aperture file system crawl rooted at the file received
 * as the message body and returns the Camel messages collected during that crawl.
 *
 * <p>Messages are accumulated in {@link #messages}. The same list instance is
 * passed to the {@code ExtendedCrawlerHandler} created for this processor
 * (presumably the handler appends to it while crawling; verify against
 * {@code ExtendedCrawlerHandler}) and can also be appended to through
 * {@link #addMessage(Message)}.
 */
public class DocumentProcessor {

    /**
     * Accumulator for the messages produced by the current crawl. The instance is
     * shared with {@link #handler}, so it is only ever cleared, never replaced.
     */
    protected List<Message> messages = new ArrayList<Message>();

    /** Crawler callback that is constructed with the shared {@link #messages} list. */
    protected CrawlerHandler handler = new ExtendedCrawlerHandler(messages);

    /**
     * Crawls the given file and returns the messages collected while doing so.
     *
     * <p>Files whose name starts with {@code "~"} are skipped and yield an empty list.
     *
     * <p>The returned list is a snapshot that is independent of the internal
     * accumulator, so a later invocation (which clears the accumulator) cannot
     * empty a result that a downstream consumer is still iterating.
     *
     * @param file the file to crawl, taken from the Camel message body
     * @return a new list holding the messages collected for {@code file}; empty
     *         when the file is skipped. The route must contain a splitter to
     *         process each document.
     */
    @Handler
    public synchronized List<Message> processDocument(@Body File file) {
        /* Make sure we have no messages left from a previous run. */
        messages.clear();

        if (file.getName().startsWith("~")) {
            return new ArrayList<Message>();
        }

        RDFContainerFactoryImpl rdfFactory = new RDFContainerFactoryImpl();
        RDFContainer configuration = rdfFactory.newInstance("source:testsource");

        try {
            /* Note that the configuration must be set prior to setting the root folder. */
            FileSystemDataSource source = new FileSystemDataSource();
            source.setConfiguration(configuration);
            source.setRootFolder(file.getAbsolutePath());

            /* Create and run the crawler. */
            final FileSystemCrawler crawler = new FileSystemCrawler();
            crawler.setDataSource(source);
            crawler.setCrawlerHandler(handler);
            crawler.setDataAccessorRegistry(new DefaultDataAccessorRegistry());
            crawler.crawl();

            /*
             * NOTE(review): the report is not used; the call is retained from the
             * original code. It looks like a plain accessor and is a candidate for
             * removal once that is confirmed.
             */
            crawler.getCrawlReport();
        } finally {
            /*
             * Required to get rid of 'org.openrdf.rdf2go.RepositoryModel not closed,
             * closing now' warnings. Done in a finally block so the container is
             * also released when the crawl fails.
             */
            configuration.dispose();
        }

        /* Hand out a copy so the shared accumulator never escapes this object. */
        return new ArrayList<Message>(messages);
    }

    /**
     * Appends a message to the accumulator of the current crawl.
     *
     * @param message the message to add
     */
    public void addMessage(Message message) {
        messages.add(message);
    }
}