/*
* KBInitializer.java
*
* Created on Nov 21, 2010, 6:30:24 PM
*
* Description: Initializes the OpenCyc knowledge base.
*
* Copyright (C) Nov 21, 2010, Stephen L. Reed.
*
* This program is free software; you can redistribute it and/or modify it under the terms
* of the GNU General Public License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with this program;
* if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
package org.texai.kb;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import net.jcip.annotations.NotThreadSafe;
import net.sf.ehcache.CacheManager;
import org.apache.log4j.Logger;
import org.openrdf.model.Statement;
import org.openrdf.repository.RepositoryConnection;
import org.openrdf.repository.RepositoryException;
import org.texai.kb.persistence.DistributedRepositoryManager;
import org.texai.kb.persistence.RDFEntityManager;
import org.texai.kb.persistence.RDFUtility;
import org.texai.turtleStatementParser.TurtleStatementParser;
import org.texai.turtleStatementParser.misc.ParsedTurtleStatementHandler;
import org.texai.util.StringUtils;
import org.texai.util.TexaiException;
/** Initializes a repository with turtle-formatted RDF statements, such as those created by RDFEntityManager.export(...).
 *
 * <p>The statements are read from the file named by the statement file path, passed one at a time to
 * {@link #handleStatement(Statement)} and added to the repository connection, with an intermediate commit after
 * every {@value #COMMIT_INTERVAL} statements and a final commit once the whole file has been processed.
 *
 * @author reed
 */
@NotThreadSafe
public final class KBInitializer implements ParsedTurtleStatementHandler {

  /** the logger */
  private static final Logger LOGGER = Logger.getLogger(KBInitializer.class);
  /** the number of added statements between intermediate commits */
  private static final int COMMIT_INTERVAL = 5000;
  /** the number of added statements between progress log entries */
  private static final int PROGRESS_LOG_INTERVAL = 10000;
  /** the RDF entity manager, or null when this instance was constructed with a repository connection */
  private final RDFEntityManager rdfEntityManager;
  /** the statement file path */
  private String statementFilePath = "../Main/data/kb-statements.txt";
  /** the repository name, used only when the connection is obtained from the RDF entity manager */
  private String repositoryName = Constants.OPEN_CYC;
  /** the count of statements added so far by this instance */
  private int statementCount = 0;
  /** the repository connection */
  private RepositoryConnection repositoryConnection;

  /** Constructs a new KBInitializer instance that obtains its repository connection, by repository name,
   * from the given RDF entity manager when {@link #process()} is called.
   *
   * @param rdfEntityManager the RDF entity manager
   */
  public KBInitializer(final RDFEntityManager rdfEntityManager) {
    //Preconditions
    assert rdfEntityManager != null : "rdfEntityManager must not be null";

    this.rdfEntityManager = rdfEntityManager;
  }

  /** Constructs a new KBInitializer instance that loads statements through the given repository connection.
   *
   * @param repositoryConnection the repository connection
   */
  public KBInitializer(final RepositoryConnection repositoryConnection) {
    //Preconditions
    assert repositoryConnection != null : "repositoryConnection must not be null";

    this.repositoryConnection = repositoryConnection;
    rdfEntityManager = null;
  }

  /** Initializes the OpenCyc knowledge base by default unless this instance was constructed
   * with a given repository connection.
   *
   * @throws TexaiException if the statement file cannot be read or a repository operation fails
   */
  public void process() {
    LOGGER.info("Turtle-format RDF input file path: " + statementFilePath);
    if (rdfEntityManager != null) {
      LOGGER.info("repository name: " + repositoryName);
      repositoryConnection = rdfEntityManager.getConnectionToNamedRepository(repositoryName);
    } else {
      LOGGER.info("repository name: " + repositoryConnection.getRepository().getDataDir());
    }
    beginTransaction();
    loadStatements();
    commitTransaction();
    LOGGER.info(statementCount + " statements loaded");
  }

  /** Turns off auto-commit on the repository connection and logs the beginning repository size. */
  private void beginTransaction() {
    try {
      assert repositoryConnection.isAutoCommit();
      repositoryConnection.setAutoCommit(false);
      LOGGER.info("beginning repository size " + repositoryConnection.size());
    } catch (final RepositoryException ex) {
      throw new TexaiException(ex);
    }
  }

  /** Parses the statement file, which causes each parsed statement to be passed to handleStatement. */
  private void loadStatements() {
    try {
      assert (new File(statementFilePath).exists()) : statementFilePath + " not found";
      final BufferedInputStream inputStream = new BufferedInputStream(new FileInputStream(statementFilePath));
      try {
        final TurtleStatementParser turtleStatementParser = TurtleStatementParser.makeTurtleStatementParser(
                inputStream,
                this); // parsedTurtleStatementHandler
        turtleStatementParser.getStatements(); // see handleStatement method
      } finally {
        // close the file even when parsing or statement handling fails, so that the file handle is not leaked
        inputStream.close();
      }
    } catch (final IOException ex) {
      throw new TexaiException(ex);
    }
  }

  /** Commits the final transaction, turns auto-commit back on and logs the ending repository size. */
  private void commitTransaction() {
    try {
      assert !repositoryConnection.isAutoCommit();
      repositoryConnection.commit();
      repositoryConnection.setAutoCommit(true);
      LOGGER.info("ending repository size " + repositoryConnection.size());
    } catch (final RepositoryException ex) {
      throw new TexaiException(ex);
    }
  }

  /** Handles a parsed turtle statement by adding it to the repository connection, committing after
   * every {@value #COMMIT_INTERVAL} statements.
   *
   * @param statement the statement
   * @throws TexaiException if the repository rejects the statement or the intermediate commit fails
   */
  @Override
  public void handleStatement(final Statement statement) {
    if (LOGGER.isDebugEnabled()) {
      // avoid formatting every statement when debug logging is turned off
      LOGGER.debug("statement: " + RDFUtility.formatStatementAsTurtle(statement));
    }
    try {
      repositoryConnection.add(statement);
      statementCount++;
      if (statementCount % PROGRESS_LOG_INTERVAL == 0) {
        LOGGER.info(statementCount + " " + RDFUtility.formatStatement(statement));
      }
      if (statementCount % COMMIT_INTERVAL == 0) {
        // commit the transaction
        assert !repositoryConnection.isAutoCommit();
        repositoryConnection.commit();
      }
    } catch (final RepositoryException ex) {
      throw new TexaiException(ex);
    }
  }

  /** Gets the statement file path.
   *
   * @return the statement file path
   */
  public String getStatementFilePath() {
    return statementFilePath;
  }

  /** Sets the statement file path.
   *
   * @param statementFilePath the statement file path, which must be neither null nor empty
   */
  public void setStatementFilePath(final String statementFilePath) {
    //Preconditions
    assert statementFilePath != null : "statementFilePath must not be null";
    assert !statementFilePath.isEmpty() : "statementFilePath must not be empty";

    this.statementFilePath = statementFilePath;
  }

  /** Gets the repository name.
   *
   * @return the repository name
   */
  public String getRepositoryName() {
    return repositoryName;
  }

  /** Sets the repository name.
   *
   * @param repositoryName the repository name to set, which must be a non-empty string
   */
  public void setRepositoryName(final String repositoryName) {
    //Preconditions
    assert StringUtils.isNonEmptyString(repositoryName);

    this.repositoryName = repositoryName;
  }

  /** Executes this application, loading the default statement file into the default repository.
   *
   * @param args the command line arguments (unused)
   */
  public static void main(final String[] args) {
    CacheInitializer.initializeCaches();
    final RDFEntityManager rdfEntityManager = new RDFEntityManager();
    final KBInitializer kbInitializer = new KBInitializer(rdfEntityManager);
    kbInitializer.process();
    rdfEntityManager.close();
    DistributedRepositoryManager.shutDown();
    CacheManager.getInstance().shutdown();
  }
}