/**
* Copyright (c) 2010, UNC-Chapel Hill and Nescent
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided
that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this list of conditions and
* the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
* following disclaimer in the documentation and/or other materials provided with the distribution.
* Neither the name of the UNC-Chapel Hill or Nescent nor the names of its contributors may be used to endorse or promote
* products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@author Jose R. Perez-Aguera
*/
package edu.unc.ils.mrc.hive.api.impl.elmo;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.TreeMap;
import javax.xml.namespace.QName;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.openrdf.elmo.sesame.SesameManager;
import java.text.SimpleDateFormat;
import edu.unc.ils.mrc.hive.HiveException;
import edu.unc.ils.mrc.hive.api.SKOSConcept;
import edu.unc.ils.mrc.hive.api.SKOSScheme;
import edu.unc.ils.mrc.hive.ir.lucene.search.AutocompleteTerm;
import edu.unc.ils.mrc.hive2.api.HiveConcept;
import edu.unc.ils.mrc.hive2.api.HiveVocabulary;
import edu.unc.ils.mrc.hive2.api.impl.HiveVocabularyImpl;
/**
* This class represents a HIVE vocabulary and associated indexes as
* described in the HIVE vocabulary property file.
*
* Each HIVE vocabulary consists of a Sesame store, Lucene index, and
* two serialized TreeMaps representing the alphabetic and top-concept
* indexes.
*/
public class SKOSSchemeImpl implements SKOSScheme {
private static final Log logger = LogFactory.getLog(SKOSSchemeImpl.class);
/* Vocabulary/scheme name */
private String schemeName;
/* Vocabulary/scheme long name */
private String longName;
/* Vocabulary/scheme URI*/
private String schemaURI;
/* Lucene index directory */
private String indexDirectory;
/* Sesame store directory */
private String storeDirectory;
/* H2 database directory */
private String h2Directory;
/* Alphabetic index file name */
//private String alphaFilePath;
/* Top concept index file name */
//private String topConceptIndexPath;
/* KEA+ stopwords file path */
private String stopwordsPath;
/* SKOS RDF/XML file path */
private String rdfPath;
/* KAE+ training set path */
private String KEAtrainSetDir;
/* KEA+ test set path */
private String KEAtestSetDir;
/* KEA+ model path */
private String KEAModelPath;
/* Maui model path */
private String MauiModelPath;
/* Lingpipe model path */
private String lingpipeModel;
/* Vocabulary creation date */
private Date creationDate;
/* Atom feed URL */
private String atomFeedURL;
/* Autocomplete index path */
private String autocompletePath;
/* KEA stemmer class name */
private String keaStemmerClass;
/* Maui stemmer class name */
private String mauiStemmerClass;
/* RDF Format */
private String rdfFormat;
private HiveVocabulary hiveVocab;
private String date;
private long numberOfConcepts;
private long numberOfRelations;
private long numberOfBroaders;
private long numberOfNarrowers;
private long numberOfRelated;
public SKOSSchemeImpl(String confPath, String vocabularyName,
boolean firstTime) throws HiveException
{
String propertiesFile = confPath + File.separator + vocabularyName + ".properties";
init(propertiesFile);
if (!firstTime) {
try {
Map<String, Long> stats = hiveVocab.getStats();
this.date = hiveVocab.getLastUpdateDate().toString();
this.numberOfBroaders = stats.get("broader");
this.numberOfConcepts = stats.get("concepts");
this.numberOfNarrowers = stats.get("narrower");
this.numberOfRelated = stats.get("related");
this.numberOfRelations = numberOfBroaders + numberOfNarrowers + numberOfRelated;
} catch (Exception e) {
logger.error(e);
}
}
}
/**
* Initialize the scheme based on the specified properties file
* @param propertiesFile
*/
private void init(String propertiesFile) throws HiveException
{
logger.trace("init " + propertiesFile);
logger.info("Loading vocabulary configuration from " + propertiesFile);
Properties properties = new Properties();
try {
FileInputStream fis = new FileInputStream(propertiesFile);
properties.load(fis);
// Scheme name
this.schemeName = properties.getProperty("name");
if (schemeName.isEmpty())
logger.warn("name property is empty");
// Scheme long name
this.longName = properties.getProperty("longName");
if (longName.isEmpty())
logger.warn("longName property is empty");
// Scheme URI
this.schemaURI = properties.getProperty("uri");
if (schemaURI.isEmpty())
logger.warn("uri property is empty");
//
this.rdfFormat = properties.getProperty("rdfFormat", "rdfxml");
if (rdfFormat.isEmpty())
logger.warn("rdfFormat property is empty");
// Lucene index path
this.indexDirectory = properties.getProperty("index");
if (indexDirectory.isEmpty())
logger.warn("index property is empty");
// Sesame store path
this.storeDirectory = properties.getProperty("store");
if (storeDirectory.isEmpty())
logger.warn("store property is empty");
// H2 store path
this.h2Directory = properties.getProperty("h2");
if (h2Directory.isEmpty())
logger.warn("h2 property is empty");
// KEA+ model path
this.KEAModelPath = properties.getProperty("kea_model");
if (KEAModelPath.isEmpty())
logger.warn("kea_model property is empty");
// Maui model path
this.MauiModelPath = properties.getProperty("maui_model");
if (MauiModelPath.isEmpty())
logger.warn("maui_model property is empty");
// KEA+ test set path
this.KEAtestSetDir = properties.getProperty("kea_test_set");
if (KEAtestSetDir.isEmpty())
logger.warn("kea_test_set property is empty");
// KEA+ training set path
this.KEAtrainSetDir = properties.getProperty("kea_training_set");
if (KEAtrainSetDir.isEmpty())
logger.warn("kea_training_set property is empty");
// KEA+ stopwords path
this.stopwordsPath = properties.getProperty("stopwords");
if (stopwordsPath.isEmpty())
logger.warn("stopwords property is empty");
// Path to SKOS/RDF file
this.rdfPath = properties.getProperty("rdf_file");
if (rdfPath.isEmpty())
logger.warn("rdf_file property is empty");
// Lingpipe model path
this.lingpipeModel = properties.getProperty("lingpipe_model");
if (lingpipeModel == null || lingpipeModel.isEmpty())
logger.warn("lingpipe_model property is empty");
String dateStr = properties.getProperty("creationDate");
SimpleDateFormat df = new SimpleDateFormat("MM-DD-yyyy");
try {
this.creationDate = df.parse(dateStr);
} catch (Exception e) {
logger.warn("Missing or invalid creationDate");
}
// Atom feed URL for synchronization
this.atomFeedURL = properties.getProperty("atomFeedURL");
if (atomFeedURL == null || atomFeedURL.isEmpty())
logger.warn("atomFeedURL property is empty");
// Autocomplete index path
this.autocompletePath = properties.getProperty("autocomplete");
if (autocompletePath == null || autocompletePath.isEmpty())
logger.warn("autocomplete property is empty");
// kea stemmer class
this.keaStemmerClass = properties.getProperty("keaStemmerClass", "l");
System.out.println("Using kea stemmer " + keaStemmerClass);
if (keaStemmerClass == null || keaStemmerClass.isEmpty())
logger.warn("keaStemmerClass property is empty, defaulting to kea.stemers.PorterStemmer");
// maui stemmer class
this.mauiStemmerClass = properties.getProperty("mauiStemmerClass", "maui.stemmers.PorterStemmer");
System.out.println("Using maui stemmer " + mauiStemmerClass);
if (mauiStemmerClass == null || mauiStemmerClass.isEmpty())
logger.warn("mauiStemmerClass property is empty, defaulting to maui.stemers.PorterStemmer");
fis.close();
this.hiveVocab = HiveVocabularyImpl.getInstance(schemeName, indexDirectory, storeDirectory,
h2Directory, autocompletePath);
} catch (FileNotFoundException e) {
throw new HiveException("Property file not found", e);
} catch (IOException e) {
throw new HiveException ("Error occurred during scheme initialization", e);
}
}
@Override
public String getStopwordsPath() {
return stopwordsPath;
}
@Override
public String getRdfPath() {
return rdfPath;
}
@Override
public String getKEAtrainSetDir() {
return KEAtrainSetDir;
}
@Override
public String getKEAtestSetDir() {
return KEAtestSetDir;
}
@Override
public String getKEAModelPath() {
return KEAModelPath;
}
@Override
public String getMauiModelPath() {
return MauiModelPath;
}
public String getAtomFeedURL() {
return this.atomFeedURL;
}
@Override
public String getAutoCompletePath() {
return this.autocompletePath;
}
@Override
/**
Returns an index of all terms, sorted alphabetically.
**/
public TreeMap<String, QName> getSubAlphaIndex(String startLetter) {
TreeMap<String, QName> terms = new TreeMap<String, QName>();
try
{
List<HiveConcept> hcs = hiveVocab.findConcepts(startLetter + "%", false);
for (HiveConcept hc: hcs) {
terms.put(hc.getPrefLabel(), hc.getQName());
}
} catch (Exception e) {
logger.error(e);
}
return terms;
}
@Override
public List<SKOSConcept> getSubTopConceptIndex(String startLetter) {
List<SKOSConcept> terms = new ArrayList<SKOSConcept>();
try
{
List<HiveConcept> hcs = hiveVocab.findConcepts(startLetter + "%", true);
for (HiveConcept hc: hcs) {
SKOSConceptImpl sc = new SKOSConceptImpl(hc.getQName());
sc.setPrefLabel(hc.getPrefLabel());
sc.setIsLeaf(hc.isLeaf());
terms.add(sc);
}
} catch (Exception e) {
logger.error(e);
}
return terms;
}
@Override
public String getH2Path() {
return this.h2Directory;
}
@Override
public String getLastDate() {
return this.date;
}
@Override
public String getName() {
return this.schemeName;
}
@Override
public long getNumberOfConcepts() {
return this.numberOfConcepts;
}
@Override
public long getNumberOfBroader() {
return this.numberOfBroaders;
}
@Override
public long getNumberOfNarrower() {
return this.numberOfNarrowers;
}
@Override
public long getNumberOfRelated() {
return this.numberOfRelated;
}
@Override
public long getNumberOfRelations() {
return this.numberOfRelations;
}
@Override
public String getLongName() {
return longName;
}
@Override
public String getStoreDirectory() {
return storeDirectory;
}
@Override
public String getIndexDirectory() {
return this.indexDirectory;
}
@Override
public String getSchemaURI() {
return this.schemaURI;
}
@Override
public SesameManager getManager() {
try {
return hiveVocab.getManager();
} catch (Exception e) {
logger.error(e);
}
return null;
}
@Override
public String getLingpipeModel() {
return this.lingpipeModel;
}
@Override
public Date getCreationDate() {
return this.creationDate;
}
@Override
public void importConcept(String uri) throws Exception {
hiveVocab.importConcept(QName.valueOf(uri), uri);
}
@Override
public void deleteConcept(String uri) throws Exception {
deleteConcept(QName.valueOf(uri));
}
@Override
public void deleteConcept(QName qname) throws Exception {
hiveVocab.removeConcept(qname);
}
@Override
public long getNumberOfTopConcepts() throws Exception {
return hiveVocab.getNumTopConcepts();
}
@Override
public void importConcepts(String path) throws Exception {
hiveVocab.importConcepts(path, rdfFormat);
}
@Override
public void importConcepts(String path, boolean doSesame, boolean doLucene,
boolean doH2, boolean doH2KEA, boolean doAutocomplete) throws Exception {
hiveVocab.importConcepts(path, doSesame, doLucene, doH2, doH2KEA, doAutocomplete, rdfFormat);
}
@Override
public void importConcept(QName qname, String path) throws Exception {
hiveVocab.importConcept(qname, path);
}
@Override
public Date getLastUpdateDate() {
Date lastUpdate = null;
try
{
lastUpdate = hiveVocab.getLastUpdateDate();
} catch (Exception e) {
logger.error(e);
}
return lastUpdate;
}
@Override
public void close() throws Exception {
hiveVocab.close();
}
@Override
public List<AutocompleteTerm> suggestTermsFor(String str, int numTerms) throws Exception
{
return hiveVocab.suggestTermsFor(str, numTerms);
}
@Override
public HiveVocabulary getHiveVocabulary() {
return hiveVocab;
}
@Override
public Map<String, QName> getAlphaIndex() {
return hiveVocab.findAllConcepts(false);
}
@Override
public Map<String, QName> getTopConceptIndex() {
return hiveVocab.findAllConcepts(true);
}
@Override
public String getKeaStemmerClass()
{
return keaStemmerClass;
}
@Override
public String getMauiStemmerClass()
{
return mauiStemmerClass;
}
}