/**
* This file is part of General Entity Annotator Benchmark.
*
* General Entity Annotator Benchmark is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* General Entity Annotator Benchmark is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with General Entity Annotator Benchmark. If not, see <http://www.gnu.org/licenses/>.
*/
package org.aksw.gerbil.dataset.impl.nif;
import java.io.InputStream;
import java.io.Reader;
import java.util.List;
import org.aksw.gerbil.dataset.InitializableDataset;
import org.aksw.gerbil.dataset.RdfModelContainingDataset;
import org.aksw.gerbil.dataset.impl.AbstractDataset;
import org.aksw.gerbil.datatypes.ErrorTypes;
import org.aksw.gerbil.exceptions.GerbilException;
import org.aksw.gerbil.io.nif.AbstractNIFParser;
import org.aksw.gerbil.transfer.nif.Document;
import org.apache.jena.riot.Lang;
import org.apache.jena.riot.adapters.RDFReaderRIOT;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
public abstract class AbstractNIFDataset extends AbstractDataset implements RdfModelContainingDataset, InitializableDataset {
private static final transient Logger LOGGER = LoggerFactory.getLogger(AbstractNIFDataset.class);
private List<Document> documents;
private String name;
private boolean hasBeenInitialized = false;
private Model rdfModel;
public AbstractNIFDataset(String name) {
this.name = name;
}
/**
* This method returns an opened InputStream from which the NIF data will be
* read. If an error occurs while opening the stream, null should be
* returned. <b>Note</b> that for closing the stream
* {@link #closeInputStream(InputStream)} is called. If there are other
* resources related to this stream that have to be closed, this method can
* be overwritten to free these resources, too.
*
* @return an opened InputStream or null if an error occurred.
*/
protected abstract InputStream getDataAsInputStream();
/**
* This method returns the language of the NIF data, e.g.,
* {@link Lang#TURTLE}.
*
* @return the language of the NIF data
*/
protected abstract Lang getDataLanguage();
/**
* This method is called for closing the input stream that has been returned
* by {@link #getDataAsInputStream()}.If there are other resources related
* to this stream that have to be closed, this method can be overwritten to
* free these resources, too.
*
* @param inputStream
* the input stream which should be closed
*/
protected void closeInputStream(InputStream inputStream) {
try {
inputStream.close();
} catch (Exception e) {
}
}
@Override
public synchronized void init() throws GerbilException {
if (hasBeenInitialized) {
return;
}
Model nifModel = ModelFactory.createDefaultModel();
// dataset = RDFDataMgr.loadModel(rdfpath);
InputStream inputStream = getDataAsInputStream();
if (inputStream == null) {
throw new GerbilException("Couldn't get InputStream.", ErrorTypes.DATASET_LOADING_ERROR);
}
try {
LocalNIFParser parser = new LocalNIFParser(this);
documents = parser.parseNIF(inputStream, nifModel);
// RDFDataMgr.read(nifModel, inputStream, getDataLanguage());
} catch (Exception e) {
throw new GerbilException("Exception while parsing dataset.", e, ErrorTypes.DATASET_LOADING_ERROR);
} finally {
closeInputStream(inputStream);
}
// if there are still triples available
rdfModel = nifModel;
hasBeenInitialized = true;
LOGGER.info("{} dataset initialized", name);
}
public String getName() {
if (!hasBeenInitialized) {
throw new IllegalStateException(
"This dataset hasn't been initialized. Please call init() before using the dataset.");
}
return name;
}
@Override
public int size() {
if (!hasBeenInitialized) {
throw new IllegalStateException(
"This dataset hasn't been initialized. Please call init() before using the dataset.");
}
return documents.size();
}
@Override
public List<Document> getInstances() {
if (!hasBeenInitialized) {
throw new IllegalStateException(
"This dataset hasn't been initialized. Please call init() before using the dataset.");
}
return documents;
}
@Override
public Model getRdfModel() {
return rdfModel;
}
protected static class LocalNIFParser extends AbstractNIFParser {
private AbstractNIFDataset languageSource;
public LocalNIFParser(AbstractNIFDataset languageSource) {
super("");
this.languageSource = languageSource;
}
@Override
protected Model parseNIFModel(InputStream is, Model nifModel) {
RDFReaderRIOT rdfReader = new RDFReaderRIOT(languageSource.getDataLanguage());
rdfReader.read(nifModel, is, "");
return nifModel;
}
@Override
protected Model parseNIFModel(Reader reader, Model nifModel) {
RDFReaderRIOT rdfReader = new RDFReaderRIOT(languageSource.getDataLanguage());
rdfReader.read(nifModel, reader, "");
return nifModel;
}
}
}