/**
* File: $HeadURL: https://hdt-java.googlecode.com/svn/trunk/hdt-java/src/org/rdfhdt/hdt/hdt/impl/TempHDTImporterOnePass.java $
* Revision: $Rev: 191 $
* Last modified: $Date: 2013-03-03 11:41:43 +0000 (dom, 03 mar 2013) $
* Last modified by: $Author: mario.arias $
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Contacting the authors:
* Mario Arias: mario.arias@deri.org
* Javier D. Fernandez: jfergar@infor.uva.es
* Miguel A. Martinez-Prieto: migumar2@infor.uva.es
* Alejandro Andres: fuzzy.alej@gmail.com
*/
package org.rdfhdt.hdt.hdt.impl;
import java.io.File;
import java.io.IOException;
import org.rdfhdt.hdt.dictionary.TempDictionary;
import org.rdfhdt.hdt.enums.RDFNotation;
import org.rdfhdt.hdt.enums.TripleComponentRole;
import org.rdfhdt.hdt.exceptions.ParserException;
import org.rdfhdt.hdt.hdt.HDTVocabulary;
import org.rdfhdt.hdt.hdt.TempHDT;
import org.rdfhdt.hdt.hdt.TempHDTImporter;
import org.rdfhdt.hdt.listener.ProgressListener;
import org.rdfhdt.hdt.options.HDTOptions;
import org.rdfhdt.hdt.rdf.RDFParserCallback;
import org.rdfhdt.hdt.rdf.RDFParserCallback.RDFCallback;
import org.rdfhdt.hdt.rdf.RDFParserFactory;
import org.rdfhdt.hdt.triples.IteratorTripleString;
import org.rdfhdt.hdt.triples.TempTriples;
import org.rdfhdt.hdt.triples.TripleString;
import org.rdfhdt.hdt.triples.TriplesFactory;
import org.rdfhdt.hdt.util.RDFInfo;
import org.rdfhdt.hdt.util.listener.ListenerUtil;
/**
 * {@link TempHDTImporter} that builds a {@link TempHDT} in a single pass over the
 * input: each triple is inserted into the temporary dictionary and the resulting
 * IDs are inserted into the temporary triples right away. Once all triples have
 * been consumed, the dictionary and the triples are reorganized and an estimate
 * of the original size is recorded in the header.
 */
public class TempHDTImporterOnePass implements TempHDTImporter {

    /**
     * Parser callback that appends every received triple to a dictionary/triples
     * pair while keeping a running triple count and a running size estimate.
     *
     * Declared static because it never uses the enclosing importer instance.
     */
    static class TripleAppender implements RDFCallback {
        final TempDictionary dict;
        final TempTriples triples;
        final ProgressListener listener;
        /** Number of triples processed so far. */
        long num;
        /** Sum over all processed triples of the term lengths plus 4 (spaces and final dot). */
        long size;

        /**
         * @param dict     dictionary the subject, predicate and object strings are inserted into
         * @param triples  triples structure receiving the IDs returned by the dictionary
         * @param listener progress listener handed to {@link ListenerUtil#notifyCond}
         */
        public TripleAppender(TempDictionary dict, TempTriples triples, ProgressListener listener) {
            this.dict = dict;
            this.triples = triples;
            this.listener = listener;
        }

        /**
         * Inserts the three terms of {@code triple} into the dictionary, inserts the
         * returned IDs into the triples, and updates the counters.
         *
         * @param triple triple to append
         * @param pos    position reported by the caller; not used by this callback
         */
        @Override
        public void processTriple(TripleString triple, long pos) {
            triples.insert(
                    dict.insert(triple.getSubject(), TripleComponentRole.SUBJECT),
                    dict.insert(triple.getPredicate(), TripleComponentRole.PREDICATE),
                    dict.insert(triple.getObject(), TripleComponentRole.OBJECT)
            );
            num++;
            size += triple.getSubject().length() + triple.getPredicate().length() + triple.getObject().length() + 4; // Spaces and final dot
            ListenerUtil.notifyCond(listener, "Loaded " + num + " triples", num, 0, 100);
        }
    }

    /**
     * Parses the RDF file {@code filename} and loads it into a new {@link TempHDT}.
     *
     * @param specs    options; the triple count (conditionally) and the size in bytes
     *                 of the input file are written into them before the HDT is created
     * @param filename path of the RDF file to parse
     * @param baseUri  base URI passed to the parser and to the created HDT
     * @param notation RDF serialization used to select and drive the parser
     * @param listener progress listener
     * @return the loaded and reorganized temporary HDT
     * @throws IOException     declared by the interface contract for I/O failures
     * @throws ParserException declared by the interface contract for parsing failures
     */
    @Override
    public TempHDT loadFromRDF(HDTOptions specs, String filename, String baseUri, RDFNotation notation, ProgressListener listener)
            throws IOException, ParserException {
        RDFParserCallback parser = RDFParserFactory.getParserCallback(notation);

        // Fill the specs with missing properties
        if (!RDFInfo.triplesSet(specs) && TriplesFactory.TEMP_TRIPLES_IMPL_LIST.equals(specs.get("tempTriples.impl"))) {
            // count lines if not user-set and if triples in-mem (otherwise not important info)
            RDFInfo.setTriples(RDFInfo.countLines(filename, parser, notation), specs);
        }
        RDFInfo.setSizeInBytes(new File(filename).length(), specs); // else just get sizeOfRDF

        // Create Modifiable Instance
        TempHDT modHDT = new TempHDTImpl(specs, baseUri, ModeOfLoading.ONE_PASS);
        TempDictionary dictionary = modHDT.getDictionary();
        TripleAppender appender = new TripleAppender(dictionary, modHDT.getTriples(), listener);

        // Load RDF in the dictionary and generate triples
        dictionary.startProcessing();
        parser.doParse(filename, baseUri, notation, appender);
        dictionary.endProcessing();

        finishImport(modHDT, appender.size, listener);
        return modHDT;
    }

    /**
     * Loads the triples supplied by {@code iterator} into a new {@link TempHDT}.
     *
     * @param specs    options used to create the temporary HDT
     * @param iterator source of the triples to load; consumed until exhausted
     * @param baseUri  base URI passed to the created HDT
     * @param listener progress listener
     * @return the loaded and reorganized temporary HDT
     * @throws IOException declared for I/O failures of the underlying structures
     */
    public TempHDT loadFromTriples(HDTOptions specs, IteratorTripleString iterator, String baseUri, ProgressListener listener)
            throws IOException {
        // Create Modifiable Instance
        TempHDT modHDT = new TempHDTImpl(specs, baseUri, ModeOfLoading.ONE_PASS);
        TempDictionary dictionary = modHDT.getDictionary();
        // Reuse the parser callback so both loading paths share one implementation
        // of the insert / count / size-estimate / notify logic.
        TripleAppender appender = new TripleAppender(dictionary, modHDT.getTriples(), listener);

        // Load RDF in the dictionary and generate triples
        dictionary.startProcessing();
        while (iterator.hasNext()) {
            // The position argument is ignored by TripleAppender; pass the running index.
            appender.processTriple(iterator.next(), appender.num);
        }
        dictionary.endProcessing();

        finishImport(modHDT, appender.size, listener);
        return modHDT;
    }

    /**
     * Reorganizes the dictionary and then the triples of {@code hdt}, and records
     * {@code originalSize} in its header under {@link HDTVocabulary#ORIGINAL_SIZE}.
     */
    private static void finishImport(TempHDT hdt, long originalSize, ProgressListener listener) throws IOException {
        // Reorganize both the dictionary and the triples
        hdt.reorganizeDictionary(listener);
        hdt.reorganizeTriples(listener);
        hdt.getHeader().insert("_:statistics", HDTVocabulary.ORIGINAL_SIZE, originalSize);
    }
}