/** * File: $HeadURL: https://hdt-java.googlecode.com/svn/trunk/hdt-java/src/org/rdfhdt/hdt/hdt/impl/TempHDTImporterTwoPass.java $ * Revision: $Rev: 191 $ * Last modified: $Date: 2013-03-03 11:41:43 +0000 (dom, 03 mar 2013) $ * Last modified by: $Author: mario.arias $ * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * * Contacting the authors: * Mario Arias: mario.arias@deri.org * Javier D. Fernandez: jfergar@infor.uva.es * Miguel A. Martinez-Prieto: migumar2@infor.uva.es * Alejandro Andres: fuzzy.alej@gmail.com */ package org.rdfhdt.hdt.hdt.impl; import java.io.File; import java.io.IOException; import org.rdfhdt.hdt.dictionary.TempDictionary; import org.rdfhdt.hdt.enums.RDFNotation; import org.rdfhdt.hdt.enums.TripleComponentRole; import org.rdfhdt.hdt.exceptions.ParserException; import org.rdfhdt.hdt.hdt.TempHDT; import org.rdfhdt.hdt.hdt.TempHDTImporter; import org.rdfhdt.hdt.listener.ProgressListener; import org.rdfhdt.hdt.options.HDTOptions; import org.rdfhdt.hdt.rdf.RDFParserCallback; import org.rdfhdt.hdt.rdf.RDFParserCallback.RDFCallback; import org.rdfhdt.hdt.rdf.RDFParserFactory; import org.rdfhdt.hdt.triples.TempTriples; import org.rdfhdt.hdt.triples.TripleString; import org.rdfhdt.hdt.triples.TriplesFactory; import org.rdfhdt.hdt.util.RDFInfo; import org.rdfhdt.hdt.util.listener.ListenerUtil; public class TempHDTImporterTwoPass implements TempHDTImporter { class DictionaryAppender implements RDFCallback { TempDictionary dict; ProgressListener listener; long count; DictionaryAppender(TempDictionary dict, ProgressListener listener) { this.dict = dict; this.listener = listener; } @Override public void processTriple(TripleString triple, long pos) { dict.insert(triple.getSubject(), TripleComponentRole.SUBJECT); dict.insert(triple.getPredicate(), TripleComponentRole.PREDICATE); dict.insert(triple.getObject(), TripleComponentRole.OBJECT); count++; ListenerUtil.notifyCond(listener, "Generating dictionary "+count+" triples processed.", count, 0, 100); } public long getCount() { return count; } }; /** * Warning: different from HDTConverterOnePass$TripleAppender * This one uses dict.stringToID, the other uses dict.insert * @author mario.arias * */ class TripleAppender2 implements RDFCallback { TempDictionary dict; TempTriples triples; ProgressListener listener; long count; public TripleAppender2(TempDictionary dict, TempTriples triples, ProgressListener listener) { this.dict = dict; this.triples = triples; this.listener = listener; } public void processTriple(TripleString triple, long pos) { triples.insert( dict.stringToId(triple.getSubject(), TripleComponentRole.SUBJECT), dict.stringToId(triple.getPredicate(), TripleComponentRole.PREDICATE), dict.stringToId(triple.getObject(), TripleComponentRole.OBJECT) ); count++; ListenerUtil.notifyCond(listener, "Generating triples "+count+" triples processed.", count, 0, 100); } }; @Override public TempHDT loadFromRDF(HDTOptions specs, String filename, String baseUri, RDFNotation notation, ProgressListener listener) throws IOException, ParserException { RDFParserCallback parser = RDFParserFactory.getParserCallback(notation); // Fill the specs with missing properties if (!RDFInfo.triplesSet(specs) && TriplesFactory.TEMP_TRIPLES_IMPL_LIST.equals(specs.get("tempTriples.impl"))) { //count lines if not user-set and if triples in-mem (otherwise not important info) RDFInfo.setTriples(RDFInfo.countLines(filename, parser, notation), specs); //FIXME setting numberOfLines costs (counting them) but saves memory... what to do?? //especially because in two-pass they are counter by DictionaryAppender (but triples object //is instantiated earlier) } RDFInfo.setSizeInBytes(new File(filename).length(), specs); //else just get sizeOfRDF // Create Modifiable Instance and parser TempHDT modHDT = new TempHDTImpl(specs, baseUri, ModeOfLoading.TWO_PASS); TempDictionary dictionary = (TempDictionary)modHDT.getDictionary(); TempTriples triples = (TempTriples)modHDT.getTriples(); // Load RDF in the dictionary dictionary.startProcessing(); parser.doParse(filename, baseUri, notation, new DictionaryAppender(dictionary, listener)); dictionary.endProcessing(); // Reorganize IDs before loading triples modHDT.reorganizeDictionary(listener); // Load triples (second pass) parser.doParse(filename, baseUri, notation, new TripleAppender2(dictionary, triples, listener)); //reorganize HDT modHDT.reorganizeTriples(listener); return modHDT; } }