/**
 * File: $HeadURL: https://hdt-java.googlecode.com/svn/trunk/hdt-java/src/org/rdfhdt/hdt/hdt/impl/HDTImpl.java $
 * Revision: $Rev: 202 $
 * Last modified: $Date: 2013-05-10 18:04:41 +0100 (vie, 10 may 2013) $
 * Last modified by: $Author: mario.arias $
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * Contacting the authors:
 *   Mario Arias:               mario.arias@deri.org
 *   Javier D. Fernandez:       jfergar@infor.uva.es
 *   Miguel A. Martinez-Prieto: migumar2@infor.uva.es
 *   Alejandro Andres:          fuzzy.alej@gmail.com
 */

package org.rdfhdt.hdt.hdt.impl;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Date;
import java.util.zip.GZIPInputStream;

import org.rdfhdt.hdt.dictionary.Dictionary;
import org.rdfhdt.hdt.dictionary.DictionaryFactory;
import org.rdfhdt.hdt.dictionary.DictionaryPrivate;
import org.rdfhdt.hdt.dictionary.TempDictionary;
import org.rdfhdt.hdt.enums.TripleComponentRole;
import org.rdfhdt.hdt.exceptions.IllegalFormatException;
import org.rdfhdt.hdt.exceptions.NotFoundException;
import org.rdfhdt.hdt.exceptions.NotImplementedException;
import org.rdfhdt.hdt.hdt.HDTPrivate;
import org.rdfhdt.hdt.hdt.HDTVocabulary;
import org.rdfhdt.hdt.hdt.TempHDT;
import org.rdfhdt.hdt.header.Header;
import org.rdfhdt.hdt.header.HeaderFactory;
import org.rdfhdt.hdt.header.HeaderPrivate;
import org.rdfhdt.hdt.iterator.DictionaryTranslateIterator;
import org.rdfhdt.hdt.listener.ProgressListener;
import org.rdfhdt.hdt.options.ControlInfo;
import org.rdfhdt.hdt.options.ControlInformation;
import org.rdfhdt.hdt.options.HDTOptions;
import org.rdfhdt.hdt.triples.IteratorTripleString;
import org.rdfhdt.hdt.triples.TempTriples;
import org.rdfhdt.hdt.triples.TripleID;
import org.rdfhdt.hdt.triples.Triples;
import org.rdfhdt.hdt.triples.TriplesFactory;
import org.rdfhdt.hdt.triples.TriplesPrivate;
import org.rdfhdt.hdt.util.StopWatch;
import org.rdfhdt.hdt.util.StringUtil;
import org.rdfhdt.hdt.util.io.CountInputStream;
import org.rdfhdt.hdt.util.io.IOUtil;
import org.rdfhdt.hdt.util.listener.IntermediateListener;

/**
 * Basic implementation of the HDT interface.
 *
 * An instance aggregates the three HDT components (header, dictionary and
 * triples) and knows how to load them from, map them from, and save them to
 * an HDT container stream or file.
 */
public class HDTImpl implements HDTPrivate {

	private HDTOptions spec;

	protected HeaderPrivate header;
	protected DictionaryPrivate dictionary;
	protected TriplesPrivate triples;

	/** Name of the file this HDT was last loaded from, mapped from or saved to; null if none. */
	private String hdtFileName;
	private String baseUri;
	/** True once {@link #mapFromHDT(File, long, ProgressListener)} has been invoked. */
	private boolean isMapped;

	/**
	 * Creates an HDT whose header, dictionary and triples are instantiated
	 * by the corresponding factories from the given specification.
	 *
	 * @param spec options handed to the component factories
	 */
	public HDTImpl(HDTOptions spec) {
		this.spec = spec;

		createComponents();
	}

	/** Instantiates header, dictionary and triples through their factories using {@link #spec}. */
	private void createComponents() {
		header = HeaderFactory.createHeader(spec);
		dictionary = DictionaryFactory.createDictionary(spec);
		triples = TriplesFactory.createTriples(spec);
	}

	/**
	 * Fills the header with the description of this dataset: its types, the
	 * VOID counts taken from the triples and the dictionary, the blank-node
	 * structure that links format, dictionary, triples, statistics and
	 * publication information, the combined size, and the current date as
	 * issue date.
	 *
	 * @param baseUri subject under which the dataset-level statements are inserted
	 */
	public void populateHeaderStructure(String baseUri) {
		header.insert(baseUri, HDTVocabulary.RDF_TYPE, HDTVocabulary.HDT_DATASET);
		header.insert(baseUri, HDTVocabulary.RDF_TYPE, HDTVocabulary.VOID_DATASET);

		// VOID
		header.insert(baseUri, HDTVocabulary.VOID_TRIPLES, triples.getNumberOfElements());
		header.insert(baseUri, HDTVocabulary.VOID_PROPERTIES, dictionary.getNpredicates());
		header.insert(baseUri, HDTVocabulary.VOID_DISTINCT_SUBJECTS, dictionary.getNsubjects());
		header.insert(baseUri, HDTVocabulary.VOID_DISTINCT_OBJECTS, dictionary.getNobjects());

		// Structure
		String formatNode = "_:format";
		String dictNode = "_:dictionary";
		String triplesNode = "_:triples";
		String statisticsNode = "_:statistics";
		String publicationInfoNode = "_:publicationInformation";

		header.insert(baseUri, HDTVocabulary.HDT_FORMAT_INFORMATION, formatNode);
		header.insert(formatNode, HDTVocabulary.HDT_DICTIONARY, dictNode);
		header.insert(formatNode, HDTVocabulary.HDT_TRIPLES, triplesNode);
		header.insert(baseUri, HDTVocabulary.HDT_STATISTICAL_INFORMATION, statisticsNode);
		header.insert(baseUri, HDTVocabulary.HDT_PUBLICATION_INFORMATION, publicationInfoNode);

		dictionary.populateHeader(header, dictNode);
		triples.populateHeader(header, triplesNode);

		header.insert(statisticsNode, HDTVocabulary.HDT_SIZE, getDictionary().size()+getTriples().size());

		// Current time
		header.insert(publicationInfoNode, HDTVocabulary.DUBLIN_CORE_ISSUED, StringUtil.formatDate(new Date()));
	}

	/**
	 * Reads the global control information and the header section from the
	 * start of an HDT container, and derives the base URI from the header.
	 *
	 * Shared by the stream-loading and the file-mapping code paths, which
	 * treat this prefix of the container identically.
	 *
	 * @param input     stream positioned at the beginning of the container
	 * @param ci        control information object to (re)use while reading
	 * @param iListener progress listener; its range is set to 0-5 for the header
	 * @throws IOException if reading from the stream fails
	 * @throws IllegalFormatException if the container format is not {@link HDTVocabulary#HDT_CONTAINER}
	 */
	private void loadContainerAndHeader(InputStream input, ControlInfo ci, IntermediateListener iListener) throws IOException {
		// Load Global ControlInformation
		ci.clear();
		ci.load(input);
		String hdtFormat = ci.getFormat();
		if(!hdtFormat.equals(HDTVocabulary.HDT_CONTAINER)) {
			throw new IllegalFormatException("This software cannot open this version of HDT File");
		}

		// Load header
		ci.clear();
		ci.load(input);
		iListener.setRange(0, 5);
		header = HeaderFactory.createHeader(ci);
		header.load(input, ci, iListener);

		// Set base URI: the subject of the first "rdf:type hdt:Dataset" statement, if any.
		try {
			IteratorTripleString it = header.search("", HDTVocabulary.RDF_TYPE, HDTVocabulary.HDT_DATASET);
			if(it.hasNext()) {
				this.baseUri = it.next().getSubject().toString();
			}
		} catch (NotFoundException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Loads header, dictionary and triples, in that order, from an HDT
	 * container stream. The stream is not closed by this method.
	 *
	 * @param input    stream positioned at the beginning of the container
	 * @param listener receives progress notifications (0-5 header, 5-60 dictionary, 60-100 triples)
	 * @throws IOException if reading from the stream fails
	 */
	@Override
	public void loadFromHDT(InputStream input, ProgressListener listener) throws IOException {
		ControlInfo ci = new ControlInformation();
		IntermediateListener iListener = new IntermediateListener(listener);

		loadContainerAndHeader(input, ci, iListener);

		// Load dictionary
		ci.clear();
		ci.load(input);
		iListener.setRange(5, 60);
		dictionary = DictionaryFactory.createDictionary(ci);
		dictionary.load(input, ci, iListener);

		// Load Triples
		ci.clear();
		ci.load(input);
		iListener.setRange(60, 100);
		triples = TriplesFactory.createTriples(ci);
		triples.load(input, ci, iListener);
	}

	/**
	 * Loads an HDT from a file, transparently decompressing it when the
	 * name ends in ".gz". The file name is remembered only after a
	 * successful load.
	 *
	 * @param hdtFileName path of the HDT (optionally gzipped) file
	 * @param listener    receives progress notifications
	 * @throws IOException if the file cannot be opened or read
	 */
	@Override
	public void loadFromHDT(String hdtFileName, ProgressListener listener) throws IOException {
		InputStream in = new FileInputStream(hdtFileName);
		try {
			// Wrapping happens inside the try so the raw file stream is released
			// even if a wrapper constructor (e.g. the gzip header check) fails.
			if(hdtFileName.endsWith(".gz")) {
				in = new BufferedInputStream(new GZIPInputStream(in));
			} else {
				in = new CountInputStream(new BufferedInputStream(in));
			}
			loadFromHDT(in, listener);
		} finally {
			in.close();
		}

		this.hdtFileName = hdtFileName;
	}

	/**
	 * Maps an HDT file instead of loading it fully through the stream.
	 *
	 * A file whose name ends in ".gz" cannot be mapped directly: the
	 * sibling file without the ".gz" suffix is used instead, and is created
	 * by decompression when it does not exist yet.
	 *
	 * @param f        the HDT file (optionally gzipped)
	 * @param offset   not used by this implementation
	 * @param listener receives progress notifications
	 * @throws IOException if the file cannot be opened, decompressed or read
	 */
	@Override
	public void mapFromHDT(File f, long offset, ProgressListener listener) throws IOException {
		this.hdtFileName = f.toString();
		this.isMapped = true;

		if(hdtFileName.endsWith(".gz")) {
			File old = f;
			hdtFileName = hdtFileName.substring(0, hdtFileName.length()-3);
			f = new File(hdtFileName);

			if(!f.exists()) {
				System.err.println("We cannot map a gzipped HDT, decompressing into "+hdtFileName+" first.");
				IOUtil.decompressGzip(old, f);
				System.err.println("Gzipped HDT successfully decompressed. You might want to delete "+old.getAbsolutePath()+" to save disk space.");
			} else {
				System.err.println("We cannot map a gzipped HDT, using "+hdtFileName+" instead.");
			}
		}

		CountInputStream input = new CountInputStream(new BufferedInputStream(new FileInputStream(hdtFileName)));
		try {
			ControlInfo ci = new ControlInformation();
			IntermediateListener iListener = new IntermediateListener(listener);

			loadContainerAndHeader(input, ci, iListener);

			// Load dictionary. The control information is only peeked (mark/reset) to pick
			// the implementation; the stream is rewound before mapFromFile is invoked.
			ci.clear();
			input.mark(1024);
			ci.load(input);
			input.reset();
			iListener.setRange(5, 60);
			dictionary = DictionaryFactory.createDictionary(ci);
			dictionary.mapFromFile(input, f, iListener);

			// Load Triples, peeking the control information in the same way.
			ci.clear();
			input.mark(1024);
			ci.load(input);
			input.reset();
			iListener.setRange(60, 100);
			triples = TriplesFactory.createTriples(ci);
			triples.mapFromFile(input, f, iListener);
		} finally {
			// Release the file handle even when the format check or a component fails.
			input.close();
		}
	}

	/**
	 * Writes the global control information followed by the header,
	 * dictionary and triples sections. The stream is not closed by this method.
	 *
	 * @param output   destination stream
	 * @param listener receives progress notifications
	 * @throws IOException if writing fails
	 * @see hdt.HDT#saveToHDT(java.io.OutputStream)
	 */
	@Override
	public void saveToHDT(OutputStream output, ProgressListener listener) throws IOException {
		ControlInfo ci = new ControlInformation();
		IntermediateListener iListener = new IntermediateListener(listener);

		ci.clear();
		ci.setType(ControlInfo.Type.GLOBAL);
		ci.setFormat(HDTVocabulary.HDT_CONTAINER);
		ci.save(output);

		ci.clear();
		ci.setType(ControlInfo.Type.HEADER);
		header.save(output, ci, iListener);

		ci.clear();
		ci.setType(ControlInfo.Type.DICTIONARY);
		dictionary.save(output, ci, iListener);

		ci.clear();
		ci.setType(ControlInfo.Type.TRIPLES);
		triples.save(output, ci, iListener);
	}

	/**
	 * Saves this HDT, uncompressed, to the given file and remembers the
	 * file name once the save has completed.
	 *
	 * @param fileName destination path
	 * @param listener receives progress notifications
	 * @throws IOException if the file cannot be created or written
	 * @see hdt.HDT#saveToHDT(java.io.OutputStream)
	 */
	@Override
	public void saveToHDT(String fileName, ProgressListener listener) throws IOException {
		OutputStream out = new BufferedOutputStream(new FileOutputStream(fileName));
		try {
			saveToHDT(out, listener);
		} finally {
			out.close();
		}

		this.hdtFileName = fileName;
	}

	/**
	 * Searches the triples for a string pattern by translating each
	 * component to its dictionary ID first.
	 *
	 * @param subject   subject of the pattern
	 * @param predicate predicate of the pattern
	 * @param object    object of the pattern
	 * @return iterator that translates the matching ID triples back to strings
	 * @throws NotFoundException if the dictionary yields -1 for any of the three components
	 */
	@Override
	public IteratorTripleString search(CharSequence subject, CharSequence predicate, CharSequence object) throws NotFoundException {

		// Conversion from TripleString to TripleID
		TripleID triple = new TripleID(
				dictionary.stringToId(subject, TripleComponentRole.SUBJECT),
				dictionary.stringToId(predicate, TripleComponentRole.PREDICATE),
				dictionary.stringToId(object, TripleComponentRole.OBJECT)
			);

		if(triple.getSubject()==-1 || triple.getPredicate()==-1 || triple.getObject()==-1) {
			throw new NotFoundException("String not found in dictionary");
		}

		return new DictionaryTranslateIterator(triples.search(triple), dictionary, subject, predicate, object);
	}

	/*
	 * (non-Javadoc)
	 *
	 * @see hdt.HDT#getHeader()
	 */
	@Override
	public Header getHeader() {
		return header;
	}

	/*
	 * (non-Javadoc)
	 *
	 * @see hdt.HDT#getDictionary()
	 */
	@Override
	public Dictionary getDictionary() {
		return dictionary;
	}

	/*
	 * (non-Javadoc)
	 *
	 * @see hdt.HDT#getTriples()
	 */
	@Override
	public Triples getTriples() {
		return triples;
	}

	/**
	 * @return the sum of the dictionary size and the triples size
	 * @see hdt.hdt.HDT#getSize()
	 */
	@Override
	public long size() {
		return dictionary.size()+triples.size();
	}

	/**
	 * Takes over the contents of a temporary (modifiable) HDT.
	 *
	 * The temporary HDT is first asked to reorganize its dictionary and
	 * triples. Each component is then either adopted directly, when its
	 * class equals the class of the component currently held, or loaded
	 * into the current component otherwise. Finally the base URI is copied.
	 *
	 * @param modHdt   temporary HDT to import
	 * @param listener receives progress notifications
	 */
	public void loadFromModifiableHDT(TempHDT modHdt, ProgressListener listener) {

		modHdt.reorganizeDictionary(listener);
		modHdt.reorganizeTriples(listener);

		// Get parts
		TempTriples modifiableTriples = (TempTriples) modHdt.getTriples();
		TempDictionary modifiableDictionary = (TempDictionary) modHdt.getDictionary();

		// Convert triples to final format
		if(triples.getClass().equals(modifiableTriples.getClass())) {
			triples = modifiableTriples;
		} else {
			triples.load(modifiableTriples, listener);
		}

		// Convert dictionary to final format
		if(dictionary.getClass().equals(modifiableDictionary.getClass())) {
			dictionary = (DictionaryPrivate)modifiableDictionary;
		} else {
			dictionary.load(modifiableDictionary, listener);
		}

		this.baseUri = modHdt.getBaseURI();
	}

	/**
	 * Loads the triples index that sits next to the HDT file or, when it
	 * cannot be read, generates a new index and tries to save it there.
	 *
	 * The index file is the HDT file name plus ".index"; for a gzipped HDT
	 * the ".gz" part is dropped so that "x.hdt.gz" and "x.hdt" share
	 * "x.hdt.index". When this HDT has no associated file name the index is
	 * generated in memory only. Failing to save a generated index is
	 * reported but does not abort: the in-memory index remains usable.
	 *
	 * @param listener receives progress notifications
	 * @see hdt.hdt.HDT#generateIndex(hdt.listener.ProgressListener)
	 */
	@Override
	public void loadOrCreateIndex(ProgressListener listener) {
		if(hdtFileName==null) {
			// No backing file: there is nothing to read and no sensible place to write.
			triples.generateIndex(listener);
			return;
		}

		ControlInfo ci = new ControlInformation();
		String indexName = hdtFileName+".index";
		// Keep the dot so the result is "x.hdt.index" rather than "xhdt.index".
		indexName = indexName.replaceAll("\\.hdt\\.gz", ".hdt");
		try {
			CountInputStream in = new CountInputStream(new BufferedInputStream(new FileInputStream(indexName)));
			try {
				ci.load(in);
				if(isMapped) {
					triples.mapIndex(in, new File(indexName), ci, listener);
				} else {
					triples.loadIndex(in, ci, listener);
				}
			} finally {
				in.close();
			}
		} catch (Exception e) {
			// Best effort: any failure while reading the index falls back to regenerating it.
			System.out.println("Could not read .hdt.index, Generating a new one.");

			// GENERATE
			triples.generateIndex(listener);

			// SAVE
			try {
				FileOutputStream out = new FileOutputStream(indexName);
				try {
					ci.clear();
					triples.saveIndex(out, ci, listener);
				} finally {
					out.close();
				}
			} catch (IOException e2) {
				System.err.println("Could not save index to "+indexName+": "+e2.getMessage());
			}
		}
	}

	@Override
	public String getBaseURI() {
		return baseUri;
	}

	/**
	 * Replaces the triples component.
	 *
	 * @param triples the new triples component
	 */
	protected void setTriples(TriplesPrivate triples) {
		this.triples = triples;
	}

	/**
	 * Closes the dictionary and the triples. The triples are closed even
	 * if closing the dictionary fails.
	 *
	 * @throws IOException if closing either component fails
	 */
	@Override
	public void close() throws IOException {
		try {
			dictionary.close();
		} finally {
			triples.close();
		}
	}
}