/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.jena.tdb.store.bulkloader2;

import java.io.FileNotFoundException ;
import java.io.FileOutputStream ;
import java.io.OutputStream ;
import java.util.List ;

import org.apache.jena.atlas.AtlasException ;
import org.apache.jena.atlas.io.IO ;
import org.apache.jena.atlas.lib.DateTimeUtils ;
import org.apache.jena.atlas.lib.ProgressMonitor ;
import org.apache.jena.graph.Node ;
import org.apache.jena.graph.Triple ;
import org.apache.jena.riot.RDFDataMgr ;
import org.apache.jena.sparql.core.Quad ;
import org.apache.jena.tdb.TDB ;
import org.apache.jena.tdb.base.file.Location ;
import org.apache.jena.tdb.setup.DatasetBuilderStd ;
import org.apache.jena.tdb.solver.stats.Stats ;
import org.apache.jena.tdb.solver.stats.StatsCollectorNodeId ;
import org.apache.jena.tdb.store.DatasetGraphTDB ;
import org.apache.jena.tdb.store.NodeId ;
import org.apache.jena.tdb.store.bulkloader.BulkLoader ;
import org.apache.jena.tdb.store.bulkloader.BulkStreamRDF ;
import org.apache.jena.tdb.store.nodetable.NodeTable ;
import org.apache.jena.tdb.store.nodetupletable.NodeTupleTable ;
import org.apache.jena.tdb.sys.Names ;
import org.slf4j.Logger ;

public class ProcNodeTableBuilder {
    private static Logger cmdLog = TDB.logLoader ;

    /** Parse the data files, building the node table and writing rows of NodeIds
     *  for triples and quads to the given intermediate files.
     *  Optionally collects statistics for the stats file. */
    public static void exec(Location location,
                            String dataFileTriples, String dataFileQuads,
                            List<String> datafiles, boolean collectStats) {
        // This formats the location correctly.
        // But we're not really interested in it all,
        // so close the triple and quad indexes; only the node table and prefixes are needed here.
        DatasetGraphTDB dsg = DatasetBuilderStd.create(location) ;
        dsg.getTripleTable().getNodeTupleTable().getTupleTable().close() ;
        dsg.getQuadTable().getNodeTupleTable().getTupleTable().close() ;

        ProgressMonitor monitor = ProgressMonitor.create(cmdLog, "Data", BulkLoader.DataTickPoint, BulkLoader.superTick) ;

        OutputStream outputTriples = null ;
        OutputStream outputQuads = null ;
        try {
            outputTriples = new FileOutputStream(dataFileTriples) ;
            outputQuads = new FileOutputStream(dataFileQuads) ;
        }
        catch (FileNotFoundException e) {
            throw new AtlasException(e) ;
        }

        NodeTableBuilder sink = new NodeTableBuilder(dsg, monitor, outputTriples, outputQuads, collectStats) ;
        monitor.start() ;
        sink.startBulk() ;
        for ( String filename : datafiles ) {
            if ( datafiles.size() > 0 )
                cmdLog.info("Load: " + filename + " -- " + DateTimeUtils.nowAsString()) ;
            RDFDataMgr.parse(sink, filename) ;
        }
        sink.finishBulk() ;
        IO.close(outputTriples) ;
        IO.close(outputQuads) ;

        // ---- Stats
        // See the Stats class.
        if ( ! location.isMem() && sink.getCollector() != null )
            Stats.write(dsg.getLocation().getPath(Names.optStats), sink.getCollector().results()) ;

        // ---- Monitor
        long time = monitor.finish() ;
        long total = monitor.getTicks() ;
        float elapsedSecs = time / 1000F ;
        float rate = (elapsedSecs != 0) ? total / elapsedSecs : 0 ;
        String str = String.format("Total: %,d tuples : %,.2f seconds : %,.2f tuples/sec [%s]",
                                   total, elapsedSecs, rate, DateTimeUtils.nowAsString()) ;
        cmdLog.info(str) ;
    }

    /** StreamRDF sink: allocates a NodeId for each RDF term and writes the id rows
     *  to the triple and quad output streams. */
    static class NodeTableBuilder implements BulkStreamRDF {
        private DatasetGraphTDB dsg ;
        private NodeTable nodeTable ;
        private WriteRows writerTriples ;
        private WriteRows writerQuads ;
        private ProgressMonitor monitor ;
        private StatsCollectorNodeId stats ;

        NodeTableBuilder(DatasetGraphTDB dsg, ProgressMonitor monitor,
                         OutputStream outputTriples, OutputStream outputQuads,
                         boolean collectStats) {
            this.dsg = dsg ;
            this.monitor = monitor ;
            NodeTupleTable ntt = dsg.getTripleTable().getNodeTupleTable() ;
            this.nodeTable = ntt.getNodeTable() ;
            this.writerTriples = new WriteRows(outputTriples, 3, 20000) ;
            this.writerQuads = new WriteRows(outputQuads, 4, 20000) ;
            if ( collectStats )
                this.stats = new StatsCollectorNodeId(nodeTable) ;
        }

        @Override
        public void startBulk() {}
        @Override
        public void start() {}
        @Override
        public void finish() {}

        @Override
        public void finishBulk() {
            writerTriples.flush() ;
            writerQuads.flush() ;
            nodeTable.sync() ;
            dsg.getPrefixes().sync() ;
        }

        @Override
        public void triple(Triple triple) {
            Node s = triple.getSubject() ;
            Node p = triple.getPredicate() ;
            Node o = triple.getObject() ;
            process(Quad.tripleInQuad, s, p, o) ;
        }

        @Override
        public void quad(Quad quad) {
            Node s = quad.getSubject() ;
            Node p = quad.getPredicate() ;
            Node o = quad.getObject() ;
            Node g = null ;
            // Union graph?!
            if ( ! quad.isTriple() && ! quad.isDefaultGraph() )
                g = quad.getGraph() ;
            process(g, s, p, o) ;
        }

        /** Allocate (or look up) NodeIds for the terms and write one row of ids. */
        private void process(Node g, Node s, Node p, Node o) {
            NodeId sId = nodeTable.getAllocateNodeId(s) ;
            NodeId pId = nodeTable.getAllocateNodeId(p) ;
            NodeId oId = nodeTable.getAllocateNodeId(o) ;

            if ( g != null ) {
                NodeId gId = nodeTable.getAllocateNodeId(g) ;
                writerQuads.write(gId.getId()) ;
                writerQuads.write(sId.getId()) ;
                writerQuads.write(pId.getId()) ;
                writerQuads.write(oId.getId()) ;
                writerQuads.endOfRow() ;
                if ( stats != null )
                    stats.record(gId, sId, pId, oId) ;
            } else {
                writerTriples.write(sId.getId()) ;
                writerTriples.write(pId.getId()) ;
                writerTriples.write(oId.getId()) ;
                writerTriples.endOfRow() ;
                if ( stats != null )
                    stats.record(null, sId, pId, oId) ;
            }
            monitor.tick() ;
        }

        public StatsCollectorNodeId getCollector() { return stats ; }

        @Override
        public void base(String base) {}

        @Override
        public void prefix(String prefix, String iri) {
            dsg.getPrefixes().getPrefixMapping().setNsPrefix(prefix, iri) ;
        }
    }
}
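/*
 * Usage sketch (hypothetical, for illustration only): the database directory
 * and intermediate file names below are made-up examples, not values taken
 * from this code or its callers.
 *
 *   Location location = Location.create("DB1") ;
 *   List<String> datafiles = Arrays.asList("data.nq") ;
 *   ProcNodeTableBuilder.exec(location,
 *                             "data-triples.tmp",   // receives triple NodeId rows
 *                             "data-quads.tmp",     // receives quad NodeId rows
 *                             datafiles,
 *                             true) ;               // collect statistics
 */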