// Copyright 2017 JanusGraph Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package org.janusgraph.diskstorage.keycolumnvalue.scan;

import com.google.common.base.Preconditions;
import com.google.common.util.concurrent.AbstractFuture;
import org.janusgraph.core.schema.JanusGraphManagement;
import org.janusgraph.diskstorage.*;
import org.janusgraph.diskstorage.configuration.Configuration;
import org.janusgraph.diskstorage.keycolumnvalue.*;
import org.janusgraph.diskstorage.util.BufferUtil;
import org.janusgraph.diskstorage.util.RecordIterator;
import org.janusgraph.diskstorage.util.StaticArrayEntry;
import org.janusgraph.diskstorage.util.StaticArrayEntryList;
import org.janusgraph.util.system.Threads;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.*;
import java.util.concurrent.*;
import java.util.function.Consumer;
import java.util.function.Predicate;

/**
 * @author Matthias Broecheler (me@matthiasb.com)
 */
class StandardScannerExecutor extends AbstractFuture<ScanMetrics> implements JanusGraphManagement.IndexJobFuture, Runnable {

    private static final Logger log = LoggerFactory.getLogger(StandardScannerExecutor.class);

    private static final int QUEUE_SIZE = 1000;
    private static final int TIMEOUT_MS = 180000; // 3 minutes
    private static final int MAX_KEY_LENGTH = 128; // in bytes

    private final ScanJob job;
    private final Consumer<ScanMetrics> finishJob;
    private final StoreFeatures storeFeatures;
    private final StoreTransaction storeTx;
    private final KeyColumnValueStore store;
    private final int numProcessors;
    private final int workBlockSize;
    private final Configuration jobConfiguration;
    private final Configuration graphConfiguration;
    private final ScanMetrics metrics;

    private boolean hasCompleted = false;
    private boolean interrupted = false;

    private List<SliceQuery> queries;
    private int numQueries;
    private List<BlockingQueue<SliceResult>> dataQueues;
    private DataPuller[] pullThreads;

    StandardScannerExecutor(final ScanJob job, final Consumer<ScanMetrics> finishJob,
                            final KeyColumnValueStore store, final StoreTransaction storeTx,
                            final StoreFeatures storeFeatures,
                            final int numProcessors, final int workBlockSize,
                            final Configuration jobConfiguration,
                            final Configuration graphConfiguration) throws BackendException {
        this.job = job;
        this.finishJob = finishJob;
        this.store = store;
        this.storeTx = storeTx;
        this.storeFeatures = storeFeatures;
        this.numProcessors = numProcessors;
        this.workBlockSize = workBlockSize;
        this.jobConfiguration = jobConfiguration;
        this.graphConfiguration = graphConfiguration;

        metrics = new StandardScanMetrics();
    }

    private DataPuller addDataPuller(SliceQuery sq, StoreTransaction stx) throws BackendException {
        BlockingQueue<SliceResult> queue = new LinkedBlockingQueue<>(QUEUE_SIZE);
        dataQueues.add(queue);
        DataPuller dp = new DataPuller(sq, queue,
                KCVSUtil.getKeys(store, sq, storeFeatures, MAX_KEY_LENGTH, stx), job.getKeyFilter());
        dp.start();
        return dp;
    }
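    /**
     * Runs the scan: one {@link DataPuller} thread is started per slice query and one
     * {@link Processor} thread per configured processing slot. Rows pulled from the
     * backing store are merged by key across all queries (the first query acts as the
     * grounding query that drives termination) and handed to the processor pool.
     *
     * <p>For orientation, a rough sketch of how an executor like this is typically
     * driven; the actual entry point is {@code StandardScanner}, and the builder
     * method names below are illustrative assumptions, not a verified API:
     * <pre>{@code
     * // Hypothetical usage sketch; method names are assumptions.
     * JanusGraphManagement.IndexJobFuture future = scanner.newBuilder()
     *     .setJob(job)   // the ScanJob to execute
     *     .execute();    // schedules this Runnable and returns the future
     * ScanMetrics metrics = future.get(); // same object as getIntermediateResult()
     * }</pre>
     */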
    @Override
    public void run() {
        try {
            job.workerIterationStart(jobConfiguration, graphConfiguration, metrics);

            queries = job.getQueries();
            numQueries = queries.size();
            Preconditions.checkArgument(numQueries > 0, "Must specify at least one query for job: %s", job);
            if (numQueries > 1) {
                //It is assumed that the first query is the grounding query if multiple queries exist
                SliceQuery ground = queries.get(0);
                StaticBuffer start = ground.getSliceStart();
                Preconditions.checkArgument(start.equals(BufferUtil.zeroBuffer(1)),
                        "Expected start of first query to be a single zero byte: %s", start);
                StaticBuffer end = ground.getSliceEnd();
                Preconditions.checkArgument(end.equals(BufferUtil.oneBuffer(end.length())),
                        "Expected end of first query to be all 1s: %s", end);
            }
            dataQueues = new ArrayList<>(numQueries);
            pullThreads = new DataPuller[numQueries];

            for (int pos = 0; pos < numQueries; pos++) {
                pullThreads[pos] = addDataPuller(queries.get(pos), storeTx);
            }
        } catch (Throwable e) {
            log.error("Exception trying to setup the job:", e);
            cleanupSilent();
            job.workerIterationEnd(metrics);
            setException(e);
            return;
        }

        BlockingQueue<Row> processorQueue = new LinkedBlockingQueue<>(QUEUE_SIZE);

        Processor[] processors = new Processor[numProcessors];
        for (int i = 0; i < processors.length; i++) {
            processors[i] = new Processor(job.clone(), processorQueue);
            processors[i].start();
        }

        try {
            SliceResult[] currentResults = new SliceResult[numQueries];
            while (!interrupted) {
                for (int i = 0; i < numQueries; i++) {
                    if (currentResults[i] != null) continue;
                    BlockingQueue<SliceResult> queue = dataQueues.get(i);

                    SliceResult qr = queue.poll(10, TimeUnit.MILLISECONDS); //Try very short time to see if we are done
                    if (qr == null) {
                        if (pullThreads[i].isFinished()) continue; //No more data to be expected
                        qr = queue.poll(TIMEOUT_MS, TimeUnit.MILLISECONDS); //otherwise, give it more time
                        if (qr == null && !pullThreads[i].isFinished())
                            throw new TemporaryBackendException("Timed out waiting for next row data - storage error likely");
                    }
                    currentResults[i] = qr;
                }
                SliceResult conditionQuery = currentResults[0];
                if (conditionQuery == null) break; //Termination condition - primary query has no more data
                final StaticBuffer key = conditionQuery.key;

                Map<SliceQuery, EntryList> queryResults = new HashMap<>(numQueries);
                for (int i = 0; i < currentResults.length; i++) {
                    SliceQuery query = queries.get(i);
                    EntryList entries = EntryList.EMPTY_LIST;
                    if (currentResults[i] != null && currentResults[i].key.equals(key)) {
                        assert query.equals(currentResults[i].query);
                        entries = currentResults[i].entries;
                        currentResults[i] = null;
                    }
                    queryResults.put(query, entries);
                }
                processorQueue.put(new Row(key, queryResults));
            }
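            // The grounding query is exhausted (or we were interrupted): the remaining
            // pull threads should be done as well. Give each a short grace period, then
            // force-interrupt any stragglers before flushing the processor pool.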
            for (int i = 0; i < pullThreads.length; i++) {
                pullThreads[i].join(10);
                if (pullThreads[i].isAlive()) {
                    log.warn("Data pulling thread [{}] did not terminate. Forcing termination", i);
                    pullThreads[i].interrupt();
                }
            }

            for (int i = 0; i < processors.length; i++) {
                processors[i].finish();
            }
            if (!Threads.waitForCompletion(processors, TIMEOUT_MS)) log.error("Processors did not terminate in time");

            cleanup();
            job.workerIterationEnd(metrics);
            if (interrupted) {
                setException(new InterruptedException("Scanner got interrupted"));
            } else {
                finishJob.accept(metrics);
                set(metrics);
            }
        } catch (Throwable e) {
            log.error("Exception occurred during job execution:", e);
            job.workerIterationEnd(metrics);
            setException(e);
        } finally {
            Threads.terminate(processors);
            cleanupSilent();
        }
    }

    @Override
    protected void interruptTask() {
        interrupted = true;
    }

    private void cleanup() throws BackendException {
        if (!hasCompleted) {
            hasCompleted = true;
            if (pullThreads != null) {
                for (int i = 0; i < pullThreads.length; i++) {
                    if (pullThreads[i].isAlive()) {
                        pullThreads[i].interrupt();
                    }
                }
            }
            storeTx.rollback();
        }
    }

    private void cleanupSilent() {
        try {
            cleanup();
        } catch (BackendException ex) {
            log.error("Encountered exception when trying to clean up after failure", ex);
        }
    }

    @Override
    public ScanMetrics getIntermediateResult() {
        return metrics;
    }

    private static class Row {

        final StaticBuffer key;
        final Map<SliceQuery, EntryList> entries;

        private Row(StaticBuffer key, Map<SliceQuery, EntryList> entries) {
            this.key = key;
            this.entries = entries;
        }
    }

    private class Processor extends Thread {

        private ScanJob job;
        private final BlockingQueue<Row> processorQueue;

        private volatile boolean finished;
        private int numProcessed;

        private Processor(ScanJob job, BlockingQueue<Row> processorQueue) {
            this.job = job;
            this.processorQueue = processorQueue;
            this.finished = false;
            this.numProcessed = 0;
        }

        @Override
        public void run() {
            try {
                job.workerIterationStart(jobConfiguration, graphConfiguration, metrics);
                while (!finished || !processorQueue.isEmpty()) {
                    Row row;
                    while ((row = processorQueue.poll(100, TimeUnit.MILLISECONDS)) != null) {
                        if (numProcessed >= workBlockSize) {
                            //Setup new chunk of work
                            job.workerIterationEnd(metrics);
                            job = job.clone();
                            job.workerIterationStart(jobConfiguration, graphConfiguration, metrics);
                            numProcessed = 0;
                        }
                        try {
                            job.process(row.key, row.entries, metrics);
                            metrics.increment(ScanMetrics.Metric.SUCCESS);
                        } catch (Throwable ex) {
                            log.error("Exception processing row [" + row.key + "]:", ex);
                            metrics.increment(ScanMetrics.Metric.FAILURE);
                        }
                        numProcessed++;
                    }
                }
            } catch (InterruptedException e) {
                log.error("Processing thread interrupted while waiting on queue or processing data", e);
            } catch (Throwable e) {
                log.error("Unexpected error processing data:", e);
            } finally {
                job.workerIterationEnd(metrics);
            }
        }

        public void finish() {
            this.finished = true;
        }
    }
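    /**
     * Pulls all (key, entries) pairs for a single {@link SliceQuery} from the store's
     * key iterator and enqueues them as {@link SliceResult}s on a bounded queue.
     * {@code finished} is set only once the iterator is fully drained, which the main
     * loop uses to distinguish "no more data" from "data not yet available".
     */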
    private static class DataPuller extends Thread {

        private final BlockingQueue<SliceResult> queue;
        private final KeyIterator keyIter;
        private final SliceQuery query;
        private final Predicate<StaticBuffer> keyFilter;
        private volatile boolean finished;

        private DataPuller(SliceQuery query, BlockingQueue<SliceResult> queue,
                           KeyIterator keyIter, Predicate<StaticBuffer> keyFilter) {
            this.query = query;
            this.queue = queue;
            this.keyIter = keyIter;
            this.keyFilter = keyFilter;
            this.finished = false;
        }

        @Override
        public void run() {
            try {
                while (keyIter.hasNext()) {
                    StaticBuffer key = keyIter.next();
                    RecordIterator<Entry> entries = keyIter.getEntries();
                    if (!keyFilter.test(key)) continue;
                    EntryList entryList = StaticArrayEntryList.ofStaticBuffer(entries, StaticArrayEntry.ENTRY_GETTER);
                    queue.put(new SliceResult(query, key, entryList));
                }
                finished = true;
            } catch (InterruptedException e) {
                log.error("Data-pulling thread interrupted while waiting on queue or data", e);
            } catch (Throwable e) {
                log.error("Could not load data from storage:", e);
            } finally {
                try {
                    keyIter.close();
                } catch (IOException e) {
                    log.warn("Could not close storage iterator", e);
                }
            }
        }

        public boolean isFinished() {
            return finished;
        }
    }

    private static class SliceResult {

        final SliceQuery query;
        final StaticBuffer key;
        final EntryList entries;

        private SliceResult(SliceQuery query, StaticBuffer key, EntryList entries) {
            this.query = query;
            this.key = key;
            this.entries = entries;
        }
    }
}