/* * Copyright 2015 NAVER Corp. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.navercorp.pinpoint.common.hbase.parallel; import com.navercorp.pinpoint.common.hbase.HbaseAccessor; import com.sematext.hbase.wd.AbstractRowKeyDistributor; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.util.Bytes; import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.concurrent.ExecutorService; /** * @author HyunGil Jeong */ public class ParallelResultScanner implements ResultScanner { private final AbstractRowKeyDistributor keyDistributor; private final List<ScanTask> scanTasks; private final Result[] nextResults; private Result next = null; public ParallelResultScanner(TableName tableName, HbaseAccessor hbaseAccessor, ExecutorService executor, Scan originalScan, AbstractRowKeyDistributor keyDistributor, int numParallelThreads) throws IOException { if (hbaseAccessor == null) { throw new NullPointerException("hbaseAccessor must not be null"); } if (executor == null) { throw new NullPointerException("executor must not be null"); } if (keyDistributor == null) { throw new NullPointerException("keyDistributor must not be null"); } if (originalScan == null) { throw new NullPointerException("originalScan must not be null"); } this.keyDistributor = keyDistributor; final ScanTaskConfig scanTaskConfig = new ScanTaskConfig(tableName, hbaseAccessor, keyDistributor, originalScan.getCaching()); final Scan[] splitScans = splitScans(originalScan); this.scanTasks = createScanTasks(scanTaskConfig, splitScans, numParallelThreads); this.nextResults = new Result[scanTasks.size()]; for (ScanTask scanTask : scanTasks) { executor.execute(scanTask); } } private Scan[] splitScans(Scan originalScan) throws IOException { Scan[] scans = this.keyDistributor.getDistributedScans(originalScan); for (int i = 0; i < scans.length; ++i) { Scan scan = scans[i]; scan.setId(originalScan.getId() + "-" + i); } return scans; } private List<ScanTask> createScanTasks(ScanTaskConfig scanTaskConfig, Scan[] splitScans, int numParallelThreads) { if (splitScans.length <= numParallelThreads) { List<ScanTask> scanTasks = new ArrayList<>(splitScans.length); for (Scan scan : splitScans) { scanTasks.add(new ScanTask(scanTaskConfig, scan)); } return scanTasks; } else { int maxIndividualScans = (splitScans.length + (numParallelThreads - 1)) / numParallelThreads; List<List<Scan>> scanDistributions = new ArrayList<>(numParallelThreads); for (int i = 0; i < numParallelThreads; ++i) { scanDistributions.add(new ArrayList<Scan>(maxIndividualScans)); } for (int i = 0; i < splitScans.length; ++i) { scanDistributions.get(i % numParallelThreads).add(splitScans[i]); } List<ScanTask> scanTasks = new ArrayList<>(numParallelThreads); for (List<Scan> scanDistribution : scanDistributions) { Scan[] scansForSingleTask = scanDistribution.toArray(new Scan[scanDistribution.size()]); scanTasks.add(new ScanTask(scanTaskConfig, scansForSingleTask)); } return scanTasks; } } private boolean hasNext() throws IOException { if (next != null) { return true; } next = nextInternal(); return next != null; } @Override public Result next() throws IOException { if (hasNext()) { Result toReturn = next; next = null; return toReturn; } return null; } private Result nextInternal() throws IOException { Result result = null; int indexOfResultToUse = -1; for (int i = 0; i < this.scanTasks.size(); ++i) { ScanTask scanTask = this.scanTasks.get(i); // fail fast in case of errors checkTask(scanTask); if (nextResults[i] == null) { try { nextResults[i] = scanTask.getResult(); } catch (InterruptedException e) { Thread.currentThread().interrupt(); return null; } if (nextResults[i] == null) { continue; } } if (result == null || Bytes.compareTo(keyDistributor.getOriginalKey(nextResults[i].getRow()), keyDistributor.getOriginalKey(result.getRow())) < 0) { result = nextResults[i]; indexOfResultToUse = i; } } if (indexOfResultToUse >= 0) { nextResults[indexOfResultToUse] = null; } return result; } private void checkTask(ScanTask scanTask) { Throwable th = scanTask.getThrowable(); if (th != null) { throw new ScanTaskException(th); } } @Override public Result[] next(int nbRows) throws IOException { // Identical to HTable.ClientScanner implementation // Collect values to be returned here ArrayList<Result> resultSets = new ArrayList<>(nbRows); for (int i = 0; i < nbRows; i++) { Result next = next(); if (next != null) { resultSets.add(next); } else { break; } } return resultSets.toArray(new Result[resultSets.size()]); } @Override public void close() { for (ScanTask scanTask : this.scanTasks) { scanTask.close(); } } @Override public Iterator<Result> iterator() { // Identical to HTable.ClientScanner implementation return new Iterator<Result>() { // The next RowResult, possibly pre-read Result next = null; // return true if there is another item pending, false if there isn't. // this method is where the actual advancing takes place, but you need // to call next() to consume it. hasNext() will only advance if there // isn't a pending next(). public boolean hasNext() { if (next == null) { try { next = ParallelResultScanner.this.next(); return next != null; } catch (IOException e) { throw new RuntimeException(e); } } return true; } // get the pending next item and advance the iterator. returns null if // there is no next item. public Result next() { // since hasNext() does the real advancing, we call this to determine // if there is a next before proceeding. if (!hasNext()) { return null; } // if we get to here, then hasNext() has given us an item to return. // we want to return the item and then null out the next pointer, so // we use a temporary variable. Result temp = next; next = null; return temp; } public void remove() { throw new UnsupportedOperationException(); } }; } }