/* * Copyright 2014 Aerospike, Inc. * * Portions may be licensed to Aerospike, Inc. under one or more * contributor license agreements. * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You * may obtain a copy of the License at * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or * implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.aerospike.hadoop.mapreduce; import java.io.IOException; import java.util.concurrent.ArrayBlockingQueue; import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.RecordReader; import org.apache.hadoop.mapreduce.TaskAttemptContext; import com.aerospike.client.AerospikeClient; import com.aerospike.client.AerospikeException; import com.aerospike.client.AerospikeException.ScanTerminated; import com.aerospike.client.Key; import com.aerospike.client.policy.ClientPolicy; import com.aerospike.client.policy.QueryPolicy; import com.aerospike.client.policy.ScanPolicy; import com.aerospike.client.query.Filter; import com.aerospike.client.query.RecordSet; import com.aerospike.client.query.ResultSet; import com.aerospike.client.query.Statement; import com.aerospike.client.Record; import com.aerospike.client.ScanCallback; public class AerospikeRecordReader extends RecordReader<AerospikeKey, AerospikeRecord> implements org.apache.hadoop.mapred.RecordReader<AerospikeKey, AerospikeRecord> { private class KeyRecPair { public AerospikeKey key; public AerospikeRecord rec; public KeyRecPair(AerospikeKey key, AerospikeRecord rec) { this.key = key; this.rec = rec; } } private static final Log log = LogFactory.getLog(AerospikeRecordReader.class); private ASSCanReader scanReader = null; private ASQueryReader queryReader = null; private ArrayBlockingQueue<KeyRecPair> queue = new ArrayBlockingQueue<KeyRecPair>(16 * 1024); private boolean isFinished = false; private boolean isError = false; private boolean isRunning = false; private String numrangeBin; private long numrangeBegin; private long numrangeEnd; private AerospikeKey currentKey; private AerospikeRecord currentValue; public class CallBack implements ScanCallback { @Override public void scanCallback(Key key, Record record) throws AerospikeException { try { queue.put(new KeyRecPair(new AerospikeKey(key), new AerospikeRecord(record))); } catch (Exception ex) { throw new ScanTerminated(ex); } } } public class ASSCanReader extends java.lang.Thread { String node; String host; int port; String namespace; String setName; String[] binNames; ASSCanReader(String node, String host, int port, String ns, String setName, String[] binNames) { this.node = node; this.host = host; this.port = port; this.namespace = ns; this.setName = setName; this.binNames = binNames; } public void run() { try { AerospikeClient client = AerospikeClientSingleton.getInstance(new ClientPolicy(), host, port); log.info(String.format("scanNode %s:%d:%s:%s", host, port, namespace, setName)); ScanPolicy scanPolicy = new ScanPolicy(); CallBack cb = new CallBack(); log.info("scan starting"); isRunning = true; if (binNames != null) client.scanNode(scanPolicy, node, namespace, setName, cb, binNames); else client.scanNode(scanPolicy, node, namespace, setName, cb); isFinished = true; log.info("scan finished"); } catch (Exception ex) { log.error("exception in ASSCanReader.run: " + ex); isError = true; return; } } } public class ASQueryReader extends java.lang.Thread { String node; String host; int port; String namespace; String setName; String[] binNames; String numrangeBin; long numrangeBegin; long numrangeEnd; ASQueryReader(String node, String host, int port, String ns, String setName, String[] binNames, String numrangeBin, long numrangeBegin, long numrangeEnd) { this.node = node; this.host = host; this.port = port; this.namespace = ns; this.setName = setName; this.binNames = binNames; this.numrangeBin = numrangeBin; this.numrangeBegin = numrangeBegin; this.numrangeEnd = numrangeEnd; } public void run() { try { AerospikeClient client = AerospikeClientSingleton.getInstance(new ClientPolicy(), host, port); log.info(String.format("queryNode %s:%d %s:%s:%s[%d:%d]", host, port, namespace, setName, numrangeBin, numrangeBegin, numrangeEnd)); Statement stmt = new Statement(); stmt.setNamespace(namespace); stmt.setSetName(setName); stmt.setFilters(Filter.range(numrangeBin, numrangeBegin, numrangeEnd)); if (binNames != null) stmt.setBinNames(binNames); QueryPolicy queryPolicy = new QueryPolicy(); RecordSet rs = client.queryNode(queryPolicy, stmt, client.getNode(node)); isRunning = true; try { log.info("query starting"); while (rs.next()) { Key key = rs.getKey(); Record record = rs.getRecord(); queue.put(new KeyRecPair(new AerospikeKey(key), new AerospikeRecord(record))); } } finally { rs.close(); isFinished = true; log.info("query finished"); } } catch (Exception ex) { isError = true; return; } } } public AerospikeRecordReader() throws IOException { log.info("NEW CTOR"); } public AerospikeRecordReader(AerospikeSplit split) throws IOException { log.info("OLD CTOR"); init(split); } public void init(AerospikeSplit split) throws IOException { final String type = split.getType(); final String node = split.getNode(); final String host = split.getHost(); final int port = split.getPort(); final String namespace = split.getNameSpace(); final String setName = split.getSetName(); final String[] binNames = split.getBinNames(); this.numrangeBin = split.getNumRangeBin(); this.numrangeBegin = split.getNumRangeBegin(); this.numrangeEnd = split.getNumRangeEnd(); if (type.equals("scan")) { scanReader = new ASSCanReader(node, host, port, namespace, setName, binNames); scanReader.start(); } else if (type.equals("numrange")) { queryReader = new ASQueryReader(node, host, port, namespace, setName, binNames, numrangeBin, numrangeBegin, numrangeEnd); queryReader.start(); } log.info("node: " + node); } public AerospikeKey createKey() { return new AerospikeKey(); } public AerospikeRecord createValue() { return new AerospikeRecord(); } protected AerospikeKey setCurrentKey(AerospikeKey oldApiKey, AerospikeKey newApiKey, AerospikeKey keyval) { if (oldApiKey == null) { oldApiKey = new AerospikeKey(); oldApiKey.set(keyval); } // new API might not be used if (newApiKey != null) { newApiKey.set(keyval); } return oldApiKey; } protected AerospikeRecord setCurrentValue(AerospikeRecord oldApiVal, AerospikeRecord newApiVal, AerospikeRecord val) { if (oldApiVal == null) { oldApiVal = new AerospikeRecord(); oldApiVal.set(val); } // new API might not be used if (newApiVal != null) { newApiVal.set(val); } return oldApiVal; } public synchronized boolean next(AerospikeKey key, AerospikeRecord value) throws IOException { final int waitMSec = 1000; int trials = 5; try { KeyRecPair pair; while (true) { if (isError) return false; if (!isRunning) { Thread.sleep(100); continue; } if (!isFinished && queue.size() == 0) { if (trials == 0) { log.error("SCAN TIMEOUT"); return false; } log.info("queue empty: waiting..."); Thread.sleep(waitMSec); trials--; } else if (isFinished && queue.size() == 0) { return false; } else if (queue.size() != 0) { pair = queue.take(); break; } } // log.info("key=" + pair.key + ", val=" + pair.rec); currentKey = setCurrentKey(currentKey, key, pair.key); currentValue = setCurrentValue(currentValue, value, pair.rec); } catch (Exception ex) { log.error("exception in AerospikeRecordReader.next: " + ex); throw new IOException("exception in AerospikeRecordReader.next", ex); } return true; } public float getProgress() { if (isFinished) return 1.0f; else return 0.0f; } public synchronized long getPos() throws IOException { return 0; } public synchronized void close() throws IOException { if (scanReader != null) { try { scanReader.join(); } catch (Exception ex) { throw new IOException("exception in AerospikeRecordReader.close", ex); } scanReader = null; } if (queryReader != null) { try { queryReader.join(); } catch (Exception ex) { throw new IOException("exception in AerospikeRecordReader.close", ex); } queryReader = null; } } // ---------------- NEW API ---------------- @Override public void initialize(InputSplit split, TaskAttemptContext context) throws IOException { log.info("INITIALIZE"); init((AerospikeSplit) split); } @Override public boolean nextKeyValue() throws IOException { // new API call routed to old API if (currentKey == null) { currentKey = createKey(); } if (currentValue == null) { currentValue = createValue(); } // FIXME: does the new API mandate a new instance each time (?) return next(currentKey, currentValue); } @Override public AerospikeKey getCurrentKey() throws IOException { return currentKey; } @Override public AerospikeRecord getCurrentValue() { return currentValue; } } // Local Variables: // mode: java // c-basic-offset: 4 // tab-width: 4 // indent-tabs-mode: nil // End: // vim: softtabstop=4:shiftwidth=4:expandtab