package com.splout.db.qnode; /* * #%L * Splout SQL Server * %% * Copyright (C) 2012 Datasalt Systems S.L. * %% * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * #L% */ import java.util.ArrayList; import java.util.HashMap; import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.thrift.TException; import org.apache.thrift.transport.TTransportException; import com.splout.db.common.JSONSerDe; import com.splout.db.common.JSONSerDe.JSONSerDeException; import com.splout.db.common.PartitionMap; import com.splout.db.common.QueryResult; import com.splout.db.common.ReplicationEntry; import com.splout.db.common.ReplicationMap; import com.splout.db.common.Tablespace; import com.splout.db.dnode.DNodeHandler; import com.splout.db.engine.ResultSerializer; import com.splout.db.engine.ResultSerializer.SerializationException; import com.splout.db.hazelcast.TablespaceVersion; import com.splout.db.qnode.beans.ErrorQueryStatus; import com.splout.db.qnode.beans.QueryStatus; import com.splout.db.thrift.DNodeException; import com.splout.db.thrift.DNodeService; /** * The Querier is a specialized module ( * {@link com.splout.db.qnode.QNodeHandlerModule}) of the * {@link com.splout.db.qnode.QNode} that performs the distributed query * mechanism. */ @SuppressWarnings({ "rawtypes" }) public class Querier extends QNodeHandlerModule { public final static String PARTITION_RANDOM = "random"; private final static Log log = LogFactory.getLog(Querier.class); private boolean useBinaryProtocol = true; @SuppressWarnings("serial") public static final class QuerierException extends Exception { public QuerierException(String msg) { super(msg); } public QuerierException(String msg, Exception e) { super(msg, e); } } public Querier(QNodeHandlerContext context) { super(context); if (context.getConfig().getBoolean(QNodeProperties.DISABLE_BINARY_PROTOCOL)) { this.useBinaryProtocol = false; } } /** * Proxy method for QNodeHandler's query() method. Returns a * {@link QueryStatus} with the status of the query. * * @throws JSONSerDeException * @throws QuerierException * @throws SerializationException */ public QueryStatus query(String tablespaceName, String key, String sql, String partition) throws JSONSerDeException, QuerierException, SerializationException { Long version = context.getCurrentVersionsMap().get(tablespaceName); if (version == null) { return new ErrorQueryStatus("Unknown tablespace or no version ready to be served! (" + tablespaceName + ")"); } Tablespace tablespace = context.getTablespaceVersionsMap().get(new TablespaceVersion(tablespaceName, version)); if (tablespace == null) { return new ErrorQueryStatus("Unknown tablespace version:(" + version + ") tablespace:(" + tablespaceName + ")"); } PartitionMap partitionMap = tablespace.getPartitionMap(); // find the partition int partitionId; // use a key to find the appropriated partition if (key != null) { partitionId = partitionMap.findPartition(key); if (partitionId == PartitionMap.NO_PARTITION) { return new ErrorQueryStatus("Key out of partition ranges: " + key + " for tablespace " + tablespaceName); } } else { // use provided partition // partition shouldn't be null here -> we check it before at QNodeHandler if (partition.toLowerCase().equals(PARTITION_RANDOM)) { partitionId = (int) (Math.random() * partitionMap.getPartitionEntries().size()); } else { try { partitionId = Integer.parseInt(partition); } catch (Exception e) { throw new QuerierException("partition must be either a valid partition number or '" + PARTITION_RANDOM + "' string."); } } } return query(tablespaceName, sql, partitionId); } private ThreadLocal<Map<Integer, Integer>> partitionRoundRobin = new ThreadLocal<Map<Integer, Integer>>() { protected Map<Integer, Integer> initialValue() { return new HashMap<Integer, Integer>(); } }; public Map<Integer, Integer> getPartitionRoundRobin() { return partitionRoundRobin.get(); } /** * API method for querying a tablespace when you already know the partition * Id. Can be used for multi-querying. */ public QueryStatus query(String tablespaceName, String sql, int partitionId) throws JSONSerDeException, SerializationException { String msg = "tablespace[" + tablespaceName + "] partition[" + partitionId + "] sql[" + sql + "]"; Long version = context.getCurrentVersionsMap().get(tablespaceName); if (version == null) { return new ErrorQueryStatus("Unknown tablespace! [" + tablespaceName + "] for " + msg); } Tablespace tablespace = context.getTablespaceVersionsMap().get(new TablespaceVersion(tablespaceName, version)); if (tablespace == null) { return new ErrorQueryStatus("Unknown tablespace! [" + tablespaceName + "] for " + msg); } ReplicationMap replicationMap = tablespace.getReplicationMap(); ReplicationEntry repEntry = null; for (ReplicationEntry rEntry : replicationMap.getReplicationEntries()) { if (rEntry.getShard() == partitionId) { repEntry = rEntry; } } if (repEntry == null) { return new ErrorQueryStatus("Incomplete Tablespace information for tablespace [" + tablespaceName + "] Maybe let the Splout warmup a little bit and try later?. For resolving " + msg); } if (repEntry.getNodes().size() == 0) { // No one alive for serving the // query! return new ErrorQueryStatus("No alive DNodes for " + tablespace + " for " + msg); } String electedNode; int tried = 0; for (;;) { // Fail-over loop electedNode = null; Integer lastNode = partitionRoundRobin.get().get(partitionId); if (lastNode == null) { lastNode = -1; } lastNode++; tried++; int index = lastNode % repEntry.getNodes().size(); electedNode = repEntry.getNodes().get(index); partitionRoundRobin.get().put(partitionId, index); // Perform query QueryStatus qStatus = new QueryStatus(); long start = System.currentTimeMillis(); DNodeService.Client client = null; boolean renew = false; try { client = context.getDNodeClientFromPool(electedNode); if (useBinaryProtocol) { QueryResult r = ResultSerializer.deserialize(client.binarySqlQuery(tablespaceName, version, partitionId, sql)); qStatus.setResult((ArrayList) r.mapify()); } else { qStatus.setResult(JSONSerDe.deSer(client.sqlQuery(tablespaceName, version, partitionId, sql), ArrayList.class)); } long end = System.currentTimeMillis(); // Report the time of the query qStatus.setMillis((end - start)); // ... and the shard hit. qStatus.setShard(partitionId); return qStatus; } catch (TTransportException e) { renew = true; if (tried == repEntry.getNodes().size()) { return new ErrorQueryStatus("Error connecting dnode[" + electedNode + "] for " + msg); } else { log.warn("TTransportException problem when connecting dnode[" + electedNode + "] at trial[" + tried + "] of[" + repEntry.getNodes().size() + "] DNodes. Will retry. Info: " + msg, e); } } catch (InterruptedException e) { log.info("Interrupt received when retrieving connection from pool for dnode[" + electedNode + "] " + msg, e); // In this case we don't retry. } catch (DNodeException e) { if (e.getCode() == DNodeHandler.EXCEPTION_ORDINARY) { // In this case we shoulndn't rety. Just return exception. Typically // this error are syntax errors or this kind of things return new ErrorQueryStatus(e.getMsg() + " from dnode[" + electedNode + "] for " + msg); } else { if (tried == repEntry.getNodes().size()) { return new ErrorQueryStatus("DNode exception [" + e.getMsg() + "] from dnode[" + electedNode + "] for " + msg); } else { log.warn("Error resolving query with dnode[" + electedNode + "] at trial[" + tried + "] of[" + repEntry.getNodes().size() + "] DNodes. Will retry. Info: " + msg, e); } } } catch (TException e) { if (tried == repEntry.getNodes().size()) { return new ErrorQueryStatus("Error connecting dnode[" + electedNode + "] for " + msg); } else { log.warn("TException problem when connecting dnode[" + electedNode + "] at trial[" + tried + "] of[" + repEntry.getNodes().size() + "] DNodes. Will retry. Info: " + msg, e); } } catch (PoolCreationException e) { if (tried == repEntry.getNodes().size()) { return new ErrorQueryStatus("Error creating pool for dnode[" + electedNode + "] for " + msg); } else { log.warn("Error creating pool for dnode[" + electedNode + "] at trial[" + tried + "] of[" + repEntry.getNodes().size() + "] DNodes. Will retry. Info: " + msg, e); } } catch (DNodePoolFullException e) { if (tried == repEntry.getNodes().size()) { return new ErrorQueryStatus("Pool for dnode[" + electedNode + "] full after waiting for timeout. Consider increase " + QNodeProperties.DNODE_POOL_SIZE + " or increase " + QNodeProperties.QNODE_DNODE_POOL_TAKE_TIMEOUT + " timeout for waiting for connections. " + msg); } else { log.warn("Pool for dnode[" + electedNode + "] FULL! at trial[" + tried + "] of[" + repEntry.getNodes().size() + "] DNodes. Will retry. Info: " + msg, e); } } finally { if (client != null) { context.returnDNodeClientToPool(electedNode, client, renew); } } } } /** * Helper method for casting a String to the appropriate Tablespace key type. */ public Comparable<?> castKey(String key, String tablespace, Class<? extends Comparable> clazz) throws Exception { Comparable<?> keyObj; if (clazz.equals(Integer.class)) { keyObj = Integer.parseInt(key); } else if (clazz.equals(Long.class)) { keyObj = Long.parseLong(key); } else if (clazz.equals(Float.class)) { keyObj = Float.parseFloat(key); } else if (clazz.equals(Double.class)) { keyObj = Double.parseDouble(key); } else if (clazz.equals(String.class)) { keyObj = key + ""; } else { // ? throw new RuntimeException("Can't handle tablespace [" + tablespace + "] with key of type " + clazz + ". This is very likely a software bug"); } return keyObj; } }