/*
 * Copyright (c) 2013 Big Switch Networks, Inc.
 *
 * Licensed under the Eclipse Public License, Version 1.0 (the
 * "License"); you may not use this file except in compliance with the
 * License. You may obtain a copy of the License at
 *
 *      http://www.eclipse.org/legal/epl-v10.html
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

package org.sdnplatform.storage.cassandra;

import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TimeZone;
import java.util.UUID;

import org.apache.cassandra.thrift.AuthenticationException;
import org.apache.cassandra.thrift.AuthenticationRequest;
import org.apache.cassandra.thrift.AuthorizationException;
import org.apache.cassandra.thrift.Cassandra;
import org.apache.cassandra.thrift.CfDef;
import org.apache.cassandra.thrift.Column;
import org.apache.cassandra.thrift.ColumnDef;
import org.apache.cassandra.thrift.ColumnOrSuperColumn;
import org.apache.cassandra.thrift.ColumnParent;
import org.apache.cassandra.thrift.ColumnPath;
import org.apache.cassandra.thrift.ConsistencyLevel;
import org.apache.cassandra.thrift.IndexClause;
import org.apache.cassandra.thrift.IndexExpression;
import org.apache.cassandra.thrift.IndexOperator;
import org.apache.cassandra.thrift.IndexType;
import org.apache.cassandra.thrift.InvalidRequestException;
import org.apache.cassandra.thrift.KeyRange;
import org.apache.cassandra.thrift.KeySlice;
import org.apache.cassandra.thrift.KsDef;
import org.apache.cassandra.thrift.Mutation;
import org.apache.cassandra.thrift.SchemaDisagreementException;
import org.apache.cassandra.thrift.SlicePredicate;
import org.apache.cassandra.thrift.SliceRange;
import org.apache.cassandra.thrift.TimedOutException;
import org.apache.cassandra.thrift.UnavailableException;
import org.apache.thrift.TException;
import org.apache.thrift.protocol.TBinaryProtocol;
import org.apache.thrift.protocol.TProtocol;
import org.apache.thrift.transport.TFramedTransport;
import org.apache.thrift.transport.TSocket;
import org.apache.thrift.transport.TTransport;
import org.apache.thrift.transport.TTransportException;
import org.sdnplatform.storage.StorageException;

public class Connection {

    String host;
    int port;

    private TTransport transport;
    private Cassandra.Client client;
    private String keyspace;
    private String user;
    private String password;
    private boolean inUse;

    private Map<ByteBuffer,Map<String,List<Mutation>>> pendingMutations;
    private Set<Object> pendingDeletions;
    private String pendingColumnFamily;

    private static long lastTimestamp;

    // SimpleDateFormat is not thread-safe so we need to keep a separate
    // instance per thread.
    private static final ThreadLocal<DateFormat> dateFormat =
            new ThreadLocal<DateFormat>();

    // Sentinel stored in place of a null column value.
    private static final String NULL_VALUE_STRING = "\b";

    public Connection(String host, int port) {
        // Use defaults for host and port if they're not specified
        if (host == null)
            host = "localhost";
        if (port == 0)
            port = 9160;
        this.host = host;
        this.port = port;
        open();
        inUse = false;
    }

    private Cassandra.Client getClient() {
        if (client == null)
            open();
        return client;
    }

    private SlicePredicate getSlicePredicate(String[] columnNameList) {
        SlicePredicate slicePredicate = new SlicePredicate();
        try {
            if (columnNameList != null) {
                List<ByteBuffer> columnNameByteBufferList =
                        new ArrayList<ByteBuffer>();
                for (String columnName: columnNameList) {
                    byte[] columnNameBytes = columnName.getBytes("UTF-8");
                    columnNameByteBufferList.add(
                            ByteBuffer.wrap(columnNameBytes));
                }
                slicePredicate.setColumn_names(columnNameByteBufferList);
            } else {
                SliceRange sliceRange = new SliceRange();
                sliceRange.setStart(new byte[0]);
                sliceRange.setFinish(new byte[0]);
                // FIXME: The default column count is 100. We should tune
                // this value.
                sliceRange.setCount(100000);
                slicePredicate.setSlice_range(sliceRange);
            }
        } catch (UnsupportedEncodingException exc) {
            throw new StorageException("Character encoding exception with key range", exc);
        }
        return slicePredicate;
    }

    private List<Map<String,Object>> convertKeySliceList(
            List<KeySlice> keySliceList, String primaryKeyName) {
        List<Map<String,Object>> rowList = new ArrayList<Map<String,Object>>();
        try {
            for (KeySlice keySlice: keySliceList) {
                List<ColumnOrSuperColumn> columnList = keySlice.getColumns();
                if (!columnList.isEmpty()) {
                    byte[] keyBytes = keySlice.getKey();
                    String key = new String(keyBytes, "UTF-8");
                    Map<String,Object> columnMap = new HashMap<String,Object>();
                    columnMap.put(primaryKeyName, key);
                    for (ColumnOrSuperColumn columnOrSuperColumn: columnList) {
                        Column column = columnOrSuperColumn.getColumn();
                        byte[] columnNameBytes = column.getName();
                        String columnName = new String(columnNameBytes, "UTF-8");
                        byte[] valueBytes = column.getValue();
                        String value = new String(valueBytes, "UTF-8");
                        if (value.equals(NULL_VALUE_STRING))
                            value = null;
                        columnMap.put(columnName, value);
                    }
                    rowList.add(columnMap);
                }
            }
            return rowList;
        } catch (UnsupportedEncodingException exc) {
            throw new StorageException("Character encoding exception with key range", exc);
        }
    }

    protected List<Map<String,Object>> getRowsByPrimaryKey(String tableName,
            String primaryKeyName, String keyStart, String keyEnd,
            String[] columnNameList, ConsistencyLevel consistencyLevel) {
        try {
            // Get the column parent
            ColumnParent columnParent = new ColumnParent();
            columnParent.setColumn_family(tableName);

            SlicePredicate slicePredicate = getSlicePredicate(columnNameList);

            // Get the key range.
            // FIXME: These lower/upper bound values make sense for typical
            // ASCII strings (i.e. space and tilde are at the start and end of
            // the printable ASCII characters), but wouldn't make sense for
            // non-ASCII data. In theory, it seems like you should be able to
            // set keyStartBytes to [0] and keyEndBytes to
            // [255,255,255,255,255,...], but that didn't work when I tried it
            // (at least with an older version of Cassandra; haven't tried it
            // with the 0.7 beta versions). Should look into this some more to
            // determine the best solution.
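            // Note: with the Thrift API an empty start/end key denotes an
            // unbounded range, so null bounds below fall back to a scan of
            // the entire column family (behavior assumed from the Thrift
            // interface definition; worth verifying against the deployed
            // Cassandra version).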
            if (keyStart == null)
                keyStart = "";
            if (keyEnd == null)
                keyEnd = "";
            byte[] keyStartBytes = keyStart.getBytes("UTF-8");
            byte[] keyEndBytes = keyEnd.getBytes("UTF-8");
            KeyRange keyRange = new KeyRange();
            keyRange.setStart_key(keyStartBytes);
            keyRange.setEnd_key(keyEndBytes);
            // FIXME: Shouldn't hard-code the count here. Experiment with
            // making this bigger. Can we make it really big, or do we need to
            // worry about chunked Cassandra reads to handle large result sets?
            keyRange.setCount(1000000);

            // Get the data
            List<KeySlice> keySliceList = getClient().get_range_slices(
                    columnParent, slicePredicate, keyRange, consistencyLevel);
            List<Map<String,Object>> rowList =
                    convertKeySliceList(keySliceList, primaryKeyName);
            return rowList;
        } catch (UnsupportedEncodingException exc) {
            throw new StorageException("Character encoding exception with key range", exc);
        } catch (TimedOutException exc) {
            throw new StorageException("Cassandra request timed out", exc);
        } catch (InvalidRequestException exc) {
            throw new StorageException("Invalid Cassandra request", exc);
        } catch (UnavailableException exc) {
            throw new StorageException("Cassandra unavailable", exc);
        } catch (TException exc) {
            throw new StorageException("Thrift error connecting to Cassandra", exc);
        }
    }

    protected List<Map<String,Object>> getRowsByIndexedColumn(String tableName,
            String primaryKeyName, String indexedColumnName,
            Comparable<?> indexedColumnValue, String[] columnNameList,
            ConsistencyLevel consistencyLevel) {
        try {
            // Get the column parent
            ColumnParent columnParent = new ColumnParent();
            columnParent.setColumn_family(tableName);

            SlicePredicate slicePredicate = getSlicePredicate(columnNameList);

            // Get the index expression.
            IndexExpression indexExpression = new IndexExpression();
            byte[] indexedColumnNameBytes = indexedColumnName.getBytes("UTF-8");
            indexExpression.setColumn_name(indexedColumnNameBytes);
            indexExpression.setOp(IndexOperator.EQ);
            String indexedColumnValueString;
            if (indexedColumnValue == null)
                indexedColumnValueString = NULL_VALUE_STRING;
            else
                indexedColumnValueString = indexedColumnValue.toString();
            byte[] columnValueBytes = indexedColumnValueString.getBytes("UTF-8");
            indexExpression.setValue(columnValueBytes);
            List<IndexExpression> indexExpressionList =
                    new ArrayList<IndexExpression>();
            indexExpressionList.add(indexExpression);
            IndexClause indexClause = new IndexClause();
            // FIXME: Shouldn't hard-code the count here. Should do chunked
            // reads instead of getting everything at once.
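            // A paged variant (sketch, not in the original code) would call
            // get_indexed_slices repeatedly with a small count, setting
            // start_key to the last key returned by the previous call and
            // skipping the duplicated first row on each subsequent page.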
            indexClause.setCount(1000000);
            byte[] startKeyBytes = " ".getBytes("UTF-8");
            indexClause.setStart_key(startKeyBytes);
            indexClause.setExpressions(indexExpressionList);

            // Get the data
            List<KeySlice> keySliceList = getClient().get_indexed_slices(
                    columnParent, indexClause, slicePredicate, consistencyLevel);
            List<Map<String,Object>> rowList =
                    convertKeySliceList(keySliceList, primaryKeyName);
            return rowList;
        } catch (UnsupportedEncodingException exc) {
            throw new StorageException("Character encoding exception with column name/value", exc);
        } catch (TimedOutException exc) {
            throw new StorageException("Cassandra request timed out", exc);
        } catch (InvalidRequestException exc) {
            throw new StorageException("Invalid Cassandra request", exc);
        } catch (UnavailableException exc) {
            throw new StorageException("Cassandra unavailable", exc);
        } catch (TException exc) {
            throw new StorageException("Thrift error connecting to Cassandra", exc);
        }
    }

    public void commit(ConsistencyLevel consistencyLevel) {
        try {
            try {
                if (pendingMutations != null) {
                    getClient().batch_mutate(pendingMutations, consistencyLevel);
                }
                if (pendingDeletions != null) {
                    long timestamp = getNextTimestamp();
                    for (Object key: pendingDeletions) {
                        String keyString = key.toString();
                        byte[] keyBytes = keyString.getBytes("UTF-8");
                        ByteBuffer keyByteBuffer = ByteBuffer.wrap(keyBytes);
                        ColumnPath columnPath = new ColumnPath();
                        columnPath.setColumn_family(pendingColumnFamily);
                        // Use getClient() so a closed connection is reopened
                        // rather than dereferencing a null client.
                        getClient().remove(keyByteBuffer, columnPath,
                                timestamp, consistencyLevel);
                    }
                }
            } catch (UnsupportedEncodingException exc) {
                throw new StorageException("Unsupported character encoding in row key", exc);
            } catch (TimedOutException exc) {
                throw new StorageException("Cassandra request timed out", exc);
            } catch (InvalidRequestException exc) {
                throw new StorageException("Invalid Cassandra request", exc);
            } catch (UnavailableException exc) {
                throw new StorageException("Cassandra unavailable", exc);
            } catch (TException exc) {
                throw new StorageException("Thrift error connecting to Cassandra", exc);
            }
        } finally {
            rollback();
        }
    }

    public void rollback() {
        pendingMutations = null;
        pendingDeletions = null;
        pendingColumnFamily = null;
    }

    public void updateColumn(String columnFamily, Object rowKey,
            String columnName, Object value) {
        Map<String,Object> columnUpdateMap = new HashMap<String,Object>();
        columnUpdateMap.put(columnName, value);
        updateRow(columnFamily, rowKey, columnUpdateMap);
    }

    public void updateRow(String columnFamily, Object rowKey,
            Map<String,Object> columnUpdateMap) {
        Map<Object,Map<String,Object>> rowUpdateMap =
                new HashMap<Object,Map<String,Object>>();
        rowUpdateMap.put(rowKey, columnUpdateMap);
        updateRows(columnFamily, rowUpdateMap);
    }

    public void updateRow(String columnFamily,
            Map<String,Object> columnUpdateMap, String primaryKeyName) {
        List<Map<String,Object>> rowUpdateList =
                new ArrayList<Map<String,Object>>();
        rowUpdateList.add(columnUpdateMap);
        updateRows(columnFamily, primaryKeyName, rowUpdateList);
    }
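    // The pending-mutation map built below mirrors the structure that
    // Thrift's batch_mutate expects: row key -> column family -> mutations.
    // For example (informally), updating column "c" of row "r1" in column
    // family "cf" yields { "r1" : { "cf" : [ Mutation(c=...) ] } }.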
    private List<Mutation> getRowMutationList(String columnFamily,
            Object rowKey) {
        if (pendingMutations == null)
            pendingMutations =
                    new HashMap<ByteBuffer,Map<String,List<Mutation>>>();

        ByteBuffer rowKeyBytes;
        try {
            rowKeyBytes = ByteBuffer.wrap(rowKey.toString().getBytes("UTF-8"));
        } catch (UnsupportedEncodingException exc) {
            throw new StorageException("Unsupported character encoding for row ID", exc);
        }

        Map<String,List<Mutation>> rowIdMap = pendingMutations.get(rowKeyBytes);
        if (rowIdMap == null) {
            rowIdMap = new HashMap<String,List<Mutation>>();
            pendingMutations.put(rowKeyBytes, rowIdMap);
        }

        List<Mutation> rowMutationList = rowIdMap.get(columnFamily);
        if (rowMutationList == null) {
            rowMutationList = new ArrayList<Mutation>();
            rowIdMap.put(columnFamily, rowMutationList);
        }

        return rowMutationList;
    }

    byte[] convertValueToBytes(Object value) throws StorageException {
        try {
            String s;
            if (value == null) {
                s = NULL_VALUE_STRING;
            } else if (value instanceof Date) {
                // FIXME: This is a hack to do the date conversion here.
                // Currently the date conversion is split between the cassandra
                // bundle and the nosql bundle. This should be refactored so
                // that the logic for converting to the format used to store
                // the data in the database lives in only one place.
                DateFormat df = dateFormat.get();
                if (df == null) {
                    df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");
                    df.setTimeZone(TimeZone.getTimeZone("GMT"));
                    dateFormat.set(df);
                }
                s = df.format(value);
            } else {
                s = value.toString();
            }
            byte[] bytes = s.getBytes("UTF-8");
            return bytes;
        } catch (UnsupportedEncodingException exc) {
            throw new StorageException("Unsupported character encoding for column value", exc);
        }
    }

    // Synchronized because lastTimestamp is static state shared by all
    // Connection instances; without it, concurrent callers could be handed
    // duplicate timestamps.
    private static synchronized long getNextTimestamp() {
        // Java only lets us get the time in milliseconds, not microseconds, grrr
        long timestamp = System.currentTimeMillis() * 1000;
        if (timestamp <= lastTimestamp)
            timestamp = lastTimestamp + 1;
        lastTimestamp = timestamp;
        return timestamp;
    }

    private Mutation getMutation(String columnName, Object value,
            long timestamp) {
        byte[] columnNameBytes;
        try {
            columnNameBytes = columnName.getBytes("UTF-8");
        } catch (UnsupportedEncodingException exc) {
            throw new StorageException("Unsupported character encoding for column name", exc);
        }
        byte[] valueBytes = convertValueToBytes(value);

        Column column = new Column();
        column.setName(columnNameBytes);
        column.setValue(valueBytes);
        column.setTimestamp(timestamp);

        ColumnOrSuperColumn columnOrSuperColumn = new ColumnOrSuperColumn();
        columnOrSuperColumn.setColumn(column);

        Mutation mutation = new Mutation();
        mutation.setColumn_or_supercolumn(columnOrSuperColumn);

        return mutation;
    }

    public void updateRows(String columnFamily,
            Map<Object,Map<String,Object>> rowUpdateMap) {
        long timestamp = getNextTimestamp();
        for (Map.Entry<Object,Map<String,Object>> rowEntry:
                rowUpdateMap.entrySet()) {
            Object rowKey = rowEntry.getKey();
            List<Mutation> rowMutationList =
                    getRowMutationList(columnFamily, rowKey);
            for (Map.Entry<String,Object> columnEntry:
                    rowEntry.getValue().entrySet()) {
                Mutation mutation = getMutation(columnEntry.getKey(),
                        columnEntry.getValue(), timestamp);
                rowMutationList.add(mutation);
            }
        }
    }

    public void updateRows(String columnFamily, Set<Object> rowKeys,
            Map<String,Object> columnUpdateMap) {
        Map<Object,Map<String,Object>> rowUpdateMap =
                new HashMap<Object,Map<String,Object>>();
        for (Object rowKey: rowKeys) {
            rowUpdateMap.put(rowKey, columnUpdateMap);
        }
        updateRows(columnFamily, rowUpdateMap);
    }

    public String generateRowId() {
        return UUID.randomUUID().toString();
    }

    public void updateRows(String columnFamily, String primaryKeyName,
            List<Map<String,Object>> rowUpdateList) {
        long timestamp = getNextTimestamp();
        for (Map<String,Object> rowUpdateMap: rowUpdateList) {
            String rowId = (String) rowUpdateMap.get(primaryKeyName);
            if (rowId == null)
                rowId = generateRowId();
            List<Mutation> rowMutationList =
                    getRowMutationList(columnFamily, rowId);
            for (Map.Entry<String,Object> entry: rowUpdateMap.entrySet()) {
                String columnName = entry.getKey();
                // FIXME: For now we include the primary key data as column
                // data too.
                // This is not completely efficient, because it means we're
                // storing that data twice in Cassandra, but if you don't do
                // that, then you can't set up secondary indexes on the
                // primary key column in order to do range queries on that
                // data (not supported currently in 0.7.0, but targeted for
                // the 0.7.1 release). Also there are (arguably pathological)
                // cases where, if you don't store the data as column data
                // too, the row could be incorrectly interpreted as a deleted
                // (tombstoned) row. So to keep things simple (at least for
                // now) we always include the key as column data too.
                //if (!columnName.equals(primaryKeyName)) {
                    Mutation mutation = getMutation(columnName,
                            entry.getValue(), timestamp);
                    rowMutationList.add(mutation);
                //}
            }
        }
    }

    public void deleteRows(String columnFamily, Set<Object> rowKeys) {
        // Note: only one column family's deletions can be pending at a time;
        // calling this again with a different column family before commit()
        // retargets the earlier pending deletions.
        if (pendingDeletions == null)
            pendingDeletions = new HashSet<Object>();
        pendingDeletions.addAll(rowKeys);
        pendingColumnFamily = columnFamily;
    }

    public void truncate(String columnFamily) {
        try {
            getClient().truncate(columnFamily);
        } catch (InvalidRequestException exc) {
            throw new StorageException("Invalid Cassandra request", exc);
        } catch (UnavailableException exc) {
            throw new StorageException("Cassandra unavailable", exc);
        } catch (TException exc) {
            throw new StorageException("Thrift error connecting to Cassandra", exc);
        }
    }

    public void setKeyspace(String keyspace) {
        try {
            getClient().set_keyspace(keyspace);
            this.keyspace = keyspace;
        } catch (InvalidRequestException exc) {
            throw new StorageException("Invalid Cassandra request", exc);
        } catch (TException exc) {
            throw new StorageException("Thrift error connecting to Cassandra", exc);
        }
    }

    public void login(String user, String password) {
        if (user == null)
            return;
        this.user = user;
        this.password = password;
        try {
            // FIXME: Not sure if this is correct? The only example I could
            // find for login was the Perl example program, and this seemed to
            // be what it was doing. This also seemed to be what the
            // SimpleAuthenticator expects in the auth credentials, but I'm
            // not sure whether this is intended to apply to other
            // authenticators as well. Need to do some more research.
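            // The "username"/"password" map keys below match what Cassandra's
            // SimpleAuthenticator reads out of the AuthenticationRequest
            // credentials map; other IAuthenticator implementations may
            // expect different keys.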
            Map<String,String> credentials = new HashMap<String,String>();
            credentials.put("username", user);
            credentials.put("password", password);
            getClient().login(new AuthenticationRequest(credentials));
        } catch (TException exc) {
            throw new StorageException("Thrift exception", exc);
        } catch (AuthenticationException exc) {
            throw new StorageException("Authentication failed", exc);
        } catch (AuthorizationException exc) {
            throw new StorageException("Authorization failed", exc);
        }
    }

    public void createKeyspace(String keyspaceName, String strategyClass,
            Map<String,String> strategyOptions) {
        KsDef keyspaceDef = new KsDef();
        keyspaceDef.setName(keyspaceName);
        keyspaceDef.setStrategy_class(strategyClass);
        if (strategyOptions != null)
            keyspaceDef.setStrategy_options(strategyOptions);
        List<CfDef> cfDefList = new ArrayList<CfDef>();
        keyspaceDef.setCf_defs(cfDefList);
        try {
            getClient().system_add_keyspace(keyspaceDef);
        } catch (InvalidRequestException exc) {
            throw new StorageException("Invalid Cassandra request", exc);
        } catch (SchemaDisagreementException exc) {
            throw new StorageException("Cassandra schema disagreement error", exc);
        } catch (TException exc) {
            throw new StorageException("Thrift error connecting to Cassandra", exc);
        }
    }

    public void dropKeyspace(String keyspaceName) {
        try {
            getClient().system_drop_keyspace(keyspaceName);
        } catch (InvalidRequestException exc) {
            throw new StorageException("Invalid Cassandra request", exc);
        } catch (SchemaDisagreementException exc) {
            throw new StorageException("Cassandra schema disagreement error", exc);
        } catch (TException exc) {
            throw new StorageException("Thrift error connecting to Cassandra", exc);
        }
    }

    public void dropColumnFamily(String columnFamilyName) {
        if (keyspace == null)
            throw new StorageException("Null keyspace name in dropColumnFamily");
        try {
            getClient().system_drop_column_family(columnFamilyName);
        } catch (InvalidRequestException exc) {
            throw new StorageException("Invalid Cassandra request", exc);
        } catch (SchemaDisagreementException exc) {
            throw new StorageException("Cassandra schema disagreement error", exc);
        } catch (TException exc) {
            throw new StorageException("Thrift error connecting to Cassandra", exc);
        }
    }

    public void createColumnFamily(String columnFamilyName,
            Set<String> indexedColumns) {
        try {
            if (keyspace == null)
                throw new StorageException("Null keyspace name in createColumnFamily");
            CfDef columnFamilyDef = new CfDef();
            columnFamilyDef.setName(columnFamilyName);
            columnFamilyDef.setKeyspace(keyspace);
            columnFamilyDef.setComparator_type("UTF8Type");
            if (indexedColumns != null) {
                List<ColumnDef> metadataList = new ArrayList<ColumnDef>();
                for (String indexedColumn: indexedColumns) {
                    ColumnDef columnDef = new ColumnDef();
                    try {
                        byte[] columnNameBytes = indexedColumn.getBytes("UTF-8");
                        columnDef.setName(columnNameBytes);
                    } catch (UnsupportedEncodingException exc) {
                        throw new StorageException("Unsupported character encoding for indexed column name", exc);
                    }
                    // FIXME: Shouldn't hard-code these.
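                    // KEYS is the only secondary index type available in this
                    // generation of Cassandra, and BytesType is the most
                    // permissive validator, so both are reasonable defaults
                    // until they're made configurable.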
                    columnDef.setIndex_type(IndexType.KEYS);
                    columnDef.setValidation_class("BytesType");
                    metadataList.add(columnDef);
                }
                columnFamilyDef.setColumn_metadata(metadataList);
            }
            getClient().system_add_column_family(columnFamilyDef);
        } catch (InvalidRequestException exc) {
            throw new StorageException("Invalid Cassandra request", exc);
        } catch (SchemaDisagreementException exc) {
            throw new StorageException("Cassandra schema disagreement error", exc);
        } catch (TException exc) {
            throw new StorageException("Thrift error connecting to Cassandra", exc);
        }
    }

    public void open() {
        try {
            // FIXME: Is this the optimal code for thrift 0.5? This code seems
            // to change with every new Cassandra release and the sample code
            // is never updated. Probably need to get the source package and
            // look at the unit tests to verify.
            TSocket socket = new TSocket(this.host, this.port);
            transport = new TFramedTransport(socket);
            TProtocol protocol = new TBinaryProtocol(transport);
            client = new Cassandra.Client(protocol);
            transport.open();
        } catch (TTransportException exc) {
            close();
            throw new StorageException("Error opening Cassandra connection", exc);
        }
    }

    public void close() {
        if (transport != null)
            transport.close();
        client = null;
        transport = null;
    }

    public void reconnect() {
        close();
        open();
        setKeyspace(keyspace);
        login(this.user, this.password);
    }

    public boolean getInUse() {
        return inUse;
    }

    public void setInUse(boolean inUse) {
        this.inUse = inUse;
    }
}
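/*
 * Illustrative usage sketch (not from the original source). Assumes a
 * Cassandra 0.7-era node with its Thrift interface on the default port 9160,
 * plus a hypothetical "sdnplatform" keyspace and "switch" column family:
 *
 *     Connection conn = new Connection("localhost", 9160);
 *     conn.setKeyspace("sdnplatform");
 *
 *     Map<String,Object> row = new HashMap<String,Object>();
 *     row.put("id", "00:00:00:00:00:00:00:01");
 *     row.put("core_switch", "true");
 *     conn.updateRow("switch", row, "id");    // queued locally
 *     conn.commit(ConsistencyLevel.QUORUM);   // flushed via batch_mutate
 *
 *     conn.deleteRows("switch",
 *             Collections.<Object>singleton("00:00:00:00:00:00:00:01"));
 *     conn.commit(ConsistencyLevel.QUORUM);
 *     conn.close();
 *
 * Reads go through the protected getRowsByPrimaryKey() and
 * getRowsByIndexedColumn() helpers, which the storage layer built on top of
 * this class is expected to call. The keyspace/column-family names and row
 * contents above are hypothetical; only the Connection API calls come from
 * this file.
 */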