/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.accumulo.core.util; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; import org.apache.accumulo.core.client.AccumuloException; import org.apache.accumulo.core.conf.AccumuloConfiguration; import org.apache.accumulo.core.conf.Property; import org.apache.accumulo.core.data.ArrayByteSequence; import org.apache.accumulo.core.data.ByteSequence; import org.apache.accumulo.core.data.Column; import org.apache.accumulo.core.data.ColumnUpdate; import org.apache.accumulo.core.data.Mutation; import org.apache.accumulo.core.data.Range; import org.apache.accumulo.core.data.thrift.TMutation; import org.apache.accumulo.core.file.FileSKVIterator; import org.apache.accumulo.core.file.rfile.RFile.Reader; import org.apache.commons.lang.mutable.MutableLong; import org.apache.hadoop.io.Text; import com.google.common.base.Joiner; public class LocalityGroupUtil { // private static final Logger log = Logger.getLogger(ColumnFamilySet.class); public static final Set<ByteSequence> EMPTY_CF_SET = Collections.emptySet(); public static Set<ByteSequence> families(Collection<Column> columns) { if (columns.size() == 0) return EMPTY_CF_SET; Set<ByteSequence> result = new HashSet<>(columns.size()); for (Column col : columns) { result.add(new ArrayByteSequence(col.getColumnFamily())); } return result; } @SuppressWarnings("serial") static public class LocalityGroupConfigurationError extends AccumuloException { LocalityGroupConfigurationError(String why) { super(why); } } public static Map<String,Set<ByteSequence>> getLocalityGroups(AccumuloConfiguration acuconf) throws LocalityGroupConfigurationError { Map<String,Set<ByteSequence>> result = new HashMap<>(); String[] groups = acuconf.get(Property.TABLE_LOCALITY_GROUPS).split(","); for (String group : groups) { if (group.length() > 0) result.put(group, new HashSet<ByteSequence>()); } HashSet<ByteSequence> all = new HashSet<>(); for (Entry<String,String> entry : acuconf) { String property = entry.getKey(); String value = entry.getValue(); String prefix = Property.TABLE_LOCALITY_GROUP_PREFIX.getKey(); if (property.startsWith(prefix)) { // this property configures a locality group, find out which one: String group = property.substring(prefix.length()); String[] parts = group.split("\\."); group = parts[0]; if (result.containsKey(group)) { if (parts.length == 1) { Set<ByteSequence> colFamsSet = decodeColumnFamilies(value); if (!Collections.disjoint(all, colFamsSet)) { colFamsSet.retainAll(all); throw new LocalityGroupConfigurationError("Column families " + colFamsSet + " in group " + group + " is already used by another locality group"); } all.addAll(colFamsSet); result.put(group, colFamsSet); } } } } // result.put("", all); return result; } public static Set<ByteSequence> decodeColumnFamilies(String colFams) throws LocalityGroupConfigurationError { HashSet<ByteSequence> colFamsSet = new HashSet<>(); for (String family : colFams.split(",")) { ByteSequence cfbs = decodeColumnFamily(family); colFamsSet.add(cfbs); } return colFamsSet; } public static ByteSequence decodeColumnFamily(String colFam) throws LocalityGroupConfigurationError { byte output[] = new byte[colFam.length()]; int pos = 0; for (int i = 0; i < colFam.length(); i++) { char c = colFam.charAt(i); if (c == '\\') { // next char must be 'x' or '\' i++; if (i >= colFam.length()) { throw new LocalityGroupConfigurationError("Expected 'x' or '\' after '\' in " + colFam); } char nc = colFam.charAt(i); switch (nc) { case '\\': output[pos++] = '\\'; break; case 'x': // next two chars must be [0-9][0-9] i++; output[pos++] = (byte) (0xff & Integer.parseInt(colFam.substring(i, i + 2), 16)); i++; break; default: throw new LocalityGroupConfigurationError("Expected 'x' or '\' after '\' in " + colFam); } } else { output[pos++] = (byte) (0xff & c); } } return new ArrayByteSequence(output, 0, pos); } public static String encodeColumnFamilies(Set<Text> colFams) { SortedSet<String> ecfs = new TreeSet<>(); StringBuilder sb = new StringBuilder(); for (Text text : colFams) { String ecf = encodeColumnFamily(sb, text.getBytes(), text.getLength()); ecfs.add(ecf); } return Joiner.on(",").join(ecfs); } public static String encodeColumnFamily(ByteSequence bs) { if (bs.offset() != 0) { throw new IllegalArgumentException("The offset cannot be non-zero."); } return encodeColumnFamily(new StringBuilder(), bs.getBackingArray(), bs.length()); } private static String encodeColumnFamily(StringBuilder sb, byte[] ba, int len) { sb.setLength(0); for (int i = 0; i < len; i++) { int c = 0xff & ba[i]; if (c == '\\') sb.append("\\\\"); else if (c >= 32 && c <= 126 && c != ',') sb.append((char) c); else sb.append("\\x").append(String.format("%02X", c)); } String ecf = sb.toString(); return ecf; } public static class PartitionedMutation extends Mutation { private byte[] row; private List<ColumnUpdate> updates; public PartitionedMutation(byte[] row, List<ColumnUpdate> updates) { this.row = row; this.updates = updates; } @Override public byte[] getRow() { return row; } @Override public List<ColumnUpdate> getUpdates() { return updates; } @Override public TMutation toThrift() { throw new UnsupportedOperationException(); } @Override public int hashCode() { throw new UnsupportedOperationException(); } @Override public boolean equals(Object o) { throw new UnsupportedOperationException(); } @Override public boolean equals(Mutation m) { throw new UnsupportedOperationException(); } } public static class Partitioner { private Map<ByteSequence,Integer> colfamToLgidMap; private PreAllocatedArray<Map<ByteSequence,MutableLong>> groups; public Partitioner(PreAllocatedArray<Map<ByteSequence,MutableLong>> groups) { this.groups = groups; this.colfamToLgidMap = new HashMap<>(); for (int i = 0; i < groups.length; i++) { for (ByteSequence cf : groups.get(i).keySet()) { colfamToLgidMap.put(cf, i); } } } public void partition(List<Mutation> mutations, PreAllocatedArray<List<Mutation>> partitionedMutations) { MutableByteSequence mbs = new MutableByteSequence(new byte[0], 0, 0); PreAllocatedArray<List<ColumnUpdate>> parts = new PreAllocatedArray<>(groups.length + 1); for (Mutation mutation : mutations) { if (mutation.getUpdates().size() == 1) { int lgid = getLgid(mbs, mutation.getUpdates().get(0)); partitionedMutations.get(lgid).add(mutation); } else { for (int i = 0; i < parts.length; i++) { parts.set(i, null); } int lgcount = 0; for (ColumnUpdate cu : mutation.getUpdates()) { int lgid = getLgid(mbs, cu); if (parts.get(lgid) == null) { parts.set(lgid, new ArrayList<ColumnUpdate>()); lgcount++; } parts.get(lgid).add(cu); } if (lgcount == 1) { for (int i = 0; i < parts.length; i++) if (parts.get(i) != null) { partitionedMutations.get(i).add(mutation); break; } } else { for (int i = 0; i < parts.length; i++) if (parts.get(i) != null) partitionedMutations.get(i).add(new PartitionedMutation(mutation.getRow(), parts.get(i))); } } } } private Integer getLgid(MutableByteSequence mbs, ColumnUpdate cu) { mbs.setArray(cu.getColumnFamily(), 0, cu.getColumnFamily().length); Integer lgid = colfamToLgidMap.get(mbs); if (lgid == null) lgid = groups.length; return lgid; } } /** * This method created to help seek an rfile for a locality group obtained from {@link Reader#getLocalityGroupCF()}. This method can possibly return an empty * list for the default locality group. When this happens the default locality group needs to be seeked differently. This method helps do that. * * <p> * For the default locality group will seek using the families of all other locality groups non-inclusive. * * @see Reader#getLocalityGroupCF() */ public static void seek(FileSKVIterator reader, Range range, String lgName, Map<String,ArrayList<ByteSequence>> localityGroupCF) throws IOException { Collection<ByteSequence> families; boolean inclusive; if (lgName == null) { // this is the default locality group, create a set of all families not in the default group Set<ByteSequence> nonDefaultFamilies = new HashSet<>(); for (Entry<String,ArrayList<ByteSequence>> entry : localityGroupCF.entrySet()) { if (entry.getKey() != null) { nonDefaultFamilies.addAll(entry.getValue()); } } families = nonDefaultFamilies; inclusive = false; } else { families = localityGroupCF.get(lgName); inclusive = true; } reader.seek(range, families, inclusive); } }