/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.ignite.internal.processors.hadoop.shuffle.collections; import java.io.DataInput; import java.util.Random; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLongArray; import java.util.concurrent.atomic.AtomicReference; import org.apache.ignite.IgniteCheckedException; import org.apache.ignite.internal.processors.hadoop.HadoopJobInfo; import org.apache.ignite.internal.processors.hadoop.HadoopTaskContext; import org.apache.ignite.internal.processors.hadoop.HadoopTaskInput; import org.apache.ignite.internal.util.GridLongList; import org.apache.ignite.internal.util.GridRandom; import org.apache.ignite.internal.util.offheap.unsafe.GridUnsafeMemory; import org.apache.ignite.internal.util.typedef.internal.A; import org.apache.ignite.internal.util.typedef.internal.U; import org.jetbrains.annotations.Nullable; /** * Multimap for map reduce intermediate results. */ public class HadoopConcurrentHashMultimap extends HadoopHashMultimapBase { /** */ private final AtomicReference<State> state = new AtomicReference<>(State.READING_WRITING); /** */ private volatile AtomicLongArray oldTbl; /** */ private volatile AtomicLongArray newTbl; /** */ private final AtomicInteger keys = new AtomicInteger(); /** */ private final CopyOnWriteArrayList<AdderImpl> adders = new CopyOnWriteArrayList<>(); /** */ private final AtomicInteger inputs = new AtomicInteger(); /** * @param jobInfo Job info. * @param mem Memory. * @param cap Initial capacity. */ public HadoopConcurrentHashMultimap(HadoopJobInfo jobInfo, GridUnsafeMemory mem, int cap) { super(jobInfo, mem); assert U.isPow2(cap); newTbl = oldTbl = new AtomicLongArray(cap); } /** * @return Number of keys. */ public long keys() { int res = keys.get(); for (AdderImpl adder : adders) res += adder.locKeys.get(); return res; } /** * @return Current table capacity. */ @Override public int capacity() { return oldTbl.length(); } /** * @return Adder object. * @param ctx Task context. */ @Override public Adder startAdding(HadoopTaskContext ctx) throws IgniteCheckedException { if (inputs.get() != 0) throw new IllegalStateException("Active inputs."); if (state.get() == State.CLOSING) throw new IllegalStateException("Closed."); return new AdderImpl(ctx); } /** {@inheritDoc} */ @Override public void close() { assert inputs.get() == 0 : inputs.get(); assert adders.isEmpty() : adders.size(); state(State.READING_WRITING, State.CLOSING); if (keys() == 0) return; super.close(); } /** {@inheritDoc} */ @Override protected long meta(int idx) { return oldTbl.get(idx); } /** * Incrementally visits all the keys and values in the map. * * @param ignoreLastVisited Flag indicating that visiting must be started from the beginning. * @param v Visitor. * @return {@code false} If visiting was impossible due to rehashing. */ @Override public boolean visit(boolean ignoreLastVisited, Visitor v) throws IgniteCheckedException { if (!state.compareAndSet(State.READING_WRITING, State.VISITING)) { assert state.get() != State.CLOSING; return false; // Can not visit while rehashing happens. } AtomicLongArray tbl0 = oldTbl; for (int i = 0; i < tbl0.length(); i++) { long meta = tbl0.get(i); while (meta != 0) { long valPtr = value(meta); long lastVisited = ignoreLastVisited ? 0 : lastVisitedValue(meta); if (valPtr != lastVisited) { v.onKey(key(meta), keySize(meta)); lastVisitedValue(meta, valPtr); // Set it to the first value in chain. do { v.onValue(valPtr + 12, valueSize(valPtr)); valPtr = nextValue(valPtr); } while (valPtr != lastVisited); } meta = collision(meta); } } state(State.VISITING, State.READING_WRITING); return true; } /** {@inheritDoc} */ @Override public HadoopTaskInput input(HadoopTaskContext taskCtx) throws IgniteCheckedException { inputs.incrementAndGet(); if (!adders.isEmpty()) throw new IllegalStateException("Active adders."); State s = state.get(); if (s == State.CLOSING) throw new IllegalStateException("Closed."); assert s != State.REHASHING; return new Input(taskCtx) { @Override public void close() throws IgniteCheckedException { if (inputs.decrementAndGet() < 0) throw new IllegalStateException(); super.close(); } }; } /** * @param fromTbl Table. */ private void rehashIfNeeded(AtomicLongArray fromTbl) { if (fromTbl.length() == Integer.MAX_VALUE) return; long keys0 = keys(); if (keys0 < 3 * (fromTbl.length() >>> 2)) // New size has to be >= than 3/4 of capacity to rehash. return; if (fromTbl != newTbl) // Check if someone else have done the job. return; if (!state.compareAndSet(State.READING_WRITING, State.REHASHING)) { assert state.get() != State.CLOSING; // Visiting is allowed, but we will not rehash. return; } if (fromTbl != newTbl) { // Double check. state(State.REHASHING, State.READING_WRITING); // Switch back. return; } // Calculate new table capacity. int newLen = fromTbl.length(); do { newLen <<= 1; } while (newLen < keys0); if (keys0 >= 3 * (newLen >>> 2)) // Still more than 3/4. newLen <<= 1; // This is our target table for rehashing. AtomicLongArray toTbl = new AtomicLongArray(newLen); // Make the new table visible before rehashing. newTbl = toTbl; // Rehash. int newMask = newLen - 1; long failedMeta = 0; GridLongList collisions = new GridLongList(16); for (int i = 0; i < fromTbl.length(); i++) { // Scan source table. long meta = fromTbl.get(i); assert meta != -1; if (meta == 0) { // No entry. failedMeta = 0; if (!fromTbl.compareAndSet(i, 0, -1)) // Mark as moved. i--; // Retry. continue; } do { // Collect all the collisions before the last one failed to nullify or 0. collisions.add(meta); meta = collision(meta); } while (meta != failedMeta); do { // Go from the last to the first to avoid 'in-flight' state for meta entries. meta = collisions.remove(); int addr = keyHash(meta) & newMask; for (;;) { // Move meta entry to the new table. long toCollision = toTbl.get(addr); collision(meta, toCollision); if (toTbl.compareAndSet(addr, toCollision, meta)) break; } } while (!collisions.isEmpty()); // Here 'meta' will be a root pointer in old table. if (!fromTbl.compareAndSet(i, meta, -1)) { // Try to mark as moved. failedMeta = meta; i--; // Retry the same address in table because new keys were added. } else failedMeta = 0; } // Now old and new tables will be the same again. oldTbl = toTbl; state(State.REHASHING, State.READING_WRITING); } /** * Switch state. * * @param oldState Expected state. * @param newState New state. */ private void state(State oldState, State newState) { if (!state.compareAndSet(oldState, newState)) throw new IllegalStateException(); } /** * @param meta Meta pointer. * @return Value pointer. */ @Override protected long value(long meta) { return mem.readLongVolatile(meta + 16); } /** * @param meta Meta pointer. * @param oldValPtr Old value. * @param newValPtr New value. * @return {@code true} If succeeded. */ private boolean casValue(long meta, long oldValPtr, long newValPtr) { return mem.casLong(meta + 16, oldValPtr, newValPtr); } /** * @param meta Meta pointer. * @return Collision pointer. */ @Override protected long collision(long meta) { return mem.readLongVolatile(meta + 24); } /** * @param meta Meta pointer. * @param collision Collision pointer. */ @Override protected void collision(long meta, long collision) { assert meta != collision : meta; mem.writeLongVolatile(meta + 24, collision); } /** * @param meta Meta pointer. * @return Last visited value pointer. */ private long lastVisitedValue(long meta) { return mem.readLong(meta + 32); } /** * @param meta Meta pointer. * @param valPtr Last visited value pointer. */ private void lastVisitedValue(long meta, long valPtr) { mem.writeLong(meta + 32, valPtr); } /** * Adder. Must not be shared between threads. */ private class AdderImpl extends AdderBase { /** */ private final Reader keyReader; /** */ private final AtomicInteger locKeys = new AtomicInteger(); /** */ private final Random rnd = new GridRandom(); /** * @param ctx Task context. * @throws IgniteCheckedException If failed. */ private AdderImpl(HadoopTaskContext ctx) throws IgniteCheckedException { super(ctx); keyReader = new Reader(keySer); rehashIfNeeded(oldTbl); adders.add(this); } /** * @param in Data input. * @param reuse Reusable key. * @return Key. * @throws IgniteCheckedException If failed. */ @Override public Key addKey(DataInput in, @Nullable Key reuse) throws IgniteCheckedException { KeyImpl k = reuse == null ? new KeyImpl() : (KeyImpl)reuse; k.tmpKey = keySer.read(in, k.tmpKey); k.meta = add(k.tmpKey, null); return k; } /** {@inheritDoc} */ @Override public void write(Object key, Object val) throws IgniteCheckedException { A.notNull(val, "val"); add(key, val); } /** * @param tbl Table. */ private void incrementKeys(AtomicLongArray tbl) { locKeys.lazySet(locKeys.get() + 1); if (rnd.nextInt(tbl.length()) < 512) rehashIfNeeded(tbl); } /** * @param keyHash Key hash. * @param keySize Key size. * @param keyPtr Key pointer. * @param valPtr Value page pointer. * @param collisionPtr Pointer to meta with hash collision. * @param lastVisitedVal Last visited value pointer. * @return Created meta page pointer. */ private long createMeta(int keyHash, int keySize, long keyPtr, long valPtr, long collisionPtr, long lastVisitedVal) { long meta = allocate(40); mem.writeInt(meta, keyHash); mem.writeInt(meta + 4, keySize); mem.writeLong(meta + 8, keyPtr); mem.writeLong(meta + 16, valPtr); mem.writeLong(meta + 24, collisionPtr); mem.writeLong(meta + 32, lastVisitedVal); return meta; } /** * @param key Key. * @param val Value. * @return Updated or created meta page pointer. * @throws IgniteCheckedException If failed. */ private long add(Object key, @Nullable Object val) throws IgniteCheckedException { AtomicLongArray tbl = oldTbl; int keyHash = U.hash(key.hashCode()); long newMetaPtr = 0; long valPtr = 0; if (val != null) { valPtr = write(12, val, valSer); int valSize = writtenSize() - 12; valueSize(valPtr, valSize); } for (AtomicLongArray old = null;;) { int addr = keyHash & (tbl.length() - 1); long metaPtrRoot = tbl.get(addr); // Read root meta pointer at this address. if (metaPtrRoot == -1) { // The cell was already moved by rehashing. AtomicLongArray n = newTbl; // Need to read newTbl first here. AtomicLongArray o = oldTbl; tbl = tbl == o ? n : o; // Trying to get the oldest table but newer than ours. old = null; continue; } if (metaPtrRoot != 0) { // Not empty slot. long metaPtr = metaPtrRoot; do { // Scan all the collisions. if (keyHash(metaPtr) == keyHash && key.equals(keyReader.readKey(metaPtr))) { // Found key. if (newMetaPtr != 0) // Deallocate new meta if one was allocated. localDeallocate(key(newMetaPtr)); // Key was allocated first, so rewind to it's pointer. if (valPtr != 0) { // Add value if it exists. long nextValPtr; // Values are linked to each other to a stack like structure. // Replace the last value in meta with ours and link it as next. do { nextValPtr = value(metaPtr); nextValue(valPtr, nextValPtr); } while (!casValue(metaPtr, nextValPtr, valPtr)); } return metaPtr; } metaPtr = collision(metaPtr); } while (metaPtr != 0); // Here we did not find our key, need to check if it was moved by rehashing to the new table. if (old == null) { // If the old table already set, then we will just try to update it. AtomicLongArray n = newTbl; if (n != tbl) { // Rehashing happens, try to find the key in new table but preserve the old one. old = tbl; tbl = n; continue; } } } if (old != null) { // We just checked new table but did not find our key as well as in the old one. tbl = old; // Try to add new key to the old table. addr = keyHash & (tbl.length() - 1); old = null; } if (newMetaPtr == 0) { // Allocate new meta page. long keyPtr = write(0, key, keySer); int keySize = writtenSize(); if (valPtr != 0) nextValue(valPtr, 0); newMetaPtr = createMeta(keyHash, keySize, keyPtr, valPtr, metaPtrRoot, 0); } else // Update new meta with root pointer collision. collision(newMetaPtr, metaPtrRoot); if (tbl.compareAndSet(addr, metaPtrRoot, newMetaPtr)) { // Try to replace root pointer with new one. incrementKeys(tbl); return newMetaPtr; } } } /** {@inheritDoc} */ @Override public void close() throws IgniteCheckedException { if (!adders.remove(this)) throw new IllegalStateException(); keys.addAndGet(locKeys.get()); // Here we have race and #keys() method can return wrong result but it is ok. super.close(); } /** * Key. */ private class KeyImpl implements Key { /** */ private long meta; /** */ private Object tmpKey; /** * @return Meta pointer for the key. */ public long address() { return meta; } /** * @param val Value. */ @Override public void add(Value val) { int size = val.size(); long valPtr = allocate(size + 12); val.copyTo(valPtr + 12); valueSize(valPtr, size); long nextVal; do { nextVal = value(meta); nextValue(valPtr, nextVal); } while(!casValue(meta, nextVal, valPtr)); } } } /** * Current map state. */ private enum State { /** */ REHASHING, /** */ VISITING, /** */ READING_WRITING, /** */ CLOSING } }