/* * Copyright (c) 2011-2015 EPFL DATA Laboratory * Copyright (c) 2014-2015 The Squall Collaboration (see NOTICE) * * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package ch.epfl.data.squall.storage; import gnu.trove.iterator.TIntObjectIterator; import gnu.trove.map.hash.TIntObjectHashMap; import java.io.Serializable; import java.io.UnsupportedEncodingException; import java.text.DateFormat; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.List; import java.util.Map; import ch.epfl.data.squall.predicates.Predicate; import ch.epfl.data.squall.storage.indexes.Index; import ch.epfl.data.squall.utilities.MyUtilities; import ch.epfl.data.squall.visitors.PredicateUpdateIndexesVisitor; /** * Tuple storage. Provides ~O(1) random access and insertion */ public class TupleStorage implements Serializable { public static class DiscardSpecificTuple { private final boolean _isTagged; private final int _address; public DiscardSpecificTuple(boolean isTagged, int address) { _isTagged = isTagged; _address = address; } public int getAddress() { return _address; } public boolean isTagged() { return _isTagged; } @Override public String toString() { String tag; if (isTagged()) tag = "Tagged: "; else tag = "unTagged: "; return tag + _address; } } public static TIntObjectHashMap<DiscardSpecificTuple> getHashedStringToAddress( TupleStorage tagged, TupleStorage untagged) { final TIntObjectHashMap<DiscardSpecificTuple> map = new TIntObjectHashMap<TupleStorage.DiscardSpecificTuple>(); final TIntObjectHashMap<byte[]> taggedStorage = tagged.getStorage(); for (final TIntObjectIterator<byte[]> iterator = taggedStorage .iterator(); iterator.hasNext();) { iterator.advance(); final int address = iterator.key(); String tuple = null; try { tuple = new String(iterator.value(), "UTF-8"); } catch (final UnsupportedEncodingException e) { e.printStackTrace(); } int hash = tuple.hashCode(); int count = 0; while (map.contains(hash)) { count++; final String newString = tuple + count; hash = newString.hashCode(); } map.put(hash, new DiscardSpecificTuple(true, address)); } final TIntObjectHashMap<byte[]> unTaggedStorage = untagged.getStorage(); for (final TIntObjectIterator<byte[]> iterator = unTaggedStorage .iterator(); iterator.hasNext();) { iterator.advance(); final int address = iterator.key(); String tuple = null; try { tuple = new String(iterator.value(), "UTF-8"); } catch (final UnsupportedEncodingException e) { e.printStackTrace(); } int hash = tuple.hashCode(); int count = 0; while (map.contains(hash)) { count++; final String newString = tuple + count; hash = newString.hashCode(); } map.put(hash, new DiscardSpecificTuple(false, address)); } return map; } public static void preProcess(TupleStorage tagged, TupleStorage untagged, int[] hashes, int[] addresses) { final TIntObjectHashMap<byte[]> taggedStorage = tagged.getStorage(); int index = 0; for (final TIntObjectIterator<byte[]> iterator = taggedStorage .iterator(); iterator.hasNext();) { iterator.advance(); final int address = iterator.key(); String tuple = null; try { tuple = new String(iterator.value(), "UTF-8"); } catch (final UnsupportedEncodingException e) { e.printStackTrace(); } final int hash = tuple.hashCode(); hashes[index] = hash; addresses[index] = address; index++; } final TIntObjectHashMap<byte[]> unTaggedStorage = untagged.getStorage(); for (final TIntObjectIterator<byte[]> iterator = unTaggedStorage .iterator(); iterator.hasNext();) { iterator.advance(); final int address = iterator.key(); String tuple = null; try { tuple = new String(iterator.value(), "UTF-8"); } catch (final UnsupportedEncodingException e) { e.printStackTrace(); } final int hash = tuple.hashCode(); hashes[index] = hash; addresses[index] = address; index++; } } private static final long serialVersionUID = 1L; private TIntObjectHashMap<byte[]> _storage; private int _lastId; public TupleStorage() { _storage = new TIntObjectHashMap<byte[]>(); _lastId = -1; } public TupleStorage(TupleStorage t) { copy(t); } public void clear() { _lastId = -1; _storage.clear(); } @Override protected Object clone() throws CloneNotSupportedException { return super.clone(); } public void copy(TupleStorage t) { _storage.putAll(t._storage); _lastId = t._lastId; } public String get(int id) { try { return new String(_storage.get(id), "UTF-8"); } catch (final UnsupportedEncodingException e) { e.printStackTrace(); return null; } } public TIntObjectHashMap<byte[]> getStorage() { return _storage; } public int insert(String tuple) { _lastId++; try { _storage.put(_lastId, tuple.getBytes("UTF-8")); } catch (final UnsupportedEncodingException e) { e.printStackTrace(); } return _lastId; } /** * Purge stale state */ public void purgeState(long tillTimeStamp, List<Index> indexes, Predicate joinPredicate, Map conf, boolean isFirstRelations) { // TODO This is linear now, needs to be optimized by indexing DateFormat convDateFormat = new SimpleDateFormat( "EEE MMM d HH:mm:ss zzz yyyy"); for (TIntObjectIterator<byte[]> it = this.getStorage().iterator(); it .hasNext();) { it.advance(); int row_id = it.key(); String tuple = ""; try { tuple = new String(it.value(), "UTF-8"); } catch (Exception e1) { // e1.printStackTrace(); // throw new RuntimeException(e1.toString()); } if (tuple.equals("")) return; final String parts[] = tuple.split("\\@"); if (parts.length < 2) System.out.println("UNEXPECTED TIMESTAMP SIZES: " + parts.length); final long storedTimestamp = Long.valueOf(new String(parts[0])); final String tupleString = parts[1]; if (storedTimestamp < (tillTimeStamp)) { // delete // Cleaning up storage it.remove(); // Cleaning up indexes final PredicateUpdateIndexesVisitor visitor = new PredicateUpdateIndexesVisitor( isFirstRelations, MyUtilities.stringToTuple( tupleString, conf)); joinPredicate.accept(visitor); final List<String> valuesToIndex = new ArrayList<String>( visitor._valuesToIndex); final List<Object> typesOfValuesToIndex = new ArrayList<Object>( visitor._typesOfValuesToIndex); for (int i = 0; i < indexes.size(); i++) if (typesOfValuesToIndex.get(i) instanceof Integer) indexes.get(i).remove(row_id, Integer.parseInt(valuesToIndex.get(i))); else if (typesOfValuesToIndex.get(i) instanceof Double) indexes.get(i).remove(row_id, Double.parseDouble(valuesToIndex.get(i))); else if (typesOfValuesToIndex.get(i) instanceof Date) try { indexes.get(i).remove(row_id, convDateFormat.parse(valuesToIndex.get(i))); } catch (final ParseException e) { throw new RuntimeException( "Parsing problem in StormThetaJoin.removingIndexes " + e.getMessage()); } else if (typesOfValuesToIndex.get(i) instanceof String) indexes.get(i).remove(row_id, valuesToIndex.get(i)); else throw new RuntimeException("non supported type"); // ended cleaning indexes } } } // Should be treated with care. Valid indexes From 0-->(_storage.size()-1) public void remove(int beginIndex, int endIndex) { for (int i = beginIndex; i <= endIndex; i++) _storage.remove(i); } public int size() { return _storage.size(); } public List<String> toList() throws UnsupportedEncodingException { ArrayList<byte[]> list = new ArrayList<byte[]>( _storage.valueCollection()); ArrayList<String> transformed = new ArrayList<String>(list.size()); for (int i = 0; i < transformed.size(); i++) { transformed.set(i, new String(list.get(i), "UTF-8")); } return transformed; } @Override public String toString() { return _storage.toString(); } }