/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.sysml.runtime.util;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.lang.ArrayUtils;
import org.apache.sysml.parser.Expression.ValueType;
import org.apache.sysml.runtime.matrix.data.FrameBlock;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
import org.apache.sysml.runtime.matrix.data.NumItemsByEachReducerMetaData;
import org.apache.sysml.runtime.matrix.data.Pair;
import org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue;

/**
 * Collection of static helper functions for hashing, block/cell index
 * arithmetic, number and memory-size parsing, conversions between
 * value types, and small array utilities.
 */
public class UtilFunctions
{
	//for accurate cast of double values to int and long
	//IEEE754: binary64 (double precision) eps = 2^(-53) = 1.11 * 10^(-16)
	//(same epsilon as used for matrix index cast in R)
	//NOTE: final, because toInt/toLong rely on this value never changing
	public static final double DOUBLE_EPS = Math.pow(2, -53);

	//prime numbers for old hash function (divide prime close to max int,
	//because it determines the max hash domain size)
	public static final long ADD_PRIME1 = 99991;
	public static final int DIVIDE_PRIME = 1405695061;

	/**
	 * Returns the hash code of a long value, computed by folding the
	 * upper 32 bits into the lower 32 bits via xor.
	 *
	 * @param v long value
	 * @return hash code
	 */
	public static int longHashCode(long v) {
		return (int)(v^(v>>>32));
	}

	/**
	 * Returns the hash code for a long-long pair. This is the default
	 * hash function for the keys of a distributed matrix in MR/Spark.
	 *
	 * @param key1 first long key
	 * @param key2 second long key
	 * @return hash code
	 */
	public static int longlongHashCode(long key1, long key2) {
		//basic hash mixing of two longs hashes (similar to
		//Arrays.hashCode(long[]) but w/o array creation/copy)
		int h = (int)(key1 ^ (key1 >>> 32));
		return h*31 + (int)(key2 ^ (key2 >>> 32));
	}

	/**
	 * Returns the smallest power of two that is greater than or equal to
	 * the given positive value, capped at Integer.MAX_VALUE. An input of
	 * 0 yields 1; negative inputs yield Integer.MAX_VALUE.
	 *
	 * @param in input value
	 * @return next power of two, or Integer.MAX_VALUE if not representable
	 */
	public static int nextIntPow2( int in ) {
		int expon = (in==0) ? 0 : 32-Integer.numberOfLeadingZeros(in-1);
		long pow2 = (long) Math.pow(2, expon);
		return (int)((pow2>Integer.MAX_VALUE)?Integer.MAX_VALUE : pow2);
	}

	/**
	 * Computes the 1-based block index based on the global cell index and block size meta
	 * data. See computeCellIndex for the inverse operation.
	 *
	 * @param cellIndex global cell index
	 * @param blockSize block size
	 * @return 1-based block index
	 */
	public static long computeBlockIndex(long cellIndex, int blockSize) {
		return (cellIndex-1)/blockSize + 1;
	}

	/**
	 * Computes the 0-based cell-in-block index based on the global cell index and block
	 * size meta data. See computeCellIndex for the inverse operation.
	 *
	 * @param cellIndex global cell index
	 * @param blockSize block size
	 * @return 0-based cell-in-block index
	 */
	public static int computeCellInBlock(long cellIndex, int blockSize) {
		return (int) ((cellIndex-1)%blockSize);
	}

	/**
	 * Computes the global 1-based cell index based on the block index, block size meta data,
	 * and specific 0-based in-block cell index.
	 *
	 * NOTE: this is equivalent to cellIndexCalculation.
	 *
	 * @param blockIndex block index
	 * @param blockSize block size
	 * @param cellInBlock 0-based cell-in-block index
	 * @return global 1-based cell index
	 */
	public static long computeCellIndex( long blockIndex, int blockSize, int cellInBlock ) {
		return (blockIndex-1)*blockSize + 1 + cellInBlock;
	}

	/**
	 * Computes the actual block size based on matrix dimension, block index, and block size
	 * meta data. For boundary blocks, the actual block size is less or equal than the block
	 * size meta data; otherwise they are identical.
	 *
	 * @param len matrix dimension
	 * @param blockIndex block index
	 * @param blockSize block size metadata
	 * @return actual block size
	 */
	public static int computeBlockSize( long len, long blockIndex, long blockSize ) {
		long remain = len - (blockIndex-1)*blockSize;
		return (int)Math.min(blockSize, remain);
	}

	/**
	 * Indicates if the block with the given block indexes overlaps the
	 * given global index range, where all range bounds are inclusive.
	 *
	 * @param ix block indexes
	 * @param brlen number of rows per block
	 * @param bclen number of columns per block
	 * @param rl lower row bound
	 * @param ru upper row bound
	 * @param cl lower column bound
	 * @param cu upper column bound
	 * @return true if the block overlaps the range in rows and columns
	 */
	public static boolean isInBlockRange( MatrixIndexes ix, int brlen, int bclen, long rl, long ru, long cl, long cu )
	{
		//global cell index bounds covered by this block
		long bRLowerIndex = (ix.getRowIndex()-1)*brlen + 1;
		long bRUpperIndex = ix.getRowIndex()*brlen;
		long bCLowerIndex = (ix.getColumnIndex()-1)*bclen + 1;
		long bCUpperIndex = ix.getColumnIndex()*bclen;

		boolean rowOverlap = !(rl > bRUpperIndex || ru < bRLowerIndex);
		boolean colOverlap = !(cl > bCUpperIndex || cu < bCLowerIndex);
		return rowOverlap && colOverlap;
	}

	/**
	 * Indicates if the rows [ix, ix+brlen-1] overlap the given row range,
	 * where all bounds are inclusive.
	 *
	 * @param ix index of the first row
	 * @param brlen number of rows
	 * @param rl lower row bound
	 * @param ru upper row bound
	 * @return true if the rows overlap the range
	 */
	public static boolean isInFrameBlockRange( Long ix, int brlen, long rl, long ru ) {
		return !(rl > ix+brlen-1 || ru < ix);
	}

	/**
	 * Indicates if the block with the given block indexes overlaps the
	 * given index range.
	 *
	 * @param ix block indexes
	 * @param brlen number of rows per block
	 * @param bclen number of columns per block
	 * @param ixrange index range
	 * @return true if the block overlaps the range in rows and columns
	 */
	public static boolean isInBlockRange( MatrixIndexes ix, int brlen, int bclen, IndexRange ixrange ) {
		return isInBlockRange(ix, brlen, bclen,
			ixrange.rowStart, ixrange.rowEnd,
			ixrange.colStart, ixrange.colEnd);
	}

	/**
	 * Indicates if the rows [ix, ix+brlen-1] overlap the row bounds of the
	 * given index range. Only rows are checked; bclen and the column bounds
	 * of the range are not used.
	 *
	 * @param ix index of the first row
	 * @param brlen number of rows
	 * @param bclen number of columns (unused)
	 * @param ixrange index range
	 * @return true if the rows overlap the row range
	 */
	public static boolean isInFrameBlockRange( Long ix, int brlen, int bclen, IndexRange ixrange ) {
		return isInFrameBlockRange(ix, brlen, ixrange.rowStart, ixrange.rowEnd);
	}

	/**
	 * Computes the 0-based index range inside the given block that is
	 * selected by the given global index range. If the block lies outside
	 * of the blocks touched by the range, all bounds of the result are -1.
	 *
	 * Reused by both MR and Spark for performing zero out.
	 *
	 * @param in indexed block
	 * @param blockRowFactor number of rows per block
	 * @param blockColFactor number of columns per block
	 * @param indexRange global index range
	 * @return index range inside the block, or (-1,-1,-1,-1) if no overlap
	 */
	public static IndexRange getSelectedRangeForZeroOut(IndexedMatrixValue in, int blockRowFactor, int blockColFactor, IndexRange indexRange)
	{
		IndexRange tempRange = new IndexRange(-1, -1, -1, -1);

		//block indexes and in-block positions of the four range boundaries
		long topBlockRowIndex=UtilFunctions.computeBlockIndex(indexRange.rowStart, blockRowFactor);
		int topRowInTopBlock=UtilFunctions.computeCellInBlock(indexRange.rowStart, blockRowFactor);
		long bottomBlockRowIndex=UtilFunctions.computeBlockIndex(indexRange.rowEnd, blockRowFactor);
		int bottomRowInBottomBlock=UtilFunctions.computeCellInBlock(indexRange.rowEnd, blockRowFactor);

		long leftBlockColIndex=UtilFunctions.computeBlockIndex(indexRange.colStart, blockColFactor);
		int leftColInLeftBlock=UtilFunctions.computeCellInBlock(indexRange.colStart, blockColFactor);
		long rightBlockColIndex=UtilFunctions.computeBlockIndex(indexRange.colEnd, blockColFactor);
		int rightColInRightBlock=UtilFunctions.computeCellInBlock(indexRange.colEnd, blockColFactor);

		//no overlap
		if(in.getIndexes().getRowIndex()<topBlockRowIndex || in.getIndexes().getRowIndex()>bottomBlockRowIndex
			|| in.getIndexes().getColumnIndex()<leftBlockColIndex || in.getIndexes().getColumnIndex()>rightBlockColIndex)
		{
			tempRange.set(-1,-1,-1,-1);
			return tempRange;
		}

		//get the index range inside the block (start with the full block
		//and shrink the sides on which the block is a boundary block)
		tempRange.set(0, in.getValue().getNumRows()-1, 0, in.getValue().getNumColumns()-1);
		if(topBlockRowIndex==in.getIndexes().getRowIndex())
			tempRange.rowStart=topRowInTopBlock;
		if(bottomBlockRowIndex==in.getIndexes().getRowIndex())
			tempRange.rowEnd=bottomRowInBottomBlock;
		if(leftBlockColIndex==in.getIndexes().getColumnIndex())
			tempRange.colStart=leftColInLeftBlock;
		if(rightBlockColIndex==in.getIndexes().getColumnIndex())
			tempRange.colEnd=rightColInRightBlock;

		return tempRange;
	}

	/**
	 * Computes the 0-based index range inside a frame block that is
	 * selected by the given global index range.
	 *
	 * Reused by both MR and Spark for performing zero out.
	 *
	 * NOTE(review): the row start is computed relative to in.getKey() while
	 * the row end is computed relative to lSrcRowIndex; presumably both
	 * refer to the first row of the block - confirm against the callers.
	 *
	 * @param in pair of row index key and frame block (only the key is read)
	 * @param blockRowFactor number of rows per block
	 * @param blockColFactor number of columns per block
	 * @param indexRange global index range
	 * @param lSrcRowIndex source row index
	 * @param lDestRowIndex destination row index
	 * @return index range inside the block
	 */
	public static IndexRange getSelectedRangeForZeroOut(Pair<Long, FrameBlock> in, int blockRowFactor, int blockColFactor, IndexRange indexRange, long lSrcRowIndex, long lDestRowIndex)
	{
		int iRowStart, iRowEnd, iColStart, iColEnd;

		if(indexRange.rowStart <= lDestRowIndex)
			iRowStart = 0;
		else
			iRowStart = (int) (indexRange.rowStart - in.getKey());
		iRowEnd = (int) Math.min(indexRange.rowEnd - lSrcRowIndex, blockRowFactor)-1;

		iColStart = UtilFunctions.computeCellInBlock(indexRange.colStart, blockColFactor);
		iColEnd = UtilFunctions.computeCellInBlock(indexRange.colEnd, blockColFactor);

		return new IndexRange(iRowStart, iRowEnd, iColStart, iColEnd);
	}

	/**
	 * Sums up the item counts of the given meta data.
	 *
	 * @param metadata meta data providing an array of item counts
	 * @return sum of all counts
	 */
	public static long getTotalLength(NumItemsByEachReducerMetaData metadata) {
		long[] counts=metadata.getNumItemsArray();
		long total=0;
		for(long count: counts)
			total+=count;
		return total;
	}

	/**
	 * Computes the number of items between the positions ceil(total*p)
	 * and ceil(total*(1-p)), both inclusive, where total is the sum of
	 * all item counts of the given meta data.
	 *
	 * @param metadata meta data providing an array of item counts
	 * @param p quantile fraction
	 * @return number of items between the two positions
	 */
	public static long getLengthForInterQuantile(NumItemsByEachReducerMetaData metadata, double p)
	{
		long total = UtilFunctions.getTotalLength(metadata);
		long lpos=(long)Math.ceil(total*p);//lower bound is inclusive
		long upos=(long)Math.ceil(total*(1-p));//upper bound is inclusive
		return upos-lpos+1;
	}

	/**
	 * JDK8 floating decimal double parsing, which is generally faster
	 * than &lt;JDK8 parseDouble and works well in multi-threaded tasks.
	 * Delegates to Double.parseDouble.
	 *
	 * @param str string to parse to double
	 * @return double value
	 */
	public static double parseToDouble(String str) {
		return Double.parseDouble(str);
	}

	/**
	 * Parses the given string into an int. Strings containing a '.' are
	 * parsed as double and converted via toInt(double).
	 *
	 * @param str string to parse
	 * @return int value
	 */
	public static int parseToInt( String str ) {
		return str.contains(".") ?
			toInt( Double.parseDouble(str) ) :
			Integer.parseInt(str);
	}

	/**
	 * Parses the given string into a long. Strings containing a '.' are
	 * parsed as double and converted via toLong(double).
	 *
	 * @param str string to parse
	 * @return long value
	 */
	public static long parseToLong( String str ) {
		return str.contains(".") ?
			toLong( Double.parseDouble(str) ) :
			Long.parseLong(str);
	}

	/**
	 * Casts a double to int by adding DOUBLE_EPS and taking the floor.
	 *
	 * @param val double value
	 * @return int value
	 */
	public static int toInt( double val ) {
		return (int) Math.floor( val + DOUBLE_EPS );
	}

	/**
	 * Casts a double to long by adding DOUBLE_EPS and taking the floor.
	 *
	 * @param val double value
	 * @return long value
	 */
	public static long toLong( double val ) {
		return (long) Math.floor( val + DOUBLE_EPS );
	}

	/**
	 * Converts a Long or Integer object to int.
	 *
	 * @param obj object of type Long or Integer
	 * @return int value
	 * @throws ClassCastException if the object is neither Long nor Integer
	 */
	public static int toInt(Object obj) {
		if( obj instanceof Long )
			return ((Long)obj).intValue();
		else
			return ((Integer)obj).intValue();
	}

	/**
	 * Rounds the given value up to the next non-zero multiple of factor.
	 *
	 * @param val value to round
	 * @param factor factor
	 * @return smallest multiple of factor that is at least max(val, factor)
	 */
	public static int roundToNext(int val, int factor) {
		//round up to next non-zero multiple of factor
		int pval = Math.max(val, factor);
		return ((pval + factor-1) / factor) * factor;
	}

	/**
	 * Converts a double into an object of the given value type, where
	 * a value of zero is converted to null.
	 *
	 * @param vt target value type
	 * @param in double value
	 * @return object of the target value type, or null for zero
	 */
	public static Object doubleToObject(ValueType vt, double in) {
		return doubleToObject(vt, in, true);
	}

	/**
	 * Converts a double into an object of the given value type.
	 *
	 * @param vt target value type
	 * @param in double value
	 * @param sparse if true, a value of zero is converted to null
	 * @return object of the target value type, or null for zero if sparse
	 * @throws RuntimeException if the value type is not supported
	 */
	public static Object doubleToObject(ValueType vt, double in, boolean sparse) {
		if( in == 0 && sparse) return null;
		switch( vt ) {
			case STRING:  return String.valueOf(in);
			case BOOLEAN: return (in!=0);
			case INT:     return UtilFunctions.toLong(in);
			case DOUBLE:  return in;
			default: throw new RuntimeException("Unsupported value type: "+vt);
		}
	}

	/**
	 * Parses a string into an object of the given value type.
	 *
	 * @param vt target value type
	 * @param in string value
	 * @return object of the target value type, or null if the input is null
	 * @throws RuntimeException if the value type is not supported
	 */
	public static Object stringToObject(ValueType vt, String in) {
		if( in == null ) return null;
		switch( vt ) {
			case STRING:  return in;
			case BOOLEAN: return Boolean.parseBoolean(in);
			case INT:     return Long.parseLong(in);
			case DOUBLE:  return Double.parseDouble(in);
			default: throw new RuntimeException("Unsupported value type: "+vt);
		}
	}

	/**
	 * Converts an object of the given value type into a double, where
	 * null and empty strings are converted to 0.
	 *
	 * @param vt value type of the input object
	 * @param in input object
	 * @return double value
	 * @throws RuntimeException if the value type is not supported
	 */
	public static double objectToDouble(ValueType vt, Object in) {
		if( in == null ) return 0;
		switch( vt ) {
			case STRING:  return !((String)in).isEmpty() ? Double.parseDouble((String)in) : 0;
			case BOOLEAN: return ((Boolean)in)?1d:0d;
			case INT:     return (Long)in;
			case DOUBLE:  return (Double)in;
			default: throw new RuntimeException("Unsupported value type: "+vt);
		}
	}

	/**
	 * Converts an object to its string representation.
	 *
	 * @param in object
	 * @return string representation, or null if the input is null
	 */
	public static String objectToString( Object in ) {
		return (in !=null) ? in.toString() : null;
	}

	/**
	 * Converts an object to its string representation.
	 *
	 * @param in object
	 * @param ignoreNull if set, values that represent "no data" (zero of
	 *   type Double or Long, Boolean false, or a blank String) are converted
	 *   to null. This flag is mainly used in merge functionality to avoid
	 *   overriding data with such "null" data.
	 * @return string representation of object, or null
	 */
	public static String objectToString( Object in, boolean ignoreNull ) {
		String strReturn = objectToString(in);
		if( strReturn == null || !ignoreNull )
			return strReturn;

		if( in instanceof Double && ((Double)in).doubleValue() == 0.0 )
			return null;
		if( in instanceof Long && ((Long)in).longValue() == 0 )
			return null;
		if( in instanceof Boolean && ((Boolean)in).booleanValue() == false )
			return null;
		if( in instanceof String && ((String)in).trim().length() == 0 )
			return null;
		return strReturn;
	}

	/**
	 * Converts an object into an object of the given value type. Objects
	 * that already have the matching type are returned as is; all other
	 * objects are converted via their string representation.
	 *
	 * @param vt target value type
	 * @param in input object
	 * @return object of the target value type
	 */
	public static Object objectToObject(ValueType vt, Object in) {
		if( in instanceof Double && vt == ValueType.DOUBLE
			|| in instanceof Long && vt == ValueType.INT
			|| in instanceof Boolean && vt == ValueType.BOOLEAN
			|| in instanceof String && vt == ValueType.STRING )
			return in; //quick path to avoid double parsing
		else
			return stringToObject(vt, objectToString(in) );
	}

	/**
	 * Converts an object into an object of the given value type via its
	 * string representation.
	 *
	 * @param vt target value type
	 * @param in input object
	 * @param ignoreNull see objectToString(Object, boolean)
	 * @return object of the target value type, or null
	 */
	public static Object objectToObject(ValueType vt, Object in, boolean ignoreNull ) {
		String str = objectToString(in, ignoreNull);
		if (str==null || vt == ValueType.STRING)
			return str;
		else
			return stringToObject(vt, str);
	}

	/**
	 * Compares two objects of the given value type, where null is
	 * ordered before any non-null object.
	 *
	 * @param vt value type of both objects
	 * @param in1 first object
	 * @param in2 second object
	 * @return negative, zero, or positive value if the first object is
	 *   less than, equal to, or greater than the second object
	 * @throws RuntimeException if the value type is not supported
	 */
	public static int compareTo(ValueType vt, Object in1, Object in2) {
		if(in1 == null && in2 == null) return 0;
		else if(in1 == null) return -1;
		else if(in2 == null) return 1;

		switch( vt ) {
			case STRING:  return ((String)in1).compareTo((String)in2);
			case BOOLEAN: return ((Boolean)in1).compareTo((Boolean)in2);
			case INT:     return ((Long)in1).compareTo((Long)in2);
			case DOUBLE:  return ((Double)in1).compareTo((Double)in2);
			default: throw new RuntimeException("Unsupported value type: "+vt);
		}
	}

	/**
	 * Compares two version strings of format x.y.z, where x is major,
	 * y is minor, and z is maintenance release.
	 *
	 * Only the leading components present in both versions are compared,
	 * i.e., versions with a common prefix such as 1.0 and 1.0.1 are
	 * considered equal.
	 *
	 * @param version1 first version string
	 * @param version2 second version string
	 * @return 1 if version1 greater, -1 if version2 greater, 0 if equal
	 * @throws NumberFormatException if a compared component is not an integer
	 */
	public static int compareVersion( String version1, String version2 )
	{
		String[] partsv1 = version1.split("\\.");
		String[] partsv2 = version2.split("\\.");
		int len = Math.min(partsv1.length, partsv2.length);
		for( int i=0; i<len; i++ ) {
			int iv1 = Integer.parseInt(partsv1[i]);
			int iv2 = Integer.parseInt(partsv2[i]);
			if( iv1 != iv2 )
				return (iv1 < iv2) ? -1 : 1;
		}

		return 0; //equal
	}

	/**
	 * Indicates if the given string consists only of the digits 0-9.
	 * The empty string is considered an integer number; signs are not
	 * accepted.
	 *
	 * @param str input string
	 * @return true if all characters are digits 0-9
	 */
	public static boolean isIntegerNumber( String str ) {
		//check chars directly instead of str.getBytes(), which allocates
		//and depends on the platform default charset
		for( int i=0; i<str.length(); i++ ) {
			char c = str.charAt(i);
			if( c < '0' || c > '9' )
				return false;
		}
		return true;
	}

	/**
	 * Returns the maximum value of the given byte array.
	 *
	 * @param array byte array
	 * @return maximum value, or Byte.MIN_VALUE for an empty array
	 */
	public static byte max( byte[] array ) {
		byte ret = Byte.MIN_VALUE;
		for( int i=0; i<array.length; i++ )
			ret = (array[i]>ret)?array[i]:ret;
		return ret;
	}

	/**
	 * Removes a pair of enclosing double or single quotes, if present.
	 *
	 * @param s input string (may be null)
	 * @return string without enclosing quotes, or the input as is
	 */
	public static String unquote(String s) {
		if (s != null && s.length() >=2
			&& ((s.startsWith("\"") && s.endsWith("\""))
				|| (s.startsWith("'") && s.endsWith("'")))) {
			s = s.substring(1, s.length() - 1);
		}
		return s;
	}

	/**
	 * Encloses the given string in double quotes.
	 *
	 * @param s input string
	 * @return quoted string
	 */
	public static String quote(String s) {
		return "\"" + s + "\"";
	}

	/**
	 * Parses a memory size with optional g/m/k quantifiers into its
	 * number representation.
	 *
	 * @param arg memory size as readable string
	 * @return byte count of memory size
	 * @throws NumberFormatException if the numeric part is not a valid long
	 */
	public static long parseMemorySize(String arg) {
		String num = arg.substring(0, Math.max(arg.length()-1, 0));
		if ( arg.endsWith("g") || arg.endsWith("G") )
			return Long.parseLong(num) * 1024 * 1024 * 1024;
		else if ( arg.endsWith("m") || arg.endsWith("M") )
			return Long.parseLong(num) * 1024 * 1024;
		else if( arg.endsWith("k") || arg.endsWith("K") )
			return Long.parseLong(num) * 1024;
		else
			return Long.parseLong(arg);
	}

	/**
	 * Formats a byte count as readable string with GB/MB/KB unit, using
	 * the largest unit that does not exceed the byte count. The value is
	 * truncated to full units; byte counts below 1024 are printed as is.
	 *
	 * @param arg byte count of memory size
	 * @return memory size as readable string
	 */
	public static String formatMemorySize(long arg) {
		if (arg >= 1024 * 1024 * 1024)
			return String.format("%d GB", arg/(1024*1024*1024));
		else if (arg >= 1024 * 1024)
			return String.format("%d MB", arg/(1024*1024));
		else if (arg >= 1024)
			return String.format("%d KB", arg/(1024));
		else
			return String.format("%d", arg);
	}

	/**
	 * Obtain sequence list
	 *
	 * @param low   lower bound (inclusive)
	 * @param up    upper bound (inclusive)
	 * @param incr  increment
	 * @return list of integers; empty if low is greater than up
	 * @throws IllegalArgumentException if the sequence is non-empty and
	 *   the increment is not positive
	 */
	public static List<Integer> getSequenceList(int low, int up, int incr) {
		//a non-positive increment would never reach the upper bound
		if( incr <= 0 && low <= up )
			throw new IllegalArgumentException("Invalid non-positive increment: "+incr);

		ArrayList<Integer> ret = new ArrayList<Integer>();
		//long counter, because an int counter wraps around for upper
		//bounds close to Integer.MAX_VALUE and the loop would not terminate
		for( long i=low; i<=up; i+=incr )
			ret.add((int)i);
		return ret;
	}

	/**
	 * Obtains the double value of an object; objects other than Double
	 * are converted by parsing their string representation.
	 *
	 * @param obj input object
	 * @return double value
	 */
	public static double getDouble(Object obj) {
		return (obj instanceof Double) ? (Double)obj :
			Double.parseDouble(obj.toString());
	}

	/**
	 * Indicates if the given object is non-zero. Objects other than Double
	 * are considered zero if their string representation equals "0" or "0.0".
	 *
	 * @param obj input object
	 * @return true if the object is non-zero
	 */
	public static boolean isNonZero(Object obj) {
		if( obj instanceof Double )
			return ((Double) obj) != 0;
		else {
			//avoid expensive double parsing
			String sobj = obj.toString();
			return (!sobj.equals("0") && !sobj.equals("0.0"));
		}
	}

	/**
	 * Creates an array of n copies of the given value type.
	 *
	 * @param n number of copies
	 * @param vt value type
	 * @return array of length n
	 */
	public static ValueType[] nCopies(int n, ValueType vt) {
		ValueType[] ret = new ValueType[n];
		Arrays.fill(ret, vt);
		return ret;
	}

	/**
	 * Counts the occurrences of the given value type in the schema.
	 *
	 * @param schema array of value types
	 * @param vt value type to count
	 * @return number of occurrences
	 */
	public static int frequency(ValueType[] schema, ValueType vt) {
		int count = 0;
		for( ValueType tmp : schema )
			count += tmp.equals(vt) ? 1 : 0;
		return count;
	}

	/**
	 * Creates a new array containing all value types of the first schema
	 * followed by all value types of the second schema.
	 *
	 * @param schema1 first schema
	 * @param schema2 second schema
	 * @return concatenated schema
	 */
	public static ValueType[] copyOf(ValueType[] schema1, ValueType[] schema2) {
		return (ValueType[]) ArrayUtils.addAll(schema1, schema2);
	}

	/**
	 * Counts the non-zero values in data[pos, pos+len).
	 *
	 * @param data double array
	 * @param pos start position (inclusive)
	 * @param len number of values to check
	 * @return number of non-zero values
	 */
	public static int countNonZeros(double[] data, int pos, int len) {
		int ret = 0;
		for( int i=pos; i<pos+len; i++ )
			ret += (data[i] != 0) ? 1 : 0;
		return ret;
	}

	/**
	 * Indicates if data[pos, pos+len) contains at least one zero.
	 *
	 * @param data double array
	 * @param pos start position (inclusive)
	 * @param len number of values to check
	 * @return true if a zero value was found
	 */
	public static boolean containsZero(double[] data, int pos, int len) {
		for( int i=pos; i<pos+len; i++ )
			if( data[i] == 0 )
				return true;
		return false;
	}
}