/* * Seldon -- open source prediction engine * ======================================= * Copyright 2011-2015 Seldon Technologies Ltd and Rummble Ltd (http://www.seldon.io/) * ********************************************************************************************** * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ********************************************************************************************** */ package io.seldon.vw; import org.apache.commons.lang.StringUtils; import com.google.common.hash.HashFunction; import com.google.common.hash.Hashing; public class VwFeatureHash { final static int VW_CONSTANT_HASH = 11650396; final int mask; final int stride; public VwFeatureHash(int bits,int oaa) { mask = Math.round((float)Math.pow(2, bits) - 1); stride = Math.round((float)Math.pow(2,Math.ceil(log2(oaa,2)))); System.out.println("Stide is "+stride); } private double log2(int val,int base) { return Math.log(val) / Math.log(base); } private boolean isInteger(String s) { return isInteger(s,10); } private boolean isInteger(String s, int radix) { if(s.isEmpty()) return false; for(int i = 0; i < s.length(); i++) { if(i == 0 && s.charAt(i) == '-') { if(s.length() == 1) return false; else continue; } if(Character.digit(s.charAt(i),radix) < 0) return false; } return true; } public Integer getFeatureHash(int label,String namespace,String feature) { int nsHash = 0; if (!StringUtils.isEmpty(namespace)) { HashFunction h = Hashing.murmur3_32(0); nsHash = h.hashBytes(namespace.getBytes()).asInt(); } int hcl = 0; if (isInteger(feature)) hcl = Integer.parseInt(feature) + nsHash; else { HashFunction h = Hashing.murmur3_32(nsHash); hcl = (h.hashBytes(feature.getBytes()).asInt()); } int f = ((hcl * stride) + label - 1) & mask; return f; } public Integer getConstantHash(int label) { int hash_oaa = VW_CONSTANT_HASH * stride; int f = (hash_oaa + label - 1) & mask; return f; } public static void main(String[] args) { VwFeatureHash hasher = new VwFeatureHash(18,2); int label = 1; Integer hcl = hasher.getFeatureHash(label,"f","101"); System.out.println("code="+hcl); Integer hcon = hasher.getConstantHash(label); System.out.println("Constant="+hcon); } }