/* * Copyright (c) 2011 LinkedIn, Inc * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package com.flaptor.indextank.index.scorer; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.ObjectInputStream; import java.io.Serializable; import java.util.HashMap; import java.util.Map; import java.util.Scanner; import java.util.Set; import java.util.Map.Entry; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.locks.ReentrantReadWriteLock; import org.apache.log4j.Logger; import com.flaptor.indextank.index.DocId; import com.flaptor.indextank.index.scorer.CategoryMaskManager.CategoryInfo; import com.flaptor.indextank.index.scorer.CategoryMaskManager.CategoryValueInfo; import com.flaptor.indextank.index.scorer.DynamicBoostsManager.DynamicBoosts; import com.flaptor.util.Execute; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Maps; @SuppressWarnings("deprecation") public class DynamicDataManager implements BoostsManager { private static final Logger logger = Logger.getLogger(Execute.whoAmI()); private static final String OLD_MAIN_FILE_NAME = "dynamicBoosts"; private static final String MAIN_FILE_NAME = "dynamicData"; private final int numberOfBoosts; private final ConcurrentMap<DocId, DynamicData> dynamicDataMap; private final File backupDir; private final DynamicData emptyData; private final CategoryMaskManager maskManager; private final ReentrantReadWriteLock dumpLock = new ReentrantReadWriteLock(); /** * Build a {@link DynamicDataManager} with a backupDir. If the directory containts a * dynamic boosts file, it loads the data from it. Otherwise, creates a new boosts map * * @param numberOfBoosts the number of boosting doubles that this Scorer will store. * @param backupDir the directory to which the data stored in this Scorer shall be */ @SuppressWarnings("unchecked") public DynamicDataManager(int numberOfBoosts, File backupDir) { Preconditions.checkArgument(numberOfBoosts > 0); checkDirArgument(backupDir); this.numberOfBoosts = numberOfBoosts; this.backupDir = backupDir; this.emptyData = new DynamicData(numberOfBoosts); this.maskManager = new CategoryMaskManager(dumpLock); File oldFormatFile = new File(this.backupDir, OLD_MAIN_FILE_NAME); File newFormatFile = new File(this.backupDir, MAIN_FILE_NAME); if (!newFormatFile.exists() && oldFormatFile.exists()) { logger.info("Found old format file, loading it."); ObjectInputStream is = null; try { is = new ObjectInputStream(new BufferedInputStream(new FileInputStream(oldFormatFile))); int storedNumberOfBoosts = is.readInt(); if (storedNumberOfBoosts != numberOfBoosts) { throw new IllegalArgumentException("Number of boosts specified in Manager construction differ from the one stored in the backup file (" + numberOfBoosts + " vs. " + storedNumberOfBoosts +")"); } try { ConcurrentMap<?, ?> read = (ConcurrentMap<?, ?>) is.readObject(); Set<?> keys = read.keySet(); if (keys.isEmpty() || (keys.iterator().next() instanceof DocId && read.values().iterator().next() instanceof DynamicData)) { // last version, assign directly to field dynamicDataMap = (ConcurrentMap<DocId, DynamicData>) read; } else { // values are definitely Boosts, we'll need to transform them // and check whether the keys are strings (v1) or DocIds (v2) dynamicDataMap = new ConcurrentHashMap<DocId, DynamicData>(); boolean areDocids = keys.iterator().next() instanceof DocId; for (Map.Entry<?,?> e : read.entrySet()) { // convert key to docid if necessary DocId docId = areDocids ? (DocId)e.getKey() : new DocId((String)e.getKey()); // convert value and add to the map dynamicDataMap.put(docId, new DynamicData((DynamicBoosts)e.getValue())); } } logger.info("State loaded."); } catch (ClassNotFoundException e) { throw new IllegalStateException(e); } } catch (FileNotFoundException e) { throw new RuntimeException(e); } catch (IOException e) { throw new RuntimeException(e); } finally { Execute.close(is); } } else { this.dynamicDataMap = new ConcurrentHashMap<DocId, DynamicData>(); if (newFormatFile.exists()) { readFromDisk(); } } } @Override public Boosts getBoosts(DocId documentId) { DynamicData data = dynamicDataMap.get(documentId); if (data == null) { logger.warn("Failed to find boosts for document " + documentId); return this.emptyData; } return data; } public Map<Integer, Double> getVariablesAsMap(DocId documentId) { DynamicData data = dynamicDataMap.get(documentId); if (data == null) { logger.warn("Failed to find variables for document " + documentId); return ImmutableMap.of(); } return data.getVariablesAsMap(numberOfBoosts); } public Map<String, String> getCategoriesAsMap(DocId documentId) { try { return getCategoryValues(documentId); } catch (IllegalArgumentException e) { logger.warn(e.getMessage()); } return ImmutableMap.of(); } public int getNumberOfBoosts() { return numberOfBoosts; } @Override public int getDocumentCount() { return dynamicDataMap.size(); } DynamicData getDynamicData(DocId docId) { return dynamicDataMap.get(docId); } @Override public void removeBoosts(String documentId) { dynamicDataMap.remove(new DocId(documentId)); } @Override public void setBoosts(String documentId, Map<Integer, Float> boosts) { setBoosts(documentId, null, boosts); } public void setCategoryValues(String documentId, Map<String, String> categories) { DynamicData data = getOrCreateData(documentId); for (Map.Entry<String, String> entry : categories.entrySet()) { CategoryValueInfo catInfo = maskManager.getCategoryValueInfo(entry.getKey(), entry.getValue()); if (catInfo != null) { data.setCategoryValue(catInfo.getBitmask(), catInfo.getValueCode()); } } } public Map<String, String> getCategoryValues(DocId documentId) { DynamicData data = dynamicDataMap.get(documentId); if (null == data) { throw new IllegalArgumentException("no data for document " + documentId); } Map<String, String> results = Maps.newHashMap(); Map<String, CategoryInfo> categoryInfos = maskManager.getCategoryInfos(); for (Entry<String, CategoryInfo> entry : categoryInfos.entrySet()) { CategoryInfo categoryInfo = entry.getValue(); int valueCode = data.getCategoryValue(categoryInfo.getBitmask()); if (valueCode != 0) { results.put(entry.getKey(), categoryInfo.getValue(valueCode)); } } return results; } public interface FacetsCollector { public void addCategoryValue(String category, Integer valueCode); } public void populateCollector(DocId documentId, FacetsCollector collector) { DynamicData data = dynamicDataMap.get(documentId); if (null == data) { throw new IllegalArgumentException("no data for document " + documentId); } Map<String, CategoryInfo> categoryInfos = maskManager.getCategoryInfos(); for (Entry<String, CategoryInfo> entry : categoryInfos.entrySet()) { CategoryInfo categoryInfo = entry.getValue(); int valueCode = data.getCategoryValue(categoryInfo.getBitmask()); if (valueCode != 0) { collector.addCategoryValue(entry.getKey(), valueCode); } } } public CategoryMaskManager getMaskManager() { return maskManager; } @Override public void setBoosts(String documentId, Integer timestamp, Map<Integer, Float> boosts) { Preconditions.checkNotNull(documentId); for (Integer index : boosts.keySet()) { if (index >= numberOfBoosts || index < 0) { throw new IllegalArgumentException("Invalid boost index (" + index + " for a Scorer with a maximum of " + numberOfBoosts + " boosts)"); } } DynamicData data = getOrCreateData(documentId); for (Entry<Integer, Float> entry : boosts.entrySet()) { data.setBoost(entry.getKey(), entry.getValue()); } if (timestamp != null) { data.setTimestamp(timestamp); } } private DynamicData getOrCreateData(String docid) { DocId key = new DocId(docid); DynamicData data = dynamicDataMap.get(key); if (data == null) { data = new DynamicData(numberOfBoosts); DynamicData previousValue = dynamicDataMap.putIfAbsent(key, data); if (previousValue != null) { data = previousValue; } } return data; } /* * Check the synching block */ @Override public void dump() throws IOException { logger.info("Starting DynamicDataManager's dump."); dumpLock.writeLock().lock(); try { newSyncToDisk(); } finally { dumpLock.writeLock().unlock(); } logger.info("DynamicDataManager's dump completed."); } /** * Syncs the stored data to disk. * This method is non-blocking, and does not ensure that the operation will be completed * in time, or at all. */ public void nonBlockingSync() { (new SyncerThread()).start(); } private static final int SERIALIZATION_VERSION = 1; private synchronized void newSyncToDisk() throws IOException { File f = new File(backupDir, MAIN_FILE_NAME); DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(f))); try { dos.writeInt(SERIALIZATION_VERSION); dos.writeInt(numberOfBoosts); for (Entry<DocId, DynamicData> entry : dynamicDataMap.entrySet()) { entry.getKey().writeData(dos); entry.getValue().writeData(dos); } DocId.writeNull(dos); maskManager.writeData(dos); } finally { Execute.close(dos); } } private synchronized void readFromDisk() { File f = new File(backupDir, MAIN_FILE_NAME); DataInputStream dis = null; try { dis = new DataInputStream(new BufferedInputStream(new FileInputStream(f))); int version = dis.readInt(); if (version > SERIALIZATION_VERSION) { throw new IllegalStateException(String.format("File version is newer than known by this class: %d > %d", version, SERIALIZATION_VERSION)); } int fileBoosts = dis.readInt(); if (numberOfBoosts != fileBoosts) { throw new IllegalStateException(String.format("Incorrect number of boosts in file. Actual: %d, Expected: %d", fileBoosts, numberOfBoosts)); } while (true) { DocId docid = DocId.readData(dis); if (docid == null) { break; } DynamicData data = DynamicData.readData(numberOfBoosts, dis); dynamicDataMap.put(docid, data); } maskManager.readData(dis); } catch (IOException e) { logger.fatal("Error while loading dynamic data", e); throw new RuntimeException(e); } finally { Execute.close(dis); } } //---------------------------------------------------------------------------------------- //STATIC METHODS private static void checkDirArgument(File backupDir) { Preconditions.checkNotNull(backupDir); if (!backupDir.canRead()) { String s = "Don't have read permission over the backup directory(" + backupDir.getAbsolutePath() + ")."; logger.error(s); throw new IllegalArgumentException(s); } if (!backupDir.canWrite()) { String s = "Don't have write permission over the backup directory(" + backupDir.getAbsolutePath() + ")."; logger.error(s); throw new IllegalArgumentException(s); } } //---------------------------------------------------------------------------------------- //PRIVATE CLASSES static class DynamicData implements Serializable, Boosts { private static final long serialVersionUID = 1L; private int[] data; // timestamp = data[0], variables = data[1-n], categories = data[n+1, m] private int dataBoundary; DynamicData(int numberOfBoosts) { data = new int[1 + numberOfBoosts]; dataBoundary = numberOfBoosts + 1; } DynamicData(DynamicBoosts oldBoosts) { data = new int[1 + oldBoosts.boosts.length]; data[0] = oldBoosts.timestamp; for (int i = 0; i < oldBoosts.boosts.length; i++) { data[1 + i] = Float.floatToRawIntBits(oldBoosts.boosts[i]); } } public DynamicData(int numberOfBoosts, int[] data) { this.data = data; this.dataBoundary = numberOfBoosts + 1; } public int[] getData() { return data; } public Map<Integer, Double> getVariablesAsMap(int numberOfVariables) { HashMap<Integer, Double> map = new HashMap<Integer, Double>(numberOfVariables); for(int id = 0; id < numberOfVariables; id++) { map.put(id, Double.valueOf(getBoost(id))); } return map; } @Override public float getBoost(int boostIndex) { return Float.intBitsToFloat(data[1 + boostIndex]); } public void setBoost(int boostIndex, float boostValue) { data[1 + boostIndex] = Float.floatToRawIntBits(boostValue); } public void setCategoryValue(int[] bitmask, int value) { if (data.length - dataBoundary < bitmask.length) { int[] newData = new int[bitmask.length + dataBoundary]; System.arraycopy(data, 0, newData, 0, data.length); data = newData; } data = CategoryEncoder.encode(data, dataBoundary, bitmask, value); } public int getCategoryValue(int[] bitmask) { return CategoryEncoder.decode(data, dataBoundary, bitmask); } @Override public int getTimestamp() { return data[0]; } public void setTimestamp(int timestamp) { data[0] = timestamp; } void writeData(DataOutputStream dos) throws IOException { int len = data.length; dos.writeInt(len); for (int i = 0; i < len; i++) { dos.writeInt(data[i]); } } static DynamicData readData(int numberOfBoosts, DataInputStream dis) throws IOException { int len = dis.readInt(); int[] data = new int[len]; for (int i = 0; i < len; i++) { data[i] = dis.readInt(); } return new DynamicData(numberOfBoosts, data); } } private class SyncerThread extends Thread { public SyncerThread() { setName("DynamicDataManager's syncer thread"); } @Override public void run() { try { newSyncToDisk(); } catch (Exception e) { logger.error(e); } } } public static void main(String[] args) { int boosts = Integer.parseInt(args[0]); DynamicDataManager ddm = new DynamicDataManager(boosts, new File(args[1])); System.out.println("Count: " + ddm.getDocumentCount()); Scanner in = new Scanner(System.in); while (in.hasNextLine()) { String line = in.nextLine(); DocId docId = new DocId(line); DynamicData data = ddm.getDynamicData(docId); System.out.println("timestamp: " + data.getTimestamp()); for (int i = 0; i < boosts; i++) { System.out.println("var["+i+"]: " + data.getBoost(i)); } System.out.println(ddm.getCategoryValues(docId)); } } public Map<String, String> getStats() { HashMap<String, String> stats = Maps.newHashMap(); stats.putAll(maskManager.getStats()); stats.put("dynamic_data_count", String.valueOf(this.dynamicDataMap.size())); stats.put("dynamic_data_variables", String.valueOf(this.numberOfBoosts)); return stats; } }