/*
* Copyright (C) 2014 Indeed Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the
* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.indeed.flamdex.ramses;
import com.google.common.base.Throwables;
import com.indeed.flamdex.api.DocIdStream;
import com.indeed.flamdex.api.FlamdexOutOfMemoryException;
import com.indeed.flamdex.api.FlamdexReader;
import com.indeed.flamdex.api.IntTermDocIterator;
import com.indeed.flamdex.api.IntTermIterator;
import com.indeed.flamdex.api.IntValueLookup;
import com.indeed.flamdex.api.StringTermDocIterator;
import com.indeed.flamdex.api.StringTermIterator;
import com.indeed.flamdex.api.StringValueLookup;
import com.indeed.flamdex.fieldcache.FieldCacher;
import com.indeed.imhotep.io.caching.CachedFile;
import com.indeed.imhotep.metrics.Count;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.util.Collection;
/**
 * A {@link FlamdexReader} that delegates field, term, and document access to a wrapped reader
 * while supplying its own handling for metrics:
 * <ul>
 *   <li>{@code "time"} is built from the three ramses files found in the given directory;</li>
 *   <li>{@code "counts"} is served by a {@link Count} metric;</li>
 *   <li>every other metric is obtained from the wrapped reader.</li>
 * </ul>
 * All metrics other than {@code "time"} are wrapped in a {@code ScalingMetric} when the scale
 * factor read from {@code scale.bin} is not 1.
 *
 * @author jsgroth
 */
public class RamsesFlamdexWrapper implements FlamdexReader {
    private static final String TIME_UPPER_BITS_FILE = "timeupperbits.bin";
    private static final String DOC_ID_BOUNDARIES_FILE = "tubdocids.bin";
    private static final String TIME_LOWER_BITS_FILE = "timelowerbits.bin";
    private static final String SCALE_FACTOR_FILE = "scale.bin";

    private final FlamdexReader wrapped;
    private final String directory;
    private final int scaleFactor;
    /** Sum of the lengths of the three ramses time files, as reported by {@link CachedFile}. */
    private final long memoryOverhead;

    /**
     * Creates a wrapper around {@code wrapped} using the ramses files located in
     * {@code directory}.
     *
     * @param wrapped reader to which all non-metric calls are delegated
     * @param directory directory containing the ramses time files and, optionally, the scale
     *     factor file
     * @throws IOException if the scale factor file cannot be loaded through {@link CachedFile}
     */
    public RamsesFlamdexWrapper(FlamdexReader wrapped, String directory) throws IOException {
        this.wrapped = wrapped;
        this.directory = directory;

        final CachedFile tubFile = CachedFile.create(CachedFile.buildPath(directory, TIME_UPPER_BITS_FILE));
        final CachedFile docIdFile = CachedFile.create(CachedFile.buildPath(directory, DOC_ID_BOUNDARIES_FILE));
        final CachedFile tlbFile = CachedFile.create(CachedFile.buildPath(directory, TIME_LOWER_BITS_FILE));
        memoryOverhead = tubFile.length() + docIdFile.length() + tlbFile.length();

        final File sfFile = CachedFile.create(CachedFile.buildPath(directory, SCALE_FACTOR_FILE)).loadFile();
        final Integer rawScaleFactor = readObjectFromFile(sfFile, Integer.class);
        // An unreadable or absent scale factor means "no scaling".
        scaleFactor = rawScaleFactor != null ? rawScaleFactor : 1;
    }

    @Override
    public Collection<String> getIntFields() {
        return wrapped.getIntFields();
    }

    @Override
    public Collection<String> getStringFields() {
        return wrapped.getStringFields();
    }

    @Override
    public int getNumDocs() {
        return wrapped.getNumDocs();
    }

    /** Returns the directory reported by the wrapped reader. */
    @Override
    public String getDirectory() {
        return wrapped.getDirectory();
    }

    @Override
    public DocIdStream getDocIdStream() {
        return wrapped.getDocIdStream();
    }

    @Override
    public IntTermIterator getIntTermIterator(String field) {
        return wrapped.getIntTermIterator(field);
    }

    @Override
    public StringTermIterator getStringTermIterator(String field) {
        return wrapped.getStringTermIterator(field);
    }

    @Override
    public IntTermDocIterator getIntTermDocIterator(final String field) {
        return wrapped.getIntTermDocIterator(field);
    }

    @Override
    public StringTermDocIterator getStringTermDocIterator(final String field) {
        return wrapped.getStringTermDocIterator(field);
    }

    @Override
    public long getIntTotalDocFreq(String field) {
        return wrapped.getIntTotalDocFreq(field);
    }

    @Override
    public long getStringTotalDocFreq(String field) {
        return wrapped.getStringTotalDocFreq(field);
    }

    @Override
    public Collection<String> getAvailableMetrics() {
        return wrapped.getAvailableMetrics();
    }

    /**
     * Returns the lookup for {@code metric}.
     *
     * <p>{@code "time"} is loaded from the ramses files and returned unscaled. {@code "counts"}
     * yields a new {@link Count}; anything else comes from the wrapped reader. Those two cases
     * are wrapped in a {@code ScalingMetric} when the scale factor is not 1.
     *
     * @param metric name of the requested metric
     * @return the lookup for the metric
     * @throws FlamdexOutOfMemoryException if thrown by the wrapped reader
     */
    @Override
    public IntValueLookup getMetric(String metric) throws FlamdexOutOfMemoryException {
        if ("time".equals(metric)) {
            return newTimeLookup();
        }
        final IntValueLookup rawMetric = "counts".equals(metric) ? new Count() : wrapped.getMetric(metric);
        return scaleFactor != 1 ? new ScalingMetric(rawMetric, scaleFactor) : rawMetric;
    }

    /**
     * Builds a string value lookup for {@code field} through {@link FieldCacher}, passing this
     * reader and the ramses directory.
     *
     * @param field name of the string field
     * @return the string value lookup created by {@link FieldCacher}
     * @throws FlamdexOutOfMemoryException declared for callers; not thrown directly here
     */
    public StringValueLookup getStringLookup(final String field) throws FlamdexOutOfMemoryException {
        try {
            return FieldCacher.newStringValueLookup(field, this, directory);
        } catch (IOException e) {
            throw Throwables.propagate(e);
        }
    }

    /**
     * Loads the three ramses time files and builds the {@code "time"} metric from them.
     *
     * @return a new {@code RamsesTimeIntValueLookup} backed by the loaded arrays
     * @throws RuntimeException if any file cannot be loaded or deserialized; when the failure
     *     is an {@link IOException} it is attached as the cause
     */
    private IntValueLookup newTimeLookup() {
        try {
            final File tubFile = CachedFile.create(CachedFile.buildPath(directory, TIME_UPPER_BITS_FILE)).loadFile();
            final int[] timeUpperBits = readObjectFromFile(tubFile, int[].class);

            final File docIdFile = CachedFile.create(CachedFile.buildPath(directory, DOC_ID_BOUNDARIES_FILE)).loadFile();
            final int[] docIdBoundaries = readObjectFromFile(docIdFile, int[].class);

            final File tlbFile = CachedFile.create(CachedFile.buildPath(directory, TIME_LOWER_BITS_FILE)).loadFile();
            final byte[] timeLowerBits = readObjectFromFile(tlbFile, byte[].class);

            if (timeUpperBits == null || docIdBoundaries == null || timeLowerBits == null) {
                throw new RuntimeException("unable to load ramses time metric from directory " + directory + ", missing one or more required files");
            }
            return new RamsesTimeIntValueLookup(timeUpperBits, docIdBoundaries, timeLowerBits, memoryOverhead);
        } catch (IOException e) {
            // Keep the original failure as the cause so the real I/O problem is not lost.
            throw new RuntimeException("unable to load ramses time metric from directory " + directory + ", missing one or more required files", e);
        }
    }

    /**
     * Reports the memory needed for {@code metric}: the combined length of the ramses time
     * files for {@code "time"}, zero for {@code "counts"}, and whatever the wrapped reader
     * reports otherwise.
     */
    @Override
    public long memoryRequired(String metric) {
        if ("time".equals(metric)) {
            return memoryOverhead;
        } else if ("counts".equals(metric)) {
            return 0;
        }
        return wrapped.memoryRequired(metric);
    }

    @Override
    public void close() throws IOException {
        wrapped.close();
    }

    /**
     * Checks whether all three ramses time files exist in {@code dir}. The scale factor file is
     * not part of this check.
     *
     * @param dir directory to inspect
     * @return {@code true} only if every one of the three time files exists
     */
    public static boolean ramsesFilesExist(String dir) {
        return CachedFile.create(CachedFile.buildPath(dir, TIME_UPPER_BITS_FILE)).exists() &&
                CachedFile.create(CachedFile.buildPath(dir, DOC_ID_BOUNDARIES_FILE)).exists() &&
                CachedFile.create(CachedFile.buildPath(dir, TIME_LOWER_BITS_FILE)).exists();
    }

    /**
     * Reads a Java-serialized object of type {@code T} from {@code file}.
     *
     * <p>This is a best-effort read: if the file cannot be opened or its content cannot be
     * deserialized, {@code null} is returned instead of an exception being thrown. Callers
     * decide whether a missing value is fatal.
     *
     * @param file file from which the object should be read
     * @param clazz non-null Class object for {@code T}
     * @param <T> the return type
     * @return the deserialized object, or {@code null} if it could not be read
     * @throws ClassCastException if the deserialized object is not an instance of {@code clazz}
     */
    private static <T> T readObjectFromFile(File file, Class<T> clazz) {
        final FileInputStream fileIn;
        try {
            fileIn = new FileInputStream(file);
        } catch (Exception ignored) {
            // Deliberately broad: any failure to open the file is treated as "no value".
            return null;
        }

        ObjectInputStream objIn = null;
        final Object ret;
        try {
            objIn = new ObjectInputStream(new BufferedInputStream(fileIn));
            ret = objIn.readObject();
        } catch (Exception ignored) {
            // Bad stream header, truncated data, unknown class, ...: treated as "no value".
            return null;
        } finally {
            try {
                if (objIn != null) {
                    objIn.close(); // objIn.close() also closes fileIn
                } else {
                    // The ObjectInputStream was never created, so close the raw stream directly.
                    fileIn.close();
                }
            } catch (IOException ignored) {
                // Nothing useful can be done if closing a read-only stream fails.
            }
        }
        return clazz.cast(ret);
    }
}