/** * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.linkedin.pinot.core.segment.creator.impl.inv; import java.io.BufferedOutputStream; import java.io.DataOutputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.util.Arrays; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.roaringbitmap.buffer.MutableRoaringBitmap; import com.linkedin.pinot.common.data.FieldSpec; import com.linkedin.pinot.core.segment.creator.InvertedIndexCreator; import com.linkedin.pinot.core.segment.creator.impl.V1Constants; /** * Nov 12, 2014 */ public class HeapBitmapInvertedIndexCreator implements InvertedIndexCreator { private static final Logger LOGGER = LoggerFactory.getLogger(HeapBitmapInvertedIndexCreator.class); private final File invertedIndexFile; private final FieldSpec spec; private final MutableRoaringBitmap[] invertedIndex; long start = 0; public HeapBitmapInvertedIndexCreator(File indexDir, int cardinality, int numDocs, int totalNumberOfEntries, FieldSpec spec) { this.spec = spec; invertedIndexFile = new File(indexDir, spec.getName() + V1Constants.Indexes.BITMAP_INVERTED_INDEX_FILE_EXTENSION); invertedIndex = new MutableRoaringBitmap[cardinality]; for (int i = 0; i < invertedIndex.length; ++i) { invertedIndex[i] = new MutableRoaringBitmap(); } start = System.currentTimeMillis(); } @Override public void add(int docId, int dictionaryId) { invertedIndex[dictionaryId].add(docId); } @Override public long totalTimeTakeSoFar() { return (System.currentTimeMillis() - start); } @Override public void seal() throws IOException { final DataOutputStream out = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(invertedIndexFile))); // First, write out offsets of bitmaps. The information can be used to access a certain bitmap directly. // Totally (invertedIndex.length+1) offsets will be written out; the last offset is used to calculate the length of // the last bitmap, which might be needed when accessing bitmaps randomly. // If a bitmap's offset is k, then k bytes need to be skipped to reach the bitmap. int offset = 4 * (invertedIndex.length + 1); // The first bitmap's offset out.writeInt(offset); for (final MutableRoaringBitmap element : invertedIndex) { // the other bitmap's offset offset += element.serializedSizeInBytes(); out.writeInt(offset); } // write out bitmaps one by one for (final MutableRoaringBitmap element : invertedIndex) { element.serialize(out); } out.close(); LOGGER.debug("persisted bitmap inverted index for column : " + spec.getName() + " in " + invertedIndexFile.getAbsolutePath()); } @Override public void add(int docId, int[] dictionaryIds) { add(docId, dictionaryIds, dictionaryIds.length); } @Override public void add(int docId, int[] dictionaryIds, int length) { if (spec.isSingleValueField()) { throw new RuntimeException("Method not applicable to single value fields"); } Arrays.sort(dictionaryIds, 0, length); indexMultiValue(docId, dictionaryIds, length); } private void indexSingleValue(int entry, int docId) { if (entry == -1) { return; } invertedIndex[entry].add(docId); } private void indexMultiValue(int docId, int[] entries, int length) { for (int i = 0; i < length; i++) { final int entry = entries[i]; if (entry != -1) { invertedIndex[entry].add(docId); } } } }