/**
* Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.linkedin.pinot.core.segment.store;
import com.linkedin.pinot.common.segment.ReadMode;
import com.linkedin.pinot.core.segment.index.SegmentMetadataImpl;
import com.linkedin.pinot.core.segment.memory.PinotDataBuffer;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Path;
import org.apache.commons.configuration.ConfigurationException;
/**
*
* Basic top-level interface to access segment indexes.
* Usage:
* <pre>
* {@code
* SegmentDirectory segmentDir =
* SegmentDirectory.createFromLocalFS(dirName, segmentMetadata, ReadMode.mmap);
* SegmentDirectory.Writer writer =
* segmentDir.createWriter();
* try {
* writer.getIndexFor("column1", ColumnIndexType.FORWARD_INDEX);
* PinotDataBuffer buffer =
* writer.newIndexFor("column1", ColumnIndexType.FORWARD_INDEX, 1024);
* // write value 87 at index 512
* buffer.putLong(512, 87L);
* writer.saveAndClose();
* } finally {
* writer.close();
* }
*
* SegmentDirectory.Reader reader =
* segmentDir.createReader();
* try {
* PinotDataBuffer col1Dictionary = reader.getIndexFor("col1Dictionary", ColumnIndexType.DICTIONARY);
* } catch (Exception e) {
* // handle error
* } finally {
* reader.close();
* }
*
* // this should be in finally{} block
* segmentDir.close();
* }
* </pre>
*
* Typical use cases for Pinot:
* 1. Read existing indexes
* 2. Read forward index and create new inverted index
* 3. drop inverted index
* 4. Create dictionary, forward index and inverted index.
*
* Semantics:
* ===========
* The semantics below are explicitly tied to the use cases above. Typically, you
* should cluster all the writes at the beginning (before reads). After writing,
* save/close the writer and create reader for reads. saveAndClose() is a costly operation.
* Reading after writes triggers full reload of data so use it with caution. For pinot, this
* is a costly operation performed only at the initialization time so the penalty is acceptable.
*
* 1. Single writer, multiple reader semantics
* 2. Writes are not visible till the user calls saveAndClose()
* 3. saveAndClose() is costly! Creating readers after writers is a costly operation.
* 4. saveAndClose() does not guarantee atomicity. Failures during saveAndClose()
* can leave the directory corrupted.
* 5. SegmentDirectory controls placement of data. User should not make
* any assumptions about data storage.
* 6. Use factory-methods to instantiate SegmentDirectory. This is with the
* goal of supporting networked/distributed file system reads in the future.
*
* All things said, users can always get the bytebuffers through readers and
* change contents. If these buffers are mmapped then the changes will reflect
* in the segment storage.
*/
public abstract class SegmentDirectory implements AutoCloseable {

  /**
   * Create segment directory from local file system
   * @param directory File object representing segment directory on disk
   * @param metadata segment metadata
   * @param readMode mmap vs heap ReadMode for data
   * @return segmentDirectory
   */
  // NOTE: this needs to be metadata impl to read all columns.
  // In future, we will have this class load metadata rather than
  // passing it in.
  public static SegmentDirectory createFromLocalFS(File directory,
      SegmentMetadataImpl metadata, ReadMode readMode) {
    return new SegmentLocalFSDirectory(directory, metadata, readMode);
  }

  /**
   * Create segment directory from local file system without passing segment
   * metadata explicitly.
   * NOTE(review): presumably the local-FS implementation loads the metadata
   * from the directory itself; confirm against SegmentLocalFSDirectory.
   * @param directory File object representing segment directory on disk
   * @param readMode mmap vs heap ReadMode for data
   * @return segmentDirectory
   * @throws IOException propagated from the SegmentLocalFSDirectory constructor
   * @throws ConfigurationException propagated from the SegmentLocalFSDirectory constructor
   */
  public static SegmentDirectory createFromLocalFS(File directory, ReadMode readMode)
      throws IOException, ConfigurationException {
    return new SegmentLocalFSDirectory(directory, readMode);
  }

  /**
   * Load segment metadata for the given segment directory. Delegates to
   * {@code SegmentLocalFSDirectory.loadSegmentMetadata(File)}.
   * @param directory File object representing segment directory on disk
   * @return segment metadata as returned by the local-FS implementation
   * @throws IOException propagated from the delegate
   * @throws ConfigurationException propagated from the delegate
   */
  public static SegmentMetadataImpl loadSegmentMetadata(File directory)
      throws IOException, ConfigurationException {
    return SegmentLocalFSDirectory.loadSegmentMetadata(directory);
  }

  /**
   * Get the path/URL for the directory
   * @return path of this segment directory
   */
  public abstract Path getPath();

  /**
   * Get the size of this segment directory.
   * @return size in bytes (going by the method name; the exact accounting is
   *         defined by the implementation)
   */
  public abstract long getDiskSizeBytes();

  /**
   * Reader for columnar index buffers from segment directory.
   * This is an inner (non-static) class, so every Reader is bound to the
   * SegmentDirectory instance that created it.
   */
  public abstract class Reader implements AutoCloseable {

    /**
     * Get columnar index data buffer.
     * @param column column name
     * @param type index type
     * @return a bytebuffer-like buffer holding index data
     * @throws IOException
     */
    public abstract PinotDataBuffer getIndexFor(String column, ColumnIndexType type)
        throws IOException;

    /**
     * Get StarTree index as InputStream
     * @return InputStream representing serialized version of star tree.
     */
    public abstract InputStream getStarTreeStream();

    /**
     * Get the StarTree index file.
     *
     * @return File for StarTree index.
     */
    public abstract File getStarTreeFile();

    /**
     * Check if the segment has star tree
     * @return true if the segment has a star tree
     */
    public abstract boolean hasStarTree();

    /**
     * Check if the segment has an index of the given type for the column
     * @param column column name
     * @param type index type
     * @return true if such an index exists
     */
    public abstract boolean hasIndexFor(String column, ColumnIndexType type);

    /**
     * Close this reader. Declared without a throws clause, so callers do not
     * have to handle the checked Exception of {@link AutoCloseable#close()}.
     */
    @Override
    public abstract void close();

    /**
     * Redeclared abstract so that concrete readers must provide their own
     * string representation.
     */
    @Override
    public abstract String toString();
  }

  /**
   *
   * Writer to update columnar index. Read about the semantics at the top
   */
  public abstract class Writer extends Reader {

    /**
     * create a new buffer for writers to store index. This buffer will be visible
     * after this point.
     * Failures in the middle can cause corruption.
     * @param columnName column name
     * @param indexType column index type
     * @param sizeBytes sizeBytes of index data
     * @return PinotDataBuffer that writers can update
     * @throws IOException
     */
    // NOTE: an interface like readFrom(File f, String column, ColumnIndexType, int sizeBytes) will be safe
    // but it can lead to potential endianness issues. Endianness used to create data may not be
    // same as PinotDataBuffer
    public abstract PinotDataBuffer newIndexFor(String columnName, ColumnIndexType indexType, int sizeBytes)
        throws IOException;

    /**
     * Create star tree output stream
     * @return Output stream to write serialized version of star tree
     */
    public abstract OutputStream starTreeOutputStream();

    /**
     * Check if the removal of index is a supported operation
     * @return true if the index removal is supported
     */
    public abstract boolean isIndexRemovalSupported();

    /**
     * Removes an existing column index from directory
     * @param columnName column name
     * @param indexType column index type
     */
    public abstract void removeIndex(String columnName, ColumnIndexType indexType);

    /**
     * Remove existing star tree
     */
    public abstract void removeStarTree();

    /**
     * Save all the write and delete operations and close writer.
     * Calls {@link #save()} followed by {@link #close()}. If save() throws,
     * close() is NOT invoked by this method, so callers should still call
     * close() themselves (e.g. in a finally block).
     * @throws IOException if save() fails
     */
    public void saveAndClose()
        throws IOException {
      save();
      close();
    }

    /**
     * Discard all recorded writes and deletes and close the writer.
     * Calls {@link #abort()} followed by {@link #close()}.
     * @throws Exception
     */
    public void abortAndClose()
        throws Exception {
      abort();
      close();
    }

    /**
     * Persist the recorded write and delete operations. Package-private hook
     * invoked by {@link #saveAndClose()}.
     * @throws IOException
     */
    abstract void save()
        throws IOException;

    /**
     * Discard the recorded write and delete operations. Package-private hook
     * invoked by {@link #abortAndClose()}.
     */
    abstract void abort();

    /**
     * Get the segment directory this writer belongs to.
     * @return the enclosing SegmentDirectory instance
     */
    public SegmentDirectory toSegmentDirectory() {
      return SegmentDirectory.this;
    }

    /**
     * Redeclared abstract so that concrete writers must provide their own
     * string representation.
     */
    @Override
    public abstract String toString();
  }

  /**
   * Create Reader for the directory
   * @return Reader object if successfully created. null if the directory
   *   is already locked for writes
   * @throws IOException
   * @throws ConfigurationException declared by this signature; the triggering
   *   conditions are determined by the implementation
   */
  public abstract Reader createReader()
      throws IOException, ConfigurationException;

  /**
   * Create Writer for the directory
   * @return Writer object on success. null if the directory has active readers
   * @throws IOException
   */
  public abstract Writer createWriter()
      throws IOException;

  /**
   * Redeclared abstract so that concrete segment directories must provide
   * their own string representation.
   */
  @Override
  public abstract String toString();

  /**
   * Only subclasses may construct instances; other callers should use the
   * static factory methods.
   */
  protected SegmentDirectory() {
  }
}