// SegmentDumpTool.java example

// Explorer
// pinot-master
/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.tools;

import com.linkedin.pinot.common.data.FieldSpec;
import com.linkedin.pinot.common.segment.ReadMode;
import com.linkedin.pinot.common.segment.SegmentMetadata;
import com.linkedin.pinot.core.common.Block;
import com.linkedin.pinot.core.common.BlockSingleValIterator;
import com.linkedin.pinot.core.common.BlockValSet;
import com.linkedin.pinot.core.common.DataSource;
import com.linkedin.pinot.core.indexsegment.IndexSegment;
import com.linkedin.pinot.core.segment.creator.impl.V1Constants;
import com.linkedin.pinot.core.segment.index.SegmentMetadataImpl;
import com.linkedin.pinot.core.segment.index.loader.Loaders;
import com.linkedin.pinot.core.segment.index.readers.Dictionary;
import com.linkedin.pinot.core.startree.StarTreeInterf;
import com.linkedin.pinot.core.startree.StarTreeSerDe;
import java.io.File;
import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.kohsuke.args4j.Argument;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;

/**
 * Command-line tool that dumps the contents of a segment directory to standard output as
 * tab-separated text.
 *
 * <p>Usage: {@code SegmentDumpTool [-dumpStarTree] <segmentPath> [columnName ...]}
 *
 * <p>The first line printed is a header ({@code Doc} followed by the column names); every
 * following line holds a document number followed by the value of each requested column, looked
 * up through that column's dictionary. When no column names are given, every column of the
 * segment schema is dumped, sorted by name. With {@code -dumpStarTree} the star tree stored in
 * the segment directory is printed after the documents.
 */
public class SegmentDumpTool {
  private static final String USAGE =
      "Usage: SegmentDumpTool [-dumpStarTree] <segmentPath> [columnName ...]";

  /** Path of the segment directory to dump (first positional argument). */
  @Argument
  private String segmentPath;

  /** Columns to dump (remaining positional arguments); {@code null} means all columns. */
  @Argument(index = 1, multiValued = true)
  private List<String> columnNames;

  /** Whether to print the star tree after the document dump. */
  @Option(name="-dumpStarTree")
  private boolean dumpStarTree;

  /**
   * Parses the command line, loads the segment and writes the dump to standard output.
   *
   * @param args command-line arguments, see the class description for the expected layout
   * @throws IllegalArgumentException if no segment path was supplied
   * @throws Exception if argument parsing, segment loading or star tree reading fails
   */
  public void doMain(String[] args) throws Exception {
    CmdLineParser parser = new CmdLineParser(this);
    parser.parseArgument(args);

    // Fail with a usage message instead of a bare NullPointerException from new File(null).
    if (segmentPath == null) {
      throw new IllegalArgumentException(USAGE);
    }

    File segmentDir = new File(segmentPath);

    SegmentMetadata metadata = new SegmentMetadataImpl(segmentDir);

    // All columns by default
    if (columnNames == null) {
      columnNames = new ArrayList<String>(metadata.getSchema().getColumnNames());
      Collections.sort(columnNames);
    }

    IndexSegment indexSegment = Loaders.IndexSegment.load(segmentDir, ReadMode.mmap);

    // Kept positionally (parallel to columnNames) rather than keyed by name, so that a column
    // name repeated on the command line uses the iterator obtained for that occurrence instead
    // of sharing a single entry that would be advanced twice per row.
    int numColumns = columnNames.size();
    List<Dictionary> dictionaries = new ArrayList<Dictionary>(numColumns);
    List<BlockSingleValIterator> iterators = new ArrayList<BlockSingleValIterator>(numColumns);

    for (String columnName : columnNames) {
      DataSource dataSource = indexSegment.getDataSource(columnName);
      dataSource.open();
      Block block = dataSource.nextBlock();
      BlockValSet blockValSet = block.getBlockValueSet();
      // NOTE(review): this cast fails with a ClassCastException for any column whose iterator is
      // not a BlockSingleValIterator; presumably that means multi-value columns - confirm.
      iterators.add((BlockSingleValIterator) blockValSet.iterator());
      dictionaries.add(dataSource.getDictionary());
    }

    // Header line.
    StringBuilder line = new StringBuilder("Doc\t");
    for (String columnName : columnNames) {
      line.append(columnName).append('\t');
    }
    System.out.println(line);

    // One line per document; each line is assembled in memory and written with a single call.
    int totalDocs = indexSegment.getSegmentMetadata().getTotalDocs();
    for (int docId = 0; docId < totalDocs; docId++) {
      line.setLength(0);
      line.append(docId).append('\t');
      for (int col = 0; col < numColumns; col++) {
        // The iterator yields the dictionary id; the dictionary maps it back to the value.
        int dictId = iterators.get(col).nextIntVal();
        line.append(dictionaries.get(col).get(dictId)).append('\t');
      }
      System.out.println(line);
    }

    if (dumpStarTree) {
      System.out.println();
      File starTreeFile = new File(segmentDir, V1Constants.STAR_TREE_INDEX_FILE);
      StarTreeInterf tree = StarTreeSerDe.fromFile(starTreeFile, ReadMode.mmap);
      tree.printTree();
    }
  }

  /**
   * Program entry point; delegates to {@link #doMain(String[])} on a fresh instance.
   *
   * @param args command-line arguments
   * @throws Exception if the dump fails
   */
  public static void main(String[] args) throws Exception {
    new SegmentDumpTool().doMain(args);
  }
}