/** * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.linkedin.pinot.tools; import com.linkedin.pinot.common.data.FieldSpec; import com.linkedin.pinot.common.segment.ReadMode; import com.linkedin.pinot.common.segment.SegmentMetadata; import com.linkedin.pinot.core.common.Block; import com.linkedin.pinot.core.common.BlockSingleValIterator; import com.linkedin.pinot.core.common.BlockValSet; import com.linkedin.pinot.core.common.DataSource; import com.linkedin.pinot.core.indexsegment.IndexSegment; import com.linkedin.pinot.core.segment.creator.impl.V1Constants; import com.linkedin.pinot.core.segment.index.SegmentMetadataImpl; import com.linkedin.pinot.core.segment.index.loader.Loaders; import com.linkedin.pinot.core.segment.index.readers.Dictionary; import com.linkedin.pinot.core.startree.StarTreeInterf; import com.linkedin.pinot.core.startree.StarTreeSerDe; import java.io.File; import java.io.FileInputStream; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import org.kohsuke.args4j.Argument; import org.kohsuke.args4j.CmdLineParser; import org.kohsuke.args4j.Option; public class SegmentDumpTool { @Argument private String segmentPath; @Argument(index = 1, multiValued = true) private List<String> columnNames; @Option(name="-dumpStarTree") private boolean dumpStarTree; public void doMain(String[] args) throws Exception { CmdLineParser parser = new CmdLineParser(this); parser.parseArgument(args); File segmentDir = new File(segmentPath); SegmentMetadata metadata = new SegmentMetadataImpl(segmentDir); // All columns by default if (columnNames == null) { columnNames = new ArrayList<String>(metadata.getSchema().getColumnNames()); Collections.sort(columnNames); } IndexSegment indexSegment = Loaders.IndexSegment.load(segmentDir, ReadMode.mmap); Map<String, Dictionary> dictionaries = new HashMap<String, Dictionary>(); Map<String, BlockSingleValIterator> iterators = new HashMap<String, BlockSingleValIterator>(); for (String columnName : columnNames) { DataSource dataSource = indexSegment.getDataSource(columnName); dataSource.open(); Block block = dataSource.nextBlock(); BlockValSet blockValSet = block.getBlockValueSet(); BlockSingleValIterator itr = (BlockSingleValIterator) blockValSet.iterator(); iterators.put(columnName, itr); dictionaries.put(columnName, dataSource.getDictionary()); } System.out.print("Doc\t"); for (String columnName : columnNames) { System.out.print(columnName); System.out.print("\t"); } System.out.println(); for (int i = 0; i < indexSegment.getSegmentMetadata().getTotalDocs(); i++) { System.out.print(i); System.out.print("\t"); for (String columnName : columnNames) { FieldSpec.DataType columnType = metadata.getSchema().getFieldSpecFor(columnName).getDataType(); BlockSingleValIterator itr = iterators.get(columnName); Integer encodedValue = itr.nextIntVal(); Object value = dictionaries.get(columnName).get(encodedValue); System.out.print(value); System.out.print("\t"); } System.out.println(); } if (dumpStarTree) { System.out.println(); File starTreeFile = new File(segmentDir, V1Constants.STAR_TREE_INDEX_FILE); StarTreeInterf tree = StarTreeSerDe.fromFile(starTreeFile, ReadMode.mmap); tree.printTree(); } } public static void main(String[] args) throws Exception { new SegmentDumpTool().doMain(args); } }