/**
* Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.linkedin.pinot.core.segment.creator;
import com.linkedin.pinot.common.Utils;
import com.linkedin.pinot.core.data.GenericRow;
import com.linkedin.pinot.core.data.extractors.FieldExtractor;
import com.linkedin.pinot.core.data.extractors.FieldExtractorFactory;
import com.linkedin.pinot.core.data.extractors.PlainFieldExtractor;
import com.linkedin.pinot.core.data.readers.RecordReader;
import com.linkedin.pinot.core.segment.creator.impl.stats.SegmentPreIndexStatsCollectorImpl;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* {@link com.linkedin.pinot.core.segment.creator.SegmentCreationDataSource} that uses a
* {@link com.linkedin.pinot.core.data.readers.RecordReader} as the underlying data source.
*/
public class RecordReaderSegmentCreationDataSource implements SegmentCreationDataSource {
  private static final Logger LOGGER = LoggerFactory.getLogger(RecordReaderSegmentCreationDataSource.class);

  // Assigned exactly once in the constructor; every method below operates on this same reader instance.
  private final RecordReader _recordReader;

  /**
   * Wraps the given record reader and initializes it by calling {@code init()} on it.
   *
   * <p>If initialization throws, the exception is logged and handed to
   * {@link Utils#rethrowException(Throwable)}.
   *
   * @param recordReader the reader that supplies the rows; it is initialized here, so callers are presumably
   *                     expected to pass a reader that has not been initialized yet (TODO confirm against callers)
   */
  public RecordReaderSegmentCreationDataSource(RecordReader recordReader) {
    _recordReader = recordReader;
    try {
      recordReader.init();
    } catch (Exception e) {
      LOGGER.error("Caught exception while initializing record reader", e);
      Utils.rethrowException(e);
    }
  }

  /**
   * Reads every remaining row from the underlying record reader, runs it through a plain field extractor built
   * from the schema in {@code statsCollectorConfig}, and feeds the transformed row to a freshly created
   * {@link SegmentPreIndexStatsCollectorImpl}.
   *
   * <p>Reading starts at the reader's current position; this method does not rewind the reader before or after
   * the pass.
   *
   * @param statsCollectorConfig configuration used to create the collector and to obtain the schema
   * @return the collector after {@code init()}, all {@code collectRow(...)} calls and {@code build()} have run
   */
  @Override
  public SegmentPreIndexStatsCollector gatherStats(StatsCollectorConfig statsCollectorConfig) {
    try {
      PlainFieldExtractor fieldExtractor =
          FieldExtractorFactory.getPlainFieldExtractor(statsCollectorConfig.getSchema());

      SegmentPreIndexStatsCollector collector = new SegmentPreIndexStatsCollectorImpl(statsCollectorConfig);
      collector.init();

      // Gather the stats. The same two row objects are offered for reuse on every iteration.
      GenericRow readRow = new GenericRow();
      GenericRow transformedRow = new GenericRow();
      while (_recordReader.hasNext()) {
        transformedRow = readNextRowSanitized(readRow, transformedRow, fieldExtractor);
        collector.collectRow(transformedRow);
      }

      collector.build();
      return collector;
    } catch (Exception e) {
      LOGGER.error("Caught exception while gathering stats", e);
      Utils.rethrowException(e);
      // Presumably unreachable because rethrowException is expected to always throw (TODO confirm);
      // the return statement is needed to satisfy the compiler.
      return null;
    }
  }

  /**
   * Fetches the next row from the record reader and transforms it with the given extractor.
   *
   * @param readRow        row object offered to the reader for reuse
   * @param transformedRow row object offered to the extractor for reuse
   * @param extractor      extractor applied to the row that was read
   * @return the row returned by {@code extractor.transform(...)}
   */
  private GenericRow readNextRowSanitized(GenericRow readRow, GenericRow transformedRow, FieldExtractor extractor) {
    readRow = GenericRow.createOrReuseRow(readRow);
    readRow = _recordReader.next(readRow);
    transformedRow = GenericRow.createOrReuseRow(transformedRow);
    return extractor.transform(readRow, transformedRow);
  }

  /**
   * Rewinds the underlying record reader and returns it.
   *
   * <p>The rewind happens on every call. If it throws, the exception is logged and handed to
   * {@link Utils#rethrowException(Throwable)}.
   *
   * @return the record reader passed to the constructor, after {@code rewind()} has been called on it
   */
  @Override
  public RecordReader getRecordReader() {
    try {
      _recordReader.rewind();
    } catch (Exception e) {
      LOGGER.error("Caught exception while rewinding record reader", e);
      Utils.rethrowException(e);
    }
    return _recordReader;
  }
}