/*
* Copyright © 2014 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.data2.dataset2.lib.timeseries;
import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.dataset.lib.cube.DimensionValue;
import co.cask.cdap.api.dataset.lib.cube.TimeValue;
import co.cask.cdap.api.dataset.table.Row;
import co.cask.cdap.api.dataset.table.Scanner;
import com.google.common.collect.AbstractIterator;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
* Scans facts in a {@link FactTable}.
*/
public final class FactScanner implements Iterator<FactScanResult> {
private final FactCodec codec;
private final Scanner scanner;
private final long startTs;
private final long endTs;
// Track the number of row scanned through the iterator. It's for reporting and debugging purpose.
private int rowScanned;
// Use an internal iterator to avoid leaking AbstractIterator methods to outside.
private final Iterator<FactScanResult> internalIterator;
// set of measureNames - useful to process measures that are requested while scanning.
private final Set<String> measureNames;
/**
* Construct a FactScanner. Should only be called by FactTable.
*/
FactScanner(Scanner scanner, FactCodec codec, long startTs, long endTs, Collection<String> measureNames) {
this.scanner = scanner;
this.codec = codec;
this.internalIterator = createIterator();
this.startTs = startTs;
this.endTs = endTs;
this.measureNames = ImmutableSet.copyOf(measureNames);
}
public void close() {
scanner.close();
}
public int getRowScanned() {
return rowScanned;
}
@Override
public boolean hasNext() {
return internalIterator.hasNext();
}
@Override
public FactScanResult next() {
return internalIterator.next();
}
@Override
public void remove() {
internalIterator.remove();
}
private Iterator<FactScanResult> createIterator() {
return new AbstractIterator<FactScanResult>() {
@Override
protected FactScanResult computeNext() {
Row rowResult;
while ((rowResult = scanner.next()) != null) {
rowScanned++;
byte[] rowKey = rowResult.getRow();
// Decode context and metric from key
String measureName = codec.getMeasureName(rowKey);
// if measureNames is empty we include all metrics
if (!measureNames.isEmpty() && !measureNames.contains(measureName)) {
continue;
}
// todo: codec.getDimensionValues(rowKey) needs to un-encode dimension names which may result in read in
// entity table (depending on the cache and its state). To avoid that, we can pass to scanner the
// list of dimension names as we *always* know it (it is given) at the time of scanning
List<DimensionValue> dimensionValues = codec.getDimensionValues(rowKey);
boolean exhausted = false;
List<TimeValue> timeValues = Lists.newLinkedList();
// todo: entry set is ordered by ts?
for (Map.Entry<byte[], byte[]> columnValue : rowResult.getColumns().entrySet()) {
long ts = codec.getTimestamp(rowKey, columnValue.getKey());
if (ts < startTs) {
continue;
}
if (ts > endTs) {
exhausted = true;
break;
}
// todo: move Bytes.toLong into codec?
TimeValue timeValue = new TimeValue(ts, Bytes.toLong(columnValue.getValue()));
timeValues.add(timeValue);
}
if (timeValues.isEmpty() && exhausted) {
break;
}
// todo: can return empty list, if all data is < startTs or > endTs
return new FactScanResult(measureName, dimensionValues, timeValues);
}
scanner.close();
return endOfData();
}
};
}
}