/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.blur.mapreduce.lib;

import java.io.IOException;
import java.util.Collection;
import org.apache.blur.log.Log;
import org.apache.blur.log.LogFactory;
import org.apache.blur.lucene.codec.Blur024Codec;
import org.apache.blur.mapreduce.lib.BlurInputFormat.BlurInputSplit;
import org.apache.blur.store.blockcache.LastModified;
import org.apache.blur.store.hdfs.HdfsDirectory;
import org.apache.blur.utils.RowDocumentUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.document.DocumentStoredFieldVisitor;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentInfoPerCommit;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.IOUtils;
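
/**
 * Record reader over a single Lucene segment of a Blur shard directory. Each
 * live document is read through its stored fields and exposed as a
 * (rowId, {@link TableBlurRecord}) key/value pair.
 */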
public class GenericRecordReader {
private static final String LASTMOD = ".lastmod";
private static final Log LOG = LogFactory.getLog(GenericRecordReader.class);
private boolean _setup;
private Text _rowId;
private TableBlurRecord _tableBlurRecord;
private Bits _liveDocs;
private StoredFieldsReader _fieldsReader;
private Directory _directory;
private Directory _readingDirectory;
private int _docId = -1;
private int _maxDoc;
private Text _table;
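
/**
 * Opens the shard directory for the split, reads the {@link SegmentInfoPerCommit}
 * named by the split, optionally copies the segment's cacheable files to the
 * local cache path, and opens the field infos, live docs and stored fields
 * reader needed for iteration.
 */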
public void initialize(BlurInputSplit blurInputSplit, Configuration configuration) throws IOException {
if (_setup) {
return;
}
_setup = true;
_table = blurInputSplit.getTable();
Path localCachePath = BlurInputFormat.getLocalCachePath(configuration);
LOG.info("Local cache path [{0}]", localCachePath);
_directory = BlurInputFormat.getDirectory(configuration, _table.toString(), blurInputSplit.getDir());
SegmentInfoPerCommit commit = segmentInfosRead(_directory, blurInputSplit.getSegmentsName(),
blurInputSplit.getSegmentInfoName());
SegmentInfo segmentInfo = commit.info;
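// When a local cache path is configured, copy the segment's files there and read from the copy.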
if (localCachePath != null) {
_readingDirectory = copyFilesLocally(configuration, _directory, _table.toString(), blurInputSplit.getDir(),
localCachePath, commit.files(), blurInputSplit.getSegmentInfoName());
} else {
_readingDirectory = _directory;
}
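// Open field infos, live docs (when the segment has deletions) and the stored fields reader with the Blur024Codec.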
Blur024Codec blur024Codec = new Blur024Codec();
IOContext iocontext = IOContext.READ;
String segmentName = segmentInfo.name;
FieldInfos fieldInfos = blur024Codec.fieldInfosFormat().getFieldInfosReader()
.read(_readingDirectory, segmentName, iocontext);
if (commit.getDelCount() > 0) {
_liveDocs = blur024Codec.liveDocsFormat().readLiveDocs(_readingDirectory, commit, iocontext);
}
_fieldsReader = blur024Codec.storedFieldsFormat().fieldsReader(_readingDirectory, segmentInfo, fieldInfos,
iocontext);
_maxDoc = commit.info.getDocCount();
}
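
/**
 * Reads the given segments file and returns the {@link SegmentInfoPerCommit}
 * for the segment named {@code segmentInfoName}. Only the 4.0+ segments
 * format is supported.
 */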
private SegmentInfoPerCommit segmentInfosRead(Directory directory, String segmentFileName, String segmentInfoName)
throws IOException {
boolean success = false;
ChecksumIndexInput input = new ChecksumIndexInput(directory.openInput(segmentFileName, IOContext.READ));
try {
final int format = input.readInt();
if (format == CodecUtil.CODEC_MAGIC) {
// 4.0+
CodecUtil.checkHeaderNoMagic(input, "segments", SegmentInfos.VERSION_40, SegmentInfos.VERSION_40);
input.readLong();// read version
input.readInt(); // read counter
int numSegments = input.readInt();
if (numSegments < 0) {
throw new CorruptIndexException("invalid segment count: " + numSegments + " (resource: " + input + ")");
}
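// Walk the per-segment entries until the requested segment is found.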
for (int seg = 0; seg < numSegments; seg++) {
String segName = input.readString();
Codec codec = Codec.forName(input.readString());
SegmentInfo info = codec.segmentInfoFormat().getSegmentInfoReader().read(directory, segName, IOContext.READ);
info.setCodec(codec);
long delGen = input.readLong();
int delCount = input.readInt();
if (delCount < 0 || delCount > info.getDocCount()) {
throw new CorruptIndexException("invalid deletion count: " + delCount + " (resource: " + input + ")");
}
if (segName.equals(segmentInfoName)) {
success = true;
return new SegmentInfoPerCommit(info, delCount, delGen);
}
}
} else {
throw new IOException("Legacy Infos not supported for dir [" + directory + "].");
}
throw new IOException("Segment [" + segmentInfoName + "] nout found in dir [" + directory + "]");
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(input);
} else {
input.close();
}
}
}
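
/**
 * Copies the segment's cacheable files from the remote directory into a
 * segment-specific path under the local cache path, skipping files whose
 * cached copy is already current, and returns a directory over that copy.
 */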
private static Directory copyFilesLocally(Configuration configuration, Directory dir, String table, Path shardDir,
Path localCachePath, Collection<String> files, String segmentName) throws IOException {
LOG.info("Copying files need to local cache for faster reads [{0}].", shardDir);
Path localShardPath = new Path(new Path(new Path(localCachePath, table), shardDir.getName()), segmentName);
HdfsDirectory localDir = new HdfsDirectory(configuration, localShardPath, null);
for (String name : files) {
if (!isValidFileToCache(name)) {
continue;
}
LOG.info("Valid file for local copy [{0}].", name);
if (!isValid(localDir, dir, name)) {
LastModified lastModified = (LastModified) dir;
long fileModified = lastModified.getFileModified(name);
IndexInput input = dir.openInput(name, IOContext.READONCE);
IndexOutput output = localDir.createOutput(name, IOContext.DEFAULT);
output.copyBytes(input, input.length());
output.close();
input.close();
IndexOutput lastMod = localDir.createOutput(name + LASTMOD, IOContext.DEFAULT);
lastMod.writeLong(fileModified);
lastMod.close();
}
}
return localDir;
}
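
// Only the files this reader needs are cached: stored fields (.fdt/.fdx), deletes (.del) and field infos (.fnm).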
private static boolean isValidFileToCache(String name) {
return name.endsWith(".fdt") || name.endsWith(".fdx") || name.endsWith(".del") || name.endsWith(".fnm");
}
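
/**
 * Returns true when the cached copy of {@code name} exists, matches the remote
 * file's length, and has a {@code .lastmod} side file whose timestamp matches
 * the remote file's last-modified time.
 */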
private static boolean isValid(HdfsDirectory localDir, Directory remoteDir, String name) throws IOException {
LastModified lastModified = (LastModified) remoteDir;
long fileModified = lastModified.getFileModified(name);
long fileLength = remoteDir.fileLength(name);
if (localDir.fileExists(name)) {
LOG.info("Cache file exists [{0}]", name);
if (localDir.fileLength(name) == fileLength) {
LOG.info("Cache file length matches [{0}]", name);
String lastModFile = name + LASTMOD;
if (localDir.fileExists(lastModFile) && localDir.fileLength(lastModFile) == 8) {
LOG.info("Cache file last mod file exists [{0}]", name);
IndexInput input = localDir.openInput(lastModFile, IOContext.DEFAULT);
long lastMod = input.readLong();
input.close();
if (lastMod == fileModified) {
LOG.info("Cache file last mod matches [{0}]", name);
return true;
} else {
LOG.info("Cache file last mod does not match [{0}]", name);
}
} else {
LOG.info("Cache file last mod file does not exist [{0}]", name);
}
} else {
LOG.info("Cache file length does not match [{0}]", name);
}
} else {
LOG.info("Cache file does not exist [{0}]", name);
}
return false;
}
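
/**
 * Advances to the next live document in the segment and materializes its key
 * and value; returns false once the segment is exhausted.
 */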
public boolean nextKeyValue() throws IOException {
if (_docId >= _maxDoc) {
return false;
}
while (true) {
_docId++;
if (_docId >= _maxDoc) {
return false;
}
if (_liveDocs == null) {
fetchBlurRecord();
return true;
} else if (_liveDocs.get(_docId)) {
fetchBlurRecord();
return true;
}
}
}
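
// Reads the stored fields of the current document and converts them into the current (rowId, record) pair.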
private void fetchBlurRecord() throws IOException {
DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
_fieldsReader.visitDocument(_docId, visitor);
BlurRecord blurRecord = new BlurRecord();
String rowId = RowDocumentUtil.readRecord(visitor.getDocument(), blurRecord);
blurRecord.setRowId(rowId);
_rowId = new Text(rowId);
_tableBlurRecord = new TableBlurRecord(_table, blurRecord);
}
public Text getCurrentKey() throws IOException {
return _rowId;
}
public TableBlurRecord getCurrentValue() throws IOException {
return _tableBlurRecord;
}
public float getProgress() throws IOException {
return (float) _docId / (float) _maxDoc;
}
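
// Closes the stored fields reader and the directories (the cache directory is separate when a local cache is in use).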
public void close() throws IOException {
_fieldsReader.close();
_directory.close();
if (_readingDirectory != _directory) {
_readingDirectory.close();
}
}
}