// Copyright 2011 Google Inc. All Rights Reserved.
package com.google.appengine.tools.mapreduce.inputs;
import com.google.appengine.api.blobstore.BlobInfo;
import com.google.appengine.api.blobstore.BlobInfoFactory;
import com.google.appengine.api.blobstore.BlobKey;
import com.google.appengine.tools.mapreduce.Input;
import com.google.appengine.tools.mapreduce.InputReader;
import com.google.appengine.tools.mapreduce.MapperJobContext;
import com.google.common.base.Preconditions;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * An {@link Input} over a single Blobstore blob.
 *
 * <p>{@link #split} looks up the blob's size and carves the offset range
 * {@code [0, size)} into contiguous pieces, creating one {@link BlobstoreInputReader}
 * per piece. The configured shard count is only a hint: if the blob is smaller than
 * the requested number of shards, a single reader covering the whole blob is produced.
 *
 * <p>NOTE(review): split boundaries are computed purely from offsets and are not aligned
 * to the terminator byte here; presumably {@link BlobstoreInputReader} handles records
 * that straddle a boundary — confirm against that class.
 */
public class BlobstoreInput extends Input<BlobstoreRecordKey, byte[]> {
  // --------------------------- STATIC FIELDS ---------------------------
  private static final long serialVersionUID = 2235444204028285444L;

  // ------------------------------ FIELDS ------------------------------
  /** String form of the key of the blob to read. */
  private final String blobKey;
  /** Byte handed to every reader; presumably the record delimiter (defaults to '\n'). */
  private final byte terminator;
  /** Requested number of shards; treated as a hint by {@link #split}. */
  private final int shardCount;

  // --------------------------- CONSTRUCTORS ---------------------------
  /**
   * Creates an input that uses {@code '\n'} as the terminator and 4 shards.
   *
   * @param blobKey string form of the blob key to read
   */
  public BlobstoreInput(String blobKey) {
    this(blobKey, (byte) '\n');
  }

  /**
   * Creates an input with the given terminator and 4 shards.
   *
   * @param blobKey string form of the blob key to read
   * @param terminator byte passed to each reader
   */
  public BlobstoreInput(String blobKey, byte terminator) {
    this(blobKey, terminator, 4);
  }

  /**
   * Creates an input with the given terminator and shard-count hint.
   *
   * <p>Arguments are stored as-is; they are validated when {@link #split} is called.
   *
   * @param blobKey string form of the blob key to read
   * @param terminator byte passed to each reader
   * @param shardCount desired number of shards; must be positive by the time
   *     {@link #split} runs
   */
  public BlobstoreInput(String blobKey, byte terminator, int shardCount) {
    this.blobKey = blobKey;
    this.terminator = terminator;
    this.shardCount = shardCount;
  }

  // ------------------------ IMPLEMENTING METHODS ------------------------
  /**
   * Looks up the blob's size and splits the blob into readers.
   *
   * @param context the job context; not used by this implementation
   * @return one reader per split, in increasing offset order
   * @throws NullPointerException if no blob info can be loaded for the configured key
   * @throws IllegalArgumentException if the configured shard count is not positive
   * @throws RuntimeException wrapping an {@link IOException} raised while creating readers
   */
  @Override
  public List<? extends InputReader<BlobstoreRecordKey, byte[]>> split(
      MapperJobContext<BlobstoreRecordKey, byte[], ?, ?> context) {
    BlobInfo blobInfo = new BlobInfoFactory().loadBlobInfo(new BlobKey(blobKey));
    // loadBlobInfo yields null for an unknown key; fail with a message that names the
    // key rather than an anonymous NullPointerException from the getSize() call.
    Preconditions.checkNotNull(blobInfo, "No blob info found for blob key: %s", blobKey);
    return split(blobKey, blobInfo.getSize(), shardCount);
  }

  // -------------------------- INSTANCE METHODS --------------------------
  /**
   * Partitions {@code [0, blobSize)} into contiguous ranges and wraps each in a reader.
   *
   * <p>Every range except the last has length {@code blobSize / shardCount}; the last
   * range ends at {@code blobSize} and therefore also absorbs the division remainder.
   *
   * @param blobKey string form of the blob key; must not be null
   * @param blobSize size of the blob; must be non-negative
   * @param shardCount desired number of ranges; must be positive
   * @return the readers, in increasing offset order
   */
  private List<? extends InputReader<BlobstoreRecordKey, byte[]>> split(String blobKey,
      long blobSize, int shardCount) {
    Preconditions.checkNotNull(blobKey, "blobKey must not be null");
    Preconditions.checkArgument(shardCount > 0, "shardCount must be positive: %s", shardCount);
    Preconditions.checkArgument(blobSize >= 0, "blobSize must be non-negative: %s", blobSize);

    /*
     * Currently a single shard gets assigned a single split.
     * shardCount is only a hint for the number of splits we create.
     * If a shard's workload would be too small (the blob has fewer bytes than
     * requested shards), fall back to a single shard covering the whole blob.
     */
    long splitLength = blobSize / shardCount;
    int effectiveShardCount = shardCount;
    if (splitLength == 0L) {
      splitLength = blobSize;
      effectiveShardCount = 1;
    }

    try {
      List<BlobstoreInputReader> result = new ArrayList<BlobstoreInputReader>(effectiveShardCount);
      long startOffset = 0L;
      // Emit the first (effectiveShardCount - 1) fixed-length ranges...
      for (int i = 1; i < effectiveShardCount; i++) {
        long endOffset = (long) i * splitLength;
        result.add(new BlobstoreInputReader(blobKey, startOffset, endOffset, terminator));
        startOffset = endOffset;
      }
      // ...then the final range, which runs to the end of the blob.
      result.add(new BlobstoreInputReader(blobKey, startOffset, blobSize, terminator));
      return result;
    } catch (IOException e) {
      // Surface reader-construction failures as unchecked, preserving the cause.
      throw new RuntimeException(e);
    }
  }
}