/**
* Copyright 2010 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package datameer.awstasks.aws.emr;
import static org.fest.assertions.Assertions.*;
import static org.junit.Assert.*;

import java.io.BufferedReader;
import java.io.File;
import java.io.InputStreamReader;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.junit.Before;
import org.junit.Test;

import awstasks.com.amazonaws.services.s3.AmazonS3;
import awstasks.com.amazonaws.services.s3.model.Bucket;
import awstasks.com.amazonaws.services.s3.model.S3ObjectSummary;

import datameer.awstasks.aws.AbstractAwsIntegrationTest;
import datameer.awstasks.aws.emr.EmrCluster.ClusterState;
import datameer.awstasks.aws.emr.EmrCluster.StepFuture;
import datameer.awstasks.aws.emr.EmrCluster.StepMetadata;
import datameer.awstasks.aws.s3.S3BucketTest;
import datameer.awstasks.util.IoUtil;
import datameer.awstasks.util.S3Util;
/**
 * Integration tests for {@link EmrCluster} against the real AWS S3 and Elastic MapReduce
 * services.
 *
 * NOTE(review): the tests appear to be order-dependent — {@link #testStart()} launches the
 * job flow and stores its id in the static {@link #_jobFlowId}, which later tests
 * (e.g. {@link #testConnectById()}) read. Confirm the runner preserves method order.
 */
public class EmrClusterTest extends AbstractAwsIntegrationTest {

    /** S3 key (with leading slash) where the word-count input file is uploaded. */
    private static final String REMOTE_INPUT_PATH = "/emr/input";
    /** S3 key (with leading slash) where the word-count job writes its output. */
    private static final String REMOTE_OUTPUT_PATH = "/emr/output";

    private Bucket _s3Bucket;
    protected EmrCluster _emrCluster;
    private AmazonS3 _s3Service;
    // Set by testStart(), read by testConnectById() — shared across test instances on purpose.
    private static String _jobFlowId;

    @Before
    public void before() {
        Logger.getLogger("datameer.awstasks").setLevel(Level.DEBUG);
        _s3Service = _ec2Conf.createS3Service();
        _s3Bucket = _s3Service.createBucket(S3BucketTest.AWS_TEST_BUCKET);
        // Empty the test bucket so leftovers from previous runs cannot influence the tests.
        List<S3ObjectSummary> s3Objects = _s3Service.listObjects(_s3Bucket.getName()).getObjectSummaries();
        for (S3ObjectSummary s3ObjectSummary : s3Objects) {
            _s3Service.deleteObject(_s3Bucket.getName(), s3ObjectSummary.getKey());
        }
        _emrCluster = createEmrCluster();
    }

    /** Creates a fresh 1-node cluster handle named after this test class. */
    private EmrCluster createEmrCluster() {
        EmrSettings settings = new EmrSettings(getClass().getName(), _ec2Conf.getAccessKey(), _ec2Conf.getPrivateKeyName(), _s3Bucket.getName(), 1);
        return new EmrCluster(settings, _ec2Conf.getAccessSecret());
    }

    /**
     * Uploads a small word-count input file to the test bucket and submits a word-count
     * job step to the connected cluster.
     *
     * @return the future of the submitted step
     */
    private StepFuture executeWordCountJob() throws Exception {
        File jobJar = new File("lib/test/hadoop-0.18.3-examples.jar");
        File localInputFile = _tempFolder.newFile("inputFile");
        IoUtil.writeFile(localInputFile, "K O H L", "K O P F");
        S3Util.uploadFile(_s3Service, _s3Bucket.getName(), localInputFile, REMOTE_INPUT_PATH);
        String inputUri = "s3n://" + _s3Bucket.getName() + REMOTE_INPUT_PATH;
        String outputUri = "s3n://" + _s3Bucket.getName() + REMOTE_OUTPUT_PATH;
        return _emrCluster.executeJobStep("testStep" + System.currentTimeMillis(), jobJar, "wordcount", inputUri, outputUri);
    }

    @Test
    public void testStart() throws Exception {
        assertNull(_emrCluster.getJobFlowId());
        // Synchronized set: it is written by the polling thread below and read by the test
        // thread after startup() returns, so a plain HashSet would not be safely published.
        final Set<ClusterState> clusterStates = Collections.synchronizedSet(new HashSet<ClusterState>());
        // Observe the cluster state from a second thread while startup() blocks, so we can
        // verify the intermediate STARTING state was actually visible at some point.
        Thread thread = new Thread() {
            @Override
            public void run() {
                do {
                    try {
                        Thread.sleep(2000);
                    } catch (InterruptedException e) {
                        // restore the interrupt status and stop polling
                        Thread.currentThread().interrupt();
                        return;
                    }
                    clusterStates.add(_emrCluster.getState());
                } while (_emrCluster.getState() != ClusterState.CONNECTED);
            }
        };
        thread.start();
        _emrCluster.startup();
        assertTrue(clusterStates.toString(), clusterStates.contains(ClusterState.STARTING));
        assertEquals(ClusterState.CONNECTED, _emrCluster.getState());
        _jobFlowId = _emrCluster.getJobFlowId();
        assertNotNull(_jobFlowId);
    }

    @Test
    public void testStart2nClusterWithSameName() throws Exception {
        try {
            _emrCluster.startup();
            fail("should throw exception");
        } catch (Exception e) {
            // expected - a job flow with the same name is already running (started by testStart)
        }
        assertEquals(ClusterState.UNCONNECTED, _emrCluster.getState());
    }

    @Test
    public void testConnectById() throws Exception {
        assertThat(_emrCluster.getJobFlowId()).isNull();
        // _jobFlowId was stored by testStart()
        assertThat(_jobFlowId).isNotNull();
        _emrCluster.connectById(_jobFlowId);
        assertEquals(ClusterState.CONNECTED, _emrCluster.getState());
        assertEquals(_jobFlowId, _emrCluster.getJobFlowId());
    }

    @Test
    public void testConnectByName() throws Exception {
        _emrCluster.connectByName();
        assertEquals(ClusterState.CONNECTED, _emrCluster.getState());
        assertNotNull(_emrCluster.getJobFlowId());
    }

    @Test
    public void testSynchronizeState() throws Exception {
        _emrCluster.connectByName();
        _emrCluster.disconnect();
        // synchronizeState() should rediscover the still-running flow after a disconnect
        _emrCluster.synchronizeState();
        assertEquals(ClusterState.CONNECTED, _emrCluster.getState());
        assertNotNull(_emrCluster.getJobFlowId());
    }

    @Test
    public void testExecuteJobStep() throws Exception {
        _emrCluster.connectByName();
        StepFuture stepFuture = executeWordCountJob();
        // assertEquals(2, stepFuture.getStepIndex());// 1 is debug step
        stepFuture.join();
        // check simpledb debugging information
        StepMetadata stepMetaData = stepFuture.getStepMetaData();
        assertNotNull(stepMetaData);
        assertEquals(_emrCluster.getJobFlowId(), stepMetaData.get(StepMetadata.JOB_FLOW_ID));
        // System.out.println(stepMetaData);
        // Check the word-count output; explicit UTF-8 avoids depending on the platform
        // charset, and the finally block closes the S3 stream even when an assertion fails.
        BufferedReader reader = new BufferedReader(new InputStreamReader(
                _s3Service.getObject(_s3Bucket.getName(), REMOTE_OUTPUT_PATH.substring(1) + "/part-00000").getObjectContent(), "UTF-8"));
        try {
            assertEquals("F\t1", reader.readLine());
            assertEquals("H\t1", reader.readLine());
            assertEquals("K\t2", reader.readLine());
            assertEquals("L\t1", reader.readLine());
            assertEquals("O\t2", reader.readLine());
            assertEquals("P\t1", reader.readLine());
        } finally {
            reader.close();
        }
    }

    @Test
    public void testExecuteJobStep_ThrottleSafeness() throws Exception {
        long oldRequestInterval = _emrCluster.getRequestInterval();
        _emrCluster.setRequestInterval(1000);// this should produce throttle exceptions
        try {
            _emrCluster.connectByName();
            // remove the output of a previous run so the step does not fail on existing output
            _s3Service.deleteObject(_s3Bucket.getName(), REMOTE_OUTPUT_PATH);
            StepFuture stepFuture = executeWordCountJob();
            stepFuture.join();
        } finally {
            // always restore the interval so a failing step cannot slow down later tests
            _emrCluster.setRequestInterval(oldRequestInterval);
        }
    }

    @Test(timeout = 60000 * 5)
    public void testShutdown() throws Exception {
        EmrCluster cluster2 = createEmrCluster();
        cluster2.connectByName();
        _emrCluster.connectByName();
        assertEquals(ClusterState.CONNECTED, _emrCluster.getState());
        assertEquals(ClusterState.CONNECTED, cluster2.getState());
        // wait until all submitted steps have finished before tearing the flow down
        while (!_emrCluster.isIdle()) {
            Thread.sleep(10000);
        }
        _emrCluster.shutdown();
        // shutting down through one handle must not affect the other handle's cached state...
        assertEquals(ClusterState.UNCONNECTED, _emrCluster.getState());
        assertEquals(ClusterState.CONNECTED, cluster2.getState());
        assertNull(_emrCluster.getJobFlowId());
        assertNotNull(cluster2.getJobFlowId());
        // ...until it synchronizes and notices the flow is gone
        cluster2.synchronizeState();
        assertEquals(ClusterState.UNCONNECTED, cluster2.getState());
        assertNull(cluster2.getJobFlowId());
    }
}