/*
* Copyright 2015 Shazam Entertainment Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
*
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied. See the License for the specific
* language governing permissions and limitations under the License
*/
package com.shazam.dataengineering.pipelinebuilder;
import com.amazonaws.services.datapipeline.model.Field;
import hudson.util.IOUtils;
import net.sf.json.test.JSONAssert;
import org.junit.Test;
import java.io.File;
import java.io.FileInputStream;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.TimeZone;
import static junit.framework.Assert.*;
public class PipelineObjectTest {
@Test
public void pipeline1shouldParseProperly() throws Exception {
String json = IOUtils.toString(new FileInputStream(new File("src/test/resources/pipeline1.json")), "UTF-8");
PipelineObject obj = new PipelineObject(json);
assertTrue(obj.isValid());
}
@Test
public void pipeline1shouldReParseIntoTheSameText() throws Exception {
String json = IOUtils.toString(new FileInputStream(new File("src/test/resources/pipeline1.json")), "UTF-8");
PipelineObject obj = new PipelineObject(json);
String generatedJson = obj.getJson();
PipelineObject reparsed = new PipelineObject(generatedJson);
assertTrue(reparsed.isValid());
JSONAssert.assertJsonEquals(generatedJson, reparsed.getJson());
}
@Test
public void pipeline1shouldReplaceDate() throws Exception {
String json = IOUtils.toString(new FileInputStream(new File("src/test/resources/pipeline1.json")), "UTF-8");
String replacedJson = json.replace("2014-07-26T01:20:00", "2014-08-22T03:45:10");
PipelineObject obj = new PipelineObject(json);
PipelineObject validation = new PipelineObject(replacedJson);
assertEquals("2014-07-26T01:20:00", obj.getScheduleDate());
obj.setScheduleDate("2014-08-22T03:45:10");
assertEquals("2014-08-22T03:45:10", obj.getScheduleDate());
assertEquals(validation.getJson(), obj.getJson());
}
@Test
public void pipeline2shouldPlaceEMRStepsInSameOrder() throws Exception {
String json = IOUtils.toString(new FileInputStream(new File("src/test/resources/pipeline2.json")), "UTF-8");
PipelineObject obj = new PipelineObject(json);
List<com.amazonaws.services.datapipeline.model.PipelineObject> awsObjects = obj.getAWSObjects();
List<Field> fields = awsObjects.get(0).getFields();
int counter = 0;
for (Field field : fields) {
if (field.getKey().equals("step")) {
switch (counter++) {
case 0:
assertEquals("step 1", field.getStringValue());
break;
case 1:
assertEquals("step 02", field.getStringValue());
break;
case 2:
assertEquals("step three", field.getStringValue());
break;
}
}
}
}
@Test
public void pipeline3shouldParseIntoAWSPipelineObjectCorrectly() throws Exception {
List<com.amazonaws.services.datapipeline.model.PipelineObject> pipeline3 = getAWSPipeline3();
String json = IOUtils.toString(new FileInputStream(new File("src/test/resources/pipeline3.json")), "UTF-8");
PipelineObject obj = new PipelineObject(json);
// DeepEquals doesn't validate properly, must be incorrect implementation of equals.
// As a result order actually matters in this check.
List<com.amazonaws.services.datapipeline.model.PipelineObject> testObjects = obj.getAWSObjects();
for (int i = 0; i < pipeline3.size(); i++) {
com.amazonaws.services.datapipeline.model.PipelineObject expected = pipeline3.get(i);
com.amazonaws.services.datapipeline.model.PipelineObject validation = testObjects.get(i);
assertEquals(expected.getId(), validation.getId());
assertEquals(expected.getName(), validation.getName());
assertEquals(expected.getFields().size(), validation.getFields().size());
for (Field expectedField : expected.getFields()) {
assertTrue(validation.getFields().contains(expectedField));
}
}
}
@Test
public void validateDateShouldValidateProperDate() throws Exception {
assertTrue(PipelineObject.validateDate("2014-07-26T01:20:00"));
}
@Test
public void validateDateShouldNotValidateImproperDate() throws Exception {
assertFalse(PipelineObject.validateDate("2014-07/26T01=20:00"));
}
@Test
public void validateDateShouldNotValidateEpochStart() throws Exception {
Date epoch = new Date(0);
SimpleDateFormat dateFormat = new SimpleDateFormat(PipelineObject.PIPELINE_DATE_FORMAT);
dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
assertFalse(PipelineObject.validateDate(dateFormat.format(epoch)));
}
@Test
public void isPastShouldReturnTrueForDatesInThePast() throws Exception {
Date past = new Date(1400000000L * 1000);
SimpleDateFormat dateFormat = new SimpleDateFormat(PipelineObject.PIPELINE_DATE_FORMAT);
assertTrue(PipelineObject.isPast(dateFormat.format(past)));
}
@Test
public void isPastShouldReturnFalseForDatesInTheFuture() throws Exception {
Date future = new Date(2407866423L * 1000);
SimpleDateFormat dateFormat = new SimpleDateFormat(PipelineObject.PIPELINE_DATE_FORMAT);
assertFalse(PipelineObject.isPast(dateFormat.format(future)));
}
/**
* Manually construct AWS Pipeline object for pipeline3 definition.
* Note - order matters - see the test
*
* @return List<PipelineObject>
*/
private List<com.amazonaws.services.datapipeline.model.PipelineObject> getAWSPipeline3() {
ArrayList<com.amazonaws.services.datapipeline.model.PipelineObject> pipelineList =
new ArrayList<com.amazonaws.services.datapipeline.model.PipelineObject>();
com.amazonaws.services.datapipeline.model.PipelineObject redhsiftDb =
new com.amazonaws.services.datapipeline.model.PipelineObject();
redhsiftDb.withId("RedshiftDatabaseId_123").withName("Database")
.withFields(
new Field().withKey("*password").withStringValue("password"),
new Field().withKey("type").withStringValue("RedshiftDatabase"),
new Field().withKey("clusterId").withStringValue("cluster"),
new Field().withKey("username").withStringValue("username")
);
pipelineList.add(redhsiftDb);
com.amazonaws.services.datapipeline.model.PipelineObject schedule =
new com.amazonaws.services.datapipeline.model.PipelineObject();
schedule.withId("ScheduleId_234").withName("Daily")
.withFields(
new Field().withKey("startDateTime").withStringValue("2014-07-29T15:00:00"),
new Field().withKey("period").withStringValue("1 Day"),
new Field().withKey("type").withStringValue("Schedule")
);
pipelineList.add(schedule);
com.amazonaws.services.datapipeline.model.PipelineObject emrActivity =
new com.amazonaws.services.datapipeline.model.PipelineObject();
emrActivity.withId("ActivityId_345").withName("EMR Job")
.withFields(
new Field().withKey("onFail").withRefValue("ActionId_098"),
new Field().withKey("step").withStringValue("s3://us-east-1.elasticmapreduce/libs/script-runner/script-runner.jar,s3://us-east-1.elasticmapreduce/libs/pig/pig-script,--base-path,s3://us-east-1.elasticmapreduce/libs/pig/,--pig-versions,latest,--run-pig-script,--args,-f,s3://bucket/script.pig,-p,PARALLEL=10,-p,INPUT=s3://bucket/input/#{format(minusDays(@scheduledStartTime,1),'YYYY-MM-dd')}/*,-p,OUTPUT=s3://bucket/output/#{format(minusDays(@scheduledStartTime,1),'YYYY-MM-dd')}"),
new Field().withKey("runsOn").withRefValue("EmrClusterId_678"),
new Field().withKey("step").withStringValue("s3://us-east-1.elasticmapreduce/libs/script-runner/script-runner.jar,s3://us-east-1.elasticmapreduce/libs/pig/pig-script,--base-path,s3://us-east-1.elasticmapreduce/libs/pig/,--install-pig,--pig-versions,latest"),
new Field().withKey("type").withStringValue("EmrActivity"),
new Field().withKey("schedule").withRefValue("ScheduleId_234")
);
pipelineList.add(emrActivity);
com.amazonaws.services.datapipeline.model.PipelineObject sqlQuery1 =
new com.amazonaws.services.datapipeline.model.PipelineObject();
sqlQuery1.withId("ActivityId_456").withName("SQL Query")
.withFields(
new Field().withKey("onFail").withRefValue("ActionId_098"),
new Field().withKey("maximumRetries").withStringValue("0"),
new Field().withKey("runsOn").withRefValue("Ec2Resource_678"),
new Field().withKey("database").withRefValue("RedshiftDatabaseId_123"),
new Field().withKey("type").withStringValue("SqlActivity"),
new Field().withKey("schedule").withRefValue("ScheduleId_234"),
new Field().withKey("script").withStringValue("SELECT 1;")
);
pipelineList.add(sqlQuery1);
com.amazonaws.services.datapipeline.model.PipelineObject sqlQuery2 =
new com.amazonaws.services.datapipeline.model.PipelineObject();
sqlQuery2.withId("ActivityId_567").withName("SQL Query 2")
.withFields(
new Field().withKey("onFail").withRefValue("ActionId_098"),
new Field().withKey("maximumRetries").withStringValue("0"),
new Field().withKey("dependsOn").withRefValue("ActivityId_345"),
new Field().withKey("runsOn").withRefValue("Ec2Resource_678"),
new Field().withKey("dependsOn").withRefValue("ActivityId_456"),
new Field().withKey("database").withRefValue("RedshiftDatabaseId_123"),
new Field().withKey("type").withStringValue("SqlActivity"),
new Field().withKey("schedule").withRefValue("ScheduleId_234"),
new Field().withKey("script").withStringValue("SELECT 2;")
);
pipelineList.add(sqlQuery2);
com.amazonaws.services.datapipeline.model.PipelineObject snsAlarm =
new com.amazonaws.services.datapipeline.model.PipelineObject();
snsAlarm.withId("ActionId_098").withName("SNS Alert")
.withFields(
new Field().withKey("message").withStringValue("Fail Message"),
new Field().withKey("role").withStringValue("DataPipelineDefaultRole"),
new Field().withKey("subject").withStringValue("Error"),
new Field().withKey("type").withStringValue("SnsAlarm"),
new Field().withKey("topicArn").withStringValue("arn:aws:sns:us-east-1:sns_feed")
);
pipelineList.add(snsAlarm);
com.amazonaws.services.datapipeline.model.PipelineObject defaultObject =
new com.amazonaws.services.datapipeline.model.PipelineObject();
defaultObject.withId("Default").withName("Default")
.withFields(
new Field().withKey("resourceRole").withStringValue("DataPipelineDefaultResourceRole"),
new Field().withKey("role").withStringValue("DataPipelineDefaultRole"),
new Field().withKey("scheduleType").withStringValue("cron"),
new Field().withKey("failureAndRerunMode").withStringValue("cascade")
);
pipelineList.add(defaultObject);
com.amazonaws.services.datapipeline.model.PipelineObject emrCluster =
new com.amazonaws.services.datapipeline.model.PipelineObject();
emrCluster.withId("EmrClusterId_678").withName("DefaultEmrCluster1")
.withFields(
new Field().withKey("region").withStringValue("us-east-1"),
new Field().withKey("coreInstanceType").withStringValue("m1.small"),
new Field().withKey("masterInstanceType").withStringValue("m1.small"),
new Field().withKey("terminateAfter").withStringValue("10 Hours"),
new Field().withKey("keyPair").withStringValue("mykeypair"),
new Field().withKey("coreInstanceCount").withStringValue("1"),
new Field().withKey("bootstrapAction").withStringValue("s3://bucket/bootstrap-script.sh"),
new Field().withKey("enableDebugging").withStringValue("true"),
new Field().withKey("schedule").withRefValue("ScheduleId_234"),
new Field().withKey("type").withStringValue("EmrCluster"),
new Field().withKey("emrLogUri").withStringValue("s3://bucket/"),
new Field().withKey("logUri").withStringValue("s3://bucket/tasklogs")
);
pipelineList.add(emrCluster);
return pipelineList;
}
}