/**
 * Model for Morphline projects.
 *
 * Such a project consists of:
 *
 * - a morphline file
 * - a Solr schema
 * - a Flume config
 * - a test dataset
 * - an Avro schema (optional)
 */
package de.bitocean.mm;
import com.cloudera.cdk.morphline.api.MorphlineContext;
import com.cloudera.cdk.morphline.api.Record;
import com.cloudera.cdk.morphline.base.Fields;
import com.cloudera.cdk.morphline.base.Notifications;
import com.cloudera.cdk.morphline.base.Compiler;
import com.cloudera.cdk.morphline.api.Command;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.util.Vector;
import java.util.logging.Level;
import java.util.logging.Logger;
import static org.apache.solr.util.SimplePostTool.stringToStream;
/**
 * Model of a morphline project that can run its morphline against test data.
 *
 * <p>An instance holds the morphline configuration text plus optional Flume,
 * Avro and Solr configuration strings, and a set of test records given either
 * as strings or as byte arrays. {@link #runFullMorphlineTest(PrintStream)}
 * writes the configuration to a temporary file, compiles it and feeds every
 * test record through the resulting morphline.
 *
 * @author kamir
 */
public class MLPModel {

    /** Test records as strings; when non-null these are used and {@link #testdataBytes} is ignored. */
    public Vector<String> testdata;

    /** Test records as raw bytes; only used when {@link #testdata} is {@code null}. */
    public Vector<byte[]> testdataBytes;

    /** Text of the morphline configuration that is compiled by the test run. */
    public String morphlineConfiguration;

    /** Flume configuration text; not read by any method of this class. */
    public String flumeConfiguration;

    /** Avro schema text; not read by any method of this class. */
    public String avroSchema;

    /** Solr schema text; not read by any method of this class. */
    public String solrSchema;

    /**
     * Runs the bundled template morphline against two sample records and
     * prints the result to {@code System.out}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // some test data records are defined
        Vector<String> messages = new Vector<String>();
        messages.add("S1 P1 O1 D1");
        messages.add("S2 P2 O2 D2");

        // the model instance is used to handle all configuration
        MLPModel model = new MLPModel();
        model.setTestData(messages);
        try {
            model.morphlineConfiguration = model.getTemplateConfig();
            model.dumpMorphlineCFG();
            model.runFullMorphlineTest(System.out);
        } catch (Exception ex) {
            Logger.getLogger(MLPModel.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /**
     * Loads the default morphline configuration that is bundled as the
     * classpath resource {@code ml.conf} (resolved relative to this class).
     *
     * @return the resource content with every line terminated by {@code '\n'},
     *         or the literal string {@code "NULL"} if reading fails with an
     *         {@link IOException}
     * @throws IllegalStateException if the resource cannot be found
     */
    private String getTemplateConfig() {
        InputStream ins = getClass().getResourceAsStream("ml.conf");
        if (ins == null) {
            // Fail with a descriptive message instead of an NPE from the reader constructor.
            throw new IllegalStateException(
                    "Bundled morphline template 'ml.conf' not found relative to " + getClass().getName());
        }
        BufferedReader br = new BufferedReader(new InputStreamReader(ins));
        StringBuilder sb = new StringBuilder();
        try {
            // readLine() returning null is the only reliable end-of-stream signal;
            // ready() merely reports whether a read would block.
            String line;
            while ((line = br.readLine()) != null) {
                sb.append(line).append('\n');
            }
            return sb.toString();
        } catch (IOException ex) {
            Logger.getLogger(MLPModel.class.getName())
                    .log(Level.SEVERE, "Could not read bundled morphline template 'ml.conf'", ex);
            return "NULL";
        } finally {
            try {
                br.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing the resource stream fails.
            }
        }
    }

    /**
     * Prints the current morphline configuration to {@code System.out}.
     */
    private void dumpMorphlineCFG() {
        System.out.println(this.morphlineConfiguration);
    }

    /**
     * Replaces the stored morphline configuration with {@code hocon} and runs
     * the full test, reporting to {@code out}.
     *
     * @param hocon morphline configuration text; stored in {@link #morphlineConfiguration}
     * @param out   stream that receives one report line per test record
     * @throws Exception if writing, compiling or running the morphline fails
     */
    public void runFullMorphlineTest(String hocon, PrintStream out) throws Exception {
        this.morphlineConfiguration = hocon;
        runFullMorphlineTest(out);
    }

    /**
     * Replaces the stored morphline configuration with {@code hocon} and runs
     * the full test, reporting to {@code System.out}.
     *
     * @param hocon morphline configuration text; stored in {@link #morphlineConfiguration}
     * @throws Exception if writing, compiling or running the morphline fails
     */
    public void runFullMorphlineTest(String hocon) throws Exception {
        this.morphlineConfiguration = hocon;
        runFullMorphlineTest(System.out);
    }

    /**
     * Compiles the stored morphline configuration and processes every test
     * record with it.
     *
     * <p>The configuration is written to a temporary {@code morphline-test-*.conf}
     * file in the current working directory, which is removed again when the
     * run ends. String records ({@link #testdata}) take precedence over byte
     * records ({@link #testdataBytes}); if both are {@code null} only the
     * begin/commit notifications are sent.
     *
     * @param out stream that receives one report line per test record
     * @throws IllegalStateException if no morphline configuration has been set
     * @throws Exception             if writing, compiling or running the morphline fails
     */
    public void runFullMorphlineTest(PrintStream out) throws Exception {
        if (this.morphlineConfiguration == null) {
            // Checked up front so that no temporary file is created for a run that cannot start.
            throw new IllegalStateException("No morphline configuration has been set");
        }

        File folder = new File(".");
        File configFile = File.createTempFile("morphline-test-", ".conf", folder);
        try {
            writeConfiguration(configFile);

            MorphlineContext context = new MorphlineContext.Builder().build();
            Command morphline = new Compiler().compile(configFile, null, context, null);

            // process each input record from the test data
            Notifications.notifyBeginTransaction(morphline);
            if (testdata != null) {
                int i = 0;
                for (String m : testdata) {
                    Record record = new Record();
                    record.put(Fields.ATTACHMENT_BODY, stringToStream(m));
                    boolean status = morphline.process(record);
                    // NOTE: the text says "successfully" regardless of status; the real outcome is the trailing flag.
                    out.println("\n[" + i + "] successfully tested record <String>: (" + m + ") : " + status);
                    i++;
                }
            } else if (testdataBytes != null) {
                int i = 0;
                for (byte[] m : testdataBytes) {
                    Record record = new Record();
                    record.put(Fields.ATTACHMENT_BODY, m);
                    boolean status = morphline.process(record);
                    out.println("\n[" + i + "] successfully tested record <byte[" + m.length + "]>: (" + m + ") : " + status);
                    i++;
                }
            }
            Notifications.notifyCommitTransaction(morphline);
        } finally {
            // Do not let temporary config files pile up in the working directory.
            if (!configFile.delete()) {
                configFile.deleteOnExit();
            }
        }
    }

    /**
     * Writes the stored morphline configuration to {@code target}, closing the
     * writer even when the write fails.
     */
    private void writeConfiguration(File target) throws IOException {
        FileWriter fw = new FileWriter(target);
        try {
            fw.write(this.morphlineConfiguration);
        } finally {
            fw.close();
        }
    }

    /**
     * Sets the string test records.
     *
     * @param messages records to process; stored by reference
     */
    public void setTestData(Vector<String> messages) {
        testdata = messages;
    }

    /**
     * Sets the binary test records.
     *
     * @param messages records to process; stored by reference
     */
    public void setTestDataBytes(Vector<byte[]> messages) {
        testdataBytes = messages;
    }
}