/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package edu.mayo.bior.cli.cmd;
import com.tinkerpop.pipes.Pipe;
import com.tinkerpop.pipes.util.Pipeline;
import edu.mayo.bior.pipeline.UnixStreamPipeline;
import edu.mayo.cli.CommandPlugin;
import edu.mayo.pipes.JSON.InjectIntoJsonPipe;
import edu.mayo.pipes.JSON.inject.ColumnArrayInjector;
import edu.mayo.pipes.JSON.inject.ColumnInjector;
import edu.mayo.pipes.JSON.inject.Injector;
import edu.mayo.pipes.JSON.inject.JsonType;
import edu.mayo.pipes.JSON.inject.LiteralInjector;
import edu.mayo.pipes.SplitPipe;
import edu.mayo.pipes.UNIX.CatPipe;
import edu.mayo.pipes.bioinformatics.VCF2VariantPipe;
import edu.mayo.pipes.bioinformatics.vocab.CoreAttributes;
import edu.mayo.pipes.bioinformatics.vocab.Type;
import edu.mayo.pipes.history.History;
import edu.mayo.pipes.history.HistoryInPipe;
import edu.mayo.pipes.history.HistoryOutPipe;
import edu.mayo.pipes.util.metadata.Metadata;
import edu.mayo.pipes.util.metadata.Metadata.CmdType;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Properties;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Options;
/**
 * Command plugin that reads a tab-delimited injector configuration file and
 * runs the incoming stream through an {@link InjectIntoJsonPipe} built from it.
 *
 * @author dquest
 */
public class Tab2JSONCommand implements CommandPlugin {

    /** Minimum number of tab-separated fields a config line must have to be parsed. */
    private static final int MIN_FIELDS = 5;

    /** Zero-based index of the optional golden identifier field. */
    private static final int GOLDEN_ID_INDEX = 5;

    /** Placeholder used in the config file to mean "no value". */
    private static final String NO_VALUE = ".";

    private UnixStreamPipeline mPipeline = new UnixStreamPipeline();

    /** Command name, read from the {@code command.name} property in {@link #init(Properties)}. */
    private String operation;

    /**
     * Stores the command name found under the {@code command.name} property.
     *
     * @param props plugin properties supplied by the CLI framework
     */
    public void init(Properties props) throws Exception {
        operation = props.getProperty("command.name");
    }

    /**
     * Builds the injectors described by a configuration file.
     *
     * A config file takes the following tab-delimited form:
     * ColNum Key JsonType InjectorType Delimiter/Literal_Value GoldenIdentifier
     *
     * Here is what each of these values means:
     *
     * 0) ColNum is the column number that we wish to get the data from.
     *    It must parse as an integer on every line, including LITERAL lines
     *    (where it is otherwise unused).
     *    Injectors are created in the order the lines appear in the file,
     *    followed by the injectors for any golden identifiers.
     *
     * 1) Key is the name of the identifier used to describe the value in the column
     *
     * 2) JsonType is BOOLEAN, NUMBER, or STRING
     *    This describes the types of values the column can take.
     *    (Resolved with JsonType.valueOf, so it is case-sensitive.)
     *
     * 3) An injector takes the value from the tab delimited file and puts it into the JSON column.
     *    InjectorType is LITERAL, ARRAY, or COLUMN (case-insensitive)
     *    LITERAL - every JSON will have the same value over the entire set
     *    COLUMN - the data that appears in the COLUMN will be injected into the JSON (99% of the time this is what you want)
     *    ARRAY - the data that appears in the column is actually a delimited array (e.g. values separated by a comma) and should be converted to a JSON array
     *
     * 4) Delimiter/Literal_Value additional information to direct the injector
     *    If the injector is a COLUMN this value is just a period (.)
     *    If the injector is a LITERAL, this value is the value of the literal that should be injected.
     *    If the injector is an ARRAY, this denotes the delimiter that should be used to parse the array.
     *
     * 5) Golden Identifier (used to be called attribute)
     *    If the column can be interpreted as a golden identifier (e.g. _landmark, _minBP) then place it here
     *    else place a dot (.) or leave the field off.
     *    There can not be more than one golden attribute associated with a column, users will need to use tools
     *    such as perl and awk to replicate the column before ingesting the data.
     *
     * Parsing stops at the first line that has fewer than five fields.
     *
     * @param filename path of the configuration file to read
     * @return the injectors for every parsed line, in file order, followed by
     *         one additional injector per line that names a golden identifier
     * @throws NumberFormatException if ColNum is not an integer
     * @throws IllegalArgumentException if JsonType or InjectorType is not recognised
     */
    public Injector[] parseConfigFile(String filename){
        ArrayList<Injector> injectors = new ArrayList<Injector>();
        // Golden-identifier injectors are appended after all the regular ones.
        ArrayList<Injector> goldenInjectors = new ArrayList<Injector>();

        Pipeline<String,ArrayList<String>> parse = new Pipeline<String,ArrayList<String>>(
                new CatPipe(),
                new SplitPipe("\t")
        );
        parse.setStarts(Arrays.asList(filename));

        while (parse.hasNext()) {
            ArrayList<String> fields = parse.next();
            if (fields.size() < MIN_FIELDS) {
                // A short line (e.g. a trailing blank line) ends the configuration.
                break;
            }

            Integer col = Integer.valueOf(fields.get(0));
            String key = fields.get(1);
            JsonType jsonType = JsonType.valueOf(fields.get(2));
            String injectorType = fields.get(3);
            String extra = fields.get(4);

            injectors.add(createInjector(injectorType, col, key, jsonType, extra));

            // The golden identifier is optional: a missing sixth field is
            // treated the same as the "." placeholder.
            if (fields.size() > GOLDEN_ID_INDEX) {
                String goldenKey = fields.get(GOLDEN_ID_INDEX);
                if (!NO_VALUE.equalsIgnoreCase(goldenKey)) {
                    // Same injector as above, but keyed by the golden identifier.
                    goldenInjectors.add(createInjector(injectorType, col, goldenKey, jsonType, extra));
                }
            }
        }

        injectors.addAll(goldenInjectors);
        return injectors.toArray(new Injector[injectors.size()]);
    }

    /**
     * Creates a single injector of the requested kind.
     *
     * @param injectorType LITERAL, COLUMN or ARRAY (case-insensitive)
     * @param col column number the value is read from (ignored for LITERAL)
     * @param key JSON key the value is stored under
     * @param jsonType JSON type of the value
     * @param extra the literal value for LITERAL, the delimiter for ARRAY, ignored for COLUMN
     * @return the newly created injector, never {@code null}
     * @throws IllegalArgumentException if {@code injectorType} is not recognised
     */
    private static Injector createInjector(String injectorType, Integer col, String key,
                                           JsonType jsonType, String extra) {
        if ("LITERAL".equalsIgnoreCase(injectorType)) {
            // String key, String value, JsonType type
            return new LiteralInjector(key, extra, jsonType);
        }
        if ("COLUMN".equalsIgnoreCase(injectorType)) {
            // int column, String key, JsonType type
            return new ColumnInjector(col, key, jsonType);
        }
        if ("ARRAY".equalsIgnoreCase(injectorType)) {
            // int column, String key, JsonType type, String delimiterRegex, boolean stripWhitespace
            return new ColumnArrayInjector(col, key, jsonType, extra, true);
        }
        throw new IllegalArgumentException(
                "Unknown injector type '" + injectorType + "' for key '" + key
                + "'; expected LITERAL, COLUMN or ARRAY");
    }

    /**
     * Parses the config file named by the {@code -c} option and runs the
     * stream pipeline with the resulting injectors.
     *
     * @param line parsed command line; the value of option {@code c} is used
     *             as the config file path (an empty string when absent)
     * @param opts command line option definitions (not used here)
     */
    public void execute(CommandLine line, Options opts) throws Exception {
        String config = "";
        if(line.hasOption('c')){
            config = line.getOptionValue('c');
        }

        Injector[] injectors = parseConfigFile(config);

        Metadata metadata = new Metadata(operation);

        Pipe<String, History> preLogic = new HistoryInPipe(metadata);
        Pipe<History, History> logic = new InjectIntoJsonPipe(true, injectors);
        Pipe<History, String> postLogic = new HistoryOutPipe();

        mPipeline.execute(preLogic, logic, postLogic);
    }
}