package edu.isi.karma.transformation.tokenizer;

import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import edu.isi.karma.controller.command.transformation.PythonTransformationCommand;

/**
 * Splits the {@code return} expression of a Python transformation into a
 * sequence of tokens.
 *
 * <p>The expression is expected to be a {@code +}-concatenation of operands.
 * Each operand is classified as one of:
 * <ul>
 *   <li>a column reference of the form {@code getValue("name")}, producing a
 *       {@link PythonTransformationColumnToken} built from {@code name};</li>
 *   <li>a double-quoted literal, producing a
 *       {@link PythonTransformationStringToken} built from the text between
 *       the quotes;</li>
 *   <li>anything else, producing a {@link PythonTransformationInvalidToken}
 *       built from the trimmed operand text.</li>
 * </ul>
 *
 * <p>This class only has static members and cannot be instantiated.
 */
public class PythonTransformationAsURITokenizer {

	/** Keyword the transformation code must start with (after trimming). */
	private static final String RETURN_STATEMENT = "return";

	/** Separator between the operands of the return expression. */
	private static final char OPERAND_SEPARATOR = '+';

	/** Delimiter of string literals; separators between two of these are not split on. */
	private static final char QUOTE = '"';

	/** Matches {@code getValue("name")}; group 1 is the word-character name. */
	private static final Pattern GET_VALUE_PATTERN =
			Pattern.compile("getValue\\(\\s*\\\"(\\w+)\\\"\\s*\\)");

	/**
	 * Matches a double-quoted literal; group 1 is the text between the quotes.
	 *
	 * <p>NOTE(review): inside the character class, {@code $-_} is a range
	 * (every character from '$' through '_'), not the three individual
	 * characters. That range is what lets characters such as '/' and ':'
	 * through, so it is intentionally left untouched here - confirm whether
	 * the range was intended before tightening it.
	 */
	private static final Pattern LITERAL_PATTERN =
			Pattern.compile("\\\"([$-_.+!*'()\\w]+)\\\"");

	private PythonTransformationAsURITokenizer() {
	}

	/**
	 * Tokenizes the transformation code carried by the given command.
	 *
	 * @param command the command whose transformation code is tokenized; may
	 *                be {@code null}
	 * @return the tokens of the command's return expression; an empty list if
	 *         {@code command} is {@code null} or its code cannot be tokenized
	 *         (see {@link #tokenize(String)})
	 */
	public static List<PythonTransformationToken> tokenize(PythonTransformationCommand command) {
		if (command == null) {
			// Mirror the null handling of the String overload instead of throwing.
			return new LinkedList<>();
		}
		return tokenize(command.getTransformationCode());
	}

	/**
	 * Tokenizes a single {@code return} expression.
	 *
	 * <p>The code is only processed when, after trimming, it starts with
	 * {@code return}. Everything following that keyword is split on
	 * {@code +} characters that are not enclosed in double quotes, each piece
	 * is trimmed, and one token is produced per piece. Zero-length pieces
	 * (for example between two adjacent {@code +}) are skipped; a piece that
	 * consists only of whitespace yields an invalid token for the empty
	 * string.
	 *
	 * @param transformationCode the Python code to tokenize; may be
	 *                           {@code null}
	 * @return the tokens in order of appearance; an empty list if the code is
	 *         {@code null}, empty, or does not start with {@code return}.
	 *         Never {@code null}.
	 */
	public static List<PythonTransformationToken> tokenize(String transformationCode) {
		List<PythonTransformationToken> tokens = new LinkedList<>();
		if (transformationCode == null
				|| !transformationCode.trim().startsWith(RETURN_STATEMENT)) {
			return tokens;
		}

		// The trimmed code starts with the keyword, so the first occurrence in
		// the untrimmed code is that same leading keyword.
		int expressionStart =
				transformationCode.indexOf(RETURN_STATEMENT) + RETURN_STATEMENT.length();
		String expression = transformationCode.substring(expressionStart);

		for (String operand : splitOnUnquotedSeparator(expression)) {
			tokens.add(classify(operand.trim()));
		}
		return tokens;
	}

	/**
	 * Splits the expression on separators that are outside double quotes.
	 *
	 * <p>A plain split on {@code +} would cut a literal such as
	 * {@code "a+b"} in half even though the literal pattern accepts
	 * {@code +}. Quote state is tracked by toggling on every {@code "}, so
	 * input without a {@code +} between quotes splits exactly as a plain
	 * split would.
	 *
	 * @param expression the text following the {@code return} keyword
	 * @return the non-empty pieces, untrimmed, in order of appearance
	 */
	private static List<String> splitOnUnquotedSeparator(String expression) {
		List<String> operands = new LinkedList<>();
		StringBuilder current = new StringBuilder();
		boolean insideQuotes = false;

		for (int i = 0; i < expression.length(); i++) {
			char c = expression.charAt(i);
			if (c == QUOTE) {
				insideQuotes = !insideQuotes;
			}
			if (c == OPERAND_SEPARATOR && !insideQuotes) {
				flush(current, operands);
			} else {
				current.append(c);
			}
		}
		flush(current, operands);
		return operands;
	}

	/** Moves the buffered text, if any, into the operand list and clears the buffer. */
	private static void flush(StringBuilder buffer, List<String> operands) {
		if (buffer.length() > 0) {
			operands.add(buffer.toString());
			buffer.setLength(0);
		}
	}

	/**
	 * Turns one trimmed operand into its token.
	 *
	 * @param operand the trimmed operand text
	 * @return a column token, a string token, or an invalid token, checked in
	 *         that order
	 */
	private static PythonTransformationToken classify(String operand) {
		Matcher columnMatcher = GET_VALUE_PATTERN.matcher(operand);
		if (columnMatcher.matches()) {
			return new PythonTransformationColumnToken(columnMatcher.group(1));
		}

		Matcher literalMatcher = LITERAL_PATTERN.matcher(operand);
		if (literalMatcher.matches()) {
			return new PythonTransformationStringToken(literalMatcher.group(1));
		}

		return new PythonTransformationInvalidToken(operand);
	}
}