package edu.washington.escience.myria.expression; import edu.washington.escience.myria.Type; import edu.washington.escience.myria.expression.evaluate.ExpressionOperatorParameter; /** * Returns all n-character substrings in sequence ("n-grams") from the input string. */ public class NgramExpression extends BinaryExpression { /***/ private static final long serialVersionUID = 1L; /** * This is not really unused, it's used automagically by Jackson deserialization. */ @SuppressWarnings("unused") private NgramExpression() { super(); } /** * Takes the string from which to compute n-grams and the length `n` of each n-gram. * * @param left the left operand (input string) * @param right the right operand (value of `n` in "n-gram") */ public NgramExpression(final ExpressionOperator left, final ExpressionOperator right) { super(left, right); } @Override public Type getOutputType(final ExpressionOperatorParameter parameters) { checkOperandTypes(Type.STRING_TYPE, Type.LONG_TYPE, parameters); return Type.STRING_TYPE; } @Override public String getJavaString(final ExpressionOperatorParameter parameters) { return new StringBuilder() .append("String val = (") .append(getLeft().getJavaString(parameters)) .append(");\n") .append("int n = (int) (") .append(getRight().getJavaString(parameters)) .append(");\n") .append("int numNgrams = val.length() - n + 1;\n") .append("String[] ngrams = new String[numNgrams];\n") .append("for (int i = 0; i < numNgrams; ++i) {\n") .append("ngrams[i] = val.substring(i, i + n));\n") .append("}\n") .append("return ngrams;\n") .toString(); } @Override public String getJavaExpressionWithAppend(final ExpressionOperatorParameter parameters) { return new StringBuilder() .append("String val = (") .append(getLeft().getJavaString(parameters)) .append(");\n") .append("int n = (int) (") .append(getRight().getJavaString(parameters)) .append(");\n") .append("int numNgrams = val.length() - n + 1;\n") .append(Expression.COUNT) .append(".appendInt(numNgrams);\n") .append("for (int i = 0; i < numNgrams; ++i) {\n") .append(Expression.RESULT) .append(".appendString(val.substring(i, i + n));\n") .append("}\n") .toString(); } @Override public boolean hasArrayOutputType() { return true; } }