/*
* Hibernate OGM, Domain model persistence for NoSQL datastores
*
* License: GNU Lesser General Public License (LGPL), version 2.1 or later
* See the lgpl.txt file in the root directory or <http://www.gnu.org/licenses/lgpl-2.1.html>.
*/
package org.hibernate.ogm.util.parser.impl;
import java.util.regex.Pattern;
import org.hibernate.hql.ast.spi.predicate.LikePredicate;
/**
 * Creates {@link Pattern} objects equivalent to given HQL/JPQL {@code LIKE} expressions. Used by
 * {@link LikePredicate} implementations to emulate {@code LIKE} predicates via a native regexp operator.
 * <p>
 * The following rules apply for creating regular expressions:
 * <ul>
 * <li>the {@code _} wildcard is replaced by {@code .} (unless it is escaped)</li>
 * <li>the {@code %} wildcard is replaced by {@code .*} (unless it is escaped)</li>
 * <li>non-wildcard character sequences are quoted (wrapped by {@code \Q...\E}) to match them as is; a
 * {@code \E} sequence occurring within such a character sequence is matched literally as well</li>
 * <li>escape characters are omitted from the resulting pattern; the character following an escape character is
 * always treated as a literal, an escape character at the very end of the expression is ignored</li>
 * <li>the pattern is wrapped by {@code ^...$} to make sure the entire string is matched</li>
 * <li>the reg exp wildcard {@code .} matches line breaks</li>
 * </ul>
 *
 * @author Gunnar Morling
 */
public class LikeExpressionToRegExpConverter {

	private static final String QUOTE_START = "\\Q";
	private static final String QUOTE_END = "\\E";

	/**
	 * Replacement for a literal {@code \E}: closes the current quote, emits an escaped backslash followed by
	 * {@code E} and re-opens the quote. This is the same technique {@link Pattern#quote(String)} applies.
	 */
	private static final String QUOTED_QUOTE_END = QUOTE_END + "\\\\E" + QUOTE_START;

	/**
	 * The escape character of the {@code LIKE} expression; {@code null} if there is none.
	 */
	private final Character escapeCharacter;

	/**
	 * Creates a converter for {@code LIKE} expressions without an escape character.
	 */
	public LikeExpressionToRegExpConverter() {
		this( null );
	}

	/**
	 * Creates a converter for {@code LIKE} expressions using the given escape character.
	 *
	 * @param escapeCharacter the escape character; may be {@code null} if there is none
	 */
	public LikeExpressionToRegExpConverter(Character escapeCharacter) {
		this.escapeCharacter = escapeCharacter;
	}

	/**
	 * Creates a regular expression pattern object equivalent to the given {@code LIKE} expression.
	 *
	 * @param likeExpression the HQL/JPQL {@code LIKE} expression to convert
	 * @return a regular expression pattern object equivalent to the given {@code LIKE} expression
	 * @throws NullPointerException if {@code likeExpression} is {@code null}
	 */
	public Pattern getRegExpFromLikeExpression(String likeExpression) {
		StringBuilder pattern = new StringBuilder( "^" );
		// Collects the current run of literal characters; it is quoted as a whole once the run ends
		StringBuilder literal = new StringBuilder();
		boolean escaped = false;

		for ( int i = 0; i < likeExpression.length(); i++ ) {
			char character = likeExpression.charAt( i );

			if ( escaped ) {
				// Whatever follows the escape character (wildcards and the escape character itself included) is
				// a literal
				literal.append( character );
				escaped = false;
			}
			else if ( isEscapeCharacter( character ) ) {
				// The escape character itself never makes it into the pattern
				escaped = true;
			}
			else if ( character == '%' ) {
				appendQuoted( literal, pattern );
				pattern.append( ".*" );
			}
			else if ( character == '_' ) {
				appendQuoted( literal, pattern );
				pattern.append( "." );
			}
			else {
				literal.append( character );
			}
		}

		// A dangling escape character at the end of the expression has nothing to escape and is dropped; the
		// pending literal run is still closed properly so the trailing "$" keeps its meaning as anchor
		appendQuoted( literal, pattern );
		pattern.append( "$" );

		return Pattern.compile( pattern.toString(), Pattern.DOTALL );
	}

	/**
	 * Whether the given character is the configured escape character.
	 */
	private boolean isEscapeCharacter(char character) {
		return escapeCharacter != null && escapeCharacter.charValue() == character;
	}

	/**
	 * Appends the collected literal characters wrapped by {@code \Q...\E} to the given pattern and resets the
	 * literal buffer. Does nothing if no literal characters have been collected.
	 */
	private static void appendQuoted(StringBuilder literal, StringBuilder pattern) {
		if ( literal.length() == 0 ) {
			return;
		}

		pattern.append( QUOTE_START );
		// A "\E" within the literal would terminate the quote prematurely, so it is emitted outside of the quote
		pattern.append( literal.toString().replace( QUOTE_END, QUOTED_QUOTE_END ) );
		pattern.append( QUOTE_END );

		literal.setLength( 0 );
	}
}