/*
 * Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Cascading is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Cascading is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Cascading. If not, see <http://www.gnu.org/licenses/>.
 */

package cascading.operation.regex;

import java.beans.ConstructorProperties;
import java.util.regex.Matcher;

import cascading.flow.FlowProcess;
import cascading.operation.OperationCall;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import org.apache.log4j.Logger;

/**
 * Class RegexMatcher is the base class for common regular expression operations.
 * <p>
 * This operation uses {@link java.util.regex.Matcher} internally, specifically the method
 * {@link java.util.regex.Matcher#find()}.
 * <p>
 * Both matching helpers report {@code true} when the outcome of {@code find()} is equal to
 * {@link #negateMatch}; with {@code negateMatch == false} that means {@code true} is reported
 * for input on which the pattern was <em>not</em> found.
 *
 * @see java.util.regex.Matcher
 * @see java.util.regex.Pattern
 */
public class RegexMatcher extends RegexOperation<Matcher> {
  /** Field LOG */
  private static final Logger LOG = Logger.getLogger( RegexMatcher.class );

  /** Field negateMatch, the value the result of {@code Matcher.find()} is compared against. */
  protected final boolean negateMatch;

  /**
   * Creates a matcher for the given pattern with {@code negateMatch} set to {@code false}.
   *
   * @param patternString the regular expression, handed to the super class
   */
  @ConstructorProperties({"patternString"})
  protected RegexMatcher( String patternString ) {
    this( patternString, false );
  }

  /**
   * Creates a matcher for the given pattern.
   *
   * @param patternString the regular expression, handed to the super class
   * @param negateMatch   the value stored in {@link #negateMatch}
   */
  @ConstructorProperties({"patternString", "negateMatch"})
  protected RegexMatcher( String patternString, boolean negateMatch ) {
    super( patternString );
    this.negateMatch = negateMatch;
  }

  /**
   * Creates a matcher that accepts {@code ANY} number of arguments and declares the given
   * fields, with {@code negateMatch} set to {@code false}.
   *
   * @param fieldDeclaration the declared fields, must contain exactly one field
   * @param patternString    the regular expression, handed to the super class
   * @throws IllegalArgumentException if the field declaration does not have exactly one field
   */
  @ConstructorProperties({"fieldDeclaration", "patternString"})
  protected RegexMatcher( Fields fieldDeclaration, String patternString ) {
    this( fieldDeclaration, patternString, false );
  }

  /**
   * Creates a matcher that accepts {@code ANY} number of arguments and declares the given fields.
   *
   * @param fieldDeclaration the declared fields, must contain exactly one field
   * @param patternString    the regular expression, handed to the super class
   * @param negateMatch      the value stored in {@link #negateMatch}
   * @throws IllegalArgumentException if the field declaration does not have exactly one field
   */
  @ConstructorProperties({"fieldDeclaration", "patternString", "negateMatch"})
  protected RegexMatcher( Fields fieldDeclaration, String patternString, boolean negateMatch ) {
    super( ANY, fieldDeclaration, patternString );
    this.negateMatch = negateMatch;
    verify();
  }

  /** Rejects any field declaration whose size is not exactly one. */
  private void verify() {
    if( fieldDeclaration.size() == 1 ) {
      return;
    }

    throw new IllegalArgumentException( "num fields in fieldDeclaration must be one, found: " + fieldDeclaration.printVerbose() );
  }

  /**
   * Stores a {@link Matcher} built from this operation's pattern (initially over the empty
   * string) as the context of the given operation call.
   *
   * @param flowProcess   not used by this implementation
   * @param operationCall the call that receives the matcher as its context
   */
  @Override
  public void prepare( FlowProcess flowProcess, OperationCall<Matcher> operationCall ) {
    Matcher emptyMatcher = getPattern().matcher( "" );

    operationCall.setContext( emptyMatcher );
  }

  /**
   * Method matchWholeTuple resets the matcher onto the tab-joined string form of the tuple and
   * runs a single {@code find()}.
   *
   * @param matcher the matcher to reset and search with
   * @param input   of type Tuple
   * @return boolean {@code true} when the result of {@code find()} equals {@link #negateMatch}
   */
  protected boolean matchWholeTuple( Matcher matcher, Tuple input ) {
    String joined = input.toString( "\t" );

    matcher.reset( joined );

    boolean found = matcher.find();

    if( LOG.isDebugEnabled() ) {
      LOG.debug( "pattern: " + getPatternString() + ", matches: " + found );
    }

    return found == negateMatch;
  }

  /**
   * Method matchEachElement reports whether {@link #matchEachElementPos(Matcher, Tuple)} returned
   * something other than {@code -1}.
   *
   * @param matcher the matcher to reset and search with
   * @param input   of type Tuple
   * @return boolean {@code true} when a position other than {@code -1} was returned
   */
  protected boolean matchEachElement( Matcher matcher, Tuple input ) {
    int position = matchEachElementPos( matcher, input );

    return !( position == -1 );
  }

  /**
   * Method matchEachElementPos walks the tuple in iteration order, running {@code find()} on the
   * string form of each element (a {@code null} element is treated as the empty string), and
   * stops at the first element whose {@code find()} result equals {@link #negateMatch}.
   *
   * @param matcher the matcher to reset and search with
   * @param input   of type Tuple
   * @return int the zero-based position of that element, or {@code -1} if there is none
   */
  protected int matchEachElementPos( Matcher matcher, Tuple input ) {
    int index = -1;

    for( Object value : input ) {
      index++;

      // a null element is searched, and logged, as the empty string
      Object element = value == null ? "" : value;

      matcher.reset( element.toString() );

      boolean found = matcher.find();

      if( LOG.isDebugEnabled() ) {
        LOG.debug( "pattern: " + getPatternString() + ", matches: " + found + ", element: '" + element + "'" );
      }

      if( found == negateMatch ) {
        return index;
      }
    }

    return -1;
  }

  /**
   * Two instances are equal when the other object is a {@code RegexMatcher}, the super class
   * considers them equal, and both carry the same {@link #negateMatch} value.
   *
   * @param object the object to compare with
   * @return boolean {@code true} if the objects are equal
   */
  @Override
  public boolean equals( Object object ) {
    if( this == object ) {
      return true;
    }

    if( !( object instanceof RegexMatcher ) || !super.equals( object ) ) {
      return false;
    }

    return negateMatch == ( (RegexMatcher) object ).negateMatch;
  }

  /**
   * Combines the super class hash with {@link #negateMatch}.
   *
   * @return int the hash code
   */
  @Override
  public int hashCode() {
    return 31 * super.hashCode() + ( negateMatch ? 1 : 0 );
  }
}