/* * Copyright 2007 T-Rank AS * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package no.trank.openpipe.step; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import no.trank.openpipe.api.MultiInputFieldPipelineStep; import no.trank.openpipe.api.PipelineException; import no.trank.openpipe.api.document.AnnotatedField; import no.trank.openpipe.api.document.BaseAnnotatedField; import no.trank.openpipe.api.document.Document; /** * Chops field(s) to a maximum length. * * Set the appendOnChop, if you want this step to append with a string to indicate that it has been chopped. * {@link #setAppendOnChop(String)} * * @version $Revision$ */ public class ChopField extends MultiInputFieldPipelineStep { private int chopLength; private String appendOnChop; private String fitField; private Pattern LAST_WORD = Pattern.compile("[\\s\\.]+[\\S&&[^\\.]]+[\\s\\.]*$"); @Override protected void process(Document doc, String fieldName, List<AnnotatedField> fieldValues) throws PipelineException { List<AnnotatedField> newFields = new ArrayList<AnnotatedField>(); for (AnnotatedField fieldValue : fieldValues) { if (fieldValue.getValue().length() > chopLength) { String choppedString = chop(fieldValue.getValue(), doc.getFieldValue(fitField)); newFields.add(new BaseAnnotatedField(choppedString)); } else { newFields.add(fieldValue); } } doc.setField(fieldName, newFields); } private String chop(String value, String fitFieldValue) { int chopTo = appendOnChop == null ? chopLength : chopLength - appendOnChop.length(); chopTo = fitFieldValue == null ? chopTo : chopTo - fitFieldValue.length(); chopTo = Math.max(0, chopTo); Matcher matcher = LAST_WORD.matcher(value.substring(0, chopTo)); return matcher.replaceAll(appendOnChop == null ? "" : appendOnChop); } /** * Gets the maximum length that the field(s) should be chopped to. * * @return the maximum length that the field(s) should be chopped to. */ public int getChopLength() { return chopLength; } /** * Sets the the maximum length that the field(s) should be chopped to. * * @param chopLength the maximum length that the field(s) should be chopped to. */ public void setChopLength(int chopLength) { this.chopLength = chopLength; } /** * Sets the string that should be appended to the string if it has been chopped. * * @return the string that should be appended to the string if it has been chopped. */ public String getAppendOnChop() { return appendOnChop; } /** * Gets the string that should be appended to the string if it has been chopped. * * @param appendOnChop the string that should be appended to the string if it has been chopped. */ public void setAppendOnChop(String appendOnChop) { this.appendOnChop = appendOnChop; } /** * Gets the fitField. * * @return the fitField * @see #setFitField(String) */ public String getFitField() { return fitField; } /** * Sets the fitField. If this is set the chopper will chop texts relative to the length of the value of fitfield. * That means that the chopLength will be shortened by the length of the fitField value. * * @param fitField the fitField */ public void setFitField(String fitField) { this.fitField = fitField; } @Override public String getRevision() { return "$Revision$"; } }