// ============================================================================
//
// Copyright (C) 2006-2016 Talend Inc. - www.talend.com
//
// This source code is available under agreement available at
// %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt
//
// You should have received a copy of the agreement
// along with this program; if not, write to Talend SA
// 9 rue Pages 92150 Suresnes, France
//
// ============================================================================
package org.talend.dataquality.converters;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
/**
* This class is used to remove consecutive duplicate characters.<br/>
* created by qiongli on 2017.3.30
*
*/
public class DuplicateCharEraser {
private Pattern removeRepeatCharPattern = null;
/**
*
* This constructor is used to initialize removeRepeatCharPattern and remove WhiteSpace chars like as " ","\t","\r","\n","\f".
*/
public DuplicateCharEraser() {
removeRepeatCharPattern = Pattern.compile("([\\s\\u0085\\p{Z}]|\r\n)" + "\\1+"); //$NON-NLS-1$//$NON-NLS-2$ //$NON-NLS-3$
}
/**
*
* This constructor is used to remove a given repeated String {@link #removeRepeatedChar(String)} .
* initialize removeRepeatCharPattern,add the Escape "\\" for non-word character like as "{","[","(","^,"+" and so on.
*
* @param repeatChar the string to be removed when it appears consecutively several times.
*/
public DuplicateCharEraser(char repeatChar) {
this(Character.toString(repeatChar));
}
/**
* Create a DuplicateCharEraser that will remove any repetition of the given char sequence.
*
* @param repeatChar the char sequence of which repetitions must be removed.
*/
public DuplicateCharEraser(String repeatChar) {
removeRepeatCharPattern = Pattern.compile("(" + Pattern.quote(repeatChar) + ")\\1+");
}
/**
*
* Remove consecutive repeated characters by a specified char.
*
* @param inputStr the source String
* @return the string with the source string removed if found
*/
public String removeRepeatedChar(String inputStr) {
if (StringUtils.isEmpty(inputStr)) {
return inputStr;
}
Matcher matcher = removeRepeatCharPattern.matcher(inputStr);
return matcher.replaceAll("$1"); //$NON-NLS-1$
}
}