// ============================================================================
//
// Copyright (C) 2006-2016 Talend Inc. - www.talend.com
//
// This source code is available under agreement available at
// %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt
//
// You should have received a copy of the agreement
// along with this program; if not, write to Talend SA
// 9 rue Pages 92150 Suresnes, France
//
// ============================================================================
package org.talend.dataquality.statistics.frequency.recognition;
import java.util.Collections;
import java.util.Set;
import org.apache.commons.lang.StringUtils;
import org.talend.dataquality.common.regex.ChainResponsibilityHandler;
import org.talend.dataquality.common.regex.HandlerFactory;
import org.talend.dataquality.statistics.type.DataTypeEnum;
/**
* Recognize East Asian characters given a predefined list of characters and their pattern mappings.
*
* @since 1.3.0
* @author mzhao
*/
public class EastAsianCharPatternRecognizer extends AbstractPatternRecognizer {

    /** Handler obtained from {@link HandlerFactory#createEastAsiaPatternHandler()}; applied to every non-empty value. */
    private final ChainResponsibilityHandler eastAsiaPatternHandler = HandlerFactory.createEastAsiaPatternHandler();

    /**
     * Replaces the East Asian characters of the given string by their pattern, using the East Asia pattern handler.
     * <p>
     * The result always holds exactly one pattern string and is always flagged as "not complete": the handler does
     * not report whether every character of the value has been replaced, so this recognizer never claims that the
     * recognition is finished.
     *
     * @param stringToRecognize the value to convert; when {@code null} or empty it is returned unchanged as the
     * single pattern of the result
     * @param type the data type of the value; not consulted by this implementation
     * @return a result wrapping the single pattern string, with the completion flag set to {@code false}
     */
    @Override
    public RecognitionResult recognize(String stringToRecognize, DataTypeEnum type) {
        RecognitionResult result = new RecognitionResult();
        if (StringUtils.isEmpty(stringToRecognize)) {
            // Nothing to replace: hand the null/empty input back as-is.
            result.setResult(Collections.singleton(stringToRecognize), false);
            return result;
        }
        // Since the current implementation of East Asia character replacement is using a regex matcher, there is no
        // way to get the "isComplete" status during the process. So the status is simply deemed "not complete yet",
        // i.e. the flag passed below must be false (it was previously true, contradicting this intent).
        result.setResult(Collections.singleton(eastAsiaPatternHandler.handleRequest(stringToRecognize)), false);
        return result;
    }

    /**
     * Returns the pattern(s) computed for the given value.
     * <p>
     * Delegates to the single-argument {@code recognize} overload, which is not declared in this class (presumably
     * inherited from {@link AbstractPatternRecognizer}).
     *
     * @param originalValue the value to convert into a pattern
     * @return the pattern string set of the recognition result
     */
    @Override
    protected Set<String> getValuePattern(String originalValue) {
        return recognize(originalValue).getPatternStringSet();
    }
}