// ============================================================================
//
// Copyright (C) 2006-2016 Talend Inc. - www.talend.com
//
// This source code is available under agreement available at
// %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt
//
// You should have received a copy of the agreement
// along with this program; if not, write to Talend SA
// 9 rue Pages 92150 Suresnes, France
//
// ============================================================================
package org.talend.dataquality.semantic.classifier.custom;
import java.util.HashSet;
import java.util.Set;
import org.apache.commons.collections.CollectionUtils;
import org.talend.dataquality.semantic.classifier.ISubCategory;
import org.talend.dataquality.semantic.classifier.impl.AbstractSubCategoryClassifier;
import org.talend.dataquality.semantic.filter.ISemanticFilter;
import org.talend.dataquality.semantic.model.DQCategory;
import org.talend.dataquality.semantic.model.MainCategory;
import org.talend.dataquality.semantic.validator.ISemanticValidator;
/**
* created by talend on 2015-07-28 Detailled comment.
*/
public class UserDefinedClassifier extends AbstractSubCategoryClassifier {
private static final long serialVersionUID = 6641017802505586690L;
/**
* Method "addSubCategory" adds a subcategory if it does not already exists.
*
* @param category the category to add
* @return true when the category is added
*/
public boolean addSubCategory(UserDefinedCategory category) {
return potentialSubCategories.add(category);
}
/**
* Method "insertOrUpdateSubCategory" inserts or update a category.
*
* @param category the category to insert or update
* @return true when the category is correctly either inserted or updated
*/
public boolean insertOrUpdateSubCategory(UserDefinedCategory category) {
if (potentialSubCategories.contains(category)) {
return updateSubCategory(category);
} // else
return addSubCategory(category);
}
/**
* Method "updateSubCategory" updates a given category.
*
* @param category the category to update
* @return true if the category exists and is updated.
*/
public boolean updateSubCategory(UserDefinedCategory category) {
if (removeSubCategory(category)) {
return addSubCategory(category);
}
return false;
}
public boolean removeSubCategory(UserDefinedCategory category) {
return potentialSubCategories.remove(category);
}
/**
* classify data into Semantic Category IDs
*
* @see org.talend.dataquality.semantic.classifier.impl.AbstractSubCategoryClassifier#classify(java.lang.String)
*/
@Override
public Set<String> classify(String str) {
MainCategory mainCategory = MainCategory.getMainCategory(str);
return classify(str, mainCategory);
}
@Override
public boolean validCategories(String str, DQCategory semanticType, Set<DQCategory> children) {
MainCategory mainCategory = MainCategory.getMainCategory(str);
if (mainCategory == MainCategory.UNKNOWN || mainCategory == MainCategory.NULL || mainCategory == MainCategory.BLANK)
return false;
if (CollectionUtils.isEmpty(children))
return validCategories(str, mainCategory, semanticType);
return validChildrenCategories(str, mainCategory, children);
}
/**
* if there are children, we valid a COMPOUND category, so we have to valid the string with the children categories list
*
* @param str, the string to valid
* @param mainCategory
* @param children, the children categories list
* @return
*/
private boolean validChildrenCategories(String str, MainCategory mainCategory, Set<DQCategory> children) {
int cpt = 0;
final Set<String> childrenId = new HashSet<>();
for (DQCategory child : children)
childrenId.add(child.getId());
for (ISubCategory classifier : potentialSubCategories) {
if (childrenId.contains(classifier.getId())) {
cpt++;
if (isValid(str, mainCategory, (UserDefinedCategory) classifier, true))
return true;
if (cpt == children.size())
return false;
}
}
return false;
}
private boolean validCategories(String str, MainCategory mainCategory, DQCategory semanticType) {
for (ISubCategory classifier : potentialSubCategories) {
if (semanticType.getId().equals(classifier.getId()))
return isValid(str, mainCategory, (UserDefinedCategory) classifier, true);
}
return false;
}
/**
* <p>
* classify data into Semantic Category IDs
* <p/>
* Validate this input data to adapt which customized rules.
* <p/>
* Actually, the main category can be calculated based on the input string, but this method has better performance
* in case the mainCategory is already calculated previously.
*
* @param str is input data
* @param mainCategory: the MainCategory is computed by the input data
* @return
*/
public Set<String> classify(String str, MainCategory mainCategory) {
Set<String> catSet = new HashSet<>();
if (mainCategory != MainCategory.UNKNOWN && mainCategory != MainCategory.NULL && mainCategory != MainCategory.BLANK) {
for (ISubCategory classifier : potentialSubCategories) {
if (isValid(str, mainCategory, (UserDefinedCategory) classifier, false))
catSet.add(classifier.getId());
}
}
return catSet;
}
private boolean isValid(String str, MainCategory mainCategory, UserDefinedCategory classifier, boolean caseSensitive) {
MainCategory classifierCategory = classifier.getMainCategory();
// if the MainCategory is different, ignor it and continue;AlphaNumeric rule should contain pure Alpha and
// Numeric.
if (mainCategory == MainCategory.Alpha || mainCategory == MainCategory.Numeric) {
if (classifierCategory != mainCategory && classifierCategory != MainCategory.AlphaNumeric)
return false;
} else if (classifierCategory != mainCategory)
return false;
if (invalidFilter(str, classifier.getFilter()))
return false;
return validValidator(str, classifier.getValidator(), caseSensitive);
}
private boolean invalidFilter(String str, ISemanticFilter filter) {
return filter != null && !filter.isQualified(str);
}
private boolean validValidator(String str, ISemanticValidator validator, boolean caseSensitive) {
return validator != null && validator.isValid(str, caseSensitive);
}
}