// ============================================================================ // // Copyright (C) 2006-2016 Talend Inc. - www.talend.com // // This source code is available under agreement available at // %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt // // You should have received a copy of the agreement // along with this program; if not, write to Talend SA // 9 rue Pages 92150 Suresnes, France // // ============================================================================ package org.talend.dataquality.semantic.classifier.impl; import java.util.HashSet; import java.util.Set; import java.util.StringTokenizer; import org.talend.dataquality.semantic.classifier.ISubCategoryClassifier; import org.talend.dataquality.semantic.index.Index; import org.talend.dataquality.semantic.model.DQCategory; /** * Created by sizhaoliu on 27/03/15. */ public class DataDictFieldClassifier implements ISubCategoryClassifier { private static final long serialVersionUID = 6174669848299972111L; private Index dictionary; private Index keyword; public DataDictFieldClassifier(Index dictionary, Index keyword) { this.dictionary = dictionary; this.keyword = keyword; } @Override public Set<String> classify(String data) { StringTokenizer t = new StringTokenizer(data, " "); final int tokenCount = t.countTokens(); HashSet<String> result = new HashSet<>(); // if it's a valid syntactic data --> search in DD if (tokenCount < 3) { result.addAll(dictionary.findCategories(data)); } else { result.addAll(dictionary.findCategories(data)); result.addAll(keyword.findCategories(data)); } return result; } @Override public boolean validCategories(String data, DQCategory semanticType, Set<DQCategory> children) { StringTokenizer t = new StringTokenizer(data, " "); final int tokenCount = t.countTokens(); boolean result = dictionary.validCategories(data, semanticType, children); // if it's a valid syntactic data --> search in DD if (!result && tokenCount >= 3) { result = keyword.validCategories(data, semanticType, children); } return result; } public void closeIndex() { dictionary.closeIndex(); keyword.closeIndex(); } }