// ============================================================================
//
// Copyright (C) 2006-2016 Talend Inc. - www.talend.com
//
// This source code is available under agreement available at
// %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt
//
// You should have received a copy of the agreement
// along with this program; if not, write to Talend SA
// 9 rue Pages 92150 Suresnes, France
//
// ============================================================================
package org.talend.dataquality.semantic.index;

import java.io.IOException;
import java.net.URI;
import java.util.HashSet;
import java.util.Set;

import org.apache.log4j.Logger;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.talend.dataquality.semantic.model.DQCategory;

/**
 * {@link Index} implementation that delegates every lookup to a {@link DictionarySearcher}.
 * <p>
 * Created by sizhaoliu on 03/04/15.
 */
public class LuceneIndex implements Index {

    private static final Logger LOG = Logger.getLogger(LuceneIndex.class);

    /** Value handed to {@link DictionarySearcher#setTopDocLimit(int)} for every instance of this index. */
    private static final int TOP_DOC_LIMIT = 20;

    private final DictionarySearcher searcher;

    /**
     * Creates an index backed by a searcher opened on the given location.
     *
     * @param indexPath location passed to the {@link DictionarySearcher} constructor
     * @param searchMode search mode to configure on the searcher
     */
    public LuceneIndex(URI indexPath, DictionarySearchMode searchMode) {
        this(new DictionarySearcher(indexPath), searchMode);
    }

    /**
     * Creates an index backed by a searcher opened on the given Lucene directory.
     *
     * @param directory directory passed to the {@link DictionarySearcher} constructor
     * @param searchMode search mode to configure on the searcher
     */
    public LuceneIndex(Directory directory, DictionarySearchMode searchMode) {
        this(new DictionarySearcher(directory), searchMode);
    }

    /**
     * Shared initialisation: stores the searcher, then applies the top-doc limit and the search mode to it.
     */
    private LuceneIndex(DictionarySearcher searcher, DictionarySearchMode searchMode) {
        this.searcher = searcher;
        searcher.setTopDocLimit(TOP_DOC_LIMIT);
        searcher.setSearchMode(searchMode);
    }

    /**
     * Asks the underlying searcher to refresh its view of the index if needed.
     */
    @Override
    public void initIndex() {
        searcher.maybeRefreshIndex();
    }

    /**
     * Closes the underlying searcher.
     */
    @Override
    public void closeIndex() {
        searcher.close();
    }

    /**
     * Collects the category ids of the documents returned by a synonym search on {@code data}.
     *
     * @param data the value to look up
     * @return the distinct category ids of the hits; never {@code null}. If an {@link IOException} occurs the error
     * is logged and the ids gathered up to that point (possibly none) are returned.
     */
    @Override
    public Set<String> findCategories(String data) {
        Set<String> foundCategorySet = new HashSet<>();
        try {
            TopDocs docs = searcher.searchDocumentBySynonym(data);
            for (ScoreDoc scoreDoc : docs.scoreDocs) {
                Document document = searcher.getDocument(scoreDoc.doc);
                // Document.get returns null when the field is missing, so a hit without a category id is
                // skipped instead of failing the whole lookup with a NullPointerException.
                String categoryId = document.get(DictionarySearcher.F_CATID);
                if (categoryId != null) {
                    foundCategorySet.add(categoryId);
                }
            }
        } catch (IOException e) {
            LOG.error("Failed to find categories in the dictionary index", e);
        }
        return foundCategorySet;
    }

    /**
     * Delegates to {@link DictionarySearcher#validDocumentWithCategories} to check {@code data} against the given
     * semantic type and its children.
     *
     * @param data the value to validate
     * @param semanticType the category passed to the searcher
     * @param children the child categories passed to the searcher
     * @return the searcher's verdict, or {@code false} when an {@link IOException} occurs (the error is logged)
     */
    @Override
    public boolean validCategories(String data, DQCategory semanticType, Set<DQCategory> children) {
        try {
            return searcher.validDocumentWithCategories(data, semanticType, children);
        } catch (IOException e) {
            LOG.error("Failed to validate data against categories in the dictionary index", e);
            return false;
        }
    }
}