// ============================================================================ // // Copyright (C) 2006-2016 Talend Inc. - www.talend.com // // This source code is available under agreement available at // %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt // // You should have received a copy of the agreement // along with this program; if not, write to Talend SA // 9 rue Pages 92150 Suresnes, France // // ============================================================================ package org.talend.dataquality.standardization.action; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Random; import org.apache.commons.lang.StringUtils; import org.apache.log4j.Logger; import org.apache.lucene.document.Document; import org.apache.lucene.search.TopDocs; import org.talend.dataquality.standardization.index.SynonymIndexSearcher; /** * DOC zshen class global comment. Detailled comment */ public class SynonymReplaceAction implements ITalendStrConversionAction { private static final Logger LOG = Logger.getLogger(SynonymReplaceAction.class); private Map<String, SynonymIndexSearcher> synonymSearcherMap; /* * (non-Javadoc) * * @see org.talend.dataquality.standardization.action.ITalendStrConversionAction#run(java.lang.String, int, * java.lang.String) */ @Override public String run(String str, int modifCount, String extraParameter, final Random random) { if (str.length() > 0) { SynonymIndexSearcher searcher = getSynonymSearcherMap().get(extraParameter); if (searcher == null) { searcher = new SynonymIndexSearcher(extraParameter); synonymSearcherMap.put(extraParameter, searcher); } try { TopDocs docs = searcher.searchDocumentBySynonym(str); if (docs.totalHits > 0) { Document doc = searcher.getDocument(docs.scoreDocs[0].doc); List<String> replaceList = new ArrayList<String>(); String word = doc.getValues(SynonymIndexSearcher.F_WORD)[0]; if (!word.equalsIgnoreCase(str)) { replaceList.add(word); } String[] synonyms = doc.getValues(SynonymIndexSearcher.F_SYN); for (String syn : synonyms) { if (!syn.equalsIgnoreCase(str)) { replaceList.add(syn); } } if (replaceList.isEmpty()) { return str; } else { return replaceList.get(random.nextInt(replaceList.size())); } } } catch (IOException e) { LOG.error(e, e); } } return StringUtils.EMPTY; } private Map<String, SynonymIndexSearcher> getSynonymSearcherMap() { if (synonymSearcherMap == null) { synonymSearcherMap = new HashMap<String, SynonymIndexSearcher>(); } return synonymSearcherMap; } public void closeSynonymIndice() { if (synonymSearcherMap != null && !synonymSearcherMap.isEmpty()) { for (SynonymIndexSearcher searcher : synonymSearcherMap.values()) { searcher.close(); } synonymSearcherMap.clear(); } } }