// ============================================================================
//
// Copyright (C) 2006-2016 Talend Inc. - www.talend.com
//
// This source code is available under agreement available at
// %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt
//
// You should have received a copy of the agreement
// along with this program; if not, write to Talend SA
// 9 rue Pages 92150 Suresnes, France
//
// ============================================================================
package org.talend.dataquality.semantic.broadcast;
import java.io.IOException;
import java.io.Serializable;
import java.util.*;
import org.apache.log4j.Logger;
import org.apache.lucene.store.Directory;
import org.talend.dataquality.semantic.api.CategoryRegistryManager;
import org.talend.dataquality.semantic.model.CategoryType;
import org.talend.dataquality.semantic.model.DQCategory;
/**
 * Created by jteuladedenantes on 20/10/16.
 * <p>
 * Serializable holder for the documents of a lucene index. Only the document list belongs to the serialized form; the
 * lucene {@link Directory} is built lazily by {@link #get()} and cached in the instance. This avoids creating the lucene
 * index for each row: the index is meant to be created once per deserialized copy, i.e. once for each worker.
 */
public class BroadcastIndexObject implements Serializable {

    private static final long serialVersionUID = 7350930198992853600L;

    private static final Logger LOGGER = Logger.getLogger(BroadcastIndexObject.class);

    // The serializable object
    private List<BroadcastDocumentObject> documentList;

    // The lucene index created from the serializable object. It is transient on purpose: it is a cache that can always
    // be rebuilt from documentList, and keeping it out of the serialized form prevents an instance on which get() has
    // already been called from dragging a live lucene Directory into the object stream.
    private transient Directory ramDirectory;

    /**
     * Build an index based on a list of {@link BroadcastDocumentObject}.
     *
     * @param documentList The {@link BroadcastDocumentObject} to be used to build up the index.
     */
    public BroadcastIndexObject(List<BroadcastDocumentObject> documentList) {
        this.documentList = documentList;
    }

    /**
     * Build an index object from the documents of an existing lucene directory, restricted to the dictionary
     * categories. Equivalent to {@code new BroadcastIndexObject(inputDirectory, false)}.
     *
     * @param inputDirectory the lucene directory to read the documents from
     */
    public BroadcastIndexObject(Directory inputDirectory) {
        this(inputDirectory, false);
    }

    /**
     * Build an index object from the documents of an existing lucene directory.
     * <p>
     * If the directory cannot be read, the error is logged and the document list is left empty instead of propagating
     * the exception.
     *
     * @param inputDirectory the lucene directory to read the documents from
     * @param includeOpenCategories whether open categories should be included. When {@code true}, the documents are read
     * without any category filter; when {@code false}, only the names of the {@link CategoryType#DICT} categories
     * returned by {@code listCategories(false)} are passed as filter.
     */
    public BroadcastIndexObject(Directory inputDirectory, boolean includeOpenCategories) {
        try {
            if (includeOpenCategories) {
                documentList = BroadcastUtils.readDocumentsFromIndex(inputDirectory);
            } else {
                // Collect the names of the dictionary-type categories to use as filter.
                Collection<DQCategory> categories = CategoryRegistryManager.getInstance().listCategories(false);
                Set<String> dictCategoryNames = new HashSet<String>();
                for (DQCategory category : categories) {
                    if (CategoryType.DICT.equals(category.getType())) {
                        dictCategoryNames.add(category.getName());
                    }
                }
                documentList = BroadcastUtils.readDocumentsFromIndex(inputDirectory, dictCategoryNames);
            }
        } catch (IOException e) {
            // Best effort: fall back to an empty document list rather than failing the construction.
            documentList = Collections.emptyList();
            LOGGER.error("Unable to read synonym index.", e);
        }
    }

    /**
     * @return the documents held by this object, i.e. the serializable content the index is built from
     */
    public List<BroadcastDocumentObject> getDocumentList() {
        return documentList;
    }

    /**
     * Lazily creates the lucene index from the document list on the first call and returns the cached instance on the
     * following calls. The method is synchronized so that concurrent callers of the same instance do not build the
     * index twice.
     *
     * @return the lucene index, or {@code null} if it could not be built (the error is logged and the build is
     * attempted again on the next call)
     */
    public synchronized Directory get() {
        if (ramDirectory == null) {
            try {
                ramDirectory = BroadcastUtils.createRamDirectoryFromDocuments(documentList);
            } catch (IOException e) {
                LOGGER.error("Unable to rebuild the broadcast dictionary.", e);
            }
        }
        return ramDirectory;
    }
}