package org.talend.dataquality.datamasking.shuffling;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import org.apache.log4j.Logger;
/**
 * This class offers a shuffling service that combines the {@link ShuffleColumn} action and the
 * {@link ShufflingHandler} action.
 * <p>
 * Rows are buffered until the configured separation size is reached (or until the service is told that the input
 * has finished). Each full buffer is copied and submitted to an {@link ExecutorService} as a
 * {@code RowDataCallable}; the resulting {@link Future} is appended to {@link #getConcurrentQueue()}.
 * <p>
 * A {@link ShufflingHandler} must be set with {@link #setShufflingHandler(ShufflingHandler)} before the first row is
 * added. NOTE(review): the handler is presumably the consumer that drains the queue on its own thread - confirm
 * against {@link ShufflingHandler}.
 */
public class ShufflingService {

    private static final Logger LOGGER = Logger.getLogger(ShufflingService.class);

    /** Futures of the submitted shuffling jobs, in submission order. */
    protected ConcurrentLinkedQueue<Future<List<List<Object>>>> concurrentQueue = new ConcurrentLinkedQueue<Future<List<List<Object>>>>();

    protected ShufflingHandler shufflingHandler;

    protected List<List<String>> shuffledColumns;

    protected List<String> allInputColumns;

    protected List<String> partitionColumns;

    /** Created lazily on the first call to {@link #execute(List)}. */
    protected ExecutorService executor;

    protected long seed;

    protected boolean hasSeed;

    /** Buffer of rows waiting to be submitted as one shuffling job. */
    private List<List<Object>> rows = new ArrayList<List<Object>>();

    /** Number of buffered rows that triggers a submission. */
    private int seperationSize = Integer.MAX_VALUE;

    private boolean hasLaunched = false;

    /**
     * Volatile because it is written by the thread calling {@link #setHasFinished(boolean)} and exposed through
     * {@link #hasFinished()}, which is meant to be polled by the handler.
     */
    private volatile boolean hasFinished = false;

    private boolean hasSubmitted = false;

    /**
     * Constructor without the partition choice.
     * <p>
     * No validation is performed here; the arguments are only stored and later handed to {@link ShuffleColumn}.
     * NOTE(review): an {@link IllegalArgumentException} for unknown column names is presumably raised by
     * {@link ShuffleColumn} when a batch is submitted - confirm.
     *
     * @param shuffledColumns the 2D list of shuffled columns
     * @param allInputColumns the list of all input columns name
     */
    public ShufflingService(List<List<String>> shuffledColumns, List<String> allInputColumns) {
        this(shuffledColumns, allInputColumns, null);
    }

    /**
     * Constructor with the partition choice.
     *
     * @param shuffledColumns the 2D list of shuffled columns
     * @param allInputColumns the list of all input columns name
     * @param partitionColumns the list of partition columns name, may be {@code null}
     */
    public ShufflingService(List<List<String>> shuffledColumns, List<String> allInputColumns, List<String> partitionColumns) {
        this.shuffledColumns = shuffledColumns;
        this.allInputColumns = allInputColumns;
        this.partitionColumns = partitionColumns;
    }

    public void setShufflingHandler(ShufflingHandler shufflingHandler) {
        this.shufflingHandler = shufflingHandler;
    }

    /**
     * Buffers a row and submits the buffer when it is due.<br>
     *
     * The rows are not shuffled immediately but are submitted to the {@link java.util.concurrent.ExecutorService}.<br>
     * The results will be retrieved from the {@link java.util.concurrent.Future} objects which are appended to a
     * {@link java.util.concurrent.ConcurrentLinkedQueue}<br>
     *
     * If the variable hasSubmitted equals true, the input has been declared finished: the given row is ignored and
     * the remaining buffered rows, if any, are submitted to a callable process.<br>
     *
     * If the variable hasSubmitted equals false, a non-empty row is added to the buffer. When the buffer size equals
     * the separation size, the buffered rows are submitted to a callable process.<br>
     *
     * @param row the row to buffer; an empty row is ignored
     * @throws IllegalStateException when no shuffling handler has been set
     * @throws IllegalArgumentException when the executor has already been shut down
     */
    protected synchronized void execute(List<Object> row) {
        launcheHandler();
        if (hasSubmitted) {
            // Final flush: only the rows still waiting in the buffer are submitted.
            if (!rows.isEmpty()) {
                executeFutureCall();
            }
            return;
        }
        if (row.isEmpty()) {
            return;
        }
        rows.add(row);
        if (rows.size() == seperationSize) {
            executeFutureCall();
        }
    }

    /**
     * Copies the buffered rows to another 2D list, empties the buffer, wraps the copy in a callable process and
     * submits the process to the executor. The returned future is appended to the queue.
     */
    private void executeFutureCall() {
        List<List<Object>> copyRow = deepCopyListTo(rows);
        // The job works on its own copy, so the buffer can be reused for the next batch.
        rows.clear();
        ShuffleColumn shuffle = new ShuffleColumn(shuffledColumns, allInputColumns, partitionColumns);
        if (hasSeed) {
            shuffle.setRandomSeed(seed);
        }
        Future<List<List<Object>>> future = executor.submit(new RowDataCallable<List<List<Object>>>(shuffle, copyRow));
        concurrentQueue.add(future);
    }

    /**
     * Starts the handler on the first call and makes sure a usable executor exists.
     *
     * @throws IllegalStateException when no shuffling handler has been set
     * @throws IllegalArgumentException when the executor has already been shut down
     */
    private void launcheHandler() {
        if (!hasLaunched) {
            if (shufflingHandler == null) {
                // Fail with an explicit message instead of an anonymous NullPointerException.
                throw new IllegalStateException("the shuffling handler must be set before adding rows");
            }
            shufflingHandler.start();
            hasLaunched = true;
        }
        if (executor == null) {
            executor = Executors.newCachedThreadPool();
        } else if (executor.isShutdown()) {
            throw new IllegalArgumentException("executor shutdown");
        }
    }

    /**
     * Copies a 2D list: the outer list and every inner list are new instances, the cell objects themselves are
     * shared with the source.
     *
     * @param source the rows to copy
     * @return a new list holding a copy of every row
     */
    private List<List<Object>> deepCopyListTo(List<List<Object>> source) {
        List<List<Object>> copyRows = new ArrayList<List<Object>>(source.size());
        for (List<Object> row : source) {
            copyRows.add(new ArrayList<Object>(row));
        }
        return copyRows;
    }

    /**
     * Sets the number of buffered rows that triggers the submission of a shuffling job.
     *
     * @param seperationSize2 the new separation size
     */
    public void setSeperationSize(int seperationSize2) {
        this.seperationSize = seperationSize2;
    }

    /**
     * Adds a new row in the waiting list and check the size of waiting list. When the waiting list fulfills the
     * partition size then launches the shuffle algorithm.
     *
     * @param row a list of row data
     */
    public void addOneRow(List<Object> row) {
        execute(row);
    }

    public ConcurrentLinkedQueue<Future<List<List<Object>>>> getConcurrentQueue() {
        return concurrentQueue;
    }

    /**
     * Gets the hasFinished.
     *
     * @return hasFinished
     */
    public boolean hasFinished() {
        return hasFinished;
    }

    /**
     * <ul>
     * <li>First sets the hasSubmitted variable to the given value and launches the execute() method with an empty
     * row. When the value is true, this allows the remaining buffered rows to be submitted to a callable process.</li>
     * <li>Then sets the hasFinished variable to the given value; a true value announces that no further job will be
     * submitted.</li>
     * <li>Finally calls {@code join()} on the shuffling handler.</li>
     * </ul>
     *
     * @param hasFinished true when all the input rows have been added
     * @throws IllegalStateException when no shuffling handler has been set
     */
    public void setHasFinished(boolean hasFinished) {
        this.hasSubmitted = hasFinished;
        execute(new ArrayList<Object>());
        // Published only after the last batch is queued, so a reader seeing "finished" also sees that batch.
        this.hasFinished = hasFinished;
        shufflingHandler.join();
    }

    /**
     * Sets the a table value directly by giving a 2D list. Every row goes through the same buffering as
     * {@link #addOneRow(List)}.
     *
     * @param rows list of list of object
     */
    public void setRows(List<List<Object>> rows) {
        for (List<Object> row : rows) {
            execute(row);
        }
    }

    /**
     * Shuts down the shuffling execution.
     * <p>
     * No new job is accepted afterwards. The method polls until the queue of futures is empty, then waits up to
     * 5 seconds for the executor to terminate. The queue is not drained here, so this call blocks until something
     * else has consumed every queued future. If the calling thread is interrupted, the wait is abandoned and the
     * interrupt status is restored.
     */
    public void shutDown() {
        if (executor == null) {
            return;
        }
        try {
            executor.shutdown();
            while (!concurrentQueue.isEmpty()) {
                Thread.sleep(200);
            }
            if (!executor.awaitTermination(5, TimeUnit.SECONDS)) {
                LOGGER.warn("executor did not terminate within 5 seconds");
            }
        } catch (InterruptedException e) {
            LOGGER.error(e.getMessage(), e);
            // Keep the interruption visible to the caller instead of swallowing it.
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Sets the seed handed to every {@link ShuffleColumn} created afterwards.
     *
     * @param seed the random seed
     */
    public void setRandomSeed(long seed) {
        this.hasSeed = true;
        this.seed = seed;
    }
}