package com.cloudera.sa.hcu.io.put;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.cloudera.sa.hcu.io.put.hdfs.writer.AbstractWriter;
import com.cloudera.sa.hcu.io.put.hdfs.writer.WriterFactory;
import com.cloudera.sa.hcu.io.put.listener.PutListener;
import com.cloudera.sa.hcu.io.put.local.reader.AbstractLocalFileColumnReader;
import com.cloudera.sa.hcu.io.put.local.reader.ReaderFactory;
import com.cloudera.sa.hcu.io.utils.LocalFileUtils;
import com.cloudera.sa.hcu.utils.PropertyUtils;
/**
 * Loads local files into HDFS (or HBase, via the writer implementations),
 * optionally splitting the work across multiple threads. Thread count comes
 * from {@link #CONF_NUM_THREAD}; each worker runs its own single-threaded put
 * over a hash-partitioned subset of the input files.
 */
public class Putter
{
    public static final String CONF_NUM_THREAD = "batch.files.thread.split";

    // Progress listeners notified of start, per-1000-row, and finish events.
    ArrayList<PutListener> listeners = new ArrayList<PutListener>();

    /**
     * Registers a listener that will receive progress callbacks.
     *
     * @param listener callback receiver
     */
    public void addListener(PutListener listener)
    {
        listeners.add(listener);
    }

    // Fires the per-1000-row progress callback on every registered listener.
    private void notifyWriten1000Rows(long rowsAdded, long lastReadTime, long lastWriteTime)
    {
        for (PutListener pl : listeners)
        {
            pl.onA1000Processed(rowsAdded, lastReadTime, lastWriteTime);
        }
    }

    // Tells every listener the put is starting and how many files it covers.
    private void notifyOfStart(long numberOfFiles)
    {
        for (PutListener pl : listeners)
        {
            pl.onStart(numberOfFiles);
        }
    }

    // Tells every listener the put finished and the total rows written.
    private void notifyOfFinished(long rowsAdded)
    {
        for (PutListener pl : listeners)
        {
            pl.onFinished(rowsAdded);
        }
    }

    /**
     * Runs a put using the thread count configured under
     * {@link #CONF_NUM_THREAD} (defaults to 1 when the property is absent).
     * Prints an error and returns without doing any work if the configured
     * value is not a valid integer — preserving the original lenient behavior.
     *
     * @param p reader/writer/threading configuration
     * @throws IOException if the underlying put fails
     */
    public void put(Properties p) throws IOException
    {
        String numOfThreadsStr = p.getProperty(CONF_NUM_THREAD);
        int numOfThreads = 1;
        if (numOfThreadsStr != null)
        {
            try
            {
                numOfThreads = Integer.parseInt(numOfThreadsStr.trim());
            }
            catch (NumberFormatException e)
            {
                System.out.println("Value for '" + CONF_NUM_THREAD + "' is not a valid number. Value was '" + numOfThreadsStr + "'.");
                return;
            }
        }
        put(p, numOfThreads);
    }

    /**
     * Runs a put with an explicit thread count.
     *
     * @param properties reader/writer configuration
     * @param threads    number of worker threads; 1 runs inline on the caller
     * @throws IOException              if reading or writing fails
     * @throws IllegalArgumentException if {@code threads} is less than 1
     *                                  (previously this surfaced as an
     *                                  ArithmeticException from {@code % threads})
     */
    public void put(Properties properties, int threads) throws IOException
    {
        if (threads < 1)
        {
            throw new IllegalArgumentException("Thread count must be at least 1 but was " + threads);
        }
        System.out.println("Put threads set to " + threads);
        if (threads == 1)
        {
            putSingleThread(properties);
        }
        else
        {
            putMultiThread(properties, threads);
        }
    }

    /**
     * Partitions the input files across {@code threads} workers (stable split
     * by file-path hash), pre-creates the output directory, then starts one
     * {@link PutThread} per partition and waits for all of them to finish.
     */
    private void putMultiThread(Properties properties, int threads) throws IOException
    {
        String[] inputFilePaths = PropertyUtils.getStringProperty(properties, AbstractLocalFileColumnReader.CONF_INPUT_PATHS).split(",");
        String rootOutputDir = PropertyUtils.getStringProperty(properties, AbstractWriter.CONF_OUTPUT_PATH);

        inputFilePaths = LocalFileUtils.createStringArrayOfFiles(inputFilePaths);

        // Bucket each file by path hash so the same file always lands on the
        // same worker for a given thread count.
        ArrayList<ArrayList<String>> seperatedFiles = new ArrayList<ArrayList<String>>();
        for (int i = 0; i < threads; i++)
        {
            seperatedFiles.add(new ArrayList<String>());
        }
        for (int i = 0; i < inputFilePaths.length; i++)
        {
            int goToThreadIdx = Math.abs(inputFilePaths[i].hashCode() % threads);
            System.out.println(inputFilePaths[i] + " is going to be processed by thread " + goToThreadIdx);
            seperatedFiles.get(goToThreadIdx).add(inputFilePaths[i]);
        }

        // Open hdfs file system
        Configuration config = new Configuration();
        FileSystem hdfs = FileSystem.get(config);

        // Create root folder - HBase there is no need to make a directory TODO clean this up
        Path outputFilePath = new Path(rootOutputDir);
        System.out.println("Checking if directory exist: " + outputFilePath + " " + hdfs.exists(outputFilePath) + " " + hdfs.isDirectory(outputFilePath));
        if (hdfs.exists(outputFilePath) == false)
        {
            if (rootOutputDir.equals("HBase") == false)
            {
                System.out.println("Tring to make outputDirectory: " + outputFilePath);
                hdfs.mkdirs(outputFilePath);
            }
        }
        // BUG FIX: do NOT call hdfs.close() here. FileSystem.get() returns a
        // shared, cached instance; closing it before the workers start would
        // hand each worker's later FileSystem.get() an already-closed handle
        // ("Filesystem closed" IOExceptions). The cache owns the lifecycle.

        ArrayList<PutThread> putThreadList = new ArrayList<PutThread>();
        for (int i = 0; i < threads; i++)
        {
            PutThread putThread = new PutThread(seperatedFiles.get(i).toArray(new String[0]), rootOutputDir, i, properties);
            putThread.start();
            putThreadList.add(putThread);
        }
        System.out.println("Threads Started: " + outputFilePath);

        // BUG FIX: join each worker directly instead of the old
        // synchronized/wait(1000) polling loop; on interrupt, restore the
        // interrupt flag and stop waiting rather than swallowing it.
        for (PutThread t : putThreadList)
        {
            try
            {
                t.join();
            }
            catch (InterruptedException e)
            {
                Thread.currentThread().interrupt();
                break;
            }
        }
    }

    /**
     * Reads every row from the configured reader and writes it through the
     * configured writer on the calling thread, emitting listener callbacks at
     * start, every 1000 rows (with cumulative read/write millis since the
     * previous callback), and on completion.
     */
    private void putSingleThread(Properties properties) throws IOException
    {
        AbstractLocalFileColumnReader reader = ReaderFactory.initReader(properties);
        long rowsAddedCounter = 0;
        try
        {
            AbstractWriter writer = WriterFactory.initWriter(properties);
            try
            {
                String[] columns;
                long readTimeCounter = 0;
                long writeTimeCounter = 0;
                long writeStartTime = 0;

                notifyOfStart(reader.getNumberOfFiles());

                long readStartTime = System.currentTimeMillis();
                while ((columns = reader.getNextRow()) != null)
                {
                    readTimeCounter += System.currentTimeMillis() - readStartTime;

                    writeStartTime = System.currentTimeMillis();
                    writer.writeRow(reader.getRowType(), columns);
                    writeTimeCounter += System.currentTimeMillis() - writeStartTime;

                    rowsAddedCounter++;
                    if (rowsAddedCounter % 1000 == 0)
                    {
                        notifyWriten1000Rows(rowsAddedCounter, readTimeCounter, writeTimeCounter);
                        // Timers report per-1000-row windows, so reset them.
                        readTimeCounter = 0;
                        writeTimeCounter = 0;
                    }
                    readStartTime = System.currentTimeMillis();
                }
            }
            finally
            {
                // BUG FIX: formerly the writer (and reader) leaked if writeRow
                // or getNextRow threw; close them even on failure.
                writer.close();
            }
        }
        finally
        {
            reader.close();
        }
        notifyOfFinished(rowsAddedCounter);
    }

    /**
     * Worker that runs a single-threaded put over its slice of the input
     * files, writing under {@code <root output dir>/part-i-<threadNum>}.
     * Static nested class: it uses no enclosing-instance state (it builds a
     * fresh {@link Putter}), so it should not hold a hidden outer reference.
     */
    private static class PutThread extends Thread
    {
        String[] inputFilePaths;
        String rootOutputDir;
        Properties properties;
        int threadNum;

        PutThread(String[] inputFilePaths, String rootOutputDir, int threadNum, Properties properties)
        {
            this.inputFilePaths = inputFilePaths;
            this.rootOutputDir = rootOutputDir;
            this.properties = properties;
            this.threadNum = threadNum;
        }

        @Override
        public void run()
        {
            try
            {
                // Clone so per-thread overrides don't mutate the shared config.
                Properties putProperties = (Properties) properties.clone();
                putProperties.setProperty(AbstractLocalFileColumnReader.CONF_INPUT_PATHS, PropertyUtils.convertStringArrayToString(inputFilePaths));
                putProperties.setProperty(AbstractWriter.CONF_OUTPUT_PATH, putProperties.getProperty(AbstractWriter.CONF_OUTPUT_PATH) + "/part-i-" + threadNum);
                (new Putter()).put(putProperties, 1);
            }
            catch (IOException e)
            {
                // NOTE(review): failures are only printed; the parent has no
                // way to observe a failed worker. Consider surfacing this.
                e.printStackTrace();
            }
        }
    }
}