package lda.wikievidence.modelcreation;

import hbase.operations.HBaseOperations;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.util.zip.GZIPInputStream;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.log4j.Logger;

public class LDAClientExtractProbabilities extends LDAClient {

    private String lastIterationPath;
    private String topic;
    private String tableName;

    public LDAClientExtractProbabilities(int threadnr, byte[] dataset,
            String topic, String tableName) {
        super(threadnr);
        this.lastIterationPath = modeloutputPath + "00050";
        this.topic = topic;
        this.tableName = tableName;
    }

    @Override
    public void run() {
        // // Create LDA Configuration File
        // byte[] config = ConfigCreation.createStandardPLDAConfig(datafilePath,
        //         modeloutputPath);
        // writeOutput(config, configPath);
        //
        // // Create Datafile
        // // writeOutput(data, datafilePath);
        // // ToDo Generate Data!
        //
        // // Execute LDA
        // try {
        //     Process proc = Runtime.getRuntime().exec(
        //             "java -jar "
        //                     + LDAProperties.getInstance().getLDAClientApp()
        //                     + " " + configPath);
        //     proc.waitFor();
        //
        //     // Then retrieve the process output
        //     InputStream in = proc.getInputStream();
        //     InputStream err = proc.getErrorStream();
        //
        //     byte b[] = new byte[in.available()];
        //     in.read(b, 0, b.length);
        //     System.out.println(new String(b));
        //
        //     byte c[] = new byte[err.available()];
        //     err.read(c, 0, c.length);
        //     System.out.println(new String(c));
        // } catch (IOException e) {
        //     e.printStackTrace();
        // } catch (InterruptedException e1) {
        //     e1.printStackTrace();
        // }
        //
        // storeProbabilityDistribution();
        //
        // // Delete Thread Directory
        // deleteDir(threadDir);
    }

    /**
     * Reads the gzipped topic-term distribution produced by the last LDA
     * iteration, extracts the probability line belonging to {@code topic} and
     * stores it in HBase.
     */
    private void storeProbabilityDistribution() {
        BufferedReader buffered = null;
        try {
            InputStream fileStream = new FileInputStream(lastIterationPath
                    + "topic-term-distributions.csv.gz");
            InputStream gzipStream = new GZIPInputStream(fileStream);
            Reader decoder = new InputStreamReader(gzipStream,
                    Charset.defaultCharset());
            buffered = new BufferedReader(decoder);

            // Skip ahead to the line holding the distribution of this topic.
            int neccLine = extractTopicLine(lastIterationPath, topic);
            for (int it = 0; it < neccLine; it++) {
                buffered.readLine();
            }

            String line = buffered.readLine();
            if (line == null) {
                Logger.getRootLogger().error(
                        "No distribution line found for topic " + topic);
                return;
            }
            String[] split = line.split(",");
            double[] vals = new double[split.length];
            for (int i = 0; i < split.length; i++) {
                vals[i] = Double.parseDouble(split[i]);
            }
            storeProbabilities(vals);
        } catch (FileNotFoundException e) {
            Logger.getRootLogger().error("Error:", e);
        } catch (IOException e) {
            Logger.getRootLogger().error("Error:", e);
        } finally {
            if (buffered != null) {
                try {
                    buffered.close();
                } catch (IOException e) {
                    Logger.getRootLogger().error("Error:", e);
                }
            }
        }
    }

    /**
     * Writes the per-term probabilities to HBase as a single Put keyed by the
     * topic: one column per term from term-index.txt in the "data" family.
     */
    private void storeProbabilities(double[] probline) {
        File file = new File(lastIterationPath + "term-index.txt");
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(file));
            String line = null;

            // Drop any previously stored distribution for this topic.
            HBaseOperations.getInstance().deleteIDRow(tableName, topic);

            Put put = new Put(topic.getBytes());
            int i = 0;
            while ((line = reader.readLine()) != null) {
                put.add(Bytes.toBytes("data"), Bytes.toBytes(line),
                        Bytes.toBytes(probline[i]));
                ++i;
            }
            HBaseOperations.getInstance().addCompleteEntry(tableName, put);
        } catch (FileNotFoundException e) {
            Logger.getRootLogger().error("Error:", e);
        } catch (IOException e) {
            Logger.getRootLogger().error("Error:", e);
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    Logger.getRootLogger().error("Error:", e);
                }
            }
        }
    }
}
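
// Hypothetical usage sketch (not part of the original source). It assumes that
// LDAClient ultimately implements Runnable (this class overrides run()) and that
// modeloutputPath and the other inherited fields are set up by the
// LDAClient(threadnr) constructor; the dataset bytes, topic id and table name
// below are illustrative values only, and run() only performs work once its
// commented-out pipeline is re-enabled.
//
//     byte[] dataset = ...;  // serialized input documents for the LDA run
//     LDAClientExtractProbabilities client =
//             new LDAClientExtractProbabilities(0, dataset, "topic_politics",
//                     "topic_term_probabilities");
//     new Thread(client).start();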