/* * Copyright 2015 Themistoklis Mavridis <themis.mavridis@issel.ee.auth.gr>. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.thesmartweb.swebrank; import jgibblda.Estimator; import jgibblda.Inferencer; import jgibblda.LDACmdOption; import jgibblda.Model; import jgibblda.*; /** * Class for the use of Latent Dirichlet Allocation * @author Themistoklis Mavridis */ public class LDAcall { /** * Method to setup LDA * @param nTopics number of Topics top recognize * @param alpha alpha value of LDA * @param beta beta value of LDA * @param niters number of iterations the algorithm to run * @param top_words the amount of top words per topic to get * @param directory the directory to save the output */ public void call(int nTopics,double alpha,double beta,int niters,int top_words,String directory){ //run the LDA String directory_LDA=directory; System.gc(); LDAestimate( nTopics, directory_LDA , alpha,beta,niters, top_words); System.gc(); } /** * Method to run LDA * @param nTopics number of Topics top recognize * @param alpha alpha value of LDA * @param beta beta value of LDA * @param niters number of iterations the algorithm to run * @param top_words the amount of top words per topic to get * @param directory the directory to save the output */ public void LDAestimate(int nTopics, String directory, double alpha, double beta, int niters,int top_words){ System.gc(); System.out.println("Starting LDA for discovering " + nTopics + " topics in " + directory + "content_for_analysis.txt"); LDACmdOption option = new LDACmdOption(); option.est = true; option.alpha = alpha; option.beta = beta; option.K = nTopics; option.niters = niters; option.savestep = 2000; option.dir = directory; option.twords = top_words; option.dfile = "content_for_analysis.txt"; System.out.println("Gibbs LDA Parameters:"); System.out.println("alpha:\t" + option.alpha); System.out.println("beta:\t" + option.beta); System.out.println("Topics:\t" + option.K); System.out.println("Iterations:\t" + option.niters); System.out.println("savestep:\t" + option.savestep); System.out.println("Topic Words:\t" + option.twords); System.out.println("dfile:\t" + option.dfile); try { if (option.est || option.estc) { if (option.est) { System.out.println("Estimate the LDA model from scratch"); } else { System.out.println("Continue to estimate the model from a previously estimated model"); } Estimator estimator = new Estimator(); estimator.init(option); estimator.estimate(); } else if (option.inf) { System.out.println("Do inference for previously unseen (new) data using a previously estimated LDA model"); Inferencer inferencer = new Inferencer(); inferencer.init(option); Model newModel = inferencer.inference(); for (int i = 0; i < newModel.phi.length; ++i){ //phi: K * V //System.out.println("-----------------------\ntopic" + i + " : "); for (int j = 0; j < 10; ++j){ //System.out.println(inferencer.globalDict.id2word.get(j) + "\t" + newModel.phi[i][j]); } } } } catch (Exception e){ System.out.println("Error in main: " + e.getMessage()); return; } } }