package com.cse10.classifier;
import com.cse10.article.Article;
import com.cse10.article.CrimeArticle;
import com.cse10.database.DatabaseConstants;
import com.cse10.database.DatabaseHandler;
import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
/**
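* Base class that handles database transactions: loads training and test data
* as Weka instances and wraps the static DatabaseHandler calls.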
* Created by Chamath on 12/16/2014.
*/
public abstract class DataHandler {

    /**
     * Maps the position of an instance inside the test set most recently built by
     * {@link #loadTestData(Class, String, boolean)} to the database id of its article.
     */
    protected HashMap<Integer, Integer> articleIds;

    /** File name associated with this handler; defaults to "file.arff". */
    protected String fileName;

    /** Flag returned by {@link #isFeatureVectorTransformerRequired()}; defaults to true. */
    protected boolean isFeatureVectorTransformerRequired;

    /**
     * Creates a handler with an empty id map, the default file name and the
     * feature-vector-transformer flag switched on.
     */
    public DataHandler() {
        articleIds = new HashMap<Integer, Integer>();
        fileName = "file.arff";
        isFeatureVectorTransformerRequired = true;
    }

    /**
     * Describes this data handler.
     *
     * @return the description text
     */
    protected abstract String printDescription();

    /**
     * Fetches the training data.
     *
     * @param featureVectorTransformer transformer handed to the concrete implementation
     * @return the training instances
     */
    public abstract Instances loadTrainingData(FeatureVectorTransformer featureVectorTransformer);

    /**
     * Fetches test data under the given conditions and wraps every row in an unlabeled
     * instance made of a string attribute "text" and a nominal class attribute
     * "@@class@@" with the values "crime" and "other".
     * <p>
     * {@link #articleIds} is cleared first and then refilled so that the index of each
     * returned instance maps to the id of the article it was built from.
     * <p>
     * Reading is best effort: if an {@link SQLException} occurs while iterating, the stack
     * trace is printed and the instances collected so far are returned.
     *
     * @param articleClass            which type of articles need to be classified
     *                                (ex:- CeylonTodayArticle.class); must have a table mapping
     * @param constrain               WHERE clause including 'where '; it is appended to the
     *                                query verbatim, so it must never contain untrusted input.
     *                                {@code null} is treated as "no constraint"
     * @param isApplyingKeyWordFilter if true, only the articles for which the key word
     *                                classifier returns 0.0 are kept
     * @return the collected test instances; empty when the query yields no result set
     * @throws IllegalArgumentException if no table is mapped to {@code articleClass}
     */
    public Instances loadTestData(Class articleClass, String constrain, boolean isApplyingKeyWordFilter) {
        KeyWordClassifierHandler keyWordClassifierHandler = new KeyWordClassifierHandler();
        keyWordClassifierHandler.configure(1, 1, "\\W");
        articleIds.clear();

        // news text attribute (string typed) and nominal class attribute
        Attribute textAttribute = new Attribute("text", (FastVector) null);
        FastVector classValues = new FastVector();
        classValues.addElement("crime");
        classValues.addElement("other");
        Attribute classAttribute = new Attribute("@@class@@", classValues);

        FastVector attributeList = new FastVector(2);
        attributeList.addElement(textAttribute);
        attributeList.addElement(classAttribute);

        // a freshly created data set has no class index yet, the class is the last attribute
        Instances testData = new Instances("TestNews", attributeList, 0);
        testData.setClassIndex(testData.numAttributes() - 1);

        String tableName = new DatabaseConstants().classToTableName.get(articleClass);
        if (tableName == null) {
            // fail fast instead of sending "... FROM null ..." to the database
            throw new IllegalArgumentException("No table mapped for article class: " + articleClass);
        }
        String whereClause = (constrain == null) ? "" : constrain;
        String query = "SELECT id,content FROM " + tableName + " " + whereClause;

        ResultSet rs = DatabaseHandler.executeQuery(query);
        if (rs == null) {
            // nothing to iterate over; hand back the empty data set
            return testData;
        }

        try {
            int instNumber = 0;
            while (rs.next()) {
                int id = rs.getInt("id");
                String news = rs.getString("content");

                Instance inst = new Instance(testData.numAttributes());
                inst.setValue(textAttribute, news);
                inst.setDataset(testData);
                inst.setClassMissing();

                // if we apply key word filter, we remove obvious non-crime articles first
                // NOTE(review): 0.0 is presumably the index of "crime" - confirm against KeyWordClassifierHandler
                boolean keep = !isApplyingKeyWordFilter
                        || keyWordClassifierHandler.classifyInstance(inst) == 0.0;
                if (keep) {
                    testData.add(inst);
                    articleIds.put(instNumber, id); //in order to keep track of ID
                    instNumber++;
                }
            }
        } catch (SQLException e) {
            // best effort: keep what was read so far
            e.printStackTrace();
        } finally {
            // release the result set, it was never closed before
            try {
                rs.close();
            } catch (SQLException closeFailure) {
                closeFailure.printStackTrace();
            }
        }
        return testData;
    }

    /**
     * Returns the live (not copied) map that is refilled by every call of
     * {@link #loadTestData(Class, String, boolean)}.
     *
     * @return map from test instance index to article id
     */
    public HashMap<Integer, Integer> getArticleIds() {
        return articleIds;
    }

    /**
     * @return the file name of this handler
     */
    public String getFileName() {
        return fileName;
    }

    /**
     * @return the value of the feature-vector-transformer flag
     */
    public boolean isFeatureVectorTransformerRequired() {
        return isFeatureVectorTransformerRequired;
    }

    //wrapper methods for data base handler class

    /**
     * Delegates to {@code DatabaseHandler.fetchArticlesWithNullLabels}.
     *
     * @param tableName article class, passed through unchanged
     * @param endDate   date, passed through unchanged
     * @return the list returned by the database handler
     */
    public List<Article> fetchArticlesWithNullLabels(Class tableName, Date endDate) {
        return DatabaseHandler.fetchArticlesWithNullLabels(tableName, endDate);
    }

    /**
     * Delegates to {@code DatabaseHandler.fetchArticlesByIdList}.
     *
     * @param tableName          article class, passed through unchanged
     * @param crimeArticleIdList ids, passed through unchanged
     * @return the list returned by the database handler
     */
    public List<Article> fetchArticlesByIdList(Class tableName, List<Integer> crimeArticleIdList) {
        return DatabaseHandler.fetchArticlesByIdList(tableName, crimeArticleIdList);
    }

    /**
     * Delegates to {@code DatabaseHandler.insertCrimeArticleAndUpdatePprArticle}.
     *
     * @param crimeArticle crime article, passed through unchanged
     * @param article      article, passed through unchanged
     */
    public void insertCrimeArticleAndUpdatePprArticle(CrimeArticle crimeArticle, Article article) {
        DatabaseHandler.insertCrimeArticleAndUpdatePprArticle(crimeArticle, article);
    }

    /**
     * Delegates to {@code DatabaseHandler.updateArticle}.
     *
     * @param article article, passed through unchanged
     */
    public void updateArticle(Article article) {
        DatabaseHandler.updateArticle(article);
    }

    /**
     * Delegates to {@code DatabaseHandler.closeDatabase}.
     */
    public void closeDatabase() {
        DatabaseHandler.closeDatabase();
    }
}