/*
* Copyright 2013 SciFY NPO <info@scify.org>.
*
* This product is part of the NewSum Free Software.
* For more information about NewSum visit
*
* http://www.scify.gr/site/en/our-projects/completed-projects/newsum-menu-en
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* If this code or its output is used, extended, re-engineered, integrated,
* or embedded to any extent in another software or hardware, there MUST be
* an explicit attribution to this work in the resulting source code,
* the packaging (where such packaging exists), or user interface
* (where such an interface exists).
* The attribution must be of the form "Powered by NewSum, SciFY"
*/
package org.scify.NewSumServer.Server.MachineLearning;
import gr.demokritos.iit.jinsect.documentModel.representations.DocumentNGramSymWinGraph;
import gr.demokritos.iit.jinsect.storage.INSECTDB;
import gr.demokritos.iit.conceptualIndex.structs.Distribution;
import java.util.ArrayList;
import java.util.Arrays;
import javax.swing.text.StyledEditorKit;
/**
* Create a server to do all important actions for the Article labeling Contains
* two methods. One for feeding the classifier with data and one for labeling
*
* @author panagiotis
*/
public class classificationModule {
public INSECTDB file = new INSECTDBWithDir("", "./data/MachineLearningData");
// public static void main(String[] args){
//
//
//
// System.out.println(getCategory("Ο ηθοποιός Ντάνιελ Ντέι Λούις έρχεται ξανά στην Αθήνα προκειμένου να βοηθήσει τους σκοπούς της Εταιρείας Προστασίας Σπαστικών "));
//
// //{Τεχνολογία,Ελλάδα,Αθλητισμός,Κόσμος,Πολιτισμός,Οικονομία,Επιστήμη}
//
// }
/**
*
* @param categoryName The category that the text belongs to
* @param text The text that belongs to the specified category
*/
public void feedClassifier(String categoryName, String text, boolean mergeGraph) {
DocumentNGramSymWinGraph gTextGraph = new DocumentNGramSymWinGraph();
// define graph for the text received
gTextGraph.setDataString(text);
//read all class names
if (mergeGraph) {
String[] aCategories = file.getObjectList("cg");
//search if categoryName exists in the .cg list
ArrayList<String> lsCategories = new ArrayList<String>(Arrays.asList(aCategories));
if (lsCategories.contains(categoryName)) {
//if true merge between the two graphs
DocumentNGramSymWinGraph CategoryG =
(DocumentNGramSymWinGraph) file.loadObject(categoryName, "cg");
Distribution<String> dClassCounts;
String[] counterNames;
counterNames = file.getObjectList("counter");
if (counterNames.length == 0) {
dClassCounts = new Distribution<String>();
dClassCounts.increaseValue(categoryName, 1.0);
file.saveObject(dClassCounts, "mergeCounter", "counter");
double dInstanceCount = dClassCounts.getValue(categoryName);
CategoryG.mergeGraph(gTextGraph, 1 / dInstanceCount);
} else {
dClassCounts = (Distribution<String>) file.loadObject("mergeCounter", "counter");
dClassCounts.increaseValue(categoryName, 1.0);
file.saveObject(dClassCounts, "mergeCounter", "counter");
double dInstanceCount = dClassCounts.getValue(categoryName);
CategoryG.mergeGraph(gTextGraph, 1 / dInstanceCount);
}
// file.saveObject(CategoryG, categoryName, "cg");
} else {
//if false create new .cg with the current graph and categoryName as a name
file.saveObject(gTextGraph, categoryName, "cg");
}
}
//save in info.txt record with the text and the category name
String sID = writeToFile.createTxtFile(categoryName, true);
//save the current graph as .ig and name the serial number
file.saveObject(gTextGraph, sID, "ig");
}
/**
*
* @param text The article text
* @return the category that this text belongs to
*/
public String getCategory(String text) {
/* here begins the labelling process */
String label;
DocumentNGramSymWinGraph Textg = new DocumentNGramSymWinGraph(); // define graph for the Text tha i recive
Textg.setDataString(text); //Create the text graph
String[] categoryArray = file.getObjectList("cg"); //read all class names and we put it in categoryArray
if (categoryArray.length == 0) {
label = "-none-";
} else {
//semLabelling.acquire();
//recommendation for the text
label = labelTagging.recommendation(file, text);
//semLabelling.release();
}
return label; // send the label to client
}
}