/*
* JNI_SVM-light - A Java Native Interface for SVM-light
*
* Copyright (C) 2005
* Tom Crecelius & Martin Theobald
* Max-Planck Institute for Computer Science
*
* This program is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 51
* Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
package jnisvmlight;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.text.ParseException;
import java.util.ArrayList;
/**
 * The main interface class that transfers the training data to the SVM-light
 * library by a native call. Optionally takes as input an individually modified
 * set of training parameters or an array of string parameters that exactly
 * simulate the command line input parameters used by the SVM-light binaries.
 * This class can also be used for native classification calls.
 *
 * @author Tom Crecelius & Martin Theobald, including a bug fix by George Shaw (MIT)
 */
public class SVMLightInterface {

  /**
   * Apply an in-place quicksort prior to each native training call to
   * SVM-light. SVM-light requires each input feature vector to be sorted in
   * ascending order of dimensions. Disable this option if you are sure to
   * provide sorted vectors already.
   */
  public static boolean SORT_INPUT_VECTORS = true;

  // Loads the native library once, when the class is initialized. On Windows
  // the library name carries the JVM data model reported by the
  // "sun.arch.data.model" system property ("32" if the property is unset),
  // i.e. "svmlight32" or "svmlight64"; everywhere else it is "svmlight".
  static {
    String osName = System.getProperty("os.name", "null");
    if (osName.toLowerCase().indexOf("windows") > -1) {
      System.loadLibrary("svmlight" + System.getProperty("sun.arch.data.model", "32"));
    } else {
      System.loadLibrary("svmlight");
    }
  }

  /**
   * Reads a set of labeled training vectors from a URL. The format is
   * compatible to the SVM-light training files: one vector per line, written
   * as <code>label dim:value dim:value ... # optional comment</code>, with
   * tokens separated by any run of whitespace.
   * <p>
   * Lines whose first non-blank character is <code>#</code> are skipped as
   * comments. Lines that carry a label but no <code>dim:value</code> pair
   * before a comment are skipped silently.
   * <p>
   * An {@link IOException} raised while reading is not propagated: its stack
   * trace is printed and <code>null</code> is returned.
   *
   * @param file the location of the training file
   * @param numOfLinesToSkip number of leading lines that are ignored
   * @return the parsed vectors in file order, or <code>null</code> if the
   *         file could not be read
   * @throws ParseException if a line consists of fewer than two tokens, if a
   *         feature token contains no <code>':'</code>, or if no vector was
   *         found at all
   * @throws NumberFormatException if a label, dimension or value is not a
   *         valid number
   */
  public static LabeledFeatureVector[] getLabeledFeatureVectorsFromURL(
      URL file, int numOfLinesToSkip) throws ParseException {
    ArrayList<LabeledFeatureVector> data = new ArrayList<LabeledFeatureVector>();
    LabeledFeatureVector[] traindata = null;
    BufferedReader bi = null;
    try {
      bi = new BufferedReader(new InputStreamReader(file.openStream()));
      String line;
      int cnt = 0;
      while ((line = bi.readLine()) != null) {
        cnt++;
        if (cnt <= numOfLinesToSkip) {
          continue;
        }
        LabeledFeatureVector lfv = parseLabeledFeatureVector(line, file, cnt);
        if (lfv != null) {
          data.add(lfv);
        }
      }
      if (data.isEmpty()) {
        throw new ParseException("No labeled features found within " + cnt
            + " lines of file '" + file.toString() + "'.", 0);
      }
      traindata = data.toArray(new LabeledFeatureVector[data.size()]);
    } catch (IOException ioe) {
      // Kept as best-effort for backward compatibility: callers receive null.
      ioe.printStackTrace();
    } finally {
      if (bi != null) {
        try {
          bi.close();
        } catch (IOException e) {
          e.printStackTrace();
        }
      }
    }
    return traindata;
  }

  /**
   * Parses a single line of a training file into a labeled feature vector.
   *
   * @param line the raw line as read from the file
   * @param file the source of the line, used in error messages only
   * @param lineNumber the 1-based line number, used in error messages only
   * @return the parsed vector, or <code>null</code> if the line is a comment
   *         line or holds no <code>dim:value</code> pair
   * @throws ParseException if the line is malformed
   */
  private static LabeledFeatureVector parseLabeledFeatureVector(String line,
      URL file, int lineNumber) throws ParseException {
    String trimmed = line.trim();
    if (trimmed.startsWith("#")) {
      return null;
    }

    // Split on runs of whitespace so that repeated blanks or tabs between
    // tokens do not produce empty tokens.
    String[] tokens = trimmed.split("\\s+");
    if (tokens.length <= 1) {
      throw new ParseException("Parse error in FeatureVector of file '"
          + file.toString() + "' at line: " + lineNumber + ". "
          + " Wrong format of the labeled feature vector?", 0);
    }

    String label = tokens[0];
    ArrayList<String> dimlist = new ArrayList<String>();
    ArrayList<String> vallist = new ArrayList<String>();
    for (int tokencnt = 1; tokencnt < tokens.length; tokencnt++) {
      String dimval = tokens[tokencnt];
      if (dimval.startsWith("#")) {
        // Everything from here to the end of the line is a comment.
        break;
      }
      int idx = dimval.indexOf(':');
      if (idx < 0) {
        throw new ParseException("Parse error in FeatureVector of file '"
            + file.toString() + "' at line: " + lineNumber + ", token: "
            + tokencnt + ". Could not estimate a \"int:double\" pair ?! "
            + file.toString()
            + " contains a wrongly defined feature vector!", 0);
      }
      dimlist.add(dimval.substring(0, idx));
      vallist.add(dimval.substring(idx + 1));
    }

    if (dimlist.isEmpty()) {
      return null;
    }

    // Numbers are converted only after the whole line has been tokenized, so
    // structural errors are reported before number format errors.
    double labelvalue = Double.parseDouble(label);
    int size = dimlist.size();
    int[] dimarray = new int[size];
    double[] valarray = new double[size];
    for (int i = 0; i < size; i++) {
      dimarray[i] = Integer.parseInt(dimlist.get(i));
    }
    for (int i = 0; i < size; i++) {
      valarray[i] = Double.parseDouble(vallist.get(i));
    }
    return new LabeledFeatureVector(labelvalue, dimarray, valarray);
  }

  /**
   * The parameters handed to the most recent training call made through one
   * of the <code>trainModel</code> methods of this class.
   */
  protected TrainingParameters m_tp;

  /**
   * Performs a classification step as a native call to SVM-light. If this
   * method is used exclusively, no additional SVMLightModel object has to be
   * kept in the Java runtime process.
   */
  public native double classifyNative(FeatureVector doc);

  /**
   * Returns the training parameters stored in {@link #m_tp}.
   *
   * @return the stored parameters; <code>null</code> if none have been
   *         assigned yet
   */
  public TrainingParameters getTrainingParameters() {
    return m_tp;
  }

  /**
   * Sorts <code>dims[low..high]</code> in ascending order in place and
   * applies the same permutation to <code>vals</code>, so that every value
   * stays paired with its dimension.
   *
   * @param dims the dimension indices to sort by
   * @param vals the values that accompany <code>dims</code>
   * @param low the first index of the range (inclusive)
   * @param high the last index of the range (inclusive)
   */
  private void quicksort(int[] dims, double[] vals, int low, int high) {
    if (low >= high) {
      return;
    }

    int leftIdx = low;
    int rightIdx = high;
    // Written this way to avoid overflow of (low + high).
    int pivot = low + (high - low) / 2;

    while (leftIdx <= pivot && rightIdx >= pivot) {
      while (leftIdx <= pivot && dims[leftIdx] < dims[pivot]) {
        leftIdx++;
      }
      while (rightIdx >= pivot && dims[rightIdx] > dims[pivot]) {
        rightIdx--;
      }

      int tmp = dims[leftIdx];
      dims[leftIdx] = dims[rightIdx];
      dims[rightIdx] = tmp;
      double tmpd = vals[leftIdx];
      vals[leftIdx] = vals[rightIdx];
      vals[rightIdx] = tmpd;

      leftIdx++;
      rightIdx--;

      // If the swap moved the pivot element itself, follow it to its new
      // position and resume scanning from there.
      if (leftIdx - 1 == pivot) {
        pivot = rightIdx = rightIdx + 1;
      } else if (rightIdx + 1 == pivot) {
        pivot = leftIdx = leftIdx - 1;
      }
    }

    // Recurse once, after the range has been fully partitioned around the
    // pivot. Recursing inside the loop above would re-sort both halves after
    // every single swap.
    quicksort(dims, vals, low, pivot - 1);
    quicksort(dims, vals, pivot + 1, high);
  }

  /**
   * Sorts every non-null vector of the given array in place, in ascending
   * order of its dimensions.
   *
   * @param trainingData the vectors to sort; <code>null</code> elements are
   *        skipped
   */
  private void sort(FeatureVector[] trainingData) {
    for (int i = 0; i < trainingData.length; i++) {
      FeatureVector vector = trainingData[i];
      if (vector != null) {
        quicksort(vector.m_dims, vector.m_vals, 0, vector.size() - 1);
      }
    }
  }

  private native SVMLightModel trainmodel(LabeledFeatureVector[] traindata,
      TrainingParameters p);

  /**
   * Trains a model with a default-constructed set of training parameters.
   *
   * @param trainingData the labeled training vectors; sorted in place first
   *        if {@link #SORT_INPUT_VECTORS} is set
   * @return the model returned by the native training call
   */
  public SVMLightModel trainModel(LabeledFeatureVector[] trainingData) {
    return sortAndTrain(trainingData, new TrainingParameters());
  }

  /**
   * Trains a model with training parameters constructed from an array of
   * string arguments.
   *
   * @param trainingData the labeled training vectors; sorted in place first
   *        if {@link #SORT_INPUT_VECTORS} is set
   * @param argv the arguments passed to the <code>TrainingParameters</code>
   *        constructor
   * @return the model returned by the native training call
   */
  public SVMLightModel trainModel(LabeledFeatureVector[] trainingData,
      String[] argv) {
    return sortAndTrain(trainingData, new TrainingParameters(argv));
  }

  /**
   * Trains a model with the given training parameters.
   *
   * @param trainingData the labeled training vectors; sorted in place first
   *        if {@link #SORT_INPUT_VECTORS} is set
   * @param tp the training parameters to use
   * @return the model returned by the native training call
   */
  public SVMLightModel trainModel(LabeledFeatureVector[] trainingData,
      TrainingParameters tp) {
    return sortAndTrain(trainingData, tp);
  }

  /**
   * Shared implementation of the <code>trainModel</code> overloads: stores
   * the parameters, optionally sorts the vectors and calls the native
   * training routine.
   */
  private SVMLightModel sortAndTrain(LabeledFeatureVector[] trainingData,
      TrainingParameters tp) {
    this.m_tp = tp;
    if (SORT_INPUT_VECTORS) {
      sort(trainingData);
    }
    return trainmodel(trainingData, m_tp);
  }
}