/** FileData.java
*
* Interface for reading and writing fixed column record data.
*
* @author Sunita Sarawagi
* @since 1.0
* @version 1.3
*/
package iitb.MaxentClassifier;
import java.io.BufferedReader;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.StringTokenizer;
import java.util.Vector;
public class FileData {
BufferedReader inpStream;
DataDesc dataDescriptor;
public void openForRead(String fileName, DataDesc data) throws IOException {
inpStream=new BufferedReader(new FileReader(fileName));
dataDescriptor = data;
}
boolean readNext(DataRecord dataRecord) throws IOException {
return readNext(inpStream,dataDescriptor,dataRecord);
}
static boolean readNext(BufferedReader in, DataDesc dataDesc, DataRecord dataRecord) throws IOException {
String line;
if ((line=in.readLine())!=null) {
StringTokenizer strTok = new StringTokenizer(line, dataDesc.colSep);
for (int colNum = 0; strTok.hasMoreTokens() && (colNum < dataDesc.numColumns); colNum++) {
dataRecord.vals[colNum] = (float)Double.parseDouble(strTok.nextToken());
}
assert (strTok.hasMoreTokens());
dataRecord.label = Integer.parseInt(strTok.nextToken());
assert ((dataRecord.label >= 0) && (dataRecord.label < dataDesc.numLabels));
return true;
}
return false;
}
public static Vector<DataRecord> read(String fileName, DataDesc dataDesc) throws IOException {
Vector<DataRecord> allRecords = new Vector<DataRecord>();
BufferedReader in=new BufferedReader(new FileReader(fileName));
DataRecord dataRecord = new DataRecord(dataDesc.numColumns);
while (readNext(in,dataDesc,dataRecord)) {
allRecords.add(new DataRecord(dataRecord));
}
return allRecords;
}
static void write(String fileName, Vector<DataRecord> allRecords, int numColumns, String colSep) throws IOException {
PrintWriter out=new PrintWriter(new FileOutputStream(fileName));
for(Enumeration<DataRecord> e = allRecords.elements(); e.hasMoreElements();) {
DataRecord dataRecord = (DataRecord)e.nextElement();
for (int i = 0; i < numColumns; i++) {
out.print(dataRecord.getColumn(i) + colSep);
}
out.println(dataRecord.y(0));
}
out.close();
}
class FileIterator implements Iterator<DataRecord> {
DataRecord dataRecord;
FileIterator() {
dataRecord = new DataRecord(dataDescriptor.numColumns);
}
public boolean hasNext() {
try {
return readNext(dataRecord);
} catch (IOException e) {
e.printStackTrace();
}
return false;
}
public DataRecord next() {
return dataRecord;
}
public void remove() {
}
}
public Iterator<DataRecord> iterator() {
return new FileIterator();
}
};