/*
 * File: svmFileReader.java
 * Copyright: Copyright 2008-2012 Fudan University. All Rights Reserved.
 * Description:
 * Modified by: xpqiu
 * Modified on: 2009 Sep 2, 2009 6:19:22 PM
 * Change: created
 *
 * Modified by: <modifier>
 * Modified on: YYYY-MM-DD
 * Tracking ticket: <tracking ticket>
 * Change ticket: <change ticket>
 * Change: <change description>
 */
package edu.fudan.data.reader;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.NoSuchElementException;
import java.util.regex.Pattern;

import edu.fudan.ml.types.Instance;
import edu.fudan.ml.types.sv.HashSparseVector;
import edu.fudan.ml.types.sv.SparseVector;

/**
 * Reads a UTF-8 text file line by line and turns every line into an
 * {@link Instance} whose data is a {@link HashSparseVector}.
 *
 * <p>Each line consists of whitespace-separated tokens. Tokens of the form
 * {@code index:value} become entries of the sparse vector; tokens without a
 * colon are not added to the vector. The class label, when present, is the
 * first or the last token of the line, depending on {@link #type}.
 *
 * <p>Typical use is the iterator protocol:
 * {@code while (r.hasNext()) { Instance inst = r.next(); ... }}.
 *
 * @author xpqiu
 * @version 1.0
 */
public class svmFileReader extends Reader {

    /** Separator between the tokens of a line; compiled once and reused. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /** Line fetched by {@link #hasNext()} and not yet consumed by {@link #next()}; null if none. */
    String content = null;

    /** Underlying reader; null when the file could not be opened or has been fully read. */
    BufferedReader reader;

    /** Position of the class label in a line: positive = first token, negative = last token, 0 = no label. */
    int type = 1;

    /**
     * Opens {@code file} as UTF-8 text with the label expected as the first
     * token of each line.
     *
     * <p>Failure to open the file is reported on standard error and leaves the
     * reader empty: {@link #hasNext()} then returns {@code false}.
     *
     * @param file path of the file to read
     */
    public svmFileReader(String file) {
        FileInputStream in = null;
        try {
            in = new FileInputStream(new File(file));
            reader = new BufferedReader(new InputStreamReader(in, "UTF-8"));
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
            // The stream was opened but could not be wrapped; do not leak it.
            if (in != null) {
                try {
                    in.close();
                } catch (IOException closeFailure) {
                    closeFailure.printStackTrace();
                }
            }
        }
    }

    /**
     * Opens {@code file} as UTF-8 text with an explicit label position.
     *
     * @param file path of the file to read
     * @param type (+1, -1, 0): the class label is the (first token, last token,
     *             absent) on each line
     */
    public svmFileReader(String file, int type) {
        this(file);
        // Previously hard-coded to 1, which silently discarded the argument.
        this.type = type;
    }

    /**
     * Reports whether another line is available, fetching it if necessary.
     *
     * <p>Repeated calls without an intervening {@link #next()} do not consume
     * further lines. When the end of the file is reached the underlying reader
     * is closed.
     *
     * @return {@code true} if a line is buffered for {@link #next()};
     *         {@code false} at end of file, if the file could not be opened,
     *         or if reading failed (the error is printed to standard error)
     */
    public boolean hasNext() {
        if (content != null) {
            // A line is already pending; reading again would skip it.
            return true;
        }
        if (reader == null) {
            return false;
        }
        try {
            content = reader.readLine();
            if (content == null) {
                // Drop the reference first so later calls never touch a closed reader.
                BufferedReader exhausted = reader;
                reader = null;
                exhausted.close();
                return false;
            }
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
        return true;
    }

    /**
     * Parses the pending line into an {@link Instance} and consumes it.
     *
     * <p>If no line is pending, one is fetched via {@link #hasNext()} first.
     * Tokens that do not contain {@code ':'} are not added to the vector.
     *
     * @return an instance whose data is the sparse vector of the line and whose
     *         target is the label token, or {@code null} when {@link #type} is 0
     *         (NOTE(review): assumes {@code Instance} accepts a null target - confirm)
     * @throws NoSuchElementException if there are no more lines
     * @throws NumberFormatException  if an {@code index:value} token has a
     *                                non-integer index or a non-numeric value
     */
    public Instance next() {
        if (content == null && !hasNext()) {
            throw new NoSuchElementException("no more lines to read");
        }
        // Trim so leading whitespace does not produce an empty first token.
        String line = content.trim();
        content = null;

        String[] tokens = WHITESPACE.split(line);

        // Select the label token and the token range holding the features.
        String label = null;
        int begin = 0;
        int end = tokens.length;
        if (type > 0) {
            label = tokens[0];
            begin = 1;
        } else if (type < 0) {
            end = tokens.length - 1;
            label = tokens[end];
        }

        HashSparseVector sv = new HashSparseVector();
        for (int i = begin; i < end; i++) {
            String[] pair = tokens[i].split(":");
            if (pair.length > 1) {
                int idx = Integer.parseInt(pair[0]);
                float value = Float.parseFloat(pair[1]);
                sv.put(idx, value);
            }
        }
        return new Instance(sv, label);
    }
}