package com.yahoo.labs.samoa.moa.streams;

/*
 * #%L
 * SAMOA
 * %%
 * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;

import com.github.javacliparser.FileOption;
import com.github.javacliparser.IntOption;
import com.yahoo.labs.samoa.instances.Instances;
import com.yahoo.labs.samoa.instances.InstancesHeader;
import com.yahoo.labs.samoa.moa.core.InputStreamProgressMonitor;
import com.yahoo.labs.samoa.moa.core.InstanceExample;
import com.yahoo.labs.samoa.moa.core.ObjectRepository;
import com.yahoo.labs.samoa.moa.options.AbstractOptionHandler;
import com.yahoo.labs.samoa.moa.tasks.TaskMonitor;

/**
 * Stream reader of ARFF files.
 *
 * <p>Instances are read one at a time from the file named by {@link #arffFileOption}. The reader
 * keeps a one-instance look-ahead: {@link #nextInstance()} hands out the instance read by the
 * previous call and immediately reads the following one, which is how end of file is detected
 * for {@link #hasMoreInstances()}.
 *
 * <p>The file is opened lazily: the first read after construction or after
 * {@link #prepareForUseImpl(TaskMonitor, ObjectRepository)} (re)opens the file and parses its
 * header.
 *
 * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz)
 * @version $Revision: 7 $
 */
public class ArffFileStream extends AbstractOptionHandler implements InstanceStream {

  @Override
  public String getPurposeString() {
    return "A stream read from an ARFF file.";
  }

  private static final long serialVersionUID = 1L;

  /** ARFF file to read the stream from. */
  public FileOption arffFileOption = new FileOption("arffFile", 'f',
      "ARFF file to load.", null, "arff", false);

  /**
   * Class attribute position: a negative value selects the last attribute, a positive value
   * {@code k} selects attribute {@code k - 1} (i.e. the option is 1-based), and 0 leaves the
   * class index untouched.
   */
  public IntOption classIndexOption = new IntOption(
      "classIndex",
      'c',
      "Class index of data. 0 for none or -1 for last attribute in file.",
      -1, -1, Integer.MAX_VALUE);

  /** Header plus a single-instance buffer used while reading; assigned by {@link #reset()}. */
  protected Instances instances;

  /** Reader over the ARFF file; {@code null} before the first open and after end of file. */
  transient protected Reader fileReader;

  /** Set once a read attempt found no further instance in the file. */
  protected boolean hitEndOfFile;

  /** Look-ahead instance: the one the next call to {@link #nextInstance()} will return. */
  protected InstanceExample lastInstanceRead;

  /** Number of instances read since the file was last (re)opened. */
  protected int numInstancesRead;

  /** Tracks how much of the file has been consumed; assigned by {@link #reset()}. */
  transient protected InputStreamProgressMonitor fileProgressMonitor;

  /** Whether the first read since construction / preparation has already happened. */
  protected boolean hasStarted;

  /** Creates an unconfigured stream; the options must be set before use. */
  public ArffFileStream() {
  }

  /**
   * Creates a stream over the given file and opens it immediately.
   *
   * @param arffFileName path of the ARFF file to read
   * @param classIndex class index, interpreted as described on {@link #classIndexOption}
   * @throws RuntimeException if the file cannot be opened
   */
  public ArffFileStream(String arffFileName, int classIndex) {
    this.arffFileOption.setValue(arffFileName);
    this.classIndexOption.setValue(classIndex);
    this.hasStarted = false;
    restart();
  }

  /**
   * Puts the stream back into its "not yet started" state. The file itself is opened lazily by
   * the first subsequent read.
   */
  @Override
  public void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) {
    this.hasStarted = false;
    this.lastInstanceRead = null;
    // A stale end-of-file flag would make hasMoreInstances() report an exhausted stream even
    // though the next read reopens the file from the beginning.
    this.hitEndOfFile = false;
  }

  /**
   * Returns a header built from the currently loaded ARFF header.
   *
   * @return a new header wrapping the instances structure read from the file
   * @throws RuntimeException if the header had not been loaded yet and the file cannot be opened
   */
  @Override
  public InstancesHeader getHeader() {
    if (this.instances == null) {
      // The file is normally opened by the first read; load the header on demand so callers
      // asking for it earlier do not end up wrapping a null structure.
      try {
        reset();
      } catch (IOException ioe) {
        throw new RuntimeException("ArffFileStream failed to read header.", ioe);
      }
    }
    return new InstancesHeader(this.instances);
  }

  /**
   * Estimates how many instances are left by extrapolating from the fraction of the file
   * consumed so far.
   *
   * @return the estimated number of remaining instances, or -1 when no estimate is possible
   *     (file not opened yet, or nothing read so far)
   */
  @Override
  public long estimatedRemainingInstances() {
    if (this.fileProgressMonitor == null) {
      // The monitor only exists once the file has been opened.
      return -1;
    }
    double progressFraction = this.fileProgressMonitor.getProgressFraction();
    if ((progressFraction > 0.0) && (this.numInstancesRead > 0)) {
      return (long) ((this.numInstancesRead / progressFraction) - this.numInstancesRead);
    }
    return -1;
  }

  /**
   * @return {@code false} once a read attempt has reached the end of the file
   */
  @Override
  public boolean hasMoreInstances() {
    return !this.hitEndOfFile;
  }

  /**
   * Returns the next instance and pre-reads the following one.
   *
   * @return the next instance, or {@code null} if the file contains no instance at all
   */
  @Override
  public InstanceExample nextInstance() {
    if (this.lastInstanceRead == null) {
      // Nothing buffered yet (fresh or restarted stream): fill the look-ahead first.
      readNextInstanceFromFile();
    }
    InstanceExample prevInstance = this.lastInstanceRead;
    // Read one ahead so that hasMoreInstances() is accurate before the next call.
    this.hitEndOfFile = !readNextInstanceFromFile();
    return prevInstance;
  }

  @Override
  public boolean isRestartable() {
    return true;
  }

  /**
   * Reopens the file so that reading starts again from the first instance.
   *
   * @throws RuntimeException if the file cannot be reopened
   */
  @Override
  public void restart() {
    try {
      reset();
      // The file has been reopened, so a previously exhausted stream has data again.
      this.hitEndOfFile = false;
    } catch (IOException ioe) {
      throw new RuntimeException("ArffFileStream restart failed.", ioe);
    }
  }

  /**
   * Reads the next instance into {@link #lastInstanceRead}, (re)opening the file first if the
   * stream has not been started yet.
   *
   * @return {@code true} if an instance was read, {@code false} at end of file
   * @throws RuntimeException if the file cannot be opened or read
   */
  protected boolean readNextInstanceFromFile() {
    if (this.hasStarted) {
      return getNextInstanceFromFile();
    }
    boolean ret;
    try {
      reset();
      ret = getNextInstanceFromFile();
      this.hitEndOfFile = !ret;
    } catch (IOException ioe) {
      throw new RuntimeException("ArffFileStream restart failed.", ioe);
    }
    this.hasStarted = true;
    return ret;
  }

  @Override
  public void getDescription(StringBuilder sb, int indent) {
    // TODO Auto-generated method stub
  }

  /**
   * Closes any open reader, reopens the ARFF file, parses its header, applies the configured
   * class index and clears the read counters.
   *
   * @throws IOException if closing the old reader or opening the file fails
   */
  private void reset() throws IOException {
    if (this.fileReader != null) {
      this.fileReader.close();
    }
    InputStream fileStream = new FileInputStream(this.arffFileOption.getFile());
    this.fileProgressMonitor = new InputStreamProgressMonitor(fileStream);
    this.fileReader = new BufferedReader(new InputStreamReader(this.fileProgressMonitor));

    int classIndex = this.classIndexOption.getValue();
    this.instances = new Instances(this.fileReader, 1, classIndex);
    if (classIndex < 0) {
      // Negative option value: use the last attribute as the class.
      this.instances.setClassIndex(this.instances.numAttributes() - 1);
    } else if (classIndex > 0) {
      // Positive option values are 1-based.
      this.instances.setClassIndex(classIndex - 1);
    }
    this.numInstancesRead = 0;
    this.lastInstanceRead = null;
  }

  /**
   * Reads one instance from the open reader into {@link #lastInstanceRead}. When the file is
   * exhausted the reader is closed and dropped.
   *
   * @return {@code true} if an instance was read, {@code false} if there is none left
   * @throws RuntimeException if reading from or closing the file fails
   */
  private boolean getNextInstanceFromFile() {
    if (this.fileReader == null) {
      // The reader is closed and cleared at end of file; later calls simply report
      // "no more data" instead of reading from a null reader.
      return false;
    }
    try {
      if (this.instances.readInstance(this.fileReader)) {
        this.lastInstanceRead = new InstanceExample(this.instances.instance(0));
        this.instances.delete(); // keep instances clean
        this.numInstancesRead++;
        return true;
      }
      this.fileReader.close();
      this.fileReader = null;
      return false;
    } catch (IOException ioe) {
      throw new RuntimeException(
          "ArffFileStream failed to read instance from stream.", ioe);
    }
  }
}