/**Copyright 2010 Research Studios Austria Forschungsgesellschaft mBH * * This file is part of easyrec. * * easyrec is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * easyrec is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with easyrec. If not, see <http://www.gnu.org/licenses/>. */ package org.easyrec.utils.io.tabular.input.impl; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.easyrec.utils.io.tabular.input.*; import java.io.*; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; /** * Reads tabular data into lists of String. There are three ways to use this * class: via its Iterator methods, via its readAll() method or by implementing * a TabularInputObserver (and calling visitAll() or using one of the other two * methods as well). * <p> * Note: empty lines are quietly omitted. * </p> * <p> * <b>Company: </b> SAT, Research Studios Austria * </p> * <p/> * <p> * <b>Copyright: </b> (c) 2007 * </p> * <p/> * <p> * <b>last modified:</b><br/> * $Author: szavrel $<br/> * $Date: 2011-03-22 15:26:04 +0100 (Di, 22 Mär 2011) $<br/> * $Revision: 17973 $ * </p> * * @author Florian Kleedorfer */ public abstract class AbstractTabularInput implements TabularInput { private static final String DEFAULT_CHARSET = "ISO-8859-1"; protected BufferedReader reader; protected Log logger = LogFactory.getLog(getClass()); protected int rowNum = 0; protected String currentLine = null; private List<String> currentFields = null; protected int columnCount = 0; // this is just a copy of the first currentFields list private List<String> columnNames = null; protected Set<TabularInputObserver> observers = new HashSet<TabularInputObserver>(); private boolean endOfStream = false; /* * (non-Javadoc) * * @see * at.researchstudio.sat.utils.io.tabular.input.TabularInput#addObserver * (at.researchstudio.sat.utils.io.tabular.input.TabularInputObserver) */ public void addObserver(TabularInputObserver observer) { observers.add(observer); } /* * (non-Javadoc) * * @see * at.researchstudio.sat.utils.io.tabular.input.TabularInput#removeObserver * (at.researchstudio.sat.utils.io.tabular.input.TabularInputObserver) */ public void removeObserver(TabularInputObserver observer) { observers.remove(observer); } /* * (non-Javadoc) * * @see * at.researchstudio.sat.utils.io.tabular.input.TabularInput#clearObservers * () */ public void clearObservers() { observers.clear(); } /* * (non-Javadoc) * * @see java.util.Iterator#hasNext() */ public boolean hasNext() { if (this.reader == null) throw new IllegalStateException("No reader available. Did you call setSource(...)?"); boolean ret = this.currentFields != null; if (!ret) fireOnFinish(); return ret; } /* * (non-Javadoc) * * @see java.util.Iterator#next() */ public List<String> next() { if (this.reader == null) throw new IllegalStateException("No reader available. Did you call setSource(...)?"); List<String> ret = this.currentFields; prepareNext(); return ret; } /* (non-Javadoc) * @see at.researchstudio.sat.utils.io.tabular.input.TabularInput#nextMappedObject(at.researchstudio.sat.utils.io.tabular.input.TabularInputRowMapper) */ public <T> T next(TabularInputRowMapper<T> mapper) { //subtracting 1 from rowNum as the call to next() advances the num once more, so we are //here 1 ahead return mapper.mapRow(next(), this.rowNum - 1, this.rowNum - 1 == 1, !hasNext()); } /** * Reads all data from the current position on into a List<List<String>>. * * @return */ public List<List<String>> readAll() { List<List<String>> data = new ArrayList<List<String>>(); while (hasNext()) { data.add(next()); } return data; } /* (non-Javadoc) * @see at.researchstudio.sat.utils.io.tabular.input.TabularInput#mapRows(at.researchstudio.sat.utils.io.tabular.input.TabularInputRowMapper) */ public <T> List<T> readAll(TabularInputRowMapper<T> mapper) { List<T> data = new ArrayList<T>(); while (hasNext()) { T mapped = next(mapper); if (mapped != null) data.add(mapped); } return data; } /** * Reads all data from the current position but does not return anything. * This method is only useful in connection with registering observers. * * @return */ public void visitAll() { while (hasNext()) { next(); } } /* * (non-Javadoc) * * @see java.util.Iterator#remove() */ public void remove() { throw new UnsupportedOperationException("This iterator does not support remove()"); } private void prepareNext() throws TabularInputException { try { if (this.endOfStream) return; this.rowNum++; while ("".equals(this.currentLine = reader.readLine())) ; if (this.currentLine == null) { this.currentFields = null; this.endOfStream = true; fireOnFinish(); freeSourceSpecificResources(); return; } this.currentFields = parseLine(this.currentLine); if (rowNum == 1) { this.columnNames = new ArrayList<String>(this.currentFields); this.columnCount = columnNames.size(); this.fireOnStart(); } else if (this.currentFields.size() != this.columnCount) { throw new InconsistentFieldCountException(rowNum, -1, this.columnCount, this.currentFields.size()); } fireOnDataRow(); } catch (TabularInputException e) { throw e; } catch (Exception e) { this.fireOnAbort(); this.currentFields = null; closeReader(); throw new TabularInputException(e, this.rowNum, -1); } } /* * (non-Javadoc) * * @see at.researchstudio.sat.utils.io.tabular.input.TabularInput#abort() */ public void freeSourceSpecificResources() { closeReader(); this.columnNames = null; this.currentFields = null; this.currentLine = null; } /** * */ private void closeReader() throws TabularInputException { if (this.reader != null) { try { this.reader.close(); } catch (IOException e) { throw new TabularInputException(e, this.rowNum, -1); } } } /* * (non-Javadoc) * * @see * at.researchstudio.sat.utils.io.tabular.input.TabularInput#setSource(java * .io.InputStream, java.lang.String) */ public void setSource(InputStream stream, String charsetName) { this.rowNum = 0; this.currentLine = null; this.currentFields = null; this.columnNames = null; this.endOfStream = false; this.columnCount = 0; try { this.reader = new BufferedReader(new InputStreamReader(stream, charsetName)); } catch (UnsupportedEncodingException e) { throw new TabularInputException(e, -1, -1); } prepareNext(); } /* * (non-Javadoc) * * @see * at.researchstudio.sat.utils.io.tabular.input.TabularInput#process(java * .io.File) */ public void setSource(File file, String charsetName) throws TabularInputException { try { setSource(new FileInputStream(file), charsetName); } catch (FileNotFoundException e) { throw new TabularInputException(e, -1, -1); } } /* * (non-Javadoc) * * @see * at.researchstudio.sat.utils.io.tabular.input.TabularInput#process(java * .io.File) */ public void setSource(File file) throws TabularInputException { try { setSource(new FileInputStream(file), DEFAULT_CHARSET); } catch (FileNotFoundException e) { throw new TabularInputException(e, 0, 0); } } protected abstract List<String> parseLine(String line); protected void fireOnStart() { for (TabularInputObserver o : this.observers) { o.onStart(this.currentFields.size(), this.columnNames); } } protected void fireOnDataRow() { for (TabularInputObserver o : this.observers) { o.onDataRow(this.rowNum, this.currentFields); } } protected void fireOnFinish() { for (TabularInputObserver o : this.observers) { o.onFinish(this.rowNum); } } protected void fireOnAbort() { for (TabularInputObserver o : this.observers) { o.onAbort(this.rowNum); } } /* * (non-Javadoc) * * @see * at.researchstudio.sat.utils.io.tabular.input.TabularInput#getColumnNames * () */ public List<String> getColumnNames() { return this.columnNames; } }