package org.activityinfo.ui.client.component.importDialog.data; import com.google.common.collect.Lists; import java.util.List; /** * Parses delimited text files into rows and columns */ public class RowParser { public static final char QUOTE_CHAR = '"'; private String text; private int length; private int currentPos = 0; private char delimiter; private int rowIndex; private int maxRowCount = Integer.MAX_VALUE; private boolean skipBlankRows = true; public RowParser(String text, char delimiter) { this.text = text; this.length = text.length(); this.delimiter = delimiter; } public RowParser withMaxRows(int maxRowCount) { this.maxRowCount = maxRowCount; return this; } public List<PastedRow> parseAllRows() { return parseRows(Integer.MAX_VALUE); } public List<PastedRow> parseRows(int numberOfRowsToParse) { if (numberOfRowsToParse <= 0) { throw new IllegalArgumentException("Number of rows to count must be more than 0."); } List<PastedRow> rows = Lists.newArrayList(); int count = 0; while(hasNextRow() && rows.size() < maxRowCount && count < numberOfRowsToParse) { PastedRow parsedRow = readNextLine(); if (parsedRow != null) { rows.add(parsedRow); count++; } } return rows; } public boolean hasNextRow() { return !eof(); } private PastedRow readNextLine() { List<Integer> offsets = Lists.newArrayList(); offsets.add(currentPos); while(advanceToNextColumn()) { offsets.add(currentPos); } offsets.add(currentPos); if (isEmptyRow(offsets)) { // skip if row is empty return null; } return new PastedRow(text, offsets, rowIndex++); } private boolean isEmptyRow(List<Integer> offsets) { final int size = offsets.size(); if (size > 2) { return false; } else if (size == 2 && (offsets.get(0) + 1) == offsets.get(1)) { return true; } return false; } private boolean advanceToNextColumn() { if(currentPos >= text.length()) { return false; } if(text.charAt(currentPos) == QUOTE_CHAR) { currentPos++; return advanceThroughQuotedColumn(); } char c; while(true) { if(currentPos == length) { c = '\n'; currentPos++; // advance position as if there had been a trailing newline } else { c = text.charAt(currentPos++); } if(c == delimiter) { return true; // more to come } else if(c == '\n') { return false; } } } private boolean advanceThroughQuotedColumn() { while(true) { if(currentPos == length) { // unterminated quote, handle gracefully // advance two characters for the terminating quote // and the missing newline currentPos = currentPos + 2; return false; } char c = text.charAt(currentPos++); if(c == QUOTE_CHAR) { // typically quotes withing the column are escaped by being doubled // but more generally, we only consider it the end of the column if it's followed // by a column or row terminator if(currentPos == length) { return false; } char nextChar = text.charAt(currentPos++); if(nextChar == '\n' || nextChar == '\r') { //currentPos++; return false; } else if(nextChar == delimiter) { return true; } } } } public boolean eof() { return currentPos >= length; } }