package org.activityinfo.ui.client.component.importDialog.data; import java.util.List; /** * Guesses the delimiter used in an a text file */ public class DelimiterGuesser { private static final char[] POSSIBLE_DELIMITERS = new char[]{',', ';', '\t', '|'}; private static final int ROWS_TO_SCAN = 10; private static final double MATCH_RATE_IN_PERCENT = 1.0; private final String text; private int firstNotMatchedRow = -1; private boolean isDataSetOfOneColumn = false; public DelimiterGuesser(String text) { this.text = text; } public char guess() { // first, look for a delimiter that divides the columns into // a consistent number of columns > 1 for (char delimiter : POSSIBLE_DELIMITERS) { if (matchColumnCount(delimiter)) { return delimiter; } } // if not, then assume that this is a dataset of 1 column isDataSetOfOneColumn = true; return '\0'; } private boolean matchColumnCount(char delimiter) { // we expect a delimiter to divide the input data set into // a more or less similar number of columns List<PastedRow> rows = new RowParser(text, delimiter) .parseRows(ROWS_TO_SCAN); int numColumns = -1; int matchedRowCount = 1; // start with 1 for first match for (PastedRow row : rows) { if (numColumns < 0) { numColumns = row.getColumnCount(); } else if(numColumns == row.getColumnCount()) { matchedRowCount++; } else { if (firstNotMatchedRow < 0) { firstNotMatchedRow = rows.indexOf(row); } } } if (numColumns == 1) { return false; } double actualMatchPercent = (double) matchedRowCount / (double) rows.size(); return actualMatchPercent >= MATCH_RATE_IN_PERCENT ; } public int getFirstNotMatchedRow() { return firstNotMatchedRow; } public boolean isDataSetOfOneColumn() { return isDataSetOfOneColumn; } }