/*! * This program is free software; you can redistribute it and/or modify it under the * terms of the GNU Lesser General Public License, version 2.1 as published by the Free Software * Foundation. * * You should have received a copy of the GNU Lesser General Public License along with this * program; if not, you can obtain a copy at http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html * or from the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. * * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * See the GNU Lesser General Public License for more details. * * Copyright (c) 2002-2013 Pentaho Corporation.. All rights reserved. */ package org.pentaho.platform.dataaccess.datasource.wizard.csv; import java.util.ArrayList; import java.util.List; import org.pentaho.di.trans.steps.textfileinput.TextFileInputMeta; public class CsvInspector { public int determineFileFormat( String line ) { int type = -1; int n = line.length(); char c1 = 0; char c2 = 0; if ( n > 0 ) { c1 = line.charAt( n - 1 ); if ( n > 1 ) { c2 = line.charAt( n - 2 ); } if ( c1 == '\n' || c1 == '\r' ) { if ( c2 == '\n' || c2 == '\r' ) { type = TextFileInputMeta.FILE_FORMAT_DOS; } else { type = TextFileInputMeta.FILE_FORMAT_UNIX; } } } return type; } public List<String> getColumnData( int columnNumber, String[][] data ) { List<String> dataSample = new ArrayList<String>( data.length ); for ( String[] row : data ) { dataSample.add( row[ columnNumber ] ); } return dataSample; } public String guessDelimiter( String line ) { int numTabs = 0; int numCommas = 0; int numPipes = 0; int numTildas = 0; int numColons = 0; int numSemiColons = 0; for ( int idx = 0; idx < line.length(); idx++ ) { char c = line.charAt( idx ); switch( c ) { case '\t': numTabs++; break; case ',': numCommas++; break; case '|': numPipes++; break; case '~': numTildas++; break; case ':': numColons++; break; case ';': numSemiColons++; break; } } int max = Math.max( numTabs, numCommas ); max = Math.max( max, numPipes ); max = Math.max( max, numTildas ); max = Math.max( max, numColons ); max = Math.max( max, numSemiColons ); if ( max == 0 ) { return null; } if ( max == numCommas ) { return ","; //$NON-NLS-1$ } if ( max == numTabs ) { return "\t"; //$NON-NLS-1$ } if ( max == numPipes ) { return "|"; //$NON-NLS-1$ } if ( max == numTildas ) { return "~"; //$NON-NLS-1$ } if ( max == numColons ) { return ":"; //$NON-NLS-1$ } if ( max == numSemiColons ) { return ";"; //$NON-NLS-1$ } return null; } }