package org.pentaho.platform.dataaccess.datasource.wizard.csv;
import java.util.ArrayList;
import java.util.List;
import org.pentaho.di.core.logging.LogWriter;
import org.pentaho.di.trans.steps.textfileinput.TextFileInputMeta;
public class CsvInspector {
protected LogWriter log;
public CsvInspector() {
log = LogWriter.getInstance();
}
public int determineFileFormat( String line ) {
int type = -1;
int n = line.length();
char c1 = 0;
char c2 = 0;
if( n > 0 ) {
c1 = line.charAt(n-1);
if( n > 1 ) {
c2 = line.charAt(n-2);
}
if( c1 == '\n' || c1 == '\r' ) {
if( c2 == '\n' || c2 == '\r') {
type = TextFileInputMeta.FILE_FORMAT_DOS;
} else {
type = TextFileInputMeta.FILE_FORMAT_UNIX;
}
}
}
return type;
}
public List<String> getColumnData(int columnNumber, String data[][]) {
List<String> dataSample = new ArrayList<String>(data.length);
for (String[] row : data) {
dataSample.add(row[columnNumber]);
}
return dataSample;
}
public String guessDelimiter( String line ) {
int numTabs = 0;
int numCommas = 0;
int numPipes = 0;
int numTildas = 0;
int numColons = 0;
int numSemiColons = 0;
for( int idx=0; idx<line.length(); idx++ ) {
char c = line.charAt( idx );
switch (c) {
case '\t' : numTabs++; break;
case ',' : numCommas++; break;
case '|' : numPipes++; break;
case '~' : numTildas++; break;
case ':' : numColons++; break;
case ';' : numSemiColons++; break;
}
}
int max = Math.max(numTabs, numCommas);
max = Math.max( max, numPipes);
max = Math.max( max, numTildas);
max = Math.max( max, numColons);
max = Math.max( max, numSemiColons);
if( max == 0 ) {
return null;
}
if( max == numCommas ) {
return ","; //$NON-NLS-1$
}
if( max == numTabs ) {
return "\t"; //$NON-NLS-1$
}
if( max == numPipes ) {
return "|"; //$NON-NLS-1$
}
if( max == numTildas ) {
return "~"; //$NON-NLS-1$
}
if( max == numColons ) {
return ":"; //$NON-NLS-1$
}
if( max == numSemiColons ) {
return ";"; //$NON-NLS-1$
}
return null;
}
}