/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.internal.csv.writer;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
/**
 * Tries to guess a {@link CSVConfig} by sampling the first lines of an
 * {@link InputStream}.
 *
 * <p>Typical use: construct with (or later set) an input stream and call
 * {@link #guess()}. The stream is read as UTF-8 and is closed by
 * {@link #guess()}.
 */
public class CSVConfigGuesser {

  /** Maximum number of lines sampled from the stream by {@link #guess()}. */
  private static final int MAX_SAMPLE_LINES = 10;

  /** The stream to read. */
  private InputStream in;

  /**
   * Whether the file has a field header. Defaults to false.
   * Stored for callers/subclasses; the guessing code in this class does not
   * consult it.
   */
  private boolean hasFieldHeader = false;

  /** The config being filled in by the current guess. */
  protected CSVConfig config;

  /**
   * Creates a guesser with an empty config and no input stream. An input
   * stream must be supplied via {@link #setInputStream(InputStream)} before
   * {@link #guess()} can analyse anything.
   */
  public CSVConfigGuesser() {
    this.config = new CSVConfig();
  }

  /**
   * Creates a guesser that reads from the given stream.
   *
   * @param in the inputstream to guess from
   */
  public CSVConfigGuesser(InputStream in) {
    this();
    setInputStream(in);
  }

  /**
   * Sets the stream that {@link #guess()} will read from.
   *
   * @param in the inputstream to guess from
   */
  public void setInputStream(InputStream in) {
    this.in = in;
  }

  /**
   * Allow override.
   *
   * @return the inputstream that was set.
   */
  protected InputStream getInputStream() {
    return in;
  }

  /**
   * Guess the config based on the first 10 (or less when less available)
   * lines of a CSV file.
   *
   * <p>The stream returned by {@link #getInputStream()} is read as UTF-8 and
   * closed before this method returns. Failures while reading or analysing
   * are reported on standard error and do not propagate; in that case the
   * config is returned in whatever state it had reached. If no stream is
   * available, no analysis is performed.
   *
   * <p>After the call the guesser holds a fresh, empty config, so it can be
   * reused once a new stream has been set.
   *
   * @return the guessed config.
   */
  public CSVConfig guess() {
    InputStream stream = getInputStream();
    if (stream != null) {
      // try-with-resources closes the reader and, through it, the stream that
      // was actually read (which may differ from the 'in' field if
      // getInputStream() is overridden).
      try (BufferedReader reader =
               new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))) {
        String[] sample = new String[MAX_SAMPLE_LINES];
        int count = 0;
        String line;
        // Check the bound first: never index past the array and never consume
        // a line that will not be stored.
        while (count < MAX_SAMPLE_LINES && (line = reader.readLine()) != null) {
          sample[count++] = line;
        }
        if (count < MAX_SAMPLE_LINES) {
          // remove nulls from the array, so we can skip the null checking.
          String[] trimmed = new String[count];
          System.arraycopy(sample, 0, trimmed, 0, count);
          sample = trimmed;
        }
        analyseLines(sample);
      } catch (Exception e) {
        // Guessing is best-effort: report the problem and return what we have.
        // Exception (not just IOException) is caught on purpose because
        // analyseLines() is overridable.
        e.printStackTrace();
      }
    }
    CSVConfig result = config;
    // Hand the result to the caller and start over with a clean config so a
    // later guess() does not fail on a null config.
    config = new CSVConfig();
    return result;
  }

  /**
   * Runs all guessing steps over the sampled lines. The fixed-width guess runs
   * first because the separator guess depends on its outcome.
   *
   * @param lines the sampled lines; must not contain null entries
   */
  protected void analyseLines(String[] lines) {
    guessFixedWidth(lines);
    guessFieldSeparator(lines);
  }

  /**
   * Guess if this file is fixedwidth.
   * Just basing the fact on all lines being of the same length as the first
   * line. Zero lines or a single line are therefore reported as fixed width.
   *
   * @param lines the sampled lines; must not contain null entries
   */
  protected void guessFixedWidth(String[] lines) {
    // assume fixedlength until a line of a different length shows up.
    config.setFixedWidth(true);
    for (int i = 1; i < lines.length; i++) {
      if (lines[i].length() != lines[0].length()) {
        config.setFixedWidth(false);
        break; // one mismatch decides the outcome
      }
    }
  }

  /**
   * Guesses the field layout. For configs already flagged as fixed width this
   * delegates to {@link #guessFixedWidthSeparator(String[])}; guessing a
   * delimiter for non-fixed-width input is not implemented, so nothing is
   * changed in that case.
   *
   * @param lines the sampled lines
   */
  protected void guessFieldSeparator(String[] lines) {
    if (config.isFixedWidth()) {
      guessFixedWidthSeparator(lines);
    }
    // Non-fixed-width input: no delimiter detection is implemented.
  }

  /**
   * Derives fields for fixed-width input. Every column at which all sampled
   * lines carry the same character is treated as a field boundary, and a
   * {@link CSVField} named {@code field<N>} (N being the number of fields
   * already in the config plus one) is added for it.
   *
   * <p>All lines are expected to have the length of the first line. Empty
   * input adds no fields.
   *
   * @param lines the sampled lines, all of equal length
   */
  protected void guessFixedWidthSeparator(String[] lines) {
    if (lines.length == 0) {
      return; // nothing sampled, nothing to derive
    }
    // keep track of the fieldlength
    // NOTE(review): previousMatch is only ever moved from -1 to 0, so the size
    // computed below always equals the column index. It looks like it was
    // meant to advance to the last boundary; left as-is because the intended
    // width formula is unclear - confirm before changing.
    int previousMatch = -1;
    for (int i = 0; i < lines[0].length(); i++) {
      char reference = lines[0].charAt(i);
      boolean charMatches = true;
      for (int j = 1; j < lines.length; j++) {
        if (reference != lines[j].charAt(i)) {
          charMatches = false;
          break;
        }
      }
      if (charMatches) {
        if (previousMatch == -1) {
          previousMatch = 0;
        }
        CSVField field = new CSVField();
        // Parenthesised so the index is added numerically rather than being
        // appended as text ("field1", not "field01").
        field.setName("field" + (config.getFields().length + 1));
        field.setSize(i - previousMatch);
        config.addField(field);
      }
    }
  }

  /**
   * @return if the field uses a field header. Defaults to false.
   */
  public boolean hasFieldHeader() {
    return hasFieldHeader;
  }

  /**
   * Specify if the CSV file has a field header
   *
   * @param hasFieldHeader true or false
   */
  public void setHasFieldHeader(boolean hasFieldHeader) {
    this.hasFieldHeader = hasFieldHeader;
  }
}