package timeflow.format.file;
import java.util.*;
import timeflow.util.*;
import timeflow.model.Display;
/**
 * Reads and writes delimiter-separated text (CSV, TSV, ...) in which a field
 * may be wrapped in double quotes and a literal quote inside a quoted field is
 * written as two quotes.
 *
 * The reader is deliberately lenient so that text pasted from Excel parses;
 * see {@link #read(String)} for the exact rules and the known ambiguity.
 */
public class DelimitedText {

    /** Character that separates the fields of a row; never the double quote. */
    private char delimiter;

    /**
     * Creates a reader/writer for the given field delimiter.
     *
     * @param delimiter the character separating fields within a row
     * @throws IllegalArgumentException if the delimiter is the double-quote character
     */
    public DelimitedText(char delimiter)
    {
        if (delimiter=='"')
            throw new IllegalArgumentException("Can't use quote as delimiter.");
        this.delimiter=delimiter;
    }

    /** Returns true for the two characters that may form a line break: '\n' and '\r'. */
    private static boolean isBreak(char c)
    {
        return c=='\n' || c=='\r';
    }

    /**
     * Returns true for every character that ends a row when it appears outside
     * quotes: '\n', '\r' and the form feed '\f'.
     */
    private static boolean isRowEnd(char c)
    {
        return isBreak(c) || c=='\f';
    }

    /**
     * Parses delimited text into rows of fields.
     *
     * Rules, in the order they are applied to each character:
     * <ul>
     * <li>A quote at the very start of a field opens a quoted field; a quote
     *     anywhere else in an unquoted field is a literal character.</li>
     * <li>Inside a quoted field, two quotes in a row produce one literal quote.
     *     If that pair is immediately followed by '\n' or '\r' the field is
     *     also closed (the Excel special case described in the body).</li>
     * <li>Inside a quoted field, a quote followed by a row terminator, by the
     *     delimiter, or by the end of the text closes the field. A quote
     *     followed by anything else is reported on standard output, kept as a
     *     literal quote, and closes the field.</li>
     * <li>Outside quotes, '\n', '\r' or '\f' ends the row. A row terminator
     *     that is immediately followed by '\n' or '\r' consumes that second
     *     character too, so "\r\n" counts once.</li>
     * </ul>
     * A final row is dropped when it has no completed fields and its only
     * token is blank after trimming, so a trailing line break does not
     * produce an extra row.
     *
     * @param text the complete text to parse; must not be null
     * @return one String array per row, in input order; empty if no row was found
     */
    public List<String[]> read(String text)
    {
        List<String[]> results=new ArrayList<String[]>();
        int n=text.length();
        StringBuilder currentToken=new StringBuilder();
        List<String> currentList=new ArrayList<String>();
        boolean quoted=false;

        for (int i=0; i<n; i++)
        {
            char c=text.charAt(i);

            if (quoted)
            {
                if (c=='"')
                {
                    if (i==n-1) // end of file, ignore quote.
                    {
                        quoted=false;
                        continue;
                    }
                    char next=text.charAt(i+1);
                    if (next=='"') // a quoted quote.
                    {
                        currentToken.append('"');
                        i++;

                        // Weird special case for text pasted from Excel.
                        // If a field starts with a quote and ends with two quotes,
                        // the input is ambiguous: Excel does not escape
                        //     "blah blah""
                        // but it does escape
                        //     blah "\n
                        // into
                        //     "blah ""\n
                        // so a doubled quote right before a line break can be
                        // either. The first bug report was for the literal
                        // "blah blah"", so that reading wins: the field ends here.
                        // Only '\n' and '\r' trigger this, not '\f', so that a
                        // written field containing a quote followed by a form
                        // feed still reads back intact.
                        if (i<n-1 && isBreak(text.charAt(i+1)))
                        {
                            quoted=false;
                        }
                        continue;
                    }
                    if (isRowEnd(next)) // closing quote at end of line
                    {
                        quoted=false;
                        currentList.add(currentToken.toString());
                        currentToken.setLength(0);
                        results.add(currentList.toArray(new String[0]));
                        currentList=new ArrayList<String>();
                        i++; // skip the row terminator itself
                        if (i<n-1 && isBreak(text.charAt(i+1)))
                            i++; // and the second half of a two-character break
                        continue;
                    }
                    if (next==delimiter)
                    {
                        // Closing quote; the delimiter is handled on the next pass.
                        quoted=false;
                        continue;
                    }
                    // Quote followed by ordinary text: keep it as a literal
                    // character and leave quoted mode.
                    System.out.println("a bad quote from excel: next char="+(int)next);
                    quoted=false;
                }
                currentToken.append(c);
                continue;
            }

            // ok, not quoted.
            if (c==delimiter)
            {
                currentList.add(currentToken.toString());
                currentToken.setLength(0);
                continue;
            }

            // A quote only opens a quoted field at the very beginning of a token.
            if (c=='"' && currentToken.length()==0)
            {
                quoted=true;
                continue;
            }

            // Not in the middle of a quote, so a row terminator means a new row.
            if (isRowEnd(c))
            {
                currentList.add(currentToken.toString());
                currentToken.setLength(0);
                results.add(currentList.toArray(new String[0]));
                currentList=new ArrayList<String>();
                if (i<n-1 && isBreak(text.charAt(i+1)))
                    i++;
                continue;
            }

            // by golly, just a normal character!
            currentToken.append(c);
        }

        // Flush the last row unless the text simply ended in a blank line.
        if (currentList.size()>0 || currentToken.toString().trim().length()>0)
        {
            currentList.add(currentToken.toString());
            results.add(currentList.toArray(new String[0]));
        }
        return results;
    }

    /**
     * Formats a single value as a one-field row.
     *
     * @param s the field value; null is written as an empty field
     * @return the encoded field, without any line terminator
     */
    public String write(String s)
    {
        return write(new String[] {s});
    }

    /**
     * Formats one row: fields joined by the delimiter, with no line terminator.
     *
     * A field is wrapped in quotes, with every embedded quote doubled, when it
     * contains the delimiter, a double quote, or a row terminator ('\n', '\r',
     * '\f'); otherwise it is written verbatim. Quoting on an embedded quote or
     * form feed is required for {@link #read(String)} to return the same
     * value: an unquoted leading quote would be read as an opening quote and
     * an unquoted form feed as the end of the row.
     *
     * Known limitation inherited from the reader's Excel heuristic: a field
     * containing a quote immediately followed by '\n' or '\r' does not read
     * back unchanged.
     *
     * @param data the field values; must not be null, null elements are written as empty fields
     * @return the encoded row
     */
    public String write(String[] data)
    {
        StringBuilder b=new StringBuilder();
        for (int i=0; i<data.length; i++)
        {
            // add a delimiter if necessary.
            if (i>0)
                b.append(delimiter);

            // if null, just don't write anything.
            String field=data[i];
            if (field==null)
                continue;

            if (!needsQuoting(field))
            {
                b.append(field);
                continue;
            }

            b.append('"');
            int n=field.length();
            for (int j=0; j<n; j++)
            {
                char c=field.charAt(j);
                if (c=='"')
                    b.append('"'); // escape by doubling
                b.append(c);
            }
            b.append('"');
        }
        return b.toString();
    }

    /**
     * Returns true if the field contains a character the reader would treat
     * specially when unquoted: the delimiter, a double quote, or a row terminator.
     */
    private boolean needsQuoting(String field)
    {
        int n=field.length();
        for (int j=0; j<n; j++)
        {
            char c=field.charAt(j);
            if (c==delimiter || c=='"' || isRowEnd(c))
                return true;
        }
        return false;
    }

    /**
     * Splits a single line of delimited text into its fields.
     *
     * @param s the text to split; only its first row is returned
     * @param delimiter the field delimiter; must not be the double quote
     * @return the fields of the first row, or an empty array when the text
     *         contains no row (for example when it is empty or blank)
     * @throws IllegalArgumentException if the delimiter is the double-quote character
     */
    public static String[] split(String s, char delimiter)
    {
        DelimitedText t=new DelimitedText(delimiter);
        List<String[]> lines=t.read(s);
        // read() yields no rows for empty or blank input; don't index into nothing.
        if (lines.isEmpty())
            return new String[0];
        return lines.get(0);
    }

    /**
     * Developer smoke test: loads test/bad-all.txt, parses it through
     * DelimitedFormat and prints the number of rows found.
     */
    public static void main(String[] args) throws Exception
    {
        String bad=IO.read("test/bad-all.txt");
        String[][] s=DelimitedFormat.readArrayFromString(bad, System.out);
        System.out.println("len="+s.length);
        /*
        Sample inputs for exercising read() directly:
        //DelimitedText c=new DelimitedText(';');
        //List<String[]> arrays=c.read(IO.read("test/bad.txt"));
        //List<String[]> arrays=c.read("a;b;\"x;y\";c");
        //List<String[]> arrays=c.read("a;\"a\n\rq\";b;\"x;y\";c");
        //List<String[]> arrays=c.read("a;b;\"with a \"\"blah\";c\nd;e;f\ng;h;i");
        //List<String[]> arrays=c.read("a,\"b\",\"c\r\nd\"\r\ne,f,g\nh,i,j");
        for (String[] s:arrays)
        {
            System.out.println("["+Display.arrayToString(s)+"]");
        }
        */
    }
}