package au.gov.amsa.mariweb;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import rx.Observable.Operator;
import rx.Subscriber;
/**
 * Operator that extracts the value tuples from SQL insert statement lines.
 *
 * <p>
 * For every input line the text following the first occurrence of the keyword
 * {@code VALUES} is scanned, and one {@code List<String>} is emitted per
 * parenthesised group found in it. Each list holds the comma separated values
 * of that group with surrounding quotes removed. Lines that do not contain
 * {@code VALUES} produce no emissions.
 *
 * <p>
 * Note that this Operator does not support backpressure.
 */
public class OperatorExtractValuesFromInsertStatement implements Operator<List<String>, String> {

    private static final char COMMA = ',';
    private static final char QUOTE = '\'';
    private static final char ESCAPE = '\\';
    private static final char SPACE = ' ';

    private static final String VALUES_KEYWORD = "VALUES";
    private static final String ERROR_LINE_FILE = "target/error-line.txt";

    private static final boolean DEBUG = false;
    private static final int CHECK_UNSUBSCRIBED_EVERY = 1000;

    /**
     * Wraps the child subscriber in a subscriber of lines. Completion and
     * errors are forwarded unchanged; each line is parsed and its value tuples
     * are pushed to the child. Nothing is forwarded once the child has
     * unsubscribed.
     *
     * @param child
     *            the downstream subscriber receiving one list per value tuple
     * @return the subscriber to be attached to the upstream source of lines
     */
    @Override
    public Subscriber<? super String> call(final Subscriber<? super List<String>> child) {
        return new Subscriber<String>(child) {

            @Override
            public void onCompleted() {
                if (!child.isUnsubscribed()) {
                    child.onCompleted();
                }
            }

            @Override
            public void onError(Throwable e) {
                if (!child.isUnsubscribed()) {
                    child.onError(e);
                }
            }

            @Override
            public void onNext(String line) {
                if (!child.isUnsubscribed()) {
                    String clause = getClause(line);
                    parseValuesFromClause(clause, child);
                }
            }
        };
    }

    /**
     * Returns the trimmed text that follows the first occurrence of
     * {@code VALUES} in the line.
     *
     * @param s
     *            a line of SQL
     * @return the text after {@code VALUES}, or the empty string when the line
     *         does not contain {@code VALUES}
     */
    private String getClause(String s) {
        int i = s.indexOf(VALUES_KEYWORD);
        if (i < 0) {
            // Not an insert statement with a values clause. Without this guard
            // indexOf's -1 would make substring() either return an arbitrary
            // suffix of the line or throw on lines shorter than 5 characters.
            return "";
        }
        return s.substring(i + VALUES_KEYWORD.length()).trim();
    }

    /**
     * Signals malformed input.
     *
     * @param message
     *            description of what was unexpected
     * @throws RuntimeException
     *             always
     */
    private static void unexpected(String message) {
        throw new RuntimeException(message);
    }

    /**
     * Scans a values clause such as {@code (1,'a'),(2,'b')} character by
     * character and calls {@code subscriber.onNext} with the list of values of
     * each parenthesised group, in order of appearance.
     *
     * <p>
     * Inside single quotes a backslash escapes the following character, and
     * commas, spaces and parentheses are taken literally. Outside quotes spaces
     * are skipped, and commas outside a group are skipped.
     *
     * @param s
     *            the text following the {@code VALUES} keyword
     * @param subscriber
     *            receives one list per group
     * @throws RuntimeException
     *             if any RuntimeException is raised while scanning (malformed
     *             clause, a token longer than 2000 characters, more than 100
     *             values in a group, or a failure thrown by
     *             {@code subscriber.onNext}). The original exception is the
     *             cause, and an attempt is made to write the clause to
     *             target/error-line.txt.
     */
    static void parseValuesFromClause(String s, Subscriber<? super List<String>> subscriber) {
        int charPosition = 0;
        boolean isOpen = false;
        boolean isEscaped = false;
        boolean isInQuotes = false;
        // true when the pending token was written as a quoted literal, so that
        // an empty string '' is still recorded as a value
        boolean tokenWasQuoted = false;
        List<String> values = new ArrayList<>();
        StringBuilder token = new StringBuilder();
        // isUnsubscribed() is only consulted every CHECK_UNSUBSCRIBED_EVERY
        // emissions to reduce the number of volatile reads
        long count = 0;
        char currentCh = '?';
        try {
            for (int i = 0; i < s.length(); i++) {
                char ch = s.charAt(i);
                charPosition = i;
                currentCh = ch;
                if (DEBUG) {
                    System.out.println(charPosition + ":'" + currentCh + "' open=" + isOpen
                            + ", escaped=" + isEscaped + ",inQuotes=" + isInQuotes + ",token="
                            + token + "values=" + values);
                }
                if (ch == '(' && !isInQuotes) {
                    if (isOpen) {
                        unexpected("unexpected '(' inside an open value list");
                    }
                    isOpen = true;
                    tokenWasQuoted = false;
                } else if (ch == ')' && !isInQuotes) {
                    if (!isOpen) {
                        unexpected("unexpected ')' without a matching '('");
                    }
                    isOpen = false;
                    // a quoted empty string before ')' is a value too
                    if (token.length() > 0 || tokenWasQuoted) {
                        values.add(token.toString());
                        token = new StringBuilder();
                    }
                    tokenWasQuoted = false;
                    if (++count % CHECK_UNSUBSCRIBED_EVERY == 0 && subscriber.isUnsubscribed()) {
                        return;
                    }
                    subscriber.onNext(values);
                    values = new ArrayList<>();
                } else if (ch == QUOTE && !isInQuotes) {
                    isInQuotes = true;
                    tokenWasQuoted = true;
                } else if (ch == QUOTE && !isEscaped) {
                    isInQuotes = false;
                } else if (ch == QUOTE) {
                    // must be escaped
                    token.append(QUOTE);
                    isEscaped = false;
                } else if (ch == ESCAPE && isInQuotes && isEscaped) {
                    token.append(ESCAPE);
                    isEscaped = false;
                } else if (ch == ESCAPE && isInQuotes) {
                    isEscaped = true;
                } else if (ch == ESCAPE) {
                    unexpected("unexpected escape character outside quotes");
                } else if (ch == COMMA && !isInQuotes && isOpen) {
                    values.add(token.toString());
                    token = new StringBuilder();
                    tokenWasQuoted = false;
                } else if (ch == COMMA && isInQuotes) {
                    token.append(COMMA);
                    // an escape applies to one character only; without this
                    // reset the next quote would wrongly be read as escaped
                    isEscaped = false;
                } else if (ch == COMMA) {
                    // separator between groups, ignore
                } else if (ch == SPACE && !isInQuotes) {
                    // ignore
                } else {
                    token.append(ch);
                    isEscaped = false;
                }
                if (token.length() > 2000) {
                    throw new RuntimeException("token too long (>2000)");
                }
                if (values.size() > 100) {
                    throw new RuntimeException("too many columns found in a values clause (>100)");
                }
            }
        } catch (RuntimeException e) {
            System.out.println("'" + currentCh + "' open=" + isOpen + ", escaped=" + isEscaped
                    + ",inQuotes=" + isInQuotes + ",token=" + token + "values=" + values);
            String message = "error at position " + charPosition;
            try {
                writeLineToFile(s);
                message += ". line with problem written to " + ERROR_LINE_FILE;
            } catch (RuntimeException writeFailure) {
                // the diagnostic dump is best effort: keep the parse error as
                // the primary failure instead of letting the write error mask it
                e.addSuppressed(writeFailure);
                message += ". line with problem could not be written to " + ERROR_LINE_FILE;
            }
            throw new RuntimeException(message, e);
        }
    }

    /**
     * Writes the given text, encoded as US-ASCII, to target/error-line.txt,
     * replacing any previous content.
     *
     * @param s
     *            the text to write
     * @throws RuntimeException
     *             wrapping the IOException if the file cannot be opened or
     *             written
     */
    private static void writeLineToFile(String s) {
        // try-with-resources closes the stream even when write() fails
        try (FileOutputStream fos = new FileOutputStream(ERROR_LINE_FILE)) {
            fos.write(s.getBytes("US-ASCII"));
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}