/**
* Copyright (C) 2009-2013 FoundationDB, LLC
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package com.foundationdb.server.service.externaldata;
import com.foundationdb.ais.model.Column;
import com.foundationdb.ais.model.Table;
import com.foundationdb.qp.operator.QueryContext;
import com.foundationdb.qp.row.Row;
import com.foundationdb.server.error.ExternalRowReaderException;
import com.foundationdb.server.types.common.types.TypesTranslator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.UnsupportedCharsetException;
import java.util.List;
/**
 * Read from a <code>mysqldump -t</code> file of INSERT statements (with
 * relatively little flexibility).
 *
 * <p>The input is consumed one byte at a time by a hand-written state
 * machine (see {@link State}). What it understands:
 * <ul>
 * <li><code>-- ...</code> comments running to the end of the line and
 *     <code>/* ... *&#47;</code> comments (optionally followed by a stray
 *     <code>;</code>) between statements;</li>
 * <li><code>LOCK</code> / <code>UNLOCK</code> statements, which are skipped
 *     up to their terminating <code>;</code>;</li>
 * <li><code>INSERT INTO <i>table</i> VALUES (...),(...);</code>, where every
 *     INSERT in the file must name the same table, spelled the same way, as
 *     the first one.</li>
 * </ul>
 * Any other statement verb is rejected with an
 * {@link ExternalRowReaderException}.
 *
 * <p>Byte-level I/O and field / row accumulation (<code>read</code>,
 * <code>unread</code>, <code>addToField</code>, <code>addField</code>,
 * <code>newRow</code>, <code>finishRow</code>, ...) are inherited from
 * {@link RowReader}, which is not part of this file.
 */
public class MysqlDumpRowReader extends RowReader
{
    /** Statement keywords, encoded in the input encoding so that they can be
     * compared directly against the bytes collected in the field buffer. */
    private final byte[] insert, into, values, lock, unlock;

    /** Parser position within the dump file. */
    private enum State {
        // Between statements / inside comments.
        STATEMENT_START, SINGLE_LINE_COMMENT, DELIMITED_COMMENT,
        // Collecting the first keyword; skipping LOCK / UNLOCK statements.
        STATEMENT_VERB, IGNORED_STATEMENT, IGNORED_BACKQUOTE,
        // INSERT [INTO] [table] [VALUES].
        INSERT, INSERT_TABLE, TABLE_BACKQUOTE, INSERT_VALUES,
        // Before "(" of a row constructor / after its ")".
        NEXT_ROW_CTOR, AFTER_ROW_CTOR,
        // Inside a row constructor.
        NEXT_FIELD, UNQUOTED_FIELD, QUOTED_FIELD, AFTER_QUOTED_FIELD
    }

    /** Current parser state; persists across calls to {@link #nextRow()} so
     * that several rows can be returned from one multi-row INSERT. */
    private State state;

    private byte[] tableName = null; // According to the file.

    private static final Logger logger = LoggerFactory.getLogger(MysqlDumpRowReader.class);

    /**
     * Create a reader positioned at the start of a statement.
     *
     * <p>All arguments are handed to the {@link RowReader} constructor
     * together with the bytes of <code>NULL</code> in the given encoding
     * (presumably the marker for SQL NULL values &mdash; confirm against
     * <code>RowReader</code>).
     *
     * @param encoding name of the character encoding of the input; also used
     *        to encode the keywords this reader matches against
     * @throws UnsupportedCharsetException if <code>encoding</code> is not
     *         supported
     */
    public MysqlDumpRowReader(Table table, List<Column> columns,
                              InputStream inputStream, String encoding,
                              QueryContext queryContext, TypesTranslator typesTranslator) {
        super(table, columns, inputStream, encoding, getBytes("NULL", encoding),
              queryContext, typesTranslator);
        this.insert = getBytes("INSERT", encoding);
        this.into = getBytes("INTO", encoding);
        this.values = getBytes("VALUES", encoding);
        this.lock = getBytes("LOCK", encoding);
        this.unlock = getBytes("UNLOCK", encoding);
        this.state = State.STATEMENT_START;
    }

    /**
     * Encode <code>str</code> in the named encoding, turning the checked
     * {@link UnsupportedEncodingException} into an unchecked
     * {@link UnsupportedCharsetException} (with the original as its cause) so
     * that it can be called from the constructor's <code>super(...)</code>
     * argument list.
     */
    private static byte[] getBytes(String str, String encoding) {
        try {
            return str.getBytes(encoding);
        }
        catch (UnsupportedEncodingException ex) {
            // UnsupportedCharsetException has no cause-taking constructor.
            UnsupportedCharsetException nex = new UnsupportedCharsetException(encoding);
            nex.initCause(ex);
            throw nex;
        }
    }

    /**
     * Parse forward to the end of the next row constructor and return that
     * row.
     *
     * @return the result of <code>finishRow()</code> for the next
     *         <code>(...)</code> group, or <code>null</code> when the input
     *         ends between statements (or inside a <code>--</code> comment)
     * @throws IOException if reading the underlying input fails
     * @throws ExternalRowReaderException on an unrecognized statement, an
     *         unexpected byte, a change of target table, or end of input in
     *         the middle of a statement, comment, table name or quoted string
     */
    @Override
    public Row nextRow() throws IOException {
        while (true) {
            int b = read();
            switch (state) {
            case STATEMENT_START:
                if (b < 0) {
                    return null;
                }
                else if (b == '-') {
                    b = read();
                    if (b == '-') {
                        state = State.SINGLE_LINE_COMMENT;
                    }
                    else {
                        throw unexpectedToken('-', b);
                    }
                }
                else if (b == '/') {
                    b = read();
                    if (b == '*') {
                        state = State.DELIMITED_COMMENT;
                    }
                    else {
                        throw unexpectedToken('/', b);
                    }
                }
                else if ((b >= 'A') && (b <= 'Z')) {
                    // First letter of the statement verb (upper case only).
                    addToField(b);
                    state = State.STATEMENT_VERB;
                }
                else if ((b == ' ') || (b == '\r') || (b == '\n')) {
                    // Skip whitespace between statements.
                }
                else {
                    throw unexpectedToken(b);
                }
                break;

            case SINGLE_LINE_COMMENT:
                if (b < 0) {
                    // A trailing comment without a final newline is fine.
                    return null;
                }
                else if (b == '\n') {
                    state = State.STATEMENT_START;
                }
                break;

            case DELIMITED_COMMENT:
                if (b < 0) {
                    throw eofInTheMiddleOf("a comment");
                }
                else if (b == '*') {
                    // NOTE(review): b may be the EOF marker (-1) at either
                    // unread() below; whether that is tolerated depends on
                    // RowReader.unread -- confirm.
                    b = read();
                    if (b == '/') {
                        b = read();
                        if (b != ';')
                            unread(b); // Allow stray ; after comment.
                        state = State.STATEMENT_START;
                    }
                    else {
                        // Not the end of the comment; re-examine this byte,
                        // since it could itself be a '*'.
                        unread(b);
                    }
                }
                break;

            case STATEMENT_VERB:
            case INSERT:
            case INSERT_VALUES:
                // All three states collect one upper-case keyword into the
                // field buffer and differ only in which keyword is accepted.
                if (b < 0) {
                    throw eofInTheMiddleOf("a statement");
                }
                else if ((b >= 'A') && (b <= 'Z')) {
                    addToField(b);
                }
                else {
                    // Keyword ended: swallow a single separating space, push
                    // anything else back for the next state.
                    if (b != ' ') unread(b);
                    if (state == State.INSERT) {
                        if (fieldMatches(into)) {
                            clearField();
                            state = State.INSERT_TABLE;
                        }
                        else {
                            throw new ExternalRowReaderException("Unrecognized statement INSERT " + decodeField());
                        }
                    }
                    else if (state == State.INSERT_VALUES) {
                        if (fieldMatches(values)) {
                            clearField();
                            state = State.NEXT_ROW_CTOR;
                        }
                        else {
                            throw new ExternalRowReaderException("Unrecognized statement INSERT INTO " + decodeField());
                        }
                    }
                    else if (fieldMatches(lock) || fieldMatches(unlock)) {
                        clearField();
                        state = State.IGNORED_STATEMENT;
                    }
                    else if (fieldMatches(insert)) {
                        clearField();
                        state = State.INSERT;
                    }
                    else {
                        throw new ExternalRowReaderException("Unrecognized statement " + decodeField());
                    }
                }
                break;

            case IGNORED_STATEMENT:
                if (b < 0) {
                    throw eofInTheMiddleOf("a statement");
                }
                else if (b == ';') {
                    state = State.STATEMENT_START;
                }
                else if (b == '`') {
                    // A ';' inside a back-quoted name must not end the statement.
                    state = State.IGNORED_BACKQUOTE;
                }
                break;

            case IGNORED_BACKQUOTE:
                if (b < 0) {
                    throw eofInTheMiddleOf("a statement");
                }
                else if (b == '`') {
                    state = State.IGNORED_STATEMENT;
                }
                else if (b == '\\') {
                    // Skip the escaped byte so that it cannot close the quote.
                    b = read();
                }
                break;

            case INSERT_TABLE:
                if (b < 0) {
                    throw eofInTheMiddleOf("a statement");
                }
                else if (b == '`') {
                    // The back-quotes are kept as part of the recorded name.
                    addToField(b);
                    state = State.TABLE_BACKQUOTE;
                }
                else if ((b == '.') ||
                         ((b >= 'A') && (b <= 'Z')) ||
                         ((b >= 'a') && (b <= 'z')) ||
                         ((b >= '0') && (b <= '9')) ||
                         (b == '_')) {
                    // Unquoted or qualified table name.
                    addToField(b);
                }
                else {
                    // End of the table name.
                    if (b != ' ') unread(b);
                    if (tableName == null) {
                        // First INSERT seen: remember its target.
                        tableName = copyField();
                        if (logger.isTraceEnabled()) {
                            logger.trace("Original target table: {}", decodeField());
                        }
                    }
                    else if (!fieldMatches(tableName)) {
                        throw new ExternalRowReaderException("INSERT INTO changed from " +
                                                             decode(tableName) +
                                                             " to " + decodeField() +
                                                             ". Does file contain multiple tables?");
                    }
                    clearField();
                    state = State.INSERT_VALUES;
                }
                break;

            case TABLE_BACKQUOTE:
                if (b < 0) {
                    throw eofInTheMiddleOf("table name");
                }
                else if (b == '`') {
                    addToField(b);
                    state = State.INSERT_TABLE;
                }
                else if (b == '\\') {
                    // Keep the backslash and whatever it escapes verbatim;
                    // EOF is left for the next iteration to report.
                    addToField(b);
                    b = read();
                    if (b >= 0)
                        addToField(b);
                }
                else {
                    addToField(b);
                }
                break;

            case NEXT_ROW_CTOR:
                if (b < 0) {
                    throw eofInTheMiddleOf("a statement");
                }
                else if (b == '(') {
                    newRow();
                    state = State.NEXT_FIELD;
                }
                else {
                    throw unexpectedToken(b);
                }
                break;

            case AFTER_ROW_CTOR:
                if (b < 0) {
                    throw eofInTheMiddleOf("a statement");
                }
                else if (b == ';') {
                    state = State.STATEMENT_START;
                }
                else if (b == ',') {
                    // Another row constructor follows in the same INSERT.
                    state = State.NEXT_ROW_CTOR;
                }
                else {
                    throw unexpectedToken(b);
                }
                break;

            case NEXT_FIELD:
                if (b < 0) {
                    throw eofInTheMiddleOf("a statement");
                }
                else if (b == ')') {
                    state = State.AFTER_ROW_CTOR;
                    return finishRow();
                }
                else if (b == '\'') {
                    state = State.QUOTED_FIELD;
                }
                else if (b == ',') {
                    // Nothing between two separators: an empty unquoted field.
                    addField(false);
                }
                else if ((b == ' ') || (b == '\r') || (b == '\n')) {
                    // Skip whitespace before a field.
                }
                else {
                    addToField(b);
                    state = State.UNQUOTED_FIELD;
                }
                break;

            case UNQUOTED_FIELD:
                if (b < 0) {
                    throw eofInTheMiddleOf("a statement");
                }
                else if (b == ',') {
                    addField(false);
                    state = State.NEXT_FIELD;
                }
                else if (b == ')') {
                    addField(false);
                    state = State.AFTER_ROW_CTOR;
                    return finishRow();
                }
                else if (b == '\'') {
                    throw new ExternalRowReaderException("Quote in the middle of a value");
                }
                else {
                    addToField(b);
                }
                break;

            case QUOTED_FIELD:
                if (b < 0) {
                    throw eofInTheMiddleOf("quoted string");
                }
                else if (b == '\'') {
                    state = State.AFTER_QUOTED_FIELD;
                }
                else if (b == '\\') {
                    // Decode MySQL's backslash escapes. The sequences are
                    // case-sensitive; any escaped byte not listed here
                    // (e.g. \\, \', \") stands for itself.
                    b = read();
                    switch (b) {
                    case -1:
                        throw eofInTheMiddleOf("quoted string");
                    case '0':
                        b = 0;          // ASCII NUL
                        break;
                    case 'b':
                        b = '\b';
                        break;
                    case 'n':
                        b = '\n';
                        break;
                    case 'r':
                        b = '\r';
                        break;
                    case 't':
                        b = '\t';
                        break;
                    case 'Z':
                        b = 26;         // ASCII SUB (Control+Z)
                        break;
                    }
                    addToField(b);
                }
                else {
                    addToField(b);
                }
                break;

            case AFTER_QUOTED_FIELD:
                // addField(true) here vs. addField(false) elsewhere:
                // presumably the flag marks the value as quoted so that the
                // string 'NULL' is not mistaken for SQL NULL -- confirm
                // against RowReader.
                if (b < 0) {
                    throw eofInTheMiddleOf("a statement");
                }
                else if (b == ',') {
                    addField(true);
                    state = State.NEXT_FIELD;
                }
                else if (b == ')') {
                    addField(true);
                    state = State.AFTER_ROW_CTOR;
                    return finishRow();
                }
                else {
                    throw unexpectedToken(b);
                }
                break;
            }
        }
    }

    /**
     * Build (but do not throw) the exception for input that does not fit the
     * grammar.
     *
     * @param bytes the offending byte values as returned by
     *        <code>read()</code>, in input order; negative values (the
     *        end-of-input marker) are left out of the message rather than
     *        being turned into a bogus 0xFF byte
     * @return an exception whose message is <code>"Unexpected token "</code>
     *         followed by the decoded bytes
     */
    protected ExternalRowReaderException unexpectedToken(int... bytes) {
        int count = 0;
        for (int value : bytes) {
            if (value >= 0) {
                count++;
            }
        }
        byte[] ba = new byte[count];
        int i = 0;
        for (int value : bytes) {
            if (value >= 0) {
                ba[i++] = (byte)value;
            }
        }
        return new ExternalRowReaderException("Unexpected token " + decode(ba));
    }

    /**
     * Build (but do not throw) the exception for input that ends too early.
     *
     * @param what description of the construct that was being parsed,
     *        appended to <code>"EOF in the middle of "</code>
     */
    protected ExternalRowReaderException eofInTheMiddleOf(String what) {
        return new ExternalRowReaderException("EOF in the middle of " + what);
    }
}