package org.wikibrain.parser.sql; import com.akiban.sql.StandardException; import com.akiban.sql.parser.*; import org.wikibrain.utils.WpIOUtils; import java.io.BufferedReader; import java.io.File; import java.io.IOException; import java.util.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Parses a MySQL dump file containing insert statements. */ public class MySqlDumpParser { private static final Logger LOG = LoggerFactory.getLogger(MySqlDumpParser.class); private SQLParser sqlParser = new SQLParser(); /** * Parses a mysql dump into rows. * @param dump The file containing the schema and insert statements * @return An iterable of maps, each containing column name to column value. */ public Iterable<Object[]> parse(final File dump) { return new Iterable<Object[]>() { @Override public Iterator<Object[]> iterator() { try { return new MyIterator(dump); } catch (IOException e) { throw new RuntimeException(e); } } }; } /** * See: http://dev.mysql.com/doc/refman/5.0/en/string-literals.html * @param line * @return */ protected String unescapeString(String line) { StringBuffer result = new StringBuffer(); int i = 0; while (true) { int j = line.indexOf("\\", i); if (j < 0) { break; } else if (j == line.length() - 1) { // how to handle a trailing slash? break; } else { result.append(line.substring(i, j)); String escaped; switch (line.charAt(j+1)) { case '0' : escaped = "\0"; break; case '\'' : escaped = "''"; break; case '"' : escaped = "\""; break; case 'b' : escaped = "\b"; break; case 'n' : escaped = "\n"; break; case 'r' : escaped = "\r"; break; case 't' : escaped = "\t"; break; case 'Z' : escaped = "\u001a"; break; case '\\' : escaped = "\\"; break; case '_' : escaped = "_'"; break; default: throw new IllegalArgumentException("invalid escape character encountered: " + line.charAt(j+1)); } result.append(escaped); i = j + 2; } } result.append(line.substring(i)); return result.toString(); } /** * Parses a mysql dump into rows. * @param line The line containing (possibly multiple) insert statements. * @return An iterable of maps, each containing column name to column value. */ public List<Object[]> parse(String line) throws StandardException { final List<Object[]> result = new ArrayList<Object[]>(); if (!line.startsWith("INSERT ")) { return result; } line = unescapeString(line); if (line.endsWith(";")) { line = line.substring(0, line.length()-1); } StatementNode node = sqlParser.parseStatement(line); if (node instanceof InsertNode) { node.accept(new Visitor() { @Override public Visitable visit(Visitable node) throws StandardException { if (node instanceof RowResultSetNode) { List<Object> values = new ArrayList<Object>(); for (ResultColumn column : ((RowResultSetNode)node).getResultColumns()) { // TODO: are other types of values possible? ConstantNode value = (ConstantNode) column.getExpression(); values.add(value.getValue()); } result.add(values.toArray()); } return node; } @Override public boolean visitChildrenFirst(Visitable node) { return true; } @Override public boolean stopTraversal() { return false; } @Override public boolean skipChildren(Visitable node) throws StandardException { return (node instanceof RowResultSetNode); } }); } return result; } /** * A buffered iterator for a file containing insert statements. */ class MyIterator implements Iterator<Object[]> { private final File path; private List<Object[]> buffer = new LinkedList<Object[]>(); private BufferedReader reader; private int line = 0; public MyIterator(File path) throws IOException { this.path = path; reader = WpIOUtils.openBufferedReader(path); } private void fillBuffer() { if (buffer.size() > 0) { return; } if (reader == null) { return; } while (buffer.isEmpty()) { try { String line = reader.readLine(); if (line == null) { reader = null; return; } buffer.addAll(parse(line)); } catch (IOException e) { throw new RuntimeException(e); } catch (StandardException e) { LOG.error("error parsing line " + line + " of " + path + ":", e); } line++; } } @Override public boolean hasNext() { fillBuffer(); return buffer.size() > 0; } @Override public Object[] next() { fillBuffer(); return buffer.remove(0); } @Override public void remove() { throw new UnsupportedOperationException(); } } }