/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.sql_dumps; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import gplx.core.flds.*; import gplx.core.ios.*; import gplx.core.ios.streams.*;
/**
 * Streaming parser for SQL dump files made of "INSERT INTO ... VALUES (...),(...);" statements.
 *
 * The source file is read through a fixed-size buffer and scanned one byte at a time by a small
 * state machine (see the Mode__* constants). Each completed field is reported to the callback via
 * On_fld_done and each completed row via On_row_done.
 *
 * Only fields whose uid differs from Int_.Max_value are forwarded to the callback; uids are copied
 * from the callback's field hash in Identify_flds for every table field whose key it contains.
 */
public class Xosql_dump_parser {
	private Xosql_dump_cbk cbk;
	private Io_url src_fil; private int src_rdr_bfr_len = 8 * Io_mgr.Len_mb;
	private Xosql_fld_hash cbk_flds;	// fields requested by the callback, keyed by name
	private Ordered_hash tbl_flds;		// fields declared by the dump's table definition; built at start of Parse

	/**
	 * @param cbk      receiver of field / row events
	 * @param cbk_keys names of the fields the callback wants to receive
	 */
	public Xosql_dump_parser(Xosql_dump_cbk cbk, String... cbk_keys) {
		this.cbk = cbk;
		this.cbk_flds = Xosql_fld_hash.New(cbk_keys);
	}

	/** Sets the dump file to parse; must be called before Parse. */
	public void Src_fil_(Io_url v) {this.src_fil = v;}

	/**
	 * Parses the file set by Src_fil_, firing cbk.On_fld_done for each wanted field and cbk.On_row_done for each row.
	 * Parsing stops when no further "INSERT INTO " is found or when the buffer is exhausted.
	 * The reader is always released, even if an error is thrown.
	 *
	 * @param usr_dlg not used by this method; kept for interface compatibility
	 */
	public void Parse(Gfo_usr_dlg usr_dlg) {
		Io_buffer_rdr rdr = Io_buffer_rdr.Null;
		try {
			// init bfrs, rdr
			Bry_bfr val_bfr = Bry_bfr_.New();	// accumulates the bytes of the field currently being read
			rdr = Io_buffer_rdr.new_(Io_stream_rdr_.New_by_url(src_fil), src_rdr_bfr_len);
			byte[] bfr = rdr.Bfr(); int bfr_len = rdr.Bfr_len(), fld_idx = 0, cur_pos = 0;
			this.tbl_flds = Identify_flds(cbk_flds, bfr);	// NOTE: table definition is read from the first buffer load only

			// init fld_rdr
			Gfo_fld_rdr fld_rdr = Gfo_fld_rdr.sql_();
			byte[] decode_regy = fld_rdr.Escape_decode();	// lookup table: escaped char -> decoded byte; Byte_ascii.Null means "unknown"
			byte mode_prv = Mode__sql_bgn; byte mode = Mode__sql_bgn;	// mode_prv: mode to return to after an escape sequence
			boolean reading_file = true;
			while (reading_file) {
				if (cur_pos + 256 > bfr_len && rdr.Fil_pos() != rdr.Fil_len()) { // buffer 256 characters; can be 0, but erring on side of simplicity
					rdr.Bfr_load_from(cur_pos);	// reload so that parsing resumes at index 0 of the refreshed buffer
					cur_pos = 0;
					bfr = rdr.Bfr();
					bfr_len = rdr.Bfr_len();
				}
				if (cur_pos == bfr_len) break;	// consumed all available bytes
				byte b = bfr[cur_pos];
				switch (mode) {
					case Mode__sql_bgn:// skip over header to 1st "VALUES"
						cur_pos = Bry_find_.Find_fwd(bfr, Bry_insert_into, cur_pos);
						// no more INSERT statements (or match lies beyond the valid part of the buffer): done
						if (cur_pos == Bry_find_.Not_found || cur_pos > bfr_len) {reading_file = false; continue;}
						cur_pos = Bry_find_.Find_fwd(bfr, Bry_values, cur_pos);
						if (cur_pos == Bry_find_.Not_found || cur_pos > bfr_len) throw Err_.new_wo_type("VALUES not found"); // something went wrong;
						mode = Mode__fld;	// Bry_values already includes the opening "(" of the 1st row
						cur_pos += Bry_values.length;
						break;
					case Mode__row_bgn: // assert "("
						switch (b) {
							case Byte_ascii.Paren_bgn: mode = Mode__fld; break;
							default: throw Err_.new_unhandled(mode);
						}
						++cur_pos;
						break;
					case Mode__row_end: // handle 1st char after ")";
						switch (b) {
							case Byte_ascii.Nl: break; // ignore \n
							case Byte_ascii.Comma: mode = Mode__row_bgn; break; // handle ","; EX: "(1),(2)"
							case Byte_ascii.Semic: mode = Mode__sql_bgn; break; // handle ";"; EX: "(1);INSERT INTO"
							default: throw Err_.new_unhandled(mode);
						}
						++cur_pos;
						break;
					case Mode__fld: // handle fld chars; EX: "(1,'ab')"
						switch (b) {
							case Byte_ascii.Space: // ws: skip; EX: "(1 , 2)"; "(1,\n2)"
							case Byte_ascii.Nl:
								break;
							case Byte_ascii.Apos: // apos: switch modes; NOTE: never escape apos by doubling; will fail for empty fields; EX: ", '', ''"; DATE:2013-07-06
								mode = Mode__quote;
								break;
							case Byte_ascii.Backslash: // backslash: switch modes;
								mode_prv = mode;
								mode = Mode__escape;
								break;
							case Byte_ascii.Comma: // comma: end fld
								Commit_fld(fld_idx++, val_bfr);
								break;
							case Byte_ascii.Paren_end: // paren_end: end fld and row
								Commit_fld(fld_idx++, val_bfr);
								cbk.On_row_done();
								fld_idx = 0;
								mode = Mode__row_end;
								break;
							default: // all other chars; add to val_bfr
								val_bfr.Add_byte(b);
								break;
						}
						++cur_pos;
						break;
					case Mode__quote: // add to val_bfr until quote encountered; also, handle backslashes;
						switch (b) {
							case Byte_ascii.Apos: mode = Mode__fld; break;
							case Byte_ascii.Backslash: mode_prv = mode; mode = Mode__escape; break;
							default: val_bfr.Add_byte(b); break;
						}
						++cur_pos;
						break;
					case Mode__escape: { // get escape_val from decode_regy; if unknown, just add original
						// Java bytes are signed: a byte >= 0x80 (EX: part of a UTF-8 sequence) would be a negative array index.
						// Mask to 0-255 and bounds-check so such bytes are treated as an unknown escape instead of throwing.
						int escape_idx = b & 0xFF;
						byte escape_val = escape_idx < decode_regy.length ? decode_regy[escape_idx] : Byte_ascii.Null;
						if (escape_val == Byte_ascii.Null)
							val_bfr.Add_byte(Byte_ascii.Backslash).Add_byte(b);
						else
							val_bfr.Add_byte(escape_val);
						mode = mode_prv; // switch back to prv_mode
						++cur_pos;
						break;
					}
					default: throw Err_.new_unhandled(mode);
				}
			}
		}
		finally {rdr.Rls();}
	}

	/**
	 * Reports the accumulated field value to the callback (if the field is wanted) and clears the buffer.
	 *
	 * @param fld_idx 0-based position of the field within the current row; used as index into tbl_flds
	 * @param val_bfr buffer holding the field's decoded bytes; always cleared before returning
	 */
	private void Commit_fld(int fld_idx, Bry_bfr val_bfr) {
		Xosql_fld_itm fld = (Xosql_fld_itm)tbl_flds.Get_at(fld_idx);	// handle new flds added by MW, but not supported by XO; EX:hiddencat and pp_sortkey; DATE:2014-04-28
		if (fld.Uid() != Int_.Max_value)	// Int_.Max_value is treated as "not requested by callback"; presumably the default uid of a table field -- TODO confirm in Xosql_fld_itm
			cbk.On_fld_done(fld.Uid(), val_bfr.Bfr(), 0, val_bfr.Len());
		val_bfr.Clear();
	}

	/**
	 * Parses the table definition found in raw and copies the callback's uid onto every table field
	 * that the callback asked for. Table fields unknown to the callback are left untouched.
	 *
	 * @param cbk_hash fields requested by the callback
	 * @param raw      bytes containing the table definition
	 * @return the table's fields after Sort() has been applied
	 */
	private static Ordered_hash Identify_flds(Xosql_fld_hash cbk_hash, byte[] raw) {
		// parse tbl def
		Xosql_tbl_parser tbl_parser = new Xosql_tbl_parser();
		Ordered_hash tbl_flds = tbl_parser.Parse(raw);

		// loop over tbl_flds
		int len = tbl_flds.Len();
		for (int i = 0; i < len; ++i) {
			Xosql_fld_itm tbl_itm = (Xosql_fld_itm)tbl_flds.Get_at(i);
			// get cbk_itm
			Xosql_fld_itm cbk_itm = cbk_hash.Get_by_key(tbl_itm.Key());
			if (cbk_itm == null) continue;// throw Err_.New("sql_dump_parser: failed to find fld; src={0} fld={1}", src_fil.Raw(), tbl_itm.Key());
			// set tbl_def's uid to cbk_itm's uid
			tbl_itm.Uid_(cbk_itm.Uid());
		}
		tbl_flds.Sort();	// NOTE(review): sort order is defined by Xosql_fld_itm / Ordered_hash, not visible here; Commit_fld relies on it matching row field order
		return tbl_flds;
	}

	/** Overrides the read-buffer length (default is 8 MB); returns this for chaining. */
	public Xosql_dump_parser Src_rdr_bfr_len_(int v) {src_rdr_bfr_len = v; return this;} // TEST:

	private static final byte[] Bry_insert_into = Bry_.new_a7("INSERT INTO "), Bry_values = Bry_.new_a7(" VALUES (");
	// parser states: sql_bgn = looking for next INSERT; row_bgn = expecting "("; row_end = just after ")";
	// fld = inside a row, outside quotes; quote = inside '...'; escape = byte immediately after a backslash
	private static final byte Mode__sql_bgn = 0, Mode__row_bgn = 1, Mode__row_end = 2, Mode__fld = 3, Mode__quote = 4, Mode__escape = 5;
}