package org.basex.build.file;
import static org.basex.util.Token.*;
import java.io.IOException;
import org.basex.build.SingleParser;
import org.basex.core.Prop;
import org.basex.io.IO;
import org.basex.io.in.NewlineInput;
import org.basex.util.*;
/**
* This class parses files in the plain-text format
* and sends events to the specified database builder.
*
* <p>The parser provides two options, which can be specified via
* <code>SET PARSEROPT ...</code>:</p>
*
* <ul>
* <li><code>encoding</code> specifies the input encoding
* (default: <code>UTF-8</code>).</li>
* <li><code>lines</code> specifies whether the resulting XML splits the input
* into lines. Can be set to <code>yes</code> or <code>no</code>
* (default: <code>yes</code>).</li>
* </ul>
*
* <p><b>Example</b>:
* <code>SET PARSEROPT lines=no; CREATE DB ...</code><br/>
* <b>Description</b>: Puts complete input into one text node.</p>
*
* @author BaseX Team 2005-12, BSD License
* @author Christian Gruen
*/
public final class TextParser extends SingleParser {
  /** Name of the root element that wraps the complete input. */
  private static final byte[] TEXT = token("text");
  /** Name of the element that wraps a single input line. */
  private static final byte[] LINE = token("line");

  /** Lines format: if set, every input line is wrapped in its own element. */
  private final boolean lines;
  /** Input encoding, as read from the parser options. */
  private final String encoding;

  /**
   * Constructor. Reads the {@code lines} and {@code encoding} settings
   * from the parser options stored in the database properties.
   * @param source document source
   * @param target target path
   * @param prop database properties
   * @throws IOException I/O exception
   */
  public TextParser(final IO source, final String target, final Prop prop)
      throws IOException {
    super(source, target);
    // set parser properties
    final ParserProp props = new ParserProp(prop.get(Prop.PARSEROPT));
    lines = props.is(ParserProp.LINES);
    encoding = props.get(ParserProp.ENCODING);
  }

  /**
   * Reads the input character by character and sends it to the builder,
   * wrapped in a single {@code text} element. In lines mode, every line is
   * sent as a {@code line} child element, including a final line that is
   * not terminated by a newline; otherwise, the complete input is sent as
   * one text node. Characters rejected by {@code XMLToken.valid} are
   * replaced by {@code '?'}.
   * @throws IOException I/O exception
   */
  @Override
  public void parse() throws IOException {
    builder.startElem(TEXT, atts);

    final TokenBuilder tb = new TokenBuilder();
    // true while characters have been buffered since the last line break
    boolean pending = false;
    final NewlineInput nli = new NewlineInput(src, encoding);
    try {
      for(int ch; (ch = nli.read()) != -1;) {
        if(ch == '\n' && lines) {
          emitLine(tb);
          pending = false;
        } else {
          tb.add(XMLToken.valid(ch) ? ch : '?');
          pending = true;
        }
      }
    } finally {
      // the input is closed even if reading or building fails
      nli.close();
    }

    if(lines) {
      // flush a trailing line that was not terminated by a newline;
      // without this step, its contents would be silently dropped
      if(pending) emitLine(tb);
    } else {
      builder.text(tb.finish());
    }
    builder.endElem();
  }

  /**
   * Sends the buffered characters to the builder as one {@code line}
   * element and resets the buffer.
   * @param tb buffer with the contents of the current line
   * @throws IOException I/O exception
   */
  private void emitLine(final TokenBuilder tb) throws IOException {
    builder.startElem(LINE, atts);
    builder.text(tb.finish());
    builder.endElem();
    tb.reset();
  }
}