package org.basex.io.serial;
import static org.basex.core.Text.*;
import static org.basex.data.DataText.*;
import static org.basex.io.serial.SerializerProp.*;
import static org.basex.query.util.Err.*;
import static org.basex.util.Token.*;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.Charset;
import org.basex.data.FTPos;
import org.basex.io.MimeTypes;
import org.basex.io.out.PrintOutput;
import org.basex.query.QueryException;
import org.basex.query.item.Item;
import org.basex.query.item.StrStream;
import org.basex.util.TokenBuilder;
import org.basex.util.ft.FTLexer;
import org.basex.util.ft.FTSpan;
import org.basex.util.hash.TokenSet;
import org.basex.util.list.TokenList;
/**
* This class serializes data to an output stream.
*
* @author BaseX Team 2005-12, BSD License
* @author Christian Gruen
*/
public abstract class OutputSerializer extends Serializer {
/** (X)HTML: elements with an empty content model (filled statically). */
static final TokenList EMPTIES = new TokenList();
/** (X)HTML: URI attributes, stored as {@code element:attribute} tokens. */
static final TokenSet URIS = new TokenSet();
/**
* System document type; {@code null} if no document type declaration is
* (or remains) to be printed.
*/
private String docsys;
/**
* Public document type; {@code null} if it is empty or if no system
* document type was specified.
*/
private String docpub;
/**
* Content type state: {@link #ct(boolean, boolean)} prints the declaration
* only while this value is 1, and increments it afterwards.
*/
int ct;
/**
* Indentation flag (used for formatting): if set, {@link #indent()} is called
* before the next element tag, comment or processing instruction is printed.
*/
boolean ind;
/**
* Item flag (used for formatting): set after an item was serialized; the
* next effective {@link #indent()} call resets it instead of printing a
* line break.
*/
private boolean item;
/** Script flag (not accessed in this class). */
boolean script;
/** URI escape flag (serialization parameter S_ESCAPE_URI_ATTRIBUTES). */
final boolean escape;
/** CData elements: text inside these elements is output as CDATA section. */
private final TokenList cdata = new TokenList();
/**
* Suppress indentation elements: no indentation is printed while
* {@code tags} contains one of these names.
*/
private final TokenList suppress = new TokenList();
/** Indentation flag: true if both S_INDENT and S_FORMAT are enabled. */
final boolean indent;
/** Include content type flag (parameter S_INCLUDE_CONTENT_TYPE). */
final boolean content;
/** Media type; if empty, the content type falls back to text/html. */
private final String media;
/** Charset of the output encoding. */
private final Charset encoding;
/** New line sequence, encoded with the output charset. */
final byte[] nl;
/** Output stream. */
final PrintOutput out;
/** UTF8 flag: if set, the print methods write bytes without re-encoding. */
private final boolean utf8;
// project specific properties
/** Number of indentation characters ({@link #tab}) per level. */
final int indents;
/** Indentation character: tab if S_TABULATOR is enabled, space otherwise. */
final char tab;
/**
* Format items: if disabled, {@link #code(int)} and attribute values are
* printed without escaping.
*/
private final boolean format;
/** Prefix for wrapped results. */
private final byte[] wPre;
/** Wrapper flag: true if the prefix for wrapped results is not empty. */
private final boolean wrap;
/**
* Constructor, specifying serialization options.
* Besides reading the properties, the constructor already writes to the
* stream: an optional byte-order mark, the XML declaration (XML and XHTML
* serializers only) and, if wrapping is enabled, the opening results element.
* @param os output stream reference
* @param props serialization properties ({@code PROPS} is used if
*   {@code null} is passed)
* @param versions supported versions; the first entry is used if no version
*   is specified in the properties
* @throws IOException I/O exception
*/
OutputSerializer(final OutputStream os, final SerializerProp props,
final String... versions) throws IOException {
// fall back to the default properties
final SerializerProp p = props == null ? PROPS : props;
// no version specified: choose first supported version (or empty string);
// otherwise, the specified version is checked against the supported ones
final String ver = p.get(S_VERSION).isEmpty() ?
versions.length > 0 ? versions[0] : "" : p.check(S_VERSION, versions);
final boolean decl = !p.yes(S_OMIT_XML_DECLARATION);
final boolean bom = p.yes(S_BYTE_ORDER_MARK);
final String sa = p.check(S_STANDALONE, YES, NO, OMIT);
// the normalization form is only checked; its value is not used here
p.check(S_NORMALIZATION_FORM, NFC, NONE);
final String maps = p.get(S_USE_CHARACTER_MAPS);
final String enc = normEncoding(p.get(S_ENCODING), null);
try {
encoding = Charset.forName(enc);
} catch(final Exception ex) {
// unknown or illegal charset name
throw SERENCODING.thrwSerial(enc);
}
// comparison by reference
// NOTE(review): presumably normEncoding() returns the shared constants
// for known encodings; verify before replacing this with equals()
utf8 = enc == UTF8;
// project specific properties
indents = Math.max(0, toInt(p.get(S_INDENTS)));
format = p.yes(S_FORMAT);
tab = p.yes(S_TABULATOR) ? '\t' : ' ';
wPre = token(p.get(S_WRAP_PREFIX));
/* URI for wrapped results. */
final byte[] wUri = token(p.get(S_WRAP_URI));
// results are only wrapped if a prefix was specified
wrap = wPre.length != 0;
final String eol = p.check(S_NEWLINE, S_NL, S_CR, S_CRNL);
// newline bytes are pre-encoded, as they are written directly to the stream
nl = (eol.equals(S_NL) ? "\n" : eol.equals(S_CR) ? "\r" : "\r\n").
getBytes(encoding);
docsys = p.get(S_DOCTYPE_SYSTEM);
docpub = p.get(S_DOCTYPE_PUBLIC);
media = p.get(S_MEDIA_TYPE);
escape = p.yes(S_ESCAPE_URI_ATTRIBUTES);
content = p.yes(S_INCLUDE_CONTENT_TYPE);
undecl = p.yes(S_UNDECLARE_PREFIXES);
// indentation requires formatting to be enabled
indent = p.yes(S_INDENT) && format;
// any non-empty character map value is reported as error
// NOTE(review): thrwSerial() is expected to raise the error itself here
if(!maps.isEmpty()) SERMAP.thrwSerial(maps);
// the public identifier is discarded if no system identifier is given
if(docsys.isEmpty()) {
docsys = null;
docpub = null;
} else if(docpub.isEmpty()) {
docpub = null;
}
// print byte-order-mark
out = PrintOutput.get(os);
if(bom) {
// comparison by reference
// no byte-order mark is written for any other encoding
if(enc == UTF8) {
out.write(0xEF); out.write(0xBB); out.write(0xBF);
} else if(enc == UTF16LE) {
out.write(0xFF); out.write(0xFE);
} else if(enc == UTF16BE) {
out.write(0xFE); out.write(0xFF);
}
}
// collect whitespace-separated names of elements that suppress indentation
final String supp = p.get(S_SUPPRESS_INDENTATION);
if(!supp.isEmpty()) {
for(final String c : supp.split("\\s+")) {
if(!c.isEmpty()) suppress.add(token(c));
}
}
// print document declaration
if(this instanceof XMLSerializer || this instanceof XHTMLSerializer) {
// collect whitespace-separated names of CDATA section elements
final String cdse = p.get(S_CDATA_SECTION_ELEMENTS);
if(!cdse.isEmpty()) {
for(final String c : cdse.split("\\s+")) {
if(!c.isEmpty()) cdata.add(token(c));
}
}
// prefixes cannot be undeclared in version 1.0
if(undecl && ver.equals(V10)) SERUNDECL.thrwSerial();
if(decl) {
print(PI_O);
print(DOCDECL1);
print(ver);
print(DOCDECL2);
// the encoding is printed as specified, not in its normalized form
print(p.get(S_ENCODING));
if(!sa.equals(OMIT)) {
print(DOCDECL3);
print(sa);
}
print(ATT2);
print(PI_C);
// subsequent output is indented if indentation is enabled
ind = indent;
} else if(!sa.equals(OMIT) || !ver.equals(V10) && docsys != null) {
// omitted declaration: standalone must be omitted as well, and a system
// document type is only accepted for version 1.0
SERSTAND.thrwSerial();
}
}
// open results element
if(wrap) {
// wrap implies a non-empty prefix, so the prefixed name is always chosen
openElement(wPre.length != 0 ?
concat(wPre, COLON, T_RESULTS) : T_RESULTS);
namespace(wPre, wUri);
}
}
@Override
public final void reset() {
  // clear both formatting states
  item = false;
  ind = false;
}
@Override
public void close() throws IOException {
  // terminate the wrapping results element, if one was opened
  if(wrap) {
    closeElement();
  }
  out.flush();
}
@Override
public void openResult() throws IOException {
  // nothing to do if results are not wrapped
  if(!wrap) return;
  // use the prefixed element name if a prefix exists
  openElement(wPre.length == 0 ? T_RESULT : concat(wPre, COLON, T_RESULT));
  ind = false;
}
@Override
public void closeResult() throws IOException {
  // a result element only exists if results are wrapped
  if(!wrap) return;
  closeElement();
}
@Override
public void attribute(final byte[] n, final byte[] v) throws IOException {
  // leading space, attribute name and opening delimiter
  print(' ');
  print(n);
  print(ATT1);
  // print the value, one codepoint at a time
  int pos = 0;
  while(pos < v.length) {
    final int c = cp(v, pos);
    if(format) {
      switch(c) {
        case '"':
          // quotes would terminate the attribute value
          print(E_QU);
          break;
        case 0x9:
        case 0xA:
          // tabs and newlines are written as character references
          hex(c);
          break;
        default:
          code(c);
      }
    } else {
      printChar(c);
    }
    pos += cl(v, pos);
  }
  print(ATT2);
}
@Override
public void finishText(final byte[] b) throws IOException {
  // CDATA output is chosen if the current tag is a CDATA section element
  final boolean cd = !cdata.empty() && !tags.empty() &&
    cdata.contains(tags.peek());
  if(cd) {
    print(CDATA_O);
    // number of directly preceding ']' characters
    int run = 0;
    int pos = 0;
    while(pos < b.length) {
      final int c = cp(b, pos);
      if(c != ']') {
        // split the section before the '>' of a "]]>" sequence
        if(c == '>' && run > 1) {
          print(CDATA_C);
          print(CDATA_O);
        }
        run = 0;
      } else {
        ++run;
      }
      printChar(c);
      pos += cl(b, pos);
    }
    print(CDATA_C);
  } else {
    // default: encode and print all codepoints
    int pos = 0;
    while(pos < b.length) {
      code(cp(b, pos));
      pos += cl(b, pos);
    }
  }
  ind = false;
}
@Override
public void finishText(final byte[] b, final FTPos ftp) throws IOException {
  // split the text into spans
  for(final FTLexer lexer = new FTLexer().sc().init(b); lexer.hasNext();) {
    final FTSpan sp = lexer.next();
    // print a marker in front of non-special spans contained in the positions
    final boolean marked = !sp.special && ftp.contains(sp.pos);
    if(marked) print((char) TokenBuilder.MARK);
    // encode and print the codepoints of the span
    final byte[] txt = sp.text;
    int pos = 0;
    while(pos < txt.length) {
      code(cp(txt, pos));
      pos += cl(txt, pos);
    }
  }
  ind = false;
}
@Override
public void finishComment(final byte[] n) throws IOException {
  if(ind) {
    indent();
  }
  // the content is printed as is, enclosed by the comment delimiters
  print(COMM_O);
  print(n);
  print(COMM_C);
}
@Override
public void finishPi(final byte[] n, final byte[] v) throws IOException {
  if(ind) {
    indent();
  }
  // name and value are separated by a single space and printed as is
  print(PI_O);
  print(n);
  print(' ');
  print(v);
  print(PI_C);
}
@Override
public void finishItem(final Item it) throws IOException {
  // a pending indentation flag yields a single space as separator
  if(ind) print(' ');
  try {
    if(!(it instanceof StrStream)) {
      // default: encode the codepoints of the string representation
      final byte[] str = it.string(null);
      int pos = 0;
      while(pos < str.length) {
        code(cp(str, pos));
        pos += cl(str, pos);
      }
    } else {
      // streamed string: encode each value returned by the input stream
      final InputStream in = ((StrStream) it).input(null);
      try {
        int b = in.read();
        while(b != -1) {
          code(b);
          b = in.read();
        }
      } finally {
        in.close();
      }
    }
  } catch(final QueryException ex) {
    // query errors are passed on as serialization errors
    throw new SerializerException(ex);
  }
  item = true;
  ind = format;
}
/**
 * Encodes the specified character and prints it. If formatting is disabled,
 * the character is printed without any escaping.
 * @param ch character to be encoded and printed
 * @throws IOException I/O exception
 */
void code(final int ch) throws IOException {
  if(format) {
    // control characters (except newline and tab) and 0x80-0x9F
    final boolean ctrl = ch < ' ' && ch != '\n' && ch != '\t' ||
      ch > 0x7F && ch < 0xA0;
    if(ctrl) {
      hex(ch);
      return;
    }
    // characters with predefined entities
    switch(ch) {
      case '&':
        print(E_AMP);
        return;
      case '>':
        print(E_GT);
        return;
      case '<':
        print(E_LT);
        return;
      default:
    }
  }
  printChar(ch);
}
@Override
public final boolean finished() {
// delegates to the output stream
return out.finished();
}
@Override
protected void startOpen(final byte[] t) throws IOException {
  // the document type declaration (if any) precedes the element
  doctype(t);
  if(ind) {
    indent();
  }
  print(ELEM_O);
  print(t);
  // remember to indent the following output, if indentation is enabled
  ind = indent;
}
/**
 * Prints the document type declaration. Nothing is printed if the current
 * level is not 0 or if no system document type is set; after printing, the
 * system document type is reset, so the declaration is output only once.
 * @param dt document type, or {@code null} for html type
 * @throws IOException I/O exception
 */
void doctype(final byte[] dt) throws IOException {
  if(level == 0 && docsys != null) {
    if(ind) indent();
    print(DOCTYPE);
    if(dt != null) print(dt);
    else print(M_HTML);
    // public identifier, if available; system keyword otherwise
    if(docpub == null) {
      print(' ' + SYSTEM);
    } else {
      print(' ' + PUBLIC + " \"" + docpub + '"');
    }
    print(" \"" + docsys + '"');
    print(ELEM_C);
    print(nl);
    docsys = null;
  }
}
@Override
protected void finishOpen() throws IOException {
// terminates the opening tag with the ELEM_C token
print(ELEM_C);
}
@Override
protected void finishEmpty() throws IOException {
// terminates the tag of an empty element with the ELEM_SC token
print(ELEM_SC);
}
@Override
protected void finishClose() throws IOException {
  if(ind) {
    indent();
  }
  // closing tag of the current element
  print(ELEM_OS);
  print(tag);
  print(ELEM_C);
  // remember to indent the following output, if indentation is enabled
  ind = indent;
}
/**
 * Prints a newline and the indentation for the current level. Nothing is
 * printed if indentation is disabled, if an item was serialized right before
 * (the item flag is reset instead), or if one of the elements for which
 * indentation is suppressed is contained in the current tags.
 * @throws IOException I/O exception
 */
final void indent() throws IOException {
  if(!indent) return;
  // skip indentation once after an item
  if(item) {
    item = false;
    return;
  }
  // check if indentation is suppressed by one of the current tags
  if(!(suppress.empty() || tags.empty())) {
    for(final byte[] name : suppress) {
      if(tags.contains(name)) return;
    }
  }
  print(nl);
  for(int rest = level * indents; rest > 0; --rest) print(tab);
}
/**
 * Prints a hexadecimal character reference ({@code &#xHH;}) for the
 * specified character. Two hex digits are always printed.
 * @param ch character; the callers in this class pass values smaller than
 *   0x100 (NOTE(review): HEX is presumably a 16-entry digit table, so larger
 *   values would fail on the first lookup; confirm before passing them)
 * @throws IOException I/O exception
 */
final void hex(final int ch) throws IOException {
  // the prefix is required: without it, the digits and the semicolon
  // would be printed as plain text instead of a character reference
  print("&#x");
  print(HEX[ch >> 4]);
  print(HEX[ch & 15]);
  print(';');
}
/**
 * Writes a character in the current encoding.
 * Newline characters are replaced by the configured newline bytes.
 * @param ch character to be printed
 * @throws IOException I/O exception
 */
final void printChar(final int ch) throws IOException {
  if(ch != '\n') {
    print(ch);
    return;
  }
  // the newline bytes are already encoded
  out.write(nl);
}
/**
 * Writes a single character in the current encoding.
 * @param ch character to be printed
 * @throws IOException I/O exception
 */
void print(final int ch) throws IOException {
  if(!utf8) {
    // other encodings: convert the character to a string and encode it
    final String s = new TokenBuilder(4).add(ch).toString();
    out.write(s.getBytes(encoding));
    return;
  }
  out.utf8(ch);
}
/**
 * Writes a token in the current encoding.
 * @param token token to be printed
 * @throws IOException I/O exception
 */
final void print(final byte[] token) throws IOException {
  if(!utf8) {
    // other encodings: convert the token to a string and encode it
    out.write(string(token).getBytes(encoding));
    return;
  }
  // UTF8 output: the bytes of the token are written unchanged
  for(int i = 0; i < token.length; i++) out.write(token[i]);
}
/**
 * Writes a string in the current encoding.
 * @param s string to be printed
 * @throws IOException I/O exception
 */
final void print(final String s) throws IOException {
  if(utf8) {
    // UTF8 output: write the bytes of the token representation
    final byte[] bytes = token(s);
    for(int i = 0; i < bytes.length; i++) out.write(bytes[i]);
    return;
  }
  out.write(s.getBytes(encoding));
}
/**
 * Prints the content type declaration as meta element. The declaration is
 * only printed while the content type state equals 1; the state is
 * incremented afterwards.
 * @param empty empty flag: if set, the current opening tag is finished
 *   before, and the element is closed after the declaration
 * @param html method: if set, the meta element is terminated like an opening
 *   tag; otherwise, a space and the self-closing token are printed
 * @return {@code true} if declaration was printed
 * @throws IOException I/O exception
 */
boolean ct(final boolean empty, final boolean html)
    throws IOException {
  if(ct != 1) return false;
  ct++;
  if(empty) finishOpen();
  // the meta element is serialized one level deeper
  level++;
  startOpen(META);
  attribute(HTTPEQUIV, token(CONTENT_TYPE));
  // media type (default: text/html), followed by the charset
  final String type = media.isEmpty() ? MimeTypes.TEXT_HTML : media;
  final byte[] value =
    new TokenBuilder(type).add(CHARSET).addExt(encoding).finish();
  attribute(CONTENT, value);
  if(!html) {
    print(' ');
    print(ELEM_SC);
  } else {
    print(ELEM_C);
  }
  level--;
  if(empty) finishClose();
  return true;
}
// HTML Serializer: cache elements
static {
// elements with an empty content model
EMPTIES.add(token("area"));
EMPTIES.add(token("base"));
EMPTIES.add(token("br"));
EMPTIES.add(token("col"));
EMPTIES.add(token("hr"));
EMPTIES.add(token("img"));
EMPTIES.add(token("input"));
EMPTIES.add(token("link"));
EMPTIES.add(token("meta"));
EMPTIES.add(token("basefont"));
EMPTIES.add(token("frame"));
EMPTIES.add(token("isindex"));
EMPTIES.add(token("param"));
// URI attributes
URIS.add(token("a:href"));
URIS.add(token("a:name"));
URIS.add(token("applet:codebase"));
URIS.add(token("area:href"));
URIS.add(token("base:href"));
URIS.add(token("blockquote:cite"));
URIS.add(token("body:background"));
URIS.add(token("button:datasrc"));
URIS.add(token("del:cite"));
URIS.add(token("div:datasrc"));
URIS.add(token("form:action"));
URIS.add(token("frame:longdesc"));
URIS.add(token("frame:src"));
URIS.add(token("head:profile"));
URIS.add(token("iframe:longdesc"));
URIS.add(token("iframe:src"));
URIS.add(token("img:longdesc"));
URIS.add(token("img:src"));
URIS.add(token("img:usemap"));
URIS.add(token("input:datasrc"));
URIS.add(token("input:src"));
URIS.add(token("input:usemap"));
URIS.add(token("ins:cite"));
URIS.add(token("link:href"));
URIS.add(token("object:archive"));
URIS.add(token("object:classid"));
URIS.add(token("object:codebase"));
URIS.add(token("object:data"));
URIS.add(token("object:datasrc"));
URIS.add(token("object:usemap"));
URIS.add(token("q:cite"));
URIS.add(token("script:for"));
URIS.add(token("script:src"));
URIS.add(token("select:datasrc"));
URIS.add(token("span:datasrc"));
URIS.add(token("table:datasrc"));
URIS.add(token("textarea:datasrc"));
}
}