/* * eXist Open Source Native XML Database * Copyright (C) 2001-2014, Wolfgang M. Meier (meier@ifs.tu-darmstadt.de) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Library General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * * $Id: */ package org.exist.dom.persistent; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.exist.util.serializer.DOMSerializer; import org.exist.xquery.Constants; import org.w3c.dom.DocumentFragment; import org.xml.sax.InputSource; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.StringWriter; import java.nio.charset.Charset; import java.nio.file.Files; import java.nio.file.Path; import java.util.Optional; import static java.nio.charset.StandardCharsets.UTF_8; import javax.xml.transform.TransformerException; /** * Defines some static utility methods. */ public final class XMLUtil { private static final Logger LOG = LogManager.getLogger(XMLUtil.class.getName()); private XMLUtil() { //Utility class of static methods } public static final String dump(final DocumentFragment fragment) { final StringWriter writer = new StringWriter(); final DOMSerializer serializer = new DOMSerializer(writer, null); try { serializer.serialize(fragment); } catch(final TransformerException e) { //Nothing to do ? } return writer.toString(); } public static final String encodeAttrMarkup(final String str) { final StringBuilder buf = new StringBuilder(); char ch; for(int i = 0; i < str.length(); i++) { switch(ch = str.charAt(i)) { case '&': boolean isEntity = false; for(int j = i + 1; j < str.length(); j++) { if(str.charAt(j) == ';') { isEntity = true; break; } if(!Character.isLetter(str.charAt(j))) { break; } } if(isEntity) { buf.append('&'); } else { buf.append("&"); } break; case '<': buf.append("<"); break; case '>': buf.append(">"); break; case '"': buf.append("""); break; default: buf.append(ch); } } return buf.toString(); } public static final String decodeAttrMarkup(final String str) { final StringBuilder out = new StringBuilder(str.length()); char ch; String ent; int p; for(int i = 0; i < str.length(); i++) { ch = str.charAt(i); if(ch == '&') { p = str.indexOf(';', i); if(p != Constants.STRING_NOT_FOUND) { ent = str.substring(i + 1, p); if("amp".equals(ent)) { out.append('&'); } else if("lt".equals(ent)) { out.append('<'); } else if("gt".equals(ent)) { out.append('>'); } else if("quot".equals(ent)) { out.append('"'); } i = p; continue; } } out.append(ch); } return out.toString(); } public static final Optional<Charset> getEncoding(final String xmlDecl) { if(xmlDecl == null) { return Optional.empty(); } final StringBuilder buf = new StringBuilder(); final int p0 = xmlDecl.indexOf("encoding"); if(p0 == Constants.STRING_NOT_FOUND) { return Optional.empty(); } for(int i = p0 + 8; i < xmlDecl.length(); i++) { if(Character.isWhitespace(xmlDecl.charAt(i)) || xmlDecl.charAt(i) == '=') { continue; } else if(xmlDecl.charAt(i) == '"') { while(xmlDecl.charAt(++i) != '"' && i < xmlDecl.length()) { buf.append(xmlDecl.charAt(i)); } return Optional.of(Charset.forName(buf.toString())); } else { return Optional.empty(); } } return Optional.empty(); } public static final String getXMLDecl(final byte[] data) { boolean foundTag = false; for(int i = 0; i < data.length && !foundTag; i++) { if(data[i] == '<') { foundTag = true; /* * Need to gather the next 4 non-zero values and test them * because greater than 8-bytes character encodings will be * represented with two bits */ boolean foundQuestionMark = false; int placeInDeclString = 0; final byte[] declString = new byte[4]; int x = (i + 1); for(; x < data.length; x++) { if(data[x] == 0) { continue; } if(!foundQuestionMark && data[x] != '?') { break; } else { foundQuestionMark = true; } declString[placeInDeclString] = data[x]; placeInDeclString++; if(placeInDeclString >= 4) { break; } } if(placeInDeclString == 4 && declString[0] == '?' && declString[1] == 'x' && declString[2] == 'm' && declString[3] == 'l') { final ByteArrayOutputStream out = new ByteArrayOutputStream(150); out.write('<'); out.write(declString, 0, 4); for(int j = (x + 1); j < data.length; j++) { if(data[j] != 0) { out.write(data[j]); } if(data[j] == '?') { j++; /* * When we find this we have to start looking for the end tag */ for(; j < data.length; j++) { if(data[j] == 0) { continue; } out.write(data[j]); if(data[j] != '>') { break; } return new String(out.toByteArray()); } } } } } } return null; } @Deprecated public static final String readFile(final Path file) throws IOException { return readFile(file, UTF_8); } @Deprecated public static String readFile(final Path file, final Charset defaultEncoding) throws IOException { // read the file into a string return readFile(Files.readAllBytes(file), defaultEncoding); } @Deprecated public static String readFile(final InputSource inSrc) throws IOException { // read the file into a string try(final ByteArrayOutputStream os = new ByteArrayOutputStream()) { try(final InputStream is = inSrc.getByteStream()) { final byte[] buf = new byte[2048]; int read = -1; while ((read = is.read(buf)) != -1) { os.write(buf, 0, read); } } return readFile(os.toByteArray(), Charset.forName(inSrc.getEncoding())); } } //TODO if needed, replace with a decent NIO implementation @Deprecated public static String readFile(final byte[] in, final Charset defaultEncoding) throws IOException { final String xmlDecl = getXMLDecl(in); final Charset enc = getEncoding(xmlDecl).orElse(defaultEncoding); return new String(in, enc); } public static String parseValue(final String value, final String key) { int p = value.indexOf(key); if(p == Constants.STRING_NOT_FOUND) { return null; } return parseValue(value, p); } public static String parseValue(final String value, int p) { while((p < value.length()) && (value.charAt(++p) != '"')) { // Do nothing } if(p == value.length()) { return null; } int e = ++p; while((e < value.length()) && (value.charAt(++e) != '"')) { // Do nothing } if(e == value.length()) { return null; } return value.substring(p, e); } }