/*
* Copyright (C) 2000-2015 aw2.0 LTD
*
* This file is part of Open BlueDragon (OpenBD) CFML Server Engine.
*
* OpenBD is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* Free Software Foundation,version 3.
*
* OpenBD is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with OpenBD. If not, see http://www.gnu.org/licenses/
*
* Additional permission under GNU GPL version 3 section 7
*
* If you modify this Program, or any covered work, by linking or combining
* it with any of the JARS listed in the README.txt (or a modified version of
* (that library), containing parts covered by the terms of that JAR, the
* licensors of this Program grant you additional permission to convey the
* resulting work.
* README.txt @ http://www.openbluedragon.org/license/README.txt
*
* http://www.openbd.org/
* $Id: XmlSource.java 2506 2015-02-08 22:25:59Z alan $
*/
package com.naryx.tagfusion.cfm.xml.parse;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.net.URL;
import org.xml.sax.InputSource;
/**
 * Factory that creates {@link InputSource} objects from an underlying data source and offers helpers for inspecting the
 * start of an XML document (declared encoding, presence of a DOCTYPE).
 * <p>
 * Supported source types are {@link File}, {@link URL} and {@link String} (where the String is the XML content itself,
 * not a path). Any other type, including null, is rejected with an {@link IOException} when the source is used.
 *
 * @author Matt Jacobsen
 *
 */
public class XmlSource {
  /** Encoding reported when the document does not declare one. */
  private static final String DEFAULT_ENCODING = "UTF-8";

  /** Opening characters of the xml declaration. */
  private static final String XML_DECL_START = "<?xml";

  /** Opening characters of a document type declaration. */
  private static final String DOCTYPE_START = "<!DOCTYPE";

  /** The wrapped File, URL or String; never reassigned after construction. */
  private final Object source;

  /**
   * Default constructor.
   *
   * @param source
   *          underlying data source; expected to be a File, URL or String. The type is not checked here but when the
   *          source is used.
   */
  public XmlSource(Object source) {
    this.source = source;
  }

  /**
   * Returns a new InputSource to parse. For File and URL sources a fresh stream is opened on every call and handed to
   * the returned InputSource without being closed here.
   *
   * @return new InputSource to parse
   * @throws IOException
   *           if the stream cannot be opened, or if the source is null or of an unsupported type
   */
  public InputSource newInputSource() throws IOException {
    if (source instanceof File) {
      return new InputSource(new BufferedInputStream(new FileInputStream((File) source)));
    }
    if (source instanceof URL) {
      try {
        return new InputSource(((URL) source).openStream());
      } catch (IllegalArgumentException ex) {
        com.nary.Debug.printStackTrace(ex);
        // Keep the original exception as the cause so the stack trace is not lost
        throw new IOException(ex.getMessage(), ex);
      }
    }
    if (source instanceof String) {
      return new InputSource(new StringReader((String) source));
    }
    throw new IOException("Unexpected data stream type: " + ((source == null) ? "null" : source.getClass().getName()));
  }

  /**
   * Returns the encoding for the xml data contained in the InputStream, if specified. Otherwise defaults to UTF-8.
   * <p>
   * The stream is consumed while looking for the xml declaration, and the reader wrapped around it is closed before
   * returning (which presumably closes the stream as well), so callers should not expect to reuse it.
   *
   * @param stream
   *          InputStream to read from
   * @return encoding specified in the xml declaration or UTF-8.
   * @throws IOException
   *           if reading from the stream fails
   */
  public static String getStreamEncoding(InputStream stream) throws IOException {
    String encoding = null;

    // Get the prolog from the stream
    String prolog = readXmlProlog(stream);
    if (prolog != null) {
      encoding = readEncoding(prolog);
    }

    // If specified, use it. Otherwise go with the default.
    if (encoding == null || encoding.trim().equals("")) {
      encoding = DEFAULT_ENCODING;
    }
    return encoding;
  }

  /**
   * Returns the value of the "encoding" attribute in the &lt;?xml ...?&gt; prolog if it exists in the specified String.
   * Otherwise returns null.
   *
   * @param str
   *          String containing xml data; may be null
   * @return value of the "encoding" attribute, or null if str is null, has no complete &lt;?xml ...?&gt; declaration,
   *         or the declaration has no properly quoted encoding value
   */
  protected static String readEncoding(String str) {
    if (str == null) {
      return null;
    }
    int declStart = str.indexOf(XML_DECL_START);
    if (declStart == -1) {
      return null;
    }
    int declEnd = str.indexOf("?>", declStart);
    if (declEnd == -1) {
      return null;
    }
    // The attribute name and everything after it must lie inside the declaration
    int attr = str.indexOf("encoding", declStart);
    if (attr == -1 || attr >= declEnd) {
      return null;
    }
    int equals = str.indexOf('=', attr + "encoding".length());
    if (equals == -1 || equals >= declEnd) {
      return null;
    }
    // May be single or double quoted according to the spec
    for (int i = equals + 1; i < declEnd; i++) {
      char c = str.charAt(i);
      if (c == '\'' || c == '"') {
        // Return everything between this quote and the matching closing quote
        int close = str.indexOf(c, i + 1);
        if (close != -1 && close < declEnd) {
          return str.substring(i + 1, close);
        }
        return null; // no closing quote!
      }
    }
    // Didn't find it
    return null;
  }

  /**
   * Reads and returns the &lt;?xml ...?&gt; declaration from the prolog in the xml data contained in the specified
   * InputStream. If no &lt;?xml ...?&gt; declaration is found returns null.
   * <p>
   * Input is accumulated from the first '&lt;' onwards, so the declaration is found even when it arrives spread over
   * many short reads. The reader wrapped around the stream is closed before returning.
   *
   * @param stream
   *          InputStream containing xml data
   * @return the &lt;?xml ...?&gt; declaration (from "&lt;?xml" up to and including the first '&gt;') or null
   * @throws IOException
   *           if reading from the stream fails
   */
  protected static String readXmlProlog(InputStream stream) throws IOException {
    char[] chunk = new char[64];
    // Holds input from the first '<' onwards; anything before it is irrelevant and discarded
    StringBuilder pending = new StringBuilder();
    CommentFilterReader reader = null;
    try {
      reader = new CommentFilterReader(new InputStreamReader(stream));
      int count;
      while ((count = reader.read(chunk, 0, chunk.length)) != -1) {
        pending.append(chunk, 0, count);

        int markup = pending.indexOf("<");
        if (markup == -1) {
          // No markup seen yet, nothing worth keeping
          pending.setLength(0);
          continue;
        }
        if (markup > 0) {
          pending.delete(0, markup);
        }

        // Need to read until we can at least check that this might be the prolog
        if (pending.length() < XML_DECL_START.length()) {
          continue;
        }

        int declStart = pending.indexOf(XML_DECL_START);
        if (declStart == -1) {
          // Not the prolog, so it must not have one
          break;
        }

        // Read to the end of the prolog
        int declEnd = pending.indexOf(">", declStart);
        if (declEnd != -1) {
          // Have a full prolog, return it
          return pending.substring(declStart, declEnd + 1);
        }
        // Declaration started but not finished yet: keep what we have and read more
      }
    } finally {
      // reader is still null if its construction failed; do not mask that failure with a NullPointerException
      if (reader != null) {
        reader.close();
      }
    }
    // No prolog found
    return null;
  }

  /**
   * Returns true if the source has a &lt;!DOCTYPE ...&gt; declaration, false otherwise.
   * <p>
   * File and URL sources are opened, scanned with {@link #streamHasDocType(InputStream)} and closed again. String
   * sources are simply searched for the text "&lt;!DOCTYPE".
   *
   * @return true if the source has a &lt;!DOCTYPE ...&gt; declaration, false otherwise
   * @throws IOException
   *           if the source cannot be opened or read, or if it is null or of an unsupported type
   */
  public boolean hasDTD() throws IOException {
    if (source instanceof File) {
      FileInputStream fin = null;
      BufferedInputStream bin = null;
      try {
        fin = new FileInputStream((File) source);
        bin = new BufferedInputStream(fin);
        return streamHasDocType(bin);
      } finally {
        if (bin != null) {
          bin.close();
        }
        if (fin != null) {
          fin.close();
        }
      }
    }
    if (source instanceof URL) {
      InputStream in = null;
      try {
        in = ((URL) source).openStream();
        return streamHasDocType(in);
      } catch (IllegalArgumentException ex) {
        com.nary.Debug.printStackTrace(ex);
        // Keep the original exception as the cause so the stack trace is not lost
        throw new IOException(ex.getMessage(), ex);
      } finally {
        if (in != null) {
          in.close();
        }
      }
    }
    if (source instanceof String) {
      return ((String) source).indexOf(DOCTYPE_START) != -1;
    }
    throw new IOException("Unexpected data stream type: " + ((source == null) ? "null" : source.getClass().getName()));
  }

  /**
   * Scans the start of the content for a &lt;!DOCTYPE ...&gt; element. Returns true if found, false otherwise.
   * <p>
   * Every '&lt;' is examined in document order. "&lt;!DOCTYPE" gives true. Any other '&lt;' whose first non-whitespace
   * follower is '?' or '!' is skipped. The first '&lt;' followed by anything else is taken as the start of the
   * document content and ends the scan with false. Unresolved input is carried over between reads, so the result does
   * not depend on how many characters each read happens to return. The reader wrapped around the stream is closed
   * before returning.
   *
   * @param stream
   *          InputStream to search
   * @return true if the content contains a &lt;!DOCTYPE ...&gt; element, false otherwise
   * @throws IOException
   *           if reading from the stream fails
   */
  protected boolean streamHasDocType(InputStream stream) throws IOException {
    char[] chunk = new char[9];
    // Holds input starting at a '<' that cannot be classified yet
    StringBuilder pending = new StringBuilder();
    CommentFilterReader reader = null;
    try {
      reader = new CommentFilterReader(new InputStreamReader(stream));
      int count;
      while ((count = reader.read(chunk, 0, chunk.length)) != -1) {
        pending.append(chunk, 0, count);

        boolean needMoreInput = false;
        int markup;
        while (!needMoreInput && (markup = pending.indexOf("<")) != -1) {
          // Drop everything before the '<' so it sits at index 0
          pending.delete(0, markup);

          String head = pending.substring(0, Math.min(pending.length(), DOCTYPE_START.length()));
          if (head.equals(DOCTYPE_START)) {
            return true;
          }
          if (DOCTYPE_START.startsWith(head)) {
            // Only a prefix of "<!DOCTYPE" so far; the rest may arrive with the next read
            needMoreInput = true;
            continue;
          }

          // Find the first non-whitespace character following the '<'
          int next = 1;
          while (next < pending.length() && Character.isWhitespace(pending.charAt(next))) {
            next++;
          }
          if (next == pending.length()) {
            needMoreInput = true;
            continue;
          }

          char follower = pending.charAt(next);
          if (follower != '?' && follower != '!') {
            // Reached ordinary markup without having seen a DOCTYPE
            return false;
          }

          // Some other '<?...' or '<!...' construct: step past this '<' and look for the next one
          pending.deleteCharAt(0);
        }

        if (!needMoreInput) {
          // No unresolved '<' left, nothing worth keeping
          pending.setLength(0);
        }
      }
    } finally {
      if (reader != null) {
        reader.close();
      }
    }
    return false;
  }
}