/* * Copyright 2002-2007 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.springframework.util.xml; import java.io.BufferedReader; import java.io.CharConversionException; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import org.springframework.util.StringUtils; /** * Detects whether an XML stream is using DTD- or XSD-based validation. * * @author Rob Harrop * @author Juergen Hoeller * @since 2.0 */ public class XmlValidationModeDetector { /** * Indicates that the validation should be disabled. */ public static final int VALIDATION_NONE = 0; /** * Indicates that the validation mode should be auto-guessed, since we cannot find * a clear indication (probably choked on some special characters, or the like). */ public static final int VALIDATION_AUTO = 1; /** * Indicates that DTD validation should be used (we found a "DOCTYPE" declaration). */ public static final int VALIDATION_DTD = 2; /** * Indicates that XSD validation should be used (found no "DOCTYPE" declaration). */ public static final int VALIDATION_XSD = 3; /** * The token in a XML document that declares the DTD to use for validation * and thus that DTD validation is being used. */ private static final String DOCTYPE = "DOCTYPE"; /** * The token that indicates the start of an XML comment. */ private static final String START_COMMENT = "<!--"; /** * The token that indicates the end of an XML comment. */ private static final String END_COMMENT = "-->"; /** * Indicates whether or not the current parse position is inside an XML comment. */ private boolean inComment; /** * Detect the validation mode for the XML document in the supplied {@link InputStream}. * Note that the supplied {@link InputStream} is closed by this method before returning. * @param inputStream the InputStream to parse * @throws IOException in case of I/O failure * @see #VALIDATION_DTD * @see #VALIDATION_XSD */ public int detectValidationMode(InputStream inputStream) throws IOException { // Peek into the file to look for DOCTYPE. BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream)); try { boolean isDtdValidated = false; String content; while ((content = reader.readLine()) != null) { content = consumeCommentTokens(content); if (this.inComment || !StringUtils.hasText(content)) { continue; } if (hasDoctype(content)) { isDtdValidated = true; break; } if (hasOpeningTag(content)) { // End of meaningful data... break; } } return (isDtdValidated ? VALIDATION_DTD : VALIDATION_XSD); } catch (CharConversionException ex) { // Choked on some character encoding... // Leave the decision up to the caller. return VALIDATION_AUTO; } finally { reader.close(); } } /** * Does the content contain the the DTD DOCTYPE declaration? */ private boolean hasDoctype(String content) { return (content.indexOf(DOCTYPE) > -1); } /** * Does the supplied content contain an XML opening tag. If the parse state is currently * in an XML comment then this method always returns false. It is expected that all comment * tokens will have consumed for the supplied content before passing the remainder to this method. */ private boolean hasOpeningTag(String content) { if (this.inComment) { return false; } int openTagIndex = content.indexOf('<'); return (openTagIndex > -1 && content.length() > openTagIndex && Character.isLetter(content.charAt(openTagIndex + 1))); } /** * Consumes all the leading comment data in the given String and returns the remaining content, which * may be empty since the supplied content might be all comment data. For our purposes it is only important * to strip leading comment content on a line since the first piece of non comment content will be either * the DOCTYPE declaration or the root element of the document. */ private String consumeCommentTokens(String line) { if (line.indexOf(START_COMMENT) == -1 && line.indexOf(END_COMMENT) == -1) { return line; } while ((line = consume(line)) != null) { if (!this.inComment && !line.trim().startsWith(START_COMMENT)) { return line; } } return line; } /** * Consume the next comment token, update the "inComment" flag * and return the remaining content. */ private String consume(String line) { int index = (this.inComment ? endComment(line) : startComment(line)); return (index == -1 ? null : line.substring(index)); } /** * Try to consume the {@link #START_COMMENT} token. * @see #commentToken(String, String, boolean) */ private int startComment(String line) { return commentToken(line, START_COMMENT, true); } private int endComment(String line) { return commentToken(line, END_COMMENT, false); } /** * Try to consume the supplied token against the supplied content and update the * in comment parse state to the supplied value. Returns the index into the content * which is after the token or -1 if the token is not found. */ private int commentToken(String line, String token, boolean inCommentIfPresent) { int index = line.indexOf(token); if (index > - 1) { this.inComment = inCommentIfPresent; } return (index == -1 ? index : index + token.length()); } }