/*
 *  Copyright (C) 2000 - 2010 TagServlet Ltd
 *
 *  This file is part of Open BlueDragon (OpenBD) CFML Server Engine.
 *
 *  OpenBD is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  Free Software Foundation,version 3.
 *
 *  OpenBD is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with OpenBD.  If not, see http://www.gnu.org/licenses/
 *
 *  Additional permission under GNU GPL version 3 section 7
 *
 *  If you modify this Program, or any covered work, by linking or combining
 *  it with any of the JARS listed in the README.txt (or a modified version of
 *  (that library), containing parts covered by the terms of that JAR, the
 *  licensors of this Program grant you additional permission to convey the
 *  resulting work.
 *  README.txt @ http://www.openbluedragon.org/license/README.txt
 *
 *  http://www.openbluedragon.org/
 */
package com.naryx.tagfusion.cfm.xml.parse;

import java.io.IOException;
import java.io.Reader;

/**
 * Filtering reader that handles the {@code <!DOCTYPE ...>} declaration while
 * the document is being read, according to the configured mode:
 * <ul>
 * <li>{@link #READ_ADD}: an existing doctype is read (and reported to the
 * {@link DTDListener}, if one is set); if the document has none, a doctype
 * with the {@link #CUSTOM_DTD} SYSTEM identifier is inserted in front of the
 * document element.</li>
 * <li>{@link #REMOVE_MODIFY}: an existing doctype is rewritten so that it has
 * no SYSTEM/PUBLIC identifier.</li>
 * <li>{@link #REMOVE}: an existing doctype is removed from the stream.</li>
 * <li>{@link #NO_CHANGE}: nothing is changed.</li>
 * </ul>
 * Scanning stops once the start tag of the document element has been seen;
 * everything after that point is passed through untouched.
 */
public class DTDFilterReader extends XmlFilterReader {

	/** Internal SYSTEM identifier for our modified DTD declarations */
	public static final String CUSTOM_DTD = "http://www.newatlanta.com/bluedragondtd";

	/**
	 * Read the existing doctype; if the xml data has none, insert one that
	 * contains the CUSTOM_DTD SYSTEM identifier.
	 */
	public static final byte READ_ADD = 1;

	/** Update any existing doctype so that it has no SYSTEM/PUBLIC identifier. */
	public static final byte REMOVE_MODIFY = 2;

	/** Remove the doctype declaration from the document stream. */
	public static final byte REMOVE = 3;

	/** Make no changes. */
	public static final byte NO_CHANGE = 4;

	// ---- scanner states -------------------------------------------------

	/** Not in any tag; the doctype has not been seen yet. */
	private static final int STATE_PROLOG = 0;

	/** Just read '<'; the kind of tag is not known yet. */
	private static final int STATE_TAG_OPEN = 1;

	/** In a PI, xml declaration or comment; copied through up to the next '>'. */
	private static final int STATE_PASS_THROUGH = 2;

	/** Read "<!" and matching the DOCTYPE keyword character by character. */
	private static final int STATE_KEYWORD = 3;

	/** In the start tag of the document element. */
	private static final int STATE_DOC_ELEMENT = 4;

	/** Inside the doctype declaration, outside the internal subset. */
	private static final int STATE_IN_DOCTYPE = 11;

	/** Inside the internal subset ("[ ... ]") of the doctype declaration. */
	private static final int STATE_INTERNAL_SUBSET = 12;

	/** Not in any tag; the doctype has been found and is still buffered. */
	private static final int STATE_AFTER_DOCTYPE = 13;

	/** Just read '<' after the doctype; the kind of tag is not known yet. */
	private static final int STATE_AFTER_DOCTYPE_TAG = 14;

	/** In a PI or comment that follows the doctype. */
	private static final int STATE_AFTER_DOCTYPE_MISC = 15;

	/** The document element has been reached; no more filtering. */
	private static final int STATE_DONE = 16;

	/** Keyword that distinguishes a doctype declaration from a comment. */
	private static final String DOCTYPE_KEYWORD = "DOCTYPE";

	private int state = STATE_PROLOG;

	/** Index in inputBuffer of the '<' that opened the tag being examined. */
	private int startPos = -1;

	/** Start (inclusive) of the buffered doctype, or -1 if none was found. */
	private int startDTDPos = -1;

	/** End (exclusive) of the buffered doctype, or -1 if none was found. */
	private int endDTDPos = -1;

	private boolean more = true;

	/** Index in inputBuffer at which the next parseInput() call resumes. */
	private int scanPos = 0;

	/** Number of DOCTYPE_KEYWORD characters matched so far in STATE_KEYWORD. */
	private int keywordIndex = 0;

	protected byte mode = READ_ADD;

	/** Characters read from the underlying reader but not yet released to localBuffer. */
	protected StringBuilder inputBuffer = null;

	protected DTDListener listener = null;

	/**
	 * Creates a filter over the given reader. See the class description for
	 * the meaning of each mode.
	 *
	 * @param r
	 *          Reader to filter
	 * @param mode
	 *          either READ_ADD, REMOVE, REMOVE_MODIFY, or NO_CHANGE
	 */
	public DTDFilterReader(Reader r, byte mode) {
		super(r);
		this.mode = mode;
		this.state = STATE_PROLOG;
		this.startPos = -1;
		this.startDTDPos = -1;
		this.endDTDPos = -1;
		this.more = true;
		this.scanPos = 0;
		this.keywordIndex = 0;
		this.inputBuffer = new StringBuilder();
	}

	/**
	 * Returns true if DTD filtering should still continue, false otherwise.
	 *
	 * @return true until the document element has been reached
	 */
	protected boolean stillFiltering() {
		return (state < STATE_DONE);
	}

	/**
	 * Reads from the underlying Reader instance and fills the localBuffer with
	 * filtered data. Note (carried over from the original documentation): this
	 * should not call any public methods in this class or infinite recursion
	 * will result.
	 *
	 * @param minCount
	 *          minimum number of characters that should be read for this call
	 * @return true if more data can be read, false if the end of the data
	 *         stream was reached during this read
	 * @throws IOException
	 *           if the underlying read fails or the doctype cannot be parsed
	 */
	protected boolean readUnderlying(int minCount) throws IOException {
		// Keep reading until at least minCount NEW characters were released
		final int target = minCount + localBuffer.length();
		final char[] chars = new char[512];

		while (localBuffer.length() < target && more) {
			int r = in.read(chars, 0, chars.length);
			if (r != -1) {
				// Process the input, look for <?, <!DOCTYPE, or other.
				inputBuffer.append(chars, 0, r);
				parseInput();
				more = true;
			} else {
				// Need to flush whatever's left in the input because we
				// won't be filtering after this call.
				localBuffer.append(inputBuffer);
				inputBuffer.setLength(0);
				scanPos = 0;
				more = false;
			}
		}
		return more;
	}

	/**
	 * Does the heavy work of parsing the read xml data and correctly keeping
	 * state so that the DTD can be filtered appropriately.
	 * <p>
	 * Scanning resumes at the position where the previous call stopped. The
	 * scanner state survives between calls, so re-reading already examined
	 * characters would interpret them in the wrong state whenever a tag or the
	 * DOCTYPE keyword is split across two reads.
	 *
	 * @throws IOException
	 *           if the doctype declaration cannot be parsed
	 */
	protected void parseInput() throws IOException {
		// Guard against inputBuffer having been shortened since the last call
		int pos = Math.min(scanPos, inputBuffer.length());

		for (; pos < inputBuffer.length(); pos++) {
			char c = inputBuffer.charAt(pos);

			switch (state) {
			case STATE_PROLOG:
				if (c == '<') {
					// Opening of some tag. Don't add to the output just yet
					startPos = pos;
					state = STATE_TAG_OPEN;
				} else {
					// Add whatever it is (should be whitespace) to output
					localBuffer.append(c);
					inputBuffer.deleteCharAt(pos);
					pos = -1;
				}
				break;

			case STATE_TAG_OPEN:
				if (c == '?') {
					// PI or xml decl: release the "<?" and copy the rest through
					flushPendingTag(pos + 1);
					pos = -1;
					state = STATE_PASS_THROUGH;
				} else if (c == '!') {
					// Either a comment or the doctype; decide on the keyword
					keywordIndex = 0;
					state = STATE_KEYWORD;
				} else {
					// Must be opening of the document element (by elimination)
					state = STATE_DOC_ELEMENT;
				}
				break;

			case STATE_PASS_THROUGH:
				localBuffer.append(c);
				inputBuffer.deleteCharAt(pos);
				pos = -1;
				if (c == '>') {
					// Back to looking for DTD and document element
					state = STATE_PROLOG;
				}
				break;

			case STATE_KEYWORD:
				if (c == DOCTYPE_KEYWORD.charAt(keywordIndex)) {
					keywordIndex++;
					if (keywordIndex == DOCTYPE_KEYWORD.length()) {
						// It is the doctype for sure now
						state = STATE_IN_DOCTYPE;
					}
				} else {
					// A comment or some other "<!" tag: release what was held
					// back so far and copy the rest through.
					flushPendingTag(pos + 1);
					pos = -1;
					state = STATE_PASS_THROUGH;
				}
				break;

			case STATE_DOC_ELEMENT:
				if (c == '>') {
					handleDTD(startPos, pos + 1);

					// Flush everything to output buffer
					localBuffer.append(inputBuffer.toString());
					inputBuffer.setLength(0);
					pos = -1;

					// Done filtering/scanning
					state = STATE_DONE;
				}
				break;

			case STATE_IN_DOCTYPE:
				if (c == '[') {
					state = STATE_INTERNAL_SUBSET;
				} else if (c == '>') {
					// Closing the DTD. Keep it buffered until the document
					// element is found and handleDTD() deals with it.
					startDTDPos = startPos;
					endDTDPos = pos + 1;
					state = STATE_AFTER_DOCTYPE;
				}
				break;

			case STATE_INTERNAL_SUBSET:
				if (c == ']') {
					state = STATE_IN_DOCTYPE;
				}
				break;

			case STATE_AFTER_DOCTYPE:
				if (c == '<') {
					startPos = pos;
					state = STATE_AFTER_DOCTYPE_TAG;
				}
				break;

			case STATE_AFTER_DOCTYPE_TAG:
				if (c == '?' || c == '!') {
					// PI or comment
					state = STATE_AFTER_DOCTYPE_MISC;
				} else {
					// Must be opening of the document element (by elimination)
					state = STATE_DOC_ELEMENT;
				}
				break;

			case STATE_AFTER_DOCTYPE_MISC:
				if (c == '>') {
					// The doctype is still buffered in front of this tag, so
					// stay in the "after doctype" states. Falling back to
					// STATE_PROLOG would release later characters ahead of the
					// doctype and invalidate startDTDPos/endDTDPos.
					state = STATE_AFTER_DOCTYPE;
				}
				break;

			case STATE_DONE:
			default:
				// No longer filtering: release everything in one go
				localBuffer.append(inputBuffer.toString());
				inputBuffer.setLength(0);
				pos = -1;
				break;
			}
		}

		scanPos = pos;
	}

	/**
	 * Moves the characters from startPos up to (not including) end out of the
	 * inputBuffer and into the localBuffer.
	 */
	private void flushPendingTag(int end) {
		localBuffer.append(inputBuffer.substring(startPos, end));
		inputBuffer.delete(startPos, end);
	}

	/**
	 * Applies the configured mode to the doctype data held in the inputBuffer.
	 * Called once, when the end of the document element's start tag has been
	 * found. The inputBuffer is modified in place.
	 *
	 * @param startDocElem
	 *          index in inputBuffer of the '<' of the document element
	 * @param endDocElem
	 *          index in inputBuffer just past the '>' that ended the scan
	 * @throws IOException
	 *           if the doctype declaration cannot be parsed
	 */
	protected void handleDTD(int startDocElem, int endDocElem) throws IOException {
		boolean hasDoctype = (startDTDPos != -1);

		if (mode == REMOVE) {
			if (hasDoctype) {
				inputBuffer.delete(startDTDPos, endDTDPos);
			}
		} else if (mode == REMOVE_MODIFY) {
			if (hasDoctype) {
				String newDTD = replaceId(inputBuffer.substring(startDTDPos, endDTDPos));
				inputBuffer.delete(startDTDPos, endDTDPos);
				inputBuffer.insert(startDTDPos, newDTD);
			}
		} else if (mode == READ_ADD) {
			if (hasDoctype) {
				// Called for its side effect of notifying the listener; the
				// rewritten declaration is intentionally not written back.
				// NOTE(review): confirm that leaving the existing doctype
				// untouched in this mode is the intended behaviour.
				replaceId(inputBuffer.substring(startDTDPos, endDTDPos));
			} else {
				String newDTD = "<!DOCTYPE " + readElementName(inputBuffer.substring(startDocElem, endDocElem)) + " SYSTEM \"" + CUSTOM_DTD + "\">";
				inputBuffer.insert(startDocElem, newDTD);
			}
		}
		// NO_CHANGE (or anything else): don't change anything
	}

	/**
	 * Reads the next xml element name from the specified string and returns it.
	 * The name starts after the first '<' (leading whitespace is skipped) and
	 * ends at the first whitespace, '>' or '/', so that both {@code <root>}
	 * and {@code <root/>} yield {@code root}.
	 *
	 * @param str
	 *          String to parse
	 * @return next xml element name
	 */
	protected String readElementName(String str) {
		StringBuilder name = new StringBuilder();
		for (int i = str.indexOf('<') + 1; i < str.length(); i++) {
			char c = str.charAt(i);
			if (Character.isWhitespace(c)) {
				if (name.length() > 0) {
					break;
				}
			} else if (c == '>' || c == '/') {
				break;
			} else {
				name.append(c);
			}
		}
		return name.toString();
	}

	/**
	 * Replaces (mode READ_ADD) or removes (mode REMOVE_MODIFY) the SYSTEM/PUBLIC
	 * identifier in the specified {@code <!DOCTYPE ...>} String. In READ_ADD
	 * mode the identifier is replaced with {@code SYSTEM "CUSTOM_DTD"} and the
	 * listener, if any, is told about the identifier that was found. A
	 * declaration without an external identifier is returned unchanged.
	 *
	 * @param str
	 *          {@code <!DOCTYPE ...>} String to alter
	 * @return altered {@code <!DOCTYPE ...>} String
	 * @throws IOException
	 *           if the declaration is malformed or the mode is neither
	 *           READ_ADD nor REMOVE_MODIFY
	 */
	private String replaceId(String str) throws IOException {
		str = str.trim();
		final int len = str.length();

		// Skip the "<!DOCTYPE" token, then the name token
		int pos = 0;
		while (pos < len && !Character.isWhitespace(str.charAt(pos))) {
			pos++;
		}
		pos = skipWhitespace(str, pos);
		while (pos < len && !isNameEnd(str.charAt(pos))) {
			pos++;
		}
		pos = skipWhitespace(str, pos);

		if (pos >= len) {
			return str;
		}
		char c = str.charAt(pos);
		if (c == '[' || c == '>') {
			// No external identifier: nothing to replace
			return str;
		}

		final boolean isPublic;
		if (str.startsWith("SYSTEM", pos)) {
			isPublic = false;
		} else if (str.startsWith("PUBLIC", pos)) {
			isPublic = true;
		} else if (c == 'S') {
			throw new IOException("Invalid doctype declaration. Expecting SYSTEM identifier: " + str);
		} else if (c == 'P') {
			throw new IOException("Invalid doctype declaration. Expecting PUBLIC identifier: " + str);
		} else {
			throw new IOException("Invalid doctype declaration. Expecting SYSTEM/PUBLIC identifier or " + "entity references, or ], or >: " + str);
		}

		final String kind = isPublic ? "PUBLIC" : "SYSTEM";
		int open = skipWhitespace(str, pos + 6);
		if (isPublic) {
			// Eat the first quoted string
			open = skipWhitespace(str, closingQuote(str, open, kind) + 1);
		}
		int close = closingQuote(str, open, kind);
		char quote = str.charAt(open);

		String head = str.substring(0, pos);
		String tail = str.substring(close + 1);

		if (mode == READ_ADD) {
			// Replace and read
			String literal = str.substring(open + 1, close);
			if (this.listener != null) {
				if (isPublic) {
					// NOTE(review): the first quoted literal was skipped above,
					// so the value reported here as the public id is the SECOND
					// quoted literal of the declaration. Kept as is; confirm
					// that listeners expect this.
					this.listener.setDTD(literal, null);
				} else {
					this.listener.setDTD(null, literal);
				}
			}
			return head + "SYSTEM " + quote + CUSTOM_DTD + quote + tail;
		} else if (mode == REMOVE_MODIFY) {
			// Remove
			return head + tail;
		}

		// Should never reach here
		throw new IOException("Invalid DTD Filter mode: " + mode + ". Expecting READ_ADD (" + READ_ADD + ") or REMOVE_MODIFY (" + REMOVE_MODIFY + ").");
	}

	/** Returns the index of the first non-whitespace character at or after pos. */
	private static int skipWhitespace(String str, int pos) {
		while (pos < str.length() && Character.isWhitespace(str.charAt(pos))) {
			pos++;
		}
		return pos;
	}

	/** Returns true if c terminates the name token of a doctype declaration. */
	private static boolean isNameEnd(char c) {
		return Character.isWhitespace(c) || c == '[' || c == '>';
	}

	/**
	 * Returns the index of the quote that closes the literal opened at the
	 * given position; throws IOException if no quote opens there or the
	 * literal is never closed.
	 */
	private static int closingQuote(String str, int open, String kind) throws IOException {
		if (open >= str.length() || (str.charAt(open) != '\'' && str.charAt(open) != '"')) {
			throw new IOException("Invalid doctype declaration. Expecting quoted " + kind + " literal: " + str);
		}
		int close = str.indexOf(str.charAt(open), open + 1);
		if (close == -1) {
			throw new IOException("Invalid doctype declaration. Unterminated " + kind + " literal: " + str);
		}
		return close;
	}

	/**
	 * Sets the DTDListener for this DTDFilterReader.
	 *
	 * @param list
	 *          DTDListener for this DTDFilterReader
	 */
	public void setListener(DTDListener list) {
		this.listener = list;
	}

	/**
	 * Callback interface for objects interested in the DTD from the filtered xml
	 * document.
	 *
	 * @author mattj
	 *
	 */
	public interface DTDListener {
		/**
		 * Sets the public id and system id from the read {@code <!DOCTYPE ...>}
		 * element.
		 *
		 * @param publicId
		 *          public id from the {@code <!DOCTYPE ...>} element
		 * @param systemId
		 *          system id from the {@code <!DOCTYPE ...>} element
		 */
		public void setDTD(String publicId, String systemId);
	}
}