/* See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * Esri Inc. licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.esri.gpt.framework.xml; import com.esri.gpt.framework.util.Val; import java.util.ArrayList; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * Cleans comments, new-lines and namespaces from an XML string. */ public class XmlCleaner { // class variables ============================================================= // instance variables ========================================================== /** Default constructor. */ public XmlCleaner() {} // properties ================================================================== // methods ===================================================================== /** * Cleans an XML string. * <br/>Comments, new-lines and namespaces are removed. * @param xml the string to modify * @return the modified string */ public String clean(String xml) { xml = Val.chkStr(xml); // remove new lines xml = xml.replaceAll("\r\n",""); xml = xml.replaceAll("\r",""); xml = xml.replaceAll("\n",""); // remove comments xml = xml.replaceAll("<!--.*?-->",""); // remove namespaces xml = removeNameSpaces(xml); return xml; } /** * Polishes CDATA sections within an xml string. * @param xml the string to modify * @return the modified string */ public String polishCDataSections(String xml) { Pattern pattern; Matcher matcher; String sMatch = ""; xml = Val.chkStr(xml); xml = xml.replaceAll("\r\n",""); pattern = Pattern.compile("<!\\[CDATA\\[.*?]]>"); matcher = pattern.matcher(xml); while (matcher.find()) { sMatch = matcher.group(); sMatch = sMatch.replaceFirst("<!\\[CDATA\\[","<![CDATA["); sMatch = sMatch.replaceFirst("]]>","]]>"); } if (sMatch.length() > 0) { xml = xml.replaceFirst("<!\\[CDATA\\[.*?]]>",sMatch); } return xml; } /** * Removes namespaces from an xml string. * @return xml the xml string to modify */ private String removeNameSpaces(String xml) { // collect name spaces Pattern pattern = Pattern.compile("xmlns:\\w.*?="); Matcher matcher = pattern.matcher(xml); ArrayList<String> nsList = new ArrayList<String>(); while (matcher.find()) { String sMatch = matcher.group(); sMatch = sMatch.substring(6,sMatch.length()-1); if (sMatch.length() > 0) { boolean bNotFound = true; for (int i=0;i<nsList.size()&& bNotFound;i++) { String ns = (String)nsList.get(i); if (ns.equals(sMatch)) { bNotFound = false; } } if (bNotFound) { nsList.add(sMatch); } } } // remove default namespaces and doctypes xml = xml.replaceAll("xmlns=\".*?\"", ""); xml = xml.replaceAll("xmlns:.*?=\".*?\"", ""); xml = xml.replaceAll("xsi:.*?=\".*?\"", ""); xml = xml.replaceAll("<!DOCTYPE.*?]>", ""); xml = xml.replaceAll("<!DOCTYPE.*?>", ""); // remove namespaces have been collected before for (int i=0;i< nsList.size();i++) { String ns = (String) nsList.get(i) + ":"; xml = xml.replaceAll(ns,""); } return xml; } }