/* * JEF - Copyright 2009-2010 Jiyi (mr.jiyi@gmail.com) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package jef.tools; import java.io.BufferedReader; import java.io.File; import java.io.FilterReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStream; import java.io.OutputStreamWriter; import java.io.PushbackInputStream; import java.io.Reader; import java.io.StringReader; import java.io.StringWriter; import java.io.Writer; import java.lang.reflect.Array; import java.net.URL; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.NoSuchElementException; import javax.management.ReflectionException; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import javax.xml.xpath.XPath; import javax.xml.xpath.XPathConstants; import javax.xml.xpath.XPathExpressionException; import javax.xml.xpath.XPathFactory; import jef.common.log.LogUtil; import jef.tools.reflect.BeanWrapper; import jef.tools.reflect.BeanWrapperImpl; import jef.tools.reflect.Property; import jef.tools.reflect.UnsafeUtils; import jef.tools.string.CharsetName; import org.apache.commons.lang.StringEscapeUtils; import org.apache.html.dom.HTMLDocumentImpl; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.w3c.dom.Attr; import org.w3c.dom.CDATASection; import org.w3c.dom.Comment; import org.w3c.dom.Document; import org.w3c.dom.DocumentFragment; import org.w3c.dom.Element; import org.w3c.dom.NamedNodeMap; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.w3c.dom.Text; import org.w3c.dom.html.HTMLDocument; import org.xml.sax.EntityResolver; import org.xml.sax.ErrorHandler; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; import com.alibaba.fastjson.JSONObject; /** * 使用JAXP,封装了基于XML的各种基本操作 * * * <b>重要,关于xercesImpl</b> * * <pre> * 本类的高级功能需要在有xerces解析器的情况下才能工作。 * xerces是 apache的一个第三方解析包。 * 作者目前测试了xercesImpl从 2.6.x到2.11.x各个版本的兼容性,推荐使用 2.7.1~2.9.1之间的版本。 * 2.7.1之前的版本不能支持cyberneko的HTML解析。因此不建议使用2.6.2或以前的版本。 * 2.10.0开始由于其用到了org.w3c.dom.ElementTraversal这个类,在JDK 6下要求再引入包xml-api。 * 这容易在weblogic等环境下产生兼容性问题,也不推荐使用。 * 本工程在这里默认引用2.9.1版本 * </pre> * * @author jiyi * */ public class XMLUtils { private static final Logger log = LoggerFactory.getLogger("XMLUtils"); /** * 缓存的DocumentBuilderFactory<br> * 每个DocumentBuilderFactory构造开销在0.3ms左右,缓存很有必要 */ private static DocumentBuilderFactory domFactoryTT; private static DocumentBuilderFactory domFactoryTF; private static DocumentBuilderFactory domFactoryFT; private static DocumentBuilderFactory domFactoryFF; /** * 初始化各类解析器,分析当前运行环境 */ static { try { Class.forName("org.apache.xerces.xni.XMLDocumentHandler"); try { Class<?> cParser = Class.forName("org.cyberneko.html.parsers.DOMFragmentParser"); if (cParser != null) { parser = (jef.tools.IDOMFragmentParser) cParser.newInstance(); } } catch (Exception e) { // 没有将common-net包依赖进来,无法使用HTML解析功能 LogUtil.warn("The EF-HTML parser engine not found, HTMLParser feature will be disabled. Import easyframe 'common-misc' library to the classpath to activate this feature."); } } catch (Exception e) { // xerces版本过旧,不支持进行HTML解析 LogUtil.warn("The Apache xerces implemention not avaliable, HTMLParser feature will be disabled. you must import library 'xercesImpl'(version >= 2.7.1) into classpath."); } try { domFactoryTT = initFactory(true, true); domFactoryTF = initFactory(true, false); domFactoryFT = initFactory(false, true); domFactoryFF = initFactory(false, false); } catch (Exception e) { log.error("FATAL: Error in init DocumentBuilderFactory. XML Parser will not work!", e); } } /* * 创建解析器工厂 * * @param ignorComments 忽略注释 * * @param namespaceAware 识别命名空间 * * @return DocumentBuilderFactoy */ private static DocumentBuilderFactory initFactory(boolean ignorComments, boolean namespaceAware) { DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); dbf.setIgnoringElementContentWhitespace(true); dbf.setValidating(false); // 关闭DTD校验 dbf.setIgnoringComments(ignorComments); dbf.setNamespaceAware(namespaceAware); // dbf.setCoalescing(true);//CDATA // 节点转换为Text节点,并将其附加到相邻(如果有)的文本节点,开启后解析更方便,但无法还原 try { // dbf.setFeature("http://xml.org/sax/features/namespaces", false); // dbf.setFeature("http://xml.org/sax/features/validation", false); dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false); dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); } catch (ParserConfigurationException e) { log.warn("Your xerces implemention is too old to support 'load-dtd-grammar' and 'load-external-dtd' feature. Please upgrade xercesImpl.jar to 2.6.2 or above."); } catch (AbstractMethodError e) { log.warn("Your xerces implemention is too old to support 'load-dtd-grammar' and 'load-external-dtd' feature. Please upgrade xercesImpl.jar to 2.6.2 or above."); } try { dbf.setAttribute("http://xml.org/sax/features/external-general-entities", false); } catch (IllegalArgumentException e) { log.warn("Your xerces implemention is too old to support 'external-general-entities' attribute."); } try { dbf.setAttribute("http://xml.org/sax/features/external-parameter-entities", false); } catch (IllegalArgumentException e) { log.warn("Your xerces implemention is too old to support 'external-parameter-entities' attribute."); } try { dbf.setAttribute("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); } catch (IllegalArgumentException e) { log.warn("Your xerces implemention is too old to support 'load-external-dtd' attribute."); } return dbf; } // 内部匿名类,ErrorHandler private static final ErrorHandler EH = new ErrorHandler() { public void error(SAXParseException x) throws SAXException { throw x; } public void fatalError(SAXParseException x) throws SAXException { throw x; } public void warning(SAXParseException x) throws SAXException { log.warn("SAXParserWarnning:", x); } }; /** * 内部匿名类,DTD解析器。优先寻找本地classpath下的DTD资源,然后才考虑通过网络连接获取DTD */ private static final EntityResolver ER = new EntityResolver() { public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException { if (systemId != null && systemId.endsWith(".dtd")) { URL url = new URL(systemId); String file = StringUtils.substringAfterLastIfExist(url.getFile(), "/"); URL u = this.getClass().getClassLoader().getResource(file); if (u == null) { u = url; } InputSource source = new InputSource(u.openStream()); source.setPublicId(publicId); source.setSystemId(systemId); return source; } return null; } }; /** * 缓存的DocumentBuilderCache<br> * 每个DocumentBuilder的构造开销在0.4ms左右,缓存很有必要 */ private static final ThreadLocal<DocumentBuilderCache> REUSABLE_BUILDER = new ThreadLocal<DocumentBuilderCache>() { @Override protected DocumentBuilderCache initialValue() { return new DocumentBuilderCache(); } }; /** * 缓存DocumentBuilder的容器 * * @author jiyi * */ private final static class DocumentBuilderCache { DocumentBuilder cacheTT; DocumentBuilder cacheTF; DocumentBuilder cacheFT; DocumentBuilder cacheFF; /** * 根据传入的特性,提供满足条件的DocumentBuilder * * @param ignorComments * @param namespaceAware * @return */ private DocumentBuilder getDocumentBuilder(boolean ignorComments, boolean namespaceAware) { if (ignorComments && namespaceAware) { if (cacheTT == null) { cacheTT = initBuilder(domFactoryTT); } return cacheTT; } else if (ignorComments) { if (cacheTF == null) { cacheTF = initBuilder(domFactoryTF); } return cacheTF; } else if (namespaceAware) { if (cacheFT == null) { cacheFT = initBuilder(domFactoryFT); } return cacheFT; } else { if (cacheFF == null) { cacheFF = initBuilder(domFactoryFF); } return cacheFF; } } private DocumentBuilder initBuilder(DocumentBuilderFactory domFactory) { DocumentBuilder builder; try { builder = domFactory.newDocumentBuilder(); } catch (ParserConfigurationException e) { throw new UnsupportedOperationException(e); } builder.setErrorHandler(EH); builder.setEntityResolver(ER); return builder; } } /** * Xpath解析器 */ private static XPathFactory xp = XPathFactory.newInstance(); /** * HTML解析器 */ private static jef.tools.IDOMFragmentParser parser; /** * 丛Json格式转换为XML Document(兼容Json-Lib) * * @param json * 要读取的json * @return 由json转换而成的XML * @throws SAXException * @throws IOException */ public static Document loadDocument(JSONObject json) { return XMLFastJsonParser.DEFAULT.toDocument(json); } /** * 从XML Document转换为JsonObject,loadDocument(JsonObject json)的逆运算 * * @param node * 要转换的节点 * @return 转换后的json对象 */ public static JSONObject toJsonObject(Node node) { return XMLFastJsonParser.DEFAULT.toJsonObject(node); } /** * 载入XML文档 * * @param file * 文件 * @return Document 加载后的DOM模型 * @throws SAXException * 解析错误 * @throws IOException * 磁盘操作错误 */ public static Document loadDocument(File file) throws SAXException, IOException { return loadDocument(file, true); } /** * 载入XML文件 * * @param file * 文件 * @param ignorComments * 是否忽略掉XML中的注释 * @return Document 加载后的DOM模型 * @throws SAXException * @throws IOException */ public static Document loadDocument(File file, boolean ignorComments) throws SAXException, IOException { InputStream in = IOUtils.getInputStream(file); try { Document document = loadDocument(in, null, ignorComments, false); return document; } finally { in.close(); } } /** * 传入文件路径,解析XML文件 * * @param filename * 文件路径 * @return 加载后的DOM模型 * @throws SAXException * @throws IOException */ public static Document loadDocument(String filename) throws SAXException, IOException { return loadDocument(new File(filename)); } /** * 从URL装载XML * * @param reader * @return 加载后的DOM模型 * @throws SAXException * @throws IOException */ public static Document loadDocument(URL url) throws SAXException, IOException { return loadDocument(url.openStream(), null, true, false); } /** * 从Reader装载XML * * @param reader * 数据 * @param ignorComments * 是否跳过注解 * @param namespaceAware * 是否忽略命名空间 * @return 加载后的DOM模型 * @throws SAXException * @throws IOException */ public static Document loadDocument(Reader reader, boolean ignorComments, boolean namespaceAware) throws SAXException, IOException { try { DocumentBuilder db = REUSABLE_BUILDER.get().getDocumentBuilder(ignorComments, namespaceAware); InputSource is = new InputSource(reader); Document doc = db.parse(is); return doc; } finally { IOUtils.closeQuietly(reader); } } /** * 解析xml文本 * * @param xmlContent * XML文本 * @return Document DOM模型 * @throws SAXException * 解析错误 * @throws IOException * 读写错误 */ public static Document parse(String xmlContent) throws SAXException, IOException { Reader reader = null; try { reader = new StringReader(xmlContent); return loadDocument(reader, true, false); } finally { IOUtils.closeQuietly(reader); } } /** * 解析xml文本 * * @param xmlContent * XML文本 * @return Document 解析后的DOM模型 * @throws SAXException * 解析错误 * @throws IOException * 读写错误 * @deprecated use {@link #parse(String)} */ public static Document loadDocumentByString(String xmlContent) throws SAXException, IOException { return parse(xmlContent); } /** * 读取XML文档 * * @param in * 输入流 * @param charSet * 字符编码 * @param ignorComment * 忽略注释 * @return Document 加载后的DOM模型 * @throws SAXException * 解析错误 * @throws IOException * 读写错误 */ public static Document loadDocument(InputStream in, String charSet, boolean ignorComment) throws SAXException, IOException { return loadDocument(in, charSet, ignorComment, false); } /** * 载入XML文档 * * @param in * 输入流 * @param charSet * 编码 * @param ignorComment * 跳过注释节点 * @return Document. DOM模型 * @throws SAXException * 解析错误 * @throws IOException * 读写错误 */ public static Document loadDocument(InputStream in, String charSet, boolean ignorComments, boolean namespaceAware) throws SAXException, IOException { DocumentBuilder db = REUSABLE_BUILDER.get().getDocumentBuilder(ignorComments, namespaceAware); InputSource is = null; // 解析流来获取charset if (charSet == null) {// 读取头200个字节来分析编码 byte[] buf = new byte[200]; PushbackInputStream pin = new PushbackInputStream(in, 200); in = pin; int len = pin.read(buf); if (len > 0) { pin.unread(buf, 0, len); charSet = getCharsetInXml(buf, len); } } if (charSet != null) { is = new InputSource(new XmlFixedReader(new InputStreamReader(in, charSet))); is.setEncoding(charSet); } else { // 自动检测编码 Reader reader = new InputStreamReader(in, "UTF-8");// 为了过滤XML当中的非法字符,所以要转换为Reader,又为了转换为Reader,所以要获得XML的编码 is = new InputSource(new XmlFixedReader(reader)); } Document doc = db.parse(is); doc.setXmlStandalone(true);// 设置为True保存时才不会出现讨厌的standalone="no" return doc; } /** * 通过读取XML头部文字来判断xml文件的编码 * * @param buf * XML文件头部若干字节 * @param len * 判定长度 * @return 获得XML编码。如果不成功返回null。 */ public static String getCharsetInXml(byte[] buf, int len) { buf = ArrayUtils.subarray(buf, 0, len); String s = new String(buf).toLowerCase(); int n = s.indexOf("encoding="); if (n > -1) { s = s.substring(n + 9); if (s.charAt(0) == '\"' || s.charAt(0) == '\'') { s = s.substring(1); } n = StringUtils.indexOfAny(s, "\"' ><"); if (n > -1) { s = s.substring(0, n); } if (StringUtils.isEmpty(s)) { return null; } s = CharsetName.getStdName(s); return s; } else { return null; } } /** * 载入HTML文档 * * @param in * 输入流 * @return DocumentFragment DOM模型 * @throws SAXException * 解析错误 * @throws IOException * 读写错误 */ public static DocumentFragment parseHTML(Reader in) throws SAXException, IOException { if (parser == null) throw new UnsupportedOperationException( "HTML parser module not loaded, to activate this feature, you must add JEF common-ioc.jar to classpath"); InputSource source; source = new InputSource(in); synchronized (parser) { HTMLDocument document = new HTMLDocumentImpl(); DocumentFragment fragment = document.createDocumentFragment(); parser.parse(source, fragment); return fragment; } } /** * 从指定文件载入HTML * * @param in * 输入流 * @param charSet * 编码 * @return DocumentFragment对象 DOM模型 * @throws SAXException * 解析错误 * @throws IOException * 读写错误 */ public static DocumentFragment parseHTML(File file) throws IOException, SAXException { InputStream in = IOUtils.getInputStream(file); try { DocumentFragment document = parseHTML(in, null); return document; } finally { in.close(); } } /** * 从指定的地址加载HTMLDocument * * @param url * 输入源 * @return DocumentFragment DOM模型 * @throws SAXException * @throws IOException */ public static DocumentFragment parseHTML(URL url) throws SAXException, IOException { return parseHTML(url.openStream(), null); } /** * 从指定流解析HTML。已经废弃。 * * @param in * 输入流 * @param charSet * 字符集,为null时自动检测 * @return 解析后的DocumentFragment对象 * @throws SAXException * XML语法异常时抛出 * @throws IOException * IO操作错误时抛出 * @deprecated Use {@link #parseHTML(InputStream, String)} instead. */ public static DocumentFragment loadHtmlDocument(InputStream in, String charSet) throws SAXException, IOException { return parseHTML(in, charSet); } /** * 从指定流解析HTML * * @param in * 输入流 * @param charSet * 字符集,为null时自动检测 * @return 解析后的DocumentFragment对象 * @throws SAXException * XML语法异常时抛出 * @throws IOException * IO操作错误时抛出 */ public static DocumentFragment parseHTML(InputStream in, String charSet) throws SAXException, IOException { if (parser == null) throw new UnsupportedOperationException( "HTML parser module not loaded, to activate this feature, you must add JEF common-ioc.jar to classpath"); InputSource source; if (charSet != null) { source = new InputSource(new XmlFixedReader(new InputStreamReader(in, charSet))); source.setEncoding(charSet); } else { source = new InputSource(in); } synchronized (parser) { HTMLDocument document = new HTMLDocumentImpl(); DocumentFragment fragment = document.createDocumentFragment(); parser.parse(source, fragment); return fragment; } } /** * 保存XML文档 * * @param doc * 节点DOM对象 * @param file * 保存到文件 * @throws IOException * 读写错误 */ public static void saveDocument(Node doc, File file) throws IOException { saveDocument(doc, file, "UTF-8"); } /** * 保存XML文档 * * @param doc * 节点DOM对象 * @param file * 文件 * @param encoding * 编码 * @throws IOException */ public static void saveDocument(Node doc, File file, String encoding) throws IOException { OutputStream os = IOUtils.getOutputStream(file); try { output(doc, os, encoding); } finally { os.close(); } } /** * 将XML文档输出到流 * * @param node * 节点DOM对象 * @param os * 输出流 * @throws IOException * 读写错误 */ public static void output(Node node, OutputStream os) throws IOException { output(node, os, null, 4, null); } /** * 将XML文档输出到流 * * @param node * 节点DOM对象 * @param os * 输出流 * @param encoding * 编码 * @throws IOException * 读写错误 */ public static void output(Node node, OutputStream os, String encoding) { output(node, os, encoding, 4, null); } /** * 节点转换为String * * @param node * 节点DOM对象 * @return 转换后的XML文本 */ public static String toString(Node node) { return toString(node, null); } /** * 将XML文档输出到流 * * @param node * DOM对象 * @param os * 输出流 * @param encoding * 编码 * @param warpLine * 折行输出 * @param xmlDeclare * null如果是document對象則頂部有xml定義,true不管如何都有 false都沒有 * @throws IOException * 读写错误 */ public static void output(Node node, OutputStream os, String encoding, int warpLine, Boolean xmlDeclare){ try { StreamResult sr = new StreamResult(encoding == null ? new OutputStreamWriter(os) : new OutputStreamWriter(os, encoding)); output(node, sr, encoding, warpLine, xmlDeclare); }catch(IOException e) { throw new RuntimeException(e); } } /** * 保存文档 * * @param node * 要保存的节点或Document * @param os * 输出流 * @param encoding * 编码 * @param warpLine * 是否要排版 * @throws IOException * 读写错误 */ public static void output(Node node, Writer os, String encoding, int indent) throws IOException { StreamResult sr = new StreamResult(os); output(node, sr, encoding, indent, null); } private static void output(Node node, StreamResult sr, String encoding, int indent, Boolean XmlDeclarion) throws IOException { if(node.getNodeType()==Node.ATTRIBUTE_NODE) { sr.getWriter().write(node.getNodeValue()); sr.getWriter().flush(); return; } TransformerFactory tf = TransformerFactory.newInstance(); Transformer t = null; try { if (indent > 0) { try { tf.setAttribute("indent-number", indent); t = tf.newTransformer(); // 某些垃圾的XML解析包会造成无法正确的设置属性。一些旧版本的XML解析器会有此问题 t.setOutputProperty(OutputKeys.INDENT, "yes"); } catch (Exception e) { } } else { t = tf.newTransformer(); } t.setOutputProperty(OutputKeys.METHOD, "xml"); if (encoding != null) { t.setOutputProperty(OutputKeys.ENCODING, encoding); } if (XmlDeclarion == null) { XmlDeclarion = (node instanceof Document); } if (node instanceof Document) { Document doc = (Document) node; if(doc.getDoctype()!=null) { t.setOutputProperty(javax.xml.transform.OutputKeys.DOCTYPE_PUBLIC, doc.getDoctype().getPublicId()); t.setOutputProperty(javax.xml.transform.OutputKeys.DOCTYPE_SYSTEM, doc.getDoctype().getSystemId()); } } if (XmlDeclarion) { t.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no"); } else { t.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); } } catch (Exception tce) { throw new IOException(tce); } DOMSource doms = new DOMSource(node); try { t.transform(doms, sr); } catch (TransformerException te) { IOException ioe = new IOException(); ioe.initCause(te); throw ioe; } } /** * 在指定节点下添加一个CDATA节点 * * @param node * 父节点 * @param data * CDATA文字内容 * @return 生成的CDATA节点 */ public static CDATASection addCDataText(Node node, String data) { Document doc = null; if (node.getNodeType() == Node.DOCUMENT_NODE) { doc = (Document) node; } else { doc = node.getOwnerDocument(); } CDATASection e = doc.createCDATASection(data); node.appendChild(e); return e; } /** * 标准XPath计算 * * @param startPoint * 起始节点 * @param expr * xpath表达式 * @return xpath表达式的计算结果(文本) * @throws XPathExpressionException */ public static String evalXpath(Object startPoint, String expr) throws XPathExpressionException { XPath xpath = xp.newXPath(); return xpath.evaluate(expr, startPoint); } /** * 标准XPATH计算 * * @param startPoint * 起始节点 * @param expr * xpath表达式 * @return 该xpath下的节点 * @throws XPathExpressionException */ public static Node selectNode(Object startPoint, String expr) throws XPathExpressionException { XPath xpath = xp.newXPath(); return (Node) xpath.evaluate(expr, startPoint, XPathConstants.NODE); } /** * 标准XPATH计算 * * @param startPoint * 起始节点 * @param expr * xpath表达式 * @return 符合xpath的所有节点 * @throws XPathExpressionException */ public static NodeList selectNodes(Object startPoint, String expr) throws XPathExpressionException { XPath xpath = xp.newXPath(); return (NodeList) xpath.evaluate(expr, startPoint, XPathConstants.NODESET); } /** * 标准XPATH计算 * * @param start * @param expr * @return * @throws XPathExpressionException */ public static List<Element> selectElements(Node start, String expr) throws XPathExpressionException { return toElementList(selectNodes(start, expr)); } /** * 在节点下插入文本 * * @param node * 节点 * @param data * 文本内容 * @return DOM文本节点 */ public static Text setText(Node node, String data) { Document doc = null; if (node.getNodeType() == Node.DOCUMENT_NODE) { doc = (Document) node; } else { doc = node.getOwnerDocument(); } clearChildren(node, Node.TEXT_NODE); Text t = doc.createTextNode(data); node.appendChild(t); return t; } /** * 在一个节点下插入注释 * * @param node * 节点 * @param comment * 注释内容 * @return Comment节点 */ public static Comment addComment(Node node, String comment) { Document doc = null; if (node.getNodeType() == Node.DOCUMENT_NODE) { doc = (Document) node; } else { doc = node.getOwnerDocument(); } Comment e = doc.createComment(comment); node.appendChild(e); return e; } /** * 在指定节点之前插入节点(兄弟节点)。 举例 * * <pre> * <parent> * <a>text-a</a> * <c>text-c</c> * </parent> * </pre> * * 调用{@code addElementBefore(c,"b","text-b")},其中c为上面的c节点。 结果 * * <pre> * <parent> * <a>text-a</a> * <b>text-b</b> * <c>text-c</c> * </parent> * </pre> * * @param node * 节点DOM对象 * @param tagName * 新增的元素名称 * @param nodeText * 元素文本 * @return Element对象 */ public static Element addElementBefore(Node node, String tagName, String... nodeText) { Node pNode = node.getParentNode(); List<Node> movingNodes = new ArrayList<Node>(); for (Node n : toArray(pNode.getChildNodes())) { if (n == node) { movingNodes.add(n); } else if (movingNodes.size() > 0) { movingNodes.add(n); } } Element e = addElement(pNode, tagName, nodeText); for (Node n : movingNodes) { pNode.appendChild(n); } return e; } /** * 在之后插入节点(兄弟节点) * * @param node * 节点DOM对象 * @param tagName * 新增的元素名称 * @param nodeText * 元素文本 * @return Element对象 */ public static Element addElementAfter(Node node, String tagName, String... nodeText) { Node pNode = node.getParentNode(); List<Node> movingNodes = new ArrayList<Node>(); boolean flag = false; for (Node n : toArray(pNode.getChildNodes())) { if (flag) { movingNodes.add(n); } else if (n == node) { flag = true; } } Element e = addElement(pNode, tagName, nodeText); for (Node n : movingNodes) { pNode.appendChild(n); } return e; } /** * 生成新节点替换原来的节点 * * @param node * 旧节点 * @param tagName * 新节点名称 * @param nodeText * 节点文本 * @return Element对象 */ public static Element replaceElement(Node node, String tagName, String... nodeText) { Node pNode = node.getParentNode(); Assert.notNull(pNode); Document doc = null; if (node.getNodeType() == Node.DOCUMENT_NODE) { doc = (Document) node; } else { doc = node.getOwnerDocument(); } Element e = doc.createElement(tagName); if (nodeText.length == 1) { setText(e, nodeText[0]); } else if (nodeText.length > 1) { setText(e, StringUtils.join(nodeText, '\n')); } pNode.replaceChild(e, node); return e; } /** * 在指定节点下查找一个Element,如果没有就添加 * * @param parent * 父节点 * @param tagName * 查找的子节点名 * @param attribName * 查找属性名 * @param attribValue * 查找的属性值 * @return 如果找到,返回已有的节点。如果没找到,返回创建的新节点。 */ public static Element getOrCreateChildElement(Node parent, String tagName, String attribName, String attribValue) { for (Element e : XMLUtils.childElements(parent, tagName)) { if (attribValue == null || attribValue.equals(XMLUtils.attrib(e, attribName))) { return e; } } Element e = XMLUtils.addElement(parent, tagName); e.setAttribute(attribName, attribValue); return e; } /** * 删除节点下的指定了TagName的元素 * * @param node * 父节点 * @param tagName * 要删除的元素名称 * @return 删除数量 */ public static int removeChildElements(Node node, String... tagName) { List<Element> list = XMLUtils.childElements(node, tagName); for (Element e : list) { node.removeChild(e); } return list.size(); } /** * 清除节点的所有子节点。执行该方法后,节点下的所有子节点将被全部清除。 * * @param node * 父节点 */ public static void clearChildren(Node node) { clearChildren(node, 0); } /** * 清除下属的指定类型的节点 * * @param node * @param type * 如果不限制NodeType,传入0 */ public static void clearChildren(Node node, int type) { for (Node child : toArray(node.getChildNodes())) { if (type == 0 || child.getNodeType() == type) { node.removeChild(child); } } } /** * 清除元素节点的所有属性 * * @param element * 要清除属性的节点 */ public static void clearAttribute(Element element) { for (Node node : toArray(element.getAttributes())) { element.removeAttributeNode((Attr) node); } } /** * 清除元素节点所有属性和子节点 * * @param element * 要清除的节点 */ public static void clearChildrenAndAttr(Element element) { clearChildren(element); clearAttribute(element); } /** * 在一个节点下插入元素和文本 * * @param node * 父节点 * @param tagName * 新建元素名称 * @param nodeText * 新建元素文本 * @return 新建的Element */ public static Element addElement(Node node, String tagName, String... nodeText) { Document doc = null; if (node.getNodeType() == Node.DOCUMENT_NODE) { doc = (Document) node; } else { doc = node.getOwnerDocument(); } Element e = doc.createElement(tagName); node.appendChild(e); if (nodeText.length == 1) { setText(e, nodeText[0]); } else if (nodeText.length > 1) { setText(e, StringUtils.join(nodeText, '\n')); } return e; } /** * 反回一个新节点,代替旧节点,其名称可以设置 * * @param node * 要变更名称的元素节点 * @param newName * 新名称 * @return 重命名后的DOM节点(实现过程中会用新创建的Element代替旧的) */ public static Element changeNodeName(Element node, String newName) { Document doc = node.getOwnerDocument(); Element newEle = doc.createElement(newName); Node parent = node.getParentNode(); parent.removeChild(node); parent.appendChild(newEle); for (Node child : toArray(node.getChildNodes())) { node.removeChild(child); newEle.appendChild(child); } return newEle; } /** * 得到节点下,具有指定标签的Element。(只搜索一层) * * @param node * 父节点 * @param tagName * 要搜索的节点名,如果为null表示返回全部Element * @return 搜索到的全部子元素 */ public static List<Element> childElements(Node node, String... tagName) { if (node == null) throw new NullPointerException("the input node can not be null!"); List<Element> list = new ArrayList<Element>(); NodeList nds = node.getChildNodes(); if (tagName.length == 0 || tagName[0] == null) {// 预处理,兼容旧API tagName = null; } for (int i = 0; i < nds.getLength(); i++) { Node child = nds.item(i); if (child.getNodeType() == Node.ELEMENT_NODE) { Element e = (Element) child; if (tagName == null || ArrayUtils.contains(tagName, e.getNodeName())) { list.add(e); } } else if (child.getNodeType() == Node.CDATA_SECTION_NODE) { } else if (child.getNodeType() == Node.COMMENT_NODE) { } else if (child.getNodeType() == Node.DOCUMENT_FRAGMENT_NODE) { } else if (child.getNodeType() == Node.DOCUMENT_NODE) { } else if (child.getNodeType() == Node.DOCUMENT_TYPE_NODE) { } else if (child.getNodeType() == Node.ATTRIBUTE_NODE) { } else if (child.getNodeType() == Node.TEXT_NODE) { } } return list; } static class MyNodeList implements NodeList { Node[] list; public int getLength() { return list.length; } public Node item(int index) { return list[index]; } public MyNodeList(Node[] list) { this.list = list; } public MyNodeList(List<? extends Node> list) { this.list = list.toArray(new Node[list.size()]); } } /** * 获取指定元素的文本(Trimed) * * @param element * 节点 * @return 节点的文本。xml转义符会被还原;两端空格会被截去。 */ public static String nodeText(Node element) { Node first = first(element, Node.TEXT_NODE, Node.CDATA_SECTION_NODE); if (first != null && first.getNodeType() == Node.CDATA_SECTION_NODE) { return ((CDATASection) first).getTextContent(); } StringBuilder sb = new StringBuilder(); if (first == null || StringUtils.isBlank(first.getTextContent())) { for (Node n : toArray(element.getChildNodes())) { if (n.getNodeType() == Node.TEXT_NODE) { sb.append(n.getTextContent()); } else if (n.getNodeType() == Node.CDATA_SECTION_NODE) { sb.append(((CDATASection) n).getTextContent()); } } } else { sb.append(first.getTextContent()); } return StringUtils.trimToNull(StringEscapeUtils.unescapeHtml(sb.toString())); } /** * 得到节点下全部的text文本内容 * * @param element * 元素节点 * @param withChildren * 如果为真,则将该节点下属所有子元素的文本合并起来返回 * @return 节点文本。xml转义符会被还原;两端空格会被截去。 */ public static String nodeText(Node element, boolean withChildren) { StringBuilder sb = new StringBuilder(); for (Node node : toArray(element.getChildNodes())) { if (node.getNodeType() == Node.TEXT_NODE) { sb.append(node.getNodeValue().trim()); } else if (node.getNodeType() == Node.CDATA_SECTION_NODE) { sb.append(((CDATASection) node).getTextContent()); } else if (withChildren) { if (node.getNodeType() == Node.ELEMENT_NODE) { sb.append(nodeText((Element) node, true)); } } } return sb.toString(); } /** * 获得属性值 * * @param e * 元素节点 * @param attributeName * 属性名 * @return 属性值。xml转义符会被还原;两端空格会被截去。 */ public static String attrib(Element e, String attributeName) { if (!e.hasAttribute(attributeName)) return null; String text = e.getAttribute(attributeName); return (text == null) ? null : StringEscapeUtils.unescapeXml(text.trim()); } /** * 获得属性值(遍历子节点) * * @param e * 父元素节点 * @param attributeName * 属性名 * @return 父节点和子节点的指定属性值组成List,一起返回 */ public static List<String> attribs(Element e, String attributeName) { List<String> _list = new ArrayList<String>(); if (e.hasAttribute(attributeName)) { String text = e.getAttribute(attributeName); _list.add((text == null) ? null : StringEscapeUtils.unescapeHtml(text.trim())); } if (e.hasChildNodes()) { NodeList nds = e.getChildNodes(); for (int i = 0; i < nds.getLength(); i++) { Node child = nds.item(i); if (child.getNodeType() == Node.ELEMENT_NODE) { _list.addAll(attribs((Element) child, attributeName)); } } } return _list; } /** * 获取当前元素下,一个子元素的文本(Trim) * * @param element * 元素 * @param subEleName * 子元素节点名 * @return 子元素的文本。xml转义符会被还原;两端空格会被截去。 */ public static String nodeText(Element element, String subEleName) { Element e = first(element, subEleName); if (e == null) return null; return nodeText(e); } /** * 得到节点下第n个指定元素(不分层次) * * @param parent * 父节点 * @param elementName * 元素名称 * @param index * 序号 * @return 查找到的元素节点DOM */ public static Element nthElement(Element parent, String elementName, int index) { NodeList nds = parent.getElementsByTagName(elementName); if (nds.getLength() < index) throw new NoSuchElementException(); Element node = (Element) nds.item(index - 1); return node; } /** * 得到当前元素下,第一个符合Tag Name的子元素 * * @param parent * 父节点 * @param elementName * 要求的元素名 * @return 父节点下第一个符合条件的元素 */ public static Element first(Node node, String tagName) { if (node == null) return null; NodeList nds = node.getChildNodes(); for (int i = 0; i < nds.getLength(); i++) { Node child = nds.item(i); if (child.getNodeType() == Node.ELEMENT_NODE) { Element e = (Element) child; if (tagName == null || tagName.equals(e.getNodeName())) { return e; } // } else if (child.getNodeType() == Node.CDATA_SECTION_NODE) { // } else if (child.getNodeType() == Node.COMMENT_NODE) { // } else if (child.getNodeType() == // Node.DOCUMENT_FRAGMENT_NODE) { // } else if (child.getNodeType() == Node.DOCUMENT_NODE) { // } else if (child.getNodeType() == Node.DOCUMENT_TYPE_NODE) { // } else if (child.getNodeType() == Node.ATTRIBUTE_NODE) { // } else if (child.getNodeType() == Node.TEXT_NODE) { } } return null; } /** * 获得符合类型的第一个节点(单层) * * @param node * 父节点 * @param nodeType * 节点类型,定义在Node中的常量,如{@link Node#ELEMENT_NODE}、 * {@link Node#DOCUMENT_NODE} * @return 第一个类型相符的节点 */ public static Node first(Node node, int... nodeType) { if (node == null) return null; NodeList nds = node.getChildNodes(); for (int i = 0; i < nds.getLength(); i++) { Node child = nds.item(i); if (ArrayUtils.contains(nodeType, child.getNodeType())) { return child; } } return null; } /** * 创建一份带有根元素节点的XML文档 * * @param tagName * 根节点元素名 * @return Document对象 */ public static Document newDocument(String tagName) { Assert.notNull(tagName); Document doc = newDocument(); addElement(doc, tagName); return doc; } /** * 创建一份新的空白XML文档 * * @return 空白XML */ public static Document newDocument() { try { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); DocumentBuilder builder = factory.newDocumentBuilder(); Document document = builder.newDocument(); document.setXmlStandalone(true); return document; } catch (ParserConfigurationException e) { LogUtil.exception(e); return null; } } /** * 将NamedNodeMap对象转换为Node数组 * * @param nds * NamedNodeMap对象 * @return Node数组 */ public static Node[] toArray(NamedNodeMap nds) { Node[] array = new Node[nds.getLength()]; for (int i = 0; i < nds.getLength(); i++) { array[i] = nds.item(i); } return array; } /** * 将Map所有值设置为属性 * * @param e * 元素节点 * @param attrMap * 属性,这些值将作为属性设置到元素节点上 * @param isSubNode * 设置方式,false时优先设置为属性,true时设置为子节点 */ public static void setAttributesByMap(Element e, Map<String, Object> attrMap, boolean isSubNode) { if (attrMap == null) return; setAttrMap(e, attrMap, isSubNode); } /** * 将Map所有值设置为属性 * * @param e * 元素节点 * @param map * 属性,这些值将作为属性设置到元素节点上 */ public static void setAttributesByMap(Element e, Map<String, Object> map) { setAttributesByMap(e, map, false); } @SuppressWarnings("rawtypes") private static void setAttrMap(Element e, Map attrMap, boolean isSubNode) { if (isSubNode) { for (Object keyObj : attrMap.keySet()) { String key = StringUtils.toString(keyObj); Object value = attrMap.get(key); if (value.getClass().isArray()) { setAttrArray(e, key, (Object[]) value, isSubNode); continue; } else if (value instanceof List) { setAttrArray(e, key, ((List) value).toArray(), isSubNode); continue; } Element child = first(e, key); if (child == null) { child = addElement(e, key); } if (value instanceof Map) { setAttrMap(child, (Map) value, isSubNode); } else { setText(child, StringUtils.toString(value)); } } } else { for (Object keyObj : attrMap.keySet()) { String key = StringUtils.toString(keyObj); Object value = attrMap.get(key); if (value instanceof Map) { Element child = first(e, key); if (child == null) { child = addElement(e, key); } setAttrMap(child, (Map) value, isSubNode); } else if (value.getClass().isArray()) { setAttrArray(e, key, (Object[]) value, isSubNode); } else if (value instanceof List) { setAttrArray(e, key, ((List) value).toArray(), isSubNode); } else { e.setAttribute(key, StringUtils.toString(value)); } } } } @SuppressWarnings("rawtypes") private static void setAttrArray(Element e, String key, Object[] value, boolean isSubNode) { for (Object o : value) { if (o instanceof Map) { Element child = addElement(e, key); setAttrMap(child, (Map) o, isSubNode); } else { Element child = addElement(e, key); setText(child, StringUtils.toString(o)); } } } /** * 在父节点下找寻子元素并为其设置文本。 * * @param parent * 父元素节点 * @param tagName * 要找寻的子元素名 * @param value * 文本内容 */ public static void setNodeText(Element parent, String tagName, String value) { Element child = first(parent, tagName); if (child != null) { setText(child, value); } } /** * 获取所有属性,以Map形式返回 * * @param e * 元素节点 * @return 所有属性名称和值构成的Map */ public static Map<String, String> getAttributesMap(Element e) { return getAttributesMap(e, false); } /** * 获取所有属性。 * * @param e * 元素节点 * @param subElementAsAttr * 为true时,包括下属第一级的Element后的文本节点,也作为属性返回<br> * 例如 * * <pre> * <Foo size="103" name="Karen"> * <dob>2012-4-12</dobh> * <dod>2052-4-12</dodh> * </Foo> * </pre> * * 当subElementAsAttr=false时,dob,dod不作为属性,而当为true时则作为属性处理 * @return */ public static Map<String, String> getAttributesMap(Element e, boolean subElementAsAttr) { Map<String, String> attribs = new HashMap<String, String>(); if (e == null) return attribs; NamedNodeMap nmp = e.getAttributes(); for (int i = 0; i < nmp.getLength(); i++) { Attr child = (Attr) nmp.item(i); attribs.put(StringEscapeUtils.unescapeHtml(child.getName()), StringEscapeUtils.unescapeHtml(child.getValue())); } if (subElementAsAttr) { NodeList nds = e.getChildNodes(); for (int i = 0; i < nds.getLength(); i++) { Node node = nds.item(i); if (node.getNodeType() != Node.ELEMENT_NODE) continue; Element sub = (Element) node; String key = sub.getNodeName(); String value = nodeText(sub); if (attribs.containsKey(key)) { attribs.put(key, attribs.get(key) + "," + value); } else { attribs.put(key, value); } } } return attribs; } /** * 从子节点中获得指定的属性 * * <pre> * <object> * <id>100</id> * <name>Jhon smith</name> * <phone>130100000</phone> * <object> * </pre> * * 对上面例的对象, * {@code getAttributesInChildElements(objectNode, "name", "phone")},则可以得到 * {name="Jhon smith",phone="130100000"}这样的一个Map。 * 如果不指定属性名,那么所有子元素的文本都会被获取到Map中。 * * @param parent * 父节点 * @param keys * 需要的属性 * @return 由提取的文本构成的Map */ public static Map<String, String> getAttributesInChildElements(Element parent, String... keys) { NodeList nds = parent.getChildNodes(); Map<String, String> attribs = new HashMap<String, String>(); for (int i = 0; i < nds.getLength(); i++) { Node node = nds.item(i); if (node.getNodeType() != Node.ELEMENT_NODE) continue; Element sub = (Element) node; String key = sub.getNodeName(); if (keys.length == 0 || ArrayUtils.contains(keys, key)) { String value = nodeText(sub); if (attribs.containsKey(key)) { attribs.put(key, attribs.get(key) + "," + value); } else { attribs.put(key, value); } } } return attribs; } /** * 将 * * <pre> * <object> * <id>100</id> * <name>Jhon smith</name> * <phone>130100000</phone> * <object> * </pre> * * 结构的XML描述,改为 * * <pre> * <object id="100" name="Jhon smith" pone="130100000"> * </object> * </pre> * * 这样的格式。 * * @param e * 要处理的元素节点 * @param keys * 要迁移的属性值 */ public static void moveChildElementAsAttribute(Element e, String... keys) { NodeList nds = e.getChildNodes(); for (Node node : toArray(nds)) { if (node.getNodeType() == Node.TEXT_NODE) { e.removeChild(node); // 删除空白文本节点 } if (node.getNodeType() != Node.ELEMENT_NODE) continue; Element sub = (Element) node; String key = sub.getNodeName(); if (keys.length == 0 || ArrayUtils.contains(keys, key)) { String value = nodeText(sub); e.setAttribute(key, value); e.removeChild(sub); } } } /** * 将XML元素节点转换为一个Java对象 * * @param e * 元素节点 * @param clz * 要转换的java对象 * @throws ReflectionException * 反射错误 * @deprecated 使用loadBean(Element,Class)方法 */ public static <W> W elementToBean(Element e, Class<W> clz) throws ReflectionException { return loadBean(e, clz); } /** * 将XML元素节点转换为一个Java对象 * <p> * 注意这个方法和并不是putBean的逆运算、因为条件所限,这里只load bean的属性,但不会load * bean内部其他bean的值。即不支持递归嵌套。 而putBean的功能是比较强的。 * * @param e * 元素节点 * @param bean * 要转换的java对象 * @throws ReflectionException * 反射错误 */ public static <W> W loadBean(Element e, Class<W> clz) { W bean = UnsafeUtils.newInstance(clz); BeanWrapperImpl bw = new BeanWrapperImpl(bean); Map<String, String> attrs = getAttributesMap(e, true); for (String key : bw.getPropertyNames()) { if (attrs.containsKey(key)) { bw.setPropertyValueByString(key, attrs.get(key)); } } return bean; } /** * 将指定的java bean转换为XML元素。 并将转换后的元素添加到当前的XML节点下面 * * @param parent * 要放置的节点 * @param bean * 要放置的对象 * @return 转换后的元素节点 */ public static Element putBean(Node parent, Object bean) { if (bean == null) return null; return appendBean(parent, bean, bean.getClass(), null, null); } /** * 将指定的java bean转换为XML元素。 并将转换后的元素添加到当前的XML节点下面 * * * @param node * 要放置的节点 * @param bean * 要放置的对象 * @param tryAttribute * 当为true时对象的属性尽量作为XML属性 当为false对象的属性都作为XML文本节点 * 当为null时自动判断,一些简单类型作为属性,复杂类型用文本节点 * @return 转换后的元素节点 */ public static Element putBean(Node node, Object bean, Boolean tryAttribute) { if (bean == null) return null; return appendBean(node, bean, bean.getClass(), tryAttribute, null); } /** * 将一个或多个节点挂到指定的节点之下。如果两边节点不在同一个Document对象中,会自动创建新的拷贝。<br/> * 这个方式适用于在不同的Document中迁移节点。 * * @param parent * 父节点 * @param nodes * 要迁移的子节点 */ public static void appendChild(Node parent, Node... nodes) { Document doc = parent.getOwnerDocument(); for (Node node : nodes) { if (node.getOwnerDocument() != doc) { parent.appendChild(doc.importNode(node, true)); } else { parent.appendChild(node); } } } /** * NodeList转换为数组 * * @param nds * NodeList对象 * @return Node数组 */ public static Node[] toArray(NodeList nds) { if (nds instanceof MyNodeList) return ((MyNodeList) nds).list; Node[] array = new Node[nds.getLength()]; for (int i = 0; i < nds.getLength(); i++) { array[i] = nds.item(i); } return array; } /** * NodeList对象转换为List * * @param nds * NodeList对象 * @return Node组成的List */ public static List<? extends Node> toList(NodeList nds) { if (nds instanceof MyNodeList) return Arrays.asList(((MyNodeList) nds).list); List<Node> list = new ArrayList<Node>(); for (int i = 0; i < nds.getLength(); i++) { Node child = nds.item(i); list.add(child); } return list; } /** * 将Nodelist转换为Element List * * @param nds * NodeList * @return 将NodeList中的Element节点组成一个List。(其他类型节点被丢弃) */ public static List<Element> toElementList(NodeList nds) { List<Element> list = new ArrayList<Element>(); for (int i = 0; i < nds.getLength(); i++) { Node child = nds.item(i); if (child.getNodeType() == Node.ELEMENT_NODE) { list.add((Element) child); } } return list; } /** * 将Node的列表对象转换为NodeList对象。是{@link #toList(NodeList)}的逆运算。 * * @param list * Node的列表 * @return NodeList对象。 */ public static NodeList toNodeList(List<? extends Node> list) { return new MyNodeList(list); } /** * 将Node数组转换为NodeList对象,是{@link #toArray(NodeList)}的逆运算 * * @param array * Node数组 * @return NodeList对象 */ public static NodeList toNodeList(Node[] array) { return new MyNodeList(array); } /** * 在当前节点及下属节点中查找文本 * * @param node * 要查找的节点 * @param text * 检索文本 * @param searchAttribute * 是否在属性值中查找 * @return Node对象,匹配文本的第一个节点。未找到返回null。 */ public static Node findFirst(Node node, String text, boolean searchAttribute) { String value = getValue(node); if (value != null && value.indexOf(text) > -1) return node; if (searchAttribute && node.getAttributes() != null) { for (Node n : toArray(node.getAttributes())) { value = getValue(n); if (value != null && value.indexOf(text) > -1) return n; } } for (Node sub : toArray(node.getChildNodes())) { Node nd = findFirst(sub, text, searchAttribute); if (nd != null) return nd; } return null; } /** * 查找并移除具有指定关键字的节点。 * * @param node * 要查找的节点 * @param text * 检索关键字 * @param searchAttribute * 是否在属性值中查找 */ public static void removeNodeWithKeyword(Node node, String text, boolean searchAttribute) { String value = getValue(node); if (value != null && value.indexOf(text) > -1) { node.getParentNode().removeChild(node); return; } if (searchAttribute && node.getAttributes() != null) { for (Node n : toArray(node.getAttributes())) { value = getValue(n); if (value != null && value.indexOf(text) > -1) { node.getParentNode().removeChild(node); return; } } } for (Node sub : toArray(node.getChildNodes())) { removeNodeWithKeyword(sub, text, searchAttribute); } } /** * 在当前节点及下属节点中查找文本 * * @param node * 节点 * @param text * 关键字 * @param searchAttribute * 是否查找属性 * @return Node对象,匹配文本的第一个节点 */ public static Node[] find(Node node, String text, boolean searchAttribute) { List<Node> result = new ArrayList<Node>(); innerSearch(node, text, result, searchAttribute); return result.toArray(new Node[0]); } /** * 查找指定名称的Element,并且其指定的属性值符合条件 * * @param root * 根节点 * @param tagName * 要匹配的element名称 * @param attribName * 要匹配的属性名城 * @param keyword * 要匹配的属性值 * @return 第一个符合查询条件的元素 */ public static Element findElementByNameAndAttribute(Node root, String tagName, String attribName, String keyword) { Element[] es = findElementsByNameAndAttribute(root, tagName, attribName, keyword, true); if (es.length > 0) return es[0]; return null; } /** * 查找指定名称的Element,并且其指定的属性值符合条件 * * @param root * 根节点 * @param tagName * 要匹配的element名称 * @param attribName * 要匹配的属性名 * @param keyword * 要匹配的属性关键字 * @return 所有符合查询条件的元素 */ public static Element[] findElementsByNameAndAttribute(Node root, String tagName, String attribName, String keyword) { return findElementsByNameAndAttribute(root, tagName, attribName, keyword, false); } /** * 查找第一个属性为某个值的Element节点并返回 * * @param node * 根节点 * @param attribName * 属性名称 * @param keyword * 要匹配的属性关键字 * @return 符合条件的第一个Element */ public static Element findElementByAttribute(Node node, String attribName, String keyword) { Element[] result = findElementsByAttribute(node, attribName, keyword, true); if (result.length == 0) return null; return result[0]; } /** * 查找Element,其拥有某个指定的属性值。 * * @param node * 根节点 * @param attribName * 属性名 * @param keyword * 属性值 * @return 所有符合查询条件的元素 */ public static Element[] findElementsByAttribute(Node node, String attribName, String keyword) { return findElementsByAttribute(node, attribName, keyword, false); } /** * 根据attrib属性id定位节点,功能类似于JS中的document.getElementById(); * * @param node * 根节点 * @param id * 查找的ID * @return 找到的元素 */ public static Element findElementById(Node node, String id) { if (node == null) return null; if (node.getNodeType() == Node.ELEMENT_NODE) { Element e = (Element) node; if (e.hasAttribute("id")) { String ss = StringUtils.trim(e.getAttribute("id")); if (ss.equals(id)) { return e; } } } for (Node sub : toArray(node.getChildNodes())) { Element nd = findElementById(sub, id); if (nd != null) return nd; } return null; } /** * 逐级向上查找父节点,返回第一个符合指定的tagName的Element * * @param node * 起始节点 * @param tagName * 要匹配的元素名称,为空表示无限制 * @return 符合的上级节点。 */ public static Element firstParent(Node node, String tagName) { if (StringUtils.isEmpty(tagName)) return (Element) node.getParentNode(); Node p = node.getParentNode(); while (p != null) { if (p.getNodeType() == Node.ELEMENT_NODE && p.getNodeName().equals(tagName)) { return (Element) p; } p = p.getParentNode(); } return null; } /** * 向后查找兄弟节点 * * @param node * 起始节点 * @param tagName * 匹配的元素名称,为空表示无限制 * @return 符合的兄弟节点 */ public static Element firstSibling(Node node, String tagName) { Node p = node.getNextSibling(); while (p != null) { if (p.getNodeType() == Node.ELEMENT_NODE) { if (StringUtils.isEmpty(tagName) || p.getNodeName().equals(tagName)) return (Element) p; } p = p.getNextSibling(); } return null; } /** * 向前查找符合条件的兄弟节点 * * @param node * 起始节点 * @param tagName * 匹配的元素名称,为空表示无限制 * @return 符合的兄弟节点 */ public static Element firstPrevSibling(Node node, String tagName) { Node p = node.getPreviousSibling(); while (p != null) { if (p.getNodeType() == Node.ELEMENT_NODE) { if (StringUtils.isEmpty(tagName) || p.getNodeName().equals(tagName)) return (Element) p; } p = p.getPreviousSibling(); } return null; } /** * 过滤xml的无效字符。 * <p/> * XML中出现以下字符就是无效的,此时Parser会抛出异常,仅仅因为个别字符导致整个文档无法解析,是不是小题大作了点? * 为此编写了这个类来过滤输入流中的非法字符。 * 不过这个类的实现不够好,性能比起原来的Reader实现和nio的StreamReader下降明显,尤其是read(char[] b, int * off, int len)方法. 如果不需要由XmlFixedReader带来的容错性,还是不要用这个类的好。 * <ol> * <li>0x00 - 0x08</li> * <li>0x0b - 0x0c</li> * <li>0x0e - 0x1f</li> * </ol> */ public static class XmlFixedReader extends FilterReader { public XmlFixedReader(Reader reader) { super(new BufferedReader(reader)); } public int read() throws IOException { int ch = super.read(); while ((ch >= 0x00 && ch <= 0x08) || (ch >= 0x0b && ch <= 0x0c) || (ch >= 0x0e && ch <= 0x1f) || ch == 0xFEFF) { ch = super.read(); } return ch; } // 最大的问题就是这个方法,一次读取一个字符速度受影响。 public int read(char[] b, int off, int len) throws IOException { if (b == null) { throw new NullPointerException(); } else if (off < 0 || len < 0 || len > b.length - off) { throw new IndexOutOfBoundsException(); } else if (len == 0) { return 0; } int c = read(); if (c == -1) { return -1; } b[off] = (char) c; int i = 1; try { for (; i < len; i++) { c = read(); if (c == -1) { break; } b[off + i] = (char) c; } } catch (IOException ee) { } return i; } } /** * 无视层级,获得所有指定Tagname的element节点 * * @param node * 起始节点 * @param tagName * 找寻的下级元素节点名称 * @return 所有符合条件的节点 */ public static List<Element> getElementsByTagNames(Node node, String... tagName) { List<Element> nds = new ArrayList<Element>(); if (tagName.length == 0) tagName = new String[] { "" }; if (node.getNodeType() == Node.ELEMENT_NODE) { Element doc = (Element) node; for (String elementName : tagName) { nds.addAll(toElementList(doc.getElementsByTagName(elementName))); } } else if (node instanceof Document) { Document doc = ((Document) node); for (String elementName : tagName) { nds.addAll(toElementList(doc.getElementsByTagName(elementName))); } } else if (node instanceof DocumentFragment) { Document doc = ((DocumentFragment) node).getOwnerDocument(); for (String elementName : tagName) { nds.addAll(toElementList(doc.getElementsByTagName(elementName))); } } else { throw new IllegalArgumentException("a node who doesn't support getElementsByTagName operation."); } return nds; } /** * 将Node打印到输出流 * * @param node * DOM节点 * @param out * 输出流 */ public static void printNode(Node node, OutputStream out) { output(node, out, null, 4, null); } /** * 将DOM节点转换为文本 * * @param node * DOM节点 * @param charset * 字符集,该属性只影响xml头部的声明,由于返回的string仍然是标准的unicode string, * 你必须注意在输出时指定编码和此处的编码一致. * @param xmlHeader * 是否要携带XML头部标签<?xml ....> * @return 转换后的XML文本 */ public static String toString(Node node, String charset, Boolean xmlHeader) { if(node.getNodeType()==Node.ATTRIBUTE_NODE) { return node.getNodeValue(); } StringWriter sw = new StringWriter(4096); StreamResult sr = new StreamResult(sw); try { output(node, sr, charset, 4, xmlHeader); } catch (IOException e) { LogUtil.exception(e); } return sw.toString(); } /** * 将DOM节点还原为XML片段文本 * * @param node * DOM节点 * @param charset * 字符集,该属性只影响xml头部的声明,由于返回的string仍然是标准的unicode string, * 你必须注意在输出时指定编码和此处的编码一致. * @return 转换后的XML文本 */ public static String toString(Node node, String charset) { return toString(node, charset, null); } /** * 设置XSD Schema * * @param node * DOM节点 * @param schemaURL * 设置的XSD的URL */ public static void setXsdSchema(Node node, String schemaURL) { Document doc; if (node.getNodeType() != Node.DOCUMENT_NODE) { doc = node.getOwnerDocument(); } else { doc = (Document) node; } Element root = doc.getDocumentElement(); if (schemaURL == null) { root.removeAttribute("xmlns:xsi"); root.removeAttribute("xsi:noNamespaceSchemaLocation"); } else { root.setAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance"); root.setAttribute("xsi:noNamespaceSchemaLocation", schemaURL); } } private static void innerSearch(Node node, String text, List<Node> result, boolean searchAttribute) { String value = getValue(node); // 检查节点本身 if (value != null && value.indexOf(text) > -1) result.add(node); // 检查属性节点 if (searchAttribute && node.getAttributes() != null) { for (Node n : toArray(node.getAttributes())) { value = getValue(n); if (value != null && value.indexOf(text) > -1) { result.add(n); } } } // 检查下属元素节点 for (Node sub : toArray(node.getChildNodes())) { innerSearch(sub, text, result, searchAttribute); } } /* * 获得节点的值,无论是元素节点还是文本节点还是属性节点,都能正确获得其值 */ private static String getValue(Node node) { switch (node.getNodeType()) { case Node.ELEMENT_NODE: return nodeText((Element) node); case Node.TEXT_NODE: return StringUtils.trimToNull(StringEscapeUtils.unescapeHtml(node.getTextContent())); case Node.CDATA_SECTION_NODE: return ((CDATASection) node).getTextContent(); default: return StringEscapeUtils.unescapeHtml(node.getNodeValue()); } } private static void innerSearchByAttribute(Node node, String attribName, String id, List<Element> result, boolean findFirst) { if (node.getNodeType() == Node.ELEMENT_NODE) { Element e = (Element) node; String s = attrib(e, attribName); if (s != null && s.equals(id)) { result.add(e); if (findFirst) return; } } for (Node sub : toArray(node.getChildNodes())) { innerSearchByAttribute(sub, attribName, id, result, findFirst); if (findFirst && result.size() > 0) return; } } private static Element[] findElementsByNameAndAttribute(Node root, String tagName, String attribName, String keyword, boolean findFirst) { List<Element> result = new ArrayList<Element>(); List<Element> es; if (root instanceof Document) { es = toElementList(((Document) root).getElementsByTagName(tagName)); } else if (root instanceof Element) { es = toElementList(((Element) root).getElementsByTagName(tagName)); } else if (root instanceof DocumentFragment) { Element eRoot = (Element) first(root, Node.ELEMENT_NODE); es = toElementList(eRoot.getElementsByTagName(tagName)); if (eRoot.getNodeName().equals(tagName)) es.add(eRoot); } else { throw new UnsupportedOperationException(root + " is a unknow Node type to find"); } for (Element e : es) { String s = attrib(e, attribName); if (s != null && s.equals(keyword)) { result.add(e); if (findFirst) break; } } return result.toArray(new Element[result.size()]); } private static Element[] findElementsByAttribute(Node node, String attribName, String keyword, boolean findFirst) { List<Element> result = new ArrayList<Element>(); innerSearchByAttribute(node, attribName, keyword, result, findFirst); return result.toArray(new Element[0]); } private static Element appendBean(Node parent, Object bean, Class<?> type, Boolean asAttrib, String tagName) { if (type == null) { if (bean == null) { return null; } type = bean.getClass(); } if (tagName == null || tagName.length() == 0) { tagName = type.getSimpleName(); } if (type.isArray()) { if (bean == null) return null; Element collection = addElement(parent, tagName); for (int i = 0; i < Array.getLength(bean); i++) { appendBean(collection, Array.get(bean, i), null, asAttrib, null); } return collection; } else if (Collection.class.isAssignableFrom(type)) { if (bean == null) return null; Element collection = addElement(parent, tagName); for (Object obj : (Collection<?>) bean) { appendBean(collection, obj, null, asAttrib, null); } return collection; } else if (Map.class.isAssignableFrom(type)) { Element map = addElement(parent, tagName); for (Entry<?, ?> e : ((Map<?, ?>) bean).entrySet()) { Element entry = XMLUtils.addElement(map, "entry"); Element key = XMLUtils.addElement(entry, "key"); appendBean(key, e.getKey(), null, asAttrib, null); Element value = XMLUtils.addElement(entry, "value"); appendBean(value, e.getValue(), null, asAttrib, null); } return map; } else if (CharSequence.class.isAssignableFrom(type)) { if (Boolean.TRUE.equals(asAttrib)) { ((Element) parent).setAttribute(tagName, StringUtils.toString(bean)); } else { addElement(parent, tagName, StringUtils.toString(bean)); } } else if (Date.class.isAssignableFrom(type)) { if (Boolean.FALSE.equals(asAttrib)) { addElement(parent, tagName, DateUtils.formatDateTime((Date) bean)); } else { ((Element) parent).setAttribute(tagName, DateUtils.formatDateTime((Date) bean)); } } else if (Number.class.isAssignableFrom(type) || type.isPrimitive() || type == Boolean.class) { if (Boolean.FALSE.equals(asAttrib)) { addElement(parent, tagName, StringUtils.toString(bean)); } else { ((Element) parent).setAttribute(tagName, StringUtils.toString(bean)); } } else { if (bean == null) return null; Element root = addElement(parent, type.getSimpleName()); BeanWrapper bw = BeanWrapper.wrap(bean); for (Property p : bw.getProperties()) { appendBean(root, p.get(bean), p.getType(), asAttrib, p.getName()); } return root; } return null; } }