/* ================================================================== * Created [2009-4-27 下午11:32:55] by Jon.King * ================================================================== * TSS * ================================================================== * mailTo:jinpujun@hotmail.com * Copyright (c) Jon.King, 2009-2012 * ================================================================== */ package com.jinhe.tss.portal.engine.releasehtml; import java.io.File; import java.io.IOException; import org.htmlparser.Node; import org.htmlparser.NodeFilter; import org.htmlparser.Parser; import org.htmlparser.filters.LinkRegexFilter; import org.htmlparser.filters.TagNameFilter; import org.htmlparser.tags.LinkTag; import org.htmlparser.tags.ScriptTag; import org.htmlparser.tags.StyleTag; import org.htmlparser.util.NodeIterator; import org.htmlparser.util.NodeList; import org.htmlparser.util.ParserException; import org.xml.sax.SAXException; import com.jinhe.tss.core.util.FileHelper; public class TestOpenHtmlParser { public static void testHtmlParser() { try { Parser parser = new Parser("D:/Temp/Portal/index2.html"); parser.setEncoding("GBK"); StringBuffer sb = new StringBuffer(); NodeList list = parser.parse(null); for (NodeIterator it = list.elements(); it.hasMoreNodes();) { Node node = it.nextNode(); replaceDynamicUrl(node); replaceOtherResourcesUrl(node); sb.append(node.toHtml()); } FileHelper.writeFile(new File("D:/Temp/Portal/index3.html"), sb.toString()); } catch (ParserException e) { e.printStackTrace(); } } static class LinkNodeFilter implements NodeFilter { private static final long serialVersionUID = 1L; public boolean accept(Node node) { if (node.getText().indexOf("a href") != -1) return true; return false; } } private static void replaceDynamicUrl(Node bigNode) throws ParserException { NodeList linkElements = new NodeList(); bigNode.collectInto(linkElements, new LinkRegexFilter("")); for (NodeIterator it2 = linkElements.elements(); it2.hasMoreNodes();) { Node node2 = it2.nextNode(); // System.out.println(node2.getText()); org.htmlparser.tags.LinkTag linkTag = (LinkTag) node2; // System.out.println(linkTag.getLink()); // System.out.println(linkTag.getLinkText()); System.out.println(linkTag.getAttribute("href")); linkTag.setAttribute("href", "http://www.google.com"); // node2.setText("<a href='http://www.google.com'></a>"); } } private static void replaceOtherResourcesUrl(Node bigNode) throws ParserException { // org.htmlparser.filters.TagNameFilter filter; // org.htmlparser.tags.ImageTag imageTag; // org.htmlparser.tags.ScriptTag scriptTag; // org.htmlparser.tags.StyleTag styleTag; NodeList linkElements = new NodeList(); bigNode.collectInto(linkElements, new TagNameFilter("script")); for (NodeIterator iter = linkElements.elements(); iter.hasMoreNodes();) { Node node = iter.nextNode(); String link = node.getText(); System.out.println(link); org.htmlparser.tags.ScriptTag scriptTag = (ScriptTag) node; System.out.println(scriptTag.getAttribute("src")); scriptTag.setAttribute("src", "yyyyyyyyyyy"); } linkElements = new NodeList(); bigNode.collectInto(linkElements, new TagNameFilter("style")); for (NodeIterator iter = linkElements.elements(); iter.hasMoreNodes();) { StyleTag tag = (StyleTag) iter.nextNode(); System.out.println(tag.getStyleCode()); } } public static void main(String[] agrs) throws SAXException, IOException { testHtmlParser(); } }