/**
*
*/
package org.howsun.util;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLEditorKit.ParserCallback;
import javax.swing.text.html.parser.ParserDelegator;
import static java.lang.System.out;
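/**
* Description:<br>
* Extracts image sources (the src attribute values of img tags) from a text string.<br>
* Usage example:
* <pre>
* StringReader sr = new StringReader(src);
* AnalizeWebParse parse = new AnalizeWebParse();
* List&lt;String&gt; results = parse.parse(sr);
* </pre>
* Here src is the string to be examined; it may come from a textarea control, be read from a
* file, be fetched from the Internet, and so on.
* results is the collection of images found in that string.
*
*
* @author 张纪豪
* @version 0.1
* Build Time Apr 10, 2009
*/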
public class AnalizeWebParse extends ParserCallback {

    /*
     * Implementation notes:
     * - Every <img> whose src matches `regex` is collected, wherever it appears in the document.
     * - The start/finished flags only record whether a <div> whose class contains "body" has been
     *   opened and then closed; they do not restrict which images are collected.
     */

    /**
     * Regular expression an image {@code src} must match in full to be collected. The default
     * accepts any non-empty value that contains no line terminator. Some image URLs are absolute,
     * e.g. http://image.sohu.com/2009/10/10/6278481.jpg; use {@code "^(http://.+)"} to keep only
     * those.
     */
    String regex = "^(.+)";

    /** Image sources collected by the most recent non-null {@link #parse(Reader)} call. */
    List<String> imgs = new ArrayList<String>();

    /** Set once a {@code <div>} whose class attribute contains "body" has been opened. */
    boolean start = false;

    /** Set when the first {@code </div>} is reported after {@link #start} became true. */
    boolean finished = false;

    /**
     * Prints a line to standard output.
     *
     * @param s the text to print
     */
    public void p(String s) {
        System.out.println(s);
    }

    /**
     * Watches for the first {@code <div>} whose class attribute contains "body" and raises
     * {@link #start} when it is found. Does nothing once {@link #start} or {@link #finished}
     * is already set.
     *
     * @param tag     the tag being opened
     * @param attribs the attributes of the tag
     * @param pos     the position reported by the parser (unused)
     */
    @Override
    public void handleStartTag(HTML.Tag tag, MutableAttributeSet attribs, int pos) {
        if (finished || start || tag != HTML.Tag.DIV) {
            return;
        }
        // Guard the type instead of casting blindly: a missing class attribute yields null.
        Object cssClass = attribs.getAttribute(HTML.Attribute.CLASS);
        if (cssClass instanceof String && ((String) cssClass).indexOf("body") != -1) {
            start = true;
        }
    }

    /**
     * Raises {@link #finished} on the first {@code </div>} reported after {@link #start} was set.
     * Any closing div counts, including one nested inside the "body" div.
     *
     * @param tag the tag being closed
     * @param pos the position reported by the parser (unused)
     */
    @Override
    public void handleEndTag(HTML.Tag tag, int pos) {
        if (tag == HTML.Tag.DIV && start && !finished) {
            finished = true;
        }
    }

    /**
     * Ignores text content; only tags are of interest.
     *
     * @param text the text reported by the parser (unused)
     * @param pos  the position reported by the parser (unused)
     */
    @Override
    public void handleText(char[] text, int pos) {
        // Intentionally empty.
    }

    /**
     * Collects the {@code src} of every {@code <img>} tag whose value matches {@link #regex}.
     * Tags other than {@code <img>}, and images without a {@code src}, are skipped.
     *
     * @param t   the simple (empty) tag reported by the parser
     * @param a   the attributes of the tag
     * @param pos the position reported by the parser (unused)
     */
    @Override
    public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
        if (t != HTML.Tag.IMG) {
            return;
        }
        Object src = a.getAttribute(HTML.Attribute.SRC);
        if (src instanceof String && Pattern.matches(regex, (String) src)) {
            imgs.add((String) src);
        }
    }

    /**
     * Parses HTML from the given reader and returns the image sources found in it.
     * <p>
     * Each call starts from a clean state, so an instance can be reused: the returned list holds
     * only the images of this document, and lists returned by earlier calls are left untouched.
     *
     * @param file the character stream to parse; the caller remains responsible for closing it
     * @return the matching image {@code src} values in document order, or {@code null} when
     *         {@code file} is {@code null}
     * @throws Exception if the underlying parser fails, e.g. with an I/O error while reading
     */
    public List<String> parse(Reader file) throws Exception {
        if (file == null) {
            return null;
        }
        // Use a new list rather than clear() so results handed out earlier are not wiped.
        imgs = new ArrayList<String>();
        start = false;
        finished = false;

        // Third argument (ignoreCharSet) = true: charset directives inside the document are ignored.
        new ParserDelegator().parse(file, this, true);
        return imgs;
    }
}
/*
*
* Example caller: runs the parser and saves the results to a file
*
* public class AnalizeIMG {
public void p(String s) {
System.out.println(s);
}
public void analizeFile(String infile, String outfile) throws Exception {
File file = new File(infile);
if (file == null || !file.exists()) {
p("File " + infile + " not exits !");
}
if (!file.canRead()) {
p("File " + infile + " can't read !");
}
FileReader frd = new FileReader(infile);
BufferedReader bufferedReader = new BufferedReader(frd);
try {
AnalizeWebParse parse = new AnalizeWebParse();
List<String> s = parse.parse(bufferedReader);
System.out.println(s);
createFile(outfile, s.toString());
} catch (Exception ex) {
throw ex;
} finally {
frd.close();
bufferedReader.close();
}
}
private void createFile(String filename, String content) {
FileWriter f = null;
try {
f = new FileWriter(filename);
if (f == null || content == null) {
return;
}
f.write(content);
f.flush();
f.close();
} catch (Exception e) {
} finally {
if (f != null) {
try {
f.close();
} catch (Exception e) {
}
}
}
}
public static void main(String arg[]) {
AnalizeIMG ana = new AnalizeIMG();
try {
ana.analizeFile("E:\\1.txt", "E:\\out.lst");
} catch (Exception ex) {
ex.printStackTrace();
}
}
}
*/