package xyz.anduo.crawler;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import junit.framework.TestCase;
/**
 * Walk-through of common {@link java.util.regex} operations: matching, splitting, replacing,
 * validating an e-mail address, stripping HTML tags, extracting values and URLs, and filling
 * in <code>{n}</code> placeholders.
 *
 * <p>Each example prints its result to standard output and also asserts the expected value.
 */
public class RegexTest extends TestCase {

  /** Text shared by the three replacement examples. */
  private static final String REPLACE_INPUT = "正则表达式 Hello World,正则表达式 Hello World";

  /** HTML snippet shared by the tag-stripping and attribute-extraction examples. */
  private static final String ANCHOR_HTML = "<a href=\"index.html\">主页</a>";

  /** Runs every regex example in sequence. */
  public void test() {
    // Match a string that starts with "Java" and ends with anything.
    Pattern pattern1 = Pattern.compile("^Java.*");
    Matcher matcher1 = pattern1.matcher("Java是一门编程语言");
    assertTrue(matcher1.matches());

    // Split a string on several delimiters at once (comma, space, pipe).
    Pattern pattern2 = Pattern.compile("[, |]+");
    String[] strs = pattern2.split("Java Hello World Java,Hello,,World|Sun");
    for (String str : strs) {
      System.out.println(str);
    }
    // The "+" quantifier collapses consecutive delimiters, so ",," yields no empty token.
    assertEquals(7, strs.length);
    assertEquals("Java", strs[0]);
    assertEquals("Sun", strs[strs.length - 1]);

    // Text replacement: only the first occurrence.
    Pattern pattern3 = Pattern.compile("正则表达式");
    Matcher matcher3 = pattern3.matcher(REPLACE_INPUT);
    String firstReplaced = matcher3.replaceFirst("Java");
    System.out.println(firstReplaced);
    assertEquals("Java Hello World,正则表达式 Hello World", firstReplaced);

    // Text replacement: every occurrence.
    Pattern pattern4 = Pattern.compile("正则表达式");
    Matcher matcher4 = pattern4.matcher(REPLACE_INPUT);
    String allReplaced = matcher4.replaceAll("Java");
    System.out.println(allReplaced);
    assertEquals("Java Hello World,Java Hello World", allReplaced);

    // Text replacement done by hand with appendReplacement/appendTail.
    // StringBuffer is kept because the StringBuilder overloads only exist since Java 9.
    Pattern pattern5 = Pattern.compile("正则表达式");
    Matcher matcher5 = pattern5.matcher(REPLACE_INPUT);
    StringBuffer sbr = new StringBuffer();
    while (matcher5.find()) {
      matcher5.appendReplacement(sbr, "Java");
    }
    matcher5.appendTail(sbr);
    System.out.println(sbr.toString());
    assertEquals("Java Hello World,Java Hello World", sbr.toString());

    // Check whether an e-mail address is well-formed.
    String email = "xyz@qq.com";
    Pattern pattern6 =
        Pattern.compile("[\\w\\.\\-]+@([\\w\\-]+\\.)+[\\w\\-]+", Pattern.CASE_INSENSITIVE);
    Matcher matcher6 = pattern6.matcher(email);
    boolean emailValid = matcher6.matches();
    System.out.println(emailValid);
    assertTrue(emailValid);

    // Strip HTML tags. The pattern has no capturing group, so there is nothing to read with
    // group(1) afterwards; replaceAll alone produces the result.
    Pattern pattern7 = Pattern.compile("<.+?>", Pattern.DOTALL);
    Matcher matcher7 = pattern7.matcher(ANCHOR_HTML);
    String string7 = matcher7.replaceAll("");
    System.out.println(string7);
    assertEquals("主页", string7);

    // Find a string inside HTML that satisfies a condition: here, the href attribute value.
    // group(1) requires a capturing group in the pattern, otherwise it throws
    // IndexOutOfBoundsException.
    Pattern pattern8 = Pattern.compile("href=\"(.+?)\"");
    Matcher matcher8 = pattern8.matcher(ANCHOR_HTML);
    assertTrue(matcher8.find());
    String href = matcher8.group(1);
    System.out.println(href);
    assertEquals("index.html", href);

    // Extract http:// and https:// addresses, one per line.
    Pattern pattern9 = Pattern.compile("(http://|https://){1}[\\w\\.\\-/:]+");
    Matcher matcher9 = pattern9.matcher("fdfsdf<http://dddd/fdfsdf>fdsfsd");
    StringBuilder sb9 = new StringBuilder();
    while (matcher9.find()) {
      sb9.append(matcher9.group());
      sb9.append("\r\n");
    }
    System.out.println(sb9.toString());
    assertEquals("http://dddd/fdfsdf\r\n", sb9.toString());

    // Replace the text of {n} placeholders.
    String str10 = "java目前的发展历史是由{0}年-{1}年";
    String[][] object = {new String[] {"\\{0\\}", "1995"}, new String[] {"\\{1\\}", "2015"}};
    String filled = replace(str10, object);
    System.out.println(filled);
    assertEquals("java目前的发展历史是由1995年-2015年", filled);

    // Looking up files in a directory by regex is not demonstrated here.
  }

  /**
   * Applies a sequence of regex replacements to a string, each one operating on the result of
   * the previous one.
   *
   * @param sourceString the text to transform
   * @param object the replacements to apply in order; every element must be a {@code String[]}
   *     whose index 0 is the regular expression and index 1 is the replacement. The replacement
   *     is passed to {@link Matcher#replaceAll(String)} unchanged, so {@code $} and {@code \}
   *     keep their special meaning there.
   * @return the text after all replacements have been applied
   * @throws ClassCastException if an element of {@code object} is not a {@code String[]}
   * @throws java.util.regex.PatternSyntaxException if a regular expression is invalid
   */
  public static String replace(final String sourceString, Object[] object) {
    String tmp = sourceString;
    for (Object entry : object) {
      String[] result = (String[]) entry;
      Pattern pattern = Pattern.compile(result[0]);
      Matcher matcher = pattern.matcher(tmp);
      tmp = matcher.replaceAll(result[1]);
    }
    return tmp;
  }
}