package com.geccocrawler.gecco.utils;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
public class UrlMatcher {
private static Log log = LogFactory.getLog(UrlMatcher.class);
public static String replaceParams(String regex, String name, String value) {
Map<String, String> map = new HashMap<String, String>(1);
map.put(name, value);
return replaceParams(regex, map);
}
public static String replaceParams(String srcUrl, Map<String, String> params) {
return replaceRegexs(srcUrl, "\\{(.*?)\\}", params);
}
public static String replaceFields(String regex, String name, String value) {
Map<String, String> map = new HashMap<String, String>(1);
map.put(name, value);
return replaceFields(regex, map);
}
public static String replaceFields(String srcUrl, Map<String, String> params) {
return replaceRegexs(srcUrl, "\\[(.*?)\\]", params);
}
public static String replaceRegexs(String srcUrl, String regex, Map<String, String> params) {
if(params == null) {
return srcUrl;
}
StringBuffer sb = new StringBuffer();
Pattern pattern = Pattern.compile(regex);
Matcher matcher = pattern.matcher(srcUrl);
while(matcher.find()) {
String name = matcher.group(1);
String value = params.get(name);
if(StringUtils.isNotEmpty(value)) {
matcher.appendReplacement(sb, value);
}
}
matcher.appendTail(sb);
return sb.toString();
}
public static Map<String, String> match(String url, String regex) {
String regexSrc = StringUtils.replace(regex, "?", "\\?");
//regexSrc = StringUtils.replace(regexSrc, "/", "\\/");
String regex1 = "\\{(.*?)\\}";
StringBuffer sb = new StringBuffer();
Pattern pattern = Pattern.compile(regex1);
Matcher matcher = pattern.matcher(regexSrc);
List<String> names = new ArrayList<String>();
while(matcher.find()) {
matcher.appendReplacement(sb, "([^/]*)");
//matcher.appendReplacement(sb, "(.*)");
String name = matcher.group(1);
names.add(name);
}
if(names.size() > 0) {
matcher.appendTail(sb);
String regex2 = sb.toString();
if(log.isDebugEnabled()) {
log.debug(regex2);
}
regex2 = "^"+regex2;
Pattern pattern2 = Pattern.compile(regex2);
Matcher matcher2 = pattern2.matcher(url);
if(matcher2.matches()) {
Map<String, String> params = new HashMap<String, String>(names.size());
for(int i = 1; i <= matcher2.groupCount(); i++) {
String value = matcher2.group(i);
//boolean x = matcher2.requireEnd();
try {
value = URLDecoder.decode(value, "UTF-8");
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
}
params.put(names.get(i-1), value);
}
return params;
}
} else {
//如果没有变量,返回空map
if(url.equals(regex)) {
return new HashMap<String, String>(0);
}
}
//适配失败返回null
return null;
}
public static void main(String[] args) {
//http://temai.tuniu.com/{catalog}/{srcId}
//http://temai.tuniu.com/weihuo/{catalog}/s4-p{currPage}/
//http://temai.tuniu.com/weihuo/tours/s4-p1/
//http://temai.tuniu.com/tours/212055673
//String regex = "http://temai.tuniu.com/weihuo/{catalog}/s4-p{currPage}/";
//String url = "http://temai.tuniu.com/weihuo/tours/s4-p1/";
//String regex = "http://temai.tuniu.com/{catalog}/{srcId}";
//String url = "http://temai.tuniu.com/tours/212055673";
//System.out.println(match(url, regex));
String url = "http://www.ly.com/HotelInfo-597101.html#id_nameAndSliderInfo&is=1&searchId=undefined&ab=0";
String regex = "http://www.ly.com/HotelInfo-{code}.html#{hash}";
System.out.println(match(url, regex));
}
}