/**
 * Static helpers for turning links found on a page into complete URLs.
 *
 * @author 杨尚川 (Yang Shangchuan)
 */
public class UrlTools {
    // HTTP request header values. Nothing in this class reads them; they are retained unchanged.
    private static final String ACCEPT = "text/html, */*; q=0.01";
    private static final String ENCODING = "gzip, deflate";
    private static final String LANGUAGE = "zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3";
    private static final String CONNECTION = "keep-alive";
    private static final String USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:31.0) Gecko/20100101 Firefox/31.0";

    /** Utility class; not instantiable. */
    private UrlTools(){}

    /**
     * Converts an incomplete link found on a page into a complete URL.
     *
     * <p>Resolution rules, checked in this order:
     * <ul>
     *   <li>{@code href} starting with {@code http://} or {@code https://} (any case) is returned unchanged;</li>
     *   <li>{@code //host/path} (protocol-relative) inherits the protocol of {@code url};</li>
     *   <li>{@code /path} is appended to the protocol, host and explicit port of {@code url};</li>
     *   <li>{@code ?query} replaces the query (and fragment) of {@code url};</li>
     *   <li>anything else is treated as a path relative to the directory of {@code url}.</li>
     * </ul>
     * No dot-segment ({@code ./}, {@code ../}) normalization is performed.
     *
     * @param url  URL of the page the link was found on
     * @param href link taken from that page; may be complete, absolute or relative
     * @return the complete URL
     * @throws MalformedURLException if {@code url} cannot be parsed by {@link URL}
     */
    public static String normalizeUrl(String url, String href) throws MalformedURLException {
        // Parse first so a malformed page URL is always reported, even when href is already complete.
        URL page = new URL(url);

        if (isCompleteHttpUrl(href)) {
            return href;
        }
        if (href.startsWith("//")) {
            // Protocol-relative link: reuse the page's own protocol rather than forcing http.
            return page.getProtocol() + ":" + href;
        }
        if (href.startsWith("/")) {
            // Host-absolute path.
            return origin(page) + href;
        }

        // The remaining forms are resolved against the page URL without its query/fragment,
        // so that a '/' or '#' appearing after the path cannot corrupt the result.
        String base = stripQueryAndFragment(url);
        if (href.startsWith("?")) {
            // Same page, different parameters.
            return base + href;
        }

        // Relative path: resolve against the directory of the page.
        int schemeSeparator = base.indexOf("://");
        int authorityStart = schemeSeparator < 0 ? 0 : schemeSeparator + 3;
        int lastSlash = base.lastIndexOf('/');
        if (lastSlash >= authorityStart) {
            // The last slash belongs to the path, e.g. http://yangshangchuan.iteye.com/
            return base.substring(0, lastSlash + 1) + href;
        }
        // The only slashes are the ones following the protocol, so the URL has no path yet.
        return base + "/" + href;
    }

    /** Tells whether {@code href} already starts with an http or https scheme (case-insensitive). */
    private static boolean isCompleteHttpUrl(String href) {
        return href.regionMatches(true, 0, "http://", 0, 7)
                || href.regionMatches(true, 0, "https://", 0, 8);
    }

    /** Builds {@code protocol://host[:port]}; the port is included only when the URL states one. */
    private static String origin(URL page) {
        StringBuilder origin = new StringBuilder();
        origin.append(page.getProtocol()).append("://").append(page.getHost());
        // URL.getPort() returns -1 when no port is given in the URL.
        if (page.getPort() > 0) {
            origin.append(':').append(page.getPort());
        }
        return origin.toString();
    }

    /** Returns {@code url} truncated at the first {@code '?'} or {@code '#'}, whichever comes first. */
    private static String stripQueryAndFragment(String url) {
        int end = url.length();
        int query = url.indexOf('?');
        if (query >= 0) {
            end = query;
        }
        int fragment = url.indexOf('#');
        if (fragment >= 0 && fragment < end) {
            end = fragment;
        }
        return url.substring(0, end);
    }
}