/** * Licensed to DigitalPebble Ltd under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * DigitalPebble licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.digitalpebble.stormcrawler.util; import java.net.URL; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.List; import org.apache.http.cookie.Cookie; import org.apache.http.impl.cookie.BasicClientCookie; /** * Helper to extract cookies from cookies string. * */ public class CookieConverter { private static final SimpleDateFormat DATE_FORMAT = new SimpleDateFormat( "EEE, dd MMM yyyy HH:mm:ss zzz"); /** * Get a list of cookies based on the cookies string taken from response * header and the target url. * * @param cookiesString * the value of the http header for "Cookie" in the http * response. * @param targetURL * the url for which we wish to pass the cookies in the request. * @return List off cookies to add to the request. */ public static List<Cookie> getCookies(String[] cookiesStrings, URL targetURL) { ArrayList<Cookie> list = new ArrayList<Cookie>(); for (String cs : cookiesStrings) { String name = null; String value = null; String expires = null; String domain = null; String path = null; boolean secure = false; String[] tokens = cs.split(";"); int equals = tokens[0].indexOf("="); name = tokens[0].substring(0, equals); value = tokens[0].substring(equals + 1); for (int i = 1; i < tokens.length; i++) { String ti = tokens[i].trim(); if (ti.equalsIgnoreCase("secure")) secure = true; if (ti.toLowerCase().startsWith("path=")) { path = ti.substring(5); } if (ti.toLowerCase().startsWith("domain=")) { domain = ti.substring(7); } if (ti.toLowerCase().startsWith("expires=")) { expires = ti.substring(8); } } BasicClientCookie cookie = new BasicClientCookie(name, value); // check domain if (domain != null) { cookie.setDomain(domain); if (!checkDomainMatchToUrl(domain, targetURL.getHost())) continue; } // check path if (path != null) { cookie.setPath(path); if (!path.equals("") && !path.equals("/") && !targetURL.getPath().startsWith(path)) continue; } // check secure if (secure) { cookie.setSecure(secure); if (!targetURL.getProtocol().equalsIgnoreCase("https")) continue; } // check expiration if (expires != null) { try { Date expirationDate = DATE_FORMAT.parse(expires); cookie.setExpiryDate(expirationDate); // check that it hasn't expired? if (cookie.isExpired(new Date())) continue; cookie.setExpiryDate(expirationDate); } catch (ParseException e) { // ignore exceptions } } // attach additional infos to cookie list.add(cookie); } return list; } /** * Helper method to check if url matches a cookie domain. * * @param cookieDomain * the domain in the cookie * @param urlHostName * the host name of the url * @return does the cookie match the host name */ public static boolean checkDomainMatchToUrl(String cookieDomain, String urlHostName) { try { if (cookieDomain.startsWith(".")) { cookieDomain = cookieDomain.substring(1); } String[] domainTokens = cookieDomain.split("\\."); String[] hostTokens = urlHostName.split("\\."); int tokenDif = hostTokens.length - domainTokens.length; if (tokenDif < 0) { return false; } for (int i = domainTokens.length - 1; i >= 0; i--) { if (!domainTokens[i].equalsIgnoreCase(hostTokens[i + tokenDif])) { return false; } } return true; } catch (Exception e) { return true; } } }