/* $Id: CookieManager.java 988245 2010-08-23 18:39:35Z kwright $ */

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.manifoldcf.crawler.connectors.webcrawler;

import java.util.*;
import java.io.*;
import org.apache.manifoldcf.core.interfaces.*;
import org.apache.manifoldcf.crawler.interfaces.*;
import org.apache.manifoldcf.authorities.interfaces.*;
import org.apache.manifoldcf.crawler.interfaces.CacheKeyFactory;
import org.apache.manifoldcf.crawler.system.ManifoldCF;
import org.apache.manifoldcf.crawler.system.Logging;

import org.apache.http.cookie.Cookie;
import org.apache.http.cookie.ClientCookie;
import org.apache.http.impl.cookie.BasicClientCookie2;

/** This class manages the database table into which we write cookies.  The data resides in the database,
* as well as in cache (up to a certain point).  The result is that there is a memory limited, database-backed repository
* of cookies that we can draw on.
*
* <br><br>
* <b>cookiedata</b>
* <table border="1" cellpadding="3" cellspacing="0">
* <tr class="TableHeadingColor">
* <th>Field</th><th>Type</th><th>Description</th></tr>
* <tr><td>sequencekey</td><td>VARCHAR(255)</td><td></td></tr>
* <tr><td>ordinal</td><td>BIGINT</td><td></td></tr>
* <tr><td>domainspecified</td><td>CHAR(1)</td><td></td></tr>
* <tr><td>domain</td><td>LONGTEXT</td><td></td></tr>
* <tr><td>name</td><td>LONGTEXT</td><td></td></tr>
* <tr><td>value</td><td>LONGTEXT</td><td></td></tr>
* <tr><td>pathspecified</td><td>CHAR(1)</td><td></td></tr>
* <tr><td>path</td><td>LONGTEXT</td><td></td></tr>
* <tr><td>versionspecified</td><td>CHAR(1)</td><td></td></tr>
* <tr><td>version</td><td>BIGINT</td><td></td></tr>
* <tr><td>comment</td><td>LONGTEXT</td><td></td></tr>
* <tr><td>secure</td><td>CHAR(1)</td><td></td></tr>
* <tr><td>expirationdate</td><td>BIGINT</td><td></td></tr>
* <tr><td>discard</td><td>CHAR(1)</td><td></td></tr>
* <tr><td>commenturl</td><td>LONGTEXT</td><td></td></tr>
* <tr><td>portblank</td><td>CHAR(1)</td><td></td></tr>
* <tr><td>portspecified</td><td>CHAR(1)</td><td></td></tr>
* <tr><td>ports</td><td>LONGTEXT</td><td></td></tr>
* </table>
* <br><br>
*
*/
public class CookieManager extends org.apache.manifoldcf.core.database.BaseTable
{
  public static final String _rcsid = "@(#)$Id: CookieManager.java 988245 2010-08-23 18:39:35Z kwright $";

  // Cookies cache class.  Only one needed.
  protected static CookiesCacheClass cookiesCacheClass = new CookiesCacheClass();

  // Database fields
  protected final static String keyField = "sequencekey";
  protected final static String ordinalField = "ordinal";
  // The rest of these individual fields are here only because the httpclient Cookie class doesn't have a
  // constructor that accepts the string form, so we're forced to keep all the cookie construction
  // arguments individually.
  protected final static String domainSpecifiedField = "domainspecified";
  protected final static String domainField = "domain";
  protected final static String nameField = "name";
  protected final static String valueField = "value";
  protected final static String pathSpecifiedField = "pathspecified";
  protected final static String pathField = "path";
  protected final static String versionSpecifiedField = "versionspecified";
  protected final static String versionField = "version";
  protected final static String commentField = "comment";
  protected final static String secureField = "secure";
  protected final static String expirationDateField = "expirationdate";
  protected final static String discardField = "discard";
  protected final static String commentURLField = "commenturl";
  protected final static String portBlankField = "portblank";
  protected final static String portSpecifiedField = "portspecified";
  protected final static String portField = "ports";

  // Cache manager.  This handle is set up during the constructor.
  ICacheManager cacheManager;

  /** Constructor.  Note that one cookiemanager handle is only useful within a specific thread context,
  * so the calling connector object logic must recreate the handle whenever the thread context changes.
  *@param tc is the thread context.
  *@param database is the database handle.
  */
  public CookieManager(IThreadContext tc, IDBInterface database)
    throws ManifoldCFException
  {
    super(database,"cookiedata");
    cacheManager = CacheManagerFactory.make(tc);
  }

  /** Install the manager.
  * Creates the table and its index on the session key column when the table does not exist yet.
  * Nothing is done when a schema is already present.
  */
  public void install()
    throws ManifoldCFException
  {
    beginTransaction();
    try
    {
      Map existing = getTableSchema(null,null);
      if (existing == null)
      {
        // Install the table.
        // NOTE(review): raw collection types are kept for values handed to BaseTable methods, since the
        // signatures of those methods are not visible from this file.
        HashMap map = new HashMap();
        map.put(keyField,new ColumnDescription("VARCHAR(255)",false,false,null,null,false));
        map.put(ordinalField,new ColumnDescription("BIGINT",false,false,null,null,false));
        // The rest of the fields allow us to recreate Cookie objects from the database so we can hand them
        // to httpclient.  (It would be better if we just kept the cookie data around, but that's not how httpclient works.)
        map.put(domainSpecifiedField,new ColumnDescription("CHAR(1)",false,false,null,null,false));
        map.put(domainField,new ColumnDescription("LONGTEXT",false,true,null,null,false));
        map.put(nameField,new ColumnDescription("LONGTEXT",false,true,null,null,false));
        map.put(valueField,new ColumnDescription("LONGTEXT",false,true,null,null,false));
        map.put(pathSpecifiedField,new ColumnDescription("CHAR(1)",false,false,null,null,false));
        map.put(pathField,new ColumnDescription("LONGTEXT",false,true,null,null,false));
        map.put(versionSpecifiedField,new ColumnDescription("CHAR(1)",false,false,null,null,false));
        map.put(versionField,new ColumnDescription("BIGINT",false,true,null,null,false));
        map.put(commentField,new ColumnDescription("LONGTEXT",false,true,null,null,false));
        map.put(secureField,new ColumnDescription("CHAR(1)",false,false,null,null,false));
        map.put(expirationDateField,new ColumnDescription("BIGINT",false,true,null,null,false));
        map.put(discardField,new ColumnDescription("CHAR(1)",false,false,null,null,false));
        map.put(commentURLField,new ColumnDescription("LONGTEXT",false,true,null,null,false));
        map.put(portBlankField,new ColumnDescription("CHAR(1)",false,false,null,null,false));
        map.put(portSpecifiedField,new ColumnDescription("CHAR(1)",false,false,null,null,false));
        map.put(portField,new ColumnDescription("LONGTEXT",false,true,null,null,false));
        performCreate(map,null);

        // Create the appropriate indices
        ArrayList list = new ArrayList();
        list.add(keyField);
        addTableIndex(false,list);
      }
    }
    catch (ManifoldCFException e)
    {
      signalRollback();
      throw e;
    }
    catch (RuntimeException e)
    {
      // Unchecked failures must also roll back, otherwise endTransaction() runs without a rollback signal.
      signalRollback();
      throw e;
    }
    catch (Error e)
    {
      signalRollback();
      throw e;
    }
    finally
    {
      endTransaction();
    }
  }

  /** Uninstall the manager.
  */
  public void deinstall()
    throws ManifoldCFException
  {
    performDrop(null);
  }

  /** Read cookies currently in effect for a given session key.
  * The lookup goes through the cache manager; rows are read from the database only when the
  * cache manager asks the executor to create the object.
  *@param sessionKey is the session key.
  *@return the login cookies object.
  */
  public LoginCookies readCookies(String sessionKey)
    throws ManifoldCFException
  {
    // Build description objects
    StringSetBuffer ssb = new StringSetBuffer();
    ssb.add(getCookiesCacheKey(sessionKey));
    CookiesDescription description = new CookiesDescription(sessionKey,new StringSet(ssb));
    CookiesDescription[] objectDescriptions = new CookiesDescription[]{description};

    CookiesExecutor exec = new CookiesExecutor(this,description);
    cacheManager.findObjectsAndExecute(objectDescriptions,null,exec,getTransactionID());

    // Expiration is in fact done by the web site; the cookies will be updated if necessary.
    return exec.getResults();
  }

  /** Update cookies that are in effect for a given session key.
  * All existing rows for the session key are deleted and one row per supplied cookie is inserted,
  * inside a single transaction; the session's cache key is invalidated as part of the same operation.
  *@param sessionKey is the session key.
  *@param cookies are the cookies to write into the database.
  */
  public void updateCookies(String sessionKey, LoginCookies cookies)
    throws ManifoldCFException
  {
    StringSetBuffer ssb = new StringSetBuffer();
    ssb.add(getCookiesCacheKey(sessionKey));
    StringSet cacheKeys = new StringSet(ssb);
    ICacheHandle ch = cacheManager.enterCache(null,cacheKeys,getTransactionID());
    try
    {
      beginTransaction();
      try
      {
        // Delete any old cookies, and create new ones
        ArrayList list = new ArrayList();
        list.add(sessionKey);
        performDelete("WHERE "+keyField+"=?",list,null);

        // Now, insert the new cookies
        for (int i = 0; i < cookies.getCookieCount(); i++)
        {
          performInsert(buildCookieRow(sessionKey,i,cookies.getCookie(i)),null);
        }

        cacheManager.invalidateKeys(ch);
      }
      catch (ManifoldCFException e)
      {
        signalRollback();
        throw e;
      }
      catch (RuntimeException e)
      {
        // Without this, an unchecked failure part-way through (e.g. a null cookie) would end the
        // transaction with the old rows deleted and only some of the new rows inserted.
        signalRollback();
        throw e;
      }
      catch (Error e)
      {
        signalRollback();
        throw e;
      }
      finally
      {
        endTransaction();
      }
    }
    finally
    {
      cacheManager.leaveCache(ch);
    }
  }

  // Protected methods and classes

  /** Construct a global key which represents an individual session.
  *@param sessionKey is the session key.
  *@return the cache key.
  */
  protected static String getCookiesCacheKey(String sessionKey)
  {
    return "COOKIES_"+sessionKey;
  }

  /** Read cookies from database, uncached.
  * Rows are returned in ascending ordinal order, which is the order they were written in.
  *@param sessionKey is the session key.
  *@return the login cookies object.
  */
  protected LoginCookies readCookiesUncached(String sessionKey)
    throws ManifoldCFException
  {
    ArrayList list = new ArrayList();
    list.add(sessionKey);
    IResultSet result = performQuery("SELECT * FROM "+getTableName()+" WHERE "+keyField+"=? ORDER BY "+ordinalField+" ASC",list,null,null);
    DynamicCookieSet dcs = new DynamicCookieSet();
    for (int i = 0; i < result.getRowCount(); i++)
    {
      dcs.addCookie(rowToCookie(result.getRow(i)));
    }
    return dcs;
  }

  /** Convert a boolean string to a boolean.
  *@param value is the stored string, expected to be "T" or "F".
  *@return true for "T", false for "F".
  *@throws ManifoldCFException if the value is anything else, including null.
  */
  protected static boolean stringToBoolean(String value)
    throws ManifoldCFException
  {
    // Constant-first comparison so that a null value is reported as a ManifoldCFException, not an NPE.
    if ("T".equals(value))
      return true;
    if ("F".equals(value))
      return false;
    throw new ManifoldCFException("Expected T or F but saw "+value);
  }

  /** Convert a boolean to a boolean string.
  *@param value is the boolean to convert.
  *@return "T" for true, "F" for false.
  */
  protected static String booleanToString(boolean value)
  {
    return value ? "T" : "F";
  }

  /** Convert a string to a port array.
  *@param value is a comma-separated list of port numbers; whitespace around each number is ignored.
  *@return the parsed ports, in the order given.
  *@throws ManifoldCFException if any element is not a valid integer.
  */
  protected static int[] stringToPorts(String value)
    throws ManifoldCFException
  {
    String[] ports = value.split(",");
    int[] rval = new int[ports.length];
    for (int i = 0; i < rval.length; i++)
    {
      try
      {
        rval[i] = Integer.parseInt(ports[i].trim());
      }
      catch (NumberFormatException e)
      {
        throw new ManifoldCFException(e.getMessage(),e);
      }
    }
    return rval;
  }

  /** Convert a port array to a string.
  *@param ports is the array of ports.
  *@return the ports as a comma-separated string, with no surrounding whitespace.
  */
  protected static String portsToString(int[] ports)
  {
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < ports.length; i++)
    {
      if (i > 0)
        sb.append(",");
      sb.append(Integer.toString(ports[i]));
    }
    return sb.toString();
  }

  /** Check whether a string is neither null nor empty. */
  private static boolean hasText(String s)
  {
    return s != null && s.length() > 0;
  }

  /** Build the column map for one cookie row.
  * Optional text columns are only put into the map when the cookie supplies a non-empty value.
  */
  private static HashMap buildCookieRow(String sessionKey, int ordinal, Cookie c)
  {
    HashMap map = new HashMap();
    map.put(keyField,sessionKey);
    map.put(ordinalField,Long.valueOf(ordinal));

    String domain = c.getDomain();
    if (hasText(domain))
      map.put(domainField,domain);
    map.put(domainSpecifiedField,booleanToString(hasText(domain)));

    String name = c.getName();
    if (hasText(name))
      map.put(nameField,name);

    String value = c.getValue();
    if (hasText(value))
      map.put(valueField,value);

    String path = c.getPath();
    if (hasText(path))
      map.put(pathField,path);
    map.put(pathSpecifiedField,booleanToString(hasText(path)));

    map.put(versionField,Long.valueOf(c.getVersion()));
    // Make something up.  It may not be correct, but there's really no choice.
    map.put(versionSpecifiedField,booleanToString(true));

    String comment = c.getComment();
    if (hasText(comment))
      map.put(commentField,comment);

    map.put(secureField,booleanToString(c.isSecure()));

    Date expirationDate = c.getExpiryDate();
    if (expirationDate != null)
      map.put(expirationDateField,Long.valueOf(expirationDate.getTime()));

    // The discard flag is always written as false; it is not derived from the cookie's persistence.
    map.put(discardField,booleanToString(false));

    String commentURL = c.getCommentURL();
    if (hasText(commentURL))
      map.put(commentURLField,commentURL);

    int[] ports = c.getPorts();
    boolean hasPorts = ports != null && ports.length > 0;
    if (hasPorts)
      map.put(portField,portsToString(ports));
    map.put(portBlankField,booleanToString(!hasPorts));
    map.put(portSpecifiedField,booleanToString(hasPorts));

    return map;
  }

  /** Rebuild a cookie object from one database row.
  * The "specified"/"blank" flag columns are not applied to the cookie.
  */
  private static Cookie rowToCookie(IResultRow row)
    throws ManifoldCFException
  {
    String name = (String)row.getValue(nameField);
    // An empty name is written as a missing column value, and the cookie constructor rejects a null
    // name, so restore it as the empty string.
    if (name == null)
      name = "";
    String value = (String)row.getValue(valueField);
    BasicClientCookie2 c = new BasicClientCookie2(name,value);

    String domain = (String)row.getValue(domainField);
    if (hasText(domain))
      c.setDomain(domain);

    String path = (String)row.getValue(pathField);
    if (hasText(path))
      c.setPath(path);

    Long version = (Long)row.getValue(versionField);
    if (version != null)
      c.setVersion((int)version.longValue());

    String comment = (String)row.getValue(commentField);
    if (comment != null)
      c.setComment(comment);

    c.setSecure(stringToBoolean((String)row.getValue(secureField)));

    Long expirationDate = (Long)row.getValue(expirationDateField);
    if (expirationDate != null)
      c.setExpiryDate(new Date(expirationDate.longValue()));

    c.setDiscard(stringToBoolean((String)row.getValue(discardField)));

    String commentURL = (String)row.getValue(commentURLField);
    if (hasText(commentURL))
      c.setCommentURL(commentURL);

    // Ports are comma-separated
    String ports = (String)row.getValue(portField);
    if (hasText(ports))
      c.setPorts(stringToPorts(ports));

    return c;
  }

  /** This is a set of cookies, built dynamically.
  */
  protected static class DynamicCookieSet implements LoginCookies
  {
    protected List<Cookie> cookies = new ArrayList<Cookie>();

    public DynamicCookieSet()
    {
    }

    /** Append a cookie to the set.
    *@param c is the cookie to add.
    */
    public void addCookie(Cookie c)
    {
      cookies.add(c);
    }

    /** Get the number of cookies in the set.
    *@return the count.
    */
    public int getCookieCount()
    {
      return cookies.size();
    }

    /** Get a cookie by position.
    *@param index is the zero-based position.
    *@return the cookie at that position.
    */
    public Cookie getCookie(int index)
    {
      return cookies.get(index);
    }
  }

  /** This is the object description for a session key object.
  * This is the key that is used to look up cached data.
  */
  protected static class CookiesDescription extends org.apache.manifoldcf.core.cachemanager.BaseDescription
  {
    protected String sessionKey;
    protected String criticalSectionName;
    protected StringSet cacheKeys;

    /** Constructor.
    *@param sessionKey is the session key this description identifies.
    *@param invKeys are the cache keys associated with the described object.
    */
    public CookiesDescription(String sessionKey, StringSet invKeys)
    {
      super("cookiescache");
      this.sessionKey = sessionKey;
      criticalSectionName = getClass().getName()+"-"+sessionKey;
      cacheKeys = invKeys;
    }

    /** Get the session key.
    *@return the session key.
    */
    public String getSessionKey()
    {
      return sessionKey;
    }

    @Override
    public int hashCode()
    {
      return sessionKey.hashCode();
    }

    /** Two descriptions are equal when they carry the same session key. */
    @Override
    public boolean equals(Object o)
    {
      if (!(o instanceof CookiesDescription))
        return false;
      CookiesDescription d = (CookiesDescription)o;
      return d.sessionKey.equals(sessionKey);
    }

    /** Get the critical section name, which is this class's name plus the session key.
    *@return the critical section name.
    */
    public String getCriticalSectionName()
    {
      return criticalSectionName;
    }

    /** Get the cache keys for an object (which may or may not exist yet in
    * the cache).  This method is called in order for cache manager to throw the correct locks.
    * @return the object's cache keys, or null if the object should not
    * be cached.
    */
    public StringSet getObjectKeys()
    {
      return cacheKeys;
    }

    /** Get the object class for an object.  The object class is used to determine
    * the group of objects treated in the same LRU manner.
    * @return the newly created object's object class, or null if there is no
    * such class, and LRU behavior is not desired.
    */
    public ICacheClass getObjectClass()
    {
      return cookiesCacheClass;
    }
  }

  /** Cache class for cookies.
  * An instance of this class describes the cache class for cookie caching.  There's
  * only ever a need for one, so that will be created statically.
  */
  protected static class CookiesCacheClass implements ICacheClass
  {
    /** Get the name of the object class.
    * This determines the set of objects that are treated in the same
    * LRU pool.
    *@return the class name.
    */
    public String getClassName()
    {
      // We count all the cookies, so this is a constant string.
      return "COOKIESCLASS";
    }

    /** Get the maximum LRU count of the object class.
    *@return the maximum number of the objects of the particular class
    * allowed.
    */
    public int getMaxLRUCount()
    {
      // Hardwired for the moment; 2000 cookies records will be cached,
      // and no more.
      return 2000;
    }
  }

  /** This is the executor object for locating cookies session objects.
  * This object furnishes the operations the cache manager needs to rebuild objects that it needs that are
  * not in the cache at the moment.
  */
  protected static class CookiesExecutor extends org.apache.manifoldcf.core.cachemanager.ExecutorBase
  {
    // Member variables
    protected CookieManager thisManager;
    protected LoginCookies returnValue;
    protected CookiesDescription thisDescription;

    /** Constructor.
    *@param manager is the CookieManager class instance.
    *@param objectDescription is the desired object description.
    */
    public CookiesExecutor(CookieManager manager, CookiesDescription objectDescription)
    {
      super();
      thisManager = manager;
      thisDescription = objectDescription;
      returnValue = null;
    }

    /** Get the result.
    *@return the looked-up or read cached instance.
    */
    public LoginCookies getResults()
    {
      return returnValue;
    }

    /** Create a set of new objects to operate on and cache.  This method is called only
    * if the specified object(s) are NOT available in the cache.  The specified objects
    * should be created and returned; if they are not created, it means that the
    * execution cannot proceed, and the execute() method will not be called.
    * @param objectDescriptions is the set of unique identifier of the object.
    * @return the newly created objects to cache, or null, if any object cannot be created.
    *  The order of the returned objects must correspond to the order of the object descriptions.
    */
    public Object[] create(ICacheDescription[] objectDescriptions) throws ManifoldCFException
    {
      // I'm not expecting multiple values to be requested, so it's OK to walk through the objects
      // and do a request at a time.
      LoginCookies[] rval = new LoginCookies[objectDescriptions.length];
      for (int i = 0; i < rval.length; i++)
      {
        CookiesDescription desc = (CookiesDescription)objectDescriptions[i];
        rval[i] = thisManager.readCookiesUncached(desc.getSessionKey());
      }
      return rval;
    }

    /** Notify the implementing class of the existence of a cached version of the
    * object.  The object is passed to this method so that the execute() method below
    * will have it available to operate on.  This method is also called for all objects
    * that are freshly created as well.
    * @param objectDescription is the unique identifier of the object.
    * @param cachedObject is the cached object.
    */
    public void exists(ICacheDescription objectDescription, Object cachedObject) throws ManifoldCFException
    {
      // Cast what came in as what it really is
      CookiesDescription objectDesc = (CookiesDescription)objectDescription;
      LoginCookies cookiesData = (LoginCookies)cachedObject;
      // Only the object matching the description we were constructed with becomes the result.
      if (objectDesc.equals(thisDescription))
        returnValue = cookiesData;
    }

    /** Perform the desired operation.  This method is called after either create()
    * or exists() is called for every requested object.
    */
    public void execute() throws ManifoldCFException
    {
      // Does nothing; we only want to fetch objects in this cacher.
    }
  }

}