/*
* Copyright 2000-2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jetspeed.portal.portlets;
//Element Construction Set
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLConnection;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.StringTokenizer;
import org.apache.ecs.ConcreteElement;
import org.apache.jetspeed.portal.PortletConfig;
import org.apache.jetspeed.portal.PortletException;
import org.apache.jetspeed.services.Transformer;
import org.apache.jetspeed.services.logging.JetspeedLogFactoryService;
import org.apache.jetspeed.services.logging.JetspeedLogger;
import org.apache.jetspeed.util.Base64;
import org.apache.jetspeed.util.JetspeedClearElement;
import org.apache.turbine.services.servlet.TurbineServlet;
import org.apache.turbine.util.RunData;
/**
* A class that clips parts of one or more web pages.
*
* @author <a href="mailto:mmari@ce.unipr.it">Marco Mari</a>
* @version $Id: WebClippingPortlet.java,v 1.2 2004/02/23 04:03:34 jford Exp $
*/
public class WebClippingPortlet extends AbstractInstancePortlet
{
    /**
     * Static initialization of the logger for this class
     */
    private static final JetspeedLogger logger =
        JetspeedLogFactoryService.getLogger(WebClippingPortlet.class.getName());

    /*
     * Pattern parameters are grouped by a numeric suffix starting at 1,
     * e.g. "startTag1"/"stopTag1", "Tag2", "startTagNumber2", "url2".
     */

    /** Parameter name prefix for the first tag to clip. */
    public static final String START = "startTag";

    /** Parameter name prefix for the last tag to clip. */
    public static final String STOP = "stopTag";

    /** Parameter name prefix for a single tag to clip. */
    public static final String TAG = "Tag";

    /** Parameter name prefix for the number of the tag to clip. */
    public static final String TAGNUM = "startTagNumber";

    /** Parameter name prefix for the URL of the page. */
    public static final String URL = "url";

    /** Error message for startTag without stopTag. */
    private static final String BAD_PARAM =
        "<br>Error: startTag without stopTag<br>";

    /** Error message for wrong startTagNumber parameter. */
    private static final String BAD_NUMBER =
        "<br>Error: bad integer parameter<br>";

    /** Encoding used when the response declares none, or an unusable one. */
    private static final String DEFAULT_ENCODING = "iso-8859-1";

    /** Directive looked for in the Cache-Control and Pragma headers. */
    private static final String NO_CACHE = "no-cache";

    protected boolean initDone = false;
    protected boolean contentStale = true;
    protected boolean cacheContent = false;
    protected String username = null;
    protected String password = null;

    /** Init parameters other than username/password, keyed by name. */
    private Hashtable patterns = null;

    /**
     * Initialize this portlet. Only the first call does any work.
     * A failure while loading the parameters is logged, not propagated;
     * the portlet is marked initialized either way.
     */
    public void init()
    {
        if (initDone)
        {
            return;
        }
        patterns = new Hashtable();
        try
        {
            loadParams();
        }
        catch (Exception e)
        {
            logger.error("WebClippingPortlet: initialization failed", e);
        }
        contentStale = true;
        initDone = true;
    }

    /**
     * Open the given URL and return a reader over its body, decoded with
     * the charset named in the Content-Type header (iso-8859-1 when none
     * is given or the named one is not supported).
     * <p>
     * As a side effect this updates <code>cacheContent</code>,
     * <code>contentStale</code> and, when the page is cacheable and not yet
     * expired, the portlet expiration time.
     * <p>
     * took this from FileServerPortlet as it was private
     *
     * @param url absolute URL of the page to fetch
     * @return a reader over the page content; the caller must close it
     * @throws IOException if the URL is malformed or the page cannot be read
     */
    // FIXME: Currently only the expiration the HTTP Response header is honored.
    // Expiration information in <meta> tags are not honored
    protected Reader getReader(String url) throws IOException
    {
        URL pageUrl = new URL(url);
        URLConnection pageConn = pageUrl.openConnection();
        try
        {
            // set HTTP Basic Authentication header if username and password are set
            if (username != null && password != null)
            {
                pageConn.setRequestProperty(
                    "Authorization",
                    "Basic "
                        + Base64.encodeAsString(username + ":" + password));
            }
        }
        catch (Exception e)
        {
            // Best effort: continue unauthenticated rather than fail here.
            logger.error(
                "WebClippingPortlet: unable to set Authorization header",
                e);
        }

        long pageExpiration = pageConn.getExpiration();
        String encoding = extractCharset(pageConn.getContentType());

        // Content is cacheable unless it has no expiration or the server
        // forbids caching through Cache-Control or Pragma.
        cacheContent =
            (pageExpiration != 0)
                && !forbidsCaching(pageConn.getHeaderField("Cache-Control"))
                && !forbidsCaching(pageConn.getHeaderField("Pragma"));

        // Assign a reader. An unsupported charset must not abort the fetch
        // (and strand the already opened stream), so fall back to default.
        InputStream in = pageConn.getInputStream();
        Reader rdr;
        try
        {
            rdr = new InputStreamReader(in, encoding);
        }
        catch (UnsupportedEncodingException e)
        {
            logger.warn(
                "WebClippingPortlet: unsupported charset '"
                    + encoding
                    + "' for URL "
                    + url
                    + ", using "
                    + DEFAULT_ENCODING);
            rdr = new InputStreamReader(in, DEFAULT_ENCODING);
        }

        // Only set the page expiration if the page has not expired
        long now = System.currentTimeMillis();
        if (cacheContent && pageExpiration > now)
        {
            contentStale = false;
            logger.debug(
                "WebClippingPortlet caching URL: "
                    + url
                    + " Expiration: "
                    + pageExpiration
                    + ", "
                    + (pageExpiration - now)
                    + " milliseconds into the future");
            setExpirationMillis(pageExpiration);
        }
        else
        {
            contentStale = true;
        }
        return rdr;
    }

    /**
     * Extract the charset from a Content-Type header value.
     *
     * @param contentType header value, may be null
     * @return the token following "charset", or the default encoding when
     *         the header is null or carries no charset value
     */
    private static String extractCharset(String contentType)
    {
        if (contentType == null)
        {
            return DEFAULT_ENCODING;
        }
        // Quotes are delimiters too, so charset="utf-8" yields utf-8.
        StringTokenizer st = new StringTokenizer(contentType, "; =\"'\t");
        while (st.hasMoreTokens())
        {
            if (st.nextToken().equalsIgnoreCase("charset"))
            {
                return st.hasMoreTokens() ? st.nextToken() : DEFAULT_ENCODING;
            }
        }
        return DEFAULT_ENCODING;
    }

    /**
     * Tell whether a Cache-Control or Pragma header value contains the
     * no-cache directive (case-insensitive).
     *
     * @param headerValue header value, may be null
     * @return true if the value is non-null and contains "no-cache"
     */
    private static boolean forbidsCaching(String headerValue)
    {
        return headerValue != null
            && headerValue.toLowerCase().indexOf(NO_CACHE) >= 0;
    }

    /**
     * This method outputs the content of the portlet for a given request.
     * The page is clipped again when the content is stale or its expiration
     * time has passed; otherwise the stored content is returned.
     *
     * @param data the RunData object for the request
     * @return the content to be displayed to the user-agent
     */
    public ConcreteElement getContent(RunData data)
    {
        PortletConfig config = this.getPortletConfig();
        if (contentStale)
        {
            return getWebClippedContent(data, config);
        }
        if (null == getExpirationMillis())
        {
            return getContent(data, null, true);
        }
        if (getExpirationMillis().longValue() <= System.currentTimeMillis())
        {
            return getWebClippedContent(data, config);
        }
        return getContent(data, null, true);
    }

    /**
     * Fetch and clip the configured page(s) and store the result as the
     * portlet content. Pattern groups are processed in numeric order
     * (1, 2, ...) until a number has neither a start tag nor a single tag.
     *
     * @param data the RunData object for the request, may be null
     * @param config the portlet configuration supplying the default URL
     * @return the clipped content, an error element when a pattern group is
     *         misconfigured, or null when an exception occurred
     */
    private ConcreteElement getWebClippedContent(
        RunData data,
        PortletConfig config)
    {
        JetspeedClearElement element = null;
        String defaultUrl = selectUrl(data, config);
        try
        {
            // Re-load parameters to see immediately the effect of changes
            loadParams();

            StringBuffer clipped = new StringBuffer(); // HTML to visualize
            int patternNumber = 1;
            while (patterns.containsKey(START + patternNumber)
                || patterns.containsKey(TAG + patternNumber))
            {
                String start = (String) patterns.get(START + patternNumber);
                String simpleTag = (String) patterns.get(TAG + patternNumber);
                String stop = (String) patterns.get(STOP + patternNumber);
                String tagNum = (String) patterns.get(TAGNUM + patternNumber);

                // Validate before fetching: no network round trip, and no
                // reader left open, for a misconfigured group.
                if ((start != null) && (stop == null))
                {
                    return new JetspeedClearElement(BAD_PARAM);
                }
                int tagNumber = 0;
                if (tagNum != null)
                {
                    try
                    {
                        tagNumber = Integer.parseInt(tagNum.trim());
                    }
                    catch (NumberFormatException e)
                    {
                        logger.error(
                            "WebClippingPortlet: bad "
                                + TAGNUM
                                + patternNumber
                                + " value: "
                                + tagNum,
                            e);
                        return new JetspeedClearElement(BAD_NUMBER);
                    }
                }

                // A group of params can have a specific url
                String url =
                    controlUrl(
                        (String) patterns.get(URL + patternNumber),
                        defaultUrl);
                Reader htmlReader = getReader(url);
                try
                {
                    if (simpleTag != null)
                    {
                        if (tagNum == null)
                        {
                            clipped.append(
                                Transformer.findElement(
                                    htmlReader,
                                    url,
                                    simpleTag));
                        }
                        else
                        {
                            clipped.append(
                                Transformer.findElementNumber(
                                    htmlReader,
                                    url,
                                    simpleTag,
                                    tagNumber));
                        }
                    }
                    else if (tagNum == null)
                    {
                        clipped.append(
                            Transformer.clipElements(
                                htmlReader,
                                url,
                                start,
                                stop));
                    }
                    else
                    {
                        clipped.append(
                            Transformer.clipElementsNumber(
                                htmlReader,
                                url,
                                start,
                                stop,
                                tagNumber));
                    }
                }
                finally
                {
                    closeQuietly(htmlReader);
                }
                patternNumber++;
            }

            element = new JetspeedClearElement(clipped.toString());
            //FIXME: We should do a clearContent() for the media type, not ALL media types
            this.clearContent();
            // doing this because setContent() is not overwriting current content.
            this.setContent(element);
        }
        catch (Exception e)
        {
            logger.error("WebClippingPortlet: unable to clip content", e);
        }
        return element;
    }

    /**
     * Close a reader, logging instead of propagating a failure so that a
     * close error cannot mask the outcome of the clipping itself.
     *
     * @param reader the reader to close, must not be null
     */
    private static void closeQuietly(Reader reader)
    {
        try
        {
            reader.close();
        }
        catch (IOException e)
        {
            logger.info("WebClippingPortlet: error closing reader: " + e);
        }
    }

    /**
     * Usually called by caching system when portlet is marked as expired, but
     * has not be idle longer then TimeToLive.
     *
     * Any cached content that is expired need to be refreshed.
     */
    public void refresh()
    {
        if (cacheContent)
        {
            getWebClippedContent(null, this.getPortletConfig());
        }
    }

    /**
     * Select the URL to use for this portlet.
     *
     * @param data the RunData object for the request (not used here)
     * @param config the portlet configuration
     * @return The URL to use for this portlet
     */
    protected String selectUrl(RunData data, PortletConfig config)
    {
        return config.getURL();
    }

    /**
     * Choose between a specific url and the default url.
     *
     * @param url URL configured for a pattern group, may be null
     * @param defaultUrl URL to use when the group has none
     * @return the default URL when url is null; otherwise url, resolved
     *         through TurbineServlet when it contains no "://"
     */
    private String controlUrl(String url, String defaultUrl)
    {
        if (url == null)
        {
            return defaultUrl;
        }
        //if the given URL does not include a protocol... ie http:// or ftp://
        //then resolve it relative to the current URL context
        if (url.indexOf("://") < 0)
        {
            URL resource = TurbineServlet.getResource(url);
            if (resource == null)
            {
                // Keep the value as is: the later fetch then fails with an
                // error naming this URL rather than a NullPointerException.
                logger.warn(
                    "WebClippingPortlet: unable to resolve relative URL: "
                        + url);
                return url;
            }
            url = resource.toString();
        }
        return url;
    }

    /**
     * Load portlet parameters. "username" and "password" go to their fields;
     * every other parameter goes into a freshly built pattern table, so
     * parameters removed from the configuration do not linger.
     *
     * @throws PortletException if the parameters cannot be read
     */
    private void loadParams() throws PortletException
    {
        try
        {
            PortletConfig config = this.getPortletConfig();
            Hashtable loaded = new Hashtable();
            Iterator names = config.getInitParameterNames();
            while (names.hasNext())
            {
                String name = (String) names.next();
                String value = config.getInitParameter(name);
                if ("username".equals(name))
                {
                    username = value;
                }
                else if ("password".equals(name))
                {
                    password = value;
                }
                else if (name != null && value != null)
                {
                    // Hashtable rejects nulls; a null value is the same as
                    // an absent parameter for every lookup in this class.
                    loaded.put(name, value);
                }
            }
            // Swap only after a complete, successful load.
            patterns = loaded;
        }
        catch (Exception e)
        {
            logger.error("WebClippingPortlet: unable to load parameters", e);
            throw new PortletException(e.toString());
        }
    }
}