package com.opensymphony.module.sitemesh.parser;
import java.io.IOException;
import java.util.*;
import com.opensymphony.module.sitemesh.*;
/**
 * Page parser that doesn't parse the full page, but rather just parses the head section of the page.
 * <p>
 * The parser scans the raw character buffer directly. It locates the <code>&lt;head&gt;</code> section, the
 * start of the <code>&lt;body&gt;</code> and the last <code>&lt;/body&gt;</code>, and extracts the title, the
 * <code>&lt;meta&gt;</code> name/content pairs and the <code>&lt;content tag="..."&gt;</code> blocks from the
 * head. Tag names are matched ASCII case-insensitively.
 *
 * @since v2.5
 */
public class PartialPageParser implements PageParser
{
    private static final String HEAD_CLOSE = "</head>";
    private static final String BODY_CLOSE = "</body>";
    private static final String TITLE_CLOSE = "</title>";
    private static final String CONTENT_CLOSE = "</content>";

    /**
     * Parses the given characters by wrapping them in a {@link DefaultSitemeshBuffer}.
     *
     * @param buffer the page content
     * @return the parsed page
     * @throws IOException declared by the {@link PageParser} contract
     */
    public Page parse(char[] buffer) throws IOException
    {
        return parse(new DefaultSitemeshBuffer(buffer));
    }

    /**
     * Parses the given buffer. If the first tag (ignoring <code>&lt;!...&gt;</code> declarations such as the
     * doctype) is <code>&lt;html&gt;</code>, the page is parsed as an HTML page; otherwise, or if there is no
     * tag at all, the whole buffer is treated as the body.
     *
     * @param buffer the page content
     * @return the parsed page
     * @throws IOException declared by the {@link PageParser} contract
     */
    public Page parse(SitemeshBuffer buffer) throws IOException
    {
        char[] data = buffer.getCharArray();
        int length = buffer.getBufferLength();
        int position = 0;
        // Bound the scan by the buffer length (not data.length), consistent with every other scan in this class
        while (position < length)
        {
            if (data[position++] == '<')
            {
                if (position < length && data[position] == '!')
                {
                    // Ignore doctype
                    continue;
                }
                if (compareLowerCase(data, length, position, "html"))
                {
                    // It's an HTML page, handle HTML pages
                    return parseHtmlPage(buffer, position);
                }
                else
                {
                    // The whole thing is the body.
                    return new PartialPageParserHtmlPage(buffer, new SitemeshBufferFragment(buffer, 0, length), null);
                }
            }
        }
        // If we're here, we mustn't have found a tag
        return new PartialPageParserHtmlPage(buffer, new SitemeshBufferFragment(buffer, 0, length), null);
    }

    /**
     * Parses a page whose first tag is <code>&lt;html&gt;</code>.
     *
     * @param buffer   the page content
     * @param position the index just after the <code>&lt;</code> of the html tag
     * @return the parsed page, with head information if a non-empty head section was found
     */
    private Page parseHtmlPage(SitemeshBuffer buffer, int position)
    {
        char[] data = buffer.getCharArray();
        int length = buffer.getBufferLength();
        int bodyStart = -1;
        int bodyLength = -1;
        int headStart = -1;
        int headLength = -1;
        // Find head end and start, and body start
        Map<String, String> bodyProperties = null;
        while (position < length)
        {
            if (data[position++] == '<')
            {
                if (compareLowerCase(data, length, position, "head"))
                {
                    position = findEndOf(data, length, position + 4, ">");
                    headStart = position;
                    // Find end of head
                    position = findStartOf(data, length, position, HEAD_CLOSE);
                    headLength = position - headStart;
                    position += HEAD_CLOSE.length();
                }
                else if (compareLowerCase(data, length, position, "body"))
                {
                    HashSimpleMap map = new HashSimpleMap();
                    bodyStart = parseProperties(data, length, position + 4, map);
                    bodyProperties = map.getMap();
                    break;
                }
            }
        }
        if (bodyStart < 0)
        {
            // No body found
            bodyStart = length;
            bodyLength = 0;
        }
        else
        {
            // Search backwards for the last closing body tag. The search starts at the last index where
            // the tag could still fit and includes bodyStart itself, so that both a closing tag at the
            // very end of the buffer and an empty body are handled.
            for (int i = length - BODY_CLOSE.length(); i >= bodyStart; i--)
            {
                if (compareLowerCase(data, length, i, BODY_CLOSE))
                {
                    bodyLength = i - bodyStart;
                    break;
                }
            }
            if (bodyLength == -1)
            {
                // No closing body tag, the body runs to the end of the buffer
                bodyLength = length - bodyStart;
            }
        }
        if (headLength > 0)
        {
            int idx = headStart;
            int headEnd = headStart + headLength;
            String title = null;
            // Maps the start index of each region to remove from the head to the length of that region
            TreeMap<Integer, Integer> deletions = new TreeMap<Integer, Integer>();
            // Extract meta attributes out of head
            Map<String, String> metaAttributes = new HashMap<String, String>();
            while (idx < headEnd)
            {
                if (data[idx++] == '<')
                {
                    if (compareLowerCase(data, headEnd, idx, "meta"))
                    {
                        MetaTagSimpleMap map = new MetaTagSimpleMap();
                        idx = parseProperties(data, headEnd, idx + 4, map);
                        if (map.getName() != null && map.getContent() != null)
                        {
                            metaAttributes.put(map.getName(), map.getContent());
                        }
                    }
                }
            }
            // We need a new head buffer because we have to remove the title and content tags from it
            Map<String, String> pageProperties = new HashMap<String, String>();
            for (int i = headStart; i < headEnd; i++)
            {
                char c = data[i];
                if (c == '<')
                {
                    if (compareLowerCase(data, headEnd, i + 1, "title"))
                    {
                        int titleStart = findEndOf(data, headEnd, i + 6, ">");
                        int titleEnd = findStartOf(data, headEnd, titleStart, "<");
                        title = new String(data, titleStart, titleEnd - titleStart);
                        // Never let the deletion run past the end of the head (e.g. unclosed title)
                        int titleTagEnd = Math.min(titleEnd + TITLE_CLOSE.length(), headEnd);
                        deletions.put(i, titleTagEnd - i);
                        i = titleTagEnd - 1;
                    }
                    else if (compareLowerCase(data, headEnd, i + 1, "content"))
                    {
                        ContentTagSimpleMap map = new ContentTagSimpleMap();
                        int contentStart = parseProperties(data, headEnd, i + 8, map);
                        int contentEnd = findStartOf(data, headEnd, contentStart, CONTENT_CLOSE);
                        pageProperties.put(map.getTag(), new String(data, contentStart, contentEnd - contentStart));
                        // Never let the deletion run past the end of the head (e.g. unclosed content tag)
                        int contentTagEnd = Math.min(contentEnd + CONTENT_CLOSE.length(), headEnd);
                        deletions.put(i, contentTagEnd - i);
                        i = contentTagEnd - 1;
                    }
                }
            }
            return new PartialPageParserHtmlPage(buffer, new SitemeshBufferFragment(buffer, bodyStart, bodyLength), bodyProperties,
                    new SitemeshBufferFragment(buffer, headStart, headEnd - headStart, deletions), title, metaAttributes, pageProperties);
        }
        else
        {
            return new PartialPageParserHtmlPage(buffer, new SitemeshBufferFragment(buffer, bodyStart, bodyLength), bodyProperties);
        }
    }

    /**
     * Checks whether the data at the given position matches the token, ignoring the case of ASCII letters
     * in the data.
     *
     * @param data     the data
     * @param dataEnd  the end index (exclusive) of the data
     * @param position the index at which the token is expected to start
     * @param token    the token to look for; callers pass it in lower case
     * @return true if the token fits before dataEnd and matches, false otherwise
     */
    private static boolean compareLowerCase(final char[] data, final int dataEnd, int position, String token)
    {
        int tokenLength = token.length();
        if (position < 0 || position + tokenLength > dataEnd)
        {
            return false;
        }
        for (int i = 0; i < tokenLength; i++)
        {
            char potential = data[position + i];
            if (potential >= 'A' && potential <= 'Z')
            {
                // | 32 converts from ASCII uppercase to ASCII lowercase
                potential = (char) (potential | 32);
            }
            if (potential != token.charAt(i))
            {
                return false;
            }
        }
        return true;
    }

    /**
     * Finds the first occurrence of the token at or after the given position.
     *
     * @param data     the data
     * @param dataEnd  the end index (exclusive) of the data
     * @param position the index to start searching from
     * @param token    the token to look for, in lower case
     * @return the index of the first character after the token, or dataEnd if the token was not found
     */
    private static int findEndOf(final char[] data, final int dataEnd, int position, String token)
    {
        // <= so that a token ending exactly at dataEnd is still found
        for (int i = position; i <= dataEnd - token.length(); i++)
        {
            if (compareLowerCase(data, dataEnd, i, token))
            {
                return i + token.length();
            }
        }
        return dataEnd;
    }

    /**
     * Finds the first occurrence of the token at or after the given position.
     *
     * @param data     the data
     * @param dataEnd  the end index (exclusive) of the data
     * @param position the index to start searching from
     * @param token    the token to look for, in lower case
     * @return the index of the first character of the token, or dataEnd if the token was not found
     */
    private static int findStartOf(final char[] data, final int dataEnd, int position, String token)
    {
        // <= so that a token ending exactly at dataEnd is still found
        for (int i = position; i <= dataEnd - token.length(); i++)
        {
            if (compareLowerCase(data, dataEnd, i, token))
            {
                return i;
            }
        }
        return dataEnd;
    }

    /**
     * Parse the properties of the current tag
     *
     * @param data     the data
     * @param dataEnd  the end index of the data
     * @param position our position in the data, this should be the first character after the tag name
     * @param map      the map to parse the properties into
     *
     * @return The position of the first character after the tag
     */
    private static int parseProperties(char[] data, int dataEnd, int position, SimpleMap map)
    {
        int idx = position;
        while (idx < dataEnd)
        {
            // Skip forward to the next non-whitespace character
            while (idx < dataEnd && Character.isWhitespace(data[idx]))
            {
                idx++;
            }
            // Make sure its not the end of the data or the end of the tag
            if (idx == dataEnd || data[idx] == '>' || data[idx] == '/')
            {
                break;
            }
            int startAttr = idx;
            // Find the next equals
            while (idx < dataEnd && !Character.isWhitespace(data[idx]) && data[idx] != '=' && data[idx] != '>')
            {
                idx++;
            }
            if (idx == dataEnd || data[idx] != '=')
            {
                // Attribute without a value, skip it
                continue;
            }
            String attrName = new String(data, startAttr, idx - startAttr);
            idx++;
            if (idx == dataEnd)
            {
                break;
            }
            int startValue = idx;
            int endValue;
            char quote = data[idx];
            if (quote == '"' || quote == '\'')
            {
                // Quoted value, runs up to the matching quote character
                idx++;
                startValue = idx;
                while (idx < dataEnd && data[idx] != quote)
                {
                    idx++;
                }
                if (idx == dataEnd)
                {
                    // Unterminated quote, give up on this attribute
                    break;
                }
                endValue = idx;
                idx++;
            }
            else
            {
                // Unquoted value, runs up to whitespace or the end of the tag
                while (idx < dataEnd && !Character.isWhitespace(data[idx]) && data[idx] != '/' && data[idx] != '>')
                {
                    idx++;
                }
                endValue = idx;
            }
            String attrValue = new String(data, startValue, endValue - startValue);
            map.put(attrName, attrValue);
        }
        // Find the end of the tag
        while (idx < dataEnd && data[idx] != '>')
        {
            idx++;
        }
        if (idx == dataEnd)
        {
            return idx;
        }
        else
        {
            // Return the first character after the end of the tag
            return idx + 1;
        }
    }

    /**
     * Minimal sink for the attribute name/value pairs found while parsing a tag.
     */
    public static interface SimpleMap
    {
        /**
         * Receives one attribute.
         *
         * @param key   the attribute name as it appears in the page
         * @param value the attribute value, without surrounding quotes
         */
        public void put(String key, String value);
    }

    /**
     * Attribute sink that only remembers the <code>name</code> and <code>content</code> attributes.
     */
    public static class MetaTagSimpleMap implements SimpleMap
    {
        private String name;
        private String content;

        public void put(String key, String value)
        {
            if (key.equals("name"))
            {
                name = value;
            }
            else if (key.equals("content"))
            {
                content = value;
            }
        }

        /**
         * @return the value of the <code>name</code> attribute, or null if none was seen
         */
        public String getName()
        {
            return name;
        }

        /**
         * @return the value of the <code>content</code> attribute, or null if none was seen
         */
        public String getContent()
        {
            return content;
        }
    }

    /**
     * Attribute sink that only remembers the <code>tag</code> attribute.
     */
    public static class ContentTagSimpleMap implements SimpleMap
    {
        private String tag;

        public void put(String key, String value)
        {
            if (key.equals("tag"))
            {
                tag = value;
            }
        }

        /**
         * @return the value of the <code>tag</code> attribute, or null if none was seen
         */
        public String getTag()
        {
            return tag;
        }
    }

    /**
     * Attribute sink that keeps every attribute in a {@link HashMap}.
     */
    public static class HashSimpleMap implements SimpleMap
    {
        private final Map<String, String> map = new HashMap<String, String>();

        public void put(String key, String value)
        {
            map.put(key, value);
        }

        /**
         * @return the backing map of all attributes received so far
         */
        public Map<String, String> getMap()
        {
            return map;
        }
    }
}