package ecologylab.bigsemantics.httpclient;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ecologylab.bigsemantics.Utils;
import ecologylab.generic.HashMapArrayList;
import ecologylab.generic.StringBuilderBaseUtils;
/**
 * Parses a raw HTTP response read from an {@link InputStream} into a {@link SimplHttpResponse}.
 * <p>
 * The stream is expected to start with a status line followed by a CRLF-terminated header section.
 * When the status code is in the 3xx range, the parser records the current URL, resolves the
 * {@code Location} header (if present) and then expects another status line / header section to
 * follow directly in the same stream. Whatever remains after the final header section is read as
 * the response content.
 *
 * @author quyin
 */
public class HttpResponseParser
{

  /** Line terminator used for the status line and for header lines. */
  private static final String CRLF = "\r\n";

  private static final Logger logger = LoggerFactory.getLogger(HttpResponseParser.class);

  /** Matches a status line; group 1 is the numeric status code, group 2 the reason phrase. */
  private static final Pattern pStatusLine = Pattern.compile("HTTP/\\d\\.\\d\\s+(\\d+)\\s+(.*)");

  /**
   * Parses the raw response in {@code istream}.
   *
   * @param url
   *          the URL the response was requested from; used for error messages and as the base for
   *          resolving relative redirect locations.
   * @param istream
   *          the stream positioned at the first status line. It is read byte by byte for the
   *          headers and then wrapped in a reader for the content.
   * @return the parsed response, carrying the final status code, headers, final URL, the URLs that
   *         were redirected away from (only when there was at least one), and the content.
   * @throws HttpClientException
   *           if a status line is missing or malformed (including the stream ending where a status
   *           line is expected), or if a header line is invalid.
   * @throws Exception
   *           for I/O failures while reading the stream.
   */
  public SimplHttpResponse parse(String url, InputStream istream) throws Exception
  {
    SimplHttpResponse result = new SimplHttpResponse();

    // process headers
    List<String> otherUrls = new ArrayList<String>();
    while (true)
    {
      String line = readUntil(istream, CRLF);
      if (line == null)
      {
        // readUntil() signals end of stream with null; report it instead of failing with a
        // NullPointerException inside the regex matcher.
        throw new HttpClientException("When processing raw response from "
                                      + url
                                      + ": Status line expected, got end of stream");
      }

      Matcher matcher = pStatusLine.matcher(line);
      if (!matcher.matches())
      {
        throw new HttpClientException("When processing raw response from "
                                      + url
                                      + ": Status line expected, got "
                                      + line);
      }

      int code = parseStatusCode(url, line, matcher.group(1));
      result.setCode(code);
      HashMapArrayList<String, SimplHttpHeader> headers = readHeaders(istream);

      if (code < 300 || code >= 400)
      {
        // final (non-3xx) response: keep its headers and go on to the content
        result.setHeaders(headers);
        break;
      }

      // redirection happened: remember where we came from and expect another response to follow
      otherUrls.add(url);
      String previousUrl = url;
      // NOTE(review): this lookup uses the exact name "Location"; whether HashMapArrayList
      // matches differently-cased header names is not visible here -- confirm.
      SimplHttpHeader locationHeader = headers.get("Location");
      if (locationHeader == null)
      {
        logger.warn("Redirection without Location header: {}", headers);
      }
      else
      {
        url = locationHeader.getValue();
        if (HttpClientUtils.looksLikeRelative(url))
        {
          url = HttpClientUtils.relativeToAbsolute(previousUrl, url);
        }
      }
    }

    result.setUrl(url);
    if (otherUrls.size() > 0)
    {
      result.setOtherUrls(otherUrls);
    }

    // change charset if necessary
    Charset charset = resolveCharset(result.getCharset());
    InputStreamReader reader = new InputStreamReader(istream, charset);
    String content = Utils.readAllFromReader(reader);
    result.setContent(content);

    return result;
  }

  /**
   * Converts the digits captured from a status line into an int.
   *
   * @param url
   *          the URL being processed, for the error message.
   * @param line
   *          the complete status line, for the error message.
   * @param digits
   *          the captured status code digits.
   * @return the numeric status code.
   * @throws HttpClientException
   *           if the digits do not fit into an int.
   */
  private static int parseStatusCode(String url, String line, String digits)
      throws HttpClientException
  {
    try
    {
      return Integer.parseInt(digits);
    }
    catch (NumberFormatException e)
    {
      // the pattern only admits digits, so this happens when the number is too large for an int
      throw new HttpClientException("When processing raw response from "
                                    + url
                                    + ": Invalid status code in status line "
                                    + line);
    }
  }

  /**
   * Chooses the charset used to decode the content.
   *
   * @param charsetName
   *          the charset name reported by the response; may be null.
   * @return the named charset when it is legal and supported, UTF-8 otherwise.
   */
  private static Charset resolveCharset(String charsetName)
  {
    Charset fallback = Charset.forName("UTF-8");
    if (charsetName == null)
    {
      return fallback;
    }
    try
    {
      if (Charset.isSupported(charsetName))
      {
        return Charset.forName(charsetName);
      }
    }
    catch (IllegalArgumentException e)
    {
      // Charset.isSupported() throws for syntactically illegal names; a bad name coming from the
      // response should not abort parsing.
      logger.warn("Illegal charset name [{}], falling back to UTF-8", charsetName);
    }
    return fallback;
  }

  /**
   * Reads header lines up to and including the empty line that ends the header section. Reading
   * also stops when the stream ends.
   *
   * @param istream
   *          the stream positioned right after a status line.
   * @return the headers keyed by header name; a later header with the same name is put under the
   *         same key.
   * @throws IOException
   *           if reading from the stream fails.
   * @throws HttpClientException
   *           if a line has no colon, has an empty name, or has an empty value.
   */
  private HashMapArrayList<String, SimplHttpHeader> readHeaders(InputStream istream)
      throws IOException, HttpClientException
  {
    HashMapArrayList<String, SimplHttpHeader> result =
        new HashMapArrayList<String, SimplHttpHeader>();
    while (true)
    {
      String line = readUntil(istream, CRLF);
      if (line == null || line.length() == 0)
      {
        break;
      }

      int p = line.indexOf(':');
      if (p > 0)
      {
        String name = line.substring(0, p).trim();
        String value = line.substring(p + 1).trim();
        if (name.length() > 0 && value.length() > 0)
        {
          SimplHttpHeader header = new SimplHttpHeader(name, value);
          result.put(name, header);
          continue;
        }
      }
      throw new HttpClientException("Invalid header: " + line);
    }
    return result;
  }

  /**
   * Reads single bytes from the stream until the text read so far ends with {@code end}. Each byte
   * is widened to a char as is.
   *
   * @param istream
   *          the stream to read from.
   * @param end
   *          the terminator to look for.
   * @return the text before the terminator, or null if the stream ended before the terminator was
   *         seen (anything read up to that point is discarded).
   * @throws IOException
   *           if reading from the stream fails.
   */
  private String readUntil(InputStream istream, String end) throws IOException
  {
    StringBuilder sb = StringBuilderBaseUtils.acquire();
    try
    {
      int endLength = end.length();
      while (true)
      {
        int c = istream.read();
        if (c < 0)
        {
          return null;
        }
        sb.append((char) c);

        int n = sb.length();
        // Only the last endLength chars can match from this start index, so any hit means the
        // buffer ends with the terminator.
        if (n >= endLength && sb.indexOf(end, n - endLength) >= 0)
        {
          return sb.substring(0, n - endLength);
        }
      }
    }
    finally
    {
      // release the builder on every exit path, including end of stream and I/O errors
      StringBuilderBaseUtils.release(sb);
    }
  }

}