/****************************************************************************/
/* File: PkgReadableData.java */
/* Author: F. Georges */
/* Company: H2O Consulting */
/* Date: 2009-10-21 */
/* Tags: */
/* Copyright (c) 2009 Florent Georges (see end of file.) */
/* ------------------------------------------------------------------------ */
package org.expath.pkg.calabash;
import com.xmlcalabash.core.XProcConstants;
import com.xmlcalabash.core.XProcException;
import com.xmlcalabash.core.XProcRuntime;
import com.xmlcalabash.io.DocumentSequence;
import com.xmlcalabash.io.ReadableData;
import com.xmlcalabash.io.ReadablePipe;
import com.xmlcalabash.model.Step;
import com.xmlcalabash.util.Base64;
import com.xmlcalabash.util.TreeWriter;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URI;
import java.net.URISyntaxException;
import net.sf.saxon.s9api.QName;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.XdmNode;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
/**
 * A {@link ReadablePipe} exposing the content of a resource as one document
 * made of a single wrapper element.
 *
 * The resource is located by passing its href to an {@link EntityResolver}.
 * Its content is read once, in the constructor.  Textual content is added as
 * text to the wrapper element, anything else is added base64-encoded (and the
 * wrapper then gets an encoding attribute set to "base64").
 *
 * Based on ReadableData (Calabash 0.9.15, SVN revision 456.)
 *
 * @author Florent Georges
 */
public class PkgReadableData
        implements ReadablePipe
{
    /**
     * Size of the read buffers.  It is a multiple of 3, so every full chunk
     * of binary content is base64-encoded without padding characters.
     */
    private static final int BUFFER_SIZE = 4096 * 3;

    /**
     * Resolve {@code href} and load its content as the single document of
     * this pipe.
     *
     * If {@code href} is null, nothing is resolved and the pipe is empty.
     *
     * TODO: Why not use the same approach as in ReadableDocument (and
     * PkgReadableDocument) which uses lazy loading?  Why load documents in
     * the ctor (as in ReadableData)?
     *
     * TODO: Why not pass the DataBinding object, instead of href, wrapper,
     * content type, etc.?
     *
     * @param href The URI of the data, passed as the system ID to the resolver.
     * @param wrapper The name of the element wrapping the content.
     * @param content_type The content type of the data, possibly with a
     *     charset parameter.  If null, "text/plain" is used.
     * @param resolver The resolver used to locate the data.
     * @param runtime The runtime used to build the document.
     *
     * @throws XProcException If the data cannot be resolved, has no byte
     *     stream, has an invalid URI, or cannot be read.
     */
    public PkgReadableData(String href, QName wrapper, String content_type, EntityResolver resolver, XProcRuntime runtime)
    {
        myHref = href;
        myWrapper = wrapper;
        myConType = content_type;
        myResolver = resolver;
        myRuntime = runtime;
        myDocs = new DocumentSequence(myRuntime);
        if ( myHref == null ) {
            return;
        }
        InputSource src = resolve();
        InputStream stream = src.getByteStream();
        if ( stream == null ) {
            // an InputSource is not required to carry a byte stream
            throw new XProcException("No byte stream to read the data from (" + myHref + ")");
        }
        try {
            TreeWriter tree = new TreeWriter(myRuntime);
            tree.startDocument(baseUri(src));
            // TODO: How to get the server Content-Type when using an
            // InputSource?  For now the only information available is the
            // content type given by the user, so it is used as is (and its
            // charset parameter, if any, is the one used to decode text).
            String con_type = myConType == null ? "text/plain" : myConType;
            String charset = parseCharset(con_type);
            boolean text = isText(con_type, charset);
            // the attributes are in no namespace on c:data, and in the "c"
            // namespace on any other wrapper
            boolean c_data = XProcConstants.c_data.equals(wrapper);
            QName type_attr = c_data ? ReadableData._contentType : ReadableData.c_contentType;
            QName enc_attr = c_data ? ReadableData._encoding : ReadableData.c_encoding;
            tree.addStartElement(wrapper);
            if ( "content/unknown".equals(con_type) ) {
                tree.addAttribute(type_attr, "application/octet-stream");
            }
            else {
                tree.addAttribute(type_attr, con_type);
            }
            if ( ! text ) {
                tree.addAttribute(enc_attr, "base64");
            }
            tree.startContent();
            if ( text ) {
                // FIXME: Is this default right? I think it is...
                writeText(tree, stream, charset == null ? "UTF-8" : charset);
            }
            else {
                writeBase64(tree, stream);
            }
            tree.addEndElement();
            tree.endDocument();
            myDocs.add(tree.getResult());
        }
        catch ( IOException ex ) {
            throw new XProcException("Error reading the data content", ex);
        }
        finally {
            // always release the stream, including when building the document
            // fails half-way through
            try {
                stream.close();
            }
            catch ( IOException ignored ) {
                // Either the content has been read entirely, or a more
                // relevant exception is already propagating.
            }
        }
    }

    @Override
    public void canReadSequence(boolean sequence)
    {
        // nop; always false
    }

    @Override
    public void resetReader()
    {
        myPos = 0;
    }

    @Override
    public void setReader(Step step)
    {
        // nothing
    }

    @Override
    public boolean moreDocuments()
    {
        return myPos < myDocs.size();
    }

    @Override
    public boolean closed()
    {
        // everything is loaded in the constructor
        return true;
    }

    @Override
    public int documentCount()
    {
        return myDocs.size();
    }

    @Override
    public DocumentSequence documents()
    {
        return myDocs;
    }

    @Override
    public XdmNode read()
            throws SaxonApiException
    {
        return myDocs.get(myPos++);
    }

    @Override
    public boolean readSequence()
    {
        // NOTE(review): canReadSequence() says "always false", so returning
        // false here might be more appropriate than throwing -- confirm.
        throw new UnsupportedOperationException("Not supported yet.");
    }

    @Override
    public void setNames(String step, String port)
    {
        // nop
    }

    /**
     * Resolve the href through the entity resolver.
     *
     * TODO: What to do if not resolved? Delegate to ReadableData?  Well, if
     * the user set pkg:kind, then we can say if it is not in the repository,
     * then this is an error...
     *
     * @return The resolved input source, never null.
     *
     * @throws XProcException If the resolver fails or returns null.
     */
    private InputSource resolve()
    {
        InputSource src;
        try {
            src = myResolver.resolveEntity(null, myHref);
        }
        catch ( SAXException ex ) {
            throw new XProcException("Error resolving the URI (" + myHref + ")", ex);
        }
        catch ( IOException ex ) {
            throw new XProcException("Error resolving the URI (" + myHref + ")", ex);
        }
        if ( src == null ) {
            throw new XProcException("Data not found (" + myHref + ")");
        }
        return src;
    }

    /**
     * Return the URI to use as the base URI of the document.
     *
     * This is the system ID of the resolved source, or the href itself if
     * the source has no system ID.
     *
     * @throws XProcException If the value is not a syntactically valid URI.
     */
    private URI baseUri(InputSource src)
    {
        String sysid = src.getSystemId();
        if ( sysid == null ) {
            sysid = myHref;
        }
        try {
            return new URI(sysid);
        }
        catch ( URISyntaxException ex ) {
            throw new XProcException("Not a proper URI (" + myHref + ")", ex);
        }
    }

    /**
     * Decode the stream using the given charset and add it as text, chunk by
     * chunk.  The stream is not closed (this is up to the caller).
     */
    private void writeText(TreeWriter tree, InputStream stream, String charset)
            throws IOException
    {
        BufferedReader reader = new BufferedReader(new InputStreamReader(stream, charset));
        char[] chars = new char[BUFFER_SIZE];
        int read = reader.read(chars, 0, chars.length);
        while ( read >= 0 ) {
            if ( read > 0 ) {
                tree.addText(new String(chars, 0, read));
            }
            read = reader.read(chars, 0, chars.length);
        }
    }

    /**
     * Add the content of the stream as base64 text, one line-terminated chunk
     * per filled buffer.  The stream is not closed (this is up to the caller).
     */
    private void writeBase64(TreeWriter tree, InputStream stream)
            throws IOException
    {
        // Fill the buffer each time (a single read() can return less than
        // asked for) so that only the very last chunk can be a partial one.
        byte[] bytes = new byte[BUFFER_SIZE];
        int pos = 0;
        int remaining = bytes.length;
        boolean done = false;
        while ( ! done ) {
            int read = stream.read(bytes, pos, remaining);
            if ( read >= 0 ) {
                pos += read;
                remaining -= read;
            }
            else {
                done = true;
            }
            if ( remaining == 0 || done ) {
                String base64 = Base64.encodeBytes(bytes, 0, pos);
                tree.addText(base64 + "\n");
                pos = 0;
                remaining = bytes.length;
            }
        }
    }

    /**
     * Tell whether the content must be handled as text (as opposed to binary).
     *
     * Only the media type itself is looked at, not its parameters, and it is
     * compared case-insensitively.  Content is text if its type is
     * application/xml, ends with +xml or starts with text/, or if its charset
     * is UTF-8.
     *
     * TODO: Copied from ReadableData, adapt following RFC 3023. See also what
     * I've done in EXPath HTTP Client for Saxon.
     *
     * @param content_type The content type, with or without parameters.  Must
     *     not be null.
     * @param charset The charset, can be null.
     */
    private boolean isText(String content_type, String charset)
    {
        String type = parseContentType(content_type);
        return "application/xml".equalsIgnoreCase(type)
                // regionMatches() returns false if the type is too short
                || type.regionMatches(true, type.length() - 4, "+xml", 0, 4)
                || type.regionMatches(true, 0, "text/", 0, 5)
                || "utf-8".equalsIgnoreCase(charset);
    }

    /**
     * Return the media type part of a content type, that is, without its
     * parameters and without surrounding whitespace.
     *
     * TODO: Same comments as for isText()...
     *
     * @return The media type, or null if {@code content_type} is null.
     */
    private String parseContentType(String content_type)
    {
        if ( content_type == null ) {
            return null;
        }
        int pos = content_type.indexOf(';');
        String type = pos > 0 ? content_type.substring(0, pos) : content_type;
        return type.trim();
    }

    /**
     * Return the value of the charset parameter of a content type.
     *
     * The parameter name is matched case-insensitively.  Whitespace and
     * double quotes around the value are removed.
     *
     * TODO: Same comments as for parseContentType()...
     *
     * @return The charset, or null if {@code content_type} is null, or has no
     *     charset parameter, or has an empty one.
     */
    private String parseCharset(String content_type)
    {
        if ( content_type == null ) {
            return null;
        }
        int pos = content_type.indexOf(';');
        if ( pos <= 0 ) {
            return null;
        }
        for ( String param : content_type.substring(pos + 1).split(";") ) {
            int eq = param.indexOf('=');
            if ( eq < 0 || ! "charset".equalsIgnoreCase(param.substring(0, eq).trim()) ) {
                continue;
            }
            String value = param.substring(eq + 1).trim();
            if ( value.length() >= 2 && value.startsWith("\"") && value.endsWith("\"") ) {
                value = value.substring(1, value.length() - 1).trim();
            }
            return value.length() == 0 ? null : value;
        }
        return null;
    }

    /** The URI of the data, as given by the user. */
    private final String myHref;
    /** The name of the wrapper element. */
    private final QName myWrapper;
    /** The content type given by the user, can be null. */
    private final String myConType;
    /** The resolver used to locate the data. */
    private final EntityResolver myResolver;
    /** The runtime used to build the document. */
    private final XProcRuntime myRuntime;
    /** The index of the next document returned by read(). */
    private int myPos = 0;
    /** The documents of this pipe (none or exactly one). */
    private final DocumentSequence myDocs;
}
/* ------------------------------------------------------------------------ */
/* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS COMMENT. */
/* */
/* The contents of this file are subject to the Mozilla Public License */
/* Version 1.0 (the "License"); you may not use this file except in */
/* compliance with the License. You may obtain a copy of the License at */
/* http://www.mozilla.org/MPL/. */
/* */
/* Software distributed under the License is distributed on an "AS IS" */
/* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See */
/* the License for the specific language governing rights and limitations */
/* under the License. */
/* */
/* The Original Code is: all this file. */
/* */
/* The Initial Developer of the Original Code is Florent Georges. */
/* */
/* Contributor(s): none. */
/* ------------------------------------------------------------------------ */