/* (c) Copyright 2007 Hewlett-Packard Development Company, LP [See end of file] $Id: RewindableURL.java 1170 2007-04-24 13:50:52Z jeremy_carroll $ */ package com.hp.hpl.jena.grddl.impl; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.UnsupportedEncodingException; import java.net.URL; import java.net.URLConnection; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.xml.transform.stream.StreamSource; /** * RewindableURL * * @author Jeremy J. Carroll */ public class RewindableURL extends Rewindable { final URL url; final String encoding; final String mimetype; StreamSource source; final GRDDL grddl; private StreamSource openSource = null; final URLConnection conn; // public RewindableURL(String u) throws IOException { // this(u,null); // } public RewindableURL(String u, GRDDL g) throws IOException { super(u); GRDDL.logurl(u); grddl = g; url = new URL(u); conn = url.openConnection(); conn.setRequestProperty("accept", "application/rdf+xml; q=0.5, " + "application/xhtml+xml; q=1.0, " + "text/html; q=0.7, " + "application/xml; q=1.0, " + "text/xml; q=0.7, " + "application/rss+xml; q=0.2, " + "*/*; q=0.1"); // conn.setRequestProperty("negotiate","*"); g.setHeaders(conn); encoding = conn.getContentEncoding(); mimetype = conn.getContentType(); String newU = conn.getURL().toString(); if (!newU.equals(url.toString())) { // TODO worry about IRI issues here updateRetrivalIRI(newU); GRDDL.logurl(newU); } // System.err.println("GET "+u); // Iterator<Entry<String,List<String>>> it = conn.getHeaderFields().entrySet().iterator(); // while (it.hasNext()){ // Entry<String,List<String>> e = it.next(); // System.err.print(e.getKey()+":"); // Iterator<String> i2 = e.getValue().iterator(); // while (i2.hasNext()) { // System.err.println(" "+i2.next()); // } // } // System.err.println(); String link = conn.getHeaderField("Link"); String profile = conn.getHeaderField("Profile"); if (link != null && profile != null && ( profile.equals(GRDDL.PROFILE) || profile.equals("<"+GRDDL.PROFILE+">") ) ) { addTransforms(link,g); } source = toStreamSource(conn.getInputStream()); } Pattern linkRegex = Pattern.compile("<([^>]*)>(?: *; *([-a-zA-Z0-9_]+) *=([^,]*)),?"); // link = <http://www.w3.org/2000/06/dc-extract/dc-extract.xsl>; rel="transformation" private void addTransforms(String link, GRDDL g) { Matcher m = linkRegex.matcher(link); while (m.find()) { if (m.groupCount()==3) { String url = m.group(1); String prop = m.group(2); String val = m.group(3); if (prop.equals("rel")) { val = val.trim(); if (val.equals("\"transformation\"")) { // System.err.println("t "+url); g.addTransform(this.resolveAgainstRetrievalIRI(url)); } } } } } private StreamSource toStreamSource(InputStream in) throws UnsupportedEncodingException { if (encoding == null) { return new StreamSource(in, url.toString()); } else { return new StreamSource(new InputStreamReader(in, encoding), url .toString()); } } String encoding() { return encoding; } String mimetype() { return mimetype; } StreamSource startAfreshRaw(boolean rewindable) throws IOException { close(); if (source != null) { openSource = source; source = null; } else { openSource = toStreamSource(reopen().getInputStream()); } return openSource; } private URLConnection reopen() throws IOException { URLConnection conn = url.openConnection(); grddl.setHeaders(conn); conn.setRequestProperty("accept", mimetype); // conn.setRequestProperty("Cache-Control","no-cache"); if (!equals(encoding,conn.getContentEncoding())) throw new RuntimeException("error handling not implemented"); if (!equals(mimetype,conn.getContentType())) throw new RuntimeException("error handling not implemented"); return conn; } private boolean equals(Object a, Object b) { if (a==null) return b==null; return a.equals(b); } void close() throws IOException { if (openSource != null) { if (openSource.getInputStream() != null) openSource.getInputStream().close(); if (openSource.getReader() != null) openSource.getReader().close(); openSource = null; } } } /* * (c) Copyright 2007 Hewlett-Packard Development Company, LP All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */