/**
 * Copyright 2015 Santhosh Kumar Tekuri
 *
 * The JLibs authors license this file to you under the Apache License,
 * version 2.0 (the "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */

package jlibs.xml.sax.async;

import jlibs.core.nio.InputStreamChannel;
import jlibs.nbp.Feeder;
import jlibs.nbp.NBChannel;
import jlibs.nbp.NBParser;
import jlibs.nbp.NBReaderChannel;
import org.xml.sax.InputSource;

import java.io.*;
import java.net.*;
import java.nio.CharBuffer;
import java.nio.channels.ReadableByteChannel;
import java.nio.charset.Charset;
import java.util.Locale;

/**
 * Feeder that supplies the characters of one XML input source to the parser.
 * <p>
 * Besides opening the underlying channel for an {@link InputSource}, it sniffs
 * the beginning of the stream for the {@code "<?xml "} prefix and, when present,
 * routes the declaration through a dedicated {@link XMLScanner} so that the
 * declared encoding can be applied before the rest of the content is decoded.
 *
 * @author Santhosh Kumar T
 */
public class XMLFeeder extends Feeder{
    AsyncXMLReader xmlReader;
    String publicID;
    String systemID;
    Runnable postAction;

    /**
     * Creates a feeder for the given source and immediately initializes it.
     *
     * @param xmlReader  reader that is told about this feeder on every {@link #read()}
     * @param parser     parser handed to the {@link Feeder} super class
     * @param source     input to read from
     * @param declParser scanner used for the xml/text declaration, or {@code null} to skip prolog sniffing
     * @throws IOException if the source cannot be opened
     */
    public XMLFeeder(AsyncXMLReader xmlReader, NBParser parser, InputSource source, XMLScanner declParser) throws IOException{
        super(parser);
        this.xmlReader = xmlReader;
        init(source, declParser);
    }

    /**
     * Heuristic shared by {@link #toURL(String)} and {@link #resolve(String)}:
     * a colon at index 3..8 is taken as the end of a URL scheme.
     */
    private static boolean looksLikeURL(String systemID){
        int ix = systemID.indexOf(':', 0);
        return ix>=3 && ix<=8;
    }

    /**
     * Converts a system id to a URL string.
     *
     * @param systemID system id; may be {@code null}
     * @return {@code null} for {@code null} input; the input itself if it already
     *         looks like a URL; otherwise a {@code file:} URL built from the
     *         absolute path of the file named by {@code systemID}
     * @throws IOException if the {@code file:} URL cannot be constructed
     */
    public static String toURL(String systemID) throws IOException{
        if(systemID==null)
            return null;
        if(looksLikeURL(systemID))
            return systemID;

        String absPath = new File(systemID).getAbsolutePath();
        char sep = File.separatorChar;
        if(sep!='/')
            absPath = absPath.replace(sep, '/');
        // paths such as "C:/dir/file" need a leading slash inside a file URL
        if(absPath.length()>0 && absPath.charAt(0)!='/')
            absPath = "/" + absPath;
        return new URL("file", "", absPath).toString();
    }

    /**
     * (Re)initializes this feeder for the given input source.
     * <p>
     * The channel is chosen in this order: character stream, channel of a
     * {@link ChannelInputSource}, byte stream, and finally a stream opened from
     * the system id. When the stream is opened via a non-{@code file:} URL, the
     * encoding derived from the response content type (if any) overrides
     * {@link InputSource#getEncoding()}.
     *
     * @param is           input source to read from
     * @param prologParser scanner for the xml/text declaration, or {@code null}
     * @throws IOException if the underlying stream cannot be opened
     */
    final void init(InputSource is, XMLScanner prologParser) throws IOException{
        postAction = null;
        iProlog = 0;
        this.prologParser = prologParser;
        elemDepth = 0;
        publicID = is.getPublicId();
        systemID = toURL(is.getSystemId());

        Reader charStream = is.getCharacterStream();
        if(charStream!=null){
            // characters are already decoded; no encoding handling required
            setChannel(new NBReaderChannel(charStream));
            return;
        }

        ReadableByteChannel byteChannel = null;
        String encoding = is.getEncoding();
        if(is instanceof ChannelInputSource)
            byteChannel = ((ChannelInputSource)is).getChannel();

        if(byteChannel==null){
            InputStream inputStream = is.getByteStream();
            if(inputStream==null){
                assert systemID!=null;
                if(systemID.startsWith("file:/")){
                    try{
                        inputStream = new FileInputStream(new File(new URI(systemID)));
                    }catch(URISyntaxException ex){
                        throw new IOException(ex);
                    }
                }else{
                    URLConnection con = new URL(systemID).openConnection();
                    if(con instanceof HttpURLConnection){
                        // set preference for redirection
                        ((HttpURLConnection)con).setInstanceFollowRedirects(true);
                    }
                    inputStream = con.getInputStream();

                    String detectedEncoding = detectEncoding(con.getContentType());
                    if(detectedEncoding!=null)
                        encoding = detectedEncoding;
                }
            }
            byteChannel = new InputStreamChannel(inputStream);
        }

        nbChannel.setChannel(byteChannel);
        if(encoding==null)
            nbChannel.setEncoding("UTF-8", true);
        else
            nbChannel.setEncoding(encoding, false);
        setChannel(nbChannel);
    }

    /**
     * Derives the character encoding from a raw content type such as
     * {@code "text/xml; charset=UTF-8"} or {@code "text/xml"}.
     * <p>
     * Rules applied:
     * <ul>
     *   <li>{@code text/xml}: the {@code charset} parameter if present, otherwise
     *       {@code "US-ASCII"} (see RFC 2376 or 3023, section 3.1)</li>
     *   <li>{@code application/xml}: the {@code charset} parameter if present,
     *       otherwise {@code null}</li>
     *   <li>any other or missing content type: {@code null}</li>
     * </ul>
     * Media type and parameter name are matched case-insensitively; surrounding
     * single or double quotes of the charset value are stripped.
     *
     * @param rawContentType content type reported by the connection; may be {@code null}
     * @return encoding name, or {@code null} if the content type does not determine one
     */
    private static String detectEncoding(String rawContentType){
        if(rawContentType==null)
            return null;

        String contentType;
        String charset = null;
        int index = rawContentType.indexOf(';');
        if(index==-1)
            contentType = rawContentType.trim();
        else{
            // this should be something like "text/xml"
            contentType = rawContentType.substring(0, index).trim();
            for(String param: rawContentType.substring(index+1).split(";")){
                param = param.trim();
                // 8 is the length of "charset="
                if(!param.regionMatches(true, 0, "charset=", 0, 8))
                    continue;
                String value = param.substring(8).trim();
                int len = value.length();
                // strip quotes, if present; needs at least the two quote chars
                if(len>=2){
                    char first = value.charAt(0);
                    if((first=='"' || first=='\'') && value.charAt(len-1)==first)
                        value = value.substring(1, len-1);
                }
                if(value.length()>0)
                    charset = value;
                break;
            }
        }

        if(contentType.equalsIgnoreCase("text/xml"))
            return charset!=null ? charset : "US-ASCII";
        else if(contentType.equalsIgnoreCase("application/xml"))
            return charset;
        else
            return null;
    }

    /** Byte-decoding channel reused for every byte based source passed to {@link #init}. */
    private NBChannel nbChannel = new NBChannel(null);

    /**
     * Prolog detection state:
     * <ul>
     *   <li>{@code < 6}: number of leading chars matched against {@code "<?xml "} so far</li>
     *   <li>{@code == 6}: prefix matched, declaration is being fed to {@link #prologParser}</li>
     *   <li>{@code == 7}: prolog handling finished (declared encoding found, or no prolog)</li>
     * </ul>
     */
    private int iProlog = 0;
    CharBuffer singleChar = CharBuffer.allocate(1);
    CharBuffer sixChars = CharBuffer.allocate(6);
    XMLScanner prologParser;
    /** Maximum number of chars read eagerly, one at a time, after the {@code "<?xml "} prefix. */
    private static final int MAX_PROLOG_LENGTH = 70;

    /**
     * Reads from the channel, handling the optional xml/text declaration first.
     *
     * @return {@code this} when the channel currently has nothing to offer during
     *         prolog handling; otherwise whatever {@code super.read()} returns
     * @throws IOException if reading from the channel fails
     */
    @Override
    protected Feeder read() throws IOException{
        xmlReader.setFeeder(this);
        if(prologParser!=null){
            while(iProlog<6){
                sixChars.clear();
                // Read only as many chars as are still missing from "<?xml ".
                // Otherwise chars following the prefix would be lost when a
                // previous read delivered only part of it.
                sixChars.limit(6-iProlog);
                int read = channel.read(sixChars);
                if(read==0)
                    return this;
                else if(read==-1){
                    // input ended inside the prefix: replay what was matched
                    charBuffer.append("<?xml ", 0, iProlog);
                    return onPrologEOF();
                }else{
                    char[] chars = sixChars.array();
                    for(int i=0; i<read; i++){
                        char ch = chars[i];
                        if(isPrologStart(ch)){
                            iProlog++;
                            if(iProlog==6){
                                charBuffer.append("<?xml ");
                                // pull the declaration char by char so that nothing beyond
                                // '>' is decoded before the declared encoding is known
                                for(int count=0; count<MAX_PROLOG_LENGTH; count++){
                                    singleChar.clear();
                                    read = channel.read(singleChar);
                                    if(read==1){
                                        ch = singleChar.get(0);
                                        charBuffer.append(ch);
                                        if(ch=='>')
                                            break;
                                    }else
                                        break;
                                }
                                if(charBuffer.position()>0){
                                    charBuffer.flip();
                                    charBuffer.position(prologParser.consume(charBuffer.array(), charBuffer.position(), charBuffer.limit(), false));
                                    charBuffer.compact();
                                }
                                if(read==0)
                                    return this;
                                else if(read==-1)
                                    return onPrologEOF();
                                break;
                            }
                        }else{
                            // not a declaration: replay matched prefix plus the unread rest
                            charBuffer.append("<?xml ", 0, iProlog);
                            while(i<read)
                                charBuffer.append(chars[i++]);
                            iProlog = 7;
                            prologParser = null;
                            break;
                        }
                    }
                }
            }
            // declaration not finished yet: keep feeding one char at a time
            // until setDeclaredEncoding() moves the state to 7
            while(iProlog!=7){
                singleChar.clear();
                int read = channel.read(singleChar);
                if(read==0)
                    return this;
                else if(read==-1)
                    return onPrologEOF();
                else
                    prologParser.consume(singleChar.array(), 0, 1, false);
            }
        }
        return super.read();
    }

    /**
     * Handles end of input reached while still processing the prolog: makes the
     * buffered chars readable, closes and drops the channel, then delegates to
     * {@code super.read()}.
     */
    private Feeder onPrologEOF() throws IOException{
        charBuffer.flip();
        channel.close();
        channel = null;
        return super.read();
    }

    /**
     * Tells whether {@code ch} is the char expected at position {@link #iProlog}
     * of {@code "<?xml"} followed by one whitespace char (space, tab, LF or CR).
     */
    private boolean isPrologStart(char ch){
        switch(iProlog){
            case 0:
                return ch=='<';
            case 1:
                return ch=='?';
            case 2:
                return ch=='x';
            case 3:
                return ch=='m';
            case 4:
                return ch=='l';
            case 5:
                return ch==0x20 || ch==0x9 || ch==0xa || ch==0xd;
            default:
                throw new IllegalStateException("impossible: iProlog="+iProlog);
        }
    }

    /**
     * Marks prolog handling as finished and, if necessary, switches the decoder
     * of the byte channel to the declared encoding.
     * <p>
     * The decoder is left untouched when the declared name equals the current
     * charset name (ignoring case), or when {@code UTF-16} is declared and the
     * current charset name already starts with {@code UTF-16}.
     *
     * @param encoding encoding named in the declaration, or {@code null} if none
     */
    void setDeclaredEncoding(String encoding){
        iProlog = 7;
        parser.setLocation(prologParser);
        if(encoding==null || !(channel instanceof NBChannel))
            return;

        NBChannel decodingChannel = (NBChannel)channel;
        String detectedEncoding = decodingChannel.decoder().charset().name().toUpperCase(Locale.ENGLISH);
        String declaredEncoding = encoding.toUpperCase(Locale.ENGLISH);
        if(detectedEncoding.equals(declaredEncoding))
            return;
        if(detectedEncoding.startsWith("UTF-16") && declaredEncoding.equals("UTF-16"))
            return;
        decodingChannel.decoder(Charset.forName(encoding).newDecoder());
    }

    /**
     * Creates an input source for an external identifier, with the system id
     * resolved via {@link #resolve(String)}.
     *
     * @param publicID public identifier; may be {@code null}
     * @param systemID system identifier; may be {@code null}
     * @return new input source carrying the resolved system id and given public id
     * @throws IOException if the system id cannot be converted to a URL
     */
    public InputSource resolve(String publicID, String systemID) throws IOException{
        InputSource inputSource = new InputSource(resolve(systemID));
        inputSource.setPublicId(publicID);
        return inputSource;
    }

    /**
     * Resolves a system id against the system id of this feeder.
     *
     * @param systemID system id to resolve; may be {@code null}
     * @return {@code null} for {@code null} input; {@link #toURL(String)} of the
     *         input if this feeder has no system id; the input itself if it is
     *         empty, already looks like a URL, or is not a valid URI; otherwise
     *         the input resolved against this feeder's system id
     * @throws IOException if the system id cannot be converted to a URL
     */
    public String resolve(String systemID) throws IOException{
        if(systemID==null)
            return null;
        if(this.systemID==null)
            return toURL(systemID);
        if(systemID.length()==0 || looksLikeURL(systemID))
            return systemID;
        try{
            return new URI(this.systemID).resolve(new URI(systemID)).toString();
        }catch(URISyntaxException ex){
            // not a valid URI: hand back the system id unresolved
            return systemID;
        }
    }

    int elemDepth;
}