LaxURI.java example

Explorer

archive-commons-master
- archive-commons
  - src
    - main
      - java
        org
        archive
        RecoverableRecordFormatException.java
        extract
        CDXExtractorOutput.java
        DumpingExtractorOutput.java
        ExtractingResourceFactoryMapper.java
        ExtractingResourceProducer.java
        ExtractorOutput.java
        FilteredExtractorOuput.java
        JSONViewExtractorOutput.java
        ProducerUtils.java
        RealCDXExtractorOutput.java
        ResourceExtractor.java
        ResourceFactoryMapper.java
        WATExtractorOutput.java
        format
        arc
        ARCConstants.java
        ARCFormatException.java
        ARCMetaData.java
        ARCMetaDataParser.java
        FiledescRecord.java
        FiledescRecordParser.java
        cdx
        CDXFile.java
        CDXInputSource.java
        MultiCDXInputSource.java
        dns
        DNSParseException.java
        DNSRecord.java
        DNSResponse.java
        DNSResponseParser.java
        gzip
        GZIPConstants.java
        GZIPDecoder.java
        GZIPFExtraRecord.java
        GZIPFExtraRecords.java
        GZIPFooter.java
        GZIPFormatException.java
        GZIPHeader.java
        GZIPMemberSeries.java
        GZIPMemberWriter.java
        GZIPMemberWriterCommittedOutputStream.java
        GZIPSeriesMember.java
        GZIPStaticHeader.java
        zipnum
        LineBufferingIterator.java
        LocationUpdater.java
        MultiBlockIterator.java
        SummaryBlockIterator.java
        TimestampCustomDedupIterator.java
        TimestampDedupIterator.java
        ZipNumBlockLoader.java
        ZipNumCluster.java
        ZipNumParams.java
        ZipNumWriter.java
        http
        DumpingHTTPParseObserver.java
        HttpConstants.java
        HttpHeader.java
        HttpHeaderObserver.java
        HttpHeaderParser.java
        HttpHeaders.java
        HttpMessage.java
        HttpMessageParser.java
        HttpParseException.java
        HttpParseObserver.java
        HttpRequest.java
        HttpRequestMessage.java
        HttpRequestMessageObserver.java
        HttpRequestMessageParser.java
        HttpRequestParser.java
        HttpResponse.java
        HttpResponseMessage.java
        HttpResponseMessageObserver.java
        HttpResponseMessageParser.java
        HttpResponseParser.java
        json
        CompoundORJSONPathSpec.java
        CrossProductOfLists.java
        JSONPathSpec.java
        JSONPathSpecFactory.java
        JSONUtils.java
        JSONView.java
        SimpleJSONPathSpec.java
        text
        charset
        CharsetDetector.java
        RotatingCharsetDetector.java
        StandardCharsetDetector.java
        html
        CDATALexer.java
        LexParser.java
        NodeUtils.java
        ParseObserver.java
        warc
        WARCConstants.java
        WARCRecordWriter.java
        hadoop
        ArchiveJSONViewLoader.java
        ArchiveMetadataLoader.java
        FilenameInputFormat.java
        PerMapOutputFormat.java
        ResourceContext.java
        ResourceInputFormat.java
        ResourceRecordReader.java
        func
        JSONViewEvalFunc.java
        TupleFunc.java
        URLResolverFunc.java
        resource
        AbstractEmptyResource.java
        AbstractResource.java
        MetaData.java
        Resource.java
        ResourceConstants.java
        ResourceContainer.java
        ResourceFactory.java
        ResourceParseException.java
        ResourceProducer.java
        TransformingResourceProducer.java
        arc
        ARCResource.java
        ARCResourceFactory.java
        record
        FiledescResource.java
        FiledescResourceFactory.java
        generic
        GenericResourceProducer.java
        GenericStreamResource.java
        gzip
        GZIPMetaData.java
        GZIPResource.java
        GZIPResourceContainer.java
        html
        ExtractingParseObserver.java
        HTMLMetaData.java
        HTMLResource.java
        HTMLResourceFactory.java
        http
        HTTPHeadersResource.java
        HTTPHeadersResourceFactory.java
        HTTPRequestResource.java
        HTTPRequestResourceFactory.java
        HTTPResponseResource.java
        HTTPResponseResourceFactory.java
        producer
        ARCFile.java
        EnvelopedResourceFile.java
        WARCFile.java
        warc
        WARCResource.java
        WARCResourceFactory.java
        record
        DNSResource.java
        DNSResourceFactory.java
        WARCJSONMetaDataResource.java
        WARCJSONMetaDataResourceFactory.java
        WARCMetaDataResource.java
        WARCMetaDataResourceFactory.java
        streamcontext
        AbstractBufferingStream.java
        ByteArrayWrappedStream.java
        HDFSStream.java
        HTTP11Stream.java
        RandomAccessFileStream.java
        SimpleStream.java
        Stream.java
        StreamWrappedInputStream.java
        url
        AggressiveIACanonicalizerRules.java
        AggressiveIAURLCanonicalizer.java
        BasicURLCanonicalizer.java
        CanonicalizeRules.java
        CanonicalizerConstants.java
        DefaultIACanonicalizerRules.java
        DefaultIAURLCanonicalizer.java
        GoogleURLCanonicalizer.java
        HandyURL.java
        IAURLCanonicalizer.java
        LaxURI.java
        LaxURLCodec.java
        NonMassagingIAURLCanonicalizer.java
        OrdinaryIACanonicalizerRules.java
        OrdinaryIAURLCanonicalizer.java
        SURT.java
        SURTTokenizer.java
        URLCanonicalizer.java
        URLKeyMaker.java
        URLParser.java
        URLRegexTransformer.java
        UsableURI.java
        UsableURIFactory.java
        WaybackURLKeyMaker.java
        util
        ArchiveUtils.java
        Base32.java
        ByteOp.java
        CrossProduct.java
        DateUtils.java
        FileNameSpec.java
        GeneralURIStreamFactory.java
        IAUtils.java
        InterruptibleCharSequence.java
        NestedMap.java
        PrefixSet.java
        SURT.java
        StreamCopy.java
        StringFieldExtractor.java
        StringParse.java
        SurtPrefixSet.java
        TextUtils.java
        binsearch
        FileSearchTool.java
        SeekableLineReader.java
        SeekableLineReaderFactory.java
        SeekableLineReaderIterator.java
        SortedTextFile.java
        impl
        HDFSSeekableLineReader.java
        HDFSSeekableLineReaderFactory.java
        HTTPSeekableLineReader.java
        HTTPSeekableLineReaderFactory.java
        NIOSeekableLineReader.java
        NIOSeekableLineReaderFactory.java
        RandomAccessFileSeekableLineReader.java
        RandomAccessFileSeekableLineReaderFactory.java
        http
        ApacheHttp31SLR.java
        ApacheHttp31SLRFactory.java
        HTTPURLConnSLR.java
        HTTPURLConnSLRFactory.java
        io
        BytesReadObserver.java
        CRCInputStream.java
        CRCOutputStream.java
        CommitedOutputStream.java
        EOFNotifyingInputStream.java
        EOFObserver.java
        MultiMemberOpenJDKGZIPInputStream.java
        NotifyingInputStream.java
        PushBackOneByteInputStream.java
        iterator
        AbstractPeekableIterator.java
        BoundedStringIterator.java
        CachingStringFilter.java
        CloseableIterator.java
        CloseableIteratorUtil.java
        FilterStringIterator.java
        LineReadingIterator.java
        LookaheadIterator.java
        PeekableIterator.java
        PrefixMatchStringIterator.java
        RegexLineIterator.java
        SortedCompositeIterator.java
        StartBoundedStringIterator.java
        StringFilter.java
        StringTransformer.java
        TransformingIteratorWrapper.java
        TransformingPrefixStringFilter.java
        zip
        GZIPMembersInputStream.java
        GzipHeader.java
        NoGzipMagicException.java
        OpenJDK7GZIPInputStream.java
        OpenJDK7InflaterInputStream.java
    - test
      - java
        org
        archive
        format
        dns
        DNSResponseParserTest.java
        gzip
        GZIPMemberSeriesTest.java
        GZIPMemberWriterTest.java
        zipnum
        ZipNumWriterTest.java
        http
        HttpRequestMessageParserTest.java
        HttpResponseParserTest.java
        json
        CompoundORJSONPathSpecTest.java
        JSONPathSpecFactoryTest.java
        JSONViewTest.java
        SimpleJSONPathSpecTest.java
        text
        html
        CDATALexerTest.java
        resource
        html
        ExtractingParseObserverTest.java
        HTMLMetaDataTest.java
        url
        AggressiveIAURLCanonicalizerTest.java
        BasicURLCanonicalizerTest.java
        HandyURLTest.java
        IAURLCanonicalizerTest.java
        OrdinaryIAURLCanonicalizerTest.java
        URLParserTest.java
        URLRegexTransformerTest.java
        UsableURIFactoryTest.java
        UsableURITest.java
        WaybackURLKeyMakerTest.java
        util
        ArchiveUtilsTest.java
        ByteOpTest.java
        CrossProductTest.java
        InterruptibleCharSequenceTest.java
        StringFieldExtractorTest.java
        TestUtils.java
        binsearch
        SortedTextFileTest.java
        iterator
        CachingStringFilterTest.java
        FilterStringIteratorTest.java
        SortedCompositeIteratorTest.java
        zip
        GZIPMembersInputStreamTest.java
- ia-tools
  - src
    - main
      - java
        org
        archive
        hadoop
        cdx
        BlockLoader.java
        CDXCluster.java
        CDXClusterRangeDumper.java
        CDXConverterTool.java
        ClusterRange.java
        HDFSBlockLoader.java
        HDFSLSR.java
        HDFSRangeDumper.java
        ManifestAggregator.java
        SplitFile.java
        SummaryGenerator.java
        ZipNumBlock.java
        ZipNumBlockIterator.java
        io
        HDFSTouch.java
        MergeClusterRangesInputFormat.java
        MergeClusterRangesInputSplit.java
        jobs
        BuildCluster.java
        CDXTransformer.java
        HTTPImportJob.java
        JobDriver.java
        MergeClusterRangesJob.java
        MergeClusters.java
        WATExtractorJob.java
        mapreduce
        AlphaPartitioner.java
        CDXMapper.java
        GZIPMembersLineInputFormat.java
        GZIPMembersLineRecordReader.java
        GZIPRangeLineDereferencingInputFormat.java
        GZIPRangeLineDereferencingRecordReader.java
        GlobalWaybackCDXReducer.java
        GlobalWaybackMergeMapper.java
        HTTPImportMapper.java
        IdentityTextReducer.java
        LineDereferencingInputFormat.java
        LineDereferencingRecordReader.java
        OvercrawlZipNumRecordWriter.java
        SimpleTextMapper.java
        SortMergeInputFormat.java
        SortMergeInputSplit.java
        WATExtractorMapper.java
        ZipNumAllOutputFormat.java
        ZipNumAllShardRecordWriter.java
        ZipNumOutputFormat.java
        ZipNumRecordWriter.java
        ZipNumRecordWriterOld.java
        pig
        DateFilter.java
        DateFilterLSRLoader.java
        DisablablePathFilter.java
        FirstPigJobOnlyFilter.java
        HttpClusterInputSplit.java
        HttpInputLineRecordReader.java
        HttpTextLoader.java
        HttpZipNumDerefLineRecordReader.java
        LSRPigLoader.java
        RecordReaderValueIterator.java
        SequenceFileStorage.java
        ZipNumLoader.java
        ZipNumRecordReader.java
        ZipNumStorage.java
        udf
        AccessControlAllowCapture.java
        toSURT.java
        storage
        ZipNumStorage.java
        util
        HDFSMove.java
        HDFSSync.java
        HDFSeeko.java
        PartitionName.java
        io
        ZipNumWriterTool.java
        server
        FileBackedInputStream.java
        GZRangeClient.java
        GZRangeClientTool.java
        GZRangeServer.java
    - test
      - java
        org
        archive
        hadoop
        cdx
        CDXClusterTest.java
        SplitFileTest.java
        func
        URLResolverFuncTest.java
        storage
        ZipnumRecordWriterTest.java
        server
        GZRangeClientTest.java

/*
 *  This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 *  Licensed to the Internet Archive (IA) by one or more individual 
 *  contributors. 
 *
 *  The IA licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.archive.url;

import java.util.Arrays;
import java.util.BitSet;

import org.apache.commons.httpclient.URI;
import org.apache.commons.httpclient.URIException;
import org.apache.commons.httpclient.util.EncodingUtil;

/**
 * URI subclass which allows partial/inconsistent encoding, matching
 * the URIs which will be relayed in requests from popular web
 * browsers (esp. Mozilla Firefox and MS IE).
 * 
 * @author gojomo
 */
public class LaxURI extends URI {

    private static final long serialVersionUID = 5273922211722239537L;

    /** Long-lived array holding the scheme "http", meant to be shared across instances. */
    protected static final char[] HTTP_SCHEME = "http".toCharArray();

    /** Long-lived array holding the scheme "https", meant to be shared across instances. */
    protected static final char[] HTTPS_SCHEME = "https".toCharArray();

    /**
     * The superclass's rel_segment set, additionally allowing ':'.
     * TODO: add additional allowances as need is demonstrated
     */
    protected static final BitSet lax_rel_segment = laxCopyOf(rel_segment, ':');

    /**
     * The superclass's abs_path set, additionally allowing '|'
     * (tests indicate Firefox (1.0.6) doesn't escape it).
     */
    protected static final BitSet lax_abs_path = laxCopyOf(abs_path, '|');

    /** Union of {@link #lax_rel_segment} and {@link #lax_abs_path}. */
    protected static final BitSet lax_rel_path = laxCopyOf(lax_rel_segment);
    static {
        lax_rel_path.or(lax_abs_path);
    }

    /**
     * The superclass's query set, additionally allowing the characters
     * { } | [ ] ^ (tests indicate FF doesn't escape any of them in a query).
     */
    protected static final BitSet lax_query =
            laxCopyOf(query, '{', '}', '|', '[', ']', '^');

    /**
     * Builds a fresh 256-bit set containing every bit of <code>base</code>
     * plus one bit per extra character.
     *
     * @param base set to copy; it is not modified
     * @param extraChars characters to allow in addition to <code>base</code>
     * @return the new, widened set
     */
    private static BitSet laxCopyOf(BitSet base, char... extraChars) {
        BitSet widened = new BitSet(256);
        widened.or(base);
        for (char extra : extraChars) {
            widened.set(extra);
        }
        return widened;
    }
    
    // Pass-through constructors: each one simply delegates to the
    // superclass constructor with the same parameter list.

    /**
     * Creates an instance via the superclass's no-argument constructor.
     */
    public LaxURI() {
        super();
    }

    /**
     * Delegates to the superclass constructor taking a URI string and an
     * escaped flag.
     *
     * @param uri the URI character sequence
     * @param escaped <code>true</code> if <code>uri</code> is already escaped
     * @throws URIException if the superclass constructor rejects the input
     */
    public LaxURI(String uri, boolean escaped) throws URIException {
        super(uri, escaped);
    }

    /**
     * Delegates to the superclass constructor taking a URI string, an
     * escaped flag and a charset name.
     *
     * @param uri the URI character sequence
     * @param escaped <code>true</code> if <code>uri</code> is already escaped
     * @param charset the charset name handed to the superclass
     * @throws URIException if the superclass constructor rejects the input
     */
    public LaxURI(String uri, boolean escaped, String charset)
            throws URIException {
        super(uri, escaped, charset);
    }

    /**
     * Delegates to the superclass constructor taking a base URI and a
     * relative URI.
     *
     * @param base the base URI
     * @param relative the relative URI
     * @throws URIException if the superclass constructor rejects the input
     */
    public LaxURI(URI base, URI relative) throws URIException {
        super(base, relative);
    }

    /**
     * Returns the complete URI in decoded form. Overridden so that the
     * decoding goes through this class's static
     * {@link #decode(char[], String)} instead of the superclass's.
     *
     * @return the decoded URI, or <code>null</code> when <code>_uri</code>
     *         is unset
     * @throws URIException if decoding fails
     */
    public String getURI() throws URIException {
        if (_uri == null) {
            return null;
        }
        return decode(_uri, getProtocolCharset());
    }
    
    /**
     * Returns the path in decoded form. Overridden so that the decoding
     * goes through this class's static {@link #decode(char[], String)}
     * instead of the superclass's.
     *
     * @return the decoded path, or <code>null</code> when there is no raw path
     * @throws URIException if decoding fails
     */
    public String getPath() throws URIException {
        final char[] rawPath = getRawPath();
        if (rawPath != null) {
            return decode(rawPath, getProtocolCharset());
        }
        return null;
    }

    /**
     * Returns the path-plus-query in decoded form. Overridden so that the
     * decoding goes through this class's static
     * {@link #decode(char[], String)} instead of the superclass's.
     *
     * @return the decoded path and query, or <code>null</code> when there
     *         is no raw path/query
     * @throws URIException if decoding fails
     */
    public String getPathQuery() throws URIException {
        final char[] raw = getRawPathQuery();
        if (raw != null) {
            return decode(raw, getProtocolCharset());
        }
        return null;
    }
    /**
     * Decodes an escaped component given as a char array by converting it
     * to a String and handing it to {@link #decode(String, String)}.
     * Redeclared here so that this class's String variant is the one used.
     *
     * @param component the escaped characters; must not be <code>null</code>
     * @param charset the charset used to build the resulting String
     * @return the decoded component
     * @throws IllegalArgumentException if <code>component</code> is <code>null</code>
     * @throws URIException declared for compatibility with the String variant
     */
    protected static String decode(char[] component, String charset)
            throws URIException {
        if (component != null) {
            return decode(String.valueOf(component), charset);
        }
        throw new IllegalArgumentException(
                "Component array of chars may not be null");
    }

    /**
     * Decodes an escaped component using IA's {@link LaxURLCodec}, which
     * never throws DecoderException, so no exception translation is needed
     * here.
     *
     * @param component the escaped text; must not be <code>null</code>
     * @param charset the charset used to turn the decoded bytes into a String
     * @return the decoded component
     * @throws IllegalArgumentException if <code>component</code> is <code>null</code>
     * @throws URIException declared for signature compatibility
     */
    protected static String decode(String component, String charset)
            throws URIException {
        if (component == null) {
            throw new IllegalArgumentException(
                    "Component array of chars may not be null");
        }
        final byte[] asciiBytes = EncodingUtil.getAsciiBytes(component);
        final byte[] decodedBytes = LaxURLCodec.decodeUrlLoose(asciiBytes);
        return EncodingUtil.getString(decodedBytes, charset);
    }
    
    /**
     * Validates a whole component, first passing the acceptable-character
     * set through {@link #lax(BitSet)} so the more permissive variant is
     * used where one exists.
     *
     * @param component the characters to check
     * @param generous the set of acceptable characters requested by the caller
     * @return the result of the superclass validation against the
     *         (possibly relaxed) set
     */
    protected boolean validate(char[] component, BitSet generous) {
        final BitSet permitted = lax(generous);
        return super.validate(component, permitted);
    }

    /**
     * Validates a range of a component, first passing the
     * acceptable-character set through {@link #lax(BitSet)} so the more
     * permissive variant is used where one exists.
     *
     * @param component the characters to check
     * @param soffset start offset, forwarded unchanged to the superclass
     * @param eoffset end offset, forwarded unchanged to the superclass
     * @param generous the set of acceptable characters requested by the caller
     * @return the result of the superclass validation against the
     *         (possibly relaxed) set
     */
    protected boolean validate(char[] component, int soffset, int eoffset,
            BitSet generous) {
        final BitSet permitted = lax(generous);
        return super.validate(component, soffset, eoffset, permitted);
    }
    
    /**
     * Maps one of the URI superclass's predefined static BitSets to this
     * class's more permissive counterpart, to better match the characters
     * web browsers actually leave unencoded in requests.
     * <p>
     * The match is by object identity, so only the superclass's own
     * <code>rel_segment</code>, <code>abs_path</code>, <code>query</code>
     * and <code>rel_path</code> instances are swapped; any other set is
     * returned untouched.
     *
     * @param generous original BitSet
     * @return (possibly more lax) BitSet to use
     */
    protected BitSet lax(BitSet generous) {
        final BitSet chosen;
        if (generous == rel_segment) {
            chosen = lax_rel_segment;
        } else if (generous == abs_path) {
            chosen = lax_abs_path;
        } else if (generous == query) {
            chosen = lax_query;
        } else if (generous == rel_path) {
            chosen = lax_rel_path;
        } else {
            // no relaxed counterpart: keep the caller's set
            chosen = generous;
        }
        return chosen;
    }
    
    /**
     * Parses the authority via the superclass, then lets <code>_host</code>
     * share the <code>_authority</code> array when possible.
     * <p>
     * In the web crawl/http domain most URIs carry neither port nor user
     * info, so <code>_host</code> and <code>_authority</code> hold the same
     * characters, yet the superclass always allocates two separate char[]
     * instances. The original author's observation is that the two arrays
     * have equal length if and only if their contents are identical; this
     * method relies on that to drop the duplicate and slim the instance.
     *
     * @param original the authority text, forwarded to the superclass
     * @param escaped <code>true</code> if <code>original</code> is escaped
     * @throws URIException if the superclass parsing fails
     * @see org.apache.commons.httpclient.URI#parseAuthority(java.lang.String, boolean)
     */
    protected void parseAuthority(String original, boolean escaped)
            throws URIException {
        super.parseAuthority(original, escaped);
        if (_host == null || _authority == null) {
            return;
        }
        if (_authority.length == _host.length) {
            // same length => same content (see above): share one array
            _host = _authority;
        }
    }
    
    
    /**
     * Coalesce _scheme to existing instances, where appropriate.
     * <p>
     * In the web-crawl domain, most _schemes are 'http' or 'https',
     * but the superclass always creates a new char[] instance. For
     * these two cases, the freshly created array is replaced with the
     * long-lived {@link #HTTP_SCHEME} or {@link #HTTPS_SCHEME} instance
     * so the per-instance copy can be discarded (the original author
     * estimated a saving of 12-14 bytes per instance). Any other scheme,
     * including a <code>null</code> one, is left untouched. The superclass
     * <code>setURI()</code> is then invoked.
     *
     * @see org.apache.commons.httpclient.URI#setURI()
     */
    protected void setURI() {
        // Arrays.equals is false when exactly one argument is null and
        // compares lengths before contents, so no separate null or length
        // checks are needed.
        if (Arrays.equals(_scheme, HTTP_SCHEME)) {
            _scheme = HTTP_SCHEME;
        } else if (Arrays.equals(_scheme, HTTPS_SCHEME)) {
            // Previously compared against HTTP_SCHEME, which could never
            // match a five-character scheme, so "https" was never shared.
            _scheme = HTTPS_SCHEME;
        }
        super.setURI();
    }
    
    /**
     * Parses a URI reference and stores its scheme, authority, path, query
     * and fragment in this instance, then calls {@link #setURI()}.
     * <p>
     * IA OVERRIDDEN IN LaxURI TO INCLUDE FIX FOR 
     * http://issues.apache.org/jira/browse/HTTPCLIENT-588
     * AND
     * http://webteam.archive.org/jira/browse/HER-1268
     * <p>
     * The visible IA change in this body: a candidate scheme (the text
     * before the first ':') that fails validation is ignored instead of
     * being rejected, so the colon can be interpreted as part of a later
     * syntactical component.
     * <p>
     * In order to avoid any possibility of conflict with non-ASCII characters,
     * the URI reference is parsed as a <code>String</code> with the character
     * encoding of the local system or the document.
     * <p>
     * The following line is the regular expression for breaking-down a URI
     * reference into its components.
     * <p><blockquote><pre>
     *   ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
     *    12            3  4          5       6  7        8 9
     * </pre></blockquote><p>
     * For example, matching the above expression to
     *   http://jakarta.apache.org/ietf/uri/#Related
     * results in the following subexpression matches:
     * <p><blockquote><pre>
     *               $1 = http:
     *  scheme    =  $2 = http
     *               $3 = //jakarta.apache.org
     *  authority =  $4 = jakarta.apache.org
     *  path      =  $5 = /ietf/uri/
     *               $6 = <undefined>
     *  query     =  $7 = <undefined>
     *               $8 = #Related
     *  fragment  =  $9 = Related
     * </pre></blockquote><p>
     *
     * @param original the original character sequence
     * @param escaped <code>true</code> if <code>original</code> is escaped
     * @throws URIException if <code>original</code> is <code>null</code>,
     *         if an escaped query fails validation, or if one of the
     *         component setters/parsers called here raises it
     */
    protected void parseUriReference(String original, boolean escaped)
        throws URIException {

        // validate and construct the URI character sequence
        if (original == null) {
            throw new URIException("URI-Reference required");
        }

        /* @
         *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
         */
        String tmp = original.trim();
        
        /*
         * The length of the string sequence of characters.
         * It may not be equal to the length of the byte array.
         */
        int length = tmp.length();

        /*
         * Remove the delimiters like angle brackets around an URI.
         * Both the first and the last character must be in the delims set
         * for the pair to be stripped.
         */
        if (length > 0) {
            char[] firstDelimiter = { tmp.charAt(0) };
            if (validate(firstDelimiter, delims)) {
                if (length >= 2) {
                    char[] lastDelimiter = { tmp.charAt(length - 1) };
                    if (validate(lastDelimiter, delims)) {
                        tmp = tmp.substring(1, length - 1);
                        length = length - 2;
                    }
                }
            }
        }

        /*
         * The starting index
         */
        int from = 0;

        /*
         * The test flag whether the URI is started from the path component.
         * True when the text does not begin with "//" and either has no
         * colon after index 0 or has a slash before its first colon.
         */
        boolean isStartedFromPath = false;
        int atColon = tmp.indexOf(':');
        int atSlash = tmp.indexOf('/');
        if (!tmp.startsWith("//")
                && (atColon <= 0 || (atSlash >= 0 && atSlash < atColon))) {
            isStartedFromPath = true;
        }

        /*
         * <p><blockquote><pre>
         *     @@@@@@@@
         *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
         * </pre></blockquote><p>
         */
        // Locate the first component delimiter; ':' only counts when the
        // reference may begin with a scheme. No delimiter => start at 0.
        int at = indexFirstOf(tmp, isStartedFromPath ? "/?#" : ":/?#", from);
        if (at == -1) { 
            at = 0;
        }

        /*
         * Parse the scheme.
         * <p><blockquote><pre>
         *  scheme    =  $2 = http
         *              @
         *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
         * </pre></blockquote><p>
         */
        if (at > 0 && at < length && tmp.charAt(at) == ':') {
            // the scheme candidate is lower-cased before validation/storage
            char[] target = tmp.substring(0, at).toLowerCase().toCharArray();
            if (validate(target, scheme)) {
                _scheme = target;
                // continue parsing just past the ':'
                from = ++at;
            } else {
                // IA CHANGE:
                // do nothing; allow interpretation as URI with 
                // later colon in other syntactical component
            }
        }

        /*
         * Parse the authority component.
         * <p><blockquote><pre>
         *  authority =  $4 = jakarta.apache.org
         *                  @@
         *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
         * </pre></blockquote><p>
         */
        // Reset flags
        _is_net_path = _is_abs_path = _is_rel_path = _is_hier_part = false;
        if (0 <= at && at < length && tmp.charAt(at) == '/') {
            // Set flag
            _is_hier_part = true;
            // "//" followed by at least one more character, and the
            // reference was not judged to start from the path
            if (at + 2 < length && tmp.charAt(at + 1) == '/' 
                && !isStartedFromPath) {
                // the temporary index to start the search from
                int next = indexFirstOf(tmp, "/?#", at + 2);
                if (next == -1) {
                    // no terminator: the authority runs to the end of the text
                    next = (tmp.substring(at + 2).length() == 0) ? at + 2 
                        : tmp.length();
                }
                parseAuthority(tmp.substring(at + 2, next), escaped);
                from = at = next;
                // Set flag
                _is_net_path = true;
            }
            if (from == at) {
                // Set flag
                _is_abs_path = true;
            }
        }

        /*
         * Parse the path component.
         * <p><blockquote><pre>
         *  path      =  $5 = /ietf/uri/
         *                                @@@@@@
         *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
         * </pre></blockquote><p>
         */
        if (from < length) {
            // rel_path = rel_segment [ abs_path ]
            // the path ends at the first '?' or '#', or at the end of the text
            int next = indexFirstOf(tmp, "?#", from);
            if (next == -1) {
                next = tmp.length();
            }
            if (!_is_abs_path) {
                // Classify a non-absolute path: unescaped input is checked
                // with prevalidate() against the disallowed_* sets, escaped
                // input with validate() against the allowed sets.
                if (!escaped 
                    && prevalidate(tmp.substring(from, next), disallowed_rel_path) 
                    || escaped 
                    && validate(tmp.substring(from, next).toCharArray(), rel_path)) {
                    // Set flag
                    _is_rel_path = true;
                } else if (!escaped 
                    && prevalidate(tmp.substring(from, next), disallowed_opaque_part) 
                    || escaped 
                    && validate(tmp.substring(from, next).toCharArray(), opaque_part)) {
                    // Set flag
                    _is_opaque_part = true;
                } else {
                    // the path component may be empty
                    _path = null;
                }
            }
            String s = tmp.substring(from, next);
            if (escaped) {
                setRawPath(s.toCharArray());
            } else {
                setPath(s);
            }
            at = next;
        }

        // set the charset to do escape encoding
        String charset = getProtocolCharset();

        /*
         * Parse the query component.
         * <p><blockquote><pre>
         *  query     =  $7 = <undefined>
         *                                        @@@@@@@@@
         *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
         * </pre></blockquote><p>
         */
        // a '?' that is the very last character is not treated as a query
        if (0 <= at && at + 1 < length && tmp.charAt(at) == '?') {
            int next = tmp.indexOf('#', at + 1);
            if (next == -1) {
                next = tmp.length();
            }
            if (escaped) {
                _query = tmp.substring(at + 1, next).toCharArray();
                if (!validate(_query, query)) {
                    throw new URIException("Invalid query");
                }
            } else {
                _query = encode(tmp.substring(at + 1, next), allowed_query, charset);
            }
            at = next;
        }

        /*
         * Parse the fragment component.
         * <p><blockquote><pre>
         *  fragment  =  $9 = Related
         *                                                   @@@@@@@@
         *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
         * </pre></blockquote><p>
         */
        // unlike the query, a trailing '#' is accepted and yields an
        // empty (non-null) fragment
        if (0 <= at && at + 1 <= length && tmp.charAt(at) == '#') {
            if (at + 1 == length) { // empty fragment
                _fragment = "".toCharArray();
            } else {
                _fragment = (escaped) ? tmp.substring(at + 1).toCharArray() 
                    : encode(tmp.substring(at + 1), allowed_fragment, charset);
            }
        }

        // set this URI.
        setURI();
    }
    
}