package org.apache.maven.index.reader; /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ import java.io.Closeable; import java.io.DataInput; import java.io.DataInputStream; import java.io.EOFException; import java.io.IOException; import java.io.InputStream; import java.io.UTFDataFormatException; import java.util.Date; import java.util.HashMap; import java.util.Iterator; import java.util.Map; import java.util.NoSuchElementException; import java.util.zip.GZIPInputStream; /** * Maven 2 Index published binary chunk reader, it reads raw Maven Indexer records from the transport binary format. * * @since 5.1.2 */ public class ChunkReader implements Closeable, Iterable<Map<String, String>> { private final String chunkName; private final DataInputStream dataInputStream; private final int version; private final Date timestamp; public ChunkReader(final String chunkName, final InputStream inputStream) throws IOException { this.chunkName = chunkName.trim(); this.dataInputStream = new DataInputStream(new GZIPInputStream(inputStream, 2 * 1024)); this.version = ((int) dataInputStream.readByte()) & 0xff; this.timestamp = new Date(dataInputStream.readLong()); } /** * Returns the chunk name. */ public String getName() { return chunkName; } /** * Returns index version. All releases so far always returned {@code 1}. */ public int getVersion() { return version; } /** * Returns the index timestamp of last update of the index. */ public Date getTimestamp() { return timestamp; } /** * Returns the {@link Record} iterator. */ public Iterator<Map<String, String>> iterator() { try { return new IndexIterator(dataInputStream); } catch (IOException e) { throw new RuntimeException("error", e); } } /** * Closes this reader and it's underlying input. */ public void close() throws IOException { dataInputStream.close(); } /** * Low memory footprint index iterator that incrementally parses the underlying stream. */ private static class IndexIterator implements Iterator<Map<String, String>> { private final DataInputStream dataInputStream; private Map<String, String> nextRecord; public IndexIterator(final DataInputStream dataInputStream) throws IOException { this.dataInputStream = dataInputStream; this.nextRecord = nextRecord(); } public boolean hasNext() { return nextRecord != null; } public Map<String, String> next() { if (nextRecord == null) { throw new NoSuchElementException("chunk depleted"); } Map<String, String> result = nextRecord; nextRecord = nextRecord(); return result; } public void remove() { throw new UnsupportedOperationException("remove"); } private Map<String, String> nextRecord() { try { return readRecord(dataInputStream); } catch (IOException e) { throw new RuntimeException("read error", e); } } } /** * Reads and returns next record from the underlying stream, or {@code null} if no more records. */ private static Map<String, String> readRecord(final DataInput dataInput) throws IOException { int fieldCount; try { fieldCount = dataInput.readInt(); } catch (EOFException ex) { return null; // no more documents } Map<String, String> recordMap = new HashMap<String, String>(); for (int i = 0; i < fieldCount; i++) { readField(recordMap, dataInput); } return recordMap; } private static void readField(final Map<String, String> record, final DataInput dataInput) throws IOException { dataInput.readByte(); // flags: neglect them String name = dataInput.readUTF(); String value = readUTF(dataInput); record.put(name, value); } private static String readUTF(final DataInput dataInput) throws IOException { int utflen = dataInput.readInt(); byte[] bytearr; char[] chararr; try { bytearr = new byte[utflen]; chararr = new char[utflen]; } catch (OutOfMemoryError e) { IOException ioex = new IOException("Index data content is corrupt"); ioex.initCause(e); throw ioex; } int c, char2, char3; int count = 0; int chararr_count = 0; dataInput.readFully(bytearr, 0, utflen); while (count < utflen) { c = bytearr[count] & 0xff; if (c > 127) { break; } count++; chararr[chararr_count++] = (char) c; } while (count < utflen) { c = bytearr[count] & 0xff; switch (c >> 4) { case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7: /* 0xxxxxxx */ count++; chararr[chararr_count++] = (char) c; break; case 12: case 13: /* 110x xxxx 10xx xxxx */ count += 2; if (count > utflen) { throw new UTFDataFormatException("malformed input: partial character at end"); } char2 = bytearr[count - 1]; if ((char2 & 0xC0) != 0x80) { throw new UTFDataFormatException("malformed input around byte " + count); } chararr[chararr_count++] = (char) (((c & 0x1F) << 6) | (char2 & 0x3F)); break; case 14: /* 1110 xxxx 10xx xxxx 10xx xxxx */ count += 3; if (count > utflen) { throw new UTFDataFormatException("malformed input: partial character at end"); } char2 = bytearr[count - 2]; char3 = bytearr[count - 1]; if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80)) { throw new UTFDataFormatException("malformed input around byte " + (count - 1)); } chararr[chararr_count++] = (char) (((c & 0x0F) << 12) | ((char2 & 0x3F) << 6) | (char3 & 0x3F)); break; default: /* 10xx xxxx, 1111 xxxx */ throw new UTFDataFormatException("malformed input around byte " + count); } } // The number of chars produced may be less than utflen return new String(chararr, 0, chararr_count); } }