package org.apache.maven.index.reader;
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import java.io.Closeable;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Date;
import java.util.Iterator;
import java.util.Map;
import java.util.zip.GZIPOutputStream;
/**
* Maven 2 Index published binary chunk writer. It writes raw Maven Indexer records to the transport binary format.
*
* @since 5.1.2
*/
public class ChunkWriter
    implements Closeable
{
    /** Flag bit set for every field except {@code "i"} and {@code "m"}. */
    private static final int F_INDEXED = 1;

    /** Flag bit set for every field except {@code "i"}, {@code "m"}, {@code "1"} and {@code "px"}. */
    private static final int F_TOKENIZED = 2;

    /** Flag bit set for every field written by this class. */
    private static final int F_STORED = 4;

    private final String chunkName;

    private final DataOutputStream dataOutputStream;

    private final int version;

    /** Private copy of the timestamp passed to the constructor, or {@code null} if none was given. */
    private final Date timestamp;

    /**
     * Creates a writer that GZIP-compresses everything written to the given stream and immediately writes the
     * chunk header: one byte holding the version followed by a {@code long} holding the timestamp in
     * milliseconds ({@code -1} when {@code timestamp} is {@code null}).
     *
     * @param chunkName    the chunk name, must not be {@code null}; surrounding whitespace is trimmed
     * @param outputStream the stream receiving the compressed chunk; it is closed by {@link #close()}
     * @param version      the index version; only its low-order eight bits are written to the header
     * @param timestamp    the index timestamp, may be {@code null}; the instance is copied, so later changes
     *                     made by the caller do not affect this writer
     * @throws IOException if the GZIP stream cannot be created or the header cannot be written
     */
    public ChunkWriter(final String chunkName, final OutputStream outputStream, final int version,
                       final Date timestamp)
        throws IOException
    {
        this.chunkName = chunkName.trim();
        this.dataOutputStream = new DataOutputStream(new GZIPOutputStream(outputStream, 2 * 1024));
        this.version = version;
        // Defensive copy: java.util.Date is mutable and the value written to the header below must stay
        // consistent with what getTimestamp() reports afterwards.
        this.timestamp = timestamp == null ? null : new Date(timestamp.getTime());

        dataOutputStream.writeByte(version);
        dataOutputStream.writeLong(this.timestamp == null ? -1 : this.timestamp.getTime());
    }

    /**
     * Returns the chunk name.
     */
    public String getName()
    {
        return chunkName;
    }

    /**
     * Returns index version. All releases so far always returned {@code 1}.
     */
    public int getVersion()
    {
        return version;
    }

    /**
     * Returns the index timestamp of last update of the index, or {@code null} if the writer was created
     * without one. The returned instance is a copy; modifying it does not affect this writer.
     */
    public Date getTimestamp()
    {
        return timestamp == null ? null : new Date(timestamp.getTime());
    }

    /**
     * Writes out all records provided by the iterator and returns the written record count.
     *
     * @param iterator the records to write; each record maps field names to field values
     * @return the number of records written
     * @throws IOException          if writing to the underlying stream fails
     * @throws NullPointerException if a record, a field name or a field value is {@code null}; the offending
     *                              record is rejected before any of its bytes are written
     */
    public int writeChunk(final Iterator<Map<String, String>> iterator)
        throws IOException
    {
        int written = 0;
        while (iterator.hasNext()) {
            writeRecord(iterator.next(), dataOutputStream);
            written++;
        }
        return written;
    }

    /**
     * Closes this writer and its underlying output.
     */
    @Override
    public void close()
        throws IOException
    {
        dataOutputStream.close();
    }

    /**
     * Writes one record: an {@code int} field count followed by every field of the record.
     */
    private static void writeRecord(final Map<String, String> record, final DataOutput dataOutput)
        throws IOException
    {
        // Validate the whole record first, so a null name or value cannot leave a half-written record
        // (count plus some fields) behind in the stream.
        for (Map.Entry<String, String> entry : record.entrySet()) {
            if (entry.getKey() == null) {
                throw new NullPointerException("record contains a null field name");
            }
            if (entry.getValue() == null) {
                throw new NullPointerException("record field '" + entry.getKey() + "' has a null value");
            }
        }

        dataOutput.writeInt(record.size());
        for (Map.Entry<String, String> entry : record.entrySet()) {
            writeField(entry.getKey(), entry.getValue(), dataOutput);
        }
    }

    /**
     * Writes one field: a flags byte, the field name via {@link DataOutput#writeUTF(String)} and the field
     * value via {@link #writeUTF(String, DataOutput)}.
     */
    private static void writeField(final String fieldName, final String fieldValue, final DataOutput dataOutput)
        throws IOException
    {
        boolean isIndexed = !(fieldName.equals("i") || fieldName.equals("m"));
        boolean isTokenized = !(fieldName.equals("i")
            || fieldName.equals("m")
            || fieldName.equals("1")
            || fieldName.equals("px"));
        int flags = (isIndexed ? F_INDEXED : 0) + (isTokenized ? F_TOKENIZED : 0) + F_STORED;

        dataOutput.writeByte(flags);
        dataOutput.writeUTF(fieldName);
        writeUTF(fieldValue, dataOutput);
    }

    /**
     * Writes a string as an {@code int} byte count followed by the encoded bytes. Each {@code char} is encoded
     * the same way {@link DataOutput#writeUTF(String)} does it (one byte for {@code 0x0001-0x007F}, three
     * bytes above {@code 0x07FF}, two bytes otherwise, which includes {@code 0x0000}); the difference is the
     * four-byte length prefix, which lifts the 65535-byte limit of {@code DataOutput.writeUTF}.
     */
    private static void writeUTF(final String str, final DataOutput dataOutput)
        throws IOException
    {
        final int strlen = str.length();

        // First pass: compute the encoded length, which has to be written before the payload.
        int utflen = 0;
        for (int i = 0; i < strlen; i++) {
            final int c = str.charAt(i);
            if ((c >= 0x0001) && (c <= 0x007F)) {
                utflen++;
            }
            else if (c > 0x07FF) {
                utflen += 3;
            }
            else {
                utflen += 2;
            }
        }
        dataOutput.writeInt(utflen);

        // Second pass: encode char by char into a buffer of exactly the computed size.
        final byte[] bytearr = new byte[utflen];
        int count = 0;
        for (int i = 0; i < strlen; i++) {
            final int c = str.charAt(i);
            if ((c >= 0x0001) && (c <= 0x007F)) {
                bytearr[count++] = (byte) c;
            }
            else if (c > 0x07FF) {
                bytearr[count++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
                bytearr[count++] = (byte) (0x80 | ((c >> 6) & 0x3F));
                bytearr[count++] = (byte) (0x80 | (c & 0x3F));
            }
            else {
                bytearr[count++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
                bytearr[count++] = (byte) (0x80 | (c & 0x3F));
            }
        }
        dataOutput.write(bytearr, 0, utflen);
    }
}