package org.apache.maven.index.updater; /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ import java.io.BufferedOutputStream; import java.io.DataOutput; import java.io.DataOutputStream; import java.io.IOException; import java.io.OutputStream; import java.util.ArrayList; import java.util.Date; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.zip.GZIPOutputStream; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.StringField; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.MultiFields; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.util.Bits; import org.apache.maven.index.ArtifactInfo; import org.apache.maven.index.context.DefaultIndexingContext; import org.apache.maven.index.context.IndexingContext; /** * An index data writer used to write transfer index format. * * @author Eugene Kuleshov */ public class IndexDataWriter { static final int VERSION = 1; static final int F_INDEXED = 1; static final int F_TOKENIZED = 2; static final int F_STORED = 4; static final int F_COMPRESSED = 8; private final DataOutputStream dos; private final GZIPOutputStream gos; private final BufferedOutputStream bos; private final Set<String> allGroups; private final Set<String> rootGroups; private boolean descriptorWritten; public IndexDataWriter( OutputStream os ) throws IOException { bos = new BufferedOutputStream( os, 1024 * 8 ); gos = new GZIPOutputStream( bos, 1024 * 2 ); dos = new DataOutputStream( gos ); this.allGroups = new HashSet<String>(); this.rootGroups = new HashSet<String>(); this.descriptorWritten = false; } public int write( IndexingContext context, IndexReader indexReader, List<Integer> docIndexes ) throws IOException { writeHeader( context ); int n = writeDocuments( indexReader, docIndexes ); writeGroupFields(); close(); return n; } public void close() throws IOException { dos.flush(); gos.flush(); gos.finish(); bos.flush(); } public void writeHeader( IndexingContext context ) throws IOException { dos.writeByte( VERSION ); Date timestamp = context.getTimestamp(); dos.writeLong( timestamp == null ? -1 : timestamp.getTime() ); } public void writeGroupFields() throws IOException { { List<IndexableField> allGroupsFields = new ArrayList<>( 2 ); allGroupsFields.add( new StringField( ArtifactInfo.ALL_GROUPS, ArtifactInfo.ALL_GROUPS_VALUE, Store.YES)); allGroupsFields.add( new StringField( ArtifactInfo.ALL_GROUPS_LIST, ArtifactInfo.lst2str( allGroups ), Store.YES) ); writeDocumentFields( allGroupsFields ); } { List<IndexableField> rootGroupsFields = new ArrayList<>( 2 ); rootGroupsFields.add( new StringField( ArtifactInfo.ROOT_GROUPS, ArtifactInfo.ROOT_GROUPS_VALUE, Store.YES) ); rootGroupsFields.add( new StringField( ArtifactInfo.ROOT_GROUPS_LIST, ArtifactInfo.lst2str( rootGroups ), Store.YES )); writeDocumentFields( rootGroupsFields ); } } public int writeDocuments( IndexReader r, List<Integer> docIndexes ) throws IOException { int n = 0; Bits liveDocs = MultiFields.getLiveDocs(r); if ( docIndexes == null ) { for ( int i = 0; i < r.maxDoc(); i++ ) { if (liveDocs == null || liveDocs.get(i) ) { if ( writeDocument( r.document( i ) ) ) { n++; } } } } else { for ( int i : docIndexes ) { if ( liveDocs == null || liveDocs.get(i) ) { if ( writeDocument( r.document( i ) ) ) { n++; } } } } return n; } public boolean writeDocument( final Document document ) throws IOException { List<IndexableField> fields = document.getFields(); List<IndexableField> storedFields = new ArrayList<>( fields.size() ); for (IndexableField field : fields ) { if ( DefaultIndexingContext.FLD_DESCRIPTOR.equals( field.name() ) ) { if ( descriptorWritten ) { return false; } else { descriptorWritten = true; } } if ( ArtifactInfo.ALL_GROUPS.equals( field.name() ) ) { final String groupList = document.get( ArtifactInfo.ALL_GROUPS_LIST ); if ( groupList != null && groupList.trim().length() > 0 ) { allGroups.addAll( ArtifactInfo.str2lst( groupList ) ); } return false; } if ( ArtifactInfo.ROOT_GROUPS.equals( field.name() ) ) { final String groupList = document.get( ArtifactInfo.ROOT_GROUPS_LIST ); if ( groupList != null && groupList.trim().length() > 0 ) { rootGroups.addAll( ArtifactInfo.str2lst( groupList ) ); } return false; } if ( field.fieldType().stored()) { storedFields.add( field ); } } writeDocumentFields( storedFields ); return true; } public void writeDocumentFields( List<IndexableField> fields ) throws IOException { dos.writeInt( fields.size() ); for ( IndexableField field : fields ) { writeField( field ); } } public void writeField( IndexableField field ) throws IOException { int flags = ( field.fieldType().indexOptions() != IndexOptions.NONE ? F_INDEXED : 0 ) // + ( field.fieldType().tokenized() ? F_TOKENIZED : 0 ) // + ( field.fieldType().stored() ? F_STORED : 0 ); // // + ( false ? F_COMPRESSED : 0 ); // Compressed not supported anymore String name = field.name(); String value = field.stringValue(); dos.write( flags ); dos.writeUTF( name ); writeUTF( value, dos ); } private static void writeUTF( String str, DataOutput out ) throws IOException { int strlen = str.length(); int utflen = 0; int c; // use charAt instead of copying String to char array for ( int i = 0; i < strlen; i++ ) { c = str.charAt( i ); if ( ( c >= 0x0001 ) && ( c <= 0x007F ) ) { utflen++; } else if ( c > 0x07FF ) { utflen += 3; } else { utflen += 2; } } // TODO optimize storing int value out.writeInt( utflen ); byte[] bytearr = new byte[utflen]; int count = 0; int i = 0; for ( ; i < strlen; i++ ) { c = str.charAt( i ); if ( !( ( c >= 0x0001 ) && ( c <= 0x007F ) ) ) { break; } bytearr[count++] = (byte) c; } for ( ; i < strlen; i++ ) { c = str.charAt( i ); if ( ( c >= 0x0001 ) && ( c <= 0x007F ) ) { bytearr[count++] = (byte) c; } else if ( c > 0x07FF ) { bytearr[count++] = (byte) ( 0xE0 | ( ( c >> 12 ) & 0x0F ) ); bytearr[count++] = (byte) ( 0x80 | ( ( c >> 6 ) & 0x3F ) ); bytearr[count++] = (byte) ( 0x80 | ( ( c >> 0 ) & 0x3F ) ); } else { bytearr[count++] = (byte) ( 0xC0 | ( ( c >> 6 ) & 0x1F ) ); bytearr[count++] = (byte) ( 0x80 | ( ( c >> 0 ) & 0x3F ) ); } } out.write( bytearr, 0, utflen ); } }