RowIndexer.java example

Explorer
sql-layer-master
/**
 * Copyright (C) 2009-2013 FoundationDB, LLC
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package com.foundationdb.server.service.text;

import com.foundationdb.qp.operator.Cursor;
import com.foundationdb.qp.rowtype.TableRowType;
import com.foundationdb.qp.row.Row;
import com.foundationdb.qp.rowtype.RowType;
import com.foundationdb.server.types.value.ValueSource;
import com.foundationdb.util.Strings;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.util.*;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;

/**
 * Given <code>Row</code>s in hkey order, create <code>Document</code>s.
 *
 * <p>One document is produced per row of the indexed row type. Rows of
 * ancestor row types are remembered as they go by so that their fields can
 * be copied into the document, and rows of descendant row types contribute
 * their fields to the document that is currently being built.</p>
 *
 * <p>Instances carry mutable per-run state (the pending document, the
 * remembered ancestors, the document count) with no synchronization.</p>
 */
public class RowIndexer implements Closeable
{
    /** Row types at or above the indexed row type, mapped to their table depth. */
    private final Map<RowType,Integer> ancestorRowTypes;
    /** Most recently seen row at each depth; the last slot is the indexed row itself. */
    private final Row[] ancestors;
    /** Row types below the indexed row type. */
    private final Set<RowType> descendantRowTypes;
    private final Map<RowType,List<IndexedField>> fieldsByRowType;
    private final IndexWriter writer;
    /** Document being assembled, or <code>null</code> when none is pending. */
    private Document currentDocument;
    /** Number of documents written since the last call to {@link #indexRows}. */
    private long documentCount;
    /** Encoded hkey of the row that started {@link #currentDocument}. */
    private String keyEncodedString;
    /** When true, documents replace any existing one with the same key instead of being added. */
    private final boolean updating;

    private static final Logger logger = LoggerFactory.getLogger(RowIndexer.class);

    /**
     * Classify every row type of the index as either ancestor-or-self of the
     * indexed row type or as a descendant of it.
     *
     * @param index    supplies the indexed row type, the participating row
     *                 types and the fields to extract per row type
     * @param writer   destination for the documents
     * @param updating if true, use <code>updateDocument</code> keyed on the
     *                 encoded hkey; otherwise use <code>addDocument</code>
     */
    public RowIndexer(FullTextIndexInfo index, IndexWriter writer, boolean updating) {
        TableRowType indexedRowType = index.getIndexedRowType();
        int depth = indexedRowType.table().getDepth();
        ancestorRowTypes = new HashMap<>(depth+1);
        // One slot per depth from 0 down to the indexed table's own depth.
        ancestors = new Row[depth+1];
        fieldsByRowType = index.getFieldsByRowType();
        Set<RowType> rowTypes = index.getRowTypes();
        // The ancestor map is still empty here, so the total number of row
        // types is the only meaningful bound available for the capacity.
        descendantRowTypes = new HashSet<>(rowTypes.size());
        for (RowType rowType : rowTypes) {
            if ((rowType == indexedRowType) ||
                rowType.ancestorOf(indexedRowType)) {
                Integer ancestorDepth = rowType.table().getDepth();
                ancestorRowTypes.put(rowType, ancestorDepth);
            }
            else if (indexedRowType.ancestorOf(rowType)) {
                descendantRowTypes.add(rowType);
            }
            else {
                assert false : "Not ancestor or descendant " + rowType;
            }
        }
        this.writer = writer;
        this.updating = updating;
        currentDocument = null;
    }

    /**
     * Consume the next row of the scan.
     *
     * <ul>
     * <li><code>null</code> marks the end of input and flushes the pending
     *     document, if any.</li>
     * <li>A row of the indexed row type flushes the pending document and
     *     starts a new one holding the key field, the row's own fields and
     *     the fields of every remembered ancestor that really is an ancestor
     *     of this row.</li>
     * <li>A row of a shallower ancestor type is only remembered.</li>
     * <li>A row of a descendant type adds its fields to the pending document
     *     when the remembered indexed row is its ancestor.</li>
     * <li>Rows of any other type are ignored.</li>
     * </ul>
     *
     * @param row the next row in hkey order, or <code>null</code> at end of input
     * @throws IOException if writing a document fails
     */
    public void indexRow(Row row) throws IOException {
        if (row == null) {
            addDocument();
            return;
        }
        RowType rowType = row.rowType();
        Integer ancestorDepth = ancestorRowTypes.get(rowType);
        if (ancestorDepth != null) {
            ancestors[ancestorDepth] = row;
            // The deepest slot belongs to the indexed row type itself.
            if (ancestorDepth == ancestors.length - 1) {
                addDocument();
                currentDocument = new Document();
                getKeyBytes(row);
                addFields(row, fieldsByRowType.get(rowType));
                for (int i = 0; i < ancestors.length - 1; i++) {
                    Row ancestor = ancestors[i];
                    if (ancestor != null) {
                        // We may have remembered an ancestor with no
                        // children and then this row is an orphan.
                        if (ancestor.ancestorOf(row)) {
                            addFields(ancestor, fieldsByRowType.get(ancestor.rowType()));
                        }
                        else {
                            ancestors[i] = null;
                        }
                    }
                }
            }
        }
        else if (descendantRowTypes.contains(rowType)) {
            Row ancestor = ancestors[ancestors.length - 1];
            if ((ancestor != null) && ancestor.ancestorOf(row)) {
                addFields(row, fieldsByRowType.get(rowType));
            }
        }
    }

    /**
     * Index every row produced by the cursor.
     *
     * <p>The document count is reset, the cursor is opened, each row
     * (including the terminating <code>null</code>, which flushes the last
     * document) is passed to {@link #indexRow}, and the cursor is closed.
     * The cursor is closed even when reading or indexing fails.</p>
     *
     * @param cursor source of rows, expected in hkey order
     * @return number of documents written during this call
     * @throws IOException if writing a document fails
     */
    public long indexRows(Cursor cursor) throws IOException {
        documentCount = 0;
        cursor.openTopLevel();
        try {
            Row row;
            do {
                row = cursor.next();
                indexRow(row);
            } while (row != null);
        }
        finally {
            // Release the cursor on the failure path too.
            cursor.closeTopLevel();
        }
        return documentCount;
    }

    /**
     * Re-index the rows from the cursor and, if that produced no document at
     * all, delete whatever is stored under the given hkey.
     *
     * @param cursor    source of the rows to index
     * @param hkeyBytes raw hkey whose encoded form identifies the documents
     *                  to delete when the cursor yields nothing to index
     * @throws IOException if writing or deleting fails
     */
    protected void updateDocument(Cursor cursor, byte[] hkeyBytes) throws IOException
    {
        if (indexRows(cursor) == 0)
        {
            String encoded = encodeBytes(hkeyBytes, 0, hkeyBytes.length);
            writer.deleteDocuments(new Term(IndexedField.KEY_FIELD, encoded));
            logger.debug("Deleted documents with encoded bytes: {}", encoded);
        }
    }

    /**
     * Write the pending document, if there is one, and count it.
     *
     * <p>In updating mode the document replaces any document whose key field
     * matches the current encoded hkey; otherwise it is simply added.</p>
     *
     * @throws IOException if the writer fails
     */
    protected void addDocument() throws IOException {
        if (currentDocument != null) {
            if (updating) {
                writer.updateDocument(new Term(IndexedField.KEY_FIELD, keyEncodedString),
                                      currentDocument);
                logger.debug("Updated {}", currentDocument);
            }
            else {
                writer.addDocument(currentDocument);
                logger.debug("Added {}", currentDocument);
            }
            documentCount++;
            currentDocument = null;
        }
    }

    /**
     * Encode the row's hkey, remember it as the current key and add it to the
     * pending document as a stored key field.
     *
     * <p>Requires a pending document; it is called right after one is created.</p>
     *
     * @param row the indexed row that starts the pending document
     */
    protected void getKeyBytes(Row row) {
        byte[] bytes = row.hKey().hKeyBytes();
        keyEncodedString = encodeBytes(bytes, 0, bytes.length);
        Field field = new StringField(IndexedField.KEY_FIELD, keyEncodedString, Store.YES);
        currentDocument.add(field);
    }

    /**
     * Add one field per {@link IndexedField} to the pending document, taking
     * each value from the row at the field's position.
     *
     * @param row    source of the values
     * @param fields fields to extract; <code>null</code> means the row type
     *               contributes nothing
     * @throws IOException declared for callers and overriders; nothing in the
     *                     visible body is known to throw it
     */
    protected void addFields(Row row, List<IndexedField> fields) throws IOException {
        if (fields == null) return;
        for (IndexedField indexedField : fields) {
            ValueSource value = row.value(indexedField.getPosition());
            Field field = indexedField.getField(value);
            currentDocument.add(field);
        }
    }

    /**
     * Encode a slice of a byte array as the string used for the key field.
     *
     * @param bytes  source array
     * @param offset index of the first byte to encode
     * @param length number of bytes to encode
     * @return the Base64 form produced by <code>Strings.toBase64</code>
     */
    static String encodeBytes(byte[] bytes, int offset, int length)
    {
        // TODO: needs to be more efficient?
        return Strings.toBase64(bytes, offset, length);
    }

    /**
     * Inverse of {@link #encodeBytes}.
     *
     * @param st a string produced by {@link #encodeBytes}
     * @return the decoded bytes, as returned by <code>Strings.fromBase64</code>
     */
    static byte[] decodeString(String st)
    {
        return Strings.fromBase64(st);
    }

    /**
     * Drop the references to remembered ancestor rows. The writer is not
     * closed here.
     */
    @Override
    public void close() {
        Arrays.fill(ancestors, null);
    }

}