/** * Copyright (C) 2009-2013 FoundationDB, LLC * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package com.foundationdb.server.service.text; import com.foundationdb.qp.operator.Cursor; import com.foundationdb.qp.rowtype.TableRowType; import com.foundationdb.qp.row.Row; import com.foundationdb.qp.rowtype.RowType; import com.foundationdb.server.types.value.ValueSource; import com.foundationdb.util.Strings; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.*; import java.util.*; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.StringField; /** Given <code>Row</code>s in hkey order, create <code>Document</code>s. 
*/
public class RowIndexer implements Closeable {
    private static final Logger logger = LoggerFactory.getLogger(RowIndexer.class);

    /**
     * Row types that are the indexed row type itself or one of its ancestors,
     * mapped to their table depth. The depth doubles as the slot in
     * {@link #ancestors}.
     */
    private final Map<RowType,Integer> ancestorRowTypes;
    /**
     * Most recently seen row per depth. The last slot holds the current row of
     * the indexed row type; lower slots hold its (possible) ancestors.
     */
    private final Row[] ancestors;
    /** Row types that are descendants of the indexed row type. */
    private final Set<RowType> descendantRowTypes;
    /** Fields to extract from a row, keyed by that row's type. */
    private final Map<RowType,List<IndexedField>> fieldsByRowType;
    private final IndexWriter writer;
    /** When true, documents replace any existing document with the same key. */
    private final boolean updating;
    /** Document under construction, or <code>null</code> if none is pending. */
    private Document currentDocument;
    /** Number of documents handed to the writer since the last {@link #indexRows}. */
    private long documentCount;
    /** Encoded hkey of {@link #currentDocument}; used as the update term. */
    private String keyEncodedString;

    /**
     * Classify every row type of the index as either "indexed row type or
     * ancestor" or "descendant" so that incoming rows can be dispatched quickly.
     *
     * @param index    description of the full text index being populated
     * @param writer   Lucene writer that receives the documents
     * @param updating <code>true</code> to replace documents by key,
     *                 <code>false</code> to blindly add them
     */
    public RowIndexer(FullTextIndexInfo index, IndexWriter writer, boolean updating) {
        TableRowType indexedRowType = index.getIndexedRowType();
        int depth = indexedRowType.table().getDepth();
        ancestorRowTypes = new HashMap<>(depth+1);
        ancestors = new Row[depth+1];
        fieldsByRowType = index.getFieldsByRowType();
        Set<RowType> rowTypes = index.getRowTypes();
        // Sizing hint only: an upper bound, since the ancestor / descendant
        // split is not known until the loop below has run.
        descendantRowTypes = new HashSet<>(rowTypes.size());
        for (RowType rowType : rowTypes) {
            if ((rowType == indexedRowType) || rowType.ancestorOf(indexedRowType)) {
                Integer ancestorDepth = rowType.table().getDepth();
                ancestorRowTypes.put(rowType, ancestorDepth);
            }
            else if (indexedRowType.ancestorOf(rowType)) {
                descendantRowTypes.add(rowType);
            }
            else {
                assert false : "Not ancestor or descendant " + rowType;
            }
        }
        this.writer = writer;
        this.updating = updating;
        currentDocument = null;
    }

    /**
     * Consume the next row of the stream.
     * <ul>
     * <li><code>null</code> marks the end of input and flushes the pending document.</li>
     * <li>A row of an ancestor type is remembered for later rows.</li>
     * <li>A row of the indexed type flushes the pending document and starts a
     *     new one, seeded with its own fields and those of its remembered
     *     ancestors.</li>
     * <li>A row of a descendant type contributes its fields to the pending
     *     document if the current indexed row is its ancestor.</li>
     * <li>Rows of any other type are ignored.</li>
     * </ul>
     *
     * @param row the next row, or <code>null</code> at end of input
     * @throws IOException if the writer fails
     */
    public void indexRow(Row row) throws IOException {
        if (row == null) {
            addDocument();
            return;
        }
        RowType rowType = row.rowType();
        Integer ancestorDepth = ancestorRowTypes.get(rowType);
        if (ancestorDepth != null) {
            int slot = ancestorDepth;
            ancestors[slot] = row;
            if (slot == ancestors.length - 1) {
                startDocument(row, rowType);
            }
        }
        else if (descendantRowTypes.contains(rowType)) {
            Row indexedRow = ancestors[ancestors.length - 1];
            if ((indexedRow != null) && indexedRow.ancestorOf(row)) {
                addFields(row, fieldsByRowType.get(rowType));
            }
        }
    }

    /** Flush the pending document and begin a new one for a row of the indexed type. */
    private void startDocument(Row row, RowType rowType) throws IOException {
        addDocument();
        currentDocument = new Document();
        getKeyBytes(row);
        addFields(row, fieldsByRowType.get(rowType));
        for (int i = 0; i < ancestors.length - 1; i++) {
            Row ancestor = ancestors[i];
            if (ancestor == null) {
                continue;
            }
            // We may have remembered an ancestor with no children and then
            // this row is an orphan: forget such a stale ancestor.
            if (ancestor.ancestorOf(row)) {
                addFields(ancestor, fieldsByRowType.get(ancestor.rowType()));
            }
            else {
                ancestors[i] = null;
            }
        }
    }

    /**
     * Index every row produced by the cursor.
     * The cursor is opened here and is closed again even if reading or
     * indexing fails part way through.
     *
     * @param cursor source of rows
     * @return the number of documents written
     * @throws IOException if the writer fails
     */
    public long indexRows(Cursor cursor) throws IOException {
        documentCount = 0;
        cursor.openTopLevel();
        try {
            Row row;
            do {
                row = cursor.next();
                // The trailing null is passed on deliberately: it flushes the last document.
                indexRow(row);
            } while (row != null);
        }
        finally {
            cursor.closeTopLevel();
        }
        return documentCount;
    }

    /**
     * Re-index the rows from the cursor; if they produce no document at all,
     * delete whatever is stored under the given hkey.
     *
     * @param cursor    source of the rows for the document
     * @param hkeyBytes raw hkey bytes identifying the document
     * @throws IOException if the writer fails
     */
    protected void updateDocument(Cursor cursor, byte[] hkeyBytes) throws IOException {
        if (indexRows(cursor) == 0) {
            String encoded = encodeBytes(hkeyBytes, 0, hkeyBytes.length);
            writer.deleteDocuments(new Term(IndexedField.KEY_FIELD, encoded));
            logger.debug("Deleted documents with encoded bytes: {}", encoded);
        }
    }

    /**
     * Hand the pending document, if any, to the writer and count it.
     * Does nothing when no document is pending.
     *
     * @throws IOException if the writer fails
     */
    protected void addDocument() throws IOException {
        if (currentDocument == null) {
            return;
        }
        if (updating) {
            writer.updateDocument(new Term(IndexedField.KEY_FIELD, keyEncodedString),
                                  currentDocument);
            logger.debug("Updated {}", currentDocument);
        }
        else {
            writer.addDocument(currentDocument);
            logger.debug("Added {}", currentDocument);
        }
        documentCount++;
        currentDocument = null;
    }

    /**
     * Encode the row's hkey, remember it as the key of the pending document and
     * store it in that document's key field.
     *
     * @param row the row of the indexed type that starts the document
     */
    protected void getKeyBytes(Row row) {
        byte[] bytes = row.hKey().hKeyBytes();
        keyEncodedString = encodeBytes(bytes, 0, bytes.length);
        Field field = new StringField(IndexedField.KEY_FIELD, keyEncodedString, Store.YES);
        currentDocument.add(field);
    }

    /**
     * Add one document field per indexed field, taking the values from the row.
     *
     * @param row    the row supplying the values
     * @param fields the fields to extract; <code>null</code> means none
     * @throws IOException declared for the benefit of overriding implementations
     */
    protected void addFields(Row row, List<IndexedField> fields) throws IOException {
        if (fields == null) {
            return;
        }
        for (IndexedField indexedField : fields) {
            ValueSource value = row.value(indexedField.getPosition());
            Field field = indexedField.getField(value);
            currentDocument.add(field);
        }
    }

    /** Encode a slice of bytes as the Base64 string used for the key field. */
    static String encodeBytes(byte[] bytes, int offset, int length) {
        // TODO: needs to be more efficient?
        return Strings.toBase64(bytes, offset, length);
    }

    /** Inverse of {@link #encodeBytes}. */
    static byte[] decodeString(String st) {
        return Strings.fromBase64(st);
    }

    /** Drop the references to the remembered rows. */
    @Override
    public void close() {
        Arrays.fill(ancestors, null);
    }
}