package org.wikibrain.lucene; import gnu.trove.iterator.TIntIterator; import org.apache.lucene.document.Field; import org.apache.lucene.document.TextField; import org.wikibrain.core.dao.DaoException; import org.wikibrain.core.dao.LocalPageDao; import org.wikibrain.core.dao.RawPageDao; import org.wikibrain.core.dao.RedirectDao; import org.wikibrain.core.model.LocalPage; import org.wikibrain.core.model.RawPage; /** * This class builds custom Lucene TextFields from pages based on * title, title synonyms as defined by redirects, and plain text. * * @author Ari Weiland */ public class TextFieldBuilder { private final LocalPageDao localPageDao; private final RawPageDao rawPageDao; private final RedirectDao redirectDao; public TextFieldBuilder(LocalPageDao localPageDao, RawPageDao rawPageDao, RedirectDao redirectDao) { this.localPageDao = localPageDao; this.rawPageDao = rawPageDao; this.redirectDao = redirectDao; try { localPageDao.setFollowRedirects(false); } catch (DaoException e) { throw new RuntimeException(e); } } /** * Builds a lucene text field for page based on the specified text field elements * * @param page * @param elements * @return * @throws DaoException */ public TextField buildTextField(LocalPage page, TextFieldElements elements) throws DaoException { return buildTextField( page, rawPageDao.getById(page.getLanguage(), page.getLocalId()), elements); } /** * Builds a lucene text field for page based on the specified text field elements * * @param page * @param elements * @return * @throws DaoException */ public TextField buildTextField(RawPage page, TextFieldElements elements) throws DaoException { return buildTextField( localPageDao.getById(page.getLanguage(), page.getLocalId()), page, elements); } private TextField buildTextField(LocalPage localPage, RawPage rawPage, TextFieldElements elements) throws DaoException { StringBuilder sb = new StringBuilder(); String title = rawPage.getTitle().getCanonicalTitle(); for (int i=0; i<elements.usesTitle(); i++) { sb.append(title); sb.append(" "); } if (elements.usesRedirects()) { TIntIterator iterator = redirectDao.getRedirects(localPage).iterator(); while (iterator.hasNext()) { sb.append(localPageDao .getById(localPage.getLanguage(), iterator.next()) .getTitle() .getCanonicalTitle()); sb.append(" "); } } if (elements.usesPlainText()) { String plainText = rawPage.getPlainText(); sb.append(plainText); } return new TextField(elements.getTextFieldName(), sb.toString().trim(), Field.Store.YES); } }