/*
* Copyright (C) 2010 - 2013 Interactive Media Management
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package dk.i2m.converge.ejb.facades;
import dk.i2m.converge.core.ConfigurationKey;
import dk.i2m.converge.core.DataNotFoundException;
import dk.i2m.converge.core.content.NewsItem;
import dk.i2m.converge.core.content.NewsItemActor;
import dk.i2m.converge.core.content.NewsItemPlacement;
import dk.i2m.converge.core.content.catalogue.MediaItem;
import dk.i2m.converge.core.content.catalogue.MediaItemRendition;
import dk.i2m.converge.core.metadata.*;
import dk.i2m.converge.core.search.IndexQueueEntry;
import dk.i2m.converge.core.search.QueueEntryOperation;
import dk.i2m.converge.core.search.QueueEntryType;
import dk.i2m.converge.core.search.SearchEngineIndexingException;
import dk.i2m.converge.core.security.UserAccount;
import dk.i2m.converge.core.utils.BeanComparator;
import dk.i2m.converge.domain.search.IndexField;
import dk.i2m.converge.domain.search.SearchFacet;
import dk.i2m.converge.domain.search.SearchResult;
import dk.i2m.converge.domain.search.SearchResults;
import dk.i2m.converge.ejb.services.*;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.text.DateFormat;
import java.text.MessageFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.annotation.Resource;
import javax.ejb.*;
import org.apache.commons.lang.StringUtils;
import org.apache.poi.hssf.usermodel.HSSFHeader;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hssf.usermodel.HeaderFooter;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.ss.util.WorkbookUtil;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrRequest.METHOD;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.BinaryRequestWriter;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
import org.apache.tika.Tika;
/**
* Stateless session bean implementing a search engine service.
*
* @author <a href="mailto:allan@i2m.dk">Allan Lykke Christensen</a>
*/
@Stateless
public class SearchEngineBean implements SearchEngineLocal {
/** Logger shared by all instances of this bean. */
private static final Logger LOG = Logger.getLogger(SearchEngineBean.class.getName());
/** Source of the search engine connection settings read in {@code getSolrServer()}. */
@EJB
private ConfigurationServiceLocal cfgService;
/** Used to look up the calling user's account when generating reports. */
@EJB
private UserFacadeLocal userFacade;
/** Persistence access for the {@link IndexQueueEntry} queue. */
@EJB
private DaoServiceLocal daoService;
/** Used to load the news items that are indexed or included in reports. */
@EJB
private NewsItemFacadeLocal newsItemFacade;
/** Used to load the media items that are indexed. */
@EJB
private CatalogueFacadeLocal catalogueFacade;
/** Used to extract the text content of document renditions during indexing. */
@EJB
private MetaDataServiceLocal metaDataService;
/** Session context; read in {@code generateReport} to obtain the caller principal. */
@Resource
private SessionContext ctx;
/**
 * Formats the date bounds of range queries in {@code search}.
 * NOTE(review): the pattern appends a literal 'Z', but no time zone is set
 * on this format anywhere in this class, so dates are presumably rendered
 * in the JVM default zone - confirm that this matches what the index expects.
 */
private final DateFormat solrDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");
/**
 * Queues an item for (re-)indexing or removal from the index, unless an
 * identical request (same item, type and operation) is already queued.
 *
 * @param type      Type of item to queue
 * @param id        Unique identifier of the item to queue
 * @param operation Operation to carry out on the search engine index
 * @return The already queued entry if one exists, otherwise the newly
 *         created entry
 */
@Override
public IndexQueueEntry addToIndexQueue(QueueEntryType type, Long id, QueueEntryOperation operation) {
    IndexQueueEntry entry = new IndexQueueEntry(type, id, operation);
    // The duplicate check must match on the identifier of the queued item
    // (entryId). getId() is the identifier of the queue row itself (it is
    // what removeFromQueue receives), so using it here would never match
    // an existing request and duplicates would pile up in the queue.
    Map<String, Object> params = QueryBuilder.
            with("entryId", entry.getEntryId()).
            and("type", entry.getType()).
            and("operation", entry.getOperation()).parameters();
    List<IndexQueueEntry> entries = daoService.findWithNamedQuery(IndexQueueEntry.FIND_BY_TYPE_ID_AND_OPERATION, params);
    if (entries.isEmpty()) {
        return daoService.create(entry);
    }
    return entries.get(0);
}
/**
 * Fetches every entry in the indexing queue, ordered by the
 * {@code added} property of the entries.
 *
 * @return Sorted {@link List} of all queued {@link IndexQueueEntry}s
 */
@Override
@TransactionAttribute(TransactionAttributeType.NEVER)
public List<IndexQueueEntry> getIndexQueue() {
    final List<IndexQueueEntry> pending = daoService.findAll(IndexQueueEntry.class);
    final BeanComparator byAdded = new BeanComparator("added", false);
    Collections.sort(pending, byAdded);
    return pending;
}
/**
 * Deletes a single entry from the indexing queue.
 *
 * @param id Unique identifier of the {@link IndexQueueEntry} to delete
 */
@Override
@TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
public void removeFromQueue(Long id) {
    final Class<IndexQueueEntry> queueEntryClass = IndexQueueEntry.class;
    daoService.delete(queueEntryClass, id);
}
/**
 * Deletes the document with the given identifier from the search engine
 * index. Communication failures are logged and not propagated.
 *
 * @param id Unique identifier of the item to remove
 */
@Override
public void removeItem(Long id) {
    final String documentId = String.valueOf(id);
    try {
        final SolrServer server = getSolrServer();
        server.deleteById(documentId);
    } catch (SolrServerException solrFailure) {
        LOG.log(Level.SEVERE, solrFailure.getMessage());
        LOG.log(Level.FINEST, "", solrFailure);
    } catch (IOException ioFailure) {
        LOG.log(Level.SEVERE, ioFailure.getMessage());
        LOG.log(Level.FINEST, "", ioFailure);
    }
}
/**
 * Works through the indexing queue. Removal requests delete the document
 * from the index; all other requests load the news item or media item and
 * (re-)index it. An entry is taken off the queue once it has been handled
 * or when the item it refers to no longer exists; entries that fail are
 * left in the queue.
 */
@Override
public void processIndexingQueue() {
    final SolrServer server = getSolrServer();
    for (IndexQueueEntry queued : getIndexQueue()) {
        // Removal requests only need the identifier of the document
        if (queued.getOperation().equals(QueueEntryOperation.REMOVE)) {
            try {
                server.deleteById(String.valueOf(queued.getEntryId()));
                removeFromQueue(queued.getId());
            } catch (Exception failure) {
                LOG.log(Level.WARNING, "{0} #{1} could not be removed from index", new Object[]{queued.getType().name(), queued.getEntryId()});
                LOG.log(Level.FINEST, "", failure);
            }
            continue;
        }

        if (queued.getType() == QueueEntryType.NEWS_ITEM) {
            try {
                final NewsItem story = newsItemFacade.findNewsItemById(queued.getEntryId());
                index(story, server);
                removeFromQueue(queued.getId());
            } catch (DataNotFoundException missing) {
                LOG.log(Level.WARNING, "NewsItem #{0} does not exist in the database. Skipping indexing.", queued.getEntryId());
                removeFromQueue(queued.getId());
            } catch (SearchEngineIndexingException failure) {
                LOG.log(Level.WARNING, "NewsItem #{0} could not be indexed. {1}", new Object[]{queued.getEntryId(), failure.getMessage()});
                LOG.log(Level.FINEST, "", failure);
            }
        } else if (queued.getType() == QueueEntryType.MEDIA_ITEM) {
            try {
                final MediaItem media = catalogueFacade.findMediaItemById(queued.getEntryId());
                index(media, server);
                removeFromQueue(queued.getId());
            } catch (DataNotFoundException missing) {
                LOG.log(Level.WARNING, "MediaItem #{0} does not exist in the database. Skipping indexing.", queued.getEntryId());
                removeFromQueue(queued.getId());
            } catch (SearchEngineIndexingException failure) {
                LOG.log(Level.WARNING, "MediaItem #{0} could not be indexed. {1}", new Object[]{queued.getEntryId(), failure.getMessage()});
                LOG.log(Level.FINEST, "", failure);
            }
        }
    }
}
/**
 * {@inheritDoc}
 */
@Override
public SearchResults search(String query, int start, int rows, String... filterQueries) {
    // Delegate with the default ordering arguments: the "score" field
    // and a sort order flag of false.
    final String defaultSortField = "score";
    final boolean defaultSortOrder = false;
    return search(query, start, rows, defaultSortField, defaultSortOrder, filterQueries);
}
/**
 * Queries the search engine without any date restriction.
 *
 * @param query         Query string
 * @param start         First record to retrieve
 * @param rows          Number of rows to retrieve
 * @param sortField     Field to sort by
 * @param sortOrder     Ascending ({@code true}) or descending ({@code false})
 * @param filterQueries Filter queries to include in the search
 * @return {@link SearchResults} matching the {@code query}
 */
@Override
public SearchResults search(String query, int start, int rows, String sortField, boolean sortOrder, String... filterQueries) {
    // Pass the caller's sorting through; previously it was replaced with
    // the hard-coded defaults ("score", descending) and silently ignored.
    return search(query, start, rows, sortField, sortOrder, null, null, filterQueries);
}
/**
 * Queries the search engine.
 * <p/>
 * Besides the hits, the result contains the field facets and the monthly
 * date facets returned by the search engine. Facets whose filter query is
 * among {@code filterQueries} are marked as selected. If the search engine
 * reports an error it is logged and the (possibly empty) results gathered
 * so far are returned.
 *
 * @param query Query string
 * @param start First record to retrieve
 * @param rows Number of rows to retrieve
 * @param sortField Field to sort by
 * @param sortOrder Ascending ({@code true}) or descending ({@code false})
 * @param dateFrom Search results must not be older than this date
 *                 ({@code null} for no lower bound)
 * @param dateTo Search results must not be newer than this date
 *               ({@code null} for no upper bound)
 * @param filterQueries Filter queries to include in the search
 * @return {@link SearchResults} matching the {@code query}
 */
@Override
public SearchResults search(String query, int start, int rows, String sortField, boolean sortOrder, Date dateFrom, Date dateTo, String... filterQueries) {
    long startTime = System.currentTimeMillis();
    SearchResults searchResults = new SearchResults();

    // A caller may pass an explicit null for the varargs parameter;
    // treat that as "no filter queries" instead of failing later on.
    final String[] activeFilters = (filterQueries == null) ? new String[0] : filterQueries;

    try {
        // Date facet names arrive in the first format and are shown to
        // the user in the second
        final DateFormat facetDateParser = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
        final DateFormat facetLabelFormat = new SimpleDateFormat("MMMM yyyy");

        SolrQuery solrQuery = new SolrQuery();
        solrQuery.setStart(start);
        solrQuery.setRows(rows);

        StringBuilder queryString = new StringBuilder(query);

        // Check if the query has date restrictions
        if (dateFrom != null || dateTo != null) {
            // The "date:" field prefix is always required; only the
            // " AND " connector depends on there being a query already.
            if (!query.isEmpty()) {
                queryString.append(" AND ");
            }
            // NOTE(review): solrDateFormat emits a literal 'Z' but no UTC
            // time zone is set on it in this class - confirm the bounds
            // are meant to be formatted in the JVM default zone.
            queryString.append("date:[");
            queryString.append(dateFrom == null ? "*" : solrDateFormat.format(dateFrom));
            queryString.append(" TO ");
            queryString.append(dateTo == null ? "*" : solrDateFormat.format(dateTo));
            queryString.append("]");
        }

        solrQuery.setQuery(queryString.toString());
        solrQuery.setFacet(true);
        solrQuery.setSortField(sortField, sortOrder ? SolrQuery.ORDER.asc : SolrQuery.ORDER.desc);

        solrQuery.addFacetField(IndexField.TYPE.getName());
        solrQuery.addFacetField(IndexField.OUTLET.getName());
        solrQuery.addFacetField(IndexField.REPOSITORY.getName());
        solrQuery.addFacetField(IndexField.SECTION.getName());
        solrQuery.addFacetField(IndexField.SUBJECT.getName());
        solrQuery.addFacetField(IndexField.ORGANISATION.getName());
        solrQuery.addFacetField(IndexField.PERSON.getName());
        solrQuery.addFacetField(IndexField.LOCATION.getName());
        solrQuery.addFacetField(IndexField.POINT_OF_INTEREST.getName());
        solrQuery.addFilterQuery(activeFilters);
        solrQuery.setFacetMinCount(1);
        solrQuery.setIncludeScore(true);
        solrQuery.setHighlight(true).setHighlightSnippets(1);
        solrQuery.setParam("hl.fl", "title,story,caption");
        solrQuery.setParam("hl.fragsize", "500");
        solrQuery.setParam("hl.simple.pre", "<span class=\"searchHighlight\">");
        solrQuery.setParam("hl.simple.post", "</span>");
        solrQuery.setParam("facet.date", "date");
        solrQuery.setParam("facet.date.start", "NOW/YEAR-10YEAR");
        solrQuery.setParam("facet.date.end", "NOW");
        solrQuery.setParam("facet.date.gap", "+1MONTH");

        SolrServer srv = getSolrServer();
        // POST is used to support UTF-8
        QueryResponse qr = srv.query(solrQuery, METHOD.POST);
        SolrDocumentList sdl = qr.getResults();
        searchResults.setNumberOfResults(sdl.getNumFound());

        for (SolrDocument d : sdl) {
            // Copy all fields to map for easy access
            HashMap<String, Object> values = new HashMap<String, Object>();
            for (Iterator<Map.Entry<String, Object>> i = d.iterator(); i.hasNext();) {
                Map.Entry<String, Object> e2 = i.next();
                values.put(e2.getKey(), e2.getValue());
            }

            String type = (String) values.get("type");
            SearchResult hit = null;
            if ("Story".equalsIgnoreCase(type)) {
                hit = generateStoryHit(qr, values);
            } else if ("Media".equalsIgnoreCase(type)) {
                hit = generateMediaHit(qr, values);
            }

            // Documents of an unknown type produce no hit; skip them
            // before anything dereferences the hit.
            if (hit == null) {
                LOG.log(Level.FINE, "Skipping search hit with unsupported type: {0}", type);
                continue;
            }

            generateTags(hit, qr, values);
            hit.setScore((Float) d.getFieldValue("score"));
            searchResults.getHits().add(hit);
        }

        List<FacetField> facets = qr.getFacetFields();
        if (facets != null) {
            for (FacetField facet : facets) {
                List<FacetField.Count> facetEntries = facet.getValues();
                if (facetEntries == null) {
                    continue;
                }
                for (FacetField.Count fcount : facetEntries) {
                    if (!searchResults.getFacets().containsKey(facet.getName())) {
                        searchResults.getFacets().put(facet.getName(), new ArrayList<SearchFacet>());
                    }

                    SearchFacet sf = new SearchFacet(fcount.getName(), fcount.getAsFilterQuery(), fcount.getCount());

                    // Check if the filter query is already active
                    if (isActiveFilterQuery(activeFilters, fcount.getAsFilterQuery())) {
                        sf.setSelected(true);
                    }

                    // Ensure that the facet is not already there
                    if (!searchResults.getFacets().get(facet.getName()).contains(sf)) {
                        searchResults.getFacets().get(facet.getName()).add(sf);
                    }
                }
            }
        }

        List<FacetField> dateFacets = qr.getFacetDates();
        if (dateFacets != null) {
            for (FacetField facet : dateFacets) {
                List<FacetField.Count> facetEntries = facet.getValues();
                if (facetEntries == null) {
                    continue;
                }
                for (FacetField.Count fcount : facetEntries) {
                    if (fcount.getCount() == 0) {
                        continue;
                    }
                    if (!searchResults.getFacets().containsKey(facet.getName())) {
                        searchResults.getFacets().put(facet.getName(), new ArrayList<SearchFacet>());
                    }

                    // Show the month as e.g. "March 2013"; fall back to
                    // the raw facet name if it cannot be parsed
                    String facetLabel;
                    try {
                        Date facetDate = facetDateParser.parse(fcount.getName());
                        facetLabel = facetLabelFormat.format(facetDate);
                    } catch (ParseException ex) {
                        LOG.log(Level.SEVERE, ex.getMessage());
                        LOG.log(Level.FINEST, "", ex);
                        facetLabel = fcount.getName();
                    }

                    String realFilterQuery = "date:[" + fcount.getName() + " TO " + fcount.getName() + "+1MONTH]";
                    SearchFacet sf = new SearchFacet(facetLabel, realFilterQuery, fcount.getCount());

                    // Check if the filter query is already active
                    if (isActiveFilterQuery(activeFilters, realFilterQuery)) {
                        sf.setSelected(true);
                    }

                    // Ensure that the facet is not already there
                    if (!searchResults.getFacets().get(facet.getName()).contains(sf)) {
                        searchResults.getFacets().get(facet.getName()).add(sf);
                    }
                }
            }
        }
    } catch (SolrServerException ex) {
        LOG.log(Level.SEVERE, ex.getMessage());
        LOG.log(Level.FINEST, "", ex);
    }

    long endTime = System.currentTimeMillis();
    searchResults.setSearchTime(endTime - startTime);
    searchResults.setStart(start);
    searchResults.setResultsPerPage(rows);
    return searchResults;
}

/**
 * Determines if a facet's filter query is one of the filter queries of
 * the current search.
 *
 * @param activeFilters Filter queries of the current search (not {@code null})
 * @param candidate     Filter query of the facet
 * @return {@code true} if {@code candidate} equals one of the active
 *         filter queries, otherwise {@code false}
 */
private static boolean isActiveFilterQuery(String[] activeFilters, String candidate) {
    if (candidate == null) {
        return false;
    }
    for (String fq : activeFilters) {
        if (candidate.equals(fq)) {
            return true;
        }
    }
    return false;
}
/**
 * Generates an overview report (Excel workbook) of a set of
 * {@link SearchResults}. Each hit is looked up in the news item archive;
 * a news item without placements produces a single row, a news item with
 * placements produces one row per placement. Hits that cannot be found in
 * the archive are left out of the report.
 * <p/>
 * Labels are taken from the {@code ServiceMessages} bundle in the
 * preferred locale of the calling user, or in the default locale if the
 * user account cannot be found.
 *
 * @param results {@link SearchResults} for which to generate the report
 * @return Binary data representing the report
 */
@Override
public byte[] generateReport(SearchResults results) {
    ResourceBundle i18n;
    try {
        String uid = ctx.getCallerPrincipal().getName();
        UserAccount user = userFacade.findById(uid);
        Locale userLocale = user.getPreferredLocale();
        i18n = ResourceBundle.getBundle("dk.i2m.converge.i18n.ServiceMessages", userLocale);
    } catch (DataNotFoundException ex) {
        i18n = ResourceBundle.getBundle("dk.i2m.converge.i18n.ServiceMessages");
    }

    String lblSheetName = i18n.getString("SearchEngineBean_generateReport_SHEET_NAME");
    String lblHeaderLeft = i18n.getString("SearchEngineBean_generateReport_HEADER_LEFT");
    String lblHeaderRight = i18n.getString("SearchEngineBean_generateReport_HEADER_RIGHT");
    String lblFooterLeft = i18n.getString("SearchEngineBean_generateReport_FOOTER_LEFT");
    String lblFooterRight = i18n.getString("SearchEngineBean_generateReport_FOOTER_RIGHT");
    String lblDateFormat = i18n.getString("SearchEngineBean_generateReport_DATE_FORMAT");
    String[] columnHeaders = {
        i18n.getString("SearchEngineBean_generateReport_ROW_HEADER_ID"),
        i18n.getString("SearchEngineBean_generateReport_ROW_HEADER_DATE"),
        i18n.getString("SearchEngineBean_generateReport_ROW_HEADER_TITLE"),
        i18n.getString("SearchEngineBean_generateReport_ROW_HEADER_OUTLET"),
        i18n.getString("SearchEngineBean_generateReport_ROW_HEADER_SECTION")
    };

    HSSFWorkbook wb = new HSSFWorkbook();
    String sheetName = WorkbookUtil.createSafeSheetName(lblSheetName);
    int overviewSheetRow = 0;

    // Create style with borders
    CellStyle style = wb.createCellStyle();
    applyThinBlackBorders(style);

    // Create style for date cells
    CreationHelper createHelper = wb.getCreationHelper();
    CellStyle dateStyle = wb.createCellStyle();
    dateStyle.setDataFormat(createHelper.createDataFormat().getFormat(lblDateFormat));
    applyThinBlackBorders(dateStyle);

    HSSFSheet overviewSheet = wb.createSheet(sheetName);

    // Create sheet header
    HSSFHeader sheetHeader = overviewSheet.getHeader();
    sheetHeader.setLeft(lblHeaderLeft);
    sheetHeader.setRight(lblHeaderRight);

    // Create sheet footer
    Footer footer = overviewSheet.getFooter();
    String footerLeft = MessageFormat.format(lblFooterLeft, new Object[]{HeaderFooter.page(), HeaderFooter.numPages()});
    String footerRight = MessageFormat.format(lblFooterRight, new Object[]{HeaderFooter.date(), HeaderFooter.time()});
    footer.setLeft(footerLeft);
    footer.setRight(footerRight);

    // Freeze the header row
    overviewSheet.createFreezePane(0, 1, 0, 1);

    Row row = overviewSheet.createRow(overviewSheetRow++);
    for (int column = 0; column < columnHeaders.length; column++) {
        Cell headerCell = row.createCell(column);
        headerCell.setCellValue(columnHeaders[column]);
        headerCell.setCellStyle(style);
    }

    for (SearchResult result : results.getHits()) {
        try {
            NewsItem newsItem = newsItemFacade.findNewsItemFromArchive(result.getId());

            if (newsItem.getPlacements().isEmpty()) {
                // An unplaced item may not have an outlet either
                String outletTitle = (newsItem.getOutlet() == null) ? "" : newsItem.getOutlet().getTitle();

                row = overviewSheet.createRow(overviewSheetRow++);
                row.createCell(0).setCellValue(result.getId());
                row.getCell(0).setCellStyle(style);
                row.createCell(1).setCellValue(newsItem.getUpdated());
                row.getCell(1).setCellStyle(dateStyle);
                row.createCell(2).setCellValue(newsItem.getTitle());
                row.getCell(2).setCellStyle(style);
                row.createCell(3).setCellValue(outletTitle);
                row.getCell(3).setCellStyle(style);
                row.createCell(4).setCellValue("");
                row.getCell(4).setCellStyle(style);
            } else {
                for (NewsItemPlacement nip : newsItem.getPlacements()) {
                    try {
                        // Every placement gets a row of its own. Creating
                        // a row at an index that is already in use would
                        // replace the row written for the previous
                        // placement, so the index advances per row.
                        row = overviewSheet.createRow(overviewSheetRow++);
                        row.createCell(0).setCellValue(result.getId());
                        row.getCell(0).setCellStyle(style);
                        row.createCell(1).setCellValue(nip.getEdition().getPublicationDate());
                        row.getCell(1).setCellStyle(dateStyle);
                        row.createCell(2).setCellValue(newsItem.getTitle());
                        row.getCell(2).setCellStyle(style);
                        row.createCell(3).setCellValue(nip.getOutlet().getTitle());
                        row.getCell(3).setCellStyle(style);
                        row.createCell(4).setCellValue(nip.getSection().getFullName());
                        row.getCell(4).setCellStyle(style);
                    } catch (Exception ex) {
                        LOG.log(Level.INFO, "Failed to output line in report. {0}", ex.getMessage());
                        LOG.log(Level.FINEST, "", ex);
                    }
                }
            }
        } catch (DataNotFoundException ex) {
            // Not every hit is an archived news item; leave it out of the
            // report but keep a trace of the omission.
            LOG.log(Level.FINE, "Search result #{0} was not found in the news item archive. Omitting it from the report.", result.getId());
        }
    }

    // Auto-size every column of the report
    for (int column = 0; column < columnHeaders.length; column++) {
        overviewSheet.autoSizeColumn(column);
    }

    wb.setRepeatingRowsAndColumns(0, 0, 0, 0, 0);
    overviewSheet.setFitToPage(true);
    overviewSheet.setAutobreaks(true);

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    try {
        wb.write(baos);
    } catch (IOException ex) {
        LOG.log(Level.SEVERE, ex.getMessage());
        LOG.log(Level.FINEST, "", ex);
    }
    return baos.toByteArray();
}

/**
 * Gives a cell style a thin black border on all four sides.
 *
 * @param cellStyle Style to add the borders to
 */
private static void applyThinBlackBorders(CellStyle cellStyle) {
    final short black = IndexedColors.BLACK.getIndex();
    cellStyle.setBorderBottom(CellStyle.BORDER_THIN);
    cellStyle.setBottomBorderColor(black);
    cellStyle.setBorderLeft(CellStyle.BORDER_THIN);
    cellStyle.setLeftBorderColor(black);
    cellStyle.setBorderRight(CellStyle.BORDER_THIN);
    cellStyle.setRightBorderColor(black);
    cellStyle.setBorderTop(CellStyle.BORDER_THIN);
    cellStyle.setTopBorderColor(black);
}
/**
 * Asks the Solr server to optimise the search engine index.
 * <p/>
 * @throws SearchEngineIndexingException If the request to the Solr server
 *                                       failed
 */
@Override
public void optimizeIndex() throws SearchEngineIndexingException {
    try {
        final SolrServer server = getSolrServer();
        server.optimize();
    } catch (SolrServerException solrFailure) {
        throw new SearchEngineIndexingException(solrFailure);
    } catch (IOException ioFailure) {
        throw new SearchEngineIndexingException(ioFailure);
    }
}
/**
 * Generates a {@link SearchResult} for a media item.
 * <p/>
 * The description is taken from the highlighted story snippets if there
 * are any, otherwise from the stored story, otherwise from the stored
 * caption (both abbreviated to 500 characters). The dates of the hit are
 * not set here; {@code search} populates them through
 * {@code generateTags}, exactly as for story hits.
 *
 * @param qr QueryResponse from Solr
 * @param values Fields available
 * @return {@link SearchResult}
 */
private SearchResult generateMediaHit(QueryResponse qr, HashMap<String, Object> values) {
    String id = (String) values.get(IndexField.ID.getName());
    StringBuilder caption = new StringBuilder();
    StringBuilder title = new StringBuilder();
    StringBuilder note = new StringBuilder();

    // Highlighting may be missing altogether or just for this document
    Map<String, Map<String, List<String>>> allHighlighting = qr.getHighlighting();
    Map<String, List<String>> highlighting = (allHighlighting == null) ? null : allHighlighting.get(id);
    List<String> storyHighlights = (highlighting == null) ? null : highlighting.get(IndexField.STORY.getName());
    List<String> titleHighlights = (highlighting == null) ? null : highlighting.get(IndexField.TITLE.getName());

    if (storyHighlights != null) {
        for (String hl : storyHighlights) {
            caption.append(hl);
        }
    } else if (values.get(IndexField.STORY.getName()) != null) {
        // The fallback must test the stored field, not the highlighting
        // map: that map is null when the document has no highlights and
        // has no story entry whenever this branch is reached.
        caption.append(StringUtils.abbreviate((String) values.get(IndexField.STORY.getName()), 500));
    } else {
        caption.append(StringUtils.abbreviate((String) values.get(IndexField.CAPTION.getName()), 500));
    }

    if (titleHighlights != null) {
        for (String hl : titleHighlights) {
            title.append(hl);
        }
    } else {
        title.append((String) values.get(IndexField.TITLE.getName()));
    }

    String format = (String) values.get(IndexField.MEDIA_FORMAT.getName());
    note.append((String) values.get(IndexField.TYPE.getName()));
    note.append(" - ");
    note.append(format);
    note.append(" - ");
    note.append((String) values.get(IndexField.REPOSITORY.getName()));

    SearchResult hit = new SearchResult();
    hit.setId(Long.valueOf(id));
    hit.setTitle(title.toString());
    hit.setDescription(caption.toString());
    hit.setNote(note.toString());
    hit.setLink("{0}/MediaItemArchive.xhtml?id=" + id);
    hit.setType((String) values.get(IndexField.TYPE.getName()));
    hit.setFormat(format);

    if (values.containsKey(IndexField.THUMB_URL.getName())) {
        hit.setPreview(true);
        hit.setPreviewLink((String) values.get(IndexField.THUMB_URL.getName()));
        hit.setDirectLink((String) values.get(IndexField.DIRECT_URL.getName()));
        try {
            Tika tika = new Tika();
            String contentType = tika.detect(new URL(hit.getPreviewLink()));
            hit.setPreviewContentType(contentType);
        } catch (IOException ex) {
            LOG.log(Level.WARNING, "Could not set the content type of the preview link. {0}", new Object[]{ex.getMessage()});
        }
    } else {
        hit.setPreview(false);
    }

    return hit;
}
/**
 * Builds the {@link SearchResult} of a story from a Solr document.
 * Highlighted snippets are preferred for the title and description; the
 * stored field values are used when there is no highlighting. The note
 * holds the type, the word count and the placements of the story.
 *
 * @param qr QueryResponse from Solr
 * @param values Fields available
 * @return {@link SearchResult}
 */
private SearchResult generateStoryHit(QueryResponse qr, HashMap<String, Object> values) {
    final String docId = (String) values.get(IndexField.ID.getName());
    final Map<String, List<String>> snippets = qr.getHighlighting().get(docId);

    // Description: highlighted story snippets, or the abbreviated story
    final List<String> storySnippets = (snippets == null) ? null : snippets.get(IndexField.STORY.getName());
    final StringBuilder description = new StringBuilder();
    if (storySnippets == null) {
        description.append(StringUtils.abbreviate((String) values.get(IndexField.STORY.getName()), 500));
    } else {
        for (String snippet : storySnippets) {
            description.append(snippet);
        }
    }

    // Title: highlighted title snippets, or the stored title
    final List<String> titleSnippets = (snippets == null) ? null : snippets.get(IndexField.TITLE.getName());
    final StringBuilder heading = new StringBuilder();
    if (titleSnippets == null) {
        heading.append((String) values.get(IndexField.TITLE.getName()));
    } else {
        for (String snippet : titleSnippets) {
            heading.append(snippet);
        }
    }

    // Note: type, word count and placement(s)
    final StringBuilder remark = new StringBuilder();
    remark.append((String) values.get(IndexField.TYPE.getName())).append(" - Words: ");
    final String wordCountKey = IndexField.WORD_COUNT.getName();
    remark.append(values.containsKey(wordCountKey) ? String.valueOf(values.get(wordCountKey)) : "Unknown");
    remark.append("<br/>");

    final String placementKey = IndexField.PLACEMENT.getName();
    if (values.containsKey(placementKey)) {
        final Object placementValue = values.get(placementKey);
        if (placementValue instanceof String) {
            remark.append((String) placementValue);
        } else if (placementValue instanceof List) {
            for (String placement : (List<String>) placementValue) {
                remark.append(placement).append("<br/>");
            }
        } else {
            LOG.warning("Unexpected value returned from search engine");
        }
    }

    final SearchResult storyHit = new SearchResult();
    storyHit.setId(Long.valueOf(docId));
    storyHit.setTitle(heading.toString());
    storyHit.setDescription(description.toString());
    storyHit.setNote(remark.toString());
    storyHit.setLink("{0}/NewsItemArchive.xhtml?id=" + docId);
    storyHit.setType((String) values.get(IndexField.TYPE.getName()));
    return storyHit;
}
/**
 * Creates a client for the Apache Solr server, configured from the
 * search engine settings of the configuration service.
 *
 * @return Client for the Apache Solr server
 * @throws IllegalStateException If the configured search engine URL is
 *                               malformed
 */
private SolrServer getSolrServer() {
    try {
        // Read the complete configuration before creating the client
        final String serverUrl = cfgService.getString(ConfigurationKey.SEARCH_ENGINE_URL);
        final Integer soTimeout = cfgService.getInteger(ConfigurationKey.SEARCH_ENGINE_SOCKET_TIMEOUT);
        final Integer connTimeout = cfgService.getInteger(ConfigurationKey.SEARCH_ENGINE_CONNECTION_TIMEOUT);
        final Integer perHostLimit = cfgService.getInteger(ConfigurationKey.SEARCH_ENGINE_MAX_TOTAL_CONNECTIONS_PER_HOST);
        final Integer totalLimit = cfgService.getInteger(ConfigurationKey.SEARCH_ENGINE_MAX_TOTAL_CONNECTIONS);
        final Integer retryLimit = cfgService.getInteger(ConfigurationKey.SEARCH_ENGINE_MAX_RETRIES);
        final Boolean redirects = cfgService.getBoolean(ConfigurationKey.SEARCH_ENGINE_FOLLOW_REDIRECTS);
        final Boolean compression = cfgService.getBoolean(ConfigurationKey.SEARCH_ENGINE_ALLOW_COMPRESSION);

        final CommonsHttpSolrServer client = new CommonsHttpSolrServer(serverUrl);
        client.setRequestWriter(new BinaryRequestWriter());
        client.setSoTimeout(soTimeout);
        client.setConnectionTimeout(connTimeout);
        client.setDefaultMaxConnectionsPerHost(perHostLimit);
        client.setMaxTotalConnections(totalLimit);
        client.setFollowRedirects(redirects);
        client.setAllowCompression(compression);
        client.setMaxRetries(retryLimit);
        return client;
    } catch (MalformedURLException badUrl) {
        LOG.log(Level.SEVERE, "Invalid search engine configuration. {0}", badUrl.getMessage());
        LOG.log(Level.FINEST, "", badUrl);
        throw new IllegalStateException("Invalid search engine configuration", badUrl);
    }
}
/**
 * Copies the dates and the concepts (tags) of a Solr document onto a
 * search hit. Both fields may hold a single value or a list of values.
 *
 * @param hit    Hit to update; nothing is done if it is {@code null},
 *               which happens when the document was of a type for which
 *               no hit could be generated
 * @param qr     QueryResponse from Solr (currently not used)
 * @param values Fields available
 */
private void generateTags(SearchResult hit, QueryResponse qr, HashMap<String, Object> values) {
    if (hit == null) {
        return;
    }

    final String dateField = IndexField.DATE.getName();
    if (values.containsKey(dateField)) {
        Object dateValue = values.get(dateField);
        if (dateValue instanceof Date) {
            hit.addDate((Date) dateValue);
        } else if (dateValue instanceof List) {
            @SuppressWarnings("unchecked")
            List<Date> dates = (List<Date>) dateValue;
            hit.setDates(dates);
        } else {
            LOG.warning("Unexpected value returned from search engine");
        }
    }

    List<String> tags = new ArrayList<String>();
    final String conceptField = IndexField.CONCEPT.getName();
    if (values.containsKey(conceptField)) {
        Object conceptValue = values.get(conceptField);
        if (conceptValue instanceof String) {
            tags.add((String) conceptValue);
        } else if (conceptValue instanceof List) {
            // Copy rather than alias the list held by the values map
            @SuppressWarnings("unchecked")
            List<String> concepts = (List<String>) conceptValue;
            tags.addAll(concepts);
        } else {
            LOG.warning("Unexpected value returned from search engine");
        }
    }
    hit.setTags(tags.toArray(new String[tags.size()]));
}
/**
 * Adds a news item to the search engine index, or replaces the document
 * already indexed under the same identifier.
 * <p/>
 * The document holds the texts of the news item, its language (if one is
 * set), one set of date/edition/section/outlet/placement values per
 * placement, the actors (also under a field named after their role) and
 * the concepts, both as a whole and split by kind of concept.
 *
 * @param ni         News item to index
 * @param solrServer Solr server to send the document to
 * @throws SearchEngineIndexingException If the document could not be sent
 *                                       to the Solr server
 */
private void index(NewsItem ni, SolrServer solrServer) throws SearchEngineIndexingException {
    SolrInputDocument solrDoc = new SolrInputDocument();
    solrDoc.addField(IndexField.ID.getName(), ni.getId(), 1.0f);
    solrDoc.addField(IndexField.TITLE.getName(), ni.getTitle(), 1.0f);
    solrDoc.addField(IndexField.TYPE.getName(), "Story");
    solrDoc.addField(IndexField.BYLINE.getName(), ni.getByLine());
    solrDoc.addField(IndexField.BRIEF.getName(), ni.getBrief());
    solrDoc.addField(IndexField.STORY.getName(), dk.i2m.converge.core.utils.StringUtils.stripHtml(ni.getStory()));

    // The language is optional. Check for it explicitly and guard both
    // language fields, rather than catching the NullPointerException of
    // the first and then failing on the second.
    if (ni.getLanguage() != null) {
        solrDoc.addField(IndexField.LANG.getName(), ni.getLanguage().getCode());
        solrDoc.addField(IndexField.LANGUAGE.getName(), ni.getLanguage().getName());
    }

    solrDoc.addField(IndexField.WORD_COUNT.getName(), ni.getWordCount());

    for (NewsItemPlacement placement : ni.getPlacements()) {
        if (placement.getEdition() != null) {
            if (placement.getEdition().getPublicationDate() != null) {
                solrDoc.addField(IndexField.DATE.getName(), placement.getEdition().getPublicationDate().getTime());
            }
            solrDoc.addField(IndexField.EDITION_NUMBER.getName(), placement.getEdition().getNumber());
            solrDoc.addField(IndexField.EDITION_VOLUME.getName(), placement.getEdition().getVolume());
        }
        if (placement.getSection() != null) {
            solrDoc.addField(IndexField.SECTION.getName(), placement.getSection().getFullName());
        }
        if (placement.getOutlet() != null) {
            solrDoc.addField(IndexField.OUTLET.getName(), placement.getOutlet().getTitle());
        }
        solrDoc.addField(IndexField.PLACEMENT.getName(), placement.toString());
    }

    for (NewsItemActor actor : ni.getActors()) {
        solrDoc.addField(IndexField.ACTOR.getName(), actor.getUser().getFullName());
        // Dynamic fields for the actors role
        solrDoc.addField(actor.getRole().getName(), actor.getUser().getFullName());
    }

    for (Concept concept : ni.getConcepts()) {
        if (concept instanceof Subject) {
            solrDoc.addField(IndexField.SUBJECT.getName(), concept.getFullTitle());
        }
        if (concept instanceof Person) {
            solrDoc.addField(IndexField.PERSON.getName(), concept.getFullTitle());
        }
        if (concept instanceof Organisation) {
            solrDoc.addField(IndexField.ORGANISATION.getName(), concept.getFullTitle());
        }
        if (concept instanceof GeoArea) {
            solrDoc.addField(IndexField.LOCATION.getName(), concept.getFullTitle());
        }
        if (concept instanceof PointOfInterest) {
            solrDoc.addField(IndexField.POINT_OF_INTEREST.getName(), concept.getFullTitle());
        }
        // Every concept is also indexed in the general concept field
        solrDoc.addField(IndexField.CONCEPT.getName(), concept.getFullTitle());
    }

    try {
        solrServer.add(solrDoc);
    } catch (SolrServerException ex) {
        throw new SearchEngineIndexingException(ex);
    } catch (IOException ex) {
        throw new SearchEngineIndexingException(ex);
    }
}
/**
 * Adds a media item to the search engine index, or replaces the document
 * already indexed under the same identifier. Media items without an
 * original rendition are not indexed; this is logged.
 * <p/>
 * For document renditions the extracted text content is indexed together
 * with the description of the media item in the story field.
 *
 * @param mi         Media item to index
 * @param solrServer Solr server to send the document to
 * @throws SearchEngineIndexingException If the document could not be sent
 *                                       to the Solr server
 */
public void index(MediaItem mi, SolrServer solrServer) throws SearchEngineIndexingException {
    if (!mi.isOriginalAvailable()) {
        LOG.log(Level.INFO, "Ignoring MediaItem #{0}. Missing original {1} rendition", new Object[]{mi.getId(), mi.getCatalogue().getOriginalRendition().getName()});
        return;
    }

    MediaItemRendition mir = mi.getOriginal();

    SolrInputDocument solrDoc = new SolrInputDocument();
    solrDoc.addField(IndexField.ID.getName(), mi.getId(), 1.0f);
    solrDoc.addField(IndexField.TYPE.getName(), "Media");

    String mediaFormat;
    String story = "";
    if (mir.isAudio()) {
        mediaFormat = "Audio";
    } else if (mir.isVideo()) {
        mediaFormat = "Video";
    } else if (mir.isImage()) {
        mediaFormat = "Image";
    } else if (mir.isDocument()) {
        mediaFormat = "Document";
        // Only documents have text content that can be extracted
        story = metaDataService.extractContent(mir);
    } else {
        mediaFormat = "Unknown";
    }

    // The stripped description is used for both the story and the caption
    String description = dk.i2m.converge.core.utils.StringUtils.stripHtml(mi.getDescription());

    solrDoc.addField(IndexField.MEDIA_FORMAT.getName(), mediaFormat);
    solrDoc.addField(IndexField.TITLE.getName(), mi.getTitle(), 1.0f);
    solrDoc.addField(IndexField.BYLINE.getName(), mi.getByLine());
    solrDoc.addField(IndexField.STORY.getName(), description + " " + story);
    solrDoc.addField(IndexField.CAPTION.getName(), description);
    solrDoc.addField(IndexField.CONTENT_TYPE.getName(), mir.getContentType());
    solrDoc.addField(IndexField.REPOSITORY.getName(), mi.getCatalogue().getName());

    if (mi.getMediaDate() != null) {
        solrDoc.addField(IndexField.DATE.getName(), mi.getMediaDate().getTime());
    }

    if (mi.isPreviewAvailable()) {
        solrDoc.addField(IndexField.THUMB_URL.getName(), mi.getPreview().getAbsoluteFilename());
        solrDoc.addField(IndexField.DIRECT_URL.getName(), mi.getPreview().getFileLocation());
    }

    // A media item without an owner is indexed without an actor instead
    // of aborting the indexing with a NullPointerException
    if (mi.getOwner() != null) {
        solrDoc.addField(IndexField.ACTOR.getName(), mi.getOwner().getFullName());
    }

    for (Concept concept : mi.getConcepts()) {
        if (concept instanceof Subject) {
            solrDoc.addField(IndexField.SUBJECT.getName(), concept.getFullTitle());
        }
        if (concept instanceof Person) {
            solrDoc.addField(IndexField.PERSON.getName(), concept.getFullTitle());
        }
        if (concept instanceof Organisation) {
            solrDoc.addField(IndexField.ORGANISATION.getName(), concept.getFullTitle());
        }
        if (concept instanceof GeoArea) {
            solrDoc.addField(IndexField.LOCATION.getName(), concept.getFullTitle());
        }
        if (concept instanceof PointOfInterest) {
            solrDoc.addField(IndexField.POINT_OF_INTEREST.getName(), concept.getFullTitle());
        }
        // Every concept is also indexed in the general concept field
        solrDoc.addField(IndexField.CONCEPT.getName(), concept.getFullTitle());
    }

    try {
        solrServer.add(solrDoc);
    } catch (SolrServerException ex) {
        throw new SearchEngineIndexingException(ex);
    } catch (IOException ex) {
        throw new SearchEngineIndexingException(ex);
    }
}
}