package zh.solr.se.searcher.relevance;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import zh.solr.se.searcher.solr.SolrUtil;
import zh.solr.se.searcher.util.ConfigFactory;
import zh.solr.se.searcher.util.ConfigProperties;
import zh.solr.se.searcher.util.StringUtil;
public class SearchUtil {
/**
* Read the configured field boosts from config file
* @param boostsFieldName different core had different boosts, and they are configured under different field name
* @return the boosts as a map with the Solr field name as the key and the boost as the value
*/
public static Map<String, Float> getFieldBoostsFromConfig(String boostsFieldName) {
ConfigProperties searchProperties = ConfigFactory.getInstance()
.getConfigProperties(ConfigFactory.SEARCH_CONFIG_PATH);
String boostsStr = searchProperties.getProperty(boostsFieldName);
return parseFieldBoosts(boostsStr);
}
public static Map<String, Float> parseFieldBoosts(String boostsStr) {
if (boostsStr == null || boostsStr.length() < 3)
return null;
// the field boosts are in the format of "field1:boost1,field2:boost2,field3:boost3..."
ArrayList<String> fieldList = StringUtil.stringToStringList(boostsStr,
StringUtil.getRegExDelimiter(ConfigProperties.FIELD_SEPARATOR));
if (fieldList == null || fieldList.size() == 0)
return null;
HashMap<String, Float> boostMap = new HashMap<String, Float>();
for (String fieldStr : fieldList) {
String[] nameValuePair = fieldStr.split(StringUtil.getRegExDelimiter(ConfigProperties.NAME_VALUE_SEPARATOR));
if (nameValuePair != null && nameValuePair.length == 2) {
try {
float boost = Float.parseFloat(nameValuePair[1].trim());
if (boost > 0)
boostMap.put(nameValuePair[0].trim(), boost);
} catch (Exception e) {
// skip this entry
}
}
}
return boostMap;
}
public static List<Float> getScaleFactorsFromConfig(String factorsFieldName) {
ConfigProperties searchProperties = ConfigFactory.getInstance()
.getConfigProperties(ConfigFactory.SEARCH_CONFIG_PATH);
String factorsStr = searchProperties.getProperty(factorsFieldName);
return parseScaleFactors(factorsStr);
}
public static List<Float> parseScaleFactors(String factorsStr) {
if (factorsStr == null)
return null;
// the field boosts are in the format of "field1:boost1,field2:boost2,field3:boost3..."
ArrayList<String> factorStrList = StringUtil.stringToStringList(factorsStr,
StringUtil.getRegExDelimiter(ConfigProperties.FIELD_SEPARATOR));
if (factorStrList == null || factorStrList.size() == 0)
return null;
ArrayList<Float> factorValueList = new ArrayList<Float>();
for (String factorStr : factorStrList) {
float factorValue = 1.0f;
try {
factorValue = Float.valueOf(factorStr.trim());
} catch (Exception e) {
// do nothing, use the default value
}
factorValueList.add(factorValue);
}
return factorValueList;
}
/**
* Boost a document if its marketing tag matches the keyword
* Sort the document list with the new scores.
* @param searchResult the list of documents
* @param solrResp Solr response object that holds the search results
* @param keyword to be matched against marketing tag of the each document
* @param maxCount the wanted number of documents in the search result
* @return DocSlice with the documents sorted with the new relevance scores
*/
public static void sortSearchResultByMarketingTagMatch(
SearchResult searchResult, String keyword, int maxCount)
{
if (searchResult == null || keyword == null)
return;
// boost documents whose marketing_tag matches the keyword with the current maximum score
for (ScoredSolrDoc doc : (DocSliceResult)searchResult) {
boostByMarketingTagMatch(doc, keyword, searchResult.getMaxScore());
}
// sort by the new scores, and we only want to top N documents, and N = maxCount
searchResult.sortByScore(maxCount);
}
public static void boostByMarketingTagMatch(ScoredSolrDoc doc, String keyword, float boost) {
if (doc == null || keyword == null)
return;
if (boost <= 0)
boost = 5.0f;
List<String> keywordList = StringUtil.stringToStringList(keyword, "[ ]");
int keywordCount = keywordList.size();
// to keep the maximum boost = boost, scale down the boost by number of words in the keyword
if (keywordCount > 0)
boost /= (float)keywordCount;
else
return;
String marketingTag = doc.getFieldValue(SolrUtil.INDEX_FIELD_MARKETING_TAG);
if (marketingTag == null || marketingTag.length() == 0)
return;
List<String> tagList = StringUtil.stringToStringList(marketingTag, "[ ]");
int tagCount = tagList.size();
// compare the last words between marketing tag and keywords
int compareSize = (keywordCount < tagCount) ? keywordCount : tagCount;
for (int i = 0; i < compareSize; i++) {
if (StringUtil.wordMatch(keywordList.get(keywordCount - 1 - i), tagList.get(tagCount - 1 - i))) {
doc.boostScore(boost);
} else {
// do not continue matching if a non-match already found
break;
}
}
}
}