/**
* $Id: SolrLogger.java 4959 2010-05-15 05:22:06Z mdiggory $
* $URL: https://scm.dspace.org/svn/repo/dspace/tags/dspace-1.6.2/dspace-stats/src/main/java/org/dspace/statistics/SolrLogger.java $
* *************************************************************************
* Copyright (c) 2002-2009, DuraSpace. All rights reserved
* Licensed under the DuraSpace Foundation License.
*
* A copy of the DuraSpace License has been included in this
* distribution and is available at: http://scm.dspace.org/svn/repo/licenses/LICENSE.txt
*/
package org.dspace.statistics;
import com.maxmind.geoip.Location;
import com.maxmind.geoip.LookupService;
import org.apache.commons.lang.time.DateFormatUtils;
import org.apache.log4j.Logger;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.MapSolrParams;
import org.dspace.content.*;
import org.dspace.content.Collection;
import org.dspace.core.ConfigurationManager;
import org.dspace.eperson.EPerson;
import org.dspace.statistics.util.DnsLookup;
import org.dspace.statistics.util.LocationUtils;
import org.dspace.statistics.util.SpiderDetector;
import javax.servlet.http.HttpServletRequest;
import java.io.IOException;
import java.sql.SQLException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.*;
/**
* Static SolrLogger used to hold HttpSolrClient connection pool to issue
* usage logging events to Solr from DSpace libraries.
*
* @author ben at atmire.com
* @author kevinvandevelde at atmire.com
* @author mdiggory at atmire.com
*/
public class SolrLogger
{
private static Logger log = Logger.getLogger(SolrLogger.class);
private static final CommonsHttpSolrServer solr;
public static final String DATE_FORMAT_8601 = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'";
public static final String DATE_FORMAT_DCDATE = "yyyy-MM-dd'T'HH:mm:ss'Z'";
private static final LookupService locationService;
private static final boolean useProxies;
private static Map metadataStorageInfo;
static
{
log.info("solr.spidersfile:" + ConfigurationManager.getProperty("solr.spidersfile"));
log.info("solr.log.server:" + ConfigurationManager.getProperty("solr.log.server"));
log.info("solr.dbfile:" + ConfigurationManager.getProperty("solr.dbfile"));
CommonsHttpSolrServer server = null;
if (ConfigurationManager.getProperty("solr.log.server") != null)
{
try
{
server = new CommonsHttpSolrServer(ConfigurationManager.getProperty("solr.log.server"));
SolrQuery solrQuery = new SolrQuery()
.setQuery("type:2 AND id:1");
server.query(solrQuery);
} catch (Exception e) {
log.error(e.getMessage(), e);
}
}
solr = server;
// Read in the file so we don't have to do it all the time
//spiderIps = SpiderDetector.getSpiderIpAddresses();
LookupService service = null;
// Get the db file for the location
String dbfile = ConfigurationManager.getProperty("solr.dbfile");
if (dbfile != null)
{
try
{
service = new LookupService(dbfile,
LookupService.GEOIP_STANDARD);
}
catch (IOException e)
{
e.printStackTrace();
}
}
else
{
// System.out.println("NO SOLR DB FILE !");
}
locationService = service;
if ("true".equals(ConfigurationManager.getProperty("useProxies")))
useProxies = true;
else
useProxies = false;
log.info("useProxies=" + useProxies);
metadataStorageInfo = new HashMap();
int count = 1;
String metadataVal;
while ((metadataVal = ConfigurationManager.getProperty("solr.metadata.item." + count)) != null)
{
String storeVal = metadataVal.split(":")[0];
String metadataField = metadataVal.split(":")[1];
metadataStorageInfo.put(storeVal, metadataField);
log.info("solr.metadata.item." + count + "=" + metadataVal);
count++;
}
}
public static void post(DSpaceObject dspaceObject, HttpServletRequest request,
EPerson currentUser)
{
if (solr == null || locationService == null)
return;
boolean isSpiderBot = SpiderDetector.isSpider(request);
try
{
if(isSpiderBot &&
!ConfigurationManager.getBooleanProperty("solr.statistics.logBots",true))
{
return;
}
SolrInputDocument doc1 = new SolrInputDocument();
// Save our basic info that we already have
String ip = request.getRemoteAddr();
if(isUseProxies() && request.getHeader("X-Forwarded-For") != null)
{
/* This header is a comma delimited list */
for(String xfip : request.getHeader("X-Forwarded-For").split(","))
{
/* proxy itself will sometime populate this header with the same value in
remote address. ordering in spec is vague, we'll just take the last
not equal to the proxy
*/
if(!request.getHeader("X-Forwarded-For").contains(ip))
{
ip = xfip.trim();
}
}
}
doc1.addField("ip", ip);
doc1.addField("id", dspaceObject.getID());
doc1.addField("type", dspaceObject.getType());
// Save the current time
doc1.addField("time", DateFormatUtils.format(new Date(),
DATE_FORMAT_8601));
if (currentUser != null)
doc1.addField("epersonid", currentUser.getID());
try
{
String dns = DnsLookup.reverseDns(ip);
doc1.addField("dns", dns.toLowerCase());
}
catch (Exception e)
{
log.error("Failed DNS Lookup for IP:" + ip);
log.debug(e.getMessage(),e);
}
// Save the location information if valid, save the event without
// location information if not valid
Location location = locationService.getLocation(ip);
if (location != null
&& !("--".equals(location.countryCode)
&& location.latitude == -180 && location.longitude == -180))
{
try
{
doc1.addField("continent", LocationUtils
.getContinentCode(location.countryCode));
}
catch (Exception e)
{
System.out
.println("COUNTRY ERROR: " + location.countryCode);
}
doc1.addField("countryCode", location.countryCode);
doc1.addField("city", location.city);
doc1.addField("latitude", location.latitude);
doc1.addField("longitude", location.longitude);
doc1.addField("isBot",isSpiderBot);
if(request.getHeader("User-Agent") != null)
doc1.addField("userAgent", request.getHeader("User-Agent"));
}
if (dspaceObject instanceof Item)
{
Item item = (Item) dspaceObject;
// Store the metadata
for (Object storedField : metadataStorageInfo.keySet())
{
String dcField = (String) metadataStorageInfo
.get(storedField);
DCValue[] vals = item.getMetadata(dcField.split("\\.")[0],
dcField.split("\\.")[1], dcField.split("\\.")[2],
Item.ANY);
for (DCValue val1 : vals)
{
String val = val1.value;
doc1.addField(String.valueOf(storedField), val);
doc1.addField(storedField + "_search", val
.toLowerCase());
}
}
}
storeParents(doc1, dspaceObject);
solr.add(doc1);
// TODO: requires further load testing, very fast commits might cause issues
solr.commit(false, false);
} catch (Exception e) {
log.error(e.getMessage(), e);
}
}
public static Map getMetadataStorageInfo()
{
return metadataStorageInfo;
}
/**
* Method just used to log the parents Community log: owning comms
* Collection log: owning comms & their comms Item log: owning colls/comms
* Bitstream log: owning item/colls/comms
*
* @param doc1
* the current solrinputdoc
* @param dso
* the current dspace object we want to log
* @throws java.sql.SQLException
* ignore it
*/
public static void storeParents(SolrInputDocument doc1, DSpaceObject dso)
throws SQLException
{
if (dso instanceof Community)
{
Community comm = (Community) dso;
while (comm != null && comm.getParentCommunity() != null)
{
comm = comm.getParentCommunity();
doc1.addField("owningComm", comm.getID());
}
}
else if (dso instanceof Collection)
{
Collection coll = (Collection) dso;
for (int i = 0; i < coll.getCommunities().length; i++)
{
Community community = coll.getCommunities()[i];
doc1.addField("owningComm", community.getID());
storeParents(doc1, community);
}
}
else if (dso instanceof Item)
{
Item item = (Item) dso;
for (int i = 0; i < item.getCollections().length; i++)
{
Collection collection = item.getCollections()[i];
doc1.addField("owningColl", collection.getID());
storeParents(doc1, collection);
}
}
else if (dso instanceof Bitstream)
{
Bitstream bitstream = (Bitstream) dso;
for (int i = 0; i < bitstream.getBundles().length; i++)
{
Bundle bundle = bitstream.getBundles()[i];
for (int j = 0; j < bundle.getItems().length; j++)
{
Item item = bundle.getItems()[j];
doc1.addField("owningItem", item.getID());
storeParents(doc1, item);
}
}
}
}
public static boolean isUseProxies()
{
return useProxies;
}
public static void removeIndex(String query) throws IOException,
SolrServerException
{
solr.deleteByQuery(query);
solr.commit();
}
public static Map<String, List<String>> queryField(String query,
List oldFieldVals, String field)
{
Map<String, List<String>> currentValsStored = new HashMap<String, List<String>>();
try
{
// Get one document (since all the metadata for all the values
// should be the same just get the first one we find
Map<String, String> params = new HashMap<String, String>();
params.put("q", query);
params.put("rows", "1");
MapSolrParams solrParams = new MapSolrParams(params);
QueryResponse response = solr.query(solrParams);
// Make sure we at least got a document
if (response.getResults().getNumFound() == 0)
return currentValsStored;
// We have at least one document good
SolrDocument document = response.getResults().get(0);
for (Object storedField : metadataStorageInfo.keySet())
{
// For each of these fields that are stored we are to create a
// list of the values it holds now
java.util.Collection collection = document
.getFieldValues((String) storedField);
List<String> storedVals = new ArrayList<String>();
storedVals.addAll(collection);
// Now add it to our hashmap
currentValsStored.put((String) storedField, storedVals);
}
// System.out.println("HERE");
// Get the info we need
}
catch (SolrServerException e)
{
e.printStackTrace();
}
return currentValsStored;
}
public static class ResultProcessor
{
public void execute(String query) throws SolrServerException, IOException {
Map<String, String> params = new HashMap<String, String>();
params.put("q", query);
params.put("rows", "10");
MapSolrParams solrParams = new MapSolrParams(params);
QueryResponse response = solr.query(solrParams);
long numbFound = response.getResults().getNumFound();
// process the first batch
process(response.getResults());
// Run over the rest
for (int i = 10; i < numbFound; i += 10)
{
params.put("start", String.valueOf(i));
solrParams = new MapSolrParams(params);
response = solr.query(solrParams);
process(response.getResults());
}
}
public void commit() throws IOException, SolrServerException {
solr.commit();
}
/**
* Override to manage pages of documents
* @param docs
*/
public void process(List<SolrDocument> docs) throws IOException, SolrServerException {
for(SolrDocument doc : docs){
process(doc);
}
}
/**
* Overide to manage individual documents
* @param doc
*/
public void process(SolrDocument doc) throws IOException, SolrServerException {
}
}
public static void markRobotsByIP()
{
for(String ip : SpiderDetector.getSpiderIpAddresses()){
try {
/* Result Process to alter record to be identified as a bot */
ResultProcessor processor = new ResultProcessor(){
public void process(SolrDocument doc) throws IOException, SolrServerException {
doc.removeFields("isBot");
doc.addField("isBot", true);
SolrInputDocument newInput = ClientUtils.toSolrInputDocument(doc);
solr.add(newInput);
log.info("Marked " + doc.getFieldValue("ip") + " as bot");
}
};
/* query for ip, exclude results previously set as bots. */
processor.execute("ip:"+ip+ "* AND -isBot:true");
solr.commit();
} catch (Exception e) {
log.error(e.getMessage(),e);
}
}
}
public static void markRobotByUserAgent(String agent){
try {
/* Result Process to alter record to be identified as a bot */
ResultProcessor processor = new ResultProcessor(){
public void process(SolrDocument doc) throws IOException, SolrServerException {
doc.removeFields("isBot");
doc.addField("isBot", true);
SolrInputDocument newInput = ClientUtils.toSolrInputDocument(doc);
solr.add(newInput);
}
};
/* query for ip, exclude results previously set as bots. */
processor.execute("userAgent:"+agent+ " AND -isBot:true");
solr.commit();
} catch (Exception e) {
log.error(e.getMessage(),e);
}
}
public static void deleteRobotsByIsBotFlag()
{
try {
solr.deleteByQuery("isBot:true");
} catch (Exception e) {
log.error(e.getMessage(),e);
}
}
public static void deleteIP(String ip)
{
try {
solr.deleteByQuery("ip:"+ip + "*");
} catch (Exception e) {
log.error(e.getMessage(),e);
}
}
public static void deleteRobotsByIP()
{
for(String ip : SpiderDetector.getSpiderIpAddresses()){
deleteIP(ip);
}
}
/*
* //TODO: below are not used public static void
* update(String query, boolean addField, String fieldName, Object
* fieldValue, Object oldFieldValue) throws SolrServerException, IOException
* { List<Object> vals = new ArrayList<Object>(); vals.add(fieldValue);
* List<Object> oldvals = new ArrayList<Object>(); oldvals.add(fieldValue);
* update(query, addField, fieldName, vals, oldvals); }
*/
public static void update(String query, String action,
List<String> fieldNames, List<List<Object>> fieldValuesList)
throws SolrServerException, IOException
{
long start = new Date().getTime();
// Since there is NO update
// We need to get our documents
// QueryResponse queryResponse = solr.query()//query(query, null, -1,
// null, null, null);
final List<SolrDocument> docsToUpdate = new ArrayList<SolrDocument>();
ResultProcessor processor = new ResultProcessor(){
public void process(List<SolrDocument> docs) throws IOException, SolrServerException {
docsToUpdate.addAll(docs);
}
};
processor.execute(query);
// We have all the docs delete the ones we don't need
solr.deleteByQuery(query);
// Add the new (updated onces
for (int i = 0; i < docsToUpdate.size(); i++)
{
SolrDocument solrDocument = docsToUpdate.get(i);
// Now loop over our fieldname actions
for (int j = 0; j < fieldNames.size(); j++)
{
String fieldName = fieldNames.get(j);
List<Object> fieldValues = fieldValuesList.get(j);
if (action.equals("addOne") || action.equals("replace"))
{
if (action.equals("replace"))
solrDocument.removeFields(fieldName);
for (Object fieldValue : fieldValues)
{
solrDocument.addField(fieldName, fieldValue);
}
}
else if (action.equals("remOne"))
{
// Remove the field
java.util.Collection<Object> values = solrDocument
.getFieldValues(fieldName);
solrDocument.removeFields(fieldName);
for (Object value : values)
{
// Keep all the values besides the one we need to remove
if (!fieldValues.contains((value)))
{
solrDocument.addField(fieldName, value);
}
}
}
}
SolrInputDocument newInput = ClientUtils
.toSolrInputDocument(solrDocument);
solr.add(newInput);
}
solr.commit();
// System.out.println("SolrLogger.update(\""+query+"\"):"+(new
// Date().getTime() - start)+"ms,"+numbFound+"records");
}
public static void query(String query, int max) throws SolrServerException
{
query(query, null, null, max, null, null, null, null);
}
/**
* Query used to get values grouped by the given facetfield
*
* @param query
* the query to be used
* @param facetField
* the facet field on which to group our values
* @param max
* the max number of values given back (in case of 10 the top 10
* will be given)
* @param showTotal
* a boolean determening whether the total amount should be given
* back as the last element of the array
* @return an array containing our results
* @throws SolrServerException
* ...
*/
public static ObjectCount[] queryFacetField(String query,
String filterQuery, String facetField, int max, boolean showTotal,
List<String> facetQueries) throws SolrServerException
{
QueryResponse queryResponse = query(query, filterQuery, facetField,
max, null, null, null, facetQueries);
if (queryResponse == null)
return new ObjectCount[0];
FacetField field = queryResponse.getFacetField(facetField);
// At least make sure we have one value
if (0 < field.getValueCount())
{
// Create an array for our result
ObjectCount[] result = new ObjectCount[field.getValueCount()
+ (showTotal ? 1 : 0)];
// Run over our results & store them
for (int i = 0; i < field.getValues().size(); i++)
{
FacetField.Count fieldCount = field.getValues().get(i);
result[i] = new ObjectCount();
result[i].setCount(fieldCount.getCount());
result[i].setValue(fieldCount.getName());
}
if (showTotal)
{
result[result.length - 1] = new ObjectCount();
result[result.length - 1].setCount(queryResponse.getResults()
.getNumFound());
result[result.length - 1].setValue("total");
}
return result;
}
else
{
// Return an empty array cause we got no data
return new ObjectCount[0];
}
}
/**
* Query used to get values grouped by the date
*
* @param query
* the query to be used
* @param max
* the max number of values given back (in case of 10 the top 10
* will be given)
* @param dateType
* the type to be used (example: DAY, MONTH, YEAR)
* @param dateStart
* the start date Format:(-3, -2, ..) the date is calculated
* relatively on today
* @param dateEnd
* the end date stop Format (-2, +1, ..) the date is calculated
* relatively on today
* @param showTotal
* a boolean determening whether the total amount should be given
* back as the last element of the array
* @return and array containing our results
* @throws SolrServerException
* ...
*/
public static ObjectCount[] queryFacetDate(String query,
String filterQuery, int max, String dateType, String dateStart,
String dateEnd, boolean showTotal) throws SolrServerException
{
QueryResponse queryResponse = query(query, filterQuery, null, max,
dateType, dateStart, dateEnd, null);
if (queryResponse == null)
return new ObjectCount[0];
FacetField dateFacet = queryResponse.getFacetDate("time");
// TODO: check if this cannot crash I checked it, it crashed!!!
// Create an array for our result
ObjectCount[] result = new ObjectCount[dateFacet.getValueCount()
+ (showTotal ? 1 : 0)];
// Run over our datefacet & store all the values
for (int i = 0; i < dateFacet.getValues().size(); i++)
{
FacetField.Count dateCount = dateFacet.getValues().get(i);
result[i] = new ObjectCount();
result[i].setCount(dateCount.getCount());
result[i].setValue(getDateView(dateCount.getName(), dateType));
}
if (showTotal)
{
result[result.length - 1] = new ObjectCount();
result[result.length - 1].setCount(queryResponse.getResults()
.getNumFound());
// TODO: Make sure that this total is gotten out of the msgs.xml
result[result.length - 1].setValue("total");
}
return result;
}
public static Map<String, Integer> queryFacetQuery(String query,
String filterQuery, List<String> facetQueries)
throws SolrServerException
{
QueryResponse response = query(query, filterQuery, null, 1, null, null,
null, facetQueries);
return response.getFacetQuery();
}
public static ObjectCount queryTotal(String query, String filterQuery)
throws SolrServerException
{
QueryResponse queryResponse = query(query, filterQuery, null, -1, null,
null, null, null);
ObjectCount objCount = new ObjectCount();
objCount.setCount(queryResponse.getResults().getNumFound());
return objCount;
}
private static String getDateView(String name, String type)
{
if (name != null && name.matches("^[0-9]{4}\\-[0-9]{2}.*"))
{
/*
* if("YEAR".equalsIgnoreCase(type)) return name.substring(0, 4);
* else if("MONTH".equalsIgnoreCase(type)) return name.substring(0,
* 7); else if("DAY".equalsIgnoreCase(type)) return
* name.substring(0, 10); else if("HOUR".equalsIgnoreCase(type))
* return name.substring(11, 13);
*/
// Get our date
Date date = null;
try
{
SimpleDateFormat format = new SimpleDateFormat(DATE_FORMAT_8601);
date = format.parse(name);
}
catch (ParseException e)
{
try
{
// We should use the dcdate (the dcdate is used when
// generating random data)
SimpleDateFormat format = new SimpleDateFormat(
DATE_FORMAT_DCDATE);
date = format.parse(name);
}
catch (ParseException e1)
{
e1.printStackTrace();
}
// e.printStackTrace();
}
String dateformatString = "dd-MM-yyyy";
if ("DAY".equals(type))
{
dateformatString = "dd-MM-yyyy";
}
else if ("MONTH".equals(type))
{
dateformatString = "MMMM yyyy";
}
else if ("YEAR".equals(type))
{
dateformatString = "yyyy";
}
SimpleDateFormat simpleFormat = new SimpleDateFormat(
dateformatString);
if (date != null)
name = simpleFormat.format(date);
}
return name;
}
private static QueryResponse query(String query, String filterQuery,
String facetField, int max, String dateType, String dateStart,
String dateEnd, List<String> facetQueries)
throws SolrServerException
{
if (solr == null)
return null;
// System.out.println("QUERY");
SolrQuery solrQuery = new SolrQuery().setRows(0).setQuery(query)
.setFacetMinCount(1);
// Set the date facet if present
if (dateType != null)
{
solrQuery.setParam("facet.date", "time")
.
// EXAMPLE: NOW/MONTH+1MONTH
setParam("facet.date.end",
"NOW/" + dateType + dateEnd + dateType).setParam(
"facet.date.gap", "+1" + dateType)
.
// EXAMPLE: NOW/MONTH-" + nbMonths + "MONTHS
setParam("facet.date.start",
"NOW/" + dateType + dateStart + dateType + "S")
.setFacet(true);
}
if (facetQueries != null)
{
for (int i = 0; i < facetQueries.size(); i++)
{
String facetQuery = facetQueries.get(i);
solrQuery.addFacetQuery(facetQuery);
}
if (0 < facetQueries.size())
solrQuery.setFacet(true);
}
if (facetField != null)
solrQuery.addFacetField(facetField);
// Set the top x of if present
if (max != -1)
solrQuery.setFacetLimit(max);
// A filter is used instead of a regular query to improve
// performance and ensure the search result ordering will
// not be influenced
// Choose to filter by the Legacy spider IP list (may get too long to properly filter all IP's
if(ConfigurationManager.getBooleanProperty("solr.statistics.query.filter.spiderIp",false))
solrQuery.addFilterQuery(getIgnoreSpiderIPs());
// Choose to filter by isBot field, may be overriden in future
// to allow views on stats based on bots.
if(ConfigurationManager.getBooleanProperty("solr.statistics.query.filter.isBot",true))
solrQuery.addFilterQuery("-isBot:true");
if (filterQuery != null)
solrQuery.addFilterQuery(filterQuery);
QueryResponse response = null;
try
{
// solr.set
response = solr.query(solrQuery);
}
catch (SolrServerException e)
{
System.err.println("Error using query " + query);
throw e;
}
return response;
}
/** String of IP and Ranges in IPTable as a Solr Query */
private static String filterQuery = null;
/**
* Returns in a filterQuery string all the ip addresses that should be ignored
*
* @return a string query with ip addresses
*/
public static String getIgnoreSpiderIPs() {
if (filterQuery == null) {
String query = "";
boolean first = true;
for (String ip : SpiderDetector.getSpiderIpAddresses()) {
if (first) {
query += " AND ";
first = false;
}
query += " NOT(ip: " + ip + ")";
}
filterQuery = query;
}
return filterQuery;
}
}