/**
*
*/
package uk.bl.wa.analyser.text;
/*
* #%L
* warc-indexer
* %%
* Copyright (C) 2013 - 2014 The UK Web Archive
* %%
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as
* published by the Free Software Foundation, either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program. If not, see
* <http://www.gnu.org/licenses/gpl-2.0.html>.
* #L%
*/
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.typesafe.config.Config;
import uk.bl.wa.sentimentalj.Sentiment;
import uk.bl.wa.sentimentalj.SentimentalJ;
import uk.bl.wa.solr.SolrFields;
import uk.bl.wa.solr.SolrRecord;
/**
 * Text analyser that runs SentimentalJ over the start of a document's text
 * and records the outcome on the Solr record: a label chosen from
 * {@link SolrFields#SENTIMENTS} plus the raw comparative score.
 *
 * @author anj
 *
 */
public class SentimentJTextAnalyser extends AbstractTextAnalyser {

    private static final Log log = LogFactory.getLog( SentimentJTextAnalyser.class );

    /** Analyser instance shared by every instance of this class. */
    private static final SentimentalJ sentij = new SentimentalJ();

    /** Upper bound on the number of leading characters handed to the sentiment analyser. */
    private static final int MAX_SENTIMENT_TEXT_LENGTH = 10000;

    /** Divisor applied to the log-compressed comparative score before it is mapped onto the label scale. */
    private static final double SENTIMENT_LOG_DIVISOR = 40.0;

    /**
     * Creates the analyser.
     *
     * @param conf configuration object; not read by this constructor
     */
    public SentimentJTextAnalyser(Config conf) {
    }

    /**
     * Scores the sentiment of the given text and adds two fields to the record:
     * {@link SolrFields#SENTIMENT} (one of the {@link SolrFields#SENTIMENTS} labels)
     * and {@link SolrFields#SENTIMENT_SCORE} (the comparative score as a string).
     * <p>
     * Only the first {@value #MAX_SENTIMENT_TEXT_LENGTH} characters are analysed.
     * The comparative score {@code c} is compressed to
     * {@code signum(c) * ln(1 + |c|) / 40}, shifted by 0.5, scaled by the number
     * of labels and truncated to an index, which is then clamped into the valid
     * range of the label array.
     *
     * @param text the text to analyse; when {@code null} nothing is recorded
     * @param solr the record that receives the sentiment fields
     */
    public void analyse( String text, SolrRecord solr ) {
        // Without text there is nothing to score; avoid a NullPointerException below.
        if( text == null ) {
            return;
        }

        // Sentiment Analysis, limited to the leading part of the text:
        int sentilen = Math.min( MAX_SENTIMENT_TEXT_LENGTH, text.length() );
        String sentitext = text.substring( 0, sentilen );
        Sentiment senti = sentij.analyze( sentitext );

        // Compress the score logarithmically while keeping its sign.
        double comparative = senti.getComparative();
        double sentilog = Math.signum( comparative )
                * ( Math.log( 1.0 + Math.abs( comparative ) ) / SENTIMENT_LOG_DIVISOR );

        // NOTE(review): a NaN score would truncate to 0 here and land on the first
        // label - confirm whether getComparative() can ever produce NaN.
        int labelCount = SolrFields.SENTIMENTS.length;
        int sentii = ( int ) ( labelCount * ( 0.5 + sentilog ) );

        // Clamp out-of-range indices onto the ends of the scale.
        if( sentii < 0 ) {
            log.debug( "Caught a sentiment rating less than zero: " + sentii + " from " + sentilog );
            sentii = 0;
        }
        if( sentii >= labelCount ) {
            log.debug( "Caught a sentiment rating too large to be in range: " + sentii + " from " + sentilog );
            sentii = labelCount - 1;
        }

        // Map to sentiment scale:
        solr.addField( SolrFields.SENTIMENT, SolrFields.SENTIMENTS[ sentii ] );
        // Formatted straight from the getter so the stored text keeps its existing form.
        solr.addField( SolrFields.SENTIMENT_SCORE, "" + senti.getComparative() );
    }
}