/** * */ package uk.bl.wa.analyser.text; /* * #%L * warc-indexer * %% * Copyright (C) 2013 - 2014 The UK Web Archive * %% * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as * published by the Free Software Foundation, either version 2 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this program. If not, see * <http://www.gnu.org/licenses/gpl-2.0.html>. * #L% */ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.typesafe.config.Config; import uk.bl.wa.sentimentalj.Sentiment; import uk.bl.wa.sentimentalj.SentimentalJ; import uk.bl.wa.solr.SolrFields; import uk.bl.wa.solr.SolrRecord; /** * @author anj * */ public class SentimentJTextAnalyser extends AbstractTextAnalyser { private static Log log = LogFactory.getLog( SentimentJTextAnalyser.class ); /** */ private static SentimentalJ sentij = new SentimentalJ(); /** * @param conf */ public SentimentJTextAnalyser(Config conf) { } /** * */ public void analyse( String text, SolrRecord solr ) { // Sentiment Analysis: int sentilen = 10000; if( sentilen > text.length() ) sentilen = text.length(); String sentitext = text.substring( 0, sentilen ); // metadata.get(HtmlFeatureParser.FIRST_PARAGRAPH); Sentiment senti = sentij.analyze( sentitext ); double sentilog = Math.signum( senti.getComparative() ) * ( Math.log( 1.0 + Math.abs( senti.getComparative() ) ) / 40.0 ); int sentii = ( int ) ( SolrFields.SENTIMENTS.length * ( 0.5 + sentilog ) ); if( sentii < 0 ) { log.debug( "Caught a sentiment rating less than zero: " + sentii + " from " + sentilog ); sentii = 0; } if( sentii >= SolrFields.SENTIMENTS.length ) { log.debug( "Caught a sentiment rating too large to be in range: " + sentii + " from " + sentilog ); sentii = SolrFields.SENTIMENTS.length - 1; } // if( sentii != 3 ) // log.debug("Got sentiment: " + sentii+" "+sentilog+" "+ SolrFields.SENTIMENTS[sentii] ); // Map to sentiment scale: solr.addField( SolrFields.SENTIMENT, SolrFields.SENTIMENTS[ sentii ] ); solr.addField( SolrFields.SENTIMENT_SCORE, "" + senti.getComparative() ); } }