package com.idega.block.websearch.data;
import java.text.SimpleDateFormat;
import java.util.StringTokenizer;
import org.apache.lucene.document.DateField;
import org.apache.lucene.document.Document;
/**
* <p><code>WebSearchHit</code> Represents a document returned by a search result.<br>
* This class is a part of the websearch webcrawler and search engine block. <br>
* It is based on the <a href="http://lucene.apache.org">Lucene</a> Java search engine from the Apache group and loosely <br>
* on the work of David Duddleston of i2a.com.<br>
*
* @copyright Idega Software 2002
* @author <a href="mailto:eiki@idega.is">Eirikur Hrafnsson</a>
*/
public final class WebSearchHit {

    /**
     * Number of context characters shown around a highlighted word (split evenly
     * before and after it), and the length of the plain excerpt returned when no
     * query word is found.
     */
    private static final int MAX_SNIPPET_LENGTH = 200;

    /** Rank of this hit, exactly as handed to the constructor. */
    private int rank;

    /** Score of this hit, exactly as handed to the constructor. */
    private float score;

    /** The Lucene document whose stored fields back all the getters below. */
    private Document document;

    /** Unused; instances are created through the public constructor. */
    private WebSearchHit() {
    }

    /**
     * Creates a hit for the given document.
     *
     * @param d the Lucene document the getters read their fields from
     * @param r the rank of the hit
     * @param s the score of the hit
     */
    public WebSearchHit(Document d, int r, float s) {
        this.document = d;
        this.rank = r;
        this.score = s;
    }

    /** @return the value of the document's "categories" field */
    public String getCategories() {
        return this.document.get("categories");
    }

    /** @return the value of the document's "contentType" field */
    public String getContentType() {
        return this.document.get("contentType");
    }

    /** @return the value of the document's "description" field */
    public String getDescription() {
        return this.document.get("description");
    }

    /**
     * @return the value of the document's "href" field, or the value of its
     *         "url" field when "href" is <code>null</code>
     */
    public String getHREF() {
        String href = this.document.get("href");
        return (href != null) ? href : this.document.get("url");
    }

    /** @return the value of the document's "keywords" field */
    public String getKeywords() {
        return this.document.get("keywords");
    }

    /**
     * @return the document's "published" field converted with
     *         <code>DateField.stringToTime</code> (presumably milliseconds since
     *         the epoch - confirm against the Lucene version in use)
     */
    public long getPublished() {
        return DateField.stringToTime(this.document.get("published"));
    }

    /**
     * Formats the document's "published" field as <code>yyyy.MM.dd HH:mm:ss z</code>.
     * The hour uses the 24-hour clock (<code>HH</code>): the pattern has no AM/PM
     * marker, so a 12-hour value would be ambiguous.
     *
     * @return the formatted publication date
     */
    public String getPublishedFormated() {
        // A new formatter per call: SimpleDateFormat must not be shared between threads.
        SimpleDateFormat dateFormatter =
            new SimpleDateFormat("yyyy.MM.dd HH:mm:ss z");
        return dateFormatter.format(
            DateField.stringToDate(this.document.get("published")));
    }

    /** @return the rank given to the constructor */
    public int getRank() {
        return this.rank;
    }

    /** @return the score given to the constructor */
    public float getScore() {
        return this.score;
    }

    /** @return the value of the document's "title" field */
    public String getTitle() {
        return this.document.get("title");
    }

    /** @return the value of the document's "url" field */
    public String getURL() {
        return this.document.get("url");
    }

    /** @return the value of the document's "contents" field */
    public String getContents() {
        return this.document.get("contents");
    }

    /**
     * Gets a "centered" view of the content surrounding the first search word
     * that is found, with that word wrapped in <code>&lt;b&gt;</code> tags.
     * <p>
     * The query is split on whitespace and the tokens <code>AND</code>,
     * <code>OR</code> and <code>NOT</code> are skipped. Each remaining word is
     * looked up in the contents, first with its exact case and then ignoring
     * case; the first word that occurs determines the excerpt. When no word
     * occurs, or the query is <code>null</code>, the beginning of the contents
     * is returned instead. The contents are not HTML-escaped.
     *
     * @param queryString the query as typed by the user; may be <code>null</code>
     * @return the excerpt, or <code>null</code> when the document has no
     *         "contents" field
     */
    public String getContents(String queryString) {
        String contents = this.document.get("contents");
        if (contents == null) {
            return null;
        }

        if (queryString != null) {
            StringTokenizer tokens = new StringTokenizer(queryString);
            while (tokens.hasMoreTokens()) {
                String word = tokens.nextToken();
                if (isQueryOperator(word)) {
                    continue;
                }
                int matchStart = findWord(contents, word);
                if (matchStart != -1) {
                    return buildSnippet(contents, matchStart, word.length());
                }
            }
        }

        // No query word occurs in the contents: fall back to the leading excerpt.
        return contents.substring(0, Math.min(MAX_SNIPPET_LENGTH, contents.length()));
    }

    /** Tells whether the token is one of the boolean keywords AND, OR or NOT. */
    private static boolean isQueryOperator(String token) {
        return token.equals("AND") || token.equals("OR") || token.equals("NOT");
    }

    /**
     * Finds the first occurrence of the word in the text, preferring an
     * exact-case match and falling back to a case-insensitive one. Returns -1
     * when the word does not occur at all.
     */
    private static int findWord(String text, String word) {
        int exact = text.indexOf(word);
        if (exact != -1) {
            return exact;
        }
        // NOTE(review): the index presumably matches terms regardless of case,
        // so a hit may contain the word in a different case than the query.
        int wordLength = word.length();
        int lastStart = text.length() - wordLength;
        for (int i = 0; i <= lastStart; i++) {
            // regionMatches compares char by char, so the match always spans
            // exactly wordLength characters of the text.
            if (text.regionMatches(true, i, word, 0, wordLength)) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Builds the excerpt around the match that starts at matchStart and spans
     * matchLength characters, wrapping the matched text in bold tags.
     */
    private static String buildSnippet(String contents, int matchStart, int matchLength) {
        int half = MAX_SNIPPET_LENGTH / 2;
        int matchEnd = matchStart + matchLength;

        int start = 0;
        int after = half;
        if (matchStart >= half) {
            start = matchStart - half;
        } else {
            // Fewer than "half" characters precede the match: spend the unused
            // part of the budget on the text that follows it.
            after += half - matchStart;
        }
        int end = Math.min(matchEnd + after, contents.length());

        return contents.substring(start, matchStart)
            + "<b>" + contents.substring(matchStart, matchEnd) + "</b>"
            + contents.substring(matchEnd, end);
    }
}