/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.nutch.searcher; // JDK imports import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import java.util.ArrayList; // Hadoop imports import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; // Nutch imports import org.apache.nutch.html.Entities; /** A document summary dynamically generated to match a query. */ public class Summary implements Writable { private final static int FRAGMENT = 0; private final static int HIGHLIGHT = 1; private final static int ELLIPSIS = 2; /** A fragment of text within a summary. */ public static class Fragment { private String text; /** Constructs a fragment for the given text. */ public Fragment(String text) { this.text = text; } /** Returns the text of this fragment. */ public String getText() { return text; } /** Returns true iff this fragment is to be highlighted. */ public boolean isHighlight() { return false; } /** Returns true iff this fragment is an ellipsis. */ public boolean isEllipsis() { return false; } /** Returns a textual representation of this fragment. */ public String toString() { return getText(); } // Inherited Javadoc public boolean equals(Object o) { try { Fragment f = (Fragment) o; return f.getText().equals(getText()) && f.isHighlight() == isHighlight() && f.isEllipsis() == isEllipsis(); } catch (Exception e) { return false; } } } /** A highlighted fragment of text within a summary. */ public static class Highlight extends Fragment { /** Constructs a highlighted fragment for the given text. */ public Highlight(String text) { super(text); } /** Returns true. */ public boolean isHighlight() { return true; } } /** An ellipsis fragment within a summary. */ public static class Ellipsis extends Fragment { /** Constructs an ellipsis fragment for the given text. */ public Ellipsis() { super(" ... "); } /** Returns true. */ public boolean isEllipsis() { return true; } } private ArrayList<Fragment> fragments = new ArrayList<Fragment>(); private static final Fragment[] FRAGMENT_PROTO = new Fragment[0]; /** Constructs an empty Summary.*/ public Summary() {} /** Adds a fragment to a summary.*/ public void add(Fragment fragment) { fragments.add(fragment); } /** Returns an array of all of this summary's fragments.*/ public Fragment[] getFragments() { return fragments.toArray(FRAGMENT_PROTO); } /** Returns a String representation of this Summary. */ public String toString() { StringBuffer buffer = new StringBuffer(); for (int i = 0; i < fragments.size(); i++) { buffer.append(fragments.get(i)); } return buffer.toString(); } /** * Returns a HTML representation of this Summary. * HTML output for <b>Highlight</b> fragments is * <code><span class="highlight">highlight's text</span></code>, * for <b>Ellipsis</b> fragments is * <code><span class="highlight"> ... </span></code>, for generic * <b>Fragment</b> is simply the fragment's text.<br/> * * @param encode specifies if the summary's entities should be encoded. */ public String toHtml(boolean encode) { Fragment fragment = null; StringBuffer buf = new StringBuffer(); for (int i=0; i<fragments.size(); i++) { fragment = fragments.get(i); if (fragment.isHighlight()) { buf.append("<span class=\"highlight\">") .append(encode ? Entities.encode(fragment.getText()) : fragment.getText()) .append("</span>"); } else if (fragment.isEllipsis()) { buf.append("<span class=\"ellipsis\"> ... </span>"); } else { buf.append(encode ? Entities.encode(fragment.getText()) : fragment.getText()); } } return buf.toString(); } // Inherited Javadoc public boolean equals(Object o) { if (!(o instanceof Summary)) { return false; } Fragment[] fragments1 = ((Summary) o).getFragments(); Fragment[] fragments2 = getFragments(); if (fragments1.length != fragments2.length) { return false; } for (int i=0; i<fragments1.length; i++) { if (!fragments1[i].equals(fragments2[i])) { return false; } } return true; } /** * Helper method that return a String representation for each * specified Summary. */ public static String[] toStrings(Summary[] summaries) { if (summaries == null) { return null; } String[] strs = new String[summaries.length]; for (int i=0; i<summaries.length; i++) { strs[i] = summaries[i].toString(); } return strs; } public static Summary read(DataInput in) throws IOException { Summary summary = new Summary(); summary.readFields(in); return summary; } /* ------------------------- * * <implementation:Writable> * * ------------------------- */ // Inherited Javadoc public void write(DataOutput out) throws IOException { out.writeInt(fragments.size()); Fragment fragment = null; for (int i=0; i<fragments.size(); i++) { fragment = fragments.get(i); if (fragment.isHighlight()) { out.writeByte(HIGHLIGHT); Text.writeString(out, fragment.getText()); } else if (fragment.isEllipsis()) { out.writeByte(ELLIPSIS); } else { out.writeByte(FRAGMENT); Text.writeString(out, fragment.getText()); } } } // Inherited Javadoc public void readFields(DataInput in) throws IOException { int nbFragments = in.readInt(); Fragment fragment = null; for (int i=0; i<nbFragments; i++) { int type = in.readByte(); if (type == HIGHLIGHT) { fragment = new Highlight(Text.readString(in)); } else if (type == ELLIPSIS) { fragment = new Ellipsis(); } else { fragment = new Fragment(Text.readString(in)); } fragments.add(fragment); } } /* -------------------------- * * </implementation:Writable> * * -------------------------- */ }