FieldPhraseList.java example

Explorer
solrcene-master
package org.apache.lucene.search.vectorhighlight;
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;

import org.apache.lucene.search.vectorhighlight.FieldQuery.QueryPhraseMap;
import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo;

/**
 * FieldPhraseList has a list of WeightedPhraseInfo that is used by FragListBuilder
 * to create a FieldFragList object.
 */
public class FieldPhraseList {

  LinkedList<WeightedPhraseInfo> phraseList = new LinkedList<WeightedPhraseInfo>();

  /**
   * a constructor.
   * 
   * @param fieldTermStack FieldTermStack object
   * @param fieldQuery FieldQuery object
   */
  public FieldPhraseList( FieldTermStack fieldTermStack, FieldQuery fieldQuery ){
    final String field = fieldTermStack.getFieldName();

    LinkedList<TermInfo> phraseCandidate = new LinkedList<TermInfo>();
    QueryPhraseMap currMap = null;
    QueryPhraseMap nextMap = null;
    while( !fieldTermStack.isEmpty() ){
      
      phraseCandidate.clear();

      TermInfo ti = fieldTermStack.pop();
      currMap = fieldQuery.getFieldTermMap( field, ti.getText() );

      // if not found, discard top TermInfo from stack, then try next element
      if( currMap == null ) continue;
      
      // if found, search the longest phrase
      phraseCandidate.add( ti );
      while( true ){
        ti = fieldTermStack.pop();
        nextMap = null;
        if( ti != null )
          nextMap = currMap.getTermMap( ti.getText() );
        if( ti == null || nextMap == null ){
          if( ti != null )
            fieldTermStack.push( ti );
          if( currMap.isValidTermOrPhrase( phraseCandidate ) ){
            addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate, currMap.getBoost(), currMap.getTermOrPhraseNumber() ) );
          }
          else{
            while( phraseCandidate.size() > 1 ){
              fieldTermStack.push( phraseCandidate.removeLast() );
              currMap = fieldQuery.searchPhrase( field, phraseCandidate );
              if( currMap != null ){
                addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate, currMap.getBoost(), currMap.getTermOrPhraseNumber() ) );
                break;
              }
            }
          }
          break;
        }
        else{
          phraseCandidate.add( ti );
          currMap = nextMap;
        }
      }
    }
  }
  
  void addIfNoOverlap( WeightedPhraseInfo wpi ){
    for( WeightedPhraseInfo existWpi : phraseList ){
      if( existWpi.isOffsetOverlap( wpi ) ) return;
    }
    phraseList.add( wpi );
  }
  
  public static class WeightedPhraseInfo {

    String text;  // unnecessary member, just exists for debugging purpose
    List<Toffs> termsOffsets;   // usually termsOffsets.size() == 1,
                            // but if position-gap > 1 and slop > 0 then size() could be greater than 1
    float boost;  // query boost
    int seqnum;
    
    public WeightedPhraseInfo( LinkedList<TermInfo> terms, float boost ){
      this( terms, boost, 0 );
    }
    
    public WeightedPhraseInfo( LinkedList<TermInfo> terms, float boost, int number ){
      this.boost = boost;
      this.seqnum = number;
      termsOffsets = new ArrayList<Toffs>( terms.size() );
      TermInfo ti = terms.get( 0 );
      termsOffsets.add( new Toffs( ti.getStartOffset(), ti.getEndOffset() ) );
      if( terms.size() == 1 ){
        text = ti.getText();
        return;
      }
      StringBuilder sb = new StringBuilder();
      sb.append( ti.getText() );
      int pos = ti.getPosition();
      for( int i = 1; i < terms.size(); i++ ){
        ti = terms.get( i );
        sb.append( ti.getText() );
        if( ti.getPosition() - pos == 1 ){
          Toffs to = termsOffsets.get( termsOffsets.size() - 1 );
          to.setEndOffset( ti.getEndOffset() );
        }
        else{
          termsOffsets.add( new Toffs( ti.getStartOffset(), ti.getEndOffset() ) );
        }
        pos = ti.getPosition();
      }
      text = sb.toString();
    }
    
    public int getStartOffset(){
      return termsOffsets.get( 0 ).startOffset;
    }
    
    public int getEndOffset(){
      return termsOffsets.get( termsOffsets.size() - 1 ).endOffset;
    }
    
    public boolean isOffsetOverlap( WeightedPhraseInfo other ){
      int so = getStartOffset();
      int eo = getEndOffset();
      int oso = other.getStartOffset();
      int oeo = other.getEndOffset();
      if( so <= oso && oso < eo ) return true;
      if( so < oeo && oeo <= eo ) return true;
      if( oso <= so && so < oeo ) return true;
      if( oso < eo && eo <= oeo ) return true;
      return false;
    }
    
    @Override
    public String toString(){
      StringBuilder sb = new StringBuilder();
      sb.append( text ).append( '(' ).append( boost ).append( ")(" );
      for( Toffs to : termsOffsets ){
        sb.append( to );
      }
      sb.append( ')' );
      return sb.toString();
    }
    
    public static class Toffs {
      int startOffset;
      int endOffset;
      public Toffs( int startOffset, int endOffset ){
        this.startOffset = startOffset;
        this.endOffset = endOffset;
      }
      public void setEndOffset( int endOffset ){
        this.endOffset = endOffset;
      }
      public int getStartOffset(){
        return startOffset;
      }
      public int getEndOffset(){
        return endOffset;
      }
      @Override
      public String toString(){
        StringBuilder sb = new StringBuilder();
        sb.append( '(' ).append( startOffset ).append( ',' ).append( endOffset ).append( ')' );
        return sb.toString();
      }
    }
  }
}