package org.apache.lucene.search.vectorhighlight; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.util.ArrayList; import java.util.LinkedList; import java.util.List; import org.apache.lucene.search.vectorhighlight.FieldQuery.QueryPhraseMap; import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo; /** * FieldPhraseList has a list of WeightedPhraseInfo that is used by FragListBuilder * to create a FieldFragList object. */ public class FieldPhraseList { LinkedList<WeightedPhraseInfo> phraseList = new LinkedList<WeightedPhraseInfo>(); /** * a constructor. * * @param fieldTermStack FieldTermStack object * @param fieldQuery FieldQuery object */ public FieldPhraseList( FieldTermStack fieldTermStack, FieldQuery fieldQuery ){ final String field = fieldTermStack.getFieldName(); LinkedList<TermInfo> phraseCandidate = new LinkedList<TermInfo>(); QueryPhraseMap currMap = null; QueryPhraseMap nextMap = null; while( !fieldTermStack.isEmpty() ){ phraseCandidate.clear(); TermInfo ti = fieldTermStack.pop(); currMap = fieldQuery.getFieldTermMap( field, ti.getText() ); // if not found, discard top TermInfo from stack, then try next element if( currMap == null ) continue; // if found, search the longest phrase phraseCandidate.add( ti ); while( true ){ ti = fieldTermStack.pop(); nextMap = null; if( ti != null ) nextMap = currMap.getTermMap( ti.getText() ); if( ti == null || nextMap == null ){ if( ti != null ) fieldTermStack.push( ti ); if( currMap.isValidTermOrPhrase( phraseCandidate ) ){ addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate, currMap.getBoost(), currMap.getTermOrPhraseNumber() ) ); } else{ while( phraseCandidate.size() > 1 ){ fieldTermStack.push( phraseCandidate.removeLast() ); currMap = fieldQuery.searchPhrase( field, phraseCandidate ); if( currMap != null ){ addIfNoOverlap( new WeightedPhraseInfo( phraseCandidate, currMap.getBoost(), currMap.getTermOrPhraseNumber() ) ); break; } } } break; } else{ phraseCandidate.add( ti ); currMap = nextMap; } } } } void addIfNoOverlap( WeightedPhraseInfo wpi ){ for( WeightedPhraseInfo existWpi : phraseList ){ if( existWpi.isOffsetOverlap( wpi ) ) return; } phraseList.add( wpi ); } public static class WeightedPhraseInfo { String text; // unnecessary member, just exists for debugging purpose List<Toffs> termsOffsets; // usually termsOffsets.size() == 1, // but if position-gap > 1 and slop > 0 then size() could be greater than 1 float boost; // query boost int seqnum; public WeightedPhraseInfo( LinkedList<TermInfo> terms, float boost ){ this( terms, boost, 0 ); } public WeightedPhraseInfo( LinkedList<TermInfo> terms, float boost, int number ){ this.boost = boost; this.seqnum = number; termsOffsets = new ArrayList<Toffs>( terms.size() ); TermInfo ti = terms.get( 0 ); termsOffsets.add( new Toffs( ti.getStartOffset(), ti.getEndOffset() ) ); if( terms.size() == 1 ){ text = ti.getText(); return; } StringBuilder sb = new StringBuilder(); sb.append( ti.getText() ); int pos = ti.getPosition(); for( int i = 1; i < terms.size(); i++ ){ ti = terms.get( i ); sb.append( ti.getText() ); if( ti.getPosition() - pos == 1 ){ Toffs to = termsOffsets.get( termsOffsets.size() - 1 ); to.setEndOffset( ti.getEndOffset() ); } else{ termsOffsets.add( new Toffs( ti.getStartOffset(), ti.getEndOffset() ) ); } pos = ti.getPosition(); } text = sb.toString(); } public int getStartOffset(){ return termsOffsets.get( 0 ).startOffset; } public int getEndOffset(){ return termsOffsets.get( termsOffsets.size() - 1 ).endOffset; } public boolean isOffsetOverlap( WeightedPhraseInfo other ){ int so = getStartOffset(); int eo = getEndOffset(); int oso = other.getStartOffset(); int oeo = other.getEndOffset(); if( so <= oso && oso < eo ) return true; if( so < oeo && oeo <= eo ) return true; if( oso <= so && so < oeo ) return true; if( oso < eo && eo <= oeo ) return true; return false; } @Override public String toString(){ StringBuilder sb = new StringBuilder(); sb.append( text ).append( '(' ).append( boost ).append( ")(" ); for( Toffs to : termsOffsets ){ sb.append( to ); } sb.append( ')' ); return sb.toString(); } public static class Toffs { int startOffset; int endOffset; public Toffs( int startOffset, int endOffset ){ this.startOffset = startOffset; this.endOffset = endOffset; } public void setEndOffset( int endOffset ){ this.endOffset = endOffset; } public int getStartOffset(){ return startOffset; } public int getEndOffset(){ return endOffset; } @Override public String toString(){ StringBuilder sb = new StringBuilder(); sb.append( '(' ).append( startOffset ).append( ',' ).append( endOffset ).append( ')' ); return sb.toString(); } } } }