package org.basex.query.util; import static org.basex.util.Token.*; import org.basex.core.Text; import org.basex.data.Data; import org.basex.data.FTPos; import org.basex.data.FTPosData; import org.basex.query.item.DBNode; import org.basex.query.item.ANode; import org.basex.util.TokenBuilder; import org.basex.util.ft.FTLexer; import org.basex.util.ft.FTSpan; import org.basex.util.list.TokenList; /** * Class for constructing decorated full-text nodes. * * @author BaseX Team 2005-12, BSD License * @author Christian Gruen */ final class DataFTBuilder { /** Dots. */ private static final byte[] DOTS = token(Text.DOTS); /** Full-text position data. */ private final FTPosData ftpos; /** Length of full-text extract. */ private final int ftlen; /** * Constructor. * @param pos full-text position data * @param len length of extract */ public DataFTBuilder(final FTPosData pos, final int len) { ftpos = pos; ftlen = len; } /** * Builds full-text information. * @param nd node to be added * @return number of added nodes */ TokenList build(final ANode nd) { // check full-text mode if(!(nd instanceof DBNode)) return null; // check if full-text data exists for the current node final DBNode node = (DBNode) nd; return build(node.data, node.pre, nd.string()); } /** * Builds full-text information. * @param d data reference * @param p pre value * @param str string value * @return number of added nodes */ TokenList build(final Data d, final int p, final byte[] str) { final FTPos ftp = ftpos.get(d, p); if(ftp == null) return null; boolean marked = false; final TokenList tl = new TokenList(); final TokenBuilder tb = new TokenBuilder(); final FTLexer lex = new FTLexer().sc().init(str); int len = -ftlen; while(lex.hasNext()) { final FTSpan span = lex.next(); // check if current text is still to be marked or already marked if(ftp.contains(span.pos) || marked) { if(tb.size() != 0) { // write current text node tl.add(tb.finish()); len += tb.size(); tb.reset(); // skip construction if(len >= 0 && tl.size() > 1 && !marked) break; } if(!marked) tl.add((byte[]) null); marked ^= true; } // add span tb.add(span.text); } // write last text node if(tb.size() != 0) { tl.add(tb.finish()); len += tb.size(); } // chop first and last text if(len > 0) { final int ts = tl.size(); // get first text (empty if it is a full-text match) final byte[] first = tl.get(0) != null ? tl.get(0) : EMPTY; // get last text (empty if it is a full-text match) final byte[] last = tl.get(ts - 2) != null ? tl.get(ts - 1) : EMPTY; if(first != EMPTY) { // remove leading characters of first text final double l = first.length + last.length; final int ll = Math.min(first.length, (int) (first.length / l * len)); tl.set(0, concat(DOTS, subtoken(first, ll))); len -= ll; } if(last != EMPTY && len > 0) { // remove trailing characters of last text final int ll = Math.min(last.length, len); tl.set(ts - 1, concat(subtoken(last, 0, last.length - ll), DOTS)); len -= ll; } // still too much text: shorten inner texts for(int t = ts - 2; t > 0 && len > 0; t--) { final byte[] txt = tl.get(t); // skip elements, marked texts and too short text snippets if(txt == null || tl.get(t - 1) == null) continue; final int ll = Math.min(txt.length, len); tl.set(t, concat(subtoken(txt, 0, (txt.length - ll) / 2), DOTS, subtoken(txt, (txt.length + ll) / 2))); len -= ll; } } return tl; } }