package org.basex.query.ft; import static org.basex.query.util.Err.*; import static org.basex.util.Token.*; import java.io.IOException; import java.util.Arrays; import org.basex.build.MemBuilder; import org.basex.build.Parser; import org.basex.core.Context; import org.basex.data.Data; import org.basex.data.Nodes; import org.basex.io.IO; import org.basex.query.QueryException; import org.basex.query.QueryProcessor; import org.basex.util.InputInfo; import org.basex.util.hash.TokenMap; import org.basex.util.hash.TokenObjMap; import org.basex.util.list.TokenList; /** * Simple Thesaurus for full-text requests. * * @author BaseX Team 2005-12, BSD License * @author Christian Gruen */ public final class Thesaurus { /** Thesaurus root references. */ private final TokenObjMap<ThesNode> nodes = new TokenObjMap<ThesNode>(); /** Relationships. */ private static final TokenMap RSHIPS = new TokenMap(); /** Database properties. */ private final Context ctx; static { RSHIPS.add(token("NT"), token("BT")); RSHIPS.add(token("BT"), token("BT")); RSHIPS.add(token("BTG"), token("NTG")); RSHIPS.add(token("NTG"), token("BTG")); RSHIPS.add(token("BTP"), token("NTP")); RSHIPS.add(token("NTP"), token("BTP")); RSHIPS.add(token("USE"), token("UF")); RSHIPS.add(token("UF"), token("USE")); RSHIPS.add(token("RT"), token("RT")); } /** Thesaurus node. */ static class ThesNode { /** Related nodes. */ ThesNode[] nodes = new ThesNode[1]; /** Relationships. */ byte[][] rs = new byte[1][]; /** Term. */ byte[] term; /** Entries. */ int size; /** * Adds a relationship to the node. * @param n target node * @param r relationship */ void add(final ThesNode n, final byte[] r) { if(size == nodes.length) { final int s = size << 1; nodes = Arrays.copyOf(nodes, s); rs = Arrays.copyOf(rs, s); } nodes[size] = n; rs[size++] = r; } } /** Input file. */ private final IO file; /** Relationship. */ private final byte[] rel; /** Minimum level. */ private final long min; /** Maximum level. */ private final long max; /** * Constructor. * @param fl file reference * @param c database context */ public Thesaurus(final IO fl, final Context c) { this(fl, EMPTY, 0, Long.MAX_VALUE, c); } /** * Reads a thesaurus file. * @param fl file reference * @param rs relationship * @param mn minimum level * @param mx maximum level * @param c database context */ public Thesaurus(final IO fl, final byte[] rs, final long mn, final long mx, final Context c) { file = fl; rel = rs; min = mn; max = mx; ctx = c; } /** * Initializes the thesaurus. * @param ii input info * @throws QueryException query exception */ private void init(final InputInfo ii) throws QueryException { try { final Data data = MemBuilder.build( Parser.xmlParser(file, ctx.prop), ctx.prop); final Nodes result = nodes("//*:entry", new Nodes(0, data)); for(int n = 0; n < result.size(); ++n) { build(new Nodes(result.list[n], data)); } } catch(final IOException ex) { NOTHES.thrw(ii, file); } } /** * Builds the thesaurus. * @param in input nodes * @throws QueryException query exception */ private void build(final Nodes in) throws QueryException { final Nodes sub = nodes("*:synonym", in); if(sub.size() == 0) return; final ThesNode node = node(text("*:term", in)); for(int n = 0; n < sub.size(); ++n) { final Nodes tmp = new Nodes(sub.list[n], sub.data); final ThesNode snode = node(text("*:term", tmp)); final byte[] rs = text("*:relationship", tmp); node.add(snode, rs); final byte[] srs = RSHIPS.get(rs); if(srs != null) snode.add(node, srs); build(sub); } } /** * Returns a node for the specified term. * @param term term * @return node */ private ThesNode node(final byte[] term) { ThesNode node = nodes.get(term); if(node == null) { node = new ThesNode(); node.term = term; nodes.add(term, node); } return node; } /** * Performs a query and returns the result as nodes. * @param query query string * @param in input nodes * @return resulting nodes * @throws QueryException query exception */ private Nodes nodes(final String query, final Nodes in) throws QueryException { return new QueryProcessor(query, in, ctx).queryNodes(); } /** * Performs a query and returns the first result as text. * @param query query string * @param in input nodes * @return resulting text * @throws QueryException query exception */ private byte[] text(final String query, final Nodes in) throws QueryException { return new QueryProcessor(query, in, ctx).iter().next().string(null); } /** * Finds a thesaurus term. * @param ii input info * @param list result list * @param ft token * @throws QueryException query exception */ void find(final InputInfo ii, final TokenList list, final byte[] ft) throws QueryException { if(nodes.size() == 0) init(ii); find(list, nodes.get(ft), 1); } /** * Recursively collects relevant thesaurus terms. * @param list result list * @param node input node * @param lev current level */ private void find(final TokenList list, final ThesNode node, final long lev) { if(lev > max || node == null) return; for(int n = 0; n < node.size; ++n) { if(rel.length == 0 || eq(node.rs[n], rel)) { final byte[] term = node.nodes[n].term; if(!list.contains(term)) { list.add(term); find(list, node.nodes[n], lev + 1); } } } } /** * Compares two thesaurus instances. * @param th instance to be compared * @return result of check */ boolean sameAs(final Thesaurus th) { return file.eq(th.file) && min == th.min && max == th.max && eq(rel, th.rel); } }