package folioxml.export.plugins; import folioxml.config.*; import folioxml.core.InvalidMarkupException; import folioxml.core.Pair; import folioxml.core.TokenUtils; import folioxml.export.FileNode; import folioxml.export.InfobaseSetPlugin; import folioxml.export.LogStreamProvider; import folioxml.lucene.InfobaseFieldOptsSet; import folioxml.lucene.analysis.DynamicAnalyzer; import folioxml.lucene.folioQueryParser.QueryParser; import folioxml.slx.ISlxTokenReader; import folioxml.slx.SlxRecord; import folioxml.xml.Node; import folioxml.xml.NodeFilter; import folioxml.xml.NodeList; import folioxml.xml.XmlRecord; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.Term; import org.apache.lucene.search.*; import org.apache.lucene.store.FSDirectory; import java.io.IOException; import java.nio.file.Path; import java.util.HashMap; import java.util.Map; //Fixes jump links and query links across infobase boundaries. public class ResolveHyperlinks implements InfobaseSetPlugin { IndexSearcher searcher = null; InfobaseSet infobaseSet = null; Map<String, DynamicAnalyzer> analyzersPerInfobase = new HashMap<String, DynamicAnalyzer>(); private DynamicAnalyzer loadAnalyzerFromLucene(InfobaseConfig ic) throws IOException, InvalidMarkupException { BooleanQuery.Builder q = new BooleanQuery.Builder(); q.add(new TermQuery(new Term("infobase", ic.getId())), BooleanClause.Occur.MUST); q.add(new TermQuery(new Term("level", "root")), BooleanClause.Occur.MUST); ScoreDoc[] hits = searcher.search(q.build(), 1).scoreDocs; if (hits.length > 0) { //info.workingQueryLinks++; String rootXml = searcher.doc(hits[0].doc).get("xml"); XmlRecord root = new XmlRecord(rootXml); return new DynamicAnalyzer(new InfobaseFieldOptsSet(root)); } else { //Infobase in set not indexed throw new IOException("Infobase " + ic.getId() + " is present in set, but root record is missing from lucene index."); } } private void loadAnalyzers() throws IOException, InvalidMarkupException { for (InfobaseConfig i : infobaseSet.getInfobases()) { analyzersPerInfobase.put(i.getId(), loadAnalyzerFromLucene(i)); } } ExportLocations export; LogStreamProvider provider; Boolean resolve_jump_links; Boolean resolve_query_links; @Override public void beginInfobaseSet(InfobaseSet set, ExportLocations export, LogStreamProvider logs) throws IOException, InvalidMarkupException { infobaseSet = set; searcher = null; this.export = export; this.provider = logs; resolve_jump_links = set.getBool("resolve_jump_links"); if (resolve_jump_links == null) resolve_jump_links = true; resolve_query_links = set.getBool("resolve_query_links"); if (resolve_query_links == null) resolve_query_links = true; Path index = export.getLocalPath("lucene_index", AssetType.LuceneIndex, FolderCreation.None); if (java.nio.file.Files.isDirectory(index)) { searcher = new IndexSearcher(DirectoryReader.open(FSDirectory.open(index))); //Load and parse all infobase root nodes //query infoabse="x" && level="root", then load and parse slx to load the query resolver. loadAnalyzers(); } else { System.err.println("Failed to locate lucene index; links will not be resolved"); } } InfobaseConfig currentInfobase = null; @Override public void beginInfobase(InfobaseConfig infobase) throws IOException { currentInfobase = infobase; } @Override public ISlxTokenReader wrapSlxReader(ISlxTokenReader reader) { return reader; } @Override public void onSlxRecordParsed(SlxRecord clean_slx) throws InvalidMarkupException, IOException { } @Override public void onRecordTransformed(XmlRecord r, SlxRecord dirty_slx) throws InvalidMarkupException, IOException { } @Override public FileNode assignFileNode(XmlRecord xr, SlxRecord dirty_slx) throws InvalidMarkupException, IOException { return null; } @Override public void onRecordComplete(XmlRecord xr, FileNode file) throws InvalidMarkupException, IOException { if (searcher == null) return; //Do nothing if we can't access lucene. NodeList nodes = new NodeList(xr); if (resolve_jump_links) { //All jump destinations must be an anchor tag, or we can't link to them. for (Node n : nodes.search(new NodeFilter("bookmark")).list()) { n.set("id", hashDestination(currentInfobase, n.get("name"))); n.setTagName("a"); } } //Convert local and remote jump and query links NodeList queryLinks = nodes.search(new NodeFilter("link", "query", null)); for (Node n : queryLinks.list()) { if (resolve_query_links) { Pair<String, String> result = TryGetResultUri(n.get("infobase"), TokenUtils.entityDecodeString(n.get("query")), file); n.set("resolved", result.getSecond()); if (result.getFirst() != null) { n.set("href", result.getFirst()); n.setTagName("a"); } else { provider.getNamedStream("broken_query_links").append("Broken query link").append(" in record ").append(n.rootNode().get("folioId")).append("\n").append(n.toXmlString((true))).append("\n"); } } else { provider.getNamedStream("unresolved_query_links").append("Query link").append(" in record ").append(n.rootNode().get("folioId")).append("\n").append(n.toXmlString((true))).append("\n"); } } //Convert jump links NodeList jumpLinks = nodes.search(new NodeFilter("link", "jumpDestination", null)); for (Node n : jumpLinks.list()) { if (resolve_jump_links) { Pair<String, String> result = TryGetDestinationUri(n.get("infobase"), n.get("jumpDestination"), file); n.set("resolved", result.getSecond()); if (result.getFirst() != null) { n.set("href", result.getFirst()); n.setTagName("a"); } else { provider.getNamedStream("broken_jump_links").append("Broken jump link").append(" in record ").append(n.rootNode().get("folioId")).append("\n").append(n.toXmlString((true))).append("\n"); n.pull(); } } else { provider.getNamedStream("unresolved_jump_links").append("Jump link").append(" in record ").append(n.rootNode().get("folioId")).append("\n").append(n.toXmlString((true))).append("\n"); } } } private String hashDestination(InfobaseConfig infobase, String name) { //Normalize destination infobase ID. String iid = infobase.getId(); //Ids may not begin with a number, prefix return "d" + Integer.toHexString(iid.hashCode()) + "_" + Integer.toHexString(name.hashCode()); } @Override public void endInfobase(InfobaseConfig infobase) throws IOException, InvalidMarkupException { } @Override public void endInfobaseSet(InfobaseSet set) throws IOException { if (searcher != null) searcher.getIndexReader().close(); /* System.out.println("Invalid query links (for syntax reasons): " + queryInfo.invalidQueryLinks); System.out.println("No result query links: " + queryInfo.noresultQueryLinks); System.out.println("Working query links: " + queryInfo.workingQueryLinks); System.out.println("Cross-infobase query links: " + queryInfo.crossInfobaseQueries); */ } public String GetUriFor(Document d, FileNode n, String overrideFragment) throws IOException { String relative_path = d.get("relative_path"); String uri_fragment = d.get("uri_fragment"); Path doc_base = export.getLocalPath(n.getRelativePath(), AssetType.Html, FolderCreation.None); return export.getUri(relative_path, AssetType.Html, doc_base) + ((overrideFragment == null) ? uri_fragment : overrideFragment); } public Pair<String, String> TryGetResultUri(String infobase, String query, FileNode fn) throws InvalidMarkupException, IOException { InfobaseConfig targetConfig = infobase == null ? currentInfobase : infobaseSet.byName(infobase); if (targetConfig == null) { return new Pair<String, String>(null, "destination infobase is external to configuration set"); } Analyzer a = this.analyzersPerInfobase.get(targetConfig.getId()); try { //Lookup analyzer based on infobase QueryParser qp = new QueryParser(a, InfobaseFieldOptsSet.getStaticDefaultField()); Query q = qp.parse(query); if (q == null) { System.out.println("Failed to convert query: " + query); //info.invalidQueryLinks ++; return new Pair<String, String>(null, "failed to parse query"); } else { String newQuery = q.toString(); ScoreDoc[] hits = searcher.search(q, 1).scoreDocs; if (hits.length > 0) { Document d = searcher.doc(hits[0].doc); //info.workingQueryLinks++; return new Pair<String, String>(GetUriFor(d, fn, null), "true"); } else { //info.noresultQueryLinks++; System.out.println("No results for " + newQuery + " (Previously " + query + ")"); return new Pair<String, String>(null, "no results for query " + newQuery); } } } catch (InvalidMarkupException ex) { System.out.println("Failed on: " + query); System.out.println(ex.getMessage()); //info.invalidQueryLinks++; return new Pair<String, String>(null, "exception occurred: " + ex.toString()); } catch (IOException e) { System.out.println("Failed on: " + query); // TODO Auto-generated catch block e.printStackTrace(); // info.invalidQueryLinks++; return new Pair<String, String>(null, "exception occurred: " + e.toString()); } /*catch (ParseException e) { System.out.println("Failed on: " + query); // TODO Auto-generated catch block e.printStackTrace(); //info.invalidQueryLinks++; return new Pair<String, String>(null, "exception occurred: " + e.toString()); }*/ } public Pair<String, String> TryGetDestinationUri(String infobase, String jumpDestination, FileNode fn) throws IOException, InvalidMarkupException { InfobaseConfig targetConfig = infobase == null ? currentInfobase : infobaseSet.byName(infobase); if (targetConfig == null) { return new Pair<String, String>(null, "destination infobase is external to configuration set"); } BooleanQuery.Builder qb = new BooleanQuery.Builder(); qb.add(new TermQuery(new Term("infobase", targetConfig.getId())), BooleanClause.Occur.MUST); qb.add(new TermQuery(new Term("destinations", jumpDestination)), BooleanClause.Occur.MUST); ScoreDoc[] hits = searcher.search(qb.build(), 1).scoreDocs; if (hits.length > 0) { String bookmarkHash = hashDestination(currentInfobase, jumpDestination); //info.workingQueryLinks++; String newUri = GetUriFor(searcher.doc(hits[0].doc), fn, "#" + bookmarkHash); //TODO: improve by modifying uri fragment to link directly to bookmark if (newUri == null) { //We aren't providing structure throw new InvalidMarkupException("Hyperlinks cannot be resolved unless ExportStructure (or another plugin that populates the uri attribute for records) is installed prior to indexing"); } return new Pair<String, String>(newUri, "true"); } else { //TODO; broken jump link! return new Pair<String, String>(null, "no corresponding jump destination found for infobase " + targetConfig.getId() + " and bookmark '" + jumpDestination + "'."); } } }