/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.server.resource;
import javax.ws.rs.GET;
import javax.ws.rs.Path;
import javax.ws.rs.Produces;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.CompositeParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.ParserDecorator;
import org.apache.tika.server.HTMLHelper;
import org.eclipse.jetty.util.ajax.JSON;
/**
* <p>Provides details of all the {@link Parser}s registered with
* Apache Tika, similar to <em>--list-parsers</em> and
* <em>--list-parser-details</em> within the Tika CLI.
*/
@Path("/parsers")
public class TikaParsers {
private static final ParseContext EMPTY_PC = new ParseContext();
private HTMLHelper html;
public TikaParsers() {
this.html = new HTMLHelper();
}
@GET
@Path("/details")
@Produces("text/html")
public String getParserDetailsHTML() {
return getParsersHTML(true);
}
@GET
@Produces("text/html")
public String getParsersHTML() {
return getParsersHTML(false);
}
protected String getParsersHTML(boolean withMimeTypes) {
ParserDetails p = new ParserDetails(TikaResource.getConfig().getParser());
StringBuffer h = new StringBuffer();
html.generateHeader(h, "Parsers available to Apache Tika");
parserAsHTML(p, withMimeTypes, h, 2);
html.generateFooter(h);
return h.toString();
}
private void parserAsHTML(ParserDetails p, boolean withMimeTypes, StringBuffer html, int level) {
html.append("<h");
html.append(level);
html.append(">");
html.append(p.shortName);
html.append("</h");
html.append(level);
html.append(">");
html.append("<p>Class: ");
html.append(p.className);
html.append("</p>");
if (p.isDecorated) {
html.append("<p>Decorated Parser");
if (p.decoratedBy != null)
html.append( " - ").append(p.decoratedBy);
html.append("</p>");
}
if (p.isComposite) {
html.append("<p>Composite Parser</p>");
html.append("<div style=\"margin-left: 1em\">\n");
for (Parser cp : p.childParsers) {
parserAsHTML(new ParserDetails(cp), withMimeTypes, html, level + 1);
}
html.append("</div>\n");
} else if (withMimeTypes) {
html.append("<p>Mime Types:");
html.append("<ul>");
for (MediaType mt : p.supportedTypes) {
html.append("<li>");
html.append(mt.toString());
html.append("</li>");
}
html.append("</ul>");
html.append("</p>");
}
html.append("\n");
}
@GET
@Path("/details")
@Produces(javax.ws.rs.core.MediaType.APPLICATION_JSON)
public String getParserDetailsJSON() {
return getParsersJSON(true);
}
@GET
@Produces(javax.ws.rs.core.MediaType.APPLICATION_JSON)
public String getParsersJSON() {
return getParsersJSON(false);
}
protected String getParsersJSON(boolean withMimeTypes) {
Map<String, Object> details = new HashMap<String, Object>();
parserAsMap(new ParserDetails(TikaResource.getConfig().getParser()), withMimeTypes, details);
return JSON.toString(details);
}
private void parserAsMap(ParserDetails p, boolean withMimeTypes, Map<String, Object> details) {
details.put("name", p.className);
details.put("composite", p.isComposite);
details.put("decorated", p.isDecorated);
if (p.isComposite) {
List<Map<String, Object>> c = new ArrayList<Map<String, Object>>();
for (Parser cp : p.childParsers) {
Map<String, Object> cdet = new HashMap<String, Object>();
parserAsMap(new ParserDetails(cp), withMimeTypes, cdet);
c.add(cdet);
}
details.put("children", c);
} else if (withMimeTypes) {
List<String> mts = new ArrayList<String>(p.supportedTypes.size());
for (MediaType mt : p.supportedTypes) {
mts.add(mt.toString());
}
details.put("supportedTypes", mts);
}
}
@GET
@Path("/details")
@Produces("text/plain")
public String getParserDetailssPlain() {
return getParsersPlain(true);
}
@GET
@Produces("text/plain")
public String getParsersPlain() {
return getParsersPlain(false);
}
protected String getParsersPlain(boolean withMimeTypes) {
StringBuffer text = new StringBuffer();
renderParser(new ParserDetails(TikaResource.getConfig().getParser()), withMimeTypes, text, "");
return text.toString();
}
private void renderParser(ParserDetails p, boolean withMimeTypes, StringBuffer text, String indent) {
String nextIndent = indent + " ";
text.append(indent);
text.append(p.className);
if (p.isDecorated) {
text.append(" (Decorated Parser");
if (p.decoratedBy != null) {
text.append(" ").append(p.decoratedBy);
}
text.append(")");
}
if (p.isComposite) {
text.append(" (Composite Parser):\n");
for (Parser cp : p.childParsers) {
renderParser(new ParserDetails(cp), withMimeTypes, text, nextIndent);
}
} else {
text.append("\n");
if (withMimeTypes) {
for (MediaType mt : p.supportedTypes) {
text.append(nextIndent);
text.append("Supports: ");
text.append(mt.toString());
text.append("\n");
}
}
}
}
private static class ParserDetails {
private String className;
private String shortName;
private boolean isComposite;
private boolean isDecorated;
private String decoratedBy;
private Set<MediaType> supportedTypes;
private List<Parser> childParsers;
private ParserDetails(Parser p) {
if (p instanceof ParserDecorator) {
isDecorated = true;
decoratedBy = ((ParserDecorator)p).getDecorationName();
p = ((ParserDecorator)p).getWrappedParser();
}
className = p.getClass().getName();
shortName = className.substring(className.lastIndexOf('.') + 1);
if (p instanceof CompositeParser) {
isComposite = true;
supportedTypes = Collections.emptySet();
// Get the unique set of child parsers
Set<Parser> children = new HashSet<Parser>(
((CompositeParser) p).getParsers(EMPTY_PC).values());
// Sort it by class name
childParsers = new ArrayList<Parser>(children);
Collections.sort(childParsers, new Comparator<Parser>() { @Override
public int compare(Parser p1, Parser p2) {
return p1.getClass().getName().compareTo(p2.getClass().getName());
}
});
} else {
supportedTypes = p.getSupportedTypes(EMPTY_PC);
}
}
}
}