package org.mitre.rhex;
import com.google.gson.*;
import edu.umd.cs.findbugs.annotations.NonNull;
import org.apache.commons.lang.StringUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.util.EntityUtils;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.Namespace;
import org.mitre.test.*;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.*;
import java.util.regex.Pattern;
/**
* Test for documents as discovered via section atom feeds
*
* <pre>
* 6.5 baseURL/sectionpath/documentname
*
* 6.5.1 GET
*
* This operation returns a representation of the document that is identified by documentname within the
* section identified by sectionpath. The documentname is typically assigned by the underlying system
* and is not guaranteed to be identical across two different systems.
*
* Implementations MAY use identifiers contained within the infoset of the document as documentnames.
*
* If no document of name documentname exists, the implementation MUST return a HTTP status code 404.
*
* Status Codes: <B>200</B>, 404
* </pre>
*
* @author Jason Mathews, MITRE Corp.
* Date: 2/20/12 10:45 AM
*/
public class DocumentTest extends BaseXmlTest {
private final boolean debugEnabled = log.isDebugEnabled();
private final boolean traceEnabled = log.isTraceEnabled();
// regexp for mime-type (rfc2046); e.g. application/rss+xml, audio/L2, application/x-pkcs7-signature, etc.
private static final Pattern mimePattern = Pattern.compile("[a-z]+/\\S+");
private JsonParser parser;
private int count;
public DocumentTest() {
// forces source test to keep its Document objects after it executes
setProperty(BaseSectionFromRootXml.class, BaseSectionFromRootXml.PROP_KEEP_SECTION_DOM_BOOL, Boolean.TRUE);
}
@NonNull
public String getId() {
return "6.5.1.2";
}
@Override
public boolean isRequired() {
return true; // implied MUST
}
@NonNull
public String getName() {
return "GET operation returns a representation of the document that is identified by documentname within the section";
}
@NonNull
public List<Class<? extends TestUnit>> getDependencyClasses() {
return Collections.<Class<? extends TestUnit>> singletonList(BaseSectionFromRootXml.class); // 6.4.1.1
}
public void execute() throws TestException {
// pre-conditions: for this test to be executed the prerequisite test BaseSectionFromRootXml must have passed
// with 200 HTTP and has Map of all DOMs
TestUnit baseTest = getDependency(BaseSectionFromRootXml.class);
if (baseTest == null) {
// assertion failed: this should never be null
log.error("Failed to retrieve prerequisite test");
setStatus(StatusEnumType.SKIPPED, "Failed to retrieve prerequisite test");
return;
}
Map<String, Document> docMap = ((BaseSectionFromRootXml)baseTest).getDocumentMap();
if (docMap.isEmpty()) {
log.error("Failed to retrieve prerequisite test");
setStatus(StatusEnumType.SKIPPED, "Failed to retrieve prerequisite test results");
return;
}
final Context context = Loader.getInstance().getContext();
try {
for (Document doc : docMap.values()) {
checkFeed(context, doc);
}
} catch (URISyntaxException e) {
throw new TestException(e);
} catch (IOException e) {
throw new TestException(e);
} catch (JDOMException e) {
throw new TestException(e);
}
setStatus(StatusEnumType.SUCCESS);
}
private void checkFeed(Context context, Document doc) throws URISyntaxException, IOException, JDOMException, TestException {
final Element root = doc.getRootElement();
final Namespace ns = Namespace.getNamespace(NAMESPACE_W3_ATOM_2005);
// System.out.println("check document feed " + root.getChildText("title", ns));
/*
expecting XML like this:
<?xml version="1.0" encoding="UTF-8"?>
<feed xml:lang="en-US" xmlns="http://www.w3.org/2005/Atom">
<id>tag:hdata.herokuapp.com,2005:/records/1460/c32</id>
<link rel="alternate" type="text/html" href="https://hdata.herokuapp.com"/>
<link rel="self" type="application/atom+xml" href="https://hdata.herokuapp.com/records/1460/c32"/>
<title>C32</title>
<entry>
<id>tag:hdata.herokuapp.com,2005:Record/4ef38c7b00f4bab04c000032</id>
<updated>2011-12-22T20:24:32+00:00</updated>
<link rel="alternate" type="text/html" href="https://hdata.herokuapp.com/records/1460/c32/1460"/>
<title>1460</title>
</entry>
</feed>
*/
for(Object obj : root.getChildren("entry", ns)) {
if (!(obj instanceof Element)) continue;
Element e = (Element)obj;
// allow multiple links per entry
for(Object linkChild : e.getChildren("link", ns)) {
if (!(linkChild instanceof Element)) continue;
Element link = (Element)linkChild;
final String href = link.getAttributeValue("href");
if (StringUtils.isNotBlank(href)) {
checkDocument(context, href, link.getAttributeValue("type"));
}
} // for each link
} // for each entry
}
private void checkDocument(Context context, String href, String type) throws URISyntaxException, IOException, TestException {
URI baseURL = new URI(href);
if (!baseURL.isAbsolute()) {
// REVIEW: is this legal wrt HL7 spec
baseURL = context.getBaseURL().resolve(baseURL);
log.trace("relative URL {} -> {}", href, baseURL);
}
String contentType = getValidType(type);
if (debugEnabled && type != null && !type.equals(contentType)) {
System.out.println("\tcontent type=" + contentType);
}
if ("localhost".equalsIgnoreCase(baseURL.getHost())) {
addLogWarning("URL cannot be localhost");
return;
}
if (contentType == null) {
if (log.isDebugEnabled()) {
if (type == null) log.debug("null content type");
else log.debug("null content type from type=" + type);
}
contentType = "application/xml,text/xml,application/json,text/html;q=0.9,*/*;q=0.8";
} else {
// NOTE: implementation with Ruby on Rails does not handle the */* or quality weights on Accept header correctly
// so for example: Accept => application/xml,*/*;q=0.8 returns HTML document even when an XML representation exists.
// Use multiple formats in accept header for 25% of the documents.
// if (contentType.contains("/xml")) return; // skip XML for now
if (++count % 4 == 0) {
// if add multiple mime types then get HTML output
if (MIME_APPLICATION_JSON.equals(contentType))
contentType += ",application/xml;q=0.9";
else
contentType += ",application/json;q=0.9";
// else contentType += ",*/*;q=0.8"; // responds 406 status code
}
}
HttpClient client = context.getHttpClient();
try {
if (debugEnabled) System.out.println("GET URL=" + baseURL);
//System.out.println("GET URL=" + baseURL);
HttpGet req = new HttpGet(baseURL);
req.setHeader("Accept", contentType);
if (debugEnabled) System.out.println("Accept=" + contentType);
validateContent(baseURL, req, context.executeRequest(client, req), context, contentType);
} finally {
client.getConnectionManager().shutdown();
}
}
private void validateContent(URI baseURL, HttpGet req, HttpResponse response,
Context context, String requestMediaType)
throws IOException, TestException
{
// TODO: what can we test about these document URLs - does any error in document fail overall conformance for the spec requirement
int code = response.getStatusLine().getStatusCode();
if (debugEnabled) {
System.out.println("Response status=" + code);
}
if (code == 406) {
if (addLogWarning("Failed to retrieve content for requested media type")) {
dumpResponse(req, response, true);
}
} else if (code != 200) {
dumpResponse(req, response, true);
throw new TestException("Unexpected HTTP response: " + code);
}
final HttpEntity entity = response.getEntity();
if (entity == null) {
// no body
addWarning("encountered non-body response to document request");
log.info("no BODY in response for document: " + baseURL.getPath());
return;
}
long len = entity.getContentLength();
if (len <= 0) {
log.warn("section content length=" + len + ", expecting len > 0");
return;
}
final String contentType = ClientHelper.getContentType(entity);
if (contentType == null) {
if (addLogWarning("Failed to determine content type") && debugEnabled) {
dumpResponse(req, response, true);
}
return;
}
int ind = requestMediaType.indexOf(',');
if (ind > 0) requestMediaType = requestMediaType.substring(0, ind);
ind = requestMediaType.indexOf(';'); // strip off any parameter (e.g. ;q=0.9)
if (ind > 0) requestMediaType = requestMediaType.substring(0, ind);
if (!requestMediaType.equalsIgnoreCase(contentType)) {
addLogWarning(String.format("Returned content type: %s does not match requested type: %s",
requestMediaType ,contentType));
}
if (ClientHelper.isXmlContentType(contentType)) {
// content-type = application/atom+xml OR text/xml OR application/xml
ByteArrayOutputStream bos = new ByteArrayOutputStream();
entity.writeTo(bos);
try {
getDefaultDocument(context, bos);
} catch (JDOMException e) {
addWarning(e.getMessage());
log.warn("", e);
}
} else if (contentType.equals(MIME_APPLICATION_JSON)) {
if (len > 1) {
try {
String json = EntityUtils.toString(entity);
if (StringUtils.isBlank(json)) {
addLogWarning("Empty json document");
} else {
// e.g. {"codes":{"SNOMED-CT":["84114007"],"ICD-9-CM":["428.9"],"ICD-10-CM":["I50.9"]},...
try {
if (parser == null) parser = new JsonParser();
parser.parse(json);
} catch(JsonSyntaxException e) {
log.warn("", e);
}
}
} catch(Exception e) {
log.debug("", e);
}
} else {
// 1-byte json documents are bogus
addLogWarning("Empty json document");
}
} // else System.out.println("XXX: other type="+ contentType);
// if not XML/JSON then do nothing for now
}
private static String getValidType(String type) {
if (type == null || type.length() == 0) return null;
int ind = type.indexOf(';');
if (ind == 0) return null;
if (ind > 0) {
// strip off parameter values (e.g. charset=...) from mime type if present
type = type.substring(0, ind);
}
type = type.trim();
// regexp for mime-type (rfc2046) /^[a-z]+/\\S+/ matching mime type (e.g. application/rss+xml)
return mimePattern.matcher(type).matches() ? type : null;
}
}