package info.aaronland.extruder;

import info.aaronland.extruder.Upload;
import info.aaronland.extruder.Document;
import info.aaronland.extruder.DocumentView;

import javax.ws.rs.core.MediaType;
import javax.ws.rs.GET;
import javax.ws.rs.POST;
import javax.ws.rs.Path;
import javax.ws.rs.QueryParam;
import javax.ws.rs.Produces;
import javax.ws.rs.Consumes;
import javax.ws.rs.core.Response;
import javax.ws.rs.core.Response.Status;

import com.sun.jersey.core.header.FormDataContentDisposition;
import com.sun.jersey.multipart.FormDataMultiPart;
import com.sun.jersey.multipart.FormDataBodyPart;

import java.io.InputStream;
import java.io.File;
import java.net.MalformedURLException;
import java.net.URL;

import com.basistech.readability.Readability;
import org.apache.commons.io.FilenameUtils;

// See below inre: Readers (20130901/straup)

import com.basistech.readability.HttpPageReader;
import com.basistech.readability.FilePageReader;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * JAX-RS resource mounted at {@code /java-readability} that runs the
 * java-readability {@link Readability} parser over either a remote page
 * (GET with a {@code url} query parameter) or an uploaded file (multipart
 * POST with a {@code file} part) and returns the result as a
 * {@link DocumentView}.
 */
@Path(value = "/java-readability")
@Produces({MediaType.TEXT_HTML + "; charset=UTF-8", MediaType.APPLICATION_JSON})
public class JavaReadabilityResource {

    private static final Logger LOGGER = LoggerFactory.getLogger(JavaReadabilityResource.class);

    /** Scheme prefix used internally to hand uploaded temp files to {@link #extrudeThis(String)}. */
    private static final String FILE_SCHEME_PREFIX = "file:";

    /**
     * Extracts the article text of a remote page.
     *
     * @param url the page to fetch, taken from the {@code url} query parameter;
     *            must be an absolute {@code http} or {@code https} URL
     * @return {@code 200 OK} with a {@link DocumentView} entity on success;
     *         {@code 406 Not Acceptable} when {@code url} is missing, malformed
     *         or uses a scheme other than http/https;
     *         {@code 500 Internal Server Error} when fetching or parsing fails
     */
    @GET
    public Response extrudeThisURL(@QueryParam("url") String url){

        if (url == null){
            return Response.status(Response.Status.NOT_ACCEPTABLE)
                .entity("Missing required 'url' query parameter")
                .build();
        }

        URL parsed;

        try {
            parsed = new URL(url);
        }

        catch (MalformedURLException e){
            return Response.status(Response.Status.NOT_ACCEPTABLE).entity(e.toString()).build();
        }

        // The url comes straight from the client. extrudeThis() also understands
        // file: URLs (used for uploads), so anything that is not plain HTTP(S) is
        // refused here; otherwise a caller could ask for files on the server's disk.
        String protocol = parsed.getProtocol();

        if (!"http".equalsIgnoreCase(protocol) && !"https".equalsIgnoreCase(protocol)){
            return Response.status(Response.Status.NOT_ACCEPTABLE)
                .entity("Unsupported URL scheme: " + protocol)
                .build();
        }

        Document doc;
        DocumentView view;

        try {
            doc = extrudeThis(url);
            view = new DocumentView(doc);
        }

        catch (Exception e){
            LOGGER.error("Failed to extrude URL " + url, e);
            return Response.status(Response.Status.INTERNAL_SERVER_ERROR).entity(e.toString()).build();
        }

        return Response.status(Response.Status.OK).entity(view).build();
    }

    /**
     * Extracts the article text of an uploaded document.
     *
     * <p>The upload is written to a temporary file via {@link Upload}, parsed,
     * and the temporary file is deleted again whether or not parsing succeeds.
     *
     * @param formParams the multipart form; the document is read from the
     *                   part named {@code file}
     * @return {@code 200 OK} with a {@link DocumentView} entity on success;
     *         {@code 400 Bad Request} when the {@code file} part is absent;
     *         {@code 500 Internal Server Error} when parsing fails
     */
    @POST
    @Consumes(MediaType.MULTIPART_FORM_DATA)
    public Response extrudeThisFile(FormDataMultiPart formParams){

        FormDataBodyPart stream = formParams.getField("file");

        // getField() yields null when the form has no part with that name.
        if (stream == null){
            return Response.status(Response.Status.BAD_REQUEST)
                .entity("Missing required 'file' form field")
                .build();
        }

        InputStream input = stream.getValueAs(InputStream.class);

        Upload upload = new Upload();
        File tmpfile = upload.writeTmpFile(input);

        String uri = "file://" + tmpfile.getAbsolutePath();

        try {
            Document doc = extrudeThis(uri);
            DocumentView view = new DocumentView(doc);
            return Response.status(Response.Status.OK).entity(view).build();
        }

        catch (Exception e){
            LOGGER.error("Failed to extrude uploaded file " + tmpfile.getAbsolutePath(), e);
            return Response.status(Response.Status.INTERNAL_SERVER_ERROR).entity(e.toString()).build();
        }

        finally {
            // Single cleanup point for both the success and the failure path.
            if (!tmpfile.delete()){
                LOGGER.warn("Could not delete temporary file " + tmpfile.getAbsolutePath());
            }
        }
    }

    /**
     * Parses the resource identified by {@code uri} and wraps the result.
     *
     * <p>{@code file:} URIs are read with a {@link FilePageReader}; everything
     * else is handed to an {@link HttpPageReader}. The document title is the
     * base name of the URL as computed by {@link FilenameUtils#getBaseName(String)}.
     *
     * @param uri an absolute URL string
     * @return a {@link Document} built from the extracted article text and the title
     * @throws RuntimeException wrapping the underlying cause when {@code uri}
     *         is not a valid URL or when reading/parsing the page fails
     */
    private Document extrudeThis(String uri){

        URL url;
        String text;
        String title;

        try {
            url = new URL(uri);
        }

        catch (MalformedURLException e){
            throw new RuntimeException(e);
        }

        try {

            Readability parser = new Readability();
            String path = url.toString();

            // Basically I need to write a URIPageReader class to hide
            // all this nonsense because the HttpPageReader uses the Http
            // classes rather than java.net.URL (20130901/straup)

            if (path.startsWith(FILE_SCHEME_PREFIX)){
                // Strip only the leading scheme; a "file:" sequence that happens
                // to occur later in the path must be left alone.
                path = path.substring(FILE_SCHEME_PREFIX.length());
                FilePageReader reader = new FilePageReader();
                parser.setPageReader(reader);
            }

            else {
                HttpPageReader reader = new HttpPageReader();
                parser.setPageReader(reader);
            }

            parser.processDocument(path);

            text = parser.getArticleText();
            title = FilenameUtils.getBaseName(url.toString());
        }

        catch (Exception e){
            throw new RuntimeException(e);
        }

        return new Document(text, title);
    }
}