package info.aaronland.extruder;
import info.aaronland.extruder.Document;
import info.aaronland.extruder.DocumentView;
import info.aaronland.extruder.Upload;

import com.sun.jersey.core.header.FormDataContentDisposition;
import com.sun.jersey.multipart.FormDataBodyPart;
import com.sun.jersey.multipart.FormDataMultiPart;
import de.l3s.boilerpipe.extractors.ArticleExtractor;
import de.l3s.boilerpipe.extractors.DefaultExtractor;
import org.apache.commons.io.FilenameUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import javax.ws.rs.Consumes;
import javax.ws.rs.GET;
import javax.ws.rs.POST;
import javax.ws.rs.Path;
import javax.ws.rs.Produces;
import javax.ws.rs.QueryParam;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
import javax.ws.rs.core.Response.Status;
/**
 * JAX-RS resource mounted at {@code /boilerpipe}.
 *
 * <p>Runs boilerpipe's {@link ArticleExtractor} over either a remote web page
 * (GET with a {@code url} query parameter) or an uploaded file (multipart POST
 * with a {@code file} part) and returns the result wrapped in a
 * {@link DocumentView}.
 */
@Path(value = "/boilerpipe")
@Produces({MediaType.TEXT_HTML + "; charset=UTF-8", MediaType.APPLICATION_JSON})
public class BoilerpipeResource {

    private static final Logger LOGGER = LoggerFactory.getLogger(BoilerpipeResource.class);

    /**
     * Extracts the article text of the web page at {@code url}.
     *
     * @param url value of the {@code url} query parameter; must be an absolute
     *            {@code http} or {@code https} URL
     * @return {@code 200 OK} with a {@link DocumentView} entity on success;
     *         {@code 406 Not Acceptable} when {@code url} is missing, malformed
     *         or uses a scheme other than http/https;
     *         {@code 500 Internal Server Error} (entity: the exception's
     *         {@code toString()}) when fetching or extraction fails
     */
    @GET
    public Response extrudeThisURL(@QueryParam("url") String url){

        if (url == null || url.trim().isEmpty()){
            return respond(Response.Status.NOT_ACCEPTABLE, "Missing required query parameter: url");
        }

        URL target;

        try {
            target = new URL(url);
        }
        catch (MalformedURLException e){
            // Bad client input, not a server fault.
            return respond(Response.Status.NOT_ACCEPTABLE, e.toString());
        }

        // The URL comes straight from the request. Without this check a caller
        // could pass file:///... (or jar:, ftp:, ...) and have the server read
        // its own local files. Uploaded files go through the POST handler.
        if (! isWebURL(target)){
            return respond(Response.Status.NOT_ACCEPTABLE, "Unsupported URL scheme: " + target.getProtocol());
        }

        DocumentView view;

        try {
            view = new DocumentView(extrudeThis(target));
        }
        catch (Exception e){
            LOGGER.error("Failed to extrude " + target, e);
            return respond(Response.Status.INTERNAL_SERVER_ERROR, e.toString());
        }

        return respond(Response.Status.OK, view);
    }

    /**
     * Extracts the article text of an uploaded file.
     *
     * <p>The {@code file} part is spooled to a temporary file via
     * {@link Upload#writeTmpFile}, extracted, and the temporary file is
     * deleted before this method returns, whether or not extraction succeeded.
     *
     * @param formParams the multipart form; must contain a part named {@code file}
     * @return {@code 200 OK} with a {@link DocumentView} entity on success;
     *         {@code 400 Bad Request} when the {@code file} part is absent;
     *         {@code 500 Internal Server Error} (entity: the exception's
     *         {@code toString()}) when spooling or extraction fails
     */
    @POST
    @Consumes(MediaType.MULTIPART_FORM_DATA)
    public Response extrudeThisFile(FormDataMultiPart formParams){

        // getField() returns null when no part carries the requested name.
        FormDataBodyPart part = (formParams == null) ? null : formParams.getField("file");

        if (part == null){
            return respond(Response.Status.BAD_REQUEST, "Missing required form field: file");
        }

        File tmpfile = null;

        try {
            InputStream input = part.getValueAs(InputStream.class);
            tmpfile = new Upload().writeTmpFile(input);

            // File.toURI() yields a correctly escaped file: URL on every
            // platform, unlike concatenating "file://" with the raw path.
            DocumentView view = new DocumentView(extrudeThis(tmpfile.toURI().toURL()));
            return respond(Response.Status.OK, view);
        }
        catch (Exception e){
            LOGGER.error("Failed to extrude uploaded file", e);
            return respond(Response.Status.INTERNAL_SERVER_ERROR, e.toString());
        }
        finally {
            // Single cleanup point for both the success and the failure path.
            if (tmpfile != null && ! tmpfile.delete()){
                LOGGER.warn("Unable to delete temporary file " + tmpfile.getAbsolutePath());
            }
        }
    }

    /**
     * Runs {@link ArticleExtractor} against {@code url} and packages the result.
     *
     * @param url location of the content to extract
     * @return a {@link Document} holding the extracted text, titled with the
     *         base name (last path segment without extension) of {@code url}
     * @throws RuntimeException wrapping whatever the extractor threw
     */
    private Document extrudeThis(URL url){

        String text;
        String title;

        try {
            text = ArticleExtractor.INSTANCE.getText(url);
            title = FilenameUtils.getBaseName(url.toString());
        }
        catch (Exception e){
            throw new RuntimeException(e);
        }

        return new Document(text, title);
    }

    /** Returns true when {@code url} uses the http or https scheme. */
    private static boolean isWebURL(URL url){
        String protocol = url.getProtocol();
        return "http".equalsIgnoreCase(protocol) || "https".equalsIgnoreCase(protocol);
    }

    /** Builds a response with the given status and entity. */
    private static Response respond(Response.Status status, Object entity){
        return Response.status(status).entity(entity).build();
    }
}