package org.gbif.checklistbank.ws.resources; import org.gbif.api.model.checklistbank.ParsedName; import org.gbif.api.service.checklistbank.NameParser; import org.gbif.checklistbank.ws.util.LineReader; import org.gbif.ws.util.ExtraMediaTypes; import java.io.InputStream; import java.io.UnsupportedEncodingException; import java.nio.charset.Charset; import java.util.Iterator; import java.util.List; import javax.ws.rs.Consumes; import javax.ws.rs.GET; import javax.ws.rs.POST; import javax.ws.rs.Path; import javax.ws.rs.Produces; import javax.ws.rs.QueryParam; import javax.ws.rs.core.MediaType; import com.google.common.base.Splitter; import com.google.common.base.Strings; import com.google.common.collect.Lists; import com.google.inject.Inject; import com.sun.jersey.multipart.FormDataParam; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * The scientific name parser exposed in the API. */ @Path("/parser/name") @Produces({MediaType.APPLICATION_JSON, ExtraMediaTypes.APPLICATION_JAVASCRIPT}) public class NameParserResource { private static final Logger LOG = LoggerFactory.getLogger(NameParserResource.class); private static final Splitter NEW_LINE_SPLITTER = Splitter.on('\n').omitEmptyStrings().trimResults(); private final NameParser parser; @Inject public NameParserResource(NameParser parser) { this.parser = parser; } /** * Parsing names as GET query parameters. */ @GET public List<ParsedName> parseGet(@QueryParam("name") List<String> names) { return parse(names.iterator()); } /** * Parsing names as a json array. */ @POST @Consumes(MediaType.APPLICATION_JSON) public List<ParsedName> parseJson(List<String> names) { return parse(names.iterator()); } /** * Parsing names by uploading a plain UTF-8 text file using one line per scientific name. * <pre> * curl -F names=@scientific_names.txt http://apidev.gbif.org/parser/name * </pre> */ @POST @Consumes(MediaType.MULTIPART_FORM_DATA) public List<ParsedName> parseFile(@FormDataParam("names") InputStream namesFile) throws UnsupportedEncodingException { if (namesFile == null) { LOG.debug("No names file uploaded"); return Lists.newArrayList(); } LineReader iter = new LineReader(namesFile, Charset.forName("UTF8")); return parse(iter.iterator()); } /** * Parsing names by posting plain text content using one line per scientific name. * Make sure to preserve new lines (\n) in the posted data, for example use --data-binary with curl: * <pre> * curl POST -H "Content-Type:text/plain" --data-binary @scientific_names.txt http://apidev.gbif.org/parser/name * </pre> */ @POST @Consumes(MediaType.TEXT_PLAIN) public List<ParsedName> parsePlainText(String names) { return parse(NEW_LINE_SPLITTER.split(Strings.nullToEmpty(names)).iterator()); } private List<ParsedName> parse(Iterator<String> iter) { int counter = 0; int unparsable = 0; List<ParsedName> pnames = Lists.newArrayList(); while (iter.hasNext()) { final String name = iter.next(); ParsedName pn = parser.parseQuietly(name); pnames.add(pn); counter++; if (!pn.getType().isParsable()) { unparsable++; } } LOG.debug("Parsed {} names out of which {} are unparsable", counter, unparsable); return pnames; } }