/* * Copyright 2016 Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.google.cloud.language.samples; import com.google.cloud.language.spi.v1.LanguageServiceClient; import com.google.cloud.language.v1.AnalyzeEntitiesRequest; import com.google.cloud.language.v1.AnalyzeEntitiesResponse; import com.google.cloud.language.v1.AnalyzeSentimentResponse; import com.google.cloud.language.v1.AnalyzeSyntaxRequest; import com.google.cloud.language.v1.AnalyzeSyntaxResponse; import com.google.cloud.language.v1.Document; import com.google.cloud.language.v1.Document.Type; import com.google.cloud.language.v1.EncodingType; import com.google.cloud.language.v1.Entity; import com.google.cloud.language.v1.EntityMention; import com.google.cloud.language.v1.Sentiment; import com.google.cloud.language.v1.Token; import com.google.protobuf.Descriptors; import java.io.IOException; import java.io.PrintStream; import java.security.GeneralSecurityException; import java.util.List; import java.util.Map; /** * A sample application that uses the Natural Language API to perform * entity, sentiment and syntax analysis. */ public class Analyze { /** * Detects entities,sentiment and syntax in a document using the Natural Language API. */ public static void main(String[] args) throws IOException, GeneralSecurityException { if (args.length != 2) { System.err.println("Usage:"); System.err.printf( "\tjava %s \"command\" \"text to analyze\"\n", Analyze.class.getCanonicalName()); System.exit(1); } String command = args[0]; String text = args[1]; Analyze app = new Analyze(LanguageServiceClient.create()); if (command.equals("entities")) { if (text.startsWith("gs://")) { printEntities(System.out, app.analyzeEntitiesFile(text)); } else { printEntities(System.out, app.analyzeEntitiesText(text)); } } else if (command.equals("sentiment")) { if (text.startsWith("gs://")) { printSentiment(System.out, app.analyzeSentimentFile(text)); } else { printSentiment(System.out, app.analyzeSentimentText(text)); } } else if (command.equals("syntax")) { if (text.startsWith("gs://")) { printSyntax(System.out, app.analyzeSyntaxFile(text)); } else { printSyntax(System.out, app.analyzeSyntaxText(text)); } } } /** * Print a list of {@code entities}. */ public static void printEntities(PrintStream out, List<Entity> entities) { if (entities == null || entities.size() == 0) { out.println("No entities found."); return; } out.printf("Found %d entit%s.\n", entities.size(), entities.size() == 1 ? "y" : "ies"); for (Entity entity : entities) { out.printf("%s\n", entity.getName()); out.printf("\tSalience: %.3f\n", entity.getSalience()); out.printf("\tType: %s\n", entity.getType()); if (entity.getMetadataMap() != null) { for (Map.Entry<String, String> metadata : entity.getMetadataMap().entrySet()) { out.printf("\tMetadata: %s = %s\n", metadata.getKey(), metadata.getValue()); } } if (entity.getMentionsList() != null) { for (EntityMention mention : entity.getMentionsList()) { for (Map.Entry<Descriptors.FieldDescriptor, Object> mentionSetMember : mention.getAllFields().entrySet()) { out.printf("\tMention: %s = %s\n", mentionSetMember.getKey(), mentionSetMember.getValue()); } } } } } /** * Print the Sentiment {@code sentiment}. */ public static void printSentiment(PrintStream out, Sentiment sentiment) { if (sentiment == null) { out.println("No sentiment found"); return; } out.println("Found sentiment."); out.printf("\tMagnitude: %.3f\n", sentiment.getMagnitude()); out.printf("\tScore: %.3f\n", sentiment.getScore()); } /** * Prints the Syntax for the {@code tokens}. */ public static void printSyntax(PrintStream out, List<Token> tokens) { if (tokens == null || tokens.size() == 0) { out.println("No syntax found"); return; } out.printf("Found %d token%s.\n", tokens.size(), tokens.size() == 1 ? "" : "s"); for (Token token : tokens) { out.println("TextSpan"); out.printf("\tText: %s\n", token.getText().getContent()); out.printf("\tBeginOffset: %d\n", token.getText().getBeginOffset()); out.printf("Lemma: %s\n", token.getLemma()); out.printf("PartOfSpeechTag: %s\n", token.getPartOfSpeech().getTag()); out.printf("\tAspect: %s\n",token.getPartOfSpeech().getAspect()); out.printf("\tCase: %s\n", token.getPartOfSpeech().getCase()); out.printf("\tForm: %s\n", token.getPartOfSpeech().getForm()); out.printf("\tGender: %s\n",token.getPartOfSpeech().getGender()); out.printf("\tMood: %s\n", token.getPartOfSpeech().getMood()); out.printf("\tNumber: %s\n", token.getPartOfSpeech().getNumber()); out.printf("\tPerson: %s\n", token.getPartOfSpeech().getPerson()); out.printf("\tProper: %s\n", token.getPartOfSpeech().getProper()); out.printf("\tReciprocity: %s\n", token.getPartOfSpeech().getReciprocity()); out.printf("\tTense: %s\n", token.getPartOfSpeech().getTense()); out.printf("\tVoice: %s\n", token.getPartOfSpeech().getVoice()); out.println("DependencyEdge"); out.printf("\tHeadTokenIndex: %d\n", token.getDependencyEdge().getHeadTokenIndex()); out.printf("\tLabel: %s\n", token.getDependencyEdge().getLabel()); } } private final LanguageServiceClient languageApi; /** * Constructs a {@link Analyze} which connects to the Cloud Natural Language API. */ public Analyze(LanguageServiceClient languageApi) { this.languageApi = languageApi; } /** * Gets {@link Entity}s from the string {@code text}. */ public List<Entity> analyzeEntitiesText(String text) throws IOException { Document doc = Document.newBuilder() .setContent(text).setType(Type.PLAIN_TEXT).build(); AnalyzeEntitiesRequest request = AnalyzeEntitiesRequest.newBuilder() .setDocument(doc) .setEncodingType(EncodingType.UTF16).build(); AnalyzeEntitiesResponse response = languageApi.analyzeEntities(request); return response.getEntitiesList(); } /** * Gets {@link Entity}s from the contents of the object at the given GCS {@code path}. */ public List<Entity> analyzeEntitiesFile(String path) throws IOException { Document doc = Document.newBuilder() .setGcsContentUri(path).setType(Type.PLAIN_TEXT).build(); AnalyzeEntitiesRequest request = AnalyzeEntitiesRequest.newBuilder() .setDocument(doc) .setEncodingType(EncodingType.UTF16).build(); AnalyzeEntitiesResponse response = languageApi.analyzeEntities(request); return response.getEntitiesList(); } /** * Gets {@link Sentiment} from the string {@code text}. */ public Sentiment analyzeSentimentText(String text) throws IOException { Document doc = Document.newBuilder() .setContent(text).setType(Type.PLAIN_TEXT).build(); AnalyzeSentimentResponse response = languageApi.analyzeSentiment(doc); return response.getDocumentSentiment(); } /** * Gets {@link Sentiment} from the contents of the object at the given GCS {@code path}. */ public Sentiment analyzeSentimentFile(String path) throws IOException { Document doc = Document.newBuilder() .setGcsContentUri(path).setType(Type.PLAIN_TEXT).build(); AnalyzeSentimentResponse response = languageApi.analyzeSentiment(doc); return response.getDocumentSentiment(); } /** * Gets {@link Token}s from the string {@code text}. */ public List<Token> analyzeSyntaxText(String text) throws IOException { Document doc = Document.newBuilder() .setContent(text).setType(Type.PLAIN_TEXT).build(); AnalyzeSyntaxRequest request = AnalyzeSyntaxRequest.newBuilder() .setDocument(doc) .setEncodingType(EncodingType.UTF16).build(); AnalyzeSyntaxResponse response = languageApi.analyzeSyntax(request); return response.getTokensList(); } /** * Gets {@link Token}s from the contents of the object at the given GCS {@code path}. */ public List<Token> analyzeSyntaxFile(String path) throws IOException { Document doc = Document.newBuilder() .setGcsContentUri(path).setType(Type.PLAIN_TEXT).build(); AnalyzeSyntaxRequest request = AnalyzeSyntaxRequest.newBuilder() .setDocument(doc) .setEncodingType(EncodingType.UTF16).build(); AnalyzeSyntaxResponse response = languageApi.analyzeSyntax(request); return response.getTokensList(); } }