/*
* Carrot2 project.
*
* Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński.
* All rights reserved.
*
* Refer to the full license file "carrot2.LICENSE"
* in the root folder of the repository checkout or at:
* http://www.carrot2.org/carrot2.LICENSE
*/
package org.carrot2.source.microsoft.v5;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.List;
import org.apache.http.NameValuePair;
import org.apache.http.message.BasicNameValuePair;
import org.carrot2.core.Document;
import org.carrot2.core.IDocumentSource;
import org.carrot2.core.attribute.CommonAttributes;
import org.carrot2.core.attribute.Processing;
import org.carrot2.source.SearchEngineResponse;
import org.carrot2.util.attribute.Attribute;
import org.carrot2.util.attribute.AttributeLevel;
import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.DefaultGroups;
import org.carrot2.util.attribute.Group;
import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Label;
import org.carrot2.util.attribute.Level;
/**
 * An {@link IDocumentSource} that retrieves news search results from Bing
 * through Search API V5.
 *
 * <p>Important: free use of this API is limited; beyond those limits it is a
 * paid service.
 *
 * @see "https://msdn.microsoft.com/en-us/library/mt711408.aspx"
 */
@Bindable(prefix = "Bing5NewsDocumentSource", inherit = CommonAttributes.class)
public class Bing5NewsDocumentSource extends Bing5DocumentSource
{
    /**
     * Address of the news search REST service, passed to the superclass constructor.
     */
    private static final String SERVICE_URL = "https://api.cognitive.microsoft.com/bing/v5.0/news/search";

    /**
     * Filter news by age. When left <code>null</code>, no <code>freshness</code>
     * parameter is added to the request.
     */
    @Processing
    @Input
    @Attribute
    @Label("Filter news by age")
    @Level(AttributeLevel.BASIC)
    @Group(DefaultGroups.FILTERING)
    public Freshness freshness;

    /**
     * Creates a news source bound to the news search endpoint.
     */
    public Bing5NewsDocumentSource() {
        super(METADATA, SERVICE_URL);
    }

    /**
     * Appends the <code>freshness</code> request parameter if {@link #freshness} is set.
     *
     * @param params request parameters to append to
     */
    @Override
    protected void augmentSearchParameters(List<NameValuePair> params) {
        if (freshness == null) {
            return;
        }
        params.add(new BasicNameValuePair("freshness", freshness.argName));
    }

    /**
     * Copies the estimated total and all news articles from the response into
     * <code>ser</code>.
     *
     * <p>Articles are taken from the head of a queue; the clustered articles of
     * each processed article are appended to the tail of that queue, so they
     * end up in the results after everything that was queued before them.
     *
     * @param response the response to convert; cast to {@link NewsResponse}
     * @param ser receives the results total metadata entry and the documents
     */
    @Override
    protected void handleResponse(BingResponse response, SearchEngineResponse ser) {
        final NewsResponse news = (NewsResponse) response;
        ser.metadata.put(SearchEngineResponse.RESULTS_TOTAL_KEY, news.totalEstimatedMatches);

        if (news.value == null) {
            return;
        }

        final ArrayDeque<NewsResponse.NewsArticle> pending = new ArrayDeque<>(news.value);
        NewsResponse.NewsArticle article;
        // ArrayDeque never holds null elements, so a null head means the queue is drained.
        while ((article = pending.pollFirst()) != null) {
            // Enqueue nested articles first, then emit the current one.
            if (article.clusteredArticles != null) {
                pending.addAll(article.clusteredArticles);
            }
            ser.results.add(toDocument(article));
        }
    }

    /**
     * Builds a {@link Document} from a single news article: name, description
     * and URL, plus the thumbnail URL and provider names when present.
     */
    private static Document toDocument(NewsResponse.NewsArticle article) {
        final Document document = new Document(article.name, article.description, article.url);

        final boolean hasThumbnail = article.image != null && article.image.thumbnail != null;
        if (hasThumbnail) {
            document.setField(Document.THUMBNAIL_URL, article.image.thumbnail.contentUrl);
        }

        if (article.provider != null) {
            final List<String> providerNames = new ArrayList<>();
            for (NewsResponse.NewsArticle.Organization organization : article.provider) {
                providerNames.add(organization.name);
            }
            document.setField(Document.SOURCES, providerNames);
        }

        return document;
    }
}