package ecologylab.bigsemantics.metadata;
import static org.junit.Assert.assertEquals;
import java.io.IOException;
import org.junit.Test;
import ecologylab.bigsemantics.collecting.FakeSemanticsScope;
import ecologylab.bigsemantics.cyberneko.CybernekoWrapper;
import ecologylab.bigsemantics.downloadcontrollers.FakeDownloadControllerFactory;
import ecologylab.bigsemantics.generated.library.RepositoryMetadataTypesScope;
import ecologylab.bigsemantics.httpclient.SimplHttpResponse;
import ecologylab.bigsemantics.metadata.builtins.Document;
import ecologylab.bigsemantics.metadata.builtins.DocumentClosure;
import ecologylab.net.ParsedURL;
import ecologylab.serialization.SimplTypesScope;
/**
* Test if extracted metadata has the right mm_name associated with it, in different cases. The
* correct mm_name should always be the name of the meta-metadata that is used for extracting that
* metadata.
*
* @author quyin
*/
public class TestMmName
{
  // Shared fixture. Initializers run in textual order, so each one may rely on the fields
  // declared above it.

  /** Metadata types scope obtained from the generated repository scope. */
  static SimplTypesScope metadataScope = RepositoryMetadataTypesScope.get();

  /** Factory on which each test registers the response to be returned for a given URL. */
  static FakeDownloadControllerFactory factory = new FakeDownloadControllerFactory();

  /** Semantics scope used to construct documents; wired to {@link #factory} below. */
  static FakeSemanticsScope semanticsScope =
      new FakeSemanticsScope(metadataScope, CybernekoWrapper.class);

  static
  {
    semanticsScope.setFakeDownloadControllerFactory(factory);
  }

  /**
   * Builds the response shared by all test cases: code 200, message "OK", and a minimal HTML page
   * as content.
   *
   * @return a freshly created response; callers may modify it further before registering it.
   */
  private SimplHttpResponse createOkResponse()
  {
    SimplHttpResponse okResponse = new SimplHttpResponse();
    okResponse.setCode(200);
    okResponse.setMessage("OK");
    okResponse.setContent("<html><head><title>Test Page</title></head><body></body></html>");
    return okResponse;
  }

  /**
   * Obtains the document for the given URL from the semantics scope, performs a synchronous
   * download through its closure, and returns the document held by the closure afterwards.
   *
   * @param url
   *          absolute URL of the document to download.
   * @return the closure's document after the download; it is re-read from the closure rather than
   *         reusing the initially constructed one (presumably because the download may replace
   *         it -- confirm against DocumentClosure).
   * @throws IOException
   *           declared for the download call.
   */
  private Document downloadDocument(String url) throws IOException
  {
    ParsedURL location = ParsedURL.getAbsolute(url);
    Document initialDocument = semanticsScope.getOrConstructDocument(location);
    DocumentClosure downloadClosure = initialDocument.getOrConstructClosure();
    downloadClosure.performDownloadSynchronously(false, false);
    return downloadClosure.getDocument();
  }

  /**
   * Registers the response for the requested URL, downloads the document, and asserts that its
   * meta-metadata name equals the expected one.
   *
   * @param expectedMmName
   *          the meta-metadata name the downloaded document must report.
   * @param requestedUrl
   *          the URL to register the response for and to download.
   * @param response
   *          the response to register on the factory for that URL.
   * @throws IOException
   *           propagated from the download.
   */
  private void checkMmName(String expectedMmName, String requestedUrl, SimplHttpResponse response)
      throws IOException
  {
    factory.setResponse(requestedUrl, response);
    Document downloaded = downloadDocument(requestedUrl);
    assertEquals(expectedMmName, downloaded.getMetaMetadataName());
  }

  /**
   * Test mm_name in regular cases.
   *
   * @throws IOException
   */
  @Test
  public void testMmName() throws IOException
  {
    checkMmName("amazon_product",
                "http://www.amazon.com/SomeProduct/dp/0000000000",
                createOkResponse());
  }

  /**
   * Test mm_name with non-type meta-metadata. Note that the URL used in this test case must be
   * associated with a non-type meta-metadata (a meta-metadata that does not define a new type).
   *
   * @throws IOException
   */
  @Test
  public void testMmNameNonTypeMmd() throws IOException
  {
    checkMmName("nytimes",
                "http://www.nytimes.com/2014/05/15/news/some-random-news.html",
                createOkResponse());
  }

  /**
   * Test mm_name when there is redirection. The response is registered under the initial URL, and
   * the real URL is attached to it as an additional URL.
   *
   * @throws IOException
   */
  @Test
  public void testMmNameWithRedirection() throws IOException
  {
    SimplHttpResponse redirected = createOkResponse();
    // mimic redirection.
    redirected.addOtherUrl("http://dl.acm.org/citation.cfm?id=1234567");
    checkMmName("acm_portal", "http://redirect.com/", redirected);
  }
}