package com.yahoo.glimmer.indexing; /* * Copyright (c) 2012 Yahoo! Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. * You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 * Unless required by applicable law or agreed to in writing, software distributed under the License is * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and limitations under the License. * See accompanying LICENSE file. */ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import it.unimi.dsi.lang.MutableString; import java.io.IOException; import org.junit.Test; public class HorizontalDocumentFactoryTest extends AbstractDocumentFactoryTest { @Test public void withContextTest() throws IOException { HorizontalDocumentFactory.setupConf(conf, true, null, "@"); HorizontalDocumentFactory factory = (HorizontalDocumentFactory)RDFDocumentFactory.buildFactory(conf); factory.setResourcesHashFunction(resourcesHash); assertEquals(5, factory.getFieldCount()); HorizontalDocument document = (HorizontalDocument)factory.getDocument(); document.setContent(CONTENT_BYTES, CONTENT_BYTES.length); assertEquals("http://subject/", document.getSubject()); MutableString word = new MutableString(); MutableString nonWord = new MutableString(); WordArrayReader subjectReader = (WordArrayReader)document.content(0); assertTrue(subjectReader.next(word, nonWord)); assertEquals("@33", word.toString()); assertEquals("", nonWord.toString()); assertFalse(subjectReader.next(word, nonWord)); WordArrayReader subjectTextReader = (WordArrayReader)document.content(1); // The subjectText contains both the ResourceID and the tokens from the URL/BNode. assertTrue(subjectTextReader.next(word, nonWord)); assertEquals("@33", word.toString()); assertEquals("", nonWord.toString()); assertTrue(subjectTextReader.next(word, nonWord)); assertEquals("subject", word.toString()); assertEquals("", nonWord.toString()); assertFalse(subjectTextReader.next(word, nonWord)); // token, predicate & context are positional/parallel indexes. WordArrayReader objectReader = (WordArrayReader)document.content(2); WordArrayReader predicateReader = (WordArrayReader)document.content(3); WordArrayReader contextReader = (WordArrayReader)document.content(4); assertTrue(objectReader.next(word, nonWord)); assertEquals("@45", word.toString()); assertEquals("", nonWord.toString()); assertTrue(predicateReader.next(word, nonWord)); assertEquals("@60", word.toString()); assertEquals("", nonWord.toString()); assertTrue(contextReader.next(word, nonWord)); assertEquals("@22", word.toString()); assertEquals("", nonWord.toString()); assertTrue(objectReader.next(word, nonWord)); assertEquals("@46", word.toString()); assertEquals("", nonWord.toString()); assertTrue(predicateReader.next(word, nonWord)); assertEquals("@61", word.toString()); assertEquals("", nonWord.toString()); assertTrue(contextReader.next(word, nonWord)); assertEquals(RDFDocument.NO_CONTEXT, word.toString()); assertEquals("", nonWord.toString()); assertTrue(objectReader.next(word, nonWord)); assertEquals("@47", word.toString()); assertEquals("", nonWord.toString()); assertTrue(predicateReader.next(word, nonWord)); assertEquals("@61", word.toString()); assertEquals("", nonWord.toString()); assertTrue(contextReader.next(word, nonWord)); assertEquals(RDFDocument.NO_CONTEXT, word.toString()); assertEquals("", nonWord.toString()); assertTrue(objectReader.next(word, nonWord)); assertEquals("object", word.toString()); assertEquals("", nonWord.toString()); assertTrue(predicateReader.next(word, nonWord)); assertEquals("@62", word.toString()); assertEquals("", nonWord.toString()); assertTrue(contextReader.next(word, nonWord)); assertEquals("@55", word.toString()); assertEquals("", nonWord.toString()); assertTrue(objectReader.next(word, nonWord)); assertEquals("3", word.toString()); assertEquals("", nonWord.toString()); assertTrue(predicateReader.next(word, nonWord)); assertEquals("@62", word.toString()); assertEquals("", nonWord.toString()); assertTrue(contextReader.next(word, nonWord)); assertEquals("@55", word.toString()); assertEquals("", nonWord.toString()); assertTrue(objectReader.next(word, nonWord)); assertEquals("@88", word.toString()); assertEquals("", nonWord.toString()); assertTrue(predicateReader.next(word, nonWord)); assertEquals("@63", word.toString()); assertEquals("", nonWord.toString()); assertTrue(contextReader.next(word, nonWord)); assertEquals(RDFDocument.NO_CONTEXT, word.toString()); assertEquals("", nonWord.toString()); assertFalse(objectReader.next(word, nonWord)); assertFalse(predicateReader.next(word, nonWord)); assertFalse(contextReader.next(word, nonWord)); context.assertIsSatisfied(); assertEquals(5l, factory.getCounter(RDFDocumentFactory.RdfCounters.INDEXED_TRIPLES).getValue()); } @Test public void withoutContextTest() throws IOException { HorizontalDocumentFactory.setupConf(conf, false, null, "@"); HorizontalDocumentFactory factory = (HorizontalDocumentFactory) RDFDocumentFactory.buildFactory(conf); factory.setResourcesHashFunction(resourcesHash); assertEquals(5, factory.getFieldCount()); HorizontalDocument document = (HorizontalDocument)factory.getDocument(); document.setContent(CONTENT_BYTES, CONTENT_BYTES.length); assertEquals("http://subject/", document.getSubject()); MutableString word = new MutableString(); MutableString nonWord = new MutableString(); WordArrayReader subjectReader = (WordArrayReader)document.content(0); assertTrue(subjectReader.next(word, nonWord)); assertEquals("@33", word.toString()); assertEquals("", nonWord.toString()); assertFalse(subjectReader.next(word, nonWord)); WordArrayReader subjectTextReader = (WordArrayReader)document.content(1); // The subjectText contains both the ResourceID and the tokens from the URL/BNode. assertTrue(subjectTextReader.next(word, nonWord)); assertEquals("@33", word.toString()); assertEquals("", nonWord.toString()); assertTrue(subjectTextReader.next(word, nonWord)); assertEquals("subject", word.toString()); assertEquals("", nonWord.toString()); assertFalse(subjectTextReader.next(word, nonWord)); // token, predicate & context are positional/parallel indexes. WordArrayReader objectReader = (WordArrayReader)document.content(2); WordArrayReader predicateReader = (WordArrayReader)document.content(3); WordArrayReader contextReader = (WordArrayReader)document.content(4); assertTrue(objectReader.next(word, nonWord)); assertEquals("@45", word.toString()); assertEquals("", nonWord.toString()); assertTrue(predicateReader.next(word, nonWord)); assertEquals("@60", word.toString()); assertEquals("", nonWord.toString()); assertTrue(contextReader.next(word, nonWord)); assertEquals(RDFDocument.NO_CONTEXT, word.toString()); assertEquals("", nonWord.toString()); assertTrue(objectReader.next(word, nonWord)); assertEquals("@46", word.toString()); assertEquals("", nonWord.toString()); assertTrue(predicateReader.next(word, nonWord)); assertEquals("@61", word.toString()); assertEquals("", nonWord.toString()); assertTrue(contextReader.next(word, nonWord)); assertEquals(RDFDocument.NO_CONTEXT, word.toString()); assertEquals("", nonWord.toString()); assertTrue(objectReader.next(word, nonWord)); assertEquals("@47", word.toString()); assertEquals("", nonWord.toString()); assertTrue(predicateReader.next(word, nonWord)); assertEquals("@61", word.toString()); assertEquals("", nonWord.toString()); assertTrue(contextReader.next(word, nonWord)); assertEquals(RDFDocument.NO_CONTEXT, word.toString()); assertEquals("", nonWord.toString()); assertTrue(objectReader.next(word, nonWord)); assertEquals("object", word.toString()); assertEquals("", nonWord.toString()); assertTrue(predicateReader.next(word, nonWord)); assertEquals("@62", word.toString()); assertEquals("", nonWord.toString()); assertTrue(contextReader.next(word, nonWord)); assertEquals(RDFDocument.NO_CONTEXT, word.toString()); assertEquals("", nonWord.toString()); assertTrue(objectReader.next(word, nonWord)); assertEquals("3", word.toString()); assertEquals("", nonWord.toString()); assertTrue(predicateReader.next(word, nonWord)); assertEquals("@62", word.toString()); assertEquals("", nonWord.toString()); assertTrue(contextReader.next(word, nonWord)); assertEquals(RDFDocument.NO_CONTEXT, word.toString()); assertEquals("", nonWord.toString()); assertTrue(objectReader.next(word, nonWord)); assertEquals("@88", word.toString()); assertEquals("", nonWord.toString()); assertTrue(predicateReader.next(word, nonWord)); assertEquals("@63", word.toString()); assertEquals("", nonWord.toString()); assertTrue(contextReader.next(word, nonWord)); assertEquals(RDFDocument.NO_CONTEXT, word.toString()); assertEquals("", nonWord.toString()); assertFalse(objectReader.next(word, nonWord)); assertFalse(predicateReader.next(word, nonWord)); assertFalse(contextReader.next(word, nonWord)); context.assertIsSatisfied(); assertEquals(5l, factory.getCounter(RDFDocumentFactory.RdfCounters.INDEXED_TRIPLES).getValue()); } }