/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.apache.solr.SolrTestCaseJ4;
import org.junit.Assert;
import org.junit.Test;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
* <p> Test for XPathRecordReader </p>
*
* @version $Id: TestXPathRecordReader.java 963873 2010-07-13 21:45:31Z rmuir $
* @since solr 1.3
*/
public class TestXPathRecordReader extends SolrTestCaseJ4 {
/**
 * Reads {@code /root/b} records with a multi-valued field {@code c} and
 * checks the record count and the number of values gathered per record.
 */
@Test
public void basic() {
  StringBuilder doc = new StringBuilder();
  doc.append("<root>\n");
  doc.append(" <b><c>Hello C1</c>\n");
  doc.append(" <c>Hello C1</c>\n");
  doc.append(" </b>\n");
  doc.append(" <b><c>Hello C2</c>\n");
  doc.append(" </b>\n");
  doc.append("</root>");

  XPathRecordReader reader = new XPathRecordReader("/root/b");
  reader.addField("c", "/root/b/c", true);
  List<Map<String, Object>> records =
      reader.getAllRecords(new StringReader(doc.toString()));

  Assert.assertEquals(2, records.size());
  // The first b element has two c children, the second has one.
  List<?> firstValues = (List<?>) records.get(0).get("c");
  Assert.assertEquals(2, firstValues.size());
  List<?> secondValues = (List<?>) records.get(1).get("c");
  Assert.assertEquals(1, secondValues.size());
}
/**
 * Maps the attributes {@code a} and {@code b} of each {@code /root/b}
 * element to single-valued fields and checks the value read for every
 * one of the three records.
 */
@Test
public void attributes() {
  final int count = 3;
  StringBuilder doc = new StringBuilder("<root>\n");
  for (int i = 0; i < count; i++) {
    doc.append(" <b a=\"x" + i + "\" b=\"y" + i + "\" />\n");
  }
  doc.append("</root>");

  XPathRecordReader reader = new XPathRecordReader("/root/b");
  reader.addField("a", "/root/b/@a", false);
  reader.addField("b", "/root/b/@b", false);
  List<Map<String, Object>> records =
      reader.getAllRecords(new StringReader(doc.toString()));

  Assert.assertEquals(3, records.size());
  // All "a" values are verified first, then all "b" values.
  for (int i = 0; i < count; i++) {
    Assert.assertEquals("x" + i, records.get(i).get("a"));
  }
  for (int i = 0; i < count; i++) {
    Assert.assertEquals("y" + i, records.get(i).get("b"));
  }
}
/**
 * Reads attributes of the forEach element itself ({@code @id},
 * {@code @mid}), the text of a child element ({@code price}) and an
 * attribute of a child element ({@code condition/@type}) for two
 * {@code merchantProduct} records.
 */
@Test
public void attrInRoot() {
  final String doc = "<r>\n"
      + "<merchantProduct id=\"814636051\" mid=\"189973\">\n"
      + " <in_stock type=\"stock-4\" />\n"
      + " <condition type=\"cond-0\" />\n"
      + " <price>301.46</price>\n"
      + " </merchantProduct>\n"
      + "<merchantProduct id=\"814636052\" mid=\"189974\">\n"
      + " <in_stock type=\"stock-5\" />\n"
      + " <condition type=\"cond-1\" />\n"
      + " <price>302.46</price>\n"
      + " </merchantProduct>\n"
      + "\n"
      + "</r>";

  XPathRecordReader reader = new XPathRecordReader("/r/merchantProduct");
  reader.addField("id", "/r/merchantProduct/@id", false);
  reader.addField("mid", "/r/merchantProduct/@mid", false);
  reader.addField("price", "/r/merchantProduct/price", false);
  reader.addField("conditionType", "/r/merchantProduct/condition/@type", false);
  List<Map<String, Object>> records = reader.getAllRecords(new StringReader(doc));

  // Expected values, one row per record, in the same column order as fieldNames.
  String[] fieldNames = {"id", "mid", "price", "conditionType"};
  String[][] expected = {
      {"814636051", "189973", "301.46", "cond-0"},
      {"814636052", "189974", "302.46", "cond-1"},
  };
  for (int row = 0; row < expected.length; row++) {
    Map<String, Object> record = records.get(row);
    for (int col = 0; col < fieldNames.length; col++) {
      Assert.assertEquals(expected[row][col], record.get(fieldNames[col]));
    }
  }
}
/**
 * Same as the single-level attribute test, but the record element
 * {@code b} sits one level deeper, under {@code /root/a}.
 */
@Test
public void attributes2Level() {
  final String doc =
      "<root>\n"
      + "<a>\n"
      + " <b a=\"x0\" b=\"y0\" />\n"
      + " <b a=\"x1\" b=\"y1\" />\n"
      + " <b a=\"x2\" b=\"y2\" />\n"
      + " </a>"
      + "</root>";

  XPathRecordReader reader = new XPathRecordReader("/root/a/b");
  reader.addField("a", "/root/a/b/@a", false);
  reader.addField("b", "/root/a/b/@b", false);
  List<Map<String, Object>> records = reader.getAllRecords(new StringReader(doc));

  Assert.assertEquals(3, records.size());
  // Spot checks: attribute a of the first record, attribute b of the second.
  Map<String, Object> first = records.get(0);
  Assert.assertEquals("x0", first.get("a"));
  Map<String, Object> second = records.get(1);
  Assert.assertEquals("y1", second.get("b"));
}
/**
 * Uses a forEach with two alternatives ({@code /root/a | /root/x}) and
 * streams the records through a handler, checking that exactly one
 * record is reported for each of the two xpaths.
 */
@Test
public void attributes2LevelHetero() {
  StringBuilder doc = new StringBuilder();
  doc.append("<root>\n");
  doc.append("<a>\n");
  for (int i = 0; i <= 2; i++) {
    doc.append(" <b a=\"x" + i + "\" b=\"y" + i + "\" />\n");
  }
  doc.append(" </a>");
  doc.append("<x>\n");
  for (int i = 4; i <= 6; i++) {
    doc.append(" <b a=\"x" + i + "\" b=\"y" + i + "\" />\n");
  }
  doc.append(" </x>");
  doc.append("</root>");

  XPathRecordReader reader = new XPathRecordReader("/root/a | /root/x");
  reader.addField("a", "/root/a/b/@a", false);
  reader.addField("b", "/root/a/b/@b", false);
  reader.addField("a", "/root/x/b/@a", false);
  reader.addField("b", "/root/x/b/@b", false);

  // Records are sorted into these buckets by the xpath they are reported under.
  final List<Map<String, Object>> underA = new ArrayList<Map<String, Object>>();
  final List<Map<String, Object>> underX = new ArrayList<Map<String, Object>>();
  XPathRecordReader.Handler collector = new XPathRecordReader.Handler() {
    public void handle(Map<String, Object> rec, String path) {
      if (rec != null) {
        if (path.equals("/root/a")) {
          underA.add(rec);
        }
        if (path.equals("/root/x")) {
          underX.add(rec);
        }
      }
    }
  };
  reader.streamRecords(new StringReader(doc.toString()), collector);

  Assert.assertEquals(1, underA.size());
  Assert.assertEquals(1, underX.size());
}
/**
 * Collects the attributes {@code a} and {@code b} of nested {@code b}
 * elements as multi-valued fields and checks that, in the second record,
 * the positions belonging to an absent attribute hold {@code null}.
 */
@Test
public void attributes2LevelMissingAttrVal() {
  final String doc =
      "<root>\n"
      + "<a>\n"
      + " <b a=\"x0\" b=\"y0\" />\n"
      + " <b a=\"x1\" b=\"y1\" />\n"
      + " </a>"
      + "<a>\n"
      + " <b a=\"x3\" />\n"
      + " <b b=\"y4\" />\n"
      + " </a>"
      + "</root>";

  XPathRecordReader reader = new XPathRecordReader("/root/a");
  reader.addField("a", "/root/a/b/@a", true);
  reader.addField("b", "/root/a/b/@b", true);
  List<Map<String, Object>> records = reader.getAllRecords(new StringReader(doc));

  Assert.assertEquals(2, records.size());
  // In the second record the second b has no "a" and the first b has no "b".
  List<?> aValues = (List<?>) records.get(1).get("a");
  Assert.assertNull(aValues.get(1));
  List<?> bValues = (List<?>) records.get(1).get("b");
  Assert.assertNull(bValues.get(0));
}
/**
 * Element counterpart of the missing-attribute test: child elements
 * {@code x} and {@code y} of nested {@code b} elements are collected as
 * multi-valued fields, and the positions of absent children in the
 * second record are expected to be {@code null}.
 */
@Test
public void elems2LevelMissing() {
  StringBuilder doc = new StringBuilder();
  doc.append("<root>\n");
  doc.append("\t<a>\n");
  doc.append("\t <b>\n").append("\t <x>x0</x>\n");
  doc.append("\t <y>y0</y>\n");
  doc.append("\t </b>\n");
  doc.append("\t <b>\n").append("\t <x>x1</x>\n");
  doc.append("\t <y>y1</y>\n");
  doc.append("\t </b>\n");
  doc.append("\t </a>\n");
  doc.append("\t<a>\n");
  doc.append("\t <b>\n").append("\t <x>x3</x>\n").append("\t </b>\n");
  doc.append("\t <b>\n").append("\t <y>y4</y>\n").append("\t </b>\n");
  doc.append("\t </a>\n");
  doc.append("</root>");

  XPathRecordReader reader = new XPathRecordReader("/root/a");
  reader.addField("a", "/root/a/b/x", true);
  reader.addField("b", "/root/a/b/y", true);
  List<Map<String, Object>> records =
      reader.getAllRecords(new StringReader(doc.toString()));

  Assert.assertEquals(2, records.size());
  // Field "a" is fed from x, field "b" from y.
  List<?> fromX = (List<?>) records.get(1).get("a");
  Assert.assertNull(fromX.get(1));
  List<?> fromY = (List<?>) records.get(1).get("b");
  Assert.assertNull(fromY.get(0));
}
/**
 * Reads an element with mixed content (text interleaved with child
 * elements) and checks that the field mapped to the parent contains the
 * parent's own text fragments but not the text of the {@code b} child,
 * while the children are available through their own fields.
 */
@Test
public void mixedContent() {
  final String doc =
      "<xhtml:p xmlns:xhtml=\"http://xhtml.com/\" >This text is \n"
      + " <xhtml:b>bold</xhtml:b> and this text is \n"
      + " <xhtml:u>underlined</xhtml:u>!\n"
      + "</xhtml:p>";

  XPathRecordReader reader = new XPathRecordReader("/p");
  reader.addField("p", "/p", true);
  reader.addField("b", "/p/b", true);
  reader.addField("u", "/p/u", true);
  List<Map<String, Object>> records = reader.getAllRecords(new StringReader(doc));
  Map<String, Object> record = records.get(0);

  List<?> boldValues = (List<?>) record.get("b");
  Assert.assertEquals("bold", boldValues.get(0));
  List<?> underlinedValues = (List<?>) record.get("u");
  Assert.assertEquals("underlined", underlinedValues.get(0));

  List<?> paragraphValues = (List<?>) record.get("p");
  String paragraph = (String) paragraphValues.get(0);
  Assert.assertTrue(paragraph.contains("This text is"));
  Assert.assertTrue(paragraph.contains("and this text is"));
  Assert.assertTrue(paragraph.contains("!"));
  // Text belonging to child elements must not leak into the parent's value.
  Assert.assertFalse(paragraph.contains("bold"));
}
/**
 * Reads the same mixed-content element with the
 * {@code XPathRecordReader.FLATTEN} flag and checks that the trimmed
 * field value is the parent's text with the children's text merged in.
 */
@Test
public void mixedContentFlattened() {
  StringBuilder doc = new StringBuilder();
  doc.append("<xhtml:p xmlns:xhtml=\"http://xhtml.com/\" >This text is \n");
  doc.append(" <xhtml:b>bold</xhtml:b> and this text is \n");
  doc.append(" <xhtml:u>underlined</xhtml:u>!\n");
  doc.append("</xhtml:p>");

  XPathRecordReader reader = new XPathRecordReader("/p");
  reader.addField("p", "/p", false, XPathRecordReader.FLATTEN);
  List<Map<String, Object>> records =
      reader.getAllRecords(new StringReader(doc.toString()));
  Map<String, Object> record = records.get(0);

  final String expected = "This text is \n"
      + " bold and this text is \n"
      + " underlined!";
  String actual = ((String) record.get("p")).trim();
  Assert.assertEquals(expected, actual);
}
/**
 * Restricts field xpaths with an attribute-presence predicate
 * ({@code b[@k]}). The first record is expected to yield two values per
 * field; the second record, whose {@code b} elements carry no {@code k}
 * attribute, is expected to be an empty map.
 */
@Test
public void elems2LevelWithAttrib() {
  final String doc =
      "<root>\n"
      + "\t<a>\n"
      + "\t <b k=\"x\">\n"
      + "\t <x>x0</x>\n"
      + "\t <y>y0</y>\n"
      + "\t </b>\n"
      + "\t <b k=\"y\">\n"
      + "\t <x>x1</x>\n"
      + "\t <y>y1</y>\n"
      + "\t </b>\n"
      + "\t </a>\n"
      + "\t <a>\n"
      + "\t <b>\n"
      + "\t <x>x3</x>\n"
      + "\t </b>\n"
      + "\t <b>\n"
      + "\t <y>y4</y>\n"
      + "\t </b>\n"
      + "\t </a>\n"
      + "</root>";

  XPathRecordReader reader = new XPathRecordReader("/root/a");
  reader.addField("x", "/root/a/b[@k]/x", true);
  reader.addField("y", "/root/a/b[@k]/y", true);
  List<Map<String, Object>> records = reader.getAllRecords(new StringReader(doc));

  Assert.assertEquals(2, records.size());
  List<?> xValues = (List<?>) records.get(0).get("x");
  Assert.assertEquals(2, xValues.size());
  List<?> yValues = (List<?>) records.get(0).get("y");
  Assert.assertEquals(2, yValues.size());
  // Nothing in the second record matches the predicate.
  Map<String, Object> second = records.get(1);
  Assert.assertEquals(0, second.size());
}
/**
 * Combines two predicates on one step ({@code b[@k][@m='n']}). Only one
 * {@code b} of the first record satisfies both, so one value per field
 * is expected there, and the second record is expected to be empty.
 */
@Test
public void elems2LevelWithAttribMultiple() {
  StringBuilder doc = new StringBuilder();
  doc.append("<root>\n");
  doc.append("\t<a>\n").append("\t <b k=\"x\" m=\"n\" >\n");
  doc.append("\t <x>x0</x>\n");
  doc.append("\t <y>y0</y>\n");
  doc.append("\t </b>\n");
  doc.append("\t <b k=\"y\" m=\"p\">\n");
  doc.append("\t <x>x1</x>\n");
  doc.append("\t <y>y1</y>\n");
  doc.append("\t </b>\n");
  doc.append("\t </a>\n");
  doc.append("\t<a>\n").append("\t <b k=\"x\">\n");
  doc.append("\t <x>x3</x>\n");
  doc.append("\t </b>\n");
  doc.append("\t <b m=\"n\">\n");
  doc.append("\t <y>y4</y>\n");
  doc.append("\t </b>\n");
  doc.append("\t </a>\n");
  doc.append("</root>");

  XPathRecordReader reader = new XPathRecordReader("/root/a");
  reader.addField("x", "/root/a/b[@k][@m='n']/x", true);
  reader.addField("y", "/root/a/b[@k][@m='n']/y", true);
  List<Map<String, Object>> records =
      reader.getAllRecords(new StringReader(doc.toString()));

  Assert.assertEquals(2, records.size());
  List<?> xValues = (List<?>) records.get(0).get("x");
  Assert.assertEquals(1, xValues.size());
  List<?> yValues = (List<?>) records.get(0).get("y");
  Assert.assertEquals(1, yValues.size());
  // In the second record each b satisfies only one of the two predicates.
  Map<String, Object> second = records.get(1);
  Assert.assertEquals(0, second.size());
}
/**
 * Restricts field xpaths with an attribute-value predicate
 * ({@code b[@k='x']}). One value per field is expected in the first
 * record and an empty map for the second record.
 */
@Test
public void elems2LevelWithAttribVal() {
  final String doc =
      "<root>\n"
      + "\t<a>\n"
      + " <b k=\"x\">\n"
      + "\t <x>x0</x>\n"
      + "\t <y>y0</y>\n"
      + "\t </b>\n"
      + "\t <b k=\"y\">\n"
      + "\t <x>x1</x>\n"
      + "\t <y>y1</y>\n"
      + "\t </b>\n"
      + "\t </a>\n"
      + "\t <a>\n"
      + " <b><x>x3</x></b>\n"
      + "\t <b><y>y4</y></b>\n"
      + "\t</a>\n"
      + "</root>";

  XPathRecordReader reader = new XPathRecordReader("/root/a");
  reader.addField("x", "/root/a/b[@k='x']/x", true);
  reader.addField("y", "/root/a/b[@k='x']/y", true);
  List<Map<String, Object>> records = reader.getAllRecords(new StringReader(doc));

  Assert.assertEquals(2, records.size());
  Map<String, Object> first = records.get(0);
  Assert.assertEquals(1, ((List<?>) first.get("x")).size());
  Assert.assertEquals(1, ((List<?>) first.get("y")).size());
  // The b elements of the second record have no k attribute at all.
  Map<String, Object> second = records.get(1);
  Assert.assertEquals(0, second.size());
}
/**
 * Uses a predicate whose attribute value contains a slash
 * ({@code a[@x='a/b']}) and checks that the {@code h} attribute of the
 * matching element is still read correctly.
 */
@Test
public void attribValWithSlash() {
  StringBuilder doc = new StringBuilder();
  doc.append("<root><b>\n");
  doc.append(" <a x=\"a/b\" h=\"hello-A\"/> \n");
  doc.append("</b></root>");

  XPathRecordReader reader = new XPathRecordReader("/root/b");
  reader.addField("x", "/root/b/a[@x='a/b']/@h", false);
  List<Map<String, Object>> records =
      reader.getAllRecords(new StringReader(doc.toString()));

  Assert.assertEquals(1, records.size());
  Map<String, Object> only = records.get(0);
  Assert.assertEquals("hello-A", only.get("x"));
}
/**
 * Checks that two unsupported xpath forms are rejected with a
 * {@link RuntimeException}: a forEach expression that begins with
 * {@code //}, and a field xpath that does not begin with {@code /}.
 */
@Test
public void unsupported_Xpaths() {
  try {
    new XPathRecordReader("//b");
    Assert.fail("A RuntimeException was expected: //b forEach cannot begin with '//'.");
  } catch (RuntimeException expected) {
    // Expected. Assert.fail raises an AssertionError, which is not caught here.
  }

  // The field check must run against a reader that was actually constructed.
  // Reusing the reference from the failed construction above would leave it
  // null, and the resulting NullPointerException (itself a RuntimeException)
  // would satisfy the catch below no matter what addField does.
  XPathRecordReader rr = new XPathRecordReader("/root/b");
  try {
    rr.addField("bold", "b", false);
    Assert.fail("A RuntimeException was expected: 'b' xpaths must begin with '/'.");
  } catch (RuntimeException expected) {
    // Expected: relative field xpaths are not supported.
  }
}
/**
 * Mixes any-descendant field xpaths ({@code //boo}, {@code //boo/i}) with
 * absolute ones on a {@code /anyd/contenido} forEach, and checks the
 * trimmed text of the absolute fields, the first four values gathered
 * for {@code //boo}, and the single value gathered for {@code //boo/i}.
 */
@Test
public void any_decendent_from_root() {
  XPathRecordReader reader = new XPathRecordReader("/anyd/contenido");
  reader.addField("descdend", "//boo", true);
  reader.addField("inr_descd", "//boo/i", false);
  reader.addField("cont", "/anyd/contenido", false);
  reader.addField("id", "/anyd/contenido/@id", false);
  reader.addField("status", "/anyd/status", false);
  reader.addField("title", "/anyd/contenido/titulo", false, XPathRecordReader.FLATTEN);
  reader.addField("resume", "/anyd/contenido/resumen", false);
  reader.addField("text", "/anyd/contenido/texto", false);

  StringBuilder doc = new StringBuilder();
  doc.append("<anyd>\n");
  doc.append(" this <boo>top level</boo> is ignored because it is external to the forEach\n");
  doc.append(" <status>as is <boo>this element</boo></status>\n");
  doc.append(" <contenido id=\"10097\" idioma=\"cat\">\n");
  doc.append(" This one is <boo>not ignored as its</boo> inside a forEach\n");
  doc.append(" <antetitulo><i> big <boo>antler</boo></i></antetitulo>\n");
  doc.append(" <titulo> My <i>flattened <boo>title</boo></i> </titulo>\n");
  doc.append(" <resumen> My summary <i>skip this!</i> </resumen>\n");
  doc.append(" <texto> <boo>Within the body of</boo>My text</texto>\n");
  doc.append(" <p>Access <boo>inner <i>sub clauses</i> as well</boo></p>\n");
  doc.append(" </contenido>\n");
  doc.append("</anyd>");

  List<Map<String, Object>> records =
      reader.getAllRecords(new StringReader(doc.toString()));
  Assert.assertEquals(1, records.size());
  Map<String, Object> record = records.get(0);

  Assert.assertEquals("This one is inside a forEach", record.get("cont").toString().trim());
  Assert.assertEquals("10097", record.get("id"));
  Assert.assertEquals("My flattened title", record.get("title").toString().trim());
  Assert.assertEquals("My summary", record.get("resume").toString().trim());
  Assert.assertEquals("My text", record.get("text").toString().trim());

  // Values collected for //boo, in the order they are expected to appear.
  String[] expectedBoos = {
      "not ignored as its",
      "antler",
      "Within the body of",
      "inner as well",
  };
  List<?> boos = (List<?>) record.get("descdend");
  for (int i = 0; i < expectedBoos.length; i++) {
    Assert.assertEquals(expectedBoos[i], (String) boos.get(i));
  }
  Assert.assertEquals("sub clauses", record.get("inr_descd").toString().trim());
}
/**
 * Uses an any-descendant step directly below the forEach element
 * ({@code /anycd//boo}) and checks that every {@code boo} in the
 * document is collected, in document order.
 */
@Test
public void any_decendent_of_a_child1() {
  XPathRecordReader reader = new XPathRecordReader("/anycd");
  reader.addField("descdend", "/anycd//boo", true);

  // Same document body as the root-level descendant test; here *all* boo
  // elements are expected to be collected.
  final String doc =
      "<anycd>\n"
      + " this <boo>top level</boo> is ignored because it is external to the forEach\n"
      + " <status>as is <boo>this element</boo></status>\n"
      + " <contenido id=\"10097\" idioma=\"cat\">\n"
      + " This one is <boo>not ignored as its</boo> inside a forEach\n"
      + " <antetitulo><i> big <boo>antler</boo></i></antetitulo>\n"
      + " <titulo> My <i>flattened <boo>title</boo></i> </titulo>\n"
      + " <resumen> My summary <i>skip this!</i> </resumen>\n"
      + " <texto> <boo>Within the body of</boo>My text</texto>\n"
      + " <p>Access <boo>inner <i>sub clauses</i> as well</boo></p>\n"
      + " </contenido>\n"
      + "</anycd>";

  List<Map<String, Object>> records = reader.getAllRecords(new StringReader(doc));
  Assert.assertEquals(1, records.size());
  Map<String, Object> record = records.get(0);

  String[] expectedBoos = {
      "top level",
      "this element",
      "not ignored as its",
      "antler",
      "title",
      "Within the body of",
      "inner as well",
  };
  List<?> boos = (List<?>) record.get("descdend");
  for (int i = 0; i < expectedBoos.length; i++) {
    Assert.assertEquals(expectedBoos[i], (String) boos.get(i));
  }
}
/**
 * Uses an any-descendant step below a child of the forEach element
 * ({@code /anycd/contenido//boo}) and checks that only the {@code boo}
 * elements inside {@code contenido} are collected, in document order.
 */
@Test
public void any_decendent_of_a_child2() {
  XPathRecordReader reader = new XPathRecordReader("/anycd");
  reader.addField("descdend", "/anycd/contenido//boo", true);

  // Same document body as the sibling descendant tests; here only *some*
  // boo elements are expected to be collected.
  StringBuilder doc = new StringBuilder();
  doc.append("<anycd>\n");
  doc.append(" this <boo>top level</boo> is ignored because it is external to the forEach\n");
  doc.append(" <status>as is <boo>this element</boo></status>\n");
  doc.append(" <contenido id=\"10097\" idioma=\"cat\">\n");
  doc.append(" This one is <boo>not ignored as its</boo> inside a forEach\n");
  doc.append(" <antetitulo><i> big <boo>antler</boo></i></antetitulo>\n");
  doc.append(" <titulo> My <i>flattened <boo>title</boo></i> </titulo>\n");
  doc.append(" <resumen> My summary <i>skip this!</i> </resumen>\n");
  doc.append(" <texto> <boo>Within the body of</boo>My text</texto>\n");
  doc.append(" <p>Access <boo>inner <i>sub clauses</i> as well</boo></p>\n");
  doc.append(" </contenido>\n");
  doc.append("</anycd>");

  List<Map<String, Object>> records =
      reader.getAllRecords(new StringReader(doc.toString()));
  Assert.assertEquals(1, records.size());
  Map<String, Object> record = records.get(0);

  String[] expectedBoos = {
      "not ignored as its",
      "antler",
      "title",
      "Within the body of",
      "inner as well",
  };
  List<?> boos = (List<?>) record.get("descdend");
  for (int i = 0; i < expectedBoos.length; i++) {
    Assert.assertEquals(expectedBoos[i], boos.get(i));
  }
}
/**
 * Reads one {@code contenido} record with an attribute field and three
 * element-text fields, comparing the element text after trimming.
 */
@Test
public void another() {
  StringBuilder doc = new StringBuilder();
  doc.append("<root>\n");
  doc.append(" <contenido id=\"10097\" idioma=\"cat\">\n");
  doc.append(" <antetitulo></antetitulo>\n");
  doc.append(" <titulo> This is my title </titulo>\n");
  doc.append(" <resumen> This is my summary </resumen>\n");
  doc.append(" <texto> This is the body of my text </texto>\n");
  doc.append(" </contenido>\n");
  doc.append("</root>");

  XPathRecordReader reader = new XPathRecordReader("/root/contenido");
  reader.addField("id", "/root/contenido/@id", false);
  reader.addField("title", "/root/contenido/titulo", false);
  reader.addField("resume", "/root/contenido/resumen", false);
  reader.addField("text", "/root/contenido/texto", false);
  List<Map<String, Object>> records =
      reader.getAllRecords(new StringReader(doc.toString()));

  Assert.assertEquals(1, records.size());
  Map<String, Object> record = records.get(0);
  Assert.assertEquals("10097", record.get("id"));

  // Element text carries surrounding blanks in the document, hence the trim.
  String title = record.get("title").toString().trim();
  Assert.assertEquals("This is my title", title);
  String summary = record.get("resume").toString().trim();
  Assert.assertEquals("This is my summary", summary);
  String body = record.get("text").toString().trim();
  Assert.assertEquals("This is the body of my text", body);
}
/**
 * Uses the very same xpath for the forEach expression and for a field,
 * and checks that the element's text is returned as the field value.
 */
@Test
public void sameForEachAndXpath() {
  final String sharedPath = "/root/cat/name";
  final String doc =
      "<root>\n"
      + " <cat>\n"
      + " <name>hello</name>\n"
      + " </cat>\n"
      + " <item name=\"item name\"/>\n"
      + "</root>";

  XPathRecordReader reader = new XPathRecordReader(sharedPath);
  reader.addField("catName", sharedPath, false);
  List<Map<String, Object>> records = reader.getAllRecords(new StringReader(doc));

  Map<String, Object> first = records.get(0);
  Assert.assertEquals("hello", first.get("catName"));
}
/**
 * Collects three multi-valued fields from repeated {@code x} groups in
 * which some children are absent, and checks for both records that every
 * list holds {@code null} at the position of a group lacking that child
 * and the child's text otherwise.
 */
@Test
public void putNullTest() {
  StringBuilder doc = new StringBuilder();
  doc.append("<root>\n");
  doc.append(" <i>\n");
  doc.append(" <x>\n");
  doc.append(" <a>A.1.1</a>\n");
  doc.append(" <b>B.1.1</b>\n");
  doc.append(" </x>\n");
  doc.append(" <x>\n");
  doc.append(" <b>B.1.2</b>\n");
  doc.append(" <c>C.1.2</c>\n");
  doc.append(" </x>\n");
  doc.append(" </i>\n");
  doc.append(" <i>\n");
  doc.append(" <x>\n");
  doc.append(" <a>A.2.1</a>\n");
  doc.append(" <c>C.2.1</c>\n");
  doc.append(" </x>\n");
  doc.append(" <x>\n");
  doc.append(" <b>B.2.2</b>\n");
  doc.append(" <c>C.2.2</c>\n");
  doc.append(" </x>\n");
  doc.append(" </i>\n");
  doc.append("</root>");

  XPathRecordReader reader = new XPathRecordReader("/root/i");
  reader.addField("a", "/root/i/x/a", true);
  reader.addField("b", "/root/i/x/b", true);
  reader.addField("c", "/root/i/x/c", true);
  List<Map<String, Object>> records =
      reader.getAllRecords(new StringReader(doc.toString()));

  // First record: group 1 lacks c, group 2 lacks a.
  Map<String, Object> firstRecord = records.get(0);
  List<String> a1 = (List<String>) firstRecord.get("a");
  List<String> b1 = (List<String>) firstRecord.get("b");
  List<String> c1 = (List<String>) firstRecord.get("c");
  Assert.assertEquals("A.1.1", a1.get(0));
  Assert.assertEquals("B.1.1", b1.get(0));
  Assert.assertNull(c1.get(0));
  Assert.assertNull(a1.get(1));
  Assert.assertEquals("B.1.2", b1.get(1));
  Assert.assertEquals("C.1.2", c1.get(1));

  // Second record: group 1 lacks b, group 2 lacks a.
  Map<String, Object> secondRecord = records.get(1);
  List<String> a2 = (List<String>) secondRecord.get("a");
  List<String> b2 = (List<String>) secondRecord.get("b");
  List<String> c2 = (List<String>) secondRecord.get("c");
  Assert.assertEquals("A.2.1", a2.get(0));
  Assert.assertNull(b2.get(0));
  Assert.assertEquals("C.2.1", c2.get(0));
  Assert.assertNull(a2.get(1));
  Assert.assertEquals("B.2.2", b2.get(1));
  Assert.assertEquals("C.2.2", c2.get(1));
}
/**
 * Feeds a document that is not well-formed to the reader and expects
 * {@code getAllRecords} to throw instead of returning normally.
 */
@Test
public void testError() {
  StringBuilder doc = new StringBuilder();
  doc.append("<root>\n");
  doc.append(" <node>\n");
  doc.append(" <id>1</id>\n");
  doc.append(" <desc>test1</desc>\n");
  doc.append(" </node>\n");
  doc.append(" <node>\n");
  doc.append(" <id>2</id>\n");
  doc.append(" <desc>test2</desc>\n");
  doc.append(" </node>\n");
  doc.append(" <node>\n");
  doc.append(" <id/>3</id>\n"); // invalid XML: stray closing tag after an empty element
  doc.append(" <desc>test3</desc>\n");
  doc.append(" </node>\n");
  doc.append("</root>");

  XPathRecordReader reader = new XPathRecordReader("/root/node");
  reader.addField("id", "/root/node/id", true);
  reader.addField("desc", "/root/node/desc", true);

  StringReader input = new StringReader(doc.toString());
  try {
    reader.getAllRecords(input);
    Assert.fail("A RuntimeException was expected: the input XML is invalid.");
  } catch (Exception e) {
    // Expected. The AssertionError raised by Assert.fail is not an Exception
    // and therefore still propagates.
  }
}
}