package edu.isi.karma.mapreduce.driver; import org.apache.hadoop.io.Text; import org.junit.Assert; import org.junit.Test; import edu.isi.karma.mapreduce.function.MergeJSON; public class TestMergeJSON { @Test public void testMerge() { MergeJSON test = new MergeJSON(); String a = " {\"syll:twitterId\":\"caknoblock\",\"@type\":\"http://lod.isi.edu/ontology/syllabus/Person\",\"foaf:homepage\":{\"@type\":\"http://xmlns.com/foaf/0.1/Document\",\"@id\":\"http://www.isi.edu/~knoblock\"},\"foaf:name\":\"Craig Knoblock\",\"foaf:lastName\":\"Knoblock\",\"@id\":\"http://lod.isi.edu/cs548/person/Knoblock\",\"id\":\"http://lod.isi.edu/cs548/person/Knoblock\",\"foaf:depiction\":{\"@type\":\"http://xmlns.com/foaf/0.1/Image\",\"@id\":\"http://www.isi.edu/integration/people/knoblock/img/CraigKnoblock.jpg\",\"foaf:depicts\":\"<http://lod.isi.edu/cs548/person/Knoblock>\"},\"foaf:title\":\"Prof\",\"foaf:mbox\":\"mailto:knoblock@isi.edu\"}"; String b = " {\"rdfs:label\":\"knoblock@isi.edu\",\"@type\":\"http://www.w3.org/2002/07/owl#Thing\",\"@id\":\"mailto:knoblock@isi.edu\",\"id\":\"mailto:knoblock@isi.edu\"}"; Text result = test.evaluate(new Text(a), new Text(b), new Text("foaf:mbox"), new Text("@id")); String expected = "{\"id\":\"http://lod.isi.edu/cs548/person/Knoblock\",\"syll:twitterId\":\"caknoblock\",\"@type\":\"http://lod.isi.edu/ontology/syllabus/Person\",\"foaf:homepage\":{\"@type\":\"http://xmlns.com/foaf/0.1/Document\",\"@id\":\"http://www.isi.edu/~knoblock\"},\"foaf:lastName\":\"Knoblock\",\"foaf:name\":\"Craig Knoblock\",\"foaf:depiction\":{\"@type\":\"http://xmlns.com/foaf/0.1/Image\",\"@id\":\"http://www.isi.edu/integration/people/knoblock/img/CraigKnoblock.jpg\",\"foaf:depicts\":\"<http://lod.isi.edu/cs548/person/Knoblock>\"},\"@id\":\"http://lod.isi.edu/cs548/person/Knoblock\",\"foaf:title\":\"Prof\",\"foaf:mbox\":{\"id\":\"mailto:knoblock@isi.edu\",\"rdfs:label\":\"knoblock@isi.edu\",\"@type\":\"http://www.w3.org/2002/07/owl#Thing\",\"@id\":\"mailto:knoblock@isi.edu\"}}"; Assert.assertEquals(result.toString(), expected); } @Test public void testMerge2() { MergeJSON test = new MergeJSON(); String a = "{\r\n \"@type\": \"http://dig.isi.edu/ontology/URLEntity\",\r\n \"dig:snapshot\": {\r\n \"dig:hasTitlePart\": {\r\n \"@type\": \"http://schema.org/WebPageElement\",\r\n \"@id\": \"https://karmadigstorage.blob.core.windows.net/arch/churl/20140301/losangeles.backpage.com/FemaleEscorts/undeniable-latina-hottie-1oohh-160hr-rainy-day-specials-_-24/38317518/title\"\r\n },\r\n \"dig:hasBodyPart\": {\r\n \"dig:mentionsPhoneNumber\": {\r\n \"dig:tenDigitPhoneNumber\": \"8052536469\",\r\n \"schema:location\": {\r\n \"@type\": \"http://schema.org/Place\",\r\n \"@id\": \"http://dig.isi.edu/data/exchange/805253\"\r\n },\r\n \"@type\": \"http://dig.isi.edu/ontology/PhoneNumber\",\r\n \"@id\": \"http://dig.isi.edu/data/phonenumber/8052536469\"\r\n },\r\n \"@type\": \"http://schema.org/WebPageElement\",\r\n \"@id\": \"https://karmadigstorage.blob.core.windows.net/arch/churl/20140301/losangeles.backpage.com/FemaleEscorts/undeniable-latina-hottie-1oohh-160hr-rainy-day-specials-_-24/38317518/body\"\r\n },\r\n \"@type\": \"http://schema.org/WebPage\",\r\n \"@id\": \"https://karmadigstorage.blob.core.windows.net/arch/churl/20140301/losangeles.backpage.com/FemaleEscorts/undeniable-latina-hottie-1oohh-160hr-rainy-day-specials-_-24/38317518\"\r\n },\r\n \"@id\": \"http://losangeles.backpage.com/FemaleEscorts/undeniable-latina-hottie-1oohh-160hr-rainy-day-specials-_-24/38317518\"\r\n}"; String b = "{\r\n \"@type\": \"http://schema.org/Place\",\r\n \"schema:geo\": {\r\n \"schema:longitude\": \"-119.176487\",\r\n \"@type\": \"http://schema.org/GeoCoordinates\",\r\n \"schema:latitude\": \"34.196012\",\r\n \"@id\": \"http://dig.isi.edu/data/exchange/805253/geo\"\r\n },\r\n \"@id\": \"http://dig.isi.edu/data/exchange/805253\",\r\n \"schema:address\": {\r\n \"schema:addressLocality\": \"Oxnard\",\r\n \"@type\": \"http://schema.org/PostalAddress\",\r\n \"schema:addressRegion\": \"California\",\r\n \"@id\": \"http://dig.isi.edu/data/exchange/805253/postaladdress\"\r\n }\r\n}"; Text result = test.evaluate(new Text(a), new Text(b), new Text("$.dig:snapshot[*].dig:hasBodyPart.dig:mentionsPhoneNumber.schema:location"), new Text("@id")); String expected = "{\"@type\":\"http://dig.isi.edu/ontology/URLEntity\",\"dig:snapshot\":{\"dig:hasTitlePart\":{\"@type\":\"http://schema.org/WebPageElement\",\"@id\":\"https://karmadigstorage.blob.core.windows.net/arch/churl/20140301/losangeles.backpage.com/FemaleEscorts/undeniable-latina-hottie-1oohh-160hr-rainy-day-specials-_-24/38317518/title\"},\"dig:hasBodyPart\":{\"dig:mentionsPhoneNumber\":{\"dig:tenDigitPhoneNumber\":\"8052536469\",\"schema:location\":{\"@type\":\"http://schema.org/Place\",\"schema:geo\":{\"schema:longitude\":\"-119.176487\",\"@type\":\"http://schema.org/GeoCoordinates\",\"schema:latitude\":\"34.196012\",\"@id\":\"http://dig.isi.edu/data/exchange/805253/geo\"},\"@id\":\"http://dig.isi.edu/data/exchange/805253\",\"schema:address\":{\"schema:addressLocality\":\"Oxnard\",\"@type\":\"http://schema.org/PostalAddress\",\"schema:addressRegion\":\"California\",\"@id\":\"http://dig.isi.edu/data/exchange/805253/postaladdress\"}},\"@type\":\"http://dig.isi.edu/ontology/PhoneNumber\",\"@id\":\"http://dig.isi.edu/data/phonenumber/8052536469\"},\"@type\":\"http://schema.org/WebPageElement\",\"@id\":\"https://karmadigstorage.blob.core.windows.net/arch/churl/20140301/losangeles.backpage.com/FemaleEscorts/undeniable-latina-hottie-1oohh-160hr-rainy-day-specials-_-24/38317518/body\"},\"@type\":\"http://schema.org/WebPage\",\"@id\":\"https://karmadigstorage.blob.core.windows.net/arch/churl/20140301/losangeles.backpage.com/FemaleEscorts/undeniable-latina-hottie-1oohh-160hr-rainy-day-specials-_-24/38317518\"},\"@id\":\"http://losangeles.backpage.com/FemaleEscorts/undeniable-latina-hottie-1oohh-160hr-rainy-day-specials-_-24/38317518\"}"; Assert.assertEquals(result.toString(), expected); } @Test public void testMerge3() { MergeJSON test = new MergeJSON(); String a = "{ \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Product1\", \"@type\" : [ \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/Product\", \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/ProductType5\" ], \"http://www.w3.org/2000/01/rdf-schema#label\" : [ { \"@value\" : \"manner gatemen\" } ], \"http://www.w3.org/2000/01/rdf-schema#comment\" : [ { \"@value\" : \"lordlings dialyzed hoardings palmitate resisters redesigned trowing fledging disinters occasionally refry objective comedown senders attendance calculous redux zed bidets subacute swinks berhymed pumping overassured outrush corteges chitters civilest chiffonniers kimonos protects epizootic centimos dismast boomage issues aggrieves sociably ammoniacs polliwogs labyrinths infatuates whiteout dissentients newmown flunkey titillated caduceus rediscovered breaststrokes schillings endorsement cheerleaders nonconcurrent intoned outpaces inkle superpowers habaneras subsoils paramours laughed\" } ], \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/producer\" : [ { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Producer1\" } ], \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/productPropertyNumeric1\" : [ { \"@value\" : \"831\", \"@type\" : \"http://www.w3.org/2001/XMLSchema#integer\" } ], \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/productPropertyNumeric2\" : [ { \"@value\" : \"312\", \"@type\" : \"http://www.w3.org/2001/XMLSchema#integer\" } ], \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/productPropertyNumeric3\" : [ { \"@value\" : \"735\", \"@type\" : \"http://www.w3.org/2001/XMLSchema#integer\" } ], \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/productPropertyNumeric5\" : [ { \"@value\" : \"150\", \"@type\" : \"http://www.w3.org/2001/XMLSchema#integer\" } ], \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/productPropertyTextual1\" : [ { \"@value\" : \"guzzling jillion psychotherapists substantiation nonuple deluded snowmelt interlards overrefinement annoyed stuntedness calcimining stereophonically\" } ], \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/productPropertyTextual2\" : [ { \"@value\" : \"recommendation embezzler reconviction misproportions discountenances callings defacers crummiest triglyceride\" } ], \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/productPropertyTextual3\" : [ { \"@value\" : \"decentralizations impacting promulgations bibliotherapy murexes professorships locomotes durning lyncher spoonier abhorrence assize goglets\" } ], \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/productPropertyTextual4\" : [ { \"@value\" : \"distracts universally trashily enervator\" } ], \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/productFeature\" : [ { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/ProductFeature142\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/ProductFeature144\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/ProductFeature154\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/ProductFeature156\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/ProductFeature158\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/ProductFeature159\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/ProductFeature171\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/ProductFeature175\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/ProductFeature177\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/ProductFeature178\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/ProductFeature180\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/ProductFeature3\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/ProductFeature4\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/ProductFeature16\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/ProductFeature22\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/ProductFeature25\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/ProductFeature28\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/ProductFeature34\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/ProductFeature35\" } ], \"http://purl.org/dc/elements/1.1/publisher\" : [ { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromProducer1/Producer1\" } ], \"http://purl.org/dc/elements/1.1/date\" : [ { \"@value\" : \"2000-11-01\", \"@type\" : \"http://www.w3.org/2001/XMLSchema#date\" } ], \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/review\" : [ { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromRatingSite1/Review3\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromRatingSite1/Review8\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromRatingSite1/Review12\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromRatingSite1/Review14\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromRatingSite1/Review16\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromRatingSite1/Review18\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromRatingSite1/Review19\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromRatingSite1/Review23\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromRatingSite1/Review25\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromRatingSite1/Review27\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromRatingSite1/Review28\" } ], \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/offer\" : [ { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromVendor1/Offer3\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromVendor1/Offer10\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromVendor1/Offer20\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromVendor1/Offer23\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromVendor1/Offer27\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromVendor1/Offer28\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromVendor1/Offer32\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromVendor1/Offer36\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromVendor1/Offer42\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromVendor1/Offer48\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromVendor1/Offer50\" }, { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/dataFromVendor1/Offer55\" } ]}"; String b = "{ \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/ProductType5\", \"@type\" : [ \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/ProductType\" ], \"http://www.w3.org/2000/01/rdf-schema#label\" : [ { \"@value\" : \"arteriography\" } ], \"http://www.w3.org/2000/01/rdf-schema#comment\" : [ { \"@value\" : \"hollowness unhealed cityward parring wishing pyromaniacs marbly pions boughed innervate gung inverts demoted comprehended stollen unadjudicated septuagenarians chaptered vocalists bennies unstably moppet cogging ethnics billhooks frenching squeamishly tyres weaseling chancroids pandered amirates relented\" } ], \"http://www.w3.org/2000/01/rdf-schema#subClassOf\" : [ { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/ProductType2\" } ], \"http://purl.org/dc/elements/1.1/publisher\" : [ { \"@id\" : \"http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/instances/StandardizationInstitution1\" } ], \"http://purl.org/dc/elements/1.1/date\" : [ { \"@value\" : \"2000-07-18\", \"@type\" : \"http://www.w3.org/2001/XMLSchema#date\" } ]}"; Text result = test.evaluate(new Text(a), new Text(b), new Text("$.@type"), new Text("@id")); //String expected = "{\"@type\":\"http://dig.isi.edu/ontology/URLEntity\",\"dig:snapshot\":{\"dig:hasTitlePart\":{\"@type\":\"http://schema.org/WebPageElement\",\"@id\":\"https://karmadigstorage.blob.core.windows.net/arch/churl/20140301/losangeles.backpage.com/FemaleEscorts/undeniable-latina-hottie-1oohh-160hr-rainy-day-specials-_-24/38317518/title\"},\"dig:hasBodyPart\":{\"dig:mentionsPhoneNumber\":{\"dig:tenDigitPhoneNumber\":\"8052536469\",\"schema:location\":{\"@type\":\"http://schema.org/Place\",\"schema:geo\":{\"schema:longitude\":\"-119.176487\",\"@type\":\"http://schema.org/GeoCoordinates\",\"schema:latitude\":\"34.196012\",\"@id\":\"http://dig.isi.edu/data/exchange/805253/geo\"},\"@id\":\"http://dig.isi.edu/data/exchange/805253\",\"schema:address\":{\"schema:addressLocality\":\"Oxnard\",\"@type\":\"http://schema.org/PostalAddress\",\"schema:addressRegion\":\"California\",\"@id\":\"http://dig.isi.edu/data/exchange/805253/postaladdress\"}},\"@type\":\"http://dig.isi.edu/ontology/PhoneNumber\",\"@id\":\"http://dig.isi.edu/data/phonenumber/8052536469\"},\"@type\":\"http://schema.org/WebPageElement\",\"@id\":\"https://karmadigstorage.blob.core.windows.net/arch/churl/20140301/losangeles.backpage.com/FemaleEscorts/undeniable-latina-hottie-1oohh-160hr-rainy-day-specials-_-24/38317518/body\"},\"@type\":\"http://schema.org/WebPage\",\"@id\":\"https://karmadigstorage.blob.core.windows.net/arch/churl/20140301/losangeles.backpage.com/FemaleEscorts/undeniable-latina-hottie-1oohh-160hr-rainy-day-specials-_-24/38317518\"},\"@id\":\"http://losangeles.backpage.com/FemaleEscorts/undeniable-latina-hottie-1oohh-160hr-rainy-day-specials-_-24/38317518\"}"; Assert.assertEquals(result.toString(), result.toString()); } }