/******************************************************************************* * Copyright 2012 University of Southern California * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * This code was developed by the Information Integration Group as part * of the Karma project at the Information Sciences Institute of the * University of Southern California. For more information, publications, * and related projects, please see: http://www.isi.edu/integration ******************************************************************************/ package edu.isi.karma.kr2rml; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.json.JSONArray; import org.json.JSONException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import edu.isi.karma.rep.HNode; import edu.isi.karma.rep.HTable; import edu.isi.karma.rep.RepFactory; import edu.isi.karma.rep.Worksheet; public class TemplateTermSetBuilder { private static Logger logger = LoggerFactory.getLogger(TemplateTermSetBuilder.class); public static TemplateTermSet constructTemplateTermSetFromR2rmlTemplateString( String templStr, Worksheet worksheet, RepFactory factory) throws JSONException { TemplateTermSet termSet = new TemplateTermSet(); Pattern p = Pattern.compile("\\{\\\".*?\\\"\\}"); Matcher matcher = p.matcher(templStr); int startIndex = 0; if (matcher.find()) { matcher.reset(); while (matcher.find()) { // We have a String template term before this column template term if (matcher.start() != startIndex) { String strTermVal = templStr.substring(startIndex, matcher.start()); logger.debug("String templ term: " + strTermVal); termSet.addTemplateTermToSet(new StringTemplateTerm(strTermVal)); } String colTermVal = removeR2rmlFormatting(matcher.group()); logger.debug("Col name templ term: " + colTermVal); HTable hTable = worksheet.getHeaders(); // If hierarchical columns if (colTermVal.startsWith("[") && colTermVal.endsWith("]") && colTermVal.contains(",")) { JSONArray strArr = new JSONArray(colTermVal); for (int i=0; i<strArr.length(); i++) { String cName = (String) strArr.get(i); logger.debug("Column being normalized: "+ cName); HNode hNode = hTable.getHNodeFromColumnName(cName); if(hNode == null || hTable == null) { logger.error("Error retrieving column: " + cName); return null; } if (i == strArr.length()-1) { // Found! String hNodeId = hNode.getId(); termSet.addTemplateTermToSet(new ColumnTemplateTerm(hNodeId)); } else { hTable = hNode.getNestedTable(); } } } else { HNode hNode = hTable.getHNodeFromColumnName(colTermVal); logger.debug("Column" + removeR2rmlFormatting(colTermVal)); termSet.addTemplateTermToSet(new ColumnTemplateTerm(hNode.getId())); } startIndex = matcher.end(); } } else { termSet.addTemplateTermToSet(new StringTemplateTerm(templStr)); } return termSet; } public static TemplateTermSet constructTemplateTermSetFromR2rmlColumnString( String colTermVal, Worksheet worksheet, RepFactory factory) throws JSONException { TemplateTermSet termSet = new TemplateTermSet(); HTable hTable = worksheet.getHeaders(); // If hierarchical columns if (colTermVal.startsWith("[") && colTermVal.endsWith("]")) { JSONArray strArr = new JSONArray(colTermVal); for (int i=0; i<strArr.length(); i++) { String cName = (String) strArr.get(i); logger.debug("Column being normalized: "+ cName); HNode hNode = hTable.getHNodeFromColumnName(cName); if(hNode == null || hTable == null) { logger.error("Error retrieving column: " + cName); return null; } if (i == strArr.length()-1) { // Found! String hNodeId = hNode.getId(); termSet.addTemplateTermToSet(new ColumnTemplateTerm(hNodeId)); } else { hTable = hNode.getNestedTable(); } } } else { HNode hNode = hTable.getHNodeFromColumnName( removeR2rmlFormatting(colTermVal)); logger.debug("Column" + removeR2rmlFormatting(colTermVal)); termSet.addTemplateTermToSet(new ColumnTemplateTerm(hNode.getId())); } return termSet; } private static String removeR2rmlFormatting(String r2rmlColName) { if (r2rmlColName.startsWith("{\"") && r2rmlColName.endsWith("\"}")) return r2rmlColName.substring(2, r2rmlColName.length()-2); else return r2rmlColName; } }