/* * Copyright (c) 2013, University of Toronto. * * Licensed under the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. You may obtain * a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations * under the License. */ package edu.toronto.cs.xcurator.discoverer; import edu.toronto.cs.xcurator.common.DataDocument; import edu.toronto.cs.xcurator.mapping.Attribute; import edu.toronto.cs.xcurator.mapping.Schema; import edu.toronto.cs.xcurator.mapping.Mapping; import java.util.Iterator; import java.util.List; /** * * @author ekzhu */ public class KeyAttributeDiscovery implements MappingDiscoveryStep { @Override public void process(List<DataDocument> dataDocuments, Mapping mapping) { System.out.println("process KeyAttributeDiscovery..."); Iterator<Schema> it = mapping.getEntityIterator(); while (it.hasNext()) { // For each entity, find attribute whose instances are unique // That is, the cardinality of the attribute instances should equal // to the cardinality of the entity instances // The value attribute should not be used as key. // Its instance count should be zero Schema entity = it.next(); int instanceCount = entity.getXmlInstanceCount(); Iterator<Attribute> attrIt = entity.getAttributeIterator(); while (attrIt.hasNext()) { Attribute attr = attrIt.next(); // This is a hack, the key identification algorithm needs to be // improved. System.out.println(attr); System.out.println(attr.getInstances().size() + " <> " + instanceCount); if (attr.getInstances().size() == instanceCount // && // attr.getId().endsWith(".id") ) { attr.asKey(); } } } } }