/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.stanbol.entityhub.indexing.source.vcard;

import static org.apache.stanbol.entityhub.indexing.source.vcard.OntologyMappings.ADR_COUNTRY;
import static org.apache.stanbol.entityhub.indexing.source.vcard.OntologyMappings.ADR_EXTENDED;
import static org.apache.stanbol.entityhub.indexing.source.vcard.OntologyMappings.ADR_LOCALITY;
import static org.apache.stanbol.entityhub.indexing.source.vcard.OntologyMappings.ADR_POSTAL_CODE;
import static org.apache.stanbol.entityhub.indexing.source.vcard.OntologyMappings.ADR_POST_OFFICE_ADDRESS;
import static org.apache.stanbol.entityhub.indexing.source.vcard.OntologyMappings.ADR_REGION;
import static org.apache.stanbol.entityhub.indexing.source.vcard.OntologyMappings.ADR_STREET;
import static org.apache.stanbol.entityhub.indexing.source.vcard.OntologyMappings.N_ADDITIONAL;
import static org.apache.stanbol.entityhub.indexing.source.vcard.OntologyMappings.N_FAMILY;
import static org.apache.stanbol.entityhub.indexing.source.vcard.OntologyMappings.N_GIVEN;
import static org.apache.stanbol.entityhub.indexing.source.vcard.OntologyMappings.N_PREFIX;
import static org.apache.stanbol.entityhub.indexing.source.vcard.OntologyMappings.N_SUFFIX;
import static org.apache.stanbol.entityhub.indexing.source.vcard.OntologyMappings.RDF_TYPE;
import static org.apache.stanbol.entityhub.indexing.source.vcard.OntologyMappings.VCARD_ORGANIZATION;
import static org.apache.stanbol.entityhub.indexing.source.vcard.OntologyMappings.VCARD_PERSON;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;

import net.fortuna.ical4j.data.ParserException;
import net.fortuna.ical4j.vcard.Parameter;
import net.fortuna.ical4j.vcard.Property;
import net.fortuna.ical4j.vcard.VCard;
import net.fortuna.ical4j.vcard.VCardBuilder;
import net.fortuna.ical4j.vcard.VCardFileFilter;
import net.fortuna.ical4j.vcard.property.Address;
import net.fortuna.ical4j.vcard.property.N;
import net.fortuna.ical4j.vcard.property.Org;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.stanbol.entityhub.core.model.InMemoryValueFactory;
import org.apache.stanbol.entityhub.indexing.core.EntityDataIterable;
import org.apache.stanbol.entityhub.indexing.core.EntityDataIterator;
import org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig;
import org.apache.stanbol.entityhub.indexing.core.source.ResourceImporter;
import org.apache.stanbol.entityhub.indexing.core.source.ResourceLoader;
import org.apache.stanbol.entityhub.indexing.core.source.ResourceState;
import org.apache.stanbol.entityhub.indexing.source.vcard.OntologyMappings.Mapping;
import org.apache.stanbol.entityhub.servicesapi.defaults.NamespaceEnum;
import org.apache.stanbol.entityhub.servicesapi.model.Representation;
import org.apache.stanbol.entityhub.servicesapi.model.ValueFactory;
import org.apache.stanbol.entityhub.servicesapi.util.ModelUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import edu.emory.mathcs.backport.java.util.Arrays;
import edu.emory.mathcs.backport.java.util.Collections;

/**
 * Indexing source that reads vCard files and converts the contained vCard
 * objects to {@link Representation}s.
 * <p>
 * The work is done in two phases: (1) all configured source files are copied
 * (and if necessary re-encoded to {@link #DEFAULT_CHARSET}) to a "vcard"
 * folder within the destination folder of the indexing configuration by
 * {@link #importResource(InputStream, String)}; (2) the copied files are
 * parsed one after the other by the {@link EntityDataIterator} returned by
 * {@link #entityDataIterator()}.
 */
public class VcardIndexingSource implements EntityDataIterable, ResourceImporter {

    protected static final Logger log = LoggerFactory.getLogger(VcardIndexingSource.class);

    /**
     * The prefix used to create Entities
     */
    private String prefix;
    /**
     * The char used to separate the entity type from the local name within
     * the IDs of created entities. Depends on the kind of the {@link #prefix}.
     */
    private char typeSeperatorChar = '/';
    private ResourceLoader loader;
    /**
     * The charset used to read the vcard file(s) in the source folder.
     * <code>null</code> until {@link #setConfiguration(Map)} was called.
     */
    private Charset charset = null;
    /**
     * The default Charset ("utf-8"). This is also used to write the vcard files
     * within the destination directory.
     */
    public static final Charset DEFAULT_CHARSET = Charset.forName("UTF8");
    /**
     * Parameter that allows users to define the encoding of the vcard files
     * to import. If this parameter is missing or empty the default charset of
     * the platform is used to read the source files. Imported files are always
     * written using the {@link #DEFAULT_CHARSET} ("utf-8").
     */
    public static final String PARAM_CHARSET = "encoding";
    /**
     * The Parameter used to configure the source folder(s) relative to the
     * {@link IndexingConfig#getSourceFolder()}. The ',' (comma) is used as
     * separator to parse multiple sources.
     */
    public static final String PARAM_SOURCE_FILE_OR_FOLDER = "source";
    /**
     * The default directory name used to search for vcard files to be imported
     */
    public static final String DEFAULT_SOURCE_FOLDER_NAME = "vcard";
    /**
     * The prefix used for vCard entities
     */
    public static final String PARAM_PREFIX = "prefix";
    /**
     * Used to import vcard files from the
     * {@link IndexingConfig#getSourceFolder() source}/
     * {@link #PARAM_SOURCE_FILE_OR_FOLDER vcard} folder.
     */
    protected ResourceImporter importer;
    /**
     * Folder within the destination directory used to temporarily copy all the
     * vCard files to import.
     */
    private File vcardFileImportFolder;
    /**
     * List of the files that need to be imported. Initialised in {@link #initialise()}
     */
    @SuppressWarnings("unchecked")
    private List<File> vcardFiles = Collections.emptyList();
    /**
     * Used to create {@link Representation} instances
     */
    private ValueFactory vf = InMemoryValueFactory.getInstance();
    /**
     * The vcard -> ontology mappings
     * TODO make configurable as soon as there are multiple mappings available
     */
    private Map<String,Mapping> mappings = OntologyMappings.schemaOrgMappings;

    /**
     * Creates the indexing source. As a side effect this sets the system
     * property "ical4j.parsing.relaxed" to "true".
     */
    public VcardIndexingSource() {
        //set relaxed parsing to TRUE
        System.setProperty("ical4j.parsing.relaxed", Boolean.TRUE.toString());
    }

    @Override
    public EntityDataIterator entityDataIterator() {
        return new VCardIterator();
    }

    @Override
    public void close() {
        this.importer = null;
    }

    @Override
    public boolean needsInitialisation() {
        //if there are resources with the state REGISTERED we need an initialisation
        return !loader.getResources(ResourceState.REGISTERED).isEmpty();
    }

    /**
     * Loads all registered resources (this copies the vCard files to the
     * import folder) and afterwards collects the files to be parsed.
     * @throws IllegalStateException if the import folder can not be listed
     */
    @SuppressWarnings("unchecked")
    @Override
    public void initialise() {
        //this will call #importResource(..) for all files in the directories
        //configured by the #PARAM_SOURCE_FILE_OR_FOLDER
        loader.loadResources();
        //create the lists
        File[] importedFiles = vcardFileImportFolder.listFiles(
            (FilenameFilter) VCardFileFilter.INSTANCE);
        if (importedFiles == null) {
            //listFiles returns null if the folder is no directory or on I/O errors
            throw new IllegalStateException("Unable to list the vCard files within the import folder "
                + vcardFileImportFolder.getAbsolutePath());
        }
        vcardFiles = Arrays.asList(importedFiles);
    }

    /**
     * Reads the configuration: (re-)creates the import folder, registers the
     * configured sources with the {@link ResourceLoader} and parses the
     * {@link #PARAM_CHARSET encoding} and the required
     * {@link #PARAM_PREFIX prefix}.
     * @param config the configuration. Needs to contain the
     * {@link IndexingConfig} under {@link IndexingConfig#KEY_INDEXING_CONFIG}.
     * @throws IllegalStateException if the import folder can not be
     * (re-)created, the configured encoding is not supported or the prefix
     * is missing
     */
    @Override
    public void setConfiguration(Map<String,Object> config) {
        //init fields
        IndexingConfig indexingConfig = (IndexingConfig) config.get(IndexingConfig.KEY_INDEXING_CONFIG);
        loader = new ResourceLoader(this, true, false);
        prepareImportFolder(indexingConfig);
        registerSources(indexingConfig, config.get(PARAM_SOURCE_FILE_OR_FOLDER));
        charset = parseCharset(config.get(PARAM_CHARSET));
        parsePrefix(config.get(PARAM_PREFIX));
    }

    /**
     * (Re-)creates the folder within the destination directory the vCard files
     * are copied to. Data of a previous indexing are deleted.
     */
    private void prepareImportFolder(IndexingConfig indexingConfig) {
        //vcard files are imported from a special folder in the destination dir.
        //this folder needs to be deleted/(re-)created first.
        vcardFileImportFolder = new File(indexingConfig.getDestinationFolder(), "vcard");
        if (vcardFileImportFolder.exists()) {
            if (vcardFileImportFolder.isDirectory()) {
                try {
                    FileUtils.deleteDirectory(vcardFileImportFolder);
                } catch (IOException e) {
                    throw new IllegalStateException("Unable to delete Folder "
                        + vcardFileImportFolder.getAbsolutePath() + " containing the vCard files from a"
                        + "previouse indexing! Please remove this folder manually.", e);
                }
            } else if (!vcardFileImportFolder.delete()) {
                throw new IllegalStateException("Unable to delete File "
                    + vcardFileImportFolder.getAbsolutePath() + " containing the vCard data from a"
                    + "previouse indexing! Please remove this File manually.");
            }
        }
        if (!vcardFileImportFolder.mkdirs()) {
            throw new IllegalStateException("Unable to create Folder "
                + vcardFileImportFolder.getAbsolutePath() + " used to copy the vCard files to import!"
                + " Please check the permissions of the destination folder.");
        }
    }

    /**
     * Registers all configured sources with the {@link #loader}. Missing
     * sources without a file extension are assumed to be directories and
     * are created.
     * @param configuredSources the value of {@link #PARAM_SOURCE_FILE_OR_FOLDER}
     * or <code>null</code> to use {@link #DEFAULT_SOURCE_FOLDER_NAME}
     */
    private void registerSources(IndexingConfig indexingConfig, Object configuredSources) {
        log.debug("load vcard resources from :");
        Object value = configuredSources;
        if (value == null) { //if not set use the default
            value = DEFAULT_SOURCE_FOLDER_NAME;
        }
        for (String source : value.toString().split(",")) {
            File sourceFileOrDirectory = indexingConfig.getSourceFile(source);
            if (sourceFileOrDirectory.exists()) {
                //register the configured source with the ResourceLoader
                this.loader.addResource(sourceFileOrDirectory);
            } else if (FilenameUtils.getExtension(source).isEmpty()) {
                //non existent directory -> create
                //This is typically the case if this method is called to
                //initialise the default configuration. So we will try
                //to create the directory users need to copy the source
                //vCard files.
                if (sourceFileOrDirectory.mkdirs()) {
                    //this would not be necessary because the directory will
                    //be empty - however I like to be consistent and have
                    //all configured and existent files & dirs added to the
                    //resource loader
                    this.loader.addResource(sourceFileOrDirectory);
                } else {
                    log.warn("Unable to create directory {} configured to improt source data from. "
                        + "You will need to create this directory manually before copying the"
                        + "Source files into it.", sourceFileOrDirectory);
                }
            } else {
                log.warn("Unable to find vcard source {} within the indexing Source folder {}",
                    source, indexingConfig.getSourceFolder());
            }
        }
        if (log.isDebugEnabled()) {
            for (String registeredSource : loader.getResources(ResourceState.REGISTERED)) {
                log.debug(" > " + registeredSource);
            }
        }
    }

    /**
     * Parses the charset used to read the source files.
     * @param configuredEncoding the value of {@link #PARAM_CHARSET}
     * @return the configured charset or the default charset of the platform
     * if the parameter is missing or empty
     * @throws IllegalStateException if the configured encoding is not supported
     */
    private Charset parseCharset(Object configuredEncoding) {
        if (configuredEncoding == null) { //use platform encoding if missing
            return Charset.defaultCharset();
        }
        String encoding = configuredEncoding.toString();
        if (encoding.isEmpty()) { //use platform encoding if empty
            return Charset.defaultCharset();
        }
        try {
            return Charset.forName(encoding);
        } catch (IllegalArgumentException e) {
            //both illegal and unsupported charset names are IllegalArgumentExceptions
            throw new IllegalStateException("The configured encoding '"
                + encoding + "' is not supported by this Plattform", e);
        }
    }

    /**
     * Parses the required prefix and derives the {@link #typeSeperatorChar}
     * from the kind of the prefix.
     * @param configuredPrefix the value of {@link #PARAM_PREFIX}
     * @throws IllegalStateException if the prefix is missing or empty
     */
    private void parsePrefix(Object configuredPrefix) {
        if (configuredPrefix == null || configuredPrefix.toString().isEmpty()) {
            throw new IllegalStateException("Teh configuration is missing the required parameter 'prefix'!");
        }
        prefix = configuredPrefix.toString();
        //set the typeSeperatorChar based on the kind of parsed prefix
        if (prefix.endsWith("#")) {
            typeSeperatorChar = '.';
        } else if (prefix.endsWith("/")) {
            typeSeperatorChar = '/';
        } else if (prefix.endsWith(":")) {
            typeSeperatorChar = ':';
        } else if (prefix.startsWith("urn:")) { //maybe an urn without a tailing ':'
            prefix = prefix + ':';
            typeSeperatorChar = ':';
        } else if (prefix.indexOf("://") > 0) { //maybe an url without a tailing '/' or '#'
            prefix = prefix + '/';
            typeSeperatorChar = '/';
        } else {
            //no idea what kind of prefix ... use the default '/'. Set explicitly
            //so that a re-configuration does not keep an old separator
            typeSeperatorChar = '/';
        }
    }

    /**
     * This only copies vCard files to the {@link #vcardFileImportFolder} within the
     * {@link IndexingConfig#getDestinationFolder()}.<p>
     * In addition if a specific {@link #charset} is configured for the
     * vcard files to import this also changes the encoding to the
     * {@link #DEFAULT_CHARSET} (utf-8). This can help users to investigate and
     * correct file encoding related issues.<p>
     * The parsed stream is closed for imported resources - also if the copy
     * fails. It is not touched for ignored resources.
     * @param is the stream to read the resource from
     * @param resourceName the name of the resource
     * @return {@link ResourceState#LOADED} if the resource was copied or
     * {@link ResourceState#IGNORED} if the name starts with '.' or is not
     * accepted by the {@link VCardFileFilter}
     * @throws IOException on any error while copying the resource
     * @see org.apache.stanbol.entityhub.indexing.core.source.ResourceImporter#importResource(java.io.InputStream, java.lang.String)
     */
    @Override
    public ResourceState importResource(InputStream is, String resourceName) throws IOException {
        if (resourceName.charAt(0) == '.'
                || !VCardFileFilter.INSTANCE.accept(new File(resourceName))) {
            log.debug("RDFTerm {} ignored: Not an Vcard file.", resourceName);
            return ResourceState.IGNORED;
        }
        //copy the file to the destination directory
        //1. get the file name used in the destination
        String name = FilenameUtils.getName(resourceName);
        String baseName = FilenameUtils.getBaseName(name);
        String extension = FilenameUtils.getExtension(name);
        File outFile = new File(vcardFileImportFolder, name);
        for (int i = 0; outFile.exists(); i++) {
            outFile = new File(vcardFileImportFolder,
                String.format("%s_%s.%s", baseName, i, extension));
        }
        //2. check the encoding to ensure that in the destination all files use
        //   DEFAULT_CHARSET (utf-8)
        if (charset == null || charset.equals(DEFAULT_CHARSET)) {
            // no recoding -> copy bytes
            OutputStream os = null;
            try {
                os = new FileOutputStream(outFile);
                IOUtils.copy(is, os);
                os.flush(); //report write errors instead of loosing them in closeQuietly
            } finally {
                IOUtils.closeQuietly(os);
                IOUtils.closeQuietly(is);
            }
        } else {
            //recode
            Reader r = null;
            Writer w = null;
            try {
                r = new InputStreamReader(is, charset);
                w = new OutputStreamWriter(new FileOutputStream(outFile), DEFAULT_CHARSET);
                IOUtils.copy(r, w);
                w.flush(); //report write errors instead of loosing them in closeQuietly
            } finally {
                IOUtils.closeQuietly(w);
                IOUtils.closeQuietly(r);
                IOUtils.closeQuietly(is);
            }
        }
        return ResourceState.LOADED;
    }

    /**
     * Iterates over the {@link Representation}s created for all vCard objects
     * of all {@link VcardIndexingSource#vcardFiles imported files}. Files are
     * parsed lazily - one file at a time.
     */
    private final class VCardIterator implements EntityDataIterator {

        /** entity type -> name -> ids of the already created entities */
        private final Map<EntityType,Map<String,Set<String>>> entityMap;
        private Iterator<File> files = vcardFiles.iterator();
        @SuppressWarnings("unchecked")
        private Iterator<VCard> vcards = Collections.emptyList().iterator();
        @SuppressWarnings("unchecked")
        private Iterator<Representation> representations = Collections.emptyList().iterator();
        /** looked ahead by {@link #hasNext()}; consumed by {@link #next()} */
        private Representation nextRepresentation = null;
        private Representation currentRepresentation = null;

        private VCardIterator() {
            entityMap = new EnumMap<EntityType,Map<String,Set<String>>>(EntityType.class);
            entityMap.put(EntityType.organization, new HashMap<String,Set<String>>());
            entityMap.put(EntityType.person, new HashMap<String,Set<String>>());
        }

        /**
         * Parses all {@link VCard} objects of the parsed file. The file is
         * fully parsed and closed before this method returns.
         * @throws IllegalStateException if the file can not be found, read
         * or parsed
         */
        private Iterator<VCard> parseNext(File file) {
            Reader r;
            try {
                r = new InputStreamReader(new FileInputStream(file), DEFAULT_CHARSET);
            } catch (FileNotFoundException e) {
                throw new IllegalStateException("vcard import file " + file
                    + "not found - maybe deleted during import?", e);
            }
            try {
                //buildAll() returns the list of all parsed vCards, so the
                //reader is no longer needed when the iterator is used
                return new VCardBuilder(r).buildAll().iterator();
            } catch (IOException e) {
                throw new IllegalStateException("Unable to read vcard file " + file, e);
            } catch (ParserException e) {
                throw new IllegalStateException("Unable to parse vcard file " + file, e);
            } finally {
                IOUtils.closeQuietly(r);
            }
        }

        @Override
        public Representation getRepresentation() {
            return currentRepresentation;
        }

        @Override
        public boolean hasNext() {
            //Iterate while there are still representations, vCards or files
            while (nextRepresentation == null
                    && (representations.hasNext() || vcards.hasNext() || files.hasNext())) {
                if (representations.hasNext()) { //if more representations
                    nextRepresentation = representations.next(); //set next
                    continue;
                }
                //else process the next vCard object
                VCard nextVcard = null;
                //Iterate while there are still more vCards or files
                while (nextVcard == null && (vcards.hasNext() || files.hasNext())) {
                    if (vcards.hasNext()) { //if there are more vCards
                        nextVcard = vcards.next(); //get next
                    } else { //parse the next file
                        //NOTE: no need to check files.hasNext(): no vCards
                        //are left, so the loop condition ensures a file
                        vcards = parseNext(files.next());
                    }
                }
                if (nextVcard != null) {
                    representations = processVcard(nextVcard, mappings, entityMap);
                }
            }
            return nextRepresentation != null;
        }

        @Override
        public String next() {
            if (nextRepresentation == null && !hasNext()) { //try to get the next
                throw new NoSuchElementException();
            }
            currentRepresentation = nextRepresentation;
            nextRepresentation = null;
            return currentRepresentation.getId();
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException("removal is not supported");
        }

        @SuppressWarnings("unchecked")
        @Override
        public void close() {
            //set to empty iterators instead of null. Otherwise I would need
            //to check for null in all the other methods
            files = Collections.emptyList().iterator();
            representations = Collections.emptyList().iterator();
            vcards = Collections.emptyList().iterator();
            nextRepresentation = null;
            currentRepresentation = null;
        }
    }

    /**
     * Vcard objects can represent persons (FN is defined) or organisations
     * (no 'FN' but an 'ORG' element)
     * @author Rupert Westenthaler
     *
     */
    private enum EntityType {person,organization}

    /**
     * Converts a vCard object to Representations.
     * @param vCard the vCard object to process
     * @param mappings the Mappings to use
     * @param entityMap the Map holding the ids of already processed vCards. This
     * is used to avoid id conflicts
     * @return Iterator over the processed Representation. Empty if the vCard
     * defines neither a value for 'FN' nor for 'ORG'.
     */
    protected Iterator<Representation> processVcard(VCard vCard, Map<String,Mapping> mappings,
            Map<EntityType,Map<String,Set<String>>> entityMap) {
        //NOTE: this is protected to allow direct access from the VCardIterator
        String name = null;
        EntityType entityType = null;
        Property nameProperty = vCard.getProperty(Property.Id.FN);
        if (nameProperty != null && nameProperty.getValue() != null
                && !nameProperty.getValue().isEmpty()) {
            entityType = EntityType.person;
            name = nameProperty.getValue();
        } else { //no FN name -> maybe a ORG was exported
            Property orgProperty = vCard.getProperty(Property.Id.ORG);
            if (orgProperty != null) {
                String[] orgValues = ((Org) orgProperty).getValues();
                //the first value is used as name: a null name can not be used
                //to build the id of the entity
                if (orgValues != null && orgValues.length > 0 && orgValues[0] != null) {
                    entityType = EntityType.organization;
                    name = orgValues[0];
                }
            }
        }
        if (entityType == null) {
            log.warn("Unable to index vCard object without values for FN or ORG parameter (vCard: {})", vCard);
            return Collections.emptyList().iterator();
        }
        //prefer the UID as (local) id and fall back to the name
        Property uid = vCard.getProperty(Property.Id.UID);
        String id = uid != null ? uid.getValue() : name;
        id = entityByName(entityMap, entityType, name, id, true);
        //we have a name and an id (local name of the URI/URN)
        // ... now parse the vCard
        Representation rep = vf.createRepresentation(id);
        Map<String,Representation> representations = new HashMap<String,Representation>();
        representations.put(rep.getId(), rep);
        //add the type
        Mapping typeMapping = mappings.get(
            entityType == EntityType.person ? VCARD_PERSON : VCARD_ORGANIZATION);
        if (typeMapping != null) {
            rep.add(NamespaceEnum.rdf + "type", typeMapping.uri);
        }
        log.debug("vCard [type: {} | name: '{}' | id: '{}']",
            new Object[]{entityType, name, rep.getId()});
        for (Property property : vCard.getProperties()) {
            Property.Id propertyId = property.getId();
            String propName = propertyId.getPropertyName();
            if (!mappings.containsKey(propName)) {
                log.debug("No mapping for Property {} with value {}", propertyId, property.getValue());
                continue;
            }
            //there is a mapping for this property
            Mapping mapping = mappings.get(propName); //May be null!!
            //the Representation to write the Information of the current Property
            Representation current;
            //the Map with the mappings to be used for processing the current Property
            Map<String,Mapping> currentMappings;
            if (mapping == null || mapping.subMappings == null) {
                current = rep; //add to the base Representation
                currentMappings = mappings; //and use the parsed mappings
            } else {
                current = null; //indicates we need to create a new Representation
                currentMappings = mapping.subMappings; //and use the sub mappings
            }
            switch (propertyId) {
                case N:
                    mapName((N) property, rep, current, mapping, currentMappings, representations);
                    break;
                case ADR:
                    mapAddress((Address) property, rep, current, mapping, currentMappings, representations);
                    break;
                case ORG:
                    mapOrganization((Org) property, rep, current, mapping, currentMappings,
                        representations, entityMap);
                    break;
                default:
                    mapSimpleProperty(property, propName, rep, current, mapping);
                    break;
            }
            log.debug(" - {}: {}", propertyId.getPropertyName(), property.getValue());
            for (Parameter param : property.getParameters()) {
                log.debug(" {}:{}", param.getId().getPname(), param.getValue());
            }
        }
        log.debug(" > Mapped Data;");
        if (log.isDebugEnabled()) {
            for (Representation tmp : representations.values()) {
                log.debug(ModelUtils.getRepresentationInfo(tmp));
            }
        }
        log.debug("--- end ---");
        return representations.values().iterator();
    }

    /**
     * Maps the components of the 'N' property. If <code>current</code> is
     * <code>null</code> a ".name" sub-representation is created and added to
     * the parsed representations.
     */
    private void mapName(N n, Representation rep, Representation current, Mapping mapping,
            Map<String,Mapping> currentMappings, Map<String,Representation> representations) {
        String given = n.getGivenName();
        String family = n.getFamilyName();
        if ((given == null || given.isEmpty()) && (family == null || family.isEmpty())) {
            log.warn("'N' property '{}'does not define given nor family name -> ignored",
                n.getValue());
            return;
        }
        Representation target = current;
        if (target == null) { //create new Representation
            target = createSubRepresentation(rep, ".name", representations.keySet(), mapping);
            representations.put(target.getId(), target);
        }
        addValues(target, currentMappings.get(N_GIVEN), true, given);
        addValues(target, currentMappings.get(N_FAMILY), true, family);
        addValues(target, currentMappings.get(N_ADDITIONAL), true, n.getAdditionalNames());
        addValues(target, currentMappings.get(N_PREFIX), true, n.getPrefixes());
        addValues(target, currentMappings.get(N_SUFFIX), true, n.getSuffixes());
    }

    /**
     * Maps the components of the 'ADR' property. Addresses without any value
     * are ignored. If <code>current</code> is <code>null</code> a ".adr"
     * sub-representation is created and added to the parsed representations.
     */
    private void mapAddress(Address address, Representation rep, Representation current,
            Mapping mapping, Map<String,Mapping> currentMappings,
            Map<String,Representation> representations) {
        String rawValue = address.getValue();
        //check if the value does not only contain separators (';')
        if (rawValue == null || rawValue.replace(';', ' ').trim().isEmpty()) {
            return; //empty ADR field -> ignore
        }
        Representation target = current;
        if (target == null) { //create new Representation
            target = createSubRepresentation(rep, ".adr", representations.keySet(), mapping);
            representations.put(target.getId(), target);
        }
        //add string -> this is no natural language text
        addValues(target, currentMappings.get(ADR_POST_OFFICE_ADDRESS), false, address.getPoBox());
        addValues(target, currentMappings.get(ADR_EXTENDED), true, address.getExtended());
        addValues(target, currentMappings.get(ADR_STREET), true, address.getStreet());
        addValues(target, currentMappings.get(ADR_LOCALITY), true, address.getLocality());
        addValues(target, currentMappings.get(ADR_REGION), true, address.getRegion());
        //add string -> this is no natural language text
        addValues(target, currentMappings.get(ADR_POSTAL_CODE), false, address.getPostcode());
        //add string -> based on the standard this should be the two letter code
        addValues(target, currentMappings.get(ADR_COUNTRY), false, address.getCountry());
    }

    /**
     * Maps the 'ORG' property. Only the first value (the name of the
     * organisation) is used. If sub-mappings are defined for 'ORG'
     * (<code>current == null</code>) the organisation is an own Entity: it is
     * created on the first occurrence and only referenced by the base
     * representation for further occurrences.
     */
    private void mapOrganization(Org org, Representation rep, Representation current,
            Mapping mapping, Map<String,Mapping> currentMappings,
            Map<String,Representation> representations,
            Map<EntityType,Map<String,Set<String>>> entityMap) {
        String[] unitHierarchy = org.getValues();
        Mapping orgNameMapping = currentMappings.get(OntologyMappings.ORG_NAME);
        if (unitHierarchy == null || unitHierarchy.length == 0 || orgNameMapping == null
                || unitHierarchy[0] == null || unitHierarchy[0].trim().length() == 0) {
            return; //no organisation name or no mapping for it
        }
        if (current != null) {
            return; //no sub-mappings for ORG -> nothing is added
        }
        String orgName = unitHierarchy[0];
        //create new Representation for the Organisation
        //Note: this is an Entity and no sub-RDFTerm!
        String orgEntityId = entityByName(entityMap, EntityType.organization, orgName, null, false);
        if (orgEntityId != null) { //already known -> only add the reference
            rep.addReference(mapping.uri, orgEntityId);
            return;
        }
        //create new Entity for this Organization
        orgEntityId = entityByName(entityMap, EntityType.organization, orgName, null, true);
        Representation organization = vf.createRepresentation(orgEntityId);
        initSubRepresentation(organization, rep, mapping);
        representations.put(organization.getId(), organization);
        organization.addNaturalText(orgNameMapping.uri, StringUtils.chomp(orgName).trim());
        //TODO: inverse relation from the ORG to the
        // Person can not be supported without caching
        // organisations. Therefore delete this relation for now
        if (mapping.invUri != null) {
            organization.removeAll(mapping.invUri);
        }
        //TODO: Organisation units are not supported
    }

    /**
     * Maps a property without special processing by adding its value as
     * natural language text to the current representation.
     */
    private void mapSimpleProperty(Property property, String propName, Representation rep,
            Representation current, Mapping mapping) {
        if (mapping == null) {
            return; //no mapping defined
        }
        if (current == null) {
            log.warn("Sub-Resources are not supported for Property {} (mapping to {} ignored)!",
                propName, mapping);
            return;
        }
        String value = property.getValue();
        if (value != null) {
            value = StringUtils.chomp(value).trim();
        }
        //a missing value is treated like an empty one
        if (value == null || value.isEmpty()) {
            log.warn("Unable to index empty value for property {} of vCard {}",
                property.getId().getPropertyName(), rep.getId());
        } else {
            current.addNaturalText(mapping.uri, value);
        }
    }

    /**
     * Adds all non-<code>null</code> and non-empty values to the field defined
     * by the mapping. Values are added after removing a tailing line break and
     * surrounding white spaces.
     * @param target the representation to add the values to
     * @param mapping the mapping providing the field. If <code>null</code>
     * nothing is added.
     * @param naturalText if <code>true</code> values are added as natural
     * language texts; otherwise as plain values
     * @param values the values to add. May be <code>null</code> and may
     * contain <code>null</code> and empty elements.
     */
    private static void addValues(Representation target, Mapping mapping, boolean naturalText,
            String... values) {
        if (mapping == null || values == null) {
            return;
        }
        for (String value : values) {
            if (value == null || value.isEmpty()) {
                continue;
            }
            String cleaned = StringUtils.chomp(value).trim();
            if (naturalText) {
                target.addNaturalText(mapping.uri, cleaned);
            } else {
                target.add(mapping.uri, cleaned);
            }
        }
    }

    /**
     * Looks up or creates the id for an Entity.
     * @param entityMap the map with all the Entity name -> id mappings
     * @param entityType the type of the entity to search
     * @param name the name of the Entity
     * @param id optionally an id other than the name otherwise the name is used
     * @param create if <code>true</code> is parsed a new Entity is created even
     * if a entity with the same name already exists
     * @return the id of the created or found Entity. <code>null</code> if
     * <code>create</code> is <code>false</code> and no Entity was found.
     */
    private String entityByName(Map<EntityType,Map<String,Set<String>>> entityMap,
            EntityType entityType, String name, String id, boolean create) {
        if (id == null) {
            id = name;
        }
        //lookup the existing entities of that type and name
        Map<String,Set<String>> entitiesOfType = entityMap.get(entityType);
        Set<String> entities = entitiesOfType.get(name);
        if (entities == null) { //if none -> we will create one in this method
            entities = new HashSet<String>(2); //use lower size to save memory
            entitiesOfType.put(name, entities);
        }
        //make ids only to use ASCII chars and no white spaces
        id = id.replace(' ', '-');
        try { // encode special chars
            //TODO: replace that by ASCII folding
            id = URLEncoder.encode(id, "utf8");
        } catch (UnsupportedEncodingException e) {
            throw new IllegalStateException("This Plattform does not support 'utf8' encoding :(", e);
        }
        //add prefixes and so on
        id = prefix + entityType + typeSeperatorChar + id;
        //now we have the id
        if (!create) {
            //NOTE: this would always return the first Entity if multiple Entities
            // would have been created by using the checkId method.
            return entities.contains(id) ? id : null;
        }
        //we need to create a new entity
        id = checkId(id, entities);
        entities.add(id);
        return id;
    }

    /**
     * Create a sub-representation by considering the base {@link Representation},
     * IDs already taken by other sub representations. The Id addon the caller
     * would like to add to the id of the base representation. In addition it
     * adds the relation between the base and the sub-representation as well as
     * the type and the inverse links to the sub-representation.
     * @param base the base (parent) representation
     * @param addon the string addon to the id of the base
     * @param takenIds set of IDs that are already taken
     * @param mapping the mapping used to get the information needed to correctly
     * initialise the sub-relation
     * @return the created and initialised sub-representation
     */
    private Representation createSubRepresentation(Representation base, String addon,
            Set<String> takenIds, Mapping mapping) {
        String subId = checkId(base.getId() + addon, takenIds);
        Representation current = vf.createRepresentation(subId);
        initSubRepresentation(current, base, mapping);
        return current;
    }

    /**
     * Initialise the parsed sub-representation by adding the relation between
     * the base and the sub-representation as well as
     * the rdf:type of the sub-relation and the inverse link of the sub- to the
     * base representation.
     * @param toInit The representation to initialise
     * @param base the parent representation
     * @param mapping the mapping. Needs to define sub-mappings.
     */
    private void initSubRepresentation(Representation toInit, Representation base, Mapping mapping) {
        Mapping typeMapping = mapping.subMappings.get(RDF_TYPE);
        if (typeMapping != null) {
            toInit.addReference(NamespaceEnum.rdf + "type", typeMapping.uri);
        }
        base.addReference(mapping.uri, toInit.getId());
        if (mapping.invUri != null) {
            toInit.addReference(mapping.invUri, base.getId());
        }
    }

    /**
     * Adds "-{i}" to the end of the parsed ID until it does no longer conflict
     * with already taken IDs
     * @param id the id
     * @param taken already taken IDs
     * @return a id based on the parsed one that does not conflict with already
     * taken once.
     */
    private String checkId(String id, Set<String> taken) {
        String candidate = id;
        for (int i = 1; taken.contains(candidate); i++) {
            candidate = id + '-' + i;
        }
        return candidate;
    }

    /**
     * Parses the vCard file parsed as first argument and processes all
     * contained vCard objects by using the schema.org mappings.
     * @param args the path to the vCard file as first element
     * @throws Exception on any error while reading or parsing the file
     */
    public static void main(String[] args) throws Exception {
        VcardIndexingSource instance = new VcardIndexingSource();
        instance.prefix = "http://test.org/";
        Map<EntityType,Map<String,Set<String>>> entityMap =
            new EnumMap<EntityType,Map<String,Set<String>>>(EntityType.class);
        entityMap.put(EntityType.organization, new HashMap<String,Set<String>>());
        entityMap.put(EntityType.person, new HashMap<String,Set<String>>());
        Reader reader = new InputStreamReader(new FileInputStream(new File(args[0])), "utf8");
        try {
            VCardBuilder parser = new VCardBuilder(reader);
            for (VCard vcard : parser.buildAll()) {
                instance.processVcard(vcard, OntologyMappings.schemaOrgMappings, entityMap);
            }
        } finally {
            IOUtils.closeQuietly(reader);
        }
    }
}