/*
* #!
* Ontopia Engine
* #-
* Copyright (C) 2001 - 2013 The Ontopia Project
* #-
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* !#
*/
package net.ontopia.topicmaps.cmdlineutils;
import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import net.ontopia.topicmaps.core.AssociationIF;
import net.ontopia.topicmaps.core.TopicIF;
import net.ontopia.topicmaps.core.TopicMapIF;
import net.ontopia.topicmaps.core.TopicNameIF;
import net.ontopia.topicmaps.utils.ImportExportUtils;
import net.ontopia.topicmaps.xml.XTMTopicMapWriter;
import net.ontopia.topicmaps.utils.MergeUtils;
import net.ontopia.topicmaps.utils.KeyGenerator;
import net.ontopia.topicmaps.impl.basic.index.TNCIndex;
import net.ontopia.utils.CmdlineOptions;
import net.ontopia.utils.CmdlineUtils;
import net.ontopia.utils.StringUtils;
/**
* INTERNAL: Consistifies a topic map by merging topics based on the
* TNC (topic naming constraint), removing duplicate associations, and
* so on.
*/
public class Consistify {

  /** Format code that makes {@link #export} use the XTM writer. */
  private static final char FORMAT_XTM = 'x';

  /** Format code that leaves the choice of writer to ImportExportUtils. */
  private static final char FORMAT_AUTO = '?';

  /**
   * Command-line entry point: loads the input topic map, optionally
   * normalizes topic name whitespace, merges topics via the TNC index,
   * removes duplicate associations and writes the result.
   *
   * Exits with status 1 on option or argument errors and with status 3
   * when reading or writing fails with an IOException.
   *
   * @param argv raw command-line arguments (options, input, output)
   */
  public static void main(String[] argv) {
    // Initialize logging
    CmdlineUtils.initializeLogging();

    // Initialize command line option parser and listeners
    CmdlineOptions options = new CmdlineOptions("Consistify", argv);
    OptionsListener ohandler = new OptionsListener();

    // Register local options
    options.addLong(ohandler, "normalize", 'n');
    // The encoding option carries a value, so register it as taking an
    // argument; otherwise the listener never receives the encoding name.
    options.addLong(ohandler, "encoding", 'e', true);
    options.addLong(ohandler, "xtm", 'x');

    // Register logging options
    CmdlineUtils.registerLoggingOptions(options);

    // Parse command line options
    try {
      options.parse();
    } catch (CmdlineOptions.OptionsException e) {
      System.err.println("Error: " + e.getMessage());
      System.exit(1);
    }

    // Get command line arguments
    String[] args = options.getArguments();
    if (args.length < 2) {
      System.err.println("Error: need at least two files as arguments.");
      usage();
      System.exit(1);
    }

    try {
      TopicMapIF loaded = load(args[0]);
      if (ohandler.normalize) {
        normalizeTopicNames(loaded);
      }
      doTNCMerge(loaded);
      removeDuplicates(loaded);

      char format = ohandler.xtm ? FORMAT_XTM : FORMAT_AUTO;
      export(loaded, args[1], ohandler.encoding, format);
    } catch (java.io.IOException e) {
      System.err.println(e);
      System.exit(3);
    }
  }

  /**
   * Prints the command-line usage summary to standard output.
   */
  protected static void usage() {
    System.out.println("java Consistify [options] <input> <output>");
    System.out.println("");
    System.out.println(" Reads in a topic map, consistifies it, then writes it out again.");
    System.out.println("");
    System.out.println(" Options:");
    System.out.println(" -n: normalize whitespace in base names");
    System.out.println(" -e <encoding>: set output encoding");
    System.out.println(" -x: write output using the XTM writer");
    CmdlineUtils.printLoggingOptionsUsage(System.out);
    System.out.println("");
    System.out.println(" <input>: source topic map");
    System.out.println(" <output>: output topic map");
  }

  /**
   * Reads a topic map using the reader ImportExportUtils selects for the
   * given source.
   *
   * @param stm the input reference passed on the command line
   * @return the topic map that was read
   * @throws java.io.IOException if reading fails
   */
  protected static TopicMapIF load(String stm) throws java.io.IOException {
    return ImportExportUtils.getReader(stm).read();
  }

  /**
   * Merges topics that the TNC index reports as sharing a topic name
   * (same value and scope). For every name of every topic, all other
   * topics returned by the index are merged into that topic.
   *
   * @param tm the topic map to process in place
   */
  protected static void doTNCMerge(TopicMapIF tm) {
    TNCIndex index = new TNCIndex(tm);

    // Iterate over snapshots: merging changes the topic map while we walk it.
    for (Object topicObj : new ArrayList<Object>(tm.getTopics())) {
      TopicIF topic = (TopicIF) topicObj;
      if (topic.getTopicMap() == null) {
        // No longer attached to a topic map (e.g. already merged away).
        continue;
      }

      for (Object nameObj : new ArrayList<Object>(topic.getTopicNames())) {
        TopicNameIF bn = (TopicNameIF) nameObj;

        // Copy the lookup result as well, in case the index hands back a
        // collection that is affected by the merges performed below.
        for (Object sourceObj
            : new ArrayList<Object>(index.getTopics(bn.getValue(), bn.getScope()))) {
          TopicIF source = (TopicIF) sourceObj;
          if (source.equals(topic)) {
            continue;
          }
          MergeUtils.mergeInto(topic, source);
        }
      }
    }
  }

  /**
   * Writes the topic map to a file.
   *
   * @param tm the topic map to write
   * @param outfile the output file name
   * @param encoding the output encoding; "utf-8" is used when null
   * @param format 'x' forces XTMTopicMapWriter; any other value lets
   *               ImportExportUtils pick the writer for the output file
   * @throws java.io.IOException if writing fails
   */
  protected static void export(TopicMapIF tm, String outfile, String encoding,
                               char format)
      throws java.io.IOException {
    if (encoding == null) {
      encoding = "utf-8";
    }

    // Must match the code main() passes for the --xtm option.
    if (format == FORMAT_XTM) {
      new XTMTopicMapWriter(new File(outfile), encoding).write(tm);
    } else {
      ImportExportUtils.getWriter(outfile, encoding).write(tm);
    }
  }

  /**
   * Replaces the value of every topic name in the topic map with its
   * whitespace-normalized form.
   *
   * @param tm the topic map to process in place
   */
  protected static void normalizeTopicNames(TopicMapIF tm) {
    for (Object topicObj : tm.getTopics()) {
      TopicIF topic = (TopicIF) topicObj;
      for (Object nameObj : topic.getTopicNames()) {
        TopicNameIF bn = (TopicNameIF) nameObj;
        bn.setValue(StringUtils.normalizeWhitespace(bn.getValue()));
      }
    }
  }

  /**
   * Removes associations whose KeyGenerator key equals that of an
   * association seen earlier; the first association with a given key is
   * kept. Each removed key is reported on standard output.
   *
   * @param tm the topic map to process in place
   */
  protected static void removeDuplicates(TopicMapIF tm) {
    Set<String> seenKeys = new HashSet<String>();

    // Snapshot the associations so removal does not disturb the iteration.
    AssociationIF[] assocs = new AssociationIF[tm.getAssociations().size()];
    tm.getAssociations().toArray(assocs);

    for (AssociationIF assoc : assocs) {
      String key = KeyGenerator.makeAssociationKey(assoc);
      // add() returns false when the key is already present, i.e. a duplicate.
      if (!seenKeys.add(key)) {
        System.out.println("Removing: " + key);
        assoc.remove();
      }
    }
  }

  // --- Listener class

  /**
   * Records the values of this tool's own command-line options.
   */
  private static class OptionsListener implements CmdlineOptions.ListenerIF {
    boolean normalize = false;
    boolean xtm = false;
    String encoding = null;

    /**
     * Stores the option identified by its short character; unknown
     * characters are ignored.
     *
     * @param option the short option character
     * @param value the option argument, used for 'e' only
     */
    public void processOption(char option, String value)
        throws CmdlineOptions.OptionsException {
      switch (option) {
        case 'n':
          normalize = true;
          break;
        case 'x':
          xtm = true;
          break;
        case 'e':
          encoding = value;
          break;
        default:
          break;
      }
    }
  }
}