DataMerger.java example

Explorer

Phenex-master
- src
  - com
    - eekboom
      - utils
        Strings.java
  - org
    - obo
      - annotation
        base
        OBOUtil.java
        OntologyConfiguration.java
        OntologySource.java
        TermSet.java
        UserOntologyConfiguration.java
        view
        DefaultOntologyCoordinator.java
        IDSelectionEvent.java
        IDSelectionListener.java
        LogViewComponent.java
        LogViewComponentFactory.java
        NativeDockingTheme.java
        OntologyCoordinator.java
        PhenoteGraphViewFactory.java
        PhenoteOntologyTreeEditorFactory.java
        PostCompositionEditor.java
        SelectionBridge.java
        SelectionManager.java
        TermAutocompleteFieldFactory.java
        TermHit.java
        TermInfoComponent.java
        TermInfoComponentFactory.java
        TermInfoToolbar.java
        TermRenderer.java
        TermSearcher.java
        TermSelectionEvent.java
        TermSelectionListener.java
        UseTermEvent.java
        UseTermListener.java
      - app
        controller
        DocumentController.java
        UndoController.java
        UserCancelledReadException.java
        model
        AbstractPropertyChangeObject.java
        ObservableEventList.java
        Observer.java
        PropertyChangeObject.java
        swing
        AutocompleteCellEditor.java
        AutocompleteField.java
        AutocompleteSearcher.java
        BlockingProgressDialog.java
        BugWorkaroundTable.java
        DefaultHit.java
        DefaultSearcher.java
        ListSelectionMaintainer.java
        MatchType.java
        PlaceholderRenderer.java
        PopupListener.java
        ResponderChainAction.java
        SearchHit.java
        SortDisabler.java
        SpringUtilities.java
        StackedBox.java
        TabActionTextField.java
        TableColumnPrefsSaver.java
        TablePopupListener.java
        WindowSizePrefsSaver.java
        util
        Collections.java
        CrossPlatform.java
        Enumerator.java
        EverythingEqualComparator.java
        PrefObj.java
        URLProxy.java
    - phenoscape
- test
  - org
    - phenoscape

package org.phenoscape.util;

import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.log4j.Logger;
import org.obo.datamodel.OBOClass;
import org.phenoscape.model.Character;
import org.phenoscape.model.DataSet;
import org.phenoscape.model.MultipleState;
import org.phenoscape.model.Specimen;
import org.phenoscape.model.State;
import org.phenoscape.model.Taxon;

/**
 * @author Jim Balhoff
 */
public class DataMerger {
 
    /**
     * Merge values from a data set into an existing data set.  Merges characters,
     * taxa, matrix data, and document data, in that order.
     */
    public static void mergeDataSets(DataSet newData, DataSet existingData) {
        mergeCharacters(newData, existingData);
        mergeTaxa(newData, existingData);
        mergeMatrix(newData, existingData);
        mergeDocumentData(newData, existingData);
    }

    /**
     * Merge characters and EQ annotations from a data set into an existing data set. The 
     * "Character Number" and "State Number" columns are used to match a character (by index) and state (by symbol) 
     * in the existing data set.  If the index falls outside the current range of characters, 
     * a new character is appended to the existing data set.  If a state with the given symbol does 
     * not exist, a new state is appended to the given character.  Description information for the new 
     * characters overwrites that of the existing characters.  EQ annotations are replaced.
     */
    public static void mergeCharacters(DataSet newData, DataSet existingData) {
        for (int i = 0; i < newData.getCharacters().size(); i++) {
            final Character newCharacter = newData.getCharacters().get(i);
            if (newCharacter == null) continue; // this handles a "sparse" collection of characters - only certain indexes represented
            if (i >= existingData.getCharacters().size()) {
                //add character
                existingData.addCharacter(newCharacter);
            } else {
                //merge character data
                final Character character = existingData.getCharacters().get(i);
                character.setLabel(newCharacter.getLabel());
                for (State newState : newCharacter.getStates()) {
                    final State state = findState(character.getStates(), newState.getSymbol());
                    if (state != null) {
                        state.setLabel(newState.getLabel());
                        state.getPhenotypes().clear();
                        state.getPhenotypes().addAll(newState.getPhenotypes());
                    } else {
                        character.addState(newState);
                    }
                }
            }
        }
    }

    /**
     * Merge taxa from a dataset into an existing data set.  Taxa are matched 
     * via their "Publication Name" first.  TTO identifiers and specimen lists are applied to the matched 
     * taxa.  Any taxa in the tab file that do not match a taxon in the existing data set are added 
     * to the data set.  If a match wasn't found by publication name, taxa are matched by TTO identifer.
     */
    public static void mergeTaxa(DataSet newData, DataSet existingData) {
        final List<Taxon> importedTaxa = newData.getTaxa();
        for (Taxon importedTaxon : importedTaxa) {
            final Taxon pubNameMatch = findTaxon(existingData.getTaxa(), importedTaxon.getPublicationName());
            final Taxon existingTaxon = (pubNameMatch == null) ? (findTaxon(existingData.getTaxa(), importedTaxon.getValidName())) : pubNameMatch;
            if (existingTaxon != null) {
                existingTaxon.setValidName(importedTaxon.getValidName());
                existingTaxon.setComment(importedTaxon.getComment());
                for (Specimen specimen : importedTaxon.getSpecimens()) {
                    existingTaxon.addSpecimen(specimen);
                }
            } else {
                existingData.addTaxon(importedTaxon);
            }
        }
    }

    /**
     * Merge matrix values into an existing data set.  Characters are matched via their index. 
     * Only matrix values for existing taxa and characters are applied. Values are matched by comparing 
     * the symbol - if a state with that symbol is not available for the character in the existing data set, a 
     * new state with that symbol is added to the character. Taxa are matched via their Publication 
     * Name.  Matrix values for unmatched taxa are unaltered.
     */
    public static void mergeMatrix(DataSet newData, DataSet existingData) {
        for (int i = 0; i < existingData.getCharacters().size(); i++) {
            final Character currentCharacter = existingData.getCharacters().get(i);
            for (Taxon taxon : existingData.getTaxa()) {
                final String taxonName = taxon.getPublicationName();
                final Taxon newTaxon = findTaxon(newData.getTaxa(), taxonName);
                if (newTaxon == null) { continue; }
                final Character newCharacter = newData.getCharacters().get(i);
                final State newStateValue = newData.getStateForTaxon(newTaxon, newCharacter);
                final State state;
                if (newStateValue == null) {
                    state = null;
                } else if (newStateValue instanceof MultipleState) {
                	state = mapMultipleStateIntoExistingStates(currentCharacter.getStates(), (MultipleState)newStateValue);
                } else {
                    final String valueSymbol = newStateValue.getSymbol();
                    final State existingState = findState(currentCharacter.getStates(), valueSymbol);
                    if (existingState == null) {
                        state = newStateValue;
                        currentCharacter.addState(state);
                    } else {
                        state = existingState;
                    }
                }
                existingData.setStateForTaxon(taxon, currentCharacter, state);
            }
        }
    }

    
    /**
     * Replace values such as publication, publication notes, and curators in the 
     * existing data with any non-null values in the new data.  If the new data is null 
     * for a field, the existing value is unchanged.
     */
    public static void mergeDocumentData(DataSet newData, DataSet existingData) {
        if (newData.getPublication() != null) {
            existingData.setPublication(newData.getPublication());
        }
        if (newData.getCurators() != null) {
            existingData.setCurators(newData.getCurators());
        }
        if (newData.getPublicationNotes() != null) {
            existingData.setPublicationNotes(newData.getPublicationNotes());
        }
    }

    private static Taxon findTaxon(List<Taxon> taxa, String pubName) {
        if ((pubName == null) || (pubName.equals(""))) { return null; }
        for (Taxon taxon : taxa) {
            if (pubName.equals(taxon.getPublicationName())) { return taxon; }
        }
        return null;
    }

    private static Taxon findTaxon(List<Taxon> taxa, OBOClass validName) {
        if (validName == null) { return null; }
        for (Taxon taxon : taxa) {
            if (validName.equals(taxon.getValidName())) { return taxon; }
        }
        return null;
    }

    private static State findState(List<State> states, String symbol) {
        for (State state: states) {
            if (symbol.equals(state.getSymbol())) { return state; }
        }
        return null;
    }
    
    private static MultipleState mapMultipleStateIntoExistingStates(List<State> allExistingStates, MultipleState newState) {
    	final Set<State> existingStates = new HashSet<State>();
    	for (State state : newState.getStates()) {
    		final State foundState = findState(allExistingStates, state.getSymbol());
    		if (foundState != null) {
    			existingStates.add(foundState);
    		} else {
    			existingStates.add(state);
    		}
    	}
    	return new MultipleState(existingStates, newState.getMode());
    }

    @SuppressWarnings("unused")
    private static Logger log() {
        return Logger.getLogger(DataMerger.class);
    }

}