/* * Licensed under the Apache License, Version 2.0 (the "License"); * * You may not use this file except in compliance with the License. * * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * * See the License for the specific language governing permissions and * limitations under the License. * * Contributions from 2013-2017 were performed either by US government * employees, or under US Veterans Health Administration contracts. * * US Veterans Health Administration contributions by government employees * are work of the U.S. Government and are not subject to copyright * protection in the United States. Portions contributed by government * employees are USGovWork (17USC §105). Not subject to copyright. * * Contribution by contractors to the US Veterans Health Administration * during this period are contractually contributed under the * Apache License, Version 2.0. * * See: https://www.usa.gov/government-works * * Contributions prior to 2013: * * Copyright (C) International Health Terminology Standards Development Organisation. * Licensed under the Apache License, Version 2.0. * */ package sh.isaac.api.util; //~--- JDK imports ------------------------------------------------------------ /* * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
/* The license notices that precede this point apply to the code below. */
import java.util.Comparator;

//~--- classes ----------------------------------------------------------------

/**
 * The Alphanum Algorithm is an improved sorting algorithm for strings containing numbers.
 *
 * Instead of sorting numbers in ASCII order like a standard sort, this algorithm sorts numbers in
 * numeric order, so that {@code "file2"} sorts before {@code "file10"}.
 *
 * The inspiration for this implementation came from http://www.DaveKoelle.com However, his
 * implementation did not handle leading 0's properly, nor did it handle nulls or case sensitivity.
 *
 * Each input is split into alternating chunks of ASCII digits and non-digits. Two digit chunks are
 * compared by numeric value (of any length, leading zeros ignored); any other pair of chunks is
 * compared as text, optionally ignoring case. {@code null} sorts before every non-null string.
 *
 * Instances hold only an immutable flag, so a single instance can be shared freely.
 *
 * @author <A HREF="mailto:daniel.armbrust@gmail.com">Dan Armbrust</A>
 *
 * See http://armbrust.dyndns.org/programs/index.php?page=3
 */
public class AlphanumComparator
         implements Comparator<String> {
   /** Shared comparator that compares non-numeric chunks case sensitively. */
   private static final AlphanumComparator CASE_SENSITIVE_INSTANCE = new AlphanumComparator(false);

   /** Shared comparator that compares non-numeric chunks ignoring case. */
   private static final AlphanumComparator CASE_INSENSITIVE_INSTANCE = new AlphanumComparator(true);

   //~--- fields --------------------------------------------------------------

   /** When true, non-numeric chunks are compared with {@link String#compareToIgnoreCase(String)}. */
   private final boolean ignoreCase;

   //~--- constructors --------------------------------------------------------

   /**
    * Create a new instance of an AlphanumComparator.
    *
    * @param ignoreCase true to ignore case when comparing non-numeric chunks
    */
   public AlphanumComparator(boolean ignoreCase) {
      this.ignoreCase = ignoreCase;
   }

   //~--- methods -------------------------------------------------------------

   /**
    * Compares two strings chunk by chunk, treating runs of digits as numbers.
    *
    * @param s1 the first string, may be null
    * @param s2 the second string, may be null
    * @return a negative value, zero, or a positive value as {@code s1} sorts before, the same as,
    *         or after {@code s2}; null sorts before any non-null string and two nulls are equal
    */
   @Override
   public int compare(String s1, String s2) {
      if (s1 == null) {
         // Two nulls must compare equal, otherwise compare(x, x) != 0 and sorting becomes unstable.
         return (s2 == null) ? 0
                             : -1;
      }

      if (s2 == null) {
         return 1;
      }

      final int s1Length   = s1.length();
      final int s2Length   = s2.length();
      int       thisMarker = 0;
      int       thatMarker = 0;

      while ((thisMarker < s1Length) && (thatMarker < s2Length)) {
         final String thisChunk = getChunk(s1, s1Length, thisMarker);
         final String thatChunk = getChunk(s2, s2Length, thatMarker);

         thisMarker += thisChunk.length();
         thatMarker += thatChunk.length();

         final int result;

         if (isDigit(thisChunk.charAt(0)) && isDigit(thatChunk.charAt(0))) {
            // Both chunks are digit runs: order them by numeric value.
            result = compareNumericChunks(thisChunk, thatChunk);
         } else if (this.ignoreCase) {
            result = thisChunk.compareToIgnoreCase(thatChunk);
         } else {
            result = thisChunk.compareTo(thatChunk);
         }

         if (result != 0) {
            return result;
         }
      }

      // Every compared chunk matched: the shorter string sorts first (this also orders "a1"
      // before "a01"). Lengths are non-negative, so the subtraction cannot overflow.
      return s1Length - s2Length;
   }

   /**
    * Compares two strings using the shared comparator for the requested case sensitivity.
    *
    * @param left the first string, may be null
    * @param right the second string, may be null
    * @param ignoreCase true to ignore case when comparing non-numeric chunks
    * @return a negative value, zero, or a positive value as {@code left} sorts before, the same
    *         as, or after {@code right}
    */
   public static int compare(String left, String right, boolean ignoreCase) {
      return getCachedInstance(ignoreCase).compare(left, right);
   }

   /**
    * Compares two non-empty runs of ASCII digits by numeric value, without converting them to a
    * fixed-width number type, so runs of any length are ordered correctly.
    *
    * @param left the first digit run
    * @param right the second digit run
    * @return -1, 0 or 1 as {@code left} is numerically less than, equal to, or greater than
    *         {@code right}
    */
   private static int compareNumericChunks(String left, String right) {
      final int leftStart   = countLeadingZeros(left);
      final int rightStart  = countLeadingZeros(right);
      final int leftDigits  = left.length() - leftStart;
      final int rightDigits = right.length() - rightStart;

      // Without leading zeros, the number with more digits is the larger one.
      if (leftDigits != rightDigits) {
         return (leftDigits < rightDigits) ? -1
                                           : 1;
      }

      // Same number of significant digits: the first differing digit decides.
      for (int i = 0; i < leftDigits; i++) {
         final char leftDigit  = left.charAt(leftStart + i);
         final char rightDigit = right.charAt(rightStart + i);

         if (leftDigit != rightDigit) {
            return (leftDigit < rightDigit) ? -1
                                            : 1;
         }
      }

      return 0;
   }

   /**
    * Counts the '0' characters at the start of a digit run.
    *
    * @param digits the digit run
    * @return the index of the first character that is not '0', or the length of the run when it
    *         consists only of zeros
    */
   private static int countLeadingZeros(String digits) {
      int index = 0;

      while ((index < digits.length()) && (digits.charAt(index) == '0')) {
         index++;
      }

      return index;
   }

   //~--- get methods ---------------------------------------------------------

   /**
    * Get a reference to a shared instance. The same instance is returned on every call with the
    * same argument.
    *
    * @param ignoreCase true for the comparator that ignores case, false for the case sensitive one
    * @return the shared instance
    */
   public static AlphanumComparator getCachedInstance(boolean ignoreCase) {
      return ignoreCase ? CASE_INSENSITIVE_INSTANCE
                        : CASE_SENSITIVE_INSTANCE;
   }

   /**
    * Returns the chunk starting at {@code marker}: the longest run of characters that are all
    * digits, or all non-digits, matching the kind of the character at {@code marker}.
    *
    * Length of string is passed in for improved efficiency (only need to calculate it once).
    *
    * @param s the string to read from
    * @param slength the length of {@code s}
    * @param marker the index of the first character of the chunk; must be less than {@code slength}
    * @return the chunk, never empty
    */
   private static String getChunk(String s, int slength, int marker) {
      final boolean numeric = isDigit(s.charAt(marker));
      int           end     = marker + 1;

      while ((end < slength) && (isDigit(s.charAt(end)) == numeric)) {
         end++;
      }

      return s.substring(marker, end);
   }

   /**
    * Checks if the character is an ASCII digit.
    *
    * @param ch the character to test
    * @return true, if {@code ch} is between '0' and '9' inclusive
    */
   private static boolean isDigit(char ch) {
      return (ch >= '0') && (ch <= '9');
   }
}