/*
* (C) Copyright 2007 Nuxeo SAS (http://nuxeo.com/) and contributors.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the GNU Lesser General Public License
* (LGPL) version 2.1 which accompanies this distribution, and is available at
* http://www.gnu.org/licenses/lgpl.html
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* Contributors:
* Nuxeo - initial API and implementation
*
* $Id: IdUtils.java 19046 2007-05-21 13:03:50Z sfermigier $
*/
package org.nuxeo.common.utils;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Random;
import java.util.regex.Pattern;
/**
* Utils for identifier generation.
*
* @author <a href="mailto:at@nuxeo.com">Anahide Tchertchian</a>
*/
public final class IdUtils {
private static final String WORD_SPLITTING_REGEXP = "[^a-zA-Z0-9]+";
// TODO AT: dummy random, does not ensure uniqueness
private static final Random RANDOM = new Random(new Date().getTime());
// This is an utility class.
private IdUtils() {
}
/**
* Generates an unique string identifier.
*/
public static String generateStringId() {
return String.valueOf(generateLongId());
}
/**
* Generates an unique long identifier.
*/
public static long generateLongId() {
long r = RANDOM.nextLong();
if (r < 0) {
r = -r;
}
return r;
}
/**
* Generates an id from a non-null String.
* <p>
* Replaces accented characters from a string by their ascii equivalent,
* removes non alphanumerical characters and replaces spaces by the given
* wordSeparator character.
*
* @param s the original String
* @param wordSeparator the word separator to use (usually '-')
* @param lower if lower is true, remove upper case
* @param maxChars maximum longer of identifier characters
* @return the identifier String
*/
public static String generateId(String s, String wordSeparator,
boolean lower, int maxChars) {
s = StringUtils.toAscii(s);
s = s.trim();
if (lower) {
s = s.toLowerCase();
}
String[] words = s.split(WORD_SPLITTING_REGEXP);
// remove blank chars from words, did not get why they're not filtered
List<String> wordsList = new ArrayList<String>();
for (String word : words) {
if (word != null && word.length() > 0) {
wordsList.add(word);
}
}
if (wordsList.isEmpty()) {
return generateStringId();
}
StringBuilder sb = new StringBuilder();
String id;
if (maxChars > 0) {
// be sure at least one word is used
sb.append(wordsList.get(0));
for (int i = 1; i < wordsList.size(); i++) {
String newWord = wordsList.get(i);
if (sb.length() + newWord.length() > maxChars) {
break;
} else {
sb.append(wordSeparator).append(newWord);
}
}
id = sb.toString();
id = id.substring(0, Math.min(id.length(), maxChars));
} else {
id = StringUtils.join(wordsList.toArray(), wordSeparator);
}
return id;
}
/**
* Generates an id from a non-null String.
* <p>
* Uses default values for wordSeparator: '-', lower: true, maxChars: 24.
*
* @deprecated use {@link #generatePathSegment} instead, or
* {@link #generateId(String, String, boolean, int)} depending
* on the use cases
*/
@Deprecated
public static String generateId(String s) {
return generateId(s, "-", true, 24);
}
public static final Pattern STUPID_REGEXP = Pattern.compile("^[- .,;?!:/\\\\'\"]*$");
/**
* Generates a Nuxeo path segment from a non-null String.
* <p>
* Basically all characters are kept, except for slashes and
* initial/trailing spaces.
*
* @deprecated use {@link PathSegmentService} instead
*/
@Deprecated
public static String generatePathSegment(String s) {
s = s.trim();
if (STUPID_REGEXP.matcher(s).matches()) {
return generateStringId();
}
return s.replace("/", "-");
}
}