package org.solrmarc.callnum;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Implements a call number class for Dewey call numbers.
 *
 * <p>Example call number: {@code 322.44 .F816 V.1 1974}
 *
 * <p>As unpacked into internal fields:
 *
 * <table summary="call number parsed into fields">
 * <tr><th>{@code classification}</th><td>322.44</td></tr>
 * <tr><th>{@code classDigits}</th><td>322</td></tr>
 * <tr><th>{@code classDecimal}</th><td>.44</td></tr>
 * <tr><th>{@code cutter}</th><td>F816</td></tr>
 * <tr><th>{@code cutterSuffix}</th><td>V.1 1974</td></tr>
 * </table>
 *
 * <p>If the call number doesn't look like Dewey (no starting digit) the entire call number
 * goes into {@code cutterSuffix}, which is exposed through {@link #getSuffix()}.
 *
 * <p>Shelf keys:
 *
 * <p>When computing shelf keys, we want a string which represents the number but can easily be sorted.
 * The main issue is sequences of digits: which ones sort numerically, and how to arrange that.
 *
 * <p>The shelf key algorithm is basically:
 * <ol>
 * <li>trim leading zeros from {@code classDigits} and prepend with the number of remaining digits</li>
 * <li>append {@code classDecimal}, including its leading period</li>
 * <li>append a space and {@code cutter}</li>
 * <li>
 *   normalize {@code cutterSuffix} and append with a space,
 *   for details see {@link Utils#appendNumericallySortable Utils#appendNumericallySortable}
 * </li>
 * </ol>
 *
 * <p>Using the above example call number:
 * <table summary="constructing the shelf key">
 * <thead>
 * <tr><th>Field</th><th>Sample</th><th>Shelf Key</th><th>Notes</th></tr>
 * </thead>
 * <tbody>
 * <tr><th>{@code classification}</th><td>{@code 322.44}</td><td></td><td>not used</td></tr>
 * <tr><th>{@code classDigits}</th><td>{@code 322}</td><td>{@code 3322}</td><td></td></tr>
 * <tr><th>{@code classDecimal}</th><td>{@code .44}</td><td>{@code .44}</td><td></td></tr>
 * <tr><th>{@code cutter}</th><td>{@code F816}</td><td>{@code F816}</td><td></td></tr>
 * <tr><th>{@code cutterSuffix}</th><td>{@code V.1 1974}</td><td>{@code V 11 41974}</td><td></td></tr>
 * </tbody>
 * </table>
 *
 * <p>The resulting shelf key is {@code 3322.44 F816 V 11 41974}.
 *
 * <p>Run the {@code ExerciseDeweyCallNumber} class from the command line to print out a
 * number of examples of both parsed call numbers and shelf keys.
 *
 * <p>Based in part on Naomi Dushay's {@code CallNumUtils}.
 *
 * @author Tod Olson, University of Chicago
 *
 */
public class DeweyCallNumber extends AbstractCallNumber {

    /** Whole classification as matched, e.g. {@code 322.44}; {@code null} if none was found. */
    protected String classification = null;
    /** Digits of the classification before the decimal point, e.g. {@code 322}. */
    protected String classDigits = null;
    /** Decimal portion of the classification including the period, e.g. {@code .44}. */
    protected String classDecimal = null;
    /** Cutter, without any leading period, e.g. {@code F816}. */
    protected String cutter = null;
    /**
     * Text remaining after the cutter, or after the classification when no cutter was found,
     * or the whole call number when it does not start with a classification.
     */
    protected String cutterSuffix = null;
    /** Sortable key computed by {@link #buildShelfKey()}. */
    protected String shelfKey = null;

    /**
     * Regular expression for Dewey classification.
     *
     * <p>A Dewey classification is nominally a three digit number (possibly missing leading
     * zeros) with an optional fraction portion. This expression is deliberately more lenient
     * and accepts one or more digits before the optional fraction.
     */
    // A stricter alternative that limits the class to three digits: "(\\d{1,3})(\\.\\d+)?"
    public static final String CLASS_REGEX = "(\\d+)(\\.\\d+)?";

    /**
     * Separates the class from the rest of a call number.
     *
     * Match group 1 contains the classification.
     * Match group 2 contains the class digits (before the decimal).
     * Match group 3 contains the decimal portion of the class number, including the decimal point.
     * Match group 4 contains everything after the classification (possibly the empty string).
     */
    protected static Pattern classPattern = Pattern.compile("(" + CLASS_REGEX + ")" + "(.*)?");

    /**
     * Regular expression for Dewey cutter.
     *
     * Dewey cutters start with a letter, followed by a one to three digit
     * number. The number may be followed immediately (i.e. without space) by
     * letters, or followed first by a space and then letters.
     *
     * NB: {@code CallNumUtils} did not implement the "space letters" part of the cutter,
     * similarly, this class defers that detail.
     * Challenging to do while leaving volumes and similar suffixes intact.
     */
    //TODO: support cutter with space-then-letter
    // Candidate expression for that: "[A-Z]\\d{1,3} *(?:[A-Z]+)?"
    public static final String CUTTER_REGEX = "[A-Z]\\d{1,3}(?:[A-Z]+)?";

    /**
     * Separates a cutter from whatever follows it.
     *
     * Allows optional leading spaces and an optional period before the cutter.
     * Match group 1 contains the cutter.
     * Match group 2 contains everything after the cutter and any following spaces,
     * or is {@code null} when nothing follows.
     */
    public static Pattern cutterPattern = Pattern.compile(" *\\.?(" + CUTTER_REGEX + ") *(.+)?");

    /**
     * Constructs a call number object from the given string.
     *
     * The constructor parses the <code>callNumber</code> argument as part of instantiating the object.
     *
     * @param callNumber    call number to parse
     */
    public DeweyCallNumber(String callNumber) {
        parse(callNumber);
    }

    /**
     * Constructs call number object with no call number.
     * Mainly a convenience for inheritance.
     */
    public DeweyCallNumber() {
    }

    /**
     * Resets the raw call number and every parsed field to {@code null}.
     */
    protected void init() {
        rawCallNum = null;
        classification = null;
        classDigits = null;
        classDecimal = null;
        cutter = null;
        cutterSuffix = null;
        shelfKey = null;
    }

    /**
     * Discards any previously parsed state, stores {@code call} as the raw call number,
     * then parses it and builds the shelf key.
     *
     * @param call  call number to parse, may be {@code null}
     */
    public void parse(String call) {
        init();
        this.rawCallNum = call;
        parse();
    }

    /**
     * Parses the stored raw call number and then builds its shelf key.
     */
    protected void parse() {
        parseCallNumber();
        buildShelfKey();
    }

    /**
     * Parses the call number, splitting the classification portions from any
     * cutter and other following characters. Sets these internal fields:
     * <ul>
     * <li><code>classification</code></li>
     * <li><code>classDigits</code></li>
     * <li><code>classDecimal</code></li>
     * <li><code>cutter</code></li>
     * <li><code>cutterSuffix</code></li>
     * </ul>
     *
     * <p>Does nothing when the raw call number is {@code null} or empty.
     * When the call number does not start with a classification, the whole call number
     * is stored in {@code cutterSuffix}.
     * When nothing follows the classification, {@code cutter} and {@code cutterSuffix}
     * are left {@code null}.
     *
     * <p>The fields hold the text as matched; missing leading zeroes are supplied only by
     * {@link #getClassificationNormalized()}.
     */
    protected void parseCallNumber() {
        if (rawCallNum == null || rawCallNum.length() == 0) {
            return;
        }

        Matcher m = classPattern.matcher(rawCallNum);
        if (!m.matches()) {
            // Does not look like Dewey: keep the entire call number as the suffix.
            cutterSuffix = rawCallNum;
            return;
        }

        classification = m.group(1);
        classDigits = m.group(2);
        classDecimal = m.group(3);

        String everythingElse = m.group(4);
        if (everythingElse == null || everythingElse.length() == 0) {
            // Group 4 matches the empty string when the call number ends with the
            // classification. Report that as "no suffix" (null), consistent with the
            // cutter-without-suffix case, instead of storing an empty string.
            cutterSuffix = null;
            return;
        }

        Matcher mCut = cutterPattern.matcher(everythingElse);
        if (mCut.matches()) {
            cutter = mCut.group(1);
            cutterSuffix = mCut.group(2);
        } else {
            cutterSuffix = everythingElse;
        }
    }

    /**
     * Returns the classification of the call number.
     * @return  call number classification, or null if not set or found by {@code parse}.
     */
    public String getClassification() {
        return classification;
    }

    /**
     * Returns a normal form of the classification string.
     *
     * <p>Supplies any missing leading zeroes so that the digits before the decimal
     * point are at least three characters long.
     *
     * @return  the zero-padded class digits followed by the class decimal (if any);
     *          the unmodified classification when no class digits were found
     *          or they already have three or more digits.
     */
    public String getClassificationNormalized() {
        if (classDigits == null || classDigits.length() >= 3) {
            return classification;
        }
        StringBuilder norm = new StringBuilder();
        // Left-pad the class digits with zeroes up to three characters.
        for (int i = classDigits.length(); i < 3; i++) {
            norm.append('0');
        }
        norm.append(classDigits);
        if (classDecimal != null) {
            norm.append(classDecimal);
        }
        return norm.toString();
    }

    /**
     * Returns the digits of the classification that precede the decimal point.
     * @return  class digits, or null if not set or found by {@code parse}.
     */
    public String getClassDigits() {
        return classDigits;
    }

    /**
     * Returns the decimal portion of the classification, including the decimal point.
     * @return  class decimal, or null if not set or found by {@code parse}.
     */
    public String getClassDecimal() {
        return classDecimal;
    }

    /**
     * Returns the cutter of the call number.
     * @return  call number cutter, or <code>null</code> if no cutter was set or found.
     */
    public String getCutter() {
        return cutter;
    }

    /**
     * Returns the cutterSuffix of the call number.
     * @return  call number cutterSuffix, or <code>null</code> if no cutterSuffix was set or found.
     */
    public String getSuffix() {
        return cutterSuffix;
    }

    @Override
    public String getShelfKey() {
        return shelfKey;
    }

    /**
     * Builds the shelf key from the parsed fields and stores it in {@code shelfKey}.
     *
     * <p>The key is {@code null} when there is no raw call number. Otherwise the parts that
     * are present are appended in order: sortable class digits, class decimal, cutter, and
     * the numerically sortable suffix, with a single space before the cutter and before the
     * suffix whenever something precedes them.
     */
    protected void buildShelfKey() {
        if (rawCallNum == null) {
            shelfKey = null;
            return;
        }

        StringBuilder keyBuf = new StringBuilder();
        if (classDigits != null) {
            Utils.appendSortableNumber(keyBuf, classDigits);
        }
        if (classDecimal != null) {
            keyBuf.append(classDecimal);
        }
        if (cutter != null) {
            if (keyBuf.length() > 0) {
                keyBuf.append(' ');
            }
            keyBuf.append(cutter);
        }
        if (cutterSuffix != null) {
            if (keyBuf.length() > 0) {
                keyBuf.append(' ');
            }
            Utils.appendNumericallySortable(keyBuf, cutterSuffix);
        }
        shelfKey = keyBuf.toString();
    }

    /**
     * Reports whether the call number parsed as Dewey.
     * @return  {@code true} when class digits were found, {@code false} otherwise.
     */
    public boolean isValid() {
        return classDigits != null;
    }

    /**
     * Returns the raw call number as supplied to {@code parse}.
     * @return  the unparsed call number, possibly {@code null}.
     */
    @Override
    public String toString() {
        return rawCallNum;
    }
}