package org.solrmarc.mixin;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import org.marc4j.marc.ControlField;
import org.marc4j.marc.DataField;
import org.marc4j.marc.Record;
import org.marc4j.marc.Subfield;
import org.marc4j.marc.VariableField;
import org.solrmarc.index.SolrIndexer;
import org.solrmarc.index.SolrIndexerMixin;
import org.solrmarc.index.indexer.IndexerSpecException;
import org.solrmarc.index.indexer.IndexerSpecException.eErrorSeverity;
import org.solrmarc.index.indexer.ValueIndexerFactory;
import org.solrmarc.index.mapping.AbstractMultiValueMapping;
import org.solrmarc.tools.Utils;
public class GetFormatMixin extends SolrIndexerMixin
{
// Set<String> errorsFound = null;
// public void perRecordInit(Record record)
// {
// errorsFound = new LinkedHashSet<String>();
// }
public void addFormatError(String controlNum, String field, String subfield, eErrorSeverity severity, String message)
{
String separator = (field.length() > 0 && subfield.length() > 0) ? ":" : "";
String errorStr = "GetFormatMixin "+ field + separator + subfield + " : "+message;
ValueIndexerFactory.instance().addPerRecordError(new IndexerSpecException(severity, errorStr));
}
private enum ProfileType
{
NoneDefined,
Books,
Computers,
Maps,
Music,
Serial,
Visual,
Mixed;
@Override public String toString()
{
return "ProfileType." + name();
}
};
private enum ContentType
{
NoneDefined,
Art,
ArtReproduction,
Book,
BookCollection,
BookComponentPart,
BookSeries,
BookSubunit,
Chart,
ComputerBibliographicData,
ComputerCombination,
ComputerDocument,
ComputerFile,
ComputerFont,
ComputerGame,
ComputerInteractiveMultimedia,
ComputerNumericData,
ComputerOnlineSystem,
ComputerProgram,
ComputerRepresentational,
ComputerSound,
Database,
Diorama,
Filmstrip,
FlashCard,
Equipment,
Game,
GovernmentDocumentFederal,
GovernmentDocumentState,
GovernmentDocumentStateUniversity,
GovernmentDocumentLocal,
GovernmentDocumentInternational,
GovernmentDocumentOther,
Graphic,
Image,
Kit,
LooseLeaf,
Manuscript,
Map,
MapAtlas,
MapBound,
MapGlobe,
MapManuscript,
MapSeparate,
MapSerial,
MapSeries,
MapSingle,
MicroscopeSlide,
MixedMaterial,
Model,
MotionPicture,
MusicalScore,
MusicalScoreManuscript,
MusicRecording,
Newspaper,
Pamphlet,
Periodical,
PhysicalObject,
Picture,
ProjectedMedium,
Realia,
Serial,
SerialComponentPart,
SerialIntegratingResource,
Slide,
SoundRecording,
SpecialInstructionalMaterial,
TechnicalDrawing,
Thesis,
Toy,
Transparency,
Video,
VisualKit,
Website;
@Override public String toString()
{
return "ContentType." + name();
}
}
private enum MediaType
{
ActivityCard,
Atlas,
Braille,
Broadside,
Chart,
Collage,
ComputerCard,
ComputerChipCartridge,
ComputerDiscCartridge,
ComputerDisk,
ComputerFloppyDisk,
ComputerMagnetoOpticalDisc,
ComputerOpticalDisc,
ComputerOpticalDiscCartridge,
ComputerOther,
ComputerTapeCartridge,
ComputerTapeCassette,
ComputerTapeReel,
Drawing,
Electronic,
Electronic245,
ElectronicDirect,
FilmCartridge,
FilmCassette,
FilmOther,
Film8mm,
FilmSuper8mm,
Film9_5mm,
Film16mm,
Film28mm,
Film35mm,
FilmRoll,
FilmReel,
Filmslip,
Filmstrip,
FilmstripCartridge,
FilmstripRoll,
FlashCard,
Globe,
GlobeCelestial,
GlobeEarthMoon,
GlobeOther,
GlobePlanetary,
GlobeTerrestrial,
Icon,
ImageOther,
ImagePrint,
Kit,
LooseLeaf,
Map,
MapDiagram,
MapModel,
MapOther,
MapProfile,
MapSection,
MapView,
Microfiche,
MicroficheCassette,
Microfilm,
MicrofilmCartridge,
MicrofilmCassette,
MicrofilmReel,
MicrofilmRoll,
MicrofilmSlip,
Microform,
MicroformApetureCard,
Microopaque,
MusicalScore,
Online,
OnlineExtra,
Painting,
Photo,
PhotomechanicalPrint,
Photonegative,
PhotoPrint,
Picture,
Postcard,
Poster,
Print,
PrintLarge,
ProjectedMediumOther,
Radiograph,
SensorImage,
Slide,
Software,
SoundCartridge,
SoundCassette,
SoundCylinder,
SoundDisc,
SoundDiscCD,
SoundDiscLP,
SoundRecordingOther,
SoundRecordingOnline,
SoundRoll,
SoundTapeReel,
SoundTrackFilm,
SoundWireRecording,
StudyPrint,
TactileCombination,
TactileMoon,
TactileNoWritingSystem,
TactileOther,
TechnicalDrawing,
TextOther,
Transparency,
TypeObsolete,
Video8mm(0.6),
VideoBeta(0.6),
VideoBetacam(0.6),
VideoBetacamSP(0.6),
VideoBluRay(0.75),
VideoCartridge(0.7),
VideoCassette(0.7),
VideoCapacitance(0.8),
VideoD2(0.6),
VideoDisc(0.7),
VideoDVD(0.75),
VideoEIAJ(0.6),
VideoHi8(0.6),
VideoLaserdisc(0.8),
VideoMII(0.6),
VideoOther(0.6),
VideoOnline(0.8),
VideoQuadruplex(0.6),
VideoReel(0.7),
VideoSuperVHS(0.6),
VideoTypeC(0.6),
VideoUMatic(0.6),
VideoVHS(0.75),
VideoVHS_Heuristic(0.9, VideoVHS),
VideoDVD_Heuristic(0.9, VideoDVD),
VideoLaserdisc_Heuristic(0.85, VideoLaserdisc),
VideoBeta_Heuristic(0.65, VideoBeta);
private double priority;
private MediaType mapsTo;
private boolean isHeuristic;
private String fromFields;
MediaType() { priority = 0.6; isHeuristic = false; mapsTo = null; fromFields = null;}
MediaType(double priority) { this.priority = priority; isHeuristic = false; mapsTo = null; fromFields = null;}
MediaType(double priority, MediaType mapsTo) { this.priority = priority; this.mapsTo = mapsTo; isHeuristic = true; fromFields = null;}
MediaType(double priority, MediaType mapsTo, String fromField) { this.priority = priority; this.mapsTo = mapsTo; isHeuristic = true; fromFields = fromField;}
public MediaType mapsTo()
{
if (this.mapsTo != null) return(this.mapsTo);
else return(this);
}
public static MediaType selectBest(MediaType t1, MediaType t2)
{
if (t1.priority >= t2.priority) return(t1);
else return(t2);
}
public double sigmoidProb()
{
double sigmoid = 1 / ( 1 + Math.exp(-1 * (2.0 * (priority -0.5))));
return(sigmoid);
}
@Override public String toString()
{
return "MediaType." + name();
}
};
private class MediaTypeHeuristic
{
private double priority;
private MediaType mapsTo;
private boolean isHeuristic;
private String fromFields;
MediaTypeHeuristic(MediaType mapsTo, double priority, String fromField) { this.priority = priority; isHeuristic = false; this.mapsTo = mapsTo; fromFields = fromField;}
MediaTypeHeuristic(MediaType mapsTo) { priority = 0.5; isHeuristic = false; this.mapsTo = mapsTo; fromFields = null;}
void combine(MediaTypeHeuristic mth2)
{
double newPriority = ((this.priority - 0.5) + (mth2.priority - 0.5) + 0.5);
String newFields = (this.fromFields.contains(mth2.fromFields)) ? this.fromFields : (this.fromFields + ":" + mth2.fromFields);
this.priority = newPriority;
this.fromFields = newFields;
}
public double sigmoidProb()
{
double sigmoid = 1 / ( 1 + Math.exp(-1 * (2.0 * (priority -0.5))));
return(sigmoid);
}
};
private enum FormOfItem
{
Microfilm,
Microfiche,
Microopaque,
PrintLarge,
Braille,
Online,
ElectronicDirect,
Electronic,
Print;
@Override public String toString()
{
return "FormOfItem." + name();
}
}
private enum CombinedType
{
EBook,
EJournal;
@Override public String toString()
{
return "CombinedType." + name();
}
}
private enum ControlType
{
Archive;
@Override public String toString()
{
return "ControlType." + name();
}
}
private static LinkedHashMap<Character, ProfileType> mainProfileMap = new LinkedHashMap<Character, ProfileType>() {
{
put( 'a', ProfileType.Books); // a - Book
put( 'b', ProfileType.NoneDefined); // b - Archival and manuscripts control OBSOLETE, 1995
put( 'c', ProfileType.Music); // c - Notated music
put( 'd', ProfileType.Music); // d - Manuscript notated music
put( 'e', ProfileType.Maps); // e - Cartographic material
put( 'f', ProfileType.Maps); // f - Manuscript cartographic material
put( 'g', ProfileType.Visual); // g - Projected medium
put( 'h', ProfileType.NoneDefined); // h - Microform publications [OBSOLETE, 1972] [USMARC only]
put( 'i', ProfileType.Music); // i - Nonmusical sound recording
put( 'j', ProfileType.Music); // j - Musical sound recording
put( 'k', ProfileType.Visual); // k - Two-dimensional nonprojectable graphic
put( 'm', ProfileType.Computers); // m - Computer file
put( 'n', ProfileType.NoneDefined); // n - Special instructional material [OBSOLETE, 1983]
put( 'o', ProfileType.Visual); // o - Kit
put( 'p', ProfileType.Mixed); // p - Mixed materials
put( 'r', ProfileType.Visual); // r - Three-dimensional artifact or naturally occurring object
put( 's', ProfileType.Serial); // s - Serial/Integrating resource - Continuing Resources
put( 't', ProfileType.Books); // t - Manuscript language material
}
};
private static LinkedHashMap<Character, ProfileType> mainSubProfileMap = new LinkedHashMap<Character, ProfileType>() {
{
put( 'a', ProfileType.Books); // a - Monographic component part
put( 'b', ProfileType.Serial); // b - Serial component part
put( 'c', ProfileType.Books); // c - Collection
put( 'd', ProfileType.Books); // d - Subunit
put( 'i', ProfileType.Serial); // i - Integrating resource
put( 'p', ProfileType.NoneDefined); // p - Pamphlet [OBSOLETE, 1988] [CAN/MARC only]
put( 'm', ProfileType.Books); // m - Monograph/Item
put( 's', ProfileType.Serial); // s - Serial
}
};
private static LinkedHashMap<String, ContentType[]> field245hTypeMap = new LinkedHashMap<String, ContentType[]>() {
{
put ( "art original", new ContentType[]{ContentType.Art});
put ( "art reproduction", new ContentType[]{ContentType.ArtReproduction});
// put ( "computer file");
put ( "cartographic material", new ContentType[]{ ContentType.Map, ContentType.MapManuscript, ContentType.MapSingle, ContentType.MapSeries,
ContentType.MapSerial, ContentType.MapGlobe, ContentType.MapAtlas, ContentType.MapSeparate, ContentType.MapBound });
// 3133 electronic book
// 740178 electronic resource
// put ( "graphic"
// 30580 manuscript
// put( "microform", new ContentType[]{ MediaType.Microform});
// 1341 picture
// 145 series record
put ( "slide", new ContentType[]{ ContentType.Slide});
put ( "sound recording", new ContentType[]{ ContentType.MusicRecording, ContentType.SoundRecording});
put ( "videorecording", new ContentType[]{ ContentType.Video });
put ( "videocassette", new ContentType[]{ ContentType.Video });
}
};
private static LinkedHashMap<Character, ContentType> mainTypeMap = new LinkedHashMap<Character, ContentType>() {
{
put( 'a', ContentType.Book); // a - Book
put( 'b', ContentType.Manuscript); // b - Archival and manuscripts control OBSOLETE, 1995
put( 'c', ContentType.MusicalScore); // c - Notated music
put( 'd', ContentType.MusicalScoreManuscript); // d - Manuscript notated music
put( 'e', ContentType.Map); // e - Cartographic material
put( 'f', ContentType.MapManuscript); // f - Manuscript cartographic material
put( 'g', ContentType.ProjectedMedium); // g - Projected medium
put( 'h', ContentType.NoneDefined); // h - Microform publications [OBSOLETE, 1972] [USMARC only]
put( 'i', ContentType.SoundRecording); // i - Nonmusical sound recording
put( 'j', ContentType.MusicRecording); // j - Musical sound recording
put( 'k', ContentType.Image); // k - Two-dimensional nonprojectable graphic
put( 'm', ContentType.ComputerFile); // m - Computer file
put( 'n', ContentType.NoneDefined); // n - Special instructional material [OBSOLETE, 1983]
put( 'o', ContentType.Kit); // o - Kit
put( 'p', ContentType.MixedMaterial); // p - Mixed materials
put( 'r', ContentType.PhysicalObject); // r - Three-dimensional artifact or naturally occurring object
put( 's', ContentType.Serial); // s - Serial/Integrating resource - Continuing Resources
put( 't', ContentType.Manuscript); // t - Manuscript language material
}
};
private static LinkedHashMap<Character, ContentType> mainSubTypeMap = new LinkedHashMap<Character, ContentType>() {
{
put( 'a', ContentType.BookComponentPart); // a - Monographic component part
put( 'b', ContentType.SerialComponentPart); // b - Serial component part
put( 'c', ContentType.BookCollection); // c - Collection
put( 'd', ContentType.BookSubunit); // d - Subunit
put( 'i', ContentType.SerialIntegratingResource); // i - Integrating resource
put( 'p', ContentType.Pamphlet); // p - Pamphlet [OBSOLETE, 1988] [CAN/MARC only]
put( 'm', ContentType.Book); // m - Monograph/Item
put( 's', ContentType.Serial); // s - Serial
}
};
private static LinkedHashMap<Character, ContentType> computersSubTypes = new LinkedHashMap<Character, ContentType>() {
{
put( 'a', ContentType.ComputerNumericData ); // a - Numeric data
put( 'b', ContentType.ComputerProgram ); // b - Computer program
put( 'c', ContentType.ComputerRepresentational); // c - Representational
put( 'd', ContentType.ComputerDocument); // d - Document
put( 'e', ContentType.ComputerBibliographicData); // e - Bibliographic data
put( 'f', ContentType.ComputerFont); // f - Font
put( 'g', ContentType.ComputerGame); // g - Game
put( 'h', ContentType.ComputerSound); // h - Sound
put( 'i', ContentType.ComputerInteractiveMultimedia); // i - Interactive multimedia
put( 'j', ContentType.ComputerOnlineSystem); // j - Online system or service
put( 'm', ContentType.ComputerCombination); // m - Combination
put( 'j', ContentType.ComputerOnlineSystem); // j - Online system or service
put( 'u', ContentType.ComputerFile); // u - Unknown
put( 'z', ContentType.ComputerFile); // z - Other
put( ' ', ContentType.ComputerFile); // - Anything else
}
};
private static LinkedHashMap<Character, ContentType> visualSubTypes = new LinkedHashMap<Character, ContentType>() {
{
put( 'a', ContentType.Art); // a - Art original
put( 'b', ContentType.VisualKit); // b - Kit
put( 'c', ContentType.ArtReproduction); // c - Art reproduction
put( 'd', ContentType.Diorama); // d - Diorama
put( 'f', ContentType.Filmstrip); // f - Filmstrip
put( 'g', ContentType.Game); // g - Game
put( 'i', ContentType.Picture); // i - Picture
put( 'k', ContentType.Graphic); // k - Graphic
put( 'l', ContentType.TechnicalDrawing); // l - Technical drawing
put( 'm', ContentType.MotionPicture); // m - Motion picture
put( 'n', ContentType.Chart); // n - Chart
put( 'o', ContentType.FlashCard); // o - Flash card
put( 'p', ContentType.MicroscopeSlide); // p - Microscope slide
put( 'q', ContentType.Model); // q - Model
put( 'r', ContentType.Realia); // r - Realia
put( 's', ContentType.Slide); // s - Slide
put( 't', ContentType.Transparency); // t - Transparency
put( 'v', ContentType.Video); // v - Videorecording
put( 'w', ContentType.Toy); // w - Toy
}
};
private static LinkedHashMap<Character, String> visualValidSubTypes = new LinkedHashMap<Character, String>() {
{
put( 'a', "kr"); // a - Art original
put( 'b', "o"); // b - Kit
put( 'c', "kr"); // c - Art reproduction
put( 'd', "r"); // d - Diorama
put( 'f', "g"); // f - Filmstrip
put( 'g', "kr"); // g - Game
put( 'i', "kr"); // i - Picture
put( 'k', "k"); // k - Graphic
put( 'l', "k"); // l - Technical drawing
put( 'm', "g"); // m - Motion picture
put( 'n', "k"); // n - Chart
put( 'o', "k"); // o - Flash card
put( 'p', "r"); // p - Microscope slide
put( 'q', "r"); // q - Model
put( 'r', "r"); // r - Realia
put( 's', "gk"); // s - Slide
put( 't', "gk"); // t - Transparency
put( 'v', "g"); // v - Videorecording
put( 'w', "r"); // w - Toy
}
};
private static LinkedHashMap<Character, ContentType> mapsSubTypes = new LinkedHashMap<Character, ContentType>() {
{
put( 'a', ContentType.MapSingle ); // a - Single map
put( 'b', ContentType.MapSeries ); // b - Map series
put( 'c', ContentType.MapSerial); // c - Map serial
put( 'd', ContentType.MapGlobe); // d - Globe
put( 'e', ContentType.MapAtlas); // e - Atlas
put( 'f', ContentType.MapSeparate); // f - Separate supplement to another work
put( 'g', ContentType.MapBound); // g - Bound as part of another work
put( 'u', ContentType.Map); // u - Unknown
put( 'z', ContentType.Map); // z - Other
put( ' ', ContentType.Map); // - Anything else
}
};
private static LinkedHashMap<Character, ContentType> serialsSubTypes = new LinkedHashMap<Character, ContentType>() {
{
put( 'd', ContentType.Database ); // d - updating database
put( 'l', ContentType.LooseLeaf ); // l - Updating loose-leaf
put( 'm', ContentType.BookSeries); // m - Monographic series
put( 'n', ContentType.Newspaper); // n - Newspaper
put( 'p', ContentType.Periodical); // p - Periodical
put( 'w', ContentType.Website); // w - Updating Web site
put( ' ', ContentType.Serial); // - Anything else
}
};
private static LinkedHashMap<Character, ContentType> govDocTypes = new LinkedHashMap<Character, ContentType>() {
{
put( 'a', ContentType.GovernmentDocumentOther ); // a - Autonomous or semi-autonomous component
put( 'c', ContentType.GovernmentDocumentLocal ); // c - Multilocal
put( 'f', ContentType.GovernmentDocumentFederal); // f - Federal/national
put( 'i', ContentType.GovernmentDocumentInternational); // i - International intergovernmental
put( 'l', ContentType.GovernmentDocumentLocal); // l - Local
put( 'm', ContentType.GovernmentDocumentState); // m - Multistate
put( 'o', ContentType.GovernmentDocumentOther); // o - Government publication-level undetermined
// put( 's', ContentType.GovernmentDocumentState); // s - State, provincial, territorial, dependent, etc.
// put( 's', ContentType.GovernmentDocumentStateUniversity); // s - State, provincial, territorial, dependent, etc.
put( 'z', ContentType.GovernmentDocumentOther); // z - Other
}
};
// used for mapping the 007 field(s)
private static LinkedHashMap<String, MediaType> mediaTypeMap = new LinkedHashMap<String, MediaType>() {
{
// maps
put( "ad", MediaType.Atlas); // ad - Atlas
put( "ag", MediaType.MapDiagram); // ag - Diagram
put( "aj", MediaType.Map); // aj - Map
put( "ak", MediaType.MapProfile); // ak - Manuscript notated music
put( "aq", MediaType.MapModel); // aq - Model
put( "ar", MediaType.SensorImage); // ar - Remote-sensing image
put( "as", MediaType.MapSection); // as - Section
put( "ay", MediaType.MapView); // ay - View
put( "az", MediaType.MapOther); // az - Other Map
put( "aa", MediaType.TypeObsolete); // aa ab ac ah ai am an ao ap at av aw ax - Obsolete Map formats
put( "ab", MediaType.TypeObsolete); // aa ab ac ah ai am an ao ap at av aw ax - Obsolete Map formats
put( "ac", MediaType.TypeObsolete); // aa ab ac ah ai am an ao ap at av aw ax - Obsolete Map formats
put( "ah", MediaType.TypeObsolete); // aa ab ac ah ai am an ao ap at av aw ax - Obsolete Map formats
put( "ai", MediaType.TypeObsolete); // aa ab ac ah ai am an ao ap at av aw ax - Obsolete Map formats
put( "am", MediaType.TypeObsolete); // aa ab ac ah ai am an ao ap at av aw ax - Obsolete Map formats
put( "an", MediaType.TypeObsolete); // aa ab ac ah ai am an ao ap at av aw ax - Obsolete Map formats
put( "ao", MediaType.TypeObsolete); // aa ab ac ah ai am an ao ap at av aw ax - Obsolete Map formats
put( "ap", MediaType.TypeObsolete); // aa ab ac ah ai am an ao ap at av aw ax - Obsolete Map formats
put( "at", MediaType.TypeObsolete); // aa ab ac ah ai am an ao ap at av aw ax - Obsolete Map formats
put( "av", MediaType.TypeObsolete); // aa ab ac ah ai am an ao ap at av aw ax - Obsolete Map formats
put( "aw", MediaType.TypeObsolete); // aa ab ac ah ai am an ao ap at av aw ax - Obsolete Map formats
put( "ax", MediaType.TypeObsolete); // aa ab ac ah ai am an ao ap at av aw ax - Obsolete Map formats
// electronic resource
put( "ca", MediaType.ComputerTapeCartridge); // ca - Tape cartridge
put( "cb", MediaType.ComputerChipCartridge); // cb - Chip cartridge
put( "cc", MediaType.ComputerOpticalDiscCartridge); // cc - Computer optical disc cartridge
put( "cd", MediaType.ComputerDisk); // cd - Computer disc, type unspecified
put( "ce", MediaType.ComputerDiscCartridge); // ce - Computer disc cartridge, type unspecified
put( "cf", MediaType.ComputerTapeCassette); // cf - Tape cassette
put( "ch", MediaType.ComputerTapeReel); // ch - Tape reel
put( "cj", MediaType.ComputerFloppyDisk); // cj - Magnetic disk
put( "ck", MediaType.ComputerCard); // ck - Computer card
put( "cm", MediaType.ComputerMagnetoOpticalDisc); // cm - Magneto-optical disc
put( "co", MediaType.ComputerOpticalDisc); // co - Optical disc
put( "cr", MediaType.Online); // cr - Remote
put( "cu", MediaType.ComputerOther); // cu - Unspecified
put( "cz", MediaType.ComputerOther); // cz - Other
// globe
put( "da", MediaType.GlobeCelestial); // da - Celestial globe
put( "db", MediaType.GlobePlanetary); // db - Planetary or lunar globe
put( "dc", MediaType.GlobeTerrestrial); // dc - Terrestrial globe
put( "dd", MediaType.TypeObsolete); // dd - Satellite globe (of our solar system), excluding the earth moon [OBSOLETE, 1997] [CAN/MARC only]
put( "de", MediaType.GlobeEarthMoon); // de - Earth moon globe
put( "du", MediaType.GlobeOther); // du - Unspecified
put( "dz", MediaType.GlobeOther); // dz - Other
// tactile material
put( "fa", MediaType.TactileMoon); // fa - Moon
put( "fb", MediaType.Braille); // fb - Braille
put( "fc", MediaType.TactileCombination); // fc - Combination
put( "fd", MediaType.TactileNoWritingSystem); // fd - Tactile, with no writing system
put( "fu", MediaType.TactileOther); // fu - Unspecified
put( "fz", MediaType.TactileOther); // fz - Other
// projected graphic
put( "gc", MediaType.FilmstripCartridge); // gc - Filmstrip cartridge
put( "gd", MediaType.Filmslip); // gd - Filmslip
put( "gf", MediaType.Filmstrip); // gf - Filmstrip, type unspecified
put( "gn", MediaType.TypeObsolete); // gn - Not applicable [OBSOLETE, 1981] [USMARC only]
put( "go", MediaType.FilmstripRoll); // go - Filmstrip roll
put( "gs", MediaType.Slide); // gs - Slide
put( "gt", MediaType.Transparency); // gt - Transparency
put( "gu", MediaType.ProjectedMediumOther); // gu - Unspecified
put( "gz", MediaType.ProjectedMediumOther); // gz - Other
// microform
put( "ha", MediaType.MicroformApetureCard); // ha - Aperture card
put( "hb", MediaType.MicrofilmCartridge); // hb - Microfilm cartridge
put( "hc", MediaType.MicrofilmCassette); // hc - Microfilm cassette
put( "hd", MediaType.MicrofilmReel); // hd - Microfilm reel
put( "he", MediaType.Microfiche); // he - Microfiche
put( "hf", MediaType.MicroficheCassette); // hf - Microfiche cassette
put( "hg", MediaType.Microopaque); // hg - Microopaque
put( "hh", MediaType.MicrofilmSlip); // hh - Microfilm slip
put( "hj", MediaType.MicrofilmRoll); // hj - Microfilm roll
put( "hu", MediaType.Microform); // hu - Unspecified
put( "hz", MediaType.Microform); // hz - Other
// non-projected graphic
put( "ka", MediaType.ActivityCard); // ka - Activity card
put( "kc", MediaType.Collage); // kc - Collage
put( "kd", MediaType.Drawing); // kd - Drawing
put( "ke", MediaType.Painting); // ke - Painting
put( "kf", MediaType.PhotomechanicalPrint); // kf - Photomechanical print
put( "kg", MediaType.Photonegative); // kg - Photonegative
put( "kh", MediaType.PhotoPrint); // kh - Photoprint
put( "ki", MediaType.Picture); // ki - Picture
put( "kj", MediaType.ImagePrint); // kj - Print
put( "kk", MediaType.Poster); // kk - Poster
put( "kl", MediaType.TechnicalDrawing); // kl - Technical drawing
put( "kn", MediaType.Chart); // kn - Chart
put( "ko", MediaType.FlashCard); // ko - Flash card
put( "kp", MediaType.Postcard); // kp - Postcard
put( "kq", MediaType.Icon); // kq - Icon
put( "kr", MediaType.Radiograph); // kr - Radiograph
put( "ks", MediaType.StudyPrint); // ks - Study print
put( "kv", MediaType.Photo); // kv - Photograph, type unspecified
put( "ku", MediaType.ImageOther); // ku - Unspecified
put( "kz", MediaType.ImageOther); // kz - Other
// motion picture
put( "mc", MediaType.FilmCartridge); // mc - Film cartridge
put( "mf", MediaType.FilmCassette); // mf - Film cassette
put( "mo", MediaType.FilmRoll); // mo - Film roll
put( "mr", MediaType.FilmReel); // mr - Film reel
put( "mu", MediaType.FilmOther); // mu - Unspecified
put( "mz", MediaType.FilmOther); // mz - Other
put( "o?", MediaType.Kit); // o - kit
put( "q?", MediaType.MusicalScore); // q - notated music
put( "r?", MediaType.SensorImage); // r - remote-sensing image
// sound recording
put( "sd.a", MediaType.SoundDiscLP); // sd - Sound disc
put( "sd.b", MediaType.SoundDiscLP); // sd - Sound disc
put( "sd.c", MediaType.SoundDiscLP); // sd - Sound disc
put( "sd.d", MediaType.SoundDiscLP); // sd - Sound disc
put( "sd.f", MediaType.SoundDiscCD); // sd - Sound disc
put( "sd", MediaType.SoundDisc); // sd - Sound disc
put( "sc", MediaType.TypeObsolete); // sc - Cylinder [OBSOLETE]
put( "se", MediaType.SoundCylinder); // se - Cylinder
put( "sf", MediaType.TypeObsolete); // sf - Sound-track film [OBSOLETE]
put( "sg", MediaType.SoundCartridge); // sg - Sound cartridge
put( "si", MediaType.SoundTrackFilm); // si - Sound-track film
put( "sr", MediaType.TypeObsolete); // sr - Roll [OBSOLETE]
put( "sq", MediaType.SoundRoll); // sq - Roll
put( "ss", MediaType.SoundCassette); // ss - Sound cassette
put( "st", MediaType.SoundTapeReel); // st - Sound-tape reel
put( "sw", MediaType.SoundWireRecording); // sw - Wire recording
put( "su", MediaType.SoundRecordingOther); // su - Unspecified
// put( "sz", MediaType.SoundRecordingOther); // sz - Other // needs special handling
// text
put( "ta", MediaType.Print); // ta - Regular print
put( "tb", MediaType.PrintLarge); // tb - Large print
put( "tc", MediaType.Braille); // tc - Braille
put( "td", MediaType.LooseLeaf); // td - Loose-leaf
put( "tu", MediaType.TextOther); // tu - Unspecified
put( "tz", MediaType.TextOther); // tz - Other
// video recording
put( "v...a", MediaType.VideoBeta); // vf--a - Beta (1/2 in., videocassette)
put( "v...b", MediaType.VideoVHS); // vf--b - VHS (1/2 in., videocassette)
put( "v...c", MediaType.VideoUMatic); // vf--c - U-matic (3/4 in., videocasstte)
put( "v...d", MediaType.VideoEIAJ); // vr--d - EIAJ (1/2 in., reel)
put( "v...e", MediaType.VideoTypeC); // vr--e - Type C (1 in., reel)
put( "v...f", MediaType.VideoQuadruplex); // vr--f - Quadruplex (1 in. or 2 in., reel)
put( "v...g", MediaType.VideoLaserdisc); // vd--g - Laserdisc
put( "v...h", MediaType.VideoCapacitance); // vd--h - CED (Capacitance Electronic Disc) videodisc
put( "v...i", MediaType.VideoBetacam); // vf--i - Betacam (1/2 in., videocassette)
put( "v...j", MediaType.VideoBetacamSP); // vf--j - Betacam SP (1/2 in., videocassette)
put( "v...k", MediaType.VideoSuperVHS); // vf--k - Super-VHS (1/2 in., videocassette)
put( "v...m", MediaType.VideoMII); // vf--m - M-II (1/2 in., videocassette)
put( "v...o", MediaType.VideoD2); // vf--o - D-2 (3/4 in., videocassette)
put( "v...p", MediaType.Video8mm); // vf--p - 8 mm. videocassette
put( "v...q", MediaType.VideoHi8); // vf--q - Hi-8 mm. videocassette
put( "v...m", MediaType.VideoMII); // vf--m - M-II (1/2 in., videocassette)
put( "v...s", MediaType.VideoBluRay); // vd--s - Blu-ray disc
put( "v...v", MediaType.VideoDVD); // vd--v - DVD
put( "v...n", MediaType.TypeObsolete); // v---n - Obsolete type specification
put( "v...?", null); // v---? - Obsolete type specification
put( "v...u", MediaType.VideoOther); // v---u - Unspecified
// put( "v...z", MediaType.VideoOther); // v---z - Other video type // needs special handling
}
};
// used for validating the form of a specific video item
private static LinkedHashMap<String, Character> videoFormMap = new LinkedHashMap<String, Character>() {
{
// video recording
put( "v...a", 'f'); // vf--a - Beta (1/2 in., videocassette)
put( "v...b", 'f'); // vf--b - VHS (1/2 in., videocassette)
put( "v...c", 'f'); // vf--c - U-matic (3/4 in., videocasstte)
put( "v...d", 'r'); // vr--d - EIAJ (1/2 in., reel)
put( "v...e", 'r'); // vr--e - Type C (1 in., reel)
put( "v...f", 'r'); // vr--f - Quadruplex (1 in. or 2 in., reel)
put( "v...g", 'd'); // vd--g - Laserdisc
put( "v...h", 'd'); // vd--h - CED (Capacitance Electronic Disc) videodisc
put( "v...i", 'f'); // vf--i - Betacam (1/2 in., videocassette)
put( "v...j", 'f'); // vf--j - Betacam SP (1/2 in., videocassette)
put( "v...k", 'f'); // vf--k - Super-VHS (1/2 in., videocassette)
put( "v...m", 'f'); // vf--m - M-II (1/2 in., videocassette)
put( "v...o", 'f'); // vf--o - D-2 (3/4 in., videocassette)
put( "v...p", 'f'); // vf--p - 8 mm. videocassette
put( "v...q", 'f'); // vf--q - Hi-8 mm. videocassette
put( "v...m", 'f'); // vf--m - M-II (1/2 in., videocassette)
put( "v...s", 'd'); // vd--s - Blu-ray disc
put( "v...v", 'd'); // vd--v - DVD
}
};
/**
* Return the content type and media types, plus electronic, for this record
*
* @param record MARC Record
* @return Set of Strings of content types and media types
*/
public Set<String> getContentTypesAndMediaTypesMapped(final Record record, String mapFileName)
{
Set<String> formats = getContentTypes(record);
formats.addAll( getMediaTypes(record));
if (recordIsMinimal(record))
{
addFormatError(record.getControlNumber(), "", "", eErrorSeverity.WARN, "Record contains minimal metadata, format is likely wrong");
}
formats = addOnlineTypes(record, formats, false);
if (isArchive(record)) formats.add(ControlType.Archive.toString());
String mapName = null;
AbstractMultiValueMapping theMap = ValueIndexerFactory.instance().createMultiValueMapping(mapFileName);
Set<String>formatsMapped = new LinkedHashSet<String>();
try
{
formatsMapped.addAll(theMap.map(formats));
}
catch (Exception e)
{
// TODO Auto-generated catch block
e.printStackTrace();
}
return(formatsMapped);
}
/**
* Return the content type and media types, plus electronic, for this record
*
* @param record MARC Record
* @return Set of Strings of content types and media types
*/
public Set<String> getContentTypesAndMediaTypes(final Record record)
{
Set<String> formats = getContentTypes(record);
formats.addAll( getMediaTypes(record));
if (recordIsMinimal(record))
{
addFormatError(record.getControlNumber(), "", "", eErrorSeverity.WARN, "Record contains minimal metadata, format is likely wrong");
}
formats = addOnlineTypes(record, formats, false);
if (isArchive(record)) formats.add(ControlType.Archive.toString());
return(formats);
}
private boolean recordIsMinimal(Record record)
{
ControlField field008 = ((ControlField)record.getVariableField("008"));
if (field008 == null) return(true);
String field008Str = field008.getData();
List<?> vfs = record.getVariableFields(new String[]{"300", "538", "500"});
if ((field008Str.startsWith("000000n")||field008Str.contains("????????????")) && vfs.size() == 0)
return(true);
return (false);
}
/*
* Online materials
* Leader/06 a, i, j, t AND 006/06 (008/23) o OR 007/00 c AND007/01 r
* Leader/06 m AND 006/06 (008/23) o
* Leader/06 p, c, d AND 006/06 (008/23) o
* Leader/06 g AND 006/06 (008/23) o OR 007/00 v AND 007/04 z
* Leader/06 e, f, k, o, r AND 006/12 (008/29) o
*/
public String isOnlineFormatTypes(final Record record)
{
char typeOfRecord = record.getLeader().getTypeOfRecord();
ControlField field008 = ((ControlField)record.getVariableField("008"));
String field008Str = field008 != null ? field008.getData() : "";
List<VariableField> fields006 = record.getVariableFields("006");
List<VariableField> fields007 = record.getVariableFields("007");
String types1 = "aijtpcd";
String types2 = "efgkorm";
if (types1.indexOf(typeOfRecord) != -1)
{
if (field008Str.length() > 23 && field008Str.charAt(23) == 'o') return("field 008 position 23 = 'o'");
if (setContainsAt(fields006, 6, "o", true)) return("field 006 position 6 ='o'");
if (typeOfRecord == 'a' || typeOfRecord == 'i' || typeOfRecord == 'j' || typeOfRecord == 't')
{
if (setContainsAt(fields007, 0, "cr", true)) return("field 007 startswith ='cr'");
}
}
if (types2.indexOf(typeOfRecord) != -1)
{
if (field008Str.length() > 29 && field008Str.charAt(29) == 'o') return("field 008 position 29 = 'o'");
if (setContainsAt(fields006, 12, "o", true)) return("field 006 position 12 ='o'");
if (typeOfRecord == 'g')
{
if (setContainsAt(fields007, 0, "v...z", true))
{
return("field 007 startswith ='v...z'");
}
}
}
return(null);
}
private boolean setContainsAt(List<VariableField> fields, int offset, String match, boolean ignoreCase)
{
for (VariableField vf : fields)
{
ControlField cf = (ControlField)vf;
String data = cf.getData();
if (!match.contains("."))
{
if (data.regionMatches(ignoreCase, offset, match, 0, match.length()))
{
return(true);
}
}
else
{
if (data.length() > offset+match.length() && data.substring(offset, offset + match.length()).matches(match))
{
return(true);
}
}
}
return false;
}
/**
* Add types EBook and Online for electronic items for this record
* @param checkURLs
*
* @param record MARC Record
* @param formats the <code>Set</code> of formats to add the types EBook and Online to
* @return <code>String</code> of primary material types
*/
public Set<String> addOnlineTypes(final Record record, Set<String> formats, boolean checkURLs)
{
// see if we have full-text link
String onlineAccordingTo = isOnlineFormatTypes(record);
boolean hasFullLink = hasFullText(record);
boolean hasSupplLink = hasSupplText(record);
// if so, and this is a book, add e-book as well
if (onlineAccordingTo != null && !hasFullLink && !hasSupplLink)
{
addFormatError(record.getControlNumber(), "856", "", eErrorSeverity.WARN, "Record claims to be \"Online\" in "+ onlineAccordingTo + " but has no valid 856 field");
}
else if (onlineAccordingTo != null && !hasFullLink)
{
formats.add(MediaType.OnlineExtra.toString());
}
else if (hasFullLink && onlineAccordingTo == null)
{
addFormatError(record.getControlNumber(), "856", "", eErrorSeverity.WARN, "Record has valid 856 field, but is missing declarations of online");
}
// if so, and this is a book, add e-book as well
if (formats.contains(ContentType.Book.toString()) && hasFullLink == true)
{
formats = addToTop(formats, CombinedType.EBook.toString());
}
if (hasFullLink == true)
{
formats.add(MediaType.Online.toString());
}
return(formats);
}
/**
* Return the primary content type for this record
*
* @param record MARC Record
* @return <code>String</code> of primary material types
*/
public String getPrimaryContentType(final Record record)
{
String primaryFormat = "";
Set<String> materialType = getContentTypes(record);
for (String result : materialType)
{
primaryFormat = result;
break;
}
return primaryFormat;
}
/**
* Return the primary content type, plus electronic, for this record
*
* @param record MARC Record
* @return String of primary material types
*/
public Set<String> getPrimaryContentTypePlusOnline(final Record record)
{
Set<String> format = new LinkedHashSet<String>();
// get primary material type
String primaryType = getPrimaryContentType(record);
format.add(primaryType);
format = addOnlineTypes(record, format, false);
return format;
}
/**
* Parse out content types from record
*
* @param record MARC Record
* @return <code>List</code> of material types
*/
public boolean isArchive(final Record record)
{
// special case for archive
if (record.getLeader().toString().toLowerCase().charAt(8) == 'a')
{
return true;
}
else
{
return false;
}
}
/**
* Parse out content types from record
*
* @param record MARC Record
* @return <code>List</code> of material types
*/
public Set<String> getContentTypes(final Record record)
{
Set<String> contentTypes = new LinkedHashSet<String>(); // the list of material types
// // Leader ////
String leader = record.getLeader().toString();
// get main type and profile from leader/06
ContentType leaderType = extractType(leader, "leader"); // main material type, based on leader
ProfileType leaderProfile = extractProfile(leader, "leader"); // 008 profile to use
// // 008 & 006 ////
// take both the 008 and 006 (which use the same structure, just at
// different positions)
// so we can iterate over them both
String[] formatTags = { "008", "006" };
ControlField field008 = (ControlField)record.getVariableField("008");
List<VariableField> fields006 = (List<VariableField>)record.getVariableFields("006");
if (field008 != null)
{
getContentTypeFromFixedField(contentTypes, record, field008, leaderProfile, leaderType, offsetForProfile008(leaderProfile));
}
for (VariableField field006v : fields006)
{
ControlField field006 = (ControlField)field006v;
if (field006.getData().length() == 0) continue;
ProfileType profile = extractProfile(field006.getData(), "006");
ContentType type = extractType(field006.getData(), "006");
if (profile != ProfileType.NoneDefined)
{
getContentTypeFromFixedField(contentTypes, record, field006, profile, type, offsetForProfile008(profile) - 17);
}
}
// / DATA FIELDS ///
// thesis
if (!record.getVariableFields("502").isEmpty())
{
String value502 = ((DataField)record.getVariableField("502")).getSubfield('a') != null ?
((DataField)record.getVariableField("502")).getSubfield('a').getData() : "";
if (value502.matches(".*[Tt]hesis.*") || value502.matches(".*[Dd]issertation.*") || value502.matches(".*[Hh]abilitation.*"))
{
// set the first (primary) type as thesis
contentTypes = addToTop(contentTypes, ContentType.Thesis.toString());
// nix manuscript so we can distinguish actual manuscripts
contentTypes.remove(ContentType.Manuscript.toString());
}
else
{
contentTypes = addToTop(contentTypes, ContentType.Thesis.toString());
}
}
// nothing worked?
if (contentTypes.isEmpty())
{
// record must have very little data, so we'll take whatever we can
// get isbn
if (!record.getVariableFields("020").isEmpty())
{
contentTypes.add(ContentType.Book.toString());
}
// only type from leader was available
else if (leaderType != ContentType.NoneDefined)
{
contentTypes.add(leaderType.toString());
}
}
return contentTypes;
}
private void getContentTypeFromFixedField(Set<String> contentTypesStr, Record record, ControlField field, ProfileType profile, ContentType defaultType, int offsetInField)
{
ContentType typeToAdd = null;
if (field.getData().length()-1 < offsetInField)
{
typeToAdd = defaultType;
addFormatError(record.getControlNumber(), field.getTag(), "", eErrorSeverity.WARN, "Fixed field "+field.getTag()+" is shorter than it ought to be");
}
else
{
char subContentType = field.getData().charAt(offsetInField);
switch (profile)
{
case Books:
{
typeToAdd = defaultType;
break;
}
case Computers:
{
typeToAdd = lookupType(computersSubTypes, subContentType, defaultType);
break;
}
case Maps:
{
typeToAdd = lookupType(mapsSubTypes, subContentType, defaultType);
break;
}
case Music:
{
typeToAdd = defaultType;
break;
}
case Serial:
{
typeToAdd = lookupType(serialsSubTypes, subContentType, defaultType);
break;
}
case Visual:
{
ContentType type = lookupType(visualSubTypes, subContentType, defaultType);
typeToAdd = type;
if (visualValidSubTypes.containsKey(subContentType))
{
String validValues = visualValidSubTypes.get(subContentType);
boolean isValid = false;
for (char c : validValues.toCharArray())
{
if (mainTypeMap.get(c).equals(defaultType)) isValid = true;
}
if (!isValid)
{
addFormatError(record.getControlNumber(), field.getTag(), "", eErrorSeverity.WARN, "Visual subtype is "+type+" which is probably not valid for type "+defaultType);
}
}
break;
}
case Mixed:
{
typeToAdd = defaultType;
break;
}
}
}
String field245h = indexer.getFirstFieldVal(record, null, "245h");
if (field245h != null )
{
field245h = field245h.replaceFirst(".*?\\[([a-zA-Z ]*).*\\].*", "$1").trim();
if (field245hTypeMap.containsKey(field245h))
{
boolean isValid = false;
ContentType valid[] = field245hTypeMap.get(field245h);
for (ContentType validType : valid)
{
if (validType == typeToAdd) isValid = true;
}
if (isValid)
{
contentTypesStr.add(typeToAdd.toString());
}
else
{
if (!isSuperTypeOf(typeToAdd, valid[0]) || ( (field.getId() != null && (field.getId() & (long)2) == (long)2)))
{
if (typeToAdd != null) contentTypesStr.add(typeToAdd.toString());
}
contentTypesStr.add(valid[0].toString());
addFormatError(record.getControlNumber(), field.getTag(), "", eErrorSeverity.WARN, "ContentType as specified in the leader/008 field conflicts with that specified in the 245h subfield");
}
}
else
{
if (typeToAdd != null) contentTypesStr.add(typeToAdd.toString());
}
}
else
{
if (typeToAdd != null) contentTypesStr.add(typeToAdd.toString());
}
if (defaultType == ContentType.MapManuscript && Utils.setItemContains(contentTypesStr, "ContentType\\.Map.*"))
{
contentTypesStr.add(defaultType.toString());
}
ContentType govDocType = null;
if (/*(profile == ProfileType.Books || profile == ProfileType.Serial ) && */(govDocType = isGovDoc(field, record)) != null)
{
contentTypesStr.add(govDocType.toString());
}
Set<String> holdings = SolrIndexer.instance().getAllSubfields(record, "999t", "");
for (String holding : holdings)
{
if (holding.equals("EQUIPMENT") || holding.equals("HS-DVDPLYR") || holding.equals("EQUIP-3DAY") || holding.equals("CELLPHONE") ||
holding.equals("CALCULATOR") || holding.equals("LCDPANEL") || holding.equals("HSLAPTOP") || holding.equals("PROJSYSTEM") ||
holding.equals("HSWIRELESS") || holding.equals("EQUIP-2HR") || holding.equals("DIGITALCAM") || holding.equals("AUDIO-VIS") ||
holding.equals("LAPTOP") || holding.equals("EQUIP-3HR") || holding.equals("CAMCORDER"))
{
contentTypesStr.clear();
contentTypesStr.add(ContentType.Equipment.toString());
}
}
}
static String govDocLetters = "acfilmoz";
private ContentType isGovDoc(ControlField field, Record record)
{
ContentType toReturn = null;
int offsetForGovDoc = (field.getTag().equals("008")) ? 28 : 11;
if (field != null && field.getData().length() > offsetForGovDoc)
{
char govdoc = field.getData().toLowerCase().charAt(offsetForGovDoc);
if (govDocLetters.indexOf(govdoc) != -1)
{
toReturn = govDocTypes.get(govdoc);
return(toReturn);
}
else if (govdoc == 's')
{
DataField pubInfo260 = ((DataField)(record.getVariableField("260")));
if (pubInfo260 != null)
{
Subfield sfb = pubInfo260.getSubfield('b');
if (sfb != null && !sfb.getData().contains("Universit"))
{
return(ContentType.GovernmentDocumentState);
}
else
{
return(ContentType.GovernmentDocumentStateUniversity);
}
}
}
}
return null;
}
private boolean isSuperTypeOf(ContentType typeToAdd, ContentType contentTypeFrom245h)
{
if (typeToAdd == ContentType.ProjectedMedium && contentTypeFrom245h == ContentType.Video)
return(true);
return false;
}
private ContentType lookupType(LinkedHashMap<Character, ContentType> subTypeMap, char subContentType, ContentType defaultType)
{
if (subTypeMap.containsKey(subContentType))
{
return(subTypeMap.get(subContentType));
}
else if (subTypeMap.containsKey(' ')) // key not found use default value, if defined
{
return(subTypeMap.get(' '));
}
else
{
return(defaultType);
}
}
private int offsetForProfile008(ProfileType profile)
{
switch (profile)
{
case Computers: return(26);
case Visual: return(33);
case Maps: return(25);
case Serial: return(21);
case Mixed:
case Books:
case Music: return(23);
}
return 00;
}
/**
* Parse out media / carrier types from record
*
* @param record MARC Record
* @return <code>List</code> of material types
*/
public Set<String> getMediaTypes(final Record record)
{
Set<MediaType> form = new LinkedHashSet<MediaType>(); // the list of form
// types
ContentType leaderType = extractType(record.getLeader().toString(), "leader"); // main material type, based on leader
ProfileType profileType = extractProfile(record.getLeader().toString(), "leader");
// // Data Fields ////
// electronic resource from title
DataField title = (DataField) record.getVariableField("245");
if (title != null && title.getSubfield('h') != null)
{
// general material designator in title 245|h
if (title.getSubfield('h').getData().toLowerCase().contains("[electronic resource]"))
{
form.add(MediaType.Electronic245);
}
}
// // 007 ////
List<VariableField> fields007 = (List<VariableField>)record.getVariableFields("007");
for (VariableField field007v : fields007)
{
ControlField field007 = (ControlField)field007v;
// first, check to make sure this is a post-1981 007 by looking at
// position 2, which should be undefined
String field007Str = validate007Field(record, profileType, leaderType, field007);
if (field007Str == null) continue;
char materialGeneral = field007Str.charAt(0);
String materialFirst = "" + field007Str.charAt(0) + "?";
String materialFirstTwo = field007Str.substring(0, 2);
String key = materialFirstTwo;
if (materialGeneral == 'v')
{
key = "" + field007Str.charAt(0) + "..." + field007Str.charAt(4);
}
else if (materialGeneral == 's' && key.equals("sd") && mediaTypeMap.containsKey(key + "." + field007Str.charAt(3)))
{
key = key + "." + field007Str.charAt(3);
}
if (key.equals("v...z")) // Special handling for Video Other
{
if (this.hasFullText(record))
form.add(MediaType.VideoOnline);
else
form.add(MediaType.VideoOther);
}
else if (key.equals("sz")) // Special handling for Sound Other Media
{
if (this.hasFullText(record))
form.add(MediaType.SoundRecordingOnline);
else
form.add(MediaType.SoundRecordingOther);
}
else if (!mediaTypeMap.containsKey(key))
{
key = materialFirst;
}
// look up value in the media type map which maps the initial characters of an 007 field to a media type
if (mediaTypeMap.containsKey(key))
{
MediaType result = mediaTypeMap.get(key);
if (result == MediaType.TypeObsolete)
{
addFormatError(record.getControlNumber(), "007", "", eErrorSeverity.WARN, "007 field specifies "+field007Str+ " which uses an obsolete encoding");
}
else if (result == MediaType.Online)
{
// Skip it?
result = null;
}
else if (result != null)
{
form.add(result);
}
else
{
result = null;
}
}
else
{
addFormatError(record.getControlNumber(), "007", "", eErrorSeverity.WARN, "007 Format code '"+field007Str+"' is undefined, looking at other fields");
}
if (materialGeneral == 'v') // validate form of video (disc, reel, cassette with the format of the video. ie. You probably don't have a VHS video disc
{
if (videoFormMap.containsKey(key) && videoFormMap.get(key) != field007Str.charAt(1))
{
String errMsg = "Mismatch between form of video (007/01)" + field007Str.charAt(1) + " and type of video (007/04)" + key.charAt(4);
addFormatError(record.getControlNumber(), "007", "", eErrorSeverity.WARN, errMsg);
}
}
} // done with 007 fields
// BinaryHeapPriorityQueue bestAnswers = new BinaryHeapPriorityQueue<MediaType>();
// for (MediaType mt : form)
// {
// bestAnswers.add(mt, mt.priority);
// }
MediaTypeHeuristic type = getMediaTypeHeuristically(record, leaderType);
if (type != null)
{
if (form.isEmpty())
{
form.add(type.mapsTo);
String errMsg = "Media type not specified determining it heuristically " + type.mapsTo + "based on fields: " + type.fromFields;
addFormatError(record.getControlNumber(), "007", "", eErrorSeverity.WARN, errMsg);
}
else if (form.size() == 1)
{
MediaType specifiedForm = form.toArray(new MediaType[0])[0];
MediaType heuristicFormMapsTo = type.mapsTo;
if (!heuristicFormMapsTo.toString().equals(specifiedForm.toString()))
{
MediaType finalAnswer = (type.sigmoidProb() > specifiedForm.sigmoidProb()) ? type.mapsTo : specifiedForm;
if (indexer != null)
{
String errMsg = "Mismatch between specified media type" + specifiedForm + " and heuristically determined one " + heuristicFormMapsTo + " based on fields: "+ type.fromFields;
addFormatError(record.getControlNumber(), "007", "", eErrorSeverity.WARN, errMsg);
if (finalAnswer != specifiedForm)
{
errMsg = "Overriding specified form " + specifiedForm + " with heuristically determined one " + heuristicFormMapsTo;
addFormatError(record.getControlNumber(), "007", "", eErrorSeverity.WARN, errMsg);
}
}
if (finalAnswer != specifiedForm)
{
form.remove(specifiedForm);
form.add(finalAnswer);
}
}
}
}
// // 008 & 006 ////
// parse the form of item indicator from 008 and 006
Set<String> formStr = new LinkedHashSet<String>();
for (MediaType mt : form)
{
formStr.add(mt.toString());
}
String[] formatTags = { "008", "006" };
List<VariableField> fieldsFormat = record.getVariableFields(formatTags);
for (VariableField fieldFormatv : fieldsFormat)
{
ControlField fieldFormat = (ControlField)fieldFormatv;
ProfileType profile;
int position = 0; // position we'll use
// determine the profile
String tag = fieldFormat.getTag();
if (tag.equals("008"))
{
profile = extractProfile(record.getLeader().toString(), "leader");
}
else
{
if (fieldFormat.getData().length() == 0) continue;
profile = extractProfile(fieldFormat.getData(), "006");
}
// from profile, find position
if (profile == ProfileType.Books || profile == ProfileType.Computers ||
profile == ProfileType.Mixed || profile == ProfileType.Music ||
profile == ProfileType.Serial)
{
position = 23;
}
else if (profile == ProfileType.Maps || profile == ProfileType.Visual)
{
position = 29;
}
else
{
continue; // bad profile?
}
int raw_position = position;
// 006 follows same positions as 008, only shifted down seven spots
if (tag.equals("006"))
{
position = position - 17;
}
String field = fieldFormat.getData();
// make sure field has sufficient length
if (field.length() - 1 < position)
{
addFormatError(record.getControlNumber(), tag, "", eErrorSeverity.WARN, "Fixed field "+tag+" is shorter than it ought to be");
continue;
}
char code = field.toLowerCase().charAt(position);
switch (code) // form of item
{
case 'a': // a - Microfilm
formStr.add(FormOfItem.Microfilm.toString());
break;
case 'b': // b - Microfiche
formStr.add(FormOfItem.Microfiche.toString());
break;
case 'c': // c - Microopaque
formStr.add(FormOfItem.Microopaque.toString());
break;
case 'd': // d - Large print
formStr.add(FormOfItem.PrintLarge.toString());
break;
case 'f': // f - Braille
formStr.add(FormOfItem.Braille.toString());
break;
case 'o': // o - Online
// {
// boolean online = true;
// boolean hasFullLink = hasFullText(record);
// boolean hasSupplLink = hasSupplText(record);
//
// // if so, and this is a book, add e-book as well
// if (online && !hasFullLink && !hasSupplLink)
// {
// addFormatError(record.getControlNumber(), "856", "", "Record claims to be \"Online\" in field:"+fieldFormat.getTag()+" position:"+position+" but has no valid 856 field");
// }
// else if (online && !hasFullLink)
// {
// formStr.add(MediaType.OnlineExtra.toString());
// }
//
// if (hasFullLink == true)
// {
// formStr.add(FormOfItem.Online.toString());
// }
// }
break;
case 'q': // q - Direct electronic
formStr.add(FormOfItem.ElectronicDirect.toString());
break;
case 's': // s - Electronic
formStr.add(FormOfItem.Electronic.toString());
break;
case 'r': // r - Regular print reproduction
formStr.add(FormOfItem.Print.toString());
break;
}
}
return formStr;
}
private void addPossibleForm(LinkedHashMap<MediaType, MediaTypeHeuristic> possibleForms, MediaType key, MediaTypeHeuristic value)
{
if (possibleForms.containsKey(key))
{
MediaTypeHeuristic oldVal = possibleForms.get(key);
oldVal.combine(value);
possibleForms.put(key, oldVal);
}
else
{
possibleForms.put(key, value);
}
}
private MediaTypeHeuristic getMediaTypeHeuristically(Record record, ContentType leaderType)
{
LinkedHashMap<MediaType, MediaTypeHeuristic> possibleForms = new LinkedHashMap<MediaType, MediaTypeHeuristic>();
List<VariableField> notes = (List<VariableField>)record.getVariableFields(new String[] {"538"});
for (VariableField notev : notes)
{
DataField note = (DataField)notev;
if (note.getSubfield('a') == null) continue;
String noteData = note.getSubfield('a').getData();
if (noteData.matches(".*Blu[e]?-[Rr]ay.*"))
{
addPossibleForm( possibleForms, MediaType.VideoBluRay, new MediaTypeHeuristic(MediaType.VideoBluRay, 0.8, note.getTag()));
}
else if (noteData.contains("DVD") && !noteData.matches(".*[Aa]lso.*DVD.*") && !noteData.matches(".*DVD-ROM.*") && !noteData.matches(".*DVD [Dd]rive.*"))
{
addPossibleForm( possibleForms, MediaType.VideoDVD, new MediaTypeHeuristic(MediaType.VideoDVD, 0.8, note.getTag()));
}
if (noteData.matches(".*Laser[ ]?[Dd]isc.*") ||
noteData.matches(".*\\bCLV\\b.*"))
{
addPossibleForm( possibleForms, MediaType.VideoLaserdisc, new MediaTypeHeuristic(MediaType.VideoLaserdisc, 0.8, note.getTag()));
}
if (noteData.matches(".*[Cc]ompact [Dd]isc.*") )
{
addPossibleForm( possibleForms, MediaType.SoundDiscCD, new MediaTypeHeuristic(MediaType.SoundDiscCD, 0.8, note.getTag()));
}
if (noteData.matches(".*[Vv]ideodisc.*"))
{
addPossibleForm( possibleForms, MediaType.VideoLaserdisc, new MediaTypeHeuristic(MediaType.VideoLaserdisc, 0.6, note.getTag()));
addPossibleForm( possibleForms, MediaType.VideoDVD, new MediaTypeHeuristic(MediaType.VideoDVD, 0.6, note.getTag()));
}
if (noteData.contains("VHS") && !noteData.matches(".*[Aa]lso.*VHS.*"))
{
addPossibleForm( possibleForms, MediaType.VideoVHS, new MediaTypeHeuristic(MediaType.VideoVHS, 0.7, note.getTag()));
}
else if (leaderType == ContentType.ProjectedMedium && noteData.matches(".*Beta\\b SP.*"))
{
addPossibleForm( possibleForms, MediaType.VideoBetacamSP, new MediaTypeHeuristic(MediaType.VideoBetacamSP, 0.7, note.getTag()));
}
else if (leaderType == ContentType.ProjectedMedium && noteData.matches(".*Beta\\b.*") && !noteData.matches(".*[Aa]lso.*Beta\\b.*"))
{
addPossibleForm( possibleForms, MediaType.VideoBeta, new MediaTypeHeuristic(MediaType.VideoBeta, 0.7, note.getTag()));
}
}
Set<String> forms = SolrIndexer.instance().getAllSubfields(record, "300[abc]", "--");
for (String form : forms)
{
if (form.matches(".*[Vv]ideo[ ]?disc.*--.*--.*12 cm.*"))
{
addPossibleForm( possibleForms, MediaType.VideoLaserdisc, new MediaTypeHeuristic(MediaType.VideoLaserdisc, 0.1, "300"));
addPossibleForm( possibleForms, MediaType.VideoDVD, new MediaTypeHeuristic(MediaType.VideoDVD, 0.8, "300"));
}
else if (form.matches(".*[Vv]ideo[ ]?disc.*--.*--.*12.*"))
{
addPossibleForm( possibleForms, MediaType.VideoLaserdisc, new MediaTypeHeuristic(MediaType.VideoLaserdisc, 0.8, "300"));
addPossibleForm( possibleForms, MediaType.VideoDVD, new MediaTypeHeuristic(MediaType.VideoDVD, 0.1, "300"));
}
else if (form.matches(".*[Vv]ideo[ ]?disc.*--.*--.*4 3/4.*"))
{
addPossibleForm( possibleForms, MediaType.VideoLaserdisc, new MediaTypeHeuristic(MediaType.VideoLaserdisc, 0.1, "300"));
addPossibleForm( possibleForms, MediaType.VideoDVD, new MediaTypeHeuristic(MediaType.VideoDVD, 0.7, "300"));
addPossibleForm( possibleForms, MediaType.VideoBluRay, new MediaTypeHeuristic(MediaType.VideoBluRay, 0.7, "300"));
}
else if (form.matches(".*[Vv]ideo[ ]?cassette.*--.*--.*[1\u00B9]/[2\u2082].*"))
{
addPossibleForm( possibleForms, MediaType.VideoVHS, new MediaTypeHeuristic(MediaType.VideoVHS, 0.7, "300"));
addPossibleForm( possibleForms, MediaType.VideoBeta, new MediaTypeHeuristic(MediaType.VideoBeta, 0.55, "300"));
}
else if (leaderType == ContentType.ProjectedMedium && form.matches(".*cassette.*--.*--.*[Uu][-]?[Mm]atic.*"))
{
addPossibleForm( possibleForms, MediaType.VideoUMatic, new MediaTypeHeuristic(MediaType.VideoUMatic, 0.75, "300"));
addPossibleForm( possibleForms, MediaType.VideoBeta, new MediaTypeHeuristic(MediaType.VideoBeta, 0.3, "300"));
addPossibleForm( possibleForms, MediaType.VideoVHS, new MediaTypeHeuristic(MediaType.VideoVHS, 0.4, "300"));
}
else if (form.matches(".*[Vv]ideo[ ]?cassette.*--.*--.*3/4.*"))
{
addPossibleForm( possibleForms, MediaType.VideoUMatic, new MediaTypeHeuristic(MediaType.VideoUMatic, 0.7, "300"));
addPossibleForm( possibleForms, MediaType.VideoBeta, new MediaTypeHeuristic(MediaType.VideoBeta, 0.3, "300"));
addPossibleForm( possibleForms, MediaType.VideoVHS, new MediaTypeHeuristic(MediaType.VideoVHS, 0.4, "300"));
}
else if (leaderType == ContentType.ProjectedMedium && form.matches(".*cassette.*--.*--.*3/4.*"))
{
addPossibleForm( possibleForms, MediaType.VideoUMatic, new MediaTypeHeuristic(MediaType.VideoUMatic, 0.65, "300"));
addPossibleForm( possibleForms, MediaType.VideoBeta, new MediaTypeHeuristic(MediaType.VideoBeta, 0.3, "300"));
addPossibleForm( possibleForms, MediaType.VideoVHS, new MediaTypeHeuristic(MediaType.VideoVHS, 0.4, "300"));
}
else if (form.matches(".*[Ss]ound [Dd]is[ck].*--.*--.*4 3/4.*"))
{
addPossibleForm( possibleForms, MediaType.SoundDiscCD, new MediaTypeHeuristic(MediaType.SoundDiscCD, 0.75, "300"));
}
else if (form.matches(".*[Ss]ound [Dd]is[ck].*--.*33 1/3.*--.*12.*"))
{
addPossibleForm( possibleForms, MediaType.SoundDiscLP, new MediaTypeHeuristic(MediaType.SoundDiscLP, 0.75, "300"));
}
else if ((leaderType == ContentType.SoundRecording || leaderType == ContentType.MusicRecording)
&& form.matches(".*[Dd]is[ck].*--.*33 1/3.*--.*12.*"))
{
addPossibleForm( possibleForms, MediaType.SoundDiscLP, new MediaTypeHeuristic(MediaType.SoundDiscLP, 0.75, "300"));
}
else if (form.matches(".*[Ss]ound [Tt]ape [Rr]eel.*"))
{
addPossibleForm( possibleForms, MediaType.SoundTapeReel, new MediaTypeHeuristic(MediaType.SoundTapeReel, 0.75, "300"));
}
else if (form.matches(".*[Ss]ound [Cc]assette.*"))
{
addPossibleForm( possibleForms, MediaType.SoundCassette, new MediaTypeHeuristic(MediaType.SoundCassette, 0.75, "300"));
}
else if (leaderType == ContentType.Book && (form.matches(".*broadside.*--.*") || form.matches(".*sheet\\b.*--.*")))
{
addPossibleForm( possibleForms, MediaType.Broadside, new MediaTypeHeuristic(MediaType.Broadside, 0.75, "300"));
}
}
double maxPriority = 0.0;
MediaTypeHeuristic maxMth = null;
for (MediaType mt : possibleForms.keySet())
{
MediaTypeHeuristic mth = possibleForms.get(mt);
if (mth.priority > maxPriority)
{
maxMth = mth;
maxPriority = mth.priority;
}
}
return(maxMth);
}
private String validate007Field(Record record, ProfileType profileType, ContentType leaderType, ControlField field007)
{
char field007_02 = '?';
if (field007.getData().matches(".*[bde][a-z][^a-z]{1,2}[cdef][a-z][^a-z]{1,2}[defgh][a-z].*"))
{
// catch the really wackadoodle 007 fields like this: v|bd|dc|ev|fa|gi|hz|iu
// and fix them (in this case the answer should be: vd cvaizu
boolean showError = false;
if (indexer != null && /*indexer.errors != null &&*/ (field007.getId() == null || (field007.getId() & (long)1) == (long)0))
{
/// set id on field to prevent multiple error messages for the same error
field007.setId(field007.getId() == null ? (long)1 : field007.getId() | (long)1);
showError = true;
}
String subfields[] = field007.getData().split("[^a-z]{1,2}");
char[] new007Val = " ".toCharArray();
if (subfields[0].length() == 0)
{
String newsf[] = Arrays.copyOfRange(subfields, 1, subfields.length);
subfields = newsf;
}
for (int i = 0; i < subfields.length; i++)
{
if (i == 0 && (subfields[i].length() == 1 || subfields[i].charAt(0) != 'a'))
{
new007Val[i] = subfields[i].charAt(0);
}
else if (subfields[i].length() > 1 )
{
int offset = (int)(subfields[i].charAt(0) - 'a');
if (new007Val[0] == 'h' && offset > 5) offset += 3;
new007Val[offset] = subfields[i].charAt(1);
}
}
new007Val[2] = ' '; // make sure character 2 of new field is blank
String newValue = new String(new007Val);
newValue = newValue.trim();
if (showError) addFormatError(record.getControlNumber(), "007", "", eErrorSeverity.ERROR, "totally whackadoodle 007 field found \"Its got subfields\" changing it to \'"+ newValue+ "\'");
return(newValue);
}
else if (field007.getData().length() <= 2 ||
(field007_02 = field007.getData().toLowerCase().charAt(2)) != ' ' && field007_02 != '|' && field007_02 != '-' && field007_02 != '*')
{
{
boolean showError = false;
if (indexer != null)
{
/// set id on field to prevent multiple error messages for the same error
showError = true;
}
if (profileType == profileType.Visual && leaderType == ContentType.ProjectedMedium &&
((field007.getData().length() % 6) == 0 || field007.getData().replaceFirst("-*$", "").length() == 6))
{
String newValue = field007.getData().replaceFirst("([a-z])([-a-z][-a-z][-a-z][-a-z][-a-z]).*", "v$1 $2");
if (showError) addFormatError(record.getControlNumber(), "007", "", eErrorSeverity.WARN, "Old 007 visual material fixed field (pre-1981) mapping it from "+field007+ " to "+ newValue);
return(newValue);
}
else if (profileType == profileType.Music && (field007.getData().matches("^sl..j.*") || field007.getData().matches("^d[abcd].[ms][cde].*") || field007.getData().matches("^de.g.*")) )
{
String newValue = field007.getData().replaceFirst("([a-z])([-a-z][-a-z][-a-z][-a-z].*)", "s$1 $2");
if (showError) addFormatError(record.getControlNumber(), "007", "", eErrorSeverity.WARN, "Old 007 music fixed field (pre 1981)");
return(newValue);
}
else if ( field007_02 == 'r' || field007_02 == 'o')
{
if (showError) addFormatError(record.getControlNumber(), "007", "", eErrorSeverity.WARN, "Old 007 fixed field (post-1981), character 2 is '"+field007_02+"' it should be undefined.");
}
else if (field007.getData().length() <= 2)
{
if (showError) addFormatError(record.getControlNumber(), "007", "", eErrorSeverity.WARN, "Malformed 007 fixed field, field too short");
return (field007.getData() + " ");
}
else
{
if (showError) addFormatError(record.getControlNumber(), "007", "", eErrorSeverity.WARN, "Malformed 007 fixed field, character 02 should be blank");
}
}
}
return(field007.getData());
}
/**
* Return the type based on record type character from either leader or
* 006/00
*
* @param field
* leader or 006 as string
* @param source
* whether this is leader of 006
* @return string
*/
protected ContentType extractType(String field, String source)
{
char recordType = ' ';
if (source.equals("leader"))
{
recordType = field.toLowerCase().charAt(6);
}
else if (source.equals("006"))
{
recordType = field.toLowerCase().charAt(0);
}
if (recordType == 'a' && !source.equals("006"))
{
char subKey = field.toLowerCase().charAt(7);
if (mainSubTypeMap.containsKey(subKey))
{
return(mainSubTypeMap.get(subKey));
}
}
if (mainTypeMap.containsKey(recordType)) // not else if on purpose
{
return(mainTypeMap.get(recordType));
}
return ContentType.NoneDefined;
}
/**
* Return the profile based on record type character from either leader or
* 006/00
*
* @param field
* leader or 006 as string
* @param source
* whether this is leader of 006
* @return string
*/
protected ProfileType extractProfile(String field, String source)
{
char recordType = ' ';
if (source.equals("leader"))
{
recordType = field.toLowerCase().charAt(6);
}
else if (source.equals("006"))
{
recordType = field.toLowerCase().charAt(0);
}
if (recordType == 'a' && !source.equals("006"))
{
char subKey = field.toLowerCase().charAt(7);
if (mainSubTypeMap.containsKey(subKey))
{
return(mainSubProfileMap.get(subKey));
}
}
if (mainTypeMap.containsKey(recordType)) // not else if on purpose
{
return(mainProfileMap.get(recordType));
}
return ProfileType.NoneDefined;
}
/**
* Whether the record contains a full-text link
*
* @param record MARC Record
* @return <code>true</code> if record contains a full-text link
*/
public Boolean hasFullText(final Record record)
{
Set<String> urls = indexer.getFullTextUrls(record);
return(urls.size() != 0 ? true : false);
}
/**
* Whether the record contains a full-text link
*
* @param record MARC Record
* @return <code>true</code> if record contains a full-text link
*/
public Boolean hasSupplText(final Record record)
{
Set<String> urls = indexer.getSupplUrls(record);
return(urls.size() != 0 ? true : false);
}
/**
* Shift (or add) element to the front of our list
*
* @param formats list
* @param add item to add
* @return new <code>Set</code> with item <code>add</code> at the front
*/
protected Set<String> addToTop(Set<String> formats, String add )
{
// create a new list, and add ours first
Set<String> temp = new LinkedHashSet<String>();
temp.add(add);
// now add all the existing ones
temp.addAll(formats);
return temp;
}
}