package com.cyc.tool.distributedrepresentations; /* * #%L * DistributedRepresentations * %% * Copyright (C) 2015 Cycorp, Inc * %% * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * #L% */ import java.io.IOException; import java.util.logging.Level; import java.util.logging.Logger; /** * The word2vec space produced by Google by training on 10^11 words of news. * * <p> * See: https://code.google.com/p/word2vec/ */ public class GoogleNewsW2VSpace extends Word2VecSpaceFromFile { private static GoogleNewsW2VSpace singleton; private static final String w2vfile = "/cyc/projects/kbTaxonomy/Experiments/ConceptFinder/GoogleNews-vectors-negative300.bin.gz"; private GoogleNewsW2VSpace() throws IOException { super(); vectors = db.getTreeMap(getWord2VecVectorsMapName()); if (!vectors.isEmpty()) { assert (getVector("snowcapped_Caucasus") != null); setSize(getVector("dog").length); return; } createW2VinDB(getW2vfile()); } /** * Factory get method for GoogleNewsW2VSpace. * * @return a GoogleNewsW2VSpace */ public static GoogleNewsW2VSpace get() { if (singleton == null) { try { singleton = new GoogleNewsW2VSpace(); } catch (IOException ex) { Logger.getLogger(GoogleNewsW2VSpace.class.getName()).log(Level.SEVERE, null, ex); throw new RuntimeException("Can't create the Google News W2VSpace object " + ex); } } return singleton; } private static String getW2vfile() { return w2vfile; } private static String getWord2VecVectorsMapName() { /* @ToDo: change this to use the class name, so that it's automatically correct */ return GoogleNewsW2VSpace.class.getCanonicalName(); //return word2VecVectorsMapName; } }