package com.cyc.tool.distributedrepresentations;
/*
* #%L
* DistributedRepresentations
* %%
* Copyright (C) 2015 Cycorp, Inc
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import java.io.File;
import java.io.IOException;
import java.util.Map;
import java.util.function.Predicate;
import org.mapdb.DBMaker;
/**
 * A subset of a Google Word2Vec word space.
 *
 * <p>The subset consists of the entries of a parent {@link Word2VecSpace} that
 * are selected through {@code filterVectors} with a caller-supplied predicate.
 * The selected vectors are stored in a named tree map obtained from the
 * inherited {@code db} handle, so that a later construction using the same
 * name finds the map already populated and skips the filtering step.
 */
public abstract class Word2VecSubspace extends Word2VecSpace {

  /** The space this subspace was derived from. */
  final Word2VecSpace mySuperSpace;

  /**
   * Builds the subspace, reusing previously persisted vectors when available.
   *
   * <p>If the tree map named {@code persistLoc} already holds entries, the
   * dimensionality is taken from the length of the first stored vector and the
   * parent space is not consulted. Otherwise the parent space is filtered and
   * every resulting entry is written to the map and committed.
   *
   * @param ofSpace the parent space; recorded as the super space and, when
   * nothing is cached, asked to filter its vectors
   * @param includeIf predicate handed to {@code ofSpace.filterVectors}
   * @param persistLoc name of the tree map that holds this subspace's vectors
   * @throws IOException declared by the signature; the visible body contains no
   * explicit throw, so it presumably originates from inherited or
   * configuration code (to be confirmed)
   */
  protected Word2VecSubspace(Word2VecSpace ofSpace, Predicate<String> includeIf, String persistLoc) throws IOException {
    mySuperSpace = ofSpace;

    // Open the database file only if no handle exists yet.
    if (db == null) {
      File dbFile = new File(Config.getW2vDBFile());
      db = DBMaker.newFileDB(dbFile)
              .closeOnJvmShutdown()
              .make();
    }

    vectors = db.getTreeMap(persistLoc);

    if (vectors.isEmpty()) {
      // Nothing persisted under this name yet: derive the content from the parent space.
      // NOTE(review): unlike the cached branch below, this branch never calls
      // setSize(); confirm that the size is established elsewhere.
      System.out.println("Filtering vectors for:" + persistLoc);
      Map<String, float[]> selected = ofSpace.filterVectors(includeIf);
      for (Map.Entry<String, float[]> entry : selected.entrySet()) {
        vectors.put(entry.getKey(), entry.getValue());
      }
      // Commit the new entries, compact the store, then commit once more.
      db.commit();
      db.compact();
      db.commit();
      System.out.println("Vectors filtered and persisted.");
    } else {
      // Cached content found: the first stored vector determines the dimensionality.
      final int dimensionality = vectors.values().iterator().next().length;
      setSize(dimensionality);
      System.out.println("Got cached w2vspace for " + persistLoc
              + " of dimensionality " + getSize()
              + " and with " + vectors.size() + " entries.");
    }
  }

  /**
   * Returns the space this subspace was derived from.
   *
   * @return the parent space passed to the constructor
   */
  public Word2VecSpace getSuperSpace() {
    return mySuperSpace;
  }
}