package edu.berkeley.cs.nlp.ocular.model.em;

import edu.berkeley.cs.nlp.ocular.model.CharacterTemplate;
import tberg.murphy.gpu.CudaUtil;

/**
 * Plain-Java, nested-loop implementation of {@link EmissionCacheInnerLoop}.
 *
 * <p>For every template width, every start position in the sequence and every
 * template of that width, {@link #compute} adds the dot product of the
 * observation window with the template to the matching cell of a flat
 * {@code scores} array. This is done once for the "white" arrays and once for
 * the "black" arrays.
 *
 * @author Taylor Berg-Kirkpatrick (tberg@eecs.berkeley.edu)
 */
public class DefaultInnerLoop implements EmissionCacheInnerLoop {

	int numThreads;
	float[][] whiteTemplates;
	float[][] blackTemplates;
	int[] templateNumIndices;
	int[] templateIndicesOffsets;
	int maxTemplateWidth;
	int minTemplateWidth;

	/**
	 * @param numThreads value reported by {@link #numOuterThreads()}
	 */
	public DefaultInnerLoop(int numThreads) {
		this.numThreads = numThreads;
	}

	/**
	 * Stores the template data used by later calls to {@link #compute}.
	 * The arrays are kept by reference, not copied.
	 *
	 * @param whiteTemplates white templates, one flat array per template width, indexed by {@code width - minTemplateWidth}
	 * @param blackTemplates black templates, indexed the same way as {@code whiteTemplates}
	 * @param templateNumIndices number of templates for each width, indexed by {@code width - minTemplateWidth}
	 * @param templateIndicesOffsets per-width offset into {@code scores}; it is multiplied by the sequence length in {@link #compute}
	 * @param minTemplateWidth smallest template width (inclusive)
	 * @param maxTemplateWidth largest template width (inclusive)
	 * @param maxSequenceLength not used by this implementation
	 * @param totalTemplateNumIndices not used by this implementation
	 */
	public void startup(float[][] whiteTemplates, float[][] blackTemplates, int[] templateNumIndices, int[] templateIndicesOffsets, int minTemplateWidth, int maxTemplateWidth, int maxSequenceLength, int totalTemplateNumIndices) {
		this.whiteTemplates = whiteTemplates;
		this.blackTemplates = blackTemplates;
		this.templateNumIndices = templateNumIndices;
		this.templateIndicesOffsets = templateIndicesOffsets;
		this.maxTemplateWidth = maxTemplateWidth;
		this.minTemplateWidth = minTemplateWidth;
	}

	/** Does nothing; this implementation holds no resources to release. */
	public void shutdown() {
	}

	/**
	 * Adds white and black template scores into {@code scores} for every
	 * template width in {@code [minTemplateWidth, maxTemplateWidth]}.
	 *
	 * <p>Scores are accumulated with {@code +=}; this method never clears
	 * {@code scores}.
	 *
	 * @param scores flat output array, updated in place
	 * @param whiteObservations white observations, read in windows of {@code width * CharacterTemplate.LINE_HEIGHT} values starting at {@code t * CharacterTemplate.LINE_HEIGHT}
	 * @param blackObservations black observations, read the same way
	 * @param sequenceLength sequence length; start positions run from 0 to {@code sequenceLength - width} inclusive
	 */
	public void compute(final float[] scores, final float[] whiteObservations, final float[] blackObservations, final int sequenceLength) {
		for (int tw = minTemplateWidth; tw <= maxTemplateWidth; ++tw) {
			final int widthIndex = tw - minTemplateWidth;
			final float[] whiteTemplatesForWidth = whiteTemplates[widthIndex];
			final float[] blackTemplatesForWidth = blackTemplates[widthIndex];

			// Number of start positions at which a template of this width fits.
			final int numPositions = (sequenceLength - tw) + 1;
			if (numPositions <= 0) {
				continue;
			}

			final int numTemplates = templateNumIndices[widthIndex];
			final int scoresOffset = templateIndicesOffsets[widthIndex] * sequenceLength;

			// White is accumulated for all cells before black so that the
			// floating-point summation order into each cell stays fixed.
			accumulateScores(scores, whiteObservations, whiteTemplatesForWidth, tw, numTemplates, numPositions, scoresOffset, sequenceLength);
			accumulateScores(scores, blackObservations, blackTemplatesForWidth, tw, numTemplates, numPositions, scoresOffset, sequenceLength);
		}
	}

	/**
	 * Adds, for one template width and one observation/template pair, the dot
	 * product of each observation window with each template to {@code scores}.
	 */
	private static void accumulateScores(float[] scores, float[] observations, float[] templatesForWidth, int templateWidth, int numTemplates, int numPositions, int scoresOffset, int sequenceLength) {
		final int windowSize = templateWidth * CharacterTemplate.LINE_HEIGHT;
		for (int t = 0; t < numPositions; ++t) {
			final int observationStart = t * CharacterTemplate.LINE_HEIGHT;
			for (int i = 0; i < numTemplates; ++i) {
				final int templateStart = i * windowSize;
				float score = 0.0f;
				for (int j = 0; j < windowSize; ++j) {
					score += observations[observationStart + j] * templatesForWidth[templateStart + j];
				}
				// Cell position within this width's section is delegated to CudaUtil.flatten.
				scores[scoresOffset + CudaUtil.flatten(sequenceLength, numTemplates, t, i)] += score;
			}
		}
	}

	/** @return the thread count passed to the constructor */
	public int numOuterThreads() {
		return numThreads;
	}

	/** @return always 1 */
	public int numPopulateThreads() {
		return 1;
	}
}