/* * Copyright 2012 * Ubiquitous Knowledge Processing (UKP) Lab and FG Language Technology * Technische Universität Darmstadt * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package de.tudarmstadt.ukp.clarin.webanno.api.dao; import static de.tudarmstadt.ukp.clarin.webanno.api.WebAnnoConst.CHAIN_TYPE; import static de.tudarmstadt.ukp.clarin.webanno.api.WebAnnoConst.RELATION_TYPE; import static de.tudarmstadt.ukp.clarin.webanno.api.WebAnnoConst.SPAN_TYPE; import static java.util.Arrays.asList; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.zip.ZipFile; import javax.persistence.EntityManager; import javax.persistence.NoResultException; import javax.persistence.PersistenceContext; import org.apache.uima.UIMAException; import org.apache.uima.cas.CAS; import org.apache.uima.cas.TypeSystem; import org.apache.uima.cas.impl.CASCompleteSerializer; import org.apache.uima.cas.impl.CASImpl; import org.apache.uima.cas.impl.Serialization; import org.apache.uima.fit.factory.JCasFactory; import org.apache.uima.fit.factory.TypeSystemDescriptionFactory; import org.apache.uima.resource.metadata.TypeDescription; import org.apache.uima.resource.metadata.TypeSystemDescription; import org.apache.uima.resource.metadata.impl.TypeSystemDescription_impl; import org.apache.uima.util.CasCreationUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.slf4j.MDC; import org.springframework.beans.factory.annotation.Value; import org.springframework.core.io.ClassPathResource; import org.springframework.stereotype.Component; import org.springframework.transaction.annotation.Transactional; import de.tudarmstadt.ukp.clarin.webanno.api.AnnotationSchemaService; import de.tudarmstadt.ukp.clarin.webanno.api.ProjectLifecycleAware; import de.tudarmstadt.ukp.clarin.webanno.api.WebAnnoConst; import de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature; import de.tudarmstadt.ukp.clarin.webanno.model.AnnotationLayer; import de.tudarmstadt.ukp.clarin.webanno.model.LinkMode; import de.tudarmstadt.ukp.clarin.webanno.model.Project; import de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument; import de.tudarmstadt.ukp.clarin.webanno.model.Tag; import de.tudarmstadt.ukp.clarin.webanno.model.TagSet; import de.tudarmstadt.ukp.clarin.webanno.support.logging.Logging; import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS; import de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity; import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma; import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.SurfaceForm; import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.chunk.Chunk; import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency; import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.DependencyFlavor; /** * Implementation of methods defined in the {@link AnnotationSchemaService} interface */ @Component(AnnotationSchemaService.SERVICE_NAME) public class AnnotationSchemaServiceImpl implements AnnotationSchemaService, ProjectLifecycleAware { private final Logger log = LoggerFactory.getLogger(getClass()); @Value(value = "${repository.path}") private File dir; @PersistenceContext private EntityManager entityManager; public AnnotationSchemaServiceImpl() { // Nothing to do } @Override @Transactional public void createTag(Tag aTag) throws IOException { entityManager.persist(aTag); try (MDC.MDCCloseable closable = MDC.putCloseable(Logging.KEY_PROJECT_ID, String.valueOf(aTag.getTagSet().getProject().getId()))) { TagSet tagset = aTag.getTagSet(); Project project = tagset.getProject(); log.info("Created tag [{}]({}) in tagset [{}]({}) in project [{}]({})", aTag.getName(), aTag.getId(), tagset.getName(), tagset.getId(), project.getName(), project.getId()); } } @Override @Transactional public void createTagSet(TagSet aTagSet) throws IOException { if (aTagSet.getId() == 0) { entityManager.persist(aTagSet); } else { entityManager.merge(aTagSet); } try (MDC.MDCCloseable closable = MDC.putCloseable(Logging.KEY_PROJECT_ID, String.valueOf(aTagSet.getProject().getId()))) { Project project = aTagSet.getProject(); log.info("Created tagset [{}]({}) in project [{}]({})", aTagSet.getName(), aTagSet.getId(), project.getName(), project.getId()); } } @Override @Transactional public void createLayer(AnnotationLayer aLayer) throws IOException { if (aLayer.getId() == 0) { entityManager.persist(aLayer); } else { entityManager.merge(aLayer); } try (MDC.MDCCloseable closable = MDC.putCloseable(Logging.KEY_PROJECT_ID, String.valueOf(aLayer.getProject().getId()))) { Project project = aLayer.getProject(); log.info("Created layer [{}]({}) in project [{}]({})", aLayer.getName(), aLayer.getId(), project.getName(), project.getId()); } } @Override @Transactional public void createFeature(AnnotationFeature aFeature) { if (aFeature.getId() == 0) { entityManager.persist(aFeature); } else { entityManager.merge(aFeature); } } @Override @Transactional public Tag getTag(String aTagName, TagSet aTagSet) { return entityManager .createQuery("FROM Tag WHERE name = :name AND" + " tagSet =:tagSet", Tag.class) .setParameter("name", aTagName).setParameter("tagSet", aTagSet).getSingleResult(); } @Override public boolean existsTag(String aTagName, TagSet aTagSet) { try { getTag(aTagName, aTagSet); return true; } catch (NoResultException e) { return false; } } @Override @Transactional(noRollbackFor = NoResultException.class) public boolean existsTagSet(String aName, Project aProject) { try { entityManager .createQuery("FROM TagSet WHERE name = :name AND project = :project", TagSet.class).setParameter("name", aName) .setParameter("project", aProject).getSingleResult(); return true; } catch (NoResultException e) { return false; } } @Override @Transactional(noRollbackFor = NoResultException.class) public boolean existsTagSet(Project aProject) { try { entityManager.createQuery("FROM TagSet WHERE project = :project", TagSet.class) .setParameter("project", aProject).getSingleResult(); return true; } catch (NoResultException e) { return false; } } @Override @Transactional(noRollbackFor = NoResultException.class) public boolean existsLayer(String aName, String aType, Project aProject) { try { entityManager .createQuery( "FROM AnnotationLayer WHERE name = :name AND type = :type AND project = :project", AnnotationLayer.class).setParameter("name", aName) .setParameter("type", aType).setParameter("project", aProject) .getSingleResult(); return true; } catch (NoResultException e) { return false; } } @Override public boolean existsFeature(String aName, AnnotationLayer aLayer) { try { entityManager .createQuery("FROM AnnotationFeature WHERE name = :name AND layer = :layer", AnnotationFeature.class).setParameter("name", aName) .setParameter("layer", aLayer).getSingleResult(); return true; } catch (NoResultException e) { return false; } } @Override @Transactional public TagSet getTagSet(String aName, Project aProject) { return entityManager .createQuery("FROM TagSet WHERE name = :name AND project =:project", TagSet.class) .setParameter("name", aName).setParameter("project", aProject).getSingleResult(); } @Override @Transactional public TagSet getTagSet(long aId) { return entityManager.createQuery("FROM TagSet WHERE id = :id", TagSet.class) .setParameter("id", aId).getSingleResult(); } @Override @Transactional public AnnotationLayer getLayer(long aId) { return entityManager .createQuery("FROM AnnotationLayer WHERE id = :id", AnnotationLayer.class) .setParameter("id", aId).getSingleResult(); } @Override @Transactional(noRollbackFor = NoResultException.class) public AnnotationLayer getLayer(String aName, Project aProject) { return entityManager .createQuery("From AnnotationLayer where name = :name AND project =:project", AnnotationLayer.class).setParameter("name", aName) .setParameter("project", aProject).getSingleResult(); } @Override @Transactional(noRollbackFor = NoResultException.class) public AnnotationFeature getFeature(long aId) { return entityManager .createQuery("From AnnotationFeature where id = :id", AnnotationFeature.class) .setParameter("id", aId).getSingleResult(); } @Override @Transactional(noRollbackFor = NoResultException.class) public AnnotationFeature getFeature(String aName, AnnotationLayer aLayer) { return entityManager .createQuery("From AnnotationFeature where name = :name AND layer = :layer", AnnotationFeature.class).setParameter("name", aName) .setParameter("layer", aLayer).getSingleResult(); } @Override @Transactional(noRollbackFor = NoResultException.class) public boolean existsType(String aName, String aType) { try { entityManager .createQuery("From AnnotationLayer where name = :name AND type = :type", AnnotationLayer.class).setParameter("name", aName) .setParameter("type", aType).getSingleResult(); return true; } catch (NoResultException e) { return false; } } @Override public TagSet createTagSet(String aDescription, String aTagSetName, String aLanguage, String[] aTags, String[] aTagDescription, Project aProject) throws IOException { TagSet tagSet = new TagSet(); tagSet.setDescription(aDescription); tagSet.setLanguage(aLanguage); tagSet.setName(aTagSetName); tagSet.setProject(aProject); createTagSet(tagSet); int i = 0; for (String tagName : aTags) { Tag tag = new Tag(); tag.setTagSet(tagSet); tag.setDescription(aTagDescription[i]); tag.setName(tagName); createTag(tag); i++; } return tagSet; } private AnnotationFeature createFeature(String aName, String aUiName, String aDescription, String aType, TagSet aTagSet, Project aProject) throws IOException { AnnotationFeature feature = new AnnotationFeature(); feature.setDescription(aDescription); feature.setName(aName); feature.setType(aType); feature.setProject(aProject); feature.setUiName(aUiName); feature.setTagset(aTagSet); createFeature(feature); return feature; } @Override @Transactional public void initializeTypesForProject(Project aProject, String[] aPostags, String[] aPosTagDescriptions, String[] aDepTags, String[] aDepTagDescriptions, String[] aNeTags, String[] aNeTagDescriptions, String[] aCorefTypeTags, String[] aCorefRelTags) throws IOException { createTokenLayer(aProject); String[] posTags = aPostags.length > 0 ? aPostags : new String[] { "$(", "$,", "$.", "ADJA", "ADJD", "ADV", "APPO", "APPR", "APPRART", "APZR", "ART", "CARD", "FM", "ITJ", "KOKOM", "KON", "KOUI", "KOUS", "NE", "NN", "PAV", "PDAT", "PDS", "PIAT", "PIDAT", "PIS", "PPER", "PPOSAT", "PPOSS", "PRELAT", "PRELS", "PRF", "PROAV", "PTKA", "PTKANT", "PTKNEG", "PTKVZ", "PTKZU", "PWAT", "PWAV", "PWS", "TRUNC", "VAFIN", "VAIMP", "VAINF", "VAPP", "VMFIN", "VMINF", "VMPP", "VVFIN", "VVIMP", "VVINF", "VVIZU", "VVPP", "XY", "--" }; String[] posTagDescriptions = aPosTagDescriptions.length == posTags.length ? aPosTagDescriptions : new String[] { "sonstige Satzzeichen; satzintern \nBsp: - [,]()", "Komma \nBsp: ,", "Satzbeendende Interpunktion \nBsp: . ? ! ; : ", "attributives Adjektiv \nBsp: [das] große [Haus]", "adverbiales oder prädikatives Adjektiv \nBsp: [er fährt] schnell, [er ist] schnell", "Adverb \nBsp: schon, bald, doch ", "Postposition \nBsp: [ihm] zufolge, [der Sache] wegen", "Präposition; Zirkumposition links \nBsp: in [der Stadt], ohne [mich]", "Präposition mit Artikel \nBsp: im [Haus], zur [Sache]", "Zirkumposition rechts \nBsp: [von jetzt] an", "bestimmter oder unbestimmter Artikel \nBsp: der, die, das, ein, eine", "Kardinalzahl \nBsp: zwei [Männer], [im Jahre] 1994", "Fremdsprachliches Material \nBsp: [Er hat das mit ``] A big fish ['' übersetzt]", "Interjektion \nBsp: mhm, ach, tja", "Vergleichskonjunktion \nBsp: als, wie", "nebenordnende Konjunktion \nBsp: und, oder, aber", "unterordnende Konjunktion mit ``zu'' und Infinitiv \nBsp: um [zu leben], anstatt [zu fragen]", "unterordnende Konjunktion mit Satz \nBsp: weil, daß, damit, wenn, ob ", "Eigennamen \nBsp: Hans, Hamburg, HSV ", "normales Nomen \nBsp: Tisch, Herr, [das] Reisen", "Pronominaladverb \nBsp: dafür, dabei, deswegen, trotzdem ", "attribuierendes Demonstrativpronomen \nBsp: jener [Mensch]", "substituierendes Demonstrativpronomen \nBsp: dieser, jener", "attribuierendes Indefinitpronomen ohne Determiner \nBsp: kein [Mensch], irgendein [Glas] ", "attribuierendes Indefinitpronomen mit Determiner \nBsp: [ein] wenig [Wasser], [die] beiden [Brüder] ", "substituierendes Indefinitpronomen \nBsp: keiner, viele, man, niemand ", "irreflexives Personalpronomen \nBsp: ich, er, ihm, mich, dir", "attribuierendes Possessivpronome \nBsp: mein [Buch], deine [Mutter] ", "substituierendes Possessivpronome \nBsp: meins, deiner", "attribuierendes Relativpronomen \nBsp: [der Mann ,] dessen [Hund] ", "substituierendes Relativpronomen \nBsp: [der Hund ,] der ", "reflexives Personalpronomen \nBsp: sich, einander, dich, mir", "PROAV", "Partikel bei Adjektiv oder Adverb \nBsp: am [schönsten], zu [schnell]", "Antwortpartikel \nBsp: ja, nein, danke, bitte ", "Negationspartikel \nBsp: nicht", "abgetrennter Verbzusatz \nBsp: [er kommt] an, [er fährt] rad ", "``zu'' vor Infinitiv \nBsp: zu [gehen]", "attribuierendes Interrogativpronomen \nBsp: welche [Farbe], wessen [Hut] ", "adverbiales Interrogativ- oder Relativpronomen \nBsp: warum, wo, wann, worüber, wobei", "substituierendes Interrogativpronomen \nBsp: wer, was", "Kompositions-Erstglied \nBsp: An- [und Abreise]", "finites Verb, aux \nBsp: [du] bist, [wir] werden ", "Imperativ, aux \nBsp: sei [ruhig !] ", "Infinitiv, aux \nBsp:werden, sein ", "Partizip Perfekt, aux \nBsp: gewesen ", "finites Verb, modal \nBsp: dürfen ", "Infinitiv, modal \nBsp: wollen ", "Partizip Perfekt, modal \nBsp: gekonnt, [er hat gehen] können ", "finites Verb, voll \nBsp: [du] gehst, [wir] kommen [an] ", "Imperativ, voll \nBsp: komm [!] ", "Infinitiv, voll \nBsp: gehen, ankommen", "Infinitiv mit ``zu'', voll \nBsp: anzukommen, loszulassen ", "Partizip Perfekt, voll \nBsp:gegangen, angekommen ", "Nichtwort, Sonderzeichen enthaltend \nBsp:3:7, H2O, D2XW3", "--" }; TagSet posFeatureTagset = createTagSet( "Stuttgart-Tübingen-Tag-Set \nGerman Part of Speech tagset " + "STTS Tag Table (1995/1999): " + "http://www.ims.uni-stuttgart.de/projekte/corplex/TagSets/stts-table.html", "STTS", "de", posTags, posTagDescriptions, aProject); createPOSLayer(aProject, posFeatureTagset); String[] depTags = aDepTags.length > 0 ? aDepTags : new String[] { "ADV", "APP", "ATTR", "AUX", "AVZ", "CJ", "DET", "ETH", "EXPL", "GMOD", "GRAD", "KOM", "KON", "KONJ", "NEB", "OBJA", "OBJA2", "OBJA3", "OBJC", "OBJC2", "OBJC3", "OBJD", "OBJD2", "OBJD3", "OBJG", "OBJG2", "OBJG3", "OBJI", "OBJI2", "OBJI3", "OBJP", "OBJP2", "OBJP3", "PAR", "PART", "PN", "PP", "PRED", "-PUNCT-", "REL", "ROOT", "S", "SUBJ", "SUBJ2", "SUBJ3", "SUBJC", "SUBJC2", "SUBJC3", "SUBJI", "SUBJI2", "CP", "PD", "RE", "CD", "DA", "SVP", "OP", "MO", "JU", "CVC", "NG", "SB", "SBP", "AG", "PM", "OCRC", "OG", "SUBJI3", "VOK", "ZEIT", "$", "--", "OC", "OA", "MNR", "NK", "RC", "EP", "CC", "CM", "UC", "AC", "PNC" }; String[] depTagsDescription = aDepTagDescriptions.length == depTags.length ? aDepTagDescriptions : depTags; TagSet deFeatureTagset = createTagSet("Dependency annotation", "Tiger", "de", depTags, depTagsDescription, aProject); createDepLayer(aProject, deFeatureTagset); String[] neTags = aNeTags.length > 0 ? aNeTags : new String[] { "PER", "PERderiv", "PERpart", "LOC", "LOCderiv", "LOCpart", "ORG", "ORGderiv", "ORGpart", "OTH", "OTHderiv", "OTHpart" }; String[] neTagDescriptions = aNeTagDescriptions.length == neTags.length ? aNeTagDescriptions : new String[] { "Person", "Person derivative", "Hyphenated part is person", "Location derivatives", "Location derivative", "Hyphenated part is location", "Organization", "Organization derivative", "Hyphenated part is organization", "Other: Every name that is not a location, person or organisation", "Other derivative", "Hyphenated part is Other" }; TagSet neFeatureTagset = createTagSet("Named Entity annotation", "NER_WebAnno", "de", neTags, neTagDescriptions, aProject); createNeLayer(aProject, neFeatureTagset); // Coref Layer TagSet corefTypeFeatureTagset = createTagSet("coreference type annotation", "BART", "de", aCorefTypeTags.length > 0 ? aCorefTypeTags : new String[] { "nam" }, aCorefTypeTags.length > 0 ? aCorefTypeTags : new String[] { "nam" }, aProject); TagSet corefRelFeatureTagset = createTagSet("coreference relation annotation", "TuebaDZ", "de", aCorefRelTags.length > 0 ? aCorefRelTags : new String[] { "anaphoric" }, aCorefRelTags.length > 0 ? aCorefRelTags : new String[] { "anaphoric" }, aProject); createCorefLayer(aProject, corefTypeFeatureTagset, corefRelFeatureTagset); createLemmaLayer(aProject); createChunkLayer(aProject); } @Override @Transactional public void initializeTypesForProject(Project aProject) throws IOException { // Default layers with default tagsets createTokenLayer(aProject); TagSet posTagSet = JsonImportUtil.importTagSetFromJson(aProject, new ClassPathResource("/tagsets/mul-pos-ud.json").getInputStream(), this); createPOSLayer(aProject, posTagSet); TagSet depTagSet = JsonImportUtil.importTagSetFromJson(aProject, new ClassPathResource("/tagsets/mul-dep-ud.json").getInputStream(), this); createDepLayer(aProject, depTagSet); TagSet nerTagSet = JsonImportUtil.importTagSetFromJson(aProject, new ClassPathResource("/tagsets/de-ne-webanno.json").getInputStream(), this); createNeLayer(aProject, nerTagSet); TagSet corefTypeTagSet = JsonImportUtil.importTagSetFromJson(aProject, new ClassPathResource("/tagsets/de-coref-type-bart.json").getInputStream(), this); TagSet corefRelTagSet = JsonImportUtil.importTagSetFromJson(aProject, new ClassPathResource("/tagsets/de-coref-rel-tuebadz.json").getInputStream(), this); createCorefLayer(aProject, corefTypeTagSet, corefRelTagSet); createLemmaLayer(aProject); createChunkLayer(aProject); createSurfaceFormLayer(aProject); // Extra tagsets JsonImportUtil.importTagSetFromJson(aProject, new ClassPathResource("/tagsets/de-pos-stts.json").getInputStream(), this); JsonImportUtil.importTagSetFromJson(aProject, new ClassPathResource("/tagsets/de-dep-tiger.json").getInputStream(), this); JsonImportUtil.importTagSetFromJson(aProject, new ClassPathResource("/tagsets/en-dep-sd.json").getInputStream(), this); JsonImportUtil.importTagSetFromJson(aProject, new ClassPathResource("/tagsets/en-pos-ptb-tt.json").getInputStream(), this); JsonImportUtil.importTagSetFromJson(aProject, new ClassPathResource("/tagsets/mul-pos-upos.json").getInputStream(), this); } private void createLemmaLayer(Project aProject) throws IOException { AnnotationLayer tokenLayer = getLayer(Token.class.getName(), aProject); AnnotationFeature tokenLemmaFeature = createFeature("lemma", "lemma", aProject, tokenLayer, Lemma.class.getName()); tokenLemmaFeature.setVisible(true); AnnotationLayer lemmaLayer = new AnnotationLayer(Lemma.class.getName(), "Lemma", SPAN_TYPE, aProject, true); lemmaLayer.setAttachType(tokenLayer); lemmaLayer.setAttachFeature(tokenLemmaFeature); createLayer(lemmaLayer); AnnotationFeature lemmaFeature = new AnnotationFeature(); lemmaFeature.setDescription("lemma Annotation"); lemmaFeature.setName("value"); lemmaFeature.setType(CAS.TYPE_NAME_STRING); lemmaFeature.setProject(aProject); lemmaFeature.setUiName("Lemma value"); lemmaFeature.setLayer(lemmaLayer); createFeature(lemmaFeature); } private AnnotationLayer createCorefLayer(Project aProject, TagSet aCorefTypeTags, TagSet aCorefRelTags) throws IOException { AnnotationLayer base = new AnnotationLayer( "de.tudarmstadt.ukp.dkpro.core.api.coref.type.Coreference", "Coreference", CHAIN_TYPE, aProject, true); base.setCrossSentence(true); base.setAllowStacking(true); base.setMultipleTokens(true); base.setLockToTokenOffset(false); createLayer(base); AnnotationFeature corefTypeFeature = createFeature("referenceType", "referenceType", "Coreference type", CAS.TYPE_NAME_STRING, aCorefTypeTags, aProject); corefTypeFeature.setLayer(base); corefTypeFeature.setVisible(true); AnnotationFeature corefRelFeature = createFeature("referenceRelation", "referenceRelation", "Coreference relation", CAS.TYPE_NAME_STRING, aCorefRelTags, aProject); corefRelFeature.setLayer(base); corefRelFeature.setVisible(true); return base; } private void createNeLayer(Project aProject, TagSet aTagset) throws IOException { AnnotationFeature neFeature = createFeature("value", "value", "Named entity type", CAS.TYPE_NAME_STRING, aTagset, aProject); AnnotationLayer neLayer = new AnnotationLayer(NamedEntity.class.getName(), "Named Entity", SPAN_TYPE, aProject, true); neLayer.setAllowStacking(true); neLayer.setMultipleTokens(true); neLayer.setLockToTokenOffset(false); createLayer(neLayer); neFeature.setLayer(neLayer); } private void createChunkLayer(Project aProject) throws IOException { AnnotationLayer chunkLayer = new AnnotationLayer(Chunk.class.getName(), "Chunk", SPAN_TYPE, aProject, true); chunkLayer.setAllowStacking(false); chunkLayer.setMultipleTokens(true); chunkLayer.setLockToTokenOffset(false); createLayer(chunkLayer); AnnotationFeature chunkValueFeature = new AnnotationFeature(); chunkValueFeature.setDescription("Chunk tag"); chunkValueFeature.setName("chunkValue"); chunkValueFeature.setType(CAS.TYPE_NAME_STRING); chunkValueFeature.setProject(aProject); chunkValueFeature.setUiName("Tag"); chunkValueFeature.setLayer(chunkLayer); createFeature(chunkValueFeature); } private void createSurfaceFormLayer(Project aProject) throws IOException { AnnotationLayer surfaceFormLayer = new AnnotationLayer(SurfaceForm.class.getName(), "Surface form", SPAN_TYPE, aProject, true); surfaceFormLayer.setAllowStacking(false); // The surface form must be locked to tokens for CoNLL-U writer to work properly surfaceFormLayer.setLockToTokenOffset(false); surfaceFormLayer.setMultipleTokens(true); createLayer(surfaceFormLayer); AnnotationFeature surfaceFormValueFeature = new AnnotationFeature(); surfaceFormValueFeature.setDescription("Original surface text"); surfaceFormValueFeature.setName("value"); surfaceFormValueFeature.setType(CAS.TYPE_NAME_STRING); surfaceFormValueFeature.setProject(aProject); surfaceFormValueFeature.setUiName("Form"); surfaceFormValueFeature.setLayer(surfaceFormLayer); createFeature(surfaceFormValueFeature); } private void createDepLayer(Project aProject, TagSet aTagset) throws IOException { // Dependency Layer AnnotationLayer depLayer = new AnnotationLayer(Dependency.class.getName(), "Dependency", RELATION_TYPE, aProject, true); AnnotationLayer tokenLayer = getLayer(Token.class.getName(), aProject); List<AnnotationFeature> tokenFeatures = listAnnotationFeature(tokenLayer); AnnotationFeature tokenPosFeature = null; for (AnnotationFeature feature : tokenFeatures) { if (feature.getName().equals("pos")) { tokenPosFeature = feature; break; } } depLayer.setAttachType(tokenLayer); depLayer.setAttachFeature(tokenPosFeature); createLayer(depLayer); AnnotationFeature featRel = createFeature("DependencyType", "Relation", "Dependency relation", CAS.TYPE_NAME_STRING, aTagset, aProject); featRel.setLayer(depLayer); String[] flavors = { DependencyFlavor.BASIC, DependencyFlavor.ENHANCED }; String[] flavorDesc = { DependencyFlavor.BASIC, DependencyFlavor.ENHANCED }; TagSet flavorsTagset = createTagSet("Dependency flavors", "Dependency flavors", "mul", flavors, flavorDesc, aProject); AnnotationFeature featFlavor = createFeature("flavor", "Flavor", "Dependency relation", CAS.TYPE_NAME_STRING, flavorsTagset, aProject); featFlavor.setLayer(depLayer); } private void createPOSLayer(Project aProject, TagSet aPosTagset) throws IOException { AnnotationLayer tokenLayer = getLayer(Token.class.getName(), aProject); AnnotationLayer posLayer = new AnnotationLayer(POS.class.getName(), "POS", SPAN_TYPE, aProject, true); AnnotationFeature tokenPosFeature = createFeature("pos", "pos", aProject, tokenLayer, POS.class.getName()); tokenPosFeature.setVisible(true); posLayer.setAttachType(tokenLayer); posLayer.setAttachFeature(tokenPosFeature); createLayer(posLayer); AnnotationFeature posFeature = createFeature("PosValue", "PosValue", "Part-of-speech tag", CAS.TYPE_NAME_STRING, aPosTagset, aProject); posFeature.setLayer(posLayer); } private AnnotationLayer createTokenLayer(Project aProject) throws IOException { AnnotationLayer tokenLayer = new AnnotationLayer(Token.class.getName(), "Token", SPAN_TYPE, aProject, true); createLayer(tokenLayer); return tokenLayer; } private AnnotationFeature createFeature(String aName, String aUiname, Project aProject, AnnotationLayer aLayer, String aType) { AnnotationFeature feature = new AnnotationFeature(); feature.setName(aName); feature.setEnabled(true); feature.setType(aType); feature.setUiName(aUiname); feature.setLayer(aLayer); feature.setProject(aProject); createFeature(feature); return feature; } @Override @Transactional public List<AnnotationLayer> listAnnotationType() { return entityManager.createQuery("FROM AnnotationLayer ORDER BY name", AnnotationLayer.class).getResultList(); } @Override @Transactional public List<AnnotationLayer> listAnnotationLayer(Project aProject) { return entityManager .createQuery("FROM AnnotationLayer WHERE project =:project ORDER BY uiName", AnnotationLayer.class).setParameter("project", aProject).getResultList(); } @Override @Transactional public List<AnnotationLayer> listAttachedRelationLayers(AnnotationLayer aLayer) { return entityManager .createQuery( "SELECT l FROM AnnotationLayer l LEFT JOIN l.attachFeature f " + "WHERE l.type = :type AND " + "(l.attachType = :attachType OR f.type = :attachTypeName) " + "ORDER BY l.uiName", AnnotationLayer.class).setParameter("type", RELATION_TYPE) .setParameter("attachType", aLayer) .setParameter("attachTypeName", aLayer.getName()).getResultList(); } @Override @Transactional public List<AnnotationFeature> listAttachedLinkFeatures(AnnotationLayer aLayer) { return entityManager .createQuery( "FROM AnnotationFeature WHERE linkMode in (:modes) AND project = :project AND " + "type in (:attachType) ORDER BY uiName", AnnotationFeature.class) .setParameter("modes", asList(LinkMode.SIMPLE, LinkMode.WITH_ROLE)) .setParameter("attachType", asList(aLayer.getName(), CAS.TYPE_NAME_ANNOTATION)) // Checking for project is necessary because type match is string-based .setParameter("project", aLayer.getProject()).getResultList(); } @Override @Transactional public List<AnnotationFeature> listAnnotationFeature(AnnotationLayer aLayer) { if (aLayer == null || aLayer.getId() == 0) { return new ArrayList<AnnotationFeature>(); } return entityManager .createQuery("FROM AnnotationFeature WHERE layer =:layer ORDER BY uiName", AnnotationFeature.class).setParameter("layer", aLayer).getResultList(); } @Override @Transactional public List<AnnotationFeature> listAnnotationFeature(Project aProject) { return entityManager .createQuery( "FROM AnnotationFeature f WHERE project =:project ORDER BY f.layer.uiName, f.uiName", AnnotationFeature.class).setParameter("project", aProject).getResultList(); } @Override @Transactional public List<Tag> listTags() { return entityManager.createQuery("From Tag ORDER BY name", Tag.class).getResultList(); } @Override @Transactional public List<Tag> listTags(TagSet aTagSet) { List<Tag> tags = entityManager .createQuery("FROM Tag WHERE tagSet = :tagSet ORDER BY name ASC", Tag.class) .setParameter("tagSet", aTagSet).getResultList(); // FIXME ?!? This loop appears to make absolutely not sense! for (int i = 0; i < tags.size(); i++) { tags.get(i).setName(tags.get(i).getName()); } return tags; } @Override @Transactional public List<TagSet> listTagSets() { return entityManager.createQuery("FROM TagSet ORDER BY name ASC", TagSet.class) .getResultList(); } @Override @Transactional(noRollbackFor = NoResultException.class) public List<TagSet> listTagSets(Project aProject) { return entityManager .createQuery("FROM TagSet where project = :project ORDER BY name ASC", TagSet.class) .setParameter("project", aProject).getResultList(); } @Override @Transactional public void removeTag(Tag aTag) { entityManager.remove(aTag); } @Override @Transactional public void removeTagSet(TagSet aTagSet) { for (Tag tag : listTags(aTagSet)) { entityManager.remove(tag); } entityManager.remove(aTagSet); } @Override @Transactional public void removeAnnotationFeature(AnnotationFeature aFeature) { entityManager.remove(aFeature); } @Override @Transactional public void removeAnnotationLayer(AnnotationLayer aLayer) { entityManager.remove(aLayer); } @Override @Transactional public void removeAllTags(TagSet aTagSet) { for (Tag tag : listTags(aTagSet)) { entityManager.remove(tag); } } @Override public List<TypeSystemDescription> getProjectTypes(Project aProject) { // Create a new type system from scratch List<TypeSystemDescription> types = new ArrayList<TypeSystemDescription>(); for (AnnotationLayer type : listAnnotationLayer(aProject)) { if (type.getType().equals(SPAN_TYPE) && !type.isBuiltIn()) { TypeSystemDescription tsd = new TypeSystemDescription_impl(); TypeDescription td = tsd.addType(type.getName(), "", CAS.TYPE_NAME_ANNOTATION); List<AnnotationFeature> features = listAnnotationFeature(type); for (AnnotationFeature feature : features) { generateFeature(tsd, td, feature); } types.add(tsd); } else if (type.getType().equals(RELATION_TYPE) && !type.isBuiltIn()) { TypeSystemDescription tsd = new TypeSystemDescription_impl(); TypeDescription td = tsd.addType(type.getName(), "", CAS.TYPE_NAME_ANNOTATION); AnnotationLayer attachType = type.getAttachType(); td.addFeature(WebAnnoConst.FEAT_REL_TARGET, "", attachType.getName()); td.addFeature(WebAnnoConst.FEAT_REL_SOURCE, "", attachType.getName()); List<AnnotationFeature> features = listAnnotationFeature(type); for (AnnotationFeature feature : features) { generateFeature(tsd, td, feature); } types.add(tsd); } else if (type.getType().equals(CHAIN_TYPE) && !type.isBuiltIn()) { TypeSystemDescription tsdchains = new TypeSystemDescription_impl(); TypeDescription tdChains = tsdchains.addType(type.getName() + "Chain", "", CAS.TYPE_NAME_ANNOTATION); tdChains.addFeature("first", "", type.getName() + "Link"); types.add(tsdchains); TypeSystemDescription tsdLink = new TypeSystemDescription_impl(); TypeDescription tdLink = tsdLink.addType(type.getName() + "Link", "", CAS.TYPE_NAME_ANNOTATION); tdLink.addFeature("next", "", type.getName() + "Link"); tdLink.addFeature("referenceType", "", CAS.TYPE_NAME_STRING); tdLink.addFeature("referenceRelation", "", CAS.TYPE_NAME_STRING); types.add(tsdLink); } } return types; } private void generateFeature(TypeSystemDescription aTSD, TypeDescription aTD, AnnotationFeature aFeature) { switch (aFeature.getMultiValueMode()) { case NONE: aTD.addFeature(aFeature.getName(), "", aFeature.getType()); break; case ARRAY: { switch (aFeature.getLinkMode()) { case WITH_ROLE: { // Link type TypeDescription linkTD = aTSD.addType(aFeature.getLinkTypeName(), "", CAS.TYPE_NAME_TOP); linkTD.addFeature(aFeature.getLinkTypeRoleFeatureName(), "", CAS.TYPE_NAME_STRING); linkTD.addFeature(aFeature.getLinkTypeTargetFeatureName(), "", aFeature.getType()); // Link feature aTD.addFeature(aFeature.getName(), "", CAS.TYPE_NAME_FS_ARRAY, linkTD.getName(), false); break; } default: throw new IllegalArgumentException("Unsupported link mode [" + aFeature.getLinkMode() + "] on feature [" + aFeature.getName() + "]"); } break; } default: throw new IllegalArgumentException("Unsupported multi-value mode [" + aFeature.getMultiValueMode() + "] on feature [" + aFeature.getName() + "]"); } } @Override public void upgradeCas(CAS aCas, SourceDocument aSourceDocument, String aUser) throws UIMAException, IOException { TypeSystemDescription builtInTypes = TypeSystemDescriptionFactory .createTypeSystemDescription(); List<TypeSystemDescription> projectTypes = getProjectTypes(aSourceDocument.getProject()); projectTypes.add(builtInTypes); TypeSystemDescription allTypes = CasCreationUtils.mergeTypeSystems(projectTypes); // Prepare template for new CAS CAS newCas = JCasFactory.createJCas(allTypes).getCas(); CASCompleteSerializer serializer = Serialization.serializeCASComplete((CASImpl) newCas); // Save old type system TypeSystem oldTypeSystem = aCas.getTypeSystem(); // Save old CAS contents ByteArrayOutputStream os2 = new ByteArrayOutputStream(); Serialization.serializeWithCompression(aCas, os2, oldTypeSystem); // Prepare CAS with new type system Serialization.deserializeCASComplete(serializer, (CASImpl) aCas); // Restore CAS data to new type system Serialization.deserializeCAS(aCas, new ByteArrayInputStream(os2.toByteArray()), oldTypeSystem, null); // Make sure JCas is properly initialized too aCas.getJCas(); try (MDC.MDCCloseable closable = MDC.putCloseable( Logging.KEY_PROJECT_ID, String.valueOf(aSourceDocument.getProject().getId()))) { Project project = aSourceDocument.getProject(); log.info( "Upgraded CAS of user [{}] for " + "document [{}]({}) in project [{}]({})", aUser, aSourceDocument.getName(), aSourceDocument.getId(), project.getName(), project.getId()); } } @Override public void afterProjectCreate(Project aProject) throws Exception { // Nothing to do } @Override @Transactional public void beforeProjectRemove(Project aProject) throws Exception { for (AnnotationFeature feature : listAnnotationFeature(aProject)) { removeAnnotationFeature(feature); } // remove the layers too for (AnnotationLayer layer : listAnnotationLayer(aProject)) { removeAnnotationLayer(layer); } for (TagSet tagSet : listTagSets(aProject)) { removeTagSet(tagSet); } } @Override @Transactional public void onProjectImport(ZipFile aZip, de.tudarmstadt.ukp.clarin.webanno.export.model.Project aExportedProject, Project aProject) throws Exception { // Nothing at the moment } }