001/* 002 * Copyright 2021 Dominik Kopczynski, Nils Hoffmann. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.lifstools.jgoslin.parser; 017 018import org.lifstools.jgoslin.domain.LipidFaBondType; 019import org.lifstools.jgoslin.domain.LipidException; 020import org.lifstools.jgoslin.domain.KnownFunctionalGroups; 021import org.lifstools.jgoslin.domain.Element; 022import org.lifstools.jgoslin.domain.Headgroup; 023import org.lifstools.jgoslin.domain.FunctionalGroup; 024import org.lifstools.jgoslin.domain.LipidAdduct; 025import org.lifstools.jgoslin.domain.Adduct; 026import org.lifstools.jgoslin.domain.LipidLevel; 027import org.lifstools.jgoslin.domain.LipidParsingException; 028import org.lifstools.jgoslin.domain.Cycle; 029import org.lifstools.jgoslin.domain.FattyAcid; 030import org.lifstools.jgoslin.domain.HeadgroupDecorator; 031import java.util.ArrayList; 032import java.util.Map; 033import static java.util.Map.entry; 034import java.util.Set; 035import org.lifstools.jgoslin.domain.ConstraintViolationException; 036import org.lifstools.jgoslin.domain.DoubleBonds; 037 038/** 039 * Event handler implementation for the {@link LipidMapsParser}. 040 * 041 * @author Dominik Kopczynski 042 * @author Nils Hoffmann 043 */ 044public class LipidMapsParserEventHandler extends LipidBaseParserEventHandler { 045 046 private boolean omitFa; 047 private int dbNumbers; 048 private int dbPosition; 049 private String dbCistrans; 050 private String modText; 051 private int modPos; 052 private int modNum; 053 private boolean addOmegaLinoleoyloxyCer; 054 055 private static final Set<String> HEAD_GROUP_EXCEPTIONS = Set.of("PA", "PC", "PE", "PG", "PI", "PS"); 056 private static final Map<String, Integer> ACER_HEADS = Map.ofEntries( 057 entry("1-O-myristoyl", 14), 058 entry("1-O-palmitoyl", 16), 059 entry("1-O-stearoyl", 18), 060 entry("1-O-eicosanoyl", 20), 061 entry("1-O-behenoyl", 22), 062 entry("1-O-lignoceroyl", 24), 063 entry("1-O-cerotoyl", 26), 064 entry("1-O-pentacosanoyl", 25), 065 entry("1-O-carboceroyl", 28), 066 entry("1-O-tricosanoyl", 30), 067 entry("1-O-lignoceroyl-omega-linoleoyloxy", 24), 068 entry("1-O-stearoyl-omega-linoleoyloxy", 18) 069 ); 070 071 /** 072 * Create a new {@code LipidMapsParserEventHandler}. 073 * 074 * @param knownFunctionalGroups the known functional groups 075 */ 076 public LipidMapsParserEventHandler(KnownFunctionalGroups knownFunctionalGroups) { 077 super(knownFunctionalGroups); 078 try { 079 registeredEvents = Map.ofEntries( 080 entry("lipid_pre_event", this::resetParser), 081 entry("lipid_post_event", this::buildLipid), 082 083 // set adduct events 084 entry("adduct_info_pre_event", this::newAdduct), 085 entry("adduct_pre_event", this::addAdduct), 086 entry("charge_pre_event", this::addCharge), 087 entry("charge_sign_pre_event", this::addChargeSign), 088 089 entry("mediator_pre_event", this::mediatorEvent), 090 091 entry("sgl_species_pre_event", this::setSpeciesLevel), 092 entry("species_fa_pre_event", this::setSpeciesLevel), 093 entry("tgl_species_pre_event", this::setSpeciesLevel), 094 entry("dpl_species_pre_event", this::setSpeciesLevel), 095 entry("cl_species_pre_event", this::setSpeciesLevel), 096 entry("dsl_species_pre_event", this::setSpeciesLevel), 097 entry("fa2_unsorted_pre_event", this::setMolecularSubspeciesLevel), 098 entry("fa3_unsorted_pre_event", this::setMolecularSubspeciesLevel), 099 entry("fa4_unsorted_pre_event", this::setMolecularSubspeciesLevel), 100 entry("hg_dg_pre_event", this::setMolecularSubspeciesLevel), 101 entry("fa_lpl_molecular_pre_event", this::setMolecularSubspeciesLevel), 102 entry("hg_lbpa_pre_event", this::setMolecularSubspeciesLevel), 103 104 entry("fa_no_hg_pre_event", this::pureFa), 105 106 entry("hg_sgl_pre_event", this::setHeadGroupName), 107 entry("hg_gl_pre_event", this::setHeadGroupName), 108 entry("hg_cl_pre_event", this::setHeadGroupName), 109 entry("hg_dpl_pre_event", this::setHeadGroupName), 110 entry("hg_lpl_pre_event", this::setHeadGroupName), 111 entry("hg_threepl_pre_event", this::setHeadGroupName), 112 entry("hg_fourpl_pre_event", this::setHeadGroupName), 113 entry("hg_dsl_pre_event", this::setHeadGroupName), 114 entry("hg_cpa_pre_event", this::setHeadGroupName), 115 entry("ch_pre_event", this::setHeadGroupName), 116 entry("hg_che_pre_event", this::setHeadGroupName), 117 entry("mediator_const_pre_event", this::setHeadGroupName), 118 entry("pk_hg_pre_event", this::setHeadGroupName), 119 entry("hg_fa_pre_event", this::setHeadGroupName), 120 entry("hg_lsl_pre_event", this::setHeadGroupName), 121 entry("special_cer_pre_event", this::setHeadGroupName), 122 entry("special_cer_hg_pre_event", this::setHeadGroupName), 123 entry("omega_linoleoyloxy_Cer_pre_event", this::setOmegaHeadGroupName), 124 125 entry("lcb_pre_event", this::newLcb), 126 entry("lcb_post_event", this::cleanLcb), 127 entry("fa_pre_event", this::newFa), 128 entry("fa_post_event", this::appendFa), 129 130 entry("glyco_struct_pre_event", this::addGlyco), 131 132 entry("db_single_position_pre_event", this::setIsomericLevel), 133 entry("db_single_position_post_event", this::addDbPosition), 134 entry("db_position_number_pre_event", this::addDbPositionNumber), 135 entry("cistrans_pre_event", this::addCistrans), 136 137 entry("ether_prefix_pre_event", this::addEther), 138 entry("ether_suffix_pre_event", this::addEther), 139 entry("lcb_pure_fa_pre_event", this::addDiHydroxyl), 140 entry("hydroxyl_pre_event", this::addHydroxyl), 141 entry("hydroxyl_lcb_pre_event", this::addHydroxylLcb), 142 entry("db_count_pre_event", this::addDoubleBonds), 143 entry("carbon_pre_event", this::addCarbon), 144 145 entry("structural_mod_pre_event", this::setStructuralSubspeciesLevel), 146 entry("single_mod_pre_event", this::setMod), 147 entry("mod_text_pre_event", this::setModText), 148 entry("mod_pos_pre_event", this::setModPos), 149 entry("mod_num_pre_event", this::setModNum), 150 entry("single_mod_post_event", this::addFunctionalGroup), 151 entry("special_cer_prefix_pre_event", this::addAcer) 152 ); 153 } catch (Exception e) { 154 throw new LipidParsingException("Cannot initialize LipidMapsParserEventHandler."); 155 } 156 } 157 158 @Override 159 protected void resetParser(TreeNode node) { 160 content = null; 161 level = LipidLevel.FULL_STRUCTURE; 162 headGroup = ""; 163 lcb = null; 164 adduct = null; 165 faList.clear(); 166 currentFa = null; 167 useHeadGroup = false; 168 omitFa = false; 169 dbPosition = 0; 170 dbNumbers = -1; 171 dbCistrans = ""; 172 modPos = -1; 173 modNum = 1; 174 modText = ""; 175 headgroupDecorators.clear(); 176 addOmegaLinoleoyloxyCer = false; 177 } 178 179 private void addAcer(TreeNode node) { 180 String head = node.getText(); 181 headGroup = "ACer"; 182 183 if (!ACER_HEADS.containsKey(head)) { 184 throw new LipidException("ACer head group '" + head + "' unknown"); 185 } 186 187 HeadgroupDecorator hgd = new HeadgroupDecorator("decorator_acyl", -1, 1, null, true, knownFunctionalGroups); 188 int acer_num = ACER_HEADS.get(head); 189 hgd.getFunctionalGroupsInternal().put("decorator_acyl", new ArrayList<>()); 190 hgd.getFunctionalGroupsInternal().get("decorator_acyl").add(new FattyAcid("FA", acer_num, knownFunctionalGroups)); 191 headgroupDecorators.add(hgd); 192 193 if (head.equals("1-O-lignoceroyl-omega-linoleoyloxy") || head.equals("1-O-stearoyl-omega-linoleoyloxy")) { 194 addOmegaLinoleoyloxyCer = true; 195 } 196 } 197 198 private void setMolecularSubspeciesLevel(TreeNode node) { 199 setLipidLevel(LipidLevel.MOLECULAR_SPECIES); 200 } 201 202 private void pureFa(TreeNode node) { 203 headGroup = "FA"; 204 } 205 206 private void mediatorEvent(TreeNode node) { 207 useHeadGroup = true; 208 headGroup = node.getText(); 209 } 210 211 private void setIsomericLevel(TreeNode node) { 212 dbPosition = 0; 213 dbCistrans = ""; 214 } 215 216 private void addDbPosition(TreeNode node) { 217 if (currentFa != null) { 218 currentFa.getDoubleBonds().getDoubleBondPositions().put(dbPosition, dbCistrans); 219 220 if (!dbCistrans.equals("E") && !dbCistrans.equals("Z")) { 221 setLipidLevel(LipidLevel.STRUCTURE_DEFINED); 222 } 223 224 } 225 } 226 227 private void setOmegaHeadGroupName(TreeNode node) { 228 addOmegaLinoleoyloxyCer = true; 229 setHeadGroupName(node); 230 } 231 232 private void addGlyco(TreeNode node) { 233 String glyco_name = node.getText(); 234 HeadgroupDecorator functional_group = null; 235 try { 236 functional_group = (HeadgroupDecorator) knownFunctionalGroups.get(glyco_name); 237 } catch (Exception e) { 238 throw new LipidParsingException("Carbohydrate '" + glyco_name + "' unknown"); 239 } 240 241 functional_group.getElements().put(Element.O, functional_group.getElements().get(Element.O) - 1); 242 headgroupDecorators.add(functional_group); 243 } 244 245 private void addDbPositionNumber(TreeNode node) { 246 dbPosition = Integer.valueOf(node.getText()); 247 } 248 249 private void addCistrans(TreeNode node) { 250 dbCistrans = node.getText(); 251 } 252 253 private void setHeadGroupName(TreeNode node) { 254 headGroup = node.getText(); 255 } 256 257 private void setSpeciesLevel(TreeNode node) { 258 setLipidLevel(LipidLevel.SPECIES); 259 } 260 261 private void setStructuralSubspeciesLevel(TreeNode node) { 262 setLipidLevel(LipidLevel.STRUCTURE_DEFINED); 263 } 264 265 private void setMod(TreeNode node) { 266 modText = ""; 267 modPos = -1; 268 modNum = 1; 269 } 270 271 private void setModText(TreeNode node) { 272 modText = node.getText(); 273 } 274 275 private void setModPos(TreeNode node) { 276 modPos = node.getInt(); 277 } 278 279 private void setModNum(TreeNode node) { 280 modNum = node.getInt(); 281 } 282 283 private void addFunctionalGroup(TreeNode node) { 284 if (!modText.equals("Cp")) { 285 FunctionalGroup functional_group = knownFunctionalGroups.get(modText); 286 functional_group.setPosition(modPos); 287 functional_group.setCount(modNum); 288 String fg_name = functional_group.getName(); 289 if (!currentFa.getFunctionalGroupsInternal().containsKey(fg_name)) { 290 currentFa.getFunctionalGroupsInternal().put(fg_name, new ArrayList<>()); 291 } 292 currentFa.getFunctionalGroupsInternal().get(fg_name).add(functional_group); 293 } else { 294 currentFa.setNumCarbon(currentFa.getNumCarbon() + 1); 295 Cycle cycle = new Cycle(3, modPos, modPos + 2, knownFunctionalGroups); 296 if (!currentFa.getFunctionalGroupsInternal().containsKey("cy")) { 297 currentFa.getFunctionalGroupsInternal().put("cy", new ArrayList<>()); 298 } 299 currentFa.getFunctionalGroupsInternal().get("cy").add(cycle); 300 } 301 } 302 303 private void newFa(TreeNode node) { 304 dbNumbers = -1; 305 currentFa = new FattyAcid("FA", knownFunctionalGroups); 306 } 307 308 private void newLcb(TreeNode node) { 309 lcb = new FattyAcid("LCB", knownFunctionalGroups); 310 lcb.setType(LipidFaBondType.LCB_REGULAR); 311 setLipidLevel(LipidLevel.STRUCTURE_DEFINED); 312 currentFa = lcb; 313 } 314 315 private void cleanLcb(TreeNode node) { 316 if (dbNumbers > -1 && dbNumbers != currentFa.getDoubleBonds().getNumDoubleBonds()) { 317 throw new LipidException("Double bond count does not match with number of double bond positions"); 318 } 319 if (currentFa.getDoubleBonds().getDoubleBondPositions().isEmpty() && currentFa.getDoubleBonds().getNumDoubleBonds() > 0) { 320 setLipidLevel(LipidLevel.SN_POSITION); 321 } 322 currentFa = null; 323 } 324 325 private void appendFa(TreeNode node) { 326 if (dbNumbers > -1 && dbNumbers != currentFa.getDoubleBonds().getNumDoubleBonds()) { 327 throw new LipidException("Double bond count does not match with number of double bond positions"); 328 } 329 if (currentFa.getDoubleBonds().getDoubleBondPositions().isEmpty() && currentFa.getDoubleBonds().getNumDoubleBonds() > 0) { 330 setLipidLevel(LipidLevel.SN_POSITION); 331 } 332 333 if (currentFa.getNumCarbon() == 0) { 334 omitFa = true; 335 } 336 faList.add(currentFa); 337 currentFa = null; 338 } 339 340 private void addEther(TreeNode node) { 341 String ether = node.getText(); 342 if (ether.equals("O-") || ether.equals("e")) { 343 currentFa.setLipidFaBondType(LipidFaBondType.ETHER_PLASMANYL); 344 } else if (ether.equals("P-") || ether.equals("p")) { 345 currentFa.setLipidFaBondType(LipidFaBondType.ETHER_PLASMENYL); 346 } 347 } 348 349 private void addHydroxyl(TreeNode node) { 350 int num_h = node.getInt(); 351 352 if (spRegularLcb()) { 353 num_h -= 1; 354 } 355 356 FunctionalGroup functional_group = knownFunctionalGroups.get("OH"); 357 functional_group.setCount(num_h); 358 if (!currentFa.getFunctionalGroupsInternal().containsKey("OH")) { 359 currentFa.getFunctionalGroupsInternal().put("OH", new ArrayList<>()); 360 } 361 currentFa.getFunctionalGroupsInternal().get("OH").add(functional_group); 362 } 363 364 private void addDiHydroxyl(TreeNode node) { 365 int num_h = 2; 366 367 if (spRegularLcb()) { 368 num_h -= 1; 369 } 370 371 FunctionalGroup functional_group = knownFunctionalGroups.get("OH"); 372 functional_group.setCount(num_h); 373 if (!currentFa.getFunctionalGroupsInternal().containsKey("OH")) { 374 currentFa.getFunctionalGroupsInternal().put("OH", new ArrayList<>()); 375 } 376 currentFa.getFunctionalGroupsInternal().get("OH").add(functional_group); 377 } 378 379 private void addHydroxylLcb(TreeNode node) { 380 String hydroxyl = node.getText(); 381 int num_h = 0; 382 if (hydroxyl.equals("m")) { 383 num_h = 1; 384 } else if (hydroxyl.equals("d")) { 385 num_h = 2; 386 } else if (hydroxyl.equals("t")) { 387 num_h = 3; 388 } 389 390 if (spRegularLcb()) { 391 num_h -= 1; 392 } 393 394 FunctionalGroup functional_group = knownFunctionalGroups.get("OH"); 395 functional_group.setCount(num_h); 396 if (!currentFa.getFunctionalGroupsInternal().containsKey("OH")) { 397 currentFa.getFunctionalGroupsInternal().put("OH", new ArrayList<>()); 398 } 399 currentFa.getFunctionalGroupsInternal().get("OH").add(functional_group); 400 } 401 402 private void addDoubleBonds(TreeNode node) { 403 currentFa.getDoubleBonds().setNumDoubleBonds(currentFa.getDoubleBonds().getNumDoubleBonds() + node.getInt()); 404 } 405 406 private void addCarbon(TreeNode node) { 407 currentFa.setNumCarbon(node.getInt()); 408 } 409 410 private void buildLipid(TreeNode node) { 411 if (omitFa && HEAD_GROUP_EXCEPTIONS.contains(headGroup)) { 412 headGroup = "L" + headGroup; 413 } 414 415 if (lcb != null) { 416 faList.add(0, lcb); 417 } 418 419 if (addOmegaLinoleoyloxyCer){ 420 if (faList.size() != 2){ 421 throw new ConstraintViolationException("omega-linoleoyloxy-Cer with a different combination to one long chain base and one fatty acyl chain unknown"); 422 } 423 Map<String, ArrayList<FunctionalGroup>> fgroups = faList.get(faList.size() - 1).getFunctionalGroupsInternal(); 424 if (!fgroups.containsKey("acyl")) fgroups.put("acyl", new ArrayList<>()); 425 426 DoubleBonds db = new DoubleBonds(2); 427 db.getDoubleBondPositions().put(9, "Z"); 428 db.getDoubleBondPositions().put(12, "Z"); 429 faList.get(faList.size() - 1).getFunctionalGroupsInternal().get("acyl").add(new FattyAcid("FA", 18, db)); 430 headGroup = "Cer"; 431 } 432 433 Headgroup headgroup = prepareHeadgroupAndChecks(); 434 content = new LipidAdduct(assembleLipid(headgroup), adduct); 435 } 436 437 private void newAdduct(TreeNode node) { 438 adduct = new Adduct("", ""); 439 } 440 441 private void addAdduct(TreeNode node) { 442 adduct.setAdductString(node.getText()); 443 } 444 445 private void addCharge(TreeNode node) { 446 adduct.setCharge(Integer.valueOf(node.getText())); 447 } 448 449 private void addChargeSign(TreeNode node) { 450 String sign = node.getText(); 451 if (sign.equals("+")) { 452 adduct.setChargeSign(1); 453 } else if (sign.equals("-")) { 454 adduct.setChargeSign(-1); 455 } 456 } 457}