LipidMapsParserEventHandler.java

  1. /*
  2.  * Copyright 2021 Dominik Kopczynski, Nils Hoffmann.
  3.  *
  4.  * Licensed under the Apache License, Version 2.0 (the "License");
  5.  * you may not use this file except in compliance with the License.
  6.  * You may obtain a copy of the License at
  7.  *
  8.  *      http://www.apache.org/licenses/LICENSE-2.0
  9.  *
  10.  * Unless required by applicable law or agreed to in writing, software
  11.  * distributed under the License is distributed on an "AS IS" BASIS,
  12.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13.  * See the License for the specific language governing permissions and
  14.  * limitations under the License.
  15.  */
  16. package org.lifstools.jgoslin.parser;

  17. import org.lifstools.jgoslin.domain.LipidFaBondType;
  18. import org.lifstools.jgoslin.domain.LipidException;
  19. import org.lifstools.jgoslin.domain.KnownFunctionalGroups;
  20. import org.lifstools.jgoslin.domain.Element;
  21. import org.lifstools.jgoslin.domain.Headgroup;
  22. import org.lifstools.jgoslin.domain.FunctionalGroup;
  23. import org.lifstools.jgoslin.domain.LipidAdduct;
  24. import org.lifstools.jgoslin.domain.Adduct;
  25. import org.lifstools.jgoslin.domain.LipidLevel;
  26. import org.lifstools.jgoslin.domain.LipidParsingException;
  27. import org.lifstools.jgoslin.domain.Cycle;
  28. import org.lifstools.jgoslin.domain.FattyAcid;
  29. import org.lifstools.jgoslin.domain.HeadgroupDecorator;
  30. import java.util.ArrayList;
  31. import java.util.Map;
  32. import static java.util.Map.entry;
  33. import java.util.Set;
  34. import org.lifstools.jgoslin.domain.ConstraintViolationException;
  35. import org.lifstools.jgoslin.domain.DoubleBonds;

  36. /**
  37.  * Event handler implementation for the {@link LipidMapsParser}.
  38.  *
  39.  * @author Dominik Kopczynski
  40.  * @author Nils Hoffmann
  41.  */
  42. public class LipidMapsParserEventHandler extends LipidBaseParserEventHandler {

  43.     private boolean omitFa;
  44.     private int dbNumbers;
  45.     private int dbPosition;
  46.     private String dbCistrans;
  47.     private String modText;
  48.     private int modPos;
  49.     private int modNum;
  50.     private boolean addOmegaLinoleoyloxyCer;

  51.     private static final Set<String> HEAD_GROUP_EXCEPTIONS = Set.of("PA", "PC", "PE", "PG", "PI", "PS");
  52.     private static final Map<String, Integer> ACER_HEADS = Map.ofEntries(
  53.         entry("1-O-myristoyl", 14),
  54.         entry("1-O-palmitoyl", 16),
  55.         entry("1-O-stearoyl", 18),
  56.         entry("1-O-eicosanoyl", 20),
  57.         entry("1-O-behenoyl", 22),
  58.         entry("1-O-lignoceroyl", 24),
  59.         entry("1-O-cerotoyl", 26),
  60.         entry("1-O-pentacosanoyl", 25),
  61.         entry("1-O-carboceroyl", 28),
  62.         entry("1-O-tricosanoyl", 30),
  63.         entry("1-O-lignoceroyl-omega-linoleoyloxy", 24),
  64.         entry("1-O-stearoyl-omega-linoleoyloxy", 18)
  65.     );

  66.     /**
  67.      * Create a new {@code LipidMapsParserEventHandler}.
  68.      *
  69.      * @param knownFunctionalGroups the known functional groups
  70.      */
  71.     public LipidMapsParserEventHandler(KnownFunctionalGroups knownFunctionalGroups) {
  72.         super(knownFunctionalGroups);
  73.         try {
  74.             registeredEvents = Map.ofEntries(
  75.                 entry("lipid_pre_event", this::resetParser),
  76.                 entry("lipid_post_event", this::buildLipid),

  77.                 // set adduct events
  78.                 entry("adduct_info_pre_event", this::newAdduct),
  79.                 entry("adduct_pre_event", this::addAdduct),
  80.                 entry("charge_pre_event", this::addCharge),
  81.                 entry("charge_sign_pre_event", this::addChargeSign),

  82.                 entry("mediator_pre_event", this::mediatorEvent),

  83.                 entry("sgl_species_pre_event", this::setSpeciesLevel),
  84.                 entry("species_fa_pre_event", this::setSpeciesLevel),
  85.                 entry("tgl_species_pre_event", this::setSpeciesLevel),
  86.                 entry("dpl_species_pre_event", this::setSpeciesLevel),
  87.                 entry("cl_species_pre_event", this::setSpeciesLevel),
  88.                 entry("dsl_species_pre_event", this::setSpeciesLevel),
  89.                 entry("fa2_unsorted_pre_event", this::setMolecularSubspeciesLevel),
  90.                 entry("fa3_unsorted_pre_event", this::setMolecularSubspeciesLevel),
  91.                 entry("fa4_unsorted_pre_event", this::setMolecularSubspeciesLevel),
  92.                 entry("hg_dg_pre_event", this::setMolecularSubspeciesLevel),
  93.                 entry("fa_lpl_molecular_pre_event", this::setMolecularSubspeciesLevel),
  94.                 entry("hg_lbpa_pre_event", this::setMolecularSubspeciesLevel),

  95.                 entry("fa_no_hg_pre_event", this::pureFa),

  96.                 entry("hg_sgl_pre_event", this::setHeadGroupName),
  97.                 entry("hg_gl_pre_event", this::setHeadGroupName),
  98.                 entry("hg_cl_pre_event", this::setHeadGroupName),
  99.                 entry("hg_dpl_pre_event", this::setHeadGroupName),
  100.                 entry("hg_lpl_pre_event", this::setHeadGroupName),
  101.                 entry("hg_threepl_pre_event", this::setHeadGroupName),
  102.                 entry("hg_fourpl_pre_event", this::setHeadGroupName),
  103.                 entry("hg_dsl_pre_event", this::setHeadGroupName),
  104.                 entry("hg_cpa_pre_event", this::setHeadGroupName),
  105.                 entry("ch_pre_event", this::setHeadGroupName),
  106.                 entry("hg_che_pre_event", this::setHeadGroupName),
  107.                 entry("mediator_const_pre_event", this::setHeadGroupName),
  108.                 entry("pk_hg_pre_event", this::setHeadGroupName),
  109.                 entry("hg_fa_pre_event", this::setHeadGroupName),
  110.                 entry("hg_lsl_pre_event", this::setHeadGroupName),
  111.                 entry("special_cer_pre_event", this::setHeadGroupName),
  112.                 entry("special_cer_hg_pre_event", this::setHeadGroupName),
  113.                 entry("omega_linoleoyloxy_Cer_pre_event", this::setOmegaHeadGroupName),

  114.                 entry("lcb_pre_event", this::newLcb),
  115.                 entry("lcb_post_event", this::cleanLcb),
  116.                 entry("fa_pre_event", this::newFa),
  117.                 entry("fa_post_event", this::appendFa),

  118.                 entry("glyco_struct_pre_event", this::addGlyco),

  119.                 entry("db_single_position_pre_event", this::setIsomericLevel),
  120.                 entry("db_single_position_post_event", this::addDbPosition),
  121.                 entry("db_position_number_pre_event", this::addDbPositionNumber),
  122.                 entry("cistrans_pre_event", this::addCistrans),

  123.                 entry("ether_pre_event", this::addEther),
  124.                 entry("hydroxyl_pre_event", this::addHydroxyl),
  125.                 entry("hydroxyl_lcb_pre_event", this::addHydroxylLcb),
  126.                 entry("db_count_pre_event", this::addDoubleBonds),
  127.                 entry("carbon_pre_event", this::addCarbon),

  128.                 entry("structural_mod_pre_event", this::setStructuralSubspeciesLevel),
  129.                 entry("single_mod_pre_event", this::setMod),
  130.                 entry("mod_text_pre_event", this::setModText),
  131.                 entry("mod_pos_pre_event", this::setModPos),
  132.                 entry("mod_num_pre_event", this::setModNum),
  133.                 entry("single_mod_post_event", this::addFunctionalGroup),
  134.                 entry("special_cer_prefix_pre_event", this::addAcer)
  135.             );
  136.         } catch (Exception e) {
  137.             throw new LipidParsingException("Cannot initialize LipidMapsParserEventHandler.");
  138.         }
  139.     }

  140.     @Override
  141.     protected void resetParser(TreeNode node) {
  142.         content = null;
  143.         level = LipidLevel.FULL_STRUCTURE;
  144.         headGroup = "";
  145.         lcb = null;
  146.         adduct = null;
  147.         faList.clear();
  148.         currentFa = null;
  149.         useHeadGroup = false;
  150.         omitFa = false;
  151.         dbPosition = 0;
  152.         dbNumbers = -1;
  153.         dbCistrans = "";
  154.         modPos = -1;
  155.         modNum = 1;
  156.         modText = "";
  157.         headgroupDecorators.clear();
  158.         addOmegaLinoleoyloxyCer = false;
  159.     }

  160.     private void addAcer(TreeNode node) {
  161.         String head = node.getText();
  162.         headGroup = "ACer";

  163.         if (!ACER_HEADS.containsKey(head)) {
  164.             throw new LipidException("ACer head group '" + head + "' unknown");
  165.         }

  166.         HeadgroupDecorator hgd = new HeadgroupDecorator("decorator_acyl", -1, 1, null, true, knownFunctionalGroups);
  167.         int acer_num = ACER_HEADS.get(head);
  168.         hgd.getFunctionalGroupsInternal().put("decorator_acyl", new ArrayList<>());
  169.         hgd.getFunctionalGroupsInternal().get("decorator_acyl").add(new FattyAcid("FA", acer_num, knownFunctionalGroups));
  170.         headgroupDecorators.add(hgd);

  171.         if (head.equals("1-O-lignoceroyl-omega-linoleoyloxy") || head.equals("1-O-stearoyl-omega-linoleoyloxy")) {
  172.             addOmegaLinoleoyloxyCer = true;
  173.         }
  174.     }

  175.     private void setMolecularSubspeciesLevel(TreeNode node) {
  176.         setLipidLevel(LipidLevel.MOLECULAR_SPECIES);
  177.     }

  178.     private void pureFa(TreeNode node) {
  179.         headGroup = "FA";
  180.     }

  181.     private void mediatorEvent(TreeNode node) {
  182.         useHeadGroup = true;
  183.         headGroup = node.getText();
  184.     }

  185.     private void setIsomericLevel(TreeNode node) {
  186.         dbPosition = 0;
  187.         dbCistrans = "";
  188.     }

  189.     private void addDbPosition(TreeNode node) {
  190.         if (currentFa != null) {
  191.             currentFa.getDoubleBonds().getDoubleBondPositions().put(dbPosition, dbCistrans);

  192.             if (!dbCistrans.equals("E") && !dbCistrans.equals("Z")) {
  193.                 setLipidLevel(LipidLevel.STRUCTURE_DEFINED);
  194.             }

  195.         }
  196.     }

  197.     private void setOmegaHeadGroupName(TreeNode node) {
  198.         addOmegaLinoleoyloxyCer = true;
  199.         setHeadGroupName(node);
  200.     }

  201.     private void addGlyco(TreeNode node) {
  202.         String glyco_name = node.getText();
  203.         HeadgroupDecorator functional_group = null;
  204.         try {
  205.             functional_group = (HeadgroupDecorator) knownFunctionalGroups.get(glyco_name);
  206.         } catch (Exception e) {
  207.             throw new LipidParsingException("Carbohydrate '" + glyco_name + "' unknown");
  208.         }

  209.         functional_group.getElements().put(Element.O, functional_group.getElements().get(Element.O) - 1);
  210.         headgroupDecorators.add(functional_group);
  211.     }

  212.     private void addDbPositionNumber(TreeNode node) {
  213.         dbPosition = Integer.valueOf(node.getText());
  214.     }

  215.     private void addCistrans(TreeNode node) {
  216.         dbCistrans = node.getText();
  217.     }

  218.     private void setHeadGroupName(TreeNode node) {
  219.         headGroup = node.getText();
  220.     }

  221.     private void setSpeciesLevel(TreeNode node) {
  222.         setLipidLevel(LipidLevel.SPECIES);
  223.     }

  224.     private void setStructuralSubspeciesLevel(TreeNode node) {
  225.         setLipidLevel(LipidLevel.STRUCTURE_DEFINED);
  226.     }

  227.     private void setMod(TreeNode node) {
  228.         modText = "";
  229.         modPos = -1;
  230.         modNum = 1;
  231.     }

  232.     private void setModText(TreeNode node) {
  233.         modText = node.getText();
  234.     }

  235.     private void setModPos(TreeNode node) {
  236.         modPos = node.getInt();
  237.     }

  238.     private void setModNum(TreeNode node) {
  239.         modNum = node.getInt();
  240.     }

  241.     private void addFunctionalGroup(TreeNode node) {
  242.         if (!modText.equals("Cp")) {
  243.             FunctionalGroup functional_group = knownFunctionalGroups.get(modText);
  244.             functional_group.setPosition(modPos);
  245.             functional_group.setCount(modNum);
  246.             String fg_name = functional_group.getName();
  247.             if (!currentFa.getFunctionalGroupsInternal().containsKey(fg_name)) {
  248.                 currentFa.getFunctionalGroupsInternal().put(fg_name, new ArrayList<>());
  249.             }
  250.             currentFa.getFunctionalGroupsInternal().get(fg_name).add(functional_group);
  251.         } else {
  252.             currentFa.setNumCarbon(currentFa.getNumCarbon() + 1);
  253.             Cycle cycle = new Cycle(3, modPos, modPos + 2, knownFunctionalGroups);
  254.             if (!currentFa.getFunctionalGroupsInternal().containsKey("cy")) {
  255.                 currentFa.getFunctionalGroupsInternal().put("cy", new ArrayList<>());
  256.             }
  257.             currentFa.getFunctionalGroupsInternal().get("cy").add(cycle);
  258.         }
  259.     }

  260.     private void newFa(TreeNode node) {
  261.         dbNumbers = -1;
  262.         currentFa = new FattyAcid("FA", knownFunctionalGroups);
  263.     }

  264.     private void newLcb(TreeNode node) {
  265.         lcb = new FattyAcid("LCB", knownFunctionalGroups);
  266.         lcb.setType(LipidFaBondType.LCB_REGULAR);
  267.         setLipidLevel(LipidLevel.STRUCTURE_DEFINED);
  268.         currentFa = lcb;
  269.     }

  270.     private void cleanLcb(TreeNode node) {
  271.         if (dbNumbers > -1 && dbNumbers != currentFa.getDoubleBonds().getNumDoubleBonds()) {
  272.             throw new LipidException("Double bond count does not match with number of double bond positions");
  273.         }
  274.         if (currentFa.getDoubleBonds().getDoubleBondPositions().isEmpty() && currentFa.getDoubleBonds().getNumDoubleBonds() > 0) {
  275.             setLipidLevel(LipidLevel.SN_POSITION);
  276.         }
  277.         currentFa = null;
  278.     }

  279.     private void appendFa(TreeNode node) {
  280.         if (dbNumbers > -1 && dbNumbers != currentFa.getDoubleBonds().getNumDoubleBonds()) {
  281.             throw new LipidException("Double bond count does not match with number of double bond positions");
  282.         }
  283.         if (currentFa.getDoubleBonds().getDoubleBondPositions().isEmpty() && currentFa.getDoubleBonds().getNumDoubleBonds() > 0) {
  284.             setLipidLevel(LipidLevel.SN_POSITION);
  285.         }

  286.         if (currentFa.getNumCarbon() == 0) {
  287.             omitFa = true;
  288.         }
  289.         faList.add(currentFa);
  290.         currentFa = null;
  291.     }

  292.     private void addEther(TreeNode node) {
  293.         String ether = node.getText();
  294.         if (ether.equals("O-")) {
  295.             currentFa.setLipidFaBondType(LipidFaBondType.ETHER_PLASMANYL);
  296.         } else if (ether.equals("P-")) {
  297.             currentFa.setLipidFaBondType(LipidFaBondType.ETHER_PLASMENYL);
  298.         }
  299.     }

  300.     private void addHydroxyl(TreeNode node) {
  301.         int num_h = node.getInt();

  302.         if (spRegularLcb()) {
  303.             num_h -= 1;
  304.         }

  305.         FunctionalGroup functional_group = knownFunctionalGroups.get("OH");
  306.         functional_group.setCount(num_h);
  307.         if (!currentFa.getFunctionalGroupsInternal().containsKey("OH")) {
  308.             currentFa.getFunctionalGroupsInternal().put("OH", new ArrayList<>());
  309.         }
  310.         currentFa.getFunctionalGroupsInternal().get("OH").add(functional_group);
  311.     }

  312.     private void addHydroxylLcb(TreeNode node) {
  313.         String hydroxyl = node.getText();
  314.         int num_h = 0;
  315.         if (hydroxyl.equals("m")) {
  316.             num_h = 1;
  317.         } else if (hydroxyl.equals("d")) {
  318.             num_h = 2;
  319.         } else if (hydroxyl.equals("t")) {
  320.             num_h = 3;
  321.         }

  322.         if (spRegularLcb()) {
  323.             num_h -= 1;
  324.         }

  325.         FunctionalGroup functional_group = knownFunctionalGroups.get("OH");
  326.         functional_group.setCount(num_h);
  327.         if (!currentFa.getFunctionalGroupsInternal().containsKey("OH")) {
  328.             currentFa.getFunctionalGroupsInternal().put("OH", new ArrayList<>());
  329.         }
  330.         currentFa.getFunctionalGroupsInternal().get("OH").add(functional_group);
  331.     }

  332.     private void addDoubleBonds(TreeNode node) {
  333.         currentFa.getDoubleBonds().setNumDoubleBonds(currentFa.getDoubleBonds().getNumDoubleBonds() + node.getInt());
  334.     }

  335.     private void addCarbon(TreeNode node) {
  336.         currentFa.setNumCarbon(node.getInt());
  337.     }

  338.     private void buildLipid(TreeNode node) {
  339.         if (omitFa && HEAD_GROUP_EXCEPTIONS.contains(headGroup)) {
  340.             headGroup = "L" + headGroup;
  341.         }

  342.         if (lcb != null) {
  343.             faList.add(0, lcb);
  344.         }
  345.        
  346.         if (addOmegaLinoleoyloxyCer){
  347.             if (faList.size() != 2){
  348.                 throw new ConstraintViolationException("omega-linoleoyloxy-Cer with a different combination to one long chain base and one fatty acyl chain unknown");
  349.             }
  350.             Map<String, ArrayList<FunctionalGroup>> fgroups = faList.get(faList.size() - 1).getFunctionalGroupsInternal();
  351.             if (!fgroups.containsKey("acyl")) fgroups.put("acyl", new ArrayList<>());
  352.            
  353.             DoubleBonds db = new DoubleBonds(2);
  354.             db.getDoubleBondPositions().put(9, "Z");
  355.             db.getDoubleBondPositions().put(12, "Z");
  356.             faList.get(faList.size() - 1).getFunctionalGroupsInternal().get("acyl").add(new FattyAcid("FA", 18, db));
  357.             headGroup = "Cer";
  358.         }
  359.        
  360.         Headgroup headgroup = prepareHeadgroupAndChecks();
  361.         content = new LipidAdduct(assembleLipid(headgroup), adduct);
  362.     }

  363.     private void newAdduct(TreeNode node) {
  364.         adduct = new Adduct("", "");
  365.     }

  366.     private void addAdduct(TreeNode node) {
  367.         adduct.setAdductString(node.getText());
  368.     }

  369.     private void addCharge(TreeNode node) {
  370.         adduct.setCharge(Integer.valueOf(node.getText()));
  371.     }

  372.     private void addChargeSign(TreeNode node) {
  373.         String sign = node.getText();
  374.         if (sign.equals("+")) {
  375.             adduct.setChargeSign(1);
  376.         } else if (sign.equals("-")) {
  377.             adduct.setChargeSign(-1);
  378.         }
  379.     }
  380. }