001/*
002 * 
003 */
004package de.isas.lipidomics.domain;
005
006import static de.isas.lipidomics.domain.Element.ELEMENT_C;
007import static de.isas.lipidomics.domain.Element.ELEMENT_H;
008import de.isas.lipidomics.palinom.exceptions.ConstraintViolationException;
009import java.util.Collections;
010import java.util.Map;
011import java.util.Optional;
012import java.util.regex.Matcher;
013import java.util.regex.Pattern;
014import java.util.stream.Collectors;
015import lombok.AccessLevel;
016import lombok.Data;
017import lombok.Setter;
018
019/**
020 * A lipid species is the factual root of the object hierarchy. Lipid category
021 * and class are used as taxonomic roots of a lipid species. Partial structural
022 * knowledge, apart from the head group, is first encoded in the lipid species.
023 *
024 * A typical lipid species is PC 32:0 (SwissLipids SLM:000056493), where the
025 * head group is defined as PC (Glycerophosphocholines), with fatty acyl chains
026 * of unknown individual composition, but known total composition (32 carbon
027 * atoms, zero double bonds, no hydroxylations).
028 *
029 * @author nils.hoffmann
030 * @see LipidCategory
031 * @see LipidClass
032 * @see LipidMolecularSubspecies
033 * @see LipidStructuralSubspecies
034 * @see LipidIsomericSubspecies
035 */
036@Data
037public class LipidSpecies {
038
039    private static final class None extends LipidSpecies {
040
041        private None() {
042            super(new HeadGroup(""), Optional.of(LipidSpeciesInfo.NONE));
043        }
044    }
045
046    public static final LipidSpecies NONE = new None();
047    private final HeadGroup headGroup;
048    @Setter(AccessLevel.NONE)
049    protected Optional<LipidSpeciesInfo> info;
050
051    /**
052     * Create a lipid species using the provided head group and a lipid species
053     * info {@link LipidSpeciesInfo#NONE}.
054     *
055     * @param headGroup the lipid species head group.
056     */
057    public LipidSpecies(HeadGroup headGroup) {
058        this(headGroup, Optional.of(LipidSpeciesInfo.NONE));
059    }
060
061    /**
062     * Create a lipid species from a head group and an optional
063     * {@link LipidSpeciesInfo}. This constructor will infer the lipid class
064     * from the head group automatically. It then uses the lipid class to
065     * retrieve the category of this lipid automatically, or sets the category
066     * to {@link LipidCategory#UNDEFINED}. The lipid species info, which
067     * contains details about the total no. of carbons in FA chains, no. of
068     * double bonds etc., is used as provided.
069     *
070     * @param headGroup the lipid species head group.
071     * @param lipidSpeciesInfo the lipid species info object.
072     */
073    public LipidSpecies(HeadGroup headGroup, Optional<LipidSpeciesInfo> lipidSpeciesInfo) {
074        this.headGroup = headGroup;
075        this.info = lipidSpeciesInfo;
076    }
077
078    /**
079     * Returns the {@link LipidSpeciesInfo} for this lipid.
080     *
081     * @return the lipid species info.
082     */
083    public Optional<LipidSpeciesInfo> getInfo() {
084        return this.info;
085    }
086
087    /**
088     * Returns true, if the head group ends with ' O' or if the lipid fa bond
089     * type is either {@link LipidFaBondType#ETHER_UNSPECIFIED},
090     * {@link LipidFaBondType#ETHER_PLASMANYL} or
091     * {@link LipidFaBondType#ETHER_PLASMENYL}.
092     *
093     * @return whether this is an 'ether' lipid, e.g. a unspecified ether
094     * species, a Plasmanyl or Plasmenyl species.
095     */
096    public boolean isEtherLipid() {
097        LipidSpeciesInfo info = this.info.orElse(LipidSpeciesInfo.NONE);
098        LipidFaBondType bondType = info.getLipidFaBondType();
099        return bondType == LipidFaBondType.ETHER_PLASMANYL
100                || bondType == LipidFaBondType.ETHER_PLASMENYL
101                || bondType == LipidFaBondType.ETHER_UNSPECIFIED
102                || getFa().values().stream().anyMatch((t) -> {
103                    return t.getLipidFaBondType() == LipidFaBondType.ETHER_UNSPECIFIED
104                            || t.getLipidFaBondType() == LipidFaBondType.ETHER_PLASMANYL
105                            || t.getLipidFaBondType() == LipidFaBondType.ETHER_PLASMENYL;
106                });
107    }
108
109    /**
110     * Returns a lipid string representation for the {@link LipidLevel}, e.g.
111     * Category, Species, etc, as returned by {@link #getInfo()}.
112     *
113     * Will return the head group name if the level is
114     * {@link LipidSpeciesInfo#NONE}.
115     *
116     * @return the lipid name for the native level.
117     */
118    public String getLipidString() {
119        return getLipidString(getInfo().orElse(LipidSpeciesInfo.NONE).getLevel());
120    }
121
122    /**
123     * Returns a lipid string representation for the given {@link LipidLevel},
124     * e.g. Category, Species, etc. Please note that this method is overridden
125     * by specific implementations for molecular, structural and isomeric
126     * subspecies levels. This method does not normalize the head group.
127     *
128     * @param level the lipid level to report the name of this lipid on.
129     * @return the lipid name.
130     */
131    public String getLipidString(LipidLevel level) {
132        return this.buildLipidString(level, headGroup.getName(), false);
133    }
134
135    /**
136     * Returns a lipid string representation for the given {@link LipidLevel},
137     * e.g. Category, Species, etc. Please note that this method is overridden
138     * by specific implementations for molecular, structural and isomeric
139     * subspecies levels. This method normalizes the head group to the primary
140     * class-specific synonym. E.g. TG would be normalized to TAG.
141     *
142     * @param level the lipid level to report the name of this lipid on.
143     * @param normalizeHeadGroup if true, use class specific synonym for
144     * headGroup, if false, use head group as parsed.
145     * @return the lipid name.
146     */
147    public String getLipidString(LipidLevel level, boolean normalizeHeadGroup) {
148        return this.buildLipidString(level, normalizeHeadGroup ? getNormalizedHeadGroup() : headGroup.getName(), normalizeHeadGroup);
149    }
150
151    protected StringBuilder buildSpeciesHeadGroupString(String headGroup, boolean normalizeHeadGroup) {
152        StringBuilder lipidString = new StringBuilder();
153        lipidString.append(this.headGroup.getLipidClass().map((lclass) -> {
154            switch (lclass) {
155//                case SE:
156                case SE_27_1:
157                case SE_27_2:
158                case SE_28_2:
159                case SE_28_3:
160                case SE_29_2:
161                case SE_30_2:
162                    return getNormalizedHeadGroup() + "/"; // use this for disambiguation to avoid SE 16:1 to be similar to SE 43:2 because of expansion to SE 27:1/16:1
163            }
164            return headGroup + " ";
165        }).orElse(headGroup + " "));
166        return lipidString;
167    }
168
169    protected String buildLipidString(LipidLevel level, String headGroup, boolean isNormalized) throws ConstraintViolationException {
170        switch (level) {
171            case CATEGORY:
172                return this.headGroup.getLipidCategory().name();
173            case CLASS:
174                return this.headGroup.getLipidClass().orElse(LipidClass.UNDEFINED).name();
175            case SPECIES:
176                StringBuilder lipidString = new StringBuilder();
177                lipidString.append(buildSpeciesHeadGroupString(headGroup, isNormalized));
178                LipidSpeciesInfo info = this.info.orElse(LipidSpeciesInfo.NONE);
179                if (info.getNCarbon() > 0) {
180                    int nCarbon = info.getNCarbon();
181                    String hgToFaSep = "";
182                    if (isEtherLipid()) {
183                        hgToFaSep = "O-";
184                    }
185                    lipidString.append(hgToFaSep).append(nCarbon);
186                    int nDB = info.getNDoubleBonds();
187                    lipidString.append(":").append(nDB);
188                    int nHydroxy = info.getNHydroxy();
189                    lipidString.append(nHydroxy > 0 ? ";" + nHydroxy : "");
190                    lipidString.append(info.getLipidFaBondType().suffix());
191                    if (!info.getModifications().isEmpty()) {
192                        lipidString.append("(");
193                        lipidString.append(info.getModifications().stream().map((t) -> {
194                            return (t.getLeft() == -1 ? "" : t.getLeft()) + "" + t.getRight();
195                        }).collect(Collectors.joining(",")));
196                        lipidString.append(")");
197                    }
198                }
199                return lipidString.toString().trim();
200            case UNDEFINED:
201                return this.headGroup.getName();
202            default:
203                LipidLevel thisLevel = getInfo().orElse(LipidSpeciesInfo.NONE).getLevel();
204                throw new ConstraintViolationException(getClass().getSimpleName() + " can not create a string for lipid with level " + thisLevel + " for level " + level + ": target level is more specific than this lipid's level!");
205        }
206    }
207
208    /**
209     * Returns a lipid string representation for the head group of this lipid.
210     * This method normalizes the original head group name to the class specific
211     * primary alias, if the level and class are known. E.g. TG is normalized to
212     * TAG.
213     *
214     * @return the normalized lipid head group.
215     */
216    public String getNormalizedHeadGroup() {
217        return headGroup.getNormalizedName();
218    }
219
220    /**
221     * Returns a lipid string representation for the native {@link LipidLevel},
222     * e.g. Category, Species, etc, as returned by {@link #getInfo()} of this
223     * lipid. This method normalizes the head group to the primary
224     * class-specific synonym. E.g. TG would be normalized to TAG.
225     *
226     * @return the normalized lipid name.
227     */
228    public String getNormalizedLipidString() {
229        return getLipidString(getInfo().orElse(LipidSpeciesInfo.NONE).getLevel(), true);
230    }
231
232    /**
233     * Validate this lipid against the class-specific available FA types and
234     * slots.
235     *
236     * @return true if this lipid's FA types and their number match the class
237     * definition, false otherwise.
238     */
239    public boolean validate() {
240        return true;
241    }
242
243    /**
244     * Returns the fatty acyls registered for this lipid.
245     *
246     * @return the fatty acyls.
247     */
248    public Map<String, FattyAcid> getFa() {
249        return Collections.emptyMap();
250    }
251
252    /**
253     * Returns the element count table for this lipid.
254     *
255     * @return the element count table.
256     */
257    public ElementTable getElements() {
258        ElementTable elements = new ElementTable();
259        if (info.isPresent()) {
260            switch (info.get().getLevel()) {
261                case CATEGORY:
262                case CLASS:
263                case UNDEFINED:
264                    return elements;
265            }
266        }
267
268        headGroup.getLipidClass().ifPresent((lclass) -> {
269            elements.add(lclass.getElements());
270        });
271
272        info.ifPresent((t) -> {
273            switch (t.getLevel()) {
274                case MOLECULAR_SUBSPECIES:
275                case STRUCTURAL_SUBSPECIES:
276                case ISOMERIC_SUBSPECIES:
277                    int nTrueFa = 0;
278                    for (FattyAcid fa : getFa().values()) {
279                        ElementTable faElements = fa.getElements();
280                        if (fa.getNCarbon() != 0 || fa.getNDoubleBonds() != 0) {
281                            nTrueFa += 1;
282                        }
283                        elements.add(faElements);
284                    }
285                    if (headGroup.getLipidClass().isPresent()) {
286                        if (headGroup.getLipidClass().get().getMaxNumFa() < nTrueFa) {
287                            throw new ConstraintViolationException("Inconsistency in number of fatty acyl chains for lipid '" + headGroup.getName() + "'. Expected at most: " + headGroup.getLipidClass().get().getMaxNumFa() + "; received: " + nTrueFa);
288                        }
289                        elements.incrementBy(Element.ELEMENT_H, headGroup.getLipidClass().get().getMaxNumFa() - nTrueFa); // adding hydrogens for absent fatty acyl chains
290                    }
291                    break;
292                case SPECIES:
293                    int maxNumFa = 0;
294                    if (headGroup.getLipidClass().isPresent()) {
295                        LipidClass lclass = headGroup.getLipidClass().get();
296                        maxNumFa = lclass.getMaxNumFa();
297                    }
298
299                    if (info.isPresent()) {
300                        int maxPossNumFa = headGroup.getLipidClass().get().getAllowedNumFa().stream().max(Integer::compareTo).orElse(0);
301                        ElementTable faElements = info.get().getElements(maxPossNumFa);
302                        elements.add(faElements);
303                        elements.incrementBy(ELEMENT_H, maxNumFa - maxPossNumFa); // adding hydrogens for absent fatty acyl chains
304                    }
305                    break;
306                default:
307                    break;
308            }
309        });
310
311        return elements;
312    }
313
314    public Optional<LipidClass> getLipidClass() {
315        return headGroup.getLipidClass();
316    }
317
318    public LipidCategory getLipidCategory() {
319        return headGroup.getLipidCategory();
320    }
321
322    @Override
323    public String toString() {
324        return getLipidString(info.orElse(LipidSpeciesInfo.NONE).getLevel());
325    }
326
327}