LipidClasses.java
/*
* Copyright 2021 Dominik Kopczynski, Nils Hoffmann.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.lifstools.jgoslin.domain;
import org.lifstools.jgoslin.parser.SumFormulaParser;
import java.util.HashMap;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import static java.util.Map.entry;
import org.lifstools.jgoslin.parser.BaseParserEventHandler;
import org.springframework.core.io.ClassPathResource;
/**
* Lookup class for lipid classes, consisting of {@link LipidClassMeta} entries,
* derived from lipid-list.csv.
*
* @author Dominik Kopczynski
* @author Nils Hoffmann
*/
public final class LipidClasses extends ArrayList<LipidClassMeta> {
private static final LipidClasses LIPID_CLASSES = new LipidClasses();
public static final int UNDEFINED_CLASS = 0;
private LipidClasses() {
super();
loadData(StringFunctions.getResourceAsStringList(new ClassPathResource("lipid-list.csv")), new SumFormulaParser());
}
private void loadData(List<String> lines, SumFormulaParser sfp) {
add(new LipidClassMeta(LipidCategory.NO_CATEGORY,
"UNDEFINED",
"",
0,
0,
new HashSet<>(),
new ElementTable(),
new ArrayList<>(Arrays.asList("UNDEFINED"))
));
int lineCounter = 0;
int SYNONYM_START_INDEX = 7;
HashMap<String, ArrayList<String>> data = new HashMap<>();
HashSet<String> keys = new HashSet<>();
Map<String, Integer> enum_names = new HashMap<>(Map.ofEntries(
entry("GL", 1),
entry("GP", 1),
entry("SP", 1),
entry("ST", 1),
entry("FA", 1),
entry("PK", 1),
entry("SL", 1),
entry("UNDEFINED", 1)
));
Map<String, LipidCategory> names_to_category = Map.ofEntries(
entry("GL", LipidCategory.GL),
entry("GP", LipidCategory.GP),
entry("SP", LipidCategory.SP),
entry("ST", LipidCategory.ST),
entry("FA", LipidCategory.FA),
entry("PK", LipidCategory.PK),
entry("SL", LipidCategory.SL),
entry("UNDEFINED", LipidCategory.UNDEFINED)
);
for (String line : lines) {
if (lineCounter++ == 0) {
continue;
}
ArrayList<String> tokens = StringFunctions.splitString(line, ',', '"', true);
if (keys.contains(tokens.get(0))) {
throw new ConstraintViolationException("Error: lipid name '" + tokens.get(0) + "' occurs multiple times in the lipid list.");
}
keys.add(tokens.get(0));
for (int i = SYNONYM_START_INDEX; i < tokens.size(); ++i) {
String test_lipid_name = tokens.get(i);
if (test_lipid_name.length() == 0) {
continue;
}
if (keys.contains(test_lipid_name)) {
throw new ConstraintViolationException("Error: lipid name '" + test_lipid_name + "' occurs multiple times in the lipid list.");
}
keys.add(test_lipid_name);
}
String enum_name = tokens.get(0);
StringBuilder sb = new StringBuilder();
for (int i = 0; i < enum_name.length(); ++i) {
char c = enum_name.charAt(i);
if ('A' <= c && c <= 'Z') {
sb.append(c);
} else if ('0' <= c && c <= '9') {
sb.append(c);
} else if ('a' <= c && c <= 'z') {
sb.append(Character.toString(c - ('a' - 'A')));
} else {
sb.append('_');
}
}
enum_name = sb.toString();
if (enum_name.charAt(0) == '_') {
enum_name = "L" + enum_name;
}
if (enum_name.charAt(0) < 'A' || 'Z' < enum_name.charAt(0)) {
enum_name = "L" + enum_name;
}
if (!enum_names.containsKey(enum_name)) {
enum_names.put(enum_name, 1);
} else {
int cnt = enum_names.get(enum_name) + 1;
enum_names.put(enum_name, cnt);
enum_name += ('A' + cnt - 1);
enum_names.put(enum_name, 1);
}
data.put(enum_name, tokens);
}
// creating the lipid class dictionary
BaseParserEventHandler<ElementTable> handler = sfp.newEventHandler();
data.entrySet().forEach(kv -> {
HashSet<String> special_cases = new HashSet<>();
StringFunctions.splitString(kv.getValue().get(5), ';', '"').forEach(scase -> {
special_cases.add(StringFunctions.strip(scase, '"'));
});
ElementTable e = kv.getValue().get(6).length() > 0 ? sfp.parse(kv.getValue().get(6), handler) : new ElementTable();
ArrayList<String> synonyms = new ArrayList<>();
synonyms.add(kv.getValue().get(0));
for (int ii = SYNONYM_START_INDEX; ii < kv.getValue().size(); ++ii) {
if (kv.getValue().get(ii).length() > 0) {
synonyms.add(StringFunctions.strip(kv.getValue().get(ii), '"'));
}
}
add(new LipidClassMeta(names_to_category.get(kv.getValue().get(1)),
kv.getValue().get(0),
kv.getValue().get(2),
Integer.valueOf(kv.getValue().get(3)),
Integer.valueOf(kv.getValue().get(4)),
special_cases,
e,
synonyms));
});
}
public static LipidClasses getInstance() {
return LIPID_CLASSES;
}
}