001/*
002 * Copyright 2020  nils.hoffmann.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package de.isas.lipidomics.palinom.cli;
017
018import de.isas.lipidomics.domain.FattyAcid;
019import de.isas.lipidomics.domain.LipidAdduct;
020import de.isas.lipidomics.domain.LipidClass;
021import de.isas.lipidomics.domain.LipidLevel;
022import de.isas.lipidomics.domain.LipidSpeciesInfo;
023import de.isas.lipidomics.palinom.SyntaxErrorListener;
024import de.isas.lipidomics.palinom.VisitorParser;
025import de.isas.lipidomics.palinom.exceptions.ConstraintViolationException;
026import de.isas.lipidomics.palinom.exceptions.ParsingException;
027import de.isas.lipidomics.palinom.goslin.GoslinVisitorParser;
028import de.isas.lipidomics.palinom.goslinfragments.GoslinFragmentsVisitorParser;
029import de.isas.lipidomics.palinom.hmdb.HmdbVisitorParser;
030import de.isas.lipidomics.palinom.lipidmaps.LipidMapsVisitorParser;
031import de.isas.lipidomics.palinom.swisslipids.SwissLipidsVisitorParser;
032import java.io.BufferedWriter;
033import java.io.File;
034import java.io.IOException;
035import java.io.StringWriter;
036import java.nio.file.Files;
037import java.time.Instant;
038import java.util.ArrayList;
039import java.util.Arrays;
040import java.util.Collections;
041import java.util.HashSet;
042import java.util.LinkedHashMap;
043import java.util.LinkedHashSet;
044import java.util.LinkedList;
045import java.util.List;
046import java.util.Locale;
047import java.util.Map;
048import java.util.Optional;
049import java.util.Properties;
050import java.util.regex.Matcher;
051import java.util.regex.Pattern;
052import java.util.stream.Collectors;
053import java.util.stream.Stream;
054import lombok.Data;
055import lombok.extern.slf4j.Slf4j;
056import org.apache.commons.cli.CommandLine;
057import org.apache.commons.cli.CommandLineParser;
058import org.apache.commons.cli.HelpFormatter;
059import org.apache.commons.cli.Options;
060import org.apache.commons.cli.PosixParser;
061import org.apache.commons.lang3.tuple.Pair;
062
063/**
064 * Create a new command line parser for parsing of lipid names.
065 *
066 * @author nils.hoffmann
067 */
068@Slf4j
069public class CmdLineParser {
070
071    public static final String LIPIDMAPS_CLASS_REGEXP = ".+\\[([A-Z0-9]+)\\]";
072
073    private static String getAppInfo() throws IOException {
074        Properties p = new Properties();
075        p.load(CmdLineParser.class.getResourceAsStream(
076                "/application.properties"));
077        StringBuilder sb = new StringBuilder();
078        String buildDate = p.getProperty("app.build.date", "no build date");
079        if (!"no build date".equals(buildDate)) {
080            Instant instant = Instant.ofEpochMilli(Long.parseLong(buildDate));
081            buildDate = instant.toString();
082        }
083        /*
084         *Property keys are in src/main/resources/application.properties
085         */
086        sb.append("Running ").
087                append(p.getProperty("app.name", "undefined app")).
088                append("\n\r").
089                append(" version: '").
090                append(p.getProperty("app.version", "unknown version")).
091                append("'").
092                append("\n\r").
093                append(" build-date: '").
094                append(buildDate).
095                append("'").
096                append("\n\r").
097                append(" scm-location: '").
098                append(p.getProperty("scm.location", "no scm location")).
099                append("'").
100                append("\n\r").
101                append(" commit: '").
102                append(p.getProperty("scm.commit.id", "no commit id")).
103                append("'").
104                append("\n\r").
105                append(" branch: '").
106                append(p.getProperty("scm.branch", "no branch")).
107                append("'").
108                append("\n\r");
109        return sb.toString();
110    }
111
112    /**
113     * <p>
114     * Runs the command line parser for jgoslin, including validation.</p>
115     *
116     * @param args an array of {@link java.lang.String} lipid names.
117     * @throws java.lang.Exception if any unexpected errors occur.
118     */
119    @SuppressWarnings("static-access")
120    public static void main(String[] args) throws Exception {
121        CommandLineParser parser = new PosixParser();
122        Options options = new Options();
123        String helpOpt = addHelpOption(options);
124        String versionOpt = addVersionOption(options);
125        String lipidNameOpt = addLipidNameInputOption(options);
126        String lipidFileOpt = addLipidFileInputOption(options);
127        String outputToFileOpt = addOutputToFileOption(options);
128        String grammarOpt = addGrammarOption(options);
129
130        CommandLine line = parser.parse(options, args);
131        if (line.getOptions().length == 0 || line.hasOption(helpOpt)) {
132            HelpFormatter formatter = new HelpFormatter();
133            formatter.printHelp("jgoslin-cli", options);
134        } else if (line.hasOption(versionOpt)) {
135            log.info(getAppInfo());
136        } else {
137            boolean toFile = false;
138            if (line.hasOption(outputToFileOpt)) {
139                toFile = true;
140            }
141            Stream<String> lipidNames = Stream.empty();
142            if (line.hasOption(lipidNameOpt)) {
143                lipidNames = Stream.of(line.getOptionValues(lipidNameOpt));
144            } else if (line.hasOption(lipidFileOpt)) {
145                lipidNames = Files.lines(new File(line.getOptionValue(lipidFileOpt)).toPath()).filter((t) -> {
146                    return !t.isEmpty();
147                });
148            }
149            List<Pair<String, List<ValidationResult>>> results = Collections.emptyList();
150            if (line.hasOption(grammarOpt)) {
151                results = parseNamesWith(lipidNames, ValidationResult.Grammar.valueOf(line.getOptionValue(grammarOpt)));
152            } else {
153                results = parseNames(lipidNames);
154            }
155            if (results.isEmpty()) {
156                log.info("No results generated. Please check input file or lipid names passed on the cli!");
157                System.exit(1);
158            }
159            if (toFile) {
160                log.info("Saving output to 'goslin-out.tsv'.");
161                boolean successful = writeToFile(new File("goslin-out.tsv"), results);
162                if (!successful) {
163                    System.exit(1);
164                }
165            } else {
166                log.info("Echoing output to stdout.");
167                boolean successful = writeToStdOut(results);
168                if (!successful) {
169                    System.exit(1);
170                }
171            }
172        }
173    }
174
175    @Data
176    private static class ValidationResult {
177
178        public static enum Grammar {
179            GOSLIN, GOSLIN_FRAGMENTS, LIPIDMAPS, SWISSLIPIDS, HMDB, NONE
180        };
181
182        private String lipidName;
183
184        private Grammar grammar;
185
186        private LipidLevel level;
187
188        private List<String> messages = Collections.emptyList();
189
190        private LipidAdduct lipidAdduct;
191
192        private LipidSpeciesInfo lipidSpeciesInfo;
193
194        private String goslinName;
195
196        private String lipidMapsCategory;
197
198        private String lipidMapsClass;
199
200        private Map<String, FattyAcid> fattyAcids = Collections.emptyMap();
201
202    }
203
204    protected static boolean writeToStdOut(List<Pair<String, List<ValidationResult>>> results) {
205
206        try (StringWriter sw = new StringWriter()) {
207            try (BufferedWriter bw = new BufferedWriter(sw)) {
208                writeToWriter(bw, results);
209            }
210            sw.flush();
211            sw.close();
212            log.info(sw.toString());
213            return true;
214        } catch (IOException ex) {
215            log.error("Caught exception while trying to write validation results string!", ex);
216            return false;
217        }
218    }
219
220    protected static boolean writeToFile(File f, List<Pair<String, List<ValidationResult>>> results) {
221
222        try (BufferedWriter bw = Files.newBufferedWriter(f.toPath())) {
223            writeToWriter(bw, results);
224            return true;
225        } catch (IOException ex) {
226            log.error("Caught exception while trying to write validation results to file " + f, ex);
227            return false;
228        }
229    }
230
231    protected static String toTable(List<Pair<String, List<ValidationResult>>> results) {
232        StringBuilder sb = new StringBuilder();
233        HashSet<String> keys = new LinkedHashSet<>();
234        List<ValidationResult> validationResults = results.stream().map((t) -> {
235            return t.getValue();
236        }).flatMap(List::stream).collect(Collectors.toList());
237        List<Map<String, String>> entries = validationResults.stream().map((t) -> {
238            Map<String, String> m = new LinkedHashMap<>();
239            m.put("Normalized Name", Optional.ofNullable(t.getGoslinName()).orElse(""));
240            m.put("Original Name", t.getLipidName());
241            m.put("Grammar", t.getGrammar().name());
242            m.put("Message", t.getMessages().stream().collect(Collectors.joining(" | ")));
243            if (t.getLipidAdduct() != null) {
244                m.put("Adduct", t.getLipidAdduct().getAdduct().getLipidString());
245                m.put("Sum Formula", t.getLipidAdduct().getSumFormula());
246                m.put("Mass", String.format(Locale.US, "%.4f", t.getLipidAdduct().getMass()));
247                m.put("Lipid Maps Category", t.getLipidAdduct().getLipid().getLipidCategory().getFullName() + " [" + t.getLipidAdduct().getLipid().getLipidCategory().name() + "]");
248                LipidClass lclass = t.getLipidAdduct().getLipid().getLipidClass();
249                m.put("Lipid Maps Main Class", lclass.getLipidMapsClassName());
250                m.put("Functional Class Abbr", "[" + lclass.getAbbreviation() + "]");
251                m.put("Functional Class Synonyms", "[" + lclass.getSynonyms().stream().collect(Collectors.joining(", ")) + "]");
252                m.put("Level", t.getLipidSpeciesInfo().getLevel().toString());
253                m.put("Total #C", t.getLipidSpeciesInfo().getNCarbon() + "");
254                m.put("Total #OH", t.getLipidSpeciesInfo().getNHydroxy() + "");
255                m.put("Total #DB", t.getLipidSpeciesInfo().getNDoubleBonds() + "");
256                for (FattyAcid fa : t.getFattyAcids().values()) {
257                    m.put(fa.getName() + " SN Position", fa.getPosition() + "");
258                    m.put(fa.getName() + " #C", fa.getNCarbon() + "");
259                    m.put(fa.getName() + " #OH", fa.getNHydroxy() + "");
260                    m.put(fa.getName() + " #DB", fa.getNDoubleBonds() + "");
261                    m.put(fa.getName() + " Bond Type", fa.getLipidFaBondType() + "");
262                    String dbPositions = fa.getDoubleBondPositions().entrySet().stream().map((entry) -> {
263                        return entry.getKey() + "" + entry.getValue();
264                    }).collect(Collectors.joining(","));
265                    m.put(fa.getName() + " DB Positions", dbPositions + "");
266                }
267            } else {
268                m.put("Lipid Maps Category", "");
269                m.put("Lipid Maps Main Class", "");
270                m.put("Functional Class Abbr", "");
271                m.put("Functional Class Synonyms", "");
272                m.put("Level", "");
273                m.put("Total #C", "");
274                m.put("Total #OH", "");
275                m.put("Total #DB", "");
276            }
277            keys.addAll(m.keySet());
278            return m;
279        }).collect(Collectors.toList());
280        sb.append(keys.stream().collect(Collectors.joining("\t"))).append("\n");
281        for (Map<String, String> m : entries) {
282            List<String> l = new LinkedList();
283            for (String key : keys) {
284                l.add(m.getOrDefault(key, ""));
285            }
286            sb.append(l.stream().collect(Collectors.joining("\t"))).append("\n");
287        }
288        return sb.toString();
289    }
290
291    protected static void writeToWriter(BufferedWriter bw, List<Pair<String, List<ValidationResult>>> results) {
292        try {
293            bw.write(toTable(results));
294            bw.newLine();
295        } catch (IOException ex) {
296            log.error("Caught exception while trying to write validation results to buffered writer.", ex);
297        }
298    }
299
300    protected static List<Pair<String, List<ValidationResult>>> parseNames(Stream<String> lipidNames) {
301        return lipidNames.map((t) -> {
302            return parseName(t);
303        }).collect(Collectors.toList());
304    }
305
306    protected static List<Pair<String, List<ValidationResult>>> parseNamesWith(Stream<String> lipidNames, ValidationResult.Grammar grammar) {
307        return lipidNames.map((t) -> {
308            return parseNameWith(t, grammar);
309        }).collect(Collectors.toList()).stream().map((t) -> {
310            return Pair.of(t.getKey(), Arrays.asList(t.getValue()));
311        }).collect(Collectors.toList());
312    }
313
314    protected static Pair<String, ValidationResult> parseNameWith(String lipidName, ValidationResult.Grammar grammar) {
315        VisitorParser<LipidAdduct> parser;
316        SyntaxErrorListener listener = new SyntaxErrorListener();
317        ValidationResult validationResult = new ValidationResult();
318        switch (grammar) {
319            case GOSLIN:
320                parser = new GoslinVisitorParser();
321                break;
322            case GOSLIN_FRAGMENTS:
323                parser = new GoslinFragmentsVisitorParser();
324                break;
325            case LIPIDMAPS:
326                parser = new LipidMapsVisitorParser();
327                break;
328            case SWISSLIPIDS:
329                parser = new SwissLipidsVisitorParser();
330                break;
331            case HMDB:
332                parser = new HmdbVisitorParser();
333                break;
334            default:
335                throw new ConstraintViolationException("Unsupported grammar: " + grammar);
336        }
337        try {
338            LipidAdduct la = parser.parse(lipidName, listener);
339            validationResult.setLipidName(lipidName);
340            validationResult.setLipidAdduct(la);
341            validationResult.setGrammar(grammar);
342            validationResult.setLevel(la.getLipid().getInfo().getLevel());
343            validationResult.setMessages(toStringMessages(listener));
344            validationResult.setLipidMapsCategory(la.getLipid().getLipidCategory().name());
345            validationResult.setLipidMapsClass(getLipidMapsClassAbbreviation(la));
346            validationResult.setLipidSpeciesInfo(la.getLipid().getInfo());
347            try {
348                String normalizedName = la.getLipid().getLipidString();
349                validationResult.setGoslinName(normalizedName);
350            } catch (RuntimeException re) {
351                log.debug("Parsing error for {}!", lipidName);
352            }
353            extractFas(la, validationResult);
354        } catch (ParsingException ex) {
355            validationResult.setLipidName(lipidName);
356            validationResult.setMessages(toStringMessages(listener));
357            validationResult.setGrammar(grammar);
358            log.debug("Caught exception while parsing " + lipidName + " with " + grammar + " grammar: ", ex);
359
360        }
361        return Pair.of(lipidName, validationResult);
362    }
363
364    protected static Pair<String, List<ValidationResult>> parseName(String lipidName) {
365        List<ValidationResult> results = new ArrayList<>();
366        Pair<String, ValidationResult> goslinResult = parseNameWith(lipidName, ValidationResult.Grammar.GOSLIN);
367        if (goslinResult.getValue().getMessages().isEmpty()) {
368            return Pair.of(goslinResult.getKey(), Arrays.asList(goslinResult.getValue()));
369        }
370        Pair<String, ValidationResult> goslinFragmentsResult = parseNameWith(lipidName, ValidationResult.Grammar.GOSLIN_FRAGMENTS);
371        if (goslinFragmentsResult.getValue().getMessages().isEmpty()) {
372            return Pair.of(goslinFragmentsResult.getKey(), Arrays.asList(goslinFragmentsResult.getValue()));
373        }
374        Pair<String, ValidationResult> lipidMapsResult = parseNameWith(lipidName, ValidationResult.Grammar.LIPIDMAPS);
375        if (lipidMapsResult.getValue().getMessages().isEmpty()) {
376            return Pair.of(lipidMapsResult.getKey(), Arrays.asList(lipidMapsResult.getValue()));
377        }
378        Pair<String, ValidationResult> swissLipidsResult = parseNameWith(lipidName, ValidationResult.Grammar.SWISSLIPIDS);
379        if (swissLipidsResult.getValue().getMessages().isEmpty()) {
380            return Pair.of(swissLipidsResult.getKey(), Arrays.asList(swissLipidsResult.getValue()));
381        }
382        Pair<String, ValidationResult> hmdbResult = parseNameWith(lipidName, ValidationResult.Grammar.HMDB);
383        if (hmdbResult.getValue().getMessages().isEmpty()) {
384            return Pair.of(hmdbResult.getKey(), Arrays.asList(hmdbResult.getValue()));
385        }
386        ValidationResult r = new ValidationResult();
387        r.setGoslinName("");
388        r.setLipidName(lipidName);
389        r.setGrammar(ValidationResult.Grammar.NONE);
390        List<String> messages = new ArrayList<>(hmdbResult.getValue().getMessages());
391        messages.add("Lipid name could not be parsed with any grammar!");
392        r.setMessages(messages);
393        results.add(r);
394        return Pair.of(lipidName, results);
395    }
396
397    private static void extractFas(LipidAdduct la, ValidationResult result) {
398        result.setFattyAcids(la.getLipid().getFa());
399    }
400
401    private static List<String> toStringMessages(SyntaxErrorListener listener) {
402        return listener.getSyntaxErrors().stream().map((syntaxError) -> {
403            return syntaxError.getMessage();
404        }).collect(Collectors.toList());
405    }
406
407    private static String getLipidMapsClassAbbreviation(LipidAdduct la) {
408        String lipidMapsClass = la.getLipid().getLipidClass().getLipidMapsClassName();
409        Pattern lmcRegexp = Pattern.compile(LIPIDMAPS_CLASS_REGEXP);
410        Matcher lmcMatcher = lmcRegexp.matcher(lipidMapsClass);
411        if (lmcMatcher.matches() && lmcMatcher.groupCount() == 1) {
412            lipidMapsClass = lmcMatcher.group(1);
413        } else {
414            lipidMapsClass = null;
415        }
416        return lipidMapsClass;
417    }
418
419    protected static String addLipidFileInputOption(Options options) {
420        String versionOpt = "file";
421        options.addOption("f", versionOpt, true, "Input a file name to read from for lipid name for parsing. Each lipid name must be on a separate line.");
422        return versionOpt;
423    }
424
425    protected static String addLipidNameInputOption(Options options) {
426        String versionOpt = "name";
427        options.addOption("n", versionOpt, true, "Input a lipid name for parsing.");
428        return versionOpt;
429    }
430
431    protected static String addVersionOption(Options options) {
432        String versionOpt = "version";
433        options.addOption("v", versionOpt, false, "Print version information.");
434        return versionOpt;
435    }
436
437    protected static String addHelpOption(Options options) {
438        String helpOpt = "help";
439        options.addOption("h", helpOpt, false, "Print help message.");
440        return helpOpt;
441    }
442
443    protected static String addOutputToFileOption(Options options) {
444        String outputToFileOpt = "outputFile";
445        options.addOption("o", outputToFileOpt, false, "Write output to file 'goslin-out.tsv' instead of to std out.");
446        return outputToFileOpt;
447    }
448
449    protected static String addGrammarOption(Options options) {
450        String grammarOpt = "grammar";
451        options.addOption("g", grammarOpt, true, "Use the provided grammar explicitly instead of all grammars. Options are: " + Arrays.toString(ValidationResult.Grammar.values()));
452        return grammarOpt;
453    }
454
455}