001/* 002 * Copyright 2020 nils.hoffmann. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package de.isas.lipidomics.palinom.cli; 017 018import de.isas.lipidomics.domain.FattyAcid; 019import de.isas.lipidomics.domain.LipidAdduct; 020import de.isas.lipidomics.domain.LipidClass; 021import de.isas.lipidomics.domain.LipidLevel; 022import de.isas.lipidomics.domain.LipidSpeciesInfo; 023import de.isas.lipidomics.palinom.SyntaxErrorListener; 024import de.isas.lipidomics.palinom.VisitorParser; 025import de.isas.lipidomics.palinom.exceptions.ConstraintViolationException; 026import de.isas.lipidomics.palinom.exceptions.ParsingException; 027import de.isas.lipidomics.palinom.goslin.GoslinVisitorParser; 028import de.isas.lipidomics.palinom.goslinfragments.GoslinFragmentsVisitorParser; 029import de.isas.lipidomics.palinom.hmdb.HmdbVisitorParser; 030import de.isas.lipidomics.palinom.lipidmaps.LipidMapsVisitorParser; 031import de.isas.lipidomics.palinom.swisslipids.SwissLipidsVisitorParser; 032import java.io.BufferedWriter; 033import java.io.File; 034import java.io.IOException; 035import java.io.StringWriter; 036import java.nio.file.Files; 037import java.time.Instant; 038import java.util.ArrayList; 039import java.util.Arrays; 040import java.util.Collections; 041import java.util.HashSet; 042import java.util.LinkedHashMap; 043import java.util.LinkedHashSet; 044import java.util.LinkedList; 045import java.util.List; 046import java.util.Locale; 047import java.util.Map; 048import java.util.Optional; 049import java.util.Properties; 050import java.util.regex.Matcher; 051import java.util.regex.Pattern; 052import java.util.stream.Collectors; 053import java.util.stream.Stream; 054import lombok.Data; 055import lombok.extern.slf4j.Slf4j; 056import org.apache.commons.cli.CommandLine; 057import org.apache.commons.cli.CommandLineParser; 058import org.apache.commons.cli.HelpFormatter; 059import org.apache.commons.cli.Options; 060import org.apache.commons.cli.PosixParser; 061import org.apache.commons.lang3.tuple.Pair; 062 063/** 064 * Create a new command line parser for parsing of lipid names. 065 * 066 * @author nils.hoffmann 067 */ 068@Slf4j 069public class CmdLineParser { 070 071 public static final String LIPIDMAPS_CLASS_REGEXP = ".+\\[([A-Z0-9]+)\\]"; 072 073 private static String getAppInfo() throws IOException { 074 Properties p = new Properties(); 075 p.load(CmdLineParser.class.getResourceAsStream( 076 "/application.properties")); 077 StringBuilder sb = new StringBuilder(); 078 String buildDate = p.getProperty("app.build.date", "no build date"); 079 if (!"no build date".equals(buildDate)) { 080 Instant instant = Instant.ofEpochMilli(Long.parseLong(buildDate)); 081 buildDate = instant.toString(); 082 } 083 /* 084 *Property keys are in src/main/resources/application.properties 085 */ 086 sb.append("Running "). 087 append(p.getProperty("app.name", "undefined app")). 088 append("\n\r"). 089 append(" version: '"). 090 append(p.getProperty("app.version", "unknown version")). 091 append("'"). 092 append("\n\r"). 093 append(" build-date: '"). 094 append(buildDate). 095 append("'"). 096 append("\n\r"). 097 append(" scm-location: '"). 098 append(p.getProperty("scm.location", "no scm location")). 099 append("'"). 100 append("\n\r"). 101 append(" commit: '"). 102 append(p.getProperty("scm.commit.id", "no commit id")). 103 append("'"). 104 append("\n\r"). 105 append(" branch: '"). 106 append(p.getProperty("scm.branch", "no branch")). 107 append("'"). 108 append("\n\r"); 109 return sb.toString(); 110 } 111 112 /** 113 * <p> 114 * Runs the command line parser for jgoslin, including validation.</p> 115 * 116 * @param args an array of {@link java.lang.String} lipid names. 117 * @throws java.lang.Exception if any unexpected errors occur. 118 */ 119 @SuppressWarnings("static-access") 120 public static void main(String[] args) throws Exception { 121 CommandLineParser parser = new PosixParser(); 122 Options options = new Options(); 123 String helpOpt = addHelpOption(options); 124 String versionOpt = addVersionOption(options); 125 String lipidNameOpt = addLipidNameInputOption(options); 126 String lipidFileOpt = addLipidFileInputOption(options); 127 String outputToFileOpt = addOutputToFileOption(options); 128 String grammarOpt = addGrammarOption(options); 129 130 CommandLine line = parser.parse(options, args); 131 if (line.getOptions().length == 0 || line.hasOption(helpOpt)) { 132 HelpFormatter formatter = new HelpFormatter(); 133 formatter.printHelp("jgoslin-cli", options); 134 } else if (line.hasOption(versionOpt)) { 135 log.info(getAppInfo()); 136 } else { 137 boolean toFile = false; 138 if (line.hasOption(outputToFileOpt)) { 139 toFile = true; 140 } 141 Stream<String> lipidNames = Stream.empty(); 142 if (line.hasOption(lipidNameOpt)) { 143 lipidNames = Stream.of(line.getOptionValues(lipidNameOpt)); 144 } else if (line.hasOption(lipidFileOpt)) { 145 lipidNames = Files.lines(new File(line.getOptionValue(lipidFileOpt)).toPath()).filter((t) -> { 146 return !t.isEmpty(); 147 }); 148 } 149 List<Pair<String, List<ValidationResult>>> results = Collections.emptyList(); 150 if (line.hasOption(grammarOpt)) { 151 results = parseNamesWith(lipidNames, ValidationResult.Grammar.valueOf(line.getOptionValue(grammarOpt))); 152 } else { 153 results = parseNames(lipidNames); 154 } 155 if (results.isEmpty()) { 156 log.info("No results generated. Please check input file or lipid names passed on the cli!"); 157 System.exit(1); 158 } 159 if (toFile) { 160 log.info("Saving output to 'goslin-out.tsv'."); 161 boolean successful = writeToFile(new File("goslin-out.tsv"), results); 162 if (!successful) { 163 System.exit(1); 164 } 165 } else { 166 log.info("Echoing output to stdout."); 167 boolean successful = writeToStdOut(results); 168 if (!successful) { 169 System.exit(1); 170 } 171 } 172 } 173 } 174 175 @Data 176 private static class ValidationResult { 177 178 public static enum Grammar { 179 GOSLIN, GOSLIN_FRAGMENTS, LIPIDMAPS, SWISSLIPIDS, HMDB, NONE 180 }; 181 182 private String lipidName; 183 184 private Grammar grammar; 185 186 private LipidLevel level; 187 188 private List<String> messages = Collections.emptyList(); 189 190 private LipidAdduct lipidAdduct; 191 192 private LipidSpeciesInfo lipidSpeciesInfo; 193 194 private String goslinName; 195 196 private String lipidMapsCategory; 197 198 private String lipidMapsClass; 199 200 private Map<String, FattyAcid> fattyAcids = Collections.emptyMap(); 201 202 } 203 204 protected static boolean writeToStdOut(List<Pair<String, List<ValidationResult>>> results) { 205 206 try (StringWriter sw = new StringWriter()) { 207 try (BufferedWriter bw = new BufferedWriter(sw)) { 208 writeToWriter(bw, results); 209 } 210 sw.flush(); 211 sw.close(); 212 log.info(sw.toString()); 213 return true; 214 } catch (IOException ex) { 215 log.error("Caught exception while trying to write validation results string!", ex); 216 return false; 217 } 218 } 219 220 protected static boolean writeToFile(File f, List<Pair<String, List<ValidationResult>>> results) { 221 222 try (BufferedWriter bw = Files.newBufferedWriter(f.toPath())) { 223 writeToWriter(bw, results); 224 return true; 225 } catch (IOException ex) { 226 log.error("Caught exception while trying to write validation results to file " + f, ex); 227 return false; 228 } 229 } 230 231 protected static String toTable(List<Pair<String, List<ValidationResult>>> results) { 232 StringBuilder sb = new StringBuilder(); 233 HashSet<String> keys = new LinkedHashSet<>(); 234 List<ValidationResult> validationResults = results.stream().map((t) -> { 235 return t.getValue(); 236 }).flatMap(List::stream).collect(Collectors.toList()); 237 List<Map<String, String>> entries = validationResults.stream().map((t) -> { 238 Map<String, String> m = new LinkedHashMap<>(); 239 m.put("Normalized Name", Optional.ofNullable(t.getGoslinName()).orElse("")); 240 m.put("Original Name", t.getLipidName()); 241 m.put("Grammar", t.getGrammar().name()); 242 m.put("Message", t.getMessages().stream().collect(Collectors.joining(" | "))); 243 if (t.getLipidAdduct() != null) { 244 m.put("Adduct", t.getLipidAdduct().getAdduct().getLipidString()); 245 m.put("Sum Formula", t.getLipidAdduct().getSumFormula()); 246 m.put("Mass", String.format(Locale.US, "%.4f", t.getLipidAdduct().getMass())); 247 m.put("Lipid Maps Category", t.getLipidAdduct().getLipid().getLipidCategory().getFullName() + " [" + t.getLipidAdduct().getLipid().getLipidCategory().name() + "]"); 248 LipidClass lclass = t.getLipidAdduct().getLipid().getLipidClass().orElse(LipidClass.UNDEFINED); 249 m.put("Lipid Maps Main Class", lclass.getLipidMapsClassName()); 250 m.put("Functional Class Abbr", "[" + lclass.getAbbreviation() + "]"); 251 m.put("Functional Class Synonyms", "[" + lclass.getSynonyms().stream().collect(Collectors.joining(", ")) + "]"); 252 m.put("Level", t.getLipidSpeciesInfo().getLevel().toString()); 253 m.put("Total #C", t.getLipidSpeciesInfo().getNCarbon() + ""); 254 m.put("Total #OH", t.getLipidSpeciesInfo().getNHydroxy() + ""); 255 m.put("Total #DB", t.getLipidSpeciesInfo().getNDoubleBonds() + ""); 256 for (FattyAcid fa : t.getFattyAcids().values()) { 257 m.put(fa.getName() + " SN Position", fa.getPosition() + ""); 258 m.put(fa.getName() + " #C", fa.getNCarbon() + ""); 259 m.put(fa.getName() + " #OH", fa.getNHydroxy() + ""); 260 m.put(fa.getName() + " #DB", fa.getNDoubleBonds() + ""); 261 m.put(fa.getName() + " Bond Type", fa.getLipidFaBondType() + ""); 262 String dbPositions = fa.getDoubleBondPositions().entrySet().stream().map((entry) -> { 263 return entry.getKey() + "" + entry.getValue(); 264 }).collect(Collectors.joining(",")); 265 m.put(fa.getName() + " DB Positions", dbPositions + ""); 266 } 267 } else { 268 m.put("Lipid Maps Category", ""); 269 m.put("Lipid Maps Main Class", ""); 270 m.put("Functional Class Abbr", ""); 271 m.put("Functional Class Synonyms", ""); 272 m.put("Level", ""); 273 m.put("Total #C", ""); 274 m.put("Total #OH", ""); 275 m.put("Total #DB", ""); 276 } 277 keys.addAll(m.keySet()); 278 return m; 279 }).collect(Collectors.toList()); 280 sb.append(keys.stream().collect(Collectors.joining("\t"))).append("\n"); 281 for (Map<String, String> m : entries) { 282 List<String> l = new LinkedList(); 283 for (String key : keys) { 284 l.add(m.getOrDefault(key, "")); 285 } 286 sb.append(l.stream().collect(Collectors.joining("\t"))).append("\n"); 287 } 288 return sb.toString(); 289 } 290 291 protected static void writeToWriter(BufferedWriter bw, List<Pair<String, List<ValidationResult>>> results) { 292 try { 293 bw.write(toTable(results)); 294 bw.newLine(); 295 } catch (IOException ex) { 296 log.error("Caught exception while trying to write validation results to buffered writer.", ex); 297 } 298 } 299 300 protected static List<Pair<String, List<ValidationResult>>> parseNames(Stream<String> lipidNames) { 301 return lipidNames.map((t) -> { 302 return parseName(t); 303 }).collect(Collectors.toList()); 304 } 305 306 protected static List<Pair<String, List<ValidationResult>>> parseNamesWith(Stream<String> lipidNames, ValidationResult.Grammar grammar) { 307 return lipidNames.map((t) -> { 308 return parseNameWith(t, grammar); 309 }).collect(Collectors.toList()).stream().map((t) -> { 310 return Pair.of(t.getKey(), Arrays.asList(t.getValue())); 311 }).collect(Collectors.toList()); 312 } 313 314 protected static Pair<String, ValidationResult> parseNameWith(String lipidName, ValidationResult.Grammar grammar) { 315 VisitorParser<LipidAdduct> parser; 316 SyntaxErrorListener listener = new SyntaxErrorListener(); 317 ValidationResult validationResult = new ValidationResult(); 318 switch (grammar) { 319 case GOSLIN: 320 parser = new GoslinVisitorParser(); 321 break; 322 case GOSLIN_FRAGMENTS: 323 parser = new GoslinFragmentsVisitorParser(); 324 break; 325 case LIPIDMAPS: 326 parser = new LipidMapsVisitorParser(); 327 break; 328 case SWISSLIPIDS: 329 parser = new SwissLipidsVisitorParser(); 330 break; 331 case HMDB: 332 parser = new HmdbVisitorParser(); 333 break; 334 default: 335 throw new ConstraintViolationException("Unsupported grammar: " + grammar); 336 } 337 try { 338 LipidAdduct la = parser.parse(lipidName, listener); 339 validationResult.setLipidName(lipidName); 340 validationResult.setLipidAdduct(la); 341 validationResult.setGrammar(grammar); 342 validationResult.setLevel(la.getLipid().getInfo().orElse(LipidSpeciesInfo.NONE).getLevel()); 343 validationResult.setMessages(toStringMessages(listener)); 344 validationResult.setLipidMapsCategory(la.getLipid().getLipidCategory().name()); 345 validationResult.setLipidMapsClass(getLipidMapsClassAbbreviation(la)); 346 validationResult.setLipidSpeciesInfo(la.getLipid().getInfo().orElse(LipidSpeciesInfo.NONE)); 347 try { 348 String normalizedName = la.getLipid().getLipidString(); 349 validationResult.setGoslinName(normalizedName); 350 } catch (RuntimeException re) { 351 log.debug("Parsing error for {}!", lipidName); 352 } 353 extractFas(la, validationResult); 354 } catch (ParsingException ex) { 355 validationResult.setLipidName(lipidName); 356 validationResult.setMessages(toStringMessages(listener)); 357 validationResult.setGrammar(grammar); 358 log.debug("Caught exception while parsing " + lipidName + " with " + grammar + " grammar: ", ex); 359 360 } 361 return Pair.of(lipidName, validationResult); 362 } 363 364 protected static Pair<String, List<ValidationResult>> parseName(String lipidName) { 365 List<ValidationResult> results = new ArrayList<>(); 366 Pair<String, ValidationResult> goslinResult = parseNameWith(lipidName, ValidationResult.Grammar.GOSLIN); 367 if (goslinResult.getValue().getMessages().isEmpty()) { 368 return Pair.of(goslinResult.getKey(), Arrays.asList(goslinResult.getValue())); 369 } 370 Pair<String, ValidationResult> goslinFragmentsResult = parseNameWith(lipidName, ValidationResult.Grammar.GOSLIN_FRAGMENTS); 371 if (goslinFragmentsResult.getValue().getMessages().isEmpty()) { 372 return Pair.of(goslinFragmentsResult.getKey(), Arrays.asList(goslinFragmentsResult.getValue())); 373 } 374 Pair<String, ValidationResult> lipidMapsResult = parseNameWith(lipidName, ValidationResult.Grammar.LIPIDMAPS); 375 if (lipidMapsResult.getValue().getMessages().isEmpty()) { 376 return Pair.of(lipidMapsResult.getKey(), Arrays.asList(lipidMapsResult.getValue())); 377 } 378 Pair<String, ValidationResult> swissLipidsResult = parseNameWith(lipidName, ValidationResult.Grammar.SWISSLIPIDS); 379 if (swissLipidsResult.getValue().getMessages().isEmpty()) { 380 return Pair.of(swissLipidsResult.getKey(), Arrays.asList(swissLipidsResult.getValue())); 381 } 382 Pair<String, ValidationResult> hmdbResult = parseNameWith(lipidName, ValidationResult.Grammar.HMDB); 383 if (hmdbResult.getValue().getMessages().isEmpty()) { 384 return Pair.of(hmdbResult.getKey(), Arrays.asList(hmdbResult.getValue())); 385 } 386 ValidationResult r = new ValidationResult(); 387 r.setGoslinName(""); 388 r.setLipidName(lipidName); 389 r.setGrammar(ValidationResult.Grammar.NONE); 390 List<String> messages = new ArrayList<>(hmdbResult.getValue().getMessages()); 391 messages.add("Lipid name could not be parsed with any grammar!"); 392 r.setMessages(messages); 393 results.add(r); 394 return Pair.of(lipidName, results); 395 } 396 397 private static void extractFas(LipidAdduct la, ValidationResult result) { 398 result.setFattyAcids(la.getLipid().getFa()); 399 } 400 401 private static List<String> toStringMessages(SyntaxErrorListener listener) { 402 return listener.getSyntaxErrors().stream().map((syntaxError) -> { 403 return syntaxError.getMessage(); 404 }).collect(Collectors.toList()); 405 } 406 407 private static String getLipidMapsClassAbbreviation(LipidAdduct la) { 408 String lipidMapsClass = la.getLipid().getLipidClass().orElse(LipidClass.UNDEFINED).getLipidMapsClassName(); 409 Pattern lmcRegexp = Pattern.compile(LIPIDMAPS_CLASS_REGEXP); 410 Matcher lmcMatcher = lmcRegexp.matcher(lipidMapsClass); 411 if (lmcMatcher.matches() && lmcMatcher.groupCount() == 1) { 412 lipidMapsClass = lmcMatcher.group(1); 413 } else { 414 lipidMapsClass = null; 415 } 416 return lipidMapsClass; 417 } 418 419 protected static String addLipidFileInputOption(Options options) { 420 String versionOpt = "file"; 421 options.addOption("f", versionOpt, true, "Input a file name to read from for lipid name for parsing. Each lipid name must be on a separate line."); 422 return versionOpt; 423 } 424 425 protected static String addLipidNameInputOption(Options options) { 426 String versionOpt = "name"; 427 options.addOption("n", versionOpt, true, "Input a lipid name for parsing."); 428 return versionOpt; 429 } 430 431 protected static String addVersionOption(Options options) { 432 String versionOpt = "version"; 433 options.addOption("v", versionOpt, false, "Print version information."); 434 return versionOpt; 435 } 436 437 protected static String addHelpOption(Options options) { 438 String helpOpt = "help"; 439 options.addOption("h", helpOpt, false, "Print help message."); 440 return helpOpt; 441 } 442 443 protected static String addOutputToFileOption(Options options) { 444 String outputToFileOpt = "outputFile"; 445 options.addOption("o", outputToFileOpt, false, "Write output to file 'goslin-out.tsv' instead of to std out."); 446 return outputToFileOpt; 447 } 448 449 protected static String addGrammarOption(Options options) { 450 String grammarOpt = "grammar"; 451 options.addOption("g", grammarOpt, true, "Use the provided grammar explicitly instead of all grammars. Options are: " + Arrays.toString(ValidationResult.Grammar.values())); 452 return grammarOpt; 453 } 454 455}