001/* 002 * Copyright 2018 Leibniz-Institut für Analytische Wissenschaften – ISAS – e.V.. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package uk.ac.ebi.pride.jmztab2.utils.parser; 017 018import de.isas.mztab2.io.serialization.ParameterConverter; 019import de.isas.mztab2.model.Metadata; 020import de.isas.mztab2.model.MsRun; 021import de.isas.mztab2.model.Parameter; 022import de.isas.mztab2.model.SpectraRef; 023import java.util.ArrayList; 024import java.util.Arrays; 025import java.util.List; 026import java.util.Optional; 027import java.util.SortedMap; 028import java.util.regex.Matcher; 029import java.util.regex.Pattern; 030import lombok.extern.slf4j.Slf4j; 031import uk.ac.ebi.pride.jmztab2.model.IMZTabColumn; 032import uk.ac.ebi.pride.jmztab2.model.MZBoolean; 033import uk.ac.ebi.pride.jmztab2.model.MZTabColumnFactory; 034import uk.ac.ebi.pride.jmztab2.model.MZTabConstants; 035import static uk.ac.ebi.pride.jmztab2.model.MZTabConstants.*; 036import uk.ac.ebi.pride.jmztab2.model.MZTabUtils; 037import static uk.ac.ebi.pride.jmztab2.model.MZTabUtils.*; 038import uk.ac.ebi.pride.jmztab2.model.SplitList; 039import uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType; 040import uk.ac.ebi.pride.jmztab2.utils.errors.LogicalErrorType; 041import uk.ac.ebi.pride.jmztab2.utils.errors.MZTabError; 042import uk.ac.ebi.pride.jmztab2.utils.errors.MZTabErrorList; 043import uk.ac.ebi.pride.jmztab2.utils.errors.MZTabException; 044 045/** 046 * 
This class allows the validation and loading of the data into mzTab domain 047 * objects. 048 * 049 * NOTICE: {@link uk.ac.ebi.pride.jmztab2.model.MZTabColumnFactory} maintain a 050 * couple of {@link uk.ac.ebi.pride.jmztab2.model.IMZTabColumn} which have 051 * internal logical position and order. In physical mzTab file, we allow user 052 * not obey this logical position organized way, and provide their date with own 053 * order. In order to distinguish them, we use physical position (a positive 054 * integer) to record the column location in mzTab file. And use 055 * {@link uk.ac.ebi.pride.jmztab2.utils.parser.PositionMapping} structure to 056 * maintain the mapping between them. 057 * 058 * @param <T> the type of domain object the parser creates. 059 * @see SMLLineParser 060 * @see SMFLineParser 061 * @see SMELineParser 062 * @author qingwei 063 * @since 14/02/13 064 * 065 */ 066@Slf4j 067public abstract class MZTabDataLineParser<T> extends MZTabLineParser { 068 069 protected MZTabColumnFactory factory; 070 protected PositionMapping positionMapping; 071 protected SortedMap<String, Integer> exchangeMapping; // reverse the key and value of positionMapping. 072 073 protected SortedMap<Integer, IMZTabColumn> mapping; // logical position --> offset 074 protected Metadata metadata; 075 076 /** 077 * <p> 078 * Constructor for MZTabDataLineParser.</p> 079 * 080 * @param context a 081 * {@link uk.ac.ebi.pride.jmztab2.utils.parser.MZTabParserContext} object. 082 */ 083 protected MZTabDataLineParser(MZTabParserContext context) { 084 super(context); 085 } 086 087 /** 088 * Generate a mzTab data line parser. 089 * 090 * NOTICE: {@link uk.ac.ebi.pride.jmztab2.model.MZTabColumnFactory} maintain 091 * a couple of {@link uk.ac.ebi.pride.jmztab2.model.IMZTabColumn} which have 092 * internal logical position and order. In physical mzTab file, we allow 093 * user not obey this logical position organized way, and provide their date 094 * with own order. 
In order to distinguish them, we use physical position (a 095 * positive integer) to record the column location in mzTab file. And use 096 * {@link uk.ac.ebi.pride.jmztab2.utils.parser.PositionMapping} structure 097 * the maintain the mapping between them. 098 * 099 * @param context the parser context, keeping dynamic state and lookup 100 * associations. 101 * @param factory SHOULD NOT be set to null 102 * @param positionMapping SHOULD NOT be set to null 103 * @param metadata SHOULD NOT be set to null 104 * @param errorList a 105 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.MZTabErrorList} object. 106 */ 107 protected MZTabDataLineParser(MZTabParserContext context, 108 MZTabColumnFactory factory, PositionMapping positionMapping, 109 Metadata metadata, MZTabErrorList errorList) { 110 this(context); 111 if (factory == null) { 112 throw new NullPointerException( 113 "Column header factory should be created first."); 114 } 115 this.factory = factory; 116 117 this.positionMapping = positionMapping; 118 this.exchangeMapping = positionMapping.reverse(); 119 this.mapping = factory.getOffsetColumnsMap(); 120 121 if (metadata == null) { 122 throw new NullPointerException("Metadata should be parsed first."); 123 } 124 this.metadata = metadata; 125 this.errorList = errorList == null ? new MZTabErrorList() : errorList; 126 } 127 128 /** 129 * {@inheritDoc} 130 * 131 * Validate and parse the data line, if there exist errors, add them into 132 * {@link MZTabErrorList}. 
133 */ 134 @Override 135 public void parse(int lineNumber, String line, MZTabErrorList errorList) throws MZTabException { 136 super.parse(lineNumber, line, errorList); 137 checkCount(); 138 139 int offset = checkData(); 140 if (offset != items.length) { 141 log.error( 142 "Number of expected items after parsing header is: {} but data line has: {} items!", 143 offset, 144 items.length); 145 log.error("Current mapping is: {}", mapping); 146 log.error("Items given: {} expected: {}", Arrays.toString(items), 147 Arrays.toString(line.split("\\t"))); 148 this.errorList.add(new MZTabError(FormatErrorType.CountMatch, 149 lineNumber, "" + offset, "" + items.length)); 150 } 151 } 152 153 /** 154 * Check header line items size equals data line items size. The number of 155 * Data line items does not match with the number of Header line items. 156 * Normally, the user has not used the Unicode Horizontal Tab character 157 * (Unicode codepoint 0009) as the column delimiter, there is a file 158 * encoding error, or the user has not provided the definition of optional 159 * columns in the header line. 160 */ 161 private void checkCount() { 162 int headerCount = mapping.size(); 163 int dataCount = items.length - 1; 164 165 if (headerCount != dataCount) { 166 log.error( 167 "Number of expected items after parsing header is: {} but data line has: {} items!", 168 headerCount, 169 dataCount); 170 log.error("Current mapping is: {}", mapping); 171 log.error("Items given: {} expected: {}", Arrays.toString(items), 172 Arrays.toString(line.split("\\t"))); 173 this.errorList.add(new MZTabError(FormatErrorType.CountMatch, 174 lineNumber, "" + dataCount, "" + headerCount)); 175 } 176 } 177 178 /** 179 * Retrieve the data line to a type mzTab domain object. 180 * 181 * @return a typed mzTab domain object. 182 */ 183 public abstract T getRecord(); 184 185 /** 186 * Check and translate the columns into mzTab elements. 187 * 188 * @return a int. 
189 */ 190 protected abstract int checkData(); 191 192 /** 193 * load best_search_engine_score[id], read id value. 194 * 195 * @param bestSearchEngineScoreLabel a {@link java.lang.String} object. 196 * @return a {@link java.lang.Integer} object. 197 */ 198 protected Integer loadBestSearchEngineScoreId( 199 String bestSearchEngineScoreLabel) { 200 Pattern pattern = Pattern.compile( 201 "search_engine_score\\[(\\d+)\\](\\w+)?"); 202 Matcher matcher = pattern.matcher(bestSearchEngineScoreLabel); 203 204 if (matcher.find()) { 205 return new Integer(matcher.group(1)); 206 } 207 208 return null; 209 } 210 211 /** 212 * load search_engine_score[id]_ms_run[..], read id value. 213 * 214 * @param searchEngineLabel a {@link java.lang.String} object. 215 * @return a {@link java.lang.Integer} object. 216 */ 217 protected Integer loadSearchEngineScoreId(String searchEngineLabel) { 218 Pattern pattern = Pattern.compile("search_engine_score\\[(\\d+)\\]\\w*"); 219 Matcher matcher = pattern.matcher(searchEngineLabel); 220 221 if (matcher.find()) { 222 return new Integer(matcher.group(1)); 223 } 224 225 return null; 226 } 227 228 /** 229 * In the table-based sections (protein, peptide, and small molecule) there 230 * MUST NOT be any empty cells. Some field not allow "null" value, for 231 * example unit_id, accession and so on. In "Complete" file, in general 232 * "null" values SHOULD not be given. 233 * 234 * @param column SHOULD NOT be set to null 235 * @param target SHOULD NOT be empty. 236 * @param allowNull a boolean. 237 * @return a {@link java.lang.String} object. 
238 */ 239 protected String checkData(IMZTabColumn column, String target, 240 boolean allowNull) { 241 if (target == null && allowNull) { 242 return null; 243 } 244 if (target == null) { 245 this.errorList.add(new MZTabError(LogicalErrorType.NULL, lineNumber, 246 column.getHeader())); 247 return null; 248 } 249 250 target = target.trim(); 251 if (target.isEmpty()) { 252 this.errorList.add(new MZTabError(LogicalErrorType.NULL, lineNumber, 253 column.getHeader())); 254 return null; 255 } 256 if (MZTabConstants.NULL.equals(target) && !allowNull) { 257 this.errorList.add(new MZTabError(LogicalErrorType.NULL, lineNumber, 258 column.getHeader())); 259 return null; 260 } 261 262 return target; 263 } 264 265 /** 266 * In the table-based sections (protein, peptide, and small molecule) there 267 * MUST NOT be any empty cells. Some field not allow "null" value, for 268 * example unit_id, accession and so on. In "Complete" file, in general 269 * "null" values SHOULD not be given. 270 * 271 * @param column SHOULD NOT be set to null 272 * @param target SHOULD NOT be empty. 273 * @return a {@link java.lang.String} object. 274 */ 275 protected String checkString(IMZTabColumn column, String target) { 276 return checkData(column, target, true); 277 } 278 279 /** 280 * In the table-based sections (protein, peptide, and small molecule) there 281 * MUST NOT be any empty cells. Some field not allow "null" value, for 282 * example unit_id, accession and so on. In "Complete" file, in general 283 * "null" values SHOULD not be given. 284 * 285 * @param column SHOULD NOT be set to null 286 * @param target SHOULD NOT be empty. 287 * @param allowNull if true, null target values will pass the check, if 288 * false, the check will raise an error in the error list. 289 * @return a {@link java.lang.String} object. 
290 */ 291 protected String checkString(IMZTabColumn column, String target, 292 boolean allowNull) { 293 return checkData(column, target, allowNull); 294 } 295 296 /** 297 * Check and translate target string into Integer. If parse is incorrect, 298 * throws 299 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Integer} 300 * error. 301 * 302 * @param column SHOULD NOT be set to null 303 * @param target SHOULD NOT be empty. 304 * @return a {@link java.lang.Integer} object. 305 */ 306 protected Integer checkInteger(IMZTabColumn column, String target) { 307 return checkInteger(column, target, true); 308 } 309 310 /** 311 * Check and translate target string into Integer. If parse is incorrect, 312 * throws 313 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Integer} 314 * error. 315 * 316 * @param column SHOULD NOT be set to null 317 * @param target SHOULD NOT be empty. 318 * @param allowNull if true, null target values will pass the check, if 319 * false, the check will raise an error in the error list. 320 * @return a {@link java.lang.Integer} object. 321 */ 322 protected Integer checkInteger(IMZTabColumn column, String target, 323 boolean allowNull) { 324 String result = checkData(column, target, allowNull); 325 326 if (result == null || result.equalsIgnoreCase(NULL)) { 327 return null; 328 } 329 330 Integer value = parseInteger(result); 331 if (value == null) { 332 this.errorList.add(new MZTabError(FormatErrorType.Integer, 333 lineNumber, column.getHeader(), target)); 334 } 335 336 return value; 337 } 338 339 /** 340 * Check and translate target string into Double. If parse is incorrect, 341 * throws 342 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Double} 343 * error. 344 * 345 * NOTICE: If ratios are included and the denominator is zero, the "INF" 346 * value MUST be used. If the result leads to calculation errors (for 347 * example 0/0), this MUST be reported as "not a number" ("NaN"). 
348 * 349 * @param column SHOULD NOT be set to null 350 * @param target SHOULD NOT be empty. 351 * @return a {@link java.lang.Double} object. 352 */ 353 protected Double checkDouble(IMZTabColumn column, String target) { 354 return checkDouble(column, target, true); 355 } 356 357 /** 358 * Check and translate target string into Double. If parse is incorrect, 359 * throws 360 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Double} 361 * error. 362 * 363 * NOTICE: If ratios are included and the denominator is zero, the "INF" 364 * value MUST be used. If the result leads to calculation errors (for 365 * example 0/0), this MUST be reported as "not a number" ("NaN"). 366 * 367 * @param column SHOULD NOT be set to null 368 * @param target SHOULD NOT be empty. 369 * @param allowNull if true, null target values will pass the check, if 370 * false, the check will raise an error in the error list. 371 * @return a {@link java.lang.Double} object. 372 */ 373 protected Double checkDouble(IMZTabColumn column, String target, 374 boolean allowNull) { 375 String result = checkData(column, target, allowNull); 376 377 if (result == null || result.equalsIgnoreCase(NULL)) { 378 return null; 379 } 380 381 Double value = parseDouble(result); 382 if (value == null) { 383 this.errorList.add( 384 new MZTabError(FormatErrorType.Double, lineNumber, column. 385 getHeader(), target)); 386 return null; 387 } 388 if (value.equals(Double.NaN) || value.equals(Double.POSITIVE_INFINITY)) { 389 return value; 390 } 391 392 return value; 393 } 394 395 /** 396 * Check and translate target string into parameter list which split by '|' 397 * character.. If parse is incorrect, throws 398 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#ParamList} 399 * error. 400 * 401 * @param column SHOULD NOT be set to null 402 * @param target SHOULD NOT be empty. 403 * @return a {@link java.util.List} object. 
404 */ 405 protected List<Parameter> checkParamList(IMZTabColumn column, String target) { 406 String result = checkData(column, target, true); 407 408 if (result == null || result.equalsIgnoreCase(NULL)) { 409 return new ArrayList<>(BAR); 410 } 411 412 List<Parameter> paramList = parseParamList(result); 413 if (paramList.isEmpty()) { 414 this.errorList.add(new MZTabError(FormatErrorType.ParamList, 415 lineNumber, "Column " + column.getHeader(), target)); 416 } 417 for (Parameter param : paramList) { 418 if (param != null && param.getCvAccession() != null && !param. 419 getCvAccession(). 420 isEmpty()) { 421 if (!param.getCvAccession(). 422 contains(":")) { 423 this.errorList.add(new MZTabError( 424 FormatErrorType.ParamAccessionNotNamespaced, lineNumber, 425 column.getHeader(), param.getCvAccession(), 426 new ParameterConverter().convert(param))); 427 } 428 } 429 } 430 431 return paramList; 432 } 433 434 /** 435 * <p> 436 * checkParameter.</p> 437 * 438 * @param column a {@link uk.ac.ebi.pride.jmztab2.model.IMZTabColumn} 439 * object. 440 * @param target a {@link java.lang.String} object. 441 * @param allowNull a boolean. 442 * @return a {@link de.isas.mztab2.model.Parameter} object. 443 */ 444 protected Parameter checkParameter(IMZTabColumn column, String target, 445 boolean allowNull) { 446 String result = checkData(column, target, true); 447 if (result == null || (result.equalsIgnoreCase(NULL) && !allowNull)) { 448 this.errorList.add(new MZTabError(FormatErrorType.Param, lineNumber, 449 "Column " + column.getHeader(), target)); 450 } 451 Parameter param = MZTabUtils.parseParam(target); 452 if (param != null && param.getCvAccession() != null && !param. 453 getCvAccession(). 454 isEmpty()) { 455 if (!param.getCvAccession(). 
456 contains(":")) { 457 this.errorList.add(new MZTabError( 458 FormatErrorType.ParamAccessionNotNamespaced, lineNumber, 459 column.getHeader(), param.getCvAccession(), 460 new ParameterConverter().convert(param))); 461 } 462 } else if (param == null && result != null && !result.isEmpty() && !(result. 463 equalsIgnoreCase(NULL))) { 464 this.errorList.add(new MZTabError(FormatErrorType.Param, lineNumber, 465 "Column " + column.getHeader(), target)); 466 } 467 return param; 468 } 469 470 /** 471 * Check and translate target string into parameter list which split by 472 * splitChar character.. If parse is incorrect, throws 473 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#StringList} 474 * error. 475 * 476 * @param column SHOULD NOT be set to null 477 * @param target SHOULD NOT be empty. 478 * @param splitChar a char. 479 * @return a {@link java.util.List} object. 480 */ 481 protected List<String> checkStringList(IMZTabColumn column, String target, 482 char splitChar) { 483 String result = checkData(column, target, true); 484 485 if (result == null || result.equalsIgnoreCase(NULL)) { 486 return new ArrayList<>(splitChar); 487 } 488 489 List<String> stringList = parseStringList(splitChar, result); 490 if (stringList.isEmpty()) { 491 this.errorList.add(new MZTabError(FormatErrorType.StringList, 492 lineNumber, column.getHeader(), result, "" + splitChar)); 493 } 494 495 return stringList; 496 } 497 498 /** 499 * Check and translate target string into integer list which split by 500 * splitChar character.. If parse is incorrect, throws 501 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#StringList} 502 * error. 503 * 504 * @param column SHOULD NOT be set to null 505 * @param target SHOULD NOT be empty. 506 * @param splitChar a char. 507 * @return a {@link java.util.List} object. 
508 */ 509 protected List<Integer> checkIntegerList(IMZTabColumn column, String target, 510 char splitChar) { 511 return checkIntegerList(column, target, splitChar, true); 512 } 513 514 /** 515 * Check and translate target string into integer list which split by 516 * splitChar character.. If parse is incorrect, throws 517 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#StringList} 518 * error. 519 * 520 * @param column SHOULD NOT be set to null 521 * @param target SHOULD NOT be empty. 522 * @param splitChar a char. 523 * @param allowNull if true, null will be treated as a valid element of the 524 * list. Otherwise, an error will be added to the error list. 525 * @return a {@link java.util.List} object. 526 */ 527 protected List<Integer> checkIntegerList(IMZTabColumn column, String target, 528 char splitChar, boolean allowNull) { 529 String result = checkData(column, target, allowNull); 530 531 if (result == null || result.equalsIgnoreCase(NULL)) { 532 return new ArrayList<>(splitChar); 533 } 534 535 List<Integer> stringList = parseIntegerList(result); 536 if (stringList.isEmpty()) { 537 this.errorList.add(new MZTabError(FormatErrorType.IntegerList, 538 lineNumber, column.getHeader(), result, "" + splitChar)); 539 } 540 541 return stringList; 542 } 543 544 /** 545 * Check and translate target string into parameter list which split by 546 * splitChar character.. If parse is incorrect, throws 547 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#StringList} 548 * error. 549 * 550 * @param column SHOULD NOT be set to null 551 * @param target SHOULD NOT be empty. 552 * @return a {@link java.util.List} object. 
553 */ 554 protected List<Double> checkDoubleList(IMZTabColumn column, String target) { 555 String result = checkData(column, target, true); 556 557 if (result == null || result.equalsIgnoreCase(NULL)) { 558 return new ArrayList<>(MZTabConstants.BAR); 559 } 560 561 List<Double> doubleList = parseDoubleList(target); 562 if (doubleList.isEmpty()) { 563 this.errorList.add(new MZTabError(FormatErrorType.DoubleList, 564 lineNumber, column.getHeader(), result, "" + MZTabConstants.BAR)); 565 } 566 567 return doubleList; 568 } 569 570 /** 571 * Check and translate target to 572 * {@link uk.ac.ebi.pride.jmztab2.model.MZBoolean}. Only "0" and "1" allow 573 * used in express Boolean (0/1). If parse is incorrect, throws 574 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#MZBoolean} 575 * error. 576 * 577 * @param column SHOULD NOT be set to null 578 * @param target SHOULD NOT be empty. 579 * @return a {@link uk.ac.ebi.pride.jmztab2.model.MZBoolean} object. 580 */ 581 protected MZBoolean checkMZBoolean(IMZTabColumn column, String target) { 582 String result = checkData(column, target, true); 583 584 if (result == null || result.equalsIgnoreCase(NULL)) { 585 return null; 586 } 587 588 MZBoolean value = MZBoolean.findBoolean(result); 589 if (value == null) { 590 this.errorList.add(new MZTabError(FormatErrorType.MZBoolean, 591 lineNumber, column.getHeader(), result)); 592 } 593 594 return value; 595 } 596 597 /** 598 * Check target string. Normally, description can set "null". But in 599 * "Complete" file, in general "null" values SHOULD not be given. 600 * 601 * @see #checkData(IMZTabColumn, String, boolean) 602 * @param column SHOULD NOT be set to null 603 * @param description SHOULD NOT be empty. 604 * @return a {@link java.lang.String} object. 605 */ 606 protected String checkDescription(IMZTabColumn column, String description) { 607 return checkData(column, description, true); 608 } 609 610 /** 611 * Check and translate taxid string into Integer. 
If exists error during 612 * parse, raise 613 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Integer} 614 * error. Normally, taxid may be set to "null"; in general "null" values 615 * SHOULD not be given. 616 * 617 * @param column SHOULD NOT be set to null 618 * @param taxid SHOULD NOT be empty. 619 * @return a {@link java.lang.Integer} object. 620 */ 621 protected Integer checkTaxid(IMZTabColumn column, String taxid) { 622 return checkInteger(column, taxid); 623 } 624 625 /** 626 * Check target string. Normally, species can set "null". But in "Complete" 627 * file, in general "null" values SHOULD not be given. 628 * 629 * @see #checkData(IMZTabColumn, String, boolean) 630 * @param column SHOULD NOT be set to null 631 * @param species SHOULD NOT be empty. 632 * @return a {@link java.lang.String} object. 633 */ 634 protected String checkSpecies(IMZTabColumn column, String species) { 635 return checkData(column, species, true); 636 } 637 638 /** 639 * Check target string. Normally, database can set "null". But in "Complete" 640 * file, in general "null" values SHOULD not be given. 641 * 642 * @see #checkData(IMZTabColumn, String, boolean) 643 * @param column SHOULD NOT be set to null 644 * @param database SHOULD NOT be empty. 645 * @return a {@link java.lang.String} object. 646 */ 647 protected String checkDatabase(IMZTabColumn column, String database) { 648 return checkData(column, database, true); 649 } 650 651 /** 652 * Check target string. Normally, databaseVersion can set "null". But in 653 * "Complete" file, in general "null" values SHOULD not be given. 654 * 655 * @see #checkData(IMZTabColumn, String, boolean) 656 * @param column SHOULD NOT be set to null 657 * @param databaseVersion SHOULD NOT be empty. 658 * @return a {@link java.lang.String} object. 
659 */ 660 protected String checkDatabaseVersion(IMZTabColumn column, 661 String databaseVersion) { 662 return checkData(column, databaseVersion, true); 663 } 664 665 /** 666 * Check and translate searchEngine string into parameter list which split 667 * by '|' character.. If parse is incorrect, throws 668 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#ParamList} 669 * error. Normally, searchEngine may be set to "null"; in general "null" 670 * values SHOULD not be given. 671 * 672 * @param column SHOULD NOT be set to null 673 * @param searchEngine SHOULD NOT be empty. 674 * @return a {@link java.util.List} object. 675 */ 676 protected List<Parameter> checkSearchEngine(IMZTabColumn column, 677 String searchEngine) { 678 return checkParamList(column, searchEngine); 679 } 680 681 /** 682 * The best search engine score (for this type of score) for the given 683 * peptide across all replicates reported. The type of score MUST be defined 684 * in the metadata section. If the peptide was not identified by the 685 * specified search engine, “null” MUST be reported. 686 * 687 * @param column SHOULD NOT be set to null 688 * @param bestSearchEngineScore SHOULD NOT be empty. 689 * @return a {@link java.lang.Double} object. 690 */ 691 protected Double checkBestSearchEngineScore(IMZTabColumn column, 692 String bestSearchEngineScore) { 693 return checkDouble(column, bestSearchEngineScore); 694 } 695 696 /** 697 * The search engine score for the given peptide in the defined ms run. The 698 * type of score MUST be defined in the metadata section. If the peptide was 699 * not identified by the specified search engine “null” must be reported. 700 * 701 * @param column SHOULD NOT be set to null 702 * @param searchEngineScore SHOULD NOT be empty. 703 * @return a {@link java.lang.Double} object. 
704 */ 705 protected Double checkSearchEngineScore(IMZTabColumn column, 706 String searchEngineScore) { 707 return checkDouble(column, searchEngineScore); 708 } 709 710 /** 711 * Check and translate numPSMs string into Integer. If exists error during 712 * parse, raise 713 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Integer} 714 * error. Normally, numPSMs may be set to "null"; in general "null" values 715 * SHOULD not be given. 716 * 717 * @param column SHOULD NOT be set to null 718 * @param numPSMs SHOULD NOT be empty. 719 * @return a {@link java.lang.Integer} object. 720 */ 721 protected Integer checkNumPSMs(IMZTabColumn column, String numPSMs) { 722 return checkInteger(column, numPSMs); 723 } 724 725 /** 726 * Check and translate numPeptidesDistinct string into Integer. If exists 727 * error during parse, raise 728 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Integer} 729 * error. Normally, numPeptidesDistinct can set "null", but in "Complete" 730 * file, in general "null" values SHOULD not be given. 731 * 732 * @param column SHOULD NOT be set to null 733 * @param numPeptidesDistinct SHOULD NOT be empty. 734 * @return a {@link java.lang.Integer} object. 735 */ 736 protected Integer checkNumPeptidesDistinct(IMZTabColumn column, 737 String numPeptidesDistinct) { 738 return checkInteger(column, numPeptidesDistinct); 739 } 740 741 /** 742 * Check and translate numPeptidesUnique string into Integer. If exists 743 * error during parse, raise 744 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Integer} 745 * error. Normally, numPeptidesUnique can set "null", but in "Complete" 746 * file, in general "null" values SHOULD not be given. 747 * 748 * @param column SHOULD NOT be set to null 749 * @param numPeptidesUnique SHOULD NOT be empty. 750 * @return a {@link java.lang.Integer} object. 
751 */ 752 protected Integer checkNumPeptidesUnique(IMZTabColumn column, 753 String numPeptidesUnique) { 754 return checkInteger(column, numPeptidesUnique); 755 } 756 757 /** 758 * Check and translate target string into parameter list which split by ',' 759 * character.. If parse is incorrect, throws 760 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#StringList} 761 * error. Normally, ambiguityMembers may be set to "null"; in general "null" 762 * values SHOULD not be given. 763 * 764 * @param column SHOULD NOT be set to null 765 * @param ambiguityMembers SHOULD NOT be empty. 766 * @return a {@link java.util.List} object. 767 */ 768 protected List<String> checkAmbiguityMembers(IMZTabColumn column, 769 String ambiguityMembers) { 770 return checkStringList(column, ambiguityMembers, COMMA); 771 } 772 773 /** 774 * Checks the provided URI string. 775 * 776 * @param column SHOULD NOT be set to null 777 * @param uri a {@link java.lang.String} object, conforming to URI format. 778 * @return the uri as an ASCII encoded string. 779 */ 780 protected String checkURI(IMZTabColumn column, String uri) { 781 String result_uri = checkData(column, uri, true); 782 783 if (result_uri == null || result_uri.equalsIgnoreCase(NULL)) { 784 return null; 785 } 786 787 java.net.URI result = parseURI(result_uri); 788 if (result == null) { 789 this.errorList.add(new MZTabError(FormatErrorType.URI, lineNumber, 790 "Column " + column.getHeader(), result_uri)); 791 return null; 792 } else { 793 return result.toASCIIString(); 794 } 795 } 796 797 /** 798 * Check and translate spectraRef string into 799 * {@link de.isas.mztab2.model.SpectraRef} list. If parse incorrect, or 800 * ms_run not defined in metadata raise 801 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#SpectraRef} 802 * error. Normally, spectraRef may be set to "null"; in general "null" 803 * values SHOULD not be given. 
804 * 805 * @param column SHOULD NOT be set to null 806 * @param spectraRef SHOULD NOT be empty. 807 * @param context a 808 * {@link uk.ac.ebi.pride.jmztab2.utils.parser.MZTabParserContext} object. 809 * @return a {@link java.util.List} object. 810 */ 811 protected List<SpectraRef> checkSpectraRef(MZTabParserContext context, 812 IMZTabColumn column, String spectraRef) { 813 return checkSpectraRef(context, column, spectraRef, false); 814 } 815 816 /** 817 * Check and translate spectraRef string into 818 * {@link de.isas.mztab2.model.SpectraRef} list. If parse incorrect, or 819 * ms_run not defined in metadata raise 820 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#SpectraRef} 821 * error. Normally, spectraRef may be set to "null"; in general "null" 822 * values SHOULD not be given. 823 * 824 * @param column SHOULD NOT be set to null 825 * @param spectraRef SHOULD NOT be empty. 826 * @param context a 827 * {@link uk.ac.ebi.pride.jmztab2.utils.parser.MZTabParserContext} object. 828 * @param allowNull if true, allow null for value. Otherwise, an error will 829 * be added to the error list. 830 * @return a {@link java.util.List} object. 831 */ 832 protected List<SpectraRef> checkSpectraRef(MZTabParserContext context, 833 IMZTabColumn column, String spectraRef, boolean allowNull) { 834 String result_spectraRef = checkData(column, spectraRef, allowNull); 835 836 if (result_spectraRef == null || result_spectraRef. 
837 equalsIgnoreCase(NULL)) { 838 return new SplitList<>(BAR); 839 } 840 841 List<SpectraRef> refList = parseSpectraRefList(context, metadata, 842 result_spectraRef); 843 if (refList.isEmpty()) { 844 this.errorList.add(new MZTabError(FormatErrorType.SpectraRef, 845 lineNumber, column.getHeader(), result_spectraRef)); 846 } else { 847 for (SpectraRef ref : refList) { 848 MsRun run = ref.getMsRun(); 849 if (!Optional.ofNullable(run.getLocation()).isPresent()) { 850 //As the location can be null and the field is mandatory, this is not an error, it is a warning 851 this.errorList.add(new MZTabError( 852 LogicalErrorType.SpectraRef, lineNumber, column. 853 getHeader(), result_spectraRef, "ms_run[" + run. 854 getId() + "]-location")); 855 } 856 } 857 } 858 859 return refList; 860 } 861 862 /** 863 * Check target string. Normally, pre can set "null". "null" values should 864 * only be given, if no value is available and where the specification 865 * allows for "null" explicitly." 866 * 867 * @see #checkData(IMZTabColumn, String, boolean) 868 * @param column SHOULD NOT be set to null 869 * @param pre SHOULD NOT be empty. 870 * @return a {@link java.lang.String} object. 871 */ 872 protected String checkPre(IMZTabColumn column, String pre) { 873 return checkData(column, pre, true); 874 } 875 876 /** 877 * Check target string. Normally, post can set "null". But in "Complete" 878 * file, in general "null" values SHOULD not be given. 879 * 880 * @see #checkData(IMZTabColumn, String, boolean) 881 * @param column SHOULD NOT be set to null 882 * @param post SHOULD NOT be empty. 883 * @return a {@link java.lang.String} object. 884 */ 885 protected String checkPost(IMZTabColumn column, String post) { 886 return checkData(column, post, true); 887 } 888 889 /** 890 * Check target string. Normally, start can set "null". But in "Complete" 891 * file, in general "null" values SHOULD not be given. 
892 * 893 * @see #checkData(IMZTabColumn, String, boolean) 894 * @param column SHOULD NOT be set to null 895 * @param start SHOULD NOT be empty. 896 * @return a {@link java.lang.String} object. 897 */ 898 protected String checkStart(IMZTabColumn column, String start) { 899 return checkData(column, start, true); 900 } 901 902 /** 903 * Check target string. Normally, end can set "null". But in "Complete" 904 * file, in general "null" values SHOULD not be given. 905 * 906 * @see #checkData(IMZTabColumn, String, boolean) 907 * @param column SHOULD NOT be set to null 908 * @param end SHOULD NOT be empty. 909 * @return a {@link java.lang.String} object. 910 */ 911 protected String checkEnd(IMZTabColumn column, String end) { 912 return checkData(column, end, true); 913 } 914 915 /** 916 * Check and translate target string into string list which split by ',' 917 * character.. If parse is incorrect, throws 918 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#StringList} 919 * error. Besides, each item in list should be start with "GO:", otherwise 920 * system raise 921 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#GOTermList} 922 * error. Normally, go_terms may be set to "null"; in general "null" values 923 * SHOULD not be given. 924 * 925 * @param column SHOULD NOT be set to null 926 * @param go_terms SHOULD NOT be empty. 927 * @return a {@link java.util.List} object. 
928 */ 929 protected List<String> checkGOTerms(IMZTabColumn column, String go_terms) { 930 String result_go_terms = checkData(column, go_terms, true); 931 932 if (result_go_terms == null || result_go_terms.equalsIgnoreCase(NULL)) { 933 return new ArrayList<>(COMMA); 934 } 935 936 List<String> stringList = parseGOTermList(result_go_terms); 937 if (stringList.isEmpty()) { 938 this.errorList.add(new MZTabError(FormatErrorType.GOTermList, 939 lineNumber, column.getHeader(), result_go_terms)); 940 } 941 942 return stringList; 943 } 944 945 /** 946 * Check and translate protein_coverage string into Double. If parse is 947 * incorrect, throws 948 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Double} 949 * error. protein_coverage range should be in the [0, 1), otherwise raise 950 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.LogicalErrorType#ProteinCoverage} 951 * error. 952 * 953 * NOTICE: If ratios are included and the denominator is zero, the "INF" 954 * value MUST be used. If the result leads to calculation errors (for 955 * example 0/0), this MUST be reported as "not a number" ("NaN"). 956 * 957 * @param column SHOULD NOT be set to null 958 * @param protein_coverage SHOULD NOT be empty. 959 * @return a {@link java.lang.Double} object. 960 */ 961 protected Double checkProteinCoverage(IMZTabColumn column, 962 String protein_coverage) { 963 Double result = checkDouble(column, protein_coverage); 964 965 if (result == null) { 966 return null; 967 } 968 969 if (result < 0 || result > 1) { 970 this.errorList.add(new MZTabError(LogicalErrorType.ProteinCoverage, 971 lineNumber, column.getHeader(), printDouble(result))); 972 return null; 973 } 974 975 return result; 976 } 977 978 /** 979 * Check and translate peptide sequence. 'O' and 'U' are encoded by codons 980 * that are usually interpreted as stop codons, which can not displayed in 981 * the sequence. 
So, if find it, system raise 982 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Sequence} 983 * error. 984 * 985 * @param column SHOULD NOT be set to null 986 * @param sequence SHOULD NOT be empty. 987 * @return a {@link java.lang.String} object. 988 */ 989 protected String checkSequence(IMZTabColumn column, String sequence) { 990 String result = checkData(column, sequence, true); 991 992 if (result == null) { 993 return null; 994 } 995 996 result = result.toUpperCase(); 997 998 Pattern pattern = Pattern.compile("[OU]"); 999 Matcher matcher = pattern.matcher(result); 1000 if (matcher.find()) { 1001 this.errorList.add(new MZTabError(FormatErrorType.Sequence, 1002 lineNumber, column.getHeader(), sequence)); 1003 } 1004 1005 return result; 1006 } 1007 1008 /** 1009 * Check and translate psm_id string into Integer. If exists error during 1010 * parse, raise 1011 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Integer} 1012 * error. Normally, psm_id may be set to "null"; in general "null" values 1013 * SHOULD not be given. 1014 * 1015 * @param column SHOULD NOT be set to null 1016 * @param psm_id SHOULD NOT be empty. 1017 * @return a {@link java.lang.Integer} object. 1018 */ 1019 protected Integer checkPSMID(IMZTabColumn column, String psm_id) { 1020 return checkInteger(column, psm_id); 1021 } 1022 1023 /** 1024 * Check and translate unique to 1025 * {@link uk.ac.ebi.pride.jmztab2.model.MZBoolean}. Only "0" and "1" allow 1026 * used in express Boolean (0/1). If parse is incorrect, throws 1027 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#MZBoolean} 1028 * error. 1029 * 1030 * @param column SHOULD NOT be set to null 1031 * @param unique SHOULD NOT be empty. 1032 * @return a {@link uk.ac.ebi.pride.jmztab2.model.MZBoolean} object. 
1033 */ 1034 protected MZBoolean checkUnique(IMZTabColumn column, String unique) { 1035 return checkMZBoolean(column, unique); 1036 } 1037 1038 /** 1039 * Check and translate charge string into Integer. If exists error during 1040 * parse, raise 1041 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Integer} 1042 * error. Normally, charge may be set to "null"; in general "null" values 1043 * SHOULD not be given. 1044 * 1045 * @param column SHOULD NOT be set to null 1046 * @param charge SHOULD NOT be empty. 1047 * @return a {@link java.lang.Integer} object. 1048 */ 1049 protected Integer checkCharge(IMZTabColumn column, String charge) { 1050 return checkInteger(column, charge); 1051 } 1052 1053 /** 1054 * Check and translate mass_to_charge string into Double. If parse is 1055 * incorrect, throws 1056 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Double} 1057 * error. 1058 * 1059 * NOTICE: If ratios are included and the denominator is zero, the "INF" 1060 * value MUST be used. If the result leads to calculation errors (for 1061 * example 0/0), this MUST be reported as "not a number" ("NaN"). 1062 * 1063 * @param column SHOULD NOT be set to null 1064 * @param mass_to_charge SHOULD NOT be empty. 1065 * @return a {@link java.lang.Double} object. 1066 */ 1067 protected Double checkMassToCharge(IMZTabColumn column, 1068 String mass_to_charge) { 1069 return checkDouble(column, mass_to_charge); 1070 } 1071 1072 /** 1073 * Check and translate exp_mass_to_charge string into Double. If parse is 1074 * incorrect, throws 1075 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Double} 1076 * error. 1077 * 1078 * NOTICE: If ratios are included and the denominator is zero, the "INF" 1079 * value MUST be used. If the result leads to calculation errors (for 1080 * example 0/0), this MUST be reported as "not a number" ("NaN"). 1081 * 1082 * @param column SHOULD NOT be set to null 1083 * @param exp_mass_to_charge SHOULD NOT be empty. 
1084 * @return a {@link java.lang.Double} object. 1085 */ 1086 protected Double checkExpMassToCharge(IMZTabColumn column, 1087 String exp_mass_to_charge) { 1088 return checkDouble(column, exp_mass_to_charge); 1089 } 1090 1091 /** 1092 * Check and translate calc_mass_to_charge string into Double. If parse is 1093 * incorrect, throws 1094 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Double} 1095 * error. 1096 * 1097 * NOTICE: If ratios are included and the denominator is zero, the "INF" 1098 * value MUST be used. If the result leads to calculation errors (for 1099 * example 0/0), this MUST be reported as "not a number" ("NaN"). 1100 * 1101 * @param column SHOULD NOT be set to null 1102 * @param calc_mass_to_charge SHOULD NOT be empty. 1103 * @return a {@link java.lang.Double} object. 1104 */ 1105 protected Double checkCalcMassToCharge(IMZTabColumn column, 1106 String calc_mass_to_charge) { 1107 return checkDouble(column, calc_mass_to_charge); 1108 } 1109 1110 /** 1111 * Check and translate identifier string into string list which split by '|' 1112 * character.. If parse is incorrect, throws 1113 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#StringList} 1114 * error. Normally, identifier may be set to "null"; in general "null" 1115 * values SHOULD not be given. 1116 * 1117 * @param column SHOULD NOT be set to null 1118 * @param identifier SHOULD NOT be empty. 1119 * @return a {@link java.util.List} object. 1120 */ 1121 protected List<String> checkIdentifier(IMZTabColumn column, 1122 String identifier) { 1123 return checkStringList(column, identifier, BAR); 1124 } 1125 1126 /** 1127 * Check chemical_formula string. Normally, chemical_formula can set "null". 1128 * But in "Complete" file, in general "null" values SHOULD not be given. 1129 * 1130 * @see #checkData(IMZTabColumn, String, boolean) 1131 * @param column SHOULD NOT be set to null 1132 * @param chemical_formula SHOULD NOT be empty. 
1133 * @return a {@link java.lang.String} object. 1134 */ 1135 protected String checkChemicalFormula(IMZTabColumn column, 1136 String chemical_formula) { 1137 return checkData(column, chemical_formula, true); 1138 } 1139 1140 /** 1141 * Check and translate smiles string into parameter list which split by '|' 1142 * character.. If parse is incorrect, throws 1143 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#StringList} 1144 * error. Normally, smiles may be set to "null"; in general "null" values 1145 * SHOULD not be given. 1146 * 1147 * @param column SHOULD NOT be set to null 1148 * @param smiles SHOULD NOT be empty. 1149 * @return a {@link java.util.List} object. 1150 */ 1151 protected List<String> checkSmiles(IMZTabColumn column, String smiles) { 1152 return checkStringList(column, smiles, BAR); 1153 } 1154 1155 /** 1156 * Check and translate inchi_key string into parameter list which split by 1157 * '|' character.. If parse is incorrect, throws 1158 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#StringList} 1159 * error. Normally, inchi_key may be set to "null"; in general "null" values 1160 * SHOULD not be given. 1161 * 1162 * @param column SHOULD NOT be set to null 1163 * @param inchi_key SHOULD NOT be empty. 1164 * @return a {@link java.util.List} object. 1165 */ 1166 protected List<String> checkInchiKey(IMZTabColumn column, String inchi_key) { 1167 return checkStringList(column, inchi_key, BAR); 1168 } 1169 1170 /** 1171 * Check and translate retention_time string into Double list which split by 1172 * '|' character.. If parse is incorrect, throws 1173 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#DoubleList} 1174 * error. Normally, retention_time may be set to "null"; in general "null" 1175 * values SHOULD not be given. 1176 * 1177 * @param column SHOULD NOT be set to null 1178 * @param retention_time SHOULD NOT be empty. 1179 * @return a {@link java.util.List} object. 
*/
    protected List<Double> checkRetentionTime(IMZTabColumn column,
        String retention_time) {
        return checkBarSeparatedDoubleList(column, retention_time);
    }

    /**
     * Check and translate retention_time_window string into a Double list,
     * split on the '|' character. If the value is not "null" and parsing
     * yields no entries, a
     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#DoubleList}
     * error is added. Normally, retention_time_window can set "null", but in
     * "Complete" file, in general "null" values SHOULD not be given.
     *
     * @param column SHOULD NOT be set to null
     * @param retention_time_window SHOULD NOT be empty.
     * @return the parsed values; empty if the value was "null" or could not
     * be parsed.
     */
    protected List<Double> checkRetentionTimeWindow(IMZTabColumn column,
        String retention_time_window) {
        return checkBarSeparatedDoubleList(column, retention_time_window);
    }

    /**
     * Shared implementation for columns holding a BAR-separated list of
     * doubles: validates the raw value with null allowed, returns an empty
     * list for "null", and reports a DoubleList format error when a
     * non-"null" value parses to an empty list.
     *
     * @param column the column the value belongs to; its header is used in
     * the error message.
     * @param target the raw cell value.
     * @return the parsed values, possibly empty.
     */
    private List<Double> checkBarSeparatedDoubleList(IMZTabColumn column,
        String target) {
        String result = checkData(column, target, true);

        if (result == null || result.equalsIgnoreCase(NULL)) {
            return new SplitList<>(BAR);
        }

        List<Double> valueList = parseDoubleList(result);
        if (valueList.isEmpty()) {
            this.errorList.add(new MZTabError(FormatErrorType.DoubleList,
                lineNumber, column.getHeader(), result, "" + BAR));
        }

        return valueList;
    }
}