001/* 002 * Copyright 2018 Leibniz-Institut für Analytische Wissenschaften – ISAS – e.V.. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package uk.ac.ebi.pride.jmztab2.utils.parser; 017 018import de.isas.mztab2.io.serialization.ParameterConverter; 019import de.isas.mztab2.io.validators.SpectraRefValidator; 020import de.isas.mztab2.model.Metadata; 021import de.isas.mztab2.model.Parameter; 022import de.isas.mztab2.model.SpectraRef; 023import java.util.ArrayList; 024import java.util.Arrays; 025import java.util.List; 026import java.util.SortedMap; 027import java.util.regex.Matcher; 028import java.util.regex.Pattern; 029import lombok.extern.slf4j.Slf4j; 030import uk.ac.ebi.pride.jmztab2.model.IMZTabColumn; 031import uk.ac.ebi.pride.jmztab2.model.MZBoolean; 032import uk.ac.ebi.pride.jmztab2.model.MZTabColumnFactory; 033import uk.ac.ebi.pride.jmztab2.model.MZTabConstants; 034import static uk.ac.ebi.pride.jmztab2.model.MZTabConstants.*; 035import uk.ac.ebi.pride.jmztab2.model.MZTabUtils; 036import static uk.ac.ebi.pride.jmztab2.model.MZTabUtils.*; 037import uk.ac.ebi.pride.jmztab2.model.SplitList; 038import uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType; 039import uk.ac.ebi.pride.jmztab2.utils.errors.LogicalErrorType; 040import uk.ac.ebi.pride.jmztab2.utils.errors.MZTabError; 041import uk.ac.ebi.pride.jmztab2.utils.errors.MZTabErrorList; 042import uk.ac.ebi.pride.jmztab2.utils.errors.MZTabException; 043 044/** 045 * This class allows the validation and loading of the data into mzTab domain 046 * objects. 047 * 048 * NOTICE: {@link uk.ac.ebi.pride.jmztab2.model.MZTabColumnFactory} maintain a 049 * couple of {@link uk.ac.ebi.pride.jmztab2.model.IMZTabColumn} which have 050 * internal logical position and order. In physical mzTab file, we allow user 051 * not obey this logical position organized way, and provide their date with own 052 * order. In order to distinguish them, we use physical position (a positive 053 * integer) to record the column location in mzTab file. And use 054 * {@link uk.ac.ebi.pride.jmztab2.utils.parser.PositionMapping} structure to 055 * maintain the mapping between them. 056 * 057 * @param <T> the type of domain object the parser creates. 058 * @see SMLLineParser 059 * @see SMFLineParser 060 * @see SMELineParser 061 * @author qingwei 062 * @since 14/02/13 063 * 064 */ 065@Slf4j 066public abstract class MZTabDataLineParser<T> extends MZTabLineParser { 067 068 protected MZTabColumnFactory factory; 069 protected PositionMapping positionMapping; 070 protected SortedMap<String, Integer> exchangeMapping; // reverse the key and value of positionMapping. 071 072 protected SortedMap<Integer, IMZTabColumn> mapping; // logical position --> offset 073 protected Metadata metadata; 074 075 /** 076 * <p> 077 * Constructor for MZTabDataLineParser.</p> 078 * 079 * @param context a 080 * {@link uk.ac.ebi.pride.jmztab2.utils.parser.MZTabParserContext} object. 081 */ 082 protected MZTabDataLineParser(MZTabParserContext context) { 083 super(context); 084 } 085 086 /** 087 * Generate a mzTab data line parser. 088 * 089 * NOTICE: {@link uk.ac.ebi.pride.jmztab2.model.MZTabColumnFactory} maintain 090 * a couple of {@link uk.ac.ebi.pride.jmztab2.model.IMZTabColumn} which have 091 * internal logical position and order. In physical mzTab file, we allow 092 * user not obey this logical position organized way, and provide their date 093 * with own order. In order to distinguish them, we use physical position (a 094 * positive integer) to record the column location in mzTab file. And use 095 * {@link uk.ac.ebi.pride.jmztab2.utils.parser.PositionMapping} structure 096 * the maintain the mapping between them. 097 * 098 * @param context the parser context, keeping dynamic state and lookup 099 * associations. 100 * @param factory SHOULD NOT be set to null 101 * @param positionMapping SHOULD NOT be set to null 102 * @param metadata SHOULD NOT be set to null 103 * @param errorList a 104 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.MZTabErrorList} object. 105 */ 106 protected MZTabDataLineParser(MZTabParserContext context, 107 MZTabColumnFactory factory, PositionMapping positionMapping, 108 Metadata metadata, MZTabErrorList errorList) { 109 this(context); 110 if (factory == null) { 111 throw new NullPointerException( 112 "Column header factory should be created first."); 113 } 114 this.factory = factory; 115 116 this.positionMapping = positionMapping; 117 this.exchangeMapping = positionMapping.reverse(); 118 this.mapping = factory.getOffsetColumnsMap(); 119 120 if (metadata == null) { 121 throw new NullPointerException("Metadata should be parsed first."); 122 } 123 this.metadata = metadata; 124 this.errorList = errorList == null ? new MZTabErrorList() : errorList; 125 } 126 127 /** 128 * {@inheritDoc} 129 * 130 * Validate and parse the data line, if there exist errors, add them into 131 * {@link MZTabErrorList}. 132 */ 133 @Override 134 public void parse(int lineNumber, String line, MZTabErrorList errorList) throws MZTabException { 135 super.parse(lineNumber, line, errorList); 136 checkCount(); 137 138 int offset = checkData(); 139 if (offset != items.length) { 140 log.error( 141 "Number of expected items after parsing header is: {} but data line has: {} items!", 142 offset, 143 items.length); 144 log.error("Current mapping is: {}", mapping); 145 log.error("Items given: {} expected: {}", Arrays.toString(items), 146 Arrays.toString(line.split("\\t"))); 147 this.errorList.add(new MZTabError(FormatErrorType.CountMatch, 148 lineNumber, "" + offset, "" + items.length)); 149 } 150 } 151 152 /** 153 * Check header line items size equals data line items size. The number of 154 * Data line items does not match with the number of Header line items. 155 * Normally, the user has not used the Unicode Horizontal Tab character 156 * (Unicode codepoint 0009) as the column delimiter, there is a file 157 * encoding error, or the user has not provided the definition of optional 158 * columns in the header line. 159 */ 160 private void checkCount() { 161 int headerCount = mapping.size(); 162 int dataCount = items.length - 1; 163 164 if (headerCount != dataCount) { 165 log.error( 166 "Number of expected items after parsing header is: {} but data line has: {} items!", 167 headerCount, 168 dataCount); 169 log.error("Current mapping is: {}", mapping); 170 log.error("Items given: {} expected: {}", Arrays.toString(items), 171 Arrays.toString(line.split("\\t"))); 172 this.errorList.add(new MZTabError(FormatErrorType.CountMatch, 173 lineNumber, "" + dataCount, "" + headerCount)); 174 } 175 } 176 177 /** 178 * Retrieve the data line to a type mzTab domain object. 179 * 180 * @return a typed mzTab domain object. 181 */ 182 public abstract T getRecord(); 183 184 /** 185 * Check and translate the columns into mzTab elements. 186 * 187 * @return a int. 188 */ 189 protected abstract int checkData(); 190 191 /** 192 * load best_search_engine_score[id], read id value. 193 * 194 * @param bestSearchEngineScoreLabel a {@link java.lang.String} object. 195 * @return a {@link java.lang.Integer} object. 196 */ 197 protected Integer loadBestSearchEngineScoreId( 198 String bestSearchEngineScoreLabel) { 199 Pattern pattern = Pattern.compile( 200 "search_engine_score\\[(\\d+)\\](\\w+)?"); 201 Matcher matcher = pattern.matcher(bestSearchEngineScoreLabel); 202 203 if (matcher.find()) { 204 return new Integer(matcher.group(1)); 205 } 206 207 return null; 208 } 209 210 /** 211 * load search_engine_score[id]_ms_run[..], read id value. 212 * 213 * @param searchEngineLabel a {@link java.lang.String} object. 214 * @return a {@link java.lang.Integer} object. 215 */ 216 protected Integer loadSearchEngineScoreId(String searchEngineLabel) { 217 Pattern pattern = Pattern.compile("search_engine_score\\[(\\d+)\\]\\w*"); 218 Matcher matcher = pattern.matcher(searchEngineLabel); 219 220 if (matcher.find()) { 221 return new Integer(matcher.group(1)); 222 } 223 224 return null; 225 } 226 227 /** 228 * In the table-based sections (protein, peptide, and small molecule) there 229 * MUST NOT be any empty cells. Some field not allow "null" value, for 230 * example unit_id, accession and so on. In "Complete" file, in general 231 * "null" values SHOULD not be given. 232 * 233 * @param column SHOULD NOT be set to null 234 * @param target SHOULD NOT be empty. 235 * @param allowNull a boolean. 236 * @return a {@link java.lang.String} object. 237 */ 238 protected String checkData(IMZTabColumn column, String target, 239 boolean allowNull) { 240 if (target == null && allowNull) { 241 return null; 242 } 243 if (target == null) { 244 this.errorList.add(new MZTabError(LogicalErrorType.NULL, lineNumber, 245 column.getHeader())); 246 return null; 247 } 248 249 target = target.trim(); 250 if (target.isEmpty()) { 251 this.errorList.add(new MZTabError(LogicalErrorType.NULL, lineNumber, 252 column.getHeader())); 253 return null; 254 } 255 if (MZTabConstants.NULL.equals(target) && !allowNull) { 256 this.errorList.add(new MZTabError(LogicalErrorType.NULL, lineNumber, 257 column.getHeader())); 258 return null; 259 } 260 261 return target; 262 } 263 264 /** 265 * In the table-based sections (protein, peptide, and small molecule) there 266 * MUST NOT be any empty cells. Some field not allow "null" value, for 267 * example unit_id, accession and so on. In "Complete" file, in general 268 * "null" values SHOULD not be given. 269 * 270 * @param column SHOULD NOT be set to null 271 * @param target SHOULD NOT be empty. 272 * @return a {@link java.lang.String} object. 273 */ 274 protected String checkString(IMZTabColumn column, String target) { 275 return checkData(column, target, true); 276 } 277 278 /** 279 * In the table-based sections (protein, peptide, and small molecule) there 280 * MUST NOT be any empty cells. Some field not allow "null" value, for 281 * example unit_id, accession and so on. In "Complete" file, in general 282 * "null" values SHOULD not be given. 283 * 284 * @param column SHOULD NOT be set to null 285 * @param target SHOULD NOT be empty. 286 * @param allowNull if true, null target values will pass the check, if 287 * false, the check will raise an error in the error list. 288 * @return a {@link java.lang.String} object. 289 */ 290 protected String checkString(IMZTabColumn column, String target, 291 boolean allowNull) { 292 return checkData(column, target, allowNull); 293 } 294 295 /** 296 * Check and translate target string into Integer. If parse is incorrect, 297 * throws 298 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Integer} 299 * error. 300 * 301 * @param column SHOULD NOT be set to null 302 * @param target SHOULD NOT be empty. 303 * @return a {@link java.lang.Integer} object. 304 */ 305 protected Integer checkInteger(IMZTabColumn column, String target) { 306 return checkInteger(column, target, true); 307 } 308 309 /** 310 * Check and translate target string into Integer. If parse is incorrect, 311 * throws 312 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Integer} 313 * error. 314 * 315 * @param column SHOULD NOT be set to null 316 * @param target SHOULD NOT be empty. 317 * @param allowNull if true, null target values will pass the check, if 318 * false, the check will raise an error in the error list. 319 * @return a {@link java.lang.Integer} object. 320 */ 321 protected Integer checkInteger(IMZTabColumn column, String target, 322 boolean allowNull) { 323 String result = checkData(column, target, allowNull); 324 325 if (result == null || result.equalsIgnoreCase(NULL)) { 326 return null; 327 } 328 329 Integer value = parseInteger(result); 330 if (value == null) { 331 this.errorList.add(new MZTabError(FormatErrorType.Integer, 332 lineNumber, column.getHeader(), target)); 333 } 334 335 return value; 336 } 337 338 /** 339 * Check and translate target string into Double. If parse is incorrect, 340 * throws 341 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Double} 342 * error. 343 * 344 * NOTICE: If ratios are included and the denominator is zero, the "INF" 345 * value MUST be used. If the result leads to calculation errors (for 346 * example 0/0), this MUST be reported as "not a number" ("NaN"). 347 * 348 * @param column SHOULD NOT be set to null 349 * @param target SHOULD NOT be empty. 350 * @return a {@link java.lang.Double} object. 351 */ 352 protected Double checkDouble(IMZTabColumn column, String target) { 353 return checkDouble(column, target, true); 354 } 355 356 /** 357 * Check and translate target string into Double. If parse is incorrect, 358 * throws 359 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Double} 360 * error. 361 * 362 * NOTICE: If ratios are included and the denominator is zero, the "INF" 363 * value MUST be used. If the result leads to calculation errors (for 364 * example 0/0), this MUST be reported as "not a number" ("NaN"). 365 * 366 * @param column SHOULD NOT be set to null 367 * @param target SHOULD NOT be empty. 368 * @param allowNull if true, null target values will pass the check, if 369 * false, the check will raise an error in the error list. 370 * @return a {@link java.lang.Double} object. 371 */ 372 protected Double checkDouble(IMZTabColumn column, String target, 373 boolean allowNull) { 374 String result = checkData(column, target, allowNull); 375 376 if (result == null || result.equalsIgnoreCase(NULL)) { 377 return null; 378 } 379 380 Double value = parseDouble(result); 381 if (value == null) { 382 this.errorList.add( 383 new MZTabError(FormatErrorType.Double, lineNumber, column. 384 getHeader(), target)); 385 return null; 386 } 387 if (value.equals(Double.NaN) || value.equals(Double.POSITIVE_INFINITY)) { 388 return value; 389 } 390 391 return value; 392 } 393 394 /** 395 * Check and translate target string into parameter list which split by '|' 396 * character.. If parse is incorrect, throws 397 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#ParamList} 398 * error. 399 * 400 * @param column SHOULD NOT be set to null 401 * @param target SHOULD NOT be empty. 402 * @return a {@link java.util.List} object. 403 */ 404 protected List<Parameter> checkParamList(IMZTabColumn column, String target) { 405 String result = checkData(column, target, true); 406 407 if (result == null || result.equalsIgnoreCase(NULL)) { 408 return new ArrayList<>(BAR); 409 } 410 411 List<Parameter> paramList = parseParamList(result); 412 if (paramList.isEmpty()) { 413 this.errorList.add(new MZTabError(FormatErrorType.ParamList, 414 lineNumber, "Column " + column.getHeader(), target)); 415 } 416 for (Parameter param : paramList) { 417 if (param != null && param.getCvAccession() != null && !param. 418 getCvAccession(). 419 isEmpty()) { 420 if (!param.getCvAccession(). 421 contains(":")) { 422 this.errorList.add(new MZTabError( 423 FormatErrorType.ParamAccessionNotNamespaced, lineNumber, 424 column.getHeader(), param.getCvAccession(), 425 new ParameterConverter().convert(param))); 426 } 427 } 428 } 429 430 return paramList; 431 } 432 433 /** 434 * <p> 435 * checkParameter.</p> 436 * 437 * @param column a {@link uk.ac.ebi.pride.jmztab2.model.IMZTabColumn} 438 * object. 439 * @param target a {@link java.lang.String} object. 440 * @param allowNull a boolean. 441 * @return a {@link de.isas.mztab2.model.Parameter} object. 442 */ 443 protected Parameter checkParameter(IMZTabColumn column, String target, 444 boolean allowNull) { 445 String result = checkData(column, target, true); 446 if (result == null || (result.equalsIgnoreCase(NULL) && !allowNull)) { 447 this.errorList.add(new MZTabError(FormatErrorType.Param, lineNumber, 448 "Column " + column.getHeader(), target)); 449 } 450 Parameter param = MZTabUtils.parseParam(target); 451 if (param != null && param.getCvAccession() != null && !param. 452 getCvAccession(). 453 isEmpty()) { 454 if (!param.getCvAccession(). 455 contains(":")) { 456 this.errorList.add(new MZTabError( 457 FormatErrorType.ParamAccessionNotNamespaced, lineNumber, 458 column.getHeader(), param.getCvAccession(), 459 new ParameterConverter().convert(param))); 460 } 461 } else if (param == null && result != null && !result.isEmpty() && !(result. 462 equalsIgnoreCase(NULL))) { 463 this.errorList.add(new MZTabError(FormatErrorType.Param, lineNumber, 464 "Column " + column.getHeader(), target)); 465 } 466 return param; 467 } 468 469 /** 470 * Check and translate target string into parameter list which split by 471 * splitChar character.. If parse is incorrect, throws 472 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#StringList} 473 * error. 474 * 475 * @param column SHOULD NOT be set to null 476 * @param target SHOULD NOT be empty. 477 * @param splitChar a char. 478 * @return a {@link java.util.List} object. 479 */ 480 protected List<String> checkStringList(IMZTabColumn column, String target, 481 char splitChar) { 482 String result = checkData(column, target, true); 483 484 if (result == null || result.equalsIgnoreCase(NULL)) { 485 return new ArrayList<>(splitChar); 486 } 487 488 List<String> stringList = parseStringList(splitChar, result); 489 if (stringList.isEmpty()) { 490 this.errorList.add(new MZTabError(FormatErrorType.StringList, 491 lineNumber, column.getHeader(), result, "" + splitChar)); 492 } 493 494 return stringList; 495 } 496 497 /** 498 * Check and translate target string into integer list which split by 499 * splitChar character.. If parse is incorrect, throws 500 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#StringList} 501 * error. 502 * 503 * @param column SHOULD NOT be set to null 504 * @param target SHOULD NOT be empty. 505 * @param splitChar a char. 506 * @return a {@link java.util.List} object. 507 */ 508 protected List<Integer> checkIntegerList(IMZTabColumn column, String target, 509 char splitChar) { 510 return checkIntegerList(column, target, splitChar, true); 511 } 512 513 /** 514 * Check and translate target string into integer list which split by 515 * splitChar character.. If parse is incorrect, throws 516 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#StringList} 517 * error. 518 * 519 * @param column SHOULD NOT be set to null 520 * @param target SHOULD NOT be empty. 521 * @param splitChar a char. 522 * @param allowNull if true, null will be treated as a valid element of the 523 * list. Otherwise, an error will be added to the error list. 524 * @return a {@link java.util.List} object. 525 */ 526 protected List<Integer> checkIntegerList(IMZTabColumn column, String target, 527 char splitChar, boolean allowNull) { 528 String result = checkData(column, target, allowNull); 529 530 if (result == null || result.equalsIgnoreCase(NULL)) { 531 return new ArrayList<>(splitChar); 532 } 533 534 List<Integer> stringList = parseIntegerList(result); 535 if (stringList.isEmpty()) { 536 this.errorList.add(new MZTabError(FormatErrorType.IntegerList, 537 lineNumber, column.getHeader(), result, "" + splitChar)); 538 } 539 540 return stringList; 541 } 542 543 /** 544 * Check and translate target string into parameter list which split by 545 * splitChar character.. If parse is incorrect, throws 546 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#StringList} 547 * error. 548 * 549 * @param column SHOULD NOT be set to null 550 * @param target SHOULD NOT be empty. 551 * @return a {@link java.util.List} object. 552 */ 553 protected List<Double> checkDoubleList(IMZTabColumn column, String target) { 554 String result = checkData(column, target, true); 555 556 if (result == null || result.equalsIgnoreCase(NULL)) { 557 return new ArrayList<>(MZTabConstants.BAR); 558 } 559 560 List<Double> doubleList = parseDoubleList(target); 561 if (doubleList.isEmpty()) { 562 this.errorList.add(new MZTabError(FormatErrorType.DoubleList, 563 lineNumber, column.getHeader(), result, "" + MZTabConstants.BAR)); 564 } 565 566 return doubleList; 567 } 568 569 /** 570 * Check and translate target to 571 * {@link uk.ac.ebi.pride.jmztab2.model.MZBoolean}. Only "0" and "1" allow 572 * used in express Boolean (0/1). If parse is incorrect, throws 573 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#MZBoolean} 574 * error. 575 * 576 * @param column SHOULD NOT be set to null 577 * @param target SHOULD NOT be empty. 578 * @return a {@link uk.ac.ebi.pride.jmztab2.model.MZBoolean} object. 579 */ 580 protected MZBoolean checkMZBoolean(IMZTabColumn column, String target) { 581 String result = checkData(column, target, true); 582 583 if (result == null || result.equalsIgnoreCase(NULL)) { 584 return null; 585 } 586 587 MZBoolean value = MZBoolean.findBoolean(result); 588 if (value == null) { 589 this.errorList.add(new MZTabError(FormatErrorType.MZBoolean, 590 lineNumber, column.getHeader(), result)); 591 } 592 593 return value; 594 } 595 596 /** 597 * Check target string. Normally, description can set "null". But in 598 * "Complete" file, in general "null" values SHOULD not be given. 599 * 600 * @see #checkData(IMZTabColumn, String, boolean) 601 * @param column SHOULD NOT be set to null 602 * @param description SHOULD NOT be empty. 603 * @return a {@link java.lang.String} object. 604 */ 605 protected String checkDescription(IMZTabColumn column, String description) { 606 return checkData(column, description, true); 607 } 608 609 /** 610 * Check and translate taxid string into Integer. If exists error during 611 * parse, raise 612 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Integer} 613 * error. Normally, taxid may be set to "null"; in general "null" values 614 * SHOULD not be given. 615 * 616 * @param column SHOULD NOT be set to null 617 * @param taxid SHOULD NOT be empty. 618 * @return a {@link java.lang.Integer} object. 619 */ 620 protected Integer checkTaxid(IMZTabColumn column, String taxid) { 621 return checkInteger(column, taxid); 622 } 623 624 /** 625 * Check target string. Normally, species can set "null". But in "Complete" 626 * file, in general "null" values SHOULD not be given. 627 * 628 * @see #checkData(IMZTabColumn, String, boolean) 629 * @param column SHOULD NOT be set to null 630 * @param species SHOULD NOT be empty. 631 * @return a {@link java.lang.String} object. 632 */ 633 protected String checkSpecies(IMZTabColumn column, String species) { 634 return checkData(column, species, true); 635 } 636 637 /** 638 * Check target string. Normally, database can set "null". But in "Complete" 639 * file, in general "null" values SHOULD not be given. 640 * 641 * @see #checkData(IMZTabColumn, String, boolean) 642 * @param column SHOULD NOT be set to null 643 * @param database SHOULD NOT be empty. 644 * @return a {@link java.lang.String} object. 645 */ 646 protected String checkDatabase(IMZTabColumn column, String database) { 647 return checkData(column, database, true); 648 } 649 650 /** 651 * Check target string. Normally, databaseVersion can set "null". But in 652 * "Complete" file, in general "null" values SHOULD not be given. 653 * 654 * @see #checkData(IMZTabColumn, String, boolean) 655 * @param column SHOULD NOT be set to null 656 * @param databaseVersion SHOULD NOT be empty. 657 * @return a {@link java.lang.String} object. 658 */ 659 protected String checkDatabaseVersion(IMZTabColumn column, 660 String databaseVersion) { 661 return checkData(column, databaseVersion, true); 662 } 663 664 /** 665 * Check and translate searchEngine string into parameter list which split 666 * by '|' character.. If parse is incorrect, throws 667 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#ParamList} 668 * error. Normally, searchEngine may be set to "null"; in general "null" 669 * values SHOULD not be given. 670 * 671 * @param column SHOULD NOT be set to null 672 * @param searchEngine SHOULD NOT be empty. 673 * @return a {@link java.util.List} object. 674 */ 675 protected List<Parameter> checkSearchEngine(IMZTabColumn column, 676 String searchEngine) { 677 return checkParamList(column, searchEngine); 678 } 679 680 /** 681 * The best search engine score (for this type of score) for the given 682 * peptide across all replicates reported. The type of score MUST be defined 683 * in the metadata section. If the peptide was not identified by the 684 * specified search engine, “null” MUST be reported. 685 * 686 * @param column SHOULD NOT be set to null 687 * @param bestSearchEngineScore SHOULD NOT be empty. 688 * @return a {@link java.lang.Double} object. 689 */ 690 protected Double checkBestSearchEngineScore(IMZTabColumn column, 691 String bestSearchEngineScore) { 692 return checkDouble(column, bestSearchEngineScore); 693 } 694 695 /** 696 * The search engine score for the given peptide in the defined ms run. The 697 * type of score MUST be defined in the metadata section. If the peptide was 698 * not identified by the specified search engine “null” must be reported. 699 * 700 * @param column SHOULD NOT be set to null 701 * @param searchEngineScore SHOULD NOT be empty. 702 * @return a {@link java.lang.Double} object. 703 */ 704 protected Double checkSearchEngineScore(IMZTabColumn column, 705 String searchEngineScore) { 706 return checkDouble(column, searchEngineScore); 707 } 708 709 /** 710 * Check and translate numPSMs string into Integer. If exists error during 711 * parse, raise 712 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Integer} 713 * error. Normally, numPSMs may be set to "null"; in general "null" values 714 * SHOULD not be given. 715 * 716 * @param column SHOULD NOT be set to null 717 * @param numPSMs SHOULD NOT be empty. 718 * @return a {@link java.lang.Integer} object. 719 */ 720 protected Integer checkNumPSMs(IMZTabColumn column, String numPSMs) { 721 return checkInteger(column, numPSMs); 722 } 723 724 /** 725 * Check and translate numPeptidesDistinct string into Integer. If exists 726 * error during parse, raise 727 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Integer} 728 * error. Normally, numPeptidesDistinct can set "null", but in "Complete" 729 * file, in general "null" values SHOULD not be given. 730 * 731 * @param column SHOULD NOT be set to null 732 * @param numPeptidesDistinct SHOULD NOT be empty. 733 * @return a {@link java.lang.Integer} object. 734 */ 735 protected Integer checkNumPeptidesDistinct(IMZTabColumn column, 736 String numPeptidesDistinct) { 737 return checkInteger(column, numPeptidesDistinct); 738 } 739 740 /** 741 * Check and translate numPeptidesUnique string into Integer. If exists 742 * error during parse, raise 743 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Integer} 744 * error. Normally, numPeptidesUnique can set "null", but in "Complete" 745 * file, in general "null" values SHOULD not be given. 746 * 747 * @param column SHOULD NOT be set to null 748 * @param numPeptidesUnique SHOULD NOT be empty. 749 * @return a {@link java.lang.Integer} object. 750 */ 751 protected Integer checkNumPeptidesUnique(IMZTabColumn column, 752 String numPeptidesUnique) { 753 return checkInteger(column, numPeptidesUnique); 754 } 755 756 /** 757 * Check and translate target string into parameter list which split by ',' 758 * character.. If parse is incorrect, throws 759 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#StringList} 760 * error. Normally, ambiguityMembers may be set to "null"; in general "null" 761 * values SHOULD not be given. 762 * 763 * @param column SHOULD NOT be set to null 764 * @param ambiguityMembers SHOULD NOT be empty. 765 * @return a {@link java.util.List} object. 766 */ 767 protected List<String> checkAmbiguityMembers(IMZTabColumn column, 768 String ambiguityMembers) { 769 return checkStringList(column, ambiguityMembers, COMMA); 770 } 771 772 /** 773 * Checks the provided URI string. 774 * 775 * @param column SHOULD NOT be set to null 776 * @param uri a {@link java.lang.String} object, conforming to URI format. 777 * @return the uri as an ASCII encoded string. 778 */ 779 protected String checkURI(IMZTabColumn column, String uri) { 780 String result_uri = checkData(column, uri, true); 781 782 if (result_uri == null || result_uri.equalsIgnoreCase(NULL)) { 783 return null; 784 } 785 786 java.net.URI result = parseURI(result_uri); 787 if (result == null) { 788 this.errorList.add(new MZTabError(FormatErrorType.URI, lineNumber, 789 "Column " + column.getHeader(), result_uri)); 790 return null; 791 } else { 792 return result.toASCIIString(); 793 } 794 } 795 796 /** 797 * Check and translate spectraRef string into 798 * {@link de.isas.mztab2.model.SpectraRef} list. If parse incorrect, or 799 * ms_run not defined in metadata raise 800 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#SpectraRef} 801 * error. Normally, spectraRef may be set to "null"; in general "null" 802 * values SHOULD not be given. 803 * 804 * @param column SHOULD NOT be set to null 805 * @param spectraRef SHOULD NOT be empty. 806 * @param context a 807 * {@link uk.ac.ebi.pride.jmztab2.utils.parser.MZTabParserContext} object. 808 * @param allowNull if true, allow null for value. Otherwise, an error will 809 * be added to the error list. 810 * @return a {@link java.util.List} object. 811 */ 812 protected List<SpectraRef> checkSpectraRef(MZTabParserContext context, 813 IMZTabColumn column, String spectraRef, boolean allowNull) { 814 String result_spectraRef = checkData(column, spectraRef, allowNull); 815 816 if (result_spectraRef == null || result_spectraRef. 817 equalsIgnoreCase(NULL)) { 818 return new SplitList<>(BAR); 819 } 820 821 List<SpectraRef> refList = parseSpectraRefList(context, metadata, 822 result_spectraRef); 823 SpectraRefValidator validator = new SpectraRefValidator(); 824 this.errorList.addAll( 825 validator.validateLine( 826 lineNumber, 827 context, 828 column, 829 result_spectraRef, 830 refList 831 ) 832 ); 833 return refList; 834 } 835 836 /** 837 * Check target string. Normally, pre can set "null". "null" values should 838 * only be given, if no value is available and where the specification 839 * allows for "null" explicitly." 840 * 841 * @see #checkData(IMZTabColumn, String, boolean) 842 * @param column SHOULD NOT be set to null 843 * @param pre SHOULD NOT be empty. 844 * @return a {@link java.lang.String} object. 845 */ 846 protected String checkPre(IMZTabColumn column, String pre) { 847 return checkData(column, pre, true); 848 } 849 850 /** 851 * Check target string. Normally, post can set "null". But in "Complete" 852 * file, in general "null" values SHOULD not be given. 853 * 854 * @see #checkData(IMZTabColumn, String, boolean) 855 * @param column SHOULD NOT be set to null 856 * @param post SHOULD NOT be empty. 857 * @return a {@link java.lang.String} object. 858 */ 859 protected String checkPost(IMZTabColumn column, String post) { 860 return checkData(column, post, true); 861 } 862 863 /** 864 * Check target string. Normally, start can set "null". But in "Complete" 865 * file, in general "null" values SHOULD not be given. 866 * 867 * @see #checkData(IMZTabColumn, String, boolean) 868 * @param column SHOULD NOT be set to null 869 * @param start SHOULD NOT be empty. 870 * @return a {@link java.lang.String} object. 871 */ 872 protected String checkStart(IMZTabColumn column, String start) { 873 return checkData(column, start, true); 874 } 875 876 /** 877 * Check target string. Normally, end can set "null". But in "Complete" 878 * file, in general "null" values SHOULD not be given. 879 * 880 * @see #checkData(IMZTabColumn, String, boolean) 881 * @param column SHOULD NOT be set to null 882 * @param end SHOULD NOT be empty. 883 * @return a {@link java.lang.String} object. 884 */ 885 protected String checkEnd(IMZTabColumn column, String end) { 886 return checkData(column, end, true); 887 } 888 889 /** 890 * Check and translate target string into string list which split by ',' 891 * character.. If parse is incorrect, throws 892 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#StringList} 893 * error. Besides, each item in list should be start with "GO:", otherwise 894 * system raise 895 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#GOTermList} 896 * error. Normally, go_terms may be set to "null"; in general "null" values 897 * SHOULD not be given. 898 * 899 * @param column SHOULD NOT be set to null 900 * @param go_terms SHOULD NOT be empty. 901 * @return a {@link java.util.List} object. 902 */ 903 protected List<String> checkGOTerms(IMZTabColumn column, String go_terms) { 904 String result_go_terms = checkData(column, go_terms, true); 905 906 if (result_go_terms == null || result_go_terms.equalsIgnoreCase(NULL)) { 907 return new ArrayList<>(COMMA); 908 } 909 910 List<String> stringList = parseGOTermList(result_go_terms); 911 if (stringList.isEmpty()) { 912 this.errorList.add(new MZTabError(FormatErrorType.GOTermList, 913 lineNumber, column.getHeader(), result_go_terms)); 914 } 915 916 return stringList; 917 } 918 919 /** 920 * Check and translate protein_coverage string into Double. If parse is 921 * incorrect, throws 922 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Double} 923 * error. protein_coverage range should be in the [0, 1), otherwise raise 924 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.LogicalErrorType#ProteinCoverage} 925 * error. 926 * 927 * NOTICE: If ratios are included and the denominator is zero, the "INF" 928 * value MUST be used. If the result leads to calculation errors (for 929 * example 0/0), this MUST be reported as "not a number" ("NaN"). 930 * 931 * @param column SHOULD NOT be set to null 932 * @param protein_coverage SHOULD NOT be empty. 933 * @return a {@link java.lang.Double} object. 934 */ 935 protected Double checkProteinCoverage(IMZTabColumn column, 936 String protein_coverage) { 937 Double result = checkDouble(column, protein_coverage); 938 939 if (result == null) { 940 return null; 941 } 942 943 if (result < 0 || result > 1) { 944 this.errorList.add(new MZTabError(LogicalErrorType.ProteinCoverage, 945 lineNumber, column.getHeader(), printDouble(result))); 946 return null; 947 } 948 949 return result; 950 } 951 952 /** 953 * Check and translate peptide sequence. 'O' and 'U' are encoded by codons 954 * that are usually interpreted as stop codons, which can not displayed in 955 * the sequence. So, if find it, system raise 956 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Sequence} 957 * error. 958 * 959 * @param column SHOULD NOT be set to null 960 * @param sequence SHOULD NOT be empty. 961 * @return a {@link java.lang.String} object. 962 */ 963 protected String checkSequence(IMZTabColumn column, String sequence) { 964 String result = checkData(column, sequence, true); 965 966 if (result == null) { 967 return null; 968 } 969 970 result = result.toUpperCase(); 971 972 Pattern pattern = Pattern.compile("[OU]"); 973 Matcher matcher = pattern.matcher(result); 974 if (matcher.find()) { 975 this.errorList.add(new MZTabError(FormatErrorType.Sequence, 976 lineNumber, column.getHeader(), sequence)); 977 } 978 979 return result; 980 } 981 982 /** 983 * Check and translate psm_id string into Integer. If exists error during 984 * parse, raise 985 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Integer} 986 * error. Normally, psm_id may be set to "null"; in general "null" values 987 * SHOULD not be given. 988 * 989 * @param column SHOULD NOT be set to null 990 * @param psm_id SHOULD NOT be empty. 991 * @return a {@link java.lang.Integer} object. 992 */ 993 protected Integer checkPSMID(IMZTabColumn column, String psm_id) { 994 return checkInteger(column, psm_id); 995 } 996 997 /** 998 * Check and translate unique to 999 * {@link uk.ac.ebi.pride.jmztab2.model.MZBoolean}. Only "0" and "1" allow 1000 * used in express Boolean (0/1). If parse is incorrect, throws 1001 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#MZBoolean} 1002 * error. 1003 * 1004 * @param column SHOULD NOT be set to null 1005 * @param unique SHOULD NOT be empty. 1006 * @return a {@link uk.ac.ebi.pride.jmztab2.model.MZBoolean} object. 1007 */ 1008 protected MZBoolean checkUnique(IMZTabColumn column, String unique) { 1009 return checkMZBoolean(column, unique); 1010 } 1011 1012 /** 1013 * Check and translate charge string into Integer. If exists error during 1014 * parse, raise 1015 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Integer} 1016 * error. Normally, charge may be set to "null"; in general "null" values 1017 * SHOULD not be given. 1018 * 1019 * @param column SHOULD NOT be set to null 1020 * @param charge SHOULD NOT be empty. 1021 * @return a {@link java.lang.Integer} object. 1022 */ 1023 protected Integer checkCharge(IMZTabColumn column, String charge) { 1024 return checkInteger(column, charge); 1025 } 1026 1027 /** 1028 * Check and translate mass_to_charge string into Double. If parse is 1029 * incorrect, throws 1030 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Double} 1031 * error. 1032 * 1033 * NOTICE: If ratios are included and the denominator is zero, the "INF" 1034 * value MUST be used. If the result leads to calculation errors (for 1035 * example 0/0), this MUST be reported as "not a number" ("NaN"). 1036 * 1037 * @param column SHOULD NOT be set to null 1038 * @param mass_to_charge SHOULD NOT be empty. 1039 * @return a {@link java.lang.Double} object. 1040 */ 1041 protected Double checkMassToCharge(IMZTabColumn column, 1042 String mass_to_charge) { 1043 return checkDouble(column, mass_to_charge); 1044 } 1045 1046 /** 1047 * Check and translate exp_mass_to_charge string into Double. If parse is 1048 * incorrect, throws 1049 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Double} 1050 * error. 1051 * 1052 * NOTICE: If ratios are included and the denominator is zero, the "INF" 1053 * value MUST be used. If the result leads to calculation errors (for 1054 * example 0/0), this MUST be reported as "not a number" ("NaN"). 1055 * 1056 * @param column SHOULD NOT be set to null 1057 * @param exp_mass_to_charge SHOULD NOT be empty. 1058 * @return a {@link java.lang.Double} object. 1059 */ 1060 protected Double checkExpMassToCharge(IMZTabColumn column, 1061 String exp_mass_to_charge) { 1062 return checkDouble(column, exp_mass_to_charge); 1063 } 1064 1065 /** 1066 * Check and translate calc_mass_to_charge string into Double. If parse is 1067 * incorrect, throws 1068 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Double} 1069 * error. 1070 * 1071 * NOTICE: If ratios are included and the denominator is zero, the "INF" 1072 * value MUST be used. If the result leads to calculation errors (for 1073 * example 0/0), this MUST be reported as "not a number" ("NaN"). 1074 * 1075 * @param column SHOULD NOT be set to null 1076 * @param calc_mass_to_charge SHOULD NOT be empty. 1077 * @return a {@link java.lang.Double} object. 1078 */ 1079 protected Double checkCalcMassToCharge(IMZTabColumn column, 1080 String calc_mass_to_charge) { 1081 return checkDouble(column, calc_mass_to_charge); 1082 } 1083 1084 /** 1085 * Check and translate identifier string into string list which split by '|' 1086 * character.. If parse is incorrect, throws 1087 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#StringList} 1088 * error. Normally, identifier may be set to "null"; in general "null" 1089 * values SHOULD not be given. 1090 * 1091 * @param column SHOULD NOT be set to null 1092 * @param identifier SHOULD NOT be empty. 1093 * @return a {@link java.util.List} object. 1094 */ 1095 protected List<String> checkIdentifier(IMZTabColumn column, 1096 String identifier) { 1097 return checkStringList(column, identifier, BAR); 1098 } 1099 1100 /** 1101 * Check chemical_formula string. Normally, chemical_formula can set "null". 1102 * But in "Complete" file, in general "null" values SHOULD not be given. 1103 * 1104 * @see #checkData(IMZTabColumn, String, boolean) 1105 * @param column SHOULD NOT be set to null 1106 * @param chemical_formula SHOULD NOT be empty. 1107 * @return a {@link java.lang.String} object. 1108 */ 1109 protected String checkChemicalFormula(IMZTabColumn column, 1110 String chemical_formula) { 1111 return checkData(column, chemical_formula, true); 1112 } 1113 1114 /** 1115 * Check and translate smiles string into parameter list which split by '|' 1116 * character.. If parse is incorrect, throws 1117 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#StringList} 1118 * error. Normally, smiles may be set to "null"; in general "null" values 1119 * SHOULD not be given. 1120 * 1121 * @param column SHOULD NOT be set to null 1122 * @param smiles SHOULD NOT be empty. 1123 * @return a {@link java.util.List} object. 1124 */ 1125 protected List<String> checkSmiles(IMZTabColumn column, String smiles) { 1126 return checkStringList(column, smiles, BAR); 1127 } 1128 1129 /** 1130 * Check and translate inchi_key string into parameter list which split by 1131 * '|' character.. If parse is incorrect, throws 1132 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#StringList} 1133 * error. Normally, inchi_key may be set to "null"; in general "null" values 1134 * SHOULD not be given. 1135 * 1136 * @param column SHOULD NOT be set to null 1137 * @param inchi_key SHOULD NOT be empty. 1138 * @return a {@link java.util.List} object. 1139 */ 1140 protected List<String> checkInchiKey(IMZTabColumn column, String inchi_key) { 1141 return checkStringList(column, inchi_key, BAR); 1142 } 1143 1144 /** 1145 * Check and translate retention_time string into Double list which split by 1146 * '|' character.. If parse is incorrect, throws 1147 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#DoubleList} 1148 * error. Normally, retention_time may be set to "null"; in general "null" 1149 * values SHOULD not be given. 1150 * 1151 * @param column SHOULD NOT be set to null 1152 * @param retention_time SHOULD NOT be empty. 1153 * @return a {@link java.util.List} object. 1154 */ 1155 protected List<Double> checkRetentionTime(IMZTabColumn column, 1156 String retention_time) { 1157 String result = checkData(column, retention_time, true); 1158 1159 if (result == null || result.equalsIgnoreCase(NULL)) { 1160 return new SplitList<>(BAR); 1161 } 1162 1163 List<Double> valueList = parseDoubleList(result); 1164 if (valueList.isEmpty()) { 1165 this.errorList.add(new MZTabError(FormatErrorType.DoubleList, 1166 lineNumber, column.getHeader(), result, "" + BAR)); 1167 } 1168 1169 return valueList; 1170 } 1171 1172 /** 1173 * Check and translate retention_time_window string into Double list which 1174 * split by '|' character.. If parse is incorrect, throws 1175 * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#DoubleList} 1176 * error. Normally, retention_time_window can set "null", but in "Complete" 1177 * file, in general "null" values SHOULD not be given. 1178 * 1179 * @param column SHOULD NOT be set to null 1180 * @param retention_time_window SHOULD NOT be empty. 1181 * @return a {@link java.util.List} object. 1182 */ 1183 protected List<Double> checkRetentionTimeWindow(IMZTabColumn column, 1184 String retention_time_window) { 1185 String result = checkData(column, retention_time_window, true); 1186 1187 if (result == null || result.equalsIgnoreCase(NULL)) { 1188 return new SplitList<>(BAR); 1189 } 1190 1191 List<Double> valueList = parseDoubleList(result); 1192 if (valueList.isEmpty()) { 1193 this.errorList.add(new MZTabError(FormatErrorType.DoubleList, 1194 lineNumber, column.getHeader(), result, "" + BAR)); 1195 } 1196 1197 return valueList; 1198 } 1199}