001/* 
002 * Copyright 2018 Leibniz-Institut für Analytische Wissenschaften – ISAS – e.V..
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package uk.ac.ebi.pride.jmztab2.utils.parser;
017
018import de.isas.mztab2.io.serialization.ParameterConverter;
019import de.isas.mztab2.model.Metadata;
020import de.isas.mztab2.model.MsRun;
021import de.isas.mztab2.model.Parameter;
022import de.isas.mztab2.model.SpectraRef;
023import java.util.ArrayList;
024import java.util.Arrays;
025import java.util.List;
026import java.util.Optional;
027import java.util.SortedMap;
028import java.util.regex.Matcher;
029import java.util.regex.Pattern;
030import lombok.extern.slf4j.Slf4j;
031import uk.ac.ebi.pride.jmztab2.model.IMZTabColumn;
032import uk.ac.ebi.pride.jmztab2.model.MZBoolean;
033import uk.ac.ebi.pride.jmztab2.model.MZTabColumnFactory;
034import uk.ac.ebi.pride.jmztab2.model.MZTabConstants;
035import static uk.ac.ebi.pride.jmztab2.model.MZTabConstants.*;
036import uk.ac.ebi.pride.jmztab2.model.MZTabUtils;
037import static uk.ac.ebi.pride.jmztab2.model.MZTabUtils.*;
038import uk.ac.ebi.pride.jmztab2.model.SplitList;
039import uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType;
040import uk.ac.ebi.pride.jmztab2.utils.errors.LogicalErrorType;
041import uk.ac.ebi.pride.jmztab2.utils.errors.MZTabError;
042import uk.ac.ebi.pride.jmztab2.utils.errors.MZTabErrorList;
043import uk.ac.ebi.pride.jmztab2.utils.errors.MZTabException;
044
045/**
046 * This class allows the validation and loading of the data into mzTab domain
047 * objects.
048 *
 * NOTICE: {@link uk.ac.ebi.pride.jmztab2.model.MZTabColumnFactory} maintains a
 * number of {@link uk.ac.ebi.pride.jmztab2.model.IMZTabColumn} objects which
 * have an internal logical position and order. In a physical mzTab file, users
 * are allowed to deviate from this logical ordering and to provide their data
 * in their own column order. To distinguish the two, a physical position (a
 * positive integer) records the column location in the mzTab file, and a
 * {@link uk.ac.ebi.pride.jmztab2.utils.parser.PositionMapping} structure
 * maintains the mapping between them.
057 *
058 * @param <T> the type of domain object the parser creates.
059 * @see SMLLineParser
060 * @see SMFLineParser
061 * @see SMELineParser
062 * @author qingwei
063 * @since 14/02/13
064 *
065 */
066@Slf4j
067public abstract class MZTabDataLineParser<T> extends MZTabLineParser {
068
    // Column factory of the section being parsed; assigned from the constructor argument.
    protected MZTabColumnFactory factory;
    // Position mapping handed to the constructor.
    protected PositionMapping positionMapping;
    // Result of positionMapping.reverse(): the key and value of positionMapping swapped.
    protected SortedMap<String, Integer> exchangeMapping;

    // Taken from factory.getOffsetColumnsMap(): logical position --> offset column.
    protected SortedMap<Integer, IMZTabColumn> mapping;
    // Metadata handed to the constructor.
    protected Metadata metadata;
075
    /**
     * Creates a data line parser that is only bound to the given parser
     * context. The column factory, position mapping, column mapping and
     * metadata fields are not assigned by this constructor.
     *
     * @param context the
     * {@link uk.ac.ebi.pride.jmztab2.utils.parser.MZTabParserContext} that is
     * passed on to the super class constructor.
     */
    protected MZTabDataLineParser(MZTabParserContext context) {
        super(context);
    }
086
087    /**
088     * Generate a mzTab data line parser.
089     *
090     * NOTICE: {@link uk.ac.ebi.pride.jmztab2.model.MZTabColumnFactory} maintain
091     * a couple of {@link uk.ac.ebi.pride.jmztab2.model.IMZTabColumn} which have
092     * internal logical position and order. In physical mzTab file, we allow
093     * user not obey this logical position organized way, and provide their date
094     * with own order. In order to distinguish them, we use physical position (a
095     * positive integer) to record the column location in mzTab file. And use
096     * {@link uk.ac.ebi.pride.jmztab2.utils.parser.PositionMapping} structure
097     * the maintain the mapping between them.
098     *
099     * @param context the parser context, keeping dynamic state and lookup
100     * associations.
101     * @param factory SHOULD NOT be set to null
102     * @param positionMapping SHOULD NOT be set to null
103     * @param metadata SHOULD NOT be set to null
104     * @param errorList a
105     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.MZTabErrorList} object.
106     */
107    protected MZTabDataLineParser(MZTabParserContext context,
108        MZTabColumnFactory factory, PositionMapping positionMapping,
109        Metadata metadata, MZTabErrorList errorList) {
110        this(context);
111        if (factory == null) {
112            throw new NullPointerException(
113                "Column header factory should be created first.");
114        }
115        this.factory = factory;
116
117        this.positionMapping = positionMapping;
118        this.exchangeMapping = positionMapping.reverse();
119        this.mapping = factory.getOffsetColumnsMap();
120
121        if (metadata == null) {
122            throw new NullPointerException("Metadata should be parsed first.");
123        }
124        this.metadata = metadata;
125        this.errorList = errorList == null ? new MZTabErrorList() : errorList;
126    }
127
128    /**
129     * {@inheritDoc}
130     *
131     * Validate and parse the data line, if there exist errors, add them into
132     * {@link MZTabErrorList}.
133     */
134    @Override
135    public void parse(int lineNumber, String line, MZTabErrorList errorList) throws MZTabException {
136        super.parse(lineNumber, line, errorList);
137        checkCount();
138
139        int offset = checkData();
140        if (offset != items.length) {
141            log.error(
142                "Number of expected items after parsing header is: {} but data line has: {} items!",
143                offset,
144                items.length);
145            log.error("Current mapping is: {}", mapping);
146            log.error("Items given: {} expected: {}", Arrays.toString(items),
147                Arrays.toString(line.split("\\t")));
148            this.errorList.add(new MZTabError(FormatErrorType.CountMatch,
149                lineNumber, "" + offset, "" + items.length));
150        }
151    }
152
153    /**
154     * Check header line items size equals data line items size. The number of
155     * Data line items does not match with the number of Header line items.
156     * Normally, the user has not used the Unicode Horizontal Tab character
157     * (Unicode codepoint 0009) as the column delimiter, there is a file
158     * encoding error, or the user has not provided the definition of optional
159     * columns in the header line.
160     */
161    private void checkCount() {
162        int headerCount = mapping.size();
163        int dataCount = items.length - 1;
164
165        if (headerCount != dataCount) {
166            log.error(
167                "Number of expected items after parsing header is: {} but data line has: {} items!",
168                headerCount,
169                dataCount);
170            log.error("Current mapping is: {}", mapping);
171            log.error("Items given: {} expected: {}", Arrays.toString(items),
172                Arrays.toString(line.split("\\t")));
173            this.errorList.add(new MZTabError(FormatErrorType.CountMatch,
174                lineNumber, "" + dataCount, "" + headerCount));
175        }
176    }
177
    /**
     * Returns the data line as a typed mzTab domain object. How and when the
     * record is assembled is left to the implementing class.
     *
     * @return a typed mzTab domain object.
     */
    public abstract T getRecord();
184
    /**
     * Check and translate the columns into mzTab elements.
     *
     * @return an int that {@link #parse(int, String, MZTabErrorList)}
     * compares against the number of items of the data line; a mismatch is
     * reported as a count error.
     */
    protected abstract int checkData();
191
192    /**
193     * load best_search_engine_score[id], read id value.
194     *
195     * @param bestSearchEngineScoreLabel a {@link java.lang.String} object.
196     * @return a {@link java.lang.Integer} object.
197     */
198    protected Integer loadBestSearchEngineScoreId(
199        String bestSearchEngineScoreLabel) {
200        Pattern pattern = Pattern.compile(
201            "search_engine_score\\[(\\d+)\\](\\w+)?");
202        Matcher matcher = pattern.matcher(bestSearchEngineScoreLabel);
203
204        if (matcher.find()) {
205            return new Integer(matcher.group(1));
206        }
207
208        return null;
209    }
210
211    /**
212     * load search_engine_score[id]_ms_run[..], read id value.
213     *
214     * @param searchEngineLabel a {@link java.lang.String} object.
215     * @return a {@link java.lang.Integer} object.
216     */
217    protected Integer loadSearchEngineScoreId(String searchEngineLabel) {
218        Pattern pattern = Pattern.compile("search_engine_score\\[(\\d+)\\]\\w*");
219        Matcher matcher = pattern.matcher(searchEngineLabel);
220
221        if (matcher.find()) {
222            return new Integer(matcher.group(1));
223        }
224
225        return null;
226    }
227
228    /**
229     * In the table-based sections (protein, peptide, and small molecule) there
230     * MUST NOT be any empty cells. Some field not allow "null" value, for
231     * example unit_id, accession and so on. In "Complete" file, in general
232     * "null" values SHOULD not be given.
233     *
234     * @param column SHOULD NOT be set to null
235     * @param target SHOULD NOT be empty.
236     * @param allowNull a boolean.
237     * @return a {@link java.lang.String} object.
238     */
239    protected String checkData(IMZTabColumn column, String target,
240        boolean allowNull) {
241        if (target == null && allowNull) {
242            return null;
243        }
244        if (target == null) {
245            this.errorList.add(new MZTabError(LogicalErrorType.NULL, lineNumber,
246                column.getHeader()));
247            return null;
248        }
249
250        target = target.trim();
251        if (target.isEmpty()) {
252            this.errorList.add(new MZTabError(LogicalErrorType.NULL, lineNumber,
253                column.getHeader()));
254            return null;
255        }
256        if (MZTabConstants.NULL.equals(target) && !allowNull) {
257            this.errorList.add(new MZTabError(LogicalErrorType.NULL, lineNumber,
258                column.getHeader()));
259            return null;
260        }
261
262        return target;
263    }
264
    /**
     * Checks a string cell, allowing null values. Delegates to
     * {@link #checkData(IMZTabColumn, String, boolean)} with
     * {@code allowNull} set to true.
     *
     * @param column SHOULD NOT be set to null
     * @param target SHOULD NOT be empty.
     * @return the value returned by
     * {@link #checkData(IMZTabColumn, String, boolean)}.
     */
    protected String checkString(IMZTabColumn column, String target) {
        return checkData(column, target, true);
    }
278
    /**
     * Checks a string cell. Delegates to
     * {@link #checkData(IMZTabColumn, String, boolean)} and passes all
     * arguments on unchanged.
     *
     * @param column SHOULD NOT be set to null
     * @param target SHOULD NOT be empty.
     * @param allowNull if true, null target values will pass the check, if
     * false, the check will raise an error in the error list.
     * @return the value returned by
     * {@link #checkData(IMZTabColumn, String, boolean)}.
     */
    protected String checkString(IMZTabColumn column, String target,
        boolean allowNull) {
        return checkData(column, target, allowNull);
    }
295
    /**
     * Check and translate target string into Integer, allowing null values.
     * Delegates to {@link #checkInteger(IMZTabColumn, String, boolean)} with
     * {@code allowNull} set to true.
     *
     * @param column SHOULD NOT be set to null
     * @param target SHOULD NOT be empty.
     * @return the value returned by
     * {@link #checkInteger(IMZTabColumn, String, boolean)}.
     */
    protected Integer checkInteger(IMZTabColumn column, String target) {
        return checkInteger(column, target, true);
    }
309
310    /**
311     * Check and translate target string into Integer. If parse is incorrect,
312     * throws
313     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Integer}
314     * error.
315     *
316     * @param column SHOULD NOT be set to null
317     * @param target SHOULD NOT be empty.
318     * @param allowNull if true, null target values will pass the check, if
319     * false, the check will raise an error in the error list.
320     * @return a {@link java.lang.Integer} object.
321     */
322    protected Integer checkInteger(IMZTabColumn column, String target,
323        boolean allowNull) {
324        String result = checkData(column, target, allowNull);
325
326        if (result == null || result.equalsIgnoreCase(NULL)) {
327            return null;
328        }
329
330        Integer value = parseInteger(result);
331        if (value == null) {
332            this.errorList.add(new MZTabError(FormatErrorType.Integer,
333                lineNumber, column.getHeader(), target));
334        }
335
336        return value;
337    }
338
    /**
     * Check and translate target string into Double, allowing null values.
     * Delegates to {@link #checkDouble(IMZTabColumn, String, boolean)} with
     * {@code allowNull} set to true.
     *
     * NOTICE: If ratios are included and the denominator is zero, the "INF"
     * value MUST be used. If the result leads to calculation errors (for
     * example 0/0), this MUST be reported as "not a number" ("NaN").
     *
     * @param column SHOULD NOT be set to null
     * @param target SHOULD NOT be empty.
     * @return the value returned by
     * {@link #checkDouble(IMZTabColumn, String, boolean)}.
     */
    protected Double checkDouble(IMZTabColumn column, String target) {
        return checkDouble(column, target, true);
    }
356
357    /**
358     * Check and translate target string into Double. If parse is incorrect,
359     * throws
360     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Double}
361     * error.
362     *
363     * NOTICE: If ratios are included and the denominator is zero, the "INF"
364     * value MUST be used. If the result leads to calculation errors (for
365     * example 0/0), this MUST be reported as "not a number" ("NaN").
366     *
367     * @param column SHOULD NOT be set to null
368     * @param target SHOULD NOT be empty.
369     * @param allowNull if true, null target values will pass the check, if
370     * false, the check will raise an error in the error list.
371     * @return a {@link java.lang.Double} object.
372     */
373    protected Double checkDouble(IMZTabColumn column, String target,
374        boolean allowNull) {
375        String result = checkData(column, target, allowNull);
376
377        if (result == null || result.equalsIgnoreCase(NULL)) {
378            return null;
379        }
380
381        Double value = parseDouble(result);
382        if (value == null) {
383            this.errorList.add(
384                new MZTabError(FormatErrorType.Double, lineNumber, column.
385                    getHeader(), target));
386            return null;
387        }
388        if (value.equals(Double.NaN) || value.equals(Double.POSITIVE_INFINITY)) {
389            return value;
390        }
391
392        return value;
393    }
394
395    /**
396     * Check and translate target string into parameter list which split by '|'
397     * character.. If parse is incorrect, throws
398     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#ParamList}
399     * error.
400     *
401     * @param column SHOULD NOT be set to null
402     * @param target SHOULD NOT be empty.
403     * @return a {@link java.util.List} object.
404     */
405    protected List<Parameter> checkParamList(IMZTabColumn column, String target) {
406        String result = checkData(column, target, true);
407
408        if (result == null || result.equalsIgnoreCase(NULL)) {
409            return new ArrayList<>(BAR);
410        }
411
412        List<Parameter> paramList = parseParamList(result);
413        if (paramList.isEmpty()) {
414            this.errorList.add(new MZTabError(FormatErrorType.ParamList,
415                lineNumber, "Column " + column.getHeader(), target));
416        }
417        for (Parameter param : paramList) {
418            if (param != null && param.getCvAccession() != null && !param.
419                getCvAccession().
420                isEmpty()) {
421                if (!param.getCvAccession().
422                    contains(":")) {
423                    this.errorList.add(new MZTabError(
424                        FormatErrorType.ParamAccessionNotNamespaced, lineNumber,
425                        column.getHeader(), param.getCvAccession(),
426                        new ParameterConverter().convert(param)));
427                }
428            }
429        }
430
431        return paramList;
432    }
433
434    /**
435     * <p>
436     * checkParameter.</p>
437     *
438     * @param column a {@link uk.ac.ebi.pride.jmztab2.model.IMZTabColumn}
439     * object.
440     * @param target a {@link java.lang.String} object.
441     * @param allowNull a boolean.
442     * @return a {@link de.isas.mztab2.model.Parameter} object.
443     */
444    protected Parameter checkParameter(IMZTabColumn column, String target,
445        boolean allowNull) {
446        String result = checkData(column, target, true);
447        if (result == null || (result.equalsIgnoreCase(NULL) && !allowNull)) {
448            this.errorList.add(new MZTabError(FormatErrorType.Param, lineNumber,
449                "Column " + column.getHeader(), target));
450        }
451        Parameter param = MZTabUtils.parseParam(target);
452        if (param != null && param.getCvAccession() != null && !param.
453            getCvAccession().
454            isEmpty()) {
455            if (!param.getCvAccession().
456                contains(":")) {
457                this.errorList.add(new MZTabError(
458                    FormatErrorType.ParamAccessionNotNamespaced, lineNumber,
459                    column.getHeader(), param.getCvAccession(),
460                    new ParameterConverter().convert(param)));
461            }
462        } else if (param == null && result != null && !result.isEmpty() && !(result.
463            equalsIgnoreCase(NULL))) {
464            this.errorList.add(new MZTabError(FormatErrorType.Param, lineNumber,
465                "Column " + column.getHeader(), target));
466        }
467        return param;
468    }
469
470    /**
471     * Check and translate target string into parameter list which split by
472     * splitChar character.. If parse is incorrect, throws
473     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#StringList}
474     * error.
475     *
476     * @param column SHOULD NOT be set to null
477     * @param target SHOULD NOT be empty.
478     * @param splitChar a char.
479     * @return a {@link java.util.List} object.
480     */
481    protected List<String> checkStringList(IMZTabColumn column, String target,
482        char splitChar) {
483        String result = checkData(column, target, true);
484
485        if (result == null || result.equalsIgnoreCase(NULL)) {
486            return new ArrayList<>(splitChar);
487        }
488
489        List<String> stringList = parseStringList(splitChar, result);
490        if (stringList.isEmpty()) {
491            this.errorList.add(new MZTabError(FormatErrorType.StringList,
492                lineNumber, column.getHeader(), result, "" + splitChar));
493        }
494
495        return stringList;
496    }
497
    /**
     * Check and translate target string into an integer list, allowing null
     * values. Delegates to
     * {@link #checkIntegerList(IMZTabColumn, String, char, boolean)} with
     * {@code allowNull} set to true.
     *
     * @param column SHOULD NOT be set to null
     * @param target SHOULD NOT be empty.
     * @param splitChar a char.
     * @return the list returned by
     * {@link #checkIntegerList(IMZTabColumn, String, char, boolean)}.
     */
    protected List<Integer> checkIntegerList(IMZTabColumn column, String target,
        char splitChar) {
        return checkIntegerList(column, target, splitChar, true);
    }
513
514    /**
515     * Check and translate target string into integer list which split by
516     * splitChar character.. If parse is incorrect, throws
517     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#StringList}
518     * error.
519     *
520     * @param column SHOULD NOT be set to null
521     * @param target SHOULD NOT be empty.
522     * @param splitChar a char.
523     * @param allowNull if true, null will be treated as a valid element of the
524     * list. Otherwise, an error will be added to the error list.
525     * @return a {@link java.util.List} object.
526     */
527    protected List<Integer> checkIntegerList(IMZTabColumn column, String target,
528        char splitChar, boolean allowNull) {
529        String result = checkData(column, target, allowNull);
530
531        if (result == null || result.equalsIgnoreCase(NULL)) {
532            return new ArrayList<>(splitChar);
533        }
534
535        List<Integer> stringList = parseIntegerList(result);
536        if (stringList.isEmpty()) {
537            this.errorList.add(new MZTabError(FormatErrorType.IntegerList,
538                lineNumber, column.getHeader(), result, "" + splitChar));
539        }
540
541        return stringList;
542    }
543
544    /**
545     * Check and translate target string into parameter list which split by
546     * splitChar character.. If parse is incorrect, throws
547     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#StringList}
548     * error.
549     *
550     * @param column SHOULD NOT be set to null
551     * @param target SHOULD NOT be empty.
552     * @return a {@link java.util.List} object.
553     */
554    protected List<Double> checkDoubleList(IMZTabColumn column, String target) {
555        String result = checkData(column, target, true);
556
557        if (result == null || result.equalsIgnoreCase(NULL)) {
558            return new ArrayList<>(MZTabConstants.BAR);
559        }
560
561        List<Double> doubleList = parseDoubleList(target);
562        if (doubleList.isEmpty()) {
563            this.errorList.add(new MZTabError(FormatErrorType.DoubleList,
564                lineNumber, column.getHeader(), result, "" + MZTabConstants.BAR));
565        }
566
567        return doubleList;
568    }
569
570    /**
571     * Check and translate target to
572     * {@link uk.ac.ebi.pride.jmztab2.model.MZBoolean}. Only "0" and "1" allow
573     * used in express Boolean (0/1). If parse is incorrect, throws
574     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#MZBoolean}
575     * error.
576     *
577     * @param column SHOULD NOT be set to null
578     * @param target SHOULD NOT be empty.
579     * @return a {@link uk.ac.ebi.pride.jmztab2.model.MZBoolean} object.
580     */
581    protected MZBoolean checkMZBoolean(IMZTabColumn column, String target) {
582        String result = checkData(column, target, true);
583
584        if (result == null || result.equalsIgnoreCase(NULL)) {
585            return null;
586        }
587
588        MZBoolean value = MZBoolean.findBoolean(result);
589        if (value == null) {
590            this.errorList.add(new MZTabError(FormatErrorType.MZBoolean,
591                lineNumber, column.getHeader(), result));
592        }
593
594        return value;
595    }
596
    /**
     * Check the description string. Delegates to
     * {@link #checkData(IMZTabColumn, String, boolean)} with
     * {@code allowNull} set to true, so a "null" description is accepted. In
     * a "Complete" file, "null" values SHOULD in general not be given.
     *
     * @see #checkData(IMZTabColumn, String, boolean)
     * @param column SHOULD NOT be set to null
     * @param description SHOULD NOT be empty.
     * @return the value returned by
     * {@link #checkData(IMZTabColumn, String, boolean)}.
     */
    protected String checkDescription(IMZTabColumn column, String description) {
        return checkData(column, description, true);
    }
609
    /**
     * Check and translate the taxid string into an Integer. Delegates to
     * {@link #checkInteger(IMZTabColumn, String)}, which accepts "null" and
     * adds a
     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Integer}
     * error to the error list if the value cannot be parsed. In general,
     * "null" values SHOULD not be given.
     *
     * @param column SHOULD NOT be set to null
     * @param taxid SHOULD NOT be empty.
     * @return the value returned by
     * {@link #checkInteger(IMZTabColumn, String)}.
     */
    protected Integer checkTaxid(IMZTabColumn column, String taxid) {
        return checkInteger(column, taxid);
    }
624
    /**
     * Check the species string. Delegates to
     * {@link #checkData(IMZTabColumn, String, boolean)} with
     * {@code allowNull} set to true, so a "null" species is accepted. In a
     * "Complete" file, "null" values SHOULD in general not be given.
     *
     * @see #checkData(IMZTabColumn, String, boolean)
     * @param column SHOULD NOT be set to null
     * @param species SHOULD NOT be empty.
     * @return the value returned by
     * {@link #checkData(IMZTabColumn, String, boolean)}.
     */
    protected String checkSpecies(IMZTabColumn column, String species) {
        return checkData(column, species, true);
    }
637
    /**
     * Check the database string. Delegates to
     * {@link #checkData(IMZTabColumn, String, boolean)} with
     * {@code allowNull} set to true, so a "null" database is accepted. In a
     * "Complete" file, "null" values SHOULD in general not be given.
     *
     * @see #checkData(IMZTabColumn, String, boolean)
     * @param column SHOULD NOT be set to null
     * @param database SHOULD NOT be empty.
     * @return the value returned by
     * {@link #checkData(IMZTabColumn, String, boolean)}.
     */
    protected String checkDatabase(IMZTabColumn column, String database) {
        return checkData(column, database, true);
    }
650
    /**
     * Check the database version string. Delegates to
     * {@link #checkData(IMZTabColumn, String, boolean)} with
     * {@code allowNull} set to true, so a "null" database version is
     * accepted. In a "Complete" file, "null" values SHOULD in general not be
     * given.
     *
     * @see #checkData(IMZTabColumn, String, boolean)
     * @param column SHOULD NOT be set to null
     * @param databaseVersion SHOULD NOT be empty.
     * @return the value returned by
     * {@link #checkData(IMZTabColumn, String, boolean)}.
     */
    protected String checkDatabaseVersion(IMZTabColumn column,
        String databaseVersion) {
        return checkData(column, databaseVersion, true);
    }
664
    /**
     * Check and translate the searchEngine string into a parameter list.
     * Delegates to {@link #checkParamList(IMZTabColumn, String)}, which adds a
     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#ParamList}
     * error to the error list if no parameter can be parsed. Normally,
     * searchEngine may be set to "null"; in general "null" values SHOULD not
     * be given.
     *
     * @param column SHOULD NOT be set to null
     * @param searchEngine SHOULD NOT be empty.
     * @return the list returned by
     * {@link #checkParamList(IMZTabColumn, String)}.
     */
    protected List<Parameter> checkSearchEngine(IMZTabColumn column,
        String searchEngine) {
        return checkParamList(column, searchEngine);
    }
680
    /**
     * Check the best search engine score (for this type of score) for the
     * given peptide across all replicates reported. The type of score MUST be
     * defined in the metadata section. If the peptide was not identified by
     * the specified search engine, "null" MUST be reported. Delegates to
     * {@link #checkDouble(IMZTabColumn, String)}.
     *
     * @param column SHOULD NOT be set to null
     * @param bestSearchEngineScore SHOULD NOT be empty.
     * @return the value returned by
     * {@link #checkDouble(IMZTabColumn, String)}.
     */
    protected Double checkBestSearchEngineScore(IMZTabColumn column,
        String bestSearchEngineScore) {
        return checkDouble(column, bestSearchEngineScore);
    }
695
    /**
     * Check the search engine score for the given peptide in the defined ms
     * run. The type of score MUST be defined in the metadata section. If the
     * peptide was not identified by the specified search engine, "null" must
     * be reported. Delegates to {@link #checkDouble(IMZTabColumn, String)}.
     *
     * @param column SHOULD NOT be set to null
     * @param searchEngineScore SHOULD NOT be empty.
     * @return the value returned by
     * {@link #checkDouble(IMZTabColumn, String)}.
     */
    protected Double checkSearchEngineScore(IMZTabColumn column,
        String searchEngineScore) {
        return checkDouble(column, searchEngineScore);
    }
709
    /**
     * Check and translate the numPSMs string into an Integer. Delegates to
     * {@link #checkInteger(IMZTabColumn, String)}, which accepts "null" and
     * adds a
     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Integer}
     * error to the error list if the value cannot be parsed. In general,
     * "null" values SHOULD not be given.
     *
     * @param column SHOULD NOT be set to null
     * @param numPSMs SHOULD NOT be empty.
     * @return the value returned by
     * {@link #checkInteger(IMZTabColumn, String)}.
     */
    protected Integer checkNumPSMs(IMZTabColumn column, String numPSMs) {
        return checkInteger(column, numPSMs);
    }
724
    /**
     * Check and translate the numPeptidesDistinct string into an Integer.
     * Delegates to {@link #checkInteger(IMZTabColumn, String)}, which accepts
     * "null" and adds a
     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Integer}
     * error to the error list if the value cannot be parsed. In a "Complete"
     * file, "null" values SHOULD in general not be given.
     *
     * @param column SHOULD NOT be set to null
     * @param numPeptidesDistinct SHOULD NOT be empty.
     * @return the value returned by
     * {@link #checkInteger(IMZTabColumn, String)}.
     */
    protected Integer checkNumPeptidesDistinct(IMZTabColumn column,
        String numPeptidesDistinct) {
        return checkInteger(column, numPeptidesDistinct);
    }
740
    /**
     * Check and translate the numPeptidesUnique string into an Integer.
     * Delegates to {@link #checkInteger(IMZTabColumn, String)}, which accepts
     * "null" and adds a
     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Integer}
     * error to the error list if the value cannot be parsed. In a "Complete"
     * file, "null" values SHOULD in general not be given.
     *
     * @param column SHOULD NOT be set to null
     * @param numPeptidesUnique SHOULD NOT be empty.
     * @return the value returned by
     * {@link #checkInteger(IMZTabColumn, String)}.
     */
    protected Integer checkNumPeptidesUnique(IMZTabColumn column,
        String numPeptidesUnique) {
        return checkInteger(column, numPeptidesUnique);
    }
756
    /**
     * Check and translate the ambiguityMembers string into a string list.
     * Delegates to {@link #checkStringList(IMZTabColumn, String, char)} with
     * the COMMA constant as split character; that method adds a
     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#StringList}
     * error to the error list if no element can be parsed. Normally,
     * ambiguityMembers may be set to "null"; in general "null" values SHOULD
     * not be given.
     *
     * @param column SHOULD NOT be set to null
     * @param ambiguityMembers SHOULD NOT be empty.
     * @return the list returned by
     * {@link #checkStringList(IMZTabColumn, String, char)}.
     */
    protected List<String> checkAmbiguityMembers(IMZTabColumn column,
        String ambiguityMembers) {
        return checkStringList(column, ambiguityMembers, COMMA);
    }
772
773    /**
774     * Checks the provided URI string.
775     *
776     * @param column SHOULD NOT be set to null
777     * @param uri a {@link java.lang.String} object, conforming to URI format.
778     * @return the uri as an ASCII encoded string.
779     */
780    protected String checkURI(IMZTabColumn column, String uri) {
781        String result_uri = checkData(column, uri, true);
782
783        if (result_uri == null || result_uri.equalsIgnoreCase(NULL)) {
784            return null;
785        }
786
787        java.net.URI result = parseURI(result_uri);
788        if (result == null) {
789            this.errorList.add(new MZTabError(FormatErrorType.URI, lineNumber,
790                "Column " + column.getHeader(), result_uri));
791            return null;
792        } else {
793            return result.toASCIIString();
794        }
795    }
796
797    /**
798     * Check and translate spectraRef string into
799     * {@link de.isas.mztab2.model.SpectraRef} list. If parse incorrect, or
800     * ms_run not defined in metadata raise
801     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#SpectraRef}
802     * error. Normally, spectraRef may be set to "null"; in general "null"
803     * values SHOULD not be given.
804     *
805     * @param column SHOULD NOT be set to null
806     * @param spectraRef SHOULD NOT be empty.
807     * @param context a
808     * {@link uk.ac.ebi.pride.jmztab2.utils.parser.MZTabParserContext} object.
809     * @return a {@link java.util.List} object.
810     */
811    protected List<SpectraRef> checkSpectraRef(MZTabParserContext context,
812        IMZTabColumn column, String spectraRef) {
813        return checkSpectraRef(context, column, spectraRef, false);
814    }
815
816    /**
817     * Check and translate spectraRef string into
818     * {@link de.isas.mztab2.model.SpectraRef} list. If parse incorrect, or
819     * ms_run not defined in metadata raise
820     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#SpectraRef}
821     * error. Normally, spectraRef may be set to "null"; in general "null"
822     * values SHOULD not be given.
823     *
824     * @param column SHOULD NOT be set to null
825     * @param spectraRef SHOULD NOT be empty.
826     * @param context a
827     * {@link uk.ac.ebi.pride.jmztab2.utils.parser.MZTabParserContext} object.
828     * @param allowNull if true, allow null for value. Otherwise, an error will
829     * be added to the error list.
830     * @return a {@link java.util.List} object.
831     */
832    protected List<SpectraRef> checkSpectraRef(MZTabParserContext context,
833        IMZTabColumn column, String spectraRef, boolean allowNull) {
834        String result_spectraRef = checkData(column, spectraRef, allowNull);
835
836        if (result_spectraRef == null || result_spectraRef.
837            equalsIgnoreCase(NULL)) {
838            return new SplitList<>(BAR);
839        }
840
841        List<SpectraRef> refList = parseSpectraRefList(context, metadata,
842            result_spectraRef);
843        if (refList.isEmpty()) {
844            this.errorList.add(new MZTabError(FormatErrorType.SpectraRef,
845                lineNumber, column.getHeader(), result_spectraRef));
846        } else {
847            for (SpectraRef ref : refList) {
848                MsRun run = ref.getMsRun();
849                if (!Optional.ofNullable(run.getLocation()).isPresent()) {
850                    //As the location can be null and the field is mandatory, this is not an error, it is a warning
851                    this.errorList.add(new MZTabError(
852                        LogicalErrorType.SpectraRef, lineNumber, column.
853                            getHeader(), result_spectraRef, "ms_run[" + run.
854                            getId() + "]-location"));
855                }
856            }
857        }
858
859        return refList;
860    }
861
862    /**
     * Check target string. Normally, pre may be set to "null". "null" values
     * should only be given if no value is available and where the specification
     * allows for "null" explicitly.
866     *
867     * @see #checkData(IMZTabColumn, String, boolean)
868     * @param column SHOULD NOT be set to null
869     * @param pre SHOULD NOT be empty.
870     * @return a {@link java.lang.String} object.
871     */
872    protected String checkPre(IMZTabColumn column, String pre) {
873        return checkData(column, pre, true);
874    }
875
876    /**
877     * Check target string. Normally, post can set "null". But in "Complete"
878     * file, in general "null" values SHOULD not be given.
879     *
880     * @see #checkData(IMZTabColumn, String, boolean)
881     * @param column SHOULD NOT be set to null
882     * @param post SHOULD NOT be empty.
883     * @return a {@link java.lang.String} object.
884     */
885    protected String checkPost(IMZTabColumn column, String post) {
886        return checkData(column, post, true);
887    }
888
889    /**
890     * Check target string. Normally, start can set "null". But in "Complete"
891     * file, in general "null" values SHOULD not be given.
892     *
893     * @see #checkData(IMZTabColumn, String, boolean)
894     * @param column SHOULD NOT be set to null
895     * @param start SHOULD NOT be empty.
896     * @return a {@link java.lang.String} object.
897     */
898    protected String checkStart(IMZTabColumn column, String start) {
899        return checkData(column, start, true);
900    }
901
902    /**
903     * Check target string. Normally, end can set "null". But in "Complete"
904     * file, in general "null" values SHOULD not be given.
905     *
906     * @see #checkData(IMZTabColumn, String, boolean)
907     * @param column SHOULD NOT be set to null
908     * @param end SHOULD NOT be empty.
909     * @return a {@link java.lang.String} object.
910     */
911    protected String checkEnd(IMZTabColumn column, String end) {
912        return checkData(column, end, true);
913    }
914
915    /**
     * Check and translate the target string into a string list, split by the
     * ',' character. If parsing fails, a
     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#StringList}
     * error is raised. In addition, each item in the list should start with
     * "GO:"; otherwise a
     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#GOTermList}
     * error is raised. Normally, go_terms may be set to "null"; in general
     * "null" values SHOULD not be given.
924     *
925     * @param column SHOULD NOT be set to null
926     * @param go_terms SHOULD NOT be empty.
927     * @return a {@link java.util.List} object.
928     */
929    protected List<String> checkGOTerms(IMZTabColumn column, String go_terms) {
930        String result_go_terms = checkData(column, go_terms, true);
931
932        if (result_go_terms == null || result_go_terms.equalsIgnoreCase(NULL)) {
933            return new ArrayList<>(COMMA);
934        }
935
936        List<String> stringList = parseGOTermList(result_go_terms);
937        if (stringList.isEmpty()) {
938            this.errorList.add(new MZTabError(FormatErrorType.GOTermList,
939                lineNumber, column.getHeader(), result_go_terms));
940        }
941
942        return stringList;
943    }
944
945    /**
     * Check and translate the protein_coverage string into a Double. If parsing
     * fails, a
     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Double}
     * error is raised. protein_coverage must lie in the range [0, 1]; otherwise a
     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.LogicalErrorType#ProteinCoverage}
     * error is raised and null is returned.
952     *
953     * NOTICE: If ratios are included and the denominator is zero, the "INF"
954     * value MUST be used. If the result leads to calculation errors (for
955     * example 0/0), this MUST be reported as "not a number" ("NaN").
956     *
957     * @param column SHOULD NOT be set to null
958     * @param protein_coverage SHOULD NOT be empty.
959     * @return a {@link java.lang.Double} object.
960     */
961    protected Double checkProteinCoverage(IMZTabColumn column,
962        String protein_coverage) {
963        Double result = checkDouble(column, protein_coverage);
964
965        if (result == null) {
966            return null;
967        }
968
969        if (result < 0 || result > 1) {
970            this.errorList.add(new MZTabError(LogicalErrorType.ProteinCoverage,
971                lineNumber, column.getHeader(), printDouble(result)));
972            return null;
973        }
974
975        return result;
976    }
977
978    /**
     * Check and translate the peptide sequence; the returned sequence is
     * upper-cased. 'O' and 'U' are encoded by codons that are usually
     * interpreted as stop codons and therefore must not appear in the sequence.
     * If either is found, a
     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Sequence}
     * error is raised.
984     *
985     * @param column SHOULD NOT be set to null
986     * @param sequence SHOULD NOT be empty.
987     * @return a {@link java.lang.String} object.
988     */
989    protected String checkSequence(IMZTabColumn column, String sequence) {
990        String result = checkData(column, sequence, true);
991
992        if (result == null) {
993            return null;
994        }
995
996        result = result.toUpperCase();
997
998        Pattern pattern = Pattern.compile("[OU]");
999        Matcher matcher = pattern.matcher(result);
1000        if (matcher.find()) {
1001            this.errorList.add(new MZTabError(FormatErrorType.Sequence,
1002                lineNumber, column.getHeader(), sequence));
1003        }
1004
1005        return result;
1006    }
1007
1008    /**
1009     * Check and translate psm_id string into Integer. If exists error during
1010     * parse, raise
1011     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Integer}
1012     * error. Normally, psm_id may be set to "null"; in general "null" values
1013     * SHOULD not be given.
1014     *
1015     * @param column SHOULD NOT be set to null
1016     * @param psm_id SHOULD NOT be empty.
1017     * @return a {@link java.lang.Integer} object.
1018     */
1019    protected Integer checkPSMID(IMZTabColumn column, String psm_id) {
1020        return checkInteger(column, psm_id);
1021    }
1022
1023    /**
     * Check and translate unique into
     * {@link uk.ac.ebi.pride.jmztab2.model.MZBoolean}. Only "0" and "1" are
     * allowed to express a Boolean (0/1). If parsing fails, a
     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#MZBoolean}
     * error is raised.
1029     *
1030     * @param column SHOULD NOT be set to null
1031     * @param unique SHOULD NOT be empty.
1032     * @return a {@link uk.ac.ebi.pride.jmztab2.model.MZBoolean} object.
1033     */
1034    protected MZBoolean checkUnique(IMZTabColumn column, String unique) {
1035        return checkMZBoolean(column, unique);
1036    }
1037
1038    /**
1039     * Check and translate charge string into Integer. If exists error during
1040     * parse, raise
1041     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Integer}
1042     * error. Normally, charge may be set to "null"; in general "null" values
1043     * SHOULD not be given.
1044     *
1045     * @param column SHOULD NOT be set to null
1046     * @param charge SHOULD NOT be empty.
1047     * @return a {@link java.lang.Integer} object.
1048     */
1049    protected Integer checkCharge(IMZTabColumn column, String charge) {
1050        return checkInteger(column, charge);
1051    }
1052
1053    /**
1054     * Check and translate mass_to_charge string into Double. If parse is
1055     * incorrect, throws
1056     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Double}
1057     * error.
1058     *
1059     * NOTICE: If ratios are included and the denominator is zero, the "INF"
1060     * value MUST be used. If the result leads to calculation errors (for
1061     * example 0/0), this MUST be reported as "not a number" ("NaN").
1062     *
1063     * @param column SHOULD NOT be set to null
1064     * @param mass_to_charge SHOULD NOT be empty.
1065     * @return a {@link java.lang.Double} object.
1066     */
1067    protected Double checkMassToCharge(IMZTabColumn column,
1068        String mass_to_charge) {
1069        return checkDouble(column, mass_to_charge);
1070    }
1071
1072    /**
1073     * Check and translate exp_mass_to_charge string into Double. If parse is
1074     * incorrect, throws
1075     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Double}
1076     * error.
1077     *
1078     * NOTICE: If ratios are included and the denominator is zero, the "INF"
1079     * value MUST be used. If the result leads to calculation errors (for
1080     * example 0/0), this MUST be reported as "not a number" ("NaN").
1081     *
1082     * @param column SHOULD NOT be set to null
1083     * @param exp_mass_to_charge SHOULD NOT be empty.
1084     * @return a {@link java.lang.Double} object.
1085     */
1086    protected Double checkExpMassToCharge(IMZTabColumn column,
1087        String exp_mass_to_charge) {
1088        return checkDouble(column, exp_mass_to_charge);
1089    }
1090
1091    /**
1092     * Check and translate calc_mass_to_charge string into Double. If parse is
1093     * incorrect, throws
1094     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Double}
1095     * error.
1096     *
1097     * NOTICE: If ratios are included and the denominator is zero, the "INF"
1098     * value MUST be used. If the result leads to calculation errors (for
1099     * example 0/0), this MUST be reported as "not a number" ("NaN").
1100     *
1101     * @param column SHOULD NOT be set to null
1102     * @param calc_mass_to_charge SHOULD NOT be empty.
1103     * @return a {@link java.lang.Double} object.
1104     */
1105    protected Double checkCalcMassToCharge(IMZTabColumn column,
1106        String calc_mass_to_charge) {
1107        return checkDouble(column, calc_mass_to_charge);
1108    }
1109
1110    /**
     * Check and translate the identifier string into a string list, split by
     * the '|' character. If parsing fails, a
     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#StringList}
     * error is raised. Normally, identifier may be set to "null"; in general
     * "null" values SHOULD not be given.
1116     *
1117     * @param column SHOULD NOT be set to null
1118     * @param identifier SHOULD NOT be empty.
1119     * @return a {@link java.util.List} object.
1120     */
1121    protected List<String> checkIdentifier(IMZTabColumn column,
1122        String identifier) {
1123        return checkStringList(column, identifier, BAR);
1124    }
1125
1126    /**
1127     * Check chemical_formula string. Normally, chemical_formula can set "null".
1128     * But in "Complete" file, in general "null" values SHOULD not be given.
1129     *
1130     * @see #checkData(IMZTabColumn, String, boolean)
1131     * @param column SHOULD NOT be set to null
1132     * @param chemical_formula SHOULD NOT be empty.
1133     * @return a {@link java.lang.String} object.
1134     */
1135    protected String checkChemicalFormula(IMZTabColumn column,
1136        String chemical_formula) {
1137        return checkData(column, chemical_formula, true);
1138    }
1139
1140    /**
     * Check and translate the smiles string into a string list, split by the
     * '|' character. If parsing fails, a
     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#StringList}
     * error is raised. Normally, smiles may be set to "null"; in general "null"
     * values SHOULD not be given.
1146     *
1147     * @param column SHOULD NOT be set to null
1148     * @param smiles SHOULD NOT be empty.
1149     * @return a {@link java.util.List} object.
1150     */
1151    protected List<String> checkSmiles(IMZTabColumn column, String smiles) {
1152        return checkStringList(column, smiles, BAR);
1153    }
1154
1155    /**
     * Check and translate the inchi_key string into a string list, split by the
     * '|' character. If parsing fails, a
     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#StringList}
     * error is raised. Normally, inchi_key may be set to "null"; in general
     * "null" values SHOULD not be given.
1161     *
1162     * @param column SHOULD NOT be set to null
1163     * @param inchi_key SHOULD NOT be empty.
1164     * @return a {@link java.util.List} object.
1165     */
1166    protected List<String> checkInchiKey(IMZTabColumn column, String inchi_key) {
1167        return checkStringList(column, inchi_key, BAR);
1168    }
1169
1170    /**
     * Check and translate the retention_time string into a Double list, split
     * by the '|' character. If parsing fails, a
     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#DoubleList}
     * error is raised. Normally, retention_time may be set to "null"; in general
     * "null" values SHOULD not be given.
1176     *
1177     * @param column SHOULD NOT be set to null
1178     * @param retention_time SHOULD NOT be empty.
1179     * @return a {@link java.util.List} object.
1180     */
1181    protected List<Double> checkRetentionTime(IMZTabColumn column,
1182        String retention_time) {
1183        String result = checkData(column, retention_time, true);
1184
1185        if (result == null || result.equalsIgnoreCase(NULL)) {
1186            return new SplitList<>(BAR);
1187        }
1188
1189        List<Double> valueList = parseDoubleList(result);
1190        if (valueList.isEmpty()) {
1191            this.errorList.add(new MZTabError(FormatErrorType.DoubleList,
1192                lineNumber, column.getHeader(), result, "" + BAR));
1193        }
1194
1195        return valueList;
1196    }
1197
1198    /**
     * Check and translate the retention_time_window string into a Double list,
     * split by the '|' character. If parsing fails, a
     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#DoubleList}
     * error is raised. Normally, retention_time_window may be set to "null", but
     * in a "Complete" file, "null" values SHOULD in general not be given.
1204     *
1205     * @param column SHOULD NOT be set to null
1206     * @param retention_time_window SHOULD NOT be empty.
1207     * @return a {@link java.util.List} object.
1208     */
1209    protected List<Double> checkRetentionTimeWindow(IMZTabColumn column,
1210        String retention_time_window) {
1211        String result = checkData(column, retention_time_window, true);
1212
1213        if (result == null || result.equalsIgnoreCase(NULL)) {
1214            return new SplitList<>(BAR);
1215        }
1216
1217        List<Double> valueList = parseDoubleList(result);
1218        if (valueList.isEmpty()) {
1219            this.errorList.add(new MZTabError(FormatErrorType.DoubleList,
1220                lineNumber, column.getHeader(), result, "" + BAR));
1221        }
1222
1223        return valueList;
1224    }
1225}