001/* 
002 * Copyright 2018 Leibniz-Institut für Analytische Wissenschaften – ISAS – e.V..
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package uk.ac.ebi.pride.jmztab2.utils.parser;
017
018import de.isas.mztab2.io.serialization.ParameterConverter;
019import de.isas.mztab2.io.validators.SpectraRefValidator;
020import de.isas.mztab2.model.Metadata;
021import de.isas.mztab2.model.Parameter;
022import de.isas.mztab2.model.SpectraRef;
023import java.util.ArrayList;
024import java.util.Arrays;
025import java.util.List;
026import java.util.SortedMap;
027import java.util.regex.Matcher;
028import java.util.regex.Pattern;
029import lombok.extern.slf4j.Slf4j;
030import uk.ac.ebi.pride.jmztab2.model.IMZTabColumn;
031import uk.ac.ebi.pride.jmztab2.model.MZBoolean;
032import uk.ac.ebi.pride.jmztab2.model.MZTabColumnFactory;
033import uk.ac.ebi.pride.jmztab2.model.MZTabConstants;
034import static uk.ac.ebi.pride.jmztab2.model.MZTabConstants.*;
035import uk.ac.ebi.pride.jmztab2.model.MZTabUtils;
036import static uk.ac.ebi.pride.jmztab2.model.MZTabUtils.*;
037import uk.ac.ebi.pride.jmztab2.model.SplitList;
038import uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType;
039import uk.ac.ebi.pride.jmztab2.utils.errors.LogicalErrorType;
040import uk.ac.ebi.pride.jmztab2.utils.errors.MZTabError;
041import uk.ac.ebi.pride.jmztab2.utils.errors.MZTabErrorList;
042import uk.ac.ebi.pride.jmztab2.utils.errors.MZTabException;
043
044/**
045 * This class allows the validation and loading of the data into mzTab domain
046 * objects.
047 *
 * NOTICE: {@link uk.ac.ebi.pride.jmztab2.model.MZTabColumnFactory} maintains a
 * set of {@link uk.ac.ebi.pride.jmztab2.model.IMZTabColumn}s which have an
 * internal logical position and order. In a physical mzTab file, users are
 * allowed to deviate from this logical ordering and to provide their data in
 * their own order. To distinguish the two, the physical position (a positive
 * integer) records the column location in the mzTab file, and a
 * {@link uk.ac.ebi.pride.jmztab2.utils.parser.PositionMapping} structure
 * maintains the mapping between them.
056 *
057 * @param <T> the type of domain object the parser creates.
058 * @see SMLLineParser
059 * @see SMFLineParser
060 * @see SMELineParser
061 * @author qingwei
062 * @since 14/02/13
063 *
064 */
065@Slf4j
066public abstract class MZTabDataLineParser<T> extends MZTabLineParser {
067
    /** Column factory supplied to the constructor; the source of {@link #mapping}. */
    protected MZTabColumnFactory factory;
    /** Position mapping supplied to the constructor. */
    protected PositionMapping positionMapping;
    /** Result of {@code positionMapping.reverse()}: the position mapping with keys and values swapped. */
    protected SortedMap<String, Integer> exchangeMapping; // reverse the key and value of positionMapping.

    /** Column map obtained from {@code factory.getOffsetColumnsMap()}; its size is the expected column count. */
    protected SortedMap<Integer, IMZTabColumn> mapping;   // logical position --> offset
    /** Metadata supplied to the constructor; it has to be parsed before any data line. */
    protected Metadata metadata;
074
    /**
     * Creates a data line parser that only knows its parser context.
     * <p>
     * This constructor merely forwards the context to the superclass; none of
     * the fields declared in this class ({@code factory},
     * {@code positionMapping}, {@code exchangeMapping}, {@code mapping},
     * {@code metadata}) are initialised here.</p>
     *
     * @param context a
     * {@link uk.ac.ebi.pride.jmztab2.utils.parser.MZTabParserContext} object.
     */
    protected MZTabDataLineParser(MZTabParserContext context) {
        super(context);
    }
085
086    /**
087     * Generate a mzTab data line parser.
088     *
089     * NOTICE: {@link uk.ac.ebi.pride.jmztab2.model.MZTabColumnFactory} maintain
090     * a couple of {@link uk.ac.ebi.pride.jmztab2.model.IMZTabColumn} which have
091     * internal logical position and order. In physical mzTab file, we allow
092     * user not obey this logical position organized way, and provide their date
093     * with own order. In order to distinguish them, we use physical position (a
094     * positive integer) to record the column location in mzTab file. And use
095     * {@link uk.ac.ebi.pride.jmztab2.utils.parser.PositionMapping} structure
096     * the maintain the mapping between them.
097     *
098     * @param context the parser context, keeping dynamic state and lookup
099     * associations.
100     * @param factory SHOULD NOT be set to null
101     * @param positionMapping SHOULD NOT be set to null
102     * @param metadata SHOULD NOT be set to null
103     * @param errorList a
104     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.MZTabErrorList} object.
105     */
106    protected MZTabDataLineParser(MZTabParserContext context,
107            MZTabColumnFactory factory, PositionMapping positionMapping,
108            Metadata metadata, MZTabErrorList errorList) {
109        this(context);
110        if (factory == null) {
111            throw new NullPointerException(
112                    "Column header factory should be created first.");
113        }
114        this.factory = factory;
115
116        this.positionMapping = positionMapping;
117        this.exchangeMapping = positionMapping.reverse();
118        this.mapping = factory.getOffsetColumnsMap();
119
120        if (metadata == null) {
121            throw new NullPointerException("Metadata should be parsed first.");
122        }
123        this.metadata = metadata;
124        this.errorList = errorList == null ? new MZTabErrorList() : errorList;
125    }
126
127    /**
128     * {@inheritDoc}
129     *
130     * Validate and parse the data line, if there exist errors, add them into
131     * {@link MZTabErrorList}.
132     */
133    @Override
134    public void parse(int lineNumber, String line, MZTabErrorList errorList) throws MZTabException {
135        super.parse(lineNumber, line, errorList);
136        checkCount();
137
138        int offset = checkData();
139        if (offset != items.length) {
140            log.error(
141                    "Number of expected items after parsing header is: {} but data line has: {} items!",
142                    offset,
143                    items.length);
144            log.error("Current mapping is: {}", mapping);
145            log.error("Items given: {} expected: {}", Arrays.toString(items),
146                    Arrays.toString(line.split("\\t")));
147            this.errorList.add(new MZTabError(FormatErrorType.CountMatch,
148                    lineNumber, "" + offset, "" + items.length));
149        }
150    }
151
152    /**
153     * Check header line items size equals data line items size. The number of
154     * Data line items does not match with the number of Header line items.
155     * Normally, the user has not used the Unicode Horizontal Tab character
156     * (Unicode codepoint 0009) as the column delimiter, there is a file
157     * encoding error, or the user has not provided the definition of optional
158     * columns in the header line.
159     */
160    private void checkCount() {
161        int headerCount = mapping.size();
162        int dataCount = items.length - 1;
163
164        if (headerCount != dataCount) {
165            log.error(
166                    "Number of expected items after parsing header is: {} but data line has: {} items!",
167                    headerCount,
168                    dataCount);
169            log.error("Current mapping is: {}", mapping);
170            log.error("Items given: {} expected: {}", Arrays.toString(items),
171                    Arrays.toString(line.split("\\t")));
172            this.errorList.add(new MZTabError(FormatErrorType.CountMatch,
173                    lineNumber, "" + dataCount, "" + headerCount));
174        }
175    }
176
    /**
     * Returns the data line as a typed mzTab domain object. The concrete
     * record type and the way it is populated are defined by the implementing
     * subclass.
     *
     * @return a typed mzTab domain object.
     */
    public abstract T getRecord();
183
    /**
     * Check and translate the columns into mzTab elements.
     *
     * @return an item count or offset that
     * {@link #parse(int, String, MZTabErrorList)} compares against the number
     * of items on the data line; any difference is reported as a
     * {@link FormatErrorType#CountMatch} error.
     */
    protected abstract int checkData();
190
191    /**
192     * load best_search_engine_score[id], read id value.
193     *
194     * @param bestSearchEngineScoreLabel a {@link java.lang.String} object.
195     * @return a {@link java.lang.Integer} object.
196     */
197    protected Integer loadBestSearchEngineScoreId(
198            String bestSearchEngineScoreLabel) {
199        Pattern pattern = Pattern.compile(
200                "search_engine_score\\[(\\d+)\\](\\w+)?");
201        Matcher matcher = pattern.matcher(bestSearchEngineScoreLabel);
202
203        if (matcher.find()) {
204            return new Integer(matcher.group(1));
205        }
206
207        return null;
208    }
209
210    /**
211     * load search_engine_score[id]_ms_run[..], read id value.
212     *
213     * @param searchEngineLabel a {@link java.lang.String} object.
214     * @return a {@link java.lang.Integer} object.
215     */
216    protected Integer loadSearchEngineScoreId(String searchEngineLabel) {
217        Pattern pattern = Pattern.compile("search_engine_score\\[(\\d+)\\]\\w*");
218        Matcher matcher = pattern.matcher(searchEngineLabel);
219
220        if (matcher.find()) {
221            return new Integer(matcher.group(1));
222        }
223
224        return null;
225    }
226
227    /**
228     * In the table-based sections (protein, peptide, and small molecule) there
229     * MUST NOT be any empty cells. Some field not allow "null" value, for
230     * example unit_id, accession and so on. In "Complete" file, in general
231     * "null" values SHOULD not be given.
232     *
233     * @param column SHOULD NOT be set to null
234     * @param target SHOULD NOT be empty.
235     * @param allowNull a boolean.
236     * @return a {@link java.lang.String} object.
237     */
238    protected String checkData(IMZTabColumn column, String target,
239            boolean allowNull) {
240        if (target == null && allowNull) {
241            return null;
242        }
243        if (target == null) {
244            this.errorList.add(new MZTabError(LogicalErrorType.NULL, lineNumber,
245                    column.getHeader()));
246            return null;
247        }
248
249        target = target.trim();
250        if (target.isEmpty()) {
251            this.errorList.add(new MZTabError(LogicalErrorType.NULL, lineNumber,
252                    column.getHeader()));
253            return null;
254        }
255        if (MZTabConstants.NULL.equals(target) && !allowNull) {
256            this.errorList.add(new MZTabError(LogicalErrorType.NULL, lineNumber,
257                    column.getHeader()));
258            return null;
259        }
260
261        return target;
262    }
263
264    /**
265     * In the table-based sections (protein, peptide, and small molecule) there
266     * MUST NOT be any empty cells. Some field not allow "null" value, for
267     * example unit_id, accession and so on. In "Complete" file, in general
268     * "null" values SHOULD not be given.
269     *
270     * @param column SHOULD NOT be set to null
271     * @param target SHOULD NOT be empty.
272     * @return a {@link java.lang.String} object.
273     */
274    protected String checkString(IMZTabColumn column, String target) {
275        return checkData(column, target, true);
276    }
277
278    /**
279     * In the table-based sections (protein, peptide, and small molecule) there
280     * MUST NOT be any empty cells. Some field not allow "null" value, for
281     * example unit_id, accession and so on. In "Complete" file, in general
282     * "null" values SHOULD not be given.
283     *
284     * @param column SHOULD NOT be set to null
285     * @param target SHOULD NOT be empty.
286     * @param allowNull if true, null target values will pass the check, if
287     * false, the check will raise an error in the error list.
288     * @return a {@link java.lang.String} object.
289     */
290    protected String checkString(IMZTabColumn column, String target,
291            boolean allowNull) {
292        return checkData(column, target, allowNull);
293    }
294
295    /**
296     * Check and translate target string into Integer. If parse is incorrect,
297     * throws
298     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Integer}
299     * error.
300     *
301     * @param column SHOULD NOT be set to null
302     * @param target SHOULD NOT be empty.
303     * @return a {@link java.lang.Integer} object.
304     */
305    protected Integer checkInteger(IMZTabColumn column, String target) {
306        return checkInteger(column, target, true);
307    }
308
309    /**
310     * Check and translate target string into Integer. If parse is incorrect,
311     * throws
312     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Integer}
313     * error.
314     *
315     * @param column SHOULD NOT be set to null
316     * @param target SHOULD NOT be empty.
317     * @param allowNull if true, null target values will pass the check, if
318     * false, the check will raise an error in the error list.
319     * @return a {@link java.lang.Integer} object.
320     */
321    protected Integer checkInteger(IMZTabColumn column, String target,
322            boolean allowNull) {
323        String result = checkData(column, target, allowNull);
324
325        if (result == null || result.equalsIgnoreCase(NULL)) {
326            return null;
327        }
328
329        Integer value = parseInteger(result);
330        if (value == null) {
331            this.errorList.add(new MZTabError(FormatErrorType.Integer,
332                    lineNumber, column.getHeader(), target));
333        }
334
335        return value;
336    }
337
338    /**
339     * Check and translate target string into Double. If parse is incorrect,
340     * throws
341     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Double}
342     * error.
343     *
344     * NOTICE: If ratios are included and the denominator is zero, the "INF"
345     * value MUST be used. If the result leads to calculation errors (for
346     * example 0/0), this MUST be reported as "not a number" ("NaN").
347     *
348     * @param column SHOULD NOT be set to null
349     * @param target SHOULD NOT be empty.
350     * @return a {@link java.lang.Double} object.
351     */
352    protected Double checkDouble(IMZTabColumn column, String target) {
353        return checkDouble(column, target, true);
354    }
355
356    /**
357     * Check and translate target string into Double. If parse is incorrect,
358     * throws
359     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Double}
360     * error.
361     *
362     * NOTICE: If ratios are included and the denominator is zero, the "INF"
363     * value MUST be used. If the result leads to calculation errors (for
364     * example 0/0), this MUST be reported as "not a number" ("NaN").
365     *
366     * @param column SHOULD NOT be set to null
367     * @param target SHOULD NOT be empty.
368     * @param allowNull if true, null target values will pass the check, if
369     * false, the check will raise an error in the error list.
370     * @return a {@link java.lang.Double} object.
371     */
372    protected Double checkDouble(IMZTabColumn column, String target,
373            boolean allowNull) {
374        String result = checkData(column, target, allowNull);
375
376        if (result == null || result.equalsIgnoreCase(NULL)) {
377            return null;
378        }
379
380        Double value = parseDouble(result);
381        if (value == null) {
382            this.errorList.add(
383                    new MZTabError(FormatErrorType.Double, lineNumber, column.
384                            getHeader(), target));
385            return null;
386        }
387        if (value.equals(Double.NaN) || value.equals(Double.POSITIVE_INFINITY)) {
388            return value;
389        }
390
391        return value;
392    }
393
394    /**
395     * Check and translate target string into parameter list which split by '|'
396     * character.. If parse is incorrect, throws
397     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#ParamList}
398     * error.
399     *
400     * @param column SHOULD NOT be set to null
401     * @param target SHOULD NOT be empty.
402     * @return a {@link java.util.List} object.
403     */
404    protected List<Parameter> checkParamList(IMZTabColumn column, String target) {
405        String result = checkData(column, target, true);
406
407        if (result == null || result.equalsIgnoreCase(NULL)) {
408            return new ArrayList<>(BAR);
409        }
410
411        List<Parameter> paramList = parseParamList(result);
412        if (paramList.isEmpty()) {
413            this.errorList.add(new MZTabError(FormatErrorType.ParamList,
414                    lineNumber, "Column " + column.getHeader(), target));
415        }
416        for (Parameter param : paramList) {
417            if (param != null && param.getCvAccession() != null && !param.
418                    getCvAccession().
419                    isEmpty()) {
420                if (!param.getCvAccession().
421                        contains(":")) {
422                    this.errorList.add(new MZTabError(
423                            FormatErrorType.ParamAccessionNotNamespaced, lineNumber,
424                            column.getHeader(), param.getCvAccession(),
425                            new ParameterConverter().convert(param)));
426                }
427            }
428        }
429
430        return paramList;
431    }
432
433    /**
434     * <p>
435     * checkParameter.</p>
436     *
437     * @param column a {@link uk.ac.ebi.pride.jmztab2.model.IMZTabColumn}
438     * object.
439     * @param target a {@link java.lang.String} object.
440     * @param allowNull a boolean.
441     * @return a {@link de.isas.mztab2.model.Parameter} object.
442     */
443    protected Parameter checkParameter(IMZTabColumn column, String target,
444            boolean allowNull) {
445        String result = checkData(column, target, true);
446        if (result == null || (result.equalsIgnoreCase(NULL) && !allowNull)) {
447            this.errorList.add(new MZTabError(FormatErrorType.Param, lineNumber,
448                    "Column " + column.getHeader(), target));
449        }
450        Parameter param = MZTabUtils.parseParam(target);
451        if (param != null && param.getCvAccession() != null && !param.
452                getCvAccession().
453                isEmpty()) {
454            if (!param.getCvAccession().
455                    contains(":")) {
456                this.errorList.add(new MZTabError(
457                        FormatErrorType.ParamAccessionNotNamespaced, lineNumber,
458                        column.getHeader(), param.getCvAccession(),
459                        new ParameterConverter().convert(param)));
460            }
461        } else if (param == null && result != null && !result.isEmpty() && !(result.
462                equalsIgnoreCase(NULL))) {
463            this.errorList.add(new MZTabError(FormatErrorType.Param, lineNumber,
464                    "Column " + column.getHeader(), target));
465        }
466        return param;
467    }
468
469    /**
470     * Check and translate target string into parameter list which split by
471     * splitChar character.. If parse is incorrect, throws
472     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#StringList}
473     * error.
474     *
475     * @param column SHOULD NOT be set to null
476     * @param target SHOULD NOT be empty.
477     * @param splitChar a char.
478     * @return a {@link java.util.List} object.
479     */
480    protected List<String> checkStringList(IMZTabColumn column, String target,
481            char splitChar) {
482        String result = checkData(column, target, true);
483
484        if (result == null || result.equalsIgnoreCase(NULL)) {
485            return new ArrayList<>(splitChar);
486        }
487
488        List<String> stringList = parseStringList(splitChar, result);
489        if (stringList.isEmpty()) {
490            this.errorList.add(new MZTabError(FormatErrorType.StringList,
491                    lineNumber, column.getHeader(), result, "" + splitChar));
492        }
493
494        return stringList;
495    }
496
497    /**
498     * Check and translate target string into integer list which split by
499     * splitChar character.. If parse is incorrect, throws
500     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#StringList}
501     * error.
502     *
503     * @param column SHOULD NOT be set to null
504     * @param target SHOULD NOT be empty.
505     * @param splitChar a char.
506     * @return a {@link java.util.List} object.
507     */
508    protected List<Integer> checkIntegerList(IMZTabColumn column, String target,
509            char splitChar) {
510        return checkIntegerList(column, target, splitChar, true);
511    }
512
513    /**
514     * Check and translate target string into integer list which split by
515     * splitChar character.. If parse is incorrect, throws
516     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#StringList}
517     * error.
518     *
519     * @param column SHOULD NOT be set to null
520     * @param target SHOULD NOT be empty.
521     * @param splitChar a char.
522     * @param allowNull if true, null will be treated as a valid element of the
523     * list. Otherwise, an error will be added to the error list.
524     * @return a {@link java.util.List} object.
525     */
526    protected List<Integer> checkIntegerList(IMZTabColumn column, String target,
527            char splitChar, boolean allowNull) {
528        String result = checkData(column, target, allowNull);
529
530        if (result == null || result.equalsIgnoreCase(NULL)) {
531            return new ArrayList<>(splitChar);
532        }
533
534        List<Integer> stringList = parseIntegerList(result);
535        if (stringList.isEmpty()) {
536            this.errorList.add(new MZTabError(FormatErrorType.IntegerList,
537                    lineNumber, column.getHeader(), result, "" + splitChar));
538        }
539
540        return stringList;
541    }
542
543    /**
544     * Check and translate target string into parameter list which split by
545     * splitChar character.. If parse is incorrect, throws
546     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#StringList}
547     * error.
548     *
549     * @param column SHOULD NOT be set to null
550     * @param target SHOULD NOT be empty.
551     * @return a {@link java.util.List} object.
552     */
553    protected List<Double> checkDoubleList(IMZTabColumn column, String target) {
554        String result = checkData(column, target, true);
555
556        if (result == null || result.equalsIgnoreCase(NULL)) {
557            return new ArrayList<>(MZTabConstants.BAR);
558        }
559
560        List<Double> doubleList = parseDoubleList(target);
561        if (doubleList.isEmpty()) {
562            this.errorList.add(new MZTabError(FormatErrorType.DoubleList,
563                    lineNumber, column.getHeader(), result, "" + MZTabConstants.BAR));
564        }
565
566        return doubleList;
567    }
568
569    /**
570     * Check and translate target to
571     * {@link uk.ac.ebi.pride.jmztab2.model.MZBoolean}. Only "0" and "1" allow
572     * used in express Boolean (0/1). If parse is incorrect, throws
573     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#MZBoolean}
574     * error.
575     *
576     * @param column SHOULD NOT be set to null
577     * @param target SHOULD NOT be empty.
578     * @return a {@link uk.ac.ebi.pride.jmztab2.model.MZBoolean} object.
579     */
580    protected MZBoolean checkMZBoolean(IMZTabColumn column, String target) {
581        String result = checkData(column, target, true);
582
583        if (result == null || result.equalsIgnoreCase(NULL)) {
584            return null;
585        }
586
587        MZBoolean value = MZBoolean.findBoolean(result);
588        if (value == null) {
589            this.errorList.add(new MZTabError(FormatErrorType.MZBoolean,
590                    lineNumber, column.getHeader(), result));
591        }
592
593        return value;
594    }
595
596    /**
597     * Check target string. Normally, description can set "null". But in
598     * "Complete" file, in general "null" values SHOULD not be given.
599     *
600     * @see #checkData(IMZTabColumn, String, boolean)
601     * @param column SHOULD NOT be set to null
602     * @param description SHOULD NOT be empty.
603     * @return a {@link java.lang.String} object.
604     */
605    protected String checkDescription(IMZTabColumn column, String description) {
606        return checkData(column, description, true);
607    }
608
609    /**
610     * Check and translate taxid string into Integer. If exists error during
611     * parse, raise
612     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Integer}
613     * error. Normally, taxid may be set to "null"; in general "null" values
614     * SHOULD not be given.
615     *
616     * @param column SHOULD NOT be set to null
617     * @param taxid SHOULD NOT be empty.
618     * @return a {@link java.lang.Integer} object.
619     */
620    protected Integer checkTaxid(IMZTabColumn column, String taxid) {
621        return checkInteger(column, taxid);
622    }
623
624    /**
625     * Check target string. Normally, species can set "null". But in "Complete"
626     * file, in general "null" values SHOULD not be given.
627     *
628     * @see #checkData(IMZTabColumn, String, boolean)
629     * @param column SHOULD NOT be set to null
630     * @param species SHOULD NOT be empty.
631     * @return a {@link java.lang.String} object.
632     */
633    protected String checkSpecies(IMZTabColumn column, String species) {
634        return checkData(column, species, true);
635    }
636
637    /**
638     * Check target string. Normally, database can set "null". But in "Complete"
639     * file, in general "null" values SHOULD not be given.
640     *
641     * @see #checkData(IMZTabColumn, String, boolean)
642     * @param column SHOULD NOT be set to null
643     * @param database SHOULD NOT be empty.
644     * @return a {@link java.lang.String} object.
645     */
646    protected String checkDatabase(IMZTabColumn column, String database) {
647        return checkData(column, database, true);
648    }
649
650    /**
651     * Check target string. Normally, databaseVersion can set "null". But in
652     * "Complete" file, in general "null" values SHOULD not be given.
653     *
654     * @see #checkData(IMZTabColumn, String, boolean)
655     * @param column SHOULD NOT be set to null
656     * @param databaseVersion SHOULD NOT be empty.
657     * @return a {@link java.lang.String} object.
658     */
659    protected String checkDatabaseVersion(IMZTabColumn column,
660            String databaseVersion) {
661        return checkData(column, databaseVersion, true);
662    }
663
664    /**
665     * Check and translate searchEngine string into parameter list which split
666     * by '|' character.. If parse is incorrect, throws
667     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#ParamList}
668     * error. Normally, searchEngine may be set to "null"; in general "null"
669     * values SHOULD not be given.
670     *
671     * @param column SHOULD NOT be set to null
672     * @param searchEngine SHOULD NOT be empty.
673     * @return a {@link java.util.List} object.
674     */
675    protected List<Parameter> checkSearchEngine(IMZTabColumn column,
676            String searchEngine) {
677        return checkParamList(column, searchEngine);
678    }
679
680    /**
681     * The best search engine score (for this type of score) for the given
682     * peptide across all replicates reported. The type of score MUST be defined
683     * in the metadata section. If the peptide was not identified by the
684     * specified search engine, “null” MUST be reported.
685     *
686     * @param column SHOULD NOT be set to null
687     * @param bestSearchEngineScore SHOULD NOT be empty.
688     * @return a {@link java.lang.Double} object.
689     */
690    protected Double checkBestSearchEngineScore(IMZTabColumn column,
691            String bestSearchEngineScore) {
692        return checkDouble(column, bestSearchEngineScore);
693    }
694
695    /**
696     * The search engine score for the given peptide in the defined ms run. The
697     * type of score MUST be defined in the metadata section. If the peptide was
698     * not identified by the specified search engine “null” must be reported.
699     *
700     * @param column SHOULD NOT be set to null
701     * @param searchEngineScore SHOULD NOT be empty.
702     * @return a {@link java.lang.Double} object.
703     */
704    protected Double checkSearchEngineScore(IMZTabColumn column,
705            String searchEngineScore) {
706        return checkDouble(column, searchEngineScore);
707    }
708
709    /**
710     * Check and translate numPSMs string into Integer. If exists error during
711     * parse, raise
712     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Integer}
713     * error. Normally, numPSMs may be set to "null"; in general "null" values
714     * SHOULD not be given.
715     *
716     * @param column SHOULD NOT be set to null
717     * @param numPSMs SHOULD NOT be empty.
718     * @return a {@link java.lang.Integer} object.
719     */
720    protected Integer checkNumPSMs(IMZTabColumn column, String numPSMs) {
721        return checkInteger(column, numPSMs);
722    }
723
724    /**
725     * Check and translate numPeptidesDistinct string into Integer. If exists
726     * error during parse, raise
727     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Integer}
728     * error. Normally, numPeptidesDistinct can set "null", but in "Complete"
729     * file, in general "null" values SHOULD not be given.
730     *
731     * @param column SHOULD NOT be set to null
732     * @param numPeptidesDistinct SHOULD NOT be empty.
733     * @return a {@link java.lang.Integer} object.
734     */
735    protected Integer checkNumPeptidesDistinct(IMZTabColumn column,
736            String numPeptidesDistinct) {
737        return checkInteger(column, numPeptidesDistinct);
738    }
739
740    /**
741     * Check and translate numPeptidesUnique string into Integer. If exists
742     * error during parse, raise
743     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Integer}
744     * error. Normally, numPeptidesUnique can set "null", but in "Complete"
745     * file, in general "null" values SHOULD not be given.
746     *
747     * @param column SHOULD NOT be set to null
748     * @param numPeptidesUnique SHOULD NOT be empty.
749     * @return a {@link java.lang.Integer} object.
750     */
751    protected Integer checkNumPeptidesUnique(IMZTabColumn column,
752            String numPeptidesUnique) {
753        return checkInteger(column, numPeptidesUnique);
754    }
755
756    /**
757     * Check and translate target string into parameter list which split by ','
758     * character.. If parse is incorrect, throws
759     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#StringList}
760     * error. Normally, ambiguityMembers may be set to "null"; in general "null"
761     * values SHOULD not be given.
762     *
763     * @param column SHOULD NOT be set to null
764     * @param ambiguityMembers SHOULD NOT be empty.
765     * @return a {@link java.util.List} object.
766     */
767    protected List<String> checkAmbiguityMembers(IMZTabColumn column,
768            String ambiguityMembers) {
769        return checkStringList(column, ambiguityMembers, COMMA);
770    }
771
772    /**
773     * Checks the provided URI string.
774     *
775     * @param column SHOULD NOT be set to null
776     * @param uri a {@link java.lang.String} object, conforming to URI format.
777     * @return the uri as an ASCII encoded string.
778     */
779    protected String checkURI(IMZTabColumn column, String uri) {
780        String result_uri = checkData(column, uri, true);
781
782        if (result_uri == null || result_uri.equalsIgnoreCase(NULL)) {
783            return null;
784        }
785
786        java.net.URI result = parseURI(result_uri);
787        if (result == null) {
788            this.errorList.add(new MZTabError(FormatErrorType.URI, lineNumber,
789                    "Column " + column.getHeader(), result_uri));
790            return null;
791        } else {
792            return result.toASCIIString();
793        }
794    }
795
796    /**
797     * Check and translate spectraRef string into
798     * {@link de.isas.mztab2.model.SpectraRef} list. If parse incorrect, or
799     * ms_run not defined in metadata raise
800     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#SpectraRef}
801     * error. Normally, spectraRef may be set to "null"; in general "null"
802     * values SHOULD not be given.
803     *
804     * @param column SHOULD NOT be set to null
805     * @param spectraRef SHOULD NOT be empty.
806     * @param context a
807     * {@link uk.ac.ebi.pride.jmztab2.utils.parser.MZTabParserContext} object.
808     * @param allowNull if true, allow null for value. Otherwise, an error will
809     * be added to the error list.
810     * @return a {@link java.util.List} object.
811     */
812    protected List<SpectraRef> checkSpectraRef(MZTabParserContext context,
813            IMZTabColumn column, String spectraRef, boolean allowNull) {
814        String result_spectraRef = checkData(column, spectraRef, allowNull);
815
816        if (result_spectraRef == null || result_spectraRef.
817                equalsIgnoreCase(NULL)) {
818            return new SplitList<>(BAR);
819        }
820
821        List<SpectraRef> refList = parseSpectraRefList(context, metadata,
822                result_spectraRef);
823        SpectraRefValidator validator = new SpectraRefValidator();
824        this.errorList.addAll(
825            validator.validateLine(
826                lineNumber, 
827                context, 
828                column, 
829                result_spectraRef, 
830                refList
831            )
832        );
833        return refList;
834    }
835
836    /**
837     * Check target string. Normally, pre can set "null". "null" values should
838     * only be given, if no value is available and where the specification
839     * allows for "null" explicitly."
840     *
841     * @see #checkData(IMZTabColumn, String, boolean)
842     * @param column SHOULD NOT be set to null
843     * @param pre SHOULD NOT be empty.
844     * @return a {@link java.lang.String} object.
845     */
846    protected String checkPre(IMZTabColumn column, String pre) {
847        return checkData(column, pre, true);
848    }
849
850    /**
851     * Check target string. Normally, post can set "null". But in "Complete"
852     * file, in general "null" values SHOULD not be given.
853     *
854     * @see #checkData(IMZTabColumn, String, boolean)
855     * @param column SHOULD NOT be set to null
856     * @param post SHOULD NOT be empty.
857     * @return a {@link java.lang.String} object.
858     */
859    protected String checkPost(IMZTabColumn column, String post) {
860        return checkData(column, post, true);
861    }
862
863    /**
864     * Check target string. Normally, start can set "null". But in "Complete"
865     * file, in general "null" values SHOULD not be given.
866     *
867     * @see #checkData(IMZTabColumn, String, boolean)
868     * @param column SHOULD NOT be set to null
869     * @param start SHOULD NOT be empty.
870     * @return a {@link java.lang.String} object.
871     */
872    protected String checkStart(IMZTabColumn column, String start) {
873        return checkData(column, start, true);
874    }
875
876    /**
877     * Check target string. Normally, end can set "null". But in "Complete"
878     * file, in general "null" values SHOULD not be given.
879     *
880     * @see #checkData(IMZTabColumn, String, boolean)
881     * @param column SHOULD NOT be set to null
882     * @param end SHOULD NOT be empty.
883     * @return a {@link java.lang.String} object.
884     */
885    protected String checkEnd(IMZTabColumn column, String end) {
886        return checkData(column, end, true);
887    }
888
889    /**
890     * Check and translate target string into string list which split by ','
891     * character.. If parse is incorrect, throws
892     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#StringList}
893     * error. Besides, each item in list should be start with "GO:", otherwise
894     * system raise
895     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#GOTermList}
896     * error. Normally, go_terms may be set to "null"; in general "null" values
897     * SHOULD not be given.
898     *
899     * @param column SHOULD NOT be set to null
900     * @param go_terms SHOULD NOT be empty.
901     * @return a {@link java.util.List} object.
902     */
903    protected List<String> checkGOTerms(IMZTabColumn column, String go_terms) {
904        String result_go_terms = checkData(column, go_terms, true);
905
906        if (result_go_terms == null || result_go_terms.equalsIgnoreCase(NULL)) {
907            return new ArrayList<>(COMMA);
908        }
909
910        List<String> stringList = parseGOTermList(result_go_terms);
911        if (stringList.isEmpty()) {
912            this.errorList.add(new MZTabError(FormatErrorType.GOTermList,
913                    lineNumber, column.getHeader(), result_go_terms));
914        }
915
916        return stringList;
917    }
918
919    /**
920     * Check and translate protein_coverage string into Double. If parse is
921     * incorrect, throws
922     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Double}
923     * error. protein_coverage range should be in the [0, 1), otherwise raise
924     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.LogicalErrorType#ProteinCoverage}
925     * error.
926     *
927     * NOTICE: If ratios are included and the denominator is zero, the "INF"
928     * value MUST be used. If the result leads to calculation errors (for
929     * example 0/0), this MUST be reported as "not a number" ("NaN").
930     *
931     * @param column SHOULD NOT be set to null
932     * @param protein_coverage SHOULD NOT be empty.
933     * @return a {@link java.lang.Double} object.
934     */
935    protected Double checkProteinCoverage(IMZTabColumn column,
936            String protein_coverage) {
937        Double result = checkDouble(column, protein_coverage);
938
939        if (result == null) {
940            return null;
941        }
942
943        if (result < 0 || result > 1) {
944            this.errorList.add(new MZTabError(LogicalErrorType.ProteinCoverage,
945                    lineNumber, column.getHeader(), printDouble(result)));
946            return null;
947        }
948
949        return result;
950    }
951
952    /**
953     * Check and translate peptide sequence. 'O' and 'U' are encoded by codons
954     * that are usually interpreted as stop codons, which can not displayed in
955     * the sequence. So, if find it, system raise
956     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Sequence}
957     * error.
958     *
959     * @param column SHOULD NOT be set to null
960     * @param sequence SHOULD NOT be empty.
961     * @return a {@link java.lang.String} object.
962     */
963    protected String checkSequence(IMZTabColumn column, String sequence) {
964        String result = checkData(column, sequence, true);
965
966        if (result == null) {
967            return null;
968        }
969
970        result = result.toUpperCase();
971
972        Pattern pattern = Pattern.compile("[OU]");
973        Matcher matcher = pattern.matcher(result);
974        if (matcher.find()) {
975            this.errorList.add(new MZTabError(FormatErrorType.Sequence,
976                    lineNumber, column.getHeader(), sequence));
977        }
978
979        return result;
980    }
981
982    /**
983     * Check and translate psm_id string into Integer. If exists error during
984     * parse, raise
985     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Integer}
986     * error. Normally, psm_id may be set to "null"; in general "null" values
987     * SHOULD not be given.
988     *
989     * @param column SHOULD NOT be set to null
990     * @param psm_id SHOULD NOT be empty.
991     * @return a {@link java.lang.Integer} object.
992     */
993    protected Integer checkPSMID(IMZTabColumn column, String psm_id) {
994        return checkInteger(column, psm_id);
995    }
996
997    /**
998     * Check and translate unique to
999     * {@link uk.ac.ebi.pride.jmztab2.model.MZBoolean}. Only "0" and "1" allow
1000     * used in express Boolean (0/1). If parse is incorrect, throws
1001     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#MZBoolean}
1002     * error.
1003     *
1004     * @param column SHOULD NOT be set to null
1005     * @param unique SHOULD NOT be empty.
1006     * @return a {@link uk.ac.ebi.pride.jmztab2.model.MZBoolean} object.
1007     */
1008    protected MZBoolean checkUnique(IMZTabColumn column, String unique) {
1009        return checkMZBoolean(column, unique);
1010    }
1011
1012    /**
1013     * Check and translate charge string into Integer. If exists error during
1014     * parse, raise
1015     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Integer}
1016     * error. Normally, charge may be set to "null"; in general "null" values
1017     * SHOULD not be given.
1018     *
1019     * @param column SHOULD NOT be set to null
1020     * @param charge SHOULD NOT be empty.
1021     * @return a {@link java.lang.Integer} object.
1022     */
1023    protected Integer checkCharge(IMZTabColumn column, String charge) {
1024        return checkInteger(column, charge);
1025    }
1026
1027    /**
1028     * Check and translate mass_to_charge string into Double. If parse is
1029     * incorrect, throws
1030     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Double}
1031     * error.
1032     *
1033     * NOTICE: If ratios are included and the denominator is zero, the "INF"
1034     * value MUST be used. If the result leads to calculation errors (for
1035     * example 0/0), this MUST be reported as "not a number" ("NaN").
1036     *
1037     * @param column SHOULD NOT be set to null
1038     * @param mass_to_charge SHOULD NOT be empty.
1039     * @return a {@link java.lang.Double} object.
1040     */
1041    protected Double checkMassToCharge(IMZTabColumn column,
1042            String mass_to_charge) {
1043        return checkDouble(column, mass_to_charge);
1044    }
1045
1046    /**
1047     * Check and translate exp_mass_to_charge string into Double. If parse is
1048     * incorrect, throws
1049     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Double}
1050     * error.
1051     *
1052     * NOTICE: If ratios are included and the denominator is zero, the "INF"
1053     * value MUST be used. If the result leads to calculation errors (for
1054     * example 0/0), this MUST be reported as "not a number" ("NaN").
1055     *
1056     * @param column SHOULD NOT be set to null
1057     * @param exp_mass_to_charge SHOULD NOT be empty.
1058     * @return a {@link java.lang.Double} object.
1059     */
1060    protected Double checkExpMassToCharge(IMZTabColumn column,
1061            String exp_mass_to_charge) {
1062        return checkDouble(column, exp_mass_to_charge);
1063    }
1064
1065    /**
1066     * Check and translate calc_mass_to_charge string into Double. If parse is
1067     * incorrect, throws
1068     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#Double}
1069     * error.
1070     *
1071     * NOTICE: If ratios are included and the denominator is zero, the "INF"
1072     * value MUST be used. If the result leads to calculation errors (for
1073     * example 0/0), this MUST be reported as "not a number" ("NaN").
1074     *
1075     * @param column SHOULD NOT be set to null
1076     * @param calc_mass_to_charge SHOULD NOT be empty.
1077     * @return a {@link java.lang.Double} object.
1078     */
1079    protected Double checkCalcMassToCharge(IMZTabColumn column,
1080            String calc_mass_to_charge) {
1081        return checkDouble(column, calc_mass_to_charge);
1082    }
1083
1084    /**
1085     * Check and translate identifier string into string list which split by '|'
1086     * character.. If parse is incorrect, throws
1087     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#StringList}
1088     * error. Normally, identifier may be set to "null"; in general "null"
1089     * values SHOULD not be given.
1090     *
1091     * @param column SHOULD NOT be set to null
1092     * @param identifier SHOULD NOT be empty.
1093     * @return a {@link java.util.List} object.
1094     */
1095    protected List<String> checkIdentifier(IMZTabColumn column,
1096            String identifier) {
1097        return checkStringList(column, identifier, BAR);
1098    }
1099
1100    /**
1101     * Check chemical_formula string. Normally, chemical_formula can set "null".
1102     * But in "Complete" file, in general "null" values SHOULD not be given.
1103     *
1104     * @see #checkData(IMZTabColumn, String, boolean)
1105     * @param column SHOULD NOT be set to null
1106     * @param chemical_formula SHOULD NOT be empty.
1107     * @return a {@link java.lang.String} object.
1108     */
1109    protected String checkChemicalFormula(IMZTabColumn column,
1110            String chemical_formula) {
1111        return checkData(column, chemical_formula, true);
1112    }
1113
1114    /**
1115     * Check and translate smiles string into parameter list which split by '|'
1116     * character.. If parse is incorrect, throws
1117     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#StringList}
1118     * error. Normally, smiles may be set to "null"; in general "null" values
1119     * SHOULD not be given.
1120     *
1121     * @param column SHOULD NOT be set to null
1122     * @param smiles SHOULD NOT be empty.
1123     * @return a {@link java.util.List} object.
1124     */
1125    protected List<String> checkSmiles(IMZTabColumn column, String smiles) {
1126        return checkStringList(column, smiles, BAR);
1127    }
1128
1129    /**
1130     * Check and translate inchi_key string into parameter list which split by
1131     * '|' character.. If parse is incorrect, throws
1132     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#StringList}
1133     * error. Normally, inchi_key may be set to "null"; in general "null" values
1134     * SHOULD not be given.
1135     *
1136     * @param column SHOULD NOT be set to null
1137     * @param inchi_key SHOULD NOT be empty.
1138     * @return a {@link java.util.List} object.
1139     */
1140    protected List<String> checkInchiKey(IMZTabColumn column, String inchi_key) {
1141        return checkStringList(column, inchi_key, BAR);
1142    }
1143
1144    /**
1145     * Check and translate retention_time string into Double list which split by
1146     * '|' character.. If parse is incorrect, throws
1147     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#DoubleList}
1148     * error. Normally, retention_time may be set to "null"; in general "null"
1149     * values SHOULD not be given.
1150     *
1151     * @param column SHOULD NOT be set to null
1152     * @param retention_time SHOULD NOT be empty.
1153     * @return a {@link java.util.List} object.
1154     */
1155    protected List<Double> checkRetentionTime(IMZTabColumn column,
1156            String retention_time) {
1157        String result = checkData(column, retention_time, true);
1158
1159        if (result == null || result.equalsIgnoreCase(NULL)) {
1160            return new SplitList<>(BAR);
1161        }
1162
1163        List<Double> valueList = parseDoubleList(result);
1164        if (valueList.isEmpty()) {
1165            this.errorList.add(new MZTabError(FormatErrorType.DoubleList,
1166                    lineNumber, column.getHeader(), result, "" + BAR));
1167        }
1168
1169        return valueList;
1170    }
1171
1172    /**
1173     * Check and translate retention_time_window string into Double list which
1174     * split by '|' character.. If parse is incorrect, throws
1175     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType#DoubleList}
1176     * error. Normally, retention_time_window can set "null", but in "Complete"
1177     * file, in general "null" values SHOULD not be given.
1178     *
1179     * @param column SHOULD NOT be set to null
1180     * @param retention_time_window SHOULD NOT be empty.
1181     * @return a {@link java.util.List} object.
1182     */
1183    protected List<Double> checkRetentionTimeWindow(IMZTabColumn column,
1184            String retention_time_window) {
1185        String result = checkData(column, retention_time_window, true);
1186
1187        if (result == null || result.equalsIgnoreCase(NULL)) {
1188            return new SplitList<>(BAR);
1189        }
1190
1191        List<Double> valueList = parseDoubleList(result);
1192        if (valueList.isEmpty()) {
1193            this.errorList.add(new MZTabError(FormatErrorType.DoubleList,
1194                    lineNumber, column.getHeader(), result, "" + BAR));
1195        }
1196
1197        return valueList;
1198    }
1199}