001/* 
002 * Copyright 2018 Leibniz-Institut für Analytische Wissenschaften – ISAS – e.V..
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package uk.ac.ebi.pride.jmztab2.utils.parser;
017
018import de.isas.mztab2.io.validators.AssayValidator;
019import de.isas.mztab2.io.validators.CvValidator;
020import de.isas.mztab2.io.validators.DatabaseValidator;
021import de.isas.mztab2.io.validators.MsRunValidator;
022import de.isas.mztab2.io.validators.MzTabIdValidator;
023import de.isas.mztab2.io.validators.MzTabVersionValidator;
024import de.isas.mztab2.io.validators.QuantificationMethodValidator;
025import de.isas.mztab2.io.validators.SmallMoleculeFeatureQuantificationUnitValidator;
026import de.isas.mztab2.io.validators.SmallMoleculeQuantificationUnitValidator;
027import de.isas.mztab2.io.validators.SoftwareValidator;
028import de.isas.mztab2.io.validators.StudyVariableValidator;
029import de.isas.mztab2.model.Assay;
030import de.isas.mztab2.model.CV;
031import de.isas.mztab2.model.Contact;
032import de.isas.mztab2.model.Database;
033import de.isas.mztab2.model.IndexedElement;
034import de.isas.mztab2.model.Instrument;
035import de.isas.mztab2.model.Metadata;
036import de.isas.mztab2.model.MsRun;
037import de.isas.mztab2.model.Parameter;
038import de.isas.mztab2.model.Publication;
039import de.isas.mztab2.model.Sample;
040import de.isas.mztab2.model.SampleProcessing;
041import de.isas.mztab2.model.Software;
042import de.isas.mztab2.model.StudyVariable;
043import de.isas.mztab2.model.Uri;
044import java.net.URI;
045import java.util.*;
046import java.util.function.Consumer;
047import java.util.regex.Matcher;
048import java.util.regex.Pattern;
049import java.util.stream.Collectors;
050import uk.ac.ebi.pride.jmztab2.model.MZTabConstants;
051import static uk.ac.ebi.pride.jmztab2.model.MZTabStringUtils.*;
052import static uk.ac.ebi.pride.jmztab2.model.MZTabUtils.*;
053import uk.ac.ebi.pride.jmztab2.model.MetadataElement;
054import uk.ac.ebi.pride.jmztab2.model.MetadataProperty;
055import uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType;
056import uk.ac.ebi.pride.jmztab2.utils.errors.LogicalErrorType;
057import uk.ac.ebi.pride.jmztab2.utils.errors.MZTabError;
058import uk.ac.ebi.pride.jmztab2.utils.errors.MZTabErrorList;
059import uk.ac.ebi.pride.jmztab2.utils.errors.MZTabErrorOverflowException;
060import uk.ac.ebi.pride.jmztab2.utils.errors.MZTabErrorType;
061import uk.ac.ebi.pride.jmztab2.utils.errors.MZTabException;
062import de.isas.mztab2.io.validators.RefiningValidator;
063
064/**
 * Parses a metadata line into an element. A metadata line starts with MTD and
 * is structured like: MTD
067 * {@link uk.ac.ebi.pride.jmztab2.model.MetadataElement}([id])(-{@link uk.ac.ebi.pride.jmztab2.model.MetadataProperty})    {Element Value}
068 *
069 * @see MetadataElement
070 * @see MetadataProperty
071 * @author qingwei
072 * @author nilshoffmann
073 * @since 08/02/13
074 *
075 */
076public class MTDLineParser extends MZTabLineParser {
077
078    private static final String Error_Header = Metadata.PrefixEnum.MTD.
079        getValue() + "\t";
080
081    private final Metadata metadata = new Metadata();
082
    /**
     * Creates a metadata line parser that works against the given parser
     * context. The context is handed to the superclass constructor unchanged.
     *
     * @param context a
     * {@link uk.ac.ebi.pride.jmztab2.utils.parser.MZTabParserContext} object.
     */
    public MTDLineParser(MZTabParserContext context) {
        super(context);
    }
093
094    /**
095     * {@inheritDoc}
096     *
097     * Most of e, we use {@link #parseNormalMetadata(String, String)} to parse
098     * defineLabel into Metadata Element.
099     */
100    @Override
101    public void parse(int lineNumber, String mtdLine, MZTabErrorList errorList) throws MZTabException {
102        super.parse(lineNumber, mtdLine, errorList);
103
104        if (items.length != 3) {
105            MZTabError error = new MZTabError(FormatErrorType.MTDLine,
106                lineNumber, mtdLine);
107            throw new MZTabException(error);
108        }
109
110        String defineLabel = items[1].trim().
111            toLowerCase();
112        String valueLabel = items[2].trim();
113
114        parseNormalMetadata(defineLabel, valueLabel);
115    }
116
117    /**
118     * Parse valueLabel based on email format. If exists parse error, add it
119     * into {@link MZTabErrorList}.
120     */
121    private String checkEmail(String defineLabel, String valueLabel) {
122        String email = parseEmail(valueLabel);
123
124        if (email == null) {
125            errorList.add(new MZTabError(FormatErrorType.Email, lineNumber,
126                Error_Header + defineLabel, valueLabel));
127        }
128
129        return email;
130    }
131
132    /**
133     * Parse {@link MetadataProperty} which depend on the
134     * {@link MetadataElement}. If exists parse error, stop validate and throw
135     * {@link MZTabException} directly.
136     */
137    private MetadataProperty checkProperty(MetadataElement element,
138        String propertyName) throws MZTabException {
139        if (isEmpty(propertyName)) {
140            return null;
141        }
142
143        Optional<MetadataProperty> property = MetadataProperty.findProperty(element,
144            propertyName);
145        if (!property.isPresent()) {
146            MZTabError error = new MZTabError(FormatErrorType.MTDDefineLabel,
147                lineNumber, element.getName() + "-" + propertyName);
148            throw new MZTabException(error);
149        }
150
151        return property.get();
152    }
153
154    /**
155     * Parse valueLabel to {@link Parameter} If exists parse error, add it into
156     * {@link MZTabErrorList}
157     */
158    private Parameter checkParameter(String defineLabel, String valueLabel) {
159        Parameter param = parseParam(valueLabel);
160        if (param == null) {
161            errorList.add(new MZTabError(FormatErrorType.Param, lineNumber,
162                Error_Header + defineLabel, valueLabel));
163        }
164        return param;
165    }
166
167    /**
168     * Parse valueLabel to a list of '|' separated parameters. If exists parse
169     * error, add it into {@link MZTabErrorList}
170     */
171    private List<Parameter> checkParameterList(String defineLabel,
172        String valueLabel) {
173        List<Parameter> paramList = parseParamList(valueLabel);
174
175        if (paramList.isEmpty()) {
176            errorList.add(new MZTabError(FormatErrorType.ParamList, lineNumber,
177                Error_Header + defineLabel, valueLabel));
178        }
179
180        return paramList;
181    }
182
183    /**
184     * Parse valueLabel to a list of '|' separated parameters. If exists parse
185     * error, add it into {@link MZTabErrorList}
186     */
187    private Publication checkPublication(Integer id, String defineLabel,
188        String valueLabel) throws MZTabException {
189        if (!context.getPublicationMap().
190            containsKey(id)) {
191            context.addPublication(metadata, new Publication().id(id));
192        }
193        Publication publications = null;
194        try {
195            publications = parsePublicationItems(context.
196                getPublicationMap().
197                get(id), lineNumber, valueLabel);
198            if (publications == null || publications.getPublicationItems() == null || publications.
199                getPublicationItems().
200                isEmpty()) {
201                errorList.add(
202                    new MZTabError(FormatErrorType.Publication, lineNumber,
203                        Error_Header + defineLabel, valueLabel));
204            }
205        } catch (MZTabException ex) {
206            errorList.add(ex.getError());
207        }
208
209        return publications;
210
211    }
212
213    /**
214     * Parse valueLabel to a {@link java.net.URI} If exists parse error, add it
215     * into {@link MZTabErrorList}
216     */
217    private java.net.URI checkURI(String defineLabel, String valueLabel,
218        boolean mandatory) {
219        if (null == parseString(valueLabel)) {
220            if (mandatory) {
221                // "null" value is supported when the ms_run[1-n]-location is unknown
222                errorList.add(new MZTabError(LogicalErrorType.NotNULL,
223                    lineNumber,
224                    Error_Header + defineLabel, valueLabel));
225            }
226            return null;
227        }
228
229        java.net.URI uri = parseURI(valueLabel);
230        if (uri == null) {
231            errorList.add(new MZTabError(FormatErrorType.URI, lineNumber,
232                Error_Header + defineLabel, valueLabel));
233        }
234
235        return uri;
236    }
237
238    /**
239     * Parse defineLabel to a index id number. If exists parse error, stop
240     * validate and throw {@link MZTabException} directly.
241     */
242    private int checkIndex(String defineLabel, String id) throws MZTabException {
243        try {
244            Integer index = Integer.parseInt(id);
245            if (index < 1) {
246                throw new NumberFormatException();
247            }
248
249            return index;
250        } catch (NumberFormatException e) {
251            MZTabError error = new MZTabError(LogicalErrorType.IdNumber,
252                lineNumber, Error_Header + defineLabel, id);
253            throw new MZTabException(error);
254        }
255    }
256
257    /**
258     * Parse valueLabel to a {@link IndexedElement} If exists parse error, stop
259     * validate and throw {@link MZTabException} directly.
260     */
261    private IndexedElement checkIndexedElement(String defineLabel,
262        String valueLabel, MetadataElement element) throws MZTabException {
263        IndexedElement indexedElement = parseIndexedElement(valueLabel, element);
264        if (indexedElement == null) {
265            MZTabError error = new MZTabError(FormatErrorType.IndexedElement,
266                lineNumber, Error_Header + defineLabel, valueLabel);
267            throw new MZTabException(error);
268        }
269
270        return indexedElement;
271    }
272
273    /**
274     * Parse valueLabel to a {@link IndexedElement} list. If exists parse error,
275     * stop validate and throw {@link MZTabException} directly.
276     */
277    private List<IndexedElement> checkIndexedElementList(String defineLabel,
278        String valueLabel, MetadataElement element) throws MZTabException {
279        List<IndexedElement> indexedElementList = parseRefList(valueLabel,
280            element);
281        if (indexedElementList == null || indexedElementList.isEmpty()) {
282            MZTabError error = new MZTabError(FormatErrorType.IndexedElement,
283                lineNumber, Error_Header + defineLabel, valueLabel);
284            throw new MZTabException(error);
285        }
286        return indexedElementList;
287    }
288
289    /**
290     * The metadata line including three parts: MTD {defineLabel} {valueLabel}
291     *
292     * In normal, define label structure like:
293     * {@link MetadataElement}([id])(-{@link MetadataSubElement}[pid])(-{@link MetadataProperty})
294     *
295     * @see MetadataElement : Mandatory
296     * @see MetadataSubElement : Optional
297     * @see MetadataProperty : Optional.
298     *
299     * If exists parse error, add it into {@link MZTabErrorList}
300     */
301    private void parseNormalMetadata(String defineLabel, String valueLabel) throws MZTabException {
302        Pattern pattern = Pattern.compile(MZTabConstants.REGEX_NORMAL_METADATA);
303        Matcher matcher = pattern.matcher(defineLabel);
304
305        if (matcher.find()) {
306            // Stage 1: create Unit.
307            MetadataElement element = MetadataElement.findElement(matcher.group(
308                1));
309            if (element == null) {
310                throw new MZTabException(new MZTabError(
311                    FormatErrorType.MTDDefineLabel, lineNumber, defineLabel));
312            }
313
314            switch (element) {
315                case MZTAB:
316                    handleMzTab(element, matcher, defineLabel, valueLabel);
317                    break;
318                case TITLE:
319                    handleTitle(defineLabel, valueLabel);
320                    break;
321                case DESCRIPTION:
322                    handleDescription(defineLabel, valueLabel);
323                    break;
324                case SAMPLE_PROCESSING:
325                    handleSampleProcessing(defineLabel, matcher, valueLabel);
326                    break;
327                case INSTRUMENT:
328                    handleInstrument(defineLabel, matcher, element, valueLabel);
329                    break;
330                case SOFTWARE:
331                    handleSoftware(defineLabel, matcher, element, valueLabel);
332                    break;
333                case PUBLICATION:
334                    handlePublication(defineLabel, matcher, valueLabel);
335                    break;
336                case CONTACT:
337                    handleContact(defineLabel, matcher, element, valueLabel);
338                    break;
339                case URI:
340                    handleUri(defineLabel, matcher, valueLabel, false);
341                    break;
342                case EXTERNAL_STUDY_URI:
343                    handleExternalStudyUri(defineLabel, matcher, valueLabel);
344                    break;
345                case QUANTIFICATION_METHOD:
346                    handleQuantificationMethod(defineLabel, valueLabel);
347                    break;
348                case SMALL_MOLECULE:
349                    handleSmallMolecule(element, matcher, defineLabel,
350                        valueLabel);
351                    break;
352                case SMALL_MOLECULE_FEATURE:
353                    handleSmallMoleculeFeature(element, matcher, defineLabel,
354                        valueLabel);
355                    break;
356                case MS_RUN:
357                    handleMsRun(defineLabel, matcher, element, valueLabel);
358                    break;
359                case SAMPLE:
360                    handleSample(defineLabel, matcher, element, valueLabel);
361                    break;
362                case ASSAY:
363                    handleAssay(matcher, defineLabel, element, valueLabel);
364                    break;
365                case STUDY_VARIABLE:
366                    handleStudyVariable(defineLabel, matcher, element,
367                        valueLabel);
368                    break;
369                case CUSTOM:
370                    handleCustom(defineLabel, matcher, valueLabel);
371                    break;
372                case CV:
373                    handleCv(defineLabel, matcher, element, valueLabel);
374                    break;
375                case DATABASE:
376                    handleDatabase(defineLabel, matcher, element, valueLabel);
377                    break;
378                case DERIVATIZATION_AGENT:
379                    handleDerivatizationAgent(defineLabel, matcher, valueLabel);
380                    break;
381                case COLUNIT:
382                case COLUNIT_SMALL_MOLECULE:
383                case COLUNIT_SMALL_MOLECULE_FEATURE:
384                case COLUNIT_SMALL_MOLECULE_EVIDENCE:
385                    handleColunit(defineLabel, valueLabel);
386                    break;
387                case ID_CONFIDENCE_MEASURE:
388                    handleIdConfidenceMeasure(defineLabel, matcher, valueLabel);
389                    break;
390                //opt column definitions are handled later
391            }
392
393        } else {
394            throw new MZTabException(new MZTabError(FormatErrorType.MTDLine,
395                lineNumber, line));
396        }
397    }
398
399    protected void handleIdConfidenceMeasure(String defineLabel, Matcher matcher,
400        String valueLabel) throws MZTabException {
401        Integer id;
402        id = checkIndex(defineLabel, matcher.group(3));
403        context.addIdConfidenceMeasure(metadata, id, checkParameter(
404            defineLabel, valueLabel));
405    }
406
407    protected void handleColunit(String defineLabel, String valueLabel) throws MZTabErrorOverflowException {
408        // In this stage, just store them into colUnitMap<defineLabel, valueLabel>.
409        // after the section columns is created we will add the col unit.
410        if (!defineLabel.equals("colunit-protein")
411            && !defineLabel.equals("colunit-peptide")
412            && !defineLabel.equals("colunit-psm")
413            && !defineLabel.equals(Metadata.Properties.colunitSmallMolecule.
414                getPropertyName())
415            && !defineLabel.equals(
416                Metadata.Properties.colunitSmallMoleculeEvidence.
417                    getPropertyName())
418            && !defineLabel.equals(
419                Metadata.Properties.colunitSmallMoleculeFeature.
420                    getPropertyName())) {
421            errorList.add(new MZTabError(
422                FormatErrorType.MTDDefineLabel, lineNumber,
423                defineLabel));
424        } else {
425            String[] colunitDef = valueLabel.split("=");
426            if (colunitDef.length != 2) {
427                errorList.add(new MZTabError(
428                    FormatErrorType.InvalidColunitFormat, lineNumber, valueLabel));
429            }
430            Parameter p = checkParameter(defineLabel, colunitDef[1]);
431            String columnName = colunitDef[0];
432            if (columnName == null) {
433                errorList.add(new MZTabError(
434                    FormatErrorType.InvalidColunitFormat, lineNumber, valueLabel));
435            } else {
436                if (defineLabel.equals(
437                    Metadata.Properties.colunitSmallMolecule.getPropertyName())) {
438                    context.addSmallMoleculeColUnit(metadata, columnName, p);
439                } else if (defineLabel.equals(
440                    Metadata.Properties.colunitSmallMoleculeFeature.
441                        getPropertyName())) {
442                    context.addSmallMoleculeFeatureColUnit(metadata, columnName,
443                        p);
444                } else if (defineLabel.equals(
445                    Metadata.Properties.colunitSmallMoleculeEvidence.
446                        getPropertyName())) {
447                    context.
448                        addSmallMoleculeEvidenceColUnit(metadata, columnName, p);
449                } else {
450                    errorList.add(new MZTabError(
451                        FormatErrorType.MTDDefineLabel, lineNumber,
452                        defineLabel));
453                }
454            }
455        }
456    }
457
458    protected void handleDatabase(String defineLabel, Matcher matcher,
459        MetadataElement element, String valueLabel) throws MZTabException {
460        Integer id;
461        MetadataProperty property;
462        id = checkIndex(defineLabel, matcher.group(3));
463        property = checkProperty(element, matcher.group(5));
464        addDatabase(context, metadata, property, id, defineLabel, valueLabel);
465    }
466
467    protected void handleCv(String defineLabel, Matcher matcher,
468        MetadataElement element, String valueLabel) throws MZTabException {
469        Integer id;
470        MetadataProperty property;
471        id = checkIndex(defineLabel, matcher.group(3));
472        property = checkProperty(element, matcher.group(5));
473        addCv(context, metadata, property, id, valueLabel);
474    }
475
476    protected void handleStudyVariable(String defineLabel, Matcher matcher,
477        MetadataElement element, String valueLabel) throws MZTabException, MZTabErrorOverflowException {
478        Integer id;
479        MetadataProperty property;
480        id = checkIndex(defineLabel, matcher.group(3));
481        property = checkProperty(element, matcher.group(5));
482        addStudyVariable(context, metadata, property, defineLabel, valueLabel,
483            id);
484    }
485
486    protected void handleAssay(Matcher matcher, String defineLabel,
487        MetadataElement element, String valueLabel) throws MZTabException {
488        Integer id;
489        MetadataProperty property;
490        if (isEmpty(matcher.group(6))) {
491            // no quantification modification. For example: assay[1-n]-quantification_reagent
492            id = checkIndex(defineLabel, matcher.group(3));
493            property = checkProperty(element, matcher.group(5));
494            addAssay(context, metadata, property, defineLabel, valueLabel, id);
495        } else {
496            throw new MZTabException(
497                "assay does not support quantification modification!");
498        }
499    }
500
501    protected void handleSample(String defineLabel, Matcher matcher,
502        MetadataElement element, String valueLabel) throws MZTabException {
503        Integer id;
504        MetadataProperty property;
505        id = checkIndex(defineLabel, matcher.group(3));
506        property = checkProperty(element, matcher.group(5));
507        addSample(context, metadata, property, id, defineLabel, valueLabel);
508    }
509
510    protected void handleCustom(String defineLabel, Matcher matcher,
511        String valueLabel) throws MZTabException {
512        Integer id;
513        id = checkIndex(defineLabel, matcher.group(3));
514        context.addCustomItem(metadata, id, checkParameter(
515            defineLabel, valueLabel));
516    }
517
518    protected void handleDerivatizationAgent(String defineLabel, Matcher matcher,
519        String valueLabel) throws MZTabException {
520        Integer id;
521        id = checkIndex(defineLabel, matcher.group(3));
522        context.addDerivatizationAgentItem(metadata, id, checkParameter(
523            defineLabel, valueLabel));
524    }
525
526    protected void handleMsRun(String defineLabel, Matcher matcher,
527        MetadataElement element, String valueLabel) throws MZTabException {
528        Integer id;
529        MetadataProperty property;
530        id = checkIndex(defineLabel, matcher.group(3));
531        property = checkProperty(element, matcher.group(5));
532        addMsRun(context, metadata, property, id, defineLabel, valueLabel);
533    }
534
535    protected void handleSmallMoleculeFeature(MetadataElement element,
536        Matcher matcher, String defineLabel, String valueLabel) throws MZTabException {
537        MetadataProperty property;
538        property = checkProperty(element, matcher.group(5));
539        if (property == null) {
540            MZTabError error = new MZTabError(
541                FormatErrorType.MTDDefineLabel,
542                lineNumber, defineLabel + "-" + valueLabel);
543            throw new MZTabException(error);
544        }
545        if (property == MetadataProperty.SMALL_MOLECULE_FEATURE_QUANTIFICATION_UNIT) {
546            if (metadata.
547                getSmallMoleculeFeatureQuantificationUnit() != null) {
548                throw new MZTabException(new MZTabError(
549                    LogicalErrorType.DuplicationDefine,
550                    lineNumber, defineLabel));
551            }
552            metadata.setSmallMoleculeFeatureQuantificationUnit(
553                checkParameter(defineLabel, valueLabel));
554        }
555    }
556
557    protected void handleSmallMolecule(MetadataElement element, Matcher matcher,
558        String defineLabel, String valueLabel) throws MZTabException {
559        MetadataProperty property;
560        property = checkProperty(element, matcher.group(5));
561        if (property == null) {
562            MZTabError error = new MZTabError(
563                FormatErrorType.MTDDefineLabel,
564                lineNumber, defineLabel + "-" + valueLabel);
565            throw new MZTabException(error);
566        }
567        if (property == MetadataProperty.SMALL_MOLECULE_QUANTIFICATION_UNIT) {
568            if (metadata.getSmallMoleculeQuantificationUnit() != null) {
569                throw new MZTabException(new MZTabError(
570                    LogicalErrorType.DuplicationDefine,
571                    lineNumber, defineLabel));
572            }
573            metadata.setSmallMoleculeQuantificationUnit(
574                checkParameter(defineLabel, valueLabel));
575        } else if (property == MetadataProperty.SMALL_MOLECULE_IDENTIFICATION_RELIABILITY) {
576            if (metadata.
577                getSmallMoleculeIdentificationReliability() != null) {
578                throw new MZTabException(new MZTabError(
579                    LogicalErrorType.DuplicationDefine,
580                    lineNumber, defineLabel));
581            }
582            metadata.setSmallMoleculeIdentificationReliability(
583                checkParameter(defineLabel, valueLabel));
584        }
585    }
586
587    protected void handleQuantificationMethod(String defineLabel,
588        String valueLabel) throws MZTabException {
589        if (metadata.getQuantificationMethod() != null) {
590            throw new MZTabException(new MZTabError(
591                LogicalErrorType.DuplicationDefine, lineNumber,
592                defineLabel));
593        }
594        metadata.
595            setQuantificationMethod(checkParameter(defineLabel, valueLabel));
596    }
597
598    protected void handleExternalStudyUri(String defineLabel, Matcher matcher,
599        String valueLabel) throws MZTabException {
600        Integer id;
601        id = checkIndex(defineLabel, matcher.group(3));
602        URI uri = checkURI(defineLabel, valueLabel, false);
603        metadata.addExternalStudyUriItem(new Uri().id(id).
604            value(uri == null ? MZTabConstants.NULL : uri.toASCIIString()));
605    }
606
607    protected void handleUri(String defineLabel, Matcher matcher,
608        String valueLabel, boolean mandatory) throws MZTabException {
609        Integer id;
610        id = checkIndex(defineLabel, matcher.group(3));
611        URI uri = checkURI(defineLabel, valueLabel, mandatory);
612        metadata.addUriItem(new Uri().id(id).
613            value(uri == null ? MZTabConstants.NULL : uri.toASCIIString()));
614    }
615
616    protected void handleContact(String defineLabel, Matcher matcher,
617        MetadataElement element, String valueLabel) throws MZTabException {
618        Integer id;
619        MetadataProperty property;
620        id = checkIndex(defineLabel, matcher.group(3));
621        property = checkProperty(element, matcher.group(5));
622        addContact(context, metadata, property, id, valueLabel, defineLabel);
623    }
624
625    protected void handlePublication(String defineLabel, Matcher matcher,
626        String valueLabel) throws MZTabException {
627        Integer id;
628        id = checkIndex(defineLabel, matcher.group(3));
629        checkPublication(id, defineLabel, valueLabel);
630    }
631
632    protected void handleSoftware(String defineLabel, Matcher matcher,
633        MetadataElement element, String valueLabel) throws MZTabErrorOverflowException, MZTabException {
634        Integer id;
635        MetadataProperty property;
636        id = checkIndex(defineLabel, matcher.group(3));
637        property = checkProperty(element, matcher.group(5));
638        addSoftware(context, metadata, property, defineLabel, valueLabel, id);
639    }
640
641    protected void handleInstrument(String defineLabel, Matcher matcher,
642        MetadataElement element, String valueLabel) throws MZTabException {
643        Integer id;
644        MetadataProperty property;
645        Parameter param;
646        id = checkIndex(defineLabel, matcher.group(3));
647        property = checkProperty(element, matcher.group(5));
648        param = checkParameter(defineLabel, valueLabel);
649        addInstrument(context, metadata, property, id, param);
650    }
651
652    protected void handleSampleProcessing(String defineLabel, Matcher matcher,
653        String valueLabel) throws MZTabException {
654        Integer id;
655        id = checkIndex(defineLabel, matcher.group(3));
656        addSampleProcessing(context, metadata, id, checkParameterList(
657            defineLabel, valueLabel));
658    }
659
660    protected void handleDescription(String defineLabel, String valueLabel) throws MZTabException {
661        if (metadata.getDescription() != null) {
662            throw new MZTabException(new MZTabError(
663                LogicalErrorType.DuplicationDefine, lineNumber,
664                defineLabel));
665        }
666        metadata.setDescription(valueLabel);
667    }
668
669    protected void handleTitle(String defineLabel, String valueLabel) throws MZTabException {
670        if (metadata.getTitle() != null) {
671            throw new MZTabException(new MZTabError(
672                LogicalErrorType.DuplicationDefine, lineNumber,
673                defineLabel));
674        }
675        metadata.setTitle(valueLabel);
676    }
677
678    protected void handleMzTab(MetadataElement element, Matcher matcher,
679        String defineLabel, String valueLabel) throws MZTabException {
680        MetadataProperty property;
681        property = checkProperty(element, matcher.group(5));
682        if (property == null) {
683            MZTabError error = new MZTabError(
684                FormatErrorType.MTDDefineLabel,
685                lineNumber, defineLabel + "-" + valueLabel);
686            throw new MZTabException(error);
687        }
688        switch (property) {
689            case MZTAB_VERSION:
690                if (metadata.getMzTabVersion() != null) {
691                    throw new MZTabException(new MZTabError(
692                        LogicalErrorType.DuplicationDefine,
693                        lineNumber, defineLabel));
694                }
695                if (parseMzTabVersion(valueLabel) == null) {
696                    throw new MZTabException(new MZTabError(
697                        FormatErrorType.MZTabVersion, lineNumber,
698                        defineLabel, valueLabel));
699                }
700
701                metadata.mzTabVersion(valueLabel);
702                break;
703            case MZTAB_ID:
704                if (metadata.getMzTabID() != null) {
705                    throw new MZTabException(new MZTabError(
706                        LogicalErrorType.DuplicationDefine,
707                        lineNumber, defineLabel));
708                }
709                if (parseString(valueLabel) == null) {
710                    throw new MZTabException(new MZTabError(
711                        FormatErrorType.MZTabId, lineNumber,
712                        defineLabel, valueLabel));
713                }
714                metadata.mzTabID(parseString(valueLabel));
715                break;
716            default:
717                MZTabError error = new MZTabError(
718                    FormatErrorType.MTDDefineLabel,
719                    lineNumber, defineLabel + "-" + valueLabel);
720                throw new MZTabException(error);
721        }
722    }
723    
724    private <T> void validate(T t, RefiningValidator<T> validator, MZTabParserContext context, MZTabErrorList errorList) {
725        validator.validateRefine(t, context).forEach((error) -> {
726            errorList.add(error);
727        });
728    }
729
730    /**
731     * Refine the metadata, and check whether missing some important
732     * information. fixed_mode, variable_mode must provide in the Complete file.
733     * Detail information see specification 5.5
734     *
735     * @throws uk.ac.ebi.pride.jmztab2.utils.errors.MZTabException if any.
736     */
737    public void refineNormalMetadata() throws MZTabException {
738        validate(metadata, new MzTabVersionValidator(), context, errorList);
739        validate(metadata, new MzTabIdValidator(), context, errorList);
740        validate(metadata, new SoftwareValidator(), context, errorList);
741        validate(metadata, new QuantificationMethodValidator(), context, errorList);
742        validate(metadata, new AssayValidator(), context, errorList);
743        validate(metadata, new StudyVariableValidator(), context, errorList);
744        validate(metadata, new MsRunValidator(), context, errorList);
745        validate(metadata, new CvValidator(), context, errorList);
746        validate(metadata, new DatabaseValidator(), context, errorList);
747        validate(metadata, new SmallMoleculeQuantificationUnitValidator(), context, errorList);
748        validate(metadata, new SmallMoleculeFeatureQuantificationUnitValidator(), context, errorList);
749        validate(metadata, new SmallMoleculeQuantificationUnitValidator(), context, errorList);
750    }
751
752    /**
753     * <p>
754     * Getter for the field <code>metadata</code>.</p>
755     *
756     * @return a {@link de.isas.mztab2.model.Metadata} object.
757     */
758    public Metadata getMetadata() {
759        return metadata;
760    }
761
762    private void addSampleProcessing(MZTabParserContext context,
763        Metadata metadata, Integer id,
764        List<Parameter> checkParameterList) throws MZTabException {
765        SampleProcessing sp = context.addSampleProcessing(metadata, id,
766            checkParameterList);
767        if (sp == null) {
768            throw new MZTabException(new MZTabError(LogicalErrorType.NULL,
769                lineNumber,
770                Metadata.Properties.sampleProcessing + "[" + id + "]"));
771        }
772    }
773
774    /**
775     * <p>
776     * handleParam.</p>
777     *
778     * @param defineLabel a {@link java.lang.String} object.
779     * @param valueLabel a {@link java.lang.String} object.
780     * @param errorType a
781     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.MZTabErrorType} object.
782     * @param lineNumber a int.
783     * @param consumer a {@link java.util.function.Consumer} object.
784     * @throws uk.ac.ebi.pride.jmztab2.utils.errors.MZTabErrorOverflowException
785     * if any.
786     */
787    public void handleParam(String defineLabel, String valueLabel,
788        MZTabErrorType errorType, int lineNumber,
789        Consumer<Parameter> consumer) throws MZTabErrorOverflowException {
790        Parameter param;
791        param = checkParameter(defineLabel, valueLabel);
792        if (param != null && (param.getValue() == null || param.getValue().
793            trim().
794            length() == 0)) {
795            errorList.add(new MZTabError(errorType, lineNumber, valueLabel));
796        } else {
797            consumer.accept(param);
798        }
799    }
800
801    private void addInstrument(MZTabParserContext context, Metadata metadata,
802        MetadataProperty property,
803        Integer id,
804        Parameter param) throws MZTabException {
805        Instrument instrument = null;
806
807        if (property == null) {
808            MZTabError error = new MZTabError(
809                FormatErrorType.MTDDefineLabel,
810                lineNumber,
811                Metadata.Properties.instrument + "[" + id + "]" + "-" + property);
812            throw new MZTabException(error);
813        }
814
815        switch (property) {
816            case INSTRUMENT_NAME:
817                instrument = context.addInstrumentName(metadata, id, param);
818                break;
819            case INSTRUMENT_SOURCE:
820                instrument = context.addInstrumentSource(metadata, id, param);
821                break;
822            case INSTRUMENT_ANALYZER:
823                instrument = context.addInstrumentAnalyzer(metadata, id, param);
824                break;
825            case INSTRUMENT_DETECTOR:
826                instrument = context.addInstrumentDetector(metadata, id, param);
827                break;
828            default:
829                MZTabError error = new MZTabError(
830                    FormatErrorType.MTDDefineLabel,
831                    lineNumber,
832                    Metadata.Properties.instrument + "[" + id + "]" + "-" + property);
833                throw new MZTabException(error);
834        }
835        if (instrument == null) {
836            throw new MZTabException(new MZTabError(LogicalErrorType.NULL,
837                lineNumber, Metadata.Properties.instrument + "[" + id + "]"));
838        }
839    }
840
841    private void addSoftware(MZTabParserContext context, Metadata metadata,
842        MetadataProperty property,
843        String defineLabel,
844        String valueLabel, Integer id) throws MZTabErrorOverflowException, MZTabException {
845        Parameter param;
846        Software software = null;
847        if (property == null) {
848            param = checkParameter(defineLabel, valueLabel);
849            if (param != null && (param.getValue() == null || param.getValue().
850                trim().
851                length() == 0)) {
852                // this is a warn.
853                errorList.add(new MZTabError(LogicalErrorType.SoftwareVersion,
854                    lineNumber, valueLabel));
855            }
856            software = context.addSoftwareParameter(metadata, id, param);
857        } else {
858            switch (property) {
859                case SOFTWARE_SETTING:
860                    software = context.addSoftwareSetting(metadata, id,
861                        valueLabel);
862                    break;
863                default:
864                    MZTabError error = new MZTabError(
865                        FormatErrorType.MTDDefineLabel,
866                        lineNumber, defineLabel + "-" + valueLabel);
867                    throw new MZTabException(error);
868            }
869        }
870        if (software == null) {
871            throw new MZTabException(new MZTabError(LogicalErrorType.NULL,
872                lineNumber, Metadata.Properties.software + "[" + id + "]"));
873        }
874    }
875
876    private void addContact(MZTabParserContext context, Metadata metadata,
877        MetadataProperty property,
878        Integer id,
879        String valueLabel, String defineLabel) throws MZTabException {
880        Contact contact = null;
881        if (property == null) {
882            MZTabError error = new MZTabError(
883                FormatErrorType.MTDDefineLabel,
884                lineNumber, defineLabel + "-" + valueLabel);
885            throw new MZTabException(error);
886        }
887        switch (property) {
888            case CONTACT_NAME:
889                contact = context.addContactName(metadata, id, valueLabel);
890                break;
891            case CONTACT_AFFILIATION:
892                contact = context.
893                    addContactAffiliation(metadata, id, valueLabel);
894                break;
895            case CONTACT_EMAIL:
896                checkEmail(defineLabel, valueLabel);
897                contact = context.addContactEmail(metadata, id, valueLabel);
898                break;
899            default:
900                MZTabError error = new MZTabError(
901                    FormatErrorType.MTDDefineLabel,
902                    lineNumber, defineLabel + "-" + valueLabel);
903                throw new MZTabException(error);
904        }
905        if (contact == null) {
906            throw new MZTabException(new MZTabError(LogicalErrorType.NULL,
907                lineNumber, Metadata.Properties.contact + "[" + id + "]"));
908        }
909    }
910
911    private void addMsRun(MZTabParserContext context, Metadata metadata,
912        MetadataProperty property,
913        Integer id,
914        String defineLabel, String valueLabel) throws MZTabException {
915        MsRun msRun = null;
916        if (property == null) {
917            msRun = context.addMsRun(metadata, new MsRun().id(id).
918                name(valueLabel));
919        } else {
920            switch (property) {
921                case MS_RUN_LOCATION:
922                    msRun = context.addMsRunLocation(metadata, id, checkURI(
923                        defineLabel, valueLabel, true));
924                    break;
925                case MS_RUN_INSTRUMENT_REF:
926                    List<IndexedElement> indexedElements = checkIndexedElementList(
927                        defineLabel, valueLabel,
928                        MetadataElement.INSTRUMENT);
929                    if (indexedElements != null && !indexedElements.isEmpty() && indexedElements.
930                        size() == 1) {
931                        Instrument instrument = context.getInstrumentMap().
932                            get(indexedElements.get(0).
933                                getId());
934                        if (instrument == null) {
935                            throw new MZTabException(new MZTabError(
936                                LogicalErrorType.NotDefineInMetadata, lineNumber,
937                                valueLabel,
938                                valueLabel));
939                        }
940                        msRun = context.addMsRunInstrumentRef(metadata, id,
941                            instrument);
942                    }
943                    break;
944                case MS_RUN_FORMAT:
945                    msRun = context.addMsRunFormat(metadata, id, checkParameter(
946                        defineLabel, valueLabel));
947                    break;
948                case MS_RUN_ID_FORMAT:
949                    msRun = context.addMsRunIdFormat(metadata, id,
950                        checkParameter(defineLabel, valueLabel));
951                    break;
952                case MS_RUN_FRAGMENTATION_METHOD:
953                    msRun = context.addMsRunFragmentationMethod(metadata, id,
954                        checkParameter(defineLabel, valueLabel));
955                    break;
956                case MS_RUN_SCAN_POLARITY:
957                    msRun = context.addMsRunScanPolarity(metadata, id,
958                        checkParameter(defineLabel, valueLabel));
959                    break;
960                case MS_RUN_HASH:
961                    msRun = context.addMsRunHash(metadata, id, valueLabel);
962                    break;
963                case MS_RUN_HASH_METHOD:
964                    msRun = context.addMsRunHashMethod(metadata, id,
965                        checkParameter(defineLabel, valueLabel));
966                    break;
967                default:
968                    MZTabError error = new MZTabError(
969                        FormatErrorType.MTDDefineLabel,
970                        lineNumber, defineLabel + "-" + valueLabel);
971                    throw new MZTabException(error);
972            }
973        }
974        if (msRun == null) {
975            throw new MZTabException(new MZTabError(LogicalErrorType.NULL,
976                lineNumber, Metadata.Properties.msRun + "[" + id + "]"));
977        }
978    }
979
980    private void addDatabase(MZTabParserContext context, Metadata metadata,
981        MetadataProperty property,
982        Integer id,
983        String defineLabel, String valueLabel) throws MZTabException {
984        Database database = null;
985        if (property == null) {
986            database = context.addDatabase(metadata, new Database().id(id).
987                param(checkParameter(defineLabel, valueLabel)));
988        } else {
989            switch (property) {
990                case DATABASE_PREFIX:
991                    database = context.addDatabasePrefix(metadata, id,
992                        valueLabel);
993                    break;
994                case DATABASE_VERSION:
995                    database = context.addDatabaseVersion(metadata, id,
996                        valueLabel);
997                    break;
998                case DATABASE_URI:
999                    database = context.addDatabaseUri(metadata, id, checkURI(
1000                        defineLabel,
1001                        valueLabel, false));
1002                    break;
1003                default:
1004                    MZTabError error = new MZTabError(
1005                        FormatErrorType.MTDDefineLabel,
1006                        lineNumber, defineLabel + "-" + valueLabel);
1007                    throw new MZTabException(error);
1008            }
1009        }
1010        if (database == null) {
1011            throw new MZTabException(new MZTabError(LogicalErrorType.NULL,
1012                lineNumber, Metadata.Properties.database + "[" + id + "]"));
1013        }
1014    }
1015
1016    private void addSample(MZTabParserContext context, Metadata metadata,
1017        MetadataProperty property,
1018        Integer id,
1019        String defineLabel, String valueLabel) throws MZTabException {
1020        if (property == null) {
1021            context.addSample(metadata, new Sample().id(id).
1022                name(valueLabel));
1023        } else {
1024            switch (property) {
1025                case SAMPLE_SPECIES:
1026                    context.addSampleSpecies(metadata, id, checkParameter(
1027                        defineLabel, valueLabel));
1028                    break;
1029                case SAMPLE_TISSUE:
1030                    context.addSampleTissue(metadata, id, checkParameter(
1031                        defineLabel, valueLabel));
1032                    break;
1033                case SAMPLE_CELL_TYPE:
1034                    context.addSampleCellType(metadata, id, checkParameter(
1035                        defineLabel, valueLabel));
1036                    break;
1037                case SAMPLE_DISEASE:
1038                    context.addSampleDisease(metadata, id, checkParameter(
1039                        defineLabel, valueLabel));
1040                    break;
1041                case SAMPLE_DESCRIPTION:
1042                    context.addSampleDescription(metadata, id, valueLabel);
1043                    break;
1044                case SAMPLE_CUSTOM:
1045                    context.addSampleCustom(metadata, id, checkParameter(
1046                        defineLabel, valueLabel));
1047                    break;
1048                default:
1049                    MZTabError error = new MZTabError(
1050                        FormatErrorType.MTDDefineLabel,
1051                        lineNumber, defineLabel + "-" + valueLabel);
1052                    throw new MZTabException(error);
1053            }
1054        }
1055    }
1056
1057    private void addAssay(MZTabParserContext context, Metadata metadata,
1058        MetadataProperty property,
1059        String defineLabel,
1060        String valueLabel, Integer id) throws MZTabException {
1061        IndexedElement indexedElement;
1062        if (property == null) {
1063            context.addAssay(metadata, new Assay().id(id).
1064                name(valueLabel));
1065        } else {
1066            switch (property) {
1067                case ASSAY_CUSTOM:
1068                    context.addAssayCustom(metadata, id, checkParameter(
1069                        defineLabel, valueLabel));
1070                    break;
1071                case ASSAY_EXTERNAL_URI:
1072                    context.addAssayExternalUri(metadata, id, checkURI(
1073                        defineLabel,
1074                        valueLabel, false));
1075                    break;
1076                case ASSAY_SAMPLE_REF:
1077                    indexedElement = checkIndexedElement(defineLabel, valueLabel,
1078                        MetadataElement.SAMPLE);
1079                    if (indexedElement != null) {
1080                        Sample sample = context.getSampleMap().
1081                            get(indexedElement.getId());
1082                        if (sample == null) {
1083                            throw new MZTabException(new MZTabError(
1084                                LogicalErrorType.NotDefineInMetadata, lineNumber,
1085                                valueLabel,
1086                                valueLabel));
1087                        }
1088                        context.addAssaySample(metadata, id, sample);
1089                    }
1090                    break;
1091                case ASSAY_MS_RUN_REF:
1092                    indexedElement = checkIndexedElement(defineLabel, valueLabel,
1093                        MetadataElement.MS_RUN);
1094                    if (indexedElement != null) {
1095                        MsRun msRun = context.getMsRunMap().
1096                            get(indexedElement.getId());
1097                        if (msRun == null) {
1098                            throw new MZTabException(new MZTabError(
1099                                LogicalErrorType.NotDefineInMetadata, lineNumber,
1100                                valueLabel));
1101                        }
1102                        context.addAssayMsRun(metadata, id, msRun);
1103                    }
1104                    break;
1105                default:
1106                    MZTabError error = new MZTabError(
1107                        FormatErrorType.MTDDefineLabel,
1108                        lineNumber, defineLabel + "-" + valueLabel);
1109                    throw new MZTabException(error);
1110            }
1111        }
1112    }
1113
1114    private void addStudyVariable(MZTabParserContext context, Metadata metadata,
1115        MetadataProperty property,
1116        String defineLabel,
1117        String valueLabel, Integer id) throws MZTabErrorOverflowException, MZTabException {
1118        List<IndexedElement> indexedElementList;
1119        if (property == null) {
1120            context.addStudyVariable(metadata, new StudyVariable().id(id).
1121                name(valueLabel));
1122        } else {
1123            switch (property) {
1124                case STUDY_VARIABLE_ASSAY_REFS:
1125                    indexedElementList = checkIndexedElementList(defineLabel,
1126                        valueLabel, MetadataElement.ASSAY);
1127                    // detect duplicates
1128                    indexedElementList.stream().
1129                        filter(i ->
1130                            Collections.frequency(indexedElementList, i) > 1).
1131                        collect(Collectors.toSet()).
1132                        forEach((indexedElement) ->
1133                        {
1134                            errorList.add(new MZTabError(
1135                                LogicalErrorType.DuplicationID, lineNumber,
1136                                valueLabel));
1137                        });
1138                    // check that assays exist
1139                    for (IndexedElement e : indexedElementList) {
1140                        //assays need to be defined before
1141                        if (!context.getAssayMap().
1142                            containsKey(e.getId())) {
1143                            // can not find assay[id] in metadata.
1144                            throw new MZTabException(new MZTabError(
1145                                LogicalErrorType.NotDefineInMetadata, lineNumber,
1146                                valueLabel));
1147                        }
1148                        context.addStudyVariableAssay(metadata, id, context.
1149                            getAssayMap().
1150                            get(e.getId()));
1151                    }
1152                    break;
1153                case STUDY_VARIABLE_AVERAGE_FUNCTION:
1154                    context.addStudyVariableAverageFunction(metadata, id,
1155                        checkParameter(defineLabel, valueLabel));
1156                    break;
1157                case STUDY_VARIABLE_VARIATION_FUNCTION:
1158                    context.addStudyVariableVariationFunction(metadata, id,
1159                        checkParameter(defineLabel, valueLabel));
1160                    break;
1161                case STUDY_VARIABLE_DESCRIPTION:
1162                    context.
1163                        addStudyVariableDescription(metadata, id, valueLabel);
1164                    break;
1165                case STUDY_VARIABLE_FACTORS:
1166                    context.addStudyVariableFactors(metadata, id,
1167                        checkParameter(defineLabel, valueLabel));
1168                    break;
1169                default:
1170                    MZTabError error = new MZTabError(
1171                        FormatErrorType.MTDDefineLabel,
1172                        lineNumber, defineLabel + "-" + valueLabel);
1173                    throw new MZTabException(error);
1174            }
1175        }
1176    }
1177
1178    private void addCv(MZTabParserContext context, Metadata metadata,
1179        MetadataProperty property, Integer id,
1180        String valueLabel) throws MZTabException {
1181        if (property == null) {
1182            context.addCV(metadata, new CV().id(id));
1183        } else {
1184            switch (property) {
1185                case CV_LABEL:
1186                    context.addCVLabel(metadata, id, valueLabel);
1187                    break;
1188                case CV_FULL_NAME:
1189                    context.addCVFullName(metadata, id, valueLabel);
1190                    break;
1191                case CV_VERSION:
1192                    context.addCVVersion(metadata, id, valueLabel);
1193                    break;
1194                case CV_URI:
1195                    context.addCVURI(metadata, id, valueLabel);
1196                    break;
1197                default:
1198                    MZTabError error = new MZTabError(
1199                        FormatErrorType.MTDDefineLabel,
1200                        lineNumber, property + "[" + id + "]" + "-" + valueLabel);
1201                    throw new MZTabException(error);
1202            }
1203        }
1204    }
1205}