001/* 
002 * Copyright 2018 Leibniz-Institut für Analytische Wissenschaften – ISAS – e.V..
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package uk.ac.ebi.pride.jmztab2.utils.parser;
017
018import de.isas.mztab2.model.Metadata;
019import de.isas.mztab2.model.Parameter;
020import de.isas.mztab2.model.SmallMoleculeEvidence;
021import java.util.*;
022import java.util.regex.Matcher;
023import java.util.regex.Pattern;
024import java.util.stream.IntStream;
025import org.slf4j.Logger;
026import org.slf4j.LoggerFactory;
027import uk.ac.ebi.pride.jmztab2.model.IMZTabColumn;
028import uk.ac.ebi.pride.jmztab2.model.ISmallMoleculeColumn;
029import uk.ac.ebi.pride.jmztab2.model.MZTabColumnFactory;
030import uk.ac.ebi.pride.jmztab2.model.MZTabConstants;
031import uk.ac.ebi.pride.jmztab2.model.Section;
032import uk.ac.ebi.pride.jmztab2.model.SmallMoleculeEvidenceColumn;
033import uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType;
034import uk.ac.ebi.pride.jmztab2.utils.errors.LogicalErrorType;
035import uk.ac.ebi.pride.jmztab2.utils.errors.MZTabError;
036import uk.ac.ebi.pride.jmztab2.utils.errors.MZTabErrorList;
037import uk.ac.ebi.pride.jmztab2.utils.errors.MZTabException;
038
039/**
040 * Parse and validate Small Molecule Evidence header line into a {@link uk.ac.ebi.pride.jmztab2.model.MZTabColumnFactory}.
041 *
042 * @author nilshoffmann
043 * @since 11/09/17
044 * 
045 */
046public class SEHLineParser extends MZTabHeaderLineParser {
047
048    private static final Logger LOGGER = LoggerFactory.getLogger(SEHLineParser.class);
049    private Map<Integer, String> physPositionToOrder;
050
051
052    /**
053     * <p>Constructor for SEHLineParser.</p>
054     *
055     * @param context a {@link uk.ac.ebi.pride.jmztab2.utils.parser.MZTabParserContext} object.
056     * @param metadata a {@link de.isas.mztab2.model.Metadata} object.
057     */
058    public SEHLineParser(MZTabParserContext context, Metadata metadata) {
059        super(context, MZTabColumnFactory.getInstance(Section.Small_Molecule_Evidence_Header), metadata);
060    }
061
062    /** {@inheritDoc} */
063    @Override
064    protected int parseColumns() throws MZTabException {
065        String header;
066        Integer physicalPosition;
067
068        ISmallMoleculeColumn column;
069        SortedMap<String, IMZTabColumn> columnMapping = factory.getColumnMapping();
070        SortedMap<String, IMZTabColumn> optionalMapping = factory.getOptionalColumnMapping();
071        SortedMap<String, IMZTabColumn> stableMapping = factory.getStableColumnMapping();
072
073        physPositionToOrder = generateHeaderPhysPositionToOrderMap(items);
074
075        //Iterates through the tokens in the small molecule evidence header
076        //It will identify the type of column and the position accordingly
077        for (physicalPosition = 1; physicalPosition < items.length; physicalPosition++) {
078
079            column = null;
080            header = items[physicalPosition];
081            if (header.startsWith(SmallMoleculeEvidence.Properties.idConfidenceMeasure.getPropertyName())) {
082                checkIdConfidenceMeasure(header);
083            } else if (header.startsWith(MZTabConstants.OPT_PREFIX)) {
084                checkOptColumnName(header);
085            } else {
086                try {
087                    column = SmallMoleculeEvidenceColumn.Stable.columnFor(header);
088                } catch(IllegalArgumentException ex) {
089                    throw new MZTabException(new MZTabError(LogicalErrorType.ColumnNotValid,lineNumber,header,section.getName()));    
090                }
091
092            }
093
094            if (column != null) {
095                if (!column.getOrder().equals(physPositionToOrder.get(physicalPosition))) {
096                    column.setOrder(physPositionToOrder.get(physicalPosition));
097                    LOGGER.debug(column.toString());
098                }
099                if(column.isOptional()){
100                    optionalMapping.put(column.getLogicPosition(), column);
101                } else {
102                    stableMapping.put(column.getLogicPosition(), column);
103                }
104                columnMapping.put(column.getLogicPosition(), column);
105            }
106        }
107        return physicalPosition;
108    }
109
110    private void checkIdConfidenceMeasure(String header) throws MZTabException {
111        String valueLabel = header;
112        
113        Pattern pattern = Pattern.compile(SmallMoleculeEvidence.Properties.idConfidenceMeasure.getPropertyName()+MZTabConstants.REGEX_INDEXED_VALUE);
114        Matcher matcher = pattern.matcher(valueLabel);
115        if (!matcher.find()) {
116            MZTabError error = new MZTabError(FormatErrorType.StableColumn, lineNumber, header);
117            throw new MZTabException(error);
118        }
119        
120        int id = parseIndex(header, matcher.group(1));
121        Parameter p = metadata.getIdConfidenceMeasure().get(id-1);
122        factory.addIdConfidenceMeasureColumn(p, id, Double.class);
123    }
124
125    private Map<Integer, String> generateHeaderPhysPositionToOrderMap(String[] items) {
126        Integer physicalPosition;
127        Map<Integer, String> physicalPositionToOrder = new LinkedHashMap<>();
128        int order = 0;
129        for (physicalPosition = 1; physicalPosition < items.length; physicalPosition++) {
130            physicalPositionToOrder.put(physicalPosition, fromIndexToOrder(++order));
131        }
132        return physicalPositionToOrder;
133    }
134
135    /**
136     * {@inheritDoc}
137     *
138     * The following optional columns are mandatory:
139     * 1. id_confidence_measure[1-n]
140     * 
141     * NOTICE: this method will be called at end of parse() function.
142     * @see MZTabHeaderLineParser#parse(int, String, MZTabErrorList)
143     * @see MZTabHeaderLineParser#parse(int, String, MZTabErrorList)
144     */
145    @Override
146    protected void refine() throws MZTabException {
147        //mandatory columns
148        List<String> mandatoryColumnHeaders = new ArrayList<>();
149        for(ISmallMoleculeColumn col:SmallMoleculeEvidenceColumn.Stable.columns()) {
150            mandatoryColumnHeaders.add(col.getName());
151        }
152
153        IntStream.range(0, metadata.getIdConfidenceMeasure().size()).
154        forEachOrdered(i ->
155        {
156            mandatoryColumnHeaders.add(SmallMoleculeEvidence.Properties.idConfidenceMeasure.getPropertyName()+"["+(i+1)+"]");
157        });
158
159        for (String columnHeader : mandatoryColumnHeaders) {
160            if (factory.findColumnByHeader(columnHeader) == null) {
161                throw new MZTabException(new MZTabError(FormatErrorType.StableColumn, lineNumber, columnHeader));
162            }
163        }
164    }
165}