001/* 
002 * Copyright 2018 Leibniz-Institut für Analytische Wissenschaften – ISAS – e.V..
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package uk.ac.ebi.pride.jmztab2.utils.parser;
017
018import de.isas.mztab2.model.Metadata;
019import de.isas.mztab2.model.OptColumnMapping;
020import de.isas.mztab2.model.SmallMoleculeSummary;
021import java.util.List;
022import java.util.regex.Matcher;
023import java.util.regex.Pattern;
024import uk.ac.ebi.pride.jmztab2.model.AbundanceColumn;
025import uk.ac.ebi.pride.jmztab2.model.IMZTabColumn;
026import uk.ac.ebi.pride.jmztab2.model.ISmallMoleculeColumn;
027import uk.ac.ebi.pride.jmztab2.model.MZBoolean;
028import uk.ac.ebi.pride.jmztab2.model.MZTabColumnFactory;
029import uk.ac.ebi.pride.jmztab2.model.MZTabConstants;
030import uk.ac.ebi.pride.jmztab2.model.OptionColumn;
031import uk.ac.ebi.pride.jmztab2.model.SmallMoleculeColumn;
032import uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType;
033import uk.ac.ebi.pride.jmztab2.utils.errors.LogicalErrorType;
034import uk.ac.ebi.pride.jmztab2.utils.errors.MZTabError;
035import uk.ac.ebi.pride.jmztab2.utils.errors.MZTabErrorList;
036import uk.ac.ebi.pride.jmztab2.utils.errors.MZTabErrorOverflowException;
037
038/**
039 * <p>
040 * SMLLineParser class.</p>
041 *
042 * @author qingwei
043 * @author nilshoffmann
044 * @since 10/02/13
045 *
046 */
047public class SMLLineParser extends MZTabDataLineParser<SmallMoleculeSummary> {
048
049    private SmallMoleculeSummary smallMoleculeSummary;
050
051    /**
052     * <p>
053     * Constructor for SMLLineParser.</p>
054     *
055     * @param context a
056     * {@link uk.ac.ebi.pride.jmztab2.utils.parser.MZTabParserContext} object.
057     * @param factory a {@link uk.ac.ebi.pride.jmztab2.model.MZTabColumnFactory}
058     * object.
059     * @param positionMapping a
060     * {@link uk.ac.ebi.pride.jmztab2.utils.parser.PositionMapping} object.
061     * @param metadata a {@link de.isas.mztab2.model.Metadata} object.
062     * @param errorList a
063     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.MZTabErrorList} object.
064     */
065    public SMLLineParser(MZTabParserContext context, MZTabColumnFactory factory,
066        PositionMapping positionMapping,
067        Metadata metadata, MZTabErrorList errorList) {
068        super(context, factory, positionMapping, metadata, errorList);
069    }
070
071    /**
072     * {@inheritDoc}
073     */
074    @Override
075    protected int checkData() {
076
077        IMZTabColumn column;
078        String columnName;
079        String target;
080        int physicalPosition;
081        String logicalPosition;
082        smallMoleculeSummary = new SmallMoleculeSummary();
083
084        for (physicalPosition = 1; physicalPosition < items.length; physicalPosition++) {
085            logicalPosition = positionMapping.get(physicalPosition);
086            column = factory.getColumnMapping().
087                get(logicalPosition);
088
089            if (column != null) {
090                columnName = column.getName();
091                target = items[physicalPosition];
092                if (column instanceof ISmallMoleculeColumn) {
093                    SmallMoleculeColumn.Stable stableColumn = SmallMoleculeColumn.Stable.
094                        forName(columnName);
095                    switch (stableColumn) {
096                        case ADDUCT_IONS:
097                            List<String> adductIons = checkStringList(
098                                column, target, MZTabConstants.BAR);
099                            checkRegexMatches(errorList, lineNumber,
100                                SmallMoleculeSummary.Properties.adductIons,
101                                MZTabConstants.REGEX_ADDUCT, adductIons);
102                            smallMoleculeSummary.adductIons(adductIons);
103                            break;
104                        case BEST_ID_CONFIDENCE_MEASURE:
105                            smallMoleculeSummary.bestIdConfidenceMeasure(
106                                checkParameter(column, target, true));
107                            break;
108                        case BEST_ID_CONFIDENCE_VALUE:
109                            smallMoleculeSummary.bestIdConfidenceValue(
110                                checkDouble(column, target));
111                            break;
112                        case CHEMICAL_FORMULA:
113                            smallMoleculeSummary.chemicalFormula(
114                                checkStringList(column, target,
115                                    MZTabConstants.BAR));
116                            break;
117                        case CHEMICAL_NAME:
118                            smallMoleculeSummary.chemicalName(checkStringList(
119                                column, target, MZTabConstants.BAR));
120                            break;
121                        case DATABASE_IDENTIFIER:
122                            smallMoleculeSummary.databaseIdentifier(
123                                checkStringList(column, target,
124                                    MZTabConstants.BAR));
125                            break;
126                        case INCHI:
127                            smallMoleculeSummary.inchi(checkStringList(column,
128                                target, MZTabConstants.BAR));
129                            break;
130                        case RELIABILITY:
131                            smallMoleculeSummary.reliability(checkString(column,
132                                target, false));
133                            break;
134                        case SMF_ID_REFS:
135                            smallMoleculeSummary.smfIdRefs(checkIntegerList(
136                                column, target, MZTabConstants.BAR));
137                            break;
138                        case SMILES:
139                            smallMoleculeSummary.smiles(checkSmiles(column,
140                                target));
141                            break;
142                        case SML_ID:
143                            smallMoleculeSummary.smlId(checkInteger(column,
144                                target, false));
145                            break;
146                        case THEOR_NEUTRAL_MASS:
147                            smallMoleculeSummary.theoreticalNeutralMass(
148                                checkDoubleList(column, target));
149                            break;
150                        case URI:
151                            smallMoleculeSummary.uri(
152                                checkStringList(column, target,
153                                    MZTabConstants.BAR));
154                            break;
155
156                    }
157
158                } else if (column instanceof AbundanceColumn) {
159                    if (columnName.startsWith(
160                        SmallMoleculeSummary.Properties.abundanceAssay.
161                            getPropertyName())) {
162                        smallMoleculeSummary.addAbundanceAssayItem(checkDouble(
163                            column, target));
164                    } else if (columnName.startsWith(
165                        SmallMoleculeSummary.Properties.abundanceStudyVariable.
166                            getPropertyName())) {
167                        smallMoleculeSummary.addAbundanceStudyVariableItem(
168                            checkDouble(column, target));
169                    } else if (columnName.startsWith(
170                        SmallMoleculeSummary.Properties.abundanceVariationStudyVariable.
171                            getPropertyName())) {
172                        smallMoleculeSummary.
173                            addAbundanceVariationStudyVariableItem(checkDouble(
174                                column, target));
175                    }
176                } else if (column instanceof OptionColumn) {
177                    if (columnName.startsWith(MZTabConstants.OPT_PREFIX)) {
178                        Class dataType = column.getDataType();
179                        OptColumnMapping optColMapping = new OptColumnMapping();
180                        optColMapping.identifier(columnName.substring(
181                            MZTabConstants.OPT_PREFIX.length()));
182                        if (dataType.equals(String.class)) {
183                            optColMapping.value(checkString(column, target));
184                        } else if (dataType.equals(Double.class)) {
185                            optColMapping.value(Double.toString(checkDouble(
186                                column, target)));
187                        } else if (dataType.equals(MZBoolean.class)) {
188                            optColMapping.value(Boolean.toString(checkMZBoolean(
189                                column, target).
190                                toBoolean()));
191                        }
192                        smallMoleculeSummary.addOptItem(optColMapping);
193                    }
194                }
195            }
196        }
197
198        checkItemNumbers(errorList, lineNumber, smallMoleculeSummary.
199            getDatabaseIdentifier(),
200            SmallMoleculeSummary.Properties.databaseIdentifier,
201            smallMoleculeSummary.getChemicalFormula(),
202            SmallMoleculeSummary.Properties.chemicalFormula);
203        checkItemNumbers(errorList, lineNumber, smallMoleculeSummary.
204            getDatabaseIdentifier(),
205            SmallMoleculeSummary.Properties.databaseIdentifier,
206            smallMoleculeSummary.getSmiles(),
207            SmallMoleculeSummary.Properties.smiles);
208        checkItemNumbers(errorList, lineNumber, smallMoleculeSummary.
209            getDatabaseIdentifier(),
210            SmallMoleculeSummary.Properties.databaseIdentifier,
211            smallMoleculeSummary.getInchi(),
212            SmallMoleculeSummary.Properties.inchi);
213        checkItemNumbers(errorList, lineNumber, smallMoleculeSummary.
214            getDatabaseIdentifier(),
215            SmallMoleculeSummary.Properties.databaseIdentifier,
216            smallMoleculeSummary.getChemicalName(),
217            SmallMoleculeSummary.Properties.chemicalName);
218        checkItemNumbers(errorList, lineNumber, smallMoleculeSummary.
219            getDatabaseIdentifier(),
220            SmallMoleculeSummary.Properties.databaseIdentifier,
221            smallMoleculeSummary.getUri(),
222            SmallMoleculeSummary.Properties.uri);
223        checkItemNumbers(errorList, lineNumber, smallMoleculeSummary.
224            getDatabaseIdentifier(),
225            SmallMoleculeSummary.Properties.databaseIdentifier,
226            smallMoleculeSummary.getTheoreticalNeutralMass(),
227            SmallMoleculeSummary.Properties.theoreticalNeutralMass);
228        return physicalPosition;
229    }
230
231    protected void checkRegexMatches(MZTabErrorList errorList, int lineNumber,
232        SmallMoleculeSummary.Properties elementProperty,
233        String regularExpression, List<String> elements) {
234        if (!elements.isEmpty()) {
235            Pattern p = Pattern.compile(regularExpression);
236            for (int i = 0; i < elements.size(); i++) {
237                String element = elements.get(i);
238                if(!"null".equals(element)) {
239                    Matcher m = p.matcher(element);
240                    if (!m.matches()) {
241                        errorList.add(new MZTabError(FormatErrorType.RegexMismatch,
242                            lineNumber, elementProperty.getPropertyName(), element,
243                            "" + (i + 1), regularExpression));
244                    }
245                }
246            }
247        }
248
249    }
250
251    protected void checkItemNumbers(MZTabErrorList errorList, int lineNumber,
252        List<?> reference, SmallMoleculeSummary.Properties referenceProperty,
253        List<?> toCheck, SmallMoleculeSummary.Properties toCheckProperty) throws MZTabErrorOverflowException {
254        //check that array types have same element number
255        if (!toCheck.isEmpty() && reference.size() != toCheck.size()) {
256            errorList.add(new MZTabError(LogicalErrorType.ItemNumberMismatch,
257                lineNumber, toCheckProperty.getPropertyName(), "" + toCheck.
258                size(), referenceProperty.getPropertyName(), "" + reference.
259                size()));
260        }
261    }
262
263    /**
264     * <p>
265     * getRecord.</p>
266     *
267     * @return a {@link de.isas.mztab2.model.SmallMoleculeSummary} object.
268     */
269    @Override
270    public SmallMoleculeSummary getRecord() {
271
272        if (smallMoleculeSummary == null) {
273            smallMoleculeSummary = new SmallMoleculeSummary();
274        }
275        return smallMoleculeSummary;
276    }
277}