/*
 * Copyright 2018 Leibniz-Institut für Analytische Wissenschaften – ISAS – e.V..
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package uk.ac.ebi.pride.jmztab2.utils.parser;

import de.isas.mztab2.model.Metadata;
import de.isas.mztab2.model.OptColumnMapping;
import de.isas.mztab2.model.SmallMoleculeSummary;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import uk.ac.ebi.pride.jmztab2.model.AbundanceColumn;
import uk.ac.ebi.pride.jmztab2.model.IMZTabColumn;
import uk.ac.ebi.pride.jmztab2.model.ISmallMoleculeColumn;
import uk.ac.ebi.pride.jmztab2.model.MZBoolean;
import uk.ac.ebi.pride.jmztab2.model.MZTabColumnFactory;
import uk.ac.ebi.pride.jmztab2.model.MZTabConstants;
import uk.ac.ebi.pride.jmztab2.model.OptionColumn;
import uk.ac.ebi.pride.jmztab2.model.SmallMoleculeColumn;
import uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType;
import uk.ac.ebi.pride.jmztab2.utils.errors.LogicalErrorType;
import uk.ac.ebi.pride.jmztab2.utils.errors.MZTabError;
import uk.ac.ebi.pride.jmztab2.utils.errors.MZTabErrorList;
import uk.ac.ebi.pride.jmztab2.utils.errors.MZTabErrorOverflowException;

/**
 * <p>
 * Data line parser that turns the items of one line into a
 * {@link de.isas.mztab2.model.SmallMoleculeSummary} record. Every item is
 * dispatched on the kind of column it is mapped to (stable small molecule
 * column, abundance column or optional column); afterwards the element counts
 * of the list-valued fields are compared against the database identifier
 * list.</p>
 *
 * @author qingwei
 * @author nilshoffmann
 * @since 10/02/13
 *
 */
public class SMLLineParser extends MZTabDataLineParser<SmallMoleculeSummary> {

    /**
     * Record built by the most recent {@link #checkData()} call; {@code null}
     * until then.
     */
    private SmallMoleculeSummary smallMoleculeSummary;

    /**
     * <p>
     * Constructor for SMLLineParser. All arguments are handed to the
     * superclass constructor unchanged.</p>
     *
     * @param context a
     * {@link uk.ac.ebi.pride.jmztab2.utils.parser.MZTabParserContext} object.
     * @param factory a {@link uk.ac.ebi.pride.jmztab2.model.MZTabColumnFactory}
     * object.
     * @param positionMapping a
     * {@link uk.ac.ebi.pride.jmztab2.utils.parser.PositionMapping} object.
     * @param metadata a {@link de.isas.mztab2.model.Metadata} object.
     * @param errorList a
     * {@link uk.ac.ebi.pride.jmztab2.utils.errors.MZTabErrorList} object.
     */
    public SMLLineParser(MZTabParserContext context, MZTabColumnFactory factory,
        PositionMapping positionMapping,
        Metadata metadata, MZTabErrorList errorList) {
        super(context, factory, positionMapping, metadata, errorList);
    }

    /**
     * {@inheritDoc}
     * <p>
     * Starts a fresh {@link SmallMoleculeSummary}, fills it from every item
     * whose position maps to a known column, and finally checks that the
     * list-valued fields agree in length with the database identifier list.
     * Items without a column mapping are ignored.</p>
     *
     * @return the physical position reached when the item loop ends, i.e. the
     * larger of {@code 1} and {@code items.length}.
     */
    @Override
    protected int checkData() {
        smallMoleculeSummary = new SmallMoleculeSummary();

        int physicalPosition;
        // Index 0 is deliberately skipped; presumably it holds the line prefix
        // rather than a column value -- confirm against the superclass.
        for (physicalPosition = 1; physicalPosition < items.length; physicalPosition++) {
            String logicalPosition = positionMapping.get(physicalPosition);
            IMZTabColumn column = factory.getColumnMapping().get(logicalPosition);
            if (column == null) {
                continue;
            }

            String columnName = column.getName();
            String target = items[physicalPosition];
            // The order of these type tests matters if a column class ever
            // satisfies more than one of them, so it is kept as is.
            if (column instanceof ISmallMoleculeColumn) {
                applySmlStableColumn(column, columnName, target);
            } else if (column instanceof AbundanceColumn) {
                applySmlAbundanceColumn(column, columnName, target);
            } else if (column instanceof OptionColumn) {
                applySmlOptionColumn(column, columnName, target);
            }
        }

        // Errors are reported in this fixed order.
        checkAgainstDatabaseIdentifier(smallMoleculeSummary.getChemicalFormula(),
            SmallMoleculeSummary.Properties.chemicalFormula);
        checkAgainstDatabaseIdentifier(smallMoleculeSummary.getSmiles(),
            SmallMoleculeSummary.Properties.smiles);
        checkAgainstDatabaseIdentifier(smallMoleculeSummary.getInchi(),
            SmallMoleculeSummary.Properties.inchi);
        checkAgainstDatabaseIdentifier(smallMoleculeSummary.getChemicalName(),
            SmallMoleculeSummary.Properties.chemicalName);
        checkAgainstDatabaseIdentifier(smallMoleculeSummary.getUri(),
            SmallMoleculeSummary.Properties.uri);
        checkAgainstDatabaseIdentifier(
            smallMoleculeSummary.getTheoreticalNeutralMass(),
            SmallMoleculeSummary.Properties.theoreticalNeutralMass);
        return physicalPosition;
    }

    /**
     * Parses the value of a stable small molecule column and stores it in the
     * matching field of the current record.
     *
     * @param column the column the value belongs to.
     * @param columnName the name of {@code column}.
     * @param target the raw item text.
     */
    private void applySmlStableColumn(IMZTabColumn column, String columnName,
        String target) {
        // NOTE(review): if forName can return null for an unrecognised name,
        // the switch below throws a NullPointerException -- confirm.
        SmallMoleculeColumn.Stable stableColumn =
            SmallMoleculeColumn.Stable.forName(columnName);
        switch (stableColumn) {
            case ADDUCT_IONS:
                List<String> adductIons = checkStringList(column, target,
                    MZTabConstants.BAR);
                checkRegexMatches(errorList, lineNumber,
                    SmallMoleculeSummary.Properties.adductIons,
                    MZTabConstants.REGEX_ADDUCT, adductIons);
                smallMoleculeSummary.adductIons(adductIons);
                break;
            case BEST_ID_CONFIDENCE_MEASURE:
                smallMoleculeSummary.bestIdConfidenceMeasure(
                    checkParameter(column, target, true));
                break;
            case BEST_ID_CONFIDENCE_VALUE:
                smallMoleculeSummary.bestIdConfidenceValue(
                    checkDouble(column, target));
                break;
            case CHEMICAL_FORMULA:
                smallMoleculeSummary.chemicalFormula(
                    checkStringList(column, target, MZTabConstants.BAR));
                break;
            case CHEMICAL_NAME:
                smallMoleculeSummary.chemicalName(
                    checkStringList(column, target, MZTabConstants.BAR));
                break;
            case DATABASE_IDENTIFIER:
                smallMoleculeSummary.databaseIdentifier(
                    checkStringList(column, target, MZTabConstants.BAR));
                break;
            case INCHI:
                smallMoleculeSummary.inchi(
                    checkStringList(column, target, MZTabConstants.BAR));
                break;
            case RELIABILITY:
                smallMoleculeSummary.reliability(
                    checkString(column, target, false));
                break;
            case SMF_ID_REFS:
                smallMoleculeSummary.smfIdRefs(
                    checkIntegerList(column, target, MZTabConstants.BAR));
                break;
            case SMILES:
                smallMoleculeSummary.smiles(checkSmiles(column, target));
                break;
            case SML_ID:
                smallMoleculeSummary.smlId(checkInteger(column, target, false));
                break;
            case THEOR_NEUTRAL_MASS:
                smallMoleculeSummary.theoreticalNeutralMass(
                    checkDoubleList(column, target));
                break;
            case URI:
                smallMoleculeSummary.uri(
                    checkStringList(column, target, MZTabConstants.BAR));
                break;
            default:
                // Any other stable column is not stored by this parser.
                break;
        }
    }

    /**
     * Parses an abundance value and appends it to the list selected by the
     * column name prefix. Columns matching none of the three known prefixes
     * are ignored.
     *
     * @param column the column the value belongs to.
     * @param columnName the name of {@code column}.
     * @param target the raw item text.
     */
    private void applySmlAbundanceColumn(IMZTabColumn column, String columnName,
        String target) {
        if (columnName.startsWith(
            SmallMoleculeSummary.Properties.abundanceAssay.getPropertyName())) {
            smallMoleculeSummary.addAbundanceAssayItem(
                checkDouble(column, target));
        } else if (columnName.startsWith(
            SmallMoleculeSummary.Properties.abundanceStudyVariable.
                getPropertyName())) {
            smallMoleculeSummary.addAbundanceStudyVariableItem(
                checkDouble(column, target));
        } else if (columnName.startsWith(
            SmallMoleculeSummary.Properties.abundanceVariationStudyVariable.
                getPropertyName())) {
            smallMoleculeSummary.addAbundanceVariationStudyVariableItem(
                checkDouble(column, target));
        }
    }

    /**
     * Parses the value of an optional column and appends it to the record as
     * an {@link OptColumnMapping}. The mapping identifier is the column name
     * without {@link MZTabConstants#OPT_PREFIX}; columns whose name does not
     * start with that prefix are ignored. For a data type other than
     * {@code String}, {@code Double} or {@link MZBoolean} the mapping is still
     * added, but no value is set on it.
     *
     * @param column the column the value belongs to.
     * @param columnName the name of {@code column}.
     * @param target the raw item text.
     */
    private void applySmlOptionColumn(IMZTabColumn column, String columnName,
        String target) {
        if (!columnName.startsWith(MZTabConstants.OPT_PREFIX)) {
            return;
        }
        Class<?> dataType = column.getDataType();
        OptColumnMapping optColMapping = new OptColumnMapping();
        optColMapping.identifier(
            columnName.substring(MZTabConstants.OPT_PREFIX.length()));
        if (dataType.equals(String.class)) {
            optColMapping.value(checkString(column, target));
        } else if (dataType.equals(Double.class)) {
            // NOTE(review): if checkDouble can yield a null boxed Double, the
            // unboxing for Double.toString throws -- confirm in superclass.
            optColMapping.value(Double.toString(checkDouble(column, target)));
        } else if (dataType.equals(MZBoolean.class)) {
            // NOTE(review): same concern if checkMZBoolean can return null.
            optColMapping.value(Boolean.toString(
                checkMZBoolean(column, target).toBoolean()));
        }
        smallMoleculeSummary.addOptItem(optColMapping);
    }

    /**
     * Runs {@link #checkItemNumbers} for one list of the current record, using
     * the record's database identifier list as the reference.
     *
     * @param toCheck the list whose size is validated.
     * @param toCheckProperty the property {@code toCheck} was read from.
     */
    private void checkAgainstDatabaseIdentifier(List<?> toCheck,
        SmallMoleculeSummary.Properties toCheckProperty) {
        checkItemNumbers(errorList, lineNumber,
            smallMoleculeSummary.getDatabaseIdentifier(),
            SmallMoleculeSummary.Properties.databaseIdentifier,
            toCheck, toCheckProperty);
    }

    /**
     * Checks every element against a regular expression and records a
     * {@link FormatErrorType#RegexMismatch} error for each element that does
     * not match in full. Elements equal to the string {@code "null"} are
     * skipped, as are {@code null} elements; a {@code null} or empty list
     * produces no errors.
     *
     * @param errorList the list that receives the errors.
     * @param lineNumber the line number reported with each error.
     * @param elementProperty the property whose name is reported with each
     * error.
     * @param regularExpression the expression each element must match.
     * @param elements the values to check.
     */
    protected void checkRegexMatches(MZTabErrorList errorList, int lineNumber,
        SmallMoleculeSummary.Properties elementProperty,
        String regularExpression, List<String> elements) {
        if (elements == null || elements.isEmpty()) {
            return;
        }
        Pattern pattern = Pattern.compile(regularExpression);
        for (int i = 0; i < elements.size(); i++) {
            String element = elements.get(i);
            if (element == null || "null".equals(element)) {
                continue;
            }
            Matcher matcher = pattern.matcher(element);
            if (!matcher.matches()) {
                // The element position is reported 1-based.
                errorList.add(new MZTabError(FormatErrorType.RegexMismatch,
                    lineNumber, elementProperty.getPropertyName(), element,
                    String.valueOf(i + 1), regularExpression));
            }
        }
    }

    /**
     * Checks that a non-empty list has as many elements as a reference list
     * and records a {@link LogicalErrorType#ItemNumberMismatch} error
     * otherwise. An empty or {@code null} {@code toCheck} is never reported; a
     * {@code null} {@code reference} counts as having zero elements.
     *
     * @param errorList the list that receives the error.
     * @param lineNumber the line number reported with the error.
     * @param reference the list that defines the expected element count.
     * @param referenceProperty the property {@code reference} was read from.
     * @param toCheck the list whose element count is validated.
     * @param toCheckProperty the property {@code toCheck} was read from.
     * @throws MZTabErrorOverflowException declared by the original signature;
     * presumably raised by the error list when it overflows -- confirm.
     */
    protected void checkItemNumbers(MZTabErrorList errorList, int lineNumber,
        List<?> reference, SmallMoleculeSummary.Properties referenceProperty,
        List<?> toCheck, SmallMoleculeSummary.Properties toCheckProperty) throws MZTabErrorOverflowException {
        if (toCheck == null || toCheck.isEmpty()) {
            return;
        }
        int referenceSize = (reference == null) ? 0 : reference.size();
        if (referenceSize != toCheck.size()) {
            errorList.add(new MZTabError(LogicalErrorType.ItemNumberMismatch,
                lineNumber, toCheckProperty.getPropertyName(),
                String.valueOf(toCheck.size()),
                referenceProperty.getPropertyName(),
                String.valueOf(referenceSize)));
        }
    }

    /**
     * <p>
     * getRecord.</p>
     * <p>
     * Returns the record built by the last {@link #checkData()} call. If no
     * record exists yet, an empty one is created, stored and returned.</p>
     *
     * @return a {@link de.isas.mztab2.model.SmallMoleculeSummary} object.
     */
    @Override
    public SmallMoleculeSummary getRecord() {
        if (smallMoleculeSummary == null) {
            smallMoleculeSummary = new SmallMoleculeSummary();
        }
        return smallMoleculeSummary;
    }
}