001/* 002 * Copyright 2018 Leibniz-Institut für Analytische Wissenschaften – ISAS – e.V.. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package uk.ac.ebi.pride.jmztab2.utils.parser; 017 018import de.isas.mztab2.model.Metadata; 019import de.isas.mztab2.model.Parameter; 020import de.isas.mztab2.model.SmallMoleculeEvidence; 021import java.util.*; 022import java.util.regex.Matcher; 023import java.util.regex.Pattern; 024import java.util.stream.IntStream; 025import org.slf4j.Logger; 026import org.slf4j.LoggerFactory; 027import uk.ac.ebi.pride.jmztab2.model.IMZTabColumn; 028import uk.ac.ebi.pride.jmztab2.model.ISmallMoleculeColumn; 029import uk.ac.ebi.pride.jmztab2.model.MZTabColumnFactory; 030import uk.ac.ebi.pride.jmztab2.model.MZTabConstants; 031import uk.ac.ebi.pride.jmztab2.model.Section; 032import uk.ac.ebi.pride.jmztab2.model.SmallMoleculeEvidenceColumn; 033import uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType; 034import uk.ac.ebi.pride.jmztab2.utils.errors.LogicalErrorType; 035import uk.ac.ebi.pride.jmztab2.utils.errors.MZTabError; 036import uk.ac.ebi.pride.jmztab2.utils.errors.MZTabErrorList; 037import uk.ac.ebi.pride.jmztab2.utils.errors.MZTabException; 038 039/** 040 * Parse and validate Small Molecule Evidence header line into a {@link uk.ac.ebi.pride.jmztab2.model.MZTabColumnFactory}. 041 * 042 * @author nilshoffmann 043 * @since 11/09/17 044 * 045 */ 046public class SEHLineParser extends MZTabHeaderLineParser { 047 048 private static final Logger LOGGER = LoggerFactory.getLogger(SEHLineParser.class); 049 private Map<Integer, String> physPositionToOrder; 050 051 052 /** 053 * <p>Constructor for SEHLineParser.</p> 054 * 055 * @param context a {@link uk.ac.ebi.pride.jmztab2.utils.parser.MZTabParserContext} object. 056 * @param metadata a {@link de.isas.mztab2.model.Metadata} object. 057 */ 058 public SEHLineParser(MZTabParserContext context, Metadata metadata) { 059 super(context, MZTabColumnFactory.getInstance(Section.Small_Molecule_Evidence_Header), metadata); 060 } 061 062 /** {@inheritDoc} */ 063 @Override 064 protected int parseColumns() throws MZTabException { 065 String header; 066 Integer physicalPosition; 067 068 ISmallMoleculeColumn column; 069 SortedMap<String, IMZTabColumn> columnMapping = factory.getColumnMapping(); 070 SortedMap<String, IMZTabColumn> optionalMapping = factory.getOptionalColumnMapping(); 071 SortedMap<String, IMZTabColumn> stableMapping = factory.getStableColumnMapping(); 072 073 physPositionToOrder = generateHeaderPhysPositionToOrderMap(items); 074 075 //Iterates through the tokens in the small molecule evidence header 076 //It will identify the type of column and the position accordingly 077 for (physicalPosition = 1; physicalPosition < items.length; physicalPosition++) { 078 079 column = null; 080 header = items[physicalPosition]; 081 if (header.startsWith(SmallMoleculeEvidence.Properties.idConfidenceMeasure.getPropertyName())) { 082 checkIdConfidenceMeasure(header); 083 } else if (header.startsWith(MZTabConstants.OPT_PREFIX)) { 084 checkOptColumnName(header); 085 } else { 086 try { 087 column = SmallMoleculeEvidenceColumn.Stable.columnFor(header); 088 } catch(IllegalArgumentException ex) { 089 throw new MZTabException(new MZTabError(LogicalErrorType.ColumnNotValid,lineNumber,header,section.getName())); 090 } 091 092 } 093 094 if (column != null) { 095 if (!column.getOrder().equals(physPositionToOrder.get(physicalPosition))) { 096 column.setOrder(physPositionToOrder.get(physicalPosition)); 097 LOGGER.debug(column.toString()); 098 } 099 if(column.isOptional()){ 100 optionalMapping.put(column.getLogicPosition(), column); 101 } else { 102 stableMapping.put(column.getLogicPosition(), column); 103 } 104 columnMapping.put(column.getLogicPosition(), column); 105 } 106 } 107 return physicalPosition; 108 } 109 110 private void checkIdConfidenceMeasure(String header) throws MZTabException { 111 String valueLabel = header; 112 113 Pattern pattern = Pattern.compile(SmallMoleculeEvidence.Properties.idConfidenceMeasure.getPropertyName()+MZTabConstants.REGEX_INDEXED_VALUE); 114 Matcher matcher = pattern.matcher(valueLabel); 115 if (!matcher.find()) { 116 MZTabError error = new MZTabError(FormatErrorType.StableColumn, lineNumber, header); 117 throw new MZTabException(error); 118 } 119 120 int id = parseIndex(header, matcher.group(1)); 121 Parameter p = metadata.getIdConfidenceMeasure().get(id-1); 122 factory.addIdConfidenceMeasureColumn(p, id, Double.class); 123 } 124 125 private Map<Integer, String> generateHeaderPhysPositionToOrderMap(String[] items) { 126 Integer physicalPosition; 127 Map<Integer, String> physicalPositionToOrder = new LinkedHashMap<>(); 128 int order = 0; 129 for (physicalPosition = 1; physicalPosition < items.length; physicalPosition++) { 130 physicalPositionToOrder.put(physicalPosition, fromIndexToOrder(++order)); 131 } 132 return physicalPositionToOrder; 133 } 134 135 /** 136 * {@inheritDoc} 137 * 138 * The following optional columns are mandatory: 139 * 1. id_confidence_measure[1-n] 140 * 141 * NOTICE: this method will be called at end of parse() function. 142 * @see MZTabHeaderLineParser#parse(int, String, MZTabErrorList) 143 * @see MZTabHeaderLineParser#parse(int, String, MZTabErrorList) 144 */ 145 @Override 146 protected void refine() throws MZTabException { 147 //mandatory columns 148 List<String> mandatoryColumnHeaders = new ArrayList<>(); 149 for(ISmallMoleculeColumn col:SmallMoleculeEvidenceColumn.Stable.columns()) { 150 mandatoryColumnHeaders.add(col.getName()); 151 } 152 153 IntStream.range(0, metadata.getIdConfidenceMeasure().size()). 154 forEachOrdered(i -> 155 { 156 mandatoryColumnHeaders.add(SmallMoleculeEvidence.Properties.idConfidenceMeasure.getPropertyName()+"["+(i+1)+"]"); 157 }); 158 159 for (String columnHeader : mandatoryColumnHeaders) { 160 if (factory.findColumnByHeader(columnHeader) == null) { 161 throw new MZTabException(new MZTabError(FormatErrorType.StableColumn, lineNumber, columnHeader)); 162 } 163 } 164 } 165}