SMLLineParser.java
/*
* Copyright 2018 Leibniz-Institut für Analytische Wissenschaften – ISAS – e.V..
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package uk.ac.ebi.pride.jmztab2.utils.parser;
import de.isas.mztab2.model.Metadata;
import de.isas.mztab2.model.OptColumnMapping;
import de.isas.mztab2.model.SmallMoleculeSummary;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import uk.ac.ebi.pride.jmztab2.model.AbundanceColumn;
import uk.ac.ebi.pride.jmztab2.model.IMZTabColumn;
import uk.ac.ebi.pride.jmztab2.model.ISmallMoleculeColumn;
import uk.ac.ebi.pride.jmztab2.model.MZBoolean;
import uk.ac.ebi.pride.jmztab2.model.MZTabColumnFactory;
import uk.ac.ebi.pride.jmztab2.model.MZTabConstants;
import uk.ac.ebi.pride.jmztab2.model.OptionColumn;
import uk.ac.ebi.pride.jmztab2.model.SmallMoleculeColumn;
import uk.ac.ebi.pride.jmztab2.utils.errors.FormatErrorType;
import uk.ac.ebi.pride.jmztab2.utils.errors.LogicalErrorType;
import uk.ac.ebi.pride.jmztab2.utils.errors.MZTabError;
import uk.ac.ebi.pride.jmztab2.utils.errors.MZTabErrorList;
import uk.ac.ebi.pride.jmztab2.utils.errors.MZTabErrorOverflowException;
/**
* <p>
* SMLLineParser class.</p>
*
* @author qingwei
* @author nilshoffmann
* @since 10/02/13
*
*/
public class SMLLineParser extends MZTabDataLineParser<SmallMoleculeSummary> {
private SmallMoleculeSummary smallMoleculeSummary;
/**
* <p>
* Constructor for SMLLineParser.</p>
*
* @param context a
* {@link uk.ac.ebi.pride.jmztab2.utils.parser.MZTabParserContext} object.
* @param factory a {@link uk.ac.ebi.pride.jmztab2.model.MZTabColumnFactory}
* object.
* @param positionMapping a
* {@link uk.ac.ebi.pride.jmztab2.utils.parser.PositionMapping} object.
* @param metadata a {@link de.isas.mztab2.model.Metadata} object.
* @param errorList a
* {@link uk.ac.ebi.pride.jmztab2.utils.errors.MZTabErrorList} object.
*/
public SMLLineParser(MZTabParserContext context, MZTabColumnFactory factory,
PositionMapping positionMapping,
Metadata metadata, MZTabErrorList errorList) {
super(context, factory, positionMapping, metadata, errorList);
}
/**
* {@inheritDoc}
*/
@Override
protected int checkData() {
IMZTabColumn column;
String columnName;
String target;
int physicalPosition;
String logicalPosition;
smallMoleculeSummary = new SmallMoleculeSummary();
for (physicalPosition = 1; physicalPosition < items.length; physicalPosition++) {
logicalPosition = positionMapping.get(physicalPosition);
column = factory.getColumnMapping().
get(logicalPosition);
if (column != null) {
columnName = column.getName();
target = items[physicalPosition];
if (column instanceof ISmallMoleculeColumn) {
SmallMoleculeColumn.Stable stableColumn = SmallMoleculeColumn.Stable.
forName(columnName);
switch (stableColumn) {
case ADDUCT_IONS:
List<String> adductIons = checkStringList(
column, target, MZTabConstants.BAR);
checkRegexMatches(errorList, lineNumber,
SmallMoleculeSummary.Properties.adductIons,
MZTabConstants.REGEX_ADDUCT, adductIons);
smallMoleculeSummary.adductIons(adductIons);
break;
case BEST_ID_CONFIDENCE_MEASURE:
smallMoleculeSummary.bestIdConfidenceMeasure(
checkParameter(column, target, true));
break;
case BEST_ID_CONFIDENCE_VALUE:
smallMoleculeSummary.bestIdConfidenceValue(
checkDouble(column, target));
break;
case CHEMICAL_FORMULA:
smallMoleculeSummary.chemicalFormula(
checkStringList(column, target,
MZTabConstants.BAR));
break;
case CHEMICAL_NAME:
smallMoleculeSummary.chemicalName(checkStringList(
column, target, MZTabConstants.BAR));
break;
case DATABASE_IDENTIFIER:
smallMoleculeSummary.databaseIdentifier(
checkStringList(column, target,
MZTabConstants.BAR));
break;
case INCHI:
smallMoleculeSummary.inchi(checkStringList(column,
target, MZTabConstants.BAR));
break;
case RELIABILITY:
smallMoleculeSummary.reliability(checkString(column,
target, false));
break;
case SMF_ID_REFS:
smallMoleculeSummary.smfIdRefs(checkIntegerList(
column, target, MZTabConstants.BAR));
break;
case SMILES:
smallMoleculeSummary.smiles(checkSmiles(column,
target));
break;
case SML_ID:
smallMoleculeSummary.smlId(checkInteger(column,
target, false));
break;
case THEOR_NEUTRAL_MASS:
smallMoleculeSummary.theoreticalNeutralMass(
checkDoubleList(column, target));
break;
case URI:
smallMoleculeSummary.uri(
checkStringList(column, target,
MZTabConstants.BAR));
break;
}
} else if (column instanceof AbundanceColumn) {
if (columnName.startsWith(
SmallMoleculeSummary.Properties.abundanceAssay.
getPropertyName())) {
smallMoleculeSummary.addAbundanceAssayItem(checkDouble(
column, target));
} else if (columnName.startsWith(
SmallMoleculeSummary.Properties.abundanceStudyVariable.
getPropertyName())) {
smallMoleculeSummary.addAbundanceStudyVariableItem(
checkDouble(column, target));
} else if (columnName.startsWith(
SmallMoleculeSummary.Properties.abundanceVariationStudyVariable.
getPropertyName())) {
smallMoleculeSummary.
addAbundanceVariationStudyVariableItem(checkDouble(
column, target));
}
} else if (column instanceof OptionColumn) {
if (columnName.startsWith(MZTabConstants.OPT_PREFIX)) {
Class dataType = column.getDataType();
OptColumnMapping optColMapping = new OptColumnMapping();
optColMapping.identifier(columnName.substring(
MZTabConstants.OPT_PREFIX.length()));
if (dataType.equals(String.class)) {
optColMapping.value(checkString(column, target));
} else if (dataType.equals(Double.class)) {
optColMapping.value(Double.toString(checkDouble(
column, target)));
} else if (dataType.equals(MZBoolean.class)) {
optColMapping.value(Boolean.toString(checkMZBoolean(
column, target).
toBoolean()));
}
smallMoleculeSummary.addOptItem(optColMapping);
}
}
}
}
checkItemNumbers(errorList, lineNumber, smallMoleculeSummary.
getDatabaseIdentifier(),
SmallMoleculeSummary.Properties.databaseIdentifier,
smallMoleculeSummary.getChemicalFormula(),
SmallMoleculeSummary.Properties.chemicalFormula);
checkItemNumbers(errorList, lineNumber, smallMoleculeSummary.
getDatabaseIdentifier(),
SmallMoleculeSummary.Properties.databaseIdentifier,
smallMoleculeSummary.getSmiles(),
SmallMoleculeSummary.Properties.smiles);
checkItemNumbers(errorList, lineNumber, smallMoleculeSummary.
getDatabaseIdentifier(),
SmallMoleculeSummary.Properties.databaseIdentifier,
smallMoleculeSummary.getInchi(),
SmallMoleculeSummary.Properties.inchi);
checkItemNumbers(errorList, lineNumber, smallMoleculeSummary.
getDatabaseIdentifier(),
SmallMoleculeSummary.Properties.databaseIdentifier,
smallMoleculeSummary.getChemicalName(),
SmallMoleculeSummary.Properties.chemicalName);
checkItemNumbers(errorList, lineNumber, smallMoleculeSummary.
getDatabaseIdentifier(),
SmallMoleculeSummary.Properties.databaseIdentifier,
smallMoleculeSummary.getUri(),
SmallMoleculeSummary.Properties.uri);
checkItemNumbers(errorList, lineNumber, smallMoleculeSummary.
getDatabaseIdentifier(),
SmallMoleculeSummary.Properties.databaseIdentifier,
smallMoleculeSummary.getTheoreticalNeutralMass(),
SmallMoleculeSummary.Properties.theoreticalNeutralMass);
return physicalPosition;
}
protected void checkRegexMatches(MZTabErrorList errorList, int lineNumber,
SmallMoleculeSummary.Properties elementProperty,
String regularExpression, List<String> elements) {
if (!elements.isEmpty()) {
Pattern p = Pattern.compile(regularExpression);
for (int i = 0; i < elements.size(); i++) {
String element = elements.get(i);
if(!"null".equals(element)) {
Matcher m = p.matcher(element);
if (!m.matches()) {
errorList.add(new MZTabError(FormatErrorType.RegexMismatch,
lineNumber, elementProperty.getPropertyName(), element,
"" + (i + 1), regularExpression));
}
}
}
}
}
protected void checkItemNumbers(MZTabErrorList errorList, int lineNumber,
List<?> reference, SmallMoleculeSummary.Properties referenceProperty,
List<?> toCheck, SmallMoleculeSummary.Properties toCheckProperty) throws MZTabErrorOverflowException {
//check that array types have same element number
if (!toCheck.isEmpty() && reference.size() != toCheck.size()) {
errorList.add(new MZTabError(LogicalErrorType.ItemNumberMismatch,
lineNumber, toCheckProperty.getPropertyName(), "" + toCheck.
size(), referenceProperty.getPropertyName(), "" + reference.
size()));
}
}
/**
* <p>
* getRecord.</p>
*
* @return a {@link de.isas.mztab2.model.SmallMoleculeSummary} object.
*/
@Override
public SmallMoleculeSummary getRecord() {
if (smallMoleculeSummary == null) {
smallMoleculeSummary = new SmallMoleculeSummary();
}
return smallMoleculeSummary;
}
}