// MzTabWriterDefaults.java
/*
* Copyright 2018 Leibniz-Institut für Analytische Wissenschaften – ISAS – e.V..
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.isas.mztab2.io;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.core.JsonGenerator.Feature;
import com.fasterxml.jackson.dataformat.csv.CsvFactory;
import com.fasterxml.jackson.dataformat.csv.CsvMapper;
import com.fasterxml.jackson.dataformat.csv.CsvSchema;
import de.isas.mztab2.io.formats.AssayFormat;
import de.isas.mztab2.io.formats.ContactFormat;
import de.isas.mztab2.io.formats.CvFormat;
import de.isas.mztab2.io.formats.DatabaseFormat;
import de.isas.mztab2.io.formats.InstrumentFormat;
import de.isas.mztab2.io.formats.MetadataFormat;
import de.isas.mztab2.io.formats.MsRunFormat;
import de.isas.mztab2.io.formats.ParameterFormat;
import de.isas.mztab2.io.formats.PublicationFormat;
import de.isas.mztab2.io.formats.SampleFormat;
import de.isas.mztab2.io.formats.SampleProcessingFormat;
import de.isas.mztab2.io.formats.SmallMoleculeEvidenceFormat;
import de.isas.mztab2.io.formats.SmallMoleculeFeatureFormat;
import de.isas.mztab2.io.formats.SmallMoleculeSummaryFormat;
import de.isas.mztab2.io.formats.SoftwareFormat;
import de.isas.mztab2.io.formats.StudyVariableFormat;
import de.isas.mztab2.io.formats.UriFormat;
import de.isas.mztab2.io.serialization.Serializers;
import de.isas.mztab2.model.Assay;
import de.isas.mztab2.model.CV;
import de.isas.mztab2.model.Contact;
import de.isas.mztab2.model.Database;
import de.isas.mztab2.model.Instrument;
import de.isas.mztab2.model.Metadata;
import de.isas.mztab2.model.MsRun;
import de.isas.mztab2.model.MzTab;
import de.isas.mztab2.model.OptColumnMapping;
import de.isas.mztab2.model.Parameter;
import de.isas.mztab2.model.Publication;
import de.isas.mztab2.model.Sample;
import de.isas.mztab2.model.SampleProcessing;
import de.isas.mztab2.model.SmallMoleculeEvidence;
import de.isas.mztab2.model.SmallMoleculeFeature;
import de.isas.mztab2.model.SmallMoleculeSummary;
import de.isas.mztab2.model.Software;
import de.isas.mztab2.model.StudyVariable;
import de.isas.mztab2.model.Uri;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import uk.ac.ebi.pride.jmztab2.model.MZTabConstants;
import uk.ac.ebi.pride.jmztab2.model.SmallMoleculeColumn;
import uk.ac.ebi.pride.jmztab2.model.SmallMoleculeEvidenceColumn;
import uk.ac.ebi.pride.jmztab2.model.SmallMoleculeFeatureColumn;
import uk.ac.ebi.pride.jmztab2.utils.errors.LogicalErrorType;
import uk.ac.ebi.pride.jmztab2.utils.errors.MZTabError;
import uk.ac.ebi.pride.jmztab2.utils.errors.MZTabException;
/**
 * Default mapper and schema definitions for writing of mzTab files using the
 * Jackson CSV mapper.
 *
 * <p>
 * The class declares no fields; every call creates and returns a fresh mapper
 * or schema object. Mapper factory methods come in pairs with schema factory
 * methods: one pair for the metadata section and one pair for each of the
 * small molecule summary, feature and evidence sections.</p>
 *
 * @author nilshoffmann
 */
public class MzTabWriterDefaults {

    /**
     * Create a default csv mapper instance.
     *
     * @return the csv mapper
     */
    public CsvMapper defaultMapper() {
        CsvFactory factory = new CsvFactory();
        // Do not let generators created by this factory close the underlying
        // target automatically (see Jackson's AUTO_CLOSE_TARGET feature).
        factory.disable(JsonGenerator.Feature.AUTO_CLOSE_TARGET);
        CsvMapper mapper = new CsvMapper(factory);
        // Properties without a matching schema column are skipped instead of
        // raising an error (see Jackson's IGNORE_UNKNOWN generator feature).
        mapper.configure(Feature.IGNORE_UNKNOWN, true);
        return mapper;
    }

    /**
     * Create a metadata section csv mapper. This registers mixins for
     * serialization of all objects that are part of the metadata section.
     *
     * @return the metadata section csv mapper.
     */
    public CsvMapper metadataMapper() {
        CsvMapper mapper = defaultMapper();
        mapper.addMixIn(Metadata.class, MetadataFormat.class);
        mapper.addMixIn(Assay.class, AssayFormat.class);
        mapper.addMixIn(Contact.class, ContactFormat.class);
        mapper.addMixIn(Publication.class, PublicationFormat.class);
        mapper.addMixIn(Instrument.class, InstrumentFormat.class);
        mapper.addMixIn(Sample.class, SampleFormat.class);
        mapper.addMixIn(SampleProcessing.class, SampleProcessingFormat.class);
        mapper.addMixIn(Software.class, SoftwareFormat.class);
        mapper.addMixIn(StudyVariable.class, StudyVariableFormat.class);
        mapper.addMixIn(MsRun.class, MsRunFormat.class);
        mapper.addMixIn(Database.class, DatabaseFormat.class);
        mapper.addMixIn(Parameter.class, ParameterFormat.class);
        mapper.addMixIn(CV.class, CvFormat.class);
        mapper.addMixIn(Uri.class, UriFormat.class);
        return mapper;
    }

    /**
     * Creates the csv schema for the metadata section (column names, value
     * separators, array element separators, etc.). The schema has the three
     * columns {@code PREFIX}, {@code KEY} and {@code VALUES} and does not
     * write a header line.
     *
     * @param mapper the configured csv mapper
     * @return the metadata csv schema
     */
    public CsvSchema metaDataSchema(CsvMapper mapper) {
        CsvSchema.Builder builder = mapper.schema().
            builder();
        builder.addColumn("PREFIX", CsvSchema.ColumnType.STRING).
            addColumn("KEY", CsvSchema.ColumnType.STRING).
            addArrayColumn("VALUES", MZTabConstants.BAR_S);
        return builder.build().
            withAllowComments(true).
            withArrayElementSeparator(MZTabConstants.BAR_S).
            withNullValue(MZTabConstants.NULL).
            withUseHeader(false).
            withoutQuoteChar().
            withoutEscapeChar().
            withLineSeparator(MZTabConstants.NEW_LINE).
            withColumnSeparator(MZTabConstants.TAB);
    }

    /**
     * Create a small molecule summary section csv mapper. This registers mixins
     * for serialization of all objects that are part of the small molecule
     * summary section, in addition to the metadata mixins.
     *
     * @return the small molecule summary section csv mapper.
     */
    public CsvMapper smallMoleculeSummaryMapper() {
        CsvMapper mapper = metadataMapper();
        mapper.addMixIn(SmallMoleculeSummary.class,
            SmallMoleculeSummaryFormat.class);
        return mapper;
    }

    /**
     * Create a small molecule feature section csv mapper. This registers mixins
     * for serialization of all objects that are part of the small molecule
     * feature section, in addition to the metadata mixins.
     *
     * @return the small molecule feature section csv mapper.
     */
    public CsvMapper smallMoleculeFeatureMapper() {
        CsvMapper mapper = metadataMapper();
        mapper.addMixIn(SmallMoleculeFeature.class,
            SmallMoleculeFeatureFormat.class);
        return mapper;
    }

    /**
     * Create a small molecule evidence section csv mapper. This registers
     * mixins for serialization of all objects that are part of the small
     * molecule evidence section, in addition to the metadata mixins.
     *
     * @return the small molecule evidence section csv mapper.
     */
    public CsvMapper smallMoleculeEvidenceMapper() {
        CsvMapper mapper = metadataMapper();
        mapper.addMixIn(SmallMoleculeEvidence.class,
            SmallMoleculeEvidenceFormat.class);
        return mapper;
    }

    /**
     * Apply the default csv schema settings to the provided builder. In
     * contrast to {@link #metaDataSchema(CsvMapper)}, the resulting schema
     * writes a header line.
     *
     * @param builder the builder to use for schema configuration
     * @return the configured csv schema
     */
    public CsvSchema defaultSchemaForBuilder(CsvSchema.Builder builder) {
        return builder.
            build().
            withAllowComments(true).
            withArrayElementSeparator(MZTabConstants.BAR_S).
            withNullValue(MZTabConstants.NULL).
            withUseHeader(true).
            withoutQuoteChar().
            withoutEscapeChar().
            withLineSeparator(MZTabConstants.NEW_LINE).
            withColumnSeparator(MZTabConstants.TAB);
    }

    /**
     * Creates the csv schema (column names and types) for the small molecule
     * summary section. Columns are added in this order: header prefix, stable
     * columns, one abundance column per assay, one abundance column per study
     * variable, one abundance variation column per study variable, and
     * finally the optional columns in the order in which they are first
     * encountered in the summary rows.
     *
     * @param mapper the csv mapper
     * @param mzTabFile the mztab object
     * @return the configured csv schema for the small molecule summary section
     * @throws MZTabException if the mztab object has no metadata section or no
     * small molecule summary section
     */
    public CsvSchema smallMoleculeSummarySchema(CsvMapper mapper,
        MzTab mzTabFile) throws MZTabException {
        CsvSchema.Builder builder = mapper.schema().
            builder();
        builder.addColumn(SmallMoleculeSummary.HeaderPrefixEnum.SMH.getValue(),
            CsvSchema.ColumnType.STRING).
            addColumn(SmallMoleculeColumn.Stable.columnFor(
                SmallMoleculeColumn.Stable.SML_ID).getHeader(),
                CsvSchema.ColumnType.STRING).
            addColumn(SmallMoleculeColumn.Stable.columnFor(
                SmallMoleculeColumn.Stable.SMF_ID_REFS).getHeader(),
                CsvSchema.ColumnType.STRING).
            addColumn(SmallMoleculeColumn.Stable.columnFor(
                SmallMoleculeColumn.Stable.DATABASE_IDENTIFIER).getHeader(),
                CsvSchema.ColumnType.STRING).
            addColumn(SmallMoleculeColumn.Stable.columnFor(
                SmallMoleculeColumn.Stable.CHEMICAL_FORMULA).getHeader(),
                CsvSchema.ColumnType.STRING).
            addColumn(SmallMoleculeColumn.Stable.columnFor(
                SmallMoleculeColumn.Stable.SMILES).getHeader(),
                CsvSchema.ColumnType.STRING).
            addColumn(SmallMoleculeColumn.Stable.columnFor(
                SmallMoleculeColumn.Stable.INCHI).getHeader(),
                CsvSchema.ColumnType.STRING).
            addColumn(SmallMoleculeColumn.Stable.columnFor(
                SmallMoleculeColumn.Stable.CHEMICAL_NAME).getHeader(),
                CsvSchema.ColumnType.STRING).
            addColumn(SmallMoleculeColumn.Stable.columnFor(
                SmallMoleculeColumn.Stable.URI).getHeader(),
                CsvSchema.ColumnType.STRING).
            addColumn(SmallMoleculeColumn.Stable.columnFor(
                SmallMoleculeColumn.Stable.THEOR_NEUTRAL_MASS).getHeader(),
                CsvSchema.ColumnType.STRING).
            addColumn(SmallMoleculeColumn.Stable.columnFor(
                SmallMoleculeColumn.Stable.ADDUCT_IONS).getHeader(),
                CsvSchema.ColumnType.STRING).
            addColumn(SmallMoleculeColumn.Stable.columnFor(
                SmallMoleculeColumn.Stable.RELIABILITY).getHeader(),
                CsvSchema.ColumnType.STRING).
            addColumn(SmallMoleculeColumn.Stable.columnFor(
                SmallMoleculeColumn.Stable.BEST_ID_CONFIDENCE_MEASURE).getHeader(),
                CsvSchema.ColumnType.STRING).
            addColumn(SmallMoleculeColumn.Stable.columnFor(
                SmallMoleculeColumn.Stable.BEST_ID_CONFIDENCE_VALUE).getHeader(),
                CsvSchema.ColumnType.NUMBER_OR_STRING);

        Metadata metadata = Optional.ofNullable(mzTabFile.getMetadata()).
            orElseThrow(() -> new MZTabException(new MZTabError(
            LogicalErrorType.NoMetadataSection, -1)));
        List<SmallMoleculeSummary> smsList = Optional.ofNullable(
            mzTabFile.getSmallMoleculeSummary()).
            orElseThrow(() -> new MZTabException(new MZTabError(
            LogicalErrorType.NoSmallMoleculeSummarySection, -1)));

        // A missing assay or study variable list is treated as empty, in line
        // with the null handling in smallMoleculeFeatureSchema.
        Optional.ofNullable(metadata.getAssay()).
            orElse(Collections.emptyList()).
            forEach((assay) -> {
                builder.addColumn(
                    SmallMoleculeSummary.Properties.abundanceAssay + "[" + assay.
                        getId() + "]",
                    CsvSchema.ColumnType.NUMBER_OR_STRING);
            });
        // All abundance columns come first, followed by all abundance
        // variation columns; hence two separate passes over the same list.
        Optional.ofNullable(metadata.getStudyVariable()).
            orElse(Collections.emptyList()).
            forEach((studyVariable) -> {
                builder.addColumn(
                    SmallMoleculeSummary.Properties.abundanceStudyVariable + "[" + studyVariable.
                        getId() + "]",
                    CsvSchema.ColumnType.NUMBER_OR_STRING);
            });
        Optional.ofNullable(metadata.getStudyVariable()).
            orElse(Collections.emptyList()).
            forEach((studyVariable) -> {
                builder.addColumn(
                    SmallMoleculeSummary.Properties.abundanceVariationStudyVariable + "[" + studyVariable.
                        getId() + "]",
                    CsvSchema.ColumnType.NUMBER_OR_STRING);
            });

        Map<String, OptColumnMapping> optColumns = new LinkedHashMap<>();
        smsList.forEach((SmallMoleculeSummary sms) -> {
            collectOptColumns(optColumns, sms.getOpt());
        });
        addOptColumns(builder, optColumns);
        return defaultSchemaForBuilder(builder);
    }

    /**
     * Creates the csv schema (column names and types) for the small molecule
     * feature section. Columns are added in this order: header prefix, stable
     * columns, one abundance column per assay, and finally the optional
     * columns in the order in which they are first encountered in the feature
     * rows. A missing assay list or a missing small molecule feature list is
     * treated as empty.
     *
     * @param mapper the csv mapper
     * @param mzTabFile the mztab object
     * @return the configured csv schema for the small molecule feature section
     * @throws MZTabException if the mztab object has no metadata section
     */
    public CsvSchema smallMoleculeFeatureSchema(CsvMapper mapper,
        MzTab mzTabFile) throws MZTabException {
        CsvSchema.Builder builder = mapper.schema().
            builder();
        builder.addColumn(SmallMoleculeFeature.HeaderPrefixEnum.SFH.getValue(),
            CsvSchema.ColumnType.STRING).
            addColumn(SmallMoleculeFeatureColumn.Stable.columnFor(
                SmallMoleculeFeatureColumn.Stable.SMF_ID).getHeader(),
                CsvSchema.ColumnType.STRING).
            addColumn(SmallMoleculeFeatureColumn.Stable.columnFor(
                SmallMoleculeFeatureColumn.Stable.SME_ID_REFS).getHeader(),
                CsvSchema.ColumnType.STRING).
            addColumn(SmallMoleculeFeatureColumn.Stable.columnFor(
                SmallMoleculeFeatureColumn.Stable.SME_ID_REF_AMBIGUITY_CODE).getHeader(),
                CsvSchema.ColumnType.NUMBER_OR_STRING).
            addColumn(SmallMoleculeFeatureColumn.Stable.columnFor(
                SmallMoleculeFeatureColumn.Stable.ADDUCT_ION).getHeader(),
                CsvSchema.ColumnType.STRING).
            addColumn(SmallMoleculeFeatureColumn.Stable.columnFor(
                SmallMoleculeFeatureColumn.Stable.ISOTOPOMER).getHeader(),
                CsvSchema.ColumnType.STRING).
            addColumn(SmallMoleculeFeatureColumn.Stable.columnFor(
                SmallMoleculeFeatureColumn.Stable.EXP_MASS_TO_CHARGE).getHeader(),
                CsvSchema.ColumnType.NUMBER_OR_STRING).
            addColumn(SmallMoleculeFeatureColumn.Stable.columnFor(
                SmallMoleculeFeatureColumn.Stable.CHARGE).getHeader(),
                CsvSchema.ColumnType.NUMBER_OR_STRING).
            addColumn(SmallMoleculeFeatureColumn.Stable.columnFor(
                SmallMoleculeFeatureColumn.Stable.RETENTION_TIME_IN_SECONDS).getHeader(),
                CsvSchema.ColumnType.NUMBER_OR_STRING).
            addColumn(SmallMoleculeFeatureColumn.Stable.columnFor(
                SmallMoleculeFeatureColumn.Stable.RETENTION_TIME_IN_SECONDS_START).getHeader(),
                CsvSchema.ColumnType.NUMBER_OR_STRING).
            addColumn(SmallMoleculeFeatureColumn.Stable.columnFor(
                SmallMoleculeFeatureColumn.Stable.RETENTION_TIME_IN_SECONDS_END).getHeader(),
                CsvSchema.ColumnType.NUMBER_OR_STRING);

        Metadata metadata = Optional.ofNullable(mzTabFile.getMetadata()).
            orElseThrow(() -> new MZTabException(new MZTabError(
            LogicalErrorType.NoMetadataSection, -1)));

        Optional.ofNullable(metadata.getAssay()).
            orElse(Collections.emptyList()).
            forEach((assay) -> {
                builder.addColumn(
                    SmallMoleculeFeature.Properties.abundanceAssay + "[" + assay.
                        getId() + "]",
                    CsvSchema.ColumnType.NUMBER_OR_STRING);
            });

        Map<String, OptColumnMapping> optColumns = new LinkedHashMap<>();
        // Guard against a missing feature section instead of failing with a
        // NullPointerException; the schema then has no optional columns.
        Optional.ofNullable(mzTabFile.getSmallMoleculeFeature()).
            orElse(Collections.emptyList()).
            forEach((SmallMoleculeFeature smf) -> {
                collectOptColumns(optColumns, smf.getOpt());
            });
        addOptColumns(builder, optColumns);
        return defaultSchemaForBuilder(builder);
    }

    /**
     * Creates the csv schema (column names and types) for the small molecule
     * evidence section. Columns are added in this order: header prefix, stable
     * columns up to and including the ms level, one id confidence measure
     * column per parameter defined in the metadata, the rank column, and
     * finally the optional columns in the order in which they are first
     * encountered in the evidence rows. A missing id confidence measure list
     * or a missing small molecule evidence list is treated as empty.
     *
     * @param mapper the csv mapper
     * @param mzTabFile the mztab object
     * @return the configured csv schema for the small molecule evidence section
     * @throws MZTabException if the mztab object has no metadata section
     */
    public CsvSchema smallMoleculeEvidenceSchema(CsvMapper mapper,
        MzTab mzTabFile) throws MZTabException {
        CsvSchema.Builder builder = mapper.schema().
            builder();
        builder.addColumn(SmallMoleculeEvidence.HeaderPrefixEnum.SEH.getValue(),
            CsvSchema.ColumnType.STRING).
            addColumn(SmallMoleculeEvidenceColumn.Stable.columnFor(
                SmallMoleculeEvidenceColumn.Stable.SME_ID).getHeader(),
                CsvSchema.ColumnType.STRING).
            addColumn(SmallMoleculeEvidenceColumn.Stable.columnFor(
                SmallMoleculeEvidenceColumn.Stable.EVIDENCE_INPUT_ID).getHeader(),
                CsvSchema.ColumnType.NUMBER_OR_STRING).
            addColumn(SmallMoleculeEvidenceColumn.Stable.columnFor(
                SmallMoleculeEvidenceColumn.Stable.DATABASE_IDENTIFIER).getHeader(),
                CsvSchema.ColumnType.STRING).
            addColumn(SmallMoleculeEvidenceColumn.Stable.columnFor(
                SmallMoleculeEvidenceColumn.Stable.CHEMICAL_FORMULA).getHeader(),
                CsvSchema.ColumnType.STRING).
            addColumn(SmallMoleculeEvidenceColumn.Stable.columnFor(
                SmallMoleculeEvidenceColumn.Stable.SMILES).getHeader(),
                CsvSchema.ColumnType.STRING).
            addColumn(SmallMoleculeEvidenceColumn.Stable.columnFor(
                SmallMoleculeEvidenceColumn.Stable.INCHI).getHeader(),
                CsvSchema.ColumnType.STRING).
            addColumn(SmallMoleculeEvidenceColumn.Stable.columnFor(
                SmallMoleculeEvidenceColumn.Stable.CHEMICAL_NAME).getHeader(),
                CsvSchema.ColumnType.STRING).
            addColumn(SmallMoleculeEvidenceColumn.Stable.columnFor(
                SmallMoleculeEvidenceColumn.Stable.URI).getHeader(),
                CsvSchema.ColumnType.STRING).
            addColumn(SmallMoleculeEvidenceColumn.Stable.columnFor(
                SmallMoleculeEvidenceColumn.Stable.DERIVATIZED_FORM).getHeader(),
                CsvSchema.ColumnType.STRING).
            addColumn(SmallMoleculeEvidenceColumn.Stable.columnFor(
                SmallMoleculeEvidenceColumn.Stable.ADDUCT_ION).getHeader(),
                CsvSchema.ColumnType.STRING).
            addColumn(SmallMoleculeEvidenceColumn.Stable.columnFor(
                SmallMoleculeEvidenceColumn.Stable.EXP_MASS_TO_CHARGE).getHeader(),
                CsvSchema.ColumnType.NUMBER_OR_STRING).
            addColumn(SmallMoleculeEvidenceColumn.Stable.columnFor(
                SmallMoleculeEvidenceColumn.Stable.CHARGE).getHeader(),
                CsvSchema.ColumnType.NUMBER_OR_STRING).
            addColumn(SmallMoleculeEvidenceColumn.Stable.columnFor(
                SmallMoleculeEvidenceColumn.Stable.THEORETICAL_MASS_TO_CHARGE).getHeader(),
                CsvSchema.ColumnType.NUMBER_OR_STRING).
            addColumn(SmallMoleculeEvidenceColumn.Stable.columnFor(
                SmallMoleculeEvidenceColumn.Stable.SPECTRA_REF).getHeader(),
                CsvSchema.ColumnType.STRING).
            addColumn(SmallMoleculeEvidenceColumn.Stable.columnFor(
                SmallMoleculeEvidenceColumn.Stable.IDENTIFICATION_METHOD).getHeader(),
                CsvSchema.ColumnType.STRING).
            addColumn(SmallMoleculeEvidenceColumn.Stable.columnFor(
                SmallMoleculeEvidenceColumn.Stable.MS_LEVEL).getHeader(),
                CsvSchema.ColumnType.STRING);

        Metadata metadata = Optional.ofNullable(mzTabFile.getMetadata()).
            orElseThrow(() -> new MZTabException(new MZTabError(
            LogicalErrorType.NoMetadataSection, -1)));

        Optional.ofNullable(metadata.getIdConfidenceMeasure()).
            orElse(Collections.emptyList()).
            forEach((param) -> {
                builder.addColumn(
                    SmallMoleculeEvidence.Properties.idConfidenceMeasure + "[" + param.
                        getId() + "]",
                    CsvSchema.ColumnType.NUMBER_OR_STRING);
            });
        // The rank column is placed after the id confidence measure columns.
        builder.addColumn(SmallMoleculeEvidenceColumn.Stable.columnFor(
            SmallMoleculeEvidenceColumn.Stable.RANK).getHeader(),
            CsvSchema.ColumnType.NUMBER_OR_STRING);

        Map<String, OptColumnMapping> optColumns = new LinkedHashMap<>();
        // Guard against a missing evidence section instead of failing with a
        // NullPointerException; the schema then has no optional columns.
        Optional.ofNullable(mzTabFile.getSmallMoleculeEvidence()).
            orElse(Collections.emptyList()).
            forEach((SmallMoleculeEvidence sme) -> {
                collectOptColumns(optColumns, sme.getOpt());
            });
        addOptColumns(builder, optColumns);
        return defaultSchemaForBuilder(builder);
    }

    /**
     * Registers the optional column mappings of one row under their
     * serialized column name. Only the first mapping seen for a name is kept,
     * so the insertion order of the map reflects first occurrence.
     *
     * @param optColumns the insertion-ordered accumulator, keyed by column name
     * @param rowOpts the optional column mappings of one row, may be null
     */
    private static void collectOptColumns(
        Map<String, OptColumnMapping> optColumns,
        Iterable<? extends OptColumnMapping> rowOpts) {
        if (rowOpts == null) {
            return;
        }
        for (OptColumnMapping ocm : rowOpts) {
            optColumns.putIfAbsent(Serializers.printOptColumnMapping(ocm), ocm);
        }
    }

    /**
     * Appends one column per collected optional column name to the builder,
     * in the iteration order of the map.
     *
     * @param builder the schema builder to extend
     * @param optColumns the collected optional columns, keyed by column name
     */
    private static void addOptColumns(CsvSchema.Builder builder,
        Map<String, OptColumnMapping> optColumns) {
        for (String key : optColumns.keySet()) {
            builder.addColumn(key, CsvSchema.ColumnType.NUMBER_OR_STRING);
        }
    }
}