OpenMS
Loading...
Searching...
No Matches
FeatureLinkerBase.cpp
Go to the documentation of this file.
1// Copyright (c) 2002-2023, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Timo Sachsenberg $
6// $Authors: Marc Sturm, Clemens Groepl, Steffen Sass $
7// --------------------------------------------------------------------------
8
9#include <OpenMS/FORMAT/ConsensusXMLFile.h>
10#include <OpenMS/FORMAT/FeatureXMLFile.h>
11#include <OpenMS/FORMAT/FileHandler.h>
12#include <OpenMS/FORMAT/FileTypes.h>
13#include <OpenMS/ANALYSIS/MAPMATCHING/FeatureGroupingAlgorithm.h>
14#include <OpenMS/DATASTRUCTURES/ListUtils.h>
15#include <OpenMS/CONCEPT/ProgressLogger.h>
16#include <OpenMS/METADATA/ExperimentalDesign.h>
17#include <OpenMS/FORMAT/ExperimentalDesignFile.h>
18
19#include <OpenMS/KERNEL/ConversionHelper.h>
20
21#include <OpenMS/APPLICATIONS/TOPPBase.h>
22
23#include <iomanip> // setw
24
25using namespace OpenMS;
26using namespace std;
27
28//-------------------------------------------------------------
29//Doxygen docu
30//-------------------------------------------------------------
31
39// We do not want this class to show up in the docu:
41
42class TOPPFeatureLinkerBase :
43 public TOPPBase,
44 public ProgressLogger
45{
46
47public:
48 TOPPFeatureLinkerBase(String name, String description, bool official = true) :
49 TOPPBase(name, description, official)
50 {
51 }
52
53protected:
54 void registerOptionsAndFlags_() override // only for "unlabeled" algorithms!
55 {
56 registerInputFileList_("in", "<files>", ListUtils::create<String>(""), "input files separated by blanks", true);
57 setValidFormats_("in", ListUtils::create<String>("featureXML,consensusXML"));
58 registerOutputFile_("out", "<file>", "", "Output file", true);
59 setValidFormats_("out", ListUtils::create<String>("consensusXML"));
60 registerInputFile_("design", "<file>", "", "input file containing the experimental design", false);
61 setValidFormats_("design", ListUtils::create<String>("tsv"));
62 addEmptyLine_();
63 registerFlag_("keep_subelements", "For consensusXML input only: If set, the sub-features of the inputs are transferred to the output.");
64 }
65
66 ExitCodes common_main_(FeatureGroupingAlgorithm * algorithm,
67 bool labeled = false)
68 {
69 //-------------------------------------------------------------
70 // parameter handling
71 //-------------------------------------------------------------
72 StringList ins;
73 if (labeled)
74 {
75 ins.push_back(getStringOption_("in"));
76 }
77 else
78 {
79 ins = getStringList_("in");
80 }
81 String out = getStringOption_("out");
82
83 //-------------------------------------------------------------
84 // check for valid input
85 //-------------------------------------------------------------
86 // check if all input files have the correct type
87 FileTypes::Type file_type = FileHandler::getType(ins[0]);
88 for (Size i = 0; i < ins.size(); ++i)
89 {
90 if (FileHandler::getType(ins[i]) != file_type)
91 {
92 writeLogError_("Error: All input files must be of the same type!");
93 return ILLEGAL_PARAMETERS;
94 }
95 }
96
97 //-------------------------------------------------------------
98 // set up algorithm
99 //-------------------------------------------------------------
100 Param algorithm_param = getParam_().copy("algorithm:", true);
101 writeDebug_("Used algorithm parameters", algorithm_param, 3);
102 algorithm->setParameters(algorithm_param);
103
104 //-------------------------------------------------------------
105 // perform grouping
106 //-------------------------------------------------------------
107 // load input
108 ConsensusMap out_map;
109 StringList ms_run_locations;
110
111 String design_file;
112
113 // TODO: support design in labeled feature linker
114 if (!labeled)
115 {
116 design_file = getStringOption_("design");
117 }
118
119 if (file_type == FileTypes::CONSENSUSXML && !design_file.empty())
120 {
121 writeLogError_("Error: Using fractionated design with consensusXML als input is not supported!");
122 return ILLEGAL_PARAMETERS;
123 }
124
125 if (file_type == FileTypes::FEATUREXML)
126 {
127 OPENMS_LOG_INFO << "Linking " << ins.size() << " featureXMLs." << endl;
128
129 //-------------------------------------------------------------
130 // Extract (optional) fraction identifiers and associate with featureXMLs
131 //-------------------------------------------------------------
132
133 // determine map of fractions to MS files
134 map<unsigned, vector<String>> frac2files;
135
136 if (!design_file.empty())
137 {
138 // parse design file and determine fractions
139 ExperimentalDesign ed = ExperimentalDesignFile::load(design_file, false);
140
141 // determine if design defines more than one fraction
142 frac2files = ed.getFractionToMSFilesMapping();
143
144 writeDebug_(String("Grouping ") + String(ed.getNumberOfFractions()) + " fractions.", 3);
145
146 // check if all fractions have the same number of MS runs associated
147 if (!ed.sameNrOfMSFilesPerFraction())
148 {
149 writeLogError_("Error: Number of runs must match for every fraction!");
150 return ILLEGAL_PARAMETERS;
151 }
152 }
153 else // no design file given
154 {
155 for (Size i = 0; i != ins.size(); ++i)
156 {
157 frac2files[1].emplace_back(String("file") + String(i)); // associate each run with fraction 1
158 }
159 }
160
161 vector<FeatureMap > maps(ins.size());
162 FeatureXMLFile f;
163 FeatureFileOptions param = f.getOptions();
164
165 // to save memory don't load convex hulls and subordinates
166 param.setLoadSubordinates(false);
167 param.setLoadConvexHull(false);
168 f.setOptions(param);
169
170 Size progress = 0;
171 setLogType(ProgressLogger::CMD);
172 startProgress(0, ins.size(), "reading input");
173 for (Size i = 0; i < ins.size(); ++i)
174 {
175 FeatureMap tmp;
176 f.load(ins[i], tmp);
177
178 StringList ms_runs;
179 tmp.getPrimaryMSRunPath(ms_runs);
180
181 // associate mzML file with map i in consensusXML
182 if (ms_runs.size() > 1 || ms_runs.empty())
183 {
184 OPENMS_LOG_WARN << "Exactly one MS run should be associated with a FeatureMap. "
185 << ms_runs.size()
186 << " provided." << endl;
187 }
188 else
189 {
190 out_map.getColumnHeaders()[i].filename = ms_runs.front();
191 }
192 out_map.getColumnHeaders()[i].size = tmp.size();
193 out_map.getColumnHeaders()[i].unique_id = tmp.getUniqueId();
194
195 // copy over information on the primary MS run
196 ms_run_locations.insert(ms_run_locations.end(), ms_runs.begin(), ms_runs.end());
197
198 // to save memory, remove convex hulls, subordinates:
199 for (Feature& ft : tmp)
200 {
201 String adduct;
202 String group;
203 //exception: addduct information
204 if (ft.metaValueExists(Constants::UserParam::DC_CHARGE_ADDUCTS))
205 {
206 adduct = ft.getMetaValue(Constants::UserParam::DC_CHARGE_ADDUCTS);
207 }
208 if (ft.metaValueExists(Constants::UserParam::ADDUCT_GROUP))
209 {
210 group = ft.getMetaValue(Constants::UserParam::ADDUCT_GROUP);
211 }
212 ft.getSubordinates().clear();
213 ft.getConvexHulls().clear();
214 ft.clearMetaInfo();
215 if (!adduct.empty())
216 {
217 ft.setMetaValue(Constants::UserParam::DC_CHARGE_ADDUCTS, adduct);
218 }
219 if (!group.empty())
220 {
221 ft.setMetaValue("Group", group);
222 }
223
224 }
225
226 maps[i] = tmp;
227 maps[i].updateRanges();
228
229 setProgress(progress++);
230 }
231 endProgress();
232
233 // exception for "labeled" algorithms: copy file descriptions
234 if (labeled)
235 {
236 out_map.getColumnHeaders()[1] = out_map.getColumnHeaders()[0];
237 out_map.getColumnHeaders()[0].label = "light";
238 out_map.getColumnHeaders()[1].label = "heavy";
239 ms_run_locations.push_back(ms_run_locations[0]);
240 }
241
243 // invoke feature grouping algorithm
244
245 if (frac2files.size() == 1) // group one fraction
246 {
247 algorithm->group(maps, out_map);
248 }
249 else // group multiple fractions
250 {
251 writeDebug_(String("Stored in ") + String(maps.size()) + " maps.", 3);
252 for (Size i = 1; i <= frac2files.size(); ++i)
253 {
254 vector<FeatureMap> fraction_maps;
255 // TODO FRACTIONS: here we assume that the order of featureXML is from fraction 1..n
256 // we should check if these are shuffled and error / warn
257 for (size_t feature_map_index = 0; feature_map_index != frac2files[i].size(); ++feature_map_index)
258 {
259 fraction_maps.push_back(maps[feature_map_index]);
260 }
261 algorithm->group(fraction_maps, out_map);
262 }
263 }
264 }
265 else
266 {
267 //TODO isn't it better to have this option/functionality in the FeatureGroupingAlgorithm class?
268 // Otherwise everyone has to remember e.g. to annotate the old map_index etc.
269 bool keep_subelements = getFlag_("keep_subelements");
270 vector<ConsensusMap> maps(ins.size());
271 ConsensusXMLFile f;
272 for (Size i = 0; i < ins.size(); ++i)
273 {
274 f.load(ins[i], maps[i]);
275 maps[i].updateRanges();
276 // copy over information on the primary MS run
277 StringList ms_runs;
278 maps[i].getPrimaryMSRunPath(ms_runs);
279 ms_run_locations.insert(ms_run_locations.end(), ms_runs.begin(), ms_runs.end());
280 if (keep_subelements)
281 {
282 auto saveOldMapIndex =
283 [](PeptideIdentification &p)
284 {
285 if (p.metaValueExists("map_index"))
286 {
287 p.setMetaValue("old_map_index", p.getMetaValue("map_index"));
288 }
289 else
290 {
291 OPENMS_LOG_WARN << "Warning: map_index not found in PeptideID. The tool will not be able to assign a"
292 "consistent one. Check the settings of previous tools." << std::endl;
293 }
294 };
295 maps[i].applyFunctionOnPeptideIDs(saveOldMapIndex, true);
296 }
297 }
298 // group
299 algorithm->group(maps, out_map);
300
301 // set file descriptions:
302
303 if (!keep_subelements)
304 {
305 for (Size i = 0; i < ins.size(); ++i)
306 {
307 out_map.getColumnHeaders()[i].filename = ins[i];
308 out_map.getColumnHeaders()[i].size = maps[i].size();
309 out_map.getColumnHeaders()[i].unique_id = maps[i].getUniqueId();
310 }
311 }
312 else
313 {
314 // components of the output map are not the input maps themselves, but
315 // the components of the input maps:
316 algorithm->transferSubelements(maps, out_map);
317 }
318 }
319
320 // assign unique ids
321 out_map.applyMemberFunction(&UniqueIdInterface::setUniqueId);
322
323 // annotate output with data processing info
324 addDataProcessing_(out_map,
325 getProcessingInfo_(DataProcessing::FEATURE_GROUPING));
326
327
328 // sort list of peptide identifications in each consensus feature by map index
329 out_map.sortPeptideIdentificationsByMapIndex();
330
331 // write output
332 ConsensusXMLFile().store(out, out_map);
333
334 // some statistics
335 map<Size, UInt> num_consfeat_of_size;
336 for (const ConsensusFeature& cf : out_map)
337 {
338 ++num_consfeat_of_size[cf.size()];
339 }
340
341 OPENMS_LOG_INFO << "Number of consensus features:" << endl;
342 for (map<Size, UInt>::reverse_iterator i = num_consfeat_of_size.rbegin();
343 i != num_consfeat_of_size.rend(); ++i)
344 {
345 OPENMS_LOG_INFO << " of size " << setw(2) << i->first << ": " << setw(6)
346 << i->second << endl;
347 }
348 OPENMS_LOG_INFO << " total: " << setw(6) << out_map.size() << endl;
349
350 return EXECUTION_OK;
351 }
352
353};
354
Definition FLASHDeconvWizardBase.cpp:26