From 8d2172c834e3534ce7a1582b171c2f7a248e10b5 Mon Sep 17 00:00:00 2001 From: David James Sherman <david.sherman@inria.fr> Date: Fri, 8 Mar 2024 21:23:34 +0100 Subject: [PATCH 01/14] All data in data/ Both simulation.py and plot_com.py look for data in data/ Transcript data in /data - Closes Tango/tango_models#2 Remove copy of data from pipeline/ --- {pipeline => data}/TANGO_MTA_WF_310320.xlsx | Bin .../DataNormLL_RPKM_meaned.tsv | 0 .../DataNormLP_RPKM_meaned.tsv | 0 .../DataNormPF_RPKM_meaned.tsv | 0 .../exchanged_metabolites_smetana.xlsx | Bin .../freudenreichii_lactose.csv | 0 .../data_transcript/lactis_lactose.csv | 0 .../data_transcript/plantarum_lactose.csv | 0 pipeline/com_DM.tsv | 8 -- pipeline/com_time.tsv | 8 -- pipeline/community_data_tango.tsv | 21 ----- pipeline/data_GrowthExp.csv | 89 ------------------ pipeline/data_metabolites_freud.tsv | 9 -- scripts/plot_com.py | 2 +- src/tango_models/simulation.py | 2 +- tango_models.cwl | 6 +- 16 files changed, 5 insertions(+), 140 deletions(-) rename {pipeline => data}/TANGO_MTA_WF_310320.xlsx (100%) rename {pipeline => data}/data_transcript/DataNormLL_RPKM_meaned.tsv (100%) rename {pipeline => data}/data_transcript/DataNormLP_RPKM_meaned.tsv (100%) rename {pipeline => data}/data_transcript/DataNormPF_RPKM_meaned.tsv (100%) rename {pipeline => data}/data_transcript/exchanged_metabolites_smetana.xlsx (100%) rename {pipeline => data}/data_transcript/freudenreichii_lactose.csv (100%) rename {pipeline => data}/data_transcript/lactis_lactose.csv (100%) rename {pipeline => data}/data_transcript/plantarum_lactose.csv (100%) delete mode 100644 pipeline/com_DM.tsv delete mode 100644 pipeline/com_time.tsv delete mode 100644 pipeline/community_data_tango.tsv delete mode 100644 pipeline/data_GrowthExp.csv delete mode 100644 pipeline/data_metabolites_freud.tsv diff --git a/pipeline/TANGO_MTA_WF_310320.xlsx b/data/TANGO_MTA_WF_310320.xlsx similarity index 100% rename from pipeline/TANGO_MTA_WF_310320.xlsx rename to data/TANGO_MTA_WF_310320.xlsx diff --git a/pipeline/data_transcript/DataNormLL_RPKM_meaned.tsv b/data/data_transcript/DataNormLL_RPKM_meaned.tsv similarity index 100% rename from pipeline/data_transcript/DataNormLL_RPKM_meaned.tsv rename to data/data_transcript/DataNormLL_RPKM_meaned.tsv diff --git a/pipeline/data_transcript/DataNormLP_RPKM_meaned.tsv b/data/data_transcript/DataNormLP_RPKM_meaned.tsv similarity index 100% rename from pipeline/data_transcript/DataNormLP_RPKM_meaned.tsv rename to data/data_transcript/DataNormLP_RPKM_meaned.tsv diff --git a/pipeline/data_transcript/DataNormPF_RPKM_meaned.tsv b/data/data_transcript/DataNormPF_RPKM_meaned.tsv similarity index 100% rename from pipeline/data_transcript/DataNormPF_RPKM_meaned.tsv rename to data/data_transcript/DataNormPF_RPKM_meaned.tsv diff --git a/pipeline/data_transcript/exchanged_metabolites_smetana.xlsx b/data/data_transcript/exchanged_metabolites_smetana.xlsx similarity index 100% rename from pipeline/data_transcript/exchanged_metabolites_smetana.xlsx rename to data/data_transcript/exchanged_metabolites_smetana.xlsx diff --git a/pipeline/data_transcript/freudenreichii_lactose.csv b/data/data_transcript/freudenreichii_lactose.csv similarity index 100% rename from pipeline/data_transcript/freudenreichii_lactose.csv rename to data/data_transcript/freudenreichii_lactose.csv diff --git a/pipeline/data_transcript/lactis_lactose.csv b/data/data_transcript/lactis_lactose.csv similarity index 100% rename from pipeline/data_transcript/lactis_lactose.csv rename to data/data_transcript/lactis_lactose.csv diff --git a/pipeline/data_transcript/plantarum_lactose.csv b/data/data_transcript/plantarum_lactose.csv similarity index 100% rename from pipeline/data_transcript/plantarum_lactose.csv rename to data/data_transcript/plantarum_lactose.csv diff --git a/pipeline/com_DM.tsv b/pipeline/com_DM.tsv deleted file mode 100644 index db81330..0000000 --- a/pipeline/com_DM.tsv +++ /dev/null @@ -1,8 +0,0 @@ -State DM -Linoc 11.65 -Finoc 11.65 -Moul 40.53 -Demoul 51.44 -AS 52.28 -Aff4s 53.41 -Aff7s 53.55 diff --git a/pipeline/com_time.tsv b/pipeline/com_time.tsv deleted file mode 100644 index 2363b94..0000000 --- a/pipeline/com_time.tsv +++ /dev/null @@ -1,8 +0,0 @@ -State time -Linoc 0 -Finoc 18 -Moul 19.5 -Demoul 40 -AS 60 -Aff4s 732 -Aff7s 1236 diff --git a/pipeline/community_data_tango.tsv b/pipeline/community_data_tango.tsv deleted file mode 100644 index 74b2abe..0000000 --- a/pipeline/community_data_tango.tsv +++ /dev/null @@ -1,21 +0,0 @@ -bacteria density_com step time -lactis 5.7 Linoc 0 -lactis 7.5 Before_Moul 19.5 -lactis 8.78 Moul 40 -lactis 9.12 Demoul 60 -lactis 9.06 before_ripening 60 -lactis 8.95 Aff4s 672 -lactis 8.45 Aff7s 1176 -plantarum 5.2 Linoc 0 -plantarum 5.4 Before_Moul 19.5 -plantarum 6.49 Moul 40 -plantarum 7.53 Demoul 60 -plantarum 7.92 before_ripening 60 -plantarum 8.47 Aff4s 672 -plantarum 8.47 Aff7s 1176 -freudenreichii 6.1 Before_Moul 19.5 -freudenreichii 7.16 Moul 40 -freudenreichii 8.04 Demoul 60 -freudenreichii 8.11 before_ripening 60 -freudenreichii 8.56 Aff4s 672 -freudenreichii 8.59 Aff7s 1176 \ No newline at end of file diff --git a/pipeline/data_GrowthExp.csv b/pipeline/data_GrowthExp.csv deleted file mode 100644 index 6ef60b0..0000000 --- a/pipeline/data_GrowthExp.csv +++ /dev/null @@ -1,89 +0,0 @@ -species,experiment,replicate,time,type,unit,value -lactis,GrowthExp,1,0,pH,[-],6.7 -lactis,GrowthExp,1,3,pH,[-],6.63 -lactis,GrowthExp,1,4,pH,[-],6.55 -lactis,GrowthExp,1,5,pH,[-],6.49 -lactis,GrowthExp,1,6,pH,[-],6.45 -lactis,GrowthExp,1,7,pH,[-],6.33 -lactis,GrowthExp,1,8,pH,[-],6.25 -lactis,GrowthExp,1,9,pH,[-],6.07 -lactis,GrowthExp,1,10,pH,[-],6 -lactis,GrowthExp,1,11,pH,[-],5.99 -lactis,GrowthExp,1,12,pH,[-],5.94 -lactis,GrowthExp,1,13,pH,[-],5.92 -lactis,GrowthExp,1,14,pH,[-],5.93 -lactis,GrowthExp,1,15,pH,[-],5.82 -lactis,GrowthExp,1,16,pH,[-],5.88 -lactis,GrowthExp,1,17,pH,[-],5.8 -lactis,GrowthExp,1,79,pH,[-],5.12 -lactis,GrowthExp,2,0,pH,[-],6.7 -lactis,GrowthExp,2,3,pH,[-],6.61 -lactis,GrowthExp,2,4,pH,[-],6.54 -lactis,GrowthExp,2,5,pH,[-],6.49 -lactis,GrowthExp,2,6,pH,[-],6.43 -lactis,GrowthExp,2,7,pH,[-],6.3 -lactis,GrowthExp,2,8,pH,[-],6.23 -lactis,GrowthExp,2,9,pH,[-],6.08 -lactis,GrowthExp,2,10,pH,[-],6 -lactis,GrowthExp,2,11,pH,[-],6.01 -lactis,GrowthExp,2,12,pH,[-],5.93 -lactis,GrowthExp,2,13,pH,[-],5.95 -lactis,GrowthExp,2,14,pH,[-],5.94 -lactis,GrowthExp,2,15,pH,[-],5.85 -lactis,GrowthExp,2,16,pH,[-],5.85 -lactis,GrowthExp,2,17,pH,[-],5.81 -lactis,GrowthExp,2,79,pH,[-],5.11 -lactis,GrowthExp,1,0,numbering,CFU.g^-1,1000000 -lactis,GrowthExp,1,5,numbering,CFU.g^-1,165000000 -lactis,GrowthExp,1,7,numbering,CFU.g^-1,177000000 -lactis,GrowthExp,1,9,numbering,CFU.g^-1,186000000 -lactis,GrowthExp,1,14,numbering,CFU.g^-1,173000000 -lactis,GrowthExp,1,16,numbering,CFU.g^-1,155000000 -lactis,GrowthExp,1,79,numbering,CFU.g^-1,248500000 -lactis,GrowthExp,2,0,numbering,CFU.g^-1,4000000 -lactis,GrowthExp,2,5,numbering,CFU.g^-1,152000000 -lactis,GrowthExp,2,7,numbering,CFU.g^-1,237000000 -lactis,GrowthExp,2,9,numbering,CFU.g^-1,217000000 -lactis,GrowthExp,2,14,numbering,CFU.g^-1,219000000 -lactis,GrowthExp,2,16,numbering,CFU.g^-1,132000000 -lactis,GrowthExp,2,79,numbering,CFU.g^-1,125000000 -plantarum,GrowthExp,1,0,pH,[-],6.7 -plantarum,GrowthExp,1,5,pH,[-],6.68 -plantarum,GrowthExp,1,7,pH,[-],6.67 -plantarum,GrowthExp,1,8,pH,[-],6.64 -plantarum,GrowthExp,1,9,pH,[-],6.57 -plantarum,GrowthExp,1,14,pH,[-],6.55 -plantarum,GrowthExp,1,16,pH,[-],6.52 -plantarum,GrowthExp,1,79,pH,[-],5.7 -plantarum,GrowthExp,2,0,pH,[-],6.7 -plantarum,GrowthExp,2,5,pH,[-],6.68 -plantarum,GrowthExp,2,7,pH,[-],6.65 -plantarum,GrowthExp,2,8,pH,[-],6.65 -plantarum,GrowthExp,2,9,pH,[-],6.54 -plantarum,GrowthExp,2,14,pH,[-],6.53 -plantarum,GrowthExp,2,16,pH,[-],6.5 -plantarum,GrowthExp,2,79,pH,[-],5.71 -plantarum,GrowthExp,1,0,numbering,CFU.g^-1,3000000 -plantarum,GrowthExp,1,5,numbering,CFU.g^-1,9000000 -plantarum,GrowthExp,1,7,numbering,CFU.g^-1,17000000 -plantarum,GrowthExp,1,9,numbering,CFU.g^-1,26000000 -plantarum,GrowthExp,1,14,numbering,CFU.g^-1,44000000 -plantarum,GrowthExp,1,16,numbering,CFU.g^-1,84000000 -plantarum,GrowthExp,1,79,numbering,CFU.g^-1,110000000 -plantarum,GrowthExp,2,0,numbering,CFU.g^-1,6000000 -plantarum,GrowthExp,2,5,numbering,CFU.g^-1,14000000 -plantarum,GrowthExp,2,7,numbering,CFU.g^-1,15000000 -plantarum,GrowthExp,2,9,numbering,CFU.g^-1,25000000 -plantarum,GrowthExp,2,14,numbering,CFU.g^-1,40000000 -plantarum,GrowthExp,2,16,numbering,CFU.g^-1,59000000 -plantarum,GrowthExp,2,79,numbering,CFU.g^-1,117000000 -freudenreichii,GrowthExp,1,0,numbering,CFU.g^-1,4400000 -freudenreichii,GrowthExp,1,23,numbering,CFU.g^-1,230000000 -freudenreichii,GrowthExp,1,40,numbering,CFU.g^-1,1700000000 -freudenreichii,GrowthExp,1,48,numbering,CFU.g^-1,1200000000 -freudenreichii,GrowthExp,1,122,numbering,CFU.g^-1,2300000000 -freudenreichii,GrowthExp,2,0,numbering,CFU.g^-1,3000000 -freudenreichii,GrowthExp,2,23,numbering,CFU.g^-1,370000000 -freudenreichii,GrowthExp,2,40,numbering,CFU.g^-1,1700000000 -freudenreichii,GrowthExp,2,48,numbering,CFU.g^-1,1300000000 -freudenreichii,GrowthExp,2,122,numbering,CFU.g^-1,2000000000 diff --git a/pipeline/data_metabolites_freud.tsv b/pipeline/data_metabolites_freud.tsv deleted file mode 100644 index b83d438..0000000 --- a/pipeline/data_metabolites_freud.tsv +++ /dev/null @@ -1,9 +0,0 @@ -,time,mean,std,metabolite -0,0.0,16.5,0.08,EX_lac__L_e -1,89.0,7.88,0.08,EX_lac__L_e -2,0.0,0,0.02,EX_ac_e -3,89.0,3.07,0.02,EX_ac_e -4,0.0,0,0.063,EX_succ_e -5,89.0,0.371,0.063,EX_succ_e -6,0.0,0,0.02,EX_ppa_e -7,89.0,8.31,0.02,EX_ppa_e diff --git a/scripts/plot_com.py b/scripts/plot_com.py index dfc0fe8..40c0afe 100644 --- a/scripts/plot_com.py +++ b/scripts/plot_com.py @@ -23,7 +23,7 @@ def get_data_com(): time = time_o.loc[simulation.index] - experiments = pd.read_excel('pipeline/TANGO_MTA_WF_310320.xlsx').set_index('Stade') + experiments = pd.read_excel('data/TANGO_MTA_WF_310320.xlsx').set_index('Stade') diff --git a/src/tango_models/simulation.py b/src/tango_models/simulation.py index 2a1033c..51c032a 100644 --- a/src/tango_models/simulation.py +++ b/src/tango_models/simulation.py @@ -265,7 +265,7 @@ def sim(sp_reac,media_reac,param_sp,model_list,args_c): print('mod',mod) for p in param['theta_param'][mod]: print(p,param[p]) - std,times,DM = preprocessing_data.get_experimental_data(param['path_time_data_com'],param['path_DM_data_com'],'pipeline/TANGO_MTA_WF_310320.xlsx', interest_value = param['interest_value']) + std,times,DM = preprocessing_data.get_experimental_data(param['path_time_data_com'],param['path_DM_data_com'],'data/TANGO_MTA_WF_310320.xlsx', interest_value = param['interest_value']) res=dFBA.DFBA(param,times=times,DM=DM,com=True,transcrip=False,opt=None,VERBOSE=args_c.verbose,lactic_acid=args_c.lactic_acid_model) print(pd.DataFrame(res)) time_dFBA_end=time.time() diff --git a/tango_models.cwl b/tango_models.cwl index 3a7942a..fb0193f 100644 --- a/tango_models.cwl +++ b/tango_models.cwl @@ -20,9 +20,9 @@ requirements: listing: [] - basename: "pipeline" class: Directory - listing: - - class: File - location: pipeline/TANGO_MTA_WF_310320.xlsx + listing: [] + # - class: File + # location: pipeline/TANGO_MTA_WF_310320.xlsx NetworkAccess: class: NetworkAccess networkAccess: true -- GitLab From 21290ae6ce70dc65ba8d747028f656af2d541f70 Mon Sep 17 00:00:00 2001 From: David James Sherman <david.sherman@inria.fr> Date: Sat, 9 Mar 2024 12:02:55 +0100 Subject: [PATCH 02/14] tango_plot stage models, optional, needed for transcripts --- tango_plots.cwl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tango_plots.cwl b/tango_plots.cwl index 21f761c..e4807a4 100644 --- a/tango_plots.cwl +++ b/tango_plots.cwl @@ -14,6 +14,8 @@ requirements: - $(inputs.data) - entry: $(inputs.results) entryname: results + - entry: $(inputs.models) + entryname: metabolic_models NetworkAccess: class: NetworkAccess networkAccess: true @@ -35,6 +37,12 @@ inputs: {indiv,flux,com,goodness_of_fit,switch_pathways} default: "indiv" + models: + type: Directory? + doc: |- + Directory containing experimental data, referenced in optimize + configuration + results: type: Directory doc: |- -- GitLab From 2580554eb772248b2c573768dbb54cf4205d18d9 Mon Sep 17 00:00:00 2001 From: David James Sherman <david.sherman@inria.fr> Date: Sat, 9 Mar 2024 12:05:51 +0100 Subject: [PATCH 03/14] CWL use enum to limit possible options --- tango_models.cwl | 53 +++++++++++++++++++++++++++++++++++++++++------- tango_plots.cwl | 11 +++++++++- 2 files changed, 56 insertions(+), 8 deletions(-) diff --git a/tango_models.cwl b/tango_models.cwl index fb0193f..566fe20 100644 --- a/tango_models.cwl +++ b/tango_models.cwl @@ -38,18 +38,32 @@ hints: inputs: cobra_solver: - type: string doc: |- solver used for FBA computation by Cobra {glpk,glpk_exact,cplex,scipy} + type: + - "null" + - type: enum + symbols: + - "glpk" + - "glpk_exact" + - "cplex" + - "scipy" + default: "glpk" inputBinding: position: 5 prefix: "-CobraSolver" community_scale: - type: string doc: |- dFBA at the community scale {True,False} + type: + - "null" + - type: enum + symbols: + - "True" + - "False" + default: "False" inputBinding: position: 5 prefix: "-com" @@ -83,22 +97,32 @@ inputs: prefix: "-dp" freud_sim: - type: string? doc: |- different initial conditions for lactate where defined in the experiments for growth and metabolite dosage => this parameter allows to switch between both situations {growth,metabolites} + type: + - "null" + - type: enum + symbols: + - "growth" + - "metabolites" default: "growth" inputBinding: position: 5 prefix: "-fsim" lactic_acid_model: - type: string? doc: |- lactic acid model use total if lactate represents total lactic acid concentration, or dissociated if lactate represents the dissociated lactic acid {total,dissociated} + type: + - "null" + - type: enum + symbols: + - "total" + - "dissociated" default: "total" inputBinding: position: 5 @@ -116,19 +140,29 @@ inputs: prefix: "-mp" optimize: - type: string? doc: |- activatee or not the optimization on parameters {True,False} + type: + - "null" + - type: enum + symbols: + - "True" + - "False" default: "False" inputBinding: position: 5 prefix: "-optim" recovery: - type: string? doc: |- activate or not recovery of the optimization on parameters {True,False} + type: + - "null" + - type: enum + symbols: + - "True" + - "False" default: "False" inputBinding: position: 6 @@ -152,9 +186,14 @@ inputs: prefix: "-sp" verbose: - type: string? doc: |- active or not verbose reporting {True,False} + type: + - "null" + - type: enum + symbols: + - "True" + - "False" default: "False" inputBinding: position: 6 diff --git a/tango_plots.cwl b/tango_plots.cwl index e4807a4..592c4c8 100644 --- a/tango_plots.cwl +++ b/tango_plots.cwl @@ -31,10 +31,19 @@ inputs: location: ./data figure: - type: string doc: |- Which predefined figure to make {indiv,flux,com,goodness_of_fit,switch_pathways} + type: + - "null" + - type: enum + symbols: + - "com" + - "flux" + - "goodness_of_fit" + - "indiv" + - "switch_pathways" + - "transcripts" default: "indiv" models: -- GitLab From 3741a121d483ed4c38a50721ae38aca55dcb0e0b Mon Sep 17 00:00:00 2001 From: David James Sherman <david.sherman@inria.fr> Date: Tue, 12 Mar 2024 07:10:58 +0100 Subject: [PATCH 04/14] workflow --- figures_monoculture_inline.cwl | 138 ++++++++++++++++ mock/data | 1 + mock/pipeline | 1 + mock/tango_models.cwl | 280 +++++++++++++++++++++++++++++++++ mock/tango_models_mock.py | 84 ++++++++++ pipeline/combine_files.cwl | 22 +++ pipeline/combine_res_optim.cwl | 21 +++ pipeline/flatten_array.cwl | 29 ++++ tango_models.cwl | 42 +++-- tango_workflow.cwl | 148 +++++++++++++++++ 10 files changed, 752 insertions(+), 14 deletions(-) create mode 100644 figures_monoculture_inline.cwl create mode 120000 mock/data create mode 120000 mock/pipeline create mode 100644 mock/tango_models.cwl create mode 100755 mock/tango_models_mock.py create mode 100644 pipeline/combine_files.cwl create mode 100644 pipeline/combine_res_optim.cwl create mode 100644 pipeline/flatten_array.cwl create mode 100644 tango_workflow.cwl diff --git a/figures_monoculture_inline.cwl b/figures_monoculture_inline.cwl new file mode 100644 index 0000000..38fd4ba --- /dev/null +++ b/figures_monoculture_inline.cwl @@ -0,0 +1,138 @@ +cwlVersion: v1.2 +class: Workflow + +requirements: + MultipleInputFeatureRequirement: {} + StepInputExpressionRequirement: {} + ScatterFeatureRequirement: {} + InlineJavascriptRequirement: {} + +inputs: + model_list: + type: File[] + default: + - class: File + path: "metabolic_models/freudenreichii.sbml" + format: http://edamontology.org/format_2585 + - class: File + path: "metabolic_models/freudenreichii.sbml" + format: http://edamontology.org/format_2585 + - class: File + path: "metabolic_models/lactis.sbml" + format: http://edamontology.org/format_2585 + - class: File + path: "metabolic_models/plantarum.sbml" + format: http://edamontology.org/format_2585 + freud_sim_list: + type: string[] + default: + - "metabolites" + - "growth" + - "growth" + - "growth" + +outputs: + scattered_results: + type: Directory + outputSource: flatten/flattened + plots: + type: File[] + outputSource: + - plot_indiv/plots + # standard_output: + # type: File + # outputSource: plot_indiv/standard_output + # standard_error: + # type: File + # outputSource: plot_indiv/standard_error + +steps: + run_indiv: + doc: |- + For each model file in the input model_list, run an individual + simulation. + run: tango_models.cwl + scatter: [model, freud_sim] + scatterMethod: dotproduct + in: + model: + source: model_list + valueFrom: $([self]) # wrap in singleton array + freud_sim: + source: freud_sim_list + cobra_solver: + valueFrom: "glpk" + community_scale: + valueFrom: "False" + culture: + default: + class: File + location: "pipeline/config_file/config_culture.yml" + format: http://edamontology.org/format_3750 + dynamics: + default: + class: File + location: "pipeline/config_file/config_dynamic.yml" + format: http://edamontology.org/format_3750 + solver: + default: + class: File + location: "pipeline/config_file/config_optim.yml" + format: http://edamontology.org/format_3750 + out: [results] + + plot_indiv: + doc: |- + Use the combined simulation results to generate figure plots. + run: tango_plots.cwl + in: + figure: + valueFrom: "indiv" + results: + source: flatten/flattened + out: [plots, standard_output, standard_error] + + flatten: + doc: |- + Each individual simulation creates a separate results directory, + but plot_indiv expects a single directory. This step combines + the results into a single directory. Since a result file might + be generated several times by different runs, skip duplicates to + avoid a CWL file staging conflict. + run: + class: ExpressionTool + requirements: + InlineJavascriptRequirement: {} + LoadListingRequirement: { loadListing: shallow_listing } + inputs: + nested: Directory[] + outputs: + flattened: Directory + expression: | + ${ + var flat = []; + for (var i = 0; i < inputs.nested.length; i++) { + flat = + flat.concat( + inputs.nested[i].listing.filter( + // This ES5.1 filtering is very ugly + function(item) { + return ! flat.find( + function(x) { + return x.basename === + item.basename; + }); + })); + } + return { + "flattened": { + "class": "Directory", + "basename": "results", + "listing": flat + } + }; + } + in: + nested: + source: run_indiv/results + out: [flattened] diff --git a/mock/data b/mock/data new file mode 120000 index 0000000..eed2d0b --- /dev/null +++ b/mock/data @@ -0,0 +1 @@ +../data/ \ No newline at end of file diff --git a/mock/pipeline b/mock/pipeline new file mode 120000 index 0000000..32aee15 --- /dev/null +++ b/mock/pipeline @@ -0,0 +1 @@ +../pipeline \ No newline at end of file diff --git a/mock/tango_models.cwl b/mock/tango_models.cwl new file mode 100644 index 0000000..b40b82c --- /dev/null +++ b/mock/tango_models.cwl @@ -0,0 +1,280 @@ +cwlVersion: v1.2 +class: CommandLineTool + +label: Numerical reconciliation of bacterial fermentation in cheese production +doc: |- + TANGO uses a numerical strategy to reconcile multi-omics data and + metabolic networks for characterising bacterial fermentation in + cheese production composed of 3 species: + *P. freudenreichii*, *L. lactis* and *L. plantarum* + +requirements: + ShellCommandRequirement: {} + InlineJavascriptRequirement: {} + InitialWorkDirRequirement: + listing: + - $(inputs.data) + - $(inputs.initial_res_optim) + - basename: "results" + class: Directory + listing: [] + - basename: "pipeline" + class: Directory + listing: [] + NetworkAccess: + class: NetworkAccess + networkAccess: true + +hints: + DockerRequirement: + dockerImageId: tango_models + dockerFile: |- + FROM continuumio/miniconda + WORKDIR /usr/src/app + COPY ./ ./ + RUN conda env create -f env-minimal.yml + +inputs: + cobra_solver: + doc: |- + solver used for FBA computation by Cobra + {glpk,glpk_exact,cplex,scipy} + type: + # - "null" + - type: enum + symbols: + - "glpk" + - "glpk_exact" + - "cplex" + - "scipy" + default: "glpk" + inputBinding: + position: 5 + prefix: "-CobraSolver" + + community_scale: + doc: |- + dFBA at the community scale {True,False} + type: + # - "null" + - type: enum + symbols: + - "True" + - "False" + default: "False" + inputBinding: + position: 5 + prefix: "-com" + + culture: + type: File + format: edam:format_3750 + doc: |- + specific media and/or specif modification applied to the FBA + models + default: + class: File + location: "pipeline/config_file/config_culture.yml" + format: http://edamontology.org/format_3750 + inputBinding: + position: 2 + prefix: "-cp" + + data: + type: Directory + doc: |- + Directory containing experimental data, referenced in optimize + configuration + default: + class: Directory + location: ./data + + dynamics: + type: File + format: edam:format_3750 + doc: |- + specific paramters for your dFBA analysis + default: + class: File + location: "pipeline/config_file/config_dynamic.yml" + format: http://edamontology.org/format_3750 + inputBinding: + position: 3 + prefix: "-dp" + + freud_sim: + doc: |- + different initial conditions for lactate where defined in the + experiments for growth and metabolite dosage => this parameter + allows to switch between both situations {growth,metabolites} + type: + # - "null" + - type: enum + symbols: + - "growth" + - "metabolites" + default: "growth" + inputBinding: + position: 5 + prefix: "-fsim" + + lactic_acid_model: + doc: |- + lactic acid model use total if lactate represents total lactic + acid concentration, or dissociated if lactate represents the + dissociated lactic acid {total,dissociated} + type: + # - "null" + - type: enum + symbols: + - "total" + - "dissociated" + default: "total" + inputBinding: + position: 5 + prefix: "-lam" + + model: + type: + type: array + items: File + format: edam:format_2585 + doc: |- + SBML model + inputBinding: + position: 1 + prefix: "-mp" + + optimize: + doc: |- + activatee or not the optimization on parameters {True,False} + type: + # - "null" + - type: enum + symbols: + - "True" + - "False" + default: "False" + inputBinding: + position: 5 + prefix: "-optim" + + recovery: + doc: |- + activate or not recovery of the optimization on parameters + {True,False} + type: + # - "null" + - type: enum + symbols: + - "True" + - "False" + default: "False" + inputBinding: + position: 6 + prefix: "-r" + + initial_res_optim: + type: File? + doc: |- + Per-species optimization results + # default: + # class: File + # location: ./res_optim.txt + + solver: + type: File + format: edam:format_3750 + doc: |- + configuration file for optimization + default: + class: File + location: "pipeline/config_file/config_optim.yml" + format: http://edamontology.org/format_3750 + inputBinding: + position: 4 + prefix: "-sp" + + verbose: + doc: |- + active or not verbose reporting {True,False} + type: + # - "null" + - type: enum + symbols: + - "True" + - "False" + default: "False" + inputBinding: + position: 6 + prefix: "-v" + +baseCommand: + - ./tango_models_mock + - sim + +outputs: + results: + type: File[] + outputBinding: + glob: "results/*.pkz" + outputEval: | + ${ + return self.filter(function(i) { return -1 == i.basename.indexOf("_t_") }) + } + doc: | + Pickled Pandas data frames containing simulation results + format: edam:format_2333 + recovery_optimisation: + type: File? + outputBinding: + glob: "recover_optim*yml" + doc: |- + Pickle file of recovery optimization + format: edam:format_2333 + result_optimization: + type: File + outputBinding: + glob: "res_optim.txt" + doc: |- + optimization results + format: edam:format_3475 + standard_output: + type: stdout + format: edam:format_1964 + standard_error: + type: stderr + format: edam:format_1964 + +stdout: stdout.txt +stderr: stderr.txt + +s:author: + - class: s:Person + s:name: Simon Labarthe + s:identifier: https://orcid.org/0000-0003-2114-0697 + - class: s:Person + s:name: Clémence Frioux + s:identifier: https://orcid.org/0000-0003-2114-0697 + - class: s:Person + s:name: David James Sherman + s:identifier: https://orcid.org/0000-0002-2316-1005 + - class: s:Person + s:name: Maxime Lecomte + s:identifier: https://orcid.org/0000-0002-4558-6151 + - class: s:Person + s:name: Hélène Falentin + s:identifier: https://orcid.org/0000-0001-6254-5303 + - class: s:Person + s:name: Julie Aubert + s:identifier: https://orcid.org/0000-0001-5203-5748 + +s:dateCreated: "2024-03-02" +s:license: https://spdx.org/licenses/LGPL-3.0-or-later + +$namespaces: + s: https://schema.org/ + edam: http://edamontology.org/ +$schemas: + - https://schema.org/version/latest/schemaorg-current-http.rdf + - http://edamontology.org/EDAM_1.23.owl diff --git a/mock/tango_models_mock.py b/mock/tango_models_mock.py new file mode 100755 index 0000000..282f4d1 --- /dev/null +++ b/mock/tango_models_mock.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python3 + +import click +import sys +from pathlib import Path + + +@click.command() +@click.option( + "--cobra_solver", help="solver used for FBA computation by Cobra", default="glpk" +) +@click.option("--community_scale", help="dFBA at the community scale", default="False") +@click.option( + "--freud_sim", help="different initial conditions for lactate", default="growth" +) +@click.option("--lactic_acid_model", help="lactic acid model", default="total") +@click.option("--optimize", help="type: array", default="False") +@click.option( + "--recovery", help="activate or not recovery of the optimization", default="False" +) +@click.option( + "--verbose", help="active or not verbose reporting {True,False}", default="False" +) +@click.argument( + "culture", + # help="specific media and/or specif modification", + type=click.File("rb"), + default="pipeline/config_file/config_culture.yml", +) +@click.argument( + "dynamics", + # help="specific paramters for your dFBA analysis", + type=click.File("rb"), + default="pipeline/config_file/config_dynamic.yml", +) +@click.argument( + "solver", + # help="configuration file for optimization", + type=click.File("rb"), + default="pipeline/config_file/config_optim.yml", +) +@click.argument( + "data", + # help="experimental data", + type=click.Path("rb"), + default="./data", +) +@click.argument( + "initial_res_optim", + # help="Per-species optimization results", + type=click.Path(exists=False), + default="./res_optim.txt", +) +@click.argument( + "model", + # help="SBML model", + nargs=-1, +) +def tango_models( + cobra_solver, + community_scale, + freud_sim, + lactic_acid_model, + optimize, + recovery, + verbose, + culture, + dynamics, + solver, + data, + initial_res_optim, + model, +): + """Mock TANGO models.""" + click.echo(f"TANGO") + + results = Path("results/") + results.mkdir(parents=True, exist_ok=True) + + click.echo(f'Models { ", ".join(model) }') + + +if __name__ == "__main__": + tango_models() diff --git a/pipeline/combine_files.cwl b/pipeline/combine_files.cwl new file mode 100644 index 0000000..b1dc9aa --- /dev/null +++ b/pipeline/combine_files.cwl @@ -0,0 +1,22 @@ +cwlVersion: v1.2 +class: CommandLineTool +requirements: + InlineJavascriptRequirement: {} + InitialWorkDirRequirement: + listing: + - entryname: $(inputs.name) + entry: ${ return inputs.file_list.map(function(f) {return f.contents}).join("") } +inputs: + name: string + file_list: + type: + type: array + items: File + inputBinding: + loadContents: true +outputs: + concat: + type: File + outputBinding: + glob: $(inputs.name) +baseCommand: ["true"] diff --git a/pipeline/combine_res_optim.cwl b/pipeline/combine_res_optim.cwl new file mode 100644 index 0000000..d0f479b --- /dev/null +++ b/pipeline/combine_res_optim.cwl @@ -0,0 +1,21 @@ +cwlVersion: v1.2 +class: CommandLineTool +requirements: + InlineJavascriptRequirement: {} + InitialWorkDirRequirement: + listing: + - entryname: res_optim.txt + entry: ${ return inputs.file_list.map(function(f) {return f.contents}).join("") } +inputs: + file_list: + type: + type: array + items: File + inputBinding: + loadContents: true +outputs: + concat: + type: File + outputBinding: + glob: "res_optim.txt" +baseCommand: ["true"] diff --git a/pipeline/flatten_array.cwl b/pipeline/flatten_array.cwl new file mode 100644 index 0000000..9b701a9 --- /dev/null +++ b/pipeline/flatten_array.cwl @@ -0,0 +1,29 @@ +cwlVersion: v1.2 +class: ExpressionTool +doc: "Flatten a nested array of 'Any' type into an array." +requirements: + InlineJavascriptRequirement: {} +inputs: + nested: + type: + type: array + items: + type: array + items: ["null", Any] +outputs: + flattened: + type: + type: array + items: Any +expression: | + ${ + var flattened = []; + for (var i = 0; i < inputs.nested.length; i++) { + for (var j = 0; j < inputs.nested[i].length; j++) { + if (inputs.nested[i][j] != null) { + flattened.push(inputs.nested[i][j]); + } + } + } + return {"flattened": flattened}; + } diff --git a/tango_models.cwl b/tango_models.cwl index 566fe20..54ecc16 100644 --- a/tango_models.cwl +++ b/tango_models.cwl @@ -21,8 +21,6 @@ requirements: - basename: "pipeline" class: Directory listing: [] - # - class: File - # location: pipeline/TANGO_MTA_WF_310320.xlsx NetworkAccess: class: NetworkAccess networkAccess: true @@ -42,7 +40,7 @@ inputs: solver used for FBA computation by Cobra {glpk,glpk_exact,cplex,scipy} type: - - "null" + # - "null" - type: enum symbols: - "glpk" @@ -58,7 +56,7 @@ inputs: doc: |- dFBA at the community scale {True,False} type: - - "null" + # - "null" - type: enum symbols: - "True" @@ -74,6 +72,10 @@ inputs: doc: |- specific media and/or specif modification applied to the FBA models + default: + class: File + location: "pipeline/config_file/config_culture.yml" + format: http://edamontology.org/format_3750 inputBinding: position: 2 prefix: "-cp" @@ -92,6 +94,10 @@ inputs: format: edam:format_3750 doc: |- specific paramters for your dFBA analysis + default: + class: File + location: "pipeline/config_file/config_dynamic.yml" + format: http://edamontology.org/format_3750 inputBinding: position: 3 prefix: "-dp" @@ -102,7 +108,7 @@ inputs: experiments for growth and metabolite dosage => this parameter allows to switch between both situations {growth,metabolites} type: - - "null" + # - "null" - type: enum symbols: - "growth" @@ -118,7 +124,7 @@ inputs: acid concentration, or dissociated if lactate represents the dissociated lactic acid {total,dissociated} type: - - "null" + # - "null" - type: enum symbols: - "total" @@ -143,7 +149,7 @@ inputs: doc: |- activatee or not the optimization on parameters {True,False} type: - - "null" + # - "null" - type: enum symbols: - "True" @@ -158,7 +164,7 @@ inputs: activate or not recovery of the optimization on parameters {True,False} type: - - "null" + # - "null" - type: enum symbols: - "True" @@ -172,15 +178,19 @@ inputs: type: File? doc: |- Per-species optimization results - default: - class: File - location: ./res_optim.txt + # default: + # class: File + # location: ./res_optim.txt solver: type: File format: edam:format_3750 doc: |- configuration file for optimization + default: + class: File + location: "pipeline/config_file/config_optim.yml" + format: http://edamontology.org/format_3750 inputBinding: position: 4 prefix: "-sp" @@ -189,7 +199,7 @@ inputs: doc: |- active or not verbose reporting {True,False} type: - - "null" + # - "null" - type: enum symbols: - "True" @@ -205,9 +215,13 @@ baseCommand: outputs: results: - type: Directory + type: File[] outputBinding: - glob: "results" + glob: "results/*.pkz" + outputEval: | + ${ + return self.filter(function(i) { return -1 == i.basename.indexOf("_t_") }) + } doc: | Pickled Pandas data frames containing simulation results format: edam:format_2333 diff --git a/tango_workflow.cwl b/tango_workflow.cwl new file mode 100644 index 0000000..81a0274 --- /dev/null +++ b/tango_workflow.cwl @@ -0,0 +1,148 @@ +cwlVersion: v1.2 +class: Workflow + +requirements: + MultipleInputFeatureRequirement: {} + StepInputExpressionRequirement: {} + ScatterFeatureRequirement: {} + InlineJavascriptRequirement: {} + SubworkflowFeatureRequirement: {} + +inputs: + model_list: + type: File[] + default: + - class: File + path: "metabolic_models/freudenreichii.sbml" + format: http://edamontology.org/format_2585 + - class: File + path: "metabolic_models/lactis.sbml" + format: http://edamontology.org/format_2585 + - class: File + path: "metabolic_models/plantarum.sbml" + format: http://edamontology.org/format_2585 + combo_model_list: + type: File[] + default: + - class: File + path: "metabolic_models/freudenreichii.sbml" + format: http://edamontology.org/format_2585 + - class: File + path: "metabolic_models/freudenreichii.sbml" + format: http://edamontology.org/format_2585 + - class: File + path: "metabolic_models/lactis.sbml" + format: http://edamontology.org/format_2585 + - class: File + path: "metabolic_models/plantarum.sbml" + format: http://edamontology.org/format_2585 + combo_freud_sim_list: + type: {"type": "array", "items": [{"type": "enum", "symbols": ["file:///Users/sherman/work/tango_models/tango_models.cwl#freud_sim/growth", "file:///Users/sherman/work/tango_models/tango_models.cwl#freud_sim/metabolites"]}]} + default: + - "metabolites" + - "growth" + - "growth" + - "growth" + +outputs: + results: + type: File[] + outputSource: [individual/results, community/results] + linkMerge: merge_flattened + +steps: + optimize: + doc: |- + For each model file in the input model_list, run an individual + optimization. Return combined result_optimization. + run: + class: Workflow + inputs: + model_list: File[] + outputs: + result_optimization: + type: File + outputSource: combine/concat + steps: + optimize_each: + run: tango_models.cwl + scatter: model + in: + model: + source: model_list + valueFrom: $([self]) # wrap in singleton array + optimize: + valueFrom: "True" + community_scale: + valueFrom: "False" + out: [result_optimization] + combine: + run: pipeline/combine_files.cwl + in: + name: + valueFrom: "res_optim.txt" + file_list: + source: optimize_each/result_optimization + out: [concat] + in: + model_list: + source: model_list + out: [result_optimization] + + individual: + doc: |- + For each model file in the input model_list, run an individual + simulation. + run: + class: Workflow + inputs: + model_list: File[] + outputs: + results: + type: File[] + outputSource: combine/flattened + steps: + individual_each: + run: tango_models.cwl + scatter: [model, freud_sim] + scatterMethod: dotproduct + in: + model: + source: combo_model_list + valueFrom: $([self]) # wrap in singleton array + freud_sim: + source: combo_freud_sim_list + optimize: + valueFrom: "False" + community_scale: + valueFrom: "False" + initial_res_optim: + source: [optimize/result_optimization] + out: [results] + combine: + run: pipeline/flatten_array.cwl + in: + nested: + source: individual_each/results + out: [flattened] + in: + combo_model_list: + source: combo_model_list + combo_freud_sim_list: + source: combo_freud_sim_list + out: [results] + + community: + doc: |- + Simulate the community. + run: tango_models.cwl + in: + model: + source: model_list + optimize: + valueFrom: "False" + community_scale: + valueFrom: "True" + initial_res_optim: + source: [optimize/result_optimization] + out: [results] -- GitLab From 25428824549a33ff22c086b4567a922524893eb7 Mon Sep 17 00:00:00 2001 From: David James Sherman <david.sherman@inria.fr> Date: Wed, 13 Mar 2024 17:34:57 +0100 Subject: [PATCH 05/14] tango_experiments_job.yml defines pure cultures and communities --- tango_dynamics_wf.cwl | 105 ++++++++++++++++++++++++++++++++++++++ tango_experiments_job.yml | 50 ++++++++++++++++++ tango_optimization_wf.cwl | 67 ++++++++++++++++++++++++ 3 files changed, 222 insertions(+) create mode 100644 tango_dynamics_wf.cwl create mode 100644 tango_experiments_job.yml create mode 100644 tango_optimization_wf.cwl diff --git a/tango_dynamics_wf.cwl b/tango_dynamics_wf.cwl new file mode 100644 index 0000000..6c9c3e3 --- /dev/null +++ b/tango_dynamics_wf.cwl @@ -0,0 +1,105 @@ +--- +cwlVersion: v1.2 +class: Workflow + +requirements: + MultipleInputFeatureRequirement: {} + StepInputExpressionRequirement: {} + ScatterFeatureRequirement: {} + InlineJavascriptRequirement: {} + +inputs: + pure_culture: + type: + type: array + items: + type: record + fields: + models: File[] + freud_sim: string + community: + type: + type: array + items: + type: record + fields: + models: File[] + +outputs: + results: + type: Directory + outputSource: combine/results + +doc: |- + For each model file in the input model_list, run an individual + simulation. + +steps: + + individual_each: + run: "pipeline/tango_mock.cwl" + scatter: [model, freud_sim] + scatterMethod: dotproduct + in: + model: + source: pure_culture + valueFrom: $([self.models[0]]) # wrap model in singleton array + freud_sim: + source: pure_culture + valueFrom: $(self.freud_sim) + optimize: + valueFrom: "False" + community_scale: + valueFrom: "False" + out: [results] + + community: + run: "pipeline/tango_mock.cwl" + scatter: model + in: + model: + source: community + valueFrom: $(self.models) + optimize: + valueFrom: "False" + community_scale: + valueFrom: "True" + out: [results] + + combine: + run: + class: ExpressionTool + requirements: + InlineJavascriptRequirement: {} + MultipleInputFeatureRequirement: {} + LoadListingRequirement: { loadListing: shallow_listing } + inputs: + results_list: + type: + type: array + items: + type: array + items: File + outputs: + results: Directory + expression: | + ${ + var flat = [] + for (var i=0; i<inputs.results_list.length; i++) { + for (var j=0; j<inputs.results_list[i].length; j++) { + flat = flat.concat(inputs.results_list[i][j]) + } + } + return { + "results": { + "class": "Directory", + "basename": "results", + "listing": flat + } + } + } + in: + results_list: + source: [individual_each/results, community/results] + linkMerge: merge_flattened + out: [results] diff --git a/tango_experiments_job.yml b/tango_experiments_job.yml new file mode 100644 index 0000000..1d593cb --- /dev/null +++ b/tango_experiments_job.yml @@ -0,0 +1,50 @@ +pure_culture: + - models: + - class: File + path: "metabolic_models/freudenreichii.sbml" + format: http://edamontology.org/format_2585 + freud_sim: "metabolites" + - models: + - class: File + path: "metabolic_models/freudenreichii.sbml" + format: http://edamontology.org/format_2585 + freud_sim: "growth" + - models: + - class: File + path: "metabolic_models/freudenreichii.sbml" + format: http://edamontology.org/format_2585 + freud_sim: "growth" + - models: + - class: File + path: "metabolic_models/lactis.sbml" + format: http://edamontology.org/format_2585 + freud_sim: "growth" + - models: + - class: File + path: "metabolic_models/plantarum.sbml" + format: http://edamontology.org/format_2585 + freud_sim: "growth" + +community: + - models: + - class: File + path: "metabolic_models/freudenreichii.sbml" + format: http://edamontology.org/format_2585 + - class: File + path: "metabolic_models/lactis.sbml" + format: http://edamontology.org/format_2585 + - class: File + path: "metabolic_models/plantarum.sbml" + format: http://edamontology.org/format_2585 + dynamics: + class: File + location: "pipeline/config_file/config_dynamic.yml" + format: http://edamontology.org/format_3750 + culture: + class: File + location: "pipeline/config_file/config_culture.yml" + format: http://edamontology.org/format_3750 + solver: + class: File + location: "pipeline/config_file/config_optim.yml" + format: http://edamontology.org/format_3750 diff --git a/tango_optimization_wf.cwl b/tango_optimization_wf.cwl new file mode 100644 index 0000000..ec0c542 --- /dev/null +++ b/tango_optimization_wf.cwl @@ -0,0 +1,67 @@ +--- +cwlVersion: v1.2 +class: Workflow + +requirements: + MultipleInputFeatureRequirement: {} + StepInputExpressionRequirement: {} + ScatterFeatureRequirement: {} + InlineJavascriptRequirement: {} + +inputs: + pure_culture: + type: + type: array + items: + type: record + fields: + models: File[] + freud_sim: string + +outputs: + result_optimization: + type: File + outputSource: combine/result_optimization + +doc: |- + For each model file in the input model_list, run an individual + optimization. Return combined result_optimization. + +steps: + + optimize_each: + run: "pipeline/tango_mock.cwl" + scatter: [model, freud_sim] + scatterMethod: dotproduct + in: + model: + source: pure_culture + valueFrom: $([self.models[0]]) # wrap model in singleton array + freud_sim: + source: pure_culture + valueFrom: $(self.freud_sim) + optimize: + valueFrom: "True" + community_scale: + valueFrom: "False" + out: [result_optimization] + + combine: + in: + file_list: + source: optimize_each/result_optimization + out: [result_optimization] + run: + class: CommandLineTool + baseCommand: ["cat"] + inputs: + file_list: + type: File[] + inputBinding: + position: 1 + outputs: + result_optimization: + type: File + streamable: true + outputBinding: {glob: "res_optim.txt"} + stdout: "res_optim.txt" -- GitLab From d9eeb341d52444cd1b99a05acf9e7f36f9db9b9f Mon Sep 17 00:00:00 2001 From: David James Sherman <david.sherman@inria.fr> Date: Mon, 18 Mar 2024 08:51:40 +0100 Subject: [PATCH 06/14] Tango workflow with plots per-community parameters --- pipeline/mkdir_files.cwl | 28 ++++++ tango_dynamics_wf.cwl | 62 +++++-------- tango_experiments_job.yml | 25 +++-- tango_optimization_wf.cwl | 2 +- tango_plots.cwl | 20 ++-- tango_workflow.cwl | 186 +++++++++++++------------------------- 6 files changed, 135 insertions(+), 188 deletions(-) create mode 100644 pipeline/mkdir_files.cwl diff --git a/pipeline/mkdir_files.cwl b/pipeline/mkdir_files.cwl new file mode 100644 index 0000000..b6f8da9 --- /dev/null +++ b/pipeline/mkdir_files.cwl @@ -0,0 +1,28 @@ +--- +cwlVersion: v1.2 +class: ExpressionTool +requirements: + InlineJavascriptRequirement: {} + MultipleInputFeatureRequirement: {} + LoadListingRequirement: { loadListing: shallow_listing } +inputs: + name: + type: string + default: "results" + file_list: File[] +outputs: + results: Directory +expression: | + ${ + var flat = [] + for (var i=0; i<inputs.file_list.length; i++) { + flat = flat.concat(inputs.file_list[i]) + } + return { + "results": { + "class": "Directory", + "basename": inputs.name, + "listing": flat + } + } + } diff --git a/tango_dynamics_wf.cwl b/tango_dynamics_wf.cwl index 6c9c3e3..66f4624 100644 --- a/tango_dynamics_wf.cwl +++ b/tango_dynamics_wf.cwl @@ -9,6 +9,7 @@ requirements: InlineJavascriptRequirement: {} inputs: + initial_res_optim: File pure_culture: type: type: array @@ -24,11 +25,12 @@ inputs: type: record fields: models: File[] + outputs: results: - type: Directory - outputSource: combine/results + type: File[] + outputSource: combine/flattened doc: |- For each model file in the input model_list, run an individual @@ -37,7 +39,7 @@ doc: |- steps: individual_each: - run: "pipeline/tango_mock.cwl" + run: tango_models.cwl scatter: [model, freud_sim] scatterMethod: dotproduct in: @@ -51,11 +53,14 @@ steps: valueFrom: "False" community_scale: valueFrom: "False" + initial_res_optim: + source: initial_res_optim out: [results] community: - run: "pipeline/tango_mock.cwl" - scatter: model + run: tango_models.cwl + scatter: [model, culture, dynamics, solver] + scatterMethod: dotproduct in: model: source: community @@ -64,42 +69,23 @@ steps: valueFrom: "False" community_scale: valueFrom: "True" + initial_res_optim: + source: initial_res_optim + culture: + source: community + valueFrom: $(self.culture || null) + dynamics: + source: community + valueFrom: $(self.dynamics || null) + solver: + source: community + valueFrom: $(self.solver || null) out: [results] combine: - run: - class: ExpressionTool - requirements: - InlineJavascriptRequirement: {} - MultipleInputFeatureRequirement: {} - LoadListingRequirement: { loadListing: shallow_listing } - inputs: - results_list: - type: - type: array - items: - type: array - items: File - outputs: - results: Directory - expression: | - ${ - var flat = [] - for (var i=0; i<inputs.results_list.length; i++) { - for (var j=0; j<inputs.results_list[i].length; j++) { - flat = flat.concat(inputs.results_list[i][j]) - } - } - return { - "results": { - "class": "Directory", - "basename": "results", - "listing": flat - } - } - } + run: pipeline/flatten_array.cwl in: - results_list: + nested: source: [individual_each/results, community/results] linkMerge: merge_flattened - out: [results] + out: [flattened] diff --git a/tango_experiments_job.yml b/tango_experiments_job.yml index 1d593cb..150e9b5 100644 --- a/tango_experiments_job.yml +++ b/tango_experiments_job.yml @@ -9,11 +9,6 @@ pure_culture: path: "metabolic_models/freudenreichii.sbml" format: http://edamontology.org/format_2585 freud_sim: "growth" - - models: - - class: File - path: "metabolic_models/freudenreichii.sbml" - format: http://edamontology.org/format_2585 - freud_sim: "growth" - models: - class: File path: "metabolic_models/lactis.sbml" @@ -38,13 +33,15 @@ community: format: http://edamontology.org/format_2585 dynamics: class: File - location: "pipeline/config_file/config_dynamic.yml" - format: http://edamontology.org/format_3750 - culture: - class: File - location: "pipeline/config_file/config_culture.yml" - format: http://edamontology.org/format_3750 - solver: - class: File - location: "pipeline/config_file/config_optim.yml" + location: "pipeline/config_file/config_dynamic_com.yml" format: http://edamontology.org/format_3750 + +figures: ["com", "flux"] + +initial_res_optim: + class: File + location: ./res_optim.txt + +models_dir: + class: Directory + location: ./models diff --git a/tango_optimization_wf.cwl b/tango_optimization_wf.cwl index ec0c542..3af2e68 100644 --- a/tango_optimization_wf.cwl +++ b/tango_optimization_wf.cwl @@ -30,7 +30,7 @@ doc: |- steps: optimize_each: - run: "pipeline/tango_mock.cwl" + run: tango_models.cwl scatter: [model, freud_sim] scatterMethod: dotproduct in: diff --git a/tango_plots.cwl b/tango_plots.cwl index 592c4c8..ca4179a 100644 --- a/tango_plots.cwl +++ b/tango_plots.cwl @@ -34,16 +34,16 @@ inputs: doc: |- Which predefined figure to make {indiv,flux,com,goodness_of_fit,switch_pathways} - type: - - "null" - - type: enum - symbols: - - "com" - - "flux" - - "goodness_of_fit" - - "indiv" - - "switch_pathways" - - "transcripts" + type: string + # - "null" + # - type: enum + # symbols: + # - "com" + # - "flux" + # - "goodness_of_fit" + # - "indiv" + # - "switch_pathways" + # - "transcripts" default: "indiv" models: diff --git a/tango_workflow.cwl b/tango_workflow.cwl index 81a0274..ee5d386 100644 --- a/tango_workflow.cwl +++ b/tango_workflow.cwl @@ -9,140 +9,76 @@ requirements: SubworkflowFeatureRequirement: {} inputs: - model_list: - type: File[] - default: - - class: File - path: "metabolic_models/freudenreichii.sbml" - format: http://edamontology.org/format_2585 - - class: File - path: "metabolic_models/lactis.sbml" - format: http://edamontology.org/format_2585 - - class: File - path: "metabolic_models/plantarum.sbml" - format: http://edamontology.org/format_2585 - combo_model_list: - type: File[] - default: - - class: File - path: "metabolic_models/freudenreichii.sbml" - format: http://edamontology.org/format_2585 - - class: File - path: "metabolic_models/freudenreichii.sbml" - format: http://edamontology.org/format_2585 - - class: File - path: "metabolic_models/lactis.sbml" - format: http://edamontology.org/format_2585 - - class: File - path: "metabolic_models/plantarum.sbml" - format: http://edamontology.org/format_2585 - combo_freud_sim_list: - type: {"type": "array", "items": [{"type": "enum", "symbols": ["file:///Users/sherman/work/tango_models/tango_models.cwl#freud_sim/growth", "file:///Users/sherman/work/tango_models/tango_models.cwl#freud_sim/metabolites"]}]} - default: - - "metabolites" - - "growth" - - "growth" - - "growth" + # Choose experiments to include in simulations + pure_culture: + type: + type: array + items: + type: record + fields: + models: File[] + freud_sim: string + community: + type: + type: array + items: + type: record + fields: + models: File[] + + # Choose plots to make from silumation results + figures: string[] + models_dir: Directory outputs: results: + type: Directory + outputSource: combine/results + figures: type: File[] - outputSource: [individual/results, community/results] - linkMerge: merge_flattened + outputSource: flatten/flattened steps: optimize: - doc: |- - For each model file in the input model_list, run an individual - optimization. Return combined result_optimization. - run: - class: Workflow - inputs: - model_list: File[] - outputs: - result_optimization: - type: File - outputSource: combine/concat - steps: - optimize_each: - run: tango_models.cwl - scatter: model - in: - model: - source: model_list - valueFrom: $([self]) # wrap in singleton array - optimize: - valueFrom: "True" - community_scale: - valueFrom: "False" - out: [result_optimization] - combine: - run: pipeline/combine_files.cwl - in: - name: - valueFrom: "res_optim.txt" - file_list: - source: optimize_each/result_optimization - out: [concat] in: - model_list: - source: model_list + pure_culture: + source: pure_culture out: [result_optimization] - - individual: - doc: |- - For each model file in the input model_list, run an individual - simulation. - run: - class: Workflow - inputs: - model_list: File[] - outputs: - results: - type: File[] - outputSource: combine/flattened - steps: - individual_each: - run: tango_models.cwl - scatter: [model, freud_sim] - scatterMethod: dotproduct - in: - model: - source: combo_model_list - valueFrom: $([self]) # wrap in singleton array - freud_sim: - source: combo_freud_sim_list - optimize: - valueFrom: "False" - community_scale: - valueFrom: "False" - initial_res_optim: - source: [optimize/result_optimization] - out: [results] - combine: - run: pipeline/flatten_array.cwl - in: - nested: - source: individual_each/results - out: [flattened] - in: - combo_model_list: - source: combo_model_list - combo_freud_sim_list: - source: combo_freud_sim_list - out: [results] - - community: - doc: |- - Simulate the community. - run: tango_models.cwl + run: tango_optimization_wf.cwl + + dynamics: in: - model: - source: model_list - optimize: - valueFrom: "False" - community_scale: - valueFrom: "True" initial_res_optim: - source: [optimize/result_optimization] + source: optimize/result_optimization + pure_culture: + source: pure_culture + community: + source: community out: [results] + run: tango_dynamics_wf.cwl + + combine: + in: + file_list: + source: dynamics/results + out: [results] + run: pipeline/mkdir_files.cwl + + figures: + scatter: [figure] + in: + results: + source: combine/results + figure: + source: figures + models: + source: models_dir + out: [plots] + run: tango_plots.cwl + + flatten: + in: + nested: + source: figures/plots + out: [flattened] + run: pipeline/flatten_array.cwl -- GitLab From a87be0ba287322e0827ea8fd1d02810dcd92d955 Mon Sep 17 00:00:00 2001 From: David James Sherman <david.sherman@inria.fr> Date: Tue, 26 Mar 2024 08:39:10 +0100 Subject: [PATCH 07/14] Working, array outputs --- scripts/plot_com.py | 2 +- tango_dynamics_wf.cwl | 1 - tango_experiments+types_job.yml | 46 +++++++++++++++++++++++++++++++++ tango_experiments_job.yml | 2 +- tango_models.cwl | 4 ++- tango_plots.cwl | 25 +++++++++++------- tango_workflow.cwl | 27 +++++++++---------- 7 files changed, 81 insertions(+), 26 deletions(-) create mode 100644 tango_experiments+types_job.yml diff --git a/scripts/plot_com.py b/scripts/plot_com.py index 40c0afe..52b3e28 100644 --- a/scripts/plot_com.py +++ b/scripts/plot_com.py @@ -19,7 +19,7 @@ def get_data_com(): results_w = pd.DataFrame(results_w,columns=[param['Y_output_id_inv'][i] for i in range(results_w.shape[1])]) simulation = pd.read_csv('results/result_dfba_com_without_optim.csv',sep='\t').set_index('State') - time_o = pd.read_csv('pipeline/com_time.tsv',sep='\t').set_index('State') + time_o = pd.read_csv('data/com_time.tsv',sep='\t').set_index('State') time = time_o.loc[simulation.index] diff --git a/tango_dynamics_wf.cwl b/tango_dynamics_wf.cwl index 66f4624..3bbae37 100644 --- a/tango_dynamics_wf.cwl +++ b/tango_dynamics_wf.cwl @@ -25,7 +25,6 @@ inputs: type: record fields: models: File[] - outputs: results: diff --git a/tango_experiments+types_job.yml b/tango_experiments+types_job.yml new file mode 100644 index 0000000..bb64016 --- /dev/null +++ b/tango_experiments+types_job.yml @@ -0,0 +1,46 @@ +pure_culture: + - models: + - class: File + path: "metabolic_models/freudenreichii.sbml" + format: http://edamontology.org/format_2585 + freud_sim: "metabolites" + - models: + - class: File + path: "metabolic_models/freudenreichii.sbml" + format: http://edamontology.org/format_2585 + freud_sim: "growth" + - models: + - class: File + path: "metabolic_models/lactis.sbml" + format: http://edamontology.org/format_2585 + freud_sim: "growth" + - models: + - class: File + path: "metabolic_models/plantarum.sbml" + format: http://edamontology.org/format_2585 + freud_sim: "growth" + +community: + - models: + - class: File + path: "metabolic_models/freudenreichii.sbml" + format: http://edamontology.org/format_2585 + - class: File + path: "metabolic_models/lactis.sbml" + format: http://edamontology.org/format_2585 + - class: File + path: "metabolic_models/plantarum.sbml" + format: http://edamontology.org/format_2585 + +dynamics: + class: File + location: "pipeline/config_file/config_dynamic.yml" + format: http://edamontology.org/format_3750 +culture: + class: File + location: "pipeline/config_file/config_culture.yml" + format: http://edamontology.org/format_3750 +solver: + class: File + location: "pipeline/config_file/config_optim.yml" + format: http://edamontology.org/format_3750 diff --git a/tango_experiments_job.yml b/tango_experiments_job.yml index 150e9b5..18d83c4 100644 --- a/tango_experiments_job.yml +++ b/tango_experiments_job.yml @@ -36,7 +36,7 @@ community: location: "pipeline/config_file/config_dynamic_com.yml" format: http://edamontology.org/format_3750 -figures: ["com", "flux"] +which_figures: ["com", "flux"] initial_res_optim: class: File diff --git a/tango_models.cwl b/tango_models.cwl index 54ecc16..a282e9a 100644 --- a/tango_models.cwl +++ b/tango_models.cwl @@ -217,7 +217,9 @@ outputs: results: type: File[] outputBinding: - glob: "results/*.pkz" + glob: + - "results/*.pkz" + - "results/*.csv" outputEval: | ${ return self.filter(function(i) { return -1 == i.basename.indexOf("_t_") }) diff --git a/tango_plots.cwl b/tango_plots.cwl index ca4179a..f846fa0 100644 --- a/tango_plots.cwl +++ b/tango_plots.cwl @@ -10,12 +10,22 @@ requirements: InlineJavascriptRequirement: {} InitialWorkDirRequirement: listing: - - $(inputs.scripts) - - $(inputs.data) - - entry: $(inputs.results) - entryname: results + - entry: $(inputs.scripts) + entryname: scripts + - entry: $(inputs.data) + entryname: data - entry: $(inputs.models) entryname: metabolic_models + - | + ${ + return { + "class": "Directory", + "basename": "results", + "listing": inputs.results + } + } + + # LoadListingRequirement: { loadListing: shallow_listing } NetworkAccess: class: NetworkAccess networkAccess: true @@ -53,12 +63,9 @@ inputs: configuration results: - type: Directory + type: File[] doc: |- - Directory containing TANGO results - default: - class: Directory - location: ./results + Array of TANGO result files scripts: type: Directory diff --git a/tango_workflow.cwl b/tango_workflow.cwl index ee5d386..7fccfd4 100644 --- a/tango_workflow.cwl +++ b/tango_workflow.cwl @@ -27,13 +27,13 @@ inputs: models: File[] # Choose plots to make from silumation results - figures: string[] + which_figures: string[] models_dir: Directory outputs: results: - type: Directory - outputSource: combine/results + type: File[] + outputSource: dynamics/results figures: type: File[] outputSource: flatten/flattened @@ -57,20 +57,21 @@ steps: out: [results] run: tango_dynamics_wf.cwl - combine: - in: - file_list: - source: dynamics/results - out: [results] - run: pipeline/mkdir_files.cwl + # combine: + # in: + # file_list: + # source: dynamics/results + # out: [results] + # run: pipeline/mkdir_files.cwl - figures: + plot_figures: scatter: [figure] in: results: - source: combine/results + source: dynamics/results + # source: combine/results figure: - source: figures + source: which_figures models: source: models_dir out: [plots] @@ -79,6 +80,6 @@ steps: flatten: in: nested: - source: figures/plots + source: plot_figures/plots out: [flattened] run: pipeline/flatten_array.cwl -- GitLab From e5e0d36482e1a375be4c5e5449d2cedf3e750477 Mon Sep 17 00:00:00 2001 From: David James Sherman <david.sherman@inria.fr> Date: Tue, 26 Mar 2024 08:40:40 +0100 Subject: [PATCH 08/14] plot jobs tango_plots is scatter workflow that calls tango_one_plot Call plot scripts using module Use tango_plots for scatter. Combine outputs into directories --- pipeline/mkdir_files.cwl | 2 +- plot_transcripts_job.yml | 7 ++ tango_one_plot.cwl | 119 +++++++++++++++++++++++++++++ tango_plots.cwl | 159 ++++++++------------------------------- tango_workflow.cwl | 46 +++++------ 5 files changed, 184 insertions(+), 149 deletions(-) create mode 100644 plot_transcripts_job.yml create mode 100644 tango_one_plot.cwl diff --git a/pipeline/mkdir_files.cwl b/pipeline/mkdir_files.cwl index b6f8da9..7a9e0c5 100644 --- a/pipeline/mkdir_files.cwl +++ b/pipeline/mkdir_files.cwl @@ -11,7 +11,7 @@ inputs: default: "results" file_list: File[] outputs: - results: Directory + out_dir: Directory expression: | ${ var flat = [] diff --git a/plot_transcripts_job.yml b/plot_transcripts_job.yml new file mode 100644 index 0000000..fba36c6 --- /dev/null +++ b/plot_transcripts_job.yml @@ -0,0 +1,7 @@ +figure: "transcripts" +results: + class: Directory + path: "./results" +models: + class: Directory + path: "./metabolic_models" diff --git a/tango_one_plot.cwl b/tango_one_plot.cwl new file mode 100644 index 0000000..2de3440 --- /dev/null +++ b/tango_one_plot.cwl @@ -0,0 +1,119 @@ +cwlVersion: v1.2 +class: CommandLineTool + +label: Plot TANGO results +doc: |- + Use plot_* scripts to make figures from TANGO results + +requirements: + ShellCommandRequirement: {} + InlineJavascriptRequirement: {} + InitialWorkDirRequirement: + listing: + - entry: $(inputs.scripts) + entryname: scripts + - entry: $(inputs.data) + entryname: data + - entry: $(inputs.models) + entryname: metabolic_models + - entry: $(inputs.results) + entryname: results + NetworkAccess: + class: NetworkAccess + networkAccess: true + +inputs: + data: + type: Directory + doc: |- + Directory containing experimental data, referenced in optimize + configuration + default: + class: Directory + location: ./data + + figure: + doc: |- + Which predefined figure to make + {indiv,flux,com,goodness_of_fit,switch_pathways} + type: string + default: "indiv" + + models: + type: Directory? + doc: |- + Directory containing experimental data, referenced in optimize + configuration + + results: + # type: File[] + type: Directory + doc: |- + Array of TANGO result files + + scripts: + type: Directory + doc: |- + Scripts for predefined figures + default: + class: Directory + location: ./scripts + +baseCommand: + - python + - "-m" + +arguments: + - valueFrom: $("scripts.plot_" + inputs.figure) + +outputs: + plots: + type: + type: array + items: File + outputBinding: + glob: + - "*.pdf" + - "*.svg" + doc: |- + Figure in PDF format + format: edam:format_3508 + standard_output: + type: stdout + format: edam:format_1964 + standard_error: + type: stderr + format: edam:format_1964 + +stdout: stdout.txt +stderr: stderr.txt + +s:author: + - class: s:Person + s:name: Simon Labarthe + s:identifier: https://orcid.org/0000-0003-2114-0697 + - class: s:Person + s:name: Clémence Frioux + s:identifier: https://orcid.org/0000-0003-2114-0697 + - class: s:Person + s:name: David James Sherman + s:identifier: https://orcid.org/0000-0002-2316-1005 + - class: s:Person + s:name: Maxime Lecomte + s:identifier: https://orcid.org/0000-0002-4558-6151 + - class: s:Person + s:name: Hélène Falentin + s:identifier: https://orcid.org/0000-0001-6254-5303 + - class: s:Person + s:name: Julie Aubert + s:identifier: https://orcid.org/0000-0001-5203-5748 + +s:dateCreated: "2024-03-04" +s:license: https://spdx.org/licenses/LGPL-3.0-or-later + +$namespaces: + s: https://schema.org/ + edam: http://edamontology.org/ +$schemas: + - https://schema.org/version/latest/schemaorg-current-http.rdf + - http://edamontology.org/EDAM_1.23.owl diff --git a/tango_plots.cwl b/tango_plots.cwl index f846fa0..d0ed96b 100644 --- a/tango_plots.cwl +++ b/tango_plots.cwl @@ -1,134 +1,39 @@ cwlVersion: v1.2 -class: CommandLineTool - -label: Plot TANGO results -doc: |- - Use plot_* scripts to make figures from TANGO results +class: Workflow requirements: - ShellCommandRequirement: {} + MultipleInputFeatureRequirement: {} + StepInputExpressionRequirement: {} + ScatterFeatureRequirement: {} InlineJavascriptRequirement: {} - InitialWorkDirRequirement: - listing: - - entry: $(inputs.scripts) - entryname: scripts - - entry: $(inputs.data) - entryname: data - - entry: $(inputs.models) - entryname: metabolic_models - - | - ${ - return { - "class": "Directory", - "basename": "results", - "listing": inputs.results - } - } - - # LoadListingRequirement: { loadListing: shallow_listing } - NetworkAccess: - class: NetworkAccess - networkAccess: true + SubworkflowFeatureRequirement: {} inputs: - data: - type: Directory - doc: |- - Directory containing experimental data, referenced in optimize - configuration - default: - class: Directory - location: ./data - - figure: - doc: |- - Which predefined figure to make - {indiv,flux,com,goodness_of_fit,switch_pathways} - type: string - # - "null" - # - type: enum - # symbols: - # - "com" - # - "flux" - # - "goodness_of_fit" - # - "indiv" - # - "switch_pathways" - # - "transcripts" - default: "indiv" - - models: - type: Directory? - doc: |- - Directory containing experimental data, referenced in optimize - configuration - - results: - type: File[] - doc: |- - Array of TANGO result files - - scripts: - type: Directory - doc: |- - Scripts for predefined figures - default: - class: Directory - location: ./scripts - -baseCommand: - - python - -arguments: - - valueFrom: $("scripts/plot_" + inputs.figure + ".py") - + results: Directory + which_figures: string[] + models: Directory + outputs: - plots: - type: - type: array - items: File - outputBinding: - glob: - - "*.pdf" - - "*.svg" - doc: |- - Figure in PDF format - format: edam:format_3508 - standard_output: - type: stdout - format: edam:format_1964 - standard_error: - type: stderr - format: edam:format_1964 - -stdout: stdout.txt -stderr: stderr.txt - -s:author: - - class: s:Person - s:name: Simon Labarthe - s:identifier: https://orcid.org/0000-0003-2114-0697 - - class: s:Person - s:name: Clémence Frioux - s:identifier: https://orcid.org/0000-0003-2114-0697 - - class: s:Person - s:name: David James Sherman - s:identifier: https://orcid.org/0000-0002-2316-1005 - - class: s:Person - s:name: Maxime Lecomte - s:identifier: https://orcid.org/0000-0002-4558-6151 - - class: s:Person - s:name: Hélène Falentin - s:identifier: https://orcid.org/0000-0001-6254-5303 - - class: s:Person - s:name: Julie Aubert - s:identifier: https://orcid.org/0000-0001-5203-5748 - -s:dateCreated: "2024-03-04" -s:license: https://spdx.org/licenses/LGPL-3.0-or-later - -$namespaces: - s: https://schema.org/ - edam: http://edamontology.org/ -$schemas: - - https://schema.org/version/latest/schemaorg-current-http.rdf - - http://edamontology.org/EDAM_1.23.owl + figures: + type: File[] + outputSource: flatten/flattened + +steps: + plot_figures: + scatter: [figure] + in: + results: + source: results + figure: + source: which_figures + models: + source: models + out: [plots] + run: tango_one_plot.cwl + + flatten: + in: + nested: + source: plot_figures/plots + out: [flattened] + run: pipeline/flatten_array.cwl diff --git a/tango_workflow.cwl b/tango_workflow.cwl index 7fccfd4..55584d8 100644 --- a/tango_workflow.cwl +++ b/tango_workflow.cwl @@ -26,17 +26,19 @@ inputs: fields: models: File[] - # Choose plots to make from silumation results + # Choose plots to make from simulation results which_figures: string[] + + # Transcripts plot need input SBML models models_dir: Directory outputs: results: - type: File[] - outputSource: dynamics/results + type: Directory + outputSource: mkdir_results/out_dir figures: - type: File[] - outputSource: flatten/flattened + type: Directory + outputSource: mkdir_figures/out_dir steps: optimize: @@ -57,29 +59,31 @@ steps: out: [results] run: tango_dynamics_wf.cwl - # combine: - # in: - # file_list: - # source: dynamics/results - # out: [results] - # run: pipeline/mkdir_files.cwl + mkdir_results: + in: + file_list: + source: dynamics/results + name: + valueFrom: "results" + out: [out_dir] + run: pipeline/mkdir_files.cwl plot_figures: - scatter: [figure] in: results: - source: dynamics/results - # source: combine/results - figure: + source: mkdir_results/out_dir + which_figures: source: which_figures models: source: models_dir - out: [plots] + out: [figures] run: tango_plots.cwl - flatten: + mkdir_figures: in: - nested: - source: plot_figures/plots - out: [flattened] - run: pipeline/flatten_array.cwl + file_list: + source: plot_figures/figures + name: + valueFrom: "figures" + out: [out_dir] + run: pipeline/mkdir_files.cwl -- GitLab From 9f2212583a9f1a1506b654641df454ad72d911e0 Mon Sep 17 00:00:00 2001 From: David James Sherman <david.sherman@inria.fr> Date: Tue, 26 Mar 2024 16:29:10 +0100 Subject: [PATCH 09/14] Make figures .gitignore figures --- .gitignore | 1 + tango_experiments_job.yml | 8 +++++++- tango_plots_job.yml | 13 +++++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 tango_plots_job.yml diff --git a/.gitignore b/.gitignore index abb4b30..5c480d1 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ commandline* /result* /pipeline/dfba_trajectory*.png /pipeline/result_pure_culture*.csv +/figures ro-crate diff --git a/tango_experiments_job.yml b/tango_experiments_job.yml index 18d83c4..f26459d 100644 --- a/tango_experiments_job.yml +++ b/tango_experiments_job.yml @@ -36,7 +36,13 @@ community: location: "pipeline/config_file/config_dynamic_com.yml" format: http://edamontology.org/format_3750 -which_figures: ["com", "flux"] +which_figures: + - com + - flux + - indiv + - goodness_of_fit + # - switch_pathways + - transcripts initial_res_optim: class: File diff --git a/tango_plots_job.yml b/tango_plots_job.yml new file mode 100644 index 0000000..be08f30 --- /dev/null +++ b/tango_plots_job.yml @@ -0,0 +1,13 @@ +results: + class: Directory + location: ./results +models: + class: Directory + location: ./metabolic_models +which_figures: + - com + - flux + - indiv + - goodness_of_fit + # - switch_pathways + - transcripts -- GitLab From 07f1d452c7918938722837c439d80b31895c6f69 Mon Sep 17 00:00:00 2001 From: David James Sherman <david.sherman@inria.fr> Date: Tue, 26 Mar 2024 17:18:17 +0100 Subject: [PATCH 10/14] Shuttle time-series results to plot_switch_pathway Output of mkdir_files is "out_dir" --- pipeline/mkdir_files.cwl | 6 +++++- tango_dynamics_wf.cwl | 17 ++++++++++++++--- tango_experiments_job.yml | 2 +- tango_models.cwl | 12 ++++++++++++ tango_plots.cwl | 15 ++++++++++++--- tango_plots_job.yml | 17 ++++++++++++++--- tango_workflow.cwl | 5 +++-- 7 files changed, 61 insertions(+), 13 deletions(-) diff --git a/pipeline/mkdir_files.cwl b/pipeline/mkdir_files.cwl index 7a9e0c5..2c8738a 100644 --- a/pipeline/mkdir_files.cwl +++ b/pipeline/mkdir_files.cwl @@ -1,17 +1,21 @@ --- cwlVersion: v1.2 class: ExpressionTool + requirements: InlineJavascriptRequirement: {} MultipleInputFeatureRequirement: {} LoadListingRequirement: { loadListing: shallow_listing } + inputs: name: type: string default: "results" file_list: File[] + outputs: out_dir: Directory + expression: | ${ var flat = [] @@ -19,7 +23,7 @@ expression: | flat = flat.concat(inputs.file_list[i]) } return { - "results": { + "out_dir": { "class": "Directory", "basename": inputs.name, "listing": flat diff --git a/tango_dynamics_wf.cwl b/tango_dynamics_wf.cwl index 3bbae37..d829bef 100644 --- a/tango_dynamics_wf.cwl +++ b/tango_dynamics_wf.cwl @@ -30,6 +30,9 @@ outputs: results: type: File[] outputSource: combine/flattened + time_series: + type: File[] + outputSource: combine_time_series/flattened doc: |- For each model file in the input model_list, run an individual @@ -54,9 +57,9 @@ steps: valueFrom: "False" initial_res_optim: source: initial_res_optim - out: [results] + out: [results, time_series] - community: + community_together: run: tango_models.cwl scatter: [model, culture, dynamics, solver] scatterMethod: dotproduct @@ -85,6 +88,14 @@ steps: run: pipeline/flatten_array.cwl in: nested: - source: [individual_each/results, community/results] + source: [individual_each/results, community_together/results] + linkMerge: merge_flattened + out: [flattened] + + combine_time_series: + run: pipeline/flatten_array.cwl + in: + nested: + source: [individual_each/time_series] linkMerge: merge_flattened out: [flattened] diff --git a/tango_experiments_job.yml b/tango_experiments_job.yml index f26459d..0ecad9f 100644 --- a/tango_experiments_job.yml +++ b/tango_experiments_job.yml @@ -41,7 +41,7 @@ which_figures: - flux - indiv - goodness_of_fit - # - switch_pathways + - switch_pathways - transcripts initial_res_optim: diff --git a/tango_models.cwl b/tango_models.cwl index a282e9a..46100c4 100644 --- a/tango_models.cwl +++ b/tango_models.cwl @@ -247,6 +247,18 @@ outputs: standard_error: type: stderr format: edam:format_1964 + time_series: + type: File[] + outputBinding: + glob: + - "results/*.pkz" + outputEval: | + ${ + return self.filter(function(i) { return -1 != i.basename.indexOf("_t_") }) + } + doc: | + Pickled Pandas data frames containing time series of simulation results + format: edam:format_2333 stdout: stdout.txt stderr: stderr.txt diff --git a/tango_plots.cwl b/tango_plots.cwl index d0ed96b..c3f2911 100644 --- a/tango_plots.cwl +++ b/tango_plots.cwl @@ -9,7 +9,7 @@ requirements: SubworkflowFeatureRequirement: {} inputs: - results: Directory + results: File[] which_figures: string[] models: Directory @@ -18,12 +18,21 @@ outputs: type: File[] outputSource: flatten/flattened -steps: +steps: + prepare: + in: + file_list: + source: results + name: + valueFrom: "results" + out: [out_dir] + run: pipeline/mkdir_files.cwl + plot_figures: scatter: [figure] in: results: - source: results + source: prepare/out_dir figure: source: which_figures models: diff --git a/tango_plots_job.yml b/tango_plots_job.yml index be08f30..bcbb122 100644 --- a/tango_plots_job.yml +++ b/tango_plots_job.yml @@ -1,6 +1,17 @@ results: - class: Directory - location: ./results + - class: File + path: "results/result_community_model_whole.pkz" + - class: File + path: "results/result_dfba_com_without_optim.csv" + - class: File + path: "results/result_pure_culture_model_freudenreichii_growth.pkz" + - class: File + path: "results/result_pure_culture_model_freudenreichii_metabolites.pkz" + - class: File + path: "results/result_pure_culture_model_lactis.pkz" + - class: File + path: "results/result_pure_culture_model_plantarum.pkz" + models: class: Directory location: ./metabolic_models @@ -9,5 +20,5 @@ which_figures: - flux - indiv - goodness_of_fit - # - switch_pathways + - switch_pathways - transcripts diff --git a/tango_workflow.cwl b/tango_workflow.cwl index 55584d8..d82f655 100644 --- a/tango_workflow.cwl +++ b/tango_workflow.cwl @@ -56,7 +56,7 @@ steps: source: pure_culture community: source: community - out: [results] + out: [results, time_series] run: tango_dynamics_wf.cwl mkdir_results: @@ -71,7 +71,8 @@ steps: plot_figures: in: results: - source: mkdir_results/out_dir + source: [dynamics/results, dynamics/time_series] + linkMerge: merge_flattened which_figures: source: which_figures models: -- GitLab From 60e47a6185944c49d5148cbc5057833de578d97d Mon Sep 17 00:00:00 2001 From: David James Sherman <david.sherman@inria.fr> Date: Mon, 22 Apr 2024 14:36:48 +0200 Subject: [PATCH 11/14] ExpressionTool mkdir_files: skip files with duplicate basename Correct location models_dir --- pipeline/mkdir_files.cwl | 12 +++++++++++- tango_experiments_job.yml | 2 +- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/pipeline/mkdir_files.cwl b/pipeline/mkdir_files.cwl index 2c8738a..d0e1bf8 100644 --- a/pipeline/mkdir_files.cwl +++ b/pipeline/mkdir_files.cwl @@ -2,6 +2,11 @@ cwlVersion: v1.2 class: ExpressionTool +doc: | + Combine an array of Files into a single named directory. If + files with duplicate basenames are provided, only the first + one is included, whether or not their contents are the same. + requirements: InlineJavascriptRequirement: {} MultipleInputFeatureRequirement: {} @@ -19,8 +24,13 @@ outputs: expression: | ${ var flat = [] + var seen = {} for (var i=0; i<inputs.file_list.length; i++) { - flat = flat.concat(inputs.file_list[i]) + var key = inputs.file_list[i].basename + if (!seen.hasOwnProperty(key)) { + flat = flat.concat(inputs.file_list[i]) + } + seen[key] = true } return { "out_dir": { diff --git a/tango_experiments_job.yml b/tango_experiments_job.yml index 0ecad9f..0736985 100644 --- a/tango_experiments_job.yml +++ b/tango_experiments_job.yml @@ -50,4 +50,4 @@ initial_res_optim: models_dir: class: Directory - location: ./models + location: ./metabolic_models -- GitLab From 749650b18e30d4d3b8731521d404c7d37f4c4fc1 Mon Sep 17 00:00:00 2001 From: David James Sherman <david.sherman@inria.fr> Date: Mon, 22 Apr 2024 15:09:08 +0200 Subject: [PATCH 12/14] Cleanup: move CWL tools to pipeline/ Default location for data/ --- figures_monoculture.cwl | 118 --------------- figures_monoculture_inline.cwl | 138 ------------------ figures_monoculture_job.yml | 18 --- indiv_freud_job.yml | 18 --- .../tango_dynamics_wf.cwl | 4 +- tango_models.cwl => pipeline/tango_models.cwl | 8 +- .../tango_one_plot.cwl | 4 +- .../tango_optimization_wf.cwl | 0 tango_plots.cwl => pipeline/tango_plots.cwl | 4 +- tango_experiments+types_job.yml | 46 ------ tango_plots_job.yml | 24 --- tango_workflow.cwl | 6 +- 12 files changed, 13 insertions(+), 375 deletions(-) delete mode 100644 figures_monoculture.cwl delete mode 100644 figures_monoculture_inline.cwl delete mode 100644 figures_monoculture_job.yml delete mode 100644 indiv_freud_job.yml rename tango_dynamics_wf.cwl => pipeline/tango_dynamics_wf.cwl (96%) rename tango_models.cwl => pipeline/tango_models.cwl (97%) rename tango_one_plot.cwl => pipeline/tango_one_plot.cwl (98%) rename tango_optimization_wf.cwl => pipeline/tango_optimization_wf.cwl (100%) rename tango_plots.cwl => pipeline/tango_plots.cwl (92%) delete mode 100644 tango_experiments+types_job.yml delete mode 100644 tango_plots_job.yml diff --git a/figures_monoculture.cwl b/figures_monoculture.cwl deleted file mode 100644 index 8cb6018..0000000 --- a/figures_monoculture.cwl +++ /dev/null @@ -1,118 +0,0 @@ -cwlVersion: v1.2 -class: Workflow - -requirements: - MultipleInputFeatureRequirement: {} - StepInputExpressionRequirement: {} - ScatterFeatureRequirement: {} - InlineJavascriptRequirement: {} - -inputs: - model_list: File[] - freud_sim_list: string[] - -outputs: - scattered_results: - type: Directory - outputSource: flatten/flattened - plots: - type: File[] - outputSource: - - plot_indiv/plots - # standard_output: - # type: File - # outputSource: plot_indiv/standard_output - # standard_error: - # type: File - # outputSource: plot_indiv/standard_error - -steps: - run_indiv: - doc: |- - For each model file in the input model_list, run an individual - simulation. - run: tango_models.cwl - scatter: [model, freud_sim] - scatterMethod: dotproduct - in: - model: - source: model_list - valueFrom: $([self]) # wrap in singleton array - freud_sim: - source: freud_sim_list - cobra_solver: - valueFrom: "glpk" - community_scale: - valueFrom: "False" - culture: - default: - class: File - location: "pipeline/config_file/config_culture.yml" - format: http://edamontology.org/format_3750 - dynamics: - default: - class: File - location: "pipeline/config_file/config_dynamic.yml" - format: http://edamontology.org/format_3750 - solver: - default: - class: File - location: "pipeline/config_file/config_optim.yml" - format: http://edamontology.org/format_3750 - out: [results] - - plot_indiv: - doc: |- - Use the combined simulation results to generate figure plots. - run: tango_plots.cwl - in: - figure: - valueFrom: "indiv" - results: - source: flatten/flattened - out: [plots, standard_output, standard_error] - - flatten: - doc: |- - Each individual simulation creates a separate results directory, - but plot_indiv expects a single directory. This step combines - the results into a single directory. Since a result file might - be generated several times by different runs, skip duplicates to - avoid a CWL file staging conflict. - run: - class: ExpressionTool - requirements: - InlineJavascriptRequirement: {} - LoadListingRequirement: { loadListing: shallow_listing } - inputs: - nested: Directory[] - outputs: - flattened: Directory - expression: | - ${ - var flat = []; - for (var i = 0; i < inputs.nested.length; i++) { - flat = - flat.concat( - inputs.nested[i].listing.filter( - // This ES5.1 filtering is very ugly - function(item) { - return ! flat.find( - function(x) { - return x.basename === - item.basename; - }); - })); - } - return { - "flattened": { - "class": "Directory", - "basename": "results", - "listing": flat - } - }; - } - in: - nested: - source: run_indiv/results - out: [flattened] diff --git a/figures_monoculture_inline.cwl b/figures_monoculture_inline.cwl deleted file mode 100644 index 38fd4ba..0000000 --- a/figures_monoculture_inline.cwl +++ /dev/null @@ -1,138 +0,0 @@ -cwlVersion: v1.2 -class: Workflow - -requirements: - MultipleInputFeatureRequirement: {} - StepInputExpressionRequirement: {} - ScatterFeatureRequirement: {} - InlineJavascriptRequirement: {} - -inputs: - model_list: - type: File[] - default: - - class: File - path: "metabolic_models/freudenreichii.sbml" - format: http://edamontology.org/format_2585 - - class: File - path: "metabolic_models/freudenreichii.sbml" - format: http://edamontology.org/format_2585 - - class: File - path: "metabolic_models/lactis.sbml" - format: http://edamontology.org/format_2585 - - class: File - path: "metabolic_models/plantarum.sbml" - format: http://edamontology.org/format_2585 - freud_sim_list: - type: string[] - default: - - "metabolites" - - "growth" - - "growth" - - "growth" - -outputs: - scattered_results: - type: Directory - outputSource: flatten/flattened - plots: - type: File[] - outputSource: - - plot_indiv/plots - # standard_output: - # type: File - # outputSource: plot_indiv/standard_output - # standard_error: - # type: File - # outputSource: plot_indiv/standard_error - -steps: - run_indiv: - doc: |- - For each model file in the input model_list, run an individual - simulation. - run: tango_models.cwl - scatter: [model, freud_sim] - scatterMethod: dotproduct - in: - model: - source: model_list - valueFrom: $([self]) # wrap in singleton array - freud_sim: - source: freud_sim_list - cobra_solver: - valueFrom: "glpk" - community_scale: - valueFrom: "False" - culture: - default: - class: File - location: "pipeline/config_file/config_culture.yml" - format: http://edamontology.org/format_3750 - dynamics: - default: - class: File - location: "pipeline/config_file/config_dynamic.yml" - format: http://edamontology.org/format_3750 - solver: - default: - class: File - location: "pipeline/config_file/config_optim.yml" - format: http://edamontology.org/format_3750 - out: [results] - - plot_indiv: - doc: |- - Use the combined simulation results to generate figure plots. - run: tango_plots.cwl - in: - figure: - valueFrom: "indiv" - results: - source: flatten/flattened - out: [plots, standard_output, standard_error] - - flatten: - doc: |- - Each individual simulation creates a separate results directory, - but plot_indiv expects a single directory. This step combines - the results into a single directory. Since a result file might - be generated several times by different runs, skip duplicates to - avoid a CWL file staging conflict. - run: - class: ExpressionTool - requirements: - InlineJavascriptRequirement: {} - LoadListingRequirement: { loadListing: shallow_listing } - inputs: - nested: Directory[] - outputs: - flattened: Directory - expression: | - ${ - var flat = []; - for (var i = 0; i < inputs.nested.length; i++) { - flat = - flat.concat( - inputs.nested[i].listing.filter( - // This ES5.1 filtering is very ugly - function(item) { - return ! flat.find( - function(x) { - return x.basename === - item.basename; - }); - })); - } - return { - "flattened": { - "class": "Directory", - "basename": "results", - "listing": flat - } - }; - } - in: - nested: - source: run_indiv/results - out: [flattened] diff --git a/figures_monoculture_job.yml b/figures_monoculture_job.yml deleted file mode 100644 index 44c2101..0000000 --- a/figures_monoculture_job.yml +++ /dev/null @@ -1,18 +0,0 @@ -model_list: - - class: File - path: "metabolic_models/freudenreichii.sbml" - format: http://edamontology.org/format_2585 - - class: File - path: "metabolic_models/freudenreichii.sbml" - format: http://edamontology.org/format_2585 - - class: File - path: "metabolic_models/lactis.sbml" - format: http://edamontology.org/format_2585 - - class: File - path: "metabolic_models/plantarum.sbml" - format: http://edamontology.org/format_2585 -freud_sim_list: - - "growth" - - "metabolites" - - "growth" - - "growth" diff --git a/indiv_freud_job.yml b/indiv_freud_job.yml deleted file mode 100644 index 0dd70b6..0000000 --- a/indiv_freud_job.yml +++ /dev/null @@ -1,18 +0,0 @@ -cobra_solver: "glpk" -community_scale: "False" -culture: - class: File - path: "pipeline/config_file/config_culture.yml" - format: http://edamontology.org/format_3750 -dynamics: - class: File - path: "pipeline/config_file/config_dynamic.yml" - format: http://edamontology.org/format_3750 -model: - - class: File - path: "metabolic_models/freudenreichii.sbml" - format: http://edamontology.org/format_2585 -solver: - class: File - path: "pipeline/config_file/config_optim.yml" - format: http://edamontology.org/format_3750 diff --git a/tango_dynamics_wf.cwl b/pipeline/tango_dynamics_wf.cwl similarity index 96% rename from tango_dynamics_wf.cwl rename to pipeline/tango_dynamics_wf.cwl index d829bef..0b7a050 100644 --- a/tango_dynamics_wf.cwl +++ b/pipeline/tango_dynamics_wf.cwl @@ -85,7 +85,7 @@ steps: out: [results] combine: - run: pipeline/flatten_array.cwl + run: flatten_array.cwl in: nested: source: [individual_each/results, community_together/results] @@ -93,7 +93,7 @@ steps: out: [flattened] combine_time_series: - run: pipeline/flatten_array.cwl + run: flatten_array.cwl in: nested: source: [individual_each/time_series] diff --git a/tango_models.cwl b/pipeline/tango_models.cwl similarity index 97% rename from tango_models.cwl rename to pipeline/tango_models.cwl index 46100c4..20be795 100644 --- a/tango_models.cwl +++ b/pipeline/tango_models.cwl @@ -74,7 +74,7 @@ inputs: models default: class: File - location: "pipeline/config_file/config_culture.yml" + location: "config_file/config_culture.yml" format: http://edamontology.org/format_3750 inputBinding: position: 2 @@ -87,7 +87,7 @@ inputs: configuration default: class: Directory - location: ./data + location: ../data dynamics: type: File @@ -96,7 +96,7 @@ inputs: specific paramters for your dFBA analysis default: class: File - location: "pipeline/config_file/config_dynamic.yml" + location: "config_file/config_dynamic.yml" format: http://edamontology.org/format_3750 inputBinding: position: 3 @@ -189,7 +189,7 @@ inputs: configuration file for optimization default: class: File - location: "pipeline/config_file/config_optim.yml" + location: "config_file/config_optim.yml" format: http://edamontology.org/format_3750 inputBinding: position: 4 diff --git a/tango_one_plot.cwl b/pipeline/tango_one_plot.cwl similarity index 98% rename from tango_one_plot.cwl rename to pipeline/tango_one_plot.cwl index 2de3440..135d9aa 100644 --- a/tango_one_plot.cwl +++ b/pipeline/tango_one_plot.cwl @@ -30,7 +30,7 @@ inputs: configuration default: class: Directory - location: ./data + location: ../data figure: doc: |- @@ -57,7 +57,7 @@ inputs: Scripts for predefined figures default: class: Directory - location: ./scripts + location: ../scripts baseCommand: - python diff --git a/tango_optimization_wf.cwl b/pipeline/tango_optimization_wf.cwl similarity index 100% rename from tango_optimization_wf.cwl rename to pipeline/tango_optimization_wf.cwl diff --git a/tango_plots.cwl b/pipeline/tango_plots.cwl similarity index 92% rename from tango_plots.cwl rename to pipeline/tango_plots.cwl index c3f2911..2bdd2d4 100644 --- a/tango_plots.cwl +++ b/pipeline/tango_plots.cwl @@ -26,7 +26,7 @@ steps: name: valueFrom: "results" out: [out_dir] - run: pipeline/mkdir_files.cwl + run: mkdir_files.cwl plot_figures: scatter: [figure] @@ -45,4 +45,4 @@ steps: nested: source: plot_figures/plots out: [flattened] - run: pipeline/flatten_array.cwl + run: flatten_array.cwl diff --git a/tango_experiments+types_job.yml b/tango_experiments+types_job.yml deleted file mode 100644 index bb64016..0000000 --- a/tango_experiments+types_job.yml +++ /dev/null @@ -1,46 +0,0 @@ -pure_culture: - - models: - - class: File - path: "metabolic_models/freudenreichii.sbml" - format: http://edamontology.org/format_2585 - freud_sim: "metabolites" - - models: - - class: File - path: "metabolic_models/freudenreichii.sbml" - format: http://edamontology.org/format_2585 - freud_sim: "growth" - - models: - - class: File - path: "metabolic_models/lactis.sbml" - format: http://edamontology.org/format_2585 - freud_sim: "growth" - - models: - - class: File - path: "metabolic_models/plantarum.sbml" - format: http://edamontology.org/format_2585 - freud_sim: "growth" - -community: - - models: - - class: File - path: "metabolic_models/freudenreichii.sbml" - format: http://edamontology.org/format_2585 - - class: File - path: "metabolic_models/lactis.sbml" - format: http://edamontology.org/format_2585 - - class: File - path: "metabolic_models/plantarum.sbml" - format: http://edamontology.org/format_2585 - -dynamics: - class: File - location: "pipeline/config_file/config_dynamic.yml" - format: http://edamontology.org/format_3750 -culture: - class: File - location: "pipeline/config_file/config_culture.yml" - format: http://edamontology.org/format_3750 -solver: - class: File - location: "pipeline/config_file/config_optim.yml" - format: http://edamontology.org/format_3750 diff --git a/tango_plots_job.yml b/tango_plots_job.yml deleted file mode 100644 index bcbb122..0000000 --- a/tango_plots_job.yml +++ /dev/null @@ -1,24 +0,0 @@ -results: - - class: File - path: "results/result_community_model_whole.pkz" - - class: File - path: "results/result_dfba_com_without_optim.csv" - - class: File - path: "results/result_pure_culture_model_freudenreichii_growth.pkz" - - class: File - path: "results/result_pure_culture_model_freudenreichii_metabolites.pkz" - - class: File - path: "results/result_pure_culture_model_lactis.pkz" - - class: File - path: "results/result_pure_culture_model_plantarum.pkz" - -models: - class: Directory - location: ./metabolic_models -which_figures: - - com - - flux - - indiv - - goodness_of_fit - - switch_pathways - - transcripts diff --git a/tango_workflow.cwl b/tango_workflow.cwl index d82f655..5c6d864 100644 --- a/tango_workflow.cwl +++ b/tango_workflow.cwl @@ -46,7 +46,7 @@ steps: pure_culture: source: pure_culture out: [result_optimization] - run: tango_optimization_wf.cwl + run: pipeline/tango_optimization_wf.cwl dynamics: in: @@ -57,7 +57,7 @@ steps: community: source: community out: [results, time_series] - run: tango_dynamics_wf.cwl + run: pipeline/tango_dynamics_wf.cwl mkdir_results: in: @@ -78,7 +78,7 @@ steps: models: source: models_dir out: [figures] - run: tango_plots.cwl + run: pipeline/tango_plots.cwl mkdir_figures: in: -- GitLab From 04e9ab8511370946e2f183bf094422b857203b54 Mon Sep 17 00:00:00 2001 From: David James Sherman <david.sherman@inria.fr> Date: Tue, 23 Apr 2024 16:11:08 +0200 Subject: [PATCH 13/14] Also record time series from community simulation --- pipeline/tango_dynamics_wf.cwl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipeline/tango_dynamics_wf.cwl b/pipeline/tango_dynamics_wf.cwl index 0b7a050..ff10800 100644 --- a/pipeline/tango_dynamics_wf.cwl +++ b/pipeline/tango_dynamics_wf.cwl @@ -82,7 +82,7 @@ steps: solver: source: community valueFrom: $(self.solver || null) - out: [results] + out: [results, time_series] combine: run: flatten_array.cwl @@ -96,6 +96,6 @@ steps: run: flatten_array.cwl in: nested: - source: [individual_each/time_series] + source: [individual_each/time_series, community_together/time_series] linkMerge: merge_flattened out: [flattened] -- GitLab From 1956c35ed5c2247f8d086299d79ea345ed4eb57d Mon Sep 17 00:00:00 2001 From: David James Sherman <david.sherman@inria.fr> Date: Wed, 24 Apr 2024 08:50:40 +0200 Subject: [PATCH 14/14] Cleanup and improve documentation --- mock/data | 1 - mock/pipeline | 1 - mock/tango_models.cwl | 280 ----------------------------- mock/tango_models_mock.py | 84 --------- pipeline/combine_files.cwl | 22 --- pipeline/combine_res_optim.cwl | 21 --- pipeline/flatten_array.cwl | 9 +- pipeline/tango_dynamics_wf.cwl | 4 +- pipeline/tango_models.cwl | 9 +- pipeline/tango_one_plot.cwl | 2 +- pipeline/tango_optimization_wf.cwl | 3 +- pipeline/tango_plots.cwl | 4 + tango_workflow.cwl | 44 +++++ 13 files changed, 68 insertions(+), 416 deletions(-) delete mode 120000 mock/data delete mode 120000 mock/pipeline delete mode 100644 mock/tango_models.cwl delete mode 100755 mock/tango_models_mock.py delete mode 100644 pipeline/combine_files.cwl delete mode 100644 pipeline/combine_res_optim.cwl diff --git a/mock/data b/mock/data deleted file mode 120000 index eed2d0b..0000000 --- a/mock/data +++ /dev/null @@ -1 +0,0 @@ -../data/ \ No newline at end of file diff --git a/mock/pipeline b/mock/pipeline deleted file mode 120000 index 32aee15..0000000 --- a/mock/pipeline +++ /dev/null @@ -1 +0,0 @@ -../pipeline \ No newline at end of file diff --git a/mock/tango_models.cwl b/mock/tango_models.cwl deleted file mode 100644 index b40b82c..0000000 --- a/mock/tango_models.cwl +++ /dev/null @@ -1,280 +0,0 @@ -cwlVersion: v1.2 -class: CommandLineTool - -label: Numerical reconciliation of bacterial fermentation in cheese production -doc: |- - TANGO uses a numerical strategy to reconcile multi-omics data and - metabolic networks for characterising bacterial fermentation in - cheese production composed of 3 species: - *P. freudenreichii*, *L. lactis* and *L. plantarum* - -requirements: - ShellCommandRequirement: {} - InlineJavascriptRequirement: {} - InitialWorkDirRequirement: - listing: - - $(inputs.data) - - $(inputs.initial_res_optim) - - basename: "results" - class: Directory - listing: [] - - basename: "pipeline" - class: Directory - listing: [] - NetworkAccess: - class: NetworkAccess - networkAccess: true - -hints: - DockerRequirement: - dockerImageId: tango_models - dockerFile: |- - FROM continuumio/miniconda - WORKDIR /usr/src/app - COPY ./ ./ - RUN conda env create -f env-minimal.yml - -inputs: - cobra_solver: - doc: |- - solver used for FBA computation by Cobra - {glpk,glpk_exact,cplex,scipy} - type: - # - "null" - - type: enum - symbols: - - "glpk" - - "glpk_exact" - - "cplex" - - "scipy" - default: "glpk" - inputBinding: - position: 5 - prefix: "-CobraSolver" - - community_scale: - doc: |- - dFBA at the community scale {True,False} - type: - # - "null" - - type: enum - symbols: - - "True" - - "False" - default: "False" - inputBinding: - position: 5 - prefix: "-com" - - culture: - type: File - format: edam:format_3750 - doc: |- - specific media and/or specif modification applied to the FBA - models - default: - class: File - location: "pipeline/config_file/config_culture.yml" - format: http://edamontology.org/format_3750 - inputBinding: - position: 2 - prefix: "-cp" - - data: - type: Directory - doc: |- - Directory containing experimental data, referenced in optimize - configuration - default: - class: Directory - location: ./data - - dynamics: - type: File - format: edam:format_3750 - doc: |- - specific paramters for your dFBA analysis - default: - class: File - location: "pipeline/config_file/config_dynamic.yml" - format: http://edamontology.org/format_3750 - inputBinding: - position: 3 - prefix: "-dp" - - freud_sim: - doc: |- - different initial conditions for lactate where defined in the - experiments for growth and metabolite dosage => this parameter - allows to switch between both situations {growth,metabolites} - type: - # - "null" - - type: enum - symbols: - - "growth" - - "metabolites" - default: "growth" - inputBinding: - position: 5 - prefix: "-fsim" - - lactic_acid_model: - doc: |- - lactic acid model use total if lactate represents total lactic - acid concentration, or dissociated if lactate represents the - dissociated lactic acid {total,dissociated} - type: - # - "null" - - type: enum - symbols: - - "total" - - "dissociated" - default: "total" - inputBinding: - position: 5 - prefix: "-lam" - - model: - type: - type: array - items: File - format: edam:format_2585 - doc: |- - SBML model - inputBinding: - position: 1 - prefix: "-mp" - - optimize: - doc: |- - activatee or not the optimization on parameters {True,False} - type: - # - "null" - - type: enum - symbols: - - "True" - - "False" - default: "False" - inputBinding: - position: 5 - prefix: "-optim" - - recovery: - doc: |- - activate or not recovery of the optimization on parameters - {True,False} - type: - # - "null" - - type: enum - symbols: - - "True" - - "False" - default: "False" - inputBinding: - position: 6 - prefix: "-r" - - initial_res_optim: - type: File? - doc: |- - Per-species optimization results - # default: - # class: File - # location: ./res_optim.txt - - solver: - type: File - format: edam:format_3750 - doc: |- - configuration file for optimization - default: - class: File - location: "pipeline/config_file/config_optim.yml" - format: http://edamontology.org/format_3750 - inputBinding: - position: 4 - prefix: "-sp" - - verbose: - doc: |- - active or not verbose reporting {True,False} - type: - # - "null" - - type: enum - symbols: - - "True" - - "False" - default: "False" - inputBinding: - position: 6 - prefix: "-v" - -baseCommand: - - ./tango_models_mock - - sim - -outputs: - results: - type: File[] - outputBinding: - glob: "results/*.pkz" - outputEval: | - ${ - return self.filter(function(i) { return -1 == i.basename.indexOf("_t_") }) - } - doc: | - Pickled Pandas data frames containing simulation results - format: edam:format_2333 - recovery_optimisation: - type: File? - outputBinding: - glob: "recover_optim*yml" - doc: |- - Pickle file of recovery optimization - format: edam:format_2333 - result_optimization: - type: File - outputBinding: - glob: "res_optim.txt" - doc: |- - optimization results - format: edam:format_3475 - standard_output: - type: stdout - format: edam:format_1964 - standard_error: - type: stderr - format: edam:format_1964 - -stdout: stdout.txt -stderr: stderr.txt - -s:author: - - class: s:Person - s:name: Simon Labarthe - s:identifier: https://orcid.org/0000-0003-2114-0697 - - class: s:Person - s:name: Clémence Frioux - s:identifier: https://orcid.org/0000-0003-2114-0697 - - class: s:Person - s:name: David James Sherman - s:identifier: https://orcid.org/0000-0002-2316-1005 - - class: s:Person - s:name: Maxime Lecomte - s:identifier: https://orcid.org/0000-0002-4558-6151 - - class: s:Person - s:name: Hélène Falentin - s:identifier: https://orcid.org/0000-0001-6254-5303 - - class: s:Person - s:name: Julie Aubert - s:identifier: https://orcid.org/0000-0001-5203-5748 - -s:dateCreated: "2024-03-02" -s:license: https://spdx.org/licenses/LGPL-3.0-or-later - -$namespaces: - s: https://schema.org/ - edam: http://edamontology.org/ -$schemas: - - https://schema.org/version/latest/schemaorg-current-http.rdf - - http://edamontology.org/EDAM_1.23.owl diff --git a/mock/tango_models_mock.py b/mock/tango_models_mock.py deleted file mode 100755 index 282f4d1..0000000 --- a/mock/tango_models_mock.py +++ /dev/null @@ -1,84 +0,0 @@ -#!/usr/bin/env python3 - -import click -import sys -from pathlib import Path - - -@click.command() -@click.option( - "--cobra_solver", help="solver used for FBA computation by Cobra", default="glpk" -) -@click.option("--community_scale", help="dFBA at the community scale", default="False") -@click.option( - "--freud_sim", help="different initial conditions for lactate", default="growth" -) -@click.option("--lactic_acid_model", help="lactic acid model", default="total") -@click.option("--optimize", help="type: array", default="False") -@click.option( - "--recovery", help="activate or not recovery of the optimization", default="False" -) -@click.option( - "--verbose", help="active or not verbose reporting {True,False}", default="False" -) -@click.argument( - "culture", - # help="specific media and/or specif modification", - type=click.File("rb"), - default="pipeline/config_file/config_culture.yml", -) -@click.argument( - "dynamics", - # help="specific paramters for your dFBA analysis", - type=click.File("rb"), - default="pipeline/config_file/config_dynamic.yml", -) -@click.argument( - "solver", - # help="configuration file for optimization", - type=click.File("rb"), - default="pipeline/config_file/config_optim.yml", -) -@click.argument( - "data", - # help="experimental data", - type=click.Path("rb"), - default="./data", -) -@click.argument( - "initial_res_optim", - # help="Per-species optimization results", - type=click.Path(exists=False), - default="./res_optim.txt", -) -@click.argument( - "model", - # help="SBML model", - nargs=-1, -) -def tango_models( - cobra_solver, - community_scale, - freud_sim, - lactic_acid_model, - optimize, - recovery, - verbose, - culture, - dynamics, - solver, - data, - initial_res_optim, - model, -): - """Mock TANGO models.""" - click.echo(f"TANGO") - - results = Path("results/") - results.mkdir(parents=True, exist_ok=True) - - click.echo(f'Models { ", ".join(model) }') - - -if __name__ == "__main__": - tango_models() diff --git a/pipeline/combine_files.cwl b/pipeline/combine_files.cwl deleted file mode 100644 index b1dc9aa..0000000 --- a/pipeline/combine_files.cwl +++ /dev/null @@ -1,22 +0,0 @@ -cwlVersion: v1.2 -class: CommandLineTool -requirements: - InlineJavascriptRequirement: {} - InitialWorkDirRequirement: - listing: - - entryname: $(inputs.name) - entry: ${ return inputs.file_list.map(function(f) {return f.contents}).join("") } -inputs: - name: string - file_list: - type: - type: array - items: File - inputBinding: - loadContents: true -outputs: - concat: - type: File - outputBinding: - glob: $(inputs.name) -baseCommand: ["true"] diff --git a/pipeline/combine_res_optim.cwl b/pipeline/combine_res_optim.cwl deleted file mode 100644 index d0f479b..0000000 --- a/pipeline/combine_res_optim.cwl +++ /dev/null @@ -1,21 +0,0 @@ -cwlVersion: v1.2 -class: CommandLineTool -requirements: - InlineJavascriptRequirement: {} - InitialWorkDirRequirement: - listing: - - entryname: res_optim.txt - entry: ${ return inputs.file_list.map(function(f) {return f.contents}).join("") } -inputs: - file_list: - type: - type: array - items: File - inputBinding: - loadContents: true -outputs: - concat: - type: File - outputBinding: - glob: "res_optim.txt" -baseCommand: ["true"] diff --git a/pipeline/flatten_array.cwl b/pipeline/flatten_array.cwl index 9b701a9..b70bf55 100644 --- a/pipeline/flatten_array.cwl +++ b/pipeline/flatten_array.cwl @@ -1,8 +1,13 @@ +--- cwlVersion: v1.2 class: ExpressionTool -doc: "Flatten a nested array of 'Any' type into an array." + +doc: | + Flatten a nested array of 'Any' type into an array. + requirements: InlineJavascriptRequirement: {} + inputs: nested: type: @@ -10,11 +15,13 @@ inputs: items: type: array items: ["null", Any] + outputs: flattened: type: type: array items: Any + expression: | ${ var flattened = []; diff --git a/pipeline/tango_dynamics_wf.cwl b/pipeline/tango_dynamics_wf.cwl index ff10800..c1694f4 100644 --- a/pipeline/tango_dynamics_wf.cwl +++ b/pipeline/tango_dynamics_wf.cwl @@ -36,7 +36,9 @@ outputs: doc: |- For each model file in the input model_list, run an individual - simulation. + simulation. For each collection of models into a community, run + a community simulation. Return both summary and time series + simulation results. steps: diff --git a/pipeline/tango_models.cwl b/pipeline/tango_models.cwl index 20be795..1fdaf43 100644 --- a/pipeline/tango_models.cwl +++ b/pipeline/tango_models.cwl @@ -6,7 +6,7 @@ doc: |- TANGO uses a numerical strategy to reconcile multi-omics data and metabolic networks for characterising bacterial fermentation in cheese production composed of 3 species: - *P. freudenreichii*, *L. lactis* and *L. plantarum* + *P. freudenreichii*, *L. lactis* and *L. plantarum*. requirements: ShellCommandRequirement: {} @@ -282,9 +282,12 @@ s:author: - class: s:Person s:name: Julie Aubert s:identifier: https://orcid.org/0000-0001-5203-5748 - -s:dateCreated: "2024-03-02" + +s:citation: https://doi.org/10.1016/j.ymben.2024.02.014 +s:codeRepository: https://forgemia.inra.fr/tango/tango_models.git s:license: https://spdx.org/licenses/LGPL-3.0-or-later +s:programmingLanguage: Python +s:dateCreated: "2024-03-02" $namespaces: s: https://schema.org/ diff --git a/pipeline/tango_one_plot.cwl b/pipeline/tango_one_plot.cwl index 135d9aa..7a77c21 100644 --- a/pipeline/tango_one_plot.cwl +++ b/pipeline/tango_one_plot.cwl @@ -3,7 +3,7 @@ class: CommandLineTool label: Plot TANGO results doc: |- - Use plot_* scripts to make figures from TANGO results + Use plot_* scripts to make figures from TANGO results. requirements: ShellCommandRequirement: {} diff --git a/pipeline/tango_optimization_wf.cwl b/pipeline/tango_optimization_wf.cwl index 3af2e68..d2e2856 100644 --- a/pipeline/tango_optimization_wf.cwl +++ b/pipeline/tango_optimization_wf.cwl @@ -25,7 +25,8 @@ outputs: doc: |- For each model file in the input model_list, run an individual - optimization. Return combined result_optimization. + optimization to obtain lambda, k_lactate, vmax_lactose parameters + as each model requires. Return a combined result_optimization. steps: diff --git a/pipeline/tango_plots.cwl b/pipeline/tango_plots.cwl index 2bdd2d4..3208be1 100644 --- a/pipeline/tango_plots.cwl +++ b/pipeline/tango_plots.cwl @@ -1,6 +1,10 @@ cwlVersion: v1.2 class: Workflow +label: Plot TANGO results +doc: |- + Assemble all figures requested by which_figures. + requirements: MultipleInputFeatureRequirement: {} StepInputExpressionRequirement: {} diff --git a/tango_workflow.cwl b/tango_workflow.cwl index 5c6d864..fc5527b 100644 --- a/tango_workflow.cwl +++ b/tango_workflow.cwl @@ -1,6 +1,17 @@ cwlVersion: v1.2 class: Workflow +label: Numerical reconciliation of bacterial fermentation in cheese production +doc: |- + Complete workflow for TANGO as reported in Lecomte et al (2024), + "Revealing the dynamics and mechanisms of bacterial interactions in + cheese production with metabolic modelling", Metabolic Eng. 83:24-38 + https://doi.org/10.1016/j.ymben.2024.02.014 + + 1. Parameters for individual models are obtained by optimization + 2. Individual dynamics and community dynamics are simulated + 3. Figures for the manuscript are assembled from the results. + requirements: MultipleInputFeatureRequirement: {} StepInputExpressionRequirement: {} @@ -88,3 +99,36 @@ steps: valueFrom: "figures" out: [out_dir] run: pipeline/mkdir_files.cwl + +s:author: + - class: s:Person + s:name: Simon Labarthe + s:identifier: https://orcid.org/0000-0003-2114-0697 + - class: s:Person + s:name: Clémence Frioux + s:identifier: https://orcid.org/0000-0003-2114-0697 + - class: s:Person + s:name: David James Sherman + s:identifier: https://orcid.org/0000-0002-2316-1005 + - class: s:Person + s:name: Maxime Lecomte + s:identifier: https://orcid.org/0000-0002-4558-6151 + - class: s:Person + s:name: Hélène Falentin + s:identifier: https://orcid.org/0000-0001-6254-5303 + - class: s:Person + s:name: Julie Aubert + s:identifier: https://orcid.org/0000-0001-5203-5748 + +s:citation: https://doi.org/10.1016/j.ymben.2024.02.014 +s:codeRepository: https://forgemia.inra.fr/tango/tango_models.git +s:license: https://spdx.org/licenses/LGPL-3.0-or-later +s:programmingLanguage: Python +s:dateCreated: "2024-03-02" + +$namespaces: + s: https://schema.org/ + edam: http://edamontology.org/ +$schemas: + - https://schema.org/version/latest/schemaorg-current-http.rdf + - http://edamontology.org/EDAM_1.23.owl -- GitLab