Mercurial > repos > siwaa > carboseq_s
changeset 5:3e7978af2ba2 draft
"planemo upload for repository https://forgemia.inra.fr/carboseq/record-projet-carboseq/-/tree/5f761ea7dab0a6f8c978cbbe4946e16edc6d032e/toos/CarboSeqSimulator commit 5f761ea7dab0a6f8c978cbbe4946e16edc6d032e-dirty"
author | siwaa |
---|---|
date | Wed, 28 Aug 2024 14:53:30 +0000 |
parents | a069fc43b0b1 |
children | f9183e245cdb |
files | carboseqSplit.xml carboseqUnsplit.R carboseqUnsplit.xml csopraSplitInput.py |
diffstat | 4 files changed, 127 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
<!-- carboseqSplit.xml: Galaxy wrapper around csopraSplitInput.py. It takes one
     CarboSeq input archive and produces a list collection of smaller archives. -->
<tool id="CarboSeqSplit" name="CarboSeqSplit" version="1.0.1">
    <description>To split input data of CarboSeq</description>
    <requirements>
        <container type="singularity">docker://registry.forgemia.inra.fr/carboseq/record-projet-carboseq:latest</container>
    </requirements>
    <!-- Paths are single-quoted so that a tool directory or dataset path
         containing spaces or shell metacharacters is passed as one argument. -->
    <command detect_errors="exit_code"><![CDATA[
        python '$__tool_directory__/csopraSplitInput.py' '${inputFile}' ${chunkSize}
    ]]>
    </command>
    <inputs>
        <param name="inputFile" type="data" format="zip" label="CarboSeq input (zip)"/>
        <!-- No "format" attribute here: it only applies to data parameters. -->
        <param name="chunkSize" type="integer" value="1" min="1" max="20000" label="chunk size" help="Number of consecutive ID values (from soil.csv and crop.csv) placed in each output zip"/>
    </inputs>
    <outputs>
        <!-- csopraSplitInput.py writes one zip per chunk into ./outputs;
             every file found there becomes one element of the list. -->
        <collection name="splited" type="list" label="A list of CarboSeq input (zip)">
            <discover_datasets pattern="__name_and_ext__" directory="outputs"/>
        </collection>
    </outputs>
<tests>
</tests>
<help>
the CarboSeq CarboseqSimulator tool
===================================

credits
-------
- Wrapper Maintainer: Patrick Chabrier patrick.chabrier@inrae.fr
- Tool Site: https://forgemia.inra.fr/carboseq/record-projet-carboseq
- Copyright: INRAE
</help>
</tool>
# carboseqUnsplit.R
#
# Merge the per-chunk CarboSeq results found in ./inputs into a single result.
#
# Every file in ./inputs is read with load() and must define an object named
# `res` whose `SOC` and `forcings` members can be row-bound. The merged object
# is saved, again under the name `res`, to ./csopraRes.Rdata.

input_dir <- file.path('.', 'inputs')

merged <- list(SOC = data.frame(), forcings = data.frame())

# list.files() returns names in alphabetical order, so that is the order in
# which the chunks are appended.
for (chunk_file in list.files(input_dir)) {
  # Load into a scratch environment so that whatever else the file contains
  # cannot overwrite the accumulator or the loop variable.
  chunk_env <- new.env()
  load(file.path(input_dir, chunk_file), envir = chunk_env)

  if (!exists('res', envir = chunk_env, inherits = FALSE)) {
    stop("no object named 'res' found in ", chunk_file)
  }
  chunk <- get('res', envir = chunk_env, inherits = FALSE)

  merged$SOC <- rbind(merged$SOC, chunk$SOC)
  merged$forcings <- rbind(merged$forcings, chunk$forcings)
}

# Downstream consumers expect the object to be called `res`.
res <- merged

save(res, file = 'csopraRes.Rdata')
<!-- carboseqUnsplit.xml: Galaxy wrapper around carboseqUnsplit.R. It copies
     every element of a list collection into ./inputs, runs the R script and
     returns the merged csopraRes.Rdata file. -->
<tool id="CarboSeqUnsplit" name="CarboSeqUnsplit" version="1.0.1">
    <description>To unsplit CarboSeq results</description>
    <requirements>
        <container type="singularity">docker://registry.forgemia.inra.fr/csopra/csopralibs:siwa</container>
    </requirements>
    <!-- Paths are single-quoted so that a tool directory or dataset path
         containing spaces or shell metacharacters is passed as one argument. -->
    <command detect_errors="exit_code"><![CDATA[
        mkdir inputs &&
        #for $input in $inputList
            cp '$input' inputs &&
        #end for
        Rscript '$__tool_directory__/carboseqUnsplit.R' &&
        cp csopraRes.Rdata '$output'
    ]]>
    </command>
    <inputs>
        <param name="inputList" type="data_collection" collection_type="list" label="CarboSeq results list (zip of .rdata)"/>
    </inputs>
    <outputs>
        <data format="rdata" name="output" label="CarboSeq output (rdata)"/>
    </outputs>
<tests>
</tests>
<help>
the CarboSeq CarboseqSimulator tool
===================================

credits
-------
- Wrapper Maintainer: Patrick Chabrier patrick.chabrier@inrae.fr
- Tool Site: https://forgemia.inra.fr/carboseq/record-projet-carboseq
- Copyright: INRAE
</help>
</tool>
"""Split a CarboSeq input archive into chunks (csopraSplitInput.py).

Usage::

    python csopraSplitInput.py <input.zip> <chunk_size>

The archive is unpacked into ``./input`` and one zip per chunk is written to
``./outputs``.
"""
import math
import os
import shutil
import sys

import pandas as pd

# Working directories, relative to the current directory.
INPUT_FOLDER = "input"
OUTPUT_FOLDER = "outputs"


def split_csv(input_folder, output_folder, max_chunck_size):
    """Split ``soil.csv`` and ``crop.csv`` into zipped chunks by ``ID`` range.

    Chunk ``i`` (1-based) receives the rows of both files whose ``ID`` lies in
    ``[(i - 1) * max_chunck_size + 1, i * max_chunck_size]``. The number of
    chunks is derived from the largest ``ID`` in ``soil.csv``. ``units.csv``
    is copied unchanged into every chunk.

    For each chunk a directory ``<name>_<i>`` and an archive ``<name>_<i>.zip``
    are created inside ``output_folder``, where ``<name>`` is the last path
    component of ``input_folder``.

    Args:
        input_folder: Directory holding ``units.csv``, ``soil.csv`` and
            ``crop.csv``.
        output_folder: Directory to create; it must not exist yet.
        max_chunck_size: Width of the ``ID`` range of one chunk (>= 1).

    Returns:
        The paths of the created zip archives, in chunk order.

    Raises:
        ValueError: If ``max_chunck_size`` is smaller than 1 or ``soil.csv``
            holds no usable ``ID`` value.
        FileExistsError: If ``output_folder`` already exists.
    """
    if max_chunck_size < 1:
        raise ValueError(
            f"max_chunck_size must be >= 1, got {max_chunck_size}"
        )

    unit_path = os.path.join(input_folder, "units.csv")
    soil = pd.read_csv(os.path.join(input_folder, "soil.csv"))
    crop = pd.read_csv(os.path.join(input_folder, "crop.csv"))

    max_id = soil["ID"].max()
    if soil.empty or pd.isna(max_id):
        raise ValueError("soil.csv contains no ID value; nothing to split")

    os.mkdir(output_folder)

    step = max_chunck_size
    nb_split = math.ceil(max_id / step)

    # Diagnostics, visible in the job's standard output.
    print(max_id)
    print(nb_split)

    # Use only the last path component for chunk names: joining
    # output_folder with a nested or absolute input path would place the
    # chunks outside output_folder.
    chunk_prefix = os.path.basename(os.path.normpath(input_folder))

    archives = []
    for i in range(1, nb_split + 1):
        # NOTE(review): ranges start at ID 1, so rows with ID < 1 end up in
        # no chunk -- confirm IDs are always 1-based.
        first_id = (i - 1) * step + 1
        last_id = i * step

        chunk_dir = os.path.join(output_folder, f"{chunk_prefix}_{i}")
        os.mkdir(chunk_dir)
        shutil.copy(unit_path, chunk_dir)

        # Series.between is inclusive on both ends by default.
        soil[soil["ID"].between(first_id, last_id)].to_csv(
            os.path.join(chunk_dir, "soil.csv"), index=False
        )
        crop[crop["ID"].between(first_id, last_id)].to_csv(
            os.path.join(chunk_dir, "crop.csv"), index=False
        )

        # Writes <chunk_dir>.zip next to the chunk directory.
        archives.append(shutil.make_archive(chunk_dir, "zip", chunk_dir))

    return archives


def main(argv=None):
    """Unpack the archive named on the command line and split it.

    Args:
        argv: ``[input_zip, chunk_size]``; defaults to ``sys.argv[1:]``.
    """
    args = sys.argv[1:] if argv is None else argv
    if len(args) < 2:
        sys.exit("usage: csopraSplitInput.py <input.zip> <chunk_size>")

    input_file = args[0]
    max_chunck_size = int(args[1])

    shutil.unpack_archive(input_file, INPUT_FOLDER, "zip")
    split_csv(INPUT_FOLDER, OUTPUT_FOLDER, max_chunck_size)


if __name__ == "__main__":
    main()