changeset 5:3e7978af2ba2 draft

"planemo upload for repository https://forgemia.inra.fr/carboseq/record-projet-carboseq/-/tree/5f761ea7dab0a6f8c978cbbe4946e16edc6d032e/toos/CarboSeqSimulator commit 5f761ea7dab0a6f8c978cbbe4946e16edc6d032e-dirty"
author siwaa
date Wed, 28 Aug 2024 14:53:30 +0000
parents a069fc43b0b1
children f9183e245cdb
files carboseqSplit.xml carboseqUnsplit.R carboseqUnsplit.xml csopraSplitInput.py
diffstat 4 files changed, 127 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/carboseqSplit.xml	Wed Aug 28 14:53:30 2024 +0000
@@ -0,0 +1,31 @@
+<tool id="CarboSeqSplit" name="CarboSeqSplit" version="1.0.1">
+  <description>To split input data of CarboSeq</description>
+  <requirements>
+    <container type="singularity">docker://registry.forgemia.inra.fr/carboseq/record-projet-carboseq:latest</container>
+  </requirements>
+  <command detect_errors="exit_code"><![CDATA[
+  python '$__tool_directory__/csopraSplitInput.py' '${inputFile}' ${chunkSize}
+    ]]>
+  </command>
+  <inputs> <!-- chunkSize is handed to csopraSplitInput.py as its second argument -->
+    <param name="inputFile" type="data" format="zip" label="CarboSeq input (zip)"/>
+    <param name="chunkSize" type="integer" value="1" min="1" max="20000" label="chunk size" help="chunk size"/>
+  </inputs>
+  <outputs> <!-- every name.ext file found in ./outputs becomes one element of the list -->
+    <collection name="splited" type="list" label="A list of CarboSeq input (zip)">
+      <discover_datasets pattern="__name_and_ext__" directory="outputs"/>
+    </collection>
+  </outputs>
+<tests>
+</tests>
+<help>
+the CarboSeq CarboseqSimulator tool
+===================================
+
+credits
+-------
+-  Wrapper Maintainer: Patrick Chabrier patrick.chabrier@inrae.fr
+-  Tool Site: https://forgemia.inra.fr/carboseq/record-projet-carboseq
+-  Copyright: INRAE
+</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/carboseqUnsplit.R	Wed Aug 28 14:53:30 2024 +0000
@@ -0,0 +1,13 @@
+
+# Merge the per-chunk CarboSeq results found in ./inputs into csopraRes.Rdata.
+merged <- list(SOC = data.frame(), forcings = data.frame())
+for (chunk_file in list.files('./inputs')) {
+    # load() is expected to bring an object named `res` into the workspace.
+    load(file.path('.', 'inputs', chunk_file))
+    merged$SOC <- rbind(merged$SOC, res$SOC)
+    merged$forcings <- rbind(merged$forcings, res$forcings)
+    rm(res)
+}
+
+res <- merged
+save(res, file = 'csopraRes.Rdata')
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/carboseqUnsplit.xml	Wed Aug 28 14:53:30 2024 +0000
@@ -0,0 +1,33 @@
+<tool id="CarboSeqUnsplit" name="CarboSeqUnsplit" version="1.0.1">
+  <description>To unsplit CarboSeq results</description>
+  <requirements>
+    <container type="singularity">docker://registry.forgemia.inra.fr/csopra/csopralibs:siwa</container>
+  </requirements>
+  <command detect_errors="exit_code"><![CDATA[
+  mkdir inputs &&
+  #for $input in $inputList
+  cp '$input' inputs &&
+  #end for
+  Rscript '$__tool_directory__/carboseqUnsplit.R' &&
+  cp csopraRes.Rdata '$output'
+    ]]>
+  </command>
+  <inputs> <!-- NOTE(review): carboseqUnsplit.R load()s each element directly; confirm the "zip" wording of the label -->
+    <param name="inputList" type="data_collection" collection_type="list" label="CarboSeq results list (zip of .rdata)"/>
+  </inputs>
+  <outputs> <!-- csopraRes.Rdata, written by carboseqUnsplit.R, is copied to this dataset -->
+    <data format='rdata' name="output" label="CarboSeq output (rdata)"/>
+  </outputs>
+<tests>
+</tests>
+<help>
+the CarboSeq CarboseqSimulator tool
+===================================
+
+credits
+-------
+-  Wrapper Maintainer: Patrick Chabrier patrick.chabrier@inrae.fr
+-  Tool Site: https://forgemia.inra.fr/carboseq/record-projet-carboseq
+-  Copyright: INRAE
+</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/csopraSplitInput.py	Wed Aug 28 14:53:30 2024 +0000
@@ -0,0 +1,50 @@
+import pandas as pd
+import os
+import shutil
+import math
+import sys
+
+def split_csv(input_folder, output_folder, max_chunck_size):
+    """Split the soil.csv and crop.csv of input_folder into zipped ID chunks.
+
+    Chunk i (1-based) keeps the rows whose 'ID' lies in
+    [(i-1)*max_chunck_size + 1, i*max_chunck_size]; units.csv is copied as is.
+    Each chunk is written to <output_folder>/<name>_<i>/ and archived next to
+    it as <name>_<i>.zip, where <name> is the last component of input_folder.
+    output_folder is created here and must not already exist.
+    """
+    if max_chunck_size < 1:
+        raise ValueError("max_chunck_size must be >= 1")
+    os.mkdir(output_folder)
+    dfs = pd.read_csv(os.path.join(input_folder, "soil.csv"))
+    dfc = pd.read_csv(os.path.join(input_folder, "crop.csv"))
+    unit_path = os.path.join(input_folder, "units.csv")
+    # Use only the last path component so that chunks always land inside
+    # output_folder, even when input_folder is a nested or absolute path.
+    base_name = os.path.basename(os.path.normpath(input_folder))
+
+    # An empty soil table yields no chunk instead of crashing on a missing max.
+    max_ID = dfs['ID'].max() if len(dfs) else 0
+    nb_split = math.ceil(max_ID / max_chunck_size)
+    print(max_ID)
+    print(nb_split)
+
+    for i in range(1, nb_split + 1):
+        low, high = (i - 1) * max_chunck_size + 1, i * max_chunck_size
+        chunk_dir = os.path.join(output_folder, f"{base_name}_{i}")
+        os.mkdir(chunk_dir)
+        shutil.copy(unit_path, chunk_dir)
+        dfs[dfs['ID'].between(low, high)].to_csv(os.path.join(chunk_dir, "soil.csv"), index=False)
+        dfc[dfc['ID'].between(low, high)].to_csv(os.path.join(chunk_dir, "crop.csv"), index=False)
+        shutil.make_archive(chunk_dir, 'zip', chunk_dir)
+
+# Command-line entry point: <script> <input zip> <max chunk size>
+input_file = sys.argv[1]
+max_chunck_size = int(sys.argv[2])
+
+# Fixed working directories, relative to the current directory.
+input_folder = "input"
+output_folder = "outputs"
+
+shutil.unpack_archive(input_file, input_folder, 'zip')
+split_csv(input_folder, output_folder, max_chunck_size)