changeset 10:20886bc40659 draft

"planemo upload for repository https://forgemia.inra.fr/carboseq/record-projet-carboseq/-/tree/b1cafd3eb61b631ee0b1f8f6c5ef2f9a0e2b4ccf/toos/CarboSeqSimulator commit b1cafd3eb61b631ee0b1f8f6c5ef2f9a0e2b4ccf-dirty"
author siwaa
date Tue, 17 Sep 2024 16:01:18 +0000
parents 2af4b1e46d90
children 097985b1b7c6
files carboseqSplit.xml csopraSplitInput.py
diffstat 2 files changed, 16 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
--- a/carboseqSplit.xml	Tue Sep 17 10:41:39 2024 +0000
+++ b/carboseqSplit.xml	Tue Sep 17 16:01:18 2024 +0000
@@ -1,4 +1,4 @@
-<tool id="CarboSeqSplit" name="CarboSeqSplit" version="1.0.1">
+<tool id="CarboSeqSplit" name="CarboSeqSplit" version="1.0.2">
   <description>To split input data of CarboSeq</description>
   <requirements>
     <container type="singularity">docker://registry.forgemia.inra.fr/carboseq/record-projet-carboseq:latest</container>
--- a/csopraSplitInput.py	Tue Sep 17 10:41:39 2024 +0000
+++ b/csopraSplitInput.py	Tue Sep 17 16:01:18 2024 +0000
@@ -14,23 +14,30 @@
     dfs = pd.read_csv(soil_path)
     dfc = pd.read_csv(crop_path)
 
-    max_ID = dfs.loc[dfs['ID'].idxmax()]['ID']
+    dfs['ID'] = dfs['ID'].astype('int')
+    dfc['ID'] = dfc['ID'].astype('int')
+
+    IDs = dfs['ID'].unique()
+    
+    max_ID = len(IDs)
 
     step = max_chunck_size
     
-    nb_split = math.ceil(max_ID / max_chunck_size)
-
-    print(max_ID)
+    nb_split = math.ceil(max_ID / max_chunck_size)   
 
-    print(nb_split)
+    for i in range(1, nb_split + 1):
+        
+        firstID = IDs[(i-1) * step]
+        lastID = IDs[min((i * step) - 1 , max_ID - 1)]
 
-    for i in range(1,nb_split + 1):
+        splitIDs = IDs[(i-1) * step : min((i * step) , max_ID)]
+        
 
         dfSplit = pd.DataFrame()
-        dfSplit = dfs.loc[((dfs['ID'] >= (((i-1) * step)) + 1) & (dfs['ID'] <= ((((i-1) * step)) + step )))]
+        dfSplit = dfs[dfs['ID'].isin(splitIDs)]
 
         dfcSplit = pd.DataFrame()
-        dfcSplit = dfc.loc[((dfc['ID'] >= (((i-1) * step)) + 1) & (dfc['ID'] <= ((((i-1) * step)) + step )))]
+        dfcSplit = dfc[dfc['ID'].isin(splitIDs)]
         
         os.mkdir(os.path.join(output_folder, f"{input_folder}_{i}"))
         shutil.copy(unit_path, os.path.join(output_folder, f"{input_folder}_{i}"))