changeset 14:e455667fe22f draft

"planemo upload for repository https://forgemia.inra.fr/carboseq/record-projet-carboseq/-/tree/494bf06621d4e5799cab9ef8384d4411a2cd4b84/toos/CarboSeqSimulator commit 494bf06621d4e5799cab9ef8384d4411a2cd4b84-dirty"
author siwaa
date Fri, 20 Sep 2024 09:00:02 +0000
parents 9ad79c2d8e0b
children de71663fbce4
files carboseqSplit.xml csopraSplitInput.py
diffstat 2 files changed, 35 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/carboseqSplit.xml	Thu Sep 19 17:57:10 2024 +0000
+++ b/carboseqSplit.xml	Fri Sep 20 09:00:02 2024 +0000
@@ -1,4 +1,4 @@
-<tool id="CarboSeqSplit" name="CarboSeqSplit" version="1.0.2">
+<tool id="CarboSeqSplit" name="CarboSeqSplit" version="1.0.3">
   <description>To split input data of CarboSeq</description>
   <requirements>
     <container type="singularity">docker://registry.forgemia.inra.fr/carboseq/record-projet-carboseq:latest</container>
--- a/csopraSplitInput.py	Thu Sep 19 17:57:10 2024 +0000
+++ b/csopraSplitInput.py	Fri Sep 20 09:00:02 2024 +0000
@@ -14,6 +14,15 @@
     dfs = pd.read_csv(soil_path)
     dfc = pd.read_csv(crop_path)
 
+    fileS = open(soil_path, 'r')
+    fileC = open(crop_path, 'r')
+    
+    LineS = fileS.readlines()
+    LineC = fileC.readlines()
+
+    headerS = LineS.pop(0)
+    headerC = LineC.pop(0)
+    
     dfs['ID'] = dfs['ID'].astype('int')
     dfc['ID'] = dfc['ID'].astype('int')
 
@@ -32,19 +41,37 @@
 
         splitIDs = IDs[(i-1) * step : min((i * step) , max_ID)]
         
+        whereS = dfs['ID'].isin(splitIDs)
+        whereSid = whereS[whereS].index.values
+        firstSid = min(whereSid)
+        lastSid = max(whereSid)
+        
+        lineSsplit = LineS[firstSid:lastSid+1]
 
-        dfSplit = pd.DataFrame()
-        dfSplit = dfs[dfs['ID'].isin(splitIDs)]
-
-        dfcSplit = pd.DataFrame()
-        dfcSplit = dfc[dfc['ID'].isin(splitIDs)]
+        whereC = dfc['ID'].isin(splitIDs)
+        whereCid = whereC[whereC].index.values
+        firstCid = min(whereCid)
+        lastCid = max(whereCid)
+        
+        lineCsplit = LineC[firstCid:lastCid+1]
         
         os.mkdir(os.path.join(output_folder, f"{input_folder}_{i}"))
         shutil.copy(unit_path, os.path.join(output_folder, f"{input_folder}_{i}"))
-        dfSplit.to_csv(os.path.join(output_folder, f"{input_folder}_{i}", "soil.csv"), index=False)
-        dfcSplit.to_csv(os.path.join(output_folder, f"{input_folder}_{i}", "crop.csv"), index=False)
+        
+        splitFileS = open(os.path.join(output_folder, f"{input_folder}_{i}", "soil.csv"), 'w')
+        splitFileS.writelines(headerS)
+        splitFileS.writelines(lineSsplit)
+        splitFileS.close()
+
+        splitFileC = open(os.path.join(output_folder, f"{input_folder}_{i}", "crop.csv"), 'w')
+        splitFileC.writelines(headerC)
+        splitFileC.writelines(lineCsplit)
+        splitFileC.close()
+        
         shutil.make_archive( os.path.join(output_folder,f"{input_folder}_{i}"), 'zip',  os.path.join(output_folder, f"{input_folder}_{i}"))
 
+
+
 # Example usage
 input_folder = "input"
 output_folder = "outputs"