Mercurial > repos > siwaa > carboseq_s
changeset 14:e455667fe22f draft
"planemo upload for repository https://forgemia.inra.fr/carboseq/record-projet-carboseq/-/tree/494bf06621d4e5799cab9ef8384d4411a2cd4b84/toos/CarboSeqSimulator commit 494bf06621d4e5799cab9ef8384d4411a2cd4b84-dirty"
author | siwaa |
---|---|
date | Fri, 20 Sep 2024 09:00:02 +0000 |
parents | 9ad79c2d8e0b |
children | de71663fbce4 |
files | carboseqSplit.xml csopraSplitInput.py |
diffstat | 2 files changed, 35 insertions(+), 8 deletions(-) [+] |
line wrap: on
line diff
--- a/carboseqSplit.xml Thu Sep 19 17:57:10 2024 +0000
+++ b/carboseqSplit.xml Fri Sep 20 09:00:02 2024 +0000
@@ -1,4 +1,4 @@
-<tool id="CarboSeqSplit" name="CarboSeqSplit" version="1.0.2">
+<tool id="CarboSeqSplit" name="CarboSeqSplit" version="1.0.3">
 <description>To split input data of CarboSeq</description>
 <requirements>
 <container type="singularity">docker://registry.forgemia.inra.fr/carboseq/record-projet-carboseq:latest</container>
--- a/csopraSplitInput.py Thu Sep 19 17:57:10 2024 +0000 +++ b/csopraSplitInput.py Fri Sep 20 09:00:02 2024 +0000 @@ -14,6 +14,15 @@ dfs = pd.read_csv(soil_path) dfc = pd.read_csv(crop_path) + fileS = open(soil_path, 'r') + fileC = open(crop_path, 'r') + + LineS = fileS.readlines() + LineC = fileC.readlines() + + headerS = LineS.pop(0) + headerC = LineC.pop(0) + dfs['ID'] = dfs['ID'].astype('int') dfc['ID'] = dfc['ID'].astype('int') @@ -32,19 +41,37 @@ splitIDs = IDs[(i-1) * step : min((i * step) , max_ID)] + whereS = dfs['ID'].isin(splitIDs) + whereSid = whereS[whereS].index.values + firstSid = min(whereSid) + lastSid = max(whereSid) + + lineSsplit = LineS[firstSid:lastSid+1] - dfSplit = pd.DataFrame() - dfSplit = dfs[dfs['ID'].isin(splitIDs)] - - dfcSplit = pd.DataFrame() - dfcSplit = dfc[dfc['ID'].isin(splitIDs)] + whereC = dfc['ID'].isin(splitIDs) + whereCid = whereC[whereC].index.values + firstCid = min(whereCid) + lastCid = max(whereCid) + + lineCsplit = LineC[firstCid:lastCid+1] os.mkdir(os.path.join(output_folder, f"{input_folder}_{i}")) shutil.copy(unit_path, os.path.join(output_folder, f"{input_folder}_{i}")) - dfSplit.to_csv(os.path.join(output_folder, f"{input_folder}_{i}", "soil.csv"), index=False) - dfcSplit.to_csv(os.path.join(output_folder, f"{input_folder}_{i}", "crop.csv"), index=False) + + splitFileS = open(os.path.join(output_folder, f"{input_folder}_{i}", "soil.csv"), 'w') + splitFileS.writelines(headerS) + splitFileS.writelines(lineSsplit) + splitFileS.close() + + splitFileC = open(os.path.join(output_folder, f"{input_folder}_{i}", "crop.csv"), 'w') + splitFileC.writelines(headerC) + splitFileC.writelines(lineCsplit) + splitFileC.close() + shutil.make_archive( os.path.join(output_folder,f"{input_folder}_{i}"), 'zip', os.path.join(output_folder, f"{input_folder}_{i}")) + + # Example usage input_folder = "input" output_folder = "outputs"