Mercurial > repos > siwaa > carboseq_s
changeset 10:20886bc40659 draft
"planemo upload for repository https://forgemia.inra.fr/carboseq/record-projet-carboseq/-/tree/b1cafd3eb61b631ee0b1f8f6c5ef2f9a0e2b4ccf/toos/CarboSeqSimulator commit b1cafd3eb61b631ee0b1f8f6c5ef2f9a0e2b4ccf-dirty"
author | siwaa |
---|---|
date | Tue, 17 Sep 2024 16:01:18 +0000 |
parents | 2af4b1e46d90 |
children | 097985b1b7c6 |
files | carboseqSplit.xml csopraSplitInput.py |
diffstat | 2 files changed, 16 insertions(+), 9 deletions(-) [+] |
line wrap: on
line diff
--- a/carboseqSplit.xml Tue Sep 17 10:41:39 2024 +0000
+++ b/carboseqSplit.xml Tue Sep 17 16:01:18 2024 +0000
@@ -1,4 +1,4 @@
-<tool id="CarboSeqSplit" name="CarboSeqSplit" version="1.0.1">
+<tool id="CarboSeqSplit" name="CarboSeqSplit" version="1.0.2">
 <description>To split input data of CarboSeq</description>
 <requirements>
 <container type="singularity">docker://registry.forgemia.inra.fr/carboseq/record-projet-carboseq:latest</container>
--- a/csopraSplitInput.py Tue Sep 17 10:41:39 2024 +0000
+++ b/csopraSplitInput.py Tue Sep 17 16:01:18 2024 +0000
@@ -14,23 +14,30 @@
 dfs = pd.read_csv(soil_path)
 dfc = pd.read_csv(crop_path)
- max_ID = dfs.loc[dfs['ID'].idxmax()]['ID']
+ dfs['ID'] = dfs['ID'].astype('int')
+ dfc['ID'] = dfc['ID'].astype('int')
+
+ IDs = dfs['ID'].unique()
+
+ max_ID = len(IDs)
 step = max_chunck_size
- nb_split = math.ceil(max_ID / max_chunck_size)
-
- print(max_ID)
+ nb_split = math.ceil(max_ID / max_chunck_size)
- print(nb_split)
+ for i in range(1, nb_split + 1):
+
+ firstID = IDs[(i-1) * step]
+ lastID = IDs[min((i * step) - 1 , max_ID - 1)]
- for i in range(1,nb_split + 1):
+ splitIDs = IDs[(i-1) * step : min((i * step) , max_ID)]
 dfSplit = pd.DataFrame()
- dfSplit = dfs.loc[((dfs['ID'] >= (((i-1) * step)) + 1) & (dfs['ID'] <= ((((i-1) * step)) + step )))]
+ dfSplit = dfs[dfs['ID'].isin(splitIDs)]
 dfcSplit = pd.DataFrame()
- dfcSplit = dfc.loc[((dfc['ID'] >= (((i-1) * step)) + 1) & (dfc['ID'] <= ((((i-1) * step)) + step )))]
+ dfcSplit = dfc[dfc['ID'].isin(splitIDs)]
 os.mkdir(os.path.join(output_folder, f"{input_folder}_{i}"))
 shutil.copy(unit_path, os.path.join(output_folder, f"{input_folder}_{i}"))