Mercurial > repos > siwaa > carboseq_s
comparison csopraSplitInput.py @ 10:20886bc40659 draft
"planemo upload for repository https://forgemia.inra.fr/carboseq/record-projet-carboseq/-/tree/b1cafd3eb61b631ee0b1f8f6c5ef2f9a0e2b4ccf/toos/CarboSeqSimulator commit b1cafd3eb61b631ee0b1f8f6c5ef2f9a0e2b4ccf-dirty"
author | siwaa |
---|---|
date | Tue, 17 Sep 2024 16:01:18 +0000 |
parents | 3e7978af2ba2 |
children | e455667fe22f |
comparison
equal
deleted
inserted
replaced
9:2af4b1e46d90 | 10:20886bc40659 |
---|---|
12 os.mkdir(output_folder) | 12 os.mkdir(output_folder) |
13 | 13 |
14 dfs = pd.read_csv(soil_path) | 14 dfs = pd.read_csv(soil_path) |
15 dfc = pd.read_csv(crop_path) | 15 dfc = pd.read_csv(crop_path) |
16 | 16 |
17 max_ID = dfs.loc[dfs['ID'].idxmax()]['ID'] | 17 dfs['ID'] = dfs['ID'].astype('int') |
| | 18 dfc['ID'] = dfc['ID'].astype('int') |
| | 19 |
| | 20 IDs = dfs['ID'].unique() |
| | 21 |
| | 22 max_ID = len(IDs) |
18 | 23 |
19 step = max_chunck_size | 24 step = max_chunck_size |
20 | 25 |
21 nb_split = math.ceil(max_ID / max_chunck_size) | 26 nb_split = math.ceil(max_ID / max_chunck_size) |
22 | 27 |
23 print(max_ID) | 28 for i in range(1, nb_split + 1): |
| | 29 |
| | 30 firstID = IDs[(i-1) * step] |
| | 31 lastID = IDs[min((i * step) - 1 , max_ID - 1)] |
24 | 32 |
25 print(nb_split) | 33 splitIDs = IDs[(i-1) * step : min((i * step) , max_ID)] |
26 | 34 |
27 for i in range(1,nb_split + 1): | |
28 | 35 |
29 dfSplit = pd.DataFrame() | 36 dfSplit = pd.DataFrame() |
30 dfSplit = dfs.loc[((dfs['ID'] >= (((i-1) * step)) + 1) & (dfs['ID'] <= ((((i-1) * step)) + step )))] | 37 dfSplit = dfs[dfs['ID'].isin(splitIDs)] |
31 | 38 |
32 dfcSplit = pd.DataFrame() | 39 dfcSplit = pd.DataFrame() |
33 dfcSplit = dfc.loc[((dfc['ID'] >= (((i-1) * step)) + 1) & (dfc['ID'] <= ((((i-1) * step)) + step )))] | 40 dfcSplit = dfc[dfc['ID'].isin(splitIDs)] |
34 | 41 |
35 os.mkdir(os.path.join(output_folder, f"{input_folder}_{i}")) | 42 os.mkdir(os.path.join(output_folder, f"{input_folder}_{i}")) |
36 shutil.copy(unit_path, os.path.join(output_folder, f"{input_folder}_{i}")) | 43 shutil.copy(unit_path, os.path.join(output_folder, f"{input_folder}_{i}")) |
37 dfSplit.to_csv(os.path.join(output_folder, f"{input_folder}_{i}", "soil.csv"), index=False) | 44 dfSplit.to_csv(os.path.join(output_folder, f"{input_folder}_{i}", "soil.csv"), index=False) |
38 dfcSplit.to_csv(os.path.join(output_folder, f"{input_folder}_{i}", "crop.csv"), index=False) | 45 dfcSplit.to_csv(os.path.join(output_folder, f"{input_folder}_{i}", "crop.csv"), index=False) |