comparison csopraSplitInput.py @ 10:20886bc40659 draft

"planemo upload for repository https://forgemia.inra.fr/carboseq/record-projet-carboseq/-/tree/b1cafd3eb61b631ee0b1f8f6c5ef2f9a0e2b4ccf/toos/CarboSeqSimulator commit b1cafd3eb61b631ee0b1f8f6c5ef2f9a0e2b4ccf-dirty"
author siwaa
date Tue, 17 Sep 2024 16:01:18 +0000
parents 3e7978af2ba2
children e455667fe22f
comparison
equal deleted inserted replaced
9:2af4b1e46d90 10:20886bc40659
12 os.mkdir(output_folder) 12 os.mkdir(output_folder)
13 13
14 dfs = pd.read_csv(soil_path) 14 dfs = pd.read_csv(soil_path)
15 dfc = pd.read_csv(crop_path) 15 dfc = pd.read_csv(crop_path)
16 16
17 max_ID = dfs.loc[dfs['ID'].idxmax()]['ID'] 17 dfs['ID'] = dfs['ID'].astype('int')
18 dfc['ID'] = dfc['ID'].astype('int')
19
20 IDs = dfs['ID'].unique()
21
22 max_ID = len(IDs)
18 23
19 step = max_chunck_size 24 step = max_chunck_size
20 25
21 nb_split = math.ceil(max_ID / max_chunck_size) 26 nb_split = math.ceil(max_ID / max_chunck_size)
22 27
23 print(max_ID) 28 for i in range(1, nb_split + 1):
29
30 firstID = IDs[(i-1) * step]
31 lastID = IDs[min((i * step) - 1 , max_ID - 1)]
24 32
25 print(nb_split) 33 splitIDs = IDs[(i-1) * step : min((i * step) , max_ID)]
26 34
27 for i in range(1,nb_split + 1):
28 35
29 dfSplit = pd.DataFrame() 36 dfSplit = pd.DataFrame()
30 dfSplit = dfs.loc[((dfs['ID'] >= (((i-1) * step)) + 1) & (dfs['ID'] <= ((((i-1) * step)) + step )))] 37 dfSplit = dfs[dfs['ID'].isin(splitIDs)]
31 38
32 dfcSplit = pd.DataFrame() 39 dfcSplit = pd.DataFrame()
33 dfcSplit = dfc.loc[((dfc['ID'] >= (((i-1) * step)) + 1) & (dfc['ID'] <= ((((i-1) * step)) + step )))] 40 dfcSplit = dfc[dfc['ID'].isin(splitIDs)]
34 41
35 os.mkdir(os.path.join(output_folder, f"{input_folder}_{i}")) 42 os.mkdir(os.path.join(output_folder, f"{input_folder}_{i}"))
36 shutil.copy(unit_path, os.path.join(output_folder, f"{input_folder}_{i}")) 43 shutil.copy(unit_path, os.path.join(output_folder, f"{input_folder}_{i}"))
37 dfSplit.to_csv(os.path.join(output_folder, f"{input_folder}_{i}", "soil.csv"), index=False) 44 dfSplit.to_csv(os.path.join(output_folder, f"{input_folder}_{i}", "soil.csv"), index=False)
38 dfcSplit.to_csv(os.path.join(output_folder, f"{input_folder}_{i}", "crop.csv"), index=False) 45 dfcSplit.to_csv(os.path.join(output_folder, f"{input_folder}_{i}", "crop.csv"), index=False)