comparison csopraSplitInput.py @ 14:e455667fe22f draft

"planemo upload for repository https://forgemia.inra.fr/carboseq/record-projet-carboseq/-/tree/494bf06621d4e5799cab9ef8384d4411a2cd4b84/toos/CarboSeqSimulator commit 494bf06621d4e5799cab9ef8384d4411a2cd4b84-dirty"
author siwaa
date Fri, 20 Sep 2024 09:00:02 +0000
parents 20886bc40659
children
comparison
equal deleted inserted replaced
13:9ad79c2d8e0b 14:e455667fe22f
12 os.mkdir(output_folder) 12 os.mkdir(output_folder)
13 13
14 dfs = pd.read_csv(soil_path) 14 dfs = pd.read_csv(soil_path)
15 dfc = pd.read_csv(crop_path) 15 dfc = pd.read_csv(crop_path)
16 16
17 fileS = open(soil_path, 'r')
18 fileC = open(crop_path, 'r')
19
20 LineS = fileS.readlines()
21 LineC = fileC.readlines()
22
23 headerS = LineS.pop(0)
24 headerC = LineC.pop(0)
25
17 dfs['ID'] = dfs['ID'].astype('int') 26 dfs['ID'] = dfs['ID'].astype('int')
18 dfc['ID'] = dfc['ID'].astype('int') 27 dfc['ID'] = dfc['ID'].astype('int')
19 28
20 IDs = dfs['ID'].unique() 29 IDs = dfs['ID'].unique()
21 30
30 firstID = IDs[(i-1) * step] 39 firstID = IDs[(i-1) * step]
31 lastID = IDs[min((i * step) - 1 , max_ID - 1)] 40 lastID = IDs[min((i * step) - 1 , max_ID - 1)]
32 41
33 splitIDs = IDs[(i-1) * step : min((i * step) , max_ID)] 42 splitIDs = IDs[(i-1) * step : min((i * step) , max_ID)]
34 43
44 whereS = dfs['ID'].isin(splitIDs)
45 whereSid = whereS[whereS].index.values
46 firstSid = min(whereSid)
47 lastSid = max(whereSid)
48
49 lineSsplit = LineS[firstSid:lastSid+1]
35 50
36 dfSplit = pd.DataFrame() 51 whereC = dfc['ID'].isin(splitIDs)
37 dfSplit = dfs[dfs['ID'].isin(splitIDs)] 52 whereCid = whereC[whereC].index.values
38 53 firstCid = min(whereCid)
39 dfcSplit = pd.DataFrame() 54 lastCid = max(whereCid)
40 dfcSplit = dfc[dfc['ID'].isin(splitIDs)] 55
56 lineCsplit = LineC[firstCid:lastCid+1]
41 57
42 os.mkdir(os.path.join(output_folder, f"{input_folder}_{i}")) 58 os.mkdir(os.path.join(output_folder, f"{input_folder}_{i}"))
43 shutil.copy(unit_path, os.path.join(output_folder, f"{input_folder}_{i}")) 59 shutil.copy(unit_path, os.path.join(output_folder, f"{input_folder}_{i}"))
44 dfSplit.to_csv(os.path.join(output_folder, f"{input_folder}_{i}", "soil.csv"), index=False) 60
45 dfcSplit.to_csv(os.path.join(output_folder, f"{input_folder}_{i}", "crop.csv"), index=False) 61 splitFileS = open(os.path.join(output_folder, f"{input_folder}_{i}", "soil.csv"), 'w')
62 splitFileS.writelines(headerS)
63 splitFileS.writelines(lineSsplit)
64 splitFileS.close()
65
66 splitFileC = open(os.path.join(output_folder, f"{input_folder}_{i}", "crop.csv"), 'w')
67 splitFileC.writelines(headerC)
68 splitFileC.writelines(lineCsplit)
69 splitFileC.close()
70
46 shutil.make_archive( os.path.join(output_folder,f"{input_folder}_{i}"), 'zip', os.path.join(output_folder, f"{input_folder}_{i}")) 71 shutil.make_archive( os.path.join(output_folder,f"{input_folder}_{i}"), 'zip', os.path.join(output_folder, f"{input_folder}_{i}"))
72
73
47 74
48 # Example usage 75 # Example usage
49 input_folder = "input" 76 input_folder = "input"
50 output_folder = "outputs" 77 output_folder = "outputs"
51 78