Mercurial > repos > siwaa > carboseq_s
comparison csopraSplitInput.py @ 14:e455667fe22f draft
"planemo upload for repository https://forgemia.inra.fr/carboseq/record-projet-carboseq/-/tree/494bf06621d4e5799cab9ef8384d4411a2cd4b84/toos/CarboSeqSimulator commit 494bf06621d4e5799cab9ef8384d4411a2cd4b84-dirty"
| author | siwaa |
|---|---|
| date | Fri, 20 Sep 2024 09:00:02 +0000 |
| parents | 20886bc40659 |
| children | |
comparison
equal
deleted
inserted
replaced
| 13:9ad79c2d8e0b | 14:e455667fe22f |
|---|---|
| 12 os.mkdir(output_folder) | 12 os.mkdir(output_folder) |
| 13 | 13 |
| 14 dfs = pd.read_csv(soil_path) | 14 dfs = pd.read_csv(soil_path) |
| 15 dfc = pd.read_csv(crop_path) | 15 dfc = pd.read_csv(crop_path) |
| 16 | 16 |
| | 17 fileS = open(soil_path, 'r') |
| | 18 fileC = open(crop_path, 'r') |
| | 19 |
| | 20 LineS = fileS.readlines() |
| | 21 LineC = fileC.readlines() |
| | 22 |
| | 23 headerS = LineS.pop(0) |
| | 24 headerC = LineC.pop(0) |
| | 25 |
| 17 dfs['ID'] = dfs['ID'].astype('int') | 26 dfs['ID'] = dfs['ID'].astype('int') |
| 18 dfc['ID'] = dfc['ID'].astype('int') | 27 dfc['ID'] = dfc['ID'].astype('int') |
| 19 | 28 |
| 20 IDs = dfs['ID'].unique() | 29 IDs = dfs['ID'].unique() |
| 21 | 30 |
| 30 firstID = IDs[(i-1) * step] | 39 firstID = IDs[(i-1) * step] |
| 31 lastID = IDs[min((i * step) - 1 , max_ID - 1)] | 40 lastID = IDs[min((i * step) - 1 , max_ID - 1)] |
| 32 | 41 |
| 33 splitIDs = IDs[(i-1) * step : min((i * step) , max_ID)] | 42 splitIDs = IDs[(i-1) * step : min((i * step) , max_ID)] |
| 34 | 43 |
| | 44 whereS = dfs['ID'].isin(splitIDs) |
| | 45 whereSid = whereS[whereS].index.values |
| | 46 firstSid = min(whereSid) |
| | 47 lastSid = max(whereSid) |
| | 48 |
| | 49 lineSsplit = LineS[firstSid:lastSid+1] |
| 35 | 50 |
| 36 dfSplit = pd.DataFrame() | 51 whereC = dfc['ID'].isin(splitIDs) |
| 37 dfSplit = dfs[dfs['ID'].isin(splitIDs)] | 52 whereCid = whereC[whereC].index.values |
| 38 | 53 firstCid = min(whereCid) |
| 39 dfcSplit = pd.DataFrame() | 54 lastCid = max(whereCid) |
| 40 dfcSplit = dfc[dfc['ID'].isin(splitIDs)] | 55 |
| | 56 lineCsplit = LineC[firstCid:lastCid+1] |
| 41 | 57 |
| 42 os.mkdir(os.path.join(output_folder, f"{input_folder}_{i}")) | 58 os.mkdir(os.path.join(output_folder, f"{input_folder}_{i}")) |
| 43 shutil.copy(unit_path, os.path.join(output_folder, f"{input_folder}_{i}")) | 59 shutil.copy(unit_path, os.path.join(output_folder, f"{input_folder}_{i}")) |
| 44 dfSplit.to_csv(os.path.join(output_folder, f"{input_folder}_{i}", "soil.csv"), index=False) | 60 |
| 45 dfcSplit.to_csv(os.path.join(output_folder, f"{input_folder}_{i}", "crop.csv"), index=False) | 61 splitFileS = open(os.path.join(output_folder, f"{input_folder}_{i}", "soil.csv"), 'w') |
| | 62 splitFileS.writelines(headerS) |
| | 63 splitFileS.writelines(lineSsplit) |
| | 64 splitFileS.close() |
| | 65 |
| | 66 splitFileC = open(os.path.join(output_folder, f"{input_folder}_{i}", "crop.csv"), 'w') |
| | 67 splitFileC.writelines(headerC) |
| | 68 splitFileC.writelines(lineCsplit) |
| | 69 splitFileC.close() |
| | 70 |
| 46 shutil.make_archive( os.path.join(output_folder,f"{input_folder}_{i}"), 'zip', os.path.join(output_folder, f"{input_folder}_{i}")) | 71 shutil.make_archive( os.path.join(output_folder,f"{input_folder}_{i}"), 'zip', os.path.join(output_folder, f"{input_folder}_{i}")) |
| | 72 |
| | 73 |
| 47 | 74 |
| 48 # Example usage | 75 # Example usage |
| 49 input_folder = "input" | 76 input_folder = "input" |
| 50 output_folder = "outputs" | 77 output_folder = "outputs" |
| 51 | 78 |
