Mercurial > repos > siwaa > carboseq_s
annotate csopraSplitInput.py @ 5:3e7978af2ba2 draft
"planemo upload for repository https://forgemia.inra.fr/carboseq/record-projet-carboseq/-/tree/5f761ea7dab0a6f8c978cbbe4946e16edc6d032e/toos/CarboSeqSimulator commit 5f761ea7dab0a6f8c978cbbe4946e16edc6d032e-dirty"
author | siwaa |
---|---|
date | Wed, 28 Aug 2024 14:53:30 +0000 |
parents | |
children | 20886bc40659 |
rev | line source |
---|---|
5
3e7978af2ba2
"planemo upload for repository https://forgemia.inra.fr/carboseq/record-projet-carboseq/-/tree/5f761ea7dab0a6f8c978cbbe4946e16edc6d032e/toos/CarboSeqSimulator commit 5f761ea7dab0a6f8c978cbbe4946e16edc6d032e-dirty"
siwaa
parents:
diff
changeset
|
1 import pandas as pd |
3e7978af2ba2
"planemo upload for repository https://forgemia.inra.fr/carboseq/record-projet-carboseq/-/tree/5f761ea7dab0a6f8c978cbbe4946e16edc6d032e/toos/CarboSeqSimulator commit 5f761ea7dab0a6f8c978cbbe4946e16edc6d032e-dirty"
siwaa
parents:
diff
changeset
|
2 import os |
3e7978af2ba2
"planemo upload for repository https://forgemia.inra.fr/carboseq/record-projet-carboseq/-/tree/5f761ea7dab0a6f8c978cbbe4946e16edc6d032e/toos/CarboSeqSimulator commit 5f761ea7dab0a6f8c978cbbe4946e16edc6d032e-dirty"
siwaa
parents:
diff
changeset
|
3 import shutil |
3e7978af2ba2
"planemo upload for repository https://forgemia.inra.fr/carboseq/record-projet-carboseq/-/tree/5f761ea7dab0a6f8c978cbbe4946e16edc6d032e/toos/CarboSeqSimulator commit 5f761ea7dab0a6f8c978cbbe4946e16edc6d032e-dirty"
siwaa
parents:
diff
changeset
|
4 import math |
3e7978af2ba2
"planemo upload for repository https://forgemia.inra.fr/carboseq/record-projet-carboseq/-/tree/5f761ea7dab0a6f8c978cbbe4946e16edc6d032e/toos/CarboSeqSimulator commit 5f761ea7dab0a6f8c978cbbe4946e16edc6d032e-dirty"
siwaa
parents:
diff
changeset
|
5 import sys |
3e7978af2ba2
"planemo upload for repository https://forgemia.inra.fr/carboseq/record-projet-carboseq/-/tree/5f761ea7dab0a6f8c978cbbe4946e16edc6d032e/toos/CarboSeqSimulator commit 5f761ea7dab0a6f8c978cbbe4946e16edc6d032e-dirty"
siwaa
parents:
diff
changeset
|
6 |
3e7978af2ba2
"planemo upload for repository https://forgemia.inra.fr/carboseq/record-projet-carboseq/-/tree/5f761ea7dab0a6f8c978cbbe4946e16edc6d032e/toos/CarboSeqSimulator commit 5f761ea7dab0a6f8c978cbbe4946e16edc6d032e-dirty"
siwaa
parents:
diff
changeset
|
def split_csv(input_folder, output_folder, max_chunck_size):
    """Split the soil/crop CSV files of a folder into zipped chunks by ``ID``.

    ``soil.csv`` and ``crop.csv`` are read from *input_folder* and their rows
    are partitioned on the ``ID`` column into consecutive inclusive ranges
    ``1..size``, ``size+1..2*size``, ... up to the largest ``ID`` found in
    ``soil.csv``. For each range ``i`` (starting at 1) a folder
    ``<name>_<i>`` is created inside *output_folder*, where ``<name>`` is the
    last path component of *input_folder*. The folder receives a copy of
    ``units.csv`` plus the filtered ``soil.csv`` and ``crop.csv``, and is then
    archived as ``<name>_<i>.zip`` next to it.

    Args:
        input_folder: Folder containing ``units.csv``, ``soil.csv`` and
            ``crop.csv``.
        output_folder: Folder receiving the chunk folders and zip archives.
            It is created if missing; existing chunk files are overwritten.
        max_chunck_size: Width of each ``ID`` range; must be a positive
            number.

    Raises:
        ValueError: If *max_chunck_size* is not positive, or if ``soil.csv``
            holds no usable ``ID`` value.
    """
    if max_chunck_size <= 0:
        raise ValueError(
            f"max_chunck_size must be positive, got {max_chunck_size}"
        )

    unit_path = os.path.join(input_folder, "units.csv")
    soil_path = os.path.join(input_folder, "soil.csv")
    crop_path = os.path.join(input_folder, "crop.csv")

    # Tolerate a pre-existing output folder so the split can be re-run.
    os.makedirs(output_folder, exist_ok=True)

    dfs = pd.read_csv(soil_path)
    dfc = pd.read_csv(crop_path)

    # Take the maximum straight from the column: going through a full row
    # (loc[idxmax]) can upcast the ID when other columns are floats.
    max_ID = dfs['ID'].max()
    if pd.isna(max_ID):
        raise ValueError(f"{soil_path} contains no usable ID value")

    step = max_chunck_size
    nb_split = math.ceil(max_ID / step)

    print(max_ID)
    print(nb_split)

    # Only the last path component names the chunks; using the raw path would
    # let an absolute input path override output_folder in os.path.join.
    base_name = os.path.basename(os.path.normpath(input_folder))

    for i in range(1, nb_split + 1):
        first_id = (i - 1) * step + 1
        last_id = i * step

        # Series.between is inclusive on both bounds by default.
        soil_chunk = dfs.loc[dfs['ID'].between(first_id, last_id)]
        # NOTE(review): crop rows whose ID exceeds the largest soil ID land in
        # no chunk, because the range count is derived from soil.csv only.
        crop_chunk = dfc.loc[dfc['ID'].between(first_id, last_id)]

        chunk_dir = os.path.join(output_folder, f"{base_name}_{i}")
        os.makedirs(chunk_dir, exist_ok=True)

        shutil.copy(unit_path, chunk_dir)
        soil_chunk.to_csv(os.path.join(chunk_dir, "soil.csv"), index=False)
        crop_chunk.to_csv(os.path.join(chunk_dir, "crop.csv"), index=False)

        # Produces "<chunk_dir>.zip" holding the three files at its root.
        shutil.make_archive(chunk_dir, 'zip', chunk_dir)
3e7978af2ba2
"planemo upload for repository https://forgemia.inra.fr/carboseq/record-projet-carboseq/-/tree/5f761ea7dab0a6f8c978cbbe4946e16edc6d032e/toos/CarboSeqSimulator commit 5f761ea7dab0a6f8c978cbbe4946e16edc6d032e-dirty"
siwaa
parents:
diff
changeset
|
40 |
3e7978af2ba2
"planemo upload for repository https://forgemia.inra.fr/carboseq/record-projet-carboseq/-/tree/5f761ea7dab0a6f8c978cbbe4946e16edc6d032e/toos/CarboSeqSimulator commit 5f761ea7dab0a6f8c978cbbe4946e16edc6d032e-dirty"
siwaa
parents:
diff
changeset
|
# Command-line entry point.
#
# Usage: csopraSplitInput.py <input.zip> <max_chunk_size>
#
# The zip archive is unpacked into ``input_folder`` and its soil/crop CSV
# files are split into chunks written under ``output_folder``.
input_folder = "input"
output_folder = "outputs"

# Guarded so that importing this module does not unpack or split anything.
if __name__ == "__main__":
    if len(sys.argv) < 3:
        sys.exit(f"usage: {sys.argv[0]} <input.zip> <max_chunk_size>")

    input_file = sys.argv[1]
    max_chunck_size = int(sys.argv[2])

    shutil.unpack_archive(input_file, input_folder, 'zip')

    split_csv(input_folder, output_folder, max_chunck_size)