comparison redelacPlanExpander.py @ 0:bc7ae8dd3d3c draft

"planemo upload for repository https://forgemia.inra.fr/redelac commit 841d6c6ada2dad3dd575d25aab620491fda5c611"
author siwaa
date Mon, 19 Jun 2023 15:51:01 +0000
parents
children 47dafd3de5ef
import sys
import pandas as pd
import numpy as np

typeOfRotation = sys.argv[1]
USMsFileName = sys.argv[2]
Liste_plantesFileName = sys.argv[3]
planFileName = sys.argv[4]
maillesFileName = sys.argv[5]
expandedPlanFileName = sys.argv[6]
successiveFileName = sys.argv[7]
StationsFileName = sys.argv[8]
aclimParHorizonFileName = sys.argv[9]
latitudeMailleDriasDFileName = sys.argv[10]
expandedStationsFileName = sys.argv[11]

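# Expected command line (inferred from the sys.argv reads above; the file names
# below are placeholders, not the actual dataset names):
#   python redelacPlanExpander.py <typeOfRotation> USMs.csv Liste_plantes.csv plan.csv \
#       mailles.csv expandedPlan.csv successive.csv Stations.csv \
#       aclimParHorizon.csv latitudeMailleDrias.csv expandedStations.csv
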
# 1) reading the base list of USMs

#USMsDf = pd.read_excel(basePlanFileName, sheet_name='USMs')
USMsDf = pd.read_csv(USMsFileName, sep=";", keep_default_na=False, na_filter=False)

# 2) reading the list of plants

#plantsDf = pd.read_excel(basePlanFileName, sheet_name='Liste_plantes')
plantsDf = pd.read_csv(Liste_plantesFileName, sep=";")

# 3) reading the plan

planDf = pd.read_csv(planFileName, sep=";")

# 4) reading the "mailles" file
maillesDf = pd.read_csv(maillesFileName, sep=";")

# 5) reading the stations template
StationsDf = pd.read_csv(StationsFileName, sep=";")

aclimParHorizonDf = pd.read_csv(aclimParHorizonFileName, sep=";")

latitudeMailleDriasDf = pd.read_csv(latitudeMailleDriasDFileName, sep=";")

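# Key columns each input is expected to provide, judging from how they are used
# below (not an exhaustive schema):
#   planDf                : upc, ucs, num_sol, Norg_sols_GC/PP, nom_sols_GC/PP_dans_USM
#   maillesDf             : maille_safran, maille_drias
#   StationsDf            : Sta_name, aclim, latitude
#   aclimParHorizonDf     : maille_DRIAS, horizon, aclim
#   latitudeMailleDriasDf : maille_DRIAS, latitude
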
# a new dataframe to host the stations in expansion
expandedStationsDf = pd.DataFrame()

for i, row in planDf.iterrows():
    # work on a copy so that the station template is not modified in place
    tmpDf = StationsDf.copy()

    # the SAFRAN cell number is encoded in the upc id; map it to its DRIAS cell
    safran = int(row['upc'].split('_')[1])
    drias = maillesDf[maillesDf["maille_safran"] == safran].reset_index(drop=True)["maille_drias"][0]

    tmpDfBis = tmpDf

    for j, rowS in tmpDf.iterrows():
        horizon = rowS['Sta_name'].split('_')[1]
        tmpDfBis.at[j, 'Sta_name'] = str(drias) + '_' + horizon + '_sta.xml'
        tmpDfBis.at[j, 'aclim'] = aclimParHorizonDf[(aclimParHorizonDf['maille_DRIAS'] == drias) & (aclimParHorizonDf['horizon'] == horizon)].reset_index(drop=True)["aclim"][0]
        tmpDfBis.at[j, 'latitude'] = latitudeMailleDriasDf[(latitudeMailleDriasDf['maille_DRIAS'] == drias)].reset_index(drop=True)["latitude"][0]

    expandedStationsDf = pd.concat([expandedStationsDf, tmpDfBis])

expandedStationsDf.to_csv(expandedStationsFileName, index=False, sep=';')
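
# At this point expandedStationsDf holds one copy of the station template per
# plan row, with Sta_name rewritten as "<maille_drias>_<horizon>_sta.xml" and
# aclim/latitude looked up for that DRIAS cell.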


# a new dataFrame to host the list of USMs in expansion
expandedDf = pd.DataFrame()

for i, row in planDf.iterrows():
    # again, work on a copy so the base USMs table stays untouched
    tmpDf = USMsDf.copy()

    safran = int(row['upc'].split('_')[1])
    drias = maillesDf[maillesDf["maille_safran"] == safran].reset_index(drop=True)["maille_drias"][0]

    tmpDf['upc'] = row['upc']
    tmpDf['ucs'] = row['ucs']
    tmpDf['num_sol'] = row['num_sol']

    if typeOfRotation == '':
        tmpDf['Norg_sols'] = row['Norg_sols_GC']
        tmpDf['nom_sol_sans_point'] = row['nom_sols_GC_dans_USM']
    else:
        tmpDf['Norg_sols'] = row['Norg_sols_PP']
        tmpDf['nom_sol_sans_point'] = row['nom_sols_PP_dans_USM']

    tmpDf['maille_drias'] = drias

    # Updating the id_simu column
    # alias =CONCATENATE(F2;"-";G2;"_";H2;"-";I2;"-";K2;"-";L2;"-";M2)
    # new alias =CONCATENATE(F2;"-";I2;"-";J2;"-";L2;"-";M2;"-";N2)

    tmpDf['id_simu'] = \
        tmpDf.upc + '-' + \
        tmpDf['nom_sol_sans_point'].astype(str) + '-' + \
        tmpDf['systeme'].astype(str) + '-' + \
        tmpDf['code_rotation'].astype(str) + '-' + \
        tmpDf['tete_rotation'].astype(str) + '-' + \
        tmpDf['horizon '].astype(str)

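    # Resulting id_simu pattern (field names only, the actual values depend on the data):
    #   <upc>-<nom_sol_sans_point>-<systeme>-<code_rotation>-<tete_rotation>-<horizon>
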
    # Merging 2 sheets in order to resolve the VLOOKUP
    plantsDf.rename(columns={'Nom_plante': 'culture'}, inplace=True)
    tmpDf = pd.merge(tmpDf, plantsDf, how='left', on='culture', sort=False)

    # Updating the usm_name column
    # alias =CONCATENATE(F2;"-";G2;"_";H2;"-";I2;"-";K2;"-";L2;"-";M2;"-";N2;"-";VLOOKUP(O2;$Liste_plantes.$A$2:$B$7;2;0))
    # new alias =CONCATENATE(F2;"-";I2;"-";J2;"-";L2;"-";M2;"-";N2;"-";P2;"-";VLOOKUP(Q2;$Liste_plantes.$A$2:$B$7;2;0))

    tmpDf['usm_name'] = \
        tmpDf.upc + '-' + \
        tmpDf['nom_sol_sans_point'].astype(str) + '-' + \
        tmpDf['systeme'].astype(str) + '-' + \
        tmpDf['code_rotation'].astype(str) + '-' + \
        tmpDf['tete_rotation'].astype(str) + '-' + \
        tmpDf['horizon '].astype(str) + '-' + \
        tmpDf['annee_fin'].astype(str) + '-' + \
        tmpDf['Code_plante']

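    # Resulting usm_name pattern: the id_simu fields followed by
    #   -<annee_fin>-<Code_plante>
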

    # Updating the nomsol column
    # alias =CONCATENATE(G2;"_";H2)

    tmpDf['nomsol'] = \
        tmpDf['num_sol'].astype(str) + '_' + \
        tmpDf['Norg_sols'].astype(str)


    # Updating the fstation column
    # alias =CONCATENATE($E2;"_";M2;"_sta.xml")

    tmpDf['fstation'] = \
        tmpDf['maille_drias'].astype(str) + '_' + \
        tmpDf['horizon '].astype(str) + '_sta.xml'

    # Updating the fclim1 column
    # alias =CONCATENATE($E2;".";YEAR(R2))

    tmpDf['fclim1'] = \
        tmpDf['maille_drias'].astype(str) + '.' + \
        pd.to_datetime(tmpDf['Début'], dayfirst=True).dt.year.astype(str)

    # Updating the fclim2 column
    # alias =CONCATENATE($E2;".";YEAR(S2))

    tmpDf['fclim2'] = \
        tmpDf['maille_drias'].astype(str) + '.' + \
        pd.to_datetime(tmpDf['Fin'], dayfirst=True).dt.year.astype(str)
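
    # fclim1/fclim2 take the form "<maille_drias>.<year>", the year being read
    # from the 'Début' and 'Fin' dates respectively (parsed day-first).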

    expandedDf = pd.concat([expandedDf, tmpDf])

# now getting the list of simu

sucessDf = pd.DataFrame()

listOfSuccessWithNA = expandedDf['id_simu'].unique()

# because of NA values some ids are incomplete, so filter them out

listOfSuccess = [x for x in listOfSuccessWithNA if not x.endswith('NA')]

nbL = len(listOfSuccess)

# fixing the first station before saving

fi = 0

expandedDf.reset_index(inplace=True)

for succ in listOfSuccess:

    li = expandedDf[expandedDf['id_simu'] == succ].index.max()

    # every USM of the succession except the last one gets the fstation found at row fi + 5
    expandedDf.loc[fi:li - 1, 'fstation'] = expandedDf['fstation'][fi + 5]

    fi = li + 1

expandedDf.drop(['ucs', 'Code_plante', 'Fichier_plante_STICS'], axis=1, inplace=True)
expandedDf.to_csv(expandedPlanFileName, index=False, sep=';')

fi = 0

for succ in listOfSuccess:

    li = expandedDf[expandedDf['id_simu'] == succ].index.max()

    listUofSuccDf = expandedDf.loc[fi:li, "usm_name"].to_frame().reset_index(drop=True)

    fi = li + 1

    # adding the USM column names
    listUofSuccDf['USM'] = 'USM_' + (listUofSuccDf.index + 1).astype(str)

    # number of USMs inside the succession
    nbUofSucc = listUofSuccDf.shape[0]

    # set the USM column as index in order to prepare the transposition
    listUofSuccDf = listUofSuccDf.set_index('USM')

    listUofSuccDf = listUofSuccDf.transpose()

    # complete the data

    listUofSuccDf['Number_USM'] = nbUofSucc
    listUofSuccDf['Treatment'] = succ

    # reorder the columns

    cols = listUofSuccDf.columns.tolist()
    cols = cols[-1:] + cols[-2:-1] + cols[:-2]

    listUofSuccDf = listUofSuccDf[cols]

    # complete the list of successions
    sucessDf = pd.concat([sucessDf, listUofSuccDf])

sucessDf.to_csv(successiveFileName, index=False, sep=';')
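
# Layout of the resulting successions file (one row per succession; the number of
# USM_i columns matches the longest succession, shorter rows are left empty):
#   Treatment;Number_USM;USM_1;USM_2;...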