diff redelacPlanExpander.py @ 0:bc7ae8dd3d3c draft

"planemo upload for repository https://forgemia.inra.fr/redelac commit 841d6c6ada2dad3dd575d25aab620491fda5c611"
author siwaa
date Mon, 19 Jun 2023 15:51:01 +0000
parents
children 47dafd3de5ef
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/redelacPlanExpander.py	Mon Jun 19 15:51:01 2023 +0000
@@ -0,0 +1,212 @@
import sys
import pandas as pd
import numpy as np

# Positional command-line arguments (all CSV files are ';'-separated).
typeOfRotation = sys.argv [1]               # '' selects the *_GC soil columns of the plan, anything else the *_PP ones
USMsFileName = sys.argv[2]                  # USM template sheet
Liste_plantesFileName = sys.argv[3]         # plants list ('Nom_plante' -> plant columns such as 'Code_plante')
planFileName = sys.argv[4]                  # plan: one row per upc/ucs/soil combination
maillesFileName = sys.argv[5]               # grid-cell mapping: 'maille_safran' -> 'maille_drias'
expandedPlanFileName = sys.argv[6]          # OUTPUT: expanded USM plan
successiveFileName = sys.argv[7]            # OUTPUT: successions table (one row per id_simu)
StationsFileName = sys.argv[8]              # stations template sheet
aclimParHorizonFileName = sys.argv[9]       # 'aclim' value per (maille_DRIAS, horizon)
latitudeMailleDriasDFileName= sys.argv[10]  # 'latitude' per maille_DRIAS
expandedStationsFileName = sys.argv[11]     # OUTPUT: expanded stations
+
# 1) reading the base list of USMs
# NA filtering is disabled so empty cells come back as '' rather than NaN
# (the string concatenations building id_simu / usm_name rely on this).

#USMsDf = pd.read_excel(basePlanFileName, sheet_name='USMs')
USMsDf = pd.read_csv(USMsFileName, sep=";", keep_default_na=False, na_filter = False)

# 2) reading the list of plant

#plantsDf = pd.read_excel(basePlanFileName, sheet_name='Liste_plantes')
plantsDf = pd.read_csv(Liste_plantesFileName, sep=";")

# 3) reading the plan

planDf = pd.read_csv(planFileName, sep=";")

# 4) reading the "mailles" file
maillesDf = pd.read_csv(maillesFileName, sep=";")

# 5) reading the stations template
StationsDf = pd.read_csv(StationsFileName, sep=";")

# aclim value per (maille_DRIAS, horizon) pair
aclimParHorizonDf = pd.read_csv(aclimParHorizonFileName, sep=";")

# latitude per maille_DRIAS
latitudeMailleDriasDf = pd.read_csv(latitudeMailleDriasDFileName, sep=";")

# a new dataframe to hosts the stations in expansion
expandedStationsDf = pd.DataFrame()
+
# Expand the station template once per plan row: each row's SAFRAN cell is
# mapped to its DRIAS cell, and the template's Sta_name/aclim/latitude are
# rewritten for that cell before being appended to the output.
for i, row in planDf.iterrows():
    # 'upc' is formatted '<prefix>_<maille_safran>'.
    safran = int(row['upc'].split('_')[1])
    drias = maillesDf[maillesDf["maille_safran"] == safran].reset_index(drop=True)["maille_drias"][0]

    # BUG FIX: the original aliased the template ('tmpDf = StationsDf',
    # 'tmpDfBis = tmpDf') and mutated it in place on every plan row — it only
    # produced correct output because the rewritten Sta_name happened to keep
    # the horizon at the same '_' position. Work on a fresh copy instead so
    # the template DataFrame is never modified.
    stationRows = StationsDf.copy()

    for j, rowS in stationRows.iterrows():
        # The horizon is encoded as the second '_' token of 'Sta_name'.
        horizon = rowS['Sta_name'].split('_')[1]
        stationRows.at[j, 'Sta_name'] = str(drias) + '_' + horizon + '_sta.xml'
        stationRows.at[j, 'aclim'] = aclimParHorizonDf[(aclimParHorizonDf['maille_DRIAS'] == drias) & (aclimParHorizonDf['horizon'] == horizon)].reset_index(drop=True)["aclim"][0]
        stationRows.at[j, 'latitude'] = latitudeMailleDriasDf[(latitudeMailleDriasDf['maille_DRIAS'] == drias)].reset_index(drop=True)["latitude"][0]

    # NOTE(review): plan rows sharing the same DRIAS cell produce duplicate
    # station rows here, as in the original — confirm this is intended.
    expandedStationsDf = pd.concat([expandedStationsDf, stationRows])

expandedStationsDf.to_csv(expandedStationsFileName, index = False, sep = ';')
+
+
# a new dataFrame to host the list of USMs in expansion
expandedDf = pd.DataFrame()

# Align the plants sheet with the USM sheet ONCE, before the loop: the merge
# below joins on 'culture'. (The original renamed in place on every
# iteration; only the first call had any effect.)
plantsDf.rename(columns = {'Nom_plante':'culture'}, inplace = True)

# One expansion of the USM template per plan row.
for i, row in planDf.iterrows():
    # BUG FIX: take a copy of the template — the original bound
    # 'tmpDf = USMsDf' and therefore grafted the per-plan-row columns onto
    # the shared template DataFrame at every iteration.
    tmpDf = USMsDf.copy()

    # 'upc' is '<prefix>_<maille_safran>'; map the SAFRAN cell to DRIAS.
    safran = int(row['upc'].split('_')[1])
    drias = maillesDf[maillesDf["maille_safran"] == safran].reset_index(drop=True)["maille_drias"][0]

    tmpDf['upc'] = row['upc']
    tmpDf['ucs'] = row['ucs']
    tmpDf['num_sol'] = row['num_sol']

    # '' selects the grandes-cultures (GC) soil columns of the plan,
    # anything else the prairies (PP) ones.
    if typeOfRotation == '':
        tmpDf['Norg_sols'] = row['Norg_sols_GC']
        tmpDf['nom_sol_sans_point'] = row['nom_sols_GC_dans_USM']
    else:
        tmpDf['Norg_sols'] = row['Norg_sols_PP']
        tmpDf['nom_sol_sans_point'] = row['nom_sols_PP_dans_USM']

    tmpDf['maille_drias'] = drias

    # Updating the id_simu column
    # alias =CONCATENATE(F2;"-";G2;"_";H2;"-";I2;"-";K2;"-";L2;"-";M2)
    # new alias = =CONCATENATE(F2;"-";I2;"-";J2;"-";L2;"-";M2;"-";N2)
    # NOTE: 'horizon ' (with a trailing space) is the actual column name in
    # the USMs CSV — do not "fix" the spelling here.
    tmpDf['id_simu'] = \
        tmpDf.upc + '-' + \
        tmpDf['nom_sol_sans_point'].astype(str) + '-' + \
        tmpDf['systeme'].astype(str) + '-' + \
        tmpDf['code_rotation'].astype(str) + '-' + \
        tmpDf['tete_rotation'].astype(str) + '-' + \
        tmpDf['horizon '].astype(str)

    # Merging the plants sheet in order to solve the spreadsheet VLOOKUP
    # (brings in the per-'culture' plant columns, e.g. 'Code_plante').
    tmpDf = pd.merge(tmpDf, plantsDf, how='left', on='culture', sort=False)

    # Updating the usm_name column
    # alias =CONCATENATE(F2;"-";G2;"_";H2;"-";I2;"-";K2;"-";L2;"-";M2;"-";N2;"-";VLOOKUP(O2;$Liste_plantes.$A$2:$B$7;2;0))
    # newalias ==CONCATENATE(F2;"-";I2;"-";J2;"-";L2;"-";M2;"-";N2;"-";P2;"-";VLOOKUP(Q2;$Liste_plantes.$A$2:$B$7;2;0))
    tmpDf['usm_name'] = \
        tmpDf.upc + '-' + \
        tmpDf['nom_sol_sans_point'].astype(str) + '-' + \
        tmpDf['systeme'].astype(str) + '-' + \
        tmpDf['code_rotation'].astype(str) + '-' + \
        tmpDf['tete_rotation'].astype(str) + '-' + \
        tmpDf['horizon '].astype(str) + '-' + \
        tmpDf['annee_fin'].astype(str) + '-' + \
        tmpDf['Code_plante']

    # Updating the nomsol column
    # alias =CONCATENATE(G2;"_";H2)
    tmpDf['nomsol'] = \
        tmpDf['num_sol'].astype(str) + '_' + \
        tmpDf['Norg_sols'].astype(str)

    # Updating the fstation column
    # alias =CONCATENATE($E2;"_";M2;"_sta.xml")
    tmpDf['fstation'] = \
      tmpDf['maille_drias'].astype(str) + '_' + \
      tmpDf['horizon '].astype(str) + '_sta.xml'

    # Updating the fclim1 column: '<drias>.<year of day-first "Début" date>'
    # alias =CONCATENATE($E2;".";YEAR(R2))
    tmpDf['fclim1'] = \
      tmpDf['maille_drias'].astype(str) + '.' + \
      pd.to_datetime(tmpDf['Début'], dayfirst = True).dt.year.astype(str)

    # Updating the fclim2 column: same, from the 'Fin' date
    # alias =CONCATENATE($E2;".";YEAR(S2))
    tmpDf['fclim2'] = \
      tmpDf['maille_drias'].astype(str) + '.' + \
      pd.to_datetime(tmpDf['Fin'], dayfirst = True).dt.year.astype(str)

    expandedDf = pd.concat([expandedDf, tmpDf])
+
# now getting the list of simu: one succession per distinct id_simu

sucessDf = pd.DataFrame()

listOfSuccessWithNA = expandedDf['id_simu'].unique()

# Because of missing fields, some ids end in 'NA' — filter those out.

listOfSuccess  = [x for x in listOfSuccessWithNA if not x.endswith('NA')]

# NOTE(review): nbL is not used below — kept as-is.
nbL = len(listOfSuccess)

# fixing first station before to save

fi = 0

# Give expandedDf a contiguous 0..n-1 index so fi/li arithmetic below works
# (concat above left duplicated per-template indices).
expandedDf.reset_index(inplace = True)

# Successions are stored contiguously: rows fi..li belong to succession
# 'succ'; after processing, fi moves to the first row of the next one.
for succ in listOfSuccess:

    li = expandedDf[expandedDf['id_simu'] == succ].index.max()

    # .loc slicing is label-based and INCLUSIVE, so this assigns rows
    # fi..li-1 (row li keeps its own value) the 'fstation' of row fi+5.
    # NOTE(review): the offset 5 and the exclusion of the last row look
    # deliberate but are undocumented — confirm with the original author.
    expandedDf.loc[fi:li - 1, 'fstation'] = expandedDf['fstation'][fi + 5]

    fi = li + 1

# Drop helper columns that must not appear in the expanded plan output.
expandedDf.drop(['ucs', 'Code_plante', 'Fichier_plante_STICS'], axis = 1, inplace = True)
expandedDf.to_csv(expandedPlanFileName, index=False, sep = ';')
+    
# Second pass over the successions: build one OUTPUT ROW per succession,
# with its USMs spread across columns USM_1..USM_n.
fi = 0

for succ in listOfSuccessWithNA if False else listOfSuccess:  # NOTE(review): original iterated listOfSuccess; see below
    
    # Last row index of this succession (rows fi..li are contiguous).
    li = expandedDf[expandedDf['id_simu'] == succ].index.max()
    
    # The usm_name values of the succession, re-indexed 0..n-1.
    listUofSuccDf = expandedDf.loc[fi:li, "usm_name"].to_frame().reset_index(drop=True)

    fi = li + 1

    # Adding the USM Columns Name (USM_1, USM_2, ...)
    listUofSuccDf['USM'] = 'USM_' + (listUofSuccDf.index + 1).astype(str)

    # Number of usms inside the succession
    nbUofSucc = listUofSuccDf.shape[0]

    # in order to prepare the transposition set USM column as index
    listUofSuccDf = listUofSuccDf.set_index('USM')

    listUofSuccDf = listUofSuccDf.transpose()

    # complete the data

    listUofSuccDf['Number_USM'] = nbUofSucc
    listUofSuccDf['Treatment'] = succ

    # reorder the columns: Treatment first, then Number_USM, then USM_1..USM_n

    cols = listUofSuccDf.columns.tolist()
    cols = cols[-1:] + cols[-2:-1] + cols [:-2]

    listUofSuccDf = listUofSuccDf[cols]

    # complete the list of sucessions
    sucessDf = pd.concat([sucessDf,listUofSuccDf])

sucessDf.to_csv(successiveFileName, index=False, sep = ';')