Mercurial > repos > siwaa > redelac_stics_e
diff redelacPlanExpander.py @ 0:bc7ae8dd3d3c draft
"planemo upload for repository https://forgemia.inra.fr/redelac commit 841d6c6ada2dad3dd575d25aab620491fda5c611"
author | siwaa |
---|---|
date | Mon, 19 Jun 2023 15:51:01 +0000 |
parents | |
children | 47dafd3de5ef |
line wrap: on
line diff
"""Expand a REDELAC/STICS simulation plan into per-cell USM and station tables.

Reads a base USM list, a plant list, a simulation plan and several lookup
tables (SAFRAN->DRIAS grid-cell mapping, aclim per horizon, latitude per
DRIAS cell), then writes three semicolon-separated CSV files:

  * the expanded stations file (one station block per plan row),
  * the expanded USM plan (one USM block per plan row, with derived columns
    replacing the spreadsheet CONCATENATE/VLOOKUP formulas quoted below),
  * the "successions" file listing the USMs of each succession on one row.

Positional command-line arguments (all CSVs use ';' as separator):
   1  type of rotation: '' selects the *_GC columns, anything else the *_PP ones
   2  base USMs file                3  plant list (Liste_plantes) file
   4  plan file                     5  mailles (SAFRAN->DRIAS) file
   6  output: expanded plan         7  output: successions file
   8  stations template file        9  aclim-per-horizon file
  10  latitude-per-DRIAS file      11  output: expanded stations file
"""

import sys

import pandas as pd
import numpy as np  # kept from the original file although unused below

typeOfRotation = sys.argv[1]
USMsFileName = sys.argv[2]
Liste_plantesFileName = sys.argv[3]
planFileName = sys.argv[4]
maillesFileName = sys.argv[5]
expandedPlanFileName = sys.argv[6]
successiveFileName = sys.argv[7]
StationsFileName = sys.argv[8]
aclimParHorizonFileName = sys.argv[9]
latitudeMailleDriasDFileName = sys.argv[10]
expandedStationsFileName = sys.argv[11]

# 1) Base list of USMs. NA filtering is disabled so literal 'NA' cells survive
#    as strings (the succession filter below relies on the 'NA' suffix).
USMsDf = pd.read_csv(USMsFileName, sep=";", keep_default_na=False, na_filter=False)

# 2) List of plants (spreadsheet sheet 'Liste_plantes' exported to CSV).
plantsDf = pd.read_csv(Liste_plantesFileName, sep=";")

# 3) The simulation plan: one row per (upc, ucs, soil) combination.
planDf = pd.read_csv(planFileName, sep=";")

# 4) SAFRAN -> DRIAS grid-cell mapping.
maillesDf = pd.read_csv(maillesFileName, sep=";")

# 5) Stations template plus its two lookup tables.
StationsDf = pd.read_csv(StationsFileName, sep=";")
aclimParHorizonDf = pd.read_csv(aclimParHorizonFileName, sep=";")
latitudeMailleDriasDf = pd.read_csv(latitudeMailleDriasDFileName, sep=";")

# ---------------------------------------------------------------------------
# Expand the stations template: one copy per plan row, renamed and filled in
# for the DRIAS cell matching the row's SAFRAN cell.
# ---------------------------------------------------------------------------
expandedStationsDf = pd.DataFrame()

for _, row in planDf.iterrows():
    # .copy() keeps the template pristine. The original code aliased
    # StationsDf (tmpDfBis = tmpDf = StationsDf) and rewrote it in place on
    # every iteration; it only worked because the rewritten 'Sta_name' kept
    # the horizon in the same '_'-separated slot.
    stationDf = StationsDf.copy()

    # 'upc' looks like '<prefix>_<safran cell>'; map SAFRAN -> DRIAS.
    safran = int(row['upc'].split('_')[1])
    drias = maillesDf[maillesDf["maille_safran"] == safran] \
        .reset_index(drop=True)["maille_drias"][0]

    for j, stationRow in stationDf.iterrows():
        # Template station names carry the horizon in the second '_' field.
        horizon = stationRow['Sta_name'].split('_')[1]
        stationDf.at[j, 'Sta_name'] = str(drias) + '_' + horizon + '_sta.xml'
        stationDf.at[j, 'aclim'] = aclimParHorizonDf[
            (aclimParHorizonDf['maille_DRIAS'] == drias)
            & (aclimParHorizonDf['horizon'] == horizon)
        ].reset_index(drop=True)["aclim"][0]
        stationDf.at[j, 'latitude'] = latitudeMailleDriasDf[
            latitudeMailleDriasDf['maille_DRIAS'] == drias
        ].reset_index(drop=True)["latitude"][0]

    expandedStationsDf = pd.concat([expandedStationsDf, stationDf])

expandedStationsDf.to_csv(expandedStationsFileName, index=False, sep=';')

# ---------------------------------------------------------------------------
# Expand the USM list: one copy of the base USMs per plan row, with the
# spreadsheet formulas turned into derived columns.
# ---------------------------------------------------------------------------
expandedDf = pd.DataFrame()

# Merge key: the plant list's 'Nom_plante' matches the USMs' 'culture' column
# (this replaces the spreadsheet VLOOKUP). Renamed once, outside the loop:
# the original renamed inside the loop, a no-op after the first pass.
plantsDf.rename(columns={'Nom_plante': 'culture'}, inplace=True)

for _, row in planDf.iterrows():
    # .copy() again: the original aliased USMsDf and mutated it in place.
    usmDf = USMsDf.copy()

    safran = int(row['upc'].split('_')[1])
    drias = maillesDf[maillesDf["maille_safran"] == safran] \
        .reset_index(drop=True)["maille_drias"][0]

    usmDf['upc'] = row['upc']
    usmDf['ucs'] = row['ucs']
    usmDf['num_sol'] = row['num_sol']

    # '' means "grandes cultures" (GC); anything else selects the PP columns.
    if typeOfRotation == '':
        usmDf['Norg_sols'] = row['Norg_sols_GC']
        usmDf['nom_sol_sans_point'] = row['nom_sols_GC_dans_USM']
    else:
        usmDf['Norg_sols'] = row['Norg_sols_PP']
        usmDf['nom_sol_sans_point'] = row['nom_sols_PP_dans_USM']

    usmDf['maille_drias'] = drias

    # id_simu column.
    # spreadsheet: =CONCATENATE(F2;"-";G2;"_";H2;"-";I2;"-";K2;"-";L2;"-";M2)
    # NOTE: 'horizon ' (trailing space) is the real column name in the input.
    usmDf['id_simu'] = \
        usmDf.upc + '-' + \
        usmDf['nom_sol_sans_point'].astype(str) + '-' + \
        usmDf['systeme'].astype(str) + '-' + \
        usmDf['code_rotation'].astype(str) + '-' + \
        usmDf['tete_rotation'].astype(str) + '-' + \
        usmDf['horizon '].astype(str)

    # Resolve the VLOOKUP on 'culture' against the plant list.
    usmDf = pd.merge(usmDf, plantsDf, how='left', on='culture', sort=False)

    # usm_name column.
    # spreadsheet: =CONCATENATE(F2;"-";G2;"_";H2;...;VLOOKUP(O2;Liste_plantes;2;0))
    usmDf['usm_name'] = \
        usmDf.upc + '-' + \
        usmDf['nom_sol_sans_point'].astype(str) + '-' + \
        usmDf['systeme'].astype(str) + '-' + \
        usmDf['code_rotation'].astype(str) + '-' + \
        usmDf['tete_rotation'].astype(str) + '-' + \
        usmDf['horizon '].astype(str) + '-' + \
        usmDf['annee_fin'].astype(str) + '-' + \
        usmDf['Code_plante']

    # nomsol column: =CONCATENATE(G2;"_";H2)
    usmDf['nomsol'] = \
        usmDf['num_sol'].astype(str) + '_' + \
        usmDf['Norg_sols'].astype(str)

    # fstation column: =CONCATENATE($E2;"_";M2;"_sta.xml")
    usmDf['fstation'] = \
        usmDf['maille_drias'].astype(str) + '_' + \
        usmDf['horizon '].astype(str) + '_sta.xml'

    # fclim1/fclim2 columns: '<drias>.<year of start/end date>'.
    # Dates in 'Début'/'Fin' are day-first (French convention).
    usmDf['fclim1'] = \
        usmDf['maille_drias'].astype(str) + '.' + \
        pd.to_datetime(usmDf['Début'], dayfirst=True).dt.year.astype(str)
    usmDf['fclim2'] = \
        usmDf['maille_drias'].astype(str) + '.' + \
        pd.to_datetime(usmDf['Fin'], dayfirst=True).dt.year.astype(str)

    expandedDf = pd.concat([expandedDf, usmDf])

# ---------------------------------------------------------------------------
# Successions: one id_simu per succession; ids ending in 'NA' are placeholders
# (na_filter is off above, so they arrive as literal strings) and are dropped.
# ---------------------------------------------------------------------------
listOfSuccess = [
    x for x in expandedDf['id_simu'].unique() if not x.endswith('NA')
]

# Fix the station of each succession's leading rows before saving.
# NOTE(review): the hard-coded 'fi + 5' offset and the 'li - 1' upper bound
# (.loc slices are inclusive, so the last row keeps its own station) look
# deliberate but fragile -- confirm against the expected succession layout.
fi = 0
expandedDf.reset_index(inplace=True)  # contiguous 0..n-1 index for slicing

for succ in listOfSuccess:
    li = expandedDf[expandedDf['id_simu'] == succ].index.max()
    expandedDf.loc[fi:li - 1, 'fstation'] = expandedDf['fstation'][fi + 5]
    fi = li + 1

expandedDf.drop(['ucs', 'Code_plante', 'Fichier_plante_STICS'], axis=1, inplace=True)
expandedDf.to_csv(expandedPlanFileName, index=False, sep=';')

# ---------------------------------------------------------------------------
# Build the successions table: one row per succession, columns
# Treatment; Number_USM; USM_1..USM_k (the usm_names in order).
# ---------------------------------------------------------------------------
successionsDf = pd.DataFrame()
fi = 0

for succ in listOfSuccess:
    li = expandedDf[expandedDf['id_simu'] == succ].index.max()

    # The usm_names of this succession, as a one-column frame.
    succUsmsDf = expandedDf.loc[fi:li, "usm_name"].to_frame().reset_index(drop=True)
    fi = li + 1

    # Label rows USM_1..USM_k, then transpose to one row per succession.
    succUsmsDf['USM'] = 'USM_' + (succUsmsDf.index + 1).astype(str)
    nbUofSucc = succUsmsDf.shape[0]
    succUsmsDf = succUsmsDf.set_index('USM').transpose()

    succUsmsDf['Number_USM'] = nbUofSucc
    succUsmsDf['Treatment'] = succ

    # Reorder: Treatment first, Number_USM second, then the USM_* columns.
    cols = succUsmsDf.columns.tolist()
    succUsmsDf = succUsmsDf[cols[-1:] + cols[-2:-1] + cols[:-2]]

    successionsDf = pd.concat([successionsDf, succUsmsDf])

successionsDf.to_csv(successiveFileName, index=False, sep=';')