# view redelacPlanExpander.py @ 3:ecbd096ba43b draft default tip
#
# "planemo upload for repository https://forgemia.inra.fr/redelac/redelac-toolbox/-/tree/b4ec8ff39b5d159c4f9def1a6a450b1fbcd698a2/tools/REDELACplanExpander commit b4ec8ff39b5d159c4f9def1a6a450b1fbcd698a2-dirty"
# author siwaa
# date Tue, 07 May 2024 11:45:15 +0000
# parents 47dafd3de5ef
# children
# line wrap: on
# line source

import sys
import pandas as pd
import numpy as np

# Positional command-line arguments; their order is the script's CLI contract.
# (A former first argument, typeOfRotation, is now derived from the data —
# see the rotation-type detection further down.)
_args = sys.argv
USMsFileName = _args[1]                  # base list of USMs (';'-separated CSV)
Liste_plantesFileName = _args[2]         # plant list
planFileName = _args[3]                  # simulation plan
maillesFileName = _args[4]               # SAFRAN <-> DRIAS cell mapping
expandedPlanFileName = _args[5]          # output: expanded plan
successiveFileName = _args[6]            # output: successions table
StationsFileName = _args[7]              # stations template
aclimParHorizonFileName = _args[8]       # aclim per (DRIAS cell, horizon)
latitudeMailleDriasDFileName = _args[9]  # latitude per DRIAS cell
expandedStationsFileName = _args[10]     # output: expanded stations

def _readCsv(path, **kwargs):
    # Every input of this tool is a ';'-separated CSV file.
    return pd.read_csv(path, sep=";", **kwargs)

# 1) base list of USMs (keep cells verbatim: no NA detection, empty stays "")
USMsDf = _readCsv(USMsFileName, keep_default_na=False, na_filter = False)

# 2) list of plants
plantsDf = _readCsv(Liste_plantesFileName)

# 3) the plan
planDf = _readCsv(planFileName)

# 4) the "mailles" file (SAFRAN <-> DRIAS cell mapping)
maillesDf = _readCsv(maillesFileName)

# 5) the stations template
StationsDf = _readCsv(StationsFileName)

# aclim values per (DRIAS cell, horizon)
aclimParHorizonDf = _readCsv(aclimParHorizonFileName)

# latitude per DRIAS cell
latitudeMailleDriasDf = _readCsv(latitudeMailleDriasDFileName)

# accumulator for the stations produced by the expansion below
expandedStationsDf = pd.DataFrame()

# Expand the stations template once per plan row: each plan row names a
# SAFRAN cell, which is mapped to a DRIAS cell, and every station of the
# template is renamed / re-parameterized for that DRIAS cell.
for i, row in planDf.iterrows():
    # Work on a copy: the original code aliased StationsDf (and the useless
    # tmpDfBis alias), so every iteration silently mutated the shared
    # template — it only kept working because the horizon token stayed at
    # the same position in the rewritten station name.
    tmpDf = StationsDf.copy()

    # 'upc' looks like "<prefix>_<safran>"; map the SAFRAN cell to DRIAS.
    safran = int(row['upc'].split('_')[1])
    drias = maillesDf[maillesDf["maille_safran"] == safran].reset_index(drop=True)["maille_drias"][0]

    for j, rowS in tmpDf.iterrows():
        # Template station names carry the horizon as their second '_' token.
        horizon = rowS['Sta_name'].split('_')[1]
        tmpDf.at[j, 'Sta_name'] = str(drias) + '_' + horizon + '_sta.xml'
        # Climate parameter specific to this (DRIAS cell, horizon) pair.
        tmpDf.at[j, 'aclim'] = aclimParHorizonDf[(aclimParHorizonDf['maille_DRIAS'] == drias) & (aclimParHorizonDf['horizon'] == horizon)].reset_index(drop=True)["aclim"][0]
        # Latitude depends on the DRIAS cell only.
        tmpDf.at[j, 'latitude'] = latitudeMailleDriasDf[(latitudeMailleDriasDf['maille_DRIAS'] == drias)].reset_index(drop=True)["latitude"][0]

    expandedStationsDf = pd.concat([expandedStationsDf, tmpDf])

expandedStationsDf.to_csv(expandedStationsFileName, index = False, sep = ';')


# accumulator for the USMs produced by the expansion below
expandedDf = pd.DataFrame()

# Derive the type of rotation from the content of the USMs table:
# 'PP' when the first row says so, 'GC' for anything else.
typeOfRotation = 'PP' if USMsDf.iloc[0]['rotation'] == 'PP' else 'GC'

# Resolve the VLOOKUP key once, before the loop: the original renamed inside
# the loop, which was a silent no-op after the first iteration because
# 'Nom_plante' no longer existed.
plantsDf.rename(columns = {'Nom_plante':'culture'}, inplace = True)

# Expand the USMs template once per plan row.
for i, row in planDf.iterrows():
    # Work on a copy: aliasing USMsDf would silently add/overwrite columns
    # of the shared template on every iteration.
    tmpDf = USMsDf.copy()

    # 'upc' looks like "<prefix>_<safran>"; map the SAFRAN cell to DRIAS.
    safran = int(row['upc'].split('_')[1])
    drias = maillesDf[maillesDf["maille_safran"] == safran].reset_index(drop=True)["maille_drias"][0]

    tmpDf['upc'] = row['upc']
    tmpDf['ucs'] = row['ucs']
    tmpDf['num_sol'] = row['num_sol']

    # Soil columns depend on the rotation type detected above.
    if typeOfRotation == 'GC' :
        tmpDf['Norg_sols'] = row['Norg_sols_GC']
        tmpDf['nom_sol_sans_point'] = row['nom_sols_GC_dans_USM']
    else :
        tmpDf['Norg_sols'] = row['Norg_sols_PP']
        tmpDf['nom_sol_sans_point'] = row['nom_sols_PP_dans_USM']

    tmpDf['maille_drias'] = drias

    # id_simu: upc-soil-system-rotation-head-horizon
    # (spreadsheet alias: =CONCATENATE(F2;"-";I2;"-";J2;"-";L2;"-";M2;"-";N2))
    # NOTE: the column name 'horizon ' really has a trailing space in the CSV.
    tmpDf['id_simu'] = \
        tmpDf.upc + '-' + \
        tmpDf['nom_sol_sans_point'].astype(str) + '-' + \
        tmpDf['systeme'].astype(str)  + '-' + \
        tmpDf['code_rotation'].astype(str)  + '-' + \
        tmpDf['tete_rotation'].astype(str) + '-' + \
        tmpDf['horizon '].astype(str)

    # Left join brings in the plant columns (Code_plante, ...) per culture —
    # this replaces the spreadsheet's VLOOKUP.
    tmpDf = pd.merge(tmpDf,plantsDf, how='left', on='culture', sort=False)

    # usm_name: id_simu fields + end year + plant code
    # (=CONCATENATE(F2;"-";I2;"-";J2;"-";L2;"-";M2;"-";N2;"-";P2;"-";VLOOKUP(...)))
    tmpDf['usm_name'] = \
        tmpDf.upc + '-' + \
        tmpDf['nom_sol_sans_point'].astype(str) + '-' + \
        tmpDf['systeme'].astype(str)  + '-' + \
        tmpDf['code_rotation'].astype(str)  + '-' + \
        tmpDf['tete_rotation'].astype(str) + '-' + \
        tmpDf['horizon '].astype(str) + '-' + \
        tmpDf['annee_fin'].astype(str) + '-' + \
        tmpDf['Code_plante']

    # nomsol: "<num_sol>_<Norg_sols>"  (=CONCATENATE(G2;"_";H2))
    tmpDf['nomsol'] = \
        tmpDf['num_sol'].astype(str) + '_' + \
        tmpDf['Norg_sols'].astype(str)

    # fstation: "<maille_drias>_<horizon>_sta.xml"
    tmpDf['fstation'] = \
      tmpDf['maille_drias'].astype(str) + '_' + \
      tmpDf['horizon '].astype(str) + '_sta.xml'

    # fclim1/fclim2: "<maille_drias>.<year>" from the start/end dates
    # ('Début'/'Fin' parsed day-first).
    tmpDf['fclim1'] = \
      tmpDf['maille_drias'].astype(str) + '.' + \
      pd.to_datetime(tmpDf['Début'], dayfirst = True).dt.year.astype(str)

    tmpDf['fclim2'] = \
      tmpDf['maille_drias'].astype(str) + '.' + \
      pd.to_datetime(tmpDf['Fin'], dayfirst = True).dt.year.astype(str)

    expandedDf = pd.concat([expandedDf, tmpDf])

# ---- Build the list of successions and normalize the station column ----

# Accumulator for the successions table written at the very end of the script.
sucessDf = pd.DataFrame()

listOfSuccessWithNA = expandedDf['id_simu'].unique()

# Because of the NAs, filter: ids whose last segment is the literal string
# 'NA' are dropped (cells were read verbatim, so missing values show up as
# the text 'NA', not as NaN).

listOfSuccess  = [x for x in listOfSuccessWithNA if not x.endswith('NA')]

nbL = len(listOfSuccess)  # number of successions (not used further below)

# Give the USMs of each succession a common station file before saving.

fi = 0

# After reset_index the row labels are 0..n-1, so the label-based .loc
# slices below behave like positional ranges.
expandedDf.reset_index(inplace = True)

for succ in listOfSuccess:

    # The code relies on the rows of one succession being contiguous;
    # li is the label of the succession's last row.
    li = expandedDf[expandedDf['id_simu'] == succ].index.max()
    
    # NOTE(review): rows fi..li-1 (inclusive) all take the fstation found
    # 5 rows into the succession, while the last row (li) keeps its own
    # value. The offset of 5 is a magic number — presumably it points at
    # the first USM of a specific horizon; confirm against the USMs layout.
    expandedDf.loc[fi:li - 1, 'fstation'] = expandedDf['fstation'][fi + 5]
    
    fi = li + 1

# Drop helper columns that must not appear in the expanded plan output.
expandedDf.drop(['ucs', 'Code_plante', 'Fichier_plante_STICS'], axis = 1, inplace = True)
expandedDf.to_csv(expandedPlanFileName, index=False, sep = ';')
    
fi = 0

# Second pass over the successions: emit one wide row per succession listing
# its USMs (USM_1..USM_n), preceded by the Treatment and Number_USM columns.
for succ in listOfSuccess:

    # Label of the last row of this succession (rows are contiguous).
    li = expandedDf[expandedDf['id_simu'] == succ].index.max()

    # usm_name values of the current succession, renumbered from 0.
    usmNamesDf = expandedDf.loc[fi:li, "usm_name"].to_frame().reset_index(drop=True)

    fi = li + 1

    # Column headers USM_1..USM_n for the transposed table.
    usmNamesDf['USM'] = ['USM_' + str(k + 1) for k in usmNamesDf.index]

    # How many USMs this succession contains.
    nbUofSucc = len(usmNamesDf)

    # One wide row: index 'usm_name', columns USM_1..USM_n.
    wideDf = usmNamesDf.set_index('USM').transpose()

    wideDf['Number_USM'] = nbUofSucc
    wideDf['Treatment'] = succ

    # Reorder: Treatment first, Number_USM second, then the USM columns.
    cols = wideDf.columns.tolist()
    wideDf = wideDf[[cols[-1], cols[-2]] + cols[:-2]]

    # Append this succession to the output table.
    sucessDf = pd.concat([sucessDf, wideDf])

sucessDf.to_csv(successiveFileName, index=False, sep=';')