diff redelacxlsx2csv.py @ 0:dd02fab383bf draft

"planemo upload for repository https://forgemia.inra.fr/redelac commit 841d6c6ada2dad3dd575d25aab620491fda5c611"
author siwaa
date Mon, 19 Jun 2023 15:32:54 +0000
parents
children b91beff0963c
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/redelacxlsx2csv.py	Mon Jun 19 15:32:54 2023 +0000
@@ -0,0 +1,38 @@
+import sys
+import pandas as pd
+
+# Command line: the input .xlsx path, then one output CSV path per exported sheet.
+redelacXlsxInputFile = sys.argv[1]
+redelacUSMsOutputFile = sys.argv[2]
+redelacTecOutputFile = sys.argv[3]
+redelacIniOutputFile = sys.argv[4]
+redelacStationOutputFile = sys.argv[5]
+redelacListe_plantesOutputFile = sys.argv[6]
+
+# loading (the USMs sheet is read with NA detection disabled, the others with pandas defaults)
+USMsPdf = pd.read_excel(redelacXlsxInputFile, engine = 'openpyxl', sheet_name='USMs', keep_default_na=False, na_filter = False)
+TecPdf = pd.read_excel(redelacXlsxInputFile, engine = 'openpyxl', sheet_name='Tec')
+IniPdf = pd.read_excel(redelacXlsxInputFile, engine = 'openpyxl', sheet_name='Ini')
+StationPdf = pd.read_excel(redelacXlsxInputFile, engine = 'openpyxl', sheet_name='Station')
+Liste_plantesPdf = pd.read_excel(redelacXlsxInputFile, engine = 'openpyxl', sheet_name='Liste_plantes')
+
+# sanitizing
+# Tec columns named 'jul*' become nullable Int64; non-string headers are skipped instead of crashing.
+julCol = [x for x in TecPdf.columns if isinstance(x, str) and x.startswith('jul')]
+
+TecPdf[julCol] = TecPdf[julCol].astype('Int64')
+
+USMsPdf['fclim1'] = USMsPdf['fclim1'].astype('string')
+USMsPdf['fclim2'] = USMsPdf['fclim2'].astype('string')  # was read from 'fclim1', which overwrote fclim2
+USMsPdf['rcp'] = USMsPdf['rcp'].astype('string')
+# 'Début' and 'Fin' are parsed as day-first dates and written back as dd/mm/YYYY below.
+USMsPdf['Début'] = pd.to_datetime(USMsPdf['Début'], dayfirst = True)
+USMsPdf['Fin'] = pd.to_datetime(USMsPdf['Fin'], dayfirst = True)
+
+# saving to csv (';' separator, ',' decimal mark, no index column; Tec floats use two decimals)
+
+USMsPdf.to_csv(redelacUSMsOutputFile, sep=';', date_format='%d/%m/%Y', decimal = ',', index=False)
+TecPdf.to_csv(redelacTecOutputFile, sep=';', date_format='%d/%m/%Y', decimal = ',', float_format='%.2f', index=False)
+IniPdf.to_csv(redelacIniOutputFile, sep=';', date_format='%d/%m/%Y', decimal = ',', index=False)
+StationPdf.to_csv(redelacStationOutputFile, sep=';', date_format='%d/%m/%Y', decimal = ',', index=False)
+Liste_plantesPdf.to_csv(redelacListe_plantesOutputFile, sep=';', date_format='%d/%m/%Y', decimal = ',', index=False)