annotate redelacxlsx2csv.py @ 1:b91beff0963c draft default tip

"planemo upload for repository https://forgemia.inra.fr/redelac/redelac-toolbox/-/tree/b275711eca6a471af5cb60ae996e0bd6c11461d6/tools/REDELACxlsx2csv commit b275711eca6a471af5cb60ae996e0bd6c11461d6-dirty"
author siwaa
date Thu, 06 Jul 2023 10:16:26 +0000
parents dd02fab383bf
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dd02fab383bf "planemo upload for repository https://forgemia.inra.fr/redelac commit 841d6c6ada2dad3dd575d25aab620491fda5c611"
siwaa
parents:
diff changeset
1 import sys
dd02fab383bf "planemo upload for repository https://forgemia.inra.fr/redelac commit 841d6c6ada2dad3dd575d25aab620491fda5c611"
siwaa
parents:
diff changeset
2 import pandas as pd
dd02fab383bf "planemo upload for repository https://forgemia.inra.fr/redelac commit 841d6c6ada2dad3dd575d25aab620491fda5c611"
siwaa
parents:
diff changeset
3
dd02fab383bf "planemo upload for repository https://forgemia.inra.fr/redelac commit 841d6c6ada2dad3dd575d25aab620491fda5c611"
siwaa
parents:
diff changeset
# REDELAC workbook -> CSV converter.
#
# Command line (positional arguments, in this order):
#   1. input .xlsx workbook
#   2. output CSV for the 'USMs' sheet
#   3. output CSV for the 'Tec' sheet
#   4. output CSV for the 'Ini' sheet
#   5. output CSV for the 'Station' sheet
#   6. output CSV for the 'Liste_plantes' sheet
#
# Every CSV is written with ';' as separator, ',' as decimal mark and
# dates formatted as dd/mm/YYYY.


def loadSheet(xlsxPath, sheetName, **readOptions):
    """Read one worksheet of the workbook with the openpyxl engine.

    Args:
        xlsxPath: path of the .xlsx workbook.
        sheetName: name of the worksheet to read.
        **readOptions: extra keyword arguments forwarded to ``pd.read_excel``.

    Returns:
        The worksheet as a ``pandas.DataFrame``.
    """
    return pd.read_excel(xlsxPath, engine='openpyxl', sheet_name=sheetName, **readOptions)


def sanitizeTec(TecPdf):
    """Cast every 'jul*' column of the Tec sheet to the nullable 'Int64' dtype.

    The frame is modified in place and also returned.
    """
    # Column labels are not guaranteed to be strings (a numeric header cell
    # yields a numeric label), so guard before calling startswith().
    julCol = [x for x in TecPdf.columns if isinstance(x, str) and x.startswith('jul')]

    # Nothing to cast when the sheet has no 'jul*' column.
    if julCol:
        # NOTE(review): presumably these hold day numbers that must be written
        # without a decimal part even when some cells are empty -- confirm.
        TecPdf[julCol] = TecPdf[julCol].astype('Int64')
    return TecPdf


def sanitizeUSMs(USMsPdf):
    """Normalise the column types of the USMs sheet.

    * 'fclim1', 'fclim2' and 'rcp' are cast to the pandas 'string' dtype;
    * 'Début' and 'Fin' are parsed as day-first dates;
    * 'culturean' is forced to 0 wherever it is not equal to 1.

    The frame is modified in place and also returned.
    """
    # Previously 'fclim2' was filled from 'fclim1', which silently discarded
    # the values of the 'fclim2' column.  Each column is now cast from itself;
    # the old behaviour is kept only when the sheet has no 'fclim2' column.
    fclim2Source = 'fclim2' if 'fclim2' in USMsPdf.columns else 'fclim1'
    fclim2 = USMsPdf[fclim2Source].astype('string')

    USMsPdf['fclim1'] = USMsPdf['fclim1'].astype('string')
    USMsPdf['fclim2'] = fclim2
    USMsPdf['rcp'] = USMsPdf['rcp'].astype('string')

    USMsPdf['Début'] = pd.to_datetime(USMsPdf['Début'], dayfirst=True)
    USMsPdf['Fin'] = pd.to_datetime(USMsPdf['Fin'], dayfirst=True)

    USMsPdf.loc[USMsPdf['culturean'] != 1, 'culturean'] = 0
    return USMsPdf


def saveCsv(frame, destination, **extraOptions):
    """Write *frame* to *destination* using the shared CSV conventions.

    Args:
        frame: the ``pandas.DataFrame`` to write.
        destination: output path (or writable text buffer).
        **extraOptions: extra keyword arguments forwarded to ``to_csv``
            (e.g. ``float_format``).
    """
    frame.to_csv(destination, sep=';', date_format='%d/%m/%Y', decimal=',',
                 index=False, **extraOptions)


def main(argv=None):
    """Convert the five REDELAC worksheets of a workbook to CSV files.

    Args:
        argv: full argument vector (program name first); defaults to
            ``sys.argv``.  Arguments beyond the sixth are ignored.

    Raises:
        SystemExit: when fewer than six positional arguments are given.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 7:
        sys.exit('usage: redelacxlsx2csv.py INPUT.xlsx USMS.csv TEC.csv '
                 'INI.csv STATION.csv LISTE_PLANTES.csv')

    (redelacXlsxInputFile,
     redelacUSMsOutputFile,
     redelacTecOutputFile,
     redelacIniOutputFile,
     redelacStationOutputFile,
     redelacListe_plantesOutputFile) = argv[1:7]

    # loading
    # The USMs sheet is read with NA detection disabled, so empty cells and
    # NA-looking strings are kept as they are instead of becoming NaN.
    USMsPdf = loadSheet(redelacXlsxInputFile, 'USMs', keep_default_na=False, na_filter=False)
    TecPdf = loadSheet(redelacXlsxInputFile, 'Tec')
    IniPdf = loadSheet(redelacXlsxInputFile, 'Ini')
    StationPdf = loadSheet(redelacXlsxInputFile, 'Station')
    Liste_plantesPdf = loadSheet(redelacXlsxInputFile, 'Liste_plantes')

    # sanitizing
    sanitizeTec(TecPdf)
    sanitizeUSMs(USMsPdf)

    # saving to csv
    saveCsv(USMsPdf, redelacUSMsOutputFile)
    # Only the Tec sheet has its floats written with two decimals.
    saveCsv(TecPdf, redelacTecOutputFile, float_format='%.2f')
    saveCsv(IniPdf, redelacIniOutputFile)
    saveCsv(StationPdf, redelacStationOutputFile)
    saveCsv(Liste_plantesPdf, redelacListe_plantesOutputFile)


if __name__ == '__main__':
    main()