# HG changeset patch # User siwaa # Date 1733742403 0 # Node ID a9b2e3ba898b0f417200ed8d7ef99a981cda81dd # Parent 8916f75916b61c1866b1ba6a806b86eca6bdca2a planemo upload for repository https://forgemia.inra.fr/nathalie.rousse/use/-/tree/dnn/DNN/galaxy-tools/wine_quality_train_eval commit c595a7b4b556ab642ecf0396957f2959139dc4ab-dirty diff -r 8916f75916b6 -r a9b2e3ba898b fidle__init__.py --- a/fidle__init__.py Fri Dec 06 19:19:52 2024 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,308 +0,0 @@ -# ================================================================== -# ______ _ _ _ __ __ _ -# | ____(_) | | | | \/ | | | -# | |__ _ __| | | ___ | \ / | ___ __| | -# | __| | |/ _` | |/ _ \ | |\/| |/ _ \ / _` | -# | | | | (_| | | __/ | | | | (_) | (_| | -# |_| |_|\__,_|_|\___| |_| |_|\___/ \__,_| -# -# ================================================================== -# A simple module to host usefull functions for Fidle practical work -# Jean-Luc Parouty CNRS/MIAI/SIMaP 2022 -# Contributed by Achille Mbogol Touye MIAI/SIMAP 2023 (PyTorch support) - -import os,sys,platform -import glob -import shutil -import pathlib - -from random import randint -import matplotlib - -from IPython.display import display,HTML - -import fidle.config -import fidle.scrawler -import fidle.utils - -from fidle.Chrono import Chrono - - -__version__ = fidle.config.VERSION - -__run_id = None -__run_dir = None -__datasets_dir = None - -utils = fidle.utils -plot = fidle.scrawler -config = fidle.config -chrono = Chrono() - - -# ------------------------------------------------------------- -# init -# ------------------------------------------------------------- -# -''' -Initialization with parameters run_id and run_dir. -These two parameters can be overridden via environment variables -FIDLE_OVERRIDE_run_id and FIDLE_OVERRIDE_run_dir. -datasets_dir is retrieved via the environment variable . -params: - run_id : Run id for the notebook (random number if None) (None) - run_dir : Run directory (./run/{run_id} if None) (None) - datasets_env_var : Name of env. var. specifying the location of the datasets (FIDLE_DATASETS_DIR) -return: - run_id, run_dir, datasets_dir -''' -def init(run_id=None, run_dir=None, datasets_env_var='FIDLE_DATASETS_DIR'): - global __run_id - global __run_dir - global __datasets_dir - - # ---- run_id - # - # If None, we choose - __run_id = str(randint(10000,99999)) if run_id is None else run_id - - # Overrided ? - __run_id = __get_override_env('run_id', __run_id) - - # ---- run_dir - # - # If None, we set it - __run_dir = f'./run/{__run_id}' if run_dir is None else run_dir - - # Override run_dir ? - __run_dir = __get_override_env('run_dir', __run_dir) - - # Create run_dir - utils.mkdir(__run_dir) - - # ---- Parameters from config.py - # - mplstyle = config.FIDLE_MPLSTYLE - cssfile = config.FIDLE_CSSFILE - - # ---- Load matplotlib style and css - # - module_dir = pathlib.Path(__file__).parent - matplotlib.style.use(f'{module_dir}/{mplstyle}') - __load_cssfile(f'{module_dir}/{cssfile}') - - # ---- Get datasets location - # From env var or by looking for - # Raise an exception if cannot be found. - # - __datasets_dir=utils.get_datasets_dir(datasets_env_var) - - # ---- Update Keras cache - # - updated_keras = __update_keras_cache() - - # ---- Update torch cache - # - updated_torch = __update_torch_cache() - - # ---- Tensorflow log level - # - log_level = int(os.getenv('TF_CPP_MIN_LOG_LEVEL', 0 )) - str_level = ['Info + Warning + Error','Warning + Error','Error only'][log_level] - - # ---- Today, now and hostname - # - chrono.start('__global__') - h = platform.uname() - - # ---- Save figs - # - save_figs = os.getenv('FIDLE_SAVE_FIGS', str(config.SAVE_FIGS) ) - save_figs = (save_figs.lower() == 'true') - figs_dir = f'{__run_dir}/figs' - - plot.set_save_fig( save = save_figs, - figs_dir = figs_dir, - figs_name = f'fig_{__run_id}_', - figs_id = 0 ) - - # ---- Hello world - # - utils.display_md('
**FIDLE - Environment initialization**') - print('Version :', config.VERSION) - print('Run id :', __run_id) - print('Run dir :', __run_dir) - print('Datasets dir :', __datasets_dir) - print('Start time :', chrono.get_start('__global__')) - print('Hostname :', f'{h[1]} ({h[0]})') - print('Tensorflow log level :', str_level,f' (={log_level})') - print('Update keras cache :', updated_keras) - print('Update torch cache :', updated_torch) - print('Save figs :', figs_dir, f'({save_figs})') - - # ---- Modules versions - # - for m in config.USED_MODULES: - if m in sys.modules: - print(f'{m:21s}:', sys.modules[m].__version__) - - # ---- Overrided ? - # - if run_id != __run_id: - print(f'\n** run_id has been overrided from {run_id} to {__run_id}') - - - return __run_id, __run_dir, __datasets_dir - -# ------------------------------------------------------------------ -# Update keras cache -# ------------------------------------------------------------------ -# Try to sync ~/.keras/cache with datasets/keras_cache -# because sometime, we cannot access to internet... (IDRIS..) -# -def __update_keras_cache(): - updated = False - if os.path.isdir(f'{__datasets_dir}/keras_cache'): - from_dir = f'{__datasets_dir}/keras_cache/*.*' - to_dir = os.path.expanduser('~/.keras/datasets') - utils.mkdir(to_dir) - for pathname in glob.glob(from_dir): - filename=os.path.basename(pathname) - destname=f'{to_dir}/{filename}' - if not os.path.isfile(destname): - shutil.copy(pathname, destname) - updated=True - return updated - - -# ------------------------------------------------------------------ -# Update torch cache -# ------------------------------------------------------------------ -# Try to sync ~/data/MNIST/raw with datasets/torch_cache -# because sometime, we cannot access to internet... (IDRIS..) -# - -def __update_torch_cache(): - updated = False - if os.path.isdir(f'{__datasets_dir}/torch_cache/MNIST/raw'): - from_dir = f'{__datasets_dir}/torch_cache/MNIST/raw/*' - to_dir = os.getcwd() + '/data/MNIST/raw' - utils.mkdir(to_dir) - for pathname in glob.glob(from_dir): - filename=os.path.basename(pathname) - destname=f'{to_dir}/{filename}' - if not os.path.isfile(destname): - shutil.copy(pathname, destname) - updated=True - return updated - - -# ------------------------------------------------------------------ -# Override -# ------------------------------------------------------------------ -# - -def override(*names, module_name='__main__', verbose=True, return_attributes=False): - ''' - Try to override attributes given par name with environment variables. - Environment variables name must be : FIDLE_OVERRIDE_ - If no env variable is available for a given name, nothing is change. - If type is str, substitution is done with 'notebook_id' and 'datasets_dir' - Example : override('image_size','nb_epochs') - params: - names : list of attributes names as a str list - if empty, all attributes can be override - return : - dict {name=new value} - ''' - # ---- Where to override - # - module=sys.modules[module_name] - - # ---- No names : mean all - # - if len(names)==0: - names=[] - for name in dir(module): - if name.startswith('_'): continue - v=getattr(module,name) - if type(v) not in [str, int, float, bool, tuple, list, dict]: continue - names.append(name) - - # ---- Search for names - # - overrides={} - for name in names: - - # ---- Variable doesn't exist - # - if not hasattr(module,name): - print(f'** Warning : You try to override an inexistant variable ({name})') - continue - - # ---- Get override environment variable name - # - env_value = __get_override_env(name, None) - - # ---- Environment variable doesn't exist - # - if env_value is None: continue - - # ---- Environment variable and variable exist both - # - value_old = getattr(module,name) - value_type = type(value_old) - - if value_type in [ str ] : - new_value = env_value.format(datasets_dir=__datasets_dir, run_id=__run_id) - - if value_type in [ int, float, bool, tuple, list, dict, type(None)]: - new_value = eval(env_value) - - # ---- Override value - # - setattr(module,name,new_value) - overrides[name]=new_value - - if verbose and len(overrides)>0: - print('** Overrided parameters : **') - for name,value in overrides.items(): - print(f'{name:20s} : {value}') - - if return_attributes: - return overrides - - -def __get_override_env(name, default_value=None): - env_name = f'FIDLE_OVERRIDE_{name}' - env_value = os.environ.get(env_name, default_value) - return env_value - - -# def __has_override_env(name): -# env_name = f'FIDLE_OVERRIDE_{name}' -# return (env_name in os.environ) - - - -def __load_cssfile(cssfile): - if cssfile is None: return - styles = open(cssfile, "r").read() - display(HTML(styles)) - - - - - -def end(): - chrono.stop('__global__') - end_time = chrono.get_end('__global__') - duration = chrono.get_delay('__global__', format='human') - site_url = "https://fidle.cnrs.fr" - md = f'**End time :** {end_time} \n' - md+= f'**Duration :** {duration} \n' - md+= f'This notebook ends here :-) \n' - md+= f'[{site_url}]({site_url})' - utils.display_md(md) - - diff -r 8916f75916b6 -r a9b2e3ba898b fidle__init__.py.original --- a/fidle__init__.py.original Fri Dec 06 19:19:52 2024 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,308 +0,0 @@ -# ================================================================== -# ______ _ _ _ __ __ _ -# | ____(_) | | | | \/ | | | -# | |__ _ __| | | ___ | \ / | ___ __| | -# | __| | |/ _` | |/ _ \ | |\/| |/ _ \ / _` | -# | | | | (_| | | __/ | | | | (_) | (_| | -# |_| |_|\__,_|_|\___| |_| |_|\___/ \__,_| -# -# ================================================================== -# A simple module to host usefull functions for Fidle practical work -# Jean-Luc Parouty CNRS/MIAI/SIMaP 2022 -# Contributed by Achille Mbogol Touye MIAI/SIMAP 2023 (PyTorch support) - -import os,sys,platform -import glob -import shutil -import pathlib - -from random import randint -import matplotlib - -from IPython.display import display,HTML - -import fidle.config -import fidle.scrawler -import fidle.utils - -from fidle.Chrono import Chrono - - -__version__ = fidle.config.VERSION - -__run_id = None -__run_dir = None -__datasets_dir = None - -utils = fidle.utils -plot = fidle.scrawler -config = fidle.config -chrono = Chrono() - - -# ------------------------------------------------------------- -# init -# ------------------------------------------------------------- -# -''' -Initialization with parameters run_id and run_dir. -These two parameters can be overridden via environment variables -FIDLE_OVERRIDE_run_id and FIDLE_OVERRIDE_run_dir. -datasets_dir is retrieved via the environment variable . -params: - run_id : Run id for the notebook (random number if None) (None) - run_dir : Run directory (./run/{run_id} if None) (None) - datasets_env_var : Name of env. var. specifying the location of the datasets (FIDLE_DATASETS_DIR) -return: - run_id, run_dir, datasets_dir -''' -def init(run_id=None, run_dir=None, datasets_env_var='FIDLE_DATASETS_DIR'): - global __run_id - global __run_dir - global __datasets_dir - - # ---- run_id - # - # If None, we choose - __run_id = str(randint(10000,99999)) if run_id is None else run_id - - # Overrided ? - __run_id = __get_override_env('run_id', __run_id) - - # ---- run_dir - # - # If None, we set it - __run_dir = f'./run/{__run_id}' if run_dir is None else run_dir - - # Override run_dir ? - __run_dir = __get_override_env('run_dir', __run_dir) - - # Create run_dir - utils.mkdir(__run_dir) - - # ---- Parameters from config.py - # - mplstyle = config.FIDLE_MPLSTYLE - cssfile = config.FIDLE_CSSFILE - - # ---- Load matplotlib style and css - # - module_dir = pathlib.Path(__file__).parent - matplotlib.style.use(f'{module_dir}/{mplstyle}') - __load_cssfile(f'{module_dir}/{cssfile}') - - # ---- Get datasets location - # From env var or by looking for - # Raise an exception if cannot be found. - # - __datasets_dir=utils.get_datasets_dir(datasets_env_var) - - # ---- Update Keras cache - # - updated_keras = __update_keras_cache() - - # ---- Update torch cache - # - updated_torch = __update_torch_cache() - - # ---- Tensorflow log level - # - log_level = int(os.getenv('TF_CPP_MIN_LOG_LEVEL', 0 )) - str_level = ['Info + Warning + Error','Warning + Error','Error only'][log_level] - - # ---- Today, now and hostname - # - chrono.start('__global__') - h = platform.uname() - - # ---- Save figs - # - save_figs = os.getenv('FIDLE_SAVE_FIGS', str(config.SAVE_FIGS) ) - save_figs = (save_figs.lower() == 'true') - figs_dir = f'{__run_dir}/figs' - - plot.set_save_fig( save = save_figs, - figs_dir = figs_dir, - figs_name = f'fig_{__run_id}_', - figs_id = 0 ) - - # ---- Hello world - # - utils.display_md('
**FIDLE - Environment initialization**') - print('Version :', config.VERSION) - print('Run id :', __run_id) - print('Run dir :', __run_dir) - print('Datasets dir :', __datasets_dir) - print('Start time :', chrono.get_start('__global__')) - print('Hostname :', f'{h[1]} ({h[0]})') - print('Tensorflow log level :', str_level,f' (={log_level})') - print('Update keras cache :', updated_keras) - print('Update torch cache :', updated_torch) - print('Save figs :', figs_dir, f'({save_figs})') - - # ---- Modules versions - # - for m in config.USED_MODULES: - if m in sys.modules: - print(f'{m:21s}:', sys.modules[m].__version__) - - # ---- Overrided ? - # - if run_id != __run_id: - print(f'\n** run_id has been overrided from {run_id} to {__run_id}') - - - return __run_id, __run_dir, __datasets_dir - -# ------------------------------------------------------------------ -# Update keras cache -# ------------------------------------------------------------------ -# Try to sync ~/.keras/cache with datasets/keras_cache -# because sometime, we cannot access to internet... (IDRIS..) -# -def __update_keras_cache(): - updated = False - if os.path.isdir(f'{__datasets_dir}/keras_cache'): - from_dir = f'{__datasets_dir}/keras_cache/*.*' - to_dir = os.path.expanduser('~/.keras/datasets') - utils.mkdir(to_dir) - for pathname in glob.glob(from_dir): - filename=os.path.basename(pathname) - destname=f'{to_dir}/{filename}' - if not os.path.isfile(destname): - shutil.copy(pathname, destname) - updated=True - return updated - - -# ------------------------------------------------------------------ -# Update torch cache -# ------------------------------------------------------------------ -# Try to sync ~/data/MNIST/raw with datasets/torch_cache -# because sometime, we cannot access to internet... (IDRIS..) -# - -def __update_torch_cache(): - updated = False - if os.path.isdir(f'{__datasets_dir}/torch_cache/MNIST/raw'): - from_dir = f'{__datasets_dir}/torch_cache/MNIST/raw/*' - to_dir = os.getcwd() + '/data/MNIST/raw' - utils.mkdir(to_dir) - for pathname in glob.glob(from_dir): - filename=os.path.basename(pathname) - destname=f'{to_dir}/{filename}' - if not os.path.isfile(destname): - shutil.copy(pathname, destname) - updated=True - return updated - - -# ------------------------------------------------------------------ -# Override -# ------------------------------------------------------------------ -# - -def override(*names, module_name='__main__', verbose=True, return_attributes=False): - ''' - Try to override attributes given par name with environment variables. - Environment variables name must be : FIDLE_OVERRIDE_ - If no env variable is available for a given name, nothing is change. - If type is str, substitution is done with 'notebook_id' and 'datasets_dir' - Example : override('image_size','nb_epochs') - params: - names : list of attributes names as a str list - if empty, all attributes can be override - return : - dict {name=new value} - ''' - # ---- Where to override - # - module=sys.modules[module_name] - - # ---- No names : mean all - # - if len(names)==0: - names=[] - for name in dir(module): - if name.startswith('_'): continue - v=getattr(module,name) - if type(v) not in [str, int, float, bool, tuple, list, dict]: continue - names.append(name) - - # ---- Search for names - # - overrides={} - for name in names: - - # ---- Variable doesn't exist - # - if not hasattr(module,name): - print(f'** Warning : You try to override an inexistant variable ({name})') - continue - - # ---- Get override environment variable name - # - env_value = __get_override_env(name, None) - - # ---- Environment variable doesn't exist - # - if env_value is None: continue - - # ---- Environment variable and variable exist both - # - value_old = getattr(module,name) - value_type = type(value_old) - - if value_type in [ str ] : - new_value = env_value.format(datasets_dir=__datasets_dir, run_id=__run_id) - - if value_type in [ int, float, bool, tuple, list, dict, type(None)]: - new_value = eval(env_value) - - # ---- Override value - # - setattr(module,name,new_value) - overrides[name]=new_value - - if verbose and len(overrides)>0: - print('** Overrided parameters : **') - for name,value in overrides.items(): - print(f'{name:20s} : {value}') - - if return_attributes: - return overrides - - -def __get_override_env(name, default_value=None): - env_name = f'FIDLE_OVERRIDE_{name}' - env_value = os.environ.get(env_name, default_value) - return env_value - - -# def __has_override_env(name): -# env_name = f'FIDLE_OVERRIDE_{name}' -# return (env_name in os.environ) - - - -def __load_cssfile(cssfile): - if cssfile is None: return - styles = open(cssfile, "r").read() - display(HTML(styles)) - - - - - -def end(): - chrono.stop('__global__') - end_time = chrono.get_end('__global__') - duration = chrono.get_delay('__global__', format='human') - site_url = "https://fidle.cnrs.fr" - md = f'**End time :** {end_time} \n' - md+= f'**Duration :** {duration} \n' - md+= f'This notebook ends here :-) \n' - md+= f'[{site_url}]({site_url})' - utils.display_md(md) - - diff -r 8916f75916b6 -r a9b2e3ba898b toto_wine_quality_train_eval.py --- a/toto_wine_quality_train_eval.py Fri Dec 06 19:19:52 2024 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,357 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -############################################################################### -# Module : model_wine_lightning -# -# This code has been extracted from 01-DNN-Wine-Regression-lightning.ipynb -# (fidle-tp/fidle-master-3.0.11/Wine.Lightning) then modified. -# Only first part kept : -# - (Retrieve data) -# - (Preparing the data) -# - (Build a model) -# - Train and save the model -# -# Inputs : -# -# -dataset_filepath : dataset file path (.csv). -# - File containing data used to train and test the model. -# - The dataset will be splitted in 2 parts : -# one for training and one for validation. -# - if unavailable (not given, not found...) : default dataset_filepath used -# -# Outputs : -# -# Output files under "OUTPUTS" folder (must exist !!!) -# -# - Model file (model_ckpt_filepath) (.ckpt) -# -# - Normalization configuration file (norm_config_json_filepath) (.json) -# -# - Report file (report_json_filepath) (.json) containing: -# - Normalization configuration information -# - Evaluation score information -# example {'val_loss': 0.48292940855026245, 'val_mae': 0.524127721786499, -# 'val_mse': 0.48292940855026245} -# -# - Log files into Wine.Lightning/run/LWINE1/logs/reg_logs -# -# - Screen display containing running information : -# - training lines : -# Epoch 99: 100%|█| 64/64 [00:01<00:00, 44.89it/s, v_num=0, val_loss=0.483, val_mae=0.524, val_mse=0.483, t -# - Eval score : x_test / loss : 0.4829 -# x_test / mae : 0.5241 -# x_test / mse : 0.4829 -# - ... -# -############################################################################### - -# -# -# # [LWINE1] - Wine quality prediction with a Dense Network (DNN) -# Another example of regression, with a wine quality prediction, using PyTorch Lightning -# -# -# ## Objectives : -# - Predict the **quality of wines**, based on their analysis -# - Understanding the principle and the architecture of a regression with a dense neural network with backup and restore of the trained model. -# -# The **[Wine Quality datasets](https://archive.ics.uci.edu/ml/datasets/wine+Quality)** are made up of analyses of a large number of wines, with an associated quality (between 0 and 10) -# This dataset is provide by : -# Paulo Cortez, University of Minho, Guimarães, Portugal, http://www3.dsi.uminho.pt/pcortez -# A. Cerdeira, F. Almeida, T. Matos and J. Reis, Viticulture Commission of the Vinho Verde Region(CVRVV), Porto, Portugal, @2009 -# This dataset can be retreive at [University of California Irvine (UCI)](https://archive-beta.ics.uci.edu/ml/datasets/wine+quality) -# -# -# Due to privacy and logistic issues, only physicochemical and sensory variables are available -# There is no data about grape types, wine brand, wine selling price, etc. -# -# - fixed acidity -# - volatile acidity -# - citric acid -# - residual sugar -# - chlorides -# - free sulfur dioxide -# - total sulfur dioxide -# - density -# - pH -# - sulphates -# - alcohol -# - quality (score between 0 and 10) -# -# ## What we're going to do : -# -# - (Retrieve data) -# - (Preparing the data) -# - (Build a model) -# - Train and save the model -# - Restore saved model -# - Evaluate the model -# - Make some predictions -# -HEAD = "[model_wine_lightning.wine_quality_train_eval]" - -# ## Step 1 - Import and init -print("\n"+HEAD,"# ## Step 1 - Import and init\n") - -# Import some packages -import os -import lightning.pytorch as pl -import torchvision.transforms as T -##toto## from IPython.display import display, HTML ##toto## HTML -from torch.utils.data import DataLoader, random_split -##toto## from model_wine_lightning.modules.progressbar import CustomTrainProgressBar -from model_wine_lightning.modules.data_load import WineQualityDataset -from model_wine_lightning.modules.data_load import Normalize, ToTensor -from model_wine_lightning.modules.model import LitRegression -from lightning.pytorch.loggers.tensorboard import TensorBoardLogger -import fidle -import json -import shutil -import argparse -from pprint import pprint - -OUTPUTS_PATH = "OUTPUTS" # must exit !!! - -error_msg, warn_msg, more_msg = "", "", "" # default - -model_ckpt_filepath = os.path.join(OUTPUTS_PATH, "model.ckpt") -norm_config_json_filepath = os.path.join(OUTPUTS_PATH, "norm_config.json") -report_json_filepath = os.path.join(OUTPUTS_PATH, "report.json") -report = dict() # init - -try: - if not os.path.exists(OUTPUTS_PATH): # cas isfile non traite - os.mkdir(OUTPUTS_PATH) - message = "Outputs folder '"+OUTPUTS_PATH+" does not exist => created." - warn_msg += message + " " - print(HEAD, "Warning :", message) - - message = "Outputs folder '" + OUTPUTS_PATH + "' must exist." - if not os.path.isdir(OUTPUTS_PATH): - error_msg += message + " " - raise Exception(message) - - # ## INPUTS - print("\n"+HEAD, "# ## INPUTS\n") - - parser = argparse.ArgumentParser() - - help_text = "dataset file path (.csv)" - parser.add_argument("-dataset_filepath", type=str, help=help_text) - - args = parser.parse_args() - - dataset_filepath = None # default (case default data file) - path = args.dataset_filepath - if (path is not None) and (path != 'None') : - if os.path.isfile(path) : - dataset_filepath = path - print(HEAD, "dataset file used :", path) - else : - message = path+ "dataset file not found => default data file used." - warn_msg += message + "" - print(HEAD, "Warning :", message) - else: - message = "no dataset_filepath given => default data file used." - warn_msg += message + "" - print(HEAD, "Warning :", message) - - # Init Fidle environment - print("\n"+HEAD, "# Init Fidle environment\n") - run_id, run_dir, datasets_dir = fidle.init(run_dir='LWINE1_train_eval') - - # Verbosity during training : - # - 0 = silent - # - 1 = progress bar - # - 2 = one line per epoch - fit_verbosity = 1 - dataset_name = 'winequality-red.csv' # default data file - - # Override parameters (batch mode) - Just forget this cell - fidle.override('fit_verbosity', 'dataset_name') - - # ## Step 2 - Retrieve data - print("\n"+HEAD,"# ## Step 2 - Retrieve data\n") - - if dataset_filepath is None: # default data file - dataset_filepath = f'{datasets_dir}/WineQuality/origine/{dataset_name}' - print(HEAD, "Dataset file used :", dataset_filepath) - - # Verify - message = "Dataset file '" + dataset_filepath + "' not found." - if not os.path.isfile(dataset_filepath): - error_msg += message + " " - - datasets = WineQualityDataset(dataset_filepath) - print("datasets:") - #display(datasets.data.head(5).style.format("{0:.2f}")) - ##toto##display(datasets.data.head(5)) - print('Missing Data : ',datasets.data.isna().sum().sum(), - ' Shape is : ', datasets.data.shape) - - # ## Step 3 - Preparing the data - print("\n"+HEAD,"# ## Step 3 - Preparing the data\n") - - # ### 3.1 - Data normalization - print("\n"+HEAD,"# ### 3.1 - Data normalization\n") - # **Note :** - # - All input features must be normalized. - # - To do this we will subtract the mean and divide by the standard - # deviation for each input features. - # - Then we convert numpy array features and target **(quality)** to - # torch tensor - - N = Normalize(dataset_filepath) - norm_config = {"mean_json":N.mean_json, "std_json":N.std_json, - "min_json":N.min_json, "max_json":N.max_json} - transforms = T.Compose([N, ToTensor()]) - dataset = WineQualityDataset(dataset_filepath, transform=transforms) - - print("Before normalization :") - ##toto##display(datasets[:]["features"]) - print("After normalization :") - ##toto##display(dataset[:]["features"]) - - # ### 3.2 - Split data - print("\n"+HEAD,"# ### 3.2 - Split data\n") - # We will use 80% of the data for training and 20% for validation. - # x will be the features data of the analysis and y the target (quality) - - # ---- Split => train, test - data_train_len = int(len(dataset)*0.8) # get 80 % - data_test_len = len(dataset) -data_train_len # test = all - train - - # ---- Split => x,y with random_split - data_train_subset, data_test_subset = random_split(dataset, - [data_train_len, data_test_len]) - - x_train = data_train_subset[:]["features"] - y_train = data_train_subset[:]["quality" ] - - x_test = data_test_subset[:]["features"] - y_test = data_test_subset[:]["quality" ] - - print('Original data shape was : ',dataset.data.shape) - print('x_train : ',x_train.shape, 'y_train : ',y_train.shape) - print('x_test : ',x_test.shape, 'y_test : ',y_test.shape) - - # ### 3.3 - For Training model use Dataloader - print("\n"+HEAD,"# ### 3.3 - For Training model use Dataloader\n") - # The Dataset retrieves our dataset’s features and labels one sample at a time. While training a model, we typically want to pass samples in minibatches, reshuffle the data at every epoch to reduce model overfitting. DataLoader is an iterable that abstracts this complexity for us in an easy API. - - # train batch data - train_loader= DataLoader(dataset=data_train_subset, - shuffle=True, batch_size=20, num_workers=2) - # test batch data - test_loader= DataLoader(dataset=data_test_subset, - shuffle=False, batch_size=20, num_workers=2) - - # ## Step 4 - Build a model - print("\n"+HEAD,"# ## Step 4 - Build a model\n") - - # ## 5 - Train the model - print("\n"+HEAD,"# ## 5 - Train the model\n") - - # ### 5.1 - Get it - print("\n"+HEAD,"# ### 5.1 - Get it\n") - print(HEAD, "Model creation") - reg=LitRegression(in_features=11) - print(reg) - - # ### 5.2 - Add callback - print("\n"+HEAD,"# ### 5.2 - Add callback\n") - os.makedirs('./run/models', exist_ok=True) - save_dir = "./run/models/" - filename ='best-model-{epoch}-{val_loss:.2f}' - savemodel_callback = pl.callbacks.ModelCheckpoint(dirpath=save_dir, - filename=filename, save_top_k=1, - verbose=False, monitor="val_loss") - # ### 5.3 - Train it - print("\n"+HEAD,"# ### 5.3 - Train it\n") - - # loggers data - os.makedirs(f'{run_dir}/logs', mode=0o750, exist_ok=True) - logger= TensorBoardLogger(save_dir=f'{run_dir}/logs', name="reg_logs") - - # train model - trainer = pl.Trainer(accelerator='auto', max_epochs=100, - logger=logger, num_sanity_val_steps=0, - callbacks=[savemodel_callback]) - ##toto##callbacks=[savemodel_callback,CustomTrainProgressBar()]) - trainer.fit(model=reg, train_dataloaders=train_loader, - val_dataloaders=test_loader) - - # ## Step 6 - Evaluate it - print("\n"+HEAD,"# ## Step 6 - Evaluate it\n") - - # ### 6.1 - Model evaluation - print("\n"+HEAD,"# ### 6.1 - Model evaluation\n") - # MAE = Mean Absolute Error (between the labels and predictions) - # A mae equal to 3 represents an average error in prediction of $3k. - score = trainer.validate(model=reg, dataloaders=test_loader, verbose=False) - - print('x_test / loss : {:5.4f}'.format(score[0]['val_loss'])) - print('x_test / mae : {:5.4f}'.format(score[0]['val_mae'])) - print('x_test / mse : {:5.4f}'.format(score[0]['val_mse'])) - - # ### 6.2 - Training history - print("\n"+HEAD,"# ### 6.2 - Training history\n") - # - # To access logs with tensorboad : - # - Under **Docker**, from a terminal launched via the jupyterlab - # launcher, use the following command:
- # ```tensorboard --logdir --host 0.0.0.0``` - # - If you're **not using Docker**, from a terminal :
- # ```tensorboard --logdir ``` - # - # **Note:** One tensorboard instance can be used simultaneously. - - # ## OUTPUTS - print("\n"+HEAD,"# ## OUTPUTS\n") - - # Model (.ckpt) copy of savemodel_callback.best_model_path (under save_dir) - savemodel_path = savemodel_callback.best_model_path - shutil.copyfile(src=savemodel_path, dst=model_ckpt_filepath) - print("OUTPUT:", "Model :", model_ckpt_filepath) - print(" (is a copy of: Best model file ", savemodel_path, ")") - - # Save norm_config as .json file - with open(norm_config_json_filepath, "w") as outfile: - json.dump(norm_config, outfile) - print("OUTPUT:", - "Normalization configuration file (containing norm_config) :", - norm_config_json_filepath) - - # Report (json) : - # - normalization configuration information - # - evaluation score information - # example {'val_loss': 0.48292940855026245, 'val_mae': 0.524127721786499, - # 'val_mse': 0.48292940855026245} - report['eval_score'] = score[0] - report['norm_config'] = norm_config - report['best_model_file'] = savemodel_path - - fidle.end() - -except Exception as e : - error_msg += type(e).__name__ + str(e.args) + ". " - -if error_msg != "": report["error"] = error_msg -if more_msg != "": report["more"] = more_msg -if warn_msg != "": report["warning"] = warn_msg - -print("OUTPUT:", "Report: ") -pprint(report) - -# Save Report as .json file -try: - with open(report_json_filepath, "w") as outfile: - json.dump(report, outfile) - print("OUTPUT:", "Report file (containing report) :", report_json_filepath) -except : - pass - -# --- -# - diff -r 8916f75916b6 -r a9b2e3ba898b wine_quality_train_eval.xml --- a/wine_quality_train_eval.xml Fri Dec 06 19:19:52 2024 +0000 +++ b/wine_quality_train_eval.xml Mon Dec 09 11:06:43 2024 +0000 @@ -6,28 +6,22 @@ - oras://registry.forgemia.inra.fr/nathalie.rousse/use/dnn_fidlemore:1906137478520a9f71a4d8996c0a8827f27aed16 + oras://registry.forgemia.inra.fr/nathalie.rousse/use/dnn_fidlemore:c595a7b4b556ab642ecf0396957f2959139dc4ab - - + + wine_quality_train_eval + + + + + oras://registry.forgemia.inra.fr/nathalie.rousse/use/dnn_fidlemore:1906137478520a9f71a4d8996c0a8827f27aed16 + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 8916f75916b6 -r a9b2e3ba898b wine_quality_train_eval.xml.essaiNOK --- a/wine_quality_train_eval.xml.essaiNOK Fri Dec 06 19:19:52 2024 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,93 +0,0 @@ - - - wine_quality_train_eval - - - - - oras://registry.forgemia.inra.fr/nathalie.rousse/use/dnn_fidlemore:6d159f79349e231deadb738f79bd293c1a8dadd3 - - - - - - - /fidle-tp/datasets-fidle - - - - - - - - - - - - - - - - - - - - - - diff -r 8916f75916b6 -r a9b2e3ba898b wine_quality_train_eval.xml.essaiNOK_2 --- a/wine_quality_train_eval.xml.essaiNOK_2 Fri Dec 06 19:19:52 2024 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,94 +0,0 @@ - - - wine_quality_train_eval - - - - - oras://registry.forgemia.inra.fr/nathalie.rousse/use/dnn_fidlemore:6d159f79349e231deadb738f79bd293c1a8dadd3 - - - - - - - /fidle-tp/datasets-fidle - - - - - - - - - - - - - - - - - - - - - -