Mercurial > repos > siwaa > wine_quality
changeset 0:143b15001522 draft
planemo upload for repository https://forgemia.inra.fr/nathalie.rousse/use/-/tree/dnn/DNN/galaxy-tools/wine_quality commit e7c4e447552083db7eaecbdf139a7c359fe9becc
author | siwaa |
---|---|
date | Wed, 04 Dec 2024 15:25:26 +0000 |
parents | |
children | c3841488b88d |
files | wine_quality.py wine_quality.xml |
diffstat | 2 files changed, 453 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/wine_quality.py Wed Dec 04 15:25:26 2024 +0000 @@ -0,0 +1,369 @@ +#!/usr/bin/env python +# coding: utf-8 + +######### ######### ######### ######### ######### ######### ######### +# +# File created from fidlemore/model_wine_lightning/wine_quality_predict.py +# +# Modifications : +# +# - restore sanitized text for -data value +# +# - quality.txt file containing only quality value (extracted from report) +# +######### ######### ######### ######### ######### ######### ######### + +############################################################################### +# Module : model_wine_lightning +# +# This code has been extracted from 01-DNN-Wine-Regression-lightning.ipynb +# (fidle-tp/fidle-master-3.0.11/Wine.Lightning) then modified. +# Only last part kept : +# - Restore saved model from checkpoint +# - Evaluate the model not kept +# - Make some predictions 1 prediction +# +# Inputs : +# +# Data of wine for which quality is going to be predicted is given +# by -data_json_filepath or by -data (only one of both). +# +# -data_json_filepath : data file path (.json) containing data. +# -data : data (string format) +# +# Example of wine data : { "fixed acidity": 11.2, +# "volatile acidity": 0.28, +# "citric acid": 0.56, +# "residual sugar": 1.9, +# "chlorides": 0.075, +# "free sulfur dioxide": 17, +# "total sulfur dioxide": 60, +# "density": 0.998, +# "pH": 3.16, +# "sulphates": 0.58, +# "alcohol": 9.8 } +# +# -model_ckpt_filepath : checkpoint model file path (.ckpt) to be loaded. +# +# -norm_config_json_filepath : normalization configuration file (.json) +# containing information (norm_config) that has been returned by the model +# wine_quality_train_eval running. +# +# Outputs : +# +# Output files under "OUTPUTS" folder (must exist !!!) 
+# +# - Quality prediction value (float) +# +# - Report file (report_json_filepath) (.json) containing: +# - Quality prediction value +# - Wine data +# - error message, more message, warning message +# +# - Log files into Wine.Lightning/run/LWINE1/logs/reg_logs +# +# - Screen display containing running information +# +############################################################################### + +# <img width="800px" src="../fidle/img/header.svg"></img> +# +# # <!-- TITLE --> [LWINE1] - Wine quality prediction with a Dense Network (DNN) +# <!-- DESC --> Another example of regression, with a wine quality prediction, using PyTorch Lightning +# <!-- AUTHOR : Achille Mbogol Touye (EFFILIA-MIAI/SIMaP) --> +# +# ## Objectives : +# - Predict the **quality of wines**, based on their analysis +# - Understanding the principle and the architecture of a regression with a dense neural network with backup and restore of the trained model. +# +# The **[Wine Quality datasets](https://archive.ics.uci.edu/ml/datasets/wine+Quality)** are made up of analyses of a large number of wines, with an associated quality (between 0 and 10) +# This dataset is provided by : +# Paulo Cortez, University of Minho, Guimarães, Portugal, http://www3.dsi.uminho.pt/pcortez +# A. Cerdeira, F. Almeida, T. Matos and J. Reis, Viticulture Commission of the Vinho Verde Region(CVRVV), Porto, Portugal, @2009 +# This dataset can be retrieved at [University of California Irvine (UCI)](https://archive-beta.ics.uci.edu/ml/datasets/wine+quality) +# +# +# Due to privacy and logistic issues, only physicochemical and sensory variables are available +# There is no data about grape types, wine brand, wine selling price, etc. 
#
# - fixed acidity
# - volatile acidity
# - citric acid
# - residual sugar
# - chlorides
# - free sulfur dioxide
# - total sulfur dioxide
# - density
# - pH
# - sulphates
# - alcohol
# - quality (score between 0 and 10)
#
# ## What we're going to do :
#
# - (Retrieve data)
# - (Preparing the data)
# - (Build a model)
# - Train and save the model
# - Restore saved model
# - Evaluate the model
# - Make some predictions
#
HEAD = "[wine_quality/wine_quality]"

# ## Step 1 - Import and init
print("\n"+HEAD,"# ## Step 1 - Import and init\n")

# Import some packages
import os
import pandas as pd
import torch
import torchvision.transforms as T
from model_wine_lightning.modules.data_load import NormalizeFeatures
from model_wine_lightning.modules.data_load import ToTensorFeatures
from model_wine_lightning.modules.model import LitRegression
import fidle
import json
import argparse
from pprint import pprint

OUTPUTS_PATH = "OUTPUTS"  # created below when missing; must be a folder

# Sanitized tokens that may appear in the -data text, keyed by the character
# each token stands for. They are turned back into characters before the
# text is parsed as JSON.
SANITIZE_MAPPING = {'>': '__gt__', '<': '__lt__', "'": '__sq__',
                    '"': '__dq__', '[': '__ob__', ']': '__cb__',
                    '{': '__oc__', '}': '__cc__', '@': '__at__',
                    '\n': '__cn__', '\r': '__cr__', '\t': '__tc__',
                    '#': '__pd__'}

# Messages accumulated during the run and copied into the report at the end.
error_msg, warn_msg, more_msg = "", "", ""  # default

report_json_filepath = os.path.join(OUTPUTS_PATH, "report.json")
quality_txt_filepath = os.path.join(OUTPUTS_PATH, "quality.txt")
report = dict()  # init
pred = 99.99  # default, written to quality.txt when no prediction is made

data_json_filepath, data = None, None
model_ckpt_filepath = None
norm_config_json_filepath, norm_config = None, None


def is_not_given(argument):
    """Return True when *argument* is None or the literal string 'None'."""
    return (argument is None) or (argument == 'None')


def is_given(argument):
    """Return True when *argument* is neither None nor the string 'None'."""
    return (argument is not None) and (argument != 'None')


def _fail(message):
    """Append *message* to the report error text, then abort the run.

    Raises:
        Exception: always, carrying *message*; it is caught by the
            top-level handler that finalizes the report.
    """
    global error_msg
    error_msg += message + " "
    raise Exception(message)


def _existing_file(path, option):
    """Return *path* after checking it was given and names an existing file.

    *option* is the command line option name (without the leading dash),
    used only to build the messages.
    """
    if is_not_given(path):
        _fail("NO " + option + " given => STOP.")
    if not os.path.isfile(path):
        _fail("'" + path + "' " + option + " file not found => STOP.")
    print(HEAD, option + " used :", path)
    return path


def _load_json(path, what):
    """Return the content of the JSON file *path*.

    *what* names the expected content ("data", "norm_config") in the error
    message. The file is closed even when parsing fails.
    """
    try:
        with open(path, 'r') as inputfile:
            return json.load(inputfile)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and decoding errors.
        _fail("Failed to get json " + what + " from '" + path + "' file.")


def _restore_sanitized(text):
    """Return *text* with every sanitized token replaced by its character."""
    for character, token in SANITIZE_MAPPING.items():
        text = text.replace(token, character)
    return text


try:
    if not os.path.exists(OUTPUTS_PATH):
        os.mkdir(OUTPUTS_PATH)
        message = ("Outputs folder '" + OUTPUTS_PATH
                   + "' does not exist => created.")
        warn_msg += message + " "
        print(HEAD, "Warning :", message)

    # Reached with a non-folder when OUTPUTS_PATH exists as a regular file.
    if not os.path.isdir(OUTPUTS_PATH):
        _fail("Outputs folder '" + OUTPUTS_PATH + "' must exist.")

    # ## INPUTS
    print("\n"+HEAD, "# ## INPUTS\n")

    desc_text = "Predict Quality of a Wine"
    parser = argparse.ArgumentParser(prog='wine_quality_predict',
                                     description=desc_text)

    help_required_keys = "\"fixed acidity\";\"volatile acidity\";\"citric acid\";\"residual sugar\";\"chlorides\";\"free sulfur dioxide\";\"total sulfur dioxide\";\"density\";\"pH\";\"sulphates\";\"alcohol\""
    help_text = "data file path (.json), required keys:"+help_required_keys
    parser.add_argument("-data_json_filepath", type=str, help=help_text)
    help_text = "data in string format, required keys:"+help_required_keys
    parser.add_argument("-data", type=str, help=help_text)

    help_text = "checkpoint model file path (.ckpt)"
    parser.add_argument("-model_ckpt_filepath", type=str, help=help_text)

    help_text = "normalization configuration file path (.json), "
    help_text += "information returned by wine_quality_train_eval running."
    parser.add_argument("-norm_config_json_filepath", type=str, help=help_text)

    args = parser.parse_args()

    # 1 and only 1 among -data_json_filepath and -data
    if is_given(args.data_json_filepath) and is_given(args.data):
        _fail("Both -data_json_filepath and -data given "
              "(1 and only 1 of -data_json_filepath and -data expected) "
              "=> STOP.")
    if is_not_given(args.data_json_filepath) and is_not_given(args.data):
        _fail("NO data_json_filepath and NO -data given. "
              "(1 and only 1 of -data_json_filepath and -data expected) "
              "=> STOP.")

    # Wine data read from a JSON file ...
    if is_given(args.data_json_filepath):
        data_json_filepath = _existing_file(args.data_json_filepath,
                                            "data_json_filepath")
        data = _load_json(data_json_filepath, "data")

    # ... or from the (sanitized) JSON text given on the command line.
    if is_given(args.data):
        data_text = _restore_sanitized(args.data)
        try:
            data = json.loads(data_text)  # get data
        except ValueError:
            _fail("Failed to get json data from string '" + data_text + "'")

    model_ckpt_filepath = _existing_file(args.model_ckpt_filepath,
                                         "model_ckpt_filepath")

    norm_config_json_filepath = _existing_file(
        args.norm_config_json_filepath, "norm_config_json_filepath")
    norm_config = _load_json(norm_config_json_filepath, "norm_config")

    print(HEAD, "INPUTS:")
    print("- data:", data)
    print("- model checkpoint:", model_ckpt_filepath)
    print("- norm_config:", norm_config)

    # Init Fidle environment
    print("\n"+HEAD, "# Init Fidle environment\n")
    run_id, run_dir, datasets_dir = fidle.init('LWINE1_predict')

    # Verbosity during training :
    # - 0 = silent
    # - 1 = progress bar
    # - 2 = one line per epoch
    fit_verbosity = 1

    # Override parameters (batch mode) - Just forget this cell
    fidle.override('fit_verbosity')

    # ## Step 7 - Restore model :
    print("\n"+HEAD, "# ## Step 7 - Restore model :\n")

    # ### 7.1 - Reload model
    print("\n"+HEAD, "# ### 7.1 - Reload model\n")
    loaded_model = LitRegression.load_from_checkpoint(model_ckpt_filepath)
    print(HEAD, "Model loaded from checkpoint: ", model_ckpt_filepath)
    print("Loaded:", loaded_model)

    ## ### 7.2 - Evaluate model : Not kept

    # ### 7.3 - Make a prediction
    print("\n"+HEAD, "# ### 7.3 - Make a prediction\n")

    mean_json = norm_config['mean_json']
    std_json = norm_config['std_json']
    min_json = norm_config['min_json']
    max_json = norm_config['max_json']
    print(HEAD, "Use Normalization mean: ", mean_json)
    print(HEAD, "Use Normalization std: ", std_json)
    print(HEAD, "Use Normalization min: ", min_json)
    print(HEAD, "Use Normalization max: ", max_json)
    NF = NormalizeFeatures(mean_json, std_json, min_json, max_json)
    if not NF.is_in_domain(data):
        _fail("data values out of domain => no prediction.")
    features = NF.get_features(data)
    transform = T.Compose([NF, ToTensorFeatures()])
    sample = transform(features)

    # Sets the model in evaluation mode
    loaded_model.eval()

    # Perform inference using the loaded model
    y_pred = loaded_model(sample)
    pred = y_pred[0][0].item()
    print(HEAD, ":")
    print("Quality prediction :", f'{pred:.2f}', " , for wine data:")
    pprint(data)

    # ## OUTPUTS
    print("\n"+HEAD, "# ## OUTPUTS\n")

    # Report (json) :
    # - quality prediction value
    # - wine data
    # - error message, more message, warning message
    report["quality"] = pred
    report["data"] = data
    report["model_ckpt_filepath"] = model_ckpt_filepath
    report["norm_config"] = norm_config

    fidle.end()

except Exception as e :
    # Any failure ends up in the report; the default 'pred' value is kept.
    error_msg += type(e).__name__ + str(e.args) + ". "

if error_msg != "": report["error"] = error_msg
if more_msg != "": report["more"] = more_msg
if warn_msg != "": report["warning"] = warn_msg

print("OUTPUT:", "Quality prediction :", pred)

print("OUTPUT:", "Report: ")
pprint(report)

## Save Report as .json file
#try:
#    with open(report_json_filepath, "w") as outfile:
#        json.dump(report, outfile)
#    print("OUTPUT:", "Report file (containing report) :", report_json_filepath)
#except :
#    pass

# Save quality alone into .txt file (best effort: a write failure is
# reported on screen but does not abort the script)
try:
    with open(quality_txt_filepath, "w") as outfile:
        outfile.write(str(pred))
    print("OUTPUT:", "Quality file (containing quality value) :",
          quality_txt_filepath)
except OSError as e:
    print("OUTPUT:", "Warning : failed to write quality file",
          quality_txt_filepath, ":", e)

# ---
# <img width="80px" src="../fidle/img/logo-paysage.svg"></img>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/wine_quality.xml Wed Dec 04 15:25:26 2024 +0000 @@ -0,0 +1,84 @@ +<tool id="wine_quality" name="wine_quality" version="1.0.0"> + + <description>wine_quality</description> + + <requirements> + + <!-- singularity --> + <container type="singularity">oras://registry.forgemia.inra.fr/nathalie.rousse/use/dnn_fidlemore:6d159f79349e231deadb738f79bd293c1a8dadd3</container> + + <!-- image locale + <container type="singularity">/home/nrousse/workspace_git/SIWAA_regroup/USE_branch_dnn/use/DNN/containers/fidlemore.simg</container --> + + </requirements> + + <environment_variables> + <environment_variable name="FIDLE_DATASETS_DIR">/fidle-tp/datasets-fidle</environment_variable> + </environment_variables> + <command detect_errors="aggressive"> + <![CDATA[ + . /fidle-tp/fidle-env/bin/activate; + /bin/bash -c "python3 $__tool_directory__/wine_quality.py -model_ckpt_filepath ${model_ckpt} -norm_config_json_filepath ${norm_config_json} -data '${datajson_text}'"; + cp OUTPUTS/quality.txt ${quality_txt}; + ]]> + </command> + + <inputs> + <param name="datajson_text" optional="false" type="text" area="true" + label="Data of wine whose quality to be predicted (json as text)"/> + <param name="model_ckpt" optional="false" type="data" format="ckpt" + label="model_ckpt model file (.ckpt)"/> + <param name="norm_config_json" optional="false" type="data" format="json" + label="norm_config normalization configuration (.json)"/> + </inputs> + + <outputs> + <data format="txt" name="quality_txt" label="quality (.txt)"/> + </outputs> + + <tests> + </tests> + + <help><![CDATA[ + +wine_quality.xml + +=========================== + +Code: + +---- + +- wine_quality.py + +Inputs: + +------- + +- data json as text : data of wine for which quality is going to be predicted. 
+ + - example : + + - {"fixed acidity": 11.2, "volatile acidity": 0.28, "citric acid": 0.56, "residual sugar": 1.9, "chlorides": 0.075, "free sulfur dioxide": 17, "total sulfur dioxide": 60, "density": 0.998, "pH": 3.16, "sulphates": 0.58, "alcohol": 9.8} + +- Model file (.ckpt) + +- Normalization configuration file (.json) + +Outputs: + +-------- + +- Quality file (.txt) + +Credits: + +-------- + +- Author: Nathalie Rousse nathalie.rousse@inrae.fr +- Copyright: INRAE + + ]]> + </help> + +</tool>