Mercurial > repos > siwaa > wine_quality
view wine_quality.py @ 0:143b15001522 draft
planemo upload for repository https://forgemia.inra.fr/nathalie.rousse/use/-/tree/dnn/DNN/galaxy-tools/wine_quality commit e7c4e447552083db7eaecbdf139a7c359fe9becc
author | siwaa |
---|---|
date | Wed, 04 Dec 2024 15:25:26 +0000 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env python # coding: utf-8 ######### ######### ######### ######### ######### ######### ######### # # File created from fidlemore/model_wine_lightning/wine_quality_predict.py # # Modifications : # # - restore sanitized text for -data value # # - quality.txt file containing only quality value (extracted from report) # ######### ######### ######### ######### ######### ######### ######### ############################################################################### # Module : model_wine_lightning # # This code has been extracted from 01-DNN-Wine-Regression-lightning.ipynb # (fidle-tp/fidle-master-3.0.11/Wine.Lightning) then modified. # Only last part kept : # - Restore saved model from checkpoint # - Evaluate the model not kept # - Make some predictions 1 prediction # # Inputs : # # Data of wine for which quality is going to be predicted is given # by -data_json_filepath or by -data (only one of both). # # -data_json_filepath : data file path (.json) containing data. # -data : data (string format) # # Example of wine data : { "fixed acidity": 11.2, # "volatile acidity": 0.28, # "citric acid": 0.56, # "residual sugar": 1.9, # "chlorides": 0.075, # "free sulfur dioxide": 17, # "total sulfur dioxide": 60, # "density": 0.998, # "pH": 3.16, # "sulphates": 0.58, # "alcohol": 9.8 } # # -model_ckpt_filepath : checkpoint model file path (.ckpt) to be loaded. # # -norm_config_json_filepath : normalization configuration file (.json) # containing information (norm_config) that has been returned by the model # wine_quality_train_eval running. # # Outputs : # # Output files under "OUTPUTS" folder (must exist !!!) 
# # - Quality prediction value (float) # # - Report file (report_json_filepath) (.json) containing: # - Quality prediction value # - Wine data # - error message, more message, warning message # # - Log files into Wine.Lightning/run/LWINE1/logs/reg_logs # # - Screen display containing running information # ############################################################################### # <img width="800px" src="../fidle/img/header.svg"></img> # # # <!-- TITLE --> [LWINE1] - Wine quality prediction with a Dense Network (DNN) # <!-- DESC --> Another example of regression, with a wine quality prediction, using PyTorch Lightning # <!-- AUTHOR : Achille Mbogol Touye (EFFILIA-MIAI/SIMaP) --> # # ## Objectives : # - Predict the **quality of wines**, based on their analysis # - Understanding the principle and the architecture of a regression with a dense neural network with backup and restore of the trained model. # # The **[Wine Quality datasets](https://archive.ics.uci.edu/ml/datasets/wine+Quality)** are made up of analyses of a large number of wines, with an associated quality (between 0 and 10) # This dataset is provided by : # Paulo Cortez, University of Minho, Guimarães, Portugal, http://www3.dsi.uminho.pt/pcortez # A. Cerdeira, F. Almeida, T. Matos and J. Reis, Viticulture Commission of the Vinho Verde Region (CVRVV), Porto, Portugal, @2009 # This dataset can be retrieved at [University of California Irvine (UCI)](https://archive-beta.ics.uci.edu/ml/datasets/wine+quality) # # # Due to privacy and logistic issues, only physicochemical and sensory variables are available # There is no data about grape types, wine brand, wine selling price, etc. 
#
#  - fixed acidity
#  - volatile acidity
#  - citric acid
#  - residual sugar
#  - chlorides
#  - free sulfur dioxide
#  - total sulfur dioxide
#  - density
#  - pH
#  - sulphates
#  - alcohol
#  - quality (score between 0 and 10)
#
# ## What we're going to do :
#
#  - (Retrieve data)
#  - (Preparing the data)
#  - (Build a model)
#  - Train and save the model
#  - Restore saved model
#  - Evaluate the model
#  - Make some predictions
#

HEAD = "[wine_quality/wine_quality]"

# ## Step 1 - Import and init
print("\n"+HEAD,"# ## Step 1 - Import and init\n")

# Import some packages
import os

import pandas as pd

import torch
import torchvision.transforms as T

from model_wine_lightning.modules.data_load import NormalizeFeatures
from model_wine_lightning.modules.data_load import ToTensorFeatures
from model_wine_lightning.modules.model import LitRegression

import fidle

import json
import argparse
from pprint import pprint

OUTPUTS_PATH = "OUTPUTS"  # must exist (created below when it is missing)

# Messages accumulated during the run and copied into the report at the end.
error_msg, warn_msg, more_msg = "", "", ""  # default

report_json_filepath = os.path.join(OUTPUTS_PATH, "report.json")
quality_txt_filepath = os.path.join(OUTPUTS_PATH, "quality.txt")
report = dict()  # init
pred = 99.99  # default, written to quality.txt when no prediction is made

data_json_filepath, data = None, None
model_ckpt_filepath = None
norm_config_json_filepath, norm_config = None, None


def is_not_given(argument):
    """Return True when a command line value is None or the string 'None'."""
    return ((argument is None) or (argument == 'None'))


def is_given(argument):
    """Return True when a command line value is neither None nor 'None'."""
    return ((argument is not None) and (argument != 'None'))


# Every failure below stores its message in error_msg and then raises; the
# single handler at the end of this try block also appends the exception
# itself, so the script always reaches the OUTPUTS section.
try:

    if not os.path.exists(OUTPUTS_PATH):
        # The case where OUTPUTS_PATH exists as a regular file is not handled
        # here; it is caught by the isdir check just below.
        os.mkdir(OUTPUTS_PATH)
        message = "Outputs folder '"+OUTPUTS_PATH+"' does not exist => created."
        warn_msg += message + " "
        print(HEAD, "Warning :", message)

    if not os.path.isdir(OUTPUTS_PATH):
        message = "Outputs folder '" + OUTPUTS_PATH + "' must exist."
        error_msg += message + " "
        raise Exception(message)

    # ## INPUTS
    print("\n"+HEAD, "# ## INPUTS\n")

    desc_text = "Predict Quality of a Wine"
    parser = argparse.ArgumentParser(prog='wine_quality_predict',
                                     description=desc_text)

    help_required_keys = "\"fixed acidity\";\"volatile acidity\";\"citric acid\";\"residual sugar\";\"chlorides\";\"free sulfur dioxide\";\"total sulfur dioxide\";\"density\";\"pH\";\"sulphates\";\"alcohol\""

    help_text = "data file path (.json), required keys:"+help_required_keys
    parser.add_argument("-data_json_filepath", type=str, help=help_text)

    help_text = "data in string format, required keys:"+help_required_keys
    parser.add_argument("-data", type=str, help=help_text)

    help_text = "checkpoint model file path (.ckpt)"
    parser.add_argument("-model_ckpt_filepath", type=str, help=help_text)

    help_text = "normalization configuration file path (.json), "
    help_text += "information returned by wine_quality_train_eval running."
    parser.add_argument("-norm_config_json_filepath", type=str,
                        help=help_text)

    args = parser.parse_args()

    # 1 and only 1 among -data_json_filepath and -data
    if is_given(args.data_json_filepath) and is_given(args.data):
        message = "Both -data_json_filepath and -data given "
        message += "(1 and only 1 of -data_json_filepath and -data expected) "
        message += "=> STOP."
        error_msg += message + " "
        raise Exception(message)
    if is_not_given(args.data_json_filepath) and is_not_given(args.data):
        message = "NO data_json_filepath and NO -data given. "
        message += "(1 and only 1 of -data_json_filepath and -data expected) "
        message += "=> STOP."
        error_msg += message + " "
        raise Exception(message)

    # Wine data given as a .json file
    path = args.data_json_filepath
    if is_given(path):
        if os.path.isfile(path):
            data_json_filepath = path
            print(HEAD, "data_json_filepath used :", data_json_filepath)
            try:
                # 'with' guarantees the file is closed, even on parse error
                with open(data_json_filepath, 'r') as inputfile:
                    data = json.load(inputfile)
            except Exception as err:
                message = "Failed to get json data from "
                message += "'" + data_json_filepath + "'" + " file."
                error_msg += message + " "
                raise Exception(message) from err
        else:
            message = path + " data_json_filepath file not found => STOP."
            error_msg += message + " "
            raise Exception(message)

    # Wine data given as a (sanitized) string
    if is_given(args.data):
        data_text = args.data
        try:
            # restore sanitized text : each '__xx__' token found in the value
            # is replaced by the character it stands for
            MAPPING = {'>': '__gt__', '<': '__lt__',
                       "'": '__sq__', '"': '__dq__',
                       '[': '__ob__', ']': '__cb__',
                       '{': '__oc__', '}': '__cc__',
                       '@': '__at__',
                       '\n': '__cn__', '\r': '__cr__', '\t': '__tc__',
                       '#': '__pd__'}
            for key, value in MAPPING.items():
                data_text = data_text.replace(value, key)

            data = json.loads(data_text)  # get data
        except Exception as err:
            message = "Failed to get json data from string '"+data_text+"'"
            error_msg += message + " "
            raise Exception(message) from err

    # Model checkpoint (required)
    path = args.model_ckpt_filepath
    if is_given(path):
        if os.path.isfile(path):
            model_ckpt_filepath = path
            print(HEAD, "model_ckpt_filepath used :", model_ckpt_filepath)
        else:
            message = path + " model_ckpt_filepath file not found => STOP."
            error_msg += message + " "
            raise Exception(message)
    else:
        message = "NO model_ckpt_filepath given => STOP."
        error_msg += message + " "
        raise Exception(message)

    # Normalization configuration (required)
    path = args.norm_config_json_filepath
    if is_given(path):
        if os.path.isfile(path):
            norm_config_json_filepath = path
            print(HEAD, "norm_config_json_filepath used :",
                  norm_config_json_filepath)
            try:
                with open(norm_config_json_filepath, 'r') as inputfile:
                    norm_config = json.load(inputfile)
            except Exception as err:
                message = "Failed to get json norm_config from "
                message += "'" + norm_config_json_filepath + "'" + " file."
                error_msg += message + " "
                raise Exception(message) from err
        else:
            message = path + " norm_config_json_filepath file not found => STOP."
            error_msg += message + " "
            raise Exception(message)
    else:
        message = "NO norm_config_json_filepath given => STOP."
        error_msg += message + " "
        raise Exception(message)

    print(HEAD, "INPUTS:")
    print("- data:", data)
    print("- model checkpoint:", model_ckpt_filepath)
    print("- norm_config:", norm_config)

    # Init Fidle environment
    print("\n"+HEAD, "# Init Fidle environment\n")
    run_id, run_dir, datasets_dir = fidle.init('LWINE1_predict')

    # Verbosity during training :
    # - 0 = silent
    # - 1 = progress bar
    # - 2 = one line per epoch
    fit_verbosity = 1

    # Override parameters (batch mode) - Just forget this cell
    fidle.override('fit_verbosity')

    # ## Step 7 - Restore model :
    print("\n"+HEAD, "# ## Step 7 - Restore model :\n")

    # ### 7.1 - Reload model
    print("\n"+HEAD, "# ### 7.1 - Reload model\n")

    loaded_model = LitRegression.load_from_checkpoint(model_ckpt_filepath)
    print(HEAD, "Model loaded from checkpoint: ", model_ckpt_filepath)
    print("Loaded:", loaded_model)

    # ### 7.2 - Evaluate model : Not kept

    # ### 7.3 - Make a prediction
    print("\n"+HEAD, "# ### 7.3 - Make a prediction\n")

    mean_json = norm_config['mean_json']
    std_json = norm_config['std_json']
    min_json = norm_config['min_json']
    max_json = norm_config['max_json']
    print(HEAD, "Use Normalization mean: ", mean_json)
    print(HEAD, "Use Normalization std: ", std_json)
    print(HEAD, "Use Normalization min: ", min_json)
    print(HEAD, "Use Normalization max: ", max_json)

    NF = NormalizeFeatures(mean_json, std_json, min_json, max_json)

    if not NF.is_in_domain(data):
        message = "data values out of domain => no prediction."
        error_msg += message + " "
        raise Exception(message)

    features = NF.get_features(data)
    transform = T.Compose([NF, ToTensorFeatures()])
    sample = transform(features)

    # Sets the model in evaluation mode
    loaded_model.eval()

    # Perform inference using the loaded model; gradients are not needed
    # for a single prediction
    with torch.no_grad():
        y_pred = loaded_model(sample)

    pred = y_pred[0][0].item()

    print(HEAD, ":")
    print("Quality prediction :", f'{pred:.2f}', " , for wine data:")
    pprint(data)

    # ## OUTPUTS
    print("\n"+HEAD, "# ## OUTPUTS\n")

    # Report (json) :
    # - quality prediction value
    # - wine data
    # - error message, more message, warning message
    report["quality"] = pred
    report["data"] = data
    report["model_ckpt_filepath"] = model_ckpt_filepath
    report["norm_config"] = norm_config

    fidle.end()

except Exception as e:
    # Appended in addition to any message already stored before the raise.
    error_msg += type(e).__name__ + str(e.args) + ". "

if error_msg != "":
    report["error"] = error_msg
if more_msg != "":
    report["more"] = more_msg
if warn_msg != "":
    report["warning"] = warn_msg

print("OUTPUT:", "Quality prediction :", pred)

print("OUTPUT:", "Report: ")
pprint(report)

## Save Report as .json file
#try:
#    with open(report_json_filepath, "w") as outfile:
#        json.dump(report, outfile)
#    print("OUTPUT:", "Report file (containing report) :", report_json_filepath)
#except :
#    pass

# Save quality alone into .txt file (best effort : a failure is reported on
# screen but does not stop the script)
try:
    with open(quality_txt_filepath, "w") as outfile:
        outfile.write(str(pred))
    print("OUTPUT:", "Quality file (containing quality value) :",
          quality_txt_filepath)
except Exception as e:
    print("OUTPUT:", "Warning : failed to write quality file",
          quality_txt_filepath, ":", type(e).__name__ + str(e.args))

# ---
# <img width="80px" src="../fidle/img/logo-paysage.svg"></img>