view wine_quality.py @ 0:143b15001522 draft

planemo upload for repository https://forgemia.inra.fr/nathalie.rousse/use/-/tree/dnn/DNN/galaxy-tools/wine_quality commit e7c4e447552083db7eaecbdf139a7c359fe9becc
author siwaa
date Wed, 04 Dec 2024 15:25:26 +0000
parents
children
line wrap: on
line source

#!/usr/bin/env python
# coding: utf-8

#########  #########   #########   #########   #########   #########  ######### 
#
# File created from fidlemore/model_wine_lightning/wine_quality_predict.py
#
# Modifications :
#
# - restore sanitized text for -data value
#
# - quality.txt file containing only quality value (extracted from report)
#
#########  #########   #########   #########   #########   #########  ######### 

###############################################################################
# Module : model_wine_lightning
#
# This code has been extracted from 01-DNN-Wine-Regression-lightning.ipynb
# (fidle-tp/fidle-master-3.0.11/Wine.Lightning) then modified.
# Only last part kept :
#  - Restore saved model      from checkpoint
#  - Evaluate the model       not kept
#  - Make some predictions    1 prediction
#
# Inputs :
#
# Data of the wine whose quality is to be predicted is given either
# by -data_json_filepath or by -data (exactly one of the two).
#
# -data_json_filepath : data file path (.json) containing data.
# -data : data (string format)
#
# Example of wine data : { "fixed acidity": 11.2,
#                          "volatile acidity": 0.28,
#                          "citric acid": 0.56,
#                          "residual sugar": 1.9,
#                          "chlorides": 0.075,
#                          "free sulfur dioxide": 17,
#                          "total sulfur dioxide": 60,
#                          "density": 0.998,
#                          "pH": 3.16,
#                          "sulphates": 0.58,
#                          "alcohol": 9.8 }
#
# -model_ckpt_filepath : checkpoint model file path (.ckpt) to be loaded.
#
# -norm_config_json_filepath : normalization configuration file (.json)
#  containing information (norm_config) that has been returned by the model
#  wine_quality_train_eval running.
#
# Outputs :
#
# Output files under "OUTPUTS" folder (must exist !!!)
#
# - Quality prediction value (float)
#
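# - Report file (report_json_filepath) (.json) containing:
#   - Quality prediction value
#   - Wine data
#   - error message, more message, warning message
#   NOTE: the report is currently only displayed on screen; the code saving
#   it to report_json_filepath is commented out.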
#
# - Log files into Wine.Lightning/run/LWINE1/logs/reg_logs
#
# - Screen display containing running information 
#
###############################################################################

# <img width="800px" src="../fidle/img/header.svg"></img>
# 
# # <!-- TITLE --> [LWINE1] - Wine quality prediction with a Dense Network (DNN)
#   <!-- DESC -->  Another example of regression, with a wine quality prediction, using PyTorch Lightning
#   <!-- AUTHOR : Achille Mbogol Touye (EFFILIA-MIAI/SIMaP) -->
# 
# ## Objectives :
#  - Predict the **quality of wines**, based on their analysis
#  - Understanding the principle and the architecture of a regression with a dense neural network with backup and restore of the trained model. 
# 
# The **[Wine Quality datasets](https://archive.ics.uci.edu/ml/datasets/wine+Quality)** are made up of analyses of a large number of wines, with an associated quality (between 0 and 10)  
# This dataset is provided by :  
# Paulo Cortez, University of Minho, Guimarães, Portugal, http://www3.dsi.uminho.pt/pcortez  
# A. Cerdeira, F. Almeida, T. Matos and J. Reis, Viticulture Commission of the Vinho Verde Region(CVRVV), Porto, Portugal, @2009  
# This dataset can be retrieved at [University of California Irvine (UCI)](https://archive-beta.ics.uci.edu/ml/datasets/wine+quality)
# 
# 
# Due to privacy and logistic issues, only physicochemical and sensory variables are available  
# There is no data about grape types, wine brand, wine selling price, etc.
# 
# - fixed acidity
# - volatile acidity
# - citric acid
# - residual sugar
# - chlorides
# - free sulfur dioxide
# - total sulfur dioxide
# - density
# - pH
# - sulphates
# - alcohol
# - quality (score between 0 and 10)
# 
# ## What we're going to do :
# 
#  - (Retrieve data)
#  - (Preparing the data)
#  - (Build a model)
#  - Train and save the model
#  - Restore saved model
#  - Evaluate the model
#  - Make some predictions
# 
# Prefix tagging the console messages emitted by this script.
HEAD = "[wine_quality/wine_quality]"

# ## Step 1 - Import and init
print(f"\n{HEAD}", "# ## Step 1 - Import and init\n")

# Import some packages
import os
import pandas as pd
import torch
import torchvision.transforms as T
from model_wine_lightning.modules.data_load import NormalizeFeatures
from model_wine_lightning.modules.data_load import ToTensorFeatures
from model_wine_lightning.modules.model import LitRegression
import fidle
import json
import argparse
from pprint import pprint

# Folder receiving the output files (must exist).
OUTPUTS_PATH = "OUTPUTS"

# Message accumulators; the non-empty ones end up in the report.
error_msg = ""
warn_msg = ""
more_msg = ""

# Output file locations, both under the outputs folder.
report_json_filepath = os.path.join(OUTPUTS_PATH, "report.json")
quality_txt_filepath = os.path.join(OUTPUTS_PATH, "quality.txt")

report = {}   # report content, empty until filled
pred = 99.99  # value kept when no prediction has been computed

# Inputs, unset until resolved from the command line.
data_json_filepath = None
data = None
model_ckpt_filepath = None
norm_config_json_filepath = None
norm_config = None

try:
    def is_not_given(argument):
        """Tell whether an option is absent: None or the literal text 'None'."""
        if argument is None:
            return True
        return argument == 'None'
    def is_given(argument):
        """Tell whether an option is present: neither None nor the text 'None'."""
        if argument is None:
            return False
        return argument != 'None'

    # Make sure the outputs folder is available. A missing folder is created
    # (with a warning). A path that exists without being a directory (e.g. a
    # regular file) is not created again: it falls through to the check below
    # and is reported as an error.
    if not os.path.exists(OUTPUTS_PATH):
        os.mkdir(OUTPUTS_PATH)
        message = ("Outputs folder '" + OUTPUTS_PATH
                   + "' does not exist => created.")
        warn_msg += message + " "
        print(HEAD, "Warning :", message)

    if not os.path.isdir(OUTPUTS_PATH):
        message = "Outputs folder '" + OUTPUTS_PATH + "' must exist."
        error_msg += message + " "
        raise Exception(message)

    # ## INPUTS
    print(f"\n{HEAD}", "# ## INPUTS\n")

    # Command line definition: every option is an optional string; whether
    # the given combination is acceptable is checked after parsing.
    desc_text = "Predict Quality of a Wine"
    parser = argparse.ArgumentParser(
        prog='wine_quality_predict',
        description=desc_text,
    )

    # Keys expected in the wine data, listed in the help as "k1";"k2";...
    required_keys = (
        "fixed acidity", "volatile acidity", "citric acid", "residual sugar",
        "chlorides", "free sulfur dioxide", "total sulfur dioxide", "density",
        "pH", "sulphates", "alcohol",
    )
    help_required_keys = ";".join('"' + name + '"' for name in required_keys)

    option_helps = (
        ("-data_json_filepath",
         "data file path (.json), required keys:" + help_required_keys),
        ("-data",
         "data in string format, required keys:" + help_required_keys),
        ("-model_ckpt_filepath",
         "checkpoint model file path (.ckpt)"),
        ("-norm_config_json_filepath",
         "normalization configuration file path (.json), "
         "information returned by wine_quality_train_eval runnig."),
    )
    for option, help_text in option_helps:
        parser.add_argument(option, type=str, help=help_text)

    args = parser.parse_args()
    # Exactly one wine data source is accepted: a .json file
    # (-data_json_filepath) or inline text (-data).
    file_given = is_given(args.data_json_filepath)
    text_given = is_given(args.data)
    source_problem = None
    if file_given and text_given:
        source_problem = "Both -data_json_filepath and -data given "
    elif not (file_given or text_given):
        source_problem = "NO data_json_filepath and NO -data given. "
    if source_problem is not None:
        message = source_problem
        message += "(1 and only 1 of -data_json_filepath and -data expected) "
        message += "=> STOP."
        error_msg += message + " "
        raise Exception(message)

    # Wine data given as a .json file: the file must exist and contain valid
    # JSON. Any failure is recorded in error_msg and stops the run.
    path = args.data_json_filepath
    if is_given(path):
        if not os.path.isfile(path):
            message = ("data_json_filepath file '" + path
                       + "' not found => STOP.")
            error_msg += message + " "
            raise Exception(message)
        data_json_filepath = path
        print(HEAD, "data_json_filepath used :", data_json_filepath)
        try:
            # The context manager closes the file even when parsing fails.
            with open(data_json_filepath, 'r') as inputfile:
                data = json.load(inputfile)
        except (OSError, ValueError) as err:
            # OSError: the file cannot be read.
            # ValueError: invalid JSON content or undecodable text.
            message = "Failed to get json data from "
            message += "'" + data_json_filepath + "'" + " file."
            error_msg += message + " "
            raise Exception(message) from err

    # Wine data given inline as text (-data).
    if is_given(args.data):
        data_text = args.data

        # Restore sanitized text: each placeholder token of the -data value
        # is turned back into the character it stands for (e.g. '__dq__'
        # becomes '"' and '__oc__' becomes '{').
        MAPPING = {'>': '__gt__', '<': '__lt__', "'": '__sq__', '"': '__dq__', '[': '__ob__', ']': '__cb__', '{': '__oc__', '}': '__cc__', '@': '__at__', '\n': '__cn__', '\r': '__cr__', '\t': '__tc__', '#': '__pd__'}
        for key, value in MAPPING.items():
            data_text = data_text.replace(value, key)

        try:
            data = json.loads(data_text)  # get data
        except ValueError as err:
            # json.JSONDecodeError is a ValueError: the text is not valid JSON.
            message = "Failed to get json data from string '"+data_text+"'"
            error_msg += message + " "
            raise Exception(message) from err

    # Model checkpoint (-model_ckpt_filepath): mandatory, and it must be an
    # existing file. Any failure is recorded in error_msg and stops the run.
    path = args.model_ckpt_filepath
    if not is_given(path):
        message = "NO model_ckpt_filepath given => STOP."
        error_msg += message + " "
        raise Exception(message)
    if not os.path.isfile(path):
        message = "model_ckpt_filepath file '" + path + "' not found => STOP."
        error_msg += message + " "
        raise Exception(message)
    model_ckpt_filepath = path
    print(HEAD, "model_ckpt_filepath used :", model_ckpt_filepath)

    # Normalization configuration (-norm_config_json_filepath): mandatory,
    # must be an existing file containing valid JSON. Any failure is recorded
    # in error_msg and stops the run.
    path = args.norm_config_json_filepath
    if not is_given(path):
        message = "NO norm_config_json_filepath given => STOP."
        error_msg += message + " "
        raise Exception(message)
    if not os.path.isfile(path):
        message = ("norm_config_json_filepath file '" + path
                   + "' not found => STOP.")
        error_msg += message + " "
        raise Exception(message)
    norm_config_json_filepath = path
    print(HEAD, "norm_config_json_filepath used :",
          norm_config_json_filepath)
    try:
        # The context manager closes the file even when parsing fails.
        with open(norm_config_json_filepath, 'r') as inputfile:
            norm_config = json.load(inputfile)
    except (OSError, ValueError) as err:
        # OSError: the file cannot be read.
        # ValueError: invalid JSON content or undecodable text.
        message = "Failed to get json norm_config from "
        message += "'" + norm_config_json_filepath + "'" + " file."
        error_msg += message + " "
        raise Exception(message) from err

    # Recap of the resolved inputs.
    print(HEAD, "INPUTS:")
    for label, shown in (("- data:", data),
                         ("- model checkpoint:", model_ckpt_filepath),
                         ("- norm_config:", norm_config)):
        print(label, shown)
    
    # Init Fidle environment
    # fidle.init returns three values; none of them is used again in this
    # script.
    print("\n"+HEAD, "# Init Fidle environment\n")
    run_id, run_dir, datasets_dir = fidle.init('LWINE1_predict')

    # Verbosity during training :
    # - 0 = silent
    # - 1 = progress bar
    # - 2 = one line per epoch
    # NOTE(review): this script does no training and never reads
    # fit_verbosity itself; the variable is only handed, by name, to
    # fidle.override below.
    fit_verbosity = 1

    # Override parameters (batch mode) - Just forget this cell
    # NOTE(review): presumably lets the batch environment replace the value
    # of the named variable - confirm against the fidle package.
    fidle.override('fit_verbosity')

    # ## Step 7 - Restore model :
    print(f"\n{HEAD}", "# ## Step 7 - Restore model :\n")

    # ### 7.1 - Reload model
    print(f"\n{HEAD}", "# ### 7.1 - Reload model\n")

    # Rebuild the LitRegression model from the checkpoint file, then display
    # where it came from and what was loaded.
    loaded_model = LitRegression.load_from_checkpoint(model_ckpt_filepath)
    print(HEAD, "Model loaded from checkpoint: ", model_ckpt_filepath)
    print("Loaded:", loaded_model)

    ## ### 7.2 - Evaluate model : Not kept

    # ### 7.3 - Make a prediction
    print("\n"+HEAD, "# ### 7.3 - Make a prediction\n")

    # Normalization information read from the normalization configuration
    # (a missing key raises KeyError, handled by the enclosing try).
    mean_json = norm_config['mean_json']
    std_json = norm_config['std_json']
    min_json = norm_config['min_json']
    max_json = norm_config['max_json']
    print(HEAD, "Use Normalization mean: ", mean_json)
    print(HEAD, "Use Normalization std: ", std_json)
    print(HEAD, "Use Normalization min: ", min_json)
    print(HEAD, "Use Normalization max: ", max_json)
    NF = NormalizeFeatures(mean_json, std_json, min_json, max_json)

    # No prediction is made for data rejected by the domain check.
    # NOTE(review): presumably compares the values with the min/max above -
    # confirm in NormalizeFeatures.is_in_domain.
    if not NF.is_in_domain(data):
        message = "data values out of domain => no prediction."
        error_msg += message + " "
        raise Exception(message)

    # Build the model input: features extracted from the wine data, then
    # passed through NF and ToTensorFeatures in that order.
    features = NF.get_features(data)
    transform = T.Compose([NF, ToTensorFeatures()])
    sample = transform(features)

    # Sets the model in evaluation mode
    loaded_model.eval()

    # Perform inference using the loaded model. Gradients are not needed for
    # a prediction, so autograd tracking is switched off for the forward pass.
    with torch.no_grad():
        y_pred = loaded_model(sample)
    pred = y_pred[0][0].item()
    print(HEAD, ":")
    print("Quality prediction :", f'{pred:.2f}', " , for wine data:")
    pprint(data)
    
    # ## OUTPUTS
    print(f"\n{HEAD}", "# ## OUTPUTS\n")

    # Report content for a successful run: the predicted quality, the wine
    # data it was computed for, and the model/normalization inputs used.
    # Error, more and warning messages are added after the try block.
    report.update(
        quality=pred,
        data=data,
        model_ckpt_filepath=model_ckpt_filepath,
        norm_config=norm_config,
    )

    fidle.end()

except Exception as e :
    # Any exception raised in the try block ends up here: its type name and
    # arguments are appended to error_msg (which may already hold the message
    # added where the exception was raised). The script then carries on with
    # the output steps below, with pred left at its current value.
    error_msg += type(e).__name__ + str(e.args) + ". "

# Attach the collected messages to the report; empty ones are left out.
for report_key, text in (("error", error_msg),
                         ("more", more_msg),
                         ("warning", warn_msg)):
    if text != "":
        report[report_key] = text

# Screen display of the prediction and of the whole report.
print("OUTPUT:", "Quality prediction :", pred)

print("OUTPUT:", "Report: ")
pprint(report)

## Save Report as .json file
#try:
#    with open(report_json_filepath, "w") as outfile:
#        json.dump(report, outfile)
#    print("OUTPUT:", "Report file (containing report) :", report_json_filepath)
#except :
#    pass

# Save quality alone into .txt file.
# Writing stays best effort (a failure does not crash the script), but the
# failure is now reported on screen instead of being silently ignored.
try:
    with open(quality_txt_filepath, "w") as outfile:
        outfile.write(str(pred))
except OSError as err:
    print("OUTPUT:", "Warning : failed to write quality file",
          quality_txt_filepath, ":", err)
else:
    print("OUTPUT:", "Quality file (containing quality value) :",
           quality_txt_filepath)

# ---
# <img width="80px" src="../fidle/img/logo-paysage.svg"></img>