changeset 0:143b15001522 draft

planemo upload for repository https://forgemia.inra.fr/nathalie.rousse/use/-/tree/dnn/DNN/galaxy-tools/wine_quality commit e7c4e447552083db7eaecbdf139a7c359fe9becc
author siwaa
date Wed, 04 Dec 2024 15:25:26 +0000
parents
children c3841488b88d
files wine_quality.py wine_quality.xml
diffstat 2 files changed, 453 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wine_quality.py	Wed Dec 04 15:25:26 2024 +0000
@@ -0,0 +1,369 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+#########  #########   #########   #########   #########   #########  ######### 
+#
+# File created from fidlemore/model_wine_lightning/wine_quality_predict.py
+#
+# Modifications :
+#
+# - restore sanitized text for -data value
+#
+# - quality.txt file containing only quality value (extracted from report)
+#
+#########  #########   #########   #########   #########   #########  ######### 
+
+###############################################################################
+# Module : model_wine_lightning
+#
+# This code has been extracted from 01-DNN-Wine-Regression-lightning.ipynb
+# (fidle-tp/fidle-master-3.0.11/Wine.Lightning) then modified.
+# Only last part kept :
+#  - Restore saved model      from checkpoint
+#  - Evaluate the model       not kept
+#  - Make some predictions    1 prediction
+#
+# Inputs :
+#
+# The data of the wine whose quality is to be predicted is given either
+# by -data_json_filepath or by -data (exactly one of the two).
+#
+# -data_json_filepath : data file path (.json) containing data.
+# -data : data (string format)
+#
+# Example of wine data : { "fixed acidity": 11.2,
+#                          "volatile acidity": 0.28,
+#                          "citric acid": 0.56,
+#                          "residual sugar": 1.9,
+#                          "chlorides": 0.075,
+#                          "free sulfur dioxide": 17,
+#                          "total sulfur dioxide": 60,
+#                          "density": 0.998,
+#                          "pH": 3.16,
+#                          "sulphates": 0.58,
+#                          "alcohol": 9.8 }
+#
+# -model_ckpt_filepath : checkpoint model file path (.ckpt) to be loaded.
+#
+# -norm_config_json_filepath : normalization configuration file (.json)
+#  containing the information (norm_config) returned by a run of the model
+#  wine_quality_train_eval.
+#
+# Outputs :
+#
+# Output files under "OUTPUTS" folder (created if it does not exist)
+#
+# - Quality prediction value (float)
+#
+# - Report file (report_json_filepath) (.json) containing:
+#   - Quality prediction value
+#   - Wine data
+#   - error message, more message, warning message
+#
+# - Log files into Wine.Lightning/run/LWINE1/logs/reg_logs
+#
+# - Screen display containing running information 
+#
+###############################################################################
+
+# <img width="800px" src="../fidle/img/header.svg"></img>
+# 
+# # <!-- TITLE --> [LWINE1] - Wine quality prediction with a Dense Network (DNN)
+#   <!-- DESC -->  Another example of regression, with a wine quality prediction, using PyTorch Lightning
+#   <!-- AUTHOR : Achille Mbogol Touye (EFFILIA-MIAI/SIMaP) -->
+# 
+# ## Objectives :
+#  - Predict the **quality of wines**, based on their analysis
+#  - Understanding the principle and the architecture of a regression with a dense neural network with backup and restore of the trained model. 
+# 
+# The **[Wine Quality datasets](https://archive.ics.uci.edu/ml/datasets/wine+Quality)** are made up of analyses of a large number of wines, with an associated quality (between 0 and 10)  
+# This dataset is provided by :  
+# Paulo Cortez, University of Minho, Guimarães, Portugal, http://www3.dsi.uminho.pt/pcortez  
+# A. Cerdeira, F. Almeida, T. Matos and J. Reis, Viticulture Commission of the Vinho Verde Region(CVRVV), Porto, Portugal, @2009  
+# This dataset can be retrieved at [University of California Irvine (UCI)](https://archive-beta.ics.uci.edu/ml/datasets/wine+quality)
+# 
+# 
+# Due to privacy and logistic issues, only physicochemical and sensory variables are available  
+# There is no data about grape types, wine brand, wine selling price, etc.
+# 
+# - fixed acidity
+# - volatile acidity
+# - citric acid
+# - residual sugar
+# - chlorides
+# - free sulfur dioxide
+# - total sulfur dioxide
+# - density
+# - pH
+# - sulphates
+# - alcohol
+# - quality (score between 0 and 10)
+# 
+# ## What we're going to do :
+# 
+#  - (Retrieve data)
+#  - (Preparing the data)
+#  - (Build a model)
+#  - Train and save the model
+#  - Restore saved model
+#  - Evaluate the model
+#  - Make some predictions
+# 
+HEAD = "[wine_quality/wine_quality]"  # tag printed in front of this script's console messages
+
+# ## Step 1 - Import and init
+print("\n"+HEAD,"# ## Step 1 - Import and init\n")
+
+# Import some packages
+import os
+import pandas as pd
+import torch
+import torchvision.transforms as T
+from model_wine_lightning.modules.data_load import NormalizeFeatures
+from model_wine_lightning.modules.data_load import ToTensorFeatures
+from model_wine_lightning.modules.model import LitRegression
+import fidle
+import json
+import argparse
+from pprint import pprint
+
+OUTPUTS_PATH = "OUTPUTS" # output folder; created further down when it does not exist
+
+error_msg, warn_msg, more_msg = "", "", "" # accumulated messages, copied into the report when non-empty
+
+report_json_filepath =  os.path.join(OUTPUTS_PATH, "report.json")
+quality_txt_filepath =  os.path.join(OUTPUTS_PATH, "quality.txt")
+report = dict() # filled in as the run progresses
+pred = 99.99 # placeholder written to quality.txt when no prediction is made
+
+data_json_filepath, data = None, None # wine data file path and parsed wine data
+model_ckpt_filepath = None # checkpoint file path
+norm_config_json_filepath, norm_config = None, None # normalization file path and parsed content
+
+try:
+    def is_not_given(argument):
+        return argument is None or argument == 'None'  # absent, or the literal text 'None'
+    def is_given(argument):
+        return not (argument is None or argument == 'None')  # exact opposite of is_not_given
+
+    if not os.path.exists(OUTPUTS_PATH): # a non-directory at this path is rejected by the isdir check below
+        os.mkdir(OUTPUTS_PATH)
+        message = "Outputs folder '"+OUTPUTS_PATH+"' does not exist => created."
+        warn_msg += message + " "
+        print(HEAD, "Warning :", message)
+
+    if not os.path.isdir(OUTPUTS_PATH):
+        message = "Outputs folder '" + OUTPUTS_PATH + "' must exist."
+        error_msg += message + " "
+        raise Exception(message)
+
+    # ## INPUTS
+    print("\n"+HEAD, "# ## INPUTS\n")
+
+    desc_text = "Predict Quality of a Wine"
+    parser = argparse.ArgumentParser(prog='wine_quality_predict',
+                                     description=desc_text)
+    # Wine data: given through -data_json_filepath or -data (checked after parsing)
+    help_required_keys = "\"fixed acidity\";\"volatile acidity\";\"citric acid\";\"residual sugar\";\"chlorides\";\"free sulfur dioxide\";\"total sulfur dioxide\";\"density\";\"pH\";\"sulphates\";\"alcohol\""
+    help_text = "data file path (.json), required keys:"+help_required_keys
+    parser.add_argument("-data_json_filepath", type=str, help=help_text)
+    help_text = "data in string format, required keys:"+help_required_keys
+    parser.add_argument("-data", type=str, help=help_text)
+    # Trained model checkpoint
+    help_text = "checkpoint model file path (.ckpt)"
+    parser.add_argument("-model_ckpt_filepath", type=str, help=help_text)
+    # Normalization configuration
+    help_text = "normalization configuration file path (.json), "
+    help_text += "information returned by wine_quality_train_eval running."
+    parser.add_argument("-norm_config_json_filepath", type=str, help=help_text)
+
+    args = parser.parse_args()
+    # Exactly one of -data_json_filepath and -data must be supplied.
+    has_file = is_given(args.data_json_filepath)
+    has_text = is_given(args.data)
+    expected = "(1 and only 1 of -data_json_filepath and -data expected) "
+    if has_file and has_text:
+        message = "Both -data_json_filepath and -data given " + expected
+        message += "=> STOP."
+        error_msg += message + " "
+        raise Exception(message)
+    if not (has_file or has_text):
+        message = "NO data_json_filepath and NO -data given. " + expected
+        message += "=> STOP."
+        error_msg += message + " "
+        raise Exception(message)
+    # Wine data read from -data_json_filepath (when given)
+    path = args.data_json_filepath
+    if is_given(path) :
+        if not os.path.isfile(path) :
+            message = "'" + path + "' data_json_filepath file not found => STOP."
+            error_msg += message + " "
+            raise Exception(message)
+        data_json_filepath = path
+        print(HEAD, "data_json_filepath used :", data_json_filepath)
+        try :
+            with open(data_json_filepath, 'r') as inputfile: # closed even if parsing fails
+                data = json.load(inputfile)
+        except (OSError, ValueError) as load_error: # unreadable file or invalid JSON
+            message = "Failed to get json data from "
+            message += "'" + data_json_filepath+ "'" + " file."
+            error_msg += message + " "
+            raise Exception(message) from load_error
+
+    # Wine data given as text through -data (when given)
+    if is_given(args.data) :
+        data_text = args.data
+        # Sanitized tokens (e.g. __dq__) are turned back into the characters they stand for
+        MAPPING = {'>': '__gt__', '<': '__lt__', "'": '__sq__', '"': '__dq__', '[': '__ob__', ']': '__cb__', '{': '__oc__', '}': '__cc__', '@': '__at__', '\n': '__cn__', '\r': '__cr__', '\t': '__tc__', '#': '__pd__'}
+        for key, value in MAPPING.items():
+            data_text = data_text.replace(value, key)
+
+        try :
+            data = json.loads(data_text) # get data
+        except ValueError as parse_error: # json.JSONDecodeError is a ValueError
+            message = "Failed to get json data from string '"+data_text+"'"
+            error_msg += message + " "
+            raise Exception(message) from parse_error
+
+    # Checkpoint file: mandatory, and it must be an existing file
+    path = args.model_ckpt_filepath
+    if is_not_given(path) :
+        message = "NO model_ckpt_filepath given => STOP."
+        error_msg += message + " "
+        raise Exception(message)
+    if not os.path.isfile(path) :
+        # quotes keep the path apart from the rest of the message
+        message = "'" + path + "' model_ckpt_filepath file not found => STOP."
+        error_msg += message + " "
+        raise Exception(message)
+    model_ckpt_filepath = path
+    print(HEAD, "model_ckpt_filepath used :", model_ckpt_filepath)
+
+    # Normalization configuration: mandatory, must be an existing JSON file
+    path = args.norm_config_json_filepath
+    if is_not_given(path) :
+        message = "NO norm_config_json_filepath given => STOP."
+        error_msg += message + " "
+        raise Exception(message)
+    if not os.path.isfile(path) :
+        # quotes keep the path apart from the rest of the message
+        message = "'" + path + "' norm_config_json_filepath file not found => STOP."
+        error_msg += message + " "
+        raise Exception(message)
+    norm_config_json_filepath = path
+    print(HEAD, "norm_config_json_filepath used :",
+          norm_config_json_filepath)
+    try :
+        with open(norm_config_json_filepath, 'r') as inputfile: # closed even if parsing fails
+            norm_config = json.load(inputfile)
+    except (OSError, ValueError) as load_error: # unreadable file or invalid JSON
+        message = "Failed to get json norm_config from "
+        message += "'" + norm_config_json_filepath+ "'" + " file."
+        error_msg += message + " "
+        raise Exception(message) from load_error
+
+    print(HEAD, "INPUTS:") # echo the resolved inputs
+    print("- data:", data)
+    print("- model checkpoint:", model_ckpt_filepath)
+    print("- norm_config:", norm_config)
+    
+    # Init Fidle environment
+    print("\n"+HEAD, "# Init Fidle environment\n")
+    run_id, run_dir, datasets_dir = fidle.init('LWINE1_predict') # NOTE(review): returned values are not used in the visible code
+
+    # Verbosity during training (no training is run by the visible code) :
+    # - 0 = silent
+    # - 1 = progress bar
+    # - 2 = one line per epoch
+    fit_verbosity = 1
+
+    # Override parameters (batch mode) - presumably lets fidle replace fit_verbosity from outside; TODO confirm
+    fidle.override('fit_verbosity')
+
+    # ## Step 7 - Restore model :
+    print("\n"+HEAD, "# ## Step 7 - Restore model :\n")
+
+    # ### 7.1 - Reload model from the checkpoint file given by -model_ckpt_filepath
+    print("\n"+HEAD, "# ### 7.1 - Reload model\n")
+    loaded_model = LitRegression.load_from_checkpoint(model_ckpt_filepath) # path was checked to be an existing file above
+    print(HEAD, "Model loaded from checkpoint: ", model_ckpt_filepath)
+    print("Loaded:", loaded_model)
+
+    ## ### 7.2 - Evaluate model : Not kept
+
+    # ### 7.3 - Make a prediction
+    print("\n"+HEAD, "# ### 7.3 - Make a prediction\n")
+
+    mean_json = norm_config['mean_json']
+    std_json = norm_config['std_json']
+    min_json = norm_config['min_json']
+    max_json = norm_config['max_json']
+    print(HEAD, "Use Normalization mean: ", mean_json)
+    print(HEAD, "Use Normalization std: ", std_json)
+    print(HEAD, "Use Normalization min: ", min_json)
+    print(HEAD, "Use Normalization max: ", max_json)
+    NF = NormalizeFeatures(mean_json, std_json, min_json, max_json)
+    if not NF.is_in_domain(data):
+        message = "data values out of domain => no prediction."
+        error_msg += message + " "
+        raise Exception(message)
+    features = NF.get_features(data)
+    transform = T.Compose([NF, ToTensorFeatures()])
+    sample = transform(features)
+
+    # Sets the model in evaluation mode
+    loaded_model.eval()
+    # Perform inference using the loaded model; no_grad: a prediction needs no autograd graph
+    with torch.no_grad():
+        y_pred = loaded_model(sample)
+    pred = y_pred[0][0].item()
+    print(HEAD, ":")
+    print("Quality prediction :", f'{pred:.2f}', " , for wine data:")
+    pprint(data)
+    
+    # ## OUTPUTS
+    print("\n"+HEAD, "# ## OUTPUTS\n")
+
+    # Report content on success: prediction, wine data and the inputs used.
+    # Error, more and warning messages are added after the try block.
+    report.update({
+        "quality": pred,
+        "data": data,
+        "model_ckpt_filepath": model_ckpt_filepath,
+        "norm_config": norm_config,
+    })
+
+    fidle.end()
+
+except Exception as e :
+    error_msg += type(e).__name__ + str(e.args) + ". "
+
+# Only non-empty messages are copied into the report
+for report_key, text in (("error", error_msg), ("more", more_msg), ("warning", warn_msg)):
+    if text != "": report[report_key] = text
+
+print("OUTPUT:", "Quality prediction :", pred)
+
+print("OUTPUT:", "Report: ")
+pprint(report)
+
+## Save Report as .json file
+#try:
+#    with open(report_json_filepath, "w") as outfile:
+#        json.dump(report, outfile)
+#    print("OUTPUT:", "Report file (containing report) :", report_json_filepath)
+#except :
+#    pass
+
+# Save quality alone into .txt file (best effort: a write failure is reported, not raised)
+try:
+    with open(quality_txt_filepath, "w") as outfile:
+        outfile.write(str(pred))
+    print("OUTPUT:", "Quality file (containing quality value) :",
+           quality_txt_filepath)
+except OSError as write_error:
+    print("OUTPUT:", "Quality file not written :", write_error)
+
+# ---
+# <img width="80px" src="../fidle/img/logo-paysage.svg"></img>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wine_quality.xml	Wed Dec 04 15:25:26 2024 +0000
@@ -0,0 +1,84 @@
+<tool id="wine_quality" name="wine_quality" version="1.0.0">
+
+  <description>wine_quality</description>
+
+  <requirements>
+
+    <!-- singularity -->
+    <container type="singularity">oras://registry.forgemia.inra.fr/nathalie.rousse/use/dnn_fidlemore:6d159f79349e231deadb738f79bd293c1a8dadd3</container>
+
+    <!-- image locale
+    <container type="singularity">/home/nrousse/workspace_git/SIWAA_regroup/USE_branch_dnn/use/DNN/containers/fidlemore.simg</container -->
+
+  </requirements>
+
+  <environment_variables>
+    <environment_variable name="FIDLE_DATASETS_DIR">/fidle-tp/datasets-fidle</environment_variable>
+  </environment_variables>
+  <!-- Paths are single-quoted and the steps are chained so that a failing step stops the job -->
+  <command detect_errors="aggressive">
+    <![CDATA[
+    . /fidle-tp/fidle-env/bin/activate &&
+    /bin/bash -c "python3 '$__tool_directory__/wine_quality.py' -model_ckpt_filepath '${model_ckpt}' -norm_config_json_filepath '${norm_config_json}' -data '${datajson_text}'" &&
+    cp OUTPUTS/quality.txt '${quality_txt}'
+    ]]>
+  </command>
+  <inputs>
+    <param name="datajson_text" optional="false" type="text" area="true"
+           label="Data of wine whose quality to be predicted (json as text)"/>
+    <param name="model_ckpt" optional="false" type="data" format="ckpt" 
+           label="model_ckpt model file (.ckpt)"/>
+    <param name="norm_config_json" optional="false" type="data" format="json" 
+           label="norm_config normalization configuration (.json)"/>
+  </inputs>
+
+  <outputs>
+    <data format="txt" name="quality_txt" label="quality (.txt)"/>
+  </outputs>
+
+  <tests>
+  </tests>
+
+  <help><![CDATA[
+
+wine_quality.xml
+
+===========================
+
+Code:
+
+----
+
+- wine_quality.py
+
+Inputs:
+
+-------
+
+- data json as text : data of wine for which quality is going to be predicted.
+
+  - example :
+
+    - {"fixed acidity": 11.2, "volatile acidity": 0.28, "citric acid": 0.56, "residual sugar": 1.9, "chlorides": 0.075, "free sulfur dioxide": 17, "total sulfur dioxide": 60, "density": 0.998, "pH": 3.16, "sulphates": 0.58, "alcohol": 9.8}
+
+- Model file (.ckpt)
+
+- Normalization configuration file (.json)
+
+Outputs:
+
+--------
+
+- Quality file (.txt)
+
+Credits:
+
+--------
+
+-  Author: Nathalie Rousse nathalie.rousse@inrae.fr
+-  Copyright: INRAE
+
+  ]]>
+  </help>
+
+</tool>