From 06998944e498b711fd5f81fae370af71a701268a Mon Sep 17 00:00:00 2001
From: Donghak PARK
Date: Thu, 1 Feb 2024 20:28:18 +0900
Subject: [PATCH] [CI] Update Pylint - pip install with requirements.txt

To properly check the Python files used by NNTrainer, all modules those
files import must be installed. This step is currently missing in CI, so
this PR adds a requirements file and updates the workflow to install the
modules currently used by NNTrainer.

- fix Applications/LLaMA/PyTorch/weights_converter.py so that it scores
  10/10 in pylint

**Changes proposed in this PR:**
modified:   .github/workflows/pylint.yml
modified:   Applications/LLaMA/PyTorch/weights_converter.py
new file:   tools/requirements.txt

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Donghak PARK
---
 .github/workflows/pylint.yml                  |  1 +
 .../LLaMA/PyTorch/weights_converter.py        | 69 ++++++++--------
 tools/requirements.txt                        | 81 +++++++++++++++++++
 3 files changed, 119 insertions(+), 32 deletions(-)
 create mode 100644 tools/requirements.txt

diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml
index c49b61b0..98822c86 100644
--- a/.github/workflows/pylint.yml
+++ b/.github/workflows/pylint.yml
@@ -45,6 +45,7 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         pip install pylint==3.0.2 numpy wheel
+        pip install -r tools/requirements.txt
     - name: Run PyLint on changed files
       run: |
         echo "${{ steps.get_file_changes.outputs.files}}" | tr " " "\n" | grep ".py$" | xargs pylint
diff --git a/Applications/LLaMA/PyTorch/weights_converter.py b/Applications/LLaMA/PyTorch/weights_converter.py
index 357be6c5..7a2b83a7 100644
--- a/Applications/LLaMA/PyTorch/weights_converter.py
+++ b/Applications/LLaMA/PyTorch/weights_converter.py
@@ -1,58 +1,63 @@
-# SPDX-License-Identifier: Apache-2.0
-# Copyright (C) 2023 Seungbaek Hong
-#
-# @file weights_converter.py
-# @date 13 October 2023
-#
-# @author Seungbaek Hong
+"""
+SPDX-License-Identifier: Apache-2.0
+Copyright (C) 2023 Seungbaek Hong
+
+@file weights_converter.py
+@date 13 October 2023
+
+@author Seungbaek Hong
+"""
 
 import torch
 import numpy as np
 from transformers import LlamaForCausalLM
 
-##
-# @brief convert and save weights as nntrainer format for multi-head attention model
-def save_llama_for_nntrainer(params, n_layers, file, dtype):
+
+def save_llama_for_nntrainer(params, n_layers, file, dtype):
+    """
+    @brief convert and save weights as nntrainer format for multi-head attention model
+    """
+
     def save_weight(weight):
         np.array(weight, dtype=dtype).tofile(file)
 
     def save_embedding(weight):
         save_weight(weight)
 
-    def save_attention(weights, layer_name):
-        save_weight(params[layer_name + 'input_layernorm' + '.weight'])
-        save_weight(params[layer_name + 'self_attn.q_proj' + '.weight'].permute(1, 0))
-        save_weight(params[layer_name + 'self_attn.k_proj' + '.weight'].permute(1, 0))
-        save_weight(params[layer_name + 'self_attn.v_proj' + '.weight'].permute(1, 0))
-        save_weight(params[layer_name + 'self_attn.o_proj' + '.weight'].permute(1, 0))
+    def save_attention(layer_name):
+        save_weight(params[layer_name + "input_layernorm" + ".weight"])
+        save_weight(params[layer_name + "self_attn.q_proj" + ".weight"].permute(1, 0))
+        save_weight(params[layer_name + "self_attn.k_proj" + ".weight"].permute(1, 0))
+        save_weight(params[layer_name + "self_attn.v_proj" + ".weight"].permute(1, 0))
+        save_weight(params[layer_name + "self_attn.o_proj" + ".weight"].permute(1, 0))
 
-    def save_feed_forward(weights, layer_name):
-        save_weight(params[layer_name + 'post_attention_layernorm' + '.weight'])
-        save_weight(params[layer_name + 'mlp.up_proj' + '.weight'].permute(1, 0))
-        save_weight(params[layer_name + 'mlp.gate_proj' + '.weight'].permute(1, 0))
-        save_weight(params[layer_name + 'mlp.down_proj' + '.weight'].permute(1, 0))
+    def save_feed_forward(layer_name):
+        save_weight(params[layer_name + "post_attention_layernorm" + ".weight"])
+        save_weight(params[layer_name + "mlp.up_proj" + ".weight"].permute(1, 0))
+        save_weight(params[layer_name + "mlp.gate_proj" + ".weight"].permute(1, 0))
+        save_weight(params[layer_name + "mlp.down_proj" + ".weight"].permute(1, 0))
 
     # save weights of embedding layer
-    save_embedding(params['model.embed_tokens.weight'])
-
+    save_embedding(params["model.embed_tokens.weight"])
+
     # save weights of attention layers & feed forward layers
     for layer_idx in range(n_layers):
-        save_attention(params, 'model.layers.{}.'.format(layer_idx))
-        save_feed_forward(params, 'model.layers.{}.'.format(layer_idx))
+        save_attention(f"model.layers.{layer_idx}.")
+        save_feed_forward(f"model.layers.{layer_idx}.")
 
     # save weights of output batch-normalization layer
-    save_weight(params['model.norm.weight'])
+    save_weight(params["model.norm.weight"])
 
     # save weights of output fc layer
-    save_weight(params['lm_head.weight'].permute(1, 0))
+    save_weight(params["lm_head.weight"].permute(1, 0))
 
 
-if __name__ == '__main__':
-    model_path = '/USR_DIR/MODEL_DIR/'
+if __name__ == "__main__":
+    MODEL_PATH = "/USR_DIR/MODEL_DIR/"
 
     model = LlamaForCausalLM.from_pretrained(
-        model_path, torch_dtype=torch.float32, device_map='cpu'
+        MODEL_PATH, torch_dtype=torch.float32, device_map="cpu"
     )
 
-    file_mha = open("./llama_v2_mha.bin", "wb")
-    save_llama_for_nntrainer(model.state_dict(), 28, file_mha, 'float16')
+    with open("./llama_v2_mha.bin", "wb") as file_mha:
+        save_llama_for_nntrainer(model.state_dict(), 28, file_mha, "float16")
diff --git a/tools/requirements.txt b/tools/requirements.txt
new file mode 100644
index 00000000..0be69076
--- /dev/null
+++ b/tools/requirements.txt
@@ -0,0 +1,81 @@
+absl-py==2.1.0
+astroid==3.0.2
+astunparse==1.6.3
+cachetools==5.3.2
+certifi==2023.11.17
+charset-normalizer==3.3.2
+contourpy==1.2.0
+cycler==0.12.1
+dill==0.3.8
+filelock==3.13.1
+flatbuffers==23.5.26
+fonttools==4.47.2
+fsspec==2023.12.2
+gast==0.5.4
+google-auth==2.27.0
+google-auth-oauthlib==1.2.0
+google-pasta==0.2.0
+grpcio==1.60.0
+h5py==3.10.0
+huggingface-hub==0.20.3
+idna==3.6
+importlib-metadata==7.0.1
+importlib-resources==6.1.1
+isort==5.13.2
+Jinja2==3.1.3
+joblib==1.3.2
+keras==2.15.0
+kiwisolver==1.4.5
+libclang==16.0.6
+Markdown==3.5.2
+MarkupSafe==2.1.4
+matplotlib==3.8.2
+mccabe==0.7.0
+ml-dtypes==0.2.0
+mpmath==1.3.0
+networkx==3.2.1
+numpy==1.26.3
+oauthlib==3.2.2
+opt-einsum==3.3.0
+packaging==23.2
+pandas==2.2.0
+pillow==10.2.0
+platformdirs==4.2.0
+protobuf==4.23.4
+pyasn1==0.5.1
+pyasn1-modules==0.3.0
+pylint==3.0.2
+pyparsing==3.1.1
+python-dateutil==2.8.2
+pytz==2023.4
+PyYAML==6.0.1
+regex==2023.12.25
+requests==2.31.0
+requests-oauthlib==1.3.1
+rsa==4.9
+safetensors==0.4.2
+scikit-learn==1.4.0
+scipy==1.12.0
+six==1.16.0
+sympy==1.12
+tensorboard==2.15.1
+tensorboard-data-server==0.7.2
+tensorflow==2.15.0.post1
+tensorflow-estimator==2.15.0
+tensorflow-io-gcs-filesystem==0.35.0
+termcolor==2.4.0
+threadpoolctl==3.2.0
+tokenizers==0.15.1
+tomli==2.0.1
+tomlkit==0.12.3
+torch==2.2.0
+torchvision==0.17.0
+tqdm==4.66.1
+transformers==4.37.2
+triton==2.2.0
+typing_extensions==4.9.0
+tzdata==2023.4
+urllib3==2.2.0
+Werkzeug==3.0.1
+wrapt==1.14.1
+zipp==3.17.0
-- 
2.34.1
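
As a quick sanity check of the converter output, one might read back the leading block of llama_v2_mha.bin and compare it against the source embedding table, since save_llama_for_nntrainer writes the token embedding first. A minimal sketch, reusing the placeholder MODEL_PATH from the script; this helper is hypothetical and not part of the patch:

```python
# Hypothetical sanity check for the converter output; not part of this PR.
import numpy as np
import torch
from transformers import LlamaForCausalLM

MODEL_PATH = "/USR_DIR/MODEL_DIR/"  # placeholder path, as in weights_converter.py

model = LlamaForCausalLM.from_pretrained(
    MODEL_PATH, torch_dtype=torch.float32, device_map="cpu"
)
embed = model.state_dict()["model.embed_tokens.weight"]

# The embedding table is the first tensor written to the flat binary, so
# the leading numel(embed) float16 values of the file should match it,
# using the same numpy cast the converter applies before tofile().
head = np.fromfile("./llama_v2_mha.bin", dtype="float16", count=embed.numel())
expected = np.array(embed, dtype="float16").ravel()
np.testing.assert_array_equal(head, expected)
print("embedding block matches")
```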