From 7d3fc902615bdbaa47261a62a7e85a4bc58a0347 Mon Sep 17 00:00:00 2001
From: filip
Date: Fri, 30 Aug 2024 16:21:21 +0200
Subject: [PATCH] Modified Dockerfiles for inference and training

Dockerfile for Fraud Detection
Renamed Dockerfiles, modified rul and covid prediction scripts, and added new Dockerfiles
Renamed inference Dockerfiles to Dockerfile.infer
Changed the command in the Dockerfiles from python3 to python
Changes to requirements.txt and python.yaml
---
 .github/dependabot.yaml          | 15 +------
 .github/workflows/python.yaml    |  2 +
 covid19/Dockerfile.infer         | 14 +++++++
 covid19/Dockerfile.train         | 14 +++++++
 covid19/predict.py               | 20 ++++++++--
 fraud-detection/Dockerfile.infer | 14 +++++++
 fraud-detection/Dockerfile.train | 14 +++++++
 rul-turbofan/Dockerfile.infer    | 14 +++++++
 rul-turbofan/Dockerfile.train    | 14 +++++++
 rul-turbofan/requirements.txt    |  8 ++++
 rul-turbofan/rul-training.py     | 67 +++++++++++++-------------
 11 files changed, 139 insertions(+), 57 deletions(-)
 create mode 100644 covid19/Dockerfile.infer
 create mode 100644 covid19/Dockerfile.train
 create mode 100644 fraud-detection/Dockerfile.infer
 create mode 100644 fraud-detection/Dockerfile.train
 create mode 100644 rul-turbofan/Dockerfile.infer
 create mode 100644 rul-turbofan/Dockerfile.train
 create mode 100644 rul-turbofan/requirements.txt

diff --git a/.github/dependabot.yaml b/.github/dependabot.yaml
index 05c8ed2..b4173c2 100644
--- a/.github/dependabot.yaml
+++ b/.github/dependabot.yaml
@@ -31,17 +31,4 @@ updates:
     groups:
       gh-dependency:
         patterns:
-          - "*"
-
-  - package-ecosystem: "pip"
-    directories:
-      - "/covid19"
-      - "/fraud-detection"
-      - "/rul-turbofan"
-    schedule:
-      interval: "monthly"
-      day: "monday"
-    groups:
-      pip-dependency:
-        patterns:
-          - "*"
+          - "*"
\ No newline at end of file
diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml
index 2cd5ed3..89e1144 100644
--- a/.github/workflows/python.yaml
+++ b/.github/workflows/python.yaml
@@ -7,6 +7,7 @@ on:
     paths:
       - "covid19/**"
       - "fraud-detection/**"
+      - "rul-turbofan/**"
       - ".github/**"
   pull_request:
     branches:
@@ -14,6 +15,7 @@ on:
     paths:
       - "covid19/**"
       - "fraud-detection/**"
+      - "rul-turbofan/**"
       - ".github/**"
 
 jobs:
diff --git a/covid19/Dockerfile.infer b/covid19/Dockerfile.infer
new file mode 100644
index 0000000..605996c
--- /dev/null
+++ b/covid19/Dockerfile.infer
@@ -0,0 +1,14 @@
+FROM python:3.9-slim
+
+WORKDIR /cocos
+RUN mkdir /cocos/results
+RUN mkdir /cocos/datasets
+
+COPY ./requirements.txt /cocos
+COPY ./predict.py /cocos
+
+# install dependencies
+RUN pip install --no-cache-dir -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
+
+# command to be run when the docker container is started
+CMD ["python", "-u", "/cocos/predict.py"]
\ No newline at end of file
diff --git a/covid19/Dockerfile.train b/covid19/Dockerfile.train
new file mode 100644
index 0000000..35e8211
--- /dev/null
+++ b/covid19/Dockerfile.train
@@ -0,0 +1,14 @@
+FROM python:3.9-slim
+
+WORKDIR /cocos
+RUN mkdir /cocos/results
+RUN mkdir /cocos/datasets
+
+COPY ./requirements.txt /cocos
+COPY ./train.py /cocos
+
+# install dependencies
+RUN pip install --no-cache-dir -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
+
+# command to be run when the docker container is started
+CMD ["python", "-u", "/cocos/train.py"]
\ No newline at end of file
diff --git a/covid19/predict.py b/covid19/predict.py
index 6e39205..a4f4b2d 100644
--- a/covid19/predict.py
+++ b/covid19/predict.py
@@ -38,12 +38,21 @@ def predict(model, image_path, class_names):
     return predicted_class
 
 
-def show_image_with_prediction(image_path, predicted_class):
+def show_image_with_prediction(image_path, predicted_class, output_dir):
     image = Image.open(image_path)
     plt.imshow(image)
     plt.title(f"Predicted: {predicted_class}")
     plt.axis("off")
-    plt.show()
+
+    # Ensure the results directory exists
+    os.makedirs(output_dir, exist_ok=True)
+
+    # Create the output file path
+    output_path = os.path.join(output_dir, f"prediction_{os.path.basename(image_path)}")
+
+    # Save the image with prediction
+    plt.savefig(output_path)
+    print(f"Image with prediction saved to {output_path}")
 
 
 def main():
@@ -86,8 +95,11 @@ def main():
     model = load_model(model_path, class_names)
     predicted_class = predict(model, image_path, class_names)
     print(f"The predicted class for the image is: {predicted_class}")
-    show_image_with_prediction(image_path, predicted_class)
+
+    # Save the image with the prediction to the results directory
+    output_dir = "./results"
+    show_image_with_prediction(image_path, predicted_class, output_dir)
 
 
 if __name__ == "__main__":
-    main()
+    main()
\ No newline at end of file
diff --git a/fraud-detection/Dockerfile.infer b/fraud-detection/Dockerfile.infer
new file mode 100644
index 0000000..639086c
--- /dev/null
+++ b/fraud-detection/Dockerfile.infer
@@ -0,0 +1,14 @@
+FROM python:3.9-slim
+
+WORKDIR /cocos
+RUN mkdir /cocos/results
+RUN mkdir /cocos/datasets
+
+COPY ./requirements.txt /cocos
+COPY ./prediction.py /cocos
+
+# install dependencies
+RUN pip install --no-cache-dir -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
+
+# command to be run when the docker container is started
+CMD ["python", "-u", "/cocos/prediction.py"]
\ No newline at end of file
diff --git a/fraud-detection/Dockerfile.train b/fraud-detection/Dockerfile.train
new file mode 100644
index 0000000..4f9efa7
--- /dev/null
+++ b/fraud-detection/Dockerfile.train
@@ -0,0 +1,14 @@
+FROM python:3.9-slim
+
+WORKDIR /cocos
+RUN mkdir /cocos/results
+RUN mkdir /cocos/datasets
+
+COPY ./requirements.txt /cocos
+COPY ./fraud-detection.py /cocos
+
+# install dependencies
+RUN pip install --no-cache-dir -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
+
+# command to be run when the docker container is started
+CMD ["python", "-u", "/cocos/fraud-detection.py"]
\ No newline at end of file
diff --git a/rul-turbofan/Dockerfile.infer b/rul-turbofan/Dockerfile.infer
new file mode 100644
index 0000000..1f42ed8
--- /dev/null
+++ b/rul-turbofan/Dockerfile.infer
@@ -0,0 +1,14 @@
+FROM python:3.9-slim
+
+WORKDIR /cocos
+RUN mkdir /cocos/results
+RUN mkdir /cocos/datasets
+
+COPY ./requirements.txt /cocos
+COPY ./pred-model.py /cocos
+
+# install dependencies
+RUN pip install --no-cache-dir -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
+
+# command to be run when the docker container is started
+CMD ["python", "-u", "/cocos/pred-model.py"]
\ No newline at end of file
diff --git a/rul-turbofan/Dockerfile.train b/rul-turbofan/Dockerfile.train
new file mode 100644
index 0000000..b892a66
--- /dev/null
+++ b/rul-turbofan/Dockerfile.train
@@ -0,0 +1,14 @@
+FROM python:3.9-slim
+
+WORKDIR /cocos
+RUN mkdir /cocos/results
+RUN mkdir /cocos/datasets
+
+COPY ./requirements.txt /cocos
+COPY ./rul-training.py /cocos
+
+# install dependencies
+RUN pip install --no-cache-dir -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
+
+# command to be run when the docker container is started
+CMD ["python", "-u", "/cocos/rul-training.py"]
\ No newline at end of file
diff --git a/rul-turbofan/requirements.txt b/rul-turbofan/requirements.txt
new file mode 100644
index 0000000..d3f262f
--- /dev/null
+++ b/rul-turbofan/requirements.txt
@@ -0,0 +1,8 @@
+numpy==2.0.0
+pandas==2.2.2
+scikit-learn==1.5.1
+torch==2.3.1
+joblib==1.4.2
+matplotlib==3.9.1
+seaborn==0.13.2
+
diff --git a/rul-turbofan/rul-training.py b/rul-turbofan/rul-training.py
index 116403f..42047f3 100644
--- a/rul-turbofan/rul-training.py
+++ b/rul-turbofan/rul-training.py
@@ -9,12 +9,18 @@
 import joblib
 import matplotlib.pyplot as plt
 import os
-import zipfile
+
+# Directory paths
+datasets_dir = 'datasets'
+results_dir = 'results'
+
+# Ensure the results directory exists
+os.makedirs(results_dir, exist_ok=True)
 
 # Load datasets
-train_df = pd.read_csv('train_FD001.txt', sep=r'\s+', header=None)
-test_df = pd.read_csv('test_FD001.txt', sep=r'\s+', header=None)
-rul_df = pd.read_csv('RUL_FD001.txt', sep=r'\s+', header=None)
+train_df = pd.read_csv(os.path.join(datasets_dir, 'train_FD001.txt'), sep=r'\s+', header=None)
+test_df = pd.read_csv(os.path.join(datasets_dir, 'test_FD001.txt'), sep=r'\s+', header=None)
+rul_df = pd.read_csv(os.path.join(datasets_dir, 'RUL_FD001.txt'), sep=r'\s+', header=None)
 
 # Set column names
 column_names = ['id', 'cycle'] + ['setting1', 'setting2', 'setting3'] + ['s' + str(i) for i in range(1, 22)]
@@ -34,7 +40,7 @@
 test_df[cols_normalize] = scaler.transform(test_df[cols_normalize])
 
 # Save the scaler
-joblib.dump(scaler, 'scaler.pkl')
+joblib.dump(scaler, os.path.join(results_dir, 'scaler.pkl'))
 
 # Dataset class
 class TurbofanDataset(Dataset):
@@ -55,37 +61,33 @@ def __getitem__(self, idx):
 
 # Definition of LSTM model
 class LSTMModel(nn.Module):
-
     def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
         super(LSTMModel, self).__init__()
-        self.hidden_dim = hidden_dim # Number of features in the hidden state
-        self.num_layers = num_layers # Number of recurrent layers in the LSTM
-        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, dropout=0.5) # LSTM layer
-        self.fc = nn.Linear(hidden_dim, output_dim) # Fully connected layer for output
+        self.hidden_dim = hidden_dim
+        self.num_layers = num_layers
+        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, dropout=0.5)
+        self.fc = nn.Linear(hidden_dim, output_dim)
 
     def forward(self, x):
-        # Initialize hidden state and cell state with zeros
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).to(x.device)
-        # Get the output from the LSTM layer
        out, _ = self.lstm(x, (h0, c0))
-        # Pass the output of the last time step through the fully connected layer
        out = self.fc(out[:, -1, :])
        return out
 
 # Training settings
-sequence_length = 50 # Number of time steps in each sequence (experimentally chosen)
-input_dim = len(cols_normalize) # Number of input features
-hidden_dim = 128 # Number of hidden units (experimentally chosen)
-num_layers = 3 # Number of LSTM layers (experimentally chosen)
-output_dim = 1 # Single output for RUL prediction
+sequence_length = 50
+input_dim = len(cols_normalize)
+hidden_dim = 128
+num_layers = 3
+output_dim = 1
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
 # Model initialization and training settings
 model = LSTMModel(input_dim, hidden_dim, num_layers, output_dim)
 model = model.to(device)
 criterion = nn.MSELoss()
-optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-5) # Adam optimizer with learning rate 0.0001 and weight decay applied to model parameters to prevent overfitting
+optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-5)
 
 # Training data
 train_dataset = TurbofanDataset(train_df, sequence_length)
@@ -96,16 +98,14 @@ def forward(self, x):
 val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, drop_last=True, num_workers=4)
 
 # Model training
-def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, target_r2_score=0.82): #early stopping criteria, r2 score is set to 0.82
+def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, target_r2_score=0.82):
     train_losses = []
     val_losses = []
     val_r2_scores = []
     early_stopping_patience = 10
     early_stopping_counter = 0
     best_val_loss = float('inf')
-    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.5) # 'min': Monitoring mode, learning rate will be adjusted based on minimizing validation loss
-#patience: Number of epochs with no improvement after which learning rate will be reduced
-#factor: Factor by which the learning rate will be reduced. New learning rate = old learning rate * factor
+    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.5)
 
     for epoch in range(num_epochs):
         model.train()
@@ -117,7 +117,7 @@ def train_model(model, train_loader, val_loader, criterion, optimizer, num_epoch
             outputs = model(sequences)
             loss = criterion(outputs.squeeze(), targets)
             loss.backward()
-            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0) # Maximum norm value beyond which gradients are clipped to prevent them from growing too large
+            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
             optimizer.step()
             running_loss += loss.item()
 
@@ -150,7 +150,7 @@ def train_model(model, train_loader, val_loader, criterion, optimizer, num_epoch
         if val_loss < best_val_loss:
             best_val_loss = val_loss
             early_stopping_counter = 0
-            torch.save(model.state_dict(), 'model.pth') # Save the best model
+            torch.save(model.state_dict(), os.path.join(results_dir, 'model.pth'))
         else:
             early_stopping_counter += 1
 
@@ -165,13 +165,12 @@ def train_model(model, train_loader, val_loader, criterion, optimizer, num_epoch
     return train_losses, val_losses, val_r2_scores
 
 num_epochs = 100
-target_r2_score = 0.82 #r2 score is set to 0.82
+target_r2_score = 0.82
 train_losses, val_losses, val_r2_scores = train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, target_r2_score)
 
 
 def plot_training_history(train_losses, val_losses, val_r2_scores):
     epochs = range(1, len(train_losses) + 1)
 
-    # Plotting training and validation loss
     plt.figure(figsize=(14, 6))
     plt.subplot(1, 2, 1)
@@ -182,7 +181,6 @@ def plot_training_history(train_losses, val_losses, val_r2_scores):
     plt.ylabel('Loss')
     plt.legend()
 
-    # Plotting validation R2 score
     plt.subplot(1, 2, 2)
     plt.plot(epochs, val_r2_scores, 'g-', label='Validation R2 Score')
     plt.title('Validation R2 Score')
@@ -191,18 +189,9 @@ def plot_training_history(train_losses, val_losses, val_r2_scores):
     plt.legend()
 
     plt.tight_layout()
-    plt.savefig('training_history.png')
+    plt.savefig(os.path.join(results_dir, 'training_history.png'))
     plt.close()
 
 
 plot_training_history(train_losses, val_losses, val_r2_scores)
-# Create a zip file containing model.pth and training_history.png
-with zipfile.ZipFile('result.zip', 'w') as zipf:
-    zipf.write('model.pth')
-    zipf.write('training_history.png')
-
-# Cleanup
-os.remove('model.pth')
-os.remove('training_history.png')
-
-print("Zipped the model and training history plot into result.zip") +print(f"Model and training history plot saved in '{results_dir}'") \ No newline at end of file