diff --git a/.github/dependabot.yaml b/.github/dependabot.yaml
index 05c8ed2..b4173c2 100644
--- a/.github/dependabot.yaml
+++ b/.github/dependabot.yaml
@@ -31,17 +31,4 @@ updates:
     groups:
       gh-dependency:
         patterns:
-          - "*"
-
-  - package-ecosystem: "pip"
-    directories:
-      - "/covid19"
-      - "/fraud-detection"
-      - "/rul-turbofan"
-    schedule:
-      interval: "monthly"
-      day: "monday"
-    groups:
-      pip-dependency:
-        patterns:
-          - "*"
+          - "*"
\ No newline at end of file
diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml
index 2cd5ed3..89e1144 100644
--- a/.github/workflows/python.yaml
+++ b/.github/workflows/python.yaml
@@ -7,6 +7,7 @@ on:
     paths:
       - "covid19/**"
       - "fraud-detection/**"
+      - "rul-turbofan/**"
       - ".github/**"
   pull_request:
     branches:
@@ -14,6 +15,7 @@ on:
     paths:
       - "covid19/**"
      - "fraud-detection/**"
+      - "rul-turbofan/**"
       - ".github/**"

 jobs:
diff --git a/covid19/Dockerfile.infer b/covid19/Dockerfile.infer
new file mode 100644
index 0000000..605996c
--- /dev/null
+++ b/covid19/Dockerfile.infer
@@ -0,0 +1,14 @@
+FROM python:3.9-slim
+
+WORKDIR /cocos
+RUN mkdir /cocos/results
+RUN mkdir /cocos/datasets
+
+COPY ./requirements.txt /cocos
+COPY ./predict.py /cocos
+
+# install dependencies
+RUN pip install --no-cache-dir -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
+
+# command to be run when the docker container is started
+CMD ["python", "-u", "/cocos/predict.py"]
\ No newline at end of file
diff --git a/covid19/Dockerfile.train b/covid19/Dockerfile.train
new file mode 100644
index 0000000..35e8211
--- /dev/null
+++ b/covid19/Dockerfile.train
@@ -0,0 +1,14 @@
+FROM python:3.9-slim
+
+WORKDIR /cocos
+RUN mkdir /cocos/results
+RUN mkdir /cocos/datasets
+
+COPY ./requirements.txt /cocos
+COPY ./train.py /cocos
+
+# install dependencies
+RUN pip install --no-cache-dir -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
+
+# command to be run when the docker container is started
+CMD ["python", "-u", "/cocos/train.py"]
\ No newline at end of file
diff --git a/covid19/predict.py b/covid19/predict.py
index 6e39205..a4f4b2d 100644
--- a/covid19/predict.py
+++ b/covid19/predict.py
@@ -38,12 +38,21 @@ def predict(model, image_path, class_names):
     return predicted_class


-def show_image_with_prediction(image_path, predicted_class):
+def show_image_with_prediction(image_path, predicted_class, output_dir):
     image = Image.open(image_path)
     plt.imshow(image)
     plt.title(f"Predicted: {predicted_class}")
     plt.axis("off")
-    plt.show()
+
+    # Ensure the results directory exists
+    os.makedirs(output_dir, exist_ok=True)
+
+    # Create the output file path
+    output_path = os.path.join(output_dir, f"prediction_{os.path.basename(image_path)}")
+
+    # Save the image with prediction
+    plt.savefig(output_path)
+    print(f"Image with prediction saved to {output_path}")


 def main():
@@ -86,8 +95,11 @@ def main():
     model = load_model(model_path, class_names)
     predicted_class = predict(model, image_path, class_names)
     print(f"The predicted class for the image is: {predicted_class}")
-    show_image_with_prediction(image_path, predicted_class)
+
+    # Save the image with the prediction to the results directory
+    output_dir = "./results"
+    show_image_with_prediction(image_path, predicted_class, output_dir)


 if __name__ == "__main__":
-    main()
+    main()
\ No newline at end of file
diff --git a/fraud-detection/Dockerfile.infer b/fraud-detection/Dockerfile.infer
new file mode 100644
index 0000000..639086c
--- /dev/null
+++ b/fraud-detection/Dockerfile.infer
@@ -0,0 +1,14 @@
+FROM python:3.9-slim
+
+WORKDIR /cocos
+RUN mkdir /cocos/results
+RUN mkdir /cocos/datasets
+
+COPY ./requirements.txt /cocos
+COPY ./prediction.py /cocos
+
+# install dependencies
+RUN pip install --no-cache-dir -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
+
+# command to be run when the docker container is started
+CMD ["python", "-u", "/cocos/prediction.py"]
\ No newline at end of file
diff --git a/fraud-detection/Dockerfile.train b/fraud-detection/Dockerfile.train
new file mode 100644
index 0000000..4f9efa7
--- /dev/null
+++ b/fraud-detection/Dockerfile.train
@@ -0,0 +1,14 @@
+FROM python:3.9-slim
+
+WORKDIR /cocos
+RUN mkdir /cocos/results
+RUN mkdir /cocos/datasets
+
+COPY ./requirements.txt /cocos
+COPY ./fraud-detection.py /cocos
+
+# install dependencies
+RUN pip install --no-cache-dir -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
+
+# command to be run when the docker container is started
+CMD ["python", "-u", "/cocos/fraud-detection.py"]
\ No newline at end of file
diff --git a/rul-turbofan/Dockerfile.infer b/rul-turbofan/Dockerfile.infer
new file mode 100644
index 0000000..1f42ed8
--- /dev/null
+++ b/rul-turbofan/Dockerfile.infer
@@ -0,0 +1,14 @@
+FROM python:3.9-slim
+
+WORKDIR /cocos
+RUN mkdir /cocos/results
+RUN mkdir /cocos/datasets
+
+COPY ./requirements.txt /cocos
+COPY ./pred-model.py /cocos
+
+# install dependencies
+RUN pip install --no-cache-dir -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
+
+# command to be run when the docker container is started
+CMD ["python", "-u", "/cocos/pred-model.py"]
\ No newline at end of file
diff --git a/rul-turbofan/Dockerfile.train b/rul-turbofan/Dockerfile.train
new file mode 100644
index 0000000..b892a66
--- /dev/null
+++ b/rul-turbofan/Dockerfile.train
@@ -0,0 +1,14 @@
+FROM python:3.9-slim
+
+WORKDIR /cocos
+RUN mkdir /cocos/results
+RUN mkdir /cocos/datasets
+
+COPY ./requirements.txt /cocos
+COPY ./rul-training.py /cocos
+
+# install dependencies
+RUN pip install --no-cache-dir -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
+
+# command to be run when the docker container is started
+CMD ["python", "-u", "/cocos/rul-training.py"]
\ No newline at end of file
diff --git a/rul-turbofan/requirements.txt b/rul-turbofan/requirements.txt
new file mode 100644
index 0000000..d3f262f
--- /dev/null
+++ b/rul-turbofan/requirements.txt
@@ -0,0 +1,8 @@
+numpy==2.0.0
+pandas==2.2.2
+scikit-learn==1.5.1
+torch==2.3.1
+joblib==1.4.2
+matplotlib==3.9.1
+seaborn==0.13.2
+
diff --git a/rul-turbofan/rul-training.py b/rul-turbofan/rul-training.py
index 116403f..42047f3 100644
--- a/rul-turbofan/rul-training.py
+++ b/rul-turbofan/rul-training.py
@@ -9,12 +9,18 @@ import joblib
 import matplotlib.pyplot as plt
 import os
-import zipfile
+
+# Directory paths
+datasets_dir = 'datasets'
+results_dir = 'results'
+
+# Ensure the results directory exists
+os.makedirs(results_dir, exist_ok=True)


 # Load datasets
-train_df = pd.read_csv('train_FD001.txt', sep=r'\s+', header=None)
-test_df = pd.read_csv('test_FD001.txt', sep=r'\s+', header=None)
-rul_df = pd.read_csv('RUL_FD001.txt', sep=r'\s+', header=None)
+train_df = pd.read_csv(os.path.join(datasets_dir, 'train_FD001.txt'), sep=r'\s+', header=None)
+test_df = pd.read_csv(os.path.join(datasets_dir, 'test_FD001.txt'), sep=r'\s+', header=None)
+rul_df = pd.read_csv(os.path.join(datasets_dir, 'RUL_FD001.txt'), sep=r'\s+', header=None)

 # Set column names
 column_names = ['id', 'cycle'] + ['setting1', 'setting2', 'setting3'] + ['s' + str(i) for i in range(1, 22)]
@@ -34,7 +40,7 @@ test_df[cols_normalize] = scaler.transform(test_df[cols_normalize])


 # Save the scaler
-joblib.dump(scaler, 'scaler.pkl')
+joblib.dump(scaler, os.path.join(results_dir, 'scaler.pkl'))

 # Dataset class
 class TurbofanDataset(Dataset):
@@ -55,37 +61,33 @@ def __getitem__(self, idx):

 # Definition of LSTM model
 class LSTMModel(nn.Module):
-
     def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
         super(LSTMModel, self).__init__()
-        self.hidden_dim = hidden_dim  # Number of features in the hidden state
-        self.num_layers = num_layers  # Number of recurrent layers in the LSTM
-        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, dropout=0.5)  # LSTM layer
-        self.fc = nn.Linear(hidden_dim, output_dim)  # Fully connected layer for output
+        self.hidden_dim = hidden_dim
+        self.num_layers = num_layers
+        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, dropout=0.5)
+        self.fc = nn.Linear(hidden_dim, output_dim)

     def forward(self, x):
-        # Initialize hidden state and cell state with zeros
         h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).to(x.device)
         c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).to(x.device)
-        # Get the output from the LSTM layer
         out, _ = self.lstm(x, (h0, c0))
-        # Pass the output of the last time step through the fully connected layer
         out = self.fc(out[:, -1, :])
         return out

 # Training settings
-sequence_length = 50  # Number of time steps in each sequence (experimentally chosen)
-input_dim = len(cols_normalize)  # Number of input features
-hidden_dim = 128  # Number of hidden units (experimentally chosen)
-num_layers = 3  # Number of LSTM layers (experimentally chosen)
-output_dim = 1  # Single output for RUL prediction
+sequence_length = 50
+input_dim = len(cols_normalize)
+hidden_dim = 128
+num_layers = 3
+output_dim = 1
 device = 'cuda' if torch.cuda.is_available() else 'cpu'

 # Model initialization and training settings
 model = LSTMModel(input_dim, hidden_dim, num_layers, output_dim)
 model = model.to(device)
 criterion = nn.MSELoss()
-optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-5)  # Adam optimizer with learning rate 0.0001 and weight decay applied to model parameters to prevent overfitting
+optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-5)

 # Training data
 train_dataset = TurbofanDataset(train_df, sequence_length)
@@ -96,16 +98,14 @@ def forward(self, x):
 val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, drop_last=True, num_workers=4)

 # Model training
-def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, target_r2_score=0.82): #early stopping criteria, r2 score is set to 0.82
+def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, target_r2_score=0.82):
     train_losses = []
     val_losses = []
     val_r2_scores = []
     early_stopping_patience = 10
     early_stopping_counter = 0
     best_val_loss = float('inf')
-    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.5) # min': Monitoring mode, learning rate will be adjusted based on minimizing validation loss
-#patience: Number of epochs with no improvement after which learning rate will be reduced
-#factor: Factor by which the learning rate will be reduced. New learning rate = old learning rate * factor
+    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.5)

     for epoch in range(num_epochs):
         model.train()
@@ -117,7 +117,7 @@ def train_model(model, train_loader, val_loader, criterion, optimizer, num_epoch
             outputs = model(sequences)
             loss = criterion(outputs.squeeze(), targets)
             loss.backward()
-            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0) # Maximum norm value beyond which gradients are clipped to prevent them from growing too large
+            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
             optimizer.step()

             running_loss += loss.item()
@@ -150,7 +150,7 @@ def train_model(model, train_loader, val_loader, criterion, optimizer, num_epoch
         if val_loss < best_val_loss:
             best_val_loss = val_loss
             early_stopping_counter = 0
-            torch.save(model.state_dict(), 'model.pth') # Save the best model
+            torch.save(model.state_dict(), os.path.join(results_dir, 'model.pth'))
         else:
             early_stopping_counter += 1

@@ -165,13 +165,12 @@ def train_model(model, train_loader, val_loader, criterion, optimizer, num_epoch
     return train_losses, val_losses, val_r2_scores

 num_epochs = 100
-target_r2_score = 0.82 #r2 score is set to 0.82
+target_r2_score = 0.82
 train_losses, val_losses, val_r2_scores = train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, target_r2_score)


 def plot_training_history(train_losses, val_losses, val_r2_scores):
     epochs = range(1, len(train_losses) + 1)
-    # Plotting training and validation loss
     plt.figure(figsize=(14, 6))

     plt.subplot(1, 2, 1)
@@ -182,7 +181,6 @@ def plot_training_history(train_losses, val_losses, val_r2_scores):
     plt.ylabel('Loss')
     plt.legend()

-    # Plotting validation R2 score
     plt.subplot(1, 2, 2)
     plt.plot(epochs, val_r2_scores, 'g-', label='Validation R2 Score')
     plt.title('Validation R2 Score')
@@ -191,18 +189,9 @@ def plot_training_history(train_losses, val_losses, val_r2_scores):
     plt.legend()

     plt.tight_layout()
-    plt.savefig('training_history.png')
+    plt.savefig(os.path.join(results_dir, 'training_history.png'))
     plt.close()

 plot_training_history(train_losses, val_losses, val_r2_scores)

-# Create a zip file containing model.pth and training_history.png
-with zipfile.ZipFile('result.zip', 'w') as zipf:
-    zipf.write('model.pth')
-    zipf.write('training_history.png')
-
-# Cleanup
-os.remove('model.pth')
-os.remove('training_history.png')
-
-print("Zipped the model and training history plot into result.zip")
+print(f"Model and training history plot saved in '{results_dir}'")
\ No newline at end of file
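Reviewer note: the hunks above show only the changed lines of train_model; the validation pass that produces val_loss and the epoch R2 score used for early stopping is elided between hunks. For context, a minimal sketch of that pattern follows. It assumes sklearn.metrics.r2_score (scikit-learn is pinned in requirements.txt); the standalone `evaluate` function and its `device` argument are illustrative framing, not the script's exact code.

import torch
from sklearn.metrics import r2_score

def evaluate(model, val_loader, criterion, device):
    # One validation pass: accumulate MSE loss per batch and collect
    # predictions so an epoch-level R2 score can be computed.
    model.eval()
    total_loss, preds, trues = 0.0, [], []
    with torch.no_grad():
        for sequences, targets in val_loader:
            sequences, targets = sequences.to(device), targets.to(device)
            outputs = model(sequences).squeeze()
            total_loss += criterion(outputs, targets).item()
            preds.extend(outputs.cpu().tolist())
            trues.extend(targets.cpu().tolist())
    # r2_score(y_true, y_pred): 1.0 is a perfect fit; training stops early
    # once this reaches target_r2_score (0.82 in the script) or val_loss
    # fails to improve for early_stopping_patience epochs.
    return total_loss / len(val_loader), r2_score(trues, preds)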