Commit 693f96f

add model and size params

jpizarrom committed Oct 1, 2023
1 parent 8d1af37 commit 693f96f
Showing 9 changed files with 171 additions and 174 deletions.
26 changes: 16 additions & 10 deletions classification_model_training/files_from_makeathon/dvc.lock
@@ -1,10 +1,11 @@
schema: '2.0'
stages:
train:
-cmd: python train.py --num_classes 4 --pretrained_on_ImageNet --fold 1 --dataset
-./data/train/images --gt ./data/train/gt.csv --outdir models --epochs 30 --seed
-1 --lr 0.001 --dropout_rate 0.3 --drop_connect_rate 0.2 --batch_norm_momentum
-0.99 --batch_norm_epsilon 0.001 --metrics_file_path models/metrics-training.json
+cmd: python train.py --model efficientnet-b2 --image_size 224 --num_classes 4
+--pretrained_on_ImageNet --fold 1 --dataset ./data/train/images --gt ./data/train/gt.csv
+--outdir models --epochs 30 --seed 1 --lr 0.001 --dropout_rate 0.3 --drop_connect_rate
+0.2 --batch_norm_momentum 0.99 --batch_norm_epsilon 0.001 --metrics_file_path
+models/metrics-training.json
deps:
- path: data/train
hash: md5
@@ -13,8 +14,8 @@ stages:
nfiles: 362
- path: train.py
hash: md5
-md5: d0b7beb3fc65200f6a9c3a51e9307a24
-size: 9750
+md5: cf9e5a2efba47f0da725d7f991d8e424
+size: 10428
params:
params.yaml:
base:
@@ -35,6 +36,8 @@ stages:
drop_connect_rate: 0.2
batch_norm_momentum: 0.99
batch_norm_epsilon: 0.001
+model: efficientnet-b2
+image_size: 224
outs:
- path: models/metrics-training.json
hash: md5
@@ -45,8 +48,9 @@
md5: 43b5c6b74a5b036831c4195bf8679e90
size: 31270857
evaluate:
-cmd: python predict.py --num_classes 4 --dataset ./data/test/images --gt ./data/test/gt.csv
---single_model_path models/model_best.pt --metrics_file_path models/metrics-evaluate.json
+cmd: python predict.py --model efficientnet-b2 --image_size 224 --num_classes
+4 --dataset ./data/test/images --gt ./data/test/gt.csv --single_model_path models/model_best.pt
+--metrics_file_path models/metrics-evaluate.json
deps:
- path: data/test
hash: md5
@@ -59,8 +63,8 @@ stages:
size: 31270857
- path: predict.py
hash: md5
-md5: 8423985a8b1f20117aecc13fd9e81fa3
-size: 5868
+md5: 43fb163a6a3546d1bd6b47cf92fc6ff5
+size: 6001
params:
params.yaml:
base:
@@ -86,6 +90,8 @@ stages:
drop_connect_rate: 0.2
batch_norm_momentum: 0.99
batch_norm_epsilon: 0.001
+model: efficientnet-b2
+image_size: 224
outs:
- path: models/metrics-evaluate.json
hash: md5
4 changes: 4 additions & 0 deletions classification_model_training/files_from_makeathon/dvc.yaml
@@ -2,6 +2,8 @@ stages:
train:
cmd: >-
python train.py
+--model ${train.model}
+--image_size ${train.image_size}
--num_classes ${train.num_classes}
--pretrained_on_ImageNet
--fold ${train.fold}
@@ -39,6 +41,8 @@ stages:
evaluate:
cmd: >-
python predict.py
+--model ${train.model}
+--image_size ${train.image_size}
--num_classes ${train.num_classes}
--dataset ${evaluate.dataset}
--gt ${evaluate.gt}
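Note: dvc.yaml only templates the two new flags; DVC substitutes ${train.model} and ${train.image_size} from params.yaml when it renders each stage command. A minimal stand-in for that interpolation (illustrative only, not DVC's implementation; assumes params.yaml sits in the working directory with the layout shown in the next file):

```python
# Illustrative only: a tiny stand-in for DVC's ${...} interpolation,
# not DVC's actual code. Assumes params.yaml contains a train: block
# with model and image_size keys.
import re

import yaml

with open("params.yaml") as f:
    params = yaml.safe_load(f)

template = "python train.py --model ${train.model} --image_size ${train.image_size}"

def interpolate(template: str, params: dict) -> str:
    # Resolve each dotted key (e.g. "train.model") against the params tree.
    def resolve(match: re.Match) -> str:
        node = params
        for key in match.group(1).split("."):
            node = node[key]
        return str(node)

    return re.sub(r"\$\{([\w.]+)\}", resolve, template)

print(interpolate(template, params))
# -> python train.py --model efficientnet-b2 --image_size 224
```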
classification_model_training/files_from_makeathon/params.yaml
@@ -19,6 +19,8 @@ train:
batch_norm_epsilon: 1e-3
# pretrained_on_ImageNet:
# pretrained_own:
+model: efficientnet-b2
+image_size: 224

evaluate:
# single_model_path: models/model_best.pt
…/dvc.lock
@@ -6,8 +6,8 @@ stages:
deps:
- path: ../../export.py
hash: md5
-md5: f955cd2a80cff3babb9183dbee02d68d
-size: 1428
+md5: 7ac103807a6ff8f1927e64691cf913ff
+size: 1467
- path: ../../models/model_best.pt
hash: md5
md5: 43b5c6b74a5b036831c4195bf8679e90
205 changes: 78 additions & 127 deletions classification_model_training/files_from_makeathon/poetry.lock

Large diffs are not rendered by default.

14 changes: 7 additions & 7 deletions classification_model_training/files_from_makeathon/predict.py
@@ -137,14 +137,18 @@ def predict(model, device, test_loader, ensemble):
default=None,
type=str,
)
+parser.add_argument(
+"--model", default="efficientnet-b2", type=str, help="model name"
+)
+parser.add_argument("--image_size", default=224, type=int, help="image size")

opt = parser.parse_args()

test_images, test_targets = load_data(opt.dataset, opt.gt)

prediction_aug = transforms.Compose(
[
-transforms.Resize((224, 224)),
+transforms.Resize((opt.image_size, opt.image_size)),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
@@ -162,9 +166,7 @@ def predict(model, device, test_loader, ensemble):

if opt.single_model_path is not None:

-model = EfficientNet.from_name(
-"efficientnet-b2", in_channels=3, num_classes=1
-)
+model = EfficientNet.from_name(opt.model, in_channels=3, num_classes=1)
model.load_state_dict(torch.load(opt.single_model_path))
model.to(opt.device)

@@ -179,9 +181,7 @@ def predict(model, device, test_loader, ensemble):

if opt.single_model_path is not None:

-model = EfficientNet.from_name(
-"efficientnet-b2", in_channels=3, num_classes=4
-)
+model = EfficientNet.from_name(opt.model, in_channels=3, num_classes=4)
model.load_state_dict(torch.load(opt.single_model_path))
model.to(opt.device)

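A side effect of making the architecture a flag: --model must name the same network that produced the --single_model_path checkpoint, or load_state_dict fails on mismatched tensor shapes. A hypothetical guard, not part of this commit, that turns that failure into a clearer message:

```python
# Hypothetical safeguard, not in the commit: fail fast when --model does
# not match the architecture that produced the checkpoint.
import torch
from efficientnet_pytorch import EfficientNet

def load_single_model(model_name: str, weights_path: str, num_classes: int):
    model = EfficientNet.from_name(model_name, in_channels=3, num_classes=num_classes)
    try:
        model.load_state_dict(torch.load(weights_path))
    except RuntimeError as err:
        # Shape mismatches surface here, e.g. b2 weights into a b0 skeleton.
        raise SystemExit(
            f"checkpoint {weights_path!r} does not fit {model_name!r}: {err}"
        ) from err
    return model
```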
classification_model_training/files_from_makeathon/pyproject.toml
@@ -27,8 +27,9 @@ wandb = "<0.15.8"
captum = "^0.6.0"
tensorflow = ">=2.13,<2.14"
protobuf = ">=3.20.3,<4.21.0"
ray = {extras = ["tune"], version = "^2.7.0"}
ray = {extras = ["data", "tune"], version = "^2.7.0"}
torchmetrics = "^1.2.0"
+seaborn = "^0.13.0"

[tool.poetry.group.dev.dependencies]
pytest = "^7.4.1"
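The ray dependency gains the "data" extra next to "tune", which pulls in what Ray Datasets needs (pyarrow, fsspec and friends). A quick smoke test under the pinned ^2.7.0, assuming a fresh install:

```python
# Smoke test that the Ray Datasets API works after installing
# ray = {extras = ["data", "tune"], version = "^2.7.0"}.
import ray.data

ds = ray.data.range(10)  # trivial dataset of the integers 0..9
assert ds.count() == 10  # materializes the dataset and counts rows
```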
14 changes: 10 additions & 4 deletions classification_model_training/files_from_makeathon/run_ray_tune.py
@@ -46,6 +46,7 @@ def setup(self, config):

train_bs = config.get("train_bs", 16)
val_bs = config.get("val_bs", 16)
+image_size = config.get("image_size", 224)

gt = config.get("args").gt
training_data_path = config.get("args").dataset
@@ -54,11 +55,11 @@
self.num_classes = config.get("args").num_classes

self.train_loader, self.test_loader = get_data_loaders(
-gt, fold, training_data_path, train_bs, val_bs
+gt, fold, training_data_path, train_bs, val_bs, image_size=image_size
)

self.model = EfficientNet.from_pretrained(
"efficientnet-b2",
config.get("model", "efficientnet-b2"),
in_channels=3,
num_classes=self.num_classes,
dropout_rate=config.get("dropout_rate", 0.3),
@@ -178,19 +179,24 @@ def load_checkpoint(self, checkpoint_dir):
metric="val_f1",
mode="max",
scheduler=sched,
-num_samples=1 if args.smoke_test else 10,
+num_samples=1 if args.smoke_test else 1,
),
param_space={
"args": args,
# "lr": tune.loguniform(1e-4, 1e-3),
"seed": tune.randint(0, 42),
"seed": 1,
# "seed": tune.randint(0, 42),
# "lr": tune.quniform(1e-4, 1e-3, 1e-4),
# "weight_decay": tune.uniform(0.0, 1e-4),
# "dropout_rate": tune.quniform(0.10, 0.4, 0.05),
# "drop_connect_rate": tune.quniform(0.10, 0.4, 0.05),
# "batch_norm_momentum": tune.choice([0.9, 0.997, 0.99]),
# "batch_norm_epsilon": tune.choice([1e-3, 1e-5, 1e-6])
# "momentum": tune.uniform(0.1, 0.9),
# "model": tune.choice(
# ["efficientnet-b0", "efficientnet-b1", "efficientnet-b2"]
# ),
# "image_size": tune.choice([224, 240, 260]),
},
)
results = tuner.fit()
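The architecture and resolution sweep stays commented out, and sampling the two keys independently could pair efficientnet-b2 with 224 px even though 224, 240 and 260 are the native resolutions of b0, b1 and b2. If the sweep is enabled later, one option (a sketch, not what this commit does) is to sample the pair jointly and unpack it in setup():

```python
# Sketch only: sample (model, image_size) as one choice so the resolution
# always matches the architecture. setup() would then unpack the tuple
# instead of reading separate "model" / "image_size" keys.
from ray import tune

param_space = {
    "seed": 1,
    "model_and_size": tune.choice(
        [
            ("efficientnet-b0", 224),
            ("efficientnet-b1", 240),
            ("efficientnet-b2", 260),
        ]
    ),
}

# Inside Trainable.setup(self, config):
#     model_name, image_size = config["model_and_size"]
```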
73 changes: 50 additions & 23 deletions classification_model_training/files_from_makeathon/train.py
@@ -2,12 +2,14 @@
import torch
from torch import nn


import argparse
import pandas as pd
import numpy as np
import json
from datetime import datetime
import random
+import shutil
from torchvision import transforms

from efficientnet_pytorch import EfficientNet
@@ -17,7 +19,12 @@


def get_data_loaders(
-gt: str, fold: int, training_data_path: str, train_bs: int, val_bs: int
+gt: str,
+fold: int,
+training_data_path: str,
+train_bs: int,
+val_bs: int,
+image_size: int = 224,
):
df = pd.read_csv(gt)

@@ -27,7 +34,7 @@ def get_data_loaders(
# Set up the train_loader and val_loader
train_aug = transforms.Compose(
[
-transforms.Resize((224, 224)),
+transforms.Resize((image_size, image_size)),
transforms.RandomHorizontalFlip(0.5),
transforms.RandomRotation((0, 360)),
transforms.ToTensor(),
@@ -37,7 +44,7 @@

val_aug = transforms.Compose(
[
-transforms.Resize((224, 224)),
+transforms.Resize((image_size, image_size)),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
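Train, validation and prediction transforms now all derive their Resize from the same image_size value, and because the parameter is a defaulted keyword, call sites that never pass it keep the previous 224 px behavior. Schematically (abridged; build_resize is an illustrative helper, not code from the diff):

```python
# Abridged from the diff: image_size defaults to 224, so callers that pass
# only the original arguments are unaffected.
from torchvision import transforms

def build_resize(image_size: int = 224) -> transforms.Resize:
    # Square resize at the requested resolution, as train/val/predict now do.
    return transforms.Resize((image_size, image_size))

build_resize()                # old behavior: 224x224
build_resize(image_size=260)  # opt-in: efficientnet-b2's native 260 px
```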
@@ -70,6 +77,8 @@ def get_data_loaders(


def train(
+model_name,
+image_size,
fold,
training_data_path,
gt,
@@ -92,13 +101,13 @@
):

train_loader, val_loader = get_data_loaders(
-gt, fold, training_data_path, train_bs, val_bs
+gt, fold, training_data_path, train_bs, val_bs, image_size
)

if pretrained_on_ImageNet:
print("Using on ImageNet pretrained model")
model = EfficientNet.from_pretrained(
"efficientnet-b2",
model_name,
in_channels=3,
num_classes=num_classes,
dropout_rate=dropout_rate,
@@ -111,7 +120,7 @@
else:
print("Using NOT pretrained model")
model = EfficientNet.from_name(
"efficientnet-b2",
model_name,
in_channels=3,
num_classes=num_classes,
dropout_rate=dropout_rate,
@@ -160,31 +169,43 @@
)

(
-test_running_loss,
-test_num_correct,
-test_num_total,
-test_running_steps,
-test_f1,
+val_running_loss,
+val_num_correct,
+val_num_total,
+val_running_steps,
+val_f1,
) = val_loop(model, num_classes, device, val_loader, loss_function)

-print(f"Epoch = {epoch+1}, train_f1 = {train_f1}, val_f1 = {test_f1}")
+train_loss = train_running_loss / train_running_steps
+# train_acc = train_num_correct / train_num_total
+val_loss = val_running_loss / val_running_steps
+val_acc = val_num_correct / val_num_total
+
+print(
+f"Epoch = {epoch+1}, train_loss = {train_loss}, train_f1 = {train_f1}, val_loss = {val_loss}, val_f1 = {val_f1}"
+)

-scheduler.step(test_f1)
+if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
+scheduler.step(val_f1)
+else:
+scheduler.step()
+
+is_best = val_f1 > best_score
+best_score = max(val_f1, best_score)

-if test_f1 > best_score and epoch >= 3:
-best_score = test_f1
-torch.save(
-model.state_dict(),
-os.path.join(outdir, f"model_fold_{fold}_{epoch}.pt"),
-)
-torch.save(model.state_dict(), os.path.join(outdir, "model_best.pt"))
+torch.save(model.state_dict(), os.path.join(outdir, "model.pt"))
+if is_best:
+shutil.copyfile(
+os.path.join(outdir, "model.pt"), os.path.join(outdir, "model_best.pt")
+)

if opt.metrics_file_path is not None:
json.dump(
obj={
-"f1_score": test_f1,
-"accuracy": test_num_correct / test_num_total,
-"train_loss": train_running_loss / train_running_steps,
-"val_loss": test_running_loss / test_running_steps,
+"f1_score": val_f1,
+"accuracy": val_acc,
+"train_loss": train_loss,
+"val_loss": val_loss,
"epoch": epoch + 1,
},
fp=open(opt.metrics_file_path, "w"),
@@ -273,6 +294,10 @@ def enable_determinism():
default=None,
type=str,
)
+parser.add_argument(
+"--model", default="efficientnet-b2", type=str, help="model name"
+)
+parser.add_argument("--image_size", default=224, type=int, help="image size")

opt = parser.parse_args()

@@ -307,6 +332,8 @@ def enable_determinism():
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"

train(
+opt.model,
+opt.image_size,
opt.fold,
opt.dataset,
opt.gt,
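Two behavioral changes ride along in train.py's epoch loop. First, scheduler.step() is now dispatched on the scheduler type, since ReduceLROnPlateau.step expects the monitored metric while other schedulers take no argument. Second, checkpointing writes model.pt every epoch and copies it to model_best.pt whenever val_f1 improves, replacing the old rule that saved only after epoch 3 under per-fold, per-epoch names; the export stage's dvc.lock still depends on models/model_best.pt, so downstream stages are unaffected. The new save pattern in isolation, a sketch with names taken from the diff:

```python
# The diff's checkpoint pattern, isolated for clarity: always keep the
# latest weights, copy them to model_best.pt only when the tracked metric
# improves. The save_checkpoint wrapper itself is illustrative.
import os
import shutil

import torch

def save_checkpoint(model, outdir: str, val_f1: float, best_score: float) -> float:
    is_best = val_f1 > best_score
    torch.save(model.state_dict(), os.path.join(outdir, "model.pt"))
    if is_best:
        # Copying is cheaper than serializing twice, and model_best.pt stays
        # byte-identical to the epoch that produced it.
        shutil.copyfile(
            os.path.join(outdir, "model.pt"), os.path.join(outdir, "model_best.pt")
        )
    return max(val_f1, best_score)
```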
