8000 If one anchor is assigned to multiple labels, it should choose the truth label with the max IoU by ZouJiu1 · Pull Request #8820 · AlexeyAB/darknet · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

If one anchor is assigned to multiple labels, it should choose the truth label with the max IoU #8820

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
13 changes: 12 additions & 1 deletion include/darknet.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,6 @@ typedef struct tree {
int *group_offset;
} tree;


// activations.h
typedef enum {
LOGISTIC, RELU, RELU6, RELIE, LINEAR, RAMP, TANH, PLSE, REVLEAKY, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN, SELU, GELU, SWISH, MISH, HARD_MISH, NORM_CHAN, NORM_CHAN_SOFTMAX, NORM_CHAN_SOFTMAX_MAXVAL
Expand Down Expand Up @@ -912,6 +911,18 @@ typedef struct detection{
int track_id;
} detection;

// multi_label_one_anchor_choose_maxiou
// One candidate (truth box -> anchor at a grid cell) assignment collected by
// process_batch() in yolo_layer.c. When several truth boxes land on the same
// (best_n, mask_n, x, y) slot, only the entry with the highest IoU is kept,
// and the deltas are computed afterwards from the surviving entries.
// NOTE: field order matters - the visible call sites fill this struct with
// positional initializers: {truth, iou, i, j, flag, n, mask_n, t}.
typedef struct mloam {
box truth; // ground-truth box this entry was built from
float best_iou; // IoU of `truth` against the anchor; used to pick the winner for a slot
int x; // grid cell column (the `i` in `j * l.w + i`)
int y; // grid cell row (the `j` in `j * l.w + i`)
// NOTE(review): despite the name, the visible code stores a flag here, not a
// track id: 1 for a best-anchor match, -1 for an iou_thresh match. Only
// entries with a positive value write l.labels / l.class_ids.
int track_id;
int best_n; // anchor index passed to delta_yolo_box()
int mask_n; // result of int_index(l.mask, <anchor index>, l.n) for that anchor
int t; // index of the truth box, used to read its class id from state.truth
} mloam;

// network.c -batch inference
typedef struct det_num_pair {
int num;
Expand Down
183 changes: 104 additions & 79 deletions src/yolo_layer.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "box.h"
#include "dark_cuda.h"
#include "utils.h"
#include "darknet.h"

#include <math.h>
#include <stdio.h>
Expand Down Expand Up @@ -382,6 +383,7 @@ typedef struct train_yolo_args {
float tot_iou_loss;
int count;
int class_count;
mloam* mloam_ptr;
} train_yolo_args;

void *process_batch(void* ptr)
Expand Down Expand Up @@ -502,6 +504,8 @@ void *process_batch(void* ptr)
}
}
}
mloam* mloam_ptr = args->mloam_ptr; // multi_label_one_anchor_choose_maxiou
int number = 0;
for (t = 0; t < l.max_boxes; ++t) {
box truth = float_to_box_stride(state.truth + t * l.truth_size + b * l.truths, 1);
if (!truth.x) break; // continue;
Expand Down Expand Up @@ -534,54 +538,29 @@ void *process_batch(void* ptr)

int mask_n = int_index(l.mask, best_n, l.n);
if (mask_n >= 0) {
int class_id = state.truth[t * l.truth_size + b * l.truths + 4];
if (l.map) class_id = l.map[class_id];

int box_index = entry_index(l, b, mask_n * l.w * l.h + j * l.w + i, 0);
const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f;
ious all_ious = delta_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w * truth.h), l.w * l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1, l.max_delta, state.net.rewritten_bbox, l.new_coords);
(*state.net.total_bbox)++;

const int truth_in_index = t * l.truth_size + b * l.truths + 5;
const int track_id = state.truth[truth_in_index];
const int truth_out_index = b * l.n * l.w * l.h + mask_n * l.w * l.h + j * l.w + i;
l.labels[truth_out_index] = track_id;
l.class_ids[truth_out_index] = class_id;
//printf(" track_id = %d, t = %d, b = %d, truth_in_index = %d, truth_out_index = %d \n", track_id, t, b, truth_in_index, truth_out_index);

// range is 0 <= 1
args->tot_iou += all_ious.iou;
args->tot_iou_loss += 1 - all_ious.iou;
// range is -1 <= giou <= 1
tot_giou += all_ious.giou;
args->tot_giou_loss += 1 - all_ious.giou;

tot_diou += all_ious.diou;
tot_diou_loss += 1 - all_ious.diou;

tot_ciou += all_ious.ciou;
tot_ciou_loss += 1 - all_ious.ciou;

int obj_index = entry_index(l, b, mask_n * l.w * l.h + j * l.w + i, 4);
avg_obj += l.output[obj_index];
if (l.objectness_smooth) {
float delta_obj = class_multiplier * l.obj_normalizer * (1 - l.output[obj_index]);
if (l.delta[obj_index] == 0) l.delta[obj_index] = delta_obj;
if(number==0) {
mloam choose = {truth, best_iou, i, j, 1, best_n, mask_n, t};
mloam_ptr[number++] = choose;
}
else {
int markable = 666;
for(int mi = 0; mi < number; mi++) {
mloam mp = mloam_ptr[mi];
if(mp.best_n==best_n && mp.mask_n == mask_n && mp.x==i && mp.y==j) {
markable = -666;
if(mp.best_iou < best_iou) {
mloam choose = {truth, best_iou, i, j, 1, best_n, mask_n, t};
mloam_ptr[mi] = choose;
break;
}
}
}
if(markable > 0) {
mloam choose = {truth, best_iou, i, j, 1, best_n, mask_n, t};
mloam_ptr[number++] = choose;
}
}
else l.delta[obj_index] = class_multiplier * l.obj_normalizer * (1 - l.output[obj_index]);

int class_index = entry_index(l, b, mask_n * l.w * l.h + j * l.w + i, 4 + 1);
delta_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w * l.h, &avg_cat, l.focal_loss, l.label_smooth_eps, l.classes_multipliers, l.cls_normalizer);

//printf(" label: class_id = %d, truth.x = %f, truth.y = %f, truth.w = %f, truth.h = %f \n", class_id, truth.x, truth.y, truth.w, truth.h);
//printf(" mask_n = %d, l.output[obj_index] = %f, l.output[class_index + class_id] = %f \n\n", mask_n, l.output[obj_index], l.output[class_index + class_id]);

++(args->count);
++(args->class_count);
if (all_ious.iou > .5) recall += 1;
if (all_ious.iou > .75) recall75 += 1;
}

// iou_thresh
for (n = 0; n < l.total; ++n) {
int mask_n = int_index(l.mask, n, l.n);
Expand All @@ -593,45 +572,89 @@ void *process_batch(void* ptr)
// iou, n

if (iou > l.iou_thresh) {
int class_id = state.truth[t * l.truth_size + b * l.truths + 4];
if (l.map) class_id = l.map[class_id];

int box_index = entry_index(l, b, mask_n * l.w * l.h + j * l.w + i, 0);
const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f;
ious all_ious = delta_yolo_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w * truth.h), l.w * l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1, l.max_delta, state.net.rewritten_bbox, l.new_coords);
(*state.net.total_bbox)++;
if(number==0) {
mloam choose = {truth, iou, i, j, -1, n, mask_n, t};
mloam_ptr[number++] = choose;
}
else {
int markable = 666;
for(int mi = 0; mi < number; mi++) {
mloam mp = mloam_ptr[mi];
if(mp.best_n==n && mp.mask_n == mask_n && mp.x==i && mp.y==j) {
markable = -666;
if(mp.best_iou < iou) {
mloam choose = {truth, iou, i, j, -1, n, mask_n, t};
mloam_ptr[mi] = choose;
break;
}
}
}
if(markable > 0) {
mloam choose = {truth, iou, i, j, -1, n, mask_n, t};
mloam_ptr[number++] = choose;
}
}
}
}
}
}
for(int ni = 0; ni < number; ni++) {
mloam mp = mloam_ptr[ni];
box truth = mp.truth;
int i = mp.x;
int j = mp.y;
int track_id = mp.track_id;
int best_n = mp.best_n;
int mask_n = mp.mask_n;
int t = mp.t;

// range is 0 <= 1
args->tot_iou += all_ious.iou;
args->tot_iou_loss += 1 - all_ious.iou;
// range is -1 <= giou <= 1
tot_giou += all_ious.giou;
args->tot_giou_loss += 1 - all_ious.giou;
int class_id = state.truth[t * l.truth_size + b * l.truths + 4];
if (l.map) class_id = l.map[class_id];

tot_diou += all_ious.diou;
tot_diou_loss += 1 - all_ious.diou;
int box_index = entry_index(l, b, mask_n * l.w * l.h + j * l.w + i, 0);
const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f;
ious all_ious = delta_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w * truth.h), l.w * l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1, l.max_delta, state.net.rewritten_bbox, l.new_coords);
(*state.net.total_bbox)++;

tot_ciou += all_ious.ciou;
tot_ciou_loss += 1 - all_ious.ciou;
if(track_id > 0) {
const int truth_in_index = t * l.truth_size + b * l.truths + 5;
const int track_id = state.truth[truth_in_index];
const int truth_out_index = b * l.n * l.w * l.h + mask_n * l.w * l.h + j * l.w + i;
l.labels[truth_out_index] = track_id;
l.class_ids[truth_out_index] = class_id;
//printf(" track_id = %d, t = %d, b = %d, truth_in_index = %d, truth_out_index = %d \n", track_id, t, b, truth_in_index, truth_out_index);
}
// range is 0 <= 1
args->tot_iou += all_ious.iou;
args->tot_iou_loss += 1 - all_ious.iou;
// range is -1 <= giou <= 1
tot_giou += all_ious.giou;
args->tot_giou_loss += 1 - all_ious.giou;

tot_diou += all_ious.diou;
tot_diou_loss += 1 - all_ious.diou;

tot_ciou += all_ious.ciou;
tot_ciou_loss += 1 - all_ious.ciou;

int obj_index = entry_index(l, b, mask_n * l.w * l.h + j * l.w + i, 4);
avg_obj += l.output[obj_index];
if (l.objectness_smooth) {
float delta_obj = class_multiplier * l.obj_normalizer * (1 - l.output[obj_index]);
if (l.delta[obj_index] == 0) l.delta[obj_index] = delta_obj;
}
else l.delta[obj_index] = class_multiplier * l.obj_normalizer * (1 - l.output[obj_index]);

int obj_index = entry_index(l, b, mask_n * l.w * l.h + j * l.w + i, 4);
avg_obj += l.output[obj_index];
if (l.objectness_smooth) {
float delta_obj = class_multiplier * l.obj_normalizer * (1 - l.output[obj_index]);
if (l.delta[obj_index] == 0) l.delta[obj_index] = delta_obj;
}
else l.delta[obj_index] = class_multiplier * l.obj_normalizer * (1 - l.output[obj_index]);
int class_index = entry_index(l, b, mask_n * l.w * l.h + j * l.w + i, 4 + 1);
delta_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w * l.h, &avg_cat, l.focal_loss, l.label_smooth_eps, l.classes_multipliers, l.cls_normalizer);

int class_index = entry_index(l, b, mask_n * l.w * l.h + j * l.w + i, 4 + 1);
delta_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w * l.h, &avg_cat, l.focal_loss, l.label_smooth_eps, l.classes_multipliers, l.cls_normalizer);
//printf(" label: class_id = %d, truth.x = %f, truth.y = %f, truth.w = %f, truth.h = %f \n", class_id, truth.x, truth.y, truth.w, truth.h);
//printf(" mask_n = %d, l.output[obj_index] = %f, l.output[class_index + class_id] = %f \n\n", mask_n, l.output[obj_index], l.output[class_index + class_id]);

++(args->count);
++(args->class_count);
if (all_ious.iou > .5) recall += 1;
if (all_ious.iou > .75) recall75 += 1;
}
}
}
++(args->count);
++(args->class_count);
if (all_ious.iou > .5) recall += 1;
if (all_ious.iou > .75) recall75 += 1;
}

if (l.iou_thresh < 1.0f) {
Expand Down Expand Up @@ -723,6 +746,7 @@ void forward_yolo_layer(const layer l, network_state state)
yolo_args[b].tot_giou_loss = 0;
yolo_args[b].count = 0;
yolo_args[b].class_count = 0;
yolo_args[b].mloam_ptr = (mloam *)xcalloc(l.max_boxes * 3 + 60, sizeof(mloam)); // multi_label_one_anchor_choose_maxiou

if (pthread_create(&threads[b], 0, process_batch, &(yolo_args[b]))) error("Thread creation failed", DARKNET_LOC);
}
Expand All @@ -736,6 +760,7 @@ void forward_yolo_layer(const layer l, network_state state)
tot_giou_loss += yolo_args[b].tot_giou_loss;
count += yolo_args[b].count;
class_count += yolo_args[b].class_count;
free(yolo_args[b].mloam_ptr);
}

free(yolo_args);
Expand Down
0