|
39 | 39 | #include <string_view> |
40 | 40 | #include <thread> |
41 | 41 | #include <vector> |
| 42 | +#include <unordered_map> |
42 | 43 |
|
43 | 44 | static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float max = 1.0f) { |
44 | 45 | size_t nels = ggml_nelements(tensor); |
@@ -269,6 +270,34 @@ static double nmse(const float * a, const float * b, size_t n) { |
269 | 270 | return mse_a_b / mse_a_0; |
270 | 271 | } |
271 | 272 |
|
// difference between 2 integer sets of equal size n (Jaccard-like distance, 0 - no difference, 1 - no overlap)
//
// The inputs are treated as multisets: element order is ignored, repeated values are counted.
// Every distinct value contributes |count in a - count in b|; the sum is normalized by 2*n,
// which is the largest possible sum (reached when a and b have no value in common).
//
//   a, b - arrays holding n values each
//   n    - number of values per array
//
// Returns a value in [0, 1]. Two empty inputs (n == 0) are considered identical and yield 0.
static double jdst(const int32_t * a, const int32_t * b, size_t n) {
    if (n == 0) {
        // without this guard the result would be 0/0 = NaN, which never compares
        // greater than an error threshold
        return 0.0;
    }

    // signed occurrence balance per value: +1 for every hit in a, -1 for every hit in b
    std::unordered_map<int32_t, int64_t> balance;

    for (size_t i = 0; i < n; ++i) {
        balance[a[i]]++;
        balance[b[i]]--;
    }

    size_t diff = 0;

    // a non-zero balance means the value occurs more often on one side than on the other
    for (const auto & p : balance) {
        diff += (size_t) std::abs(p.second);
    }

    return (double) diff / (2*n);
}
| 300 | + |
272 | 301 | // maximum absolute asymmetry between a and b |
273 | 302 | // asymmetry: (a - b) / (a + b) |
274 | 303 | // This is more stable than relative error if one of the values fluctuates towards zero. |
@@ -1051,6 +1080,14 @@ struct test_case { |
1051 | 1080 | return 1e-4; |
1052 | 1081 | } |
1053 | 1082 |
|
| 1083 | + virtual double max_err() { |
| 1084 | + return max_nmse_err(); |
| 1085 | + } |
| 1086 | + |
| 1087 | + virtual double err(const float * a, const float * b, size_t n) { |
| 1088 | + return nmse(a, b, n); |
| 1089 | + } |
| 1090 | + |
1054 | 1091 | virtual float grad_eps() { |
1055 | 1092 | return 1e-1f; |
1056 | 1093 | } |
@@ -1257,16 +1294,16 @@ struct test_case { |
1257 | 1294 | // compare |
1258 | 1295 | struct callback_userdata { |
1259 | 1296 | bool ok; |
1260 | | - double max_err; |
| 1297 | + test_case * tc; |
1261 | 1298 | ggml_backend_t backend1; |
1262 | 1299 | ggml_backend_t backend2; |
1263 | 1300 | }; |
1264 | 1301 |
|
1265 | 1302 | callback_userdata ud { |
1266 | 1303 | true, |
1267 | | - max_nmse_err(), |
| 1304 | + this, |
1268 | 1305 | backend1, |
1269 | | - backend2 |
| 1306 | + backend2, |
1270 | 1307 | }; |
1271 | 1308 |
|
1272 | 1309 | auto callback = [](int index, ggml_tensor * t1, ggml_tensor * t2, void * user_data) -> bool { |
@@ -1314,9 +1351,9 @@ struct test_case { |
1314 | 1351 | } |
1315 | 1352 | } |
1316 | 1353 |
|
1317 | | - double err = nmse(f1.data(), f2.data(), f1.size()); |
1318 | | - if (err > ud->max_err) { |
1319 | | - printf("[%s] NMSE = %.9f > %.9f ", ggml_op_desc(t1), err, ud->max_err); |
| 1354 | + double err = ud->tc->err(f1.data(), f2.data(), f1.size()); |
| 1355 | + if (err > ud->tc->max_err()) { |
| 1356 | + printf("[%s] ERR = %.9f > %.9f ", ggml_op_desc(t1), err, ud->tc->max_err()); |
1320 | 1357 | //for (int i = 0; i < (int) f1.size(); i++) { |
1321 | 1358 | // printf("%5d %9.6f %9.6f, diff = %9.6f\n", i, f1[i], f2[i], f1[i] - f2[i]); |
1322 | 1359 | //} |
@@ -4958,6 +4995,28 @@ struct test_top_k : public test_case { |
4958 | 4995 | int k = 4) |
4959 | 4996 | : type(type), ne(ne), k(k) {} |
4960 | 4997 |
|
| 4998 | + double max_err() override { |
| 4999 | + return 0.0; |
| 5000 | + } |
| 5001 | + |
| 5002 | + double err(const float * a, const float * b, size_t n) override { |
| 5003 | + std::vector<int32_t> ia(n); |
| 5004 | + std::vector<int32_t> ib(n); |
| 5005 | + |
| 5006 | + double diff = 0.0f; |
| 5007 | + |
| 5008 | + for (size_t i = 0; i < n; i++) { |
| 5009 | + ia[i] = (int32_t) a[i]; |
| 5010 | + ib[i] = (int32_t) b[i]; |
| 5011 | + |
| 5012 | + // penalize the result if the data is not integer valued |
| 5013 | + diff += std::fabs(a[i] - ia[i]); |
| 5014 | + diff += std::fabs(b[i] - ib[i]); |
| 5015 | + } |
| 5016 | + |
| 5017 | + return diff + jdst(ia.data(), ib.data(), n); |
| 5018 | + } |
| 5019 | + |
4961 | 5020 | ggml_tensor * build_graph(ggml_context * ctx) override { |
4962 | 5021 | ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data()); |
4963 | 5022 | ggml_set_name(a, "a"); |
@@ -5018,7 +5077,7 @@ struct test_topk_moe : public test_case { |
5018 | 5077 |
|
5019 | 5078 | ggml_tensor * logits = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne.data()); |
5020 | 5079 | ggml_tensor * probs = delayed_softmax ? logits : ggml_soft_max(ctx, logits); |
5021 | | - ggml_tensor * selected_experts = ggml_top_k(ctx, probs, n_expert_used); // [n_expert_used, n_tokens] |
| 5080 | + ggml_tensor * selected_experts = ggml_argsort_top_k(ctx, probs, n_expert_used); // [n_expert_used, n_tokens] |
5022 | 5081 |
|
5023 | 5082 | ggml_tensor * out = ggml_get_rows(ctx, ggml_reshape_3d(ctx, probs, 1, n_expert, n_tokens), selected_experts); // [1, n_expert_used, n_tokens] |
5024 | 5083 |
|
|
0 commit comments