// test-mul-mat0.c
//
// Tests ggml_mul_mat: compares the computed result against a naive reference
// implementation and validates gradients with finite differences.

#define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnings on Windows
#include "ggml/ggml.h"

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <assert.h>
#include <inttypes.h>

#if defined(_MSC_VER)
#pragma warning(disable: 4244 4267) // possible loss of data
#endif
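
// MAX_NARGS bounds how many input tensors a tested op can take; both mul_mat
// variants below use two operands, but only x[0] is marked as a parameter for
// the gradient check (nargs == 1).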
#define MAX_NARGS 2
float frand(void) {
    return (float)rand()/(float)RAND_MAX;
}

int irand(int n) {
    return rand()%n;
}

void get_random_dims(int64_t * dims, int ndims) {
    dims[0] = dims[1] = dims[2] = dims[3] = 1;

    for (int i = 0; i < ndims; i++) {
        dims[i] = 1 + irand(4);
    }
}
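
// Allocates a new F32 tensor with the given shape in ctx0 and fills it with
// uniform random values in [fmin, fmax].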
struct ggml_tensor * get_random_tensor(
        struct ggml_context * ctx0,
        int ndims,
        int64_t ne[],
        float fmin,
        float fmax) {
    struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_F32, ndims, ne);

    switch (ndims) {
        case 1:
            for (int i0 = 0; i0 < ne[0]; i0++) {
                ((float *)result->data)[i0] = frand()*(fmax - fmin) + fmin;
            }
            break;
        case 2:
            for (int i1 = 0; i1 < ne[1]; i1++) {
                for (int i0 = 0; i0 < ne[0]; i0++) {
                    ((float *)result->data)[i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
                }
            }
            break;
        case 3:
            for (int i2 = 0; i2 < ne[2]; i2++) {
                for (int i1 = 0; i1 < ne[1]; i1++) {
                    for (int i0 = 0; i0 < ne[0]; i0++) {
                        ((float *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
                    }
                }
            }
            break;
        case 4:
            for (int i3 = 0; i3 < ne[3]; i3++) {
                for (int i2 = 0; i2 < ne[2]; i2++) {
                    for (int i1 = 0; i1 < ne[1]; i1++) {
                        for (int i0 = 0; i0 < ne[0]; i0++) {
                            ((float *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
                        }
                    }
                }
            }
            break;
        default:
            assert(false);
    }

    return result;
}
float get_element(const struct ggml_tensor * t, int idx) {
    return ((float *)t->data)[idx];
}

void set_element(struct ggml_tensor * t, int idx, float value) {
    ((float *)t->data)[idx] = value;
}
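
// Compares the gradients computed by ggml's backward graph against a central
// finite-difference approximation. For each element x0 of each parameter
// tensor, the numerical gradient is
//
//     g0 = (f(x0 + eps) - f(x0 - eps)) / (2*eps)
//
// and the check fails (asserts) when it disagrees with the analytic gradient
// g1 beyond the given absolute/relative tolerances.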
bool check_gradient(
        const char * op_name,
        struct ggml_context * ctx0,
        struct ggml_tensor * x[],
        struct ggml_tensor * f,
        int ndims,
        int nargs,
        float eps,
        float max_error_abs,
        float max_error_rel) {
    const int n_threads = 1;

    struct ggml_cgraph gf = ggml_build_forward (f);
    struct ggml_cgraph gb = ggml_build_backward(ctx0, &gf, false);

    ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
    ggml_graph_reset  (&gf);
    ggml_set_f32      (f->grad, 1.0f);
    ggml_graph_compute_with_ctx(ctx0, &gb, n_threads);

    ggml_graph_dump_dot(&gf, NULL, "test-grad0-forward.dot");
    ggml_graph_dump_dot(&gb, &gf,  "test-grad0-backward.dot");

    for (int i = 0; i < nargs; ++i) {
        const int64_t nelements = ggml_nelements(x[i]);
        for (int64_t k = 0; k < nelements; ++k) {
            // compute gradient using finite differences
            const float x0 = get_element(x[i], k);

            set_element(x[i], k, x0 + eps);
            ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);

            const float f0 = ggml_get_f32_1d(f, 0);

            set_element(x[i], k, x0 - eps);
            ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);

            const float f1 = ggml_get_f32_1d(f, 0);

            const float g0 = (f0 - f1)/(2.0f*eps);

            set_element(x[i], k, x0);

            // compute gradient using backward graph
            ggml_graph_reset  (&gf);
            ggml_set_f32      (f->grad, 1.0f);
            ggml_graph_compute_with_ctx(ctx0, &gb, n_threads);

            const float g1 = get_element(x[i]->grad, k);

            const float error_abs = fabsf(g0 - g1);
            const float error_rel = g0 != 0 ? fabsf(g0 - g1)/fabsf(g0) : 0;

            if (error_abs > max_error_abs || error_rel > max_error_rel) {
                printf("%s: ndims=%d, i=%d, k=%" PRId64 ", g0=%f, g1=%f, error_abs=%f, error_rel=%f\n",
                        op_name, ndims, i, k, g0, g1, error_abs, error_rel);
                assert(false);
            }
        }
    }

    return true;
}
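
// Reads one element through the tensor's byte strides nb[], so this also
// works for non-contiguous views (e.g. the result of ggml_transpose).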
float mat_get(const struct ggml_tensor * t, int i0, int i1, int i2, int i3) {
    const size_t nb0 = t->nb[0];
    const size_t nb1 = t->nb[1];
    const size_t nb2 = t->nb[2];
    const size_t nb3 = t->nb[3];

    return *((float*) ((char*)t->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3));
}
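
// Naive reference check for y = ggml_mul_mat(x0, x1). ggml's convention, as
// the asserts in main() and the inner loop below confirm, is that both
// operands share their inner dimension ne[0]:
//
//     y[i0, i1] = sum_k x0[k, i0] * x1[k, i1]
//
// so for x0: [K, M, ...] and x1: [K, N, ...] the result is y: [M, N, ...].
// The operand matrices and the result are also printed for inspection.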
bool check_mat_mul(
        const struct ggml_tensor * y,
        const struct ggml_tensor * x0,
        const struct ggml_tensor * x1) {
    const int64_t n00 = x0->ne[0];
    const int64_t n10 = x0->ne[1];
    const int64_t n20 = x0->ne[2];
    const int64_t n30 = x0->ne[3];

    const int64_t n01 = x1->ne[0];
    const int64_t n11 = x1->ne[1];
    const int64_t n21 = x1->ne[2];
    const int64_t n31 = x1->ne[3];

    const int64_t n02 = y->ne[0];
    const int64_t n12 = y->ne[1];
    const int64_t n22 = y->ne[2];
    const int64_t n32 = y->ne[3];

    printf("x0: [%" PRId64 ", %" PRId64 ", %" PRId64 ", %" PRId64 "]\n", n00, n10, n20, n30);
    for (int j = 0; j < n10; ++j) {
        for (int i = 0; i < n00; ++i) {
            printf("%6.3f ", mat_get(x0, i, j, 0, 0));
        }
        printf("\n");
    }
    printf("\n");

    printf("x1: [%" PRId64 ", %" PRId64 ", %" PRId64 ", %" PRId64 "]\n", n01, n11, n21, n31);
    for (int j = 0; j < n11; ++j) {
        for (int i = 0; i < n01; ++i) {
            printf("%6.3f ", mat_get(x1, i, j, 0, 0));
        }
        printf("\n");
    }
    printf("\n");

    printf("y: [%" PRId64 ", %" PRId64 ", %" PRId64 ", %" PRId64 "]\n", n02, n12, n22, n32);
    for (int j = 0; j < n12; ++j) {
        for (int i = 0; i < n02; ++i) {
            printf("%6.3f ", mat_get(y, i, j, 0, 0));
        }
        printf("\n");
    }

    for (int i3 = 0; i3 < n32; ++i3) {
        for (int i2 = 0; i2 < n22; ++i2) {
            for (int i1 = 0; i1 < n12; ++i1) {
                for (int i0 = 0; i0 < n02; ++i0) {
                    float sum = 0.0f;
                    for (int k = 0; k < n00; ++k) {
                        sum += mat_get(x0, k, i0, i2, i3) * mat_get(x1, k, i1, i2, i3);
                    }
                    if (fabsf(sum - mat_get(y, i0, i1, i2, i3)) > 1e-5) {
                        printf("error: i0=%d, i1=%d, i2=%d, i3=%d, sum=%f, y=%f\n",
                                i0, i1, i2, i3, sum, mat_get(y, i0, i1, i2, i3));
                        assert(false);
                        return false;
                    }
                }
            }
        }
    }

    return true;
}
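
// Runs niter iterations (500 by default; override with the GGML_NLOOP
// environment variable or the first command-line argument, the latter taking
// precedence). Each iteration draws random shapes and checks both mul_mat
// variants.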
int main(int argc, const char ** argv) {
    struct ggml_init_params params = {
        .mem_size   = 128*1024*1024,
        .mem_buffer = NULL,
        .no_alloc   = false,
    };
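
    // every iteration allocates its tensors from a fresh 128 MB context,
    // which is freed again at the end of the iteration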
    int64_t ne[4];

    // original loop: 500
    int niter = 500;
    const char *env = getenv("GGML_NLOOP");
    if (env != NULL) {
        niter = atoi(env);
    }
    if (argc > 1) {
        niter = atoi(argv[1]);
    }

    int n_threads = 1;
    for (int iter = 0; iter < niter; ++iter) {
        printf("test-mul-mat0: iter:%d/%d\n", iter, niter);
        struct ggml_context * ctx0 = ggml_init(params);

        get_random_dims(ne, 4);

        struct ggml_tensor * x[MAX_NARGS];

        // mul_mat
        {
            const int nargs = 1;

            for (int ndims = 2; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                ne[1] = rand()%4 + 1;
                x[1] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);

                ggml_set_param(ctx0, x[0]);

                struct ggml_tensor * m = ggml_mul_mat(ctx0, x[1], x[0]);
                struct ggml_tensor * f = ggml_sum(ctx0, m);

                printf("testing: mul_mat, [%" PRId64 ", %" PRId64 ", %" PRId64 ", %" PRId64 "] = [%" PRId64 ", %" PRId64 ", %" PRId64 ", %" PRId64 "] * [%" PRId64 ", %" PRId64 ", %" PRId64 ", %" PRId64 "]\n",
                        m->ne[0], m->ne[1], m->ne[2], m->ne[3],
                        x[1]->ne[0], x[1]->ne[1], x[1]->ne[2], x[1]->ne[3],
                        x[0]->ne[0], x[0]->ne[1], x[0]->ne[2], x[0]->ne[3]);

                assert(m->ne[0] == x[1]->ne[1]);
                assert(m->ne[1] == x[0]->ne[1]);
                assert(m->ne[2] == x[0]->ne[2]);
                assert(m->ne[3] == x[0]->ne[3]);
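
                // the gradient is only checked for the 2-D case (presumably
                // because backward for batched mul_mat was not supported when
                // this test was written); for 3-D and 4-D just the forward
                // pass runs here, and check_mat_mul verifies it either way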
                if (ndims <= 2) {
                    check_gradient("mul_mat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
                } else {
                    struct ggml_cgraph gf = ggml_build_forward(m);
                    ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
                }

                check_mat_mul(m, x[1], x[0]);
            }
        }
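
        // same test, but x[1] is now built as the contiguous copy of a
        // transposed random tensor, exercising ggml_transpose + ggml_cont
        // in front of the matrix multiplication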
        // mul_mat (transposed)
        {
            const int nargs = 1;

            for (int ndims = 2; ndims <= 4; ++ndims) {
                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
                ne[1] = ne[0];
                ne[0] = rand()%4 + 1;
                x[1] = ggml_cont(ctx0, ggml_transpose(ctx0, get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f)));

                ggml_set_param(ctx0, x[0]);

                struct ggml_tensor * m = ggml_mul_mat(ctx0, x[1], x[0]);
                struct ggml_tensor * f = ggml_sum(ctx0, m);

                printf("testing: mul_mat, [%" PRId64 ", %" PRId64 ", %" PRId64 ", %" PRId64 "] = [%" PRId64 ", %" PRId64 ", %" PRId64 ", %" PRId64 "] * [%" PRId64 ", %" PRId64 ", %" PRId64 ", %" PRId64 "]\n",
                        m->ne[0], m->ne[1], m->ne[2], m->ne[3],
                        x[1]->ne[0], x[1]->ne[1], x[1]->ne[2], x[1]->ne[3],
                        x[0]->ne[0], x[0]->ne[1], x[0]->ne[2], x[0]->ne[3]);

                assert(m->ne[0] == x[1]->ne[1]);
                assert(m->ne[1] == x[0]->ne[1]);
                assert(m->ne[2] == x[0]->ne[2]);
                assert(m->ne[3] == x[0]->ne[3]);
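
                // as above: gradient check for 2-D only, forward-only otherwise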
                if (ndims <= 2) {
                    check_gradient("mul_mat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
                } else {
                    struct ggml_cgraph gf = ggml_build_forward(m);
                    ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
                }

                check_mat_mul(m, x[1], x[0]);
            }
        }

        ggml_free(ctx0);
    }

    return 0;
}