- #define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnings on Windows
- #include "ggml.h"
- #include <cmath>
- #include <cstdio>
- #include <cstdlib>
- #include <cassert>
- #if defined(_MSC_VER)
- #pragma warning(disable: 4244 4267) // possible loss of data
- #endif
- #if defined(__GNUC__)
- #pragma GCC diagnostic ignored "-Wdouble-promotion"
- #endif
- #define MAX_NARGS 3
- #undef MIN
- #undef MAX
- #define MIN(a, b) ((a) < (b) ? (a) : (b))
- #define MAX(a, b) ((a) > (b) ? (a) : (b))
- #define GGML_SILU_FP16
- //
- // logging
- //
- #if (GGML_DEBUG >= 1)
- #define GGML_PRINT_DEBUG(...) printf(__VA_ARGS__)
- #else
- #define GGML_PRINT_DEBUG(...)
- #endif
- #if (GGML_DEBUG >= 5)
- #define GGML_PRINT_DEBUG_5(...) printf(__VA_ARGS__)
- #else
- #define GGML_PRINT_DEBUG_5(...)
- #endif
- #if (GGML_DEBUG >= 10)
- #define GGML_PRINT_DEBUG_10(...) printf(__VA_ARGS__)
- #else
- #define GGML_PRINT_DEBUG_10(...)
- #endif
- #define GGML_PRINT(...) printf(__VA_ARGS__)
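- // random helpers used to build the test tensors:
- //   frand()           - uniform float in [0, 1]
- //   irand(n)          - uniform int in [0, n), with irand(0) == 0
- //   get_random_dims() - fills the first ndims entries with sizes in [1, 4], the rest with 1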
- static float frand(void) {
- return (float)rand()/(float)RAND_MAX;
- }
- static int irand(int n) {
- if (n == 0) return 0;
- return rand()%n;
- }
- static void get_random_dims(int64_t * dims, int ndims) {
- dims[0] = dims[1] = dims[2] = dims[3] = 1;
- for (int i = 0; i < ndims; i++) {
- dims[i] = 1 + irand(4);
- }
- }
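- // create an ndims-dimensional F32 tensor filled with uniform random values in [fmin, fmax);
- // data is laid out with ne[0] as the innermost (fastest varying) dimension.
- // the _f16 and _i32 variants below use the same layout.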
- static struct ggml_tensor * get_random_tensor_f32(
- struct ggml_context * ctx0,
- int ndims,
- int64_t ne[],
- float fmin,
- float fmax) {
- struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_F32, ndims, ne);
- switch (ndims) {
- case 1:
- for (int i0 = 0; i0 < ne[0]; i0++) {
- ((float *)result->data)[i0] = frand()*(fmax - fmin) + fmin;
- }
- break;
- case 2:
- for (int i1 = 0; i1 < ne[1]; i1++) {
- for (int i0 = 0; i0 < ne[0]; i0++) {
- ((float *)result->data)[i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
- }
- }
- break;
- case 3:
- for (int i2 = 0; i2 < ne[2]; i2++) {
- for (int i1 = 0; i1 < ne[1]; i1++) {
- for (int i0 = 0; i0 < ne[0]; i0++) {
- ((float *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
- }
- }
- }
- break;
- case 4:
- for (int i3 = 0; i3 < ne[3]; i3++) {
- for (int i2 = 0; i2 < ne[2]; i2++) {
- for (int i1 = 0; i1 < ne[1]; i1++) {
- for (int i0 = 0; i0 < ne[0]; i0++) {
- ((float *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
- }
- }
- }
- }
- break;
- default:
- assert(false);
- }
- return result;
- }
- static struct ggml_tensor * get_random_tensor_f16(
- struct ggml_context * ctx0,
- int ndims,
- int64_t ne[],
- float fmin,
- float fmax) {
- struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_F16, ndims, ne);
- switch (ndims) {
- case 1:
- for (int i0 = 0; i0 < ne[0]; i0++) {
- ((ggml_fp16_t *)result->data)[i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin);
- }
- break;
- case 2:
- for (int i1 = 0; i1 < ne[1]; i1++) {
- for (int i0 = 0; i0 < ne[0]; i0++) {
- ((ggml_fp16_t *)result->data)[i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin);
- }
- }
- break;
- case 3:
- for (int i2 = 0; i2 < ne[2]; i2++) {
- for (int i1 = 0; i1 < ne[1]; i1++) {
- for (int i0 = 0; i0 < ne[0]; i0++) {
- ((ggml_fp16_t *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin);
- }
- }
- }
- break;
- case 4:
- for (int i3 = 0; i3 < ne[3]; i3++) {
- for (int i2 = 0; i2 < ne[2]; i2++) {
- for (int i1 = 0; i1 < ne[1]; i1++) {
- for (int i0 = 0; i0 < ne[0]; i0++) {
- ((ggml_fp16_t *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin);
- }
- }
- }
- }
- break;
- default:
- assert(false);
- }
- return result;
- }
- static struct ggml_tensor * get_random_tensor_i32(
- struct ggml_context * ctx0,
- int ndims,
- int64_t ne[],
- int32_t imin,
- int32_t imax) {
- struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_I32, ndims, ne);
- switch (ndims) {
- case 1:
- for (int i0 = 0; i0 < ne[0]; i0++) {
- ((int32_t *)result->data)[i0] = irand(imax - imin) + imin;
- }
- break;
- case 2:
- for (int i1 = 0; i1 < ne[1]; i1++) {
- for (int i0 = 0; i0 < ne[0]; i0++) {
- ((int32_t *)result->data)[i1*ne[0] + i0] = irand(imax - imin) + imin;
- }
- }
- break;
- case 3:
- for (int i2 = 0; i2 < ne[2]; i2++) {
- for (int i1 = 0; i1 < ne[1]; i1++) {
- for (int i0 = 0; i0 < ne[0]; i0++) {
- ((int32_t *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = irand(imax - imin) + imin;
- }
- }
- }
- break;
- case 4:
- for (int i3 = 0; i3 < ne[3]; i3++) {
- for (int i2 = 0; i2 < ne[2]; i2++) {
- for (int i1 = 0; i1 < ne[1]; i1++) {
- for (int i0 = 0; i0 < ne[0]; i0++) {
- ((int32_t *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = irand(imax - imin) + imin;
- }
- }
- }
- }
- break;
- default:
- assert(false);
- }
- return result;
- }
- static void print_elements(const char* label, const struct ggml_tensor * t) {
- if (!t) {
- printf("%s: %s = null\n", __func__, label);
- return;
- }
- const int nelements = ggml_nelements(t);
- printf("%s: %s = [", __func__, label);
- for (int k = 0; k < nelements; ++k) {
- if (k > 0) { printf(", "); }
- printf("%.5f", ggml_get_f32_1d(t, k));
- }
- printf("] shape: [");
- for (int k = 0; k < t->n_dims; ++k) {
- if (k > 0) { printf(", "); }
- printf("%d", (int)t->ne[k]);
- }
- printf("]\n");
- }
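- // check_gradient: for every element k of every parameter x[i], compare the
- // analytic gradient g1 = x[i]->grad[k] from the backward graph against a
- // central finite difference
- //   g0 = (f(x + eps) - f(x - eps)) / (2*eps)
- // and fail if the absolute error |g0 - g1| exceeds max_error_abs or the
- // relative error |g0 - g1| / |g0| exceeds max_error_rel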
- static bool check_gradient(
- const char * op_name,
- struct ggml_context * ctx0,
- struct ggml_tensor * x[],
- struct ggml_tensor * f,
- int ndims,
- int nargs,
- float eps,
- float max_error_abs,
- float max_error_rel) {
- static int n_threads = -1;
- if (n_threads < 0) {
- n_threads = GGML_DEFAULT_N_THREADS;
- const char *env = getenv("GGML_N_THREADS");
- if (env) {
- n_threads = atoi(env);
- }
- printf("GGML_N_THREADS = %d\n", n_threads);
- }
- struct ggml_cgraph gf = ggml_build_forward (f);
- struct ggml_cgraph gb = ggml_build_backward(ctx0, &gf, false);
- ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
- ggml_graph_reset (&gf);
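- // seed df/df = 1 so the backward pass accumulates df/dx[i] into each x[i]->grad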
- ggml_set_f32 (f->grad, 1.0f);
- ggml_graph_compute_with_ctx(ctx0, &gb, n_threads);
- // ggml_graph_dump_dot(&gf, NULL, "test-grad0-forward.dot");
- // ggml_graph_dump_dot(&gb, &gf, "test-grad0-backward.dot");
- for (int i = 0; i < nargs; ++i) {
- const int nelements = ggml_nelements(x[i]);
- for (int k = 0; k < nelements; ++k) {
- // compute gradient using finite differences
- const float x0 = ggml_get_f32_1d(x[i], k);
- const float xm = x0 - eps;
- const float xp = x0 + eps;
- ggml_set_f32_1d(x[i], k, xp);
- ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
- const double f0 = ggml_get_f32_1d(f, 0);
- ggml_set_f32_1d(x[i], k, xm);
- ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
- const double f1 = ggml_get_f32_1d(f, 0);
- const double g0 = (f0 - f1)/(2.0*(double) eps);
- ggml_set_f32_1d(x[i], k, x0);
- // compute gradient using backward graph
- ggml_graph_reset (&gf);
- ggml_set_f32 (f->grad, 1.0f);
- ggml_graph_compute_with_ctx(ctx0, &gb, n_threads);
- const double g1 = ggml_get_f32_1d(x[i]->grad, k);
- const double error_abs = fabs(g0 - g1);
- const double error_rel = g0 != 0 ? fabs(g0 - g1)/fabs(g0) : 0;
- if (error_abs > max_error_abs || error_rel > max_error_rel) {
- printf("%s: ndims=%d, i=%d, k=%d, x0=%f, xm=%f, xp=%f, f0=%f, f1=%f, g0=%f, g1=%f, eps=%f, error_abs=%f, error_rel=%f\n",
- op_name, ndims, i, k, x0, xm, xp, f0, f1, g0, g1, eps, error_abs, error_rel);
- //assert(false);
- return false;
- }
- }
- }
- return true;
- }
- // TODO: clean this up ..
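- // check_mat_mul: verify y = ggml_mul_mat(x0, x1) against a naive reference:
- // y[i*nc + j] must equal the dot product of row j of x0 and row i of x1
- // (nc = x0->ne[1] rows of x0, nr = x1->ne[1] rows of x1, nk = shared row length)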
- static bool check_mat_mul(
- const struct ggml_tensor * y,
- const struct ggml_tensor * x0,
- const struct ggml_tensor * x1) {
- float * dst = (float *) y->data;
- float * src0 = (float *) x0->data;
- float * src1 = (float *) x1->data;
- const int nc = x0->ne[1];
- const int nr = x1->ne[1];
- const int nk = x0->ne[0];
- GGML_PRINT_DEBUG("check_mat_mul: nc=%d, nr=%d, nk=%d\n", nc, nr, nk);
- GGML_PRINT_DEBUG("x0:\n");
- for (int j = 0; j < x0->ne[1]; ++j) {
- for (int i = 0; i < x0->ne[0]; ++i) {
- GGML_PRINT_DEBUG("%6.3f ", src0[j*nk + i]);
- }
- GGML_PRINT_DEBUG("\n");
- }
- GGML_PRINT_DEBUG("\n");
- GGML_PRINT_DEBUG("x1:\n");
- for (int j = 0; j < x1->ne[1]; ++j) {
- for (int i = 0; i < x1->ne[0]; ++i) {
- GGML_PRINT_DEBUG("%6.3f ", src1[j*nk + i]);
- }
- GGML_PRINT_DEBUG("\n");
- }
- GGML_PRINT_DEBUG("\n");
- GGML_PRINT_DEBUG("y: n_dims = %d, (%lld, %lld)\n", y->n_dims, y->ne[0], y->ne[1]);
- for (int j = 0; j < y->ne[1]; ++j) {
- for (int i = 0; i < y->ne[0]; ++i) {
- GGML_PRINT_DEBUG("%6.3f ", dst[j*nr + i]);
- }
- GGML_PRINT_DEBUG("\n");
- }
- for (int i = 0; i < nr; ++i) {
- for (int j = 0; j < nc; ++j) {
- float sum = 0.0f;
- for (int k = 0; k < nk; ++k) {
- sum += src0[j*nk + k]*src1[i*nk + k];
- }
- if (fabsf(dst[i*nc + j] - sum) > 1e-5f) {
- fprintf(stderr, "check_mat_mul: dst[%d] = %f, sum = %f\n", i*nc + j, dst[i*nc + j], sum);
- assert(false);
- return false;
- }
- }
- }
- return true;
- }
- #define NUM_PERMUTATIONS (4*3*2*1)
- int main(int argc, const char ** argv) {
- struct ggml_init_params params = {
- /* .mem_size = */ 128*1024*1024,
- /* .mem_buffer = */ NULL,
- /* .no_alloc = */ false,
- };
- int64_t ne[4];
- int all_permutations[4 * NUM_PERMUTATIONS];
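- // enumerate all 4! = 24 permutations of the four axes by skipping repeated axis indices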
- {
- int count = 0;
- for (int ax0=0; ax0<4; ++ax0) {
- for (int ax1=0; ax1<4; ++ax1) {
- if (ax1 == ax0) continue;
- for (int ax2=0; ax2<4; ++ax2) {
- if (ax2 == ax0) continue;
- if (ax2 == ax1) continue;
- for (int ax3=0; ax3<4; ++ax3) {
- if (ax3 == ax0) continue;
- if (ax3 == ax1) continue;
- if (ax3 == ax2) continue;
- assert(count < NUM_PERMUTATIONS);
- all_permutations[count*4+0] = ax0;
- all_permutations[count*4+1] = ax1;
- all_permutations[count*4+2] = ax2;
- all_permutations[count*4+3] = ax3;
- ++count;
- }
- }
- }
- }
- }
- // original loop: 1000
- int niter = 4;
- const char *env = getenv("GGML_NLOOP");
- if (env != NULL) {
- niter = atoi(env);
- }
- if (argc > 1) {
- niter = atoi(argv[1]);
- }
- for (int iter = 0; iter < niter; ++iter) {
- printf("test-grad0: iter:%d/%d\n", iter, niter);
- struct ggml_context * ctx0 = ggml_init(params);
- get_random_dims(ne, 4);
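- // pick a fresh random base shape for this iteration; each dimension is in [1, 4]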
- struct ggml_tensor * x[MAX_NARGS];
- // add f32
- {
- const int nargs = 2;
- for (int ndims = 1; ndims <= 4; ++ndims) {
- for (int i = 0; i < nargs; ++i) {
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[i]);
- }
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_add(ctx0, x[0], x[1]));
- check_gradient("add f32", ctx0, x, f, ndims, nargs, 1e-3f, 2e-3f, 2e-3f);
- }
- }
- // add f16
- {
- const int nargs = 2;
- for (int ndims = 1; ndims <= 4; ++ndims) {
- for (int i = 0; i < nargs; ++i) {
- x[i] = get_random_tensor_f16(ctx0, ndims, ne, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[i]);
- }
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_add(ctx0, x[0], x[1]));
- check_gradient("add f16", ctx0, x, f, ndims, nargs, 1e-1f, 2e-1f, 2e-1f);
- }
- }
- // sub
- {
- const int nargs = 2;
- for (int ndims = 1; ndims <= 4; ++ndims) {
- for (int i = 0; i < nargs; ++i) {
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[i]);
- }
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_sub(ctx0, x[0], x[1]));
- check_gradient("sub", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
- }
- }
- // mul
- {
- const int nargs = 2;
- for (int ndims = 1; ndims <= 4; ++ndims) {
- for (int i = 0; i < nargs; ++i) {
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[i]);
- }
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_mul(ctx0, x[0], x[1]));
- check_gradient("mul", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
- }
- }
- // div
- {
- const int nargs = 2;
- for (int ndims = 1; ndims <= 4; ++ndims) {
- for (int i = 0; i < nargs; ++i) {
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, 0.5f, 1.0f);
- ggml_set_param(ctx0, x[i]);
- }
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_div(ctx0, x[0], x[1]));
- check_gradient("div", ctx0, x, f, ndims, nargs, 1e-3f, 1e-1f, 1e-1f);
- }
- }
- // sqr
- {
- const int nargs = 1;
- for (int ndims = 1; ndims <= 2; ++ndims) {
- for (int i = 0; i < nargs; ++i) {
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[i]);
- }
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, x[0]));
- check_gradient("sqr", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
- }
- }
- // sqrt
- {
- const int nargs = 1;
- for (int ndims = 1; ndims <= 2; ++ndims) {
- for (int i = 0; i < nargs; ++i) {
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f);
- ggml_set_param(ctx0, x[i]);
- }
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqrt(ctx0, x[0]));
- check_gradient("sqrt", ctx0, x, f, ndims, nargs, 1e-3f, 2e-2f, 1e-1f);
- }
- }
- // log
- {
- const int nargs = 1;
- for (int ndims = 1; ndims <= 2; ++ndims) {
- for (int i = 0; i < nargs; ++i) {
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f);
- ggml_set_param(ctx0, x[i]);
- }
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_log(ctx0, x[0]));
- check_gradient("log", ctx0, x, f, ndims, nargs, 1e-3f, INFINITY, 1e-1f);
- }
- }
- // sum
- {
- const int nargs = 1;
- for (int ndims = 1; ndims <= 2; ++ndims) {
- for (int i = 0; i < nargs; ++i) {
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[i]);
- }
- struct ggml_tensor * f = ggml_sum(ctx0, x[0]);
- check_gradient("sum", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
- }
- }
- // sum_rows
- {
- const int nargs = 1;
- for (int ndims = 1; ndims <= 4; ++ndims) {
- for (int i = 0; i < nargs; ++i) {
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[i]);
- }
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sum_rows(ctx0, x[0])));
- check_gradient("sum_rows", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY);
- }
- }
- // mean, not yet fully implemented
- if(0)
- {
- const int nargs = 1;
- for (int ndims = 1; ndims <= 4; ++ndims) {
- for (int i = 0; i < nargs; ++i) {
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[i]);
- }
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_mean(ctx0, x[0]));
- check_gradient("mean", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
- }
- }
- // argmax
- if (0)
- {
- const int nargs = 1;
- for (int ndims = 1; ndims <= 4; ++ndims) {
- for (int i = 0; i < nargs; ++i) {
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[i]);
- }
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_argmax(ctx0, x[0]));
- check_gradient("argmax", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
- }
- }
- // repeat
- {
- int64_t ne2[4];
- get_random_dims(ne2, 4);
- ne2[0] = ne[0] * ne2[0];
- ne2[1] = ne[1] * ne2[1];
- ne2[2] = 1;
- ne2[3] = 1;
- const int nargs = 1;
- for (int ndims = 1; ndims <= 2; ++ndims) {
- x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
- x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[0]);
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x[1], ggml_repeat(ctx0, x[0], x[1]))));
- check_gradient("repeat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY);
- }
- }
- // repeat back
- {
- int64_t ne2[4];
- get_random_dims(ne2, 4);
- ne2[0] = ne[0] * ne2[0];
- ne2[1] = ne[1] * ne2[1];
- ne2[2] = 1;
- ne2[3] = 1;
- const int nargs = 1;
- for (int ndims = 1; ndims <= 2; ++ndims) {
- x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
- x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[0]);
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x[0], ggml_repeat_back(ctx0, x[1], x[0]))));
- check_gradient("repeat back", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY);
- }
- }
- // abs (finite differences do not work)
- //{
- // const int nargs = 1;
- // for (int ndims = 1; ndims <= 2; ++ndims) {
- // for (int i = 0; i < nargs; ++i) {
- // x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
- // ggml_set_param(ctx0, x[i]);
- // }
- // struct ggml_tensor * f = ggml_sum(ctx0, ggml_abs(ctx0, x[0]));
- // check_gradient("abs", ctx0, x, f, ndims, nargs, 1e-3f, INFINITY, 1e-3f);
- // }
- //}
- // sgn
- {
- const int nargs = 1;
- for (int ndims = 1; ndims <= 4; ++ndims) {
- for (int i = 0; i < nargs; ++i) {
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[i]);
- }
- struct ggml_tensor* f = ggml_sum(ctx0, ggml_sgn(ctx0, x[0]));
- check_gradient("sgn", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
- }
- }
- // neg
- {
- const int nargs = 1;
- for (int ndims = 1; ndims <= 4; ++ndims) {
- for (int i = 0; i < nargs; ++i) {
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[i]);
- }
- struct ggml_tensor* f = ggml_sum(ctx0, ggml_neg(ctx0, x[0]));
- check_gradient("neg", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
- }
- }
- // step
- {
- const int nargs = 1;
- for (int ndims = 1; ndims <= 4; ++ndims) {
- for (int i = 0; i < nargs; ++i) {
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[i]);
- }
- struct ggml_tensor* f = ggml_sum(ctx0, ggml_step(ctx0, x[0]));
- check_gradient("step", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
- }
- }
- // tanh, not yet fully implemented
- if(0)
- {
- const int nargs = 1;
- for (int ndims = 1; ndims <= 4; ++ndims) {
- for (int i = 0; i < nargs; ++i) {
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[i]);
- }
- struct ggml_tensor* f = ggml_sum(ctx0, ggml_tanh(ctx0, x[0]));
- check_gradient("tanh", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
- }
- }
- // mul_mat
- {
- const int nargs = 2;
- for (int ndims = 2; ndims <= 2; ++ndims) {
- x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
- {
- int64_t ne2[4];
- get_random_dims(ne2, 4);
- ne2[0] = ne[0];
- x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
- }
- ggml_set_param(ctx0, x[0]);
- ggml_set_param(ctx0, x[1]);
- struct ggml_tensor * m = ggml_mul_mat(ctx0, x[1], x[0]);
- struct ggml_tensor * f = ggml_sum(ctx0, m);
- GGML_PRINT_DEBUG("testing: mul_mat, [%lld, %lld] (%d) * [%lld, %lld] (%d)\n", x[1]->ne[0], x[1]->ne[1], x[1]->n_dims, x[0]->ne[0], x[0]->ne[1], x[0]->n_dims);
- check_gradient("mul_mat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
- check_mat_mul(m, x[1], x[0]);
- }
- }
- // elu, not yet fully implemented
- if(0)
- {
- const int nargs = 1;
- for (int ndims = 1; ndims <= 4; ++ndims) {
- for (int i = 0; i < nargs; ++i) {
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[i]);
- }
- struct ggml_tensor* f = ggml_sum(ctx0, ggml_elu(ctx0, x[0]));
- check_gradient("elu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
- }
- }
- // relu
- {
- const int nargs = 1;
- for (int ndims = 1; ndims <= 4; ++ndims) {
- for (int i = 0; i < nargs; ++i) {
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[i]);
- }
- struct ggml_tensor* f = ggml_sum(ctx0, ggml_relu(ctx0, x[0]));
- check_gradient("relu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
- }
- }
- // gelu, not yet fully implemented
- if(0)
- {
- const int nargs = 1;
- for (int ndims = 1; ndims <= 4; ++ndims) {
- for (int i = 0; i < nargs; ++i) {
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[i]);
- }
- struct ggml_tensor* f = ggml_sum(ctx0, ggml_gelu(ctx0, x[0]));
- check_gradient("gelu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
- }
- }
- // silu
- {
- const int nargs = 1;
- for (int ndims = 1; ndims <= 2; ++ndims) {
- for (int i = 0; i < nargs; ++i) {
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[i]);
- }
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_silu(ctx0, x[0]));
- #ifdef GGML_SILU_FP16
- // due to GGML_SILU_FP16 the finite difference method will be slightly wrong -> increase error bounds.
- check_gradient("silu", ctx0, x, f, ndims, nargs, 1e-3f, 0.5, INFINITY);
- #else
- check_gradient("silu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
- #endif
- }
- }
- // rms_norm
- {
- const int nargs = 1;
- for (int ndims = 1; ndims <= 2; ++ndims) {
- for (int i = 0; i < nargs; ++i) {
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[i]);
- }
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_rms_norm(ctx0, x[0], 1e-6f));
- check_gradient("rms_norm", ctx0, x, f, ndims, nargs, 1e-4f, 1.0f, INFINITY);
- }
- }
- // scale
- {
- const int nargs = 2;
- int64_t ne2[4];
- ne2[0] = 1;
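- // x[1] is a single-element tensor holding the scale factor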
- for (int ndims = 1; ndims <= 2; ++ndims) {
- x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
- x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[0]);
- ggml_set_param(ctx0, x[1]);
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_scale(ctx0, x[0], x[1]));
- check_gradient("scale", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
- }
- }
- // cpy f32
- {
- const int nargs = 2;
- for (int ndims = 1; ndims <= 2; ++ndims) {
- for (int i = 0; i < nargs; ++i) {
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[i]);
- }
- // x[1] is overwritten by x[0], so the gradients don't propagate to x[1]
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_cpy(ctx0, x[0], x[1]));
- check_gradient("cpy f32", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
- }
- }
- // cpy f16
- {
- const int nargs = 2;
- for (int ndims = 1; ndims <= 2; ++ndims) {
- for (int i = 0; i < nargs; ++i) {
- x[i] = get_random_tensor_f16(ctx0, ndims, ne, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[i]);
- }
- // x[1] is overwritten by x[0], so the gradients don't propagate to x[1]
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_cpy(ctx0, x[0], x[1]));
- check_gradient("cpy f16", ctx0, x, f, ndims, nargs, 1e-1f, 1e-1f, INFINITY);
- }
- }
- // reshape (1d->nd)
- {
- const int nargs = 1;
- for (int ndims = 1; ndims <= 2; ++ndims) {
- int64_t ne2[4];
- ne2[0] = 1;
- ne2[1] = 1;
- ne2[2] = 1;
- ne2[3] = 1;
- for (int i = 0; i < ndims; ++i) {
- ne2[0] *= ne[i];
- }
- x[0] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
- x[1] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[0]);
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_reshape(ctx0, x[0], x[1]));
- check_gradient("reshape", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
- }
- }
- // reshape (nd->1d)
- {
- const int nargs = 1;
- for (int ndims = 1; ndims <= 2; ++ndims) {
- int64_t ne2[4];
- ne2[0] = 1;
- ne2[1] = 1;
- ne2[2] = 1;
- ne2[3] = 1;
- for (int i = 0; i < ndims; ++i) {
- ne2[0] *= ne[i];
- }
- x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
- x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[0]);
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_reshape(ctx0, x[0], x[1]));
- check_gradient("reshape", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
- }
- }
- // acc 1d
- {
- int64_t ne2[4] = { 1, 1, 1, 1 };
- const int nargs = 2;
- for (int ndims = 1; ndims <= 4; ++ndims) {
- x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[0]);
- get_random_dims(ne2, 1);
- while ((ne2[0] > ne[0]) || (ne2[0] > ggml_nelements(x[0]))) {
- get_random_dims(ne2, 1);
- }
- x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[1]);
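- // ggml_acc interprets the offset in bytes, so scale the random element offset by the element size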
- const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1]));
- const int offset = irand(max_offset) * ggml_element_size(x[0]);
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));
- check_gradient("acc 1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
- }
- }
- // acc 2d
- {
- int64_t ne2[4] = { 1, 1, 1, 1 };
- int64_t max_offsets[4] = { 0, 0, 0, 0 };
- int64_t offsets[4] = { 0, 0, 0, 0 };
- const int nargs = 2;
- for (int ndims = 2; ndims <= 4; ++ndims) {
- x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[0]);
- get_random_dims(ne2, 2);
- while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[0]*ne2[1] > ggml_nelements(x[0]))) {
- get_random_dims(ne2, 2);
- }
- x[1] = get_random_tensor_f32(ctx0, 2, ne2, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[1]);
- max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
- max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
- offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
- offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
- const int offset = offsets[0] + offsets[1];
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));
- check_gradient("acc 2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
- }
- }
- // acc 3d
- {
- int64_t ne2[4] = { 1, 1, 1, 1 };
- int64_t max_offsets[4] = { 0, 0, 0, 0 };
- int64_t offsets[4] = { 0, 0, 0, 0 };
- const int nargs = 2;
- for (int ndims = 3; ndims <= 4; ++ndims) {
- x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[0]);
- get_random_dims(ne2, 3);
- while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[2] > ne[2]) || (ne2[0]*ne2[1]*ne2[2] > ggml_nelements(x[0]))) {
- get_random_dims(ne2, 3);
- }
- x[1] = get_random_tensor_f32(ctx0, 3, ne2, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[1]);
- max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
- max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
- max_offsets[2] = MAX(0, x[0]->ne[2] - x[1]->ne[2]);
- offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
- offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
- offsets[2] = irand(max_offsets[2]) * x[0]->nb[2];
- const int offset = offsets[0] + offsets[1] + offsets[2];
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));
- check_gradient("acc 3d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
- }
- }
- // acc 4d
- {
- int64_t ne2[4] = { 1, 1, 1, 1 };
- int64_t max_offsets[4] = { 0, 0, 0, 0 };
- int64_t offsets[4] = { 0, 0, 0, 0 };
- const int nargs = 2;
- for (int ndims = 4; ndims <= 4; ++ndims) {
- x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[0]);
- get_random_dims(ne2, 4);
- while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[2] > ne[2]) || (ne2[3] > ne[3]) || (ne2[0]*ne2[1]*ne2[2]*ne2[3] > ggml_nelements(x[0]))) {
- get_random_dims(ne2, 4);
- }
- x[1] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[1]);
- max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
- max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
- max_offsets[2] = MAX(0, x[0]->ne[2] - x[1]->ne[2]);
- max_offsets[3] = MAX(0, x[0]->ne[3] - x[1]->ne[3]);
- offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
- offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
- offsets[2] = irand(max_offsets[2]) * x[0]->nb[2];
- offsets[3] = irand(max_offsets[3]) * x[0]->nb[3];
- const int offset = offsets[0] + offsets[1] + offsets[2] + offsets[3];
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));
- check_gradient("acc 4d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
- }
- }
- // set_1d
- {
- int64_t ne2[4];
- const int nargs = 2;
- for (int ndims = 1; ndims <= 4; ++ndims) {
- x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[0]);
- get_random_dims(ne2, 1);
- while ((ne2[0] > ne[0]) || (ne2[0] > ggml_nelements(x[0]))) {
- get_random_dims(ne2, 1);
- }
- x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[1]);
- const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1]));
- const int offset = irand(max_offset) * ggml_element_size(x[0]);
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_set_1d(ctx0, x[0], x[1], offset));
- check_gradient("set_1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
- }
- }
- // set_2d
- {
- int64_t ne2[4];
- int64_t max_offsets[4] = { 0, 0, 0, 0 };
- int64_t offsets[4] = { 0, 0, 0, 0 };
- const int nargs = 1;
- for (int ndims = 2; ndims <= 4; ++ndims) {
- x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[0]);
- get_random_dims(ne2, 2);
- while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[0]*ne2[1] > ggml_nelements(x[0]))) {
- get_random_dims(ne2, 2);
- }
- x[1] = get_random_tensor_f32(ctx0, 2, ne2, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[1]);
- max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
- max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
- offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
- offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
- const int offset = offsets[0] + offsets[1];
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_set_2d(ctx0, x[0], x[1], x[1]->nb[1], offset));
- check_gradient("set_2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
- }
- }
- // view_1d
- {
- const int nargs = 1;
- for (int ndims = 1; ndims <= 4; ++ndims) {
- x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[0]);
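- // view a random contiguous range [i0, i1) of the flattened tensor; the offset is in bytes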
- const int k0 = irand(ggml_nelements(x[0]));
- const int k1 = irand(ggml_nelements(x[0]));
- const int i0 = MIN(k0, k1);
- const int i1 = MAX(k0, k1);
- const int offset = i0 * sizeof(float);
- const int nelem = i1 - i0;
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_1d(ctx0, x[0], nelem, offset));
- check_gradient("view_1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
- }
- }
- // view_2d
- {
- int64_t ne2[4];
- int64_t nb2[4];
- const int nargs = 1;
- for (int ndims = 1; ndims <= 4; ++ndims) {
- x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
- get_random_dims(ne2, 2);
- while (ne2[0]*ne2[1] > ggml_nelements(x[0])) {
- get_random_dims(ne2, 2);
- }
- const int count = ne2[0]*ne2[1];
- nb2[0] = sizeof(float);
- nb2[1] = nb2[0]*ne2[0];
- ggml_set_param(ctx0, x[0]);
- const int max_offset = ggml_nelements(x[0]) - count;
- const int offset = irand(max_offset+1) * sizeof(float);
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_2d(ctx0, x[0], ne2[0], ne2[1], nb2[1], offset));
- check_gradient("view_2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
- }
- }
- // view_3d
- {
- int64_t ne2[4] = {1,1,1,1};
- int64_t nb2[4] = {0,0,0,0};
- const int nargs = 1;
- for (int ndims = 1; ndims <= 4; ++ndims) {
- x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
- get_random_dims(ne2, 3);
- while (ne2[0]*ne2[1]*ne2[2] > ggml_nelements(x[0])) {
- get_random_dims(ne2, 3);
- }
- const int count = ne2[0]*ne2[1]*ne2[2];
- nb2[0] = sizeof(float);
- nb2[1] = nb2[0]*ne2[0];
- nb2[2] = nb2[1]*ne2[1];
- ggml_set_param(ctx0, x[0]);
- const int max_offset = ggml_nelements(x[0]) - count;
- const int offset = irand(max_offset+1) * sizeof(float);
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_3d(ctx0, x[0], ne2[0], ne2[1], ne2[2], nb2[1], nb2[2], offset));
- check_gradient("view_3d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
- }
- }
- // permute
- {
- int64_t ne2[4];
- const int nargs = 1;
- for (int ndims = 1; ndims <= 4; ++ndims)
- {
- // ggml_permute will set axes of dimensions below n_dims to 1;
- // to make ggml_permute work correctly on all axes,
- // the input tensor needs the maximal n_dims of 4.
- for (int i=0; i<ndims; ++i) {
- ne2[i] = ne[i];
- }
- for (int i=ndims; i<4; ++i) {
- ne2[i] = 1;
- }
- x[0] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[0]);
- const int p = irand(NUM_PERMUTATIONS);
- const int ax0 = all_permutations[p*4+0];
- const int ax1 = all_permutations[p*4+1];
- const int ax2 = all_permutations[p*4+2];
- const int ax3 = all_permutations[p*4+3];
- // sum requires contiguous tensor rows
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_cont(ctx0, ggml_permute(ctx0, x[0], ax0, ax1, ax2, ax3)));
- check_gradient("permute", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
- }
- }
- // transpose
- {
- int64_t ne2[4];
- const int nargs = 1;
- for (int ndims = 1; ndims <= 4; ++ndims)
- {
- // ggml_transpose will set axes of dimensions below n_dims to 1;
- // to make ggml_transpose work correctly on all axes,
- // the input tensor needs the maximal n_dims of 4.
- for (int i=0; i<ndims; ++i) {
- ne2[i] = ne[i];
- }
- for (int i=ndims; i<4; ++i) {
- ne2[i] = 1;
- }
- x[0] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[0]);
- // sum requires contiguous tensor rows
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_cont(ctx0, ggml_transpose(ctx0, x[0])));
- check_gradient("transpose", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
- }
- }
- // get_rows
- {
- int64_t ne2[4] = {ne[0], ne[1], 1, 1};
- int64_t ne3[4] = {1+irand(ne[1]), 1, 1, 1};
- const int nargs = 1;
- const int ndims = 2;
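- // x[1] holds ne3[0] random row indices into x[0], each in [0, ne2[1])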
- x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
- x[1] = get_random_tensor_i32(ctx0, 1, ne3, 0, ne2[1]);
- ggml_set_param(ctx0, x[0]);
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_get_rows(ctx0, x[0], x[1]));
- check_gradient("get_rows", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
- }
- // diag_mask_inf
- {
- const int nargs = 1;
- const int ndims = 2;
- x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[0]);
- int n_past = irand(ne[0]);
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_diag_mask_inf(ctx0, x[0], n_past));
- check_gradient("diag_mask_inf", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
- }
- // diag_mask_zero
- {
- const int nargs = 1;
- const int ndims = 2;
- x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[0]);
- int n_past = irand(ne[0]);
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_diag_mask_zero(ctx0, x[0], n_past));
- check_gradient("diag_mask_zero", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
- }
- // softmax
- {
- const int nargs = 1;
- int64_t ne2[4];
- get_random_dims(ne2, 4);
- for (int ndims = 1; ndims <= 3; ++ndims) {
- x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[0]);
- float eps = 1e-6f;
- // don't use only sum as aggregation, because the sum of softmax is always 1 -> finite differences would not work
- // instead use sum(log(soft_max()*(1-eps)+eps)); use eps to avoid log(0)
- struct ggml_tensor * f = ggml_sum(ctx0,
- ggml_log(ctx0,
- ggml_add1(ctx0,
- ggml_scale(ctx0,
- ggml_soft_max(ctx0, x[0]),
- ggml_new_f32(ctx0, 1.0f - eps)),
- ggml_new_f32(ctx0, eps))));
- check_gradient("softmax", ctx0, x, f, ndims, nargs, 1e-3f, 2e-1f, INFINITY);
- }
- }
- // cross_entropy_loss
- {
- const int nargs = 1;
- int64_t ne2[4];
- get_random_dims(ne2, 4);
- for (int ndims = 1; ndims <= 4; ++ndims) {
- x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -0.1f, 0.1f);
- x[1] = get_random_tensor_f32(ctx0, ndims, ne2, 0.0f, 1.0f);
- // the second argument to cross_entropy_loss must sum up to 1 for each row
- int nr = ggml_nrows(x[1]);
- int nc = ggml_nelements(x[1]) / nr;
- for (int ir = 0; ir < nr; ++ir) {
- float sum = 0;
- for (int ic = 0; ic < nc; ++ic) {
- sum += ((float *) x[1]->data)[ic + ir*nc];
- }
- for (int ic = 0; ic < nc; ++ic) {
- ((float *) x[1]->data)[ic + ir*nc] /= sum;
- }
- }
- ggml_set_param(ctx0, x[0]);
- struct ggml_tensor * f = ggml_cross_entropy_loss(ctx0, x[0], x[1]);
- check_gradient("cross_entropy_loss", ctx0, x, f, ndims, nargs, 1e-4f, 1e-3f, INFINITY);
- }
- }
- // rope f32
- {
- const int nargs = 1;
- int64_t ne2[4];
- get_random_dims(ne2, 4);
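- // round ne2[0] up to an even value: rope rotates pairs of dimensions, so n_rot must be even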
- ne2[0] += ne2[0] % 2;
- int n_rot = ne2[0];
- for (int ndims = 3; ndims <= 4; ++ndims) {
- for (int mode = 0; mode < 4; ++mode) {
- for (int n_past = 1; n_past < ne2[2]; ++n_past) {
- x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[0]);
- const bool skip_past = (mode & 1);
- if (skip_past) {
- // we have no past, so this would have to work on uninitialized memory.
- // we only test the gradients here;
- // skip_past should have no influence on gradient computation.
- // so when other modes work, we assume that this does as well.
- continue;
- }
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_rope(ctx0, x[0], n_past, n_rot, mode, 0));
- GGML_PRINT_DEBUG("rope f32: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode);
- check_gradient("rope f32", ctx0, x, f, ndims, nargs, 1e-2f, 1e-3f, INFINITY);
- }
- }
- }
- }
- // rope f16
- {
- const int nargs = 1;
- int64_t ne2[4];
- get_random_dims(ne2, 4);
- ne2[0] += ne2[0] % 2;
- int n_rot = ne2[0];
- for (int ndims = 3; ndims <= 4; ++ndims) {
- for (int mode = 0; mode < 4; ++mode) {
- for (int n_past = 1; n_past < ne2[2]; ++n_past) {
- x[0] = get_random_tensor_f16(ctx0, ndims, ne2, -1.0f, 1.0f);
- ggml_set_param(ctx0, x[0]);
- const bool skip_past = (mode & 1);
- if (skip_past) {
- // we have no past, so this would have to work on uninitialized memory.
- // we only test the gradients here;
- // skip_past should have no influence on gradient computation.
- // so when other modes work, we assume that this does as well.
- continue;
- }
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_rope(ctx0, x[0], n_past, n_rot, mode, 0));
- GGML_PRINT_DEBUG("rope f16: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode);
- check_gradient("rope f16", ctx0, x, f, ndims, nargs, 1e-1f, 1e-1f, INFINITY);
- }
- }
- }
- }
- // flash_attn f32
- {
- const int nargs = 3;
- int64_t ne2[4];
- get_random_dims(ne2, 4);
- int64_t D = ne2[0];
- int64_t N = ne2[1];
- int64_t M = ne2[2] + N;
- int64_t B = ne2[3];
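- // per 2D slice: Q is D x N, K is D x M, V is M x D;
- // M = ne2[2] + N keeps the KV length strictly greater than the query length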
- for (int masked = 0; masked <= 1; ++masked) {
- for (int ndims = 2; ndims <= 4; ++ndims) {
- int64_t neq[4] = { D, N, B, ne[3] };
- int64_t nek[4] = { D, M, B, ne[3] };
- int64_t nev[4] = { M, D, B, ne[3] };
- if (ndims == 2) {
- neq[2] = 1; neq[3] = 1;
- nek[2] = 1; nek[3] = 1;
- nev[2] = 1; nev[3] = 1;
- } else if (ndims == 3) {
- neq[3] = 1;
- nek[3] = 1;
- nev[3] = 1;
- }
- x[0] = get_random_tensor_f32(ctx0, ndims, neq, -0.1250f, 0.1250f);
- x[1] = get_random_tensor_f32(ctx0, ndims, nek, -0.1250f, 0.1250f);
- x[2] = get_random_tensor_f32(ctx0, ndims, nev, -0.1250f, 0.1250f);
- ggml_set_param(ctx0, x[0]);
- ggml_set_param(ctx0, x[1]);
- ggml_set_param(ctx0, x[2]);
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_flash_attn(ctx0, x[0], x[1], x[2], (masked == 0)));
- check_gradient("flash_attn f32", ctx0, x, f, ndims, nargs, 1.5e-4f, 1e-3f, INFINITY);
- }
- }
- }
- // flash_attn f16, not yet fully implemented
- if(0)
- {
- const int nargs = 3;
- int64_t ne2[4];
- get_random_dims(ne2, 4);
- int64_t D = ne2[0];
- int64_t N = ne2[1];
- int64_t M = ne2[2] + N;
- int64_t B = ne2[3];
- for (int masked = 0; masked <= 1; ++masked) {
- for (int ndims = 2; ndims <= 4; ++ndims) {
- int64_t neq[4] = { D, N, B, ne[3] };
- int64_t nek[4] = { D, M, B, ne[3] };
- int64_t nev[4] = { M, D, B, ne[3] };
- if (ndims == 2) {
- neq[2] = 1; neq[3] = 1;
- nek[2] = 1; nek[3] = 1;
- nev[2] = 1; nev[3] = 1;
- } else if (ndims == 3) {
- neq[3] = 1;
- nek[3] = 1;
- nev[3] = 1;
- }
- x[0] = get_random_tensor_f16(ctx0, ndims, neq, -0.1250f, 0.1250f);
- x[1] = get_random_tensor_f16(ctx0, ndims, nek, -0.1250f, 0.1250f);
- x[2] = get_random_tensor_f16(ctx0, ndims, nev, -0.1250f, 0.1250f);
- ggml_set_param(ctx0, x[0]);
- ggml_set_param(ctx0, x[1]);
- ggml_set_param(ctx0, x[2]);
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_flash_attn(ctx0, x[0], x[1], x[2], (masked == 0)));
- check_gradient("flash_attn f16", ctx0, x, f, ndims, nargs, 1.5e-4f, 1e-3f, INFINITY);
- }
- }
- }
- ggml_free(ctx0);
- }
- return 0;
- }