third_party_ggml.py 269 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781
  1. """This module is the core of the ggml-python library, it exposes a low-level [ctypes](https://docs.python.org/3/library/ctypes.html)-based interface for ggml.
  2. Structures and functions in the `ggml.ggml` module map directly to the original ggml C library and
  3. they operate at a fairly low level.
No additional runtime checks are performed nor is memory management handled automatically.
  5. You've been warned :).
  6. With that in mind here are some useful things to keep in mind
  7. - Functions accept both ctypes types (c_int, c_bool, c_float, etc.) and Python types (int, bool, float, etc.) as parameters.
  8. - Functions return Python types for simple values (int, bool, float, etc.) and ctypes types for complex values ([ggml_context_p][ggml.ggml_context_p], [ggml_tensor_p][ggml.ggml_tensor_p], etc.).
  9. - Memory management is the responsibility of the user. The user must call [ggml.ggml_free][] on the context after calling [ggml.ggml_init][].
  10. Example
  11. ```python
  12. import ggml
  13. import ctypes
  14. # Allocate a new context with 16 MB of memory
  15. params = ggml.ggml_init_params(mem_size=16 * 1024 * 1024, mem_buffer=None)
  16. ctx = ggml.ggml_init(params=params)
  17. # Instantiate tensors
  18. x = ggml.ggml_new_tensor_1d(ctx, ggml.GGML_TYPE_F32, 1)
  19. a = ggml.ggml_new_tensor_1d(ctx, ggml.GGML_TYPE_F32, 1)
  20. b = ggml.ggml_new_tensor_1d(ctx, ggml.GGML_TYPE_F32, 1)
  21. # Use ggml operations to build a computational graph
  22. x2 = ggml.ggml_mul(ctx, x, x)
  23. f = ggml.ggml_add(ctx, ggml.ggml_mul(ctx, a, x2), b)
  24. gf = ggml.ggml_new_graph(ctx)
  25. ggml.ggml_build_forward_expand(gf, f)
  26. # Set the input values
  27. ggml.ggml_set_f32(x, 2.0)
  28. ggml.ggml_set_f32(a, 3.0)
  29. ggml.ggml_set_f32(b, 4.0)
  30. # Compute the graph
  31. ggml.ggml_graph_compute_with_ctx(ctx, gf, 1)
  32. # Get the output value
  33. output = ggml.ggml_get_f32_1d(f, 0)
  34. assert output == 16.0
  35. # Free the context
  36. ggml.ggml_free(ctx)
  37. ```
  38. """
  39. import os
  40. import sys
  41. import ctypes
  42. import pathlib
  43. import importlib.resources
  44. from pathlib import Path
  45. from typing import List, Optional, Sequence, Union
  46. from typing_extensions import TypeAlias
  47. # Load the library
  48. def load_shared_library(base_path: Path, lib_base_name: str):
  49. # Construct the paths to the possible shared library names
  50. # Searching for the library in the current directory under the name "libggml" (default name
  51. # for ggml) and "ggml" (default name for this repo)
  52. lib_names: List[str] = [
  53. f"lib{lib_base_name}.so",
  54. f"lib{lib_base_name}.dylib",
  55. f"{lib_base_name}.dll",
  56. ]
  57. cdll_args = dict() # type: ignore
  58. # Add the library directory to the DLL search path on Windows (if needed)
  59. if sys.platform == "win32" and sys.version_info >= (3, 8):
  60. os.add_dll_directory(str(base_path))
  61. cdll_args["winmode"] = 0
  62. for lib_name in lib_names:
  63. # Try to load the shared library, handling potential errors
  64. path = base_path / lib_name
  65. if not path.exists():
  66. continue
  67. try:
  68. return ctypes.CDLL(str(path), **cdll_args)
  69. except Exception as e:
  70. raise RuntimeError(f"Failed to load shared library '{path}': {e}")
  71. raise FileNotFoundError(
  72. f"Shared library with base name '{lib_base_name}' not found in {base_path}"
  73. )
# Location of the compiled fairseq2 unity shared library, relative to this file.
base_path = pathlib.Path(__file__).parent.resolve() / "build/examples/unity"
lib_base_name = "fairseq2_cpp"
# Module-level handle to the loaded shared library; every binding below
# registers its signature on, and calls through, this handle.
lib = load_shared_library(base_path, lib_base_name)
#####################################################
# GGML Utility Types
#####################################################
# String type aliases for the ctypes array/pointer shapes used throughout the
# bindings (quoted so they are only evaluated by type checkers, not at runtime).
CFloatArray: TypeAlias = "ctypes.Array[ctypes.c_float]"
CInt64Array: TypeAlias = "ctypes.Array[ctypes.c_int64]"
CIntPointer: TypeAlias = "ctypes._Pointer[ctypes.c_int]" # type: ignore
CCharPointer: TypeAlias = "ctypes._Pointer[ctypes.c_char]" # type: ignore
#####################################################
# GGML API
# source: ggml.h
#####################################################
# Python mirrors of the #define constants from ggml.h; the C snippets are kept
# verbatim above each group for cross-checking against the header.
# define GGML_FILE_MAGIC 0x67676d6c // "ggml"
# define GGML_FILE_VERSION 1
GGML_FILE_MAGIC = 0x67676D6C
GGML_FILE_VERSION = 1
# define GGML_QNT_VERSION 2 // bump this on quantization format changes
# define GGML_QNT_VERSION_FACTOR 1000 // do not change this
GGML_QNT_VERSION = 2
GGML_QNT_VERSION_FACTOR = 1000
# define GGML_MAX_DIMS 4
# define GGML_MAX_PARAMS 2048
# define GGML_MAX_CONTEXTS 64
# define GGML_MAX_SRC 10
# define GGML_MAX_NAME 64
# define GGML_MAX_OP_PARAMS 64
# define GGML_DEFAULT_N_THREADS 4
# define GGML_DEFAULT_GRAPH_SIZE 2048
GGML_MAX_DIMS = 4
GGML_MAX_PARAMS = 2048
GGML_MAX_CONTEXTS = 64
GGML_MAX_SRC = 10
GGML_MAX_NAME = 64
GGML_MAX_OP_PARAMS = 64
GGML_DEFAULT_N_THREADS = 4
GGML_DEFAULT_GRAPH_SIZE = 2048
  112. # #if UINTPTR_MAX == 0XFFFFFFFF
  113. # #define GGML_MEMALIGN 4
  114. # #else
  115. # # define GGML_MEMALIGN 16
  116. # #endif
  117. GGML_MEMALIGN = (
  118. 16 if ctypes.sizeof(ctypes.c_void_p) == 4 else 32
  119. ) # FIXME: Check if this is correct
# #define GGML_EXIT_SUCCESS 0
GGML_EXIT_SUCCESS = 0
# #define GGML_EXIT_ABORTED 1
GGML_EXIT_ABORTED = 1
# define GGUF_MAGIC "GGUF"
GGUF_MAGIC = "GGUF"
# define GGUF_VERSION 3
GGUF_VERSION = 3
# #define GGUF_DEFAULT_ALIGNMENT 32
GGUF_DEFAULT_ALIGNMENT = 32
# TODO: Check if this is correct
# typedef uint16_t ggml_fp16_t;
# Half-precision values cross the C boundary as raw 16-bit patterns; use
# ggml_fp16_to_fp32 / ggml_fp32_to_fp16 below to convert to/from floats.
ggml_fp16_t = ctypes.c_uint16
CFP16Array: TypeAlias = "ctypes.Array[ggml_fp16_t]"
# GGML_API float ggml_fp16_to_fp32(ggml_fp16_t x);
def ggml_fp16_to_fp32(x: ggml_fp16_t) -> float:
    """Convert a half-precision value (raw uint16 bit pattern) to a float."""
    return lib.ggml_fp16_to_fp32(x)
lib.ggml_fp16_to_fp32.argtypes = [ggml_fp16_t]
lib.ggml_fp16_to_fp32.restype = ctypes.c_float
# GGML_API ggml_fp16_t ggml_fp32_to_fp16(float x);
def ggml_fp32_to_fp16(x: ctypes.c_float) -> int:
    """Convert a float to its half-precision bit pattern (returned as int)."""
    return lib.ggml_fp32_to_fp16(x)
lib.ggml_fp32_to_fp16.argtypes = [ctypes.c_float]
lib.ggml_fp32_to_fp16.restype = ggml_fp16_t
# GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, size_t n);
def ggml_fp16_to_fp32_row(
    x: CFP16Array,
    y: CFloatArray,
    n: Union[ctypes.c_int, int],
) -> None:
    """Convert `n` half-precision values from `x` into float array `y` in place."""
    return lib.ggml_fp16_to_fp32_row(x, y, n)
# NOTE(review): the C prototype declares `n` as size_t, but the binding
# registers ctypes.c_int — fine for n < 2**31; confirm before fixing, since
# the annotation invites callers to pass ctypes.c_int instances.
lib.ggml_fp16_to_fp32_row.argtypes = [
    ctypes.POINTER(ggml_fp16_t),
    ctypes.POINTER(ctypes.c_float),
    ctypes.c_int,
]
lib.ggml_fp16_to_fp32_row.restype = None
# GGML_API void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, size_t n);
def ggml_fp32_to_fp16_row(
    x: CFloatArray,
    y: CFP16Array,
    n: Union[ctypes.c_int, int],
) -> None:
    """Convert `n` floats from `x` into half-precision array `y` in place."""
    return lib.ggml_fp32_to_fp16_row(x, y, n)
# NOTE(review): same size_t vs c_int mismatch as ggml_fp16_to_fp32_row.
lib.ggml_fp32_to_fp16_row.argtypes = [
    ctypes.POINTER(ctypes.c_float),
    ctypes.POINTER(ggml_fp16_t),
    ctypes.c_int,
]
lib.ggml_fp32_to_fp16_row.restype = None
# struct ggml_context;
ggml_context_p = ctypes.c_void_p
"""Opaque pointer to a ggml_context.
ggml_context structs are not accessed directly; instead they must be created using [ggml_init](ggml.ggml_init) and freed using [ggml_free](ggml.ggml_free)."""
# enum ggml_type {
# GGML_TYPE_F32 = 0,
# GGML_TYPE_F16 = 1,
# GGML_TYPE_Q4_0 = 2,
# GGML_TYPE_Q4_1 = 3,
# // GGML_TYPE_Q4_2 = 4, support has been removed
# // GGML_TYPE_Q4_3 (5) support has been removed
# GGML_TYPE_Q5_0 = 6,
# GGML_TYPE_Q5_1 = 7,
# GGML_TYPE_Q8_0 = 8,
# GGML_TYPE_Q8_1 = 9,
# GGML_TYPE_Q2_K = 10,
# GGML_TYPE_Q3_K = 11,
# GGML_TYPE_Q4_K = 12,
# GGML_TYPE_Q5_K = 13,
# GGML_TYPE_Q6_K = 14,
# GGML_TYPE_Q8_K = 15,
# GGML_TYPE_I8,
# GGML_TYPE_I16,
# GGML_TYPE_I32,
# GGML_TYPE_COUNT,
# };
# Tensor element types.  Values 4 and 5 are intentionally skipped (removed
# quantization formats) to keep the numbering aligned with the C enum.
GGML_TYPE_F32 = 0
GGML_TYPE_F16 = 1
GGML_TYPE_Q4_0 = 2
GGML_TYPE_Q4_1 = 3
GGML_TYPE_Q5_0 = 6
GGML_TYPE_Q5_1 = 7
GGML_TYPE_Q8_0 = 8
GGML_TYPE_Q8_1 = 9
GGML_TYPE_Q2_K = 10
GGML_TYPE_Q3_K = 11
GGML_TYPE_Q4_K = 12
GGML_TYPE_Q5_K = 13
GGML_TYPE_Q6_K = 14
GGML_TYPE_Q8_K = 15
GGML_TYPE_I8 = 16
GGML_TYPE_I16 = 17
GGML_TYPE_I32 = 18
GGML_TYPE_COUNT = 19
# enum ggml_backend_type {
# GGML_BACKEND_CPU = 0,
# GGML_BACKEND_GPU = 10,
# GGML_BACKEND_GPU_SPLIT = 20,
# };
GGML_BACKEND_CPU = 0
GGML_BACKEND_GPU = 10
GGML_BACKEND_GPU_SPLIT = 20
# // model file types
# enum ggml_ftype {
# GGML_FTYPE_UNKNOWN = -1,
# GGML_FTYPE_ALL_F32 = 0,
# GGML_FTYPE_MOSTLY_F16 = 1, // except 1d tensors
# GGML_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors
# GGML_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors
# GGML_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16
# GGML_FTYPE_MOSTLY_Q8_0 = 7, // except 1d tensors
# GGML_FTYPE_MOSTLY_Q5_0 = 8, // except 1d tensors
# GGML_FTYPE_MOSTLY_Q5_1 = 9, // except 1d tensors
# GGML_FTYPE_MOSTLY_Q2_K = 10, // except 1d tensors
# GGML_FTYPE_MOSTLY_Q3_K = 11, // except 1d tensors
# GGML_FTYPE_MOSTLY_Q4_K = 12, // except 1d tensors
# GGML_FTYPE_MOSTLY_Q5_K = 13, // except 1d tensors
# GGML_FTYPE_MOSTLY_Q6_K = 14, // except 1d tensors
# };
# Model file types; values 5 and 6 are skipped to match the C enum.
GGML_FTYPE_UNKNOWN = -1
GGML_FTYPE_ALL_F32 = 0
GGML_FTYPE_MOSTLY_F16 = 1
GGML_FTYPE_MOSTLY_Q4_0 = 2
GGML_FTYPE_MOSTLY_Q4_1 = 3
GGML_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4
GGML_FTYPE_MOSTLY_Q8_0 = 7
GGML_FTYPE_MOSTLY_Q5_0 = 8
GGML_FTYPE_MOSTLY_Q5_1 = 9
GGML_FTYPE_MOSTLY_Q2_K = 10
GGML_FTYPE_MOSTLY_Q3_K = 11
GGML_FTYPE_MOSTLY_Q4_K = 12
GGML_FTYPE_MOSTLY_Q5_K = 13
GGML_FTYPE_MOSTLY_Q6_K = 14
# // available tensor operations:
# enum ggml_op {
# GGML_OP_NONE = 0,
# GGML_OP_DUP,
# GGML_OP_ADD,
# GGML_OP_ADD1,
# GGML_OP_ACC,
# GGML_OP_SUB,
# GGML_OP_MUL,
# GGML_OP_DIV,
# GGML_OP_SQR,
# GGML_OP_SQRT,
# GGML_OP_LOG,
# GGML_OP_SUM,
# GGML_OP_SUM_ROWS,
# GGML_OP_MEAN,
# GGML_OP_ARGMAX,
# GGML_OP_REPEAT,
# GGML_OP_REPEAT_BACK,
# GGML_OP_CONCAT,
# GGML_OP_SILU_BACK,
# GGML_OP_NORM, // normalize
# GGML_OP_RMS_NORM,
# GGML_OP_RMS_NORM_BACK,
# GGML_OP_GROUP_NORM,
# GGML_OP_MUL_MAT,
# GGML_OP_MUL_MAT_ID,
# GGML_OP_OUT_PROD,
# GGML_OP_SCALE,
# GGML_OP_SET,
# GGML_OP_CPY,
# GGML_OP_CONT,
# GGML_OP_RESHAPE,
# GGML_OP_VIEW,
# GGML_OP_PERMUTE,
# GGML_OP_TRANSPOSE,
# GGML_OP_GET_ROWS,
# GGML_OP_GET_ROWS_BACK,
# GGML_OP_DIAG,
# GGML_OP_DIAG_MASK_INF,
# GGML_OP_DIAG_MASK_ZERO,
# GGML_OP_SOFT_MAX,
# GGML_OP_SOFT_MAX_BACK,
# GGML_OP_ROPE,
# GGML_OP_ROPE_BACK,
# GGML_OP_ALIBI,
# GGML_OP_CLAMP,
# GGML_OP_CONV_TRANSPOSE_1D,
# GGML_OP_IM2COL,
# GGML_OP_CONV_TRANSPOSE_2D,
# GGML_OP_POOL_1D,
# GGML_OP_POOL_2D,
# GGML_OP_UPSCALE, // nearest interpolate
# GGML_OP_PAD,
# GGML_OP_ARGSORT,
# GGML_OP_LEAKY_RELU,
# GGML_OP_FLASH_ATTN,
# GGML_OP_FLASH_FF,
# GGML_OP_FLASH_ATTN_BACK,
# GGML_OP_WIN_PART,
# GGML_OP_WIN_UNPART,
# GGML_OP_GET_REL_POS,
# GGML_OP_ADD_REL_POS,
# GGML_OP_UNARY,
# GGML_OP_MAP_UNARY,
# GGML_OP_MAP_BINARY,
# GGML_OP_MAP_CUSTOM1_F32,
# GGML_OP_MAP_CUSTOM2_F32,
# GGML_OP_MAP_CUSTOM3_F32,
# GGML_OP_MAP_CUSTOM1,
# GGML_OP_MAP_CUSTOM2,
# GGML_OP_MAP_CUSTOM3,
# GGML_OP_CROSS_ENTROPY_LOSS,
# GGML_OP_CROSS_ENTROPY_LOSS_BACK,
# GGML_OP_COUNT,
# };
# Tensor operations.  Values are assigned sequentially to mirror the implicit
# numbering of the C enum above — keep both lists in the same order.
GGML_OP_NONE = 0
GGML_OP_DUP = 1
GGML_OP_ADD = 2
GGML_OP_ADD1 = 3
GGML_OP_ACC = 4
GGML_OP_SUB = 5
GGML_OP_MUL = 6
GGML_OP_DIV = 7
GGML_OP_SQR = 8
GGML_OP_SQRT = 9
GGML_OP_LOG = 10
GGML_OP_SUM = 11
GGML_OP_SUM_ROWS = 12
GGML_OP_MEAN = 13
GGML_OP_ARGMAX = 14
GGML_OP_REPEAT = 15
GGML_OP_REPEAT_BACK = 16
GGML_OP_CONCAT = 17
GGML_OP_SILU_BACK = 18
GGML_OP_NORM = 19
GGML_OP_RMS_NORM = 20
GGML_OP_RMS_NORM_BACK = 21
GGML_OP_GROUP_NORM = 22
GGML_OP_MUL_MAT = 23
GGML_OP_MUL_MAT_ID = 24
GGML_OP_OUT_PROD = 25
GGML_OP_SCALE = 26
GGML_OP_SET = 27
GGML_OP_CPY = 28
GGML_OP_CONT = 29
GGML_OP_RESHAPE = 30
GGML_OP_VIEW = 31
GGML_OP_PERMUTE = 32
GGML_OP_TRANSPOSE = 33
GGML_OP_GET_ROWS = 34
GGML_OP_GET_ROWS_BACK = 35
GGML_OP_DIAG = 36
GGML_OP_DIAG_MASK_INF = 37
GGML_OP_DIAG_MASK_ZERO = 38
GGML_OP_SOFT_MAX = 39
GGML_OP_SOFT_MAX_BACK = 40
GGML_OP_ROPE = 41
GGML_OP_ROPE_BACK = 42
GGML_OP_ALIBI = 43
GGML_OP_CLAMP = 44
GGML_OP_CONV_TRANSPOSE_1D = 45
GGML_OP_IM2COL = 46
GGML_OP_CONV_TRANSPOSE_2D = 47
GGML_OP_POOL_1D = 48
GGML_OP_POOL_2D = 49
GGML_OP_UPSCALE = 50
GGML_OP_PAD = 51
GGML_OP_ARGSORT = 52
GGML_OP_LEAKY_RELU = 53
GGML_OP_FLASH_ATTN = 54
GGML_OP_FLASH_FF = 55
GGML_OP_FLASH_ATTN_BACK = 56
GGML_OP_WIN_PART = 57
GGML_OP_WIN_UNPART = 58
GGML_OP_GET_REL_POS = 59
GGML_OP_ADD_REL_POS = 60
GGML_OP_UNARY = 61
GGML_OP_MAP_UNARY = 62
GGML_OP_MAP_BINARY = 63
GGML_OP_MAP_CUSTOM1_F32 = 64
GGML_OP_MAP_CUSTOM2_F32 = 65
GGML_OP_MAP_CUSTOM3_F32 = 66
GGML_OP_MAP_CUSTOM1 = 67
GGML_OP_MAP_CUSTOM2 = 68
GGML_OP_MAP_CUSTOM3 = 69
GGML_OP_CROSS_ENTROPY_LOSS = 70
GGML_OP_CROSS_ENTROPY_LOSS_BACK = 71
GGML_OP_COUNT = 72
# enum ggml_unary_op {
# GGML_UNARY_OP_ABS,
# GGML_UNARY_OP_SGN,
# GGML_UNARY_OP_NEG,
# GGML_UNARY_OP_STEP,
# GGML_UNARY_OP_TANH,
# GGML_UNARY_OP_ELU,
# GGML_UNARY_OP_RELU,
# GGML_UNARY_OP_GELU,
# GGML_UNARY_OP_GELU_QUICK,
# GGML_UNARY_OP_SILU,
# GGML_UNARY_OP_LEAKY
# GGML_UNARY_OP_COUNT,
# };
# Unary (elementwise) operations, numbered sequentially like the C enum.
GGML_UNARY_OP_ABS = 0
GGML_UNARY_OP_SGN = 1
GGML_UNARY_OP_NEG = 2
GGML_UNARY_OP_STEP = 3
GGML_UNARY_OP_TANH = 4
GGML_UNARY_OP_ELU = 5
GGML_UNARY_OP_RELU = 6
GGML_UNARY_OP_GELU = 7
GGML_UNARY_OP_GELU_QUICK = 8
GGML_UNARY_OP_SILU = 9
GGML_UNARY_OP_LEAKY = 10
GGML_UNARY_OP_COUNT = 11
# enum ggml_object_type {
# GGML_OBJECT_TENSOR,
# GGML_OBJECT_GRAPH,
# GGML_OBJECT_WORK_BUFFER
# };
GGML_OBJECT_TENSOR = 0
GGML_OBJECT_GRAPH = 1
GGML_OBJECT_WORK_BUFFER = 2
# enum ggml_log_level {
# GGML_LOG_LEVEL_ERROR = 2,
# GGML_LOG_LEVEL_WARN = 3,
# GGML_LOG_LEVEL_INFO = 4
# };
GGML_LOG_LEVEL_ERROR = 2
GGML_LOG_LEVEL_WARN = 3
GGML_LOG_LEVEL_INFO = 4
# // ggml object
# struct ggml_object {
# size_t offs;
# size_t size;
# struct ggml_object * next;
# enum ggml_object_type type;
# char padding[4];
# };
class ggml_object(ctypes.Structure):
    """Header of an allocation inside a ggml context buffer (mirrors `struct ggml_object`).

    Attributes:
        offs (int): byte offset of the object
        size (int): size of the object in bytes
        next (ggml_object_p): pointer to the next object
        type (int): ggml_object_type of the payload
        padding (bytes): 4 bytes of padding
    """
    pass
# _fields_ is assigned after the class definition so that the self-referential
# `next` pointer can refer to the (by then declared) ggml_object type.
ggml_object._fields_ = [
    ("offs", ctypes.c_size_t),
    ("size", ctypes.c_size_t),
    ("next", ctypes.POINTER(ggml_object)),
    ("type", ctypes.c_int),
    ("padding", ctypes.c_char * 4),
]
ggml_object_p: TypeAlias = "ctypes._Pointer[ggml_object]" # type: ignore
GGML_OBJECT_SIZE = ctypes.sizeof(ggml_object)
# // n-dimensional tensor
# struct ggml_tensor {
# enum ggml_type type;
# enum ggml_backend_type backend;
# struct ggml_backend_buffer * buffer;
# int n_dims;
# int64_t ne[GGML_MAX_DIMS]; // number of elements
# size_t nb[GGML_MAX_DIMS]; // stride in bytes:
# // nb[0] = ggml_type_size(type)
# // nb[1] = nb[0] * (ne[0] / ggml_blck_size(type)) + padding
# // nb[i] = nb[i-1] * ne[i-1]
# // compute data
# enum ggml_op op;
# // op params - allocated as int32_t for alignment
# int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t)];
# bool is_param;
# struct ggml_tensor * grad;
# struct ggml_tensor * src[GGML_MAX_SRC];
# // performance
# int perf_runs;
# int64_t perf_cycles;
# int64_t perf_time_us;
# struct ggml_tensor * view_src;
# size_t view_offs;
# void * data;
# char name[GGML_MAX_NAME];
# void * extra; // extra things e.g. for ggml-cuda.cu
# char padding[12];
# };
class ggml_tensor(ctypes.Structure):
    """n-dimensional tensor

    Attributes:
        type (int): ggml_type
        backend (int): ggml_backend
        buffer (ctypes.pointer[ggml_backend_buffer]): pointer to backend buffer
        n_dims (int): number of dimensions
        ne (ctypes.Array[ctypes.c_int64]): number of elements in each dimension
        nb (ctypes.Array[ctypes.c_size_t]): stride in bytes for each dimension
        op (int): ggml operation
        op_params (ctypes.Array[ctypes.c_int32]): `GGML_MAX_OP_PARAMS`-length array of operation parameters
        is_param (bool): is this a parameter tensor
        grad (ggml_tensor_p): reference to gradient tensor
        src (ctypes.Array[ggml_tensor_p]): `GGML_MAX_SRC`-length array of source tensors
        perf_runs (int): number of performance runs
        perf_cycles (int): number of cycles
        perf_time_us (int): time in microseconds
        view_src (ggml_tensor_p): pointer to tensor if this tensor is a view, None if the tensor is not a view
        view_offs (ctypes.c_size_t): offset into the data pointer of the view tensor
        data (ctypes.c_void_p): reference to raw tensor data
        name (bytes): name of tensor
        extra (ctypes.c_void_p): extra data (e.g. for CUDA)
    """
    pass
# _fields_ is assigned after the class definition so that the self-referential
# `grad`/`src`/`view_src` pointers can refer to ggml_tensor itself.
ggml_tensor._fields_ = [
    ("type", ctypes.c_int),
    ("backend", ctypes.c_int),
    ("buffer", ctypes.c_void_p),
    ("n_dims", ctypes.c_int),
    ("ne", ctypes.c_int64 * GGML_MAX_DIMS),
    ("nb", ctypes.c_size_t * GGML_MAX_DIMS),
    ("op", ctypes.c_int),
    (
        "op_params",
        ctypes.c_int32 * (GGML_MAX_OP_PARAMS // ctypes.sizeof(ctypes.c_int32)),
    ),
    ("is_param", ctypes.c_bool),
    ("grad", ctypes.POINTER(ggml_tensor)),
    ("src", ctypes.POINTER(ggml_tensor) * GGML_MAX_SRC),
    ("perf_runs", ctypes.c_int),
    ("perf_cycles", ctypes.c_int64),
    ("perf_time_us", ctypes.c_int64),
    ("view_src", ctypes.POINTER(ggml_tensor)),
    ("view_offs", ctypes.c_size_t),
    ("data", ctypes.c_void_p),
    ("name", ctypes.c_char * GGML_MAX_NAME),
    ("extra", ctypes.c_void_p),
    ("padding", ctypes.c_char * 12),
]
GGML_TENSOR_SIZE = ctypes.sizeof(ggml_tensor)
ggml_tensor_p: TypeAlias = "ctypes._Pointer[ggml_tensor]" # type: ignore
"""ctypes pointer to a [ggml_tensor][ggml.ggml_tensor]
Can be dereferenced to a [ggml_tensor][ggml.ggml_tensor] object using
the `.contents` attribute."""
# Callback signature polled by ggml during graph compute; returning True
# aborts the computation (see `abort_callback` in ggml_cplan below).
abort_callback_t = ctypes.CFUNCTYPE(ctypes.c_bool, ctypes.c_void_p)
# // the compute plan that needs to be prepared for ggml_graph_compute()
# // since https://github.com/ggerganov/ggml/issues/287
# struct ggml_cplan {
# size_t work_size; // size of work buffer, calculated by `ggml_graph_plan()`
# uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
# int n_threads;
# // abort ggml_graph_compute when true
# bool (*abort_callback)(void * data);
# void * abort_callback_data;
# };
class ggml_cplan(ctypes.Structure):
    """Compute plan for a ggml computation graph

    Attributes:
        work_size (int): size of work buffer
        work_data (ctypes.pointer[ctypes.c_uint8]): work buffer
        n_threads (int): number of threads
        abort_callback (abort_callback_t): abort callback
        abort_callback_data (ctypes.c_void_p): abort callback data
    """
    _fields_ = [
        ("work_size", ctypes.c_size_t),
        ("work_data", ctypes.POINTER(ctypes.c_uint8)),
        ("n_threads", ctypes.c_int),
        (
            "abort_callback",
            abort_callback_t,
        ),
        ("abort_callback_data", ctypes.c_void_p),
    ]
GGML_CPLAN_SIZE = ctypes.sizeof(ggml_cplan)
ggml_cplan_p: TypeAlias = "ctypes._Pointer[ggml_cplan]" # type: ignore
"""ctypes pointer to a [ggml_cplan][ggml.ggml_cplan]
Can be dereferenced to a [ggml_cplan][ggml.ggml_cplan] object using
the `.contents` attribute."""
# enum ggml_cgraph_eval_order {
# GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT = 0,
# GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT,
# GGML_CGRAPH_EVAL_ORDER_COUNT
# };
GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT = 0
GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT = 1
GGML_CGRAPH_EVAL_ORDER_COUNT = 2
# struct ggml_hash_set {
# size_t size;
# struct ggml_tensor ** keys;
# };
class ggml_hash_set(ctypes.Structure):
    """Hash set of tensor pointers (mirrors `struct ggml_hash_set`).

    Attributes:
        size (int): size of the `keys` array
        keys: array of ggml_tensor pointers
    """
    _fields_ = [
        ("size", ctypes.c_size_t),
        ("keys", ctypes.POINTER(ctypes.POINTER(ggml_tensor))),
    ]
# // computation graph
# struct ggml_cgraph {
# int size;
# int n_nodes;
# int n_leafs;
# struct ggml_tensor ** nodes;
# struct ggml_tensor ** grads;
# struct ggml_tensor ** leafs;
# struct ggml_hash_set visited_hash_table;
# enum ggml_cgraph_eval_order order;
# // performance
# int perf_runs;
# int64_t perf_cycles;
# int64_t perf_time_us;
# };
class ggml_cgraph(ctypes.Structure):
    """ggml computation graph

    Attributes:
        size (int): allocated graph capacity (see `struct ggml_cgraph` in ggml.h)
        n_nodes (int): number of nodes
        n_leafs (int): number of leafs
        nodes (ctypes.Array[ggml_tensor_p]): `n_nodes`-length array of compute tensors
        grads (ctypes.Array[ggml_tensor_p]): `n_nodes`-length array of gradient tensors
        leafs (ctypes.Array[ggml_tensor_p]): `n_leafs`-length array of parameter tensors
        visited_hash_table (ctypes.Array[ctypes.POINTER(ggml_tensor)]): hash table of visited tensors
        order (int): evaluation order
        perf_runs (int): number of runs
        perf_cycles (int): number of cycles
        perf_time_us (int): computation time in microseconds"""
    _fields_ = [
        ("size", ctypes.c_int),
        ("n_nodes", ctypes.c_int),
        ("n_leafs", ctypes.c_int),
        ("nodes", ctypes.POINTER(ctypes.POINTER(ggml_tensor))),
        ("grads", ctypes.POINTER(ctypes.POINTER(ggml_tensor))),
        ("leafs", ctypes.POINTER(ctypes.POINTER(ggml_tensor))),
        ("visited_hash_table", ggml_hash_set),
        ("order", ctypes.c_int),
        ("perf_runs", ctypes.c_int),
        ("perf_cycles", ctypes.c_int64),
        ("perf_time_us", ctypes.c_int64),
    ]
ggml_cgraph_p: TypeAlias = "ctypes._Pointer[ggml_cgraph]" # type: ignore
"""ctypes pointer to a [ggml_cgraph][ggml.ggml_cgraph]
Can be dereferenced to a [ggml_cgraph][ggml.ggml_cgraph] object using
the `.contents` attribute."""
# struct ggml_scratch {
# size_t offs;
# size_t size;
# void * data;
# };
class ggml_scratch(ctypes.Structure):
    """Scratch (temporary) buffer description (mirrors `struct ggml_scratch`).

    Attributes:
        offs (int): offset into the buffer in bytes
        size (int): size of the buffer in bytes
        data: pointer to the buffer
    """
    _fields_ = [
        ("offs", ctypes.c_size_t),
        ("size", ctypes.c_size_t),
        ("data", ctypes.c_void_p),
    ]
# struct ggml_init_params {
# // memory pool
# size_t mem_size; // bytes
# void * mem_buffer; // if NULL, memory will be allocated internally
# bool no_alloc; // don't allocate memory for the tensor data
# };
class ggml_init_params(ctypes.Structure):
    """Initialization parameters for a ggml context

    **NOTE**: Reference counting does not cross into ggml, if you allocate a memory buffer
    in python using ctypes Arrays or a numpy array, you must keep a reference to it until
    you free the ggml context otherwise you will encounter a segmentation fault.

    Attributes:
        mem_size (int): size of memory pool in bytes
        mem_buffer (ctypes.c_void_p): pointer to memory pool, if None, memory will be allocated internally
        no_alloc (bool): don't allocate memory for tensor data
    """
    _fields_ = [
        ("mem_size", ctypes.c_size_t),
        ("mem_buffer", ctypes.c_void_p),
        ("no_alloc", ctypes.c_bool),
    ]
# // compute types
# // NOTE: the INIT or FINALIZE pass is not scheduled unless explicitly enabled.
# // This behavior was changed since https://github.com/ggerganov/llama.cpp/pull/1995.
# enum ggml_task_type {
# GGML_TASK_INIT = 0,
# GGML_TASK_COMPUTE,
# GGML_TASK_FINALIZE,
# };
GGML_TASK_INIT = 0
GGML_TASK_COMPUTE = 1
GGML_TASK_FINALIZE = 2
# struct ggml_compute_params {
# enum ggml_task_type type;
# // ith = thread index, nth = number of threads
# int ith, nth;
# // work buffer for all threads
# size_t wsize;
# void * wdata;
# };
class ggml_compute_params(ctypes.Structure):
    """Per-thread compute task parameters (mirrors `struct ggml_compute_params`).

    Attributes:
        type (int): ggml_task_type
        ith (int): thread index
        nth (int): number of threads
        wsize (int): size of the shared work buffer in bytes
        wdata: pointer to the work buffer
    """
    _fields_ = [
        ("type", ctypes.c_int),
        ("ith", ctypes.c_int),
        ("nth", ctypes.c_int),
        ("wsize", ctypes.c_size_t),
        ("wdata", ctypes.c_void_p),
    ]
ggml_compute_params_p: TypeAlias = "ctypes._Pointer[ggml_compute_params]" # type: ignore
  703. # // misc
  704. # GGML_API void ggml_time_init(void); // call this once at the beginning of the program
  705. def ggml_time_init():
  706. return lib.ggml_time_init()
  707. lib.ggml_time_init.argtypes = []
  708. lib.ggml_time_init.restype = None
  709. # GGML_API int64_t ggml_time_ms(void);
  710. def ggml_time_ms() -> int:
  711. return lib.ggml_time_ms()
  712. lib.ggml_time_ms.argtypes = []
  713. lib.ggml_time_ms.restype = ctypes.c_int64
  714. # GGML_API int64_t ggml_time_us(void);
  715. def ggml_time_us() -> int:
  716. return lib.ggml_time_us()
  717. lib.ggml_time_us.argtypes = []
  718. lib.ggml_time_us.restype = ctypes.c_int64
  719. # GGML_API int64_t ggml_cycles(void);
  720. def ggml_cycles() -> int:
  721. return lib.ggml_cycles()
  722. lib.ggml_cycles.argtypes = []
  723. lib.ggml_cycles.restype = ctypes.c_int64
  724. # GGML_API int64_t ggml_cycles_per_ms(void);
  725. def ggml_cycles_per_ms() -> int:
  726. return lib.ggml_cycles_per_ms()
  727. lib.ggml_cycles_per_ms.argtypes = []
  728. lib.ggml_cycles_per_ms.restype = ctypes.c_int64
  729. # GGML_API void ggml_print_backtrace(void);
  730. def ggml_print_backtrace():
  731. return lib.ggml_print_backtrace()
  732. lib.ggml_print_backtrace.argtypes = []
  733. lib.ggml_print_backtrace.restype = None
  734. # GGML_API void ggml_numa_init(void); // call once for better performance on NUMA systems
  735. def ggml_numa_init():
  736. return lib.ggml_numa_init()
  737. lib.ggml_numa_init.argtypes = []
  738. lib.ggml_numa_init.restype = None
  739. # GGML_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
  740. def ggml_is_numa() -> bool:
  741. return lib.ggml_is_numa()
  742. lib.ggml_is_numa.argtypes = []
  743. lib.ggml_is_numa.restype = ctypes.c_bool
  744. # GGML_API void ggml_print_object (const struct ggml_object * obj);
  745. def ggml_print_object(obj: ggml_object_p):
  746. return lib.ggml_print_object(obj)
  747. lib.ggml_print_object.argtypes = [ctypes.POINTER(ggml_object)]
  748. lib.ggml_print_object.restype = None
  749. # GGML_API void ggml_print_objects(const struct ggml_context * ctx);
  750. def ggml_print_objects(ctx: ggml_context_p):
  751. return lib.ggml_print_objects(ctx)
  752. lib.ggml_print_objects.argtypes = [ggml_context_p]
  753. lib.ggml_print_objects.restype = None
  754. # GGML_API int64_t ggml_nelements (const struct ggml_tensor * tensor);
  755. def ggml_nelements(
  756. tensor: ggml_tensor_p,
  757. ) -> int:
  758. """Get the number of elements in a tensor
  759. Parameters:
  760. tensor: tensor
  761. Returns:
  762. number of elements"""
  763. return lib.ggml_nelements(tensor)
  764. lib.ggml_nelements.argtypes = [ctypes.POINTER(ggml_tensor)]
  765. lib.ggml_nelements.restype = ctypes.c_int64
  766. # GGML_API int64_t ggml_nrows (const struct ggml_tensor * tensor);
  767. def ggml_nrows(
  768. tensor: ggml_tensor_p,
  769. ) -> int:
  770. """Get the number of rows in a tensor
  771. Parameters:
  772. tensor: tensor
  773. Returns:
  774. number of rows"""
  775. return lib.ggml_nrows(tensor)
  776. lib.ggml_nrows.argtypes = [ctypes.POINTER(ggml_tensor)]
  777. lib.ggml_nrows.restype = ctypes.c_int64
  778. # GGML_API size_t ggml_nbytes (const struct ggml_tensor * tensor);
  779. def ggml_nbytes(
  780. tensor: ggml_tensor_p,
  781. ) -> int:
  782. """Get the number of bytes required to store tensor data
  783. Parameters:
  784. tensor: tensor
  785. Returns:
  786. number of bytes"""
  787. return lib.ggml_nbytes(tensor)
  788. lib.ggml_nbytes.argtypes = [ctypes.POINTER(ggml_tensor)]
  789. lib.ggml_nbytes.restype = ctypes.c_size_t
  790. # GGML_API size_t ggml_nbytes_pad (const struct ggml_tensor * tensor); // same as ggml_nbytes() but padded to GGML_MEM_ALIGN
  791. def ggml_nbytes_pad(
  792. tensor: ggml_tensor_p,
  793. ) -> int:
  794. """Get the number of bytes required to store tensor data, padded to GGML_MEM_ALIGN
  795. Parameters:
  796. tensor: tensor
  797. Returns:
  798. number of bytes"""
  799. return lib.ggml_nbytes_pad(tensor)
  800. lib.ggml_nbytes_pad.argtypes = [ctypes.POINTER(ggml_tensor)]
  801. lib.ggml_nbytes_pad.restype = ctypes.c_size_t
  802. # GGML_API size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split);
  803. def ggml_nbytes_split(
  804. tensor: ggml_tensor_p,
  805. nrows_split: Union[ctypes.c_int, int],
  806. ) -> int:
  807. return lib.ggml_nbytes_split(tensor, nrows_split)
  808. lib.ggml_nbytes_split.argtypes = [ctypes.POINTER(ggml_tensor), ctypes.c_int]
  809. lib.ggml_nbytes_split.restype = ctypes.c_size_t
  810. # GGML_API int ggml_blck_size (enum ggml_type type);
  811. def ggml_blck_size(type: Union[ctypes.c_int, int]) -> int:
  812. return lib.ggml_blck_size(type)
  813. lib.ggml_blck_size.argtypes = [ctypes.c_int]
  814. lib.ggml_blck_size.restype = ctypes.c_int
  815. # GGML_API size_t ggml_type_size (enum ggml_type type); // size in bytes for all elements in a block
  816. def ggml_type_size(type: Union[ctypes.c_int, int]) -> int:
  817. return lib.ggml_type_size(type)
  818. lib.ggml_type_size.argtypes = [ctypes.c_int]
  819. lib.ggml_type_size.restype = ctypes.c_size_t
  820. # GGML_API float ggml_type_sizef(enum ggml_type type); // ggml_type_size()/ggml_blck_size() as float
  821. def ggml_type_sizef(type: Union[ctypes.c_int, int]) -> float:
  822. return lib.ggml_type_sizef(type)
  823. lib.ggml_type_sizef.argtypes = [ctypes.c_int]
  824. lib.ggml_type_sizef.restype = ctypes.c_float
  825. # GGML_API const char * ggml_type_name(enum ggml_type type);
  826. def ggml_type_name(type: Union[ctypes.c_int, int]) -> bytes:
  827. return lib.ggml_type_name(type)
  828. lib.ggml_type_name.argtypes = [ctypes.c_int]
  829. lib.ggml_type_name.restype = ctypes.c_char_p
  830. # GGML_API const char * ggml_op_name (enum ggml_op op);
  831. def ggml_op_name(op: Union[ctypes.c_int, int]) -> bytes:
  832. return lib.ggml_op_name(op)
  833. lib.ggml_op_name.argtypes = [ctypes.c_int]
  834. lib.ggml_op_name.restype = ctypes.c_char_p
  835. # GGML_API const char * ggml_op_symbol(enum ggml_op op);
  836. def ggml_op_symbol(op: Union[ctypes.c_int, int]) -> bytes:
  837. return lib.ggml_op_symbol(op)
  838. lib.ggml_op_symbol.argtypes = [ctypes.c_int]
  839. lib.ggml_op_symbol.restype = ctypes.c_char_p
  840. # GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op);
  841. def ggml_unary_op_name(op: Union[ctypes.c_int, int]) -> bytes:
  842. return lib.ggml_unary_op_name(op)
  843. lib.ggml_unary_op_name.argtypes = [ctypes.c_int]
  844. lib.ggml_unary_op_name.restype = ctypes.c_char_p
  845. # GGML_API const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name
  846. def ggml_op_desc(
  847. t: ggml_tensor_p,
  848. ) -> bytes:
  849. return lib.ggml_op_desc(t)
  850. lib.ggml_op_desc.argtypes = [ctypes.POINTER(ggml_tensor)]
  851. lib.ggml_op_desc.restype = ctypes.c_char_p
  852. # GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
  853. def ggml_element_size(
  854. tensor: ggml_tensor_p,
  855. ) -> int:
  856. return lib.ggml_element_size(tensor)
  857. lib.ggml_element_size.argtypes = [ctypes.POINTER(ggml_tensor)]
  858. lib.ggml_element_size.restype = ctypes.c_size_t
  859. # GGML_API bool ggml_is_quantized(enum ggml_type type);
  860. def ggml_is_quantized(type: Union[ctypes.c_int, int]) -> bool:
  861. return lib.ggml_is_quantized(type)
  862. lib.ggml_is_quantized.argtypes = [ctypes.c_int]
  863. lib.ggml_is_quantized.restype = ctypes.c_bool
  864. # // TODO: temporary until model loading of ggml examples is refactored
  865. # GGML_API enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype);
  866. def ggml_ftype_to_ggml_type(ftype: Union[ctypes.c_int, int]) -> int:
  867. return lib.ggml_ftype_to_ggml_type(ftype)
  868. lib.ggml_ftype_to_ggml_type.argtypes = [ctypes.c_int]
  869. lib.ggml_ftype_to_ggml_type.restype = ctypes.c_int
  870. # GGML_API bool ggml_is_transposed(const struct ggml_tensor * tensor);
  871. def ggml_is_transposed(
  872. tensor: ggml_tensor_p,
  873. ) -> bool:
  874. """Check if a tensor is transposed
  875. Parameters:
  876. tensor: tensor
  877. Returns:
  878. True if tensor is transposed else False"""
  879. return lib.ggml_is_transposed(tensor)
  880. lib.ggml_is_transposed.argtypes = [ctypes.POINTER(ggml_tensor)]
  881. lib.ggml_is_transposed.restype = ctypes.c_bool
  882. # GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor);
  883. def ggml_is_contiguous(
  884. tensor: ggml_tensor_p,
  885. ) -> bool:
  886. """Check if a tensor is contiguous
  887. Parameters:
  888. tensor: tensor
  889. Returns:
  890. True if tensor is contiguous else False"""
  891. return lib.ggml_is_contiguous(tensor)
  892. lib.ggml_is_contiguous.argtypes = [ctypes.POINTER(ggml_tensor)]
  893. lib.ggml_is_contiguous.restype = ctypes.c_bool
  894. # GGML_API bool ggml_is_permuted (const struct ggml_tensor * tensor);
  895. def ggml_is_permuted(
  896. tensor: ggml_tensor_p,
  897. ) -> bool:
  898. """Check if a tensor is permuted
  899. Parameters:
  900. tensor: tensor
  901. Returns:
  902. True if tensor is permuted else False"""
  903. return lib.ggml_is_permuted(tensor)
  904. lib.ggml_is_permuted.argtypes = [ctypes.POINTER(ggml_tensor)]
  905. lib.ggml_is_permuted.restype = ctypes.c_bool
  906. # GGML_API bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
  907. def ggml_are_same_shape(
  908. t0: ggml_tensor_p,
  909. t1: ggml_tensor_p,
  910. ) -> bool:
  911. """Check if two tensors have the same shape
  912. Parameters:
  913. t0: tensor 0
  914. t1: tensor 1
  915. Returns:
  916. True if tensors have the same shape else False"""
  917. return lib.ggml_are_same_shape(t0, t1)
  918. lib.ggml_are_same_shape.argtypes = [
  919. ctypes.POINTER(ggml_tensor),
  920. ctypes.POINTER(ggml_tensor),
  921. ]
  922. lib.ggml_are_same_shape.restype = ctypes.c_bool
  923. # // use this to compute the memory overhead of a tensor
  924. # GGML_API size_t ggml_tensor_overhead(void);
  925. def ggml_tensor_overhead() -> int:
  926. """Overhead required for a tensor struct in bytes
  927. Returns:
  928. size of tensor struct in bytes"""
  929. return lib.ggml_tensor_overhead()
  930. lib.ggml_tensor_overhead.argtypes = []
  931. lib.ggml_tensor_overhead.restype = ctypes.c_size_t
  932. # // main
  933. # GGML_API struct ggml_context * ggml_init(struct ggml_init_params params);
  934. def ggml_init(
  935. params: ggml_init_params,
  936. ) -> ggml_context_p:
  937. """Instantiate a new ggml context with params.
  938. You must call `ggml_free()` to free the context.
  939. Parameters:
  940. params: ggml init params
  941. Returns:
  942. Pointer to ggml_context"""
  943. return lib.ggml_init(params)
  944. lib.ggml_init.argtypes = [ggml_init_params]
  945. lib.ggml_init.restype = ggml_context_p
  946. # GGML_API void ggml_free(struct ggml_context * ctx);
  947. def ggml_free(ctx: ggml_context_p):
  948. """Free the ggml context.
  949. Parameters:
  950. ctx: ggml context"""
  951. return lib.ggml_free(ctx)
  952. lib.ggml_free.argtypes = [ggml_context_p]
  953. lib.ggml_free.restype = None
  954. # GGML_API size_t ggml_used_mem(const struct ggml_context * ctx);
  955. def ggml_used_mem(ctx: ggml_context_p) -> int:
  956. """Return the amount of memory used by the ggml context in bytes.
  957. Parameters:
  958. ctx: ggml context
  959. Returns:
  960. amount of memory used in bytes"""
  961. return lib.ggml_used_mem(ctx)
  962. lib.ggml_used_mem.argtypes = [ggml_context_p]
  963. lib.ggml_used_mem.restype = ctypes.c_size_t
  964. # GGML_API size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch);
  965. def ggml_set_scratch(ctx: ggml_context_p, scratch: ggml_scratch) -> int:
  966. """Set the scratch buffer for the ggml context."""
  967. return lib.ggml_set_scratch(ctx, scratch)
  968. lib.ggml_set_scratch.argtypes = [ggml_context_p, ggml_scratch]
  969. lib.ggml_set_scratch.restype = ctypes.c_size_t
  970. # GGML_API bool ggml_get_no_alloc(struct ggml_context * ctx);
  971. def ggml_get_no_alloc(ctx: ggml_context_p) -> bool:
  972. """Return the no_alloc flag for the ggml context."""
  973. return lib.ggml_get_no_alloc(ctx)
  974. lib.ggml_get_no_alloc.argtypes = [ggml_context_p]
  975. lib.ggml_get_no_alloc.restype = ctypes.c_bool
  976. # GGML_API void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc);
  977. def ggml_set_no_alloc(ctx: ggml_context_p, no_alloc: Union[ctypes.c_bool, bool]):
  978. """Set the no_alloc flag for the ggml context."""
  979. return lib.ggml_set_no_alloc(ctx, no_alloc)
  980. lib.ggml_set_no_alloc.argtypes = [ggml_context_p, ctypes.c_bool]
  981. lib.ggml_set_no_alloc.restype = None
  982. # GGML_API void * ggml_get_mem_buffer (struct ggml_context * ctx);
  983. def ggml_get_mem_buffer(ctx: ggml_context_p) -> Optional[ctypes.c_void_p]:
  984. """Return the memory buffer for the ggml context."""
  985. return lib.ggml_get_mem_buffer(ctx)
  986. lib.ggml_get_mem_buffer.argtypes = [ggml_context_p]
  987. lib.ggml_get_mem_buffer.restype = ctypes.c_void_p
  988. # GGML_API size_t ggml_get_mem_size (struct ggml_context * ctx);
  989. def ggml_get_mem_size(ctx: ggml_context_p) -> int:
  990. """Return the size of the memory buffer for the ggml context in bytes."""
  991. return lib.ggml_get_mem_size(ctx)
  992. lib.ggml_get_mem_size.argtypes = [ggml_context_p]
  993. lib.ggml_get_mem_size.restype = ctypes.c_size_t
  994. # GGML_API size_t ggml_get_max_tensor_size(const struct ggml_context * ctx);
  995. def ggml_get_max_tensor_size(ctx: ggml_context_p) -> int:
  996. """Return the maximum size of a tensor in bytes."""
  997. return lib.ggml_get_max_tensor_size(ctx)
  998. lib.ggml_get_max_tensor_size.argtypes = [ggml_context_p]
  999. lib.ggml_get_max_tensor_size.restype = ctypes.c_size_t
  1000. # GGML_API struct ggml_tensor * ggml_new_tensor(
  1001. # struct ggml_context * ctx,
  1002. # enum ggml_type type,
  1003. # int n_dims,
  1004. # const int64_t *ne);
  1005. def ggml_new_tensor(
  1006. ctx: ggml_context_p,
  1007. type: Union[ctypes.c_int, int],
  1008. n_dims: Union[ctypes.c_int, int],
  1009. ne: CInt64Array,
  1010. ) -> ggml_tensor_p:
  1011. """Create a new tensor with the given type, number of dimensions, and number of elements in each dimension.
  1012. Parameters:
  1013. ctx: ggml context
  1014. type: ggml type
  1015. n_dims: number of dimensions
  1016. ne (ctypes.Array[ctypes.c_int64]): number of elements in each dimension (array of length n_dims)
  1017. Returns:
  1018. Pointer to ggml_tensor"""
  1019. return lib.ggml_new_tensor(ctx, type, n_dims, ne)
  1020. lib.ggml_new_tensor.argtypes = [
  1021. ggml_context_p,
  1022. ctypes.c_int,
  1023. ctypes.c_int,
  1024. ctypes.POINTER(ctypes.c_int64),
  1025. ]
  1026. lib.ggml_new_tensor.restype = ctypes.POINTER(ggml_tensor)
  1027. # GGML_API struct ggml_tensor * ggml_new_tensor_1d(
  1028. # struct ggml_context * ctx,
  1029. # enum ggml_type type,
  1030. # int64_t ne0);
  1031. def ggml_new_tensor_1d(
  1032. ctx: ggml_context_p, type: Union[ctypes.c_int, int], ne0: Union[ctypes.c_int64, int]
  1033. ) -> ggml_tensor_p:
  1034. """Create a new 1-dimensional tensor with the given type and number of elements.
  1035. Parameters:
  1036. ctx: ggml context
  1037. type: ggml type
  1038. ne0: number of elements in dimension 0
  1039. Returns:
  1040. Pointer to ggml_tensor"""
  1041. return lib.ggml_new_tensor_1d(ctx, type, ne0)
  1042. lib.ggml_new_tensor_1d.argtypes = [ggml_context_p, ctypes.c_int, ctypes.c_int64]
  1043. lib.ggml_new_tensor_1d.restype = ctypes.POINTER(ggml_tensor)
  1044. # GGML_API struct ggml_tensor * ggml_new_tensor_2d(
  1045. # struct ggml_context * ctx,
  1046. # enum ggml_type type,
  1047. # int64_t ne0,
  1048. # int64_t ne1);
  1049. def ggml_new_tensor_2d(
  1050. ctx: ggml_context_p,
  1051. type: Union[ctypes.c_int, int],
  1052. ne0: Union[ctypes.c_int64, int],
  1053. ne1: Union[ctypes.c_int64, int],
  1054. ) -> ggml_tensor_p:
  1055. """Create a new 2-dimensional tensor with the given type and number of elements in each dimension.
  1056. Parameters:
  1057. ctx: ggml context
  1058. type: ggml type
  1059. ne0: number of elements in dimension 0
  1060. ne1: number of elements in dimension 1
  1061. Returns:
  1062. Pointer to ggml_tensor"""
  1063. return lib.ggml_new_tensor_2d(ctx, type, ne0, ne1)
  1064. lib.ggml_new_tensor_2d.argtypes = [
  1065. ggml_context_p,
  1066. ctypes.c_int,
  1067. ctypes.c_int64,
  1068. ctypes.c_int64,
  1069. ]
  1070. lib.ggml_new_tensor_2d.restype = ctypes.POINTER(ggml_tensor)
  1071. # GGML_API struct ggml_tensor * ggml_new_tensor_3d(
  1072. # struct ggml_context * ctx,
  1073. # enum ggml_type type,
  1074. # int64_t ne0,
  1075. # int64_t ne1,
  1076. # int64_t ne2);
  1077. def ggml_new_tensor_3d(
  1078. ctx: ggml_context_p,
  1079. type: Union[ctypes.c_int, int],
  1080. ne0: Union[ctypes.c_int64, int],
  1081. ne1: Union[ctypes.c_int64, int],
  1082. ne2: Union[ctypes.c_int64, int],
  1083. ) -> ggml_tensor_p:
  1084. """Create a new 3-dimensional tensor with the given type and number of elements in each dimension.
  1085. Parameters:
  1086. ctx: ggml context
  1087. type: ggml type
  1088. ne0: number of elements in dimension 0
  1089. ne1: number of elements in dimension 1
  1090. ne2: number of elements in dimension 2
  1091. Returns:
  1092. Pointer to ggml_tensor"""
  1093. return lib.ggml_new_tensor_3d(ctx, type, ne0, ne1, ne2)
  1094. lib.ggml_new_tensor_3d.argtypes = [
  1095. ggml_context_p,
  1096. ctypes.c_int,
  1097. ctypes.c_int64,
  1098. ctypes.c_int64,
  1099. ctypes.c_int64,
  1100. ]
  1101. lib.ggml_new_tensor_3d.restype = ctypes.POINTER(ggml_tensor)
  1102. # GGML_API struct ggml_tensor * ggml_new_tensor_4d(
  1103. # struct ggml_context * ctx,
  1104. # enum ggml_type type,
  1105. # int64_t ne0,
  1106. # int64_t ne1,
  1107. # int64_t ne2,
  1108. # int64_t ne3);
  1109. def ggml_new_tensor_4d(
  1110. ctx: ggml_context_p,
  1111. type: Union[ctypes.c_int, int],
  1112. ne0: Union[ctypes.c_int64, int],
  1113. ne1: Union[ctypes.c_int64, int],
  1114. ne2: Union[ctypes.c_int64, int],
  1115. ne3: Union[ctypes.c_int64, int],
  1116. ) -> ggml_tensor_p:
  1117. """Create a new 4-dimensional tensor with the given type and number of elements in each dimension.
  1118. Parameters:
  1119. ctx: ggml context
  1120. type: ggml type
  1121. ne0: number of elements in dimension 0
  1122. ne1: number of elements in dimension 1
  1123. ne2: number of elements in dimension 2
  1124. Returns:
  1125. Pointer to ggml_tensor"""
  1126. return lib.ggml_new_tensor_4d(ctx, type, ne0, ne1, ne2, ne3)
  1127. lib.ggml_new_tensor_4d.argtypes = [
  1128. ggml_context_p,
  1129. ctypes.c_int,
  1130. ctypes.c_int64,
  1131. ctypes.c_int64,
  1132. ctypes.c_int64,
  1133. ctypes.c_int64,
  1134. ]
  1135. lib.ggml_new_tensor_4d.restype = ctypes.POINTER(ggml_tensor)
  1136. # GGML_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
  1137. def ggml_new_i32(
  1138. ctx: ggml_context_p, value: Union[ctypes.c_int32, int]
  1139. ) -> ggml_tensor_p:
  1140. """Create a 1 element tensor with the given integer value.
  1141. Parameters:
  1142. ctx: ggml context
  1143. value: integer value
  1144. Returns:
  1145. Pointer to ggml_tensor"""
  1146. return lib.ggml_new_i32(ctx, value)
  1147. lib.ggml_new_i32.argtypes = [ggml_context_p, ctypes.c_int32]
  1148. lib.ggml_new_i32.restype = ctypes.POINTER(ggml_tensor)
  1149. # GGML_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
  1150. def ggml_new_f32(
  1151. ctx: ggml_context_p,
  1152. value: Union[ctypes.c_float, float],
  1153. ) -> ggml_tensor_p:
  1154. """Create a 1 element tensor with the given float value.
  1155. Parameters:
  1156. ctx: ggml context
  1157. value: float value
  1158. Returns:
  1159. Pointer to ggml_tensor"""
  1160. return lib.ggml_new_f32(ctx, value)
  1161. lib.ggml_new_f32.argtypes = [ggml_context_p, ctypes.c_float]
  1162. lib.ggml_new_f32.restype = ctypes.POINTER(ggml_tensor)
  1163. # GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
  1164. def ggml_dup_tensor(ctx: ggml_context_p, src: ggml_tensor_p) -> ggml_tensor_p:
  1165. """Create a new tensor with the same type and dimensions as the source tensor.
  1166. Parameters:
  1167. ctx: ggml context
  1168. src: source tensor
  1169. Returns:
  1170. Pointer to ggml_tensor"""
  1171. return lib.ggml_dup_tensor(ctx, src)
  1172. lib.ggml_dup_tensor.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1173. lib.ggml_dup_tensor.restype = ctypes.POINTER(ggml_tensor)
  1174. # GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, struct ggml_tensor * src);
  1175. def ggml_view_tensor(ctx: ggml_context_p, src: ggml_tensor_p) -> ggml_tensor_p:
  1176. """Create a new tensor with the same type, dimensions and data as the source tensor.
  1177. Parameters:
  1178. ctx: ggml context
  1179. src: source tensor
  1180. Returns:
  1181. Pointer to ggml_tensor"""
  1182. return lib.ggml_view_tensor(ctx, src)
  1183. lib.ggml_view_tensor.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1184. lib.ggml_view_tensor.restype = ctypes.POINTER(ggml_tensor)
  1185. # // Context tensor enumeration and lookup
  1186. # GGML_API struct ggml_tensor * ggml_get_first_tensor(struct ggml_context * ctx);
  1187. def ggml_get_first_tensor(ctx: ggml_context_p) -> ggml_tensor_p:
  1188. """Get the first tensor from the ggml context.
  1189. Parameters:
  1190. ctx: ggml context
  1191. Returns:
  1192. Pointer to ggml_tensor"""
  1193. return lib.ggml_get_first_tensor(ctx)
  1194. lib.ggml_get_first_tensor.argtypes = [ggml_context_p]
  1195. lib.ggml_get_first_tensor.restype = ctypes.POINTER(ggml_tensor)
  1196. # GGML_API struct ggml_tensor * ggml_get_next_tensor (struct ggml_context * ctx, struct ggml_tensor * tensor);
  1197. def ggml_get_next_tensor(ctx: ggml_context_p, tensor: ggml_tensor_p) -> ggml_tensor_p:
  1198. """Get the next tensor from the ggml context.
  1199. Parameters:
  1200. ctx: ggml context
  1201. tensor: tensor
  1202. Returns:
  1203. Pointer to ggml_tensor"""
  1204. return lib.ggml_get_next_tensor(ctx, tensor)
  1205. lib.ggml_get_next_tensor.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1206. lib.ggml_get_next_tensor.restype = ctypes.POINTER(ggml_tensor)
  1207. # GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
  1208. def ggml_get_tensor(ctx: ggml_context_p, name: bytes) -> ggml_tensor_p:
  1209. """Get a tensor from the ggml context by name.
  1210. Parameters:
  1211. ctx: ggml context
  1212. name: name of tensor
  1213. Returns:
  1214. Pointer to ggml_tensor"""
  1215. return lib.ggml_get_tensor(ctx, name)
  1216. lib.ggml_get_tensor.argtypes = [ggml_context_p, ctypes.c_char_p]
  1217. lib.ggml_get_tensor.restype = ctypes.POINTER(ggml_tensor)
  1218. # GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
  1219. def ggml_set_zero(
  1220. tensor: ggml_tensor_p,
  1221. ) -> ggml_tensor_p:
  1222. """Zero all elements in a tensor.
  1223. Parameters:
  1224. tensor: tensor
  1225. Returns:
  1226. Pointer to ggml_tensor"""
  1227. return lib.ggml_set_zero(tensor)
  1228. lib.ggml_set_zero.argtypes = [ctypes.POINTER(ggml_tensor)]
  1229. lib.ggml_set_zero.restype = ctypes.POINTER(ggml_tensor)
  1230. # GGML_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
  1231. def ggml_set_i32(
  1232. tensor: ggml_tensor_p,
  1233. value: Union[ctypes.c_int32, int],
  1234. ) -> ggml_tensor_p:
  1235. """Set all elements in a tensor to the given integer value.
  1236. Parameters:
  1237. tensor: tensor
  1238. value: integer value
  1239. Returns:
  1240. Pointer to ggml_tensor"""
  1241. return lib.ggml_set_i32(tensor, value)
  1242. lib.ggml_set_i32.argtypes = [ctypes.POINTER(ggml_tensor), ctypes.c_int32]
  1243. lib.ggml_set_i32.restype = ctypes.POINTER(ggml_tensor)
  1244. # GGML_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);
  1245. def ggml_set_f32(
  1246. tensor: ggml_tensor_p,
  1247. value: Union[ctypes.c_float, float],
  1248. ) -> ggml_tensor_p:
  1249. """Set all elements in a tensor to the given float value.
  1250. Parameters:
  1251. tensor: tensor
  1252. value: float value
  1253. Returns:
  1254. Pointer to ggml_tensor"""
  1255. return lib.ggml_set_f32(tensor, value)
  1256. lib.ggml_set_f32.argtypes = [ctypes.POINTER(ggml_tensor), ctypes.c_float]
  1257. lib.ggml_set_f32.restype = ctypes.POINTER(ggml_tensor)
  1258. # // Converts a flat index into coordinates
  1259. # GGML_API void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3);
  1260. def ggml_unravel_index(
  1261. tensor: ggml_tensor_p,
  1262. i: Union[ctypes.c_int64, int],
  1263. i0, # type: "ctypes._Pointer(ctypes.c_int64)" # type: ignore
  1264. i1, # type: "ctypes._Pointer(ctypes.c_int64)" # type: ignore
  1265. i2, # type: "ctypes._Pointer(ctypes.c_int64)" # type: ignore
  1266. i3, # type: "ctypes._Pointer(ctypes.c_int64)" # type: ignore
  1267. ):
  1268. """Convert a flat index into coordinates.
  1269. Parameters:
  1270. tensor: tensor
  1271. i: flat index
  1272. i0: pointer to index 0
  1273. i1: pointer to index 1
  1274. i2: pointer to index 2
  1275. i3: pointer to index 3"""
  1276. return lib.ggml_unravel_index(tensor, i, i0, i1, i2, i3)
  1277. lib.ggml_unravel_index.argtypes = [
  1278. ctypes.POINTER(ggml_tensor),
  1279. ctypes.c_int64,
  1280. ctypes.POINTER(ctypes.c_int64),
  1281. ctypes.POINTER(ctypes.c_int64),
  1282. ctypes.POINTER(ctypes.c_int64),
  1283. ctypes.POINTER(ctypes.c_int64),
  1284. ]
  1285. lib.ggml_unravel_index.restype = None
  1286. # GGML_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
  1287. def ggml_get_i32_1d(
  1288. tensor: ggml_tensor_p,
  1289. i: Union[ctypes.c_int, int],
  1290. ) -> int:
  1291. """Get the integer value of the i-th element in a 1-dimensional tensor.
  1292. Parameters:
  1293. tensor: tensor
  1294. i: index of element
  1295. Returns:
  1296. integer value of element at index i"""
  1297. return lib.ggml_get_i32_1d(tensor, i)
  1298. lib.ggml_get_i32_1d.argtypes = [ctypes.POINTER(ggml_tensor), ctypes.c_int]
  1299. lib.ggml_get_i32_1d.restype = ctypes.c_int32
  1300. # GGML_API void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);
  1301. def ggml_set_i32_1d(
  1302. tensor: ggml_tensor_p,
  1303. i: Union[ctypes.c_int, int],
  1304. value: Union[ctypes.c_int32, int],
  1305. ):
  1306. """Set the integer value of the i-th element in a 1-dimensional tensor.
  1307. Parameters:
  1308. tensor: tensor
  1309. i: index of element
  1310. value: integer value to set element to"""
  1311. return lib.ggml_set_i32_1d(tensor, i, value)
  1312. lib.ggml_set_i32_1d.argtypes = [
  1313. ctypes.POINTER(ggml_tensor),
  1314. ctypes.c_int,
  1315. ctypes.c_int32,
  1316. ]
  1317. lib.ggml_set_i32_1d.restype = None
  1318. # GGML_API int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
  1319. def ggml_get_i32_nd(
  1320. tensor: ggml_tensor_p,
  1321. i0: Union[ctypes.c_int, int],
  1322. i1: Union[ctypes.c_int, int],
  1323. i2: Union[ctypes.c_int, int],
  1324. i3: Union[ctypes.c_int, int],
  1325. ) -> int:
  1326. """Get the integer value of the element at the given coordinates in a 4-dimensional tensor.
  1327. Parameters:
  1328. tensor: tensor
  1329. i0: index of element in dimension 0
  1330. i1: index of element in dimension 1
  1331. i2: index of element in dimension 2
  1332. i3: index of element in dimension 3
  1333. Returns:
  1334. integer value of element at coordinates"""
  1335. return lib.ggml_get_i32_nd(tensor, i0, i1, i2, i3)
  1336. lib.ggml_get_i32_nd.argtypes = [
  1337. ctypes.POINTER(ggml_tensor),
  1338. ctypes.c_int,
  1339. ctypes.c_int,
  1340. ctypes.c_int,
  1341. ctypes.c_int,
  1342. ]
  1343. lib.ggml_get_i32_nd.restype = ctypes.c_int32
  1344. # GGML_API void ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, int32_t value);
  1345. def ggml_set_i32_nd(
  1346. tensor: ggml_tensor_p,
  1347. i0: Union[ctypes.c_int, int],
  1348. i1: Union[ctypes.c_int, int],
  1349. i2: Union[ctypes.c_int, int],
  1350. i3: Union[ctypes.c_int, int],
  1351. value: Union[ctypes.c_int32, int],
  1352. ):
  1353. """Set the integer value of the element at the given coordinates in a 4-dimensional tensor.
  1354. Parameters:
  1355. tensor: tensor
  1356. i0: index of element in dimension 0
  1357. i1: index of element in dimension 1
  1358. i2: index of element in dimension 2
  1359. i3: index of element in dimension 3
  1360. value: integer value to set element to"""
  1361. return lib.ggml_set_i32_nd(tensor, i0, i1, i2, i3, value)
  1362. lib.ggml_set_i32_nd.argtypes = [
  1363. ctypes.POINTER(ggml_tensor),
  1364. ctypes.c_int,
  1365. ctypes.c_int,
  1366. ctypes.c_int,
  1367. ctypes.c_int,
  1368. ctypes.c_int32,
  1369. ]
  1370. lib.ggml_set_i32_nd.restype = None
  1371. # GGML_API float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
  1372. def ggml_get_f32_1d(
  1373. tensor: ggml_tensor_p,
  1374. i: Union[ctypes.c_int, int],
  1375. ) -> float:
  1376. """Get the float value of the i-th element in a 1-dimensional tensor.
  1377. Parameters:
  1378. tensor: tensor
  1379. Returns:
  1380. float value of element at index i"""
  1381. return lib.ggml_get_f32_1d(tensor, i)
  1382. lib.ggml_get_f32_1d.argtypes = [ctypes.POINTER(ggml_tensor), ctypes.c_int]
  1383. lib.ggml_get_f32_1d.restype = ctypes.c_float
  1384. # GGML_API void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);
  1385. def ggml_set_f32_1d(
  1386. tensor: ggml_tensor_p,
  1387. i: Union[ctypes.c_int, int],
  1388. value: Union[ctypes.c_float, float],
  1389. ):
  1390. """Set the float value of the i-th element in a 1-dimensional tensor.
  1391. Parameters:
  1392. tensor: tensor
  1393. i: index of element
  1394. value: float value to set element to"""
  1395. return lib.ggml_set_f32_1d(tensor, i, value)
  1396. lib.ggml_set_f32_1d.argtypes = [
  1397. ctypes.POINTER(ggml_tensor),
  1398. ctypes.c_int,
  1399. ctypes.c_float,
  1400. ]
  1401. lib.ggml_set_f32_1d.restype = None
  1402. # GGML_API float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
  1403. def ggml_get_f32_nd(
  1404. tensor: ggml_tensor_p,
  1405. i0: Union[ctypes.c_int, int],
  1406. i1: Union[ctypes.c_int, int],
  1407. i2: Union[ctypes.c_int, int],
  1408. i3: Union[ctypes.c_int, int],
  1409. ) -> float:
  1410. """Get the float value of the element at the given coordinates in a 4-dimensional tensor.
  1411. Parameters:
  1412. tensor: tensor
  1413. i0: index of element in dimension 0
  1414. i1: index of element in dimension 1
  1415. i2: index of element in dimension 2
  1416. i3: index of element in dimension 3
  1417. Returns:
  1418. float value of element at coordinates"""
  1419. return lib.ggml_get_f32_nd(tensor, i0, i1, i2, i3)
  1420. lib.ggml_get_f32_nd.argtypes = [
  1421. ctypes.POINTER(ggml_tensor),
  1422. ctypes.c_int,
  1423. ctypes.c_int,
  1424. ctypes.c_int,
  1425. ctypes.c_int,
  1426. ]
  1427. lib.ggml_get_f32_nd.restype = ctypes.c_float
  1428. # GGML_API void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value);
  1429. def ggml_set_f32_nd(
  1430. tensor: ggml_tensor_p,
  1431. i0: Union[ctypes.c_int, int],
  1432. i1: Union[ctypes.c_int, int],
  1433. i2: Union[ctypes.c_int, int],
  1434. i3: Union[ctypes.c_int, int],
  1435. value: Union[ctypes.c_float, float],
  1436. ):
  1437. """Set the float value of the element at the given coordinates in a 4-dimensional tensor.
  1438. Parameters:
  1439. tensor: tensor
  1440. i0: index of element in dimension 0
  1441. i1: index of element in dimension 1
  1442. i2: index of element in dimension 2
  1443. i3: index of element in dimension 3
  1444. value: float value to set element to"""
  1445. return lib.ggml_set_f32_nd(tensor, i0, i1, i2, i3, value)
  1446. lib.ggml_set_f32_nd.argtypes = [
  1447. ctypes.POINTER(ggml_tensor),
  1448. ctypes.c_int,
  1449. ctypes.c_int,
  1450. ctypes.c_int,
  1451. ctypes.c_int,
  1452. ctypes.c_float,
  1453. ]
  1454. lib.ggml_set_f32_nd.restype = None
  1455. # GGML_API void * ggml_get_data (const struct ggml_tensor * tensor);
  1456. def ggml_get_data(
  1457. tensor: ggml_tensor_p,
  1458. ) -> Optional[ctypes.c_void_p]:
  1459. """Get the data pointer of a tensor.
  1460. Parameters:
  1461. tensor: tensor
  1462. Returns:
  1463. Pointer to data, or None if tensor has no data"""
  1464. return lib.ggml_get_data(tensor)
  1465. lib.ggml_get_data.argtypes = [ctypes.POINTER(ggml_tensor)]
  1466. lib.ggml_get_data.restype = ctypes.c_void_p
  1467. # GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);
  1468. def ggml_get_data_f32(
  1469. tensor: ggml_tensor_p,
  1470. ) -> Optional[CFloatArray]:
  1471. """Get the data pointer of a tensor as a float array.
  1472. Parameters:
  1473. tensor: tensor
  1474. Returns:
  1475. (Optional[ctypes.Array[ctypes.c_float]]): array of float to data, or None if tensor has no data
  1476. """
  1477. return lib.ggml_get_data_f32(tensor)
  1478. lib.ggml_get_data_f32.argtypes = [ctypes.POINTER(ggml_tensor)]
  1479. lib.ggml_get_data_f32.restype = ctypes.POINTER(ctypes.c_float)
  1480. # GGML_API enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);
  1481. def ggml_get_unary_op(
  1482. tensor: ggml_tensor_p,
  1483. ) -> int:
  1484. """Get the unary operation of a tensor.
  1485. Parameters:
  1486. tensor: tensor
  1487. Returns:
  1488. unary operation"""
  1489. return lib.ggml_get_unary_op(tensor)
  1490. lib.ggml_get_unary_op.argtypes = [ctypes.POINTER(ggml_tensor)]
  1491. lib.ggml_get_unary_op.restype = ctypes.c_int
  1492. # GGML_API const char * ggml_get_name(const struct ggml_tensor * tensor);
  1493. def ggml_get_name(
  1494. tensor: ggml_tensor_p,
  1495. ) -> bytes:
  1496. """Get the name of a tensor.
  1497. Parameters:
  1498. tensor: tensor
  1499. Returns:
  1500. name of tensor"""
  1501. return lib.ggml_get_name(tensor)
  1502. lib.ggml_get_name.argtypes = [ctypes.POINTER(ggml_tensor)]
  1503. lib.ggml_get_name.restype = ctypes.c_char_p
  1504. # GGML_API struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name);
  1505. def ggml_set_name(
  1506. tensor: ggml_tensor_p,
  1507. name: bytes,
  1508. ) -> ggml_tensor_p:
  1509. """Set the name of a tensor.
  1510. Parameters:
  1511. tensor: tensor
  1512. name: name to set tensor to
  1513. Returns:
  1514. Pointer to ggml_tensor"""
  1515. return lib.ggml_set_name(tensor, name)
  1516. lib.ggml_set_name.argtypes = [ctypes.POINTER(ggml_tensor), ctypes.c_char_p]
  1517. lib.ggml_set_name.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...);
def ggml_format_name(
    tensor: ggml_tensor_p,
    fmt: bytes,
    # NOTE(review): ctypes variadic calls convert bytes/int/float/bool automatically
    # but raise on Python str — string varargs must be passed as bytes. The original
    # Sequence[...] annotation described the whole tuple, not each argument.
    *args: Union[bool, int, float, bytes],
) -> ggml_tensor_p:
    """Format the name of a tensor using the given format c string and arguments.
    Parameters:
        tensor: tensor
        fmt: format c string (bytes, printf-style)
        args: arguments to format string
    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_format_name(tensor, fmt, *args)
# argtypes covers only the fixed parameters; extra variadic arguments are
# converted by ctypes' default rules.
lib.ggml_format_name.argtypes = [ctypes.POINTER(ggml_tensor), ctypes.c_char_p]
lib.ggml_format_name.restype = ctypes.POINTER(ggml_tensor)
# //
# // operations on tensors with backpropagation
# //
# GGML_API struct ggml_tensor * ggml_dup(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_dup(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
    """Duplicate a tensor and return the copy.
    Parameters:
        ctx: ggml context
        a: tensor
    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_dup(ctx, a)
lib.ggml_dup.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_dup.restype = ctypes.POINTER(ggml_tensor)
# // in-place, returns view(a)
# GGML_API struct ggml_tensor * ggml_dup_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_dup_inplace(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
    """In-place duplicate; per the C header this returns view(a).
    Parameters:
        ctx: ggml context
        a: tensor
    Returns:
        Pointer to ggml_tensor (a view of `a`)"""
    return lib.ggml_dup_inplace(ctx, a)
lib.ggml_dup_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_dup_inplace.restype = ctypes.POINTER(ggml_tensor)
  1552. # GGML_API struct ggml_tensor * ggml_add(
  1553. # struct ggml_context * ctx,
  1554. # struct ggml_tensor * a,
  1555. # struct ggml_tensor * b);
  1556. def ggml_add(
  1557. ctx: ggml_context_p,
  1558. a: ggml_tensor_p,
  1559. b: ggml_tensor_p,
  1560. ) -> ggml_tensor_p:
  1561. """Add two tensors together and return the result.
  1562. Parameters:
  1563. ctx: ggml context
  1564. a: first tensor
  1565. b: second tensor
  1566. Returns:
  1567. Pointer to ggml_tensor"""
  1568. return lib.ggml_add(ctx, a, b)
  1569. lib.ggml_add.argtypes = [
  1570. ggml_context_p,
  1571. ctypes.POINTER(ggml_tensor),
  1572. ctypes.POINTER(ggml_tensor),
  1573. ]
  1574. lib.ggml_add.restype = ctypes.POINTER(ggml_tensor)
  1575. # GGML_API struct ggml_tensor * ggml_add_inplace(
  1576. # struct ggml_context * ctx,
  1577. # struct ggml_tensor * a,
  1578. # struct ggml_tensor * b);
  1579. def ggml_add_inplace(
  1580. ctx: ggml_context_p,
  1581. a: ggml_tensor_p,
  1582. b: ggml_tensor_p,
  1583. ) -> ggml_tensor_p:
  1584. """Add two tensors together and store the result in the first tensor.
  1585. Parameters:
  1586. ctx: ggml context
  1587. a: first tensor
  1588. b: second tensor
  1589. Returns:
  1590. Pointer to ggml_tensor"""
  1591. return lib.ggml_add_inplace(ctx, a, b)
  1592. lib.ggml_add_inplace.argtypes = [
  1593. ggml_context_p,
  1594. ctypes.POINTER(ggml_tensor),
  1595. ctypes.POINTER(ggml_tensor),
  1596. ]
  1597. lib.ggml_add_inplace.restype = ctypes.POINTER(ggml_tensor)
  1598. # GGML_API struct ggml_tensor * ggml_add_cast(
  1599. # struct ggml_context * ctx,
  1600. # struct ggml_tensor * a,
  1601. # struct ggml_tensor * b,
  1602. # enum ggml_type type);
  1603. def ggml_add_cast(
  1604. ctx: ggml_context_p,
  1605. a: ggml_tensor_p,
  1606. b: ggml_tensor_p,
  1607. type: Union[ctypes.c_int, int],
  1608. ) -> ggml_tensor_p:
  1609. """Add two tensors together and cast the result to the given type.
  1610. Parameters:
  1611. ctx: ggml context
  1612. a: first tensor
  1613. b: second tensor
  1614. type: type to cast result to
  1615. Returns:
  1616. Pointer to ggml_tensor"""
  1617. return lib.ggml_add_cast(ctx, a, b, type)
  1618. lib.ggml_add_cast.argtypes = [
  1619. ggml_context_p,
  1620. ctypes.POINTER(ggml_tensor),
  1621. ctypes.POINTER(ggml_tensor),
  1622. ctypes.c_int,
  1623. ]
  1624. lib.ggml_add_cast.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_add1(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b);
def ggml_add1(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    """Add tensor `b` to `a` and return the result.
    NOTE(review): in ggml, `b` is presumably a single-element tensor broadcast
    over `a` (hence "add1") — confirm against the ggml header docs.
    Parameters:
        ctx: ggml context
        a: first tensor
        b: second tensor
    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_add1(ctx, a, b)
lib.ggml_add1.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_add1.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_add1_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b);
def ggml_add1_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    """In-place variant of ggml_add1: adds `b` to `a`, storing the result in `a`.
    Parameters:
        ctx: ggml context
        a: first tensor (receives the result)
        b: second tensor
    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_add1_inplace(ctx, a, b)
lib.ggml_add1_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_add1_inplace.restype = ctypes.POINTER(ggml_tensor)
# // dst = a
# // view(dst, nb1, nb2, nb3, offset) += b
# // return dst
# GGML_API struct ggml_tensor * ggml_acc(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b,
#     size_t nb1,
#     size_t nb2,
#     size_t nb3,
#     size_t offset);
def ggml_acc(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    nb1: Union[ctypes.c_size_t, int],
    nb2: Union[ctypes.c_size_t, int],
    nb3: Union[ctypes.c_size_t, int],
    offset: Union[ctypes.c_size_t, int],
) -> ggml_tensor_p:
    """Accumulate `b` into a strided view of a copy of `a`.
    Per the C header: dst = a; view(dst, nb1, nb2, nb3, offset) += b; return dst.
    Parameters:
        ctx: ggml context
        a: destination tensor (copied to dst)
        b: tensor accumulated into the view
        nb1: stride (bytes) of the view in dimension 1
        nb2: stride (bytes) of the view in dimension 2
        nb3: stride (bytes) of the view in dimension 3
        offset: byte offset of the view into dst
    Returns:
        Pointer to ggml_tensor (dst)"""
    return lib.ggml_acc(ctx, a, b, nb1, nb2, nb3, offset)
lib.ggml_acc.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_size_t,
    ctypes.c_size_t,
    ctypes.c_size_t,
    ctypes.c_size_t,
]
lib.ggml_acc.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_acc_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b,
#     size_t nb1,
#     size_t nb2,
#     size_t nb3,
#     size_t offset);
def ggml_acc_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    nb1: Union[ctypes.c_size_t, int],
    nb2: Union[ctypes.c_size_t, int],
    nb3: Union[ctypes.c_size_t, int],
    offset: Union[ctypes.c_size_t, int],
) -> ggml_tensor_p:
    """In-place variant of ggml_acc: accumulates `b` into a strided view of `a` itself.
    Parameters:
        ctx: ggml context
        a: destination tensor (modified in place)
        b: tensor accumulated into the view
        nb1: stride (bytes) of the view in dimension 1
        nb2: stride (bytes) of the view in dimension 2
        nb3: stride (bytes) of the view in dimension 3
        offset: byte offset of the view into `a`
    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_acc_inplace(ctx, a, b, nb1, nb2, nb3, offset)
lib.ggml_acc_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_size_t,
    ctypes.c_size_t,
    ctypes.c_size_t,
    ctypes.c_size_t,
]
lib.ggml_acc_inplace.restype = ctypes.POINTER(ggml_tensor)
  1716. # GGML_API struct ggml_tensor * ggml_sub(
  1717. # struct ggml_context * ctx,
  1718. # struct ggml_tensor * a,
  1719. # struct ggml_tensor * b);
  1720. def ggml_sub(
  1721. ctx: ggml_context_p,
  1722. a: ggml_tensor_p,
  1723. b: ggml_tensor_p,
  1724. ) -> ggml_tensor_p:
  1725. """Subtract two tensors and return the result.
  1726. Parameters:
  1727. ctx: ggml context
  1728. a: first tensor
  1729. b: second tensor
  1730. Returns:
  1731. Pointer to ggml_tensor"""
  1732. return lib.ggml_sub(ctx, a, b)
  1733. lib.ggml_sub.argtypes = [
  1734. ggml_context_p,
  1735. ctypes.POINTER(ggml_tensor),
  1736. ctypes.POINTER(ggml_tensor),
  1737. ]
  1738. lib.ggml_sub.restype = ctypes.POINTER(ggml_tensor)
  1739. # GGML_API struct ggml_tensor * ggml_sub_inplace(
  1740. # struct ggml_context * ctx,
  1741. # struct ggml_tensor * a,
  1742. # struct ggml_tensor * b);
  1743. def ggml_sub_inplace(
  1744. ctx: ggml_context_p,
  1745. a: ggml_tensor_p,
  1746. b: ggml_tensor_p,
  1747. ) -> ggml_tensor_p:
  1748. """Subtract two tensors and store the result in the first tensor.
  1749. Parameters:
  1750. ctx: ggml context
  1751. a: first tensor
  1752. b: second tensor
  1753. Returns:
  1754. Pointer to ggml_tensor"""
  1755. return lib.ggml_sub_inplace(ctx, a, b)
  1756. lib.ggml_sub_inplace.argtypes = [
  1757. ggml_context_p,
  1758. ctypes.POINTER(ggml_tensor),
  1759. ctypes.POINTER(ggml_tensor),
  1760. ]
  1761. lib.ggml_sub_inplace.restype = ctypes.POINTER(ggml_tensor)
  1762. # GGML_API struct ggml_tensor * ggml_mul(
  1763. # struct ggml_context * ctx,
  1764. # struct ggml_tensor * a,
  1765. # struct ggml_tensor * b);
  1766. def ggml_mul(
  1767. ctx: ggml_context_p,
  1768. a: ggml_tensor_p,
  1769. b: ggml_tensor_p,
  1770. ) -> ggml_tensor_p:
  1771. """Element-wise multiply two tensors and return the result.
  1772. Parameters:
  1773. ctx: ggml context
  1774. a: first tensor
  1775. b: second tensor
  1776. Returns:
  1777. Pointer to ggml_tensor"""
  1778. return lib.ggml_mul(ctx, a, b)
  1779. lib.ggml_mul.argtypes = [
  1780. ggml_context_p,
  1781. ctypes.POINTER(ggml_tensor),
  1782. ctypes.POINTER(ggml_tensor),
  1783. ]
  1784. lib.ggml_mul.restype = ctypes.POINTER(ggml_tensor)
  1785. # GGML_API struct ggml_tensor * ggml_mul_inplace(
  1786. # struct ggml_context * ctx,
  1787. # struct ggml_tensor * a,
  1788. # struct ggml_tensor * b);
  1789. def ggml_mul_inplace(
  1790. ctx: ggml_context_p,
  1791. a: ggml_tensor_p,
  1792. b: ggml_tensor_p,
  1793. ) -> ggml_tensor_p:
  1794. """Element-wise multiply two tensors and store the result in the first tensor.
  1795. Parameters:
  1796. ctx: ggml context
  1797. a: first tensor
  1798. b: second tensor
  1799. Returns:
  1800. Pointer to ggml_tensor"""
  1801. return lib.ggml_mul_inplace(ctx, a, b)
  1802. lib.ggml_mul_inplace.argtypes = [
  1803. ggml_context_p,
  1804. ctypes.POINTER(ggml_tensor),
  1805. ctypes.POINTER(ggml_tensor),
  1806. ]
  1807. lib.ggml_mul_inplace.restype = ctypes.POINTER(ggml_tensor)
  1808. # GGML_API struct ggml_tensor * ggml_div(
  1809. # struct ggml_context * ctx,
  1810. # struct ggml_tensor * a,
  1811. # struct ggml_tensor * b);
  1812. def ggml_div(
  1813. ctx: ggml_context_p,
  1814. a: ggml_tensor_p,
  1815. b: ggml_tensor_p,
  1816. ) -> ggml_tensor_p:
  1817. """Element-wise divide two tensors and return the result.
  1818. Parameters:
  1819. ctx: ggml context
  1820. a: first tensor
  1821. b: second tensor
  1822. Returns:
  1823. Pointer to ggml_tensor"""
  1824. return lib.ggml_div(ctx, a, b)
  1825. lib.ggml_div.argtypes = [
  1826. ggml_context_p,
  1827. ctypes.POINTER(ggml_tensor),
  1828. ctypes.POINTER(ggml_tensor),
  1829. ]
  1830. lib.ggml_div.restype = ctypes.POINTER(ggml_tensor)
  1831. # GGML_API struct ggml_tensor * ggml_div_inplace(
  1832. # struct ggml_context * ctx,
  1833. # struct ggml_tensor * a,
  1834. # struct ggml_tensor * b);
  1835. def ggml_div_inplace(
  1836. ctx: ggml_context_p,
  1837. a: ggml_tensor_p,
  1838. b: ggml_tensor_p,
  1839. ) -> ggml_tensor_p:
  1840. """Element-wise divide two tensors and store the result in the first tensor.
  1841. Parameters:
  1842. ctx: ggml context
  1843. a: first tensor
  1844. b: second tensor
  1845. Returns:
  1846. Pointer to ggml_tensor"""
  1847. return lib.ggml_div_inplace(ctx, a, b)
  1848. lib.ggml_div_inplace.argtypes = [
  1849. ggml_context_p,
  1850. ctypes.POINTER(ggml_tensor),
  1851. ctypes.POINTER(ggml_tensor),
  1852. ]
  1853. lib.ggml_div_inplace.restype = ctypes.POINTER(ggml_tensor)
  1854. # GGML_API struct ggml_tensor * ggml_sqr(
  1855. # struct ggml_context * ctx,
  1856. # struct ggml_tensor * a);
  1857. def ggml_sqr(
  1858. ctx: ggml_context_p,
  1859. a: ggml_tensor_p,
  1860. ) -> ggml_tensor_p:
  1861. """Square all elements in a tensor and return the result.
  1862. Parameters:
  1863. ctx: ggml context
  1864. a: tensor
  1865. Returns:
  1866. Pointer to ggml_tensor"""
  1867. return lib.ggml_sqr(ctx, a)
  1868. lib.ggml_sqr.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1869. lib.ggml_sqr.restype = ctypes.POINTER(ggml_tensor)
  1870. # GGML_API struct ggml_tensor * ggml_sqr_inplace(
  1871. # struct ggml_context * ctx,
  1872. # struct ggml_tensor * a);
  1873. def ggml_sqr_inplace(
  1874. ctx: ggml_context_p,
  1875. a: ggml_tensor_p,
  1876. ) -> ggml_tensor_p:
  1877. """Square all elements in a tensor and store the result in the first tensor.
  1878. Parameters:
  1879. ctx: ggml context
  1880. a: tensor
  1881. Returns:
  1882. Pointer to ggml_tensor"""
  1883. return lib.ggml_sqr_inplace(ctx, a)
  1884. lib.ggml_sqr_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1885. lib.ggml_sqr_inplace.restype = ctypes.POINTER(ggml_tensor)
  1886. # GGML_API struct ggml_tensor * ggml_sqrt(
  1887. # struct ggml_context * ctx,
  1888. # struct ggml_tensor * a);
  1889. def ggml_sqrt(
  1890. ctx: ggml_context_p,
  1891. a: ggml_tensor_p,
  1892. ) -> ggml_tensor_p:
  1893. """Square root all elements in a tensor and return the result.
  1894. Parameters:
  1895. ctx: ggml context
  1896. a: tensor
  1897. Returns:
  1898. Pointer to ggml_tensor"""
  1899. return lib.ggml_sqrt(ctx, a)
  1900. lib.ggml_sqrt.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1901. lib.ggml_sqrt.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_sqrt_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_sqrt_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
) -> ggml_tensor_p:
    """Square root all elements in a tensor and store the result in the first tensor.
    Parameters:
        ctx: ggml context
        a: tensor
    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_sqrt_inplace(ctx, a)
lib.ggml_sqrt_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_sqrt_inplace.restype = ctypes.POINTER(ggml_tensor)
  1917. # GGML_API struct ggml_tensor * ggml_log(
  1918. # struct ggml_context * ctx,
  1919. # struct ggml_tensor * a);
  1920. def ggml_log(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  1921. """Take the natural logarithm of all elements in a tensor and return the result.
  1922. Parameters:
  1923. ctx: ggml context
  1924. a: tensor
  1925. Returns:
  1926. Pointer to ggml_tensor"""
  1927. return lib.ggml_log(ctx, a)
  1928. lib.ggml_log.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1929. lib.ggml_log.restype = ctypes.POINTER(ggml_tensor)
  1930. # GGML_API struct ggml_tensor * ggml_log_inplace(
  1931. # struct ggml_context * ctx,
  1932. # struct ggml_tensor * a);
  1933. def ggml_log_inplace(
  1934. ctx: ggml_context_p,
  1935. a: ggml_tensor_p,
  1936. ) -> ggml_tensor_p:
  1937. """Take the natural logarithm of all elements in a tensor and store the result in the first tensor.
  1938. Parameters:
  1939. ctx: ggml context
  1940. a: tensor
  1941. Returns:
  1942. Pointer to ggml_tensor"""
  1943. return lib.ggml_log_inplace(ctx, a)
  1944. lib.ggml_log_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1945. lib.ggml_log_inplace.restype = ctypes.POINTER(ggml_tensor)
  1946. # // return scalar
  1947. # GGML_API struct ggml_tensor * ggml_sum(
  1948. # struct ggml_context * ctx,
  1949. # struct ggml_tensor * a);
  1950. def ggml_sum(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  1951. """Sum all elements in a tensor and return the result.
  1952. Parameters:
  1953. ctx: ggml context
  1954. a: tensor
  1955. Returns:
  1956. Pointer to ggml_tensor"""
  1957. return lib.ggml_sum(ctx, a)
  1958. lib.ggml_sum.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1959. lib.ggml_sum.restype = ctypes.POINTER(ggml_tensor)
  1960. # // sums along rows, with input shape [a,b,c,d] return shape [1,b,c,d]
  1961. # GGML_API struct ggml_tensor * ggml_sum_rows(
  1962. # struct ggml_context * ctx,
  1963. # struct ggml_tensor * a);
  1964. def ggml_sum_rows(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  1965. """Sum all elements in a tensor along the first axis and return the result.
  1966. sums along rows, with input shape [a,b,c,d] return shape [1,b,c,d]
  1967. Parameters:
  1968. ctx: ggml context
  1969. a: tensor
  1970. Returns:
  1971. Pointer to ggml_tensor"""
  1972. return lib.ggml_sum_rows(ctx, a)
  1973. lib.ggml_sum_rows.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1974. lib.ggml_sum_rows.restype = ctypes.POINTER(ggml_tensor)
  1975. # // mean along rows
  1976. # GGML_API struct ggml_tensor * ggml_mean(
  1977. # struct ggml_context * ctx,
  1978. # struct ggml_tensor * a);
  1979. def ggml_mean(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  1980. """Take the mean of all elements in a tensor and return the result.
  1981. Parameters:
  1982. ctx: ggml context
  1983. a: tensor
  1984. Returns:
  1985. Pointer to ggml_tensor"""
  1986. return lib.ggml_mean(ctx, a)
  1987. lib.ggml_mean.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1988. lib.ggml_mean.restype = ctypes.POINTER(ggml_tensor)
  1989. # // argmax along rows
  1990. # GGML_API struct ggml_tensor * ggml_argmax(
  1991. # struct ggml_context * ctx,
  1992. # struct ggml_tensor * a);
  1993. def ggml_argmax(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  1994. """Take the argmax of all elements in a tensor and return the result.
  1995. argmax along rows
  1996. Parameters:
  1997. ctx: ggml context
  1998. a: tensor
  1999. Returns:
  2000. Pointer to ggml_tensor"""
  2001. return lib.ggml_argmax(ctx, a)
  2002. lib.ggml_argmax.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2003. lib.ggml_argmax.restype = ctypes.POINTER(ggml_tensor)
  2004. # // if a is the same shape as b, and a is not parameter, return a
  2005. # // otherwise, return a new tensor: repeat(a) to fit in b
  2006. # GGML_API struct ggml_tensor * ggml_repeat(
  2007. # struct ggml_context * ctx,
  2008. # struct ggml_tensor * a,
  2009. # struct ggml_tensor * b);
  2010. def ggml_repeat(
  2011. ctx: ggml_context_p,
  2012. a: ggml_tensor_p,
  2013. b: ggml_tensor_p,
  2014. ) -> ggml_tensor_p:
  2015. """Repeat a tensor to fit the shape of another tensor.
  2016. If a is the same shape as b, and a is not parameter, return a
  2017. Parameters:
  2018. ctx: ggml context
  2019. a: tensor to repeat
  2020. b: tensor to fit
  2021. Returns:
  2022. Pointer to ggml_tensor"""
  2023. return lib.ggml_repeat(ctx, a, b)
  2024. lib.ggml_repeat.argtypes = [
  2025. ggml_context_p,
  2026. ctypes.POINTER(ggml_tensor),
  2027. ctypes.POINTER(ggml_tensor),
  2028. ]
  2029. lib.ggml_repeat.restype = ctypes.POINTER(ggml_tensor)
# // sums repetitions in a into shape of b
# GGML_API struct ggml_tensor * ggml_repeat_back(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b);
def ggml_repeat_back(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    """Reverse of ggml_repeat: per the C header, sums the repetitions in `a`
    down into the shape of `b`.
    Parameters:
        ctx: ggml context
        a: repeated tensor
        b: tensor whose shape the result takes
    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_repeat_back(ctx, a, b)
lib.ggml_repeat_back.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_repeat_back.restype = ctypes.POINTER(ggml_tensor)
# // concat a and b on dim 2
# // used in stable-diffusion
# GGML_API struct ggml_tensor * ggml_concat(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b);
def ggml_concat(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    """Concatenate two tensors along dim 2 (the third 0-indexed dimension,
    per the C header comment) and return the result.
    Parameters:
        ctx: ggml context
        a: first tensor
        b: second tensor
    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_concat(ctx, a, b)
lib.ggml_concat.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_concat.restype = ctypes.POINTER(ggml_tensor)
  2072. # GGML_API struct ggml_tensor * ggml_abs(
  2073. # struct ggml_context * ctx,
  2074. # struct ggml_tensor * a);
  2075. def ggml_abs(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  2076. """Take the absolute value of all elements in a tensor and return the result.
  2077. Parameters:
  2078. ctx: ggml context
  2079. a: tensor
  2080. Returns:
  2081. Pointer to ggml_tensor"""
  2082. return lib.ggml_abs(ctx, a)
  2083. lib.ggml_abs.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2084. lib.ggml_abs.restype = ctypes.POINTER(ggml_tensor)
  2085. # GGML_API struct ggml_tensor * ggml_abs_inplace(
  2086. # struct ggml_context * ctx,
  2087. # struct ggml_tensor * a);
  2088. def ggml_abs_inplace(
  2089. ctx: ggml_context_p,
  2090. a: ggml_tensor_p,
  2091. ) -> ggml_tensor_p:
  2092. """Take the absolute value of all elements in a tensor and store the result in the first tensor.
  2093. Parameters:
  2094. ctx: ggml context
  2095. a: tensor
  2096. Returns:
  2097. Pointer to ggml_tensor"""
  2098. return lib.ggml_abs_inplace(ctx, a)
  2099. lib.ggml_abs_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2100. lib.ggml_abs_inplace.restype = ctypes.POINTER(ggml_tensor)
  2101. # GGML_API struct ggml_tensor * ggml_sgn(
  2102. # struct ggml_context * ctx,
  2103. # struct ggml_tensor * a);
  2104. def ggml_sgn(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  2105. """Get the sign of all elements in a tensor and return the result.
  2106. Parameters:
  2107. ctx: ggml context
  2108. a: tensor
  2109. Returns:
  2110. Pointer to ggml_tensor"""
  2111. return lib.ggml_sgn(ctx, a)
  2112. lib.ggml_sgn.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2113. lib.ggml_sgn.restype = ctypes.POINTER(ggml_tensor)
  2114. # GGML_API struct ggml_tensor * ggml_sgn_inplace(
  2115. # struct ggml_context * ctx,
  2116. # struct ggml_tensor * a);
  2117. def ggml_sgn_inplace(
  2118. ctx: ggml_context_p,
  2119. a: ggml_tensor_p,
  2120. ) -> ggml_tensor_p:
  2121. """Get the sign of all elements in a tensor and store the result in the first tensor.
  2122. Parameters:
  2123. ctx: ggml context
  2124. a: tensor
  2125. Returns:
  2126. Pointer to ggml_tensor"""
  2127. return lib.ggml_sgn_inplace(ctx, a)
  2128. lib.ggml_sgn_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2129. lib.ggml_sgn_inplace.restype = ctypes.POINTER(ggml_tensor)
  2130. # GGML_API struct ggml_tensor * ggml_neg(
  2131. # struct ggml_context * ctx,
  2132. # struct ggml_tensor * a);
  2133. def ggml_neg(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  2134. """Negate all elements in a tensor and return the result.
  2135. Parameters:
  2136. ctx: ggml context
  2137. a: tensor
  2138. Returns:
  2139. Pointer to ggml_tensor"""
  2140. return lib.ggml_neg(ctx, a)
  2141. lib.ggml_neg.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2142. lib.ggml_neg.restype = ctypes.POINTER(ggml_tensor)
  2143. # GGML_API struct ggml_tensor * ggml_neg_inplace(
  2144. # struct ggml_context * ctx,
  2145. # struct ggml_tensor * a);
  2146. def ggml_neg_inplace(
  2147. ctx: ggml_context_p,
  2148. a: ggml_tensor_p,
  2149. ) -> ggml_tensor_p:
  2150. """Negate all elements in a tensor and store the result in the first tensor.
  2151. Parameters:
  2152. ctx: ggml context
  2153. a: tensor
  2154. Returns:
  2155. Pointer to ggml_tensor"""
  2156. return lib.ggml_neg_inplace(ctx, a)
  2157. lib.ggml_neg_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2158. lib.ggml_neg_inplace.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_step(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_step(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
    """Apply the step function to `a` and return the result.
    NOTE(review): presumably the Heaviside step (1 for positive elements,
    0 otherwise) — confirm against the ggml implementation.
    Parameters:
        ctx: ggml context
        a: tensor
    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_step(ctx, a)
lib.ggml_step.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_step.restype = ctypes.POINTER(ggml_tensor)
  2166. # GGML_API struct ggml_tensor * ggml_tanh(
  2167. # struct ggml_context * ctx,
  2168. # struct ggml_tensor * a);
  2169. def ggml_tanh(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  2170. """Apply the tanh activation function to all elements in a tensor and return the result.
  2171. Parameters:
  2172. ctx: ggml context
  2173. a: tensor
  2174. Returns:
  2175. Pointer to ggml_tensor"""
  2176. return lib.ggml_tanh(ctx, a)
  2177. lib.ggml_tanh.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2178. lib.ggml_tanh.restype = ctypes.POINTER(ggml_tensor)
  2179. # GGML_API struct ggml_tensor * ggml_tanh_inplace(
  2180. # struct ggml_context * ctx,
  2181. # struct ggml_tensor * a);
  2182. def ggml_tanh_inplace(
  2183. ctx: ggml_context_p,
  2184. a: ggml_tensor_p,
  2185. ) -> ggml_tensor_p:
  2186. """Apply the tanh activation function to all elements in a tensor and store the result in the first tensor.
  2187. Parameters:
  2188. ctx: ggml context
  2189. a: tensor
  2190. Returns:
  2191. Pointer to ggml_tensor"""
  2192. return lib.ggml_tanh_inplace(ctx, a)
  2193. lib.ggml_tanh_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2194. lib.ggml_tanh_inplace.restype = ctypes.POINTER(ggml_tensor)
  2195. # GGML_API struct ggml_tensor * ggml_elu(
  2196. # struct ggml_context * ctx,
  2197. # struct ggml_tensor * a);
  2198. def ggml_elu(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  2199. """Apply the ELU activation function to all elements in a tensor and return the result.
  2200. Parameters:
  2201. ctx: ggml context
  2202. a: tensor
  2203. Returns:
  2204. Pointer to ggml_tensor"""
  2205. return lib.ggml_elu(ctx, a)
  2206. lib.ggml_elu.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2207. lib.ggml_elu.restype = ctypes.POINTER(ggml_tensor)
  2208. # GGML_API struct ggml_tensor * ggml_elu_inplace(
  2209. # struct ggml_context * ctx,
  2210. # struct ggml_tensor * a);
  2211. def ggml_elu_inplace(
  2212. ctx: ggml_context_p,
  2213. a: ggml_tensor_p,
  2214. ) -> ggml_tensor_p:
  2215. """Apply the ELU activation function to all elements in a tensor and store the result in the first tensor.
  2216. Parameters:
  2217. ctx: ggml context
  2218. a: tensor
  2219. Returns:
  2220. Pointer to ggml_tensor"""
  2221. return lib.ggml_elu_inplace(ctx, a)
  2222. lib.ggml_elu_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2223. lib.ggml_elu_inplace.restype = ctypes.POINTER(ggml_tensor)
  2224. # GGML_API struct ggml_tensor * ggml_relu(
  2225. # struct ggml_context * ctx,
  2226. # struct ggml_tensor * a);
  2227. def ggml_relu(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  2228. """Apply the ReLU activation function to all elements in a tensor and return the result.
  2229. Parameters:
  2230. ctx: ggml context
  2231. a: tensor
  2232. Returns:
  2233. Pointer to ggml_tensor"""
  2234. return lib.ggml_relu(ctx, a)
  2235. lib.ggml_relu.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2236. lib.ggml_relu.restype = ctypes.POINTER(ggml_tensor)
  2237. # GGML_API struct ggml_tensor * ggml_leaky_relu(
  2238. # struct ggml_context * ctx,
  2239. # struct ggml_tensor * a, float negative_slope, bool inplace);
  2240. def ggml_leaky_relu(
  2241. ctx: ggml_context_p, a: ggml_tensor_p, negative_slope: float, inplace: bool
  2242. ) -> ggml_tensor_p:
  2243. """Apply the Leaky ReLU activation function to all elements in a tensor and return the result.
  2244. Parameters:
  2245. ctx: ggml context
  2246. a: tensor
  2247. negative_slope: negative slope
  2248. inplace: whether to store the result in the first tensor
  2249. Returns:
  2250. Pointer to ggml_tensor"""
  2251. return lib.ggml_leaky_relu(ctx, a, negative_slope, inplace)
  2252. lib.ggml_leaky_relu.argtypes = [
  2253. ggml_context_p,
  2254. ctypes.POINTER(ggml_tensor),
  2255. ctypes.c_float,
  2256. ctypes.c_bool,
  2257. ]
  2258. lib.ggml_leaky_relu.restype = ctypes.POINTER(ggml_tensor)
  2259. # GGML_API struct ggml_tensor * ggml_relu_inplace(
  2260. # struct ggml_context * ctx,
  2261. # struct ggml_tensor * a);
  2262. def ggml_relu_inplace(
  2263. ctx: ggml_context_p,
  2264. a: ggml_tensor_p,
  2265. ) -> ggml_tensor_p:
  2266. """Apply the ReLU activation function to all elements in a tensor and store the result in the first tensor.
  2267. Parameters:
  2268. ctx: ggml context
  2269. a: tensor
  2270. Returns:
  2271. Pointer to ggml_tensor"""
  2272. return lib.ggml_relu_inplace(ctx, a)
  2273. lib.ggml_relu_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2274. lib.ggml_relu_inplace.restype = ctypes.POINTER(ggml_tensor)
  2275. # GGML_API struct ggml_tensor * ggml_gelu(
  2276. # struct ggml_context * ctx,
  2277. # struct ggml_tensor * a);
  2278. def ggml_gelu(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  2279. """Apply the Gaussian Error Linear Unit activation function to all elements in a tensor and return the result.
  2280. Parameters:
  2281. ctx: ggml context
  2282. a: tensor
  2283. Returns:
  2284. Pointer to ggml_tensor"""
  2285. return lib.ggml_gelu(ctx, a)
  2286. lib.ggml_gelu.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2287. lib.ggml_gelu.restype = ctypes.POINTER(ggml_tensor)
  2288. # GGML_API struct ggml_tensor * ggml_gelu_inplace(
  2289. # struct ggml_context * ctx,
  2290. # struct ggml_tensor * a);
  2291. def ggml_gelu_inplace(
  2292. ctx: ggml_context_p,
  2293. a: ggml_tensor_p,
  2294. ) -> ggml_tensor_p:
  2295. """Apply the Gaussian Error Linear Unit activation function to all elements in a tensor and store the result in the first tensor.
  2296. Parameters:
  2297. ctx: ggml context
  2298. a: tensor
  2299. Returns:
  2300. Pointer to ggml_tensor"""
  2301. return lib.ggml_gelu_inplace(ctx, a)
  2302. lib.ggml_gelu_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2303. lib.ggml_gelu_inplace.restype = ctypes.POINTER(ggml_tensor)
  2304. # GGML_API struct ggml_tensor * ggml_gelu_quick(
  2305. # struct ggml_context * ctx,
  2306. # struct ggml_tensor * a);
  2307. def ggml_gelu_quick(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  2308. """Apply the Gaussian Error Linear Unit activation function to all elements in a tensor and return the result.
  2309. Parameters:
  2310. ctx: ggml context
  2311. a: tensor
  2312. Returns:
  2313. Pointer to ggml_tensor"""
  2314. return lib.ggml_gelu_quick(ctx, a)
  2315. lib.ggml_gelu_quick.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2316. lib.ggml_gelu_quick.restype = ctypes.POINTER(ggml_tensor)
  2317. # GGML_API struct ggml_tensor * ggml_gelu_quick_inplace(
  2318. # struct ggml_context * ctx,
  2319. # struct ggml_tensor * a);
  2320. def ggml_gelu_quick_inplace(
  2321. ctx: ggml_context_p,
  2322. a: ggml_tensor_p,
  2323. ) -> ggml_tensor_p:
  2324. """Apply the Gaussian Error Linear Unit activation function to all elements in a tensor and store the result in the first tensor.
  2325. Parameters:
  2326. ctx: ggml context
  2327. a: tensor
  2328. Returns:
  2329. Pointer to ggml_tensor"""
  2330. return lib.ggml_gelu_quick_inplace(ctx, a)
  2331. lib.ggml_gelu_quick_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2332. lib.ggml_gelu_quick_inplace.restype = ctypes.POINTER(ggml_tensor)
  2333. # GGML_API struct ggml_tensor * ggml_silu(
  2334. # struct ggml_context * ctx,
  2335. # struct ggml_tensor * a);
  2336. def ggml_silu(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  2337. """Apply the Sigmoid Linear Unit activation function to all elements in a tensor and return the result.
  2338. Parameters:
  2339. ctx: ggml context
  2340. a: tensor
  2341. Returns:
  2342. Pointer to ggml_tensor"""
  2343. return lib.ggml_silu(ctx, a)
  2344. lib.ggml_silu.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2345. lib.ggml_silu.restype = ctypes.POINTER(ggml_tensor)
  2346. # GGML_API struct ggml_tensor * ggml_silu_inplace(
  2347. # struct ggml_context * ctx,
  2348. # struct ggml_tensor * a);
  2349. def ggml_silu_inplace(
  2350. ctx: ggml_context_p,
  2351. a: ggml_tensor_p,
  2352. ) -> ggml_tensor_p:
  2353. """Apply the Sigmoid Linear Unit activation function to all elements in a tensor and store the result in the first tensor.
  2354. Parameters:
  2355. ctx: ggml context
  2356. a: tensor
  2357. Returns:
  2358. Pointer to ggml_tensor"""
  2359. return lib.ggml_silu_inplace(ctx, a)
  2360. lib.ggml_silu_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2361. lib.ggml_silu_inplace.restype = ctypes.POINTER(ggml_tensor)
  2362. # // a - x
  2363. # // b - dy
  2364. # GGML_API struct ggml_tensor * ggml_silu_back(
  2365. # struct ggml_context * ctx,
  2366. # struct ggml_tensor * a,
  2367. # struct ggml_tensor * b);
  2368. def ggml_silu_back(
  2369. ctx: ggml_context_p,
  2370. a: ggml_tensor_p,
  2371. b: ggml_tensor_p,
  2372. ) -> ggml_tensor_p:
  2373. return lib.ggml_silu_back(ctx, a, b)
  2374. lib.ggml_silu_back.argtypes = [
  2375. ggml_context_p,
  2376. ctypes.POINTER(ggml_tensor),
  2377. ctypes.POINTER(ggml_tensor),
  2378. ]
  2379. lib.ggml_silu_back.restype = ctypes.POINTER(ggml_tensor)
  2380. # // normalize along rows
  2381. # GGML_API struct ggml_tensor * ggml_norm(
  2382. # struct ggml_context * ctx,
  2383. # struct ggml_tensor * a
  2384. # float eps);
  2385. def ggml_norm(
  2386. ctx: ggml_context_p,
  2387. a: ggml_tensor_p,
  2388. eps: Union[ctypes.c_float, float],
  2389. ) -> ggml_tensor_p:
  2390. """Normalize all elements in a tensor along the first axis and return the result.
  2391. normalize along rows.
  2392. Parameters:
  2393. ctx: ggml context
  2394. a: tensor
  2395. eps: minimum value to avoid division by zero
  2396. Returns:
  2397. Pointer to ggml_tensor"""
  2398. return lib.ggml_norm(ctx, a, eps)
  2399. lib.ggml_norm.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor), ctypes.c_float]
  2400. lib.ggml_norm.restype = ctypes.POINTER(ggml_tensor)
  2401. # GGML_API struct ggml_tensor * ggml_norm_inplace(
  2402. # struct ggml_context * ctx,
  2403. # struct ggml_tensor * a
  2404. # float eps);
  2405. def ggml_norm_inplace(
  2406. ctx: ggml_context_p,
  2407. a: ggml_tensor_p,
  2408. eps: Union[ctypes.c_float, float],
  2409. ) -> ggml_tensor_p:
  2410. """Normalize all elements in a tensor along the first axis and store the result in the first tensor.
  2411. normalize along rows.
  2412. Parameters:
  2413. ctx: ggml context
  2414. a: tensor
  2415. eps: minimum value to avoid division by zero
  2416. Returns:
  2417. Pointer to ggml_tensor"""
  2418. return lib.ggml_norm_inplace(ctx, a, eps)
  2419. lib.ggml_norm_inplace.argtypes = [
  2420. ggml_context_p,
  2421. ctypes.POINTER(ggml_tensor),
  2422. ctypes.c_float,
  2423. ]
  2424. lib.ggml_norm_inplace.restype = ctypes.POINTER(ggml_tensor)
  2425. # GGML_API struct ggml_tensor * ggml_rms_norm(
  2426. # struct ggml_context * ctx,
  2427. # struct ggml_tensor * a,
  2428. # float eps);
  2429. def ggml_rms_norm(
  2430. ctx: ggml_context_p,
  2431. a: ggml_tensor_p,
  2432. eps: Union[ctypes.c_float, float],
  2433. ) -> ggml_tensor_p:
  2434. """Compute the RMS norm of a tensor and return the result.
  2435. Parameters:
  2436. ctx: ggml context
  2437. a: tensor
  2438. eps: float
  2439. Returns:
  2440. Pointer to ggml_tensor"""
  2441. return lib.ggml_rms_norm(ctx, a, eps)
  2442. lib.ggml_rms_norm.argtypes = [
  2443. ggml_context_p,
  2444. ctypes.POINTER(ggml_tensor),
  2445. ctypes.c_float,
  2446. ]
  2447. lib.ggml_rms_norm.restype = ctypes.POINTER(ggml_tensor)
  2448. # GGML_API struct ggml_tensor * ggml_rms_norm_inplace(
  2449. # struct ggml_context * ctx,
  2450. # struct ggml_tensor * a,
  2451. # float eps);
  2452. def ggml_rms_norm_inplace(
  2453. ctx: ggml_context_p,
  2454. a: ggml_tensor_p,
  2455. eps: Union[ctypes.c_float, float],
  2456. ) -> ggml_tensor_p:
  2457. return lib.ggml_rms_norm_inplace(ctx, a, eps)
  2458. lib.ggml_rms_norm_inplace.argtypes = [
  2459. ggml_context_p,
  2460. ctypes.POINTER(ggml_tensor),
  2461. ctypes.c_float,
  2462. ]
  2463. lib.ggml_rms_norm_inplace.restype = ctypes.POINTER(ggml_tensor)
  2464. # // group normalize along ne0*ne1*n_groups
  2465. # // used in stable-diffusion
  2466. # // TODO: eps is hardcoded to 1e-6 for now
  2467. # GGML_API struct ggml_tensor * ggml_group_norm(
  2468. # struct ggml_context * ctx,
  2469. # struct ggml_tensor * a,
  2470. # int n_groups);
  2471. def ggml_group_norm(
  2472. ctx: ggml_context_p,
  2473. a: ggml_tensor_p,
  2474. n_groups: int,
  2475. ) -> ggml_tensor_p:
  2476. """Group normalize a tensor and return the result.
  2477. Parameters:
  2478. ctx: ggml context
  2479. a: tensor
  2480. n_groups: int
  2481. Returns:
  2482. Pointer to ggml_tensor"""
  2483. return lib.ggml_group_norm(ctx, a, n_groups)
  2484. lib.ggml_group_norm.argtypes = [
  2485. ggml_context_p,
  2486. ctypes.POINTER(ggml_tensor),
  2487. ctypes.c_int,
  2488. ]
  2489. lib.ggml_group_norm.restype = ctypes.POINTER(ggml_tensor)
  2490. # GGML_API struct ggml_tensor * ggml_group_norm_inplace(
  2491. # struct ggml_context * ctx,
  2492. # struct ggml_tensor * a,
  2493. # int n_groups);
  2494. def ggml_group_norm_inplace(
  2495. ctx: ggml_context_p,
  2496. a: ggml_tensor_p,
  2497. n_groups: int,
  2498. ) -> ggml_tensor_p:
  2499. """Group normalize a tensor and store the result in the first tensor.
  2500. Parameters:
  2501. ctx: ggml context
  2502. a: tensor
  2503. n_groups: int
  2504. Returns:
  2505. Pointer to ggml_tensor"""
  2506. return lib.ggml_group_norm_inplace(ctx, a, n_groups)
  2507. lib.ggml_group_norm_inplace.argtypes = [
  2508. ggml_context_p,
  2509. ctypes.POINTER(ggml_tensor),
  2510. ctypes.c_int,
  2511. ]
  2512. lib.ggml_group_norm_inplace.restype = ctypes.POINTER(ggml_tensor)
  2513. # // a - x
  2514. # // b - dy
  2515. # GGML_API struct ggml_tensor * ggml_rms_norm_back(
  2516. # struct ggml_context * ctx,
  2517. # struct ggml_tensor * a,
  2518. # struct ggml_tensor * b
  2519. # float eps);
  2520. def ggml_rms_norm_back(
  2521. ctx: ggml_context_p,
  2522. a: ggml_tensor_p,
  2523. b: ggml_tensor_p,
  2524. eps: Union[ctypes.c_float, float],
  2525. ) -> ggml_tensor_p:
  2526. return lib.ggml_rms_norm_back(ctx, a, b, eps)
  2527. lib.ggml_rms_norm_back.argtypes = [
  2528. ggml_context_p,
  2529. ctypes.POINTER(ggml_tensor),
  2530. ctypes.POINTER(ggml_tensor),
  2531. ctypes.c_float,
  2532. ]
  2533. lib.ggml_rms_norm_back.restype = ctypes.POINTER(ggml_tensor)
  2534. # // A: k columns, n rows => [ne03, ne02, n, k]
  2535. # // B: k columns, m rows (i.e. we transpose it internally) => [ne03 * x, ne02 * y, m, k]
  2536. # // result is n columns, m rows => [ne03 * x, ne02 * y, m, n]
  2537. # GGML_API struct ggml_tensor * ggml_mul_mat(
  2538. # struct ggml_context * ctx,
  2539. # struct ggml_tensor * a,
  2540. # struct ggml_tensor * b);
  2541. def ggml_mul_mat(
  2542. ctx: ggml_context_p,
  2543. a: ggml_tensor_p,
  2544. b: ggml_tensor_p,
  2545. ) -> ggml_tensor_p:
  2546. """Multiply two matrices and return the result.
  2547. A: k columns, n rows => [ne03, ne02, n, k]
  2548. B: k columns, m rows (i.e. we transpose it internally) => [ne03 * x, ne02 * y, m, k]
  2549. result is n columns, m rows => [ne03 * x, ne02 * y, m, n]
  2550. Parameters:
  2551. ctx: ggml context
  2552. a: tensor
  2553. b: tensor
  2554. Returns:
  2555. Pointer to ggml_tensor"""
  2556. return lib.ggml_mul_mat(ctx, a, b)
  2557. lib.ggml_mul_mat.argtypes = [
  2558. ggml_context_p,
  2559. ctypes.POINTER(ggml_tensor),
  2560. ctypes.POINTER(ggml_tensor),
  2561. ]
  2562. lib.ggml_mul_mat.restype = ctypes.POINTER(ggml_tensor)
  2563. # // indirect matrix multiplication
  2564. # // ggml_mul_mat_id(ctx, as, ids, id, b) ~= ggml_mul_mat(as[ids[id]], b)
  2565. # GGML_API struct ggml_tensor * ggml_mul_mat_id(
  2566. # struct ggml_context * ctx,
  2567. # struct ggml_tensor * const as[],
  2568. # int n_as,
  2569. # struct ggml_tensor * ids,
  2570. # int id,
  2571. # struct ggml_tensor * b);
  2572. def ggml_mul_mat_id(
  2573. ctx: ggml_context_p,
  2574. as_, # type: ctypes.POINTER(ctypes.POINTER(ggml_tensor)) # type: ignore
  2575. n_as: int,
  2576. ids: ggml_tensor_p,
  2577. id_: int,
  2578. b: ggml_tensor_p,
  2579. ) -> ggml_tensor_p:
  2580. """Multiply two matrices and return the result.
  2581. indirect matrix multiplication
  2582. ggml_mul_mat_id(ctx, as, ids, id, b) ~= ggml_mul_mat(as[ids[id]], b)
  2583. Parameters:
  2584. ctx: ggml context
  2585. as_: array of tensor pointers
  2586. n_as: int
  2587. ids: tensor
  2588. id_: int
  2589. b: tensor
  2590. Returns:
  2591. Pointer to ggml_tensor"""
  2592. return lib.ggml_mul_mat_id(ctx, as_, n_as, ids, id_, b)
  2593. lib.ggml_mul_mat_id.argtypes = [
  2594. ggml_context_p,
  2595. ctypes.POINTER(ctypes.POINTER(ggml_tensor)),
  2596. ctypes.c_int,
  2597. ctypes.POINTER(ggml_tensor),
  2598. ctypes.c_int,
  2599. ctypes.POINTER(ggml_tensor),
  2600. ]
  2601. lib.ggml_mul_mat_id.restype = ctypes.POINTER(ggml_tensor)
  2602. # // A: m columns, n rows,
  2603. # // B: p columns, n rows,
  2604. # // result is m columns, p rows
  2605. # GGML_API struct ggml_tensor * ggml_out_prod(
  2606. # struct ggml_context * ctx,
  2607. # struct ggml_tensor * a,
  2608. # struct ggml_tensor * b);
  2609. def ggml_out_prod(
  2610. ctx: ggml_context_p,
  2611. a: ggml_tensor_p,
  2612. b: ggml_tensor_p,
  2613. ) -> ggml_tensor_p:
  2614. """Compute the outer product of two matrices and return the result.
  2615. A: m columns, n rows,
  2616. B: p columns, n rows,
  2617. result is m columns, p rows
  2618. Parameters:
  2619. ctx: ggml context
  2620. a: tensor
  2621. b: tensor
  2622. Returns:
  2623. Pointer to ggml_tensor"""
  2624. return lib.ggml_out_prod(ctx, a, b)
  2625. lib.ggml_out_prod.argtypes = [
  2626. ggml_context_p,
  2627. ctypes.POINTER(ggml_tensor),
  2628. ctypes.POINTER(ggml_tensor),
  2629. ]
  2630. lib.ggml_out_prod.restype = ctypes.POINTER(ggml_tensor)
  2631. # //
  2632. # // operations on tensors without backpropagation
  2633. # //
  2634. # GGML_API struct ggml_tensor * ggml_scale(
  2635. # struct ggml_context * ctx,
  2636. # struct ggml_tensor * a,
  2637. # struct ggml_tensor * b);
  2638. def ggml_scale(
  2639. ctx: ggml_context_p,
  2640. a: ggml_tensor_p,
  2641. b: ggml_tensor_p,
  2642. ) -> ggml_tensor_p:
  2643. """Scale a tensor by another tensor and return the result.
  2644. Parameters:
  2645. ctx: ggml context
  2646. a: tensor
  2647. b: tensor
  2648. Returns:
  2649. Pointer to ggml_tensor"""
  2650. return lib.ggml_scale(ctx, a, b)
  2651. lib.ggml_scale.argtypes = [
  2652. ggml_context_p,
  2653. ctypes.POINTER(ggml_tensor),
  2654. ctypes.POINTER(ggml_tensor),
  2655. ]
  2656. lib.ggml_scale.restype = ctypes.POINTER(ggml_tensor)
  2657. # // in-place, returns view(a)
  2658. # GGML_API struct ggml_tensor * ggml_scale_inplace(
  2659. # struct ggml_context * ctx,
  2660. # struct ggml_tensor * a,
  2661. # struct ggml_tensor * b);
  2662. def ggml_scale_inplace(
  2663. ctx: ggml_context_p,
  2664. a: ggml_tensor_p,
  2665. b: ggml_tensor_p,
  2666. ) -> ggml_tensor_p:
  2667. """Scale a tensor by another tensor and store the result in the first tensor.
  2668. Parameters:
  2669. ctx: ggml context
  2670. a: tensor
  2671. Returns:
  2672. Pointer to ggml_tensor"""
  2673. return lib.ggml_scale_inplace(ctx, a, b)
  2674. lib.ggml_scale_inplace.argtypes = [
  2675. ggml_context_p,
  2676. ctypes.POINTER(ggml_tensor),
  2677. ctypes.POINTER(ggml_tensor),
  2678. ]
  2679. lib.ggml_scale_inplace.restype = ctypes.POINTER(ggml_tensor)
  2680. # // b -> view(a,offset,nb1,nb2,3), return modified a
  2681. # GGML_API struct ggml_tensor * ggml_set(
  2682. # struct ggml_context * ctx,
  2683. # struct ggml_tensor * a,
  2684. # struct ggml_tensor * b,
  2685. # size_t nb1,
  2686. # size_t nb2,
  2687. # size_t nb3,
  2688. # size_t offset);
  2689. def ggml_set(
  2690. ctx: ggml_context_p,
  2691. a: ggml_tensor_p,
  2692. b: ggml_tensor_p,
  2693. nb1: Union[ctypes.c_size_t, int],
  2694. nb2: Union[ctypes.c_size_t, int],
  2695. nb3: Union[ctypes.c_size_t, int],
  2696. offset: Union[ctypes.c_size_t, int],
  2697. ) -> ggml_tensor_p:
  2698. return lib.ggml_set(ctx, a, b, nb1, nb2, nb3, offset)
  2699. lib.ggml_set.argtypes = [
  2700. ggml_context_p,
  2701. ctypes.POINTER(ggml_tensor),
  2702. ctypes.POINTER(ggml_tensor),
  2703. ctypes.c_size_t,
  2704. ctypes.c_size_t,
  2705. ctypes.c_size_t,
  2706. ctypes.c_size_t,
  2707. ]
  2708. lib.ggml_set.restype = ctypes.POINTER(ggml_tensor)
  2709. # // b -> view(a,offset,nb1,nb2,3), return view(a)
  2710. # GGML_API struct ggml_tensor * ggml_set_inplace(
  2711. # struct ggml_context * ctx,
  2712. # struct ggml_tensor * a,
  2713. # struct ggml_tensor * b,
  2714. # size_t nb1,
  2715. # size_t nb2,
  2716. # size_t nb3,
  2717. # size_t offset);
  2718. def ggml_set_inplace(
  2719. ctx: ggml_context_p,
  2720. a: ggml_tensor_p,
  2721. b: ggml_tensor_p,
  2722. nb1: Union[ctypes.c_size_t, int],
  2723. nb2: Union[ctypes.c_size_t, int],
  2724. nb3: Union[ctypes.c_size_t, int],
  2725. offset: Union[ctypes.c_size_t, int],
  2726. ) -> ggml_tensor_p:
  2727. return lib.ggml_set_inplace(ctx, a, b, nb1, nb2, nb3, offset)
  2728. lib.ggml_set_inplace.argtypes = [
  2729. ggml_context_p,
  2730. ctypes.POINTER(ggml_tensor),
  2731. ctypes.POINTER(ggml_tensor),
  2732. ctypes.c_size_t,
  2733. ctypes.c_size_t,
  2734. ctypes.c_size_t,
  2735. ctypes.c_size_t,
  2736. ]
  2737. lib.ggml_set_inplace.restype = ctypes.POINTER(ggml_tensor)
  2738. # GGML_API struct ggml_tensor * ggml_set_1d(
  2739. # struct ggml_context * ctx,
  2740. # struct ggml_tensor * a,
  2741. # struct ggml_tensor * b,
  2742. # size_t offset);
  2743. def ggml_set_1d(
  2744. ctx: ggml_context_p,
  2745. a: ggml_tensor_p,
  2746. b: ggml_tensor_p,
  2747. offset: Union[ctypes.c_size_t, int],
  2748. ) -> ggml_tensor_p:
  2749. return lib.ggml_set_1d(ctx, a, b, offset)
  2750. lib.ggml_set_1d.argtypes = [
  2751. ggml_context_p,
  2752. ctypes.POINTER(ggml_tensor),
  2753. ctypes.POINTER(ggml_tensor),
  2754. ctypes.c_size_t,
  2755. ]
  2756. lib.ggml_set_1d.restype = ctypes.POINTER(ggml_tensor)
  2757. # GGML_API struct ggml_tensor * ggml_set_1d_inplace(
  2758. # struct ggml_context * ctx,
  2759. # struct ggml_tensor * a,
  2760. # struct ggml_tensor * b,
  2761. # size_t offset);
  2762. def ggml_set_1d_inplace(
  2763. ctx: ggml_context_p,
  2764. a: ggml_tensor_p,
  2765. b: ggml_tensor_p,
  2766. offset: Union[ctypes.c_size_t, int],
  2767. ) -> ggml_tensor_p:
  2768. return lib.ggml_set_1d_inplace(ctx, a, b, offset)
  2769. lib.ggml_set_1d_inplace.argtypes = [
  2770. ggml_context_p,
  2771. ctypes.POINTER(ggml_tensor),
  2772. ctypes.POINTER(ggml_tensor),
  2773. ctypes.c_size_t,
  2774. ]
  2775. lib.ggml_set_1d_inplace.restype = ctypes.POINTER(ggml_tensor)
  2776. # // b -> view(a,offset,nb1,nb2,3), return modified a
  2777. # GGML_API struct ggml_tensor * ggml_set_2d(
  2778. # struct ggml_context * ctx,
  2779. # struct ggml_tensor * a,
  2780. # struct ggml_tensor * b,
  2781. # size_t nb1,
  2782. # size_t offset);
  2783. def ggml_set_2d(
  2784. ctx: ggml_context_p,
  2785. a: ggml_tensor_p,
  2786. b: ggml_tensor_p,
  2787. nb1: Union[ctypes.c_size_t, int],
  2788. offset: Union[ctypes.c_size_t, int],
  2789. ) -> ggml_tensor_p:
  2790. return lib.ggml_set_2d(ctx, a, b, nb1, offset)
  2791. lib.ggml_set_2d.argtypes = [
  2792. ggml_context_p,
  2793. ctypes.POINTER(ggml_tensor),
  2794. ctypes.POINTER(ggml_tensor),
  2795. ctypes.c_size_t,
  2796. ctypes.c_size_t,
  2797. ]
  2798. lib.ggml_set_2d.restype = ctypes.POINTER(ggml_tensor)
  2799. # // b -> view(a,offset,nb1,nb2,3), return view(a)
  2800. # GGML_API struct ggml_tensor * ggml_set_2d_inplace(
  2801. # struct ggml_context * ctx,
  2802. # struct ggml_tensor * a,
  2803. # struct ggml_tensor * b,
  2804. # size_t nb1,
  2805. # size_t offset);
  2806. def ggml_set_2d_inplace(
  2807. ctx: ggml_context_p,
  2808. a: ggml_tensor_p,
  2809. b: ggml_tensor_p,
  2810. nb1: Union[ctypes.c_size_t, int],
  2811. offset: Union[ctypes.c_size_t, int],
  2812. ) -> ggml_tensor_p:
  2813. return lib.ggml_set_2d_inplace(ctx, a, b, nb1, offset)
  2814. lib.ggml_set_2d_inplace.argtypes = [
  2815. ggml_context_p,
  2816. ctypes.POINTER(ggml_tensor),
  2817. ctypes.POINTER(ggml_tensor),
  2818. ctypes.c_size_t,
  2819. ctypes.c_size_t,
  2820. ]
  2821. lib.ggml_set_2d_inplace.restype = ctypes.POINTER(ggml_tensor)
  2822. # // a -> b, return view(b)
  2823. # GGML_API struct ggml_tensor * ggml_cpy(
  2824. # struct ggml_context * ctx,
  2825. # struct ggml_tensor * a,
  2826. # struct ggml_tensor * b);
  2827. def ggml_cpy(
  2828. ctx: ggml_context_p,
  2829. a: ggml_tensor_p,
  2830. b: ggml_tensor_p,
  2831. ) -> ggml_tensor_p:
  2832. return lib.ggml_cpy(ctx, a, b)
  2833. lib.ggml_cpy.argtypes = [
  2834. ggml_context_p,
  2835. ctypes.POINTER(ggml_tensor),
  2836. ctypes.POINTER(ggml_tensor),
  2837. ]
  2838. lib.ggml_cpy.restype = ctypes.POINTER(ggml_tensor)
  2839. # // a -> b, in-place, return view(b)
  2840. # GGML_API struct ggml_tensor * ggml_cpy_inplace(
  2841. # struct ggml_context * ctx,
  2842. # struct ggml_tensor * a,
  2843. # struct ggml_tensor * b);
  2844. def ggml_cpy_inplace(
  2845. ctx: ggml_context_p,
  2846. a: ggml_tensor_p,
  2847. b: ggml_tensor_p,
  2848. ) -> ggml_tensor_p:
  2849. return lib.ggml_cpy_inplace(ctx, a, b)
  2850. lib.ggml_cpy_inplace.argtypes = [
  2851. ggml_context_p,
  2852. ctypes.POINTER(ggml_tensor),
  2853. ctypes.POINTER(ggml_tensor),
  2854. ]
  2855. lib.ggml_cpy_inplace.restype = ctypes.POINTER(ggml_tensor)
  2856. # // make contiguous
  2857. # GGML_API struct ggml_tensor * ggml_cont(
  2858. # struct ggml_context * ctx,
  2859. # struct ggml_tensor * a);
  2860. def ggml_cont(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  2861. """Make a tensor contiguous and return the result.
  2862. Parameters:
  2863. ctx: ggml context
  2864. a: tensor
  2865. Returns:
  2866. Pointer to ggml_tensor"""
  2867. return lib.ggml_cont(ctx, a)
  2868. lib.ggml_cont.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2869. lib.ggml_cont.restype = ctypes.POINTER(ggml_tensor)
  2870. # // make contiguous, in-place
  2871. # GGML_API struct ggml_tensor * ggml_cont_inplace(
  2872. # struct ggml_context * ctx,
  2873. # struct ggml_tensor * a);
  2874. def ggml_cont_inplace(
  2875. ctx: ggml_context_p,
  2876. a: ggml_tensor_p,
  2877. ) -> ggml_tensor_p:
  2878. """Make a tensor contiguous and store the result in the first tensor.
  2879. Parameters:
  2880. ctx: ggml context
  2881. a: tensor
  2882. Returns:
  2883. Pointer to ggml_tensor"""
  2884. return lib.ggml_cont_inplace(ctx, a)
  2885. lib.ggml_cont_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2886. lib.ggml_cont_inplace.restype = ctypes.POINTER(ggml_tensor)
  2887. # // make contiguous, with new shape
  2888. # GGML_API struct ggml_tensor * ggml_cont_1d(
  2889. # struct ggml_context * ctx,
  2890. # struct ggml_tensor * a,
  2891. # int64_t ne0);
  2892. def ggml_cont_1d(
  2893. ctx: ggml_context_p,
  2894. a: ggml_tensor_p,
  2895. ne0: Union[ctypes.c_int64, int],
  2896. ) -> ggml_tensor_p:
  2897. return lib.ggml_cont_1d(ctx, a, ne0)
  2898. lib.ggml_cont_1d.argtypes = [
  2899. ggml_context_p,
  2900. ctypes.POINTER(ggml_tensor),
  2901. ctypes.c_int64,
  2902. ]
  2903. lib.ggml_cont_1d.restype = ctypes.POINTER(ggml_tensor)
  2904. # GGML_API struct ggml_tensor * ggml_cont_2d(
  2905. # struct ggml_context * ctx,
  2906. # struct ggml_tensor * a,
  2907. # int64_t ne0,
  2908. # int64_t ne1);
  2909. def ggml_cont_2d(
  2910. ctx: ggml_context_p,
  2911. a: ggml_tensor_p,
  2912. ne0: Union[ctypes.c_int64, int],
  2913. ne1: Union[ctypes.c_int64, int],
  2914. ) -> ggml_tensor_p:
  2915. return lib.ggml_cont_2d(ctx, a, ne0, ne1)
  2916. lib.ggml_cont_2d.argtypes = [
  2917. ggml_context_p,
  2918. ctypes.POINTER(ggml_tensor),
  2919. ctypes.c_int64,
  2920. ctypes.c_int64,
  2921. ]
  2922. lib.ggml_cont_2d.restype = ctypes.POINTER(ggml_tensor)
  2923. # GGML_API struct ggml_tensor * ggml_cont_3d(
  2924. # struct ggml_context * ctx,
  2925. # struct ggml_tensor * a,
  2926. # int64_t ne0,
  2927. # int64_t ne1,
  2928. # int64_t ne2);
  2929. def ggml_cont_3d(
  2930. ctx: ggml_context_p,
  2931. a: ggml_tensor_p,
  2932. ne0: Union[ctypes.c_int64, int],
  2933. ne1: Union[ctypes.c_int64, int],
  2934. ne2: Union[ctypes.c_int64, int],
  2935. ) -> ggml_tensor_p:
  2936. return lib.ggml_cont_3d(ctx, a, ne0, ne1, ne2)
  2937. lib.ggml_cont_3d.argtypes = [
  2938. ggml_context_p,
  2939. ctypes.POINTER(ggml_tensor),
  2940. ctypes.c_int64,
  2941. ctypes.c_int64,
  2942. ctypes.c_int64,
  2943. ]
  2944. lib.ggml_cont_3d.restype = ctypes.POINTER(ggml_tensor)
  2945. # GGML_API struct ggml_tensor * ggml_cont_4d(
  2946. # struct ggml_context * ctx,
  2947. # struct ggml_tensor * a,
  2948. # int64_t ne0,
  2949. # int64_t ne1,
  2950. # int64_t ne2,
  2951. # int64_t ne3);
  2952. def ggml_cont_4d(
  2953. ctx: ggml_context_p,
  2954. a: ggml_tensor_p,
  2955. ne0: Union[ctypes.c_int64, int],
  2956. ne1: Union[ctypes.c_int64, int],
  2957. ne2: Union[ctypes.c_int64, int],
  2958. ne3: Union[ctypes.c_int64, int],
  2959. ) -> ggml_tensor_p:
  2960. return lib.ggml_cont_4d(ctx, a, ne0, ne1, ne2, ne3)
  2961. lib.ggml_cont_4d.argtypes = [
  2962. ggml_context_p,
  2963. ctypes.POINTER(ggml_tensor),
  2964. ctypes.c_int64,
  2965. ctypes.c_int64,
  2966. ctypes.c_int64,
  2967. ctypes.c_int64,
  2968. ]
  2969. lib.ggml_cont_4d.restype = ctypes.POINTER(ggml_tensor)
  2970. # // return view(a), b specifies the new shape
  2971. # // TODO: when we start computing gradient, make a copy instead of view
  2972. # GGML_API struct ggml_tensor * ggml_reshape(
  2973. # struct ggml_context * ctx,
  2974. # struct ggml_tensor * a,
  2975. # struct ggml_tensor * b);
  2976. def ggml_reshape(
  2977. ctx: ggml_context_p,
  2978. a: ggml_tensor_p,
  2979. b: ggml_tensor_p,
  2980. ) -> ggml_tensor_p:
  2981. return lib.ggml_reshape(ctx, a, b)
  2982. lib.ggml_reshape.argtypes = [
  2983. ggml_context_p,
  2984. ctypes.POINTER(ggml_tensor),
  2985. ctypes.POINTER(ggml_tensor),
  2986. ]
  2987. lib.ggml_reshape.restype = ctypes.POINTER(ggml_tensor)
  2988. # // return view(a)
  2989. # // TODO: when we start computing gradient, make a copy instead of view
  2990. # GGML_API struct ggml_tensor * ggml_reshape_1d(
  2991. # struct ggml_context * ctx,
  2992. # struct ggml_tensor * a,
  2993. # int64_t ne0);
  2994. def ggml_reshape_1d(
  2995. ctx: ggml_context_p,
  2996. a: ggml_tensor_p,
  2997. ne0: Union[ctypes.c_int64, int],
  2998. ) -> ggml_tensor_p:
  2999. return lib.ggml_reshape_1d(ctx, a, ne0)
  3000. lib.ggml_reshape_1d.argtypes = [
  3001. ggml_context_p,
  3002. ctypes.POINTER(ggml_tensor),
  3003. ctypes.c_int64,
  3004. ]
  3005. lib.ggml_reshape_1d.restype = ctypes.POINTER(ggml_tensor)
  3006. # GGML_API struct ggml_tensor * ggml_reshape_2d(
  3007. # struct ggml_context * ctx,
  3008. # struct ggml_tensor * a,
  3009. # int64_t ne0,
  3010. # int64_t ne1);
  3011. def ggml_reshape_2d(
  3012. ctx: ggml_context_p,
  3013. a: ggml_tensor_p,
  3014. ne0: Union[ctypes.c_int64, int],
  3015. ne1: Union[ctypes.c_int64, int],
  3016. ) -> ggml_tensor_p:
  3017. return lib.ggml_reshape_2d(ctx, a, ne0, ne1)
  3018. lib.ggml_reshape_2d.argtypes = [
  3019. ggml_context_p,
  3020. ctypes.POINTER(ggml_tensor),
  3021. ctypes.c_int64,
  3022. ctypes.c_int64,
  3023. ]
  3024. lib.ggml_reshape_2d.restype = ctypes.POINTER(ggml_tensor)
  3025. # // return view(a)
  3026. # // TODO: when we start computing gradient, make a copy instead of view
  3027. # GGML_API struct ggml_tensor * ggml_reshape_3d(
  3028. # struct ggml_context * ctx,
  3029. # struct ggml_tensor * a,
  3030. # int64_t ne0,
  3031. # int64_t ne1,
  3032. # int64_t ne2);
  3033. def ggml_reshape_3d(
  3034. ctx: ggml_context_p,
  3035. a: ggml_tensor_p,
  3036. ne0: Union[ctypes.c_int64, int],
  3037. ne1: Union[ctypes.c_int64, int],
  3038. ne2: Union[ctypes.c_int64, int],
  3039. ) -> ggml_tensor_p:
  3040. return lib.ggml_reshape_3d(ctx, a, ne0, ne1, ne2)
  3041. lib.ggml_reshape_3d.argtypes = [
  3042. ggml_context_p,
  3043. ctypes.POINTER(ggml_tensor),
  3044. ctypes.c_int64,
  3045. ctypes.c_int64,
  3046. ctypes.c_int64,
  3047. ]
  3048. lib.ggml_reshape_3d.restype = ctypes.POINTER(ggml_tensor)
  3049. # GGML_API struct ggml_tensor * ggml_reshape_4d(
  3050. # struct ggml_context * ctx,
  3051. # struct ggml_tensor * a,
  3052. # int64_t ne0,
  3053. # int64_t ne1,
  3054. # int64_t ne2,
  3055. # int64_t ne3);
  3056. def ggml_reshape_4d(
  3057. ctx: ggml_context_p,
  3058. a: ggml_tensor_p,
  3059. ne0: Union[ctypes.c_int64, int],
  3060. ne1: Union[ctypes.c_int64, int],
  3061. ne2: Union[ctypes.c_int64, int],
  3062. ne3: Union[ctypes.c_int64, int],
  3063. ) -> ggml_tensor_p:
  3064. return lib.ggml_reshape_4d(ctx, a, ne0, ne1, ne2, ne3)
  3065. lib.ggml_reshape_4d.argtypes = [
  3066. ggml_context_p,
  3067. ctypes.POINTER(ggml_tensor),
  3068. ctypes.c_int64,
  3069. ctypes.c_int64,
  3070. ctypes.c_int64,
  3071. ctypes.c_int64,
  3072. ]
  3073. lib.ggml_reshape_4d.restype = ctypes.POINTER(ggml_tensor)
  3074. # // offset in bytes
  3075. # GGML_API struct ggml_tensor * ggml_view_1d(
  3076. # struct ggml_context * ctx,
  3077. # struct ggml_tensor * a,
  3078. # int64_t ne0,
  3079. # size_t offset);
  3080. def ggml_view_1d(
  3081. ctx: ggml_context_p,
  3082. a: ggml_tensor_p,
  3083. ne0: Union[ctypes.c_int64, int],
  3084. offset: Union[ctypes.c_size_t, int],
  3085. ) -> ggml_tensor_p:
  3086. return lib.ggml_view_1d(ctx, a, ne0, offset)
  3087. lib.ggml_view_1d.argtypes = [
  3088. ggml_context_p,
  3089. ctypes.POINTER(ggml_tensor),
  3090. ctypes.c_int64,
  3091. ctypes.c_size_t,
  3092. ]
  3093. lib.ggml_view_1d.restype = ctypes.POINTER(ggml_tensor)
  3094. # GGML_API struct ggml_tensor * ggml_view_2d(
  3095. # struct ggml_context * ctx,
  3096. # struct ggml_tensor * a,
  3097. # int64_t ne0,
  3098. # int64_t ne1,
  3099. # size_t nb1, // row stride in bytes
  3100. # size_t offset);
  3101. def ggml_view_2d(
  3102. ctx: ggml_context_p,
  3103. a: ggml_tensor_p,
  3104. ne0: Union[ctypes.c_int64, int],
  3105. ne1: Union[ctypes.c_int64, int],
  3106. nb1: Union[ctypes.c_size_t, int],
  3107. offset: Union[ctypes.c_size_t, int],
  3108. ) -> ggml_tensor_p:
  3109. return lib.ggml_view_2d(ctx, a, ne0, ne1, nb1, offset)
  3110. lib.ggml_view_2d.argtypes = [
  3111. ggml_context_p,
  3112. ctypes.POINTER(ggml_tensor),
  3113. ctypes.c_int64,
  3114. ctypes.c_int64,
  3115. ctypes.c_size_t,
  3116. ctypes.c_size_t,
  3117. ]
  3118. lib.ggml_view_2d.restype = ctypes.POINTER(ggml_tensor)
  3119. # GGML_API struct ggml_tensor * ggml_view_3d(
  3120. # struct ggml_context * ctx,
  3121. # struct ggml_tensor * a,
  3122. # int64_t ne0,
  3123. # int64_t ne1,
  3124. # int64_t ne2,
  3125. # size_t nb1, // row stride in bytes
  3126. # size_t nb2, // slice stride in bytes
  3127. # size_t offset);
  3128. def ggml_view_3d(
  3129. ctx: ggml_context_p,
  3130. a: ggml_tensor_p,
  3131. ne0: Union[ctypes.c_int64, int],
  3132. ne1: Union[ctypes.c_int64, int],
  3133. ne2: Union[ctypes.c_int64, int],
  3134. nb1: Union[ctypes.c_size_t, int],
  3135. nb2: Union[ctypes.c_size_t, int],
  3136. offset: Union[ctypes.c_size_t, int],
  3137. ) -> ggml_tensor_p:
  3138. return lib.ggml_view_3d(ctx, a, ne0, ne1, ne2, nb1, nb2, offset)
  3139. lib.ggml_view_3d.argtypes = [
  3140. ggml_context_p,
  3141. ctypes.POINTER(ggml_tensor),
  3142. ctypes.c_int64,
  3143. ctypes.c_int64,
  3144. ctypes.c_int64,
  3145. ctypes.c_size_t,
  3146. ctypes.c_size_t,
  3147. ctypes.c_size_t,
  3148. ]
  3149. lib.ggml_view_3d.restype = ctypes.POINTER(ggml_tensor)
  3150. # GGML_API struct ggml_tensor * ggml_view_4d(
  3151. # struct ggml_context * ctx,
  3152. # struct ggml_tensor * a,
  3153. # int64_t ne0,
  3154. # int64_t ne1,
  3155. # int64_t ne2,
  3156. # int64_t ne3,
  3157. # size_t nb1, // row stride in bytes
  3158. # size_t nb2, // slice stride in bytes
  3159. # size_t nb3,
  3160. # size_t offset);
  3161. def ggml_view_4d(
  3162. ctx: ggml_context_p,
  3163. a: ggml_tensor_p,
  3164. ne0: Union[ctypes.c_int64, int],
  3165. ne1: Union[ctypes.c_int64, int],
  3166. ne2: Union[ctypes.c_int64, int],
  3167. ne3: Union[ctypes.c_int64, int],
  3168. nb1: Union[ctypes.c_size_t, int],
  3169. nb2: Union[ctypes.c_size_t, int],
  3170. nb3: Union[ctypes.c_size_t, int],
  3171. offset: Union[ctypes.c_size_t, int],
  3172. ) -> ggml_tensor_p:
  3173. return lib.ggml_view_4d(ctx, a, ne0, ne1, ne2, ne3, nb1, nb2, nb3, offset)
  3174. lib.ggml_view_4d.argtypes = [
  3175. ggml_context_p,
  3176. ctypes.POINTER(ggml_tensor),
  3177. ctypes.c_int64,
  3178. ctypes.c_int64,
  3179. ctypes.c_int64,
  3180. ctypes.c_int64,
  3181. ctypes.c_size_t,
  3182. ctypes.c_size_t,
  3183. ctypes.c_size_t,
  3184. ctypes.c_size_t,
  3185. ]
  3186. lib.ggml_view_4d.restype = ctypes.POINTER(ggml_tensor)
  3187. # GGML_API struct ggml_tensor * ggml_permute(
  3188. # struct ggml_context * ctx,
  3189. # struct ggml_tensor * a,
  3190. # int axis0,
  3191. # int axis1,
  3192. # int axis2,
  3193. # int axis3);
  3194. def ggml_permute(
  3195. ctx: ggml_context_p,
  3196. a: ggml_tensor_p,
  3197. axis0: Union[ctypes.c_int, int],
  3198. axis1: Union[ctypes.c_int, int],
  3199. axis2: Union[ctypes.c_int, int],
  3200. axis3: Union[ctypes.c_int, int],
  3201. ) -> ggml_tensor_p:
  3202. return lib.ggml_permute(ctx, a, axis0, axis1, axis2, axis3)
  3203. lib.ggml_permute.argtypes = [
  3204. ggml_context_p,
  3205. ctypes.POINTER(ggml_tensor),
  3206. ctypes.c_int,
  3207. ctypes.c_int,
  3208. ctypes.c_int,
  3209. ctypes.c_int,
  3210. ]
  3211. lib.ggml_permute.restype = ctypes.POINTER(ggml_tensor)
  3212. # // alias for ggml_permute(ctx, a, 1, 0, 2, 3)
  3213. # GGML_API struct ggml_tensor * ggml_transpose(
  3214. # struct ggml_context * ctx,
  3215. # struct ggml_tensor * a);
  3216. def ggml_transpose(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  3217. """Transpose *the first two dimensions* of a tensor and return the result.
  3218. alias for `ggml_permute(ctx, a, 1, 0, 2, 3)`
  3219. Parameters:
  3220. ctx: ggml context
  3221. a: tensor
  3222. Returns:
  3223. Pointer to ggml_tensor"""
  3224. return lib.ggml_transpose(ctx, a)
  3225. lib.ggml_transpose.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  3226. lib.ggml_transpose.restype = ctypes.POINTER(ggml_tensor)
  3227. # // supports 3D: a->ne[2] == b->ne[1]
  3228. # GGML_API struct ggml_tensor * ggml_get_rows(
  3229. # struct ggml_context * ctx,
  3230. # struct ggml_tensor * a,
  3231. # struct ggml_tensor * b);
  3232. def ggml_get_rows(
  3233. ctx: ggml_context_p,
  3234. a: ggml_tensor_p,
  3235. b: ggml_tensor_p,
  3236. ) -> ggml_tensor_p:
  3237. return lib.ggml_get_rows(ctx, a, b)
  3238. lib.ggml_get_rows.argtypes = [
  3239. ggml_context_p,
  3240. ctypes.POINTER(ggml_tensor),
  3241. ctypes.POINTER(ggml_tensor),
  3242. ]
  3243. lib.ggml_get_rows.restype = ctypes.POINTER(ggml_tensor)
  3244. # GGML_API struct ggml_tensor * ggml_get_rows_back(
  3245. # struct ggml_context * ctx,
  3246. # struct ggml_tensor * a,
  3247. # struct ggml_tensor * b,
  3248. # struct ggml_tensor * c);
  3249. def ggml_get_rows_back(
  3250. ctx: ggml_context_p,
  3251. a: ggml_tensor_p,
  3252. b: ggml_tensor_p,
  3253. c: ggml_tensor_p,
  3254. ) -> ggml_tensor_p:
  3255. return lib.ggml_get_rows_back(ctx, a, b, c)
  3256. lib.ggml_get_rows_back.argtypes = [
  3257. ggml_context_p,
  3258. ctypes.POINTER(ggml_tensor),
  3259. ctypes.POINTER(ggml_tensor),
  3260. ctypes.POINTER(ggml_tensor),
  3261. ]
  3262. lib.ggml_get_rows_back.restype = ctypes.POINTER(ggml_tensor)
  3263. # GGML_API struct ggml_tensor * ggml_diag(
  3264. # struct ggml_context * ctx,
  3265. # struct ggml_tensor * a);
  3266. def ggml_diag(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  3267. return lib.ggml_diag(ctx, a)
  3268. lib.ggml_diag.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  3269. lib.ggml_diag.restype = ctypes.POINTER(ggml_tensor)
  3270. # // set elements above the diagonal to -INF
  3271. # GGML_API struct ggml_tensor * ggml_diag_mask_inf(
  3272. # struct ggml_context * ctx,
  3273. # struct ggml_tensor * a,
  3274. # int n_past);
  3275. def ggml_diag_mask_inf(
  3276. ctx: ggml_context_p,
  3277. a: ggml_tensor_p,
  3278. n_past: Union[ctypes.c_int, int],
  3279. ) -> ggml_tensor_p:
  3280. return lib.ggml_diag_mask_inf(ctx, a, n_past)
  3281. lib.ggml_diag_mask_inf.argtypes = [
  3282. ggml_context_p,
  3283. ctypes.POINTER(ggml_tensor),
  3284. ctypes.c_int,
  3285. ]
  3286. lib.ggml_diag_mask_inf.restype = ctypes.POINTER(ggml_tensor)
  3287. # // in-place, returns view(a)
  3288. # GGML_API struct ggml_tensor * ggml_diag_mask_inf_inplace(
  3289. # struct ggml_context * ctx,
  3290. # struct ggml_tensor * a,
  3291. # int n_past);
  3292. def ggml_diag_mask_inf_inplace(
  3293. ctx: ggml_context_p,
  3294. a: ggml_tensor_p,
  3295. n_past: Union[ctypes.c_int, int],
  3296. ) -> ggml_tensor_p:
  3297. return lib.ggml_diag_mask_inf_inplace(ctx, a, n_past)
  3298. lib.ggml_diag_mask_inf_inplace.argtypes = [
  3299. ggml_context_p,
  3300. ctypes.POINTER(ggml_tensor),
  3301. ctypes.c_int,
  3302. ]
  3303. lib.ggml_diag_mask_inf_inplace.restype = ctypes.POINTER(ggml_tensor)
  3304. # // set elements above the diagonal to 0
  3305. # GGML_API struct ggml_tensor * ggml_diag_mask_zero(
  3306. # struct ggml_context * ctx,
  3307. # struct ggml_tensor * a,
  3308. # int n_past);
  3309. def ggml_diag_mask_zero(
  3310. ctx: ggml_context_p,
  3311. a: ggml_tensor_p,
  3312. n_past: Union[ctypes.c_int, int],
  3313. ) -> ggml_tensor_p:
  3314. return lib.ggml_diag_mask_zero(ctx, a, n_past)
  3315. lib.ggml_diag_mask_zero.argtypes = [
  3316. ggml_context_p,
  3317. ctypes.POINTER(ggml_tensor),
  3318. ctypes.c_int,
  3319. ]
  3320. lib.ggml_diag_mask_zero.restype = ctypes.POINTER(ggml_tensor)
  3321. # // in-place, returns view(a)
  3322. # GGML_API struct ggml_tensor * ggml_diag_mask_zero_inplace(
  3323. # struct ggml_context * ctx,
  3324. # struct ggml_tensor * a,
  3325. # int n_past);
  3326. def ggml_diag_mask_zero_inplace(
  3327. ctx: ggml_context_p,
  3328. a: ggml_tensor_p,
  3329. n_past: Union[ctypes.c_int, int],
  3330. ) -> ggml_tensor_p:
  3331. return lib.ggml_diag_mask_zero_inplace(ctx, a, n_past)
  3332. lib.ggml_diag_mask_zero_inplace.argtypes = [
  3333. ggml_context_p,
  3334. ctypes.POINTER(ggml_tensor),
  3335. ctypes.c_int,
  3336. ]
  3337. lib.ggml_diag_mask_zero_inplace.restype = ctypes.POINTER(ggml_tensor)
  3338. # GGML_API struct ggml_tensor * ggml_soft_max(
  3339. # struct ggml_context * ctx,
  3340. # struct ggml_tensor * a);
  3341. def ggml_soft_max(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  3342. return lib.ggml_soft_max(ctx, a)
  3343. lib.ggml_soft_max.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  3344. lib.ggml_soft_max.restype = ctypes.POINTER(ggml_tensor)
  3345. # // in-place, returns view(a)
  3346. # GGML_API struct ggml_tensor * ggml_soft_max_inplace(
  3347. # struct ggml_context * ctx,
  3348. # struct ggml_tensor * a);
  3349. def ggml_soft_max_inplace(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  3350. return lib.ggml_soft_max_inplace(ctx, a)
  3351. lib.ggml_soft_max_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  3352. lib.ggml_soft_max_inplace.restype = ctypes.POINTER(ggml_tensor)
  3353. # // fused soft_max(a*scale + mask)
  3354. # // mask is optional
  3355. # GGML_API struct ggml_tensor * ggml_soft_max_ext(
  3356. # struct ggml_context * ctx,
  3357. # struct ggml_tensor * a,
  3358. # struct ggml_tensor * mask,
  3359. # float scale);
  3360. def ggml_soft_max_ext(
  3361. ctx: ggml_context_p,
  3362. a: ggml_tensor_p,
  3363. mask: ggml_tensor_p,
  3364. scale: Union[ctypes.c_float, float],
  3365. ) -> ggml_tensor_p:
  3366. return lib.ggml_soft_max_ext(ctx, a, mask, scale)
  3367. lib.ggml_soft_max_ext.argtypes = [
  3368. ggml_context_p,
  3369. ctypes.POINTER(ggml_tensor),
  3370. ctypes.POINTER(ggml_tensor),
  3371. ctypes.c_float,
  3372. ]
  3373. lib.ggml_soft_max_ext.restype = ctypes.POINTER(ggml_tensor)
  3374. # GGML_API struct ggml_tensor * ggml_soft_max_back(
  3375. # struct ggml_context * ctx,
  3376. # struct ggml_tensor * a,
  3377. # struct ggml_tensor * b);
  3378. def ggml_soft_max_back(
  3379. ctx: ggml_context_p,
  3380. a: ggml_tensor_p,
  3381. b: ggml_tensor_p,
  3382. ) -> ggml_tensor_p:
  3383. return lib.ggml_soft_max_back(ctx, a, b)
  3384. lib.ggml_soft_max_back.argtypes = [
  3385. ggml_context_p,
  3386. ctypes.POINTER(ggml_tensor),
  3387. ctypes.POINTER(ggml_tensor),
  3388. ]
  3389. lib.ggml_soft_max_back.restype = ctypes.POINTER(ggml_tensor)
  3390. # // in-place, returns view(a)
  3391. # GGML_API struct ggml_tensor * ggml_soft_max_back_inplace(
  3392. # struct ggml_context * ctx,
  3393. # struct ggml_tensor * a,
  3394. # struct ggml_tensor * b);
  3395. def ggml_soft_max_back_inplace(
  3396. ctx: ggml_context_p,
  3397. a: ggml_tensor_p,
  3398. b: ggml_tensor_p,
  3399. ) -> ggml_tensor_p:
  3400. return lib.ggml_soft_max_back_inplace(ctx, a, b)
  3401. lib.ggml_soft_max_back_inplace.argtypes = [
  3402. ggml_context_p,
  3403. ctypes.POINTER(ggml_tensor),
  3404. ctypes.POINTER(ggml_tensor),
  3405. ]
  3406. lib.ggml_soft_max_back_inplace.restype = ctypes.POINTER(ggml_tensor)
  3407. # // rotary position embedding
  3408. # // if mode & 1 == 1, skip n_past elements (DEPRECATED)
  3409. # // if mode & 2 == 1, GPT-NeoX style
  3410. # // if mode & 4 == 1, ChatGLM style
  3411. # //
  3412. # // b is an int32 vector with size a->ne[2], it contains the positions
  3413. # GGML_API struct ggml_tensor * ggml_rope(
  3414. # struct ggml_context * ctx,
  3415. # struct ggml_tensor * a,
  3416. # struct ggml_tensor * b,
  3417. # int n_dims,
  3418. # int mode,
  3419. # int n_ctx);
  3420. def ggml_rope(
  3421. ctx: ggml_context_p,
  3422. a: ggml_tensor_p,
  3423. b: ggml_tensor_p,
  3424. n_dims: Union[ctypes.c_int, int],
  3425. mode: Union[ctypes.c_int, int],
  3426. n_ctx: Union[ctypes.c_int, int],
  3427. ) -> ggml_tensor_p:
  3428. """Rotary position embedding
  3429. Parameters:
  3430. ctx: ggml context
  3431. a: tensor
  3432. b: int32 vector with size a->ne[2], it contains the positions
  3433. n_dims: number of dimensions
  3434. mode: if mode & 1 == 1, skip n_past elements (DEPRECATED)
  3435. if mode & 2 == 1, GPT-NeoX style
  3436. if mode & 4 == 1, ChatGLM style
  3437. n_ctx: context size
  3438. Returns:
  3439. Pointer to ggml_tensor"""
  3440. return lib.ggml_rope(ctx, a, b, n_dims, mode, n_ctx)
  3441. lib.ggml_rope.argtypes = [
  3442. ggml_context_p,
  3443. ctypes.POINTER(ggml_tensor),
  3444. ctypes.POINTER(ggml_tensor),
  3445. ctypes.c_int,
  3446. ctypes.c_int,
  3447. ctypes.c_int,
  3448. ]
  3449. lib.ggml_rope.restype = ctypes.POINTER(ggml_tensor)
  3450. # // in-place, returns view(a)
  3451. # GGML_API struct ggml_tensor * ggml_rope_inplace(
  3452. # struct ggml_context * ctx,
  3453. # struct ggml_tensor * a,
  3454. # struct ggml_tensor * b,
  3455. # int n_dims,
  3456. # int mode,
  3457. # int n_ctx);
  3458. def ggml_rope_inplace(
  3459. ctx: ggml_context_p,
  3460. a: ggml_tensor_p,
  3461. b: ggml_tensor_p,
  3462. n_dims: Union[ctypes.c_int, int],
  3463. mode: Union[ctypes.c_int, int],
  3464. n_ctx: Union[ctypes.c_int, int],
  3465. ) -> ggml_tensor_p:
  3466. """Rotary position embedding inplace
  3467. Parameters:
  3468. ctx: ggml context
  3469. a: tensor
  3470. b: int32 vector with size a->ne[2], it contains the positions
  3471. n_dims: number of dimensions
  3472. mode: if mode & 1 == 1, skip n_past elements (DEPRECATED)
  3473. if mode & 2 == 1, GPT-NeoX style
  3474. if mode & 4 == 1, ChatGLM style
  3475. n_ctx: context size
  3476. Returns:
  3477. Pointer to ggml_tensor"""
  3478. return lib.ggml_rope_inplace(ctx, a, b, n_dims, mode, n_ctx)
  3479. lib.ggml_rope_inplace.argtypes = [
  3480. ggml_context_p,
  3481. ctypes.POINTER(ggml_tensor),
  3482. ctypes.POINTER(ggml_tensor),
  3483. ctypes.c_int,
  3484. ctypes.c_int,
  3485. ctypes.c_int,
  3486. ]
  3487. lib.ggml_rope_inplace.restype = ctypes.POINTER(ggml_tensor)
  3488. # // custom RoPE
  3489. # GGML_API struct ggml_tensor * ggml_rope_custom(
  3490. # struct ggml_context * ctx,
  3491. # struct ggml_tensor * a,
  3492. # struct ggml_tensor * b,
  3493. # int n_dims,
  3494. # int mode,
  3495. # int n_ctx,
  3496. # int n_orig_ctx,
  3497. # float freq_base,
  3498. # float freq_scale,
  3499. # float ext_factor,
  3500. # float attn_factor,
  3501. # float beta_fast,
  3502. # float beta_slow);
  3503. def ggml_rope_custom(
  3504. ctx: ggml_context_p,
  3505. a: ggml_tensor_p,
  3506. b: ggml_tensor_p,
  3507. n_dims: Union[ctypes.c_int, int],
  3508. mode: Union[ctypes.c_int, int],
  3509. n_ctx: Union[ctypes.c_int, int],
  3510. n_orig_ctx: Union[ctypes.c_int, int],
  3511. freq_base: Union[ctypes.c_float, float],
  3512. freq_scale: Union[ctypes.c_float, float],
  3513. ext_factor: Union[ctypes.c_float, float],
  3514. attn_factor: Union[ctypes.c_float, float],
  3515. beta_fast: Union[ctypes.c_float, float],
  3516. beta_slow: Union[ctypes.c_float, float],
  3517. ) -> ggml_tensor_p:
  3518. """Custom rotary position embedding"""
  3519. return lib.ggml_rope_custom(
  3520. ctx,
  3521. a,
  3522. b,
  3523. n_dims,
  3524. mode,
  3525. n_ctx,
  3526. n_orig_ctx,
  3527. freq_base,
  3528. freq_scale,
  3529. ext_factor,
  3530. attn_factor,
  3531. beta_fast,
  3532. beta_slow,
  3533. )
  3534. lib.ggml_rope_custom.argtypes = [
  3535. ggml_context_p,
  3536. ctypes.POINTER(ggml_tensor),
  3537. ctypes.POINTER(ggml_tensor),
  3538. ctypes.c_int,
  3539. ctypes.c_int,
  3540. ctypes.c_int,
  3541. ctypes.c_int,
  3542. ctypes.c_float,
  3543. ctypes.c_float,
  3544. ctypes.c_float,
  3545. ctypes.c_float,
  3546. ctypes.c_float,
  3547. ctypes.c_float,
  3548. ]
  3549. lib.ggml_rope_custom.restype = ctypes.POINTER(ggml_tensor)
  3550. # // in-place, returns view(a)
  3551. # GGML_API struct ggml_tensor * ggml_rope_custom_inplace(
  3552. # struct ggml_context * ctx,
  3553. # struct ggml_tensor * a,
  3554. # struct ggml_tensor * b,
  3555. # int n_dims,
  3556. # int mode,
  3557. # int n_ctx,
  3558. # int n_orig_ctx,
  3559. # float freq_base,
  3560. # float freq_scale,
  3561. # float ext_factor,
  3562. # float attn_factor,
  3563. # float beta_fast,
  3564. # float beta_slow);
  3565. def ggml_rope_custom_inplace(
  3566. ctx: ggml_context_p,
  3567. a: ggml_tensor_p,
  3568. b: ggml_tensor_p,
  3569. n_dims: Union[ctypes.c_int, int],
  3570. mode: Union[ctypes.c_int, int],
  3571. n_ctx: Union[ctypes.c_int, int],
  3572. n_orig_ctx: Union[ctypes.c_int, int],
  3573. freq_base: Union[ctypes.c_float, float],
  3574. freq_scale: Union[ctypes.c_float, float],
  3575. ext_factor: Union[ctypes.c_float, float],
  3576. attn_factor: Union[ctypes.c_float, float],
  3577. beta_fast: Union[ctypes.c_float, float],
  3578. beta_slow: Union[ctypes.c_float, float],
  3579. ) -> ggml_tensor_p:
  3580. """Custom rotary position embedding inplace"""
  3581. return lib.ggml_rope_custom_inplace(
  3582. ctx,
  3583. a,
  3584. b,
  3585. n_dims,
  3586. mode,
  3587. n_ctx,
  3588. n_orig_ctx,
  3589. freq_base,
  3590. freq_scale,
  3591. ext_factor,
  3592. attn_factor,
  3593. beta_fast,
  3594. beta_slow,
  3595. )
  3596. lib.ggml_rope_custom_inplace.argtypes = [
  3597. ggml_context_p,
  3598. ctypes.POINTER(ggml_tensor),
  3599. ctypes.POINTER(ggml_tensor),
  3600. ctypes.c_int,
  3601. ctypes.c_int,
  3602. ctypes.c_int,
  3603. ctypes.c_int,
  3604. ctypes.c_float,
  3605. ctypes.c_float,
  3606. ctypes.c_float,
  3607. ctypes.c_float,
  3608. ctypes.c_float,
  3609. ctypes.c_float,
  3610. ]
  3611. lib.ggml_rope_custom_inplace.restype = ctypes.POINTER(ggml_tensor)
  3612. # // compute correction dims for YaRN RoPE scaling
  3613. # void ggml_rope_yarn_corr_dims(
  3614. # int n_dims, int n_orig_ctx, float freq_base, float beta_fast, float beta_slow, float dims[2]);
  3615. def ggml_rope_yarn_corr_dims(
  3616. n_dims: Union[ctypes.c_int, int],
  3617. n_orig_ctx: Union[ctypes.c_int, int],
  3618. freq_base: Union[ctypes.c_float, float],
  3619. beta_fast: Union[ctypes.c_float, float],
  3620. beta_slow: Union[ctypes.c_float, float],
  3621. dims: CFloatArray,
  3622. ) -> None:
  3623. """Compute correction dims for YaRN RoPE scaling"""
  3624. return lib.ggml_rope_yarn_corr_dims(
  3625. n_dims,
  3626. n_orig_ctx,
  3627. freq_base,
  3628. beta_fast,
  3629. beta_slow,
  3630. dims,
  3631. )
  3632. lib.ggml_rope_yarn_corr_dims.argtypes = [
  3633. ctypes.c_int,
  3634. ctypes.c_int,
  3635. ctypes.c_float,
  3636. ctypes.c_float,
  3637. ctypes.c_float,
  3638. ctypes.POINTER(ctypes.c_float),
  3639. ]
  3640. lib.ggml_rope_yarn_corr_dims.restype = None
  3641. # // xPos RoPE, in-place, returns view(a)
  3642. # GGML_API struct ggml_tensor * ggml_rope_xpos_inplace(
  3643. # struct ggml_context * ctx,
  3644. # struct ggml_tensor * a,
  3645. # struct ggml_tensor * b,
  3646. # int n_dims,
  3647. # float base,
  3648. # bool down);
  3649. def ggml_rope_xpos_inplace(
  3650. ctx: ggml_context_p,
  3651. a: ggml_tensor_p,
  3652. b: ggml_tensor_p,
  3653. n_dims: Union[ctypes.c_int, int],
  3654. base: Union[ctypes.c_float, float],
  3655. down: Union[ctypes.c_bool, bool],
  3656. ) -> ggml_tensor_p:
  3657. """xPos RoPE, in-place, returns view(a)"""
  3658. return lib.ggml_rope_xpos_inplace(ctx, a, b, n_dims, base, down)
  3659. lib.ggml_rope_xpos_inplace.argtypes = [
  3660. ggml_context_p,
  3661. ctypes.POINTER(ggml_tensor),
  3662. ctypes.POINTER(ggml_tensor),
  3663. ctypes.c_int,
  3664. ctypes.c_float,
  3665. ctypes.c_bool,
  3666. ]
  3667. lib.ggml_rope_xpos_inplace.restype = ctypes.POINTER(ggml_tensor)
  3668. # // rotary position embedding backward, i.e compute dx from dy
  3669. # // a - dy
  3670. # GGML_API struct ggml_tensor * ggml_rope_back(
  3671. # struct ggml_context * ctx,
  3672. # struct ggml_tensor * a,
  3673. # struct ggml_tensor * b,
  3674. # int n_dims,
  3675. # int mode,
  3676. # int n_ctx,
  3677. # int n_orig_ctx,
  3678. # float freq_base,
  3679. # float freq_scale,
  3680. # float ext_factor,
  3681. # float attn_factor,
  3682. # float beta_fast,
  3683. # float beta_slow,
  3684. # float xpos_base,
  3685. # bool xpos_down);
  3686. def ggml_rope_back(
  3687. ctx: ggml_context_p,
  3688. a: ggml_tensor_p,
  3689. b: ggml_tensor_p,
  3690. n_dims: Union[ctypes.c_int, int],
  3691. mode: Union[ctypes.c_int, int],
  3692. n_ctx: Union[ctypes.c_int, int],
  3693. n_orig_ctx: Union[ctypes.c_int, int],
  3694. freq_base: Union[ctypes.c_float, float],
  3695. freq_scale: Union[ctypes.c_float, float],
  3696. ext_factor: Union[ctypes.c_float, float],
  3697. attn_factor: Union[ctypes.c_float, float],
  3698. beta_fast: Union[ctypes.c_float, float],
  3699. beta_slow: Union[ctypes.c_float, float],
  3700. xpos_base: Union[ctypes.c_float, float],
  3701. xpos_down: Union[ctypes.c_bool, bool],
  3702. ) -> ggml_tensor_p:
  3703. """Rotary position embedding backward pass"""
  3704. return lib.ggml_rope_back(
  3705. ctx,
  3706. a,
  3707. b,
  3708. n_dims,
  3709. mode,
  3710. n_ctx,
  3711. n_orig_ctx,
  3712. freq_base,
  3713. freq_scale,
  3714. ext_factor,
  3715. attn_factor,
  3716. beta_fast,
  3717. beta_slow,
  3718. xpos_base,
  3719. xpos_down,
  3720. )
  3721. lib.ggml_rope_back.argtypes = [
  3722. ggml_context_p,
  3723. ctypes.POINTER(ggml_tensor),
  3724. ctypes.POINTER(ggml_tensor),
  3725. ctypes.c_int,
  3726. ctypes.c_int,
  3727. ctypes.c_int,
  3728. ctypes.c_int,
  3729. ctypes.c_float,
  3730. ctypes.c_float,
  3731. ctypes.c_float,
  3732. ctypes.c_float,
  3733. ctypes.c_float,
  3734. ctypes.c_float,
  3735. ctypes.c_float,
  3736. ctypes.c_bool,
  3737. ]
  3738. lib.ggml_rope_back.restype = ctypes.POINTER(ggml_tensor)
  3739. # // alibi position embedding
  3740. # // in-place, returns view(a)
  3741. # GGML_API struct ggml_tensor * ggml_alibi(
  3742. # struct ggml_context * ctx,
  3743. # struct ggml_tensor * a,
  3744. # int n_past,
  3745. # int n_head,
  3746. # float bias_max);
  3747. def ggml_alibi(
  3748. ctx: ggml_context_p,
  3749. a: ggml_tensor_p,
  3750. n_past: Union[ctypes.c_int, int],
  3751. n_head: Union[ctypes.c_int, int],
  3752. bias_max: Union[ctypes.c_float, float],
  3753. ) -> ggml_tensor_p:
  3754. return lib.ggml_alibi(ctx, a, n_past, n_head, bias_max)
  3755. lib.ggml_alibi.argtypes = [
  3756. ggml_context_p,
  3757. ctypes.POINTER(ggml_tensor),
  3758. ctypes.c_int,
  3759. ctypes.c_int,
  3760. ctypes.c_float,
  3761. ]
  3762. lib.ggml_alibi.restype = ctypes.POINTER(ggml_tensor)
  3763. # // clamp
  3764. # // in-place, returns view(a)
  3765. # GGML_API struct ggml_tensor * ggml_clamp(
  3766. # struct ggml_context * ctx,
  3767. # struct ggml_tensor * a,
  3768. # float min,
  3769. # float max);
  3770. def ggml_clamp(
  3771. ctx: ggml_context_p,
  3772. a: ggml_tensor_p,
  3773. min: Union[ctypes.c_float, float],
  3774. max: Union[ctypes.c_float, float],
  3775. ) -> ggml_tensor_p:
  3776. """Clamp tensor values between min and max
  3777. Parameters:
  3778. ctx: ggml context
  3779. a: tensor
  3780. min: minimum value
  3781. max: maximum value
  3782. Returns:
  3783. Pointer to ggml_tensor"""
  3784. return lib.ggml_clamp(ctx, a, min, max)
  3785. lib.ggml_clamp.argtypes = [
  3786. ggml_context_p,
  3787. ctypes.POINTER(ggml_tensor),
  3788. ctypes.c_float,
  3789. ctypes.c_float,
  3790. ]
  3791. lib.ggml_clamp.restype = ctypes.POINTER(ggml_tensor)
  3792. # GGML_API struct ggml_tensor * ggml_im2col(
  3793. # struct ggml_context * ctx,
  3794. # struct ggml_tensor * a,
  3795. # struct ggml_tensor * b,
  3796. # int s0,
  3797. # int s1,
  3798. # int p0,
  3799. # int p1,
  3800. # int d0,
  3801. # int d1,
  3802. # bool is_2D);
  3803. def ggml_im2col(
  3804. ctx: ggml_context_p,
  3805. a: ggml_tensor_p,
  3806. b: ggml_tensor_p,
  3807. s0: Union[ctypes.c_int, int],
  3808. s1: Union[ctypes.c_int, int],
  3809. p0: Union[ctypes.c_int, int],
  3810. p1: Union[ctypes.c_int, int],
  3811. d0: Union[ctypes.c_int, int],
  3812. d1: Union[ctypes.c_int, int],
  3813. is_2D: Union[ctypes.c_bool, bool],
  3814. ) -> ggml_tensor_p:
  3815. return lib.ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, is_2D)
  3816. lib.ggml_im2col.argtypes = [
  3817. ggml_context_p,
  3818. ctypes.POINTER(ggml_tensor),
  3819. ctypes.POINTER(ggml_tensor),
  3820. ctypes.c_int,
  3821. ctypes.c_int,
  3822. ctypes.c_int,
  3823. ctypes.c_int,
  3824. ctypes.c_int,
  3825. ctypes.c_int,
  3826. ctypes.c_bool,
  3827. ]
  3828. lib.ggml_im2col.restype = ctypes.POINTER(ggml_tensor)
  3829. # GGML_API struct ggml_tensor * ggml_conv_1d(
  3830. # struct ggml_context * ctx,
  3831. # struct ggml_tensor * a,
  3832. # struct ggml_tensor * b,
  3833. # int s0, // stride
  3834. # int p0, // padding
  3835. # int d0); // dilation
  3836. def ggml_conv_1d(
  3837. ctx: ggml_context_p,
  3838. a: ggml_tensor_p,
  3839. b: ggml_tensor_p,
  3840. s0: Union[ctypes.c_int, int],
  3841. p0: Union[ctypes.c_int, int],
  3842. d0: Union[ctypes.c_int, int],
  3843. ) -> ggml_tensor_p:
  3844. """Convolution 1D
  3845. Parameters:
  3846. a: input tensor
  3847. b: filter tensor
  3848. s0: stride
  3849. p0: padding
  3850. d0: dilation
  3851. Returns:
  3852. output tensor"""
  3853. return lib.ggml_conv_1d(ctx, a, b, s0, p0, d0)
  3854. lib.ggml_conv_1d.argtypes = [
  3855. ggml_context_p,
  3856. ctypes.POINTER(ggml_tensor),
  3857. ctypes.POINTER(ggml_tensor),
  3858. ctypes.c_int,
  3859. ctypes.c_int,
  3860. ctypes.c_int,
  3861. ]
  3862. lib.ggml_conv_1d.restype = ctypes.POINTER(ggml_tensor)
  3863. # // conv_1d with padding = half
  3864. # // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
  3865. # GGML_API struct ggml_tensor* ggml_conv_1d_ph(
  3866. # struct ggml_context * ctx,
  3867. # struct ggml_tensor * a,
  3868. # struct ggml_tensor * b,
  3869. # int s,
  3870. # int d);
  3871. def ggml_conv_1d_ph(
  3872. ctx: ggml_context_p,
  3873. a: ggml_tensor_p,
  3874. b: ggml_tensor_p,
  3875. s: Union[ctypes.c_int, int],
  3876. d: Union[ctypes.c_int, int],
  3877. ) -> ggml_tensor_p:
  3878. """Convolution 1D with padding = half
  3879. Parameters:
  3880. a: input tensor
  3881. b: filter tensor
  3882. s: stride
  3883. d: dilation
  3884. Returns:
  3885. output tensor"""
  3886. return lib.ggml_conv_1d_ph(ctx, a, b, s, d)
  3887. lib.ggml_conv_1d_ph.argtypes = [
  3888. ggml_context_p,
  3889. ctypes.POINTER(ggml_tensor),
  3890. ctypes.POINTER(ggml_tensor),
  3891. ctypes.c_int,
  3892. ctypes.c_int,
  3893. ]
  3894. lib.ggml_conv_1d_ph.restype = ctypes.POINTER(ggml_tensor)
  3895. # GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
  3896. # struct ggml_context * ctx,
  3897. # struct ggml_tensor * a,
  3898. # struct ggml_tensor * b,
  3899. # int s0,
  3900. # int p0,
  3901. # int d0);
  3902. def ggml_conv_transpose_1d(
  3903. ctx: ggml_context_p,
  3904. a: ggml_tensor_p,
  3905. b: ggml_tensor_p,
  3906. s0: Union[ctypes.c_int, int],
  3907. p0: Union[ctypes.c_int, int],
  3908. d0: Union[ctypes.c_int, int],
  3909. ) -> ggml_tensor_p:
  3910. """Convolution transpose 1D
  3911. Parameters:
  3912. a: input tensor
  3913. b: filter tensor
  3914. s0: stride
  3915. p0: padding
  3916. d0: dilation
  3917. Returns:
  3918. output tensor"""
  3919. return lib.ggml_conv_transpose_1d(ctx, a, b, s0, p0, d0)
  3920. lib.ggml_conv_transpose_1d.argtypes = [
  3921. ggml_context_p,
  3922. ctypes.POINTER(ggml_tensor),
  3923. ctypes.POINTER(ggml_tensor),
  3924. ctypes.c_int,
  3925. ctypes.c_int,
  3926. ctypes.c_int,
  3927. ]
  3928. lib.ggml_conv_transpose_1d.restype = ctypes.POINTER(ggml_tensor)
  3929. # GGML_API struct ggml_tensor * ggml_conv_2d(
  3930. # struct ggml_context * ctx,
  3931. # struct ggml_tensor * a,
  3932. # struct ggml_tensor * b,
  3933. # int s0,
  3934. # int s1,
  3935. # int p0,
  3936. # int p1,
  3937. # int d0,
  3938. # int d1);
  3939. def ggml_conv_2d(
  3940. ctx: ggml_context_p,
  3941. a: ggml_tensor_p,
  3942. b: ggml_tensor_p,
  3943. s0: Union[ctypes.c_int, int],
  3944. s1: Union[ctypes.c_int, int],
  3945. p0: Union[ctypes.c_int, int],
  3946. p1: Union[ctypes.c_int, int],
  3947. d0: Union[ctypes.c_int, int],
  3948. d1: Union[ctypes.c_int, int],
  3949. ) -> ggml_tensor_p:
  3950. """Convolution 2D
  3951. Parameters:
  3952. a: input tensor
  3953. b: filter tensor
  3954. s0: stride
  3955. s1: stride
  3956. p0: padding
  3957. p1: padding
  3958. d0: dilation
  3959. d1: dilation
  3960. Returns:
  3961. output tensor"""
  3962. return lib.ggml_conv_2d(ctx, a, b, s0, s1, p0, p1, d0, d1)
  3963. lib.ggml_conv_2d.argtypes = [
  3964. ggml_context_p,
  3965. ctypes.POINTER(ggml_tensor),
  3966. ctypes.POINTER(ggml_tensor),
  3967. ctypes.c_int,
  3968. ctypes.c_int,
  3969. ctypes.c_int,
  3970. ctypes.c_int,
  3971. ctypes.c_int,
  3972. ctypes.c_int,
  3973. ]
  3974. lib.ggml_conv_2d.restype = ctypes.POINTER(ggml_tensor)
  3975. # // kernel size is a->ne[0] x a->ne[1]
  3976. # // stride is equal to kernel size
  3977. # // padding is zero
  3978. # // example:
  3979. # // a: 16 16 3 768
  3980. # // b: 1024 1024 3 1
  3981. # // res: 64 64 768 1
  3982. # // used in sam
  3983. # GGML_API struct ggml_tensor * ggml_conv_2d_sk_p0(
  3984. # struct ggml_context * ctx,
  3985. # struct ggml_tensor * a,
  3986. # struct ggml_tensor * b);
  3987. def ggml_conv_2d_sk_p0(
  3988. ctx: ggml_context_p,
  3989. a: ggml_tensor_p,
  3990. b: ggml_tensor_p,
  3991. ) -> ggml_tensor_p:
  3992. """Convolution 2D
  3993. Parameters:
  3994. a: input tensor
  3995. b: filter tensor
  3996. Returns:
  3997. output tensor"""
  3998. return lib.ggml_conv_2d_sk_p0(ctx, a, b)
  3999. lib.ggml_conv_2d_sk_p0.argtypes = [
  4000. ggml_context_p,
  4001. ctypes.POINTER(ggml_tensor),
  4002. ctypes.POINTER(ggml_tensor),
  4003. ]
  4004. lib.ggml_conv_2d_sk_p0.restype = ctypes.POINTER(ggml_tensor)
  4005. # // kernel size is a->ne[0] x a->ne[1]
  4006. # // stride is 1
  4007. # // padding is half
  4008. # // example:
  4009. # // a: 3 3 256 256
  4010. # // b: 64 64 256 1
  4011. # // res: 64 64 256 1
  4012. # // used in sam
  4013. # GGML_API struct ggml_tensor * ggml_conv_2d_s1_ph(
  4014. # struct ggml_context * ctx,
  4015. # struct ggml_tensor * a,
  4016. # struct ggml_tensor * b);
  4017. def ggml_conv_2d_s1_ph(
  4018. ctx: ggml_context_p,
  4019. a: ggml_tensor_p,
  4020. b: ggml_tensor_p,
  4021. ) -> ggml_tensor_p:
  4022. """Convolution 2D with stride = 1 and padding = half
  4023. Parameters:
  4024. a: input tensor
  4025. b: filter tensor
  4026. Returns:
  4027. output tensor"""
  4028. return lib.ggml_conv_2d_s1_ph(ctx, a, b)
  4029. lib.ggml_conv_2d_s1_ph.argtypes = [
  4030. ggml_context_p,
  4031. ctypes.POINTER(ggml_tensor),
  4032. ctypes.POINTER(ggml_tensor),
  4033. ]
  4034. lib.ggml_conv_2d_s1_ph.restype = ctypes.POINTER(ggml_tensor)
  4035. # GGML_API struct ggml_tensor * ggml_conv_transpose_2d_p0(
  4036. # struct ggml_context * ctx,
  4037. # struct ggml_tensor * a,
  4038. # struct ggml_tensor * b,
  4039. # int stride);
  4040. def ggml_conv_transpose_2d_p0(
  4041. ctx: ggml_context_p,
  4042. a: ggml_tensor_p,
  4043. b: ggml_tensor_p,
  4044. stride: Union[ctypes.c_int, int],
  4045. ) -> ggml_tensor_p:
  4046. """Convolution Transpose 2D with padding = zero
  4047. Parameters:
  4048. a: input tensor
  4049. b: filter tensor
  4050. stride: stride
  4051. Returns:
  4052. output tensor"""
  4053. return lib.ggml_conv_transpose_2d_p0(ctx, a, b, stride)
  4054. lib.ggml_conv_transpose_2d_p0.argtypes = [
  4055. ggml_context_p,
  4056. ctypes.POINTER(ggml_tensor),
  4057. ctypes.POINTER(ggml_tensor),
  4058. ctypes.c_int,
  4059. ]
  4060. lib.ggml_conv_transpose_2d_p0.restype = ctypes.POINTER(ggml_tensor)
  4061. # enum ggml_op_pool {
  4062. # GGML_OP_POOL_MAX,
  4063. # GGML_OP_POOL_AVG,
  4064. # GGML_OP_POOL_COUNT,
  4065. # };
  4066. GGML_OP_POOL_MAX = 0
  4067. GGML_OP_POOL_AVG = 1
  4068. GGML_OP_POOL_COUNT = 2
  4069. # GGML_API struct ggml_tensor * ggml_pool_1d(
  4070. # struct ggml_context * ctx,
  4071. # struct ggml_tensor * a,
  4072. # enum ggml_op_pool op,
  4073. # int k0, // kernel size
  4074. # int s0, // stride
  4075. # int p0); // padding
def ggml_pool_1d(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    op: Union[ctypes.c_int, int],
    k0: Union[ctypes.c_int, int],
    s0: Union[ctypes.c_int, int],
    p0: Union[ctypes.c_int, int],
) -> ggml_tensor_p:
    """1D Pooling

    Parameters:
        ctx: ggml context
        a: input tensor
        op: pooling operation (GGML_OP_POOL_MAX or GGML_OP_POOL_AVG)
        k0: kernel size
        s0: stride
        p0: padding

    Returns:
        output tensor"""
    return lib.ggml_pool_1d(ctx, a, op, k0, s0, p0)


lib.ggml_pool_1d.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,  # op
    ctypes.c_int,  # k0
    ctypes.c_int,  # s0
    ctypes.c_int,  # p0
]
lib.ggml_pool_1d.restype = ctypes.POINTER(ggml_tensor)
  4103. # // the result will have 2*p0 padding for the first dimension
  4104. # // and 2*p1 padding for the second dimension
  4105. # GGML_API struct ggml_tensor * ggml_pool_2d(
  4106. # struct ggml_context * ctx,
  4107. # struct ggml_tensor * a,
  4108. # enum ggml_op_pool op,
  4109. # int k0,
  4110. # int k1,
  4111. # int s0,
  4112. # int s1,
  4113. # float p0,
  4114. # float p1);
  4115. def ggml_pool_2d(
  4116. ctx: ggml_context_p,
  4117. a: ggml_tensor_p,
  4118. op: Union[ctypes.c_int, int],
  4119. k0: Union[ctypes.c_int, int],
  4120. k1: Union[ctypes.c_int, int],
  4121. s0: Union[ctypes.c_int, int],
  4122. s1: Union[ctypes.c_int, int],
  4123. p0: Union[ctypes.c_float, float],
  4124. p1: Union[ctypes.c_float, float],
  4125. ) -> ggml_tensor_p:
  4126. """2D Pooling
  4127. Parameters:
  4128. a: input tensor
  4129. op: pooling operation
  4130. k0: kernel size
  4131. k1: kernel size
  4132. s0: stride
  4133. s1: stride
  4134. p0: padding
  4135. p1: padding
  4136. Returns:
  4137. output tensor"""
  4138. return lib.ggml_pool_2d(ctx, a, op, k0, k1, s0, s1, p0, p1)
  4139. lib.ggml_pool_2d.argtypes = [
  4140. ggml_context_p,
  4141. ctypes.POINTER(ggml_tensor),
  4142. ctypes.c_int,
  4143. ctypes.c_int,
  4144. ctypes.c_int,
  4145. ctypes.c_int,
  4146. ctypes.c_float,
  4147. ctypes.c_float,
  4148. ]
  4149. lib.ggml_pool_2d.restype = ctypes.POINTER(ggml_tensor)
  4150. # // nearest interpolate
  4151. # // used in stable-diffusion
  4152. # GGML_API struct ggml_tensor * ggml_upscale(
  4153. # struct ggml_context * ctx,
  4154. # struct ggml_tensor * a,
  4155. # int scale_factor);
def ggml_upscale(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    scale_factor: Union[ctypes.c_int, int],
) -> ggml_tensor_p:
    """Upscale (nearest interpolation; used in stable-diffusion).

    Parameters:
        ctx: ggml context
        a: input tensor
        scale_factor: scale factor

    Returns:
        output tensor"""
    return lib.ggml_upscale(ctx, a, scale_factor)


lib.ggml_upscale.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
]
lib.ggml_upscale.restype = ctypes.POINTER(ggml_tensor)
  4174. # // pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
  4175. # GGML_API struct ggml_tensor * ggml_pad(
  4176. # struct ggml_context * ctx,
  4177. # struct ggml_tensor * a,
  4178. # int p0,
  4179. # int p1,
  4180. # int p2,
  4181. # int p3);
def ggml_pad(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    p0: Union[ctypes.c_int, int],
    p1: Union[ctypes.c_int, int],
    p2: Union[ctypes.c_int, int],
    p3: Union[ctypes.c_int, int],
) -> ggml_tensor_p:
    """Pad tensor with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]

    Parameters:
        ctx: ggml context
        a: input tensor
        p0: padding for dimension 0
        p1: padding for dimension 1
        p2: padding for dimension 2
        p3: padding for dimension 3

    Returns:
        output tensor"""
    return lib.ggml_pad(ctx, a, p0, p1, p2, p3)


lib.ggml_pad.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,  # p0
    ctypes.c_int,  # p1
    ctypes.c_int,  # p2
    ctypes.c_int,  # p3
]
lib.ggml_pad.restype = ctypes.POINTER(ggml_tensor)
  4209. # // sort rows
  4210. # enum ggml_sort_order {
  4211. # GGML_SORT_ASC,
  4212. # GGML_SORT_DESC,
  4213. # };
GGML_SORT_ASC = 0  # ascending order
GGML_SORT_DESC = 1  # descending order
  4216. # GGML_API struct ggml_tensor * ggml_argsort(
  4217. # struct ggml_context * ctx,
  4218. # struct ggml_tensor * a,
  4219. # enum ggml_sort_order order);
def ggml_argsort(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    order: Union[ctypes.c_int, int],
) -> ggml_tensor_p:
    """Argsort (sort rows).

    Parameters:
        ctx: ggml context
        a: input tensor
        order: sort order (GGML_SORT_ASC or GGML_SORT_DESC)

    Returns:
        output tensor"""
    return lib.ggml_argsort(ctx, a, order)


lib.ggml_argsort.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
]
lib.ggml_argsort.restype = ctypes.POINTER(ggml_tensor)
  4238. # // top k elements per row
  4239. # GGML_API struct ggml_tensor * ggml_top_k(
  4240. # struct ggml_context * ctx,
  4241. # struct ggml_tensor * a,
  4242. # int k);
  4243. def ggml_top_k(
  4244. ctx: ggml_context_p,
  4245. a: ggml_tensor_p,
  4246. k: Union[ctypes.c_int, int],
  4247. ) -> ggml_tensor_p:
  4248. """Top k elements per row
  4249. Parameters:
  4250. a: input tensor
  4251. k: number of elements
  4252. Returns:
  4253. output tensor"""
  4254. return lib.ggml_top_k(ctx, a, k)
  4255. # GGML_API struct ggml_tensor * ggml_flash_attn(
  4256. # struct ggml_context * ctx,
  4257. # struct ggml_tensor * q,
  4258. # struct ggml_tensor * k,
  4259. # struct ggml_tensor * v,
  4260. # bool masked);
def ggml_flash_attn(
    ctx: ggml_context_p,
    q: ggml_tensor_p,
    k: ggml_tensor_p,
    v: ggml_tensor_p,
    masked: Union[ctypes.c_bool, bool],
) -> ggml_tensor_p:
    """Flash attention.

    Parameters:
        ctx: ggml context
        q: query tensor
        k: key tensor
        v: value tensor
        masked: whether the attention is masked

    Returns:
        output tensor"""
    return lib.ggml_flash_attn(ctx, q, k, v, masked)


lib.ggml_flash_attn.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_bool,
]
lib.ggml_flash_attn.restype = ctypes.POINTER(ggml_tensor)
  4277. # GGML_API struct ggml_tensor * ggml_flash_attn_back(
  4278. # struct ggml_context * ctx,
  4279. # struct ggml_tensor * q,
  4280. # struct ggml_tensor * k,
  4281. # struct ggml_tensor * v,
  4282. # struct ggml_tensor * d,
  4283. # bool masked);
def ggml_flash_attn_back(
    ctx: ggml_context_p,
    q: ggml_tensor_p,
    k: ggml_tensor_p,
    v: ggml_tensor_p,
    d: ggml_tensor_p,
    masked: Union[ctypes.c_bool, bool],
) -> ggml_tensor_p:
    """Backward pass of flash attention.

    Parameters:
        ctx: ggml context
        q: query tensor
        k: key tensor
        v: value tensor
        d: gradient tensor (presumably d(loss)/d(output) — see ggml.h)
        masked: whether the attention is masked

    Returns:
        output tensor"""
    return lib.ggml_flash_attn_back(ctx, q, k, v, d, masked)


lib.ggml_flash_attn_back.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_bool,
]
lib.ggml_flash_attn_back.restype = ctypes.POINTER(ggml_tensor)
  4302. # GGML_API struct ggml_tensor * ggml_flash_ff(
  4303. # struct ggml_context * ctx,
  4304. # struct ggml_tensor * a,
  4305. # struct ggml_tensor * b0,
  4306. # struct ggml_tensor * b1,
  4307. # struct ggml_tensor * c0,
  4308. # struct ggml_tensor * c1);
def ggml_flash_ff(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b0: ggml_tensor_p,
    b1: ggml_tensor_p,
    c0: ggml_tensor_p,
    c1: ggml_tensor_p,
) -> ggml_tensor_p:
    """Flash feed-forward.

    Parameters:
        ctx: ggml context
        a: input tensor
        b0, b1, c0, c1: weight tensors (exact roles defined in ggml.h)

    Returns:
        output tensor"""
    return lib.ggml_flash_ff(ctx, a, b0, b1, c0, c1)


lib.ggml_flash_ff.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_flash_ff.restype = ctypes.POINTER(ggml_tensor)
  4327. # // partition into non-overlapping windows with padding if needed
  4328. # // example:
  4329. # // a: 768 64 64 1
  4330. # // w: 14
  4331. # // res: 768 14 14 25
  4332. # // used in sam
  4333. # GGML_API struct ggml_tensor * ggml_win_part(
  4334. # struct ggml_context * ctx,
  4335. # struct ggml_tensor * a,
  4336. # int w);
def ggml_win_part(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    w: Union[ctypes.c_int, int],
) -> ggml_tensor_p:
    """Partition into non-overlapping windows with padding if needed
    (e.g. a: 768 64 64 1, w: 14 -> res: 768 14 14 25; used in sam).

    Parameters:
        ctx: ggml context
        a: input tensor
        w: window size

    Returns:
        output tensor"""
    return lib.ggml_win_part(ctx, a, w)


lib.ggml_win_part.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
]
lib.ggml_win_part.restype = ctypes.POINTER(ggml_tensor)
  4349. # // reverse of ggml_win_part
  4350. # // used in sam
  4351. # GGML_API struct ggml_tensor * ggml_win_unpart(
  4352. # struct ggml_context * ctx,
  4353. # struct ggml_tensor * a,
  4354. # int w0,
  4355. # int h0,
  4356. # int w);
def ggml_win_unpart(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    w0: Union[ctypes.c_int, int],
    h0: Union[ctypes.c_int, int],
    w: Union[ctypes.c_int, int],
) -> ggml_tensor_p:
    """Reverse of ggml_win_part (used in sam).

    Parameters:
        ctx: ggml context
        a: input tensor
        w0: original width before partitioning (per ggml.h usage)
        h0: original height before partitioning (per ggml.h usage)
        w: window size

    Returns:
        output tensor"""
    return lib.ggml_win_unpart(ctx, a, w0, h0, w)


lib.ggml_win_unpart.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,  # w0
    ctypes.c_int,  # h0
    ctypes.c_int,  # w
]
lib.ggml_win_unpart.restype = ctypes.POINTER(ggml_tensor)
  4373. # GGML_API struct ggml_tensor * ggml_unary(
  4374. # struct ggml_context * ctx,
  4375. # struct ggml_tensor * a,
  4376. # enum ggml_unary_op op);
def ggml_unary(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    op: Union[ctypes.c_int, int],
) -> ggml_tensor_p:
    """Apply a unary operator to a tensor.

    Parameters:
        ctx: ggml context
        a: input tensor
        op: unary operator (a ggml_unary_op enum value)

    Returns:
        output tensor"""
    return lib.ggml_unary(ctx, a, op)


lib.ggml_unary.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
]
lib.ggml_unary.restype = ctypes.POINTER(ggml_tensor)
  4389. # GGML_API struct ggml_tensor * ggml_unary_inplace(
  4390. # struct ggml_context * ctx,
  4391. # struct ggml_tensor * a,
  4392. # enum ggml_unary_op op);
def ggml_unary_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    op: Union[ctypes.c_int, int],
) -> ggml_tensor_p:
    """Apply a unary operator to a tensor inplace.

    Parameters:
        ctx: ggml context
        a: input tensor
        op: unary operator (a ggml_unary_op enum value)

    Returns:
        output tensor"""
    return lib.ggml_unary_inplace(ctx, a, op)


lib.ggml_unary_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
]
lib.ggml_unary_inplace.restype = ctypes.POINTER(ggml_tensor)
  4405. # // used in sam
  4406. # GGML_API struct ggml_tensor * ggml_get_rel_pos(
  4407. # struct ggml_context * ctx,
  4408. # struct ggml_tensor * a,
  4409. # int qh,
  4410. # int kh);
def ggml_get_rel_pos(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    qh: Union[ctypes.c_int, int],
    kh: Union[ctypes.c_int, int],
) -> ggml_tensor_p:
    """Get relative position embeddings (used in sam).

    Parameters:
        ctx: ggml context
        a: input tensor
        qh: query height (per parameter names in ggml.h)
        kh: key height (per parameter names in ggml.h)

    Returns:
        output tensor"""
    return lib.ggml_get_rel_pos(ctx, a, qh, kh)


lib.ggml_get_rel_pos.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,  # qh
    ctypes.c_int,  # kh
]
lib.ggml_get_rel_pos.restype = ctypes.POINTER(ggml_tensor)
  4425. # // used in sam
  4426. # GGML_API struct ggml_tensor * ggml_add_rel_pos(
  4427. # struct ggml_context * ctx,
  4428. # struct ggml_tensor * a,
  4429. # struct ggml_tensor * pw,
  4430. # struct ggml_tensor * ph);
def ggml_add_rel_pos(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    pw: ggml_tensor_p,
    ph: ggml_tensor_p,
) -> ggml_tensor_p:
    """Add relative position embeddings (used in sam).

    Parameters:
        ctx: ggml context
        a: input tensor
        pw: position-width tensor
        ph: position-height tensor

    Returns:
        output tensor"""
    return lib.ggml_add_rel_pos(ctx, a, pw, ph)


lib.ggml_add_rel_pos.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_add_rel_pos.restype = ctypes.POINTER(ggml_tensor)
  4445. # GGML_API struct ggml_tensor * ggml_add_rel_pos_inplace(
  4446. # struct ggml_context * ctx,
  4447. # struct ggml_tensor * a,
  4448. # struct ggml_tensor * pw,
  4449. # struct ggml_tensor * ph);
def ggml_add_rel_pos_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    pw: ggml_tensor_p,
    ph: ggml_tensor_p,
) -> ggml_tensor_p:
    """Add relative position embeddings inplace (used in sam).

    Parameters:
        ctx: ggml context
        a: input tensor
        pw: position-width tensor
        ph: position-height tensor

    Returns:
        output tensor"""
    return lib.ggml_add_rel_pos_inplace(ctx, a, pw, ph)


lib.ggml_add_rel_pos_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_add_rel_pos_inplace.restype = ctypes.POINTER(ggml_tensor)
  4464. # // custom operators (DEPRECATED)
  4465. # typedef void (*ggml_unary_op_f32_t)(const int, float *, const float *);
# Callback: (n, dst, src) -> None — element count plus output/input float
# arrays, matching the C typedef above. Exact dst/src ordering should be
# confirmed against ggml.h.
ggml_unary_op_f32_t = ctypes.CFUNCTYPE(
    None, ctypes.c_int, ctypes.POINTER(ctypes.c_float), ctypes.POINTER(ctypes.c_float)
)
# typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
# Callback: (n, dst, src0, src1) -> None, matching the C typedef above.
ggml_binary_op_f32_t = ctypes.CFUNCTYPE(
    None,
    ctypes.c_int,
    ctypes.POINTER(ctypes.c_float),
    ctypes.POINTER(ctypes.c_float),
    ctypes.POINTER(ctypes.c_float),
)
# typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
ggml_custom1_op_f32_t = ctypes.CFUNCTYPE(
    None, ctypes.POINTER(ggml_tensor), ctypes.POINTER(ggml_tensor)
)
"""Unary operator function type"""
# typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
ggml_custom2_op_f32_t = ctypes.CFUNCTYPE(
    None,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
)
"""Binary operator function type"""
# typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
ggml_custom3_op_f32_t = ctypes.CFUNCTYPE(
    None,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
)
"""Ternary operator function type"""
  4499. # GGML_API struct ggml_tensor * ggml_map_unary_f32(
  4500. # struct ggml_context * ctx,
  4501. # struct ggml_tensor * a,
  4502. # ggml_unary_op_f32_t fun);
def ggml_map_unary_f32(
    ctx: ggml_context_p, a: ggml_tensor_p, fun: "ctypes._FuncPointer"  # type: ignore
) -> ggml_tensor_p:
    """Apply a custom unary f32 operator to a tensor (DEPRECATED custom ops).

    Parameters:
        ctx: ggml context
        a: input tensor
        fun (ggml.ggml_unary_op_f32_t): callback to apply

    Returns:
        output tensor"""
    return lib.ggml_map_unary_f32(ctx, a, fun)


lib.ggml_map_unary_f32.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ggml_unary_op_f32_t,
]
lib.ggml_map_unary_f32.restype = ctypes.POINTER(ggml_tensor)
  4513. # GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
  4514. # struct ggml_context * ctx,
  4515. # struct ggml_tensor * a,
  4516. # ggml_unary_op_f32_t fun);
def ggml_map_unary_inplace_f32(
    ctx: ggml_context_p, a: ggml_tensor_p, fun: "ctypes._FuncPointer"  # type: ignore
) -> ggml_tensor_p:
    """Apply a custom unary f32 operator to a tensor inplace (DEPRECATED custom ops).

    Parameters:
        ctx: ggml context
        a: input tensor
        fun (ggml.ggml_unary_op_f32_t): callback to apply

    Returns:
        output tensor"""
    return lib.ggml_map_unary_inplace_f32(ctx, a, fun)


lib.ggml_map_unary_inplace_f32.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ggml_unary_op_f32_t,
]
lib.ggml_map_unary_inplace_f32.restype = ctypes.POINTER(ggml_tensor)
  4527. # GGML_API struct ggml_tensor * ggml_map_binary_f32(
  4528. # struct ggml_context * ctx,
  4529. # struct ggml_tensor * a,
  4530. # struct ggml_tensor * b,
  4531. # ggml_binary_op_f32_t fun);
def ggml_map_binary_f32(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    fun: "ctypes._FuncPointer",  # type: ignore
) -> ggml_tensor_p:
    """Apply a custom binary f32 operator to two tensors (DEPRECATED custom ops).

    Parameters:
        ctx: ggml context
        a: input tensor
        b: input tensor
        fun (ggml.ggml_binary_op_f32_t): callback to apply

    Returns:
        output tensor"""
    return lib.ggml_map_binary_f32(ctx, a, b, fun)


lib.ggml_map_binary_f32.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ggml_binary_op_f32_t,
]
lib.ggml_map_binary_f32.restype = ctypes.POINTER(ggml_tensor)
  4546. # GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
  4547. # struct ggml_context * ctx,
  4548. # struct ggml_tensor * a,
  4549. # struct ggml_tensor * b,
  4550. # ggml_binary_op_f32_t fun);
def ggml_map_binary_inplace_f32(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    fun: "ctypes._FuncPointer",  # type: ignore
) -> ggml_tensor_p:
    """Apply a custom binary f32 operator to two tensors inplace (DEPRECATED custom ops).

    Parameters:
        ctx: ggml context
        a: input tensor
        b: input tensor
        fun (ggml.ggml_binary_op_f32_t): callback to apply

    Returns:
        output tensor"""
    return lib.ggml_map_binary_inplace_f32(ctx, a, b, fun)


lib.ggml_map_binary_inplace_f32.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ggml_binary_op_f32_t,
]
lib.ggml_map_binary_inplace_f32.restype = ctypes.POINTER(ggml_tensor)
  4565. # GGML_API struct ggml_tensor * ggml_map_custom1_f32(
  4566. # struct ggml_context * ctx,
  4567. # struct ggml_tensor * a,
  4568. # ggml_custom1_op_f32_t fun);
def ggml_map_custom1_f32(
    ctx: ggml_context_p, a: ggml_tensor_p, fun: "ctypes._FuncPointer"  # type: ignore
) -> ggml_tensor_p:
    """Custom unary operator on a tensor.

    Example:
        ```python
        import ggml

        @ggml.ggml_custom1_op_f32_t
        def custom_op(b: ggml.ggml_tensor_p, a: ggml.ggml_tensor_p):
            # do something with a and copy to b
            return

        ...

        b = ggml.ggml_map_custom1_f32(ctx, a, custom_op)
        ```

    Parameters:
        ctx: ggml context
        a: input tensor
        fun (ggml.ggml_custom1_op_f32_t): function to apply to each element

    Returns:
        output tensor"""
    return lib.ggml_map_custom1_f32(ctx, a, fun)


lib.ggml_map_custom1_f32.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ggml_custom1_op_f32_t,
]
lib.ggml_map_custom1_f32.restype = ctypes.POINTER(ggml_tensor)
  4595. # GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
  4596. # struct ggml_context * ctx,
  4597. # struct ggml_tensor * a,
  4598. # ggml_custom1_op_f32_t fun);
def ggml_map_custom1_inplace_f32(
    ctx: ggml_context_p, a: ggml_tensor_p, fun: "ctypes._FuncPointer"  # type: ignore
) -> ggml_tensor_p:
    """Custom unary operator on a tensor inplace.

    Parameters:
        ctx: ggml context
        a: input tensor
        fun (ggml.ggml_custom1_op_f32_t): function to apply to each element

    Returns:
        output tensor"""
    return lib.ggml_map_custom1_inplace_f32(ctx, a, fun)


lib.ggml_map_custom1_inplace_f32.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ggml_custom1_op_f32_t,
]
lib.ggml_map_custom1_inplace_f32.restype = ctypes.POINTER(ggml_tensor)
  4615. # GGML_API struct ggml_tensor * ggml_map_custom2_f32(
  4616. # struct ggml_context * ctx,
  4617. # struct ggml_tensor * a,
  4618. # struct ggml_tensor * b,
  4619. # ggml_custom2_op_f32_t fun);
def ggml_map_custom2_f32(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    fun: "ctypes._FuncPointer",  # type: ignore
) -> ggml_tensor_p:
    """Custom binary operator on two tensors.

    Parameters:
        ctx: ggml context
        a: input tensor
        b: input tensor
        fun (ggml.ggml_custom2_op_f32_t): function to apply to each element

    Returns:
        output tensor"""
    return lib.ggml_map_custom2_f32(ctx, a, b, fun)


lib.ggml_map_custom2_f32.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ggml_custom2_op_f32_t,
]
lib.ggml_map_custom2_f32.restype = ctypes.POINTER(ggml_tensor)
  4641. # GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
  4642. # struct ggml_context * ctx,
  4643. # struct ggml_tensor * a,
  4644. # struct ggml_tensor * b,
  4645. # ggml_custom2_op_f32_t fun);
def ggml_map_custom2_inplace_f32(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    fun: "ctypes._FuncPointer",  # type: ignore
) -> ggml_tensor_p:
    """Custom binary operator on two tensors inplace.

    Parameters:
        ctx: ggml context
        a: input tensor
        b: input tensor
        fun (ggml.ggml_custom2_op_f32_t): function to apply to each element

    Returns:
        output tensor"""
    return lib.ggml_map_custom2_inplace_f32(ctx, a, b, fun)


lib.ggml_map_custom2_inplace_f32.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ggml_custom2_op_f32_t,
]
lib.ggml_map_custom2_inplace_f32.restype = ctypes.POINTER(ggml_tensor)
  4667. # GGML_API struct ggml_tensor * ggml_map_custom3_f32(
  4668. # struct ggml_context * ctx,
  4669. # struct ggml_tensor * a,
  4670. # struct ggml_tensor * b,
  4671. # struct ggml_tensor * c,
  4672. # ggml_custom3_op_f32_t fun);
def ggml_map_custom3_f32(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    c: ggml_tensor_p,
    fun: "ctypes._FuncPointer",  # type: ignore
) -> ggml_tensor_p:
    """Custom ternary operator on three tensors.

    Parameters:
        ctx: ggml context
        a: input tensor
        b: input tensor
        c: input tensor
        fun (ggml.ggml_custom3_op_f32_t): function to apply to each element

    Returns:
        output tensor"""
    return lib.ggml_map_custom3_f32(ctx, a, b, c, fun)


lib.ggml_map_custom3_f32.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ggml_custom3_op_f32_t,
]
lib.ggml_map_custom3_f32.restype = ctypes.POINTER(ggml_tensor)
  4697. # GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
  4698. # struct ggml_context * ctx,
  4699. # struct ggml_tensor * a,
  4700. # struct ggml_tensor * b,
  4701. # struct ggml_tensor * c,
  4702. # ggml_custom3_op_f32_t fun);
def ggml_map_custom3_inplace_f32(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    c: ggml_tensor_p,
    fun: "ctypes._FuncPointer",  # type: ignore
) -> ggml_tensor_p:
    """Custom ternary operator on three tensors inplace.

    Parameters:
        ctx: ggml context
        a: input tensor
        b: input tensor
        c: input tensor
        fun (ggml.ggml_custom3_op_f32_t): function to apply to each element

    Returns:
        output tensor"""
    return lib.ggml_map_custom3_inplace_f32(ctx, a, b, c, fun)


lib.ggml_map_custom3_inplace_f32.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ggml_custom3_op_f32_t,
]
lib.ggml_map_custom3_inplace_f32.restype = ctypes.POINTER(ggml_tensor)
  4727. # // custom operators v2
  4728. # typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata);
# Callback: (dst, a, ith, nth, userdata) -> None, per the C typedef above;
# ith/nth are the task index and task count for threaded execution.
ggml_custom1_op_t = ctypes.CFUNCTYPE(
    None,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_void_p,
)
"""Custom unary operator on a tensor."""
# typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata);
# Callback: (dst, a, b, ith, nth, userdata) -> None, per the C typedef above.
ggml_custom2_op_t = ctypes.CFUNCTYPE(
    None,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_void_p,
)
"""Custom binary operator on two tensors."""
# typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata);
# Callback: (dst, a, b, c, ith, nth, userdata) -> None, per the C typedef above.
ggml_custom3_op_t = ctypes.CFUNCTYPE(
    None,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_void_p,
)
"""Custom ternary operator on three tensors."""
# #define GGML_N_TASKS_MAX -1
GGML_N_TASKS_MAX = -1  # sentinel n_tasks value: let ggml pick the task count
  4763. # GGML_API struct ggml_tensor * ggml_map_custom1(
  4764. # struct ggml_context * ctx,
  4765. # struct ggml_tensor * a,
  4766. # ggml_custom1_op_t fun,
  4767. # int n_tasks,
  4768. # void * userdata);
def ggml_map_custom1(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    fun: "ctypes._FuncPointer",  # type: ignore
    n_tasks: Union[ctypes.c_int, int],
    userdata: Optional[ctypes.c_void_p],
) -> ggml_tensor_p:
    """Custom unary operator on a tensor (custom operators v2).

    Parameters:
        ctx: ggml context
        a: input tensor
        fun (ggml.ggml_custom1_op_t): callback
        n_tasks: number of tasks, or GGML_N_TASKS_MAX
        userdata: opaque pointer passed through to the callback

    Returns:
        output tensor"""
    return lib.ggml_map_custom1(ctx, a, fun, n_tasks, userdata)


lib.ggml_map_custom1.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ggml_custom1_op_t,
    ctypes.c_int,
    ctypes.c_void_p,
]
lib.ggml_map_custom1.restype = ctypes.POINTER(ggml_tensor)
  4785. # GGML_API struct ggml_tensor * ggml_map_custom1_inplace(
  4786. # struct ggml_context * ctx,
  4787. # struct ggml_tensor * a,
  4788. # ggml_custom1_op_t fun,
  4789. # int n_tasks,
  4790. # void * userdata);
def ggml_map_custom1_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    fun: "ctypes._FuncPointer",  # type: ignore
    n_tasks: Union[ctypes.c_int, int],
    userdata: Optional[ctypes.c_void_p],
) -> ggml_tensor_p:
    """Custom unary operator on a tensor inplace (custom operators v2).

    Parameters:
        ctx: ggml context
        a: input tensor
        fun (ggml.ggml_custom1_op_t): callback
        n_tasks: number of tasks, or GGML_N_TASKS_MAX
        userdata: opaque pointer passed through to the callback

    Returns:
        output tensor"""
    return lib.ggml_map_custom1_inplace(ctx, a, fun, n_tasks, userdata)


lib.ggml_map_custom1_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ggml_custom1_op_t,
    ctypes.c_int,
    ctypes.c_void_p,
]
lib.ggml_map_custom1_inplace.restype = ctypes.POINTER(ggml_tensor)
  4807. # GGML_API struct ggml_tensor * ggml_map_custom2(
  4808. # struct ggml_context * ctx,
  4809. # struct ggml_tensor * a,
  4810. # struct ggml_tensor * b,
  4811. # ggml_custom2_op_t fun,
  4812. # int n_tasks,
  4813. # void * userdata);
def ggml_map_custom2(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    fun: "ctypes._FuncPointer",  # type: ignore
    n_tasks: Union[ctypes.c_int, int],
    userdata: Optional[ctypes.c_void_p],
) -> ggml_tensor_p:
    """Custom binary operator on two tensors (custom operators v2).

    Parameters:
        ctx: ggml context
        a: input tensor
        b: input tensor
        fun (ggml.ggml_custom2_op_t): callback
        n_tasks: number of tasks, or GGML_N_TASKS_MAX
        userdata: opaque pointer passed through to the callback

    Returns:
        output tensor"""
    return lib.ggml_map_custom2(ctx, a, b, fun, n_tasks, userdata)


lib.ggml_map_custom2.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ggml_custom2_op_t,
    ctypes.c_int,
    ctypes.c_void_p,
]
lib.ggml_map_custom2.restype = ctypes.POINTER(ggml_tensor)
  4832. # GGML_API struct ggml_tensor * ggml_map_custom2_inplace(
  4833. # struct ggml_context * ctx,
  4834. # struct ggml_tensor * a,
  4835. # struct ggml_tensor * b,
  4836. # ggml_custom2_op_t fun,
  4837. # int n_tasks,
  4838. # void * userdata);
def ggml_map_custom2_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    fun: "ctypes._FuncPointer",  # type: ignore
    n_tasks: Union[ctypes.c_int, int],
    userdata: Optional[ctypes.c_void_p],
) -> ggml_tensor_p:
    """Custom binary operator on two tensors inplace (custom operators v2).

    Parameters:
        ctx: ggml context
        a: input tensor
        b: input tensor
        fun (ggml.ggml_custom2_op_t): callback
        n_tasks: number of tasks, or GGML_N_TASKS_MAX
        userdata: opaque pointer passed through to the callback

    Returns:
        output tensor"""
    return lib.ggml_map_custom2_inplace(ctx, a, b, fun, n_tasks, userdata)


lib.ggml_map_custom2_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ggml_custom2_op_t,
    ctypes.c_int,
    ctypes.c_void_p,
]
lib.ggml_map_custom2_inplace.restype = ctypes.POINTER(ggml_tensor)
  4857. # GGML_API struct ggml_tensor * ggml_map_custom3(
  4858. # struct ggml_context * ctx,
  4859. # struct ggml_tensor * a,
  4860. # struct ggml_tensor * b,
  4861. # struct ggml_tensor * c,
  4862. # ggml_custom3_op_t fun,
  4863. # int n_tasks,
  4864. # void * userdata);
def ggml_map_custom3(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    c: ggml_tensor_p,
    fun: "ctypes._FuncPointer",  # type: ignore
    n_tasks: Union[ctypes.c_int, int],
    userdata: Optional[ctypes.c_void_p],
) -> ggml_tensor_p:
    """Custom ternary operator on three tensors (custom operators v2).

    Parameters:
        ctx: ggml context
        a: input tensor
        b: input tensor
        c: input tensor
        fun (ggml.ggml_custom3_op_t): callback
        n_tasks: number of tasks, or GGML_N_TASKS_MAX
        userdata: opaque pointer passed through to the callback

    Returns:
        output tensor"""
    return lib.ggml_map_custom3(ctx, a, b, c, fun, n_tasks, userdata)


lib.ggml_map_custom3.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ggml_custom3_op_t,
    ctypes.c_int,
    ctypes.c_void_p,
]
lib.ggml_map_custom3.restype = ctypes.POINTER(ggml_tensor)
  4885. # GGML_API struct ggml_tensor * ggml_map_custom3_inplace(
  4886. # struct ggml_context * ctx,
  4887. # struct ggml_tensor * a,
  4888. # struct ggml_tensor * b,
  4889. # struct ggml_tensor * c,
  4890. # ggml_custom3_op_t fun,
  4891. # int n_tasks,
  4892. # void * userdata);
def ggml_map_custom3_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    c: ggml_tensor_p,
    fun: "ctypes._FuncPointer",  # type: ignore
    n_tasks: Union[ctypes.c_int, int],
    userdata: Optional[ctypes.c_void_p],
) -> ggml_tensor_p:
    """Custom ternary operator on three tensors inplace (custom operators v2).

    Parameters:
        ctx: ggml context
        a: input tensor
        b: input tensor
        c: input tensor
        fun (ggml.ggml_custom3_op_t): callback
        n_tasks: number of tasks, or GGML_N_TASKS_MAX
        userdata: opaque pointer passed through to the callback

    Returns:
        output tensor"""
    return lib.ggml_map_custom3_inplace(ctx, a, b, c, fun, n_tasks, userdata)


lib.ggml_map_custom3_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ggml_custom3_op_t,
    ctypes.c_int,
    ctypes.c_void_p,
]
lib.ggml_map_custom3_inplace.restype = ctypes.POINTER(ggml_tensor)
  4913. # // loss function
  4914. # GGML_API struct ggml_tensor * ggml_cross_entropy_loss(
  4915. # struct ggml_context * ctx,
  4916. # struct ggml_tensor * a,
  4917. # struct ggml_tensor * b);
def ggml_cross_entropy_loss(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    """Cross entropy loss between two tensors.

    Parameters:
        ctx: ggml context
        a: input tensor (presumably logits — confirm against ggml.h)
        b: input tensor (presumably targets — confirm against ggml.h)

    Returns:
        loss tensor"""
    return lib.ggml_cross_entropy_loss(ctx, a, b)


lib.ggml_cross_entropy_loss.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_cross_entropy_loss.restype = ctypes.POINTER(ggml_tensor)
  4930. # GGML_API struct ggml_tensor * ggml_cross_entropy_loss_back(
  4931. # struct ggml_context * ctx,
  4932. # struct ggml_tensor * a,
  4933. # struct ggml_tensor * b,
  4934. # struct ggml_tensor * c);
def ggml_cross_entropy_loss_back(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    c: ggml_tensor_p,
) -> ggml_tensor_p:
    """Backward pass of cross entropy loss.

    Parameters:
        ctx: ggml context
        a: input tensor
        b: input tensor
        c: input tensor (roles defined in ggml.h)

    Returns:
        output tensor"""
    return lib.ggml_cross_entropy_loss_back(ctx, a, b, c)


lib.ggml_cross_entropy_loss_back.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_cross_entropy_loss_back.restype = ctypes.POINTER(ggml_tensor)
  4949. # //
  4950. # // automatic differentiation
  4951. # //
  4952. # GGML_API void ggml_set_param(
  4953. # struct ggml_context * ctx,
  4954. # struct ggml_tensor * tensor);
def ggml_set_param(ctx: ggml_context_p, tensor: ggml_tensor_p):
    """Mark a tensor as a parameter for automatic differentiation.

    Parameters:
        ctx: ggml context
        tensor: the tensor"""
    return lib.ggml_set_param(ctx, tensor)


lib.ggml_set_param.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_set_param.restype = None
  4959. # GGML_API void ggml_build_forward_expand (struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
def ggml_build_forward_expand(
    cgraph: ggml_cgraph_p,
    tensor: ggml_tensor_p,
):
    """Add a tensor to the forward computation graph. This is used to
    compute and save the value of the tensor.

    Parameters:
        cgraph: The graph.
        tensor: The tensor."""
    return lib.ggml_build_forward_expand(cgraph, tensor)


lib.ggml_build_forward_expand.argtypes = [
    ctypes.POINTER(ggml_cgraph),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_build_forward_expand.restype = None
  4975. # GGML_API void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool keep);
def ggml_build_backward_expand(
    ctx: ggml_context_p,
    gf: ggml_cgraph_p,
    gb: ggml_cgraph_p,
    keep: Union[ctypes.c_bool, bool],
):
    """Add a tensor to the backward computation graph. This is used to
    compute the gradient of the tensor.

    Parameters:
        ctx: The context.
        gf: The forward graph.
        gb: The backward graph.
        keep: Whether to keep the tensor (exact semantics per ggml.h)."""
    return lib.ggml_build_backward_expand(ctx, gf, gb, keep)


lib.ggml_build_backward_expand.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_cgraph),
    ctypes.POINTER(ggml_cgraph),
    ctypes.c_bool,
]
lib.ggml_build_backward_expand.restype = None
  4997. # // graph allocation in a context
  4998. # GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
  4999. def ggml_new_graph(ctx: ggml_context_p) -> ggml_cgraph_p:
  5000. """Create a new graph.
  5001. Parameters:
  5002. ctx: The context.
  5003. Returns:
  5004. The graph."""
  5005. return lib.ggml_new_graph(ctx)
  5006. lib.ggml_new_graph.argtypes = [ggml_context_p]
  5007. lib.ggml_new_graph.restype = ctypes.POINTER(ggml_cgraph)
# GGML_API struct ggml_cgraph * ggml_new_graph_custom (struct ggml_context * ctx, size_t size, bool grads);
def ggml_new_graph_custom(
    ctx: ggml_context_p,
    size: Union[ctypes.c_size_t, int],
    grads: Union[ctypes.c_bool, bool],
) -> ggml_cgraph_p:
    """Create a new graph with custom size and grads.

    Parameters:
        ctx: The context.
        size: The size of the graph.
        grads: Whether to keep the gradients.

    Returns:
        The graph."""
    return lib.ggml_new_graph_custom(ctx, size, grads)


lib.ggml_new_graph_custom.argtypes = [ggml_context_p, ctypes.c_size_t, ctypes.c_bool]
lib.ggml_new_graph_custom.restype = ctypes.POINTER(ggml_cgraph)
# GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
def ggml_graph_dup(
    ctx: ggml_context_p,
    cgraph: ggml_cgraph_p,
) -> ggml_cgraph_p:
    """Duplicate a graph.

    Parameters:
        ctx: The context.
        cgraph: The graph.

    Returns:
        The graph."""
    return lib.ggml_graph_dup(ctx, cgraph)


lib.ggml_graph_dup.argtypes = [ggml_context_p, ctypes.POINTER(ggml_cgraph)]
lib.ggml_graph_dup.restype = ctypes.POINTER(ggml_cgraph)


# GGML_API struct ggml_cgraph ggml_graph_view (struct ggml_cgraph * cgraph, int i0, int i1);
def ggml_graph_view(
    cgraph: ggml_cgraph_p,
    i0: Union[ctypes.c_int, int],
    i1: Union[ctypes.c_int, int],
) -> ggml_cgraph:
    """View a graph.

    Note: unlike most graph functions here, this returns a ggml_cgraph
    struct by value (restype is ggml_cgraph, not a pointer).

    Parameters:
        cgraph: The graph.
        i0: The start index.
        i1: The end index.

    Returns:
        The graph."""
    return lib.ggml_graph_view(cgraph, i0, i1)


lib.ggml_graph_view.argtypes = [ctypes.POINTER(ggml_cgraph), ctypes.c_int, ctypes.c_int]
lib.ggml_graph_view.restype = ggml_cgraph
# GGML_API void ggml_graph_cpy (struct ggml_cgraph * src, struct ggml_cgraph * dst);
def ggml_graph_cpy(
    src: ggml_cgraph_p,
    dst: ggml_cgraph_p,
):
    """Copy a graph.

    Parameters:
        src: The source graph.
        dst: The destination graph."""
    return lib.ggml_graph_cpy(src, dst)


lib.ggml_graph_cpy.argtypes = [ctypes.POINTER(ggml_cgraph), ctypes.POINTER(ggml_cgraph)]
lib.ggml_graph_cpy.restype = None


# GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); // zero grads
def ggml_graph_reset(
    cgraph: ggml_cgraph_p,
):
    """Reset a graph (zeroes the gradients, per the C header comment).

    Parameters:
        cgraph: The graph."""
    return lib.ggml_graph_reset(cgraph)


lib.ggml_graph_reset.argtypes = [ctypes.POINTER(ggml_cgraph)]
lib.ggml_graph_reset.restype = None


# GGML_API void ggml_graph_clear (struct ggml_cgraph * cgraph);
def ggml_graph_clear(
    cgraph: ggml_cgraph_p,
):
    """Clear a graph.

    Parameters:
        cgraph: The graph."""
    return lib.ggml_graph_clear(cgraph)


lib.ggml_graph_clear.argtypes = [ctypes.POINTER(ggml_cgraph)]
lib.ggml_graph_clear.restype = None
# GGML_API size_t ggml_graph_overhead(void);
def ggml_graph_overhead() -> int:
    """Get the overhead of the graph."""
    return lib.ggml_graph_overhead()


lib.ggml_graph_overhead.argtypes = []
lib.ggml_graph_overhead.restype = ctypes.c_size_t


# GGML_API size_t ggml_graph_overhead_custom(size_t size, bool grads);
def ggml_graph_overhead_custom(
    size: Union[ctypes.c_size_t, int],
    grads: Union[ctypes.c_bool, bool],
) -> int:
    """Get the overhead of a graph with custom size/grads settings
    (same parameters as ggml_new_graph_custom)."""
    return lib.ggml_graph_overhead_custom(size, grads)


lib.ggml_graph_overhead_custom.argtypes = [ctypes.c_size_t, ctypes.c_bool]
lib.ggml_graph_overhead_custom.restype = ctypes.c_size_t
# // ggml_graph_plan() has to be called before ggml_graph_compute()
# // when plan.work_size > 0, caller must allocate memory for plan.work_data
# GGML_API struct ggml_cplan ggml_graph_plan (struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);
def ggml_graph_plan(
    cgraph: ggml_cgraph_p,
    n_threads: Union[ctypes.c_int, int] = GGML_DEFAULT_N_THREADS,
) -> ggml_cplan:
    """Plan the computation graph.

    Must be called before ggml_graph_compute(); when the returned
    plan.work_size > 0, the caller must allocate plan.work_data
    (per the C header comment above).

    Parameters:
        cgraph: The graph.
        n_threads: The number of threads to use.

    Returns:
        The plan (ggml_cplan struct, by value)."""
    return lib.ggml_graph_plan(cgraph, n_threads)


lib.ggml_graph_plan.argtypes = [
    ctypes.POINTER(ggml_cgraph),
    ctypes.c_int,
]
lib.ggml_graph_plan.restype = ggml_cplan


# GGML_API int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
def ggml_graph_compute(
    cgraph: ggml_cgraph_p,
    cplan: ggml_cplan_p,
) -> int:
    """Compute the graph using a plan from ggml_graph_plan().

    Returns the C library's int status code."""
    return lib.ggml_graph_compute(cgraph, cplan)


lib.ggml_graph_compute.argtypes = [
    ctypes.POINTER(ggml_cgraph),
    ctypes.POINTER(ggml_cplan),
]
lib.ggml_graph_compute.restype = ctypes.c_int
# // same as ggml_graph_compute() but the work data is allocated as a part of the context
# // note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
# GGML_API void ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
def ggml_graph_compute_with_ctx(
    ctx: ggml_context_p,
    cgraph: ggml_cgraph_p,
    n_threads: Union[ctypes.c_int, int],
):
    """Compute the graph with a context.

    Same as ggml_graph_compute() but the work data is allocated as part
    of the context; the caller must ensure the context has enough memory
    for it (per the C header comment above).

    Parameters:
        ctx: The context.
        cgraph: The graph.
        n_threads: The number of threads to use."""
    return lib.ggml_graph_compute_with_ctx(ctx, cgraph, n_threads)


lib.ggml_graph_compute_with_ctx.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_cgraph),
    ctypes.c_int,
]
lib.ggml_graph_compute_with_ctx.restype = None
  5150. # GGML_API struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name);
  5151. def ggml_graph_get_tensor(
  5152. cgraph: ggml_cgraph_p,
  5153. name: bytes,
  5154. ) -> ggml_tensor_p:
  5155. """Get a tensor from the graph by name.
  5156. Parameters:
  5157. cgraph: The graph.
  5158. name: The name of the tensor.
  5159. Returns:
  5160. The tensor."""
  5161. return lib.ggml_graph_get_tensor(cgraph, name)
  5162. lib.ggml_graph_get_tensor.argtypes = [
  5163. ctypes.POINTER(ggml_cgraph),
  5164. ctypes.c_char_p,
  5165. ]
  5166. lib.ggml_graph_get_tensor.restype = ctypes.POINTER(ggml_tensor)
# GGML_API void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname);
def ggml_graph_export(
    cgraph: ggml_cgraph_p,
    fname: bytes,
):
    """Export `cgraph` to the file `fname` (bytes path)."""
    return lib.ggml_graph_export(cgraph, fname)


lib.ggml_graph_export.argtypes = [
    ctypes.POINTER(ggml_cgraph),
    ctypes.c_char_p,
]
lib.ggml_graph_export.restype = None


# GGML_API struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval);
def ggml_graph_import(
    fname: bytes,
    ctx_data: "ctypes._Pointer[ggml_context_p]",  # type: ignore
    ctx_eval: "ctypes._Pointer[ggml_context_p]",  # type: ignore
) -> ggml_cgraph_p:
    """Import a graph from the file `fname`.

    `ctx_data` and `ctx_eval` are struct ggml_context ** out-parameters
    (per the C signature above)."""
    return lib.ggml_graph_import(fname, ctx_data, ctx_eval)


lib.ggml_graph_import.argtypes = [
    ctypes.c_char_p,
    ctypes.POINTER(ggml_context_p),
    ctypes.POINTER(ggml_context_p),
]
lib.ggml_graph_import.restype = ctypes.POINTER(ggml_cgraph)


# // print info and performance information for the graph
# GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph);
def ggml_graph_print(
    cgraph: ggml_cgraph_p,
):
    """Print info and performance information for the graph."""
    return lib.ggml_graph_print(cgraph)


lib.ggml_graph_print.argtypes = [ctypes.POINTER(ggml_cgraph)]
lib.ggml_graph_print.restype = None


# // dump the graph into a file using the dot format
# GGML_API void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename);
def ggml_graph_dump_dot(
    gb: ggml_cgraph_p,
    gf: ggml_cgraph_p,
    filename: bytes,
):
    """Dump the graph into `filename` using the dot format."""
    return lib.ggml_graph_dump_dot(gb, gf, filename)


lib.ggml_graph_dump_dot.argtypes = [
    ctypes.POINTER(ggml_cgraph),
    ctypes.POINTER(ggml_cgraph),
    ctypes.c_char_p,
]
lib.ggml_graph_dump_dot.restype = None
# // build gradient checkpointing backward graph gb for gf using provided checkpoints
# // gb_tmp will contain original backward graph with rewritten backward process nodes,
# // but without the second forward pass nodes.
# GGML_API void ggml_build_backward_gradient_checkpointing(
#     struct ggml_context * ctx,
#     struct ggml_cgraph * gf,
#     struct ggml_cgraph * gb,
#     struct ggml_cgraph * gb_tmp,
#     struct ggml_tensor * * checkpoints,
#     int n_checkpoints);
def ggml_build_backward_gradient_checkpointing(
    ctx: ggml_context_p,
    gf: ggml_cgraph_p,
    gb: ggml_cgraph_p,
    gb_tmp: ggml_cgraph_p,
    checkpoints: "ctypes._Pointer[ggml_tensor_p]",  # type: ignore
    n_checkpoints: Union[ctypes.c_int, int],
):
    """Build a gradient-checkpointing backward graph `gb` for `gf` using
    the provided `checkpoints` (array of `n_checkpoints` tensor pointers).

    `gb_tmp` receives the original backward graph with rewritten backward
    process nodes, but without the second forward pass nodes (per the C
    header comment above)."""
    return lib.ggml_build_backward_gradient_checkpointing(
        ctx, gf, gb, gb_tmp, checkpoints, n_checkpoints
    )


lib.ggml_build_backward_gradient_checkpointing.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_cgraph),
    ctypes.POINTER(ggml_cgraph),
    ctypes.POINTER(ggml_cgraph),
    ctypes.POINTER(ctypes.POINTER(ggml_tensor)),
    ctypes.c_int,
]
lib.ggml_build_backward_gradient_checkpointing.restype = None
# //
# // optimization
# //

# // optimization methods
# enum ggml_opt_type {
#     GGML_OPT_ADAM,
#     GGML_OPT_LBFGS,
# };
GGML_OPT_ADAM = 0
GGML_OPT_LBFGS = 1

# // linesearch methods
# enum ggml_linesearch {
#     GGML_LINESEARCH_DEFAULT = 1,
#     GGML_LINESEARCH_BACKTRACKING_ARMIJO = 0,
#     GGML_LINESEARCH_BACKTRACKING_WOLFE = 1,
#     GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 2,
# };
GGML_LINESEARCH_DEFAULT = 1  # same value as BACKTRACKING_WOLFE, per the C enum
GGML_LINESEARCH_BACKTRACKING_ARMIJO = 0
GGML_LINESEARCH_BACKTRACKING_WOLFE = 1
GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 2

# // optimization return values
# enum ggml_opt_result {
#     GGML_OPT_OK = 0,
#     GGML_OPT_DID_NOT_CONVERGE,
#     GGML_OPT_NO_CONTEXT,
#     GGML_OPT_INVALID_WOLFE,
#     GGML_OPT_FAIL,
#     GGML_OPT_CANCEL,
#     GGML_LINESEARCH_FAIL = -128,
#     GGML_LINESEARCH_MINIMUM_STEP,
#     GGML_LINESEARCH_MAXIMUM_STEP,
#     GGML_LINESEARCH_MAXIMUM_ITERATIONS,
#     GGML_LINESEARCH_INVALID_PARAMETERS,
# };
GGML_OPT_OK = 0
GGML_OPT_DID_NOT_CONVERGE = 1
GGML_OPT_NO_CONTEXT = 2
GGML_OPT_INVALID_WOLFE = 3
GGML_OPT_FAIL = 4
GGML_OPT_CANCEL = 5
# negative values are line-search failures, counting up from -128 per the C enum
GGML_LINESEARCH_FAIL = -128
GGML_LINESEARCH_MINIMUM_STEP = -127
GGML_LINESEARCH_MAXIMUM_STEP = -126
GGML_LINESEARCH_MAXIMUM_ITERATIONS = -125
GGML_LINESEARCH_INVALID_PARAMETERS = -124
# typedef void (*ggml_opt_callback)(void * data, int accum_step, float * sched, bool * cancel);
ggml_opt_callback = ctypes.CFUNCTYPE(
    None,
    ctypes.c_void_p,                 # void * data
    ctypes.c_int,                    # int accum_step
    ctypes.POINTER(ctypes.c_float),  # float * sched
    ctypes.POINTER(ctypes.c_bool),   # bool * cancel
)

# typedef void (*ggml_log_callback)(enum ggml_log_level level, const char * text, void * user_data);
ggml_log_callback = ctypes.CFUNCTYPE(
    None, ctypes.c_int, ctypes.c_char_p, ctypes.c_void_p
)
# // optimization parameters
# //
# //   see ggml.c (ggml_opt_default_params) for default values
# //
# struct ggml_opt_params {
#     enum ggml_opt_type type;
#     size_t graph_size;
#     int n_threads;
#     // delta-based convergence test
#     //
#     //   if past == 0 - disabled
#     //   if past > 0:
#     //     stop if |f(x) - f(x_past)| < delta * max(1, |f(x)|)
#     //
#     int past;
#     float delta;
#     // maximum number of iterations without improvement
#     //
#     //   if 0 - disabled
#     //   if > 0:
#     //     assume convergence if no cost improvement in this number of iterations
#     //
#     int max_no_improvement;
#     bool print_forward_graph;
#     bool print_backward_graph;
#     int n_gradient_accumulation;
#     // ADAM parameters
#     struct {
#         int n_iter;
#         float sched; // schedule multiplier (fixed, decay or warmup)
#         float decay; // weight decay for AdamW, use 0.0f to disable
#         int decay_min_ndim; // minimum number of tensor dimension to apply weight decay
#         float alpha; // learning rate
#         float beta1;
#         float beta2;
#         float eps; // epsilon for numerical stability
#         float eps_f; // epsilon for convergence test
#         float eps_g; // epsilon for convergence test
#         float gclip; // gradient clipping
#     } adam;
#     // LBFGS parameters
#     struct {
#         int m; // number of corrections to approximate the inv. Hessian
#         int n_iter;
#         int max_linesearch;
#         float eps; // convergence tolerance
#         float ftol; // line search tolerance
#         float wolfe;
#         float min_step;
#         float max_step;
#         enum ggml_linesearch linesearch;
#     } lbfgs;
# };
class ggml_opt_params_adam(ctypes.Structure):
    """ctypes mirror of the anonymous `adam` struct in ggml_opt_params.

    Field comments follow the C declaration above; field order and types
    must match the C layout exactly."""

    _fields_ = [
        ("n_iter", ctypes.c_int),
        ("sched", ctypes.c_float),          # schedule multiplier (fixed, decay or warmup)
        ("decay", ctypes.c_float),          # weight decay for AdamW, 0.0 disables
        ("decay_min_ndim", ctypes.c_int),   # min tensor dims to apply weight decay
        ("alpha", ctypes.c_float),          # learning rate
        ("beta1", ctypes.c_float),
        ("beta2", ctypes.c_float),
        ("eps", ctypes.c_float),            # epsilon for numerical stability
        ("eps_f", ctypes.c_float),          # epsilon for convergence test
        ("eps_g", ctypes.c_float),          # epsilon for convergence test
        ("gclip", ctypes.c_float),          # gradient clipping
    ]


class ggml_opt_params_lbfgs(ctypes.Structure):
    """ctypes mirror of the anonymous `lbfgs` struct in ggml_opt_params."""

    _fields_ = [
        ("m", ctypes.c_int),                # corrections to approximate the inv. Hessian
        ("n_iter", ctypes.c_int),
        ("max_linesearch", ctypes.c_int),
        ("eps", ctypes.c_float),            # convergence tolerance
        ("ftol", ctypes.c_float),           # line search tolerance
        ("wolfe", ctypes.c_float),
        ("min_step", ctypes.c_float),
        ("max_step", ctypes.c_float),
        ("linesearch", ctypes.c_int),       # enum ggml_linesearch
    ]


class ggml_opt_params(ctypes.Structure):
    """ctypes mirror of struct ggml_opt_params (see the C declaration above).

    Defaults are produced by ggml_opt_default_params()."""

    _fields_ = [
        ("type", ctypes.c_int),             # enum ggml_opt_type
        ("graph_size", ctypes.c_size_t),
        ("n_threads", ctypes.c_int),
        ("past", ctypes.c_int),             # delta-based convergence test window; 0 disables
        ("delta", ctypes.c_float),
        ("max_no_improvement", ctypes.c_int),  # 0 disables
        ("print_forward_graph", ctypes.c_bool),
        ("print_backward_graph", ctypes.c_bool),
        ("n_gradient_accumulation", ctypes.c_int),
        ("adam", ggml_opt_params_adam),
        ("lbfgs", ggml_opt_params_lbfgs),
    ]
# struct ggml_opt_context {
#     struct ggml_context * ctx;
#     struct ggml_opt_params params;
#     int iter;
#     int64_t nx; // number of parameter elements
#     bool just_initialized;
#     float loss_before;
#     float loss_after;
#     struct {
#         struct ggml_tensor * g;  // current gradient
#         struct ggml_tensor * m;  // first moment
#         struct ggml_tensor * v;  // second moment
#         struct ggml_tensor * pf; // past function values
#         float fx_best;
#         float fx_prev;
#         int n_no_improvement;
#     } adam;
#     struct {
#         struct ggml_tensor * x;    // current parameters
#         struct ggml_tensor * xp;   // previous parameters
#         struct ggml_tensor * g;    // current gradient
#         struct ggml_tensor * gp;   // previous gradient
#         struct ggml_tensor * d;    // search direction
#         struct ggml_tensor * pf;   // past function values
#         struct ggml_tensor * lmal; // the L-BFGS memory alpha
#         struct ggml_tensor * lmys; // the L-BFGS memory ys
#         struct ggml_tensor * lms;  // the L-BFGS memory s
#         struct ggml_tensor * lmy;  // the L-BFGS memory y
#         float fx_best;
#         float step;
#         int j;
#         int k;
#         int end;
#         int n_no_improvement;
#     } lbfgs;
# };
class ggml_opt_context_adam(ctypes.Structure):
    """ctypes mirror of the anonymous `adam` state struct in ggml_opt_context."""

    _fields_ = [
        ("g", ctypes.POINTER(ggml_tensor)),   # current gradient
        ("m", ctypes.POINTER(ggml_tensor)),   # first moment
        ("v", ctypes.POINTER(ggml_tensor)),   # second moment
        ("pf", ctypes.POINTER(ggml_tensor)),  # past function values
        ("fx_best", ctypes.c_float),
        ("fx_prev", ctypes.c_float),
        ("n_no_improvement", ctypes.c_int),
    ]


class ggml_opt_context_lbfgs(ctypes.Structure):
    """ctypes mirror of the anonymous `lbfgs` state struct in ggml_opt_context."""

    _fields_ = [
        ("x", ctypes.POINTER(ggml_tensor)),     # current parameters
        ("xp", ctypes.POINTER(ggml_tensor)),    # previous parameters
        ("g", ctypes.POINTER(ggml_tensor)),     # current gradient
        ("gp", ctypes.POINTER(ggml_tensor)),    # previous gradient
        ("d", ctypes.POINTER(ggml_tensor)),     # search direction
        ("pf", ctypes.POINTER(ggml_tensor)),    # past function values
        ("lmal", ctypes.POINTER(ggml_tensor)),  # the L-BFGS memory alpha
        ("lmys", ctypes.POINTER(ggml_tensor)),  # the L-BFGS memory ys
        ("lms", ctypes.POINTER(ggml_tensor)),   # the L-BFGS memory s
        ("lmy", ctypes.POINTER(ggml_tensor)),   # the L-BFGS memory y
        ("fx_best", ctypes.c_float),
        ("step", ctypes.c_float),
        ("j", ctypes.c_int),
        ("k", ctypes.c_int),
        ("end", ctypes.c_int),
        ("n_no_improvement", ctypes.c_int),
    ]


class ggml_opt_context(ctypes.Structure):
    """ctypes mirror of struct ggml_opt_context (see the C declaration above)."""

    _fields_ = [
        ("ctx", ggml_context_p),
        ("params", ggml_opt_params),
        ("iter", ctypes.c_int),
        ("nx", ctypes.c_int64),             # number of parameter elements
        ("just_initialized", ctypes.c_bool),
        ("loss_before", ctypes.c_float),
        ("loss_after", ctypes.c_float),
        ("adam", ggml_opt_context_adam),
        ("lbfgs", ggml_opt_context_lbfgs),
    ]


ggml_opt_context_p = ctypes.POINTER(ggml_opt_context)
# GGML_API struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type);
def ggml_opt_default_params(type: Union[ctypes.c_int, int]) -> ggml_opt_params:
    """Return the default optimizer parameters for `type`
    (GGML_OPT_ADAM or GGML_OPT_LBFGS).

    Annotation fixed: `type` is an int enum value, not a bool (matches
    the enum-typed parameters of the sibling bindings)."""
    return lib.ggml_opt_default_params(type)


lib.ggml_opt_default_params.argtypes = [ctypes.c_int]
lib.ggml_opt_default_params.restype = ggml_opt_params
# // optimize the function defined by the tensor f
# GGML_API enum ggml_opt_result ggml_opt(
#     struct ggml_context * ctx,
#     struct ggml_opt_params params,
#     struct ggml_tensor * f);
def ggml_opt(
    ctx: ggml_context_p,
    params: ggml_opt_params,
    f: ggml_tensor_p,
) -> int:
    """Optimize the function defined by the tensor `f`.

    Returns a ggml_opt_result code (see the GGML_OPT_* constants)."""
    return lib.ggml_opt(ctx, params, f)


lib.ggml_opt.argtypes = [ggml_context_p, ggml_opt_params, ctypes.POINTER(ggml_tensor)]
lib.ggml_opt.restype = ctypes.c_int
# // initialize optimizer context
# GGML_API void ggml_opt_init(
#     struct ggml_context * ctx,
#     struct ggml_opt_context * opt,
#     struct ggml_opt_params params,
#     int64_t nx);
def ggml_opt_init(
    ctx: ggml_context_p,
    opt: "ctypes._Pointer[ggml_opt_context]",  # type: ignore
    params: ggml_opt_params,
    nx: Union[ctypes.c_int64, int],
):
    """Initialize the optimizer context `opt`.

    `nx` is the number of parameter elements (per the ggml_opt_context
    field comment)."""
    return lib.ggml_opt_init(ctx, opt, params, nx)


lib.ggml_opt_init.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_opt_context),
    ggml_opt_params,
    ctypes.c_int64,
]
lib.ggml_opt_init.restype = None
# // continue optimizing the function defined by the tensor f
# GGML_API enum ggml_opt_result ggml_opt_resume(
#     struct ggml_context * ctx,
#     struct ggml_opt_context * opt,
#     struct ggml_tensor * f);
def ggml_opt_resume(
    ctx: ggml_context_p,
    opt: "ctypes._Pointer[ggml_opt_context]",  # type: ignore
    f: ggml_tensor_p,
) -> int:
    """Continue optimizing the function defined by the tensor `f`.

    Returns a ggml_opt_result code (see the GGML_OPT_* constants)."""
    return lib.ggml_opt_resume(ctx, opt, f)


lib.ggml_opt_resume.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_opt_context),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_opt_resume.restype = ctypes.c_int


# // continue optimizing the function defined by the tensor f
# GGML_API enum ggml_opt_result ggml_opt_resume_g(
#     struct ggml_context * ctx,
#     struct ggml_opt_context * opt,
#     struct ggml_tensor * f,
#     struct ggml_cgraph * gf,
#     struct ggml_cgraph * gb,
#     ggml_opt_callback callback,
#     void * callback_data);
def ggml_opt_resume_g(
    ctx: ggml_context_p,
    opt: "ctypes._Pointer[ggml_opt_context]",  # type: ignore
    f: ggml_tensor_p,
    gf: ggml_cgraph_p,
    gb: ggml_cgraph_p,
    callback: "ctypes._CFuncPtr[None, ctypes.c_void_p, ctypes.c_int, ctypes.POINTER(ctypes.c_float), ctypes.POINTER(ctypes.c_bool)]",  # type: ignore
    callback_data: ctypes.c_void_p,
) -> int:
    """Continue optimizing `f` using explicit forward/backward graphs.

    `callback` must be a ggml_opt_callback CFUNCTYPE instance; it is
    invoked by the C library with `callback_data`.

    Returns a ggml_opt_result code."""
    return lib.ggml_opt_resume_g(ctx, opt, f, gf, gb, callback, callback_data)


lib.ggml_opt_resume_g.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_opt_context),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_cgraph),
    ctypes.POINTER(ggml_cgraph),
    ggml_opt_callback,
    ctypes.c_void_p,
]
lib.ggml_opt_resume_g.restype = ctypes.c_int
# //
# // quantization
# //

# // TODO: these would probably get removed in favor of the more general ggml_quantize_chunk
# All ggml_quantize_* wrappers below share the same C signature:
# (const float * src, void * dst, int n, int k, int64_t * hist) -> size_t.
# Each returns the C size_t result as a Python int.

# GGML_API size_t ggml_quantize_q4_0(const float * src, void * dst, int n, int k, int64_t * hist);
def ggml_quantize_q4_0(
    src: CFloatArray,
    dst: ctypes.c_void_p,
    n: Union[ctypes.c_int, int],
    k: Union[ctypes.c_int, int],
    hist: CInt64Array,
) -> int:
    """Quantize float `src` to Q4_0 into `dst`; returns the C size_t result."""
    return lib.ggml_quantize_q4_0(src, dst, n, k, hist)


lib.ggml_quantize_q4_0.argtypes = [
    ctypes.POINTER(ctypes.c_float),
    ctypes.c_void_p,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.POINTER(ctypes.c_int64),
]
lib.ggml_quantize_q4_0.restype = ctypes.c_size_t


# GGML_API size_t ggml_quantize_q4_1(const float * src, void * dst, int n, int k, int64_t * hist);
def ggml_quantize_q4_1(
    src: CFloatArray,
    dst: ctypes.c_void_p,
    n: Union[ctypes.c_int, int],
    k: Union[ctypes.c_int, int],
    hist: CInt64Array,
) -> int:
    """Quantize float `src` to Q4_1 into `dst`; returns the C size_t result."""
    return lib.ggml_quantize_q4_1(src, dst, n, k, hist)


lib.ggml_quantize_q4_1.argtypes = [
    ctypes.POINTER(ctypes.c_float),
    ctypes.c_void_p,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.POINTER(ctypes.c_int64),
]
lib.ggml_quantize_q4_1.restype = ctypes.c_size_t


# GGML_API size_t ggml_quantize_q5_0(const float * src, void * dst, int n, int k, int64_t * hist);
def ggml_quantize_q5_0(
    src: CFloatArray,
    dst: ctypes.c_void_p,
    n: Union[ctypes.c_int, int],
    k: Union[ctypes.c_int, int],
    hist: CInt64Array,
) -> int:
    """Quantize float `src` to Q5_0 into `dst`; returns the C size_t result."""
    return lib.ggml_quantize_q5_0(src, dst, n, k, hist)


lib.ggml_quantize_q5_0.argtypes = [
    ctypes.POINTER(ctypes.c_float),
    ctypes.c_void_p,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.POINTER(ctypes.c_int64),
]
lib.ggml_quantize_q5_0.restype = ctypes.c_size_t


# GGML_API size_t ggml_quantize_q5_1(const float * src, void * dst, int n, int k, int64_t * hist);
def ggml_quantize_q5_1(
    src: CFloatArray,
    dst: ctypes.c_void_p,
    n: Union[ctypes.c_int, int],
    k: Union[ctypes.c_int, int],
    hist: CInt64Array,
) -> int:
    """Quantize float `src` to Q5_1 into `dst`; returns the C size_t result."""
    return lib.ggml_quantize_q5_1(src, dst, n, k, hist)


lib.ggml_quantize_q5_1.argtypes = [
    ctypes.POINTER(ctypes.c_float),
    ctypes.c_void_p,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.POINTER(ctypes.c_int64),
]
lib.ggml_quantize_q5_1.restype = ctypes.c_size_t


# GGML_API size_t ggml_quantize_q8_0(const float * src, void * dst, int n, int k, int64_t * hist);
def ggml_quantize_q8_0(
    src: CFloatArray,
    dst: ctypes.c_void_p,
    n: Union[ctypes.c_int, int],
    k: Union[ctypes.c_int, int],
    hist: CInt64Array,
) -> int:
    """Quantize float `src` to Q8_0 into `dst`; returns the C size_t result."""
    return lib.ggml_quantize_q8_0(src, dst, n, k, hist)


lib.ggml_quantize_q8_0.argtypes = [
    ctypes.POINTER(ctypes.c_float),
    ctypes.c_void_p,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.POINTER(ctypes.c_int64),
]
lib.ggml_quantize_q8_0.restype = ctypes.c_size_t


# GGML_API size_t ggml_quantize_q2_K(const float * src, void * dst, int n, int k, int64_t * hist);
def ggml_quantize_q2_K(
    src: CFloatArray,
    dst: ctypes.c_void_p,
    n: Union[ctypes.c_int, int],
    k: Union[ctypes.c_int, int],
    hist: CInt64Array,
) -> int:
    """Quantize float `src` to Q2_K into `dst`; returns the C size_t result."""
    return lib.ggml_quantize_q2_K(src, dst, n, k, hist)


lib.ggml_quantize_q2_K.argtypes = [
    ctypes.POINTER(ctypes.c_float),
    ctypes.c_void_p,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.POINTER(ctypes.c_int64),
]
lib.ggml_quantize_q2_K.restype = ctypes.c_size_t


# GGML_API size_t ggml_quantize_q3_K(const float * src, void * dst, int n, int k, int64_t * hist);
def ggml_quantize_q3_K(
    src: CFloatArray,
    dst: ctypes.c_void_p,
    n: Union[ctypes.c_int, int],
    k: Union[ctypes.c_int, int],
    hist: CInt64Array,
) -> int:
    """Quantize float `src` to Q3_K into `dst`; returns the C size_t result."""
    return lib.ggml_quantize_q3_K(src, dst, n, k, hist)


lib.ggml_quantize_q3_K.argtypes = [
    ctypes.POINTER(ctypes.c_float),
    ctypes.c_void_p,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.POINTER(ctypes.c_int64),
]
lib.ggml_quantize_q3_K.restype = ctypes.c_size_t


# GGML_API size_t ggml_quantize_q4_K(const float * src, void * dst, int n, int k, int64_t * hist);
def ggml_quantize_q4_K(
    src: CFloatArray,
    dst: ctypes.c_void_p,
    n: Union[ctypes.c_int, int],
    k: Union[ctypes.c_int, int],
    hist: CInt64Array,
) -> int:
    """Quantize float `src` to Q4_K into `dst`; returns the C size_t result."""
    return lib.ggml_quantize_q4_K(src, dst, n, k, hist)


lib.ggml_quantize_q4_K.argtypes = [
    ctypes.POINTER(ctypes.c_float),
    ctypes.c_void_p,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.POINTER(ctypes.c_int64),
]
lib.ggml_quantize_q4_K.restype = ctypes.c_size_t


# GGML_API size_t ggml_quantize_q5_K(const float * src, void * dst, int n, int k, int64_t * hist);
def ggml_quantize_q5_K(
    src: CFloatArray,
    dst: ctypes.c_void_p,
    n: Union[ctypes.c_int, int],
    k: Union[ctypes.c_int, int],
    hist: CInt64Array,
) -> int:
    """Quantize float `src` to Q5_K into `dst`; returns the C size_t result."""
    return lib.ggml_quantize_q5_K(src, dst, n, k, hist)


lib.ggml_quantize_q5_K.argtypes = [
    ctypes.POINTER(ctypes.c_float),
    ctypes.c_void_p,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.POINTER(ctypes.c_int64),
]
lib.ggml_quantize_q5_K.restype = ctypes.c_size_t


# GGML_API size_t ggml_quantize_q6_K(const float * src, void * dst, int n, int k, int64_t * hist);
def ggml_quantize_q6_K(
    src: CFloatArray,
    dst: ctypes.c_void_p,
    n: Union[ctypes.c_int, int],
    k: Union[ctypes.c_int, int],
    hist: CInt64Array,
) -> int:
    """Quantize float `src` to Q6_K into `dst`; returns the C size_t result."""
    return lib.ggml_quantize_q6_K(src, dst, n, k, hist)


lib.ggml_quantize_q6_K.argtypes = [
    ctypes.POINTER(ctypes.c_float),
    ctypes.c_void_p,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.POINTER(ctypes.c_int64),
]
lib.ggml_quantize_q6_K.restype = ctypes.c_size_t
  5730. # GGML_API size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start, int n, int64_t * hist);
  5731. def ggml_quantize_chunk(
  5732. type: Union[ctypes.c_int, int],
  5733. src: CFloatArray,
  5734. dst: ctypes.c_void_p,
  5735. start: Union[ctypes.c_int, int],
  5736. n: Union[ctypes.c_int, int],
  5737. hist: CInt64Array,
  5738. ) -> int:
  5739. return lib.ggml_quantize_chunk(type, src, dst, start, n, hist)
  5740. lib.ggml_quantize_chunk.argtypes = [
  5741. ctypes.c_int,
  5742. ctypes.POINTER(ctypes.c_float),
  5743. ctypes.c_void_p,
  5744. ctypes.c_int,
  5745. ctypes.c_int,
  5746. ctypes.POINTER(ctypes.c_int64),
  5747. ]
  5748. lib.ggml_quantize_chunk.restype = ctypes.c_size_t
  5749. # //
  5750. # // gguf
  5751. # //
  5752. # enum gguf_type {
  5753. # GGUF_TYPE_UINT8 = 0,
  5754. # GGUF_TYPE_INT8 = 1,
  5755. # GGUF_TYPE_UINT16 = 2,
  5756. # GGUF_TYPE_INT16 = 3,
  5757. # GGUF_TYPE_UINT32 = 4,
  5758. # GGUF_TYPE_INT32 = 5,
  5759. # GGUF_TYPE_FLOAT32 = 6,
  5760. # GGUF_TYPE_BOOL = 7,
  5761. # GGUF_TYPE_STRING = 8,
  5762. # GGUF_TYPE_ARRAY = 9,
  5763. # GGUF_TYPE_UINT64 = 10,
  5764. # GGUF_TYPE_INT64 = 11,
  5765. # GGUF_TYPE_FLOAT64 = 12,
  5766. # GGUF_TYPE_COUNT, // marks the end of the enum
  5767. # };
  5768. GGUF_TYPE_UINT8 = 0
  5769. GGUF_TYPE_INT8 = 1
  5770. GGUF_TYPE_UINT16 = 2
  5771. GGUF_TYPE_INT16 = 3
  5772. GGUF_TYPE_UINT32 = 4
  5773. GGUF_TYPE_INT32 = 5
  5774. GGUF_TYPE_FLOAT32 = 6
  5775. GGUF_TYPE_BOOL = 7
  5776. GGUF_TYPE_STRING = 8
  5777. GGUF_TYPE_ARRAY = 9
  5778. GGUF_TYPE_COUNT = 10
  5779. # struct gguf_context;
  5780. gguf_context_p = ctypes.c_void_p
# //
# // system info
# //
# Each ggml_cpu_has_* wrapper returns the C library's int feature flag.

# GGML_API int ggml_cpu_has_avx (void);
def ggml_cpu_has_avx() -> int:
    """AVX feature flag from the C library (int)."""
    return lib.ggml_cpu_has_avx()


lib.ggml_cpu_has_avx.argtypes = []
lib.ggml_cpu_has_avx.restype = ctypes.c_int


# GGML_API int ggml_cpu_has_avx2 (void);
def ggml_cpu_has_avx2() -> int:
    """AVX2 feature flag from the C library (int)."""
    return lib.ggml_cpu_has_avx2()


lib.ggml_cpu_has_avx2.argtypes = []
lib.ggml_cpu_has_avx2.restype = ctypes.c_int


# GGML_API int ggml_cpu_has_avx512 (void);
def ggml_cpu_has_avx512() -> int:
    """AVX-512 feature flag from the C library (int)."""
    return lib.ggml_cpu_has_avx512()


lib.ggml_cpu_has_avx512.argtypes = []
lib.ggml_cpu_has_avx512.restype = ctypes.c_int


# GGML_API int ggml_cpu_has_avx512_vbmi(void);
def ggml_cpu_has_avx512_vbmi() -> int:
    """AVX-512 VBMI feature flag from the C library (int)."""
    return lib.ggml_cpu_has_avx512_vbmi()


lib.ggml_cpu_has_avx512_vbmi.argtypes = []
lib.ggml_cpu_has_avx512_vbmi.restype = ctypes.c_int


# GGML_API int ggml_cpu_has_avx512_vnni(void);
def ggml_cpu_has_avx512_vnni() -> int:
    """AVX-512 VNNI feature flag from the C library (int)."""
    return lib.ggml_cpu_has_avx512_vnni()


lib.ggml_cpu_has_avx512_vnni.argtypes = []
lib.ggml_cpu_has_avx512_vnni.restype = ctypes.c_int


# GGML_API int ggml_cpu_has_fma (void);
def ggml_cpu_has_fma() -> int:
    """FMA feature flag from the C library (int)."""
    return lib.ggml_cpu_has_fma()


lib.ggml_cpu_has_fma.argtypes = []
lib.ggml_cpu_has_fma.restype = ctypes.c_int


# GGML_API int ggml_cpu_has_neon (void);
def ggml_cpu_has_neon() -> int:
    """NEON feature flag from the C library (int)."""
    return lib.ggml_cpu_has_neon()


lib.ggml_cpu_has_neon.argtypes = []
lib.ggml_cpu_has_neon.restype = ctypes.c_int


# GGML_API int ggml_cpu_has_arm_fma (void);
def ggml_cpu_has_arm_fma() -> int:
    """ARM FMA feature flag from the C library (int)."""
    return lib.ggml_cpu_has_arm_fma()


lib.ggml_cpu_has_arm_fma.argtypes = []
lib.ggml_cpu_has_arm_fma.restype = ctypes.c_int


# GGML_API int ggml_cpu_has_metal (void);
def ggml_cpu_has_metal() -> int:
    """Metal support flag from the C library (int)."""
    return lib.ggml_cpu_has_metal()


lib.ggml_cpu_has_metal.argtypes = []
lib.ggml_cpu_has_metal.restype = ctypes.c_int


# GGML_API int ggml_cpu_has_f16c (void);
def ggml_cpu_has_f16c() -> int:
    """F16C feature flag from the C library (int)."""
    return lib.ggml_cpu_has_f16c()


lib.ggml_cpu_has_f16c.argtypes = []
lib.ggml_cpu_has_f16c.restype = ctypes.c_int


# GGML_API int ggml_cpu_has_fp16_va (void);
def ggml_cpu_has_fp16_va() -> int:
    """FP16 vector-arithmetic feature flag from the C library (int)."""
    return lib.ggml_cpu_has_fp16_va()


lib.ggml_cpu_has_fp16_va.argtypes = []
lib.ggml_cpu_has_fp16_va.restype = ctypes.c_int
  5839. # GGML_API int ggml_cpu_has_wasm_simd (void);
  5840. def ggml_cpu_has_wasm_simd() -> int:
  5841. return lib.ggml_cpu_has_wasm_simd()
  5842. lib.ggml_cpu_has_wasm_simd.argtypes = []
  5843. lib.ggml_cpu_has_wasm_simd.restype = ctypes.c_int
  5844. # GGML_API int ggml_cpu_has_blas (void);
  5845. def ggml_cpu_has_blas() -> int:
  5846. return lib.ggml_cpu_has_blas()
  5847. lib.ggml_cpu_has_blas.argtypes = []
  5848. lib.ggml_cpu_has_blas.restype = ctypes.c_int
  5849. # GGML_API int ggml_cpu_has_cublas (void);
  5850. def ggml_cpu_has_cublas() -> int:
  5851. return lib.ggml_cpu_has_cublas()
  5852. lib.ggml_cpu_has_cublas.argtypes = []
  5853. lib.ggml_cpu_has_cublas.restype = ctypes.c_int
  5854. # GGML_API int ggml_cpu_has_clblast (void);
  5855. def ggml_cpu_has_clblast() -> int:
  5856. return lib.ggml_cpu_has_clblast()
  5857. lib.ggml_cpu_has_clblast.argtypes = []
  5858. lib.ggml_cpu_has_clblast.restype = ctypes.c_int
  5859. # GGML_API int ggml_cpu_has_gpublas (void);
  5860. def ggml_cpu_has_gpublas() -> int:
  5861. return lib.ggml_cpu_has_gpublas()
  5862. lib.ggml_cpu_has_gpublas.argtypes = []
  5863. lib.ggml_cpu_has_gpublas.restype = ctypes.c_int
  5864. # GGML_API int ggml_cpu_has_sse3 (void);
  5865. def ggml_cpu_has_sse3() -> int:
  5866. return lib.ggml_cpu_has_sse3()
  5867. lib.ggml_cpu_has_sse3.argtypes = []
  5868. lib.ggml_cpu_has_sse3.restype = ctypes.c_int
  5869. # GGML_API int ggml_cpu_has_ssse3 (void);
  5870. def ggml_cpu_has_ssse3() -> int:
  5871. return lib.ggml_cpu_has_ssse3()
  5872. lib.ggml_cpu_has_ssse3.argtypes = []
  5873. lib.ggml_cpu_has_ssse3.restype = ctypes.c_int
  5874. # GGML_API int ggml_cpu_has_vsx (void);
  5875. def ggml_cpu_has_vsx() -> int:
  5876. return lib.ggml_cpu_has_vsx()
  5877. lib.ggml_cpu_has_vsx.argtypes = []
  5878. lib.ggml_cpu_has_vsx.restype = ctypes.c_int
  5879. # //
  5880. # // Internal types and functions exposed for tests and benchmarks
  5881. # //
  5882. # typedef void (*ggml_to_float_t)(const void * x, float * y, int k);
  5883. ggml_to_float_t = ctypes.CFUNCTYPE(
  5884. None, ctypes.c_void_p, ctypes.POINTER(ctypes.c_float), ctypes.c_int
  5885. )
  5886. # typedef void (*ggml_from_float_t)(const float * x, void * y, int k);
  5887. ggml_from_float_t = ctypes.CFUNCTYPE(
  5888. None, ctypes.POINTER(ctypes.c_float), ctypes.c_void_p, ctypes.c_int
  5889. )
  5890. # typedef void (*ggml_vec_dot_t)(const int n, float * s, const void * x, const void * y);
  5891. ggml_vec_dot_t = ctypes.CFUNCTYPE(
  5892. None, ctypes.c_int, ctypes.POINTER(ctypes.c_float), ctypes.c_void_p, ctypes.c_void_p
  5893. )
# typedef struct {
#     const char * type_name;
#     int blck_size;
#     size_t type_size;
#     bool is_quantized;
#     ggml_to_float_t to_float;
#     ggml_from_float_t from_float;
#     ggml_from_float_t from_float_reference;
#     ggml_vec_dot_t vec_dot;
#     enum ggml_type vec_dot_type;
# } ggml_type_traits_t;
class ggml_type_traits_t(ctypes.Structure):
    """ctypes mirror of the C `ggml_type_traits_t` struct.

    Field order and field types must match the C struct layout exactly —
    do not reorder or retype entries.
    """

    _fields_ = [
        ("type_name", ctypes.c_char_p),
        ("blck_size", ctypes.c_int),
        ("type_size", ctypes.c_size_t),
        ("is_quantized", ctypes.c_bool),
        ("to_float", ggml_to_float_t),
        ("from_float", ggml_from_float_t),
        ("from_float_reference", ggml_from_float_t),
        ("vec_dot", ggml_vec_dot_t),
        ("vec_dot_type", ctypes.c_int),  # enum ggml_type
    ]

# GGML_API ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type);
def ggml_internal_get_type_traits(type: Union[ctypes.c_int, int]) -> ggml_type_traits_t:
    """Return the type-traits entry for the given ggml type (struct returned by value)."""
    return lib.ggml_internal_get_type_traits(type)

lib.ggml_internal_get_type_traits.argtypes = [ctypes.c_int]
lib.ggml_internal_get_type_traits.restype = ggml_type_traits_t
#####################################################
# GGML ALLOC API
# source: ggml-alloc.h
#####################################################

# struct ggml_backend;
# struct ggml_backend_buffer;
# struct ggml_backend_buffer_type;
# Opaque C handles — represented as raw void pointers on the Python side.
ggml_backend_t = ctypes.c_void_p
ggml_backend_buffer_p = ctypes.c_void_p
ggml_backend_buffer_type_p = ctypes.c_void_p

# //
# // Legacy API
# //

# typedef struct ggml_allocr * ggml_allocr_t;
ggml_allocr_t = ctypes.c_void_p
# // initialize allocator for use with CPU backend only
# GGML_API ggml_allocr_t ggml_allocr_new(void * data, size_t size, size_t alignment);
def ggml_allocr_new(
    data: ctypes.c_void_p,
    size: Union[ctypes.c_size_t, int],
    alignment: Union[ctypes.c_size_t, int],
) -> ggml_allocr_t:
    """Create a legacy allocator over a caller-provided buffer of `size` bytes."""
    return lib.ggml_allocr_new(data, size, alignment)

lib.ggml_allocr_new.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.c_size_t]
lib.ggml_allocr_new.restype = ggml_allocr_t

# GGML_API ggml_allocr_t ggml_allocr_new_measure(size_t alignment);
def ggml_allocr_new_measure(alignment: Union[ctypes.c_size_t, int]) -> ggml_allocr_t:
    """Create a measure allocator with the given alignment (see ggml_allocr_is_measure)."""
    return lib.ggml_allocr_new_measure(alignment)

lib.ggml_allocr_new_measure.argtypes = [ctypes.c_size_t]
lib.ggml_allocr_new_measure.restype = ggml_allocr_t

# // initialize allocator for use with ggml-backend
# GGML_API ggml_allocr_t ggml_allocr_new_from_buffer(struct ggml_backend_buffer * buffer);
def ggml_allocr_new_from_buffer(buffer: ggml_backend_buffer_p) -> ggml_allocr_t:
    """Create a legacy allocator that allocates from an existing backend buffer."""
    return lib.ggml_allocr_new_from_buffer(buffer)

lib.ggml_allocr_new_from_buffer.argtypes = [ggml_backend_buffer_p]
lib.ggml_allocr_new_from_buffer.restype = ggml_allocr_t

# GGML_API ggml_allocr_t ggml_allocr_new_from_backend(struct ggml_backend * backend, size_t size); // allocates an owned buffer
def ggml_allocr_new_from_backend(
    backend: ggml_backend_t, size: Union[ctypes.c_size_t, int]
) -> ggml_allocr_t:
    """Create a legacy allocator with a newly allocated, owned backend buffer."""
    return lib.ggml_allocr_new_from_backend(backend, size)

lib.ggml_allocr_new_from_backend.argtypes = [ggml_backend_t, ctypes.c_size_t]
lib.ggml_allocr_new_from_backend.restype = ggml_allocr_t

# GGML_API ggml_allocr_t ggml_allocr_new_measure_from_backend(struct ggml_backend * backend);
def ggml_allocr_new_measure_from_backend(backend: ggml_backend_t) -> ggml_allocr_t:
    """Create a measure allocator for the given backend."""
    return lib.ggml_allocr_new_measure_from_backend(backend)

lib.ggml_allocr_new_measure_from_backend.argtypes = [ggml_backend_t]
lib.ggml_allocr_new_measure_from_backend.restype = ggml_allocr_t

# GGML_API struct ggml_backend_buffer * ggml_allocr_get_buffer(ggml_allocr_t alloc);
def ggml_allocr_get_buffer(alloc: ggml_allocr_t) -> ggml_backend_buffer_p:
    """Return the backend buffer this allocator allocates from."""
    return lib.ggml_allocr_get_buffer(alloc)

lib.ggml_allocr_get_buffer.argtypes = [ggml_allocr_t]
lib.ggml_allocr_get_buffer.restype = ggml_backend_buffer_p

# // tell the allocator to parse nodes following the order described in the list
# // you should call this if your graph are optimized to execute out-of-order
# GGML_API void ggml_allocr_set_parse_seq(ggml_allocr_t alloc, const int * list, int n);
def ggml_allocr_set_parse_seq(
    alloc: ggml_allocr_t,
    list: "ctypes._Pointer(ctypes.c_int)",  # type: ignore
    n: Union[ctypes.c_int, int],
) -> None:
    """Set the node-parse order (`list` of `n` indices) for out-of-order graphs."""
    return lib.ggml_allocr_set_parse_seq(alloc, list, n)

lib.ggml_allocr_set_parse_seq.argtypes = [
    ggml_allocr_t,
    ctypes.POINTER(ctypes.c_int),
    ctypes.c_int,
]
lib.ggml_allocr_set_parse_seq.restype = None

# GGML_API void ggml_allocr_free (ggml_allocr_t alloc);
def ggml_allocr_free(alloc: ggml_allocr_t) -> None:
    """Free the allocator."""
    return lib.ggml_allocr_free(alloc)

lib.ggml_allocr_free.argtypes = [ggml_allocr_t]
lib.ggml_allocr_free.restype = None

# GGML_API bool ggml_allocr_is_measure (ggml_allocr_t alloc);
def ggml_allocr_is_measure(alloc: ggml_allocr_t) -> bool:
    """Return True if this is a measure allocator."""
    return lib.ggml_allocr_is_measure(alloc)

lib.ggml_allocr_is_measure.argtypes = [ggml_allocr_t]
lib.ggml_allocr_is_measure.restype = ctypes.c_bool

# GGML_API void ggml_allocr_reset (ggml_allocr_t alloc);
def ggml_allocr_reset(alloc: ggml_allocr_t) -> None:
    """Reset the allocator so its memory can be reused."""
    return lib.ggml_allocr_reset(alloc)

lib.ggml_allocr_reset.argtypes = [ggml_allocr_t]
lib.ggml_allocr_reset.restype = None

# GGML_API void ggml_allocr_alloc (ggml_allocr_t alloc, struct ggml_tensor * tensor);
def ggml_allocr_alloc(alloc: ggml_allocr_t, tensor: ggml_tensor_p) -> None:
    """Allocate storage for `tensor` from this allocator."""
    return lib.ggml_allocr_alloc(alloc, tensor)

lib.ggml_allocr_alloc.argtypes = [ggml_allocr_t, ctypes.POINTER(ggml_tensor)]
lib.ggml_allocr_alloc.restype = None

# GGML_API size_t ggml_allocr_max_size (ggml_allocr_t alloc);
def ggml_allocr_max_size(alloc: ggml_allocr_t) -> Union[ctypes.c_size_t, int]:
    """Return the allocator's maximum (high-water) size in bytes."""
    return lib.ggml_allocr_max_size(alloc)

lib.ggml_allocr_max_size.argtypes = [ggml_allocr_t]
lib.ggml_allocr_max_size.restype = ctypes.c_size_t

# GGML_API size_t ggml_allocr_alloc_graph(ggml_allocr_t alloc, struct ggml_cgraph * graph);
def ggml_allocr_alloc_graph(alloc: ggml_allocr_t, graph: ggml_cgraph_p) -> int:
    """Allocate every tensor in `graph`; returns the size in bytes (size_t)."""
    return lib.ggml_allocr_alloc_graph(alloc, graph)

lib.ggml_allocr_alloc_graph.argtypes = [ggml_allocr_t, ctypes.POINTER(ggml_cgraph)]
lib.ggml_allocr_alloc_graph.restype = ctypes.c_size_t
# //
# // ggml-backend v2 API
# //

# // Separate tensor and graph allocator objects
# // This is necessary for multi-backend allocation because the graph allocator needs to use multiple tensor allocators
# // The original API is kept as a wrapper around the new API

# // Tensor allocator
# typedef struct ggml_tallocr * ggml_tallocr_t;
ggml_tallocr_t = ctypes.c_void_p

# GGML_API ggml_tallocr_t ggml_tallocr_new(void * data, size_t size, size_t alignment);
def ggml_tallocr_new(
    data: ctypes.c_void_p,
    size: Union[ctypes.c_size_t, int],
    alignment: Union[ctypes.c_size_t, int],
) -> ggml_tallocr_t:
    """Create a tensor allocator over a caller-provided buffer of `size` bytes."""
    return lib.ggml_tallocr_new(data, size, alignment)

lib.ggml_tallocr_new.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.c_size_t]
lib.ggml_tallocr_new.restype = ggml_tallocr_t

# GGML_API ggml_tallocr_t ggml_tallocr_new_measure(size_t alignment);
def ggml_tallocr_new_measure(alignment: Union[ctypes.c_size_t, int]) -> ggml_tallocr_t:
    """Create a measure tensor allocator with the given alignment."""
    return lib.ggml_tallocr_new_measure(alignment)

lib.ggml_tallocr_new_measure.argtypes = [ctypes.c_size_t]
lib.ggml_tallocr_new_measure.restype = ggml_tallocr_t

# GGML_API ggml_tallocr_t ggml_tallocr_new_from_buffer(struct ggml_backend_buffer * buffer);
def ggml_tallocr_new_from_buffer(buffer: ggml_backend_buffer_p) -> ggml_tallocr_t:
    """Create a tensor allocator that allocates from an existing backend buffer."""
    return lib.ggml_tallocr_new_from_buffer(buffer)

lib.ggml_tallocr_new_from_buffer.argtypes = [ggml_backend_buffer_p]
lib.ggml_tallocr_new_from_buffer.restype = ggml_tallocr_t

# GGML_API ggml_tallocr_t ggml_tallocr_new_from_backend(struct ggml_backend * backend, size_t size); // allocates an owned buffer
def ggml_tallocr_new_from_backend(
    backend: ggml_backend_t, size: Union[ctypes.c_size_t, int]
) -> ggml_tallocr_t:
    """Create a tensor allocator with a newly allocated, owned backend buffer."""
    return lib.ggml_tallocr_new_from_backend(backend, size)

lib.ggml_tallocr_new_from_backend.argtypes = [ggml_backend_t, ctypes.c_size_t]
lib.ggml_tallocr_new_from_backend.restype = ggml_tallocr_t

# GGML_API ggml_tallocr_t ggml_tallocr_new_measure_from_backend(struct ggml_backend * backend);
def ggml_tallocr_new_measure_from_backend(backend: ggml_backend_t) -> ggml_tallocr_t:
    """Create a measure tensor allocator for the given backend."""
    return lib.ggml_tallocr_new_measure_from_backend(backend)

lib.ggml_tallocr_new_measure_from_backend.argtypes = [ggml_backend_t]
lib.ggml_tallocr_new_measure_from_backend.restype = ggml_tallocr_t

# GGML_API struct ggml_backend_buffer * ggml_tallocr_get_buffer(ggml_tallocr_t talloc);
def ggml_tallocr_get_buffer(talloc: ggml_tallocr_t) -> ggml_backend_buffer_p:
    """Return the backend buffer this tensor allocator allocates from."""
    return lib.ggml_tallocr_get_buffer(talloc)

lib.ggml_tallocr_get_buffer.argtypes = [ggml_tallocr_t]
lib.ggml_tallocr_get_buffer.restype = ggml_backend_buffer_p

# GGML_API void ggml_tallocr_free (ggml_tallocr_t talloc);
def ggml_tallocr_free(talloc: ggml_tallocr_t) -> None:
    """Free the tensor allocator."""
    return lib.ggml_tallocr_free(talloc)

lib.ggml_tallocr_free.argtypes = [ggml_tallocr_t]
lib.ggml_tallocr_free.restype = None

# GGML_API bool ggml_tallocr_is_measure (ggml_tallocr_t talloc);
def ggml_tallocr_is_measure(talloc: ggml_tallocr_t) -> bool:
    """Return True if this is a measure tensor allocator."""
    return lib.ggml_tallocr_is_measure(talloc)

lib.ggml_tallocr_is_measure.argtypes = [ggml_tallocr_t]
lib.ggml_tallocr_is_measure.restype = ctypes.c_bool

# GGML_API void ggml_tallocr_reset (ggml_tallocr_t talloc);
def ggml_tallocr_reset(talloc: ggml_tallocr_t) -> None:
    """Reset the tensor allocator so its memory can be reused."""
    return lib.ggml_tallocr_reset(talloc)

lib.ggml_tallocr_reset.argtypes = [ggml_tallocr_t]
lib.ggml_tallocr_reset.restype = None

# GGML_API void ggml_tallocr_alloc (ggml_tallocr_t talloc, struct ggml_tensor * tensor);
def ggml_tallocr_alloc(talloc: ggml_tallocr_t, tensor: ggml_tensor_p) -> None:
    """Allocate storage for `tensor` from this tensor allocator."""
    return lib.ggml_tallocr_alloc(talloc, tensor)

lib.ggml_tallocr_alloc.argtypes = [ggml_tallocr_t, ctypes.POINTER(ggml_tensor)]
lib.ggml_tallocr_alloc.restype = None

# GGML_API size_t ggml_tallocr_max_size (ggml_tallocr_t talloc);
def ggml_tallocr_max_size(talloc: ggml_tallocr_t) -> Union[ctypes.c_size_t, int]:
    """Return the tensor allocator's maximum (high-water) size in bytes."""
    return lib.ggml_tallocr_max_size(talloc)

lib.ggml_tallocr_max_size.argtypes = [ggml_tallocr_t]
lib.ggml_tallocr_max_size.restype = ctypes.c_size_t

# // Graph allocator
# typedef struct ggml_gallocr * ggml_gallocr_t;
ggml_gallocr_t = ctypes.c_void_p

# GGML_API ggml_gallocr_t ggml_gallocr_new(void);
def ggml_gallocr_new() -> ggml_gallocr_t:
    """Create a new graph allocator."""
    return lib.ggml_gallocr_new()

lib.ggml_gallocr_new.argtypes = []
lib.ggml_gallocr_new.restype = ggml_gallocr_t

# GGML_API void ggml_gallocr_free(ggml_gallocr_t galloc);
def ggml_gallocr_free(galloc: ggml_gallocr_t) -> None:
    """Free the graph allocator."""
    return lib.ggml_gallocr_free(galloc)

lib.ggml_gallocr_free.argtypes = [ggml_gallocr_t]
lib.ggml_gallocr_free.restype = None

# GGML_API void ggml_gallocr_set_parse_seq(ggml_gallocr_t galloc, const int * list, int n);
def ggml_gallocr_set_parse_seq(
    galloc: ggml_gallocr_t,
    list: "ctypes._Pointer(ctypes.c_int)",  # type: ignore
    n: Union[ctypes.c_int, int],
) -> None:
    """Set the node-parse order (`list` of `n` indices) for out-of-order graphs."""
    return lib.ggml_gallocr_set_parse_seq(galloc, list, n)

lib.ggml_gallocr_set_parse_seq.argtypes = [
    ggml_gallocr_t,
    ctypes.POINTER(ctypes.c_int),
    ctypes.c_int,
]
lib.ggml_gallocr_set_parse_seq.restype = None

# GGML_API size_t ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, ggml_tallocr_t talloc, struct ggml_cgraph * graph);
def ggml_gallocr_alloc_graph(
    galloc: ggml_gallocr_t, talloc: ggml_tallocr_t, graph: ggml_cgraph_p
) -> Union[ctypes.c_size_t, int]:
    """Allocate every tensor in `graph` using `talloc`; returns a size in bytes."""
    return lib.ggml_gallocr_alloc_graph(galloc, talloc, graph)

lib.ggml_gallocr_alloc_graph.argtypes = [
    ggml_gallocr_t,
    ggml_tallocr_t,
    ctypes.POINTER(ggml_cgraph),
]
lib.ggml_gallocr_alloc_graph.restype = ctypes.c_size_t

# // Allocate tensors from the allocators given by the hash table
# GGML_API void ggml_gallocr_alloc_graph_n(
#     ggml_gallocr_t galloc,
#     struct ggml_cgraph * graph,
#     struct ggml_hash_set hash_set,
#     ggml_tallocr_t * hash_node_talloc);
def ggml_gallocr_alloc_graph_n(
    galloc: ggml_gallocr_t,
    graph: ggml_cgraph_p,
    hash_set: ggml_hash_set,
    hash_node_talloc: "ctypes._Pointer(ggml_tallocr_t)",  # type: ignore
) -> None:
    """Allocate `graph` tensors from per-node allocators given by the hash table."""
    return lib.ggml_gallocr_alloc_graph_n(galloc, graph, hash_set, hash_node_talloc)

lib.ggml_gallocr_alloc_graph_n.argtypes = [
    ggml_gallocr_t,
    ctypes.POINTER(ggml_cgraph),
    ggml_hash_set,
    ctypes.POINTER(ggml_tallocr_t),
]
lib.ggml_gallocr_alloc_graph_n.restype = None
# // Utils
# // Create a buffer and allocate all the tensors in a ggml_context

# GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, struct ggml_backend_buffer_type * buft);
def ggml_backend_alloc_ctx_tensors_from_buft(
    ctx: ggml_context_p, buft: ggml_backend_buffer_type_p
) -> ggml_backend_buffer_p:
    """Create a buffer of type `buft` and allocate all tensors of `ctx` in it."""
    return lib.ggml_backend_alloc_ctx_tensors_from_buft(ctx, buft)

lib.ggml_backend_alloc_ctx_tensors_from_buft.argtypes = [
    ggml_context_p,
    ggml_backend_buffer_type_p,
]
lib.ggml_backend_alloc_ctx_tensors_from_buft.restype = ggml_backend_buffer_p

# GGML_API struct ggml_backend_buffer * ggml_backend_alloc_ctx_tensors(struct ggml_context * ctx, struct ggml_backend * backend);
def ggml_backend_alloc_ctx_tensors(
    ctx: ggml_context_p, backend: ggml_backend_t
) -> ggml_backend_buffer_p:
    """Create a buffer on `backend` and allocate all tensors of `ctx` in it."""
    return lib.ggml_backend_alloc_ctx_tensors(ctx, backend)

lib.ggml_backend_alloc_ctx_tensors.argtypes = [
    ggml_context_p,
    ggml_backend_t,
]
lib.ggml_backend_alloc_ctx_tensors.restype = ggml_backend_buffer_p
#####################################################
# GGML Backend API
# source: ggml-backend.h
#####################################################

# typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t;
# typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
# typedef struct ggml_backend * ggml_backend_t;
# typedef void * ggml_backend_graph_plan_t;
# Opaque C handles — raw void pointers on the Python side. (Note: these
# duplicate the `*_p`-suffixed aliases declared in the ggml-alloc section.)
ggml_backend_buffer_type_t = ctypes.c_void_p
ggml_backend_buffer_t = ctypes.c_void_p
ggml_backend_t = ctypes.c_void_p
ggml_backend_graph_plan_t = ctypes.c_void_p

# //
# // Backend buffer
# //

# // buffer type
# GGML_API ggml_backend_buffer_t ggml_backend_buft_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size);
def ggml_backend_buft_alloc_buffer(
    buft: ggml_backend_buffer_type_t, size: Union[ctypes.c_size_t, int]
) -> ggml_backend_buffer_t:
    """Allocate a buffer of `size` bytes of the given buffer type."""
    return lib.ggml_backend_buft_alloc_buffer(buft, size)

lib.ggml_backend_buft_alloc_buffer.argtypes = [
    ggml_backend_buffer_type_t,
    ctypes.c_size_t,
]
lib.ggml_backend_buft_alloc_buffer.restype = ggml_backend_buffer_t

# GGML_API size_t ggml_backend_buft_get_alignment (ggml_backend_buffer_type_t buft);
def ggml_backend_buft_get_alignment(
    buft: ggml_backend_buffer_type_t,
) -> int:
    """Return the required alignment, in bytes, for this buffer type."""
    return lib.ggml_backend_buft_get_alignment(buft)

lib.ggml_backend_buft_get_alignment.argtypes = [ggml_backend_buffer_type_t]
lib.ggml_backend_buft_get_alignment.restype = ctypes.c_size_t

# GGML_API size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor);
def ggml_backend_buft_get_alloc_size(
    buft: ggml_backend_buffer_type_t, tensor: ggml_tensor_p
) -> int:
    """Return the number of bytes needed to store `tensor` in this buffer type."""
    return lib.ggml_backend_buft_get_alloc_size(buft, tensor)

lib.ggml_backend_buft_get_alloc_size.argtypes = [
    ggml_backend_buffer_type_t,
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_backend_buft_get_alloc_size.restype = ctypes.c_size_t

# GGML_API bool ggml_backend_buft_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend);
def ggml_backend_buft_supports_backend(
    buft: ggml_backend_buffer_type_t, backend: ggml_backend_t
) -> bool:
    """Return True if `backend` can use buffers of this buffer type."""
    return lib.ggml_backend_buft_supports_backend(buft, backend)

lib.ggml_backend_buft_supports_backend.argtypes = [
    ggml_backend_buffer_type_t,
    ggml_backend_t,
]
lib.ggml_backend_buft_supports_backend.restype = ctypes.c_bool
# // buffer
# GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer);
def ggml_backend_buffer_free(
    buffer: ggml_backend_buffer_t,
) -> None:
    """Free a backend buffer."""
    return lib.ggml_backend_buffer_free(buffer)

lib.ggml_backend_buffer_free.argtypes = [ggml_backend_buffer_t]
lib.ggml_backend_buffer_free.restype = None

# GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer);
def ggml_backend_buffer_get_base(
    buffer: ggml_backend_buffer_t,
) -> ctypes.c_void_p:
    """Return the base address of the buffer's storage (as a raw pointer value)."""
    return lib.ggml_backend_buffer_get_base(buffer)

lib.ggml_backend_buffer_get_base.argtypes = [ggml_backend_buffer_t]
lib.ggml_backend_buffer_get_base.restype = ctypes.c_void_p

# GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
def ggml_backend_buffer_get_size(
    buffer: ggml_backend_buffer_t,
) -> int:
    """Return the buffer's size in bytes."""
    return lib.ggml_backend_buffer_get_size(buffer)

lib.ggml_backend_buffer_get_size.argtypes = [ggml_backend_buffer_t]
lib.ggml_backend_buffer_get_size.restype = ctypes.c_size_t

# GGML_API void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
def ggml_backend_buffer_init_tensor(
    buffer: ggml_backend_buffer_t,
    tensor: ggml_tensor_p,
) -> None:
    """Initialize `tensor` for use in this buffer."""
    return lib.ggml_backend_buffer_init_tensor(buffer, tensor)

lib.ggml_backend_buffer_init_tensor.argtypes = [
    ggml_backend_buffer_t,
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_backend_buffer_init_tensor.restype = None

# GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
def ggml_backend_buffer_get_alignment(
    buffer: ggml_backend_buffer_t,
) -> int:
    """Return the required alignment, in bytes, for this buffer."""
    return lib.ggml_backend_buffer_get_alignment(buffer)

lib.ggml_backend_buffer_get_alignment.argtypes = [ggml_backend_buffer_t]
lib.ggml_backend_buffer_get_alignment.restype = ctypes.c_size_t

# GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
def ggml_backend_buffer_get_alloc_size(
    buffer: ggml_backend_buffer_t, tensor: ggml_tensor_p
) -> int:
    """Return the number of bytes needed to store `tensor` in this buffer."""
    return lib.ggml_backend_buffer_get_alloc_size(buffer, tensor)

lib.ggml_backend_buffer_get_alloc_size.argtypes = [
    ggml_backend_buffer_t,
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_backend_buffer_get_alloc_size.restype = ctypes.c_size_t

# GGML_API ggml_backend_buffer_type_t ggml_backend_buffer_type(ggml_backend_buffer_t buffer);
def ggml_backend_buffer_type(
    buffer: ggml_backend_buffer_t,
) -> ggml_backend_buffer_type_t:
    """Return the buffer type of this buffer."""
    return lib.ggml_backend_buffer_type(buffer)

lib.ggml_backend_buffer_type.argtypes = [ggml_backend_buffer_t]
lib.ggml_backend_buffer_type.restype = ggml_backend_buffer_type_t
  6279. # //
  6280. # // Backend
  6281. # //
  6282. # GGML_API const char * ggml_backend_name(ggml_backend_t backend);
  6283. def ggml_backend_name(
  6284. backend: ggml_backend_t,
  6285. ) -> bytes:
  6286. return lib.ggml_backend_name(backend)
  6287. lib.ggml_backend_name.argtypes = [ggml_backend_t]
  6288. lib.ggml_backend_name.restype = ctypes.c_char_p
  6289. # GGML_API void ggml_backend_free(ggml_backend_t backend);
  6290. def ggml_backend_free(
  6291. backend: ggml_backend_t,
  6292. ):
  6293. return lib.ggml_backend_free(backend)
  6294. lib.ggml_backend_free.argtypes = [ggml_backend_t]
  6295. lib.ggml_backend_free.restype = None
  6296. # GGML_API ggml_backend_buffer_type_t ggml_backend_get_default_buffer_type(ggml_backend_t backend);
  6297. def ggml_backend_get_default_buffer_type(
  6298. backend: ggml_backend_t,
  6299. ) -> ggml_backend_buffer_type_t:
  6300. return lib.ggml_backend_get_default_buffer_type(backend)
  6301. lib.ggml_backend_get_default_buffer_type.argtypes = [ggml_backend_t]
  6302. lib.ggml_backend_get_default_buffer_type.restype = ggml_backend_buffer_type_t
  6303. # GGML_API ggml_backend_buffer_t ggml_backend_alloc_buffer(ggml_backend_t backend, size_t size);
  6304. def ggml_backend_alloc_buffer(
  6305. backend: ggml_backend_t,
  6306. size: Union[ctypes.c_size_t, int],
  6307. ) -> ggml_backend_buffer_t:
  6308. return lib.ggml_backend_alloc_buffer(backend, size)
  6309. lib.ggml_backend_alloc_buffer.argtypes = [ggml_backend_t, ctypes.c_size_t]
  6310. lib.ggml_backend_alloc_buffer.restype = ggml_backend_buffer_t
  6311. # GGML_API size_t ggml_backend_get_alignment(ggml_backend_t backend);
  6312. def ggml_backend_get_alignment(
  6313. backend: ggml_backend_t,
  6314. ) -> int:
  6315. return lib.ggml_backend_get_alignment(backend)
  6316. lib.ggml_backend_get_alignment.argtypes = [ggml_backend_t]
  6317. lib.ggml_backend_get_alignment.restype = ctypes.c_size_t
  6318. # GGML_API void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
  6319. def ggml_backend_tensor_set_async(
  6320. backend: ggml_backend_t,
  6321. tensor: ggml_tensor_p,
  6322. data: ctypes.c_void_p,
  6323. offset: Union[ctypes.c_size_t, int],
  6324. size: Union[ctypes.c_size_t, int],
  6325. ):
  6326. return lib.ggml_backend_tensor_set_async(backend, tensor, data, offset, size)
  6327. lib.ggml_backend_tensor_set_async.argtypes = [
  6328. ggml_backend_t,
  6329. ctypes.POINTER(ggml_tensor),
  6330. ctypes.c_void_p,
  6331. ctypes.c_size_t,
  6332. ctypes.c_size_t,
  6333. ]
  6334. lib.ggml_backend_tensor_set_async.restype = None
  6335. # GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
  6336. def ggml_backend_tensor_get_async(
  6337. backend: ggml_backend_t,
  6338. tensor: ggml_tensor_p,
  6339. data: ctypes.c_void_p,
  6340. offset: Union[ctypes.c_size_t, int],
  6341. size: Union[ctypes.c_size_t, int],
  6342. ):
  6343. return lib.ggml_backend_tensor_get_async(backend, tensor, data, offset, size)
  6344. lib.ggml_backend_tensor_get_async.argtypes = [
  6345. ggml_backend_t,
  6346. ctypes.POINTER(ggml_tensor),
  6347. ctypes.c_void_p,
  6348. ctypes.c_size_t,
  6349. ctypes.c_size_t,
  6350. ]
  6351. lib.ggml_backend_tensor_get_async.restype = None
  6352. # GGML_API void ggml_backend_tensor_set( struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
  6353. def ggml_backend_tensor_set(
  6354. tensor: ggml_tensor_p,
  6355. data: ctypes.c_void_p,
  6356. offset: Union[ctypes.c_size_t, int],
  6357. size: Union[ctypes.c_size_t, int],
  6358. ):
  6359. return lib.ggml_backend_tensor_set(tensor, data, offset, size)
  6360. lib.ggml_backend_tensor_set.argtypes = [
  6361. ctypes.POINTER(ggml_tensor),
  6362. ctypes.c_void_p,
  6363. ctypes.c_size_t,
  6364. ctypes.c_size_t,
  6365. ]
  6366. lib.ggml_backend_tensor_set.restype = None
  6367. # GGML_API void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
  6368. def ggml_backend_tensor_get(
  6369. tensor: ggml_tensor_p,
  6370. data: ctypes.c_void_p,
  6371. offset: Union[ctypes.c_size_t, int],
  6372. size: Union[ctypes.c_size_t, int],
  6373. ):
  6374. return lib.ggml_backend_tensor_get(tensor, data, offset, size)
  6375. lib.ggml_backend_tensor_get.argtypes = [
  6376. ctypes.POINTER(ggml_tensor),
  6377. ctypes.c_void_p,
  6378. ctypes.c_size_t,
  6379. ctypes.c_size_t,
  6380. ]
  6381. lib.ggml_backend_tensor_get.restype = None
  6382. # GGML_API void ggml_backend_synchronize(ggml_backend_t backend);
  6383. def ggml_backend_synchronize(
  6384. backend: ggml_backend_t,
  6385. ):
  6386. return lib.ggml_backend_synchronize(backend)
  6387. lib.ggml_backend_synchronize.argtypes = [ggml_backend_t]
  6388. lib.ggml_backend_synchronize.restype = None
  6389. # GGML_API ggml_backend_graph_plan_t ggml_backend_graph_plan_create (ggml_backend_t backend, struct ggml_cgraph * cgraph);
  6390. def ggml_backend_graph_plan_create(
  6391. backend: ggml_backend_t,
  6392. cgraph: ggml_cgraph_p,
  6393. ) -> ggml_backend_graph_plan_t:
  6394. return lib.ggml_backend_graph_plan_create(backend, cgraph)
  6395. lib.ggml_backend_graph_plan_create.argtypes = [
  6396. ggml_backend_t,
  6397. ctypes.POINTER(ggml_cgraph),
  6398. ]
  6399. lib.ggml_backend_graph_plan_create.restype = ggml_backend_graph_plan_t
  6400. # GGML_API void ggml_backend_graph_plan_free (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
  6401. def ggml_backend_graph_plan_free(
  6402. backend: ggml_backend_t,
  6403. plan: ggml_backend_graph_plan_t,
  6404. ):
  6405. return lib.ggml_backend_graph_plan_free(backend, plan)
  6406. lib.ggml_backend_graph_plan_free.argtypes = [ggml_backend_t, ggml_backend_graph_plan_t]
  6407. lib.ggml_backend_graph_plan_free.restype = None
  6408. # GGML_API void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
  6409. def ggml_backend_graph_plan_compute(
  6410. backend: ggml_backend_t,
  6411. plan: ggml_backend_graph_plan_t,
  6412. ):
  6413. return lib.ggml_backend_graph_plan_compute(backend, plan)
  6414. lib.ggml_backend_graph_plan_compute.argtypes = [
  6415. ggml_backend_t,
  6416. ggml_backend_graph_plan_t,
  6417. ]
  6418. lib.ggml_backend_graph_plan_compute.restype = None
# GGML_API void ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph);
def ggml_backend_graph_compute(
    backend: ggml_backend_t,
    cgraph: ggml_cgraph_p,
):
    """Compute *cgraph* on *backend* without creating an explicit plan."""
    return lib.ggml_backend_graph_compute(backend, cgraph)


lib.ggml_backend_graph_compute.argtypes = [ggml_backend_t, ctypes.POINTER(ggml_cgraph)]
lib.ggml_backend_graph_compute.restype = None


# GGML_API bool ggml_backend_supports_op (ggml_backend_t backend, const struct ggml_tensor * op);
def ggml_backend_supports_op(
    backend: ggml_backend_t,
    op: ggml_tensor_p,
) -> Union[ctypes.c_bool, bool]:
    """Return True if *backend* can execute the operation node *op*."""
    return lib.ggml_backend_supports_op(backend, op)


lib.ggml_backend_supports_op.argtypes = [ggml_backend_t, ctypes.POINTER(ggml_tensor)]
lib.ggml_backend_supports_op.restype = ctypes.c_bool


# // tensor copy between different backends
# GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst);
def ggml_backend_tensor_copy(
    src: ggml_tensor_p,
    dst: ggml_tensor_p,
):
    """Copy tensor data from *src* to *dst*, possibly across backends."""
    return lib.ggml_backend_tensor_copy(src, dst)


lib.ggml_backend_tensor_copy.argtypes = [
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_backend_tensor_copy.restype = None
# GGML_API void ggml_backend_tensor_copy_async(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst); // automatic fallback to sync copy
def ggml_backend_tensor_copy_async(
    backend: ggml_backend_t,
    src: ggml_tensor_p,
    dst: ggml_tensor_p,
):
    """Copy *src* to *dst* asynchronously; the C header documents an
    automatic fallback to a synchronous copy when async is unsupported."""
    return lib.ggml_backend_tensor_copy_async(backend, src, dst)


# NOTE(review): unlike every other binding in this file, argtypes/restype are
# left commented out here — presumably because the symbol is missing from some
# builds of the shared library. Confirm before enabling.
# lib.ggml_backend_tensor_copy_async.argtypes = [
#     ggml_backend_t,
#     ctypes.POINTER(ggml_tensor),
#     ctypes.POINTER(ggml_tensor),
# ]
# lib.ggml_backend_tensor_copy_async.restype = None
# //
# // CPU backend
# //
# GGML_API ggml_backend_t ggml_backend_cpu_init(void);
def ggml_backend_cpu_init() -> ggml_backend_t:
    """Create and return a new CPU backend instance."""
    return lib.ggml_backend_cpu_init()


lib.ggml_backend_cpu_init.argtypes = []
lib.ggml_backend_cpu_init.restype = ggml_backend_t


# GGML_API bool ggml_backend_is_cpu(ggml_backend_t backend);
def ggml_backend_is_cpu(
    backend: ggml_backend_t,
) -> bool:
    """Return True if *backend* is a CPU backend."""
    return lib.ggml_backend_is_cpu(backend)


lib.ggml_backend_is_cpu.argtypes = [ggml_backend_t]
lib.ggml_backend_is_cpu.restype = ctypes.c_bool


# GGML_API void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads);
def ggml_backend_cpu_set_n_threads(
    backend_cpu: ggml_backend_t,
    n_threads: Union[ctypes.c_int, int],
):
    """Set the number of compute threads used by the CPU backend."""
    return lib.ggml_backend_cpu_set_n_threads(backend_cpu, n_threads)


lib.ggml_backend_cpu_set_n_threads.argtypes = [ggml_backend_t, ctypes.c_int]
lib.ggml_backend_cpu_set_n_threads.restype = None


# // Create a backend buffer from an existing pointer
# GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
def ggml_backend_cpu_buffer_from_ptr(
    ptr: ctypes.c_void_p,
    size: Union[ctypes.c_size_t, int],
) -> ggml_backend_buffer_t:
    """Wrap an existing host memory region of *size* bytes as a CPU backend
    buffer. The caller retains ownership of the underlying memory."""
    return lib.ggml_backend_cpu_buffer_from_ptr(ptr, size)


lib.ggml_backend_cpu_buffer_from_ptr.argtypes = [ctypes.c_void_p, ctypes.c_size_t]
lib.ggml_backend_cpu_buffer_from_ptr.restype = ggml_backend_buffer_t


# GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);
def ggml_backend_cpu_buffer_type() -> ggml_backend_buffer_type_t:
    """Return the buffer type used by the CPU backend."""
    return lib.ggml_backend_cpu_buffer_type()


lib.ggml_backend_cpu_buffer_type.argtypes = []
lib.ggml_backend_cpu_buffer_type.restype = ggml_backend_buffer_type_t
# //
# // Backend registry
# //
# // The backend registry is a registry of all the available backends, and allows initializing backends in a generic way
# GGML_API size_t ggml_backend_reg_get_count(void);
def ggml_backend_reg_get_count() -> int:
    """Return the number of registered backends."""
    return lib.ggml_backend_reg_get_count()


lib.ggml_backend_reg_get_count.argtypes = []
lib.ggml_backend_reg_get_count.restype = ctypes.c_size_t


# GGML_API size_t ggml_backend_reg_find_by_name(const char * name);
def ggml_backend_reg_find_by_name(
    name: bytes,
) -> int:
    """Return the registry index of the backend called *name* (bytes)."""
    return lib.ggml_backend_reg_find_by_name(name)


lib.ggml_backend_reg_find_by_name.argtypes = [ctypes.c_char_p]
lib.ggml_backend_reg_find_by_name.restype = ctypes.c_size_t


# GGML_API ggml_backend_t ggml_backend_reg_init_backend_from_str(const char * backend_str); // str is name[:params]
def ggml_backend_reg_init_backend_from_str(
    backend_str: bytes,
) -> ggml_backend_t:
    """Initialize a backend from a b"name[:params]" specification string."""
    return lib.ggml_backend_reg_init_backend_from_str(backend_str)


lib.ggml_backend_reg_init_backend_from_str.argtypes = [ctypes.c_char_p]
lib.ggml_backend_reg_init_backend_from_str.restype = ggml_backend_t


# GGML_API const char * ggml_backend_reg_get_name(size_t i);
def ggml_backend_reg_get_name(
    i: Union[ctypes.c_size_t, int],
) -> bytes:
    """Return the name (bytes) of the backend at registry index *i*."""
    return lib.ggml_backend_reg_get_name(i)


lib.ggml_backend_reg_get_name.argtypes = [ctypes.c_size_t]
lib.ggml_backend_reg_get_name.restype = ctypes.c_char_p


# GGML_API ggml_backend_t ggml_backend_reg_init_backend(size_t i, const char * params); // params is backend-specific
def ggml_backend_reg_init_backend(
    i: Union[ctypes.c_size_t, int],
    params: bytes,
) -> ggml_backend_t:
    """Initialize the backend at registry index *i* with backend-specific
    *params* (bytes)."""
    return lib.ggml_backend_reg_init_backend(i, params)


lib.ggml_backend_reg_init_backend.argtypes = [ctypes.c_size_t, ctypes.c_char_p]
lib.ggml_backend_reg_init_backend.restype = ggml_backend_t


# GGML_API ggml_backend_buffer_type_t ggml_backend_reg_get_default_buffer_type(size_t i);
def ggml_backend_reg_get_default_buffer_type(
    i: Union[ctypes.c_size_t, int],
) -> ggml_backend_buffer_type_t:
    """Return the default buffer type of the backend at registry index *i*."""
    return lib.ggml_backend_reg_get_default_buffer_type(i)


lib.ggml_backend_reg_get_default_buffer_type.argtypes = [ctypes.c_size_t]
lib.ggml_backend_reg_get_default_buffer_type.restype = ggml_backend_buffer_type_t


# GGML_API ggml_backend_buffer_t ggml_backend_reg_alloc_buffer(size_t i, size_t size);
def ggml_backend_reg_alloc_buffer(
    i: Union[ctypes.c_size_t, int],
    size: Union[ctypes.c_size_t, int],
) -> ggml_backend_buffer_t:
    """Allocate a buffer of *size* bytes from the backend at registry index *i*."""
    return lib.ggml_backend_reg_alloc_buffer(i, size)


lib.ggml_backend_reg_alloc_buffer.argtypes = [ctypes.c_size_t, ctypes.c_size_t]
lib.ggml_backend_reg_alloc_buffer.restype = ggml_backend_buffer_t
# //
# // Backend scheduler
# //
# // The backend scheduler allows for multiple backends to be used together
# // Handles compute buffer allocation, assignment of tensors to backends, and copying of tensors between backends
# // The backends are selected based on:
# // - the backend that supports the operation
# // - the location of the pre-allocated tensors (e.g. the weights)
# /*
#   Example usage:
#     sched = ggml_backend_sched_new({backend_gpu, backend_gpu2, backend_cpu}, num_backends);
#     // sched is initialized with measure allocators and cannot be used until allocated with a measure graph
#     // initialize buffers from a measure graph
#     measure_graph = build_graph(sched); // use the allocr to allocate inputs as needed
#     // in build_graph:
#     build_graph(...) {
#         // allocating tensors in a specific backend (optional, recommended: pre-allocate inputs in a different buffer)
#         alloc_cpu = ggml_backend_sched_get_allocr(sched, backend_cpu);
#         ggml_allocr_alloc(alloc_cpu, tensor);
#         // manually assigning nodes to a backend (optional, shouldn't be needed in most cases)
#         struct ggml_tensor * node = ggml_mul_mat(ctx, ...);
#         ggml_backend_sched_set_node_backend(sched, node, backend_gpu);
#     }
#     // allocate backend buffers from measure graph
#     ggml_backend_sched_init_measure(sched, measure_graph);
#     // the scheduler is now ready to compute graphs
#     // compute
#     graph = build_graph(sched);
#     ggml_backend_sched_graph_compute(sched, graph);
# */
# struct ggml_backend_sched;
# typedef struct ggml_backend_sched * ggml_backend_sched_t;
# Opaque handle to a backend scheduler (struct ggml_backend_sched *).
ggml_backend_sched_t = ctypes.c_void_p
  6583. # // Initialize a backend scheduler
  6584. # GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, int n_backends);
  6585. def ggml_backend_sched_new(
  6586. backends: ggml_backend_t,
  6587. n_backends: Union[ctypes.c_int, int],
  6588. ) -> ggml_backend_sched_t:
  6589. return lib.ggml_backend_sched_new(backends, n_backends)
  6590. lib.ggml_backend_sched_new.argtypes = [ggml_backend_t, ctypes.c_int]
  6591. lib.ggml_backend_sched_new.restype = ggml_backend_sched_t
# GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
def ggml_backend_sched_free(
    sched: ggml_backend_sched_t,
):
    """Release a scheduler created by ggml_backend_sched_new()."""
    return lib.ggml_backend_sched_free(sched)


lib.ggml_backend_sched_free.argtypes = [ggml_backend_sched_t]
lib.ggml_backend_sched_free.restype = None


# // Initialize backend buffers from a measure graph
# GGML_API void ggml_backend_sched_init_measure(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);
def ggml_backend_sched_init_measure(
    sched: ggml_backend_sched_t,
    measure_graph: ggml_cgraph_p,
):
    """Size and allocate the scheduler's backend buffers from *measure_graph*;
    per the C header, the scheduler is unusable until this has been called."""
    return lib.ggml_backend_sched_init_measure(sched, measure_graph)


lib.ggml_backend_sched_init_measure.argtypes = [
    ggml_backend_sched_t,
    ctypes.POINTER(ggml_cgraph),
]
lib.ggml_backend_sched_init_measure.restype = None


# GGML_API ggml_tallocr_t ggml_backend_sched_get_tallocr(ggml_backend_sched_t sched, ggml_backend_t backend);
def ggml_backend_sched_get_tallocr(
    sched: ggml_backend_sched_t,
    backend: ggml_backend_t,
) -> ggml_tallocr_t:
    """Return the tensor allocator the scheduler uses for *backend*."""
    return lib.ggml_backend_sched_get_tallocr(sched, backend)


lib.ggml_backend_sched_get_tallocr.argtypes = [ggml_backend_sched_t, ggml_backend_t]
lib.ggml_backend_sched_get_tallocr.restype = ggml_tallocr_t


# GGML_API ggml_backend_buffer_t ggml_backend_sched_get_buffer (ggml_backend_sched_t sched, ggml_backend_t backend);
def ggml_backend_sched_get_buffer(
    sched: ggml_backend_sched_t,
    backend: ggml_backend_t,
) -> ggml_backend_buffer_t:
    """Return the compute buffer the scheduler holds for *backend*."""
    return lib.ggml_backend_sched_get_buffer(sched, backend)


lib.ggml_backend_sched_get_buffer.argtypes = [ggml_backend_sched_t, ggml_backend_t]
lib.ggml_backend_sched_get_buffer.restype = ggml_backend_buffer_t


# GGML_API void ggml_backend_sched_set_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
def ggml_backend_sched_set_node_backend(
    sched: ggml_backend_sched_t,
    node: ggml_tensor_p,
    backend: ggml_backend_t,
):
    """Manually pin graph *node* to *backend*, overriding automatic assignment."""
    return lib.ggml_backend_sched_set_node_backend(sched, node, backend)


lib.ggml_backend_sched_set_node_backend.argtypes = [
    ggml_backend_sched_t,
    ctypes.POINTER(ggml_tensor),
    ggml_backend_t,
]
lib.ggml_backend_sched_set_node_backend.restype = None
  6640. # // Allocate a graph on the backend scheduler
  6641. # GGML_API void ggml_backend_sched_graph_compute(
  6642. # ggml_backend_sched_t sched,
  6643. # struct ggml_cgraph * graph);
  6644. def ggml_backend_sched_graph_compute(
  6645. sched: ggml_backend_sched_t,
  6646. graph: ggml_cgraph_p,
  6647. ):
  6648. return lib.ggml_backend_sched_graph_compute(sched, graph)
  6649. lib.ggml_backend_sched_graph_compute.argtypes = [
  6650. ggml_backend_sched_t,
  6651. ctypes.POINTER(ggml_cgraph),
  6652. ]
  6653. lib.ggml_backend_sched_graph_compute.restype = None
# //
# // Utils
# //
# struct ggml_backend_graph_copy {
#     ggml_backend_buffer_t buffer;
#     struct ggml_context * ctx_allocated;
#     struct ggml_context * ctx_unallocated;
#     struct ggml_cgraph * graph;
# };
class ggml_backend_graph_copy(ctypes.Structure):
    """By-value result of the C ggml_backend_graph_copy() call: the copied
    graph plus the buffer/contexts that own its tensors."""

    _fields_ = [
        ("buffer", ggml_backend_buffer_t),
        ("ctx_allocated", ggml_context_p),
        ("ctx_unallocated", ggml_context_p),
        ("graph", ctypes.POINTER(ggml_cgraph)),
    ]


# Alias kept so the struct stays reachable after the wrapper function of the
# same name (defined below) shadows the class at module level.
ggml_backend_graph_copy_t = ggml_backend_graph_copy
# // Copy a graph to a different backend
# GGML_API struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph);
# NOTE(review): this function shadows the module-level struct class of the
# same name defined above; the struct remains reachable through the
# ggml_backend_graph_copy_t alias.
def ggml_backend_graph_copy(
    backend: ggml_backend_t,
    graph: ggml_cgraph_p,
) -> ggml_backend_graph_copy_t:
    """Copy *graph* to *backend*; returns a ggml_backend_graph_copy_t struct
    that must be released with ggml_backend_graph_copy_free()."""
    return lib.ggml_backend_graph_copy(backend, graph)


lib.ggml_backend_graph_copy.argtypes = [
    ggml_backend_t,
    ctypes.POINTER(ggml_cgraph),
]
lib.ggml_backend_graph_copy.restype = ggml_backend_graph_copy_t


# GGML_API void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy);
def ggml_backend_graph_copy_free(
    copy: ggml_backend_graph_copy_t,
):
    """Release the buffer and contexts held by a graph copy (passed by value)."""
    return lib.ggml_backend_graph_copy_free(copy)


lib.ggml_backend_graph_copy_free.argtypes = [ggml_backend_graph_copy_t]
lib.ggml_backend_graph_copy_free.restype = None
# typedef bool (*ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
# Callback signature: (node_index, t1, t2, user_data) -> bool.
ggml_backend_eval_callback = ctypes.CFUNCTYPE(
    ctypes.c_bool,
    ctypes.c_int,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_void_p,
)


# // Compare the output of two backends
# GGML_API void ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
def ggml_backend_compare_graph_backend(
    backend1: ggml_backend_t,
    backend2: ggml_backend_t,
    graph: ggml_cgraph_p,
    callback,  # ggml_backend_eval_callback instance; keep a reference alive for the duration of the call
    user_data: ctypes.c_void_p,
):
    """Run *graph* on both backends, invoking *callback* per node so the
    caller can compare the two outputs."""
    return lib.ggml_backend_compare_graph_backend(
        backend1, backend2, graph, callback, user_data
    )


lib.ggml_backend_compare_graph_backend.argtypes = [
    ggml_backend_t,
    ggml_backend_t,
    ctypes.POINTER(ggml_cgraph),
    ggml_backend_eval_callback,
    ctypes.c_void_p,
]
lib.ggml_backend_compare_graph_backend.restype = None
# // Tensor initialization
# GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
def ggml_backend_tensor_alloc(
    buffer: ggml_backend_buffer_t,
    tensor: ggml_tensor_p,
    addr: ctypes.c_void_p,
):
    """Bind *tensor* to the memory at *addr* inside *buffer*."""
    return lib.ggml_backend_tensor_alloc(buffer, tensor, addr)


lib.ggml_backend_tensor_alloc.argtypes = [
    ggml_backend_buffer_t,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_void_p,
]
lib.ggml_backend_tensor_alloc.restype = None


# GGML_API void ggml_backend_view_init(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
def ggml_backend_view_init(
    buffer: ggml_backend_buffer_t,
    tensor: ggml_tensor_p,
):
    """Initialize a view *tensor* whose source lives in *buffer*."""
    return lib.ggml_backend_view_init(buffer, tensor)


lib.ggml_backend_view_init.argtypes = [
    ggml_backend_buffer_t,
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_backend_view_init.restype = None
#####################################################
# GGML Backend Implementation API
# source: ggml-backend-impl.h
#####################################################
# //
# // Backend buffer
# //
# // buffer type
# typedef void * ggml_backend_buffer_type_context_t;
ggml_backend_buffer_type_context_t = ctypes.c_void_p
# struct ggml_backend_buffer_type_i {
#     ggml_backend_buffer_t (*alloc_buffer)    (ggml_backend_buffer_type_t buft, size_t size);
#     size_t                (*get_alignment)   (ggml_backend_buffer_type_t buft); // tensor alignment
#     size_t                (*get_alloc_size)  (ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor); // data size needed to allocate the tensor, including padding
#     bool                  (*supports_backend)(ggml_backend_buffer_type_t buft, ggml_backend_t backend); // check if the buffer type is usable by the backend
# };
# NOTE(review): these aliases are named ggml_backend_buffer_i_* but they
# describe the fields of ggml_backend_buffer_type_i; the names are kept for
# backward compatibility.
ggml_backend_buffer_i_alloc_buffer = ctypes.CFUNCTYPE(
    ggml_backend_buffer_t, ggml_backend_buffer_type_t, ctypes.c_size_t
)
ggml_backend_buffer_i_get_alignment = ctypes.CFUNCTYPE(
    ctypes.c_size_t, ggml_backend_buffer_type_t
)
ggml_backend_buffer_i_get_alloc_size = ctypes.CFUNCTYPE(
    ctypes.c_size_t, ggml_backend_buffer_type_t, ctypes.POINTER(ggml_tensor)
)
ggml_backend_buffer_i_supports_backend = ctypes.CFUNCTYPE(
    ctypes.c_bool, ggml_backend_buffer_type_t, ggml_backend_t
)


class ggml_backend_buffer_type_i(ctypes.Structure):
    """Vtable of callbacks implementing a buffer type (ggml-backend-impl.h)."""

    _fields_ = [
        ("alloc_buffer", ggml_backend_buffer_i_alloc_buffer),
        ("get_alignment", ggml_backend_buffer_i_get_alignment),
        ("get_alloc_size", ggml_backend_buffer_i_get_alloc_size),
        ("supports_backend", ggml_backend_buffer_i_supports_backend),
    ]


# struct ggml_backend_buffer_type {
#     struct ggml_backend_buffer_type_i  iface;
#     ggml_backend_buffer_type_context_t context;
# };
class ggml_backend_buffer_type(ctypes.Structure):
    """A buffer type: its callback vtable plus an opaque context pointer."""

    _fields_ = [
        ("iface", ggml_backend_buffer_type_i),
        ("context", ggml_backend_buffer_type_context_t),
    ]
# typedef void * ggml_backend_buffer_context_t;
ggml_backend_buffer_context_t = ctypes.c_void_p
# struct ggml_backend_buffer_i {
#     void   (*free_buffer)(ggml_backend_buffer_t buffer);
#     //void (*reset)      (ggml_backend_buffer_t buffer); // reset any internal state due to tensor initialization, such as tensor extras
#     void * (*get_base)   (ggml_backend_buffer_t buffer);
#     void   (*init_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
#     void   (*set_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
#     void   (*get_tensor) (ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
#     // (optional) copy tensor between different buffer-type, allow for single-copy tranfers
#     void (*cpy_tensor_from)(ggml_backend_buffer_t buffer, struct ggml_tensor * src, struct ggml_tensor * dst);
#     void (*cpy_tensor_to)  (ggml_backend_buffer_t buffer, struct ggml_tensor * src, struct ggml_tensor * dst);
# };
# Per-field CFUNCTYPE aliases mirroring struct ggml_backend_buffer_i above.
ggml_backend_buffer_i_free_buffer = ctypes.CFUNCTYPE(None, ggml_backend_buffer_t)
ggml_backend_buffer_i_get_base = ctypes.CFUNCTYPE(
    ctypes.c_void_p, ggml_backend_buffer_t
)
ggml_backend_buffer_i_init_tensor = ctypes.CFUNCTYPE(
    None, ggml_backend_buffer_t, ctypes.POINTER(ggml_tensor)
)
ggml_backend_buffer_i_set_tensor = ctypes.CFUNCTYPE(
    None,
    ggml_backend_buffer_t,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_void_p,
    ctypes.c_size_t,
    ctypes.c_size_t,
)
ggml_backend_buffer_i_get_tensor = ctypes.CFUNCTYPE(
    None,
    ggml_backend_buffer_t,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_void_p,
    ctypes.c_size_t,
    ctypes.c_size_t,
)
ggml_backend_buffer_i_cpy_tensor_from = ctypes.CFUNCTYPE(
    None,
    ggml_backend_buffer_t,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
)
ggml_backend_buffer_i_cpy_tensor_to = ctypes.CFUNCTYPE(
    None,
    ggml_backend_buffer_t,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
)


class ggml_backend_buffer_i(ctypes.Structure):
    """Vtable of callbacks implementing a backend buffer; cpy_tensor_from and
    cpy_tensor_to are optional per the C header."""

    _fields_ = [
        ("free_buffer", ggml_backend_buffer_i_free_buffer),
        ("get_base", ggml_backend_buffer_i_get_base),
        ("init_tensor", ggml_backend_buffer_i_init_tensor),
        ("set_tensor", ggml_backend_buffer_i_set_tensor),
        ("get_tensor", ggml_backend_buffer_i_get_tensor),
        ("cpy_tensor_from", ggml_backend_buffer_i_cpy_tensor_from),
        ("cpy_tensor_to", ggml_backend_buffer_i_cpy_tensor_to),
    ]
# struct ggml_backend_buffer {
#     struct ggml_backend_buffer_i  iface;
#     ggml_backend_buffer_type_t    buft;
#     ggml_backend_buffer_context_t context;
#     size_t size;
# };
class ggml_backend_buffer(ctypes.Structure):
    """A backend buffer: callback vtable, owning buffer type, opaque context
    and total size in bytes."""

    _fields_ = [
        ("iface", ggml_backend_buffer_i),
        ("buft", ggml_backend_buffer_type_t),
        ("context", ggml_backend_buffer_context_t),
        ("size", ctypes.c_size_t),
    ]


#    ggml_backend_buffer_t ggml_backend_buffer_init(
#            ggml_backend_buffer_type_t      buft,
#            struct ggml_backend_buffer_i    iface,
#            ggml_backend_buffer_context_t   context,
#            size_t                          size);
def ggml_backend_buffer_init(
    buft: ggml_backend_buffer_type_t,
    iface: ggml_backend_buffer_i,
    context: ggml_backend_buffer_context_t,
    size: Union[ctypes.c_size_t, int],
) -> ggml_backend_buffer_t:
    """Construct a backend buffer of *size* bytes from an implementation
    vtable *iface* (passed by value) and an opaque *context*."""
    return lib.ggml_backend_buffer_init(buft, iface, context, size)


lib.ggml_backend_buffer_init.argtypes = [
    ggml_backend_buffer_type_t,
    ggml_backend_buffer_i,
    ggml_backend_buffer_context_t,
    ctypes.c_size_t,
]
lib.ggml_backend_buffer_init.restype = ggml_backend_buffer_t
# //
# // Backend
# //
# typedef void * ggml_backend_context_t;
ggml_backend_context_t = ctypes.c_void_p
# struct ggml_backend_i {
#     const char * (*get_name)(ggml_backend_t backend);
#     void (*free)(ggml_backend_t backend);
#     // buffer allocation
#     ggml_backend_buffer_type_t (*get_default_buffer_type)(ggml_backend_t backend);
#     // (optional) asynchroneous tensor data access
#     void (*set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
#     void (*get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
#     // (optional) asynchroneous tensor copy
#     void (*cpy_tensor_from_async)(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst);
#     void (*cpy_tensor_to_async)  (ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst);
#     void (*synchronize)(ggml_backend_t backend);
#     // compute graph with a plan
#     ggml_backend_graph_plan_t (*graph_plan_create) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
#     void (*graph_plan_free)   (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
#     void (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
#     // compute graph without a plan
#     void (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
#     // check if the backend supports an operation
#     bool (*supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
# };
# Per-field CFUNCTYPE aliases mirroring struct ggml_backend_i above.
ggml_backend_i_get_name = ctypes.CFUNCTYPE(ctypes.c_char_p, ggml_backend_t)
ggml_backend_i_free = ctypes.CFUNCTYPE(None, ggml_backend_t)
ggml_backend_i_get_default_buffer_type = ctypes.CFUNCTYPE(
    ggml_backend_buffer_type_t, ggml_backend_t
)
ggml_backend_i_set_tensor_async = ctypes.CFUNCTYPE(
    None,
    ggml_backend_t,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_void_p,
    ctypes.c_size_t,
    ctypes.c_size_t,
)
ggml_backend_i_get_tensor_async = ctypes.CFUNCTYPE(
    None,
    ggml_backend_t,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_void_p,
    ctypes.c_size_t,
    ctypes.c_size_t,
)
ggml_backend_i_cpy_tensor_from_async = ctypes.CFUNCTYPE(
    None, ggml_backend_t, ctypes.POINTER(ggml_tensor), ctypes.POINTER(ggml_tensor)
)
ggml_backend_i_cpy_tensor_to_async = ctypes.CFUNCTYPE(
    None, ggml_backend_t, ctypes.POINTER(ggml_tensor), ctypes.POINTER(ggml_tensor)
)
ggml_backend_i_synchronize = ctypes.CFUNCTYPE(None, ggml_backend_t)
ggml_backend_i_graph_plan_create = ctypes.CFUNCTYPE(
    ggml_backend_graph_plan_t, ggml_backend_t, ctypes.POINTER(ggml_cgraph)
)
ggml_backend_i_graph_plan_free = ctypes.CFUNCTYPE(
    None, ggml_backend_t, ggml_backend_graph_plan_t
)
ggml_backend_i_graph_plan_compute = ctypes.CFUNCTYPE(
    None, ggml_backend_t, ggml_backend_graph_plan_t
)
ggml_backend_i_graph_compute = ctypes.CFUNCTYPE(
    None, ggml_backend_t, ctypes.POINTER(ggml_cgraph)
)
ggml_backend_i_supports_op = ctypes.CFUNCTYPE(
    ctypes.c_bool, ggml_backend_t, ctypes.POINTER(ggml_tensor)
)


class ggml_backend_i(ctypes.Structure):
    """Vtable of callbacks implementing a backend; the async and plan entries
    are optional per the C header. Field order must match struct ggml_backend_i."""

    _fields_ = [
        ("get_name", ggml_backend_i_get_name),
        ("free", ggml_backend_i_free),
        ("get_default_buffer_type", ggml_backend_i_get_default_buffer_type),
        ("set_tensor_async", ggml_backend_i_set_tensor_async),
        ("get_tensor_async", ggml_backend_i_get_tensor_async),
        ("cpy_tensor_from_async", ggml_backend_i_cpy_tensor_from_async),
        ("cpy_tensor_to_async", ggml_backend_i_cpy_tensor_to_async),
        ("synchronize", ggml_backend_i_synchronize),
        ("graph_plan_create", ggml_backend_i_graph_plan_create),
        ("graph_plan_free", ggml_backend_i_graph_plan_free),
        ("graph_plan_compute", ggml_backend_i_graph_plan_compute),
        ("graph_compute", ggml_backend_i_graph_compute),
        ("supports_op", ggml_backend_i_supports_op),
    ]


# struct ggml_backend {
#     struct ggml_backend_i iface;
#     ggml_backend_context_t context;
# };
class ggml_backend(ctypes.Structure):
    """A backend instance: its callback vtable plus an opaque context pointer."""

    _fields_ = [
        ("iface", ggml_backend_i),
        ("context", ggml_backend_context_t),
    ]
# //
# // Backend registry
# //
# typedef ggml_backend_t (*ggml_backend_init_fn)(const char * params, void * user_data);
# Factory callback: (params, user_data) -> ggml_backend_t.
ggml_backend_init_fn = ctypes.CFUNCTYPE(
    ggml_backend_t, ctypes.c_char_p, ctypes.c_void_p
)


# void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data);
def ggml_backend_register(
    name: bytes,
    init_fn,  # ggml_backend_init_fn instance; keep a reference alive while registered
    default_buffer_type: ggml_backend_buffer_type_t,
    user_data: ctypes.c_void_p,
):
    """Register a backend factory under *name* in the global registry."""
    return lib.ggml_backend_register(name, init_fn, default_buffer_type, user_data)


lib.ggml_backend_register.argtypes = [
    ctypes.c_char_p,
    ggml_backend_init_fn,
    ggml_backend_buffer_type_t,
    ctypes.c_void_p,
]
lib.ggml_backend_register.restype = None
#####################################################
# GGML CUDA API
# source: ggml-cuda.h
#####################################################
# True when the loaded shared library was built with cuBLAS support;
# the wrappers below raise AttributeError on `lib` if called without it.
GGML_USE_CUBLAS = hasattr(lib, "ggml_init_cublas")
GGML_CUDA_MAX_DEVICES = 16
# // Always success. To check if CUDA is actually loaded, use `ggml_cublas_loaded`.
# GGML_API void ggml_init_cublas(void);
def ggml_init_cublas():
    """Initialize cuBLAS; per the C header this always succeeds — use
    ggml_cublas_loaded() to check whether CUDA actually loaded."""
    return lib.ggml_init_cublas()


if GGML_USE_CUBLAS:
    lib.ggml_init_cublas.argtypes = []
    lib.ggml_init_cublas.restype = None


# // Returns `true` if there are available CUDA devices and cublas loads successfully; otherwise, it returns `false`.
# GGML_API bool ggml_cublas_loaded(void);
def ggml_cublas_loaded() -> bool:
    """Return True if CUDA devices are available and cuBLAS loaded."""
    return lib.ggml_cublas_loaded()


if GGML_USE_CUBLAS:
    lib.ggml_cublas_loaded.argtypes = []
    lib.ggml_cublas_loaded.restype = ctypes.c_bool
# void * ggml_cuda_host_malloc(size_t size);
def ggml_cuda_host_malloc(
    size: Union[ctypes.c_size_t, int],
) -> Optional[ctypes.c_void_p]:
    """Allocate *size* bytes of pinned host memory; free with
    ggml_cuda_host_free(). Requires a cuBLAS build (GGML_USE_CUBLAS)."""
    return lib.ggml_cuda_host_malloc(size)


if GGML_USE_CUBLAS:
    lib.ggml_cuda_host_malloc.argtypes = [ctypes.c_size_t]
    lib.ggml_cuda_host_malloc.restype = ctypes.c_void_p


# void ggml_cuda_host_free(void * ptr);
def ggml_cuda_host_free(
    ptr: ctypes.c_void_p,
):
    """Free pinned host memory obtained from ggml_cuda_host_malloc()."""
    return lib.ggml_cuda_host_free(ptr)


if GGML_USE_CUBLAS:
    lib.ggml_cuda_host_free.argtypes = [ctypes.c_void_p]
    lib.ggml_cuda_host_free.restype = None


# GGML_API bool ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
def ggml_cuda_can_mul_mat(
    src0: ggml_tensor_p,
    src1: ggml_tensor_p,
    dst: ggml_tensor_p,
) -> bool:
    """Return True if the CUDA backend can perform this mul_mat."""
    return lib.ggml_cuda_can_mul_mat(src0, src1, dst)


if GGML_USE_CUBLAS:
    lib.ggml_cuda_can_mul_mat.argtypes = [
        ctypes.POINTER(ggml_tensor),
        ctypes.POINTER(ggml_tensor),
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cuda_can_mul_mat.restype = ctypes.c_bool
# GGML_API void ggml_cuda_set_tensor_split(const float * tensor_split);
def ggml_cuda_set_tensor_split(
    tensor_split: CFloatArray,
):
    """Set the per-device tensor split ratios (float array) for multi-GPU use."""
    return lib.ggml_cuda_set_tensor_split(tensor_split)


if GGML_USE_CUBLAS:
    lib.ggml_cuda_set_tensor_split.argtypes = [ctypes.POINTER(ctypes.c_float)]
    lib.ggml_cuda_set_tensor_split.restype = None


# void ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor);
def ggml_cuda_transform_tensor(
    data: ctypes.c_void_p,
    tensor: ggml_tensor_p,
):
    """Upload *data* to the device and attach it to *tensor*."""
    return lib.ggml_cuda_transform_tensor(data, tensor)


if GGML_USE_CUBLAS:
    lib.ggml_cuda_transform_tensor.argtypes = [
        ctypes.c_void_p,
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cuda_transform_tensor.restype = None


# void ggml_cuda_free_data(struct ggml_tensor * tensor);
def ggml_cuda_free_data(
    tensor: ggml_tensor_p,
):
    """Release the device-side data held by *tensor*."""
    return lib.ggml_cuda_free_data(tensor)


if GGML_USE_CUBLAS:
    lib.ggml_cuda_free_data.argtypes = [
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cuda_free_data.restype = None
# void ggml_cuda_assign_buffers(struct ggml_tensor * tensor);
def ggml_cuda_assign_buffers(
    tensor: ggml_tensor_p,
):
    """Assign *tensor* to CUDA scratch buffers."""
    return lib.ggml_cuda_assign_buffers(tensor)


if GGML_USE_CUBLAS:
    lib.ggml_cuda_assign_buffers.argtypes = [
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cuda_assign_buffers.restype = None


# void ggml_cuda_assign_buffers_no_scratch(struct ggml_tensor * tensor);
def ggml_cuda_assign_buffers_no_scratch(
    tensor: ggml_tensor_p,
):
    """Assign *tensor* to CUDA buffers without using the scratch area."""
    return lib.ggml_cuda_assign_buffers_no_scratch(tensor)


if GGML_USE_CUBLAS:
    lib.ggml_cuda_assign_buffers_no_scratch.argtypes = [
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cuda_assign_buffers_no_scratch.restype = None


# GGML_API void ggml_cuda_assign_buffers_force_inplace(struct ggml_tensor * tensor);
def ggml_cuda_assign_buffers_force_inplace(
    tensor: ggml_tensor_p,
):
    """Assign *tensor* to CUDA buffers, forcing an in-place assignment."""
    return lib.ggml_cuda_assign_buffers_force_inplace(tensor)


if GGML_USE_CUBLAS:
    lib.ggml_cuda_assign_buffers_force_inplace.argtypes = [
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cuda_assign_buffers_force_inplace.restype = None


# GGML_API void ggml_cuda_assign_buffers_no_alloc(struct ggml_tensor * tensor);
def ggml_cuda_assign_buffers_no_alloc(
    tensor: ggml_tensor_p,
):
    """Assign *tensor* to CUDA buffers without allocating device memory;
    pair with ggml_cuda_assign_scratch_offset() to place the data."""
    return lib.ggml_cuda_assign_buffers_no_alloc(tensor)


if GGML_USE_CUBLAS:
    lib.ggml_cuda_assign_buffers_no_alloc.argtypes = [
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cuda_assign_buffers_no_alloc.restype = None


# GGML_API void ggml_cuda_assign_scratch_offset(struct ggml_tensor * tensor, size_t offset);
def ggml_cuda_assign_scratch_offset(
    tensor: ggml_tensor_p,
    offset: Union[ctypes.c_size_t, int],
):
    """Place *tensor* at byte *offset* within the CUDA scratch buffer."""
    return lib.ggml_cuda_assign_scratch_offset(tensor, offset)


if GGML_USE_CUBLAS:
    lib.ggml_cuda_assign_scratch_offset.argtypes = [
        ctypes.POINTER(ggml_tensor),
        ctypes.c_size_t,
    ]
    lib.ggml_cuda_assign_scratch_offset.restype = None
  7125. # GGML_API void ggml_cuda_copy_to_device(struct ggml_tensor * tensor);
  7126. def ggml_cuda_copy_to_device(
  7127. tensor: ggml_tensor_p,
  7128. ):
  7129. return lib.ggml_cuda_copy_to_device(tensor)
  7130. if GGML_USE_CUBLAS:
  7131. lib.ggml_cuda_copy_to_device.argtypes = [
  7132. ctypes.POINTER(ggml_tensor),
  7133. ]
  7134. lib.ggml_cuda_copy_to_device.restype = None
  7135. # void ggml_cuda_set_main_device(int main_device);
  7136. def ggml_cuda_set_main_device(
  7137. main_device: Union[ctypes.c_int, int],
  7138. ):
  7139. return lib.ggml_cuda_set_main_device(main_device)
  7140. if GGML_USE_CUBLAS:
  7141. lib.ggml_cuda_set_main_device.argtypes = [
  7142. ctypes.c_int,
  7143. ]
  7144. lib.ggml_cuda_set_main_device.restype = None
  7145. # GGML_API void ggml_cuda_set_mul_mat_q(bool mul_mat_q);
  7146. def ggml_cuda_set_mul_mat_q(
  7147. mul_mat_q: Union[ctypes.c_bool, bool],
  7148. ):
  7149. return lib.ggml_cuda_set_mul_mat_q(mul_mat_q)
  7150. if GGML_USE_CUBLAS:
  7151. lib.ggml_cuda_set_mul_mat_q.argtypes = [
  7152. ctypes.c_bool,
  7153. ]
  7154. lib.ggml_cuda_set_mul_mat_q.restype = None
  7155. # void ggml_cuda_set_scratch_size(size_t scratch_size);
  7156. def ggml_cuda_set_scratch_size(
  7157. scratch_size: Union[ctypes.c_size_t, int],
  7158. ):
  7159. return lib.ggml_cuda_set_scratch_size(scratch_size)
  7160. if GGML_USE_CUBLAS:
  7161. lib.ggml_cuda_set_scratch_size.argtypes = [
  7162. ctypes.c_size_t,
  7163. ]
  7164. lib.ggml_cuda_set_scratch_size.restype = None
  7165. # void ggml_cuda_free_scratch(void);
  7166. def ggml_cuda_free_scratch():
  7167. return lib.ggml_cuda_free_scratch()
  7168. if GGML_USE_CUBLAS:
  7169. lib.ggml_cuda_free_scratch.argtypes = []
  7170. lib.ggml_cuda_free_scratch.restype = None
  7171. # GGML_API bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor);
  7172. def ggml_cuda_compute_forward(
  7173. params: ggml_compute_params_p,
  7174. tensor: ggml_tensor_p,
  7175. ) -> bool:
  7176. return lib.ggml_cuda_compute_forward(params, tensor)
  7177. if GGML_USE_CUBLAS:
  7178. lib.ggml_cuda_compute_forward.argtypes = [
  7179. ctypes.POINTER(ggml_compute_params),
  7180. ctypes.POINTER(ggml_tensor),
  7181. ]
  7182. lib.ggml_cuda_compute_forward.restype = ctypes.c_bool
  7183. # GGML_API int ggml_cuda_get_device_count(void);
  7184. def ggml_cuda_get_device_count() -> int:
  7185. return lib.ggml_cuda_get_device_count()
  7186. if GGML_USE_CUBLAS:
  7187. lib.ggml_cuda_get_device_count.argtypes = []
  7188. lib.ggml_cuda_get_device_count.restype = ctypes.c_int
  7189. # GGML_API void ggml_cuda_get_device_description(int device, char * description, size_t description_size);
  7190. def ggml_cuda_get_device_description(
  7191. device: Union[ctypes.c_int, int],
  7192. description: bytes,
  7193. description_size: Union[ctypes.c_size_t, int],
  7194. ):
  7195. return lib.ggml_cuda_get_device_description(device, description, description_size)
  7196. if GGML_USE_CUBLAS:
  7197. lib.ggml_cuda_get_device_description.argtypes = [
  7198. ctypes.c_int,
  7199. ctypes.c_char_p,
  7200. ctypes.c_size_t,
  7201. ]
  7202. lib.ggml_cuda_get_device_description.restype = None
  7203. # // backend API
  7204. # GGML_API ggml_backend_t ggml_backend_cuda_init(void); // TODO: take a list of devices to use
  7205. def ggml_backend_cuda_init() -> ggml_backend_t:
  7206. return lib.ggml_backend_cuda_init()
  7207. if GGML_USE_CUBLAS:
  7208. lib.ggml_backend_cuda_init.argtypes = []
  7209. lib.ggml_backend_cuda_init.restype = ggml_backend_t
  7210. # GGML_API bool ggml_backend_is_cuda(ggml_backend_t backend);
  7211. def ggml_backend_is_cuda(
  7212. backend: ggml_backend_t,
  7213. ) -> bool:
  7214. return lib.ggml_backend_is_cuda(backend)
  7215. if GGML_USE_CUBLAS:
  7216. lib.ggml_backend_is_cuda.argtypes = [ggml_backend_t]
  7217. lib.ggml_backend_is_cuda.restype = ctypes.c_bool
  7218. # GGML_API int ggml_backend_cuda_get_device(ggml_backend_t backend);
  7219. def ggml_backend_cuda_get_device(
  7220. backend: ggml_backend_t,
  7221. ) -> int:
  7222. return lib.ggml_backend_cuda_get_device(backend)
  7223. if GGML_USE_CUBLAS:
  7224. lib.ggml_backend_cuda_get_device.argtypes = [ggml_backend_t]
  7225. lib.ggml_backend_cuda_get_device.restype = ctypes.c_int
  7226. # GGML_API ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device);
  7227. def ggml_backend_cuda_buffer_type(
  7228. device: Union[ctypes.c_int, int],
  7229. ) -> ggml_backend_buffer_type_t:
  7230. return lib.ggml_backend_cuda_buffer_type(device)
  7231. if GGML_USE_CUBLAS:
  7232. lib.ggml_backend_cuda_buffer_type.argtypes = [ctypes.c_int]
  7233. lib.ggml_backend_cuda_buffer_type.restype = ggml_backend_buffer_type_t
  7234. # // pinned host buffer for use with CPU backend for faster copies between CPU and GPU
  7235. # GGML_API ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void);
  7236. def ggml_backend_cuda_host_buffer_type() -> ggml_backend_buffer_type_t:
  7237. return lib.ggml_backend_cuda_host_buffer_type()
  7238. if GGML_USE_CUBLAS:
  7239. lib.ggml_backend_cuda_host_buffer_type.argtypes = []
  7240. lib.ggml_backend_cuda_host_buffer_type.restype = ggml_backend_buffer_type_t
  7241. #####################################################
  7242. # GGML METAL API
  7243. # source: ggml-metal.h
  7244. #####################################################
  7245. GGML_USE_METAL = hasattr(lib, "ggml_metal_init")
  7246. # // max memory buffers that can be mapped to the device
  7247. # #define GGML_METAL_MAX_BUFFERS 64
  7248. GGML_METAL_MAX_BUFFERS = 64
  7249. # #define GGML_METAL_MAX_COMMAND_BUFFERS 32
  7250. GGML_METAL_MAX_COMMAND_BUFFERS = 32
  7251. # struct ggml_metal_context;
  7252. ggml_metal_context_p = ctypes.c_void_p
  7253. # void ggml_metal_log_set_callback(ggml_log_callback log_callback, void * user_data);
  7254. def ggml_metal_log_set_callback(
  7255. log_callback, # type: "ctypes._CFuncPtr" # type: ignore
  7256. user_data: ctypes.c_void_p,
  7257. ):
  7258. return lib.ggml_metal_log_set_callback(log_callback, user_data)
  7259. if GGML_USE_METAL:
  7260. lib.ggml_metal_log_set_callback.argtypes = [
  7261. ggml_log_callback,
  7262. ctypes.c_void_p,
  7263. ]
  7264. lib.ggml_metal_log_set_callback.restype = None
  7265. # struct ggml_metal_context * ggml_metal_init(int n_cb);
  7266. def ggml_metal_init(
  7267. n_cb: Union[ctypes.c_int, int],
  7268. ) -> ggml_metal_context_p:
  7269. return lib.ggml_metal_init(n_cb)
  7270. if GGML_USE_METAL:
  7271. lib.ggml_metal_init.argtypes = [ctypes.c_int]
  7272. lib.ggml_metal_init.restype = ggml_metal_context_p
  7273. # void ggml_metal_free(struct ggml_metal_context * ctx);
  7274. def ggml_metal_free(
  7275. ctx: ggml_metal_context_p,
  7276. ):
  7277. return lib.ggml_metal_free(ctx)
  7278. if GGML_USE_METAL:
  7279. lib.ggml_metal_free.argtypes = [ggml_metal_context_p]
  7280. lib.ggml_metal_free.restype = None
  7281. # // set the number of command buffers to use
  7282. # void ggml_metal_set_n_cb(struct ggml_metal_context * ctx, int n_cb);
  7283. def ggml_metal_set_n_cb(
  7284. ctx: ggml_metal_context_p,
  7285. n_cb: Union[ctypes.c_int, int],
  7286. ):
  7287. return lib.ggml_metal_set_n_cb(ctx, n_cb)
  7288. if GGML_USE_METAL:
  7289. lib.ggml_metal_set_n_cb.argtypes = [ggml_metal_context_p, ctypes.c_int]
  7290. lib.ggml_metal_set_n_cb.restype = None
  7291. # // creates a mapping between a host memory buffer and a device memory buffer
  7292. # // - make sure to map all buffers used in the graph before calling ggml_metal_graph_compute
  7293. # // - the mapping is used during computation to determine the arguments of the compute kernels
  7294. # // - you don't need to keep the host memory buffer allocated as it is never accessed by Metal
  7295. # // - max_size specifies the maximum size of a tensor and is used to create shared views such
  7296. # // that it is guaranteed that the tensor will fit in at least one of the views
  7297. # //
  7298. # bool ggml_metal_add_buffer(
  7299. # struct ggml_metal_context * ctx,
  7300. # const char * name,
  7301. # void * data,
  7302. # size_t size,
  7303. # size_t max_size);
  7304. def ggml_metal_add_buffer(
  7305. ctx: ggml_metal_context_p,
  7306. name: bytes,
  7307. data: ctypes.c_void_p,
  7308. size: Union[ctypes.c_size_t, int],
  7309. max_size: Union[ctypes.c_size_t, int],
  7310. ) -> bool:
  7311. return lib.ggml_metal_add_buffer(ctx, name, data, size, max_size)
  7312. if GGML_USE_METAL:
  7313. lib.ggml_metal_add_buffer.argtypes = [
  7314. ggml_metal_context_p,
  7315. ctypes.c_char_p,
  7316. ctypes.c_void_p,
  7317. ctypes.c_size_t,
  7318. ctypes.c_size_t,
  7319. ]
  7320. lib.ggml_metal_add_buffer.restype = ctypes.c_bool
  7321. # // set data from host memory into the device
  7322. # void ggml_metal_set_tensor(struct ggml_metal_context * ctx, struct ggml_tensor * t);
  7323. def ggml_metal_set_tensor(
  7324. ctx: ggml_metal_context_p,
  7325. t: ggml_tensor_p,
  7326. ):
  7327. return lib.ggml_metal_set_tensor(ctx, t)
  7328. if GGML_USE_METAL:
  7329. lib.ggml_metal_set_tensor.argtypes = [
  7330. ggml_metal_context_p,
  7331. ctypes.POINTER(ggml_tensor),
  7332. ]
  7333. lib.ggml_metal_set_tensor.restype = None
  7334. # // get data from the device into host memory
  7335. # void ggml_metal_get_tensor(struct ggml_metal_context * ctx, struct ggml_tensor * t);
  7336. def ggml_metal_get_tensor(
  7337. ctx: ggml_metal_context_p,
  7338. t: ggml_tensor_p,
  7339. ):
  7340. return lib.ggml_metal_get_tensor(ctx, t)
  7341. if GGML_USE_METAL:
  7342. lib.ggml_metal_get_tensor.argtypes = [
  7343. ggml_metal_context_p,
  7344. ctypes.POINTER(ggml_tensor),
  7345. ]
  7346. lib.ggml_metal_get_tensor.restype = None
  7347. # // try to find operations that can be run concurrently in the graph
  7348. # // you should run it again if the topology of your graph changes
  7349. # void ggml_metal_graph_find_concurrency(struct ggml_metal_context * ctx, struct ggml_cgraph * gf, bool check_mem);
  7350. def ggml_metal_graph_find_concurrency(
  7351. ctx: ggml_metal_context_p,
  7352. gf: ggml_cgraph_p,
  7353. check_mem: Union[ctypes.c_bool, bool],
  7354. ):
  7355. return lib.ggml_metal_graph_find_concurrency(ctx, gf, check_mem)
  7356. if GGML_USE_METAL:
  7357. lib.ggml_metal_graph_find_concurrency.argtypes = [
  7358. ggml_metal_context_p,
  7359. ctypes.POINTER(ggml_cgraph),
  7360. ctypes.c_bool,
  7361. ]
  7362. lib.ggml_metal_graph_find_concurrency.restype = None
  7363. # // if the graph has been optimized for concurrently dispatch, return length of the concur_list if optimized
  7364. # int ggml_metal_if_optimized(struct ggml_metal_context * ctx);
  7365. def ggml_metal_if_optimized(
  7366. ctx: ggml_metal_context_p,
  7367. ) -> int:
  7368. return lib.ggml_metal_if_optimized(ctx)
  7369. if GGML_USE_METAL:
  7370. lib.ggml_metal_if_optimized.argtypes = [
  7371. ggml_metal_context_p,
  7372. ]
  7373. lib.ggml_metal_if_optimized.restype = ctypes.c_int
  7374. # // output the concur_list for ggml_alloc
  7375. # int * ggml_metal_get_concur_list(struct ggml_metal_context * ctx);
  7376. def ggml_metal_get_concur_list(
  7377. ctx: ggml_metal_context_p,
  7378. ) -> CIntPointer:
  7379. return lib.ggml_metal_get_concur_list(ctx)
  7380. if GGML_USE_METAL:
  7381. lib.ggml_metal_get_concur_list.argtypes = [
  7382. ggml_metal_context_p,
  7383. ]
  7384. lib.ggml_metal_get_concur_list.restype = ctypes.POINTER(ctypes.c_int)
  7385. # // same as ggml_graph_compute but uses Metal
  7386. # // creates gf->n_threads command buffers in parallel
  7387. # void ggml_metal_graph_compute(struct ggml_metal_context * ctx, struct ggml_cgraph * gf);
  7388. def ggml_metal_graph_compute(
  7389. ctx: ggml_metal_context_p,
  7390. gf: ggml_cgraph_p,
  7391. ):
  7392. return lib.ggml_metal_graph_compute(ctx, gf)
  7393. if GGML_USE_METAL:
  7394. lib.ggml_metal_graph_compute.argtypes = [
  7395. ggml_metal_context_p,
  7396. ctypes.POINTER(ggml_cgraph),
  7397. ]
  7398. lib.ggml_metal_graph_compute.restype = None
  7399. # //
  7400. # // backend API
  7401. # // user-code should use only these functions
  7402. # //
  7403. # GGML_API ggml_backend_t ggml_backend_metal_init(void);
  7404. def ggml_backend_metal_init() -> ggml_backend_t:
  7405. return lib.ggml_backend_metal_init()
  7406. if GGML_USE_METAL:
  7407. lib.ggml_backend_metal_init.argtypes = []
  7408. lib.ggml_backend_metal_init.restype = ggml_backend_t
  7409. # GGML_API bool ggml_backend_is_metal(ggml_backend_t backend);
  7410. def ggml_backend_is_metal(
  7411. backend: ggml_backend_t,
  7412. ) -> bool:
  7413. return lib.ggml_backend_is_metal(backend)
  7414. if GGML_USE_METAL:
  7415. lib.ggml_backend_is_metal.argtypes = [ggml_backend_t]
  7416. lib.ggml_backend_is_metal.restype = ctypes.c_bool
  7417. # GGML_API void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb);
  7418. def ggml_backend_metal_set_n_cb(
  7419. backend: ggml_backend_t,
  7420. n_cb: Union[ctypes.c_int, int],
  7421. ):
  7422. return lib.ggml_backend_metal_set_n_cb(backend, n_cb)
  7423. if GGML_USE_METAL:
  7424. lib.ggml_backend_metal_set_n_cb.argtypes = [ggml_backend_t, ctypes.c_int]
  7425. lib.ggml_backend_metal_set_n_cb.restype = None
  7426. # GGML_API ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void);
  7427. def ggml_backend_metal_buffer_type() -> ggml_backend_buffer_type_t:
  7428. return lib.ggml_backend_metal_buffer_type()
  7429. if GGML_USE_METAL:
  7430. lib.ggml_backend_metal_buffer_type.argtypes = []
  7431. lib.ggml_backend_metal_buffer_type.restype = ggml_backend_buffer_type_t
  7432. # // helper to check if the device supports a specific family
  7433. # // ideally, the user code should be doing these checks
  7434. # // ref: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
  7435. # GGML_API bool ggml_backend_metal_supports_family(ggml_backend_t backend, int family);
  7436. def ggml_backend_metal_supports_family(
  7437. backend: ggml_backend_t,
  7438. family: Union[ctypes.c_int, int],
  7439. ) -> bool:
  7440. return lib.ggml_backend_metal_supports_family(backend, family)
  7441. if GGML_USE_METAL:
  7442. lib.ggml_backend_metal_supports_family.argtypes = [ggml_backend_t, ctypes.c_int]
  7443. lib.ggml_backend_metal_supports_family.restype = ctypes.c_bool
  7444. #####################################################
  7445. # GGML OPENCL API
  7446. # source: ggml-opencl.h
  7447. #####################################################
  7448. GGML_USE_CLBLAST = hasattr(lib, "ggml_cl_init")
  7449. # void ggml_cl_init(void);
  7450. def ggml_cl_init():
  7451. return lib.ggml_cl_init()
  7452. if GGML_USE_CLBLAST:
  7453. lib.ggml_cl_init.argtypes = []
  7454. lib.ggml_cl_init.restype = None
  7455. # void ggml_cl_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
  7456. def ggml_cl_mul(
  7457. src0: ggml_tensor_p,
  7458. src1: ggml_tensor_p,
  7459. dst: ggml_tensor_p,
  7460. ):
  7461. return lib.ggml_cl_mul(src0, src1, dst)
  7462. if GGML_USE_CLBLAST:
  7463. lib.ggml_cl_mul.argtypes = [
  7464. ctypes.POINTER(ggml_tensor),
  7465. ctypes.POINTER(ggml_tensor),
  7466. ctypes.POINTER(ggml_tensor),
  7467. ]
  7468. lib.ggml_cl_mul.restype = None
  7469. # bool ggml_cl_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
  7470. def ggml_cl_can_mul_mat(
  7471. src0: ggml_tensor_p,
  7472. src1: ggml_tensor_p,
  7473. dst: ggml_tensor_p,
  7474. ) -> bool:
  7475. return lib.ggml_cl_can_mul_mat(src0, src1, dst)
  7476. if GGML_USE_CLBLAST:
  7477. lib.ggml_cl_can_mul_mat.argtypes = [
  7478. ctypes.POINTER(ggml_tensor),
  7479. ctypes.POINTER(ggml_tensor),
  7480. ctypes.POINTER(ggml_tensor),
  7481. ]
  7482. lib.ggml_cl_can_mul_mat.restype = ctypes.c_bool
  7483. # size_t ggml_cl_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
  7484. def ggml_cl_mul_mat_get_wsize(
  7485. src0: ggml_tensor_p,
  7486. src1: ggml_tensor_p,
  7487. dst: ggml_tensor_p,
  7488. ) -> int:
  7489. return lib.ggml_cl_mul_mat_get_wsize(src0, src1, dst)
  7490. if GGML_USE_CLBLAST:
  7491. lib.ggml_cl_mul_mat_get_wsize.argtypes = [
  7492. ctypes.POINTER(ggml_tensor),
  7493. ctypes.POINTER(ggml_tensor),
  7494. ctypes.POINTER(ggml_tensor),
  7495. ]
  7496. lib.ggml_cl_mul_mat_get_wsize.restype = ctypes.c_size_t
  7497. # void ggml_cl_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, void * wdata, size_t wsize);
  7498. def ggml_cl_mul_mat(
  7499. src0: ggml_tensor_p,
  7500. src1: ggml_tensor_p,
  7501. dst: ggml_tensor_p,
  7502. wdata: ctypes.c_void_p,
  7503. wsize: Union[ctypes.c_size_t, int],
  7504. ):
  7505. return lib.ggml_cl_mul_mat(src0, src1, dst, wdata, wsize)
  7506. if GGML_USE_CLBLAST:
  7507. lib.ggml_cl_mul_mat.argtypes = [
  7508. ctypes.POINTER(ggml_tensor),
  7509. ctypes.POINTER(ggml_tensor),
  7510. ctypes.POINTER(ggml_tensor),
  7511. ctypes.c_void_p,
  7512. ctypes.c_size_t,
  7513. ]
  7514. lib.ggml_cl_mul_mat.restype = None
  7515. # NOTE: The following functions are defined in the ggml-opencl.h header file but
  7516. # are not defined in the ggml-opencl.c source file.
  7517. # void * ggml_cl_host_malloc(size_t size);
  7518. # def ggml_cl_host_malloc(
  7519. # size: Union[ctypes.c_size_t, int],
  7520. # ) -> Optional[ctypes.c_void_p]:
  7521. # return lib.ggml_cl_host_malloc(size)
  7522. # if GGML_USE_CLBLAST:
  7523. # lib.ggml_cl_host_malloc.argtypes = [
  7524. # ctypes.c_size_t,
  7525. # ]
  7526. # lib.ggml_cl_host_malloc.restype = ctypes.c_void_p
  7527. # void ggml_cl_host_free(void * ptr);
  7528. # def ggml_cl_host_free(
  7529. # ptr: ctypes.c_void_p,
  7530. # ):
  7531. # return lib.ggml_cl_host_free(ptr)
  7532. # if GGML_USE_CLBLAST:
  7533. # lib.ggml_cl_host_free.argtypes = [
  7534. # ctypes.c_void_p,
  7535. # ]
  7536. # lib.ggml_cl_host_free.restype = None
  7537. # void ggml_cl_free_data(const struct ggml_tensor* tensor);
  7538. def ggml_cl_free_data(
  7539. tensor: ggml_tensor_p,
  7540. ):
  7541. return lib.ggml_cl_free_data(tensor)
  7542. if GGML_USE_CLBLAST:
  7543. lib.ggml_cl_free_data.argtypes = [
  7544. ctypes.POINTER(ggml_tensor),
  7545. ]
  7546. lib.ggml_cl_free_data.restype = None
  7547. # void ggml_cl_transform_tensor(void * data, struct ggml_tensor * tensor);
  7548. def ggml_cl_transform_tensor(
  7549. data: ctypes.c_void_p,
  7550. tensor: ggml_tensor_p,
  7551. ):
  7552. return lib.ggml_cl_transform_tensor(data, tensor)
  7553. if GGML_USE_CLBLAST:
  7554. lib.ggml_cl_transform_tensor.argtypes = [
  7555. ctypes.c_void_p,
  7556. ctypes.POINTER(ggml_tensor),
  7557. ]
  7558. lib.ggml_cl_transform_tensor.restype = None