  1. """This module is the core of the ggml-python library, it exposes a low-level [ctypes](https://docs.python.org/3/library/ctypes.html)-based interface for ggml.
  2. Structures and functions in the `ggml.ggml` module map directly to the original ggml C library and
  3. they operate at a fairly low level.
  4. No additional runtime checks checks are performed nor is memory management handled automatically.
  5. You've been warned :).
  6. With that in mind here are some useful things to keep in mind
  7. - Functions accept both ctypes types (c_int, c_bool, c_float, etc.) and Python types (int, bool, float, etc.) as parameters.
  8. - Functions return Python types for simple values (int, bool, float, etc.) and ctypes types for complex values ([ggml_context_p][ggml.ggml_context_p], [ggml_tensor_p][ggml.ggml_tensor_p], etc.).
  9. - Memory management is the responsibility of the user. The user must call [ggml.ggml_free][] on the context after calling [ggml.ggml_init][].
  10. Example
  11. ```python
  12. import ggml
  13. import ctypes
  14. # Allocate a new context with 16 MB of memory
  15. params = ggml.ggml_init_params(mem_size=16 * 1024 * 1024, mem_buffer=None)
  16. ctx = ggml.ggml_init(params=params)
  17. # Instantiate tensors
  18. x = ggml.ggml_new_tensor_1d(ctx, ggml.GGML_TYPE_F32, 1)
  19. a = ggml.ggml_new_tensor_1d(ctx, ggml.GGML_TYPE_F32, 1)
  20. b = ggml.ggml_new_tensor_1d(ctx, ggml.GGML_TYPE_F32, 1)
  21. # Use ggml operations to build a computational graph
  22. x2 = ggml.ggml_mul(ctx, x, x)
  23. f = ggml.ggml_add(ctx, ggml.ggml_mul(ctx, a, x2), b)
  24. gf = ggml.ggml_build_forward(f)
  25. # Set the input values
  26. ggml.ggml_set_f32(x, 2.0)
  27. ggml.ggml_set_f32(a, 3.0)
  28. ggml.ggml_set_f32(b, 4.0)
  29. # Compute the graph
  30. ggml.ggml_graph_compute_with_ctx(ctx, ctypes.pointer(gf), 1)
  31. # Get the output value
  32. output = ggml.ggml_get_f32_1d(f, 0)
  33. assert output == 16.0
  34. # Free the context
  35. ggml.ggml_free(ctx)
  36. ```
  37. """
import os
import sys
import ctypes
import pathlib
import importlib.resources

import numpy as np

from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Type, Union
from typing_extensions import TypeAlias

NULL: ctypes.c_void_p = None  # type: ignore

GGML_MEM_ALIGN = 16
# Load the library
def load_shared_library(base_path: Path, lib_base_name: str):
    # Construct the paths to the possible shared library names
    # Searching for the library in the current directory under the name "libggml" (default name
    # for ggml) and "ggml" (default name for this repo)
    lib_names: List[str] = [
        f"lib{lib_base_name}.so",
        f"lib{lib_base_name}.dylib",
        f"{lib_base_name}.dll",
    ]

    cdll_args = dict()  # type: ignore
    # Add the library directory to the DLL search path on Windows (if needed)
    if sys.platform == "win32" and sys.version_info >= (3, 8):
        os.add_dll_directory(str(base_path))
        cdll_args["winmode"] = 0

    for lib_name in lib_names:
        # Try to load the shared library, handling potential errors
        path = base_path / lib_name
        if not path.exists():
            continue
        try:
            return ctypes.CDLL(str(path), **cdll_args)
        except Exception as e:
            raise RuntimeError(f"Failed to load shared library '{path}': {e}")

    raise FileNotFoundError(
        f"Shared library with base name '{lib_base_name}' not found in {base_path}"
    )


base_path = Path(__file__).parent.resolve() / "build/examples/unity"
lib_base_name = "fairseq2_cpp"
lib = load_shared_library(base_path, lib_base_name)
#####################################################
# GGML Utility Types
#####################################################

CFloatArray: TypeAlias = "ctypes.Array[ctypes.c_float]"
CInt64Array: TypeAlias = "ctypes.Array[ctypes.c_int64]"
CIntPointer: TypeAlias = "ctypes._Pointer[ctypes.c_int]"  # type: ignore
CCharPointer: TypeAlias = "ctypes._Pointer[ctypes.c_char]"  # type: ignore
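# These aliases exist only for static type annotations; at runtime the values
# are built with plain ctypes constructors. For example (a minimal sketch):
#
#     row = (ctypes.c_float * 4)(1.0, 2.0, 3.0, 4.0)  # matches CFloatArray
#     count = ctypes.pointer(ctypes.c_int(4))         # matches CIntPointer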
#####################################################
# source: ggml.h
# GGML API
#####################################################

# #define GGML_FILE_MAGIC 0x67676d6c // "ggml"
GGML_FILE_MAGIC = 0x67676D6C
# #define GGML_FILE_VERSION 1
GGML_FILE_VERSION = 1
# #define GGML_QNT_VERSION 2 // bump this on quantization format changes
GGML_QNT_VERSION = 2
# #define GGML_QNT_VERSION_FACTOR 1000 // do not change this
GGML_QNT_VERSION_FACTOR = 1000
# #define GGML_MAX_DIMS 4
GGML_MAX_DIMS = 4
# #define GGML_MAX_NODES 4096
GGML_MAX_NODES = 4096
# #define GGML_MAX_PARAMS 256
GGML_MAX_PARAMS = 256
# #define GGML_MAX_CONTEXTS 64
GGML_MAX_CONTEXTS = 64
# #define GGML_MAX_SRC 6
GGML_MAX_SRC = 6
# #define GGML_MAX_NAME 64
GGML_MAX_NAME = 64
# #define GGML_MAX_OP_PARAMS 32
GGML_MAX_OP_PARAMS = 32
# #define GGML_DEFAULT_N_THREADS 4
GGML_DEFAULT_N_THREADS = 4
# #if UINTPTR_MAX == 0XFFFFFFFF
#     #define GGML_MEMALIGN 4
# #else
#     #define GGML_MEMALIGN 16
# #endif
# 4-byte alignment on 32-bit pointers, 16-byte alignment otherwise, per the
# C macro above.
GGML_MEMALIGN = 4 if ctypes.sizeof(ctypes.c_void_p) == 4 else 16
# #define GGML_EXIT_SUCCESS 0
GGML_EXIT_SUCCESS = 0
# #define GGML_EXIT_ABORTED 1
GGML_EXIT_ABORTED = 1

# #define GGUF_MAGIC 0x46554747 // "GGUF"
GGUF_MAGIC = 0x46554747
# #define GGUF_VERSION 2
GGUF_VERSION = 2
# #define GGUF_DEFAULT_ALIGNMENT 32
GGUF_DEFAULT_ALIGNMENT = 32

# TODO: Check if this is correct
# typedef uint16_t ggml_fp16_t;
ggml_fp16_t = ctypes.c_uint16
CFP16Array: TypeAlias = "ctypes.Array[ggml_fp16_t]"
# GGML_API float ggml_fp16_to_fp32(ggml_fp16_t x);
def ggml_fp16_to_fp32(x: ggml_fp16_t) -> float:
    return lib.ggml_fp16_to_fp32(x)


lib.ggml_fp16_to_fp32.argtypes = [ggml_fp16_t]
lib.ggml_fp16_to_fp32.restype = ctypes.c_float


# GGML_API ggml_fp16_t ggml_fp32_to_fp16(float x);
def ggml_fp32_to_fp16(x: Union[ctypes.c_float, float]) -> int:
    return lib.ggml_fp32_to_fp16(x)


lib.ggml_fp32_to_fp16.argtypes = [ctypes.c_float]
lib.ggml_fp32_to_fp16.restype = ggml_fp16_t


# GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, size_t n);
def ggml_fp16_to_fp32_row(
    x: CFP16Array,
    y: CFloatArray,
    n: Union[ctypes.c_size_t, int],
) -> None:
    return lib.ggml_fp16_to_fp32_row(x, y, n)


# `n` is declared size_t in the C prototype above.
lib.ggml_fp16_to_fp32_row.argtypes = [
    ctypes.POINTER(ggml_fp16_t),
    ctypes.POINTER(ctypes.c_float),
    ctypes.c_size_t,
]
lib.ggml_fp16_to_fp32_row.restype = None


# GGML_API void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, size_t n);
def ggml_fp32_to_fp16_row(
    x: CFloatArray,
    y: CFP16Array,
    n: Union[ctypes.c_size_t, int],
) -> None:
    return lib.ggml_fp32_to_fp16_row(x, y, n)


lib.ggml_fp32_to_fp16_row.argtypes = [
    ctypes.POINTER(ctypes.c_float),
    ctypes.POINTER(ggml_fp16_t),
    ctypes.c_size_t,
]
lib.ggml_fp32_to_fp16_row.restype = None
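# Example (a minimal sketch): round-trip a row of floats through fp16 using
# the two row helpers above. The chosen values are exactly representable in
# fp16, so the round trip is lossless.
#
#     n = 4
#     src = (ctypes.c_float * n)(0.5, 1.0, 1.5, 2.0)
#     half = (ggml_fp16_t * n)()
#     dst = (ctypes.c_float * n)()
#     ggml_fp32_to_fp16_row(src, half, n)
#     ggml_fp16_to_fp32_row(half, dst, n)
#     assert list(dst) == [0.5, 1.0, 1.5, 2.0]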
# struct ggml_context;
ggml_context_p = ctypes.c_void_p
"""Opaque pointer to a ggml_context.

ggml_context structs are not accessed directly; instead they are created using
[ggml_init](ggml.ggml_init) and freed using [ggml_free](ggml.ggml_free)."""
# enum ggml_type {
#     GGML_TYPE_F32  = 0,
#     GGML_TYPE_F16  = 1,
#     GGML_TYPE_Q4_0 = 2,
#     GGML_TYPE_Q4_1 = 3,
#     // GGML_TYPE_Q4_2 = 4, support has been removed
#     // GGML_TYPE_Q4_3 (5) support has been removed
#     GGML_TYPE_Q5_0 = 6,
#     GGML_TYPE_Q5_1 = 7,
#     GGML_TYPE_Q8_0 = 8,
#     GGML_TYPE_Q8_1 = 9,
#     GGML_TYPE_Q2_K = 10,
#     GGML_TYPE_Q3_K = 11,
#     GGML_TYPE_Q4_K = 12,
#     GGML_TYPE_Q5_K = 13,
#     GGML_TYPE_Q6_K = 14,
#     GGML_TYPE_Q8_K = 15,
#     GGML_TYPE_I8,
#     GGML_TYPE_I16,
#     GGML_TYPE_I32,
#     GGML_TYPE_COUNT,
# };
GGML_TYPE_F32 = 0
GGML_TYPE_F16 = 1
GGML_TYPE_Q4_0 = 2
GGML_TYPE_Q4_1 = 3
GGML_TYPE_Q5_0 = 6
GGML_TYPE_Q5_1 = 7
GGML_TYPE_Q8_0 = 8
GGML_TYPE_Q8_1 = 9
GGML_TYPE_Q2_K = 10
GGML_TYPE_Q3_K = 11
GGML_TYPE_Q4_K = 12
GGML_TYPE_Q5_K = 13
GGML_TYPE_Q6_K = 14
GGML_TYPE_Q8_K = 15
GGML_TYPE_I8 = 16
GGML_TYPE_I16 = 17
GGML_TYPE_I32 = 18
GGML_TYPE_COUNT = 19
# enum ggml_backend {
#     GGML_BACKEND_CPU = 0,
#     GGML_BACKEND_GPU = 10,
#     GGML_BACKEND_GPU_SPLIT = 20,
# };
GGML_BACKEND_CPU = 0
GGML_BACKEND_GPU = 10
GGML_BACKEND_GPU_SPLIT = 20

# // model file types
# enum ggml_ftype {
#     GGML_FTYPE_UNKNOWN = -1,
#     GGML_FTYPE_ALL_F32 = 0,
#     GGML_FTYPE_MOSTLY_F16 = 1,  // except 1d tensors
#     GGML_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors
#     GGML_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors
#     GGML_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16
#     GGML_FTYPE_MOSTLY_Q8_0 = 7, // except 1d tensors
#     GGML_FTYPE_MOSTLY_Q5_0 = 8, // except 1d tensors
#     GGML_FTYPE_MOSTLY_Q5_1 = 9, // except 1d tensors
#     GGML_FTYPE_MOSTLY_Q2_K = 10, // except 1d tensors
#     GGML_FTYPE_MOSTLY_Q3_K = 11, // except 1d tensors
#     GGML_FTYPE_MOSTLY_Q4_K = 12, // except 1d tensors
#     GGML_FTYPE_MOSTLY_Q5_K = 13, // except 1d tensors
#     GGML_FTYPE_MOSTLY_Q6_K = 14, // except 1d tensors
# };
GGML_FTYPE_UNKNOWN = -1
GGML_FTYPE_ALL_F32 = 0
GGML_FTYPE_MOSTLY_F16 = 1
GGML_FTYPE_MOSTLY_Q4_0 = 2
GGML_FTYPE_MOSTLY_Q4_1 = 3
GGML_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4
GGML_FTYPE_MOSTLY_Q8_0 = 7
GGML_FTYPE_MOSTLY_Q5_0 = 8
GGML_FTYPE_MOSTLY_Q5_1 = 9
GGML_FTYPE_MOSTLY_Q2_K = 10
GGML_FTYPE_MOSTLY_Q3_K = 11
GGML_FTYPE_MOSTLY_Q4_K = 12
GGML_FTYPE_MOSTLY_Q5_K = 13
GGML_FTYPE_MOSTLY_Q6_K = 14
# // available tensor operations:
# enum ggml_op {
#     GGML_OP_NONE = 0,
#     GGML_OP_DUP,
#     GGML_OP_ADD,
#     GGML_OP_ADD1,
#     GGML_OP_ACC,
#     GGML_OP_SUB,
#     GGML_OP_MUL,
#     GGML_OP_DIV,
#     GGML_OP_SQR,
#     GGML_OP_SQRT,
#     GGML_OP_LOG,
#     GGML_OP_SUM,
#     GGML_OP_SUM_ROWS,
#     GGML_OP_MEAN,
#     GGML_OP_ARGMAX,
#     GGML_OP_REPEAT,
#     GGML_OP_REPEAT_BACK,
#     GGML_OP_CONCAT,
#     GGML_OP_SILU_BACK,
#     GGML_OP_NORM, // normalize
#     GGML_OP_RMS_NORM,
#     GGML_OP_RMS_NORM_BACK,
#     GGML_OP_GROUP_NORM,
#     GGML_OP_MUL_MAT,
#     GGML_OP_OUT_PROD,
#     GGML_OP_SCALE,
#     GGML_OP_SET,
#     GGML_OP_CPY,
#     GGML_OP_CONT,
#     GGML_OP_RESHAPE,
#     GGML_OP_VIEW,
#     GGML_OP_PERMUTE,
#     GGML_OP_TRANSPOSE,
#     GGML_OP_GET_ROWS,
#     GGML_OP_GET_ROWS_BACK,
#     GGML_OP_DIAG,
#     GGML_OP_DIAG_MASK_INF,
#     GGML_OP_DIAG_MASK_ZERO,
#     GGML_OP_SOFT_MAX,
#     GGML_OP_SOFT_MAX_BACK,
#     GGML_OP_ROPE,
#     GGML_OP_ROPE_BACK,
#     GGML_OP_ALIBI,
#     GGML_OP_CLAMP,
#     GGML_OP_CONV_1D,
#     GGML_OP_CONV_2D,
#     GGML_OP_CONV_TRANSPOSE_2D,
#     GGML_OP_POOL_1D,
#     GGML_OP_POOL_2D,
#     GGML_OP_UPSCALE, // nearest interpolate
#     GGML_OP_FLASH_ATTN,
#     GGML_OP_FLASH_FF,
#     GGML_OP_FLASH_ATTN_BACK,
#     GGML_OP_WIN_PART,
#     GGML_OP_WIN_UNPART,
#     GGML_OP_GET_REL_POS,
#     GGML_OP_ADD_REL_POS,
#     GGML_OP_UNARY,
#     GGML_OP_MAP_UNARY,
#     GGML_OP_MAP_BINARY,
#     GGML_OP_MAP_CUSTOM1_F32,
#     GGML_OP_MAP_CUSTOM2_F32,
#     GGML_OP_MAP_CUSTOM3_F32,
#     GGML_OP_MAP_CUSTOM1,
#     GGML_OP_MAP_CUSTOM2,
#     GGML_OP_MAP_CUSTOM3,
#     GGML_OP_CROSS_ENTROPY_LOSS,
#     GGML_OP_CROSS_ENTROPY_LOSS_BACK,
#     GGML_OP_COUNT,
# };
GGML_OP_NONE = 0
GGML_OP_DUP = 1
GGML_OP_ADD = 2
GGML_OP_ADD1 = 3
GGML_OP_ACC = 4
GGML_OP_SUB = 5
GGML_OP_MUL = 6
GGML_OP_DIV = 7
GGML_OP_SQR = 8
GGML_OP_SQRT = 9
GGML_OP_LOG = 10
GGML_OP_SUM = 11
GGML_OP_SUM_ROWS = 12
GGML_OP_MEAN = 13
GGML_OP_ARGMAX = 14
GGML_OP_REPEAT = 15
GGML_OP_REPEAT_BACK = 16
GGML_OP_CONCAT = 17
GGML_OP_SILU_BACK = 18
GGML_OP_NORM = 19
GGML_OP_RMS_NORM = 20
GGML_OP_RMS_NORM_BACK = 21
GGML_OP_GROUP_NORM = 22
GGML_OP_MUL_MAT = 23
GGML_OP_OUT_PROD = 24
GGML_OP_SCALE = 25
GGML_OP_SET = 26
GGML_OP_CPY = 27
GGML_OP_CONT = 28
GGML_OP_RESHAPE = 29
GGML_OP_VIEW = 30
GGML_OP_PERMUTE = 31
GGML_OP_TRANSPOSE = 32
GGML_OP_GET_ROWS = 33
GGML_OP_GET_ROWS_BACK = 34
GGML_OP_DIAG = 35
GGML_OP_DIAG_MASK_INF = 36
GGML_OP_DIAG_MASK_ZERO = 37
GGML_OP_SOFT_MAX = 38
GGML_OP_SOFT_MAX_BACK = 39
GGML_OP_ROPE = 40
GGML_OP_ROPE_BACK = 41
GGML_OP_ALIBI = 42
GGML_OP_CLAMP = 43
GGML_OP_CONV_1D = 44
GGML_OP_CONV_2D = 45
GGML_OP_CONV_TRANSPOSE_2D = 46
GGML_OP_POOL_1D = 47
GGML_OP_POOL_2D = 48
GGML_OP_UPSCALE = 49
GGML_OP_FLASH_ATTN = 50
GGML_OP_FLASH_FF = 51
GGML_OP_FLASH_ATTN_BACK = 52
GGML_OP_WIN_PART = 53
GGML_OP_WIN_UNPART = 54
GGML_OP_GET_REL_POS = 55
GGML_OP_ADD_REL_POS = 56
GGML_OP_UNARY = 57
GGML_OP_MAP_UNARY = 58
GGML_OP_MAP_BINARY = 59
GGML_OP_MAP_CUSTOM1_F32 = 60
GGML_OP_MAP_CUSTOM2_F32 = 61
GGML_OP_MAP_CUSTOM3_F32 = 62
GGML_OP_MAP_CUSTOM1 = 63
GGML_OP_MAP_CUSTOM2 = 64
GGML_OP_MAP_CUSTOM3 = 65
GGML_OP_CROSS_ENTROPY_LOSS = 66
GGML_OP_CROSS_ENTROPY_LOSS_BACK = 67
GGML_OP_COUNT = 68
# enum ggml_unary_op {
#     GGML_UNARY_OP_ABS,
#     GGML_UNARY_OP_SGN,
#     GGML_UNARY_OP_NEG,
#     GGML_UNARY_OP_STEP,
#     GGML_UNARY_OP_TANH,
#     GGML_UNARY_OP_ELU,
#     GGML_UNARY_OP_RELU,
#     GGML_UNARY_OP_GELU,
#     GGML_UNARY_OP_GELU_QUICK,
#     GGML_UNARY_OP_SILU,
# };
GGML_UNARY_OP_ABS = 0
GGML_UNARY_OP_SGN = 1
GGML_UNARY_OP_NEG = 2
GGML_UNARY_OP_STEP = 3
GGML_UNARY_OP_TANH = 4
GGML_UNARY_OP_ELU = 5
GGML_UNARY_OP_RELU = 6
GGML_UNARY_OP_GELU = 7
GGML_UNARY_OP_GELU_QUICK = 8
GGML_UNARY_OP_SILU = 9

# enum ggml_object_type {
#     GGML_OBJECT_TENSOR,
#     GGML_OBJECT_GRAPH,
#     GGML_OBJECT_WORK_BUFFER
# };
GGML_OBJECT_TENSOR = 0
GGML_OBJECT_GRAPH = 1
GGML_OBJECT_WORK_BUFFER = 2
# // ggml object
# struct ggml_object {
#     size_t offs;
#     size_t size;
#     struct ggml_object * next;
#     enum ggml_object_type type;
#     char padding[4];
# };
class ggml_object(ctypes.Structure):
    pass


# Fields are assigned after the class definition because the struct refers
# to itself through the `next` pointer.
ggml_object._fields_ = [
    ("offs", ctypes.c_size_t),
    ("size", ctypes.c_size_t),
    ("next", ctypes.POINTER(ggml_object)),
    ("type", ctypes.c_int),
    ("padding", ctypes.c_char * 4),
]

ggml_object_p: TypeAlias = "ctypes._Pointer[ggml_object]"  # type: ignore

GGML_OBJECT_SIZE = ctypes.sizeof(ggml_object)
# // n-dimensional tensor
# struct ggml_tensor {
#     enum ggml_type    type;
#     enum ggml_backend backend;
#     int     n_dims;
#     int64_t ne[GGML_MAX_DIMS]; // number of elements
#     size_t  nb[GGML_MAX_DIMS]; // stride in bytes:
#                                // nb[0] = sizeof(type)
#                                // nb[1] = nb[0]   * ne[0] + padding
#                                // nb[i] = nb[i-1] * ne[i-1]
#     // compute data
#     enum ggml_op op;
#     // op params - allocated as int32_t for alignment
#     int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t)];
#     bool is_param;
#     struct ggml_tensor * grad;
#     struct ggml_tensor * src[GGML_MAX_SRC];
#     // performance
#     int     perf_runs;
#     int64_t perf_cycles;
#     int64_t perf_time_us;
#     struct ggml_tensor * view_src;
#     size_t               view_offs;
#     void * data;
#     char name[GGML_MAX_NAME];
#     void * extra; // extra things e.g. for ggml-cuda.cu
#     char padding[4];
# };
class ggml_tensor(ctypes.Structure):
    """n-dimensional tensor

    Attributes:
        type (int): ggml_type
        backend (int): ggml_backend
        n_dims (int): number of dimensions
        ne (ctypes.Array[ctypes.c_int64]): number of elements in each dimension
        nb (ctypes.Array[ctypes.c_size_t]): stride in bytes for each dimension
        op (int): ggml operation
        op_params (ctypes.Array[ctypes.c_int32]): `GGML_MAX_OP_PARAMS`-length array of operation parameters
        is_param (bool): is this a parameter tensor
        grad (ggml_tensor_p): reference to gradient tensor
        src (ctypes.Array[ggml_tensor_p]): `GGML_MAX_SRC`-length array of source tensors
        perf_runs (int): number of performance runs
        perf_cycles (int): number of cycles
        perf_time_us (int): time in microseconds
        view_src (ggml_tensor_p): pointer to tensor if this tensor is a view, None if the tensor is not a view
        view_offs (ctypes.c_size_t): offset into the data pointer of the view tensor
        data (ctypes.c_void_p): reference to raw tensor data
        name (bytes): name of tensor
        extra (ctypes.c_void_p): extra data (e.g. for CUDA)
    """


# Fields are assigned after the class definition because the struct refers
# to itself through `grad`, `src`, and `view_src`.
ggml_tensor._fields_ = [
    ("type", ctypes.c_int),
    ("backend", ctypes.c_int),
    ("n_dims", ctypes.c_int),
    ("ne", ctypes.c_int64 * GGML_MAX_DIMS),
    ("nb", ctypes.c_size_t * GGML_MAX_DIMS),
    ("op", ctypes.c_int),
    (
        "op_params",
        ctypes.c_int32 * (GGML_MAX_OP_PARAMS // ctypes.sizeof(ctypes.c_int32)),
    ),
    ("is_param", ctypes.c_bool),
    ("grad", ctypes.POINTER(ggml_tensor)),
    ("src", ctypes.POINTER(ggml_tensor) * GGML_MAX_SRC),
    ("perf_runs", ctypes.c_int),
    ("perf_cycles", ctypes.c_int64),
    ("perf_time_us", ctypes.c_int64),
    ("view_src", ctypes.POINTER(ggml_tensor)),
    ("view_offs", ctypes.c_size_t),
    ("data", ctypes.c_void_p),
    ("name", ctypes.c_char * GGML_MAX_NAME),
    ("extra", ctypes.c_void_p),
    ("padding", ctypes.c_char * 4),
]

GGML_TENSOR_SIZE = ctypes.sizeof(ggml_tensor)

ggml_tensor_p: TypeAlias = "ctypes._Pointer[ggml_tensor]"  # type: ignore
"""ctypes pointer to a [ggml_tensor][ggml.ggml_tensor]

Can be dereferenced to a [ggml_tensor][ggml.ggml_tensor] object using
the `.contents` attribute."""
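# Example (a minimal sketch): given a ggml_tensor_p `t` (e.g. returned by
# ggml_new_tensor_1d, which is bound later in this module), dereference it
# with `.contents` to read the struct fields:
#
#     tensor = t.contents
#     print(tensor.n_dims)                     # number of dimensions
#     print(tensor.ne[: tensor.n_dims])        # elements per dimension
#     print(tensor.name.decode("utf-8"))       # tensor name as str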
abort_callback_t = ctypes.CFUNCTYPE(ctypes.c_bool, ctypes.c_void_p)

# // the compute plan that needs to be prepared for ggml_graph_compute()
# // since https://github.com/ggerganov/ggml/issues/287
# struct ggml_cplan {
#     size_t work_size; // size of work buffer, calculated by `ggml_graph_plan()`
#     uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
#     int n_threads;
#     // the `n_tasks` of nodes, 1:1 mapping to cgraph nodes
#     int n_tasks[GGML_MAX_NODES];
#     // abort ggml_graph_compute when true
#     bool (*abort_callback)(void * data);
#     void * abort_callback_data;
# };
class ggml_cplan(ctypes.Structure):
    """Compute plan for a ggml computation graph

    Attributes:
        work_size (int): size of work buffer
        work_data (ctypes.POINTER(ctypes.c_uint8)): work buffer
        n_threads (int): number of threads to use when computing the graph using [ggml_graph_compute][ggml.ggml_graph_compute]
        n_tasks (ctypes.Array[ctypes.c_int]): `n_tasks` of nodes, 1:1 mapping to cgraph nodes
        abort_callback (abort_callback_t): abort callback
        abort_callback_data (ctypes.c_void_p): abort callback data
    """

    _fields_ = [
        ("work_size", ctypes.c_size_t),
        ("work_data", ctypes.POINTER(ctypes.c_uint8)),
        ("n_threads", ctypes.c_int),
        ("n_tasks", ctypes.c_int * GGML_MAX_NODES),
        ("abort_callback", abort_callback_t),
        ("abort_callback_data", ctypes.c_void_p),
    ]


GGML_CPLAN_SIZE = ctypes.sizeof(ggml_cplan)

ggml_cplan_p: TypeAlias = "ctypes._Pointer[ggml_cplan]"  # type: ignore
"""ctypes pointer to a [ggml_cplan][ggml.ggml_cplan]

Can be dereferenced to a [ggml_cplan][ggml.ggml_cplan] object using
the `.contents` attribute."""
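# Example (a minimal sketch): wrap a Python function as an abort callback.
# Keep a reference to the wrapped callback for as long as the cplan is in
# use; ctypes does not keep the underlying thunk alive for you (see the
# memory-management note in the module docstring).
#
#     @abort_callback_t
#     def never_abort(user_data):
#         return False  # return True to abort ggml_graph_compute
#
#     plan = ggml_cplan()
#     plan.n_threads = GGML_DEFAULT_N_THREADS
#     plan.abort_callback = never_abort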
# // next prime after GGML_MAX_NODES
# // #define GGML_GRAPH_HASHTABLE_SIZE 4099
# // next prime after GGML_MAX_NODES * 2 (nodes + leafs)
# #define GGML_GRAPH_HASHTABLE_SIZE 8273
GGML_GRAPH_HASHTABLE_SIZE = 8273

# // computation graph
# struct ggml_cgraph {
#     int n_nodes;
#     int n_leafs;
#     struct ggml_tensor * nodes[GGML_MAX_NODES];
#     struct ggml_tensor * grads[GGML_MAX_NODES];
#     struct ggml_tensor * leafs[GGML_MAX_NODES];
#     void * visited_hash_table[GGML_GRAPH_HASHTABLE_SIZE];
#     // performance
#     int     perf_runs;
#     int64_t perf_cycles;
#     int64_t perf_time_us;
# };
class ggml_cgraph(ctypes.Structure):
    """ggml computation graph

    Attributes:
        n_nodes (int): number of nodes
        n_leafs (int): number of leafs
        nodes (ctypes.Array[ggml_tensor_p]): `n_nodes`-length array of compute tensors
        grads (ctypes.Array[ggml_tensor_p]): `n_nodes`-length array of gradient tensors
        leafs (ctypes.Array[ggml_tensor_p]): `n_leafs`-length array of parameter tensors
        visited_hash_table (ctypes.Array[ctypes.c_void_p]): `GGML_GRAPH_HASHTABLE_SIZE`-length array of visited nodes
        perf_runs (int): number of runs
        perf_cycles (int): number of cycles
        perf_time_us (int): computation time in microseconds"""

    _fields_ = [
        ("n_nodes", ctypes.c_int),
        ("n_leafs", ctypes.c_int),
        ("nodes", ctypes.POINTER(ggml_tensor) * GGML_MAX_NODES),
        ("grads", ctypes.POINTER(ggml_tensor) * GGML_MAX_NODES),
        ("leafs", ctypes.POINTER(ggml_tensor) * GGML_MAX_NODES),
        ("visited_hash_table", ctypes.c_void_p * GGML_GRAPH_HASHTABLE_SIZE),
        ("perf_runs", ctypes.c_int),
        ("perf_cycles", ctypes.c_int64),
        ("perf_time_us", ctypes.c_int64),
    ]


ggml_cgraph_p: TypeAlias = "ctypes._Pointer[ggml_cgraph]"  # type: ignore
"""ctypes pointer to a [ggml_cgraph][ggml.ggml_cgraph]

Can be dereferenced to a [ggml_cgraph][ggml.ggml_cgraph] object using
the `.contents` attribute."""

# static const size_t GGML_GRAPH_SIZE = sizeof(struct ggml_cgraph);
GGML_GRAPH_SIZE = ctypes.sizeof(ggml_cgraph)
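# Example (a minimal sketch): inspect the nodes of a built graph. Assuming
# `gf` is a ggml_cgraph, e.g. from ggml_build_forward as in the module
# docstring example:
#
#     for i in range(gf.n_nodes):
#         node = gf.nodes[i].contents
#         print(node.name.decode("utf-8"), node.op)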
# struct ggml_scratch {
#     size_t offs;
#     size_t size;
#     void * data;
# };
class ggml_scratch(ctypes.Structure):
    _fields_ = [
        ("offs", ctypes.c_size_t),
        ("size", ctypes.c_size_t),
        ("data", ctypes.c_void_p),
    ]
# struct ggml_init_params {
#     // memory pool
#     size_t mem_size;   // bytes
#     void * mem_buffer; // if NULL, memory will be allocated internally
#     bool   no_alloc;   // don't allocate memory for the tensor data
# };
class ggml_init_params(ctypes.Structure):
    """Initialization parameters for a ggml context

    **NOTE**: Reference counting does not cross into ggml. If you allocate a memory buffer
    in Python using ctypes arrays or a numpy array, you must keep a reference to it until
    you free the ggml context; otherwise you will encounter a segmentation fault.

    Attributes:
        mem_size (int): size of memory pool in bytes
        mem_buffer (ctypes.c_void_p): pointer to memory pool; if None, memory will be allocated internally
        no_alloc (bool): don't allocate memory for tensor data
    """

    _fields_ = [
        ("mem_size", ctypes.c_size_t),
        ("mem_buffer", ctypes.c_void_p),
        ("no_alloc", ctypes.c_bool),
    ]
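# Example (a minimal sketch of the NOTE above): a caller-owned memory pool.
# `buf` must stay referenced until ggml_free(ctx) is called; ggml_init and
# ggml_free are bound later in this module.
#
#     mem_size = 16 * 1024 * 1024
#     buf = (ctypes.c_uint8 * mem_size)()  # Python owns this allocation
#     params = ggml_init_params(
#         mem_size=mem_size,
#         mem_buffer=ctypes.cast(buf, ctypes.c_void_p),
#         no_alloc=False,
#     )
#     ctx = ggml_init(params)
#     # ... use ctx; `buf` must outlive it ...
#     ggml_free(ctx)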
# // compute types

# // NOTE: the INIT or FINALIZE pass is not scheduled unless explicitly enabled.
# // This behavior was changed since https://github.com/ggerganov/llama.cpp/pull/1995.
# enum ggml_task_type {
#     GGML_TASK_INIT = 0,
#     GGML_TASK_COMPUTE,
#     GGML_TASK_FINALIZE,
# };
GGML_TASK_INIT = 0
GGML_TASK_COMPUTE = 1
GGML_TASK_FINALIZE = 2

# struct ggml_compute_params {
#     enum ggml_task_type type;
#     // ith = thread index, nth = number of threads
#     int ith, nth;
#     // work buffer for all threads
#     size_t wsize;
#     void * wdata;
# };
class ggml_compute_params(ctypes.Structure):
    _fields_ = [
        ("type", ctypes.c_int),
        ("ith", ctypes.c_int),
        ("nth", ctypes.c_int),
        ("wsize", ctypes.c_size_t),
        ("wdata", ctypes.c_void_p),
    ]


ggml_compute_params_p: TypeAlias = "ctypes._Pointer[ggml_compute_params]"  # type: ignore
# // misc
# GGML_API void ggml_time_init(void); // call this once at the beginning of the program
def ggml_time_init():
    return lib.ggml_time_init()

lib.ggml_time_init.argtypes = []
lib.ggml_time_init.restype = None

# GGML_API int64_t ggml_time_ms(void);
def ggml_time_ms() -> int:
    return lib.ggml_time_ms()

lib.ggml_time_ms.argtypes = []
lib.ggml_time_ms.restype = ctypes.c_int64

# GGML_API int64_t ggml_time_us(void);
def ggml_time_us() -> int:
    return lib.ggml_time_us()

lib.ggml_time_us.argtypes = []
lib.ggml_time_us.restype = ctypes.c_int64

# GGML_API int64_t ggml_cycles(void);
def ggml_cycles() -> int:
    return lib.ggml_cycles()

lib.ggml_cycles.argtypes = []
lib.ggml_cycles.restype = ctypes.c_int64

# GGML_API int64_t ggml_cycles_per_ms(void);
def ggml_cycles_per_ms() -> int:
    return lib.ggml_cycles_per_ms()

lib.ggml_cycles_per_ms.argtypes = []
lib.ggml_cycles_per_ms.restype = ctypes.c_int64
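# A minimal timing sketch using the bindings above; `_example_time_block` is
# a hypothetical helper. ggml_time_init() should be called once per process
# before the other timers are read.
def _example_time_block() -> float:
    ggml_time_init()
    t0 = ggml_time_us()
    # ... run a computation here ...
    t1 = ggml_time_us()
    return (t1 - t0) / 1000.0  # elapsed milliseconds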
# GGML_API void ggml_numa_init(void); // call once for better performance on NUMA systems
def ggml_numa_init():
    return lib.ggml_numa_init()

lib.ggml_numa_init.argtypes = []
lib.ggml_numa_init.restype = None

# GGML_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
def ggml_is_numa() -> bool:
    return lib.ggml_is_numa()

lib.ggml_is_numa.argtypes = []
lib.ggml_is_numa.restype = ctypes.c_bool

# GGML_API void ggml_print_object (const struct ggml_object * obj);
def ggml_print_object(obj: ggml_object_p):
    return lib.ggml_print_object(obj)

lib.ggml_print_object.argtypes = [ctypes.POINTER(ggml_object)]
lib.ggml_print_object.restype = None

# GGML_API void ggml_print_objects(const struct ggml_context * ctx);
def ggml_print_objects(ctx: ggml_context_p):
    return lib.ggml_print_objects(ctx)

lib.ggml_print_objects.argtypes = [ggml_context_p]
lib.ggml_print_objects.restype = None
# GGML_API int64_t ggml_nelements (const struct ggml_tensor * tensor);
def ggml_nelements(
    tensor: ggml_tensor_p,
) -> int:
    """Get the number of elements in a tensor

    Parameters:
        tensor: tensor

    Returns:
        number of elements"""
    return lib.ggml_nelements(tensor)

lib.ggml_nelements.argtypes = [ctypes.POINTER(ggml_tensor)]
lib.ggml_nelements.restype = ctypes.c_int64

# GGML_API int64_t ggml_nrows (const struct ggml_tensor * tensor);
def ggml_nrows(
    tensor: ggml_tensor_p,
) -> int:
    """Get the number of rows in a tensor

    Parameters:
        tensor: tensor

    Returns:
        number of rows"""
    return lib.ggml_nrows(tensor)

lib.ggml_nrows.argtypes = [ctypes.POINTER(ggml_tensor)]
lib.ggml_nrows.restype = ctypes.c_int64

# GGML_API size_t ggml_nbytes (const struct ggml_tensor * tensor);
def ggml_nbytes(
    tensor: ggml_tensor_p,
) -> int:
    """Get the number of bytes required to store tensor data

    Parameters:
        tensor: tensor

    Returns:
        number of bytes"""
    return lib.ggml_nbytes(tensor)

lib.ggml_nbytes.argtypes = [ctypes.POINTER(ggml_tensor)]
lib.ggml_nbytes.restype = ctypes.c_size_t

# GGML_API size_t ggml_nbytes_pad (const struct ggml_tensor * tensor); // same as ggml_nbytes() but padded to GGML_MEM_ALIGN
def ggml_nbytes_pad(
    tensor: ggml_tensor_p,
) -> int:
    """Get the number of bytes required to store tensor data, padded to GGML_MEM_ALIGN

    Parameters:
        tensor: tensor

    Returns:
        number of bytes"""
    return lib.ggml_nbytes_pad(tensor)

lib.ggml_nbytes_pad.argtypes = [ctypes.POINTER(ggml_tensor)]
lib.ggml_nbytes_pad.restype = ctypes.c_size_t
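# A sketch of how the size helpers relate for a plain f32 tensor, assuming a
# live context `ctx` and the GGML_TYPE_F32 constant bound earlier in this
# module; `_example_tensor_sizes` is a hypothetical helper. For f32,
# nbytes == nelements * sizeof(float), and nrows is the product of all
# dimensions except the first.
def _example_tensor_sizes(ctx: ggml_context_p):
    t = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 3)  # ne = [4, 3]
    assert ggml_nelements(t) == 12
    assert ggml_nrows(t) == 3
    assert ggml_nbytes(t) == 12 * ctypes.sizeof(ctypes.c_float)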
# GGML_API size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split);
def ggml_nbytes_split(
    tensor: ggml_tensor_p,
    nrows_split: Union[ctypes.c_int, int],
) -> int:
    return lib.ggml_nbytes_split(tensor, nrows_split)

lib.ggml_nbytes_split.argtypes = [ctypes.POINTER(ggml_tensor), ctypes.c_int]
lib.ggml_nbytes_split.restype = ctypes.c_size_t

# GGML_API int ggml_blck_size (enum ggml_type type);
def ggml_blck_size(type: Union[ctypes.c_int, int]) -> int:
    return lib.ggml_blck_size(type)

lib.ggml_blck_size.argtypes = [ctypes.c_int]
lib.ggml_blck_size.restype = ctypes.c_int

# GGML_API size_t ggml_type_size (enum ggml_type type); // size in bytes for all elements in a block
def ggml_type_size(type: Union[ctypes.c_int, int]) -> int:
    return lib.ggml_type_size(type)

lib.ggml_type_size.argtypes = [ctypes.c_int]
lib.ggml_type_size.restype = ctypes.c_size_t

# GGML_API float ggml_type_sizef(enum ggml_type type); // ggml_type_size()/ggml_blck_size() as float
def ggml_type_sizef(type: Union[ctypes.c_int, int]) -> float:
    return lib.ggml_type_sizef(type)

lib.ggml_type_sizef.argtypes = [ctypes.c_int]
lib.ggml_type_sizef.restype = ctypes.c_float

# GGML_API const char * ggml_type_name(enum ggml_type type);
def ggml_type_name(type: Union[ctypes.c_int, int]) -> bytes:
    return lib.ggml_type_name(type)

lib.ggml_type_name.argtypes = [ctypes.c_int]
lib.ggml_type_name.restype = ctypes.c_char_p

# GGML_API const char * ggml_op_name (enum ggml_op op);
def ggml_op_name(op: Union[ctypes.c_int, int]) -> bytes:
    return lib.ggml_op_name(op)

lib.ggml_op_name.argtypes = [ctypes.c_int]
lib.ggml_op_name.restype = ctypes.c_char_p

# GGML_API const char * ggml_op_symbol(enum ggml_op op);
def ggml_op_symbol(op: Union[ctypes.c_int, int]) -> bytes:
    return lib.ggml_op_symbol(op)

lib.ggml_op_symbol.argtypes = [ctypes.c_int]
lib.ggml_op_symbol.restype = ctypes.c_char_p

# GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
def ggml_element_size(
    tensor: ggml_tensor_p,
) -> int:
    return lib.ggml_element_size(tensor)

lib.ggml_element_size.argtypes = [ctypes.POINTER(ggml_tensor)]
lib.ggml_element_size.restype = ctypes.c_size_t

# GGML_API bool ggml_is_quantized(enum ggml_type type);
def ggml_is_quantized(type: Union[ctypes.c_int, int]) -> bool:
    return lib.ggml_is_quantized(type)

lib.ggml_is_quantized.argtypes = [ctypes.c_int]
lib.ggml_is_quantized.restype = ctypes.c_bool
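# A sketch of how block size and type size combine; `_example_type_metadata`
# is a hypothetical helper. Elements are stored in blocks of
# ggml_blck_size(type) values packed into ggml_type_size(type) bytes, so the
# average bytes-per-element is ggml_type_size(type) / ggml_blck_size(type)
# (what ggml_type_sizef returns); for quantized types it is well below 4.
def _example_type_metadata(type: Union[ctypes.c_int, int]):
    name = ggml_type_name(type)  # e.g. b"f32"
    bytes_per_element = ggml_type_size(type) / ggml_blck_size(type)
    return name, bytes_per_element, ggml_is_quantized(type)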
# // TODO: temporary until model loading of ggml examples is refactored
# GGML_API enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype);
def ggml_ftype_to_ggml_type(ftype: Union[ctypes.c_int, int]) -> int:
    return lib.ggml_ftype_to_ggml_type(ftype)

lib.ggml_ftype_to_ggml_type.argtypes = [ctypes.c_int]
lib.ggml_ftype_to_ggml_type.restype = ctypes.c_int

# GGML_API bool ggml_is_transposed(const struct ggml_tensor * tensor);
def ggml_is_transposed(
    tensor: ggml_tensor_p,
) -> bool:
    """Check if a tensor is transposed

    Parameters:
        tensor: tensor

    Returns:
        True if tensor is transposed else False"""
    return lib.ggml_is_transposed(tensor)

lib.ggml_is_transposed.argtypes = [ctypes.POINTER(ggml_tensor)]
lib.ggml_is_transposed.restype = ctypes.c_bool

# GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor);
def ggml_is_contiguous(
    tensor: ggml_tensor_p,
) -> bool:
    """Check if a tensor is contiguous

    Parameters:
        tensor: tensor

    Returns:
        True if tensor is contiguous else False"""
    return lib.ggml_is_contiguous(tensor)

lib.ggml_is_contiguous.argtypes = [ctypes.POINTER(ggml_tensor)]
lib.ggml_is_contiguous.restype = ctypes.c_bool

# GGML_API bool ggml_is_permuted (const struct ggml_tensor * tensor);
def ggml_is_permuted(
    tensor: ggml_tensor_p,
) -> bool:
    """Check if a tensor is permuted

    Parameters:
        tensor: tensor

    Returns:
        True if tensor is permuted else False"""
    return lib.ggml_is_permuted(tensor)

lib.ggml_is_permuted.argtypes = [ctypes.POINTER(ggml_tensor)]
lib.ggml_is_permuted.restype = ctypes.c_bool

# GGML_API bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
def ggml_are_same_shape(
    t0: ggml_tensor_p,
    t1: ggml_tensor_p,
) -> bool:
    """Check if two tensors have the same shape

    Parameters:
        t0: tensor 0
        t1: tensor 1

    Returns:
        True if tensors have the same shape else False"""
    return lib.ggml_are_same_shape(t0, t1)

lib.ggml_are_same_shape.argtypes = [
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_are_same_shape.restype = ctypes.c_bool

# // use this to compute the memory overhead of a tensor
# GGML_API size_t ggml_tensor_overhead(void);
def ggml_tensor_overhead() -> int:
    """Overhead required for a tensor struct in bytes

    Returns:
        size of tensor struct in bytes"""
    return lib.ggml_tensor_overhead()

lib.ggml_tensor_overhead.argtypes = []
lib.ggml_tensor_overhead.restype = ctypes.c_size_t
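# A rough budgeting sketch for a context's memory pool, assuming `n` f32
# tensors of `nelements` elements each; `_example_mem_budget` is a
# hypothetical helper. Each tensor costs ggml_tensor_overhead() bytes of
# metadata on top of its data, plus some alignment slack not counted here.
def _example_mem_budget(n: int, nelements: int) -> int:
    data_bytes = n * nelements * ctypes.sizeof(ctypes.c_float)
    overhead_bytes = n * ggml_tensor_overhead()
    return data_bytes + overhead_bytes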
# // main
# GGML_API struct ggml_context * ggml_init(struct ggml_init_params params);
def ggml_init(
    params: ggml_init_params,
) -> ggml_context_p:
    """Instantiate a new ggml context with params.

    You must call `ggml_free()` to free the context.

    Parameters:
        params: ggml init params

    Returns:
        Pointer to ggml_context"""
    return lib.ggml_init(params)

lib.ggml_init.argtypes = [ggml_init_params]
lib.ggml_init.restype = ggml_context_p

# GGML_API void ggml_free(struct ggml_context * ctx);
def ggml_free(ctx: ggml_context_p):
    """Free the ggml context.

    Parameters:
        ctx: ggml context"""
    return lib.ggml_free(ctx)

lib.ggml_free.argtypes = [ggml_context_p]
lib.ggml_free.restype = None
# GGML_API size_t ggml_used_mem(const struct ggml_context * ctx);
def ggml_used_mem(ctx: ggml_context_p) -> int:
    """Return the amount of memory used by the ggml context in bytes.

    Parameters:
        ctx: ggml context

    Returns:
        amount of memory used in bytes"""
    return lib.ggml_used_mem(ctx)

lib.ggml_used_mem.argtypes = [ggml_context_p]
lib.ggml_used_mem.restype = ctypes.c_size_t

# GGML_API size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch);
def ggml_set_scratch(ctx: ggml_context_p, scratch: ggml_scratch) -> int:
    """Set the scratch buffer for the ggml context."""
    return lib.ggml_set_scratch(ctx, scratch)

lib.ggml_set_scratch.argtypes = [ggml_context_p, ggml_scratch]
lib.ggml_set_scratch.restype = ctypes.c_size_t

# GGML_API bool ggml_get_no_alloc(struct ggml_context * ctx);
def ggml_get_no_alloc(ctx: ggml_context_p) -> bool:
    """Return the no_alloc flag for the ggml context."""
    return lib.ggml_get_no_alloc(ctx)

lib.ggml_get_no_alloc.argtypes = [ggml_context_p]
lib.ggml_get_no_alloc.restype = ctypes.c_bool

# GGML_API void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc);
def ggml_set_no_alloc(ctx: ggml_context_p, no_alloc: Union[ctypes.c_bool, bool]):
    """Set the no_alloc flag for the ggml context."""
    return lib.ggml_set_no_alloc(ctx, no_alloc)

lib.ggml_set_no_alloc.argtypes = [ggml_context_p, ctypes.c_bool]
lib.ggml_set_no_alloc.restype = None
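# A sketch of the no_alloc workflow, assuming `storage` is a ctypes float
# array kept alive by the caller; `_example_no_alloc` is a hypothetical
# helper. With no_alloc set, tensors created in the context carry metadata
# only, and the caller points `tensor.contents.data` at externally managed
# storage before use.
def _example_no_alloc(ctx: ggml_context_p, storage: "ctypes.Array[ctypes.c_float]"):
    ggml_set_no_alloc(ctx, True)
    t = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, len(storage))
    t.contents.data = ctypes.cast(storage, ctypes.c_void_p)
    return t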
# GGML_API void * ggml_get_mem_buffer (struct ggml_context * ctx);
def ggml_get_mem_buffer(ctx: ggml_context_p) -> Optional[ctypes.c_void_p]:
    """Return the memory buffer for the ggml context."""
    return lib.ggml_get_mem_buffer(ctx)

lib.ggml_get_mem_buffer.argtypes = [ggml_context_p]
lib.ggml_get_mem_buffer.restype = ctypes.c_void_p

# GGML_API size_t ggml_get_mem_size (struct ggml_context * ctx);
def ggml_get_mem_size(ctx: ggml_context_p) -> int:
    """Return the size of the memory buffer for the ggml context in bytes."""
    return lib.ggml_get_mem_size(ctx)

lib.ggml_get_mem_size.argtypes = [ggml_context_p]
lib.ggml_get_mem_size.restype = ctypes.c_size_t

# GGML_API size_t ggml_get_max_tensor_size(const struct ggml_context * ctx);
def ggml_get_max_tensor_size(ctx: ggml_context_p) -> int:
    """Return the maximum size of a tensor in bytes."""
    return lib.ggml_get_max_tensor_size(ctx)

lib.ggml_get_max_tensor_size.argtypes = [ggml_context_p]
lib.ggml_get_max_tensor_size.restype = ctypes.c_size_t
# GGML_API struct ggml_tensor * ggml_new_tensor(
#     struct ggml_context * ctx,
#     enum ggml_type type,
#     int n_dims,
#     const int64_t *ne);
def ggml_new_tensor(
    ctx: ggml_context_p,
    type: Union[ctypes.c_int, int],
    n_dims: Union[ctypes.c_int, int],
    ne: CInt64Array,
) -> ggml_tensor_p:
    """Create a new tensor with the given type, number of dimensions, and number of elements in each dimension.

    Parameters:
        ctx: ggml context
        type: ggml type
        n_dims: number of dimensions
        ne (ctypes.Array[ctypes.c_int64]): number of elements in each dimension (array of length n_dims)

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_new_tensor(ctx, type, n_dims, ne)

lib.ggml_new_tensor.argtypes = [
    ggml_context_p,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.POINTER(ctypes.c_int64),
]
lib.ggml_new_tensor.restype = ctypes.POINTER(ggml_tensor)
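# A sketch of the generic constructor, which takes a C array of int64
# extents; `_example_new_tensor_generic` is a hypothetical helper. The
# fixed-arity wrappers below are usually more convenient.
def _example_new_tensor_generic(ctx: ggml_context_p) -> ggml_tensor_p:
    ne = (ctypes.c_int64 * 2)(4, 3)  # 2-D tensor with ne = [4, 3]
    return ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne)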
# GGML_API struct ggml_tensor * ggml_new_tensor_1d(
#     struct ggml_context * ctx,
#     enum ggml_type type,
#     int64_t ne0);
def ggml_new_tensor_1d(
    ctx: ggml_context_p, type: Union[ctypes.c_int, int], ne0: Union[ctypes.c_int64, int]
) -> ggml_tensor_p:
    """Create a new 1-dimensional tensor with the given type and number of elements.

    Parameters:
        ctx: ggml context
        type: ggml type
        ne0: number of elements in dimension 0

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_new_tensor_1d(ctx, type, ne0)

lib.ggml_new_tensor_1d.argtypes = [ggml_context_p, ctypes.c_int, ctypes.c_int64]
lib.ggml_new_tensor_1d.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_new_tensor_2d(
#     struct ggml_context * ctx,
#     enum ggml_type type,
#     int64_t ne0,
#     int64_t ne1);
def ggml_new_tensor_2d(
    ctx: ggml_context_p,
    type: Union[ctypes.c_int, int],
    ne0: Union[ctypes.c_int64, int],
    ne1: Union[ctypes.c_int64, int],
) -> ggml_tensor_p:
    """Create a new 2-dimensional tensor with the given type and number of elements in each dimension.

    Parameters:
        ctx: ggml context
        type: ggml type
        ne0: number of elements in dimension 0
        ne1: number of elements in dimension 1

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_new_tensor_2d(ctx, type, ne0, ne1)

lib.ggml_new_tensor_2d.argtypes = [
    ggml_context_p,
    ctypes.c_int,
    ctypes.c_int64,
    ctypes.c_int64,
]
lib.ggml_new_tensor_2d.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_new_tensor_3d(
#     struct ggml_context * ctx,
#     enum ggml_type type,
#     int64_t ne0,
#     int64_t ne1,
#     int64_t ne2);
def ggml_new_tensor_3d(
    ctx: ggml_context_p,
    type: Union[ctypes.c_int, int],
    ne0: Union[ctypes.c_int64, int],
    ne1: Union[ctypes.c_int64, int],
    ne2: Union[ctypes.c_int64, int],
) -> ggml_tensor_p:
    """Create a new 3-dimensional tensor with the given type and number of elements in each dimension.

    Parameters:
        ctx: ggml context
        type: ggml type
        ne0: number of elements in dimension 0
        ne1: number of elements in dimension 1
        ne2: number of elements in dimension 2

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_new_tensor_3d(ctx, type, ne0, ne1, ne2)

lib.ggml_new_tensor_3d.argtypes = [
    ggml_context_p,
    ctypes.c_int,
    ctypes.c_int64,
    ctypes.c_int64,
    ctypes.c_int64,
]
lib.ggml_new_tensor_3d.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_new_tensor_4d(
#     struct ggml_context * ctx,
#     enum ggml_type type,
#     int64_t ne0,
#     int64_t ne1,
#     int64_t ne2,
#     int64_t ne3);
def ggml_new_tensor_4d(
    ctx: ggml_context_p,
    type: Union[ctypes.c_int, int],
    ne0: Union[ctypes.c_int64, int],
    ne1: Union[ctypes.c_int64, int],
    ne2: Union[ctypes.c_int64, int],
    ne3: Union[ctypes.c_int64, int],
) -> ggml_tensor_p:
    """Create a new 4-dimensional tensor with the given type and number of elements in each dimension.

    Parameters:
        ctx: ggml context
        type: ggml type
        ne0: number of elements in dimension 0
        ne1: number of elements in dimension 1
        ne2: number of elements in dimension 2
        ne3: number of elements in dimension 3

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_new_tensor_4d(ctx, type, ne0, ne1, ne2, ne3)

lib.ggml_new_tensor_4d.argtypes = [
    ggml_context_p,
    ctypes.c_int,
    ctypes.c_int64,
    ctypes.c_int64,
    ctypes.c_int64,
    ctypes.c_int64,
]
lib.ggml_new_tensor_4d.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
def ggml_new_i32(
    ctx: ggml_context_p, value: Union[ctypes.c_int32, int]
) -> ggml_tensor_p:
    """Create a 1 element tensor with the given integer value.

    Parameters:
        ctx: ggml context
        value: integer value

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_new_i32(ctx, value)

lib.ggml_new_i32.argtypes = [ggml_context_p, ctypes.c_int32]
lib.ggml_new_i32.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
def ggml_new_f32(
    ctx: ggml_context_p,
    value: Union[ctypes.c_float, float],
) -> ggml_tensor_p:
    """Create a 1 element tensor with the given float value.

    Parameters:
        ctx: ggml context
        value: float value

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_new_f32(ctx, value)

lib.ggml_new_f32.argtypes = [ggml_context_p, ctypes.c_float]
lib.ggml_new_f32.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
def ggml_dup_tensor(ctx: ggml_context_p, src: ggml_tensor_p) -> ggml_tensor_p:
    """Create a new tensor with the same type and dimensions as the source tensor.

    Parameters:
        ctx: ggml context
        src: source tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_dup_tensor(ctx, src)

lib.ggml_dup_tensor.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_dup_tensor.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, struct ggml_tensor * src);
def ggml_view_tensor(ctx: ggml_context_p, src: ggml_tensor_p) -> ggml_tensor_p:
    """Create a new tensor with the same type, dimensions and data as the source tensor.

    Parameters:
        ctx: ggml context
        src: source tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_view_tensor(ctx, src)

lib.ggml_view_tensor.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_view_tensor.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
def ggml_get_tensor(ctx: ggml_context_p, name: bytes) -> ggml_tensor_p:
    """Get a tensor from the ggml context by name.

    Parameters:
        ctx: ggml context
        name: name of tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_get_tensor(ctx, name)

lib.ggml_get_tensor.argtypes = [ggml_context_p, ctypes.c_char_p]
lib.ggml_get_tensor.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
def ggml_set_zero(
    tensor: ggml_tensor_p,
) -> ggml_tensor_p:
    """Zero all elements in a tensor.

    Parameters:
        tensor: tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_set_zero(tensor)

lib.ggml_set_zero.argtypes = [ctypes.POINTER(ggml_tensor)]
lib.ggml_set_zero.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
def ggml_set_i32(
    tensor: ggml_tensor_p,
    value: Union[ctypes.c_int32, int],
) -> ggml_tensor_p:
    """Set all elements in a tensor to the given integer value.

    Parameters:
        tensor: tensor
        value: integer value

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_set_i32(tensor, value)

lib.ggml_set_i32.argtypes = [ctypes.POINTER(ggml_tensor), ctypes.c_int32]
lib.ggml_set_i32.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);
def ggml_set_f32(
    tensor: ggml_tensor_p,
    value: Union[ctypes.c_float, float],
) -> ggml_tensor_p:
    """Set all elements in a tensor to the given float value.

    Parameters:
        tensor: tensor
        value: float value

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_set_f32(tensor, value)

lib.ggml_set_f32.argtypes = [ctypes.POINTER(ggml_tensor), ctypes.c_float]
lib.ggml_set_f32.restype = ctypes.POINTER(ggml_tensor)

# GGML_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
def ggml_get_i32_1d(
    tensor: ggml_tensor_p,
    i: Union[ctypes.c_int, int],
) -> int:
    """Get the integer value of the i-th element in a 1-dimensional tensor.

    Parameters:
        tensor: tensor
        i: index of element

    Returns:
        integer value of element at index i"""
    return lib.ggml_get_i32_1d(tensor, i)

lib.ggml_get_i32_1d.argtypes = [ctypes.POINTER(ggml_tensor), ctypes.c_int]
lib.ggml_get_i32_1d.restype = ctypes.c_int32

# GGML_API void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);
def ggml_set_i32_1d(
    tensor: ggml_tensor_p,
    i: Union[ctypes.c_int, int],
    value: Union[ctypes.c_int32, int],
):
    """Set the integer value of the i-th element in a 1-dimensional tensor.

    Parameters:
        tensor: tensor
        i: index of element
        value: integer value to set element to"""
    return lib.ggml_set_i32_1d(tensor, i, value)

lib.ggml_set_i32_1d.argtypes = [
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
    ctypes.c_int32,
]
lib.ggml_set_i32_1d.restype = None
# GGML_API float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
def ggml_get_f32_1d(
    tensor: ggml_tensor_p,
    i: Union[ctypes.c_int, int],
) -> float:
    """Get the float value of the i-th element in a 1-dimensional tensor.

    Parameters:
        tensor: tensor
        i: index of element

    Returns:
        float value of element at index i"""
    return lib.ggml_get_f32_1d(tensor, i)

lib.ggml_get_f32_1d.argtypes = [ctypes.POINTER(ggml_tensor), ctypes.c_int]
lib.ggml_get_f32_1d.restype = ctypes.c_float

# GGML_API void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);
def ggml_set_f32_1d(
    tensor: ggml_tensor_p,
    i: Union[ctypes.c_int, int],
    value: Union[ctypes.c_float, float],
):
    """Set the float value of the i-th element in a 1-dimensional tensor.

    Parameters:
        tensor: tensor
        i: index of element
        value: float value to set element to"""
    return lib.ggml_set_f32_1d(tensor, i, value)

lib.ggml_set_f32_1d.argtypes = [
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
    ctypes.c_float,
]
lib.ggml_set_f32_1d.restype = None
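# A sketch of element-level access on an f32 tensor;
# `_example_element_roundtrip` is a hypothetical helper. The *_1d accessors
# index the tensor as if it were flattened, regardless of its logical shape.
def _example_element_roundtrip(t: ggml_tensor_p) -> float:
    ggml_set_f32(t, 0.0)  # fill with zeros
    ggml_set_f32_1d(t, 2, 1.5)  # write element 2
    return ggml_get_f32_1d(t, 2)  # reads back 1.5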
# GGML_API void * ggml_get_data (const struct ggml_tensor * tensor);
def ggml_get_data(
    tensor: ggml_tensor_p,
) -> Optional[ctypes.c_void_p]:
    """Get the data pointer of a tensor.

    Parameters:
        tensor: tensor

    Returns:
        Pointer to data, or None if tensor has no data"""
    return lib.ggml_get_data(tensor)

lib.ggml_get_data.argtypes = [ctypes.POINTER(ggml_tensor)]
lib.ggml_get_data.restype = ctypes.c_void_p

# GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);
def ggml_get_data_f32(
    tensor: ggml_tensor_p,
) -> Optional[CFloatArray]:
    """Get the data pointer of a tensor as a float array.

    Parameters:
        tensor: tensor

    Returns:
        (Optional[ctypes.Array[ctypes.c_float]]): array of float to data, or None if tensor has no data
    """
    return lib.ggml_get_data_f32(tensor)

lib.ggml_get_data_f32.argtypes = [ctypes.POINTER(ggml_tensor)]
lib.ggml_get_data_f32.restype = ctypes.POINTER(ctypes.c_float)
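# A sketch of a bulk read; `_example_read_all` is a hypothetical helper. The
# returned float pointer supports ctypes slicing, which copies values out
# into a Python list.
def _example_read_all(t: ggml_tensor_p):
    data = ggml_get_data_f32(t)
    assert data is not None, "tensor has no data"
    return data[: ggml_nelements(t)]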
# GGML_API enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);
def ggml_get_unary_op(
    tensor: ggml_tensor_p,
) -> int:
    """Get the unary operation of a tensor.

    Parameters:
        tensor: tensor

    Returns:
        unary operation"""
    return lib.ggml_get_unary_op(tensor)

lib.ggml_get_unary_op.argtypes = [ctypes.POINTER(ggml_tensor)]
lib.ggml_get_unary_op.restype = ctypes.c_int

# GGML_API const char * ggml_get_name(const struct ggml_tensor * tensor);
def ggml_get_name(
    tensor: ggml_tensor_p,
) -> bytes:
    """Get the name of a tensor.

    Parameters:
        tensor: tensor

    Returns:
        name of tensor"""
    return lib.ggml_get_name(tensor)

lib.ggml_get_name.argtypes = [ctypes.POINTER(ggml_tensor)]
lib.ggml_get_name.restype = ctypes.c_char_p

# GGML_API struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name);
def ggml_set_name(
    tensor: ggml_tensor_p,
    name: bytes,
) -> ggml_tensor_p:
    """Set the name of a tensor.

    Parameters:
        tensor: tensor
        name: name to set tensor to

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_set_name(tensor, name)

lib.ggml_set_name.argtypes = [ctypes.POINTER(ggml_tensor), ctypes.c_char_p]
lib.ggml_set_name.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...);
def ggml_format_name(
    tensor: ggml_tensor_p,
    fmt: bytes,
    *args: Sequence[Union[bool, int, float, str]],
) -> ggml_tensor_p:
    """Format the name of a tensor using the given format c string and arguments.

    Parameters:
        tensor: tensor
        fmt: format c string
        args: arguments to format string

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_format_name(tensor, fmt, *args)

lib.ggml_format_name.argtypes = [ctypes.POINTER(ggml_tensor), ctypes.c_char_p]
lib.ggml_format_name.restype = ctypes.POINTER(ggml_tensor)
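# A sketch of the naming round-trip; `_example_name_lookup` is a hypothetical
# helper. Tensors named with ggml_set_name can be looked up again from the
# context by that name.
def _example_name_lookup(ctx: ggml_context_p, t: ggml_tensor_p) -> bytes:
    ggml_set_name(t, b"logits")
    same = ggml_get_tensor(ctx, b"logits")
    return ggml_get_name(same)  # b"logits"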
# //
# // operations on tensors with backpropagation
# //
# GGML_API struct ggml_tensor * ggml_dup(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_dup(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
    return lib.ggml_dup(ctx, a)

lib.ggml_dup.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_dup.restype = ctypes.POINTER(ggml_tensor)

# // in-place, returns view(a)
# GGML_API struct ggml_tensor * ggml_dup_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_dup_inplace(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
    return lib.ggml_dup_inplace(ctx, a)

lib.ggml_dup_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_dup_inplace.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_add(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b);
def ggml_add(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    """Add two tensors together and return the result.

    Parameters:
        ctx: ggml context
        a: first tensor
        b: second tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_add(ctx, a, b)

lib.ggml_add.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_add.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_add_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b);
def ggml_add_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    """Add two tensors together and store the result in the first tensor.

    Parameters:
        ctx: ggml context
        a: first tensor
        b: second tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_add_inplace(ctx, a, b)

lib.ggml_add_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_add_inplace.restype = ctypes.POINTER(ggml_tensor)
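# A sketch of how these ops compose; `_example_build_add` is a hypothetical
# helper. Calls like ggml_add only record a node in the context: nothing is
# evaluated until a compute graph is built and run with the graph APIs bound
# elsewhere in this module.
def _example_build_add(ctx: ggml_context_p) -> ggml_tensor_p:
    a = ggml_new_f32(ctx, 2.0)
    b = ggml_new_f32(ctx, 3.0)
    return ggml_add(ctx, a, b)  # holds 5.0 only after graph computation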
# GGML_API struct ggml_tensor * ggml_add1(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b);
def ggml_add1(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    return lib.ggml_add1(ctx, a, b)

lib.ggml_add1.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_add1.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_add1_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b);
def ggml_add1_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    return lib.ggml_add1_inplace(ctx, a, b)

lib.ggml_add1_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_add1_inplace.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_acc(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b,
#     size_t nb1,
#     size_t nb2,
#     size_t nb3,
#     size_t offset);
def ggml_acc(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    nb1: Union[ctypes.c_size_t, int],
    nb2: Union[ctypes.c_size_t, int],
    nb3: Union[ctypes.c_size_t, int],
    offset: Union[ctypes.c_size_t, int],
) -> ggml_tensor_p:
    return lib.ggml_acc(ctx, a, b, nb1, nb2, nb3, offset)

lib.ggml_acc.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_size_t,
    ctypes.c_size_t,
    ctypes.c_size_t,
    ctypes.c_size_t,
]
lib.ggml_acc.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_acc_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b,
#     size_t nb1,
#     size_t nb2,
#     size_t nb3,
#     size_t offset);
def ggml_acc_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    nb1: Union[ctypes.c_size_t, int],
    nb2: Union[ctypes.c_size_t, int],
    nb3: Union[ctypes.c_size_t, int],
    offset: Union[ctypes.c_size_t, int],
) -> ggml_tensor_p:
    return lib.ggml_acc_inplace(ctx, a, b, nb1, nb2, nb3, offset)

lib.ggml_acc_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_size_t,
    ctypes.c_size_t,
    ctypes.c_size_t,
    ctypes.c_size_t,
]
lib.ggml_acc_inplace.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_sub(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b);
def ggml_sub(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    """Subtract two tensors and return the result.

    Parameters:
        ctx: ggml context
        a: first tensor
        b: second tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_sub(ctx, a, b)

lib.ggml_sub.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_sub.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_sub_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b);
def ggml_sub_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    """Subtract two tensors and store the result in the first tensor.

    Parameters:
        ctx: ggml context
        a: first tensor
        b: second tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_sub_inplace(ctx, a, b)

lib.ggml_sub_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_sub_inplace.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_mul(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b);
def ggml_mul(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    """Element-wise multiply two tensors and return the result.

    Parameters:
        ctx: ggml context
        a: first tensor
        b: second tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_mul(ctx, a, b)

lib.ggml_mul.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_mul.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_mul_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b);
def ggml_mul_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    """Element-wise multiply two tensors and store the result in the first tensor.

    Parameters:
        ctx: ggml context
        a: first tensor
        b: second tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_mul_inplace(ctx, a, b)

lib.ggml_mul_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_mul_inplace.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_div(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b);
def ggml_div(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    """Element-wise divide two tensors and return the result.

    Parameters:
        ctx: ggml context
        a: first tensor
        b: second tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_div(ctx, a, b)

lib.ggml_div.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_div.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_div_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b);
def ggml_div_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    """Element-wise divide two tensors and store the result in the first tensor.

    Parameters:
        ctx: ggml context
        a: first tensor
        b: second tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_div_inplace(ctx, a, b)

lib.ggml_div_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_div_inplace.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_sqr(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_sqr(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
) -> ggml_tensor_p:
    """Square all elements in a tensor and return the result.

    Parameters:
        ctx: ggml context
        a: tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_sqr(ctx, a)

lib.ggml_sqr.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_sqr.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_sqr_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_sqr_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
) -> ggml_tensor_p:
    """Square all elements in a tensor and store the result in the input tensor.

    Parameters:
        ctx: ggml context
        a: tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_sqr_inplace(ctx, a)

lib.ggml_sqr_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_sqr_inplace.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_sqrt(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_sqrt(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
) -> ggml_tensor_p:
    """Take the square root of all elements in a tensor and return the result.

    Parameters:
        ctx: ggml context
        a: tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_sqrt(ctx, a)

lib.ggml_sqrt.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_sqrt.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_sqrt_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_sqrt_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
) -> ggml_tensor_p:
    """Take the square root of all elements in a tensor and store the result in the input tensor.

    Parameters:
        ctx: ggml context
        a: tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_sqrt_inplace(ctx, a)

lib.ggml_sqrt_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_sqrt_inplace.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_log(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_log(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
    """Take the natural logarithm of all elements in a tensor and return the result.

    Parameters:
        ctx: ggml context
        a: tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_log(ctx, a)

lib.ggml_log.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_log.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_log_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_log_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
) -> ggml_tensor_p:
    """Take the natural logarithm of all elements in a tensor and store the result in the input tensor.

    Parameters:
        ctx: ggml context
        a: tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_log_inplace(ctx, a)

lib.ggml_log_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_log_inplace.restype = ctypes.POINTER(ggml_tensor)
# // return scalar
# GGML_API struct ggml_tensor * ggml_sum(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_sum(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
    """Sum all elements in a tensor and return the result.

    Parameters:
        ctx: ggml context
        a: tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_sum(ctx, a)

lib.ggml_sum.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_sum.restype = ctypes.POINTER(ggml_tensor)

# // sums along rows, with input shape [a,b,c,d] return shape [1,b,c,d]
# GGML_API struct ggml_tensor * ggml_sum_rows(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_sum_rows(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
    """Sum all elements in a tensor along the first axis and return the result.

    Sums along rows: with input shape [a,b,c,d] the result has shape [1,b,c,d].

    Parameters:
        ctx: ggml context
        a: tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_sum_rows(ctx, a)

lib.ggml_sum_rows.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_sum_rows.restype = ctypes.POINTER(ggml_tensor)
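# A sketch of the row-sum shape rule; `_example_sum_rows_shape` is a
# hypothetical helper. The first dimension collapses to 1 and the rest are
# preserved, e.g. ne = [4, 3] becomes ne = [1, 3].
def _example_sum_rows_shape(ctx: ggml_context_p):
    x = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 3)
    s = ggml_sum_rows(ctx, x)
    return s.contents.ne[:2]  # [1, 3]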
# // mean along rows
# GGML_API struct ggml_tensor * ggml_mean(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_mean(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
    """Take the mean along rows of a tensor and return the result.

    Parameters:
        ctx: ggml context
        a: tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_mean(ctx, a)

lib.ggml_mean.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_mean.restype = ctypes.POINTER(ggml_tensor)

# // argmax along rows
# GGML_API struct ggml_tensor * ggml_argmax(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_argmax(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
    """Take the argmax along rows of a tensor and return the result.

    Parameters:
        ctx: ggml context
        a: tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_argmax(ctx, a)

lib.ggml_argmax.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_argmax.restype = ctypes.POINTER(ggml_tensor)
# // if a is the same shape as b, and a is not parameter, return a
# // otherwise, return a new tensor: repeat(a) to fit in b
# GGML_API struct ggml_tensor * ggml_repeat(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b);
def ggml_repeat(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    """Repeat a tensor to fit the shape of another tensor.

    If a is the same shape as b, and a is not a parameter, returns a.

    Parameters:
        ctx: ggml context
        a: tensor to repeat
        b: tensor to fit

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_repeat(ctx, a, b)

lib.ggml_repeat.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_repeat.restype = ctypes.POINTER(ggml_tensor)
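# A sketch of the classic pre-broadcast pattern; `_example_bias_add` is a
# hypothetical helper. A bias vector is expanded to the shape of `x` with
# ggml_repeat before the element-wise add.
def _example_bias_add(
    ctx: ggml_context_p, x: ggml_tensor_p, bias: ggml_tensor_p
) -> ggml_tensor_p:
    return ggml_add(ctx, x, ggml_repeat(ctx, bias, x))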
# GGML_API struct ggml_tensor * ggml_repeat_back(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b);
def ggml_repeat_back(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    return lib.ggml_repeat_back(ctx, a, b)

lib.ggml_repeat_back.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_repeat_back.restype = ctypes.POINTER(ggml_tensor)

# // concat a and b on dim 2
# // used in stable-diffusion
# GGML_API struct ggml_tensor * ggml_concat(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b);
def ggml_concat(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    """Concatenate two tensors along dim 2 and return the result.

    Parameters:
        ctx: ggml context
        a: first tensor
        b: second tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_concat(ctx, a, b)

lib.ggml_concat.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_concat.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_abs(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_abs(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
    """Take the absolute value of all elements in a tensor and return the result.

    Parameters:
        ctx: ggml context
        a: tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_abs(ctx, a)

lib.ggml_abs.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_abs.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_abs_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_abs_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
) -> ggml_tensor_p:
    """Take the absolute value of all elements in a tensor and store the result in the input tensor.

    Parameters:
        ctx: ggml context
        a: tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_abs_inplace(ctx, a)

lib.ggml_abs_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_abs_inplace.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_sgn(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_sgn(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
    """Get the sign of all elements in a tensor and return the result.

    Parameters:
        ctx: ggml context
        a: tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_sgn(ctx, a)

lib.ggml_sgn.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_sgn.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_sgn_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_sgn_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
) -> ggml_tensor_p:
    """Get the sign of all elements in a tensor and store the result in the input tensor.

    Parameters:
        ctx: ggml context
        a: tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_sgn_inplace(ctx, a)

lib.ggml_sgn_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_sgn_inplace.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_neg(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_neg(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
    """Negate all elements in a tensor and return the result.

    Parameters:
        ctx: ggml context
        a: tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_neg(ctx, a)

lib.ggml_neg.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_neg.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_neg_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_neg_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
) -> ggml_tensor_p:
    """Negate all elements in a tensor and store the result in the input tensor.

    Parameters:
        ctx: ggml context
        a: tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_neg_inplace(ctx, a)

lib.ggml_neg_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_neg_inplace.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_step(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_step(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
    return lib.ggml_step(ctx, a)

lib.ggml_step.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_step.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_tanh(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_tanh(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
    """Apply the tanh activation function to all elements in a tensor and return the result.

    Parameters:
        ctx: ggml context
        a: tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_tanh(ctx, a)

lib.ggml_tanh.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_tanh.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_tanh_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_tanh_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
) -> ggml_tensor_p:
    """Apply the tanh activation function to all elements in a tensor and store the result in the input tensor.

    Parameters:
        ctx: ggml context
        a: tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_tanh_inplace(ctx, a)

lib.ggml_tanh_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_tanh_inplace.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_elu(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_elu(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
    """Apply the ELU activation function to all elements in a tensor and return the result.

    Parameters:
        ctx: ggml context
        a: tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_elu(ctx, a)

lib.ggml_elu.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_elu.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_elu_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_elu_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
) -> ggml_tensor_p:
    """Apply the ELU activation function to all elements in a tensor and store the result in the input tensor.

    Parameters:
        ctx: ggml context
        a: tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_elu_inplace(ctx, a)

lib.ggml_elu_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_elu_inplace.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_relu(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_relu(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
    """Apply the ReLU activation function to all elements in a tensor and return the result.

    Parameters:
        ctx: ggml context
        a: tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_relu(ctx, a)

lib.ggml_relu.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_relu.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_relu_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_relu_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
) -> ggml_tensor_p:
    """Apply the ReLU activation function to all elements in a tensor and store the result in the input tensor.

    Parameters:
        ctx: ggml context
        a: tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_relu_inplace(ctx, a)

lib.ggml_relu_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_relu_inplace.restype = ctypes.POINTER(ggml_tensor)
# // TODO: double-check this computation is correct
# GGML_API struct ggml_tensor * ggml_gelu(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_gelu(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
    """Apply the Gaussian Error Linear Unit activation function to all elements in a tensor and return the result.

    Parameters:
        ctx: ggml context
        a: tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_gelu(ctx, a)

lib.ggml_gelu.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_gelu.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_gelu_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_gelu_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
) -> ggml_tensor_p:
    """Apply the Gaussian Error Linear Unit activation function to all elements in a tensor and store the result in the input tensor.

    Parameters:
        ctx: ggml context
        a: tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_gelu_inplace(ctx, a)

lib.ggml_gelu_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_gelu_inplace.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_gelu_quick(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_gelu_quick(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
    """Apply the quick (faster, approximate) Gaussian Error Linear Unit activation function to all elements in a tensor and return the result.

    Parameters:
        ctx: ggml context
        a: tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_gelu_quick(ctx, a)

lib.ggml_gelu_quick.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_gelu_quick.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_gelu_quick_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_gelu_quick_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
) -> ggml_tensor_p:
    """Apply the quick (faster, approximate) Gaussian Error Linear Unit activation function to all elements in a tensor and store the result in the input tensor.

    Parameters:
        ctx: ggml context
        a: tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_gelu_quick_inplace(ctx, a)

lib.ggml_gelu_quick_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_gelu_quick_inplace.restype = ctypes.POINTER(ggml_tensor)
  2083. # GGML_API struct ggml_tensor * ggml_silu(
  2084. # struct ggml_context * ctx,
  2085. # struct ggml_tensor * a);
  2086. def ggml_silu(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  2087. """Apply the Sigmoid Linear Unit activation function to all elements in a tensor and return the result.
  2088. Parameters:
  2089. ctx: ggml context
  2090. a: tensor
  2091. Returns:
  2092. Pointer to ggml_tensor"""
  2093. return lib.ggml_silu(ctx, a)
  2094. lib.ggml_silu.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2095. lib.ggml_silu.restype = ctypes.POINTER(ggml_tensor)
  2096. # GGML_API struct ggml_tensor * ggml_silu_inplace(
  2097. # struct ggml_context * ctx,
  2098. # struct ggml_tensor * a);
  2099. def ggml_silu_inplace(
  2100. ctx: ggml_context_p,
  2101. a: ggml_tensor_p,
  2102. ) -> ggml_tensor_p:
  2103. """Apply the Sigmoid Linear Unit activation function to all elements in a tensor and store the result in the first tensor.
  2104. Parameters:
  2105. ctx: ggml context
  2106. a: tensor
  2107. Returns:
  2108. Pointer to ggml_tensor"""
  2109. return lib.ggml_silu_inplace(ctx, a)
  2110. lib.ggml_silu_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2111. lib.ggml_silu_inplace.restype = ctypes.POINTER(ggml_tensor)

# // a - x
# // b - dy
# GGML_API struct ggml_tensor * ggml_silu_back(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b);
def ggml_silu_back(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    """SiLU backward pass: compute dx given a = x and b = dy."""
    return lib.ggml_silu_back(ctx, a, b)

lib.ggml_silu_back.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_silu_back.restype = ctypes.POINTER(ggml_tensor)

# // normalize along rows
# GGML_API struct ggml_tensor * ggml_norm(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     float eps);
def ggml_norm(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    eps: Union[ctypes.c_float, float],
) -> ggml_tensor_p:
    """Normalize all elements in a tensor along the first axis (rows) and return the result.

    Parameters:
        ctx: ggml context
        a: tensor
        eps: minimum value to avoid division by zero

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_norm(ctx, a, eps)

lib.ggml_norm.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor), ctypes.c_float]
lib.ggml_norm.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_norm_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     float eps);
def ggml_norm_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    eps: Union[ctypes.c_float, float],
) -> ggml_tensor_p:
    """Normalize all elements in a tensor along the first axis (rows) and store the result in the first tensor.

    Parameters:
        ctx: ggml context
        a: tensor
        eps: minimum value to avoid division by zero

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_norm_inplace(ctx, a, eps)

lib.ggml_norm_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_float,
]
lib.ggml_norm_inplace.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_rms_norm(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     float eps);
def ggml_rms_norm(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    eps: Union[ctypes.c_float, float],
) -> ggml_tensor_p:
    """Compute the RMS norm of a tensor and return the result.

    Parameters:
        ctx: ggml context
        a: tensor
        eps: small value added to the mean square to avoid division by zero

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_rms_norm(ctx, a, eps)

lib.ggml_rms_norm.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_float,
]
lib.ggml_rms_norm.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_rms_norm_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     float eps);
def ggml_rms_norm_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    eps: Union[ctypes.c_float, float],
) -> ggml_tensor_p:
    """Compute the RMS norm of a tensor and store the result in the first tensor."""
    return lib.ggml_rms_norm_inplace(ctx, a, eps)

lib.ggml_rms_norm_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_float,
]
lib.ggml_rms_norm_inplace.restype = ctypes.POINTER(ggml_tensor)
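
# Usage sketch (illustrative, not part of the upstream API): RMS-normalizing a
# hidden-state tensor row-wise; eps guards the division inside the norm.
def _example_rms_norm(ctx: ggml_context_p, hidden: ggml_tensor_p) -> ggml_tensor_p:
    return ggml_rms_norm(ctx, hidden, 1e-5)  # eps is converted to a C float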

# // group normalize along ne0*ne1*n_groups
# // used in stable-diffusion
# // TODO: eps is hardcoded to 1e-6 for now
# GGML_API struct ggml_tensor * ggml_group_norm(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     int n_groups);
def ggml_group_norm(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    n_groups: int,
) -> ggml_tensor_p:
    """Group normalize a tensor and return the result.

    Parameters:
        ctx: ggml context
        a: tensor
        n_groups: number of groups

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_group_norm(ctx, a, n_groups)

lib.ggml_group_norm.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
]
lib.ggml_group_norm.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_group_norm_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     int n_groups);
def ggml_group_norm_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    n_groups: int,
) -> ggml_tensor_p:
    """Group normalize a tensor and store the result in the first tensor.

    Parameters:
        ctx: ggml context
        a: tensor
        n_groups: number of groups

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_group_norm_inplace(ctx, a, n_groups)

lib.ggml_group_norm_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
]
lib.ggml_group_norm_inplace.restype = ctypes.POINTER(ggml_tensor)

# // a - x
# // b - dy
# GGML_API struct ggml_tensor * ggml_rms_norm_back(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b,
#     float eps);
def ggml_rms_norm_back(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    eps: Union[ctypes.c_float, float],
) -> ggml_tensor_p:
    """RMS-norm backward pass: compute dx given a = x and b = dy."""
    return lib.ggml_rms_norm_back(ctx, a, b, eps)

lib.ggml_rms_norm_back.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_float,
]
lib.ggml_rms_norm_back.restype = ctypes.POINTER(ggml_tensor)

# // A: m rows, n columns
# // B: p rows, n columns (i.e. we transpose it internally)
# // result is m columns, p rows
# GGML_API struct ggml_tensor * ggml_mul_mat(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b);
def ggml_mul_mat(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    """Multiply two matrices and return the result.

    A: m rows, n columns
    B: p rows, n columns (i.e. we transpose it internally)
    result is m columns, p rows

    Parameters:
        ctx: ggml context
        a: tensor
        b: tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_mul_mat(ctx, a, b)

lib.ggml_mul_mat.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_mul_mat.restype = ctypes.POINTER(ggml_tensor)
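
# Usage sketch (illustrative, not part of the upstream API): ggml stores ne[0]
# as the row length, so both operands of ggml_mul_mat share ne0 (= n above)
# and the product contracts over it. Assumes the 2d constructor bound earlier.
def _example_mul_mat(ctx: ggml_context_p) -> ggml_tensor_p:
    a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 3)  # n = 4, m = 3 rows
    b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 2)  # n = 4, p = 2 rows
    return ggml_mul_mat(ctx, a, b)  # result has ne = [3, 2]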

# // A: m columns, n rows,
# // B: p columns, n rows,
# // result is m columns, p rows
# GGML_API struct ggml_tensor * ggml_out_prod(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b);
def ggml_out_prod(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    """Compute the outer product of two matrices and return the result.

    A: m columns, n rows,
    B: p columns, n rows,
    result is m columns, p rows

    Parameters:
        ctx: ggml context
        a: tensor
        b: tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_out_prod(ctx, a, b)

lib.ggml_out_prod.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_out_prod.restype = ctypes.POINTER(ggml_tensor)

# //
# // operations on tensors without backpropagation
# //

# GGML_API struct ggml_tensor * ggml_scale(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b);
def ggml_scale(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    """Scale a tensor by another tensor and return the result.

    Parameters:
        ctx: ggml context
        a: tensor
        b: tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_scale(ctx, a, b)

lib.ggml_scale.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_scale.restype = ctypes.POINTER(ggml_tensor)

# // in-place, returns view(a)
# GGML_API struct ggml_tensor * ggml_scale_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b);
def ggml_scale_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    """Scale a tensor by another tensor and store the result in the first tensor.

    Parameters:
        ctx: ggml context
        a: tensor
        b: tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_scale_inplace(ctx, a, b)

lib.ggml_scale_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_scale_inplace.restype = ctypes.POINTER(ggml_tensor)
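
# Usage sketch (illustrative, not part of the upstream API): in this ggml
# revision the scale factor is itself a tensor, conventionally a single
# float32 element; ggml_new_f32 (assumed bound earlier in this file) wraps a
# Python float in such a tensor.
def _example_scale(ctx: ggml_context_p, x: ggml_tensor_p) -> ggml_tensor_p:
    factor = ggml_new_f32(ctx, 0.125)  # e.g. 1/sqrt(d_k) for attention scores
    return ggml_scale_inplace(ctx, x, factor)  # returns view(x)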

# // b -> view(a,offset,nb1,nb2,nb3), return modified a
# GGML_API struct ggml_tensor * ggml_set(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b,
#     size_t nb1,
#     size_t nb2,
#     size_t nb3,
#     size_t offset);
def ggml_set(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    nb1: Union[ctypes.c_size_t, int],
    nb2: Union[ctypes.c_size_t, int],
    nb3: Union[ctypes.c_size_t, int],
    offset: Union[ctypes.c_size_t, int],
) -> ggml_tensor_p:
    """Write b into a view of a (byte strides nb1..nb3, byte offset) and return the modified a."""
    return lib.ggml_set(ctx, a, b, nb1, nb2, nb3, offset)

lib.ggml_set.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_size_t,
    ctypes.c_size_t,
    ctypes.c_size_t,
    ctypes.c_size_t,
]
lib.ggml_set.restype = ctypes.POINTER(ggml_tensor)

# // b -> view(a,offset,nb1,nb2,nb3), return view(a)
# GGML_API struct ggml_tensor * ggml_set_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b,
#     size_t nb1,
#     size_t nb2,
#     size_t nb3,
#     size_t offset);
def ggml_set_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    nb1: Union[ctypes.c_size_t, int],
    nb2: Union[ctypes.c_size_t, int],
    nb3: Union[ctypes.c_size_t, int],
    offset: Union[ctypes.c_size_t, int],
) -> ggml_tensor_p:
    """Write b into a view of a (byte strides nb1..nb3, byte offset) and return a view of a."""
    return lib.ggml_set_inplace(ctx, a, b, nb1, nb2, nb3, offset)

lib.ggml_set_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_size_t,
    ctypes.c_size_t,
    ctypes.c_size_t,
    ctypes.c_size_t,
]
lib.ggml_set_inplace.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_set_1d(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b,
#     size_t offset);
def ggml_set_1d(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    offset: Union[ctypes.c_size_t, int],
) -> ggml_tensor_p:
    """Write b into a at the given byte offset and return the result."""
    return lib.ggml_set_1d(ctx, a, b, offset)

lib.ggml_set_1d.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_size_t,
]
lib.ggml_set_1d.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_set_1d_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b,
#     size_t offset);
def ggml_set_1d_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    offset: Union[ctypes.c_size_t, int],
) -> ggml_tensor_p:
    """Write b into a at the given byte offset, in-place; returns a view of a."""
    return lib.ggml_set_1d_inplace(ctx, a, b, offset)

lib.ggml_set_1d_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_size_t,
]
lib.ggml_set_1d_inplace.restype = ctypes.POINTER(ggml_tensor)
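
# Usage sketch (illustrative, not part of the upstream API): the offset is in
# *bytes*, so writing b starting at element index i of an f32 tensor means
# i * 4 bytes; ggml_element_size (assumed bound earlier) avoids hardcoding 4.
def _example_set_1d(
    ctx: ggml_context_p, a: ggml_tensor_p, b: ggml_tensor_p
) -> ggml_tensor_p:
    offset = 2 * ggml_element_size(a)  # start writing at element index 2
    return ggml_set_1d(ctx, a, b, offset)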

# // b -> view(a,offset,nb1,nb2,nb3), return modified a
# GGML_API struct ggml_tensor * ggml_set_2d(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b,
#     size_t nb1,
#     size_t offset);
def ggml_set_2d(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    nb1: Union[ctypes.c_size_t, int],
    offset: Union[ctypes.c_size_t, int],
) -> ggml_tensor_p:
    """Write b into a 2d view of a (row stride nb1, byte offset) and return the modified a."""
    return lib.ggml_set_2d(ctx, a, b, nb1, offset)

lib.ggml_set_2d.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_size_t,
    ctypes.c_size_t,
]
lib.ggml_set_2d.restype = ctypes.POINTER(ggml_tensor)

# // b -> view(a,offset,nb1,nb2,nb3), return view(a)
# GGML_API struct ggml_tensor * ggml_set_2d_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b,
#     size_t nb1,
#     size_t offset);
def ggml_set_2d_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    nb1: Union[ctypes.c_size_t, int],
    offset: Union[ctypes.c_size_t, int],
) -> ggml_tensor_p:
    """Write b into a 2d view of a (row stride nb1, byte offset), in-place; returns a view of a."""
    return lib.ggml_set_2d_inplace(ctx, a, b, nb1, offset)

lib.ggml_set_2d_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_size_t,
    ctypes.c_size_t,
]
lib.ggml_set_2d_inplace.restype = ctypes.POINTER(ggml_tensor)

# // a -> b, return view(b)
# GGML_API struct ggml_tensor * ggml_cpy(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b);
def ggml_cpy(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    """Copy the data of a into b and return a view of b."""
    return lib.ggml_cpy(ctx, a, b)

lib.ggml_cpy.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_cpy.restype = ctypes.POINTER(ggml_tensor)

# // a -> b, in-place, return view(b)
# GGML_API struct ggml_tensor * ggml_cpy_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b);
def ggml_cpy_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    """Copy the data of a into b, in-place; returns a view of b."""
    return lib.ggml_cpy_inplace(ctx, a, b)

lib.ggml_cpy_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_cpy_inplace.restype = ctypes.POINTER(ggml_tensor)

# // make contiguous
# GGML_API struct ggml_tensor * ggml_cont(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_cont(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
    """Make a tensor contiguous and return the result.

    Parameters:
        ctx: ggml context
        a: tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_cont(ctx, a)

lib.ggml_cont.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_cont.restype = ctypes.POINTER(ggml_tensor)

# // make contiguous, in-place
# GGML_API struct ggml_tensor * ggml_cont_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_cont_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
) -> ggml_tensor_p:
    """Make a tensor contiguous and store the result in the first tensor.

    Parameters:
        ctx: ggml context
        a: tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_cont_inplace(ctx, a)

lib.ggml_cont_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_cont_inplace.restype = ctypes.POINTER(ggml_tensor)

# // return view(a), b specifies the new shape
# // TODO: when we start computing gradient, make a copy instead of view
# GGML_API struct ggml_tensor * ggml_reshape(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b);
def ggml_reshape(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    """Return a view of a with the shape of b."""
    return lib.ggml_reshape(ctx, a, b)

lib.ggml_reshape.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_reshape.restype = ctypes.POINTER(ggml_tensor)

# // return view(a)
# // TODO: when we start computing gradient, make a copy instead of view
# GGML_API struct ggml_tensor * ggml_reshape_1d(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     int64_t ne0);
def ggml_reshape_1d(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    ne0: Union[ctypes.c_int64, int],
) -> ggml_tensor_p:
    """Return a 1d view of a with ne0 elements."""
    return lib.ggml_reshape_1d(ctx, a, ne0)

lib.ggml_reshape_1d.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int64,
]
lib.ggml_reshape_1d.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_reshape_2d(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     int64_t ne0,
#     int64_t ne1);
def ggml_reshape_2d(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    ne0: Union[ctypes.c_int64, int],
    ne1: Union[ctypes.c_int64, int],
) -> ggml_tensor_p:
    """Return a 2d view of a with shape (ne0, ne1)."""
    return lib.ggml_reshape_2d(ctx, a, ne0, ne1)

lib.ggml_reshape_2d.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int64,
    ctypes.c_int64,
]
lib.ggml_reshape_2d.restype = ctypes.POINTER(ggml_tensor)

# // return view(a)
# // TODO: when we start computing gradient, make a copy instead of view
# GGML_API struct ggml_tensor * ggml_reshape_3d(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     int64_t ne0,
#     int64_t ne1,
#     int64_t ne2);
def ggml_reshape_3d(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    ne0: Union[ctypes.c_int64, int],
    ne1: Union[ctypes.c_int64, int],
    ne2: Union[ctypes.c_int64, int],
) -> ggml_tensor_p:
    """Return a 3d view of a with shape (ne0, ne1, ne2)."""
    return lib.ggml_reshape_3d(ctx, a, ne0, ne1, ne2)

lib.ggml_reshape_3d.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int64,
    ctypes.c_int64,
    ctypes.c_int64,
]
lib.ggml_reshape_3d.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_reshape_4d(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     int64_t ne0,
#     int64_t ne1,
#     int64_t ne2,
#     int64_t ne3);
def ggml_reshape_4d(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    ne0: Union[ctypes.c_int64, int],
    ne1: Union[ctypes.c_int64, int],
    ne2: Union[ctypes.c_int64, int],
    ne3: Union[ctypes.c_int64, int],
) -> ggml_tensor_p:
    """Return a 4d view of a with shape (ne0, ne1, ne2, ne3)."""
    return lib.ggml_reshape_4d(ctx, a, ne0, ne1, ne2, ne3)

lib.ggml_reshape_4d.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int64,
    ctypes.c_int64,
    ctypes.c_int64,
    ctypes.c_int64,
]
lib.ggml_reshape_4d.restype = ctypes.POINTER(ggml_tensor)
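
# Usage sketch (illustrative, not part of the upstream API): reshapes return
# views, so the element count must be preserved.
def _example_reshape(ctx: ggml_context_p) -> ggml_tensor_p:
    x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 12)
    return ggml_reshape_2d(ctx, x, 4, 3)  # 12 elements viewed as ne = [4, 3]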

# // offset in bytes
# GGML_API struct ggml_tensor * ggml_view_1d(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     int64_t ne0,
#     size_t offset);
def ggml_view_1d(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    ne0: Union[ctypes.c_int64, int],
    offset: Union[ctypes.c_size_t, int],
) -> ggml_tensor_p:
    """Return a 1d view of a with ne0 elements, starting at the given byte offset."""
    return lib.ggml_view_1d(ctx, a, ne0, offset)

lib.ggml_view_1d.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int64,
    ctypes.c_size_t,
]
lib.ggml_view_1d.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_view_2d(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     int64_t ne0,
#     int64_t ne1,
#     size_t nb1, // row stride in bytes
#     size_t offset);
def ggml_view_2d(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    ne0: Union[ctypes.c_int64, int],
    ne1: Union[ctypes.c_int64, int],
    nb1: Union[ctypes.c_size_t, int],
    offset: Union[ctypes.c_size_t, int],
) -> ggml_tensor_p:
    """Return a 2d view of a with shape (ne0, ne1), row stride nb1 (bytes) and the given byte offset."""
    return lib.ggml_view_2d(ctx, a, ne0, ne1, nb1, offset)

lib.ggml_view_2d.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int64,
    ctypes.c_int64,
    ctypes.c_size_t,
    ctypes.c_size_t,
]
lib.ggml_view_2d.restype = ctypes.POINTER(ggml_tensor)
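
# Usage sketch (illustrative, not part of the upstream API): views take byte
# strides and a byte offset. To view the last two rows of a contiguous
# ne = [4, 3] matrix, reuse its row stride nb[1] and skip one full row:
def _example_view_2d(ctx: ggml_context_p, m: ggml_tensor_p) -> ggml_tensor_p:
    row_bytes = m.contents.nb[1]  # nb1: row stride in bytes
    return ggml_view_2d(ctx, m, 4, 2, row_bytes, row_bytes)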

# GGML_API struct ggml_tensor * ggml_view_3d(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     int64_t ne0,
#     int64_t ne1,
#     int64_t ne2,
#     size_t nb1, // row stride in bytes
#     size_t nb2, // slice stride in bytes
#     size_t offset);
def ggml_view_3d(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    ne0: Union[ctypes.c_int64, int],
    ne1: Union[ctypes.c_int64, int],
    ne2: Union[ctypes.c_int64, int],
    nb1: Union[ctypes.c_size_t, int],
    nb2: Union[ctypes.c_size_t, int],
    offset: Union[ctypes.c_size_t, int],
) -> ggml_tensor_p:
    """Return a 3d view of a; nb1/nb2 are the row/slice strides in bytes."""
    return lib.ggml_view_3d(ctx, a, ne0, ne1, ne2, nb1, nb2, offset)

lib.ggml_view_3d.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int64,
    ctypes.c_int64,
    ctypes.c_int64,
    ctypes.c_size_t,
    ctypes.c_size_t,
    ctypes.c_size_t,
]
lib.ggml_view_3d.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_view_4d(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     int64_t ne0,
#     int64_t ne1,
#     int64_t ne2,
#     int64_t ne3,
#     size_t nb1, // row stride in bytes
#     size_t nb2, // slice stride in bytes
#     size_t nb3,
#     size_t offset);
def ggml_view_4d(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    ne0: Union[ctypes.c_int64, int],
    ne1: Union[ctypes.c_int64, int],
    ne2: Union[ctypes.c_int64, int],
    ne3: Union[ctypes.c_int64, int],
    nb1: Union[ctypes.c_size_t, int],
    nb2: Union[ctypes.c_size_t, int],
    nb3: Union[ctypes.c_size_t, int],
    offset: Union[ctypes.c_size_t, int],
) -> ggml_tensor_p:
    """Return a 4d view of a; nb1..nb3 are strides in bytes."""
    return lib.ggml_view_4d(ctx, a, ne0, ne1, ne2, ne3, nb1, nb2, nb3, offset)

lib.ggml_view_4d.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int64,
    ctypes.c_int64,
    ctypes.c_int64,
    ctypes.c_int64,
    ctypes.c_size_t,
    ctypes.c_size_t,
    ctypes.c_size_t,
    ctypes.c_size_t,
]
lib.ggml_view_4d.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_permute(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     int axis0,
#     int axis1,
#     int axis2,
#     int axis3);
def ggml_permute(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    axis0: Union[ctypes.c_int, int],
    axis1: Union[ctypes.c_int, int],
    axis2: Union[ctypes.c_int, int],
    axis3: Union[ctypes.c_int, int],
) -> ggml_tensor_p:
    """Return a view of a with its axes permuted as (axis0, axis1, axis2, axis3)."""
    return lib.ggml_permute(ctx, a, axis0, axis1, axis2, axis3)

lib.ggml_permute.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_int,
]
lib.ggml_permute.restype = ctypes.POINTER(ggml_tensor)

# // alias for ggml_permute(ctx, a, 1, 0, 2, 3)
# GGML_API struct ggml_tensor * ggml_transpose(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_transpose(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
    """Transpose *the first two dimensions* of a tensor and return the result.

    alias for `ggml_permute(ctx, a, 1, 0, 2, 3)`

    Parameters:
        ctx: ggml context
        a: tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_transpose(ctx, a)

lib.ggml_transpose.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_transpose.restype = ctypes.POINTER(ggml_tensor)
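
# Usage sketch (illustrative, not part of the upstream API): ggml_transpose
# only rearranges the ne/nb metadata, so the view it returns is generally
# non-contiguous; chain ggml_cont when a downstream op needs dense memory.
def _example_transpose(ctx: ggml_context_p, m: ggml_tensor_p) -> ggml_tensor_p:
    return ggml_cont(ctx, ggml_transpose(ctx, m))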

# GGML_API struct ggml_tensor * ggml_get_rows(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b);
def ggml_get_rows(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    """Gather the rows of a indexed by the integer values of b."""
    return lib.ggml_get_rows(ctx, a, b)

lib.ggml_get_rows.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_get_rows.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_get_rows_back(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b,
#     struct ggml_tensor * c);
def ggml_get_rows_back(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    c: ggml_tensor_p,
) -> ggml_tensor_p:
    return lib.ggml_get_rows_back(ctx, a, b, c)

lib.ggml_get_rows_back.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_get_rows_back.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_diag(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_diag(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
    return lib.ggml_diag(ctx, a)

lib.ggml_diag.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_diag.restype = ctypes.POINTER(ggml_tensor)

# // set elements above the diagonal to -INF
# GGML_API struct ggml_tensor * ggml_diag_mask_inf(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     int n_past);
def ggml_diag_mask_inf(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    n_past: Union[ctypes.c_int, int],
) -> ggml_tensor_p:
    """Set elements above the diagonal to -INF and return the result."""
    return lib.ggml_diag_mask_inf(ctx, a, n_past)

lib.ggml_diag_mask_inf.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
]
lib.ggml_diag_mask_inf.restype = ctypes.POINTER(ggml_tensor)

# // in-place, returns view(a)
# GGML_API struct ggml_tensor * ggml_diag_mask_inf_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     int n_past);
def ggml_diag_mask_inf_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    n_past: Union[ctypes.c_int, int],
) -> ggml_tensor_p:
    """Set elements above the diagonal to -INF, in-place; returns view(a)."""
    return lib.ggml_diag_mask_inf_inplace(ctx, a, n_past)

lib.ggml_diag_mask_inf_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
]
lib.ggml_diag_mask_inf_inplace.restype = ctypes.POINTER(ggml_tensor)
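
# Usage sketch (illustrative, not part of the upstream API): the canonical use
# of diagonal masking is causal attention, where positions after the query are
# driven to -INF so the following softmax assigns them zero probability:
def _example_causal_attention_probs(
    ctx: ggml_context_p, scores: ggml_tensor_p, n_past: int
) -> ggml_tensor_p:
    masked = ggml_diag_mask_inf_inplace(ctx, scores, n_past)
    return ggml_soft_max_inplace(ctx, masked)  # defined just below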

# // set elements above the diagonal to 0
# GGML_API struct ggml_tensor * ggml_diag_mask_zero(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     int n_past);
def ggml_diag_mask_zero(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    n_past: Union[ctypes.c_int, int],
) -> ggml_tensor_p:
    """Set elements above the diagonal to 0 and return the result."""
    return lib.ggml_diag_mask_zero(ctx, a, n_past)

lib.ggml_diag_mask_zero.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
]
lib.ggml_diag_mask_zero.restype = ctypes.POINTER(ggml_tensor)

# // in-place, returns view(a)
# GGML_API struct ggml_tensor * ggml_diag_mask_zero_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     int n_past);
def ggml_diag_mask_zero_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    n_past: Union[ctypes.c_int, int],
) -> ggml_tensor_p:
    """Set elements above the diagonal to 0, in-place; returns view(a)."""
    return lib.ggml_diag_mask_zero_inplace(ctx, a, n_past)

lib.ggml_diag_mask_zero_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
]
lib.ggml_diag_mask_zero_inplace.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_soft_max(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_soft_max(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
    """Apply the softmax function to a tensor and return the result."""
    return lib.ggml_soft_max(ctx, a)

lib.ggml_soft_max.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_soft_max.restype = ctypes.POINTER(ggml_tensor)

# // in-place, returns view(a)
# GGML_API struct ggml_tensor * ggml_soft_max_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a);
def ggml_soft_max_inplace(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
    """Apply the softmax function to a tensor, in-place; returns view(a)."""
    return lib.ggml_soft_max_inplace(ctx, a)

lib.ggml_soft_max_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_soft_max_inplace.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_soft_max_back(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b);
def ggml_soft_max_back(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    return lib.ggml_soft_max_back(ctx, a, b)

lib.ggml_soft_max_back.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_soft_max_back.restype = ctypes.POINTER(ggml_tensor)

# // in-place, returns view(a)
# GGML_API struct ggml_tensor * ggml_soft_max_back_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b);
def ggml_soft_max_back_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    return lib.ggml_soft_max_back_inplace(ctx, a, b)

lib.ggml_soft_max_back_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_soft_max_back_inplace.restype = ctypes.POINTER(ggml_tensor)

# // rotary position embedding
# // if mode & 1 == 1, skip n_past elements
# // if mode & 2 == 1, GPT-NeoX style
# // if mode & 4 == 1, ChatGLM style
# // TODO: avoid creating a new tensor every time
# GGML_API struct ggml_tensor * ggml_rope(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     int n_past,
#     int n_dims,
#     int mode,
#     int n_ctx);
def ggml_rope(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    n_past: Union[ctypes.c_int, int],
    n_dims: Union[ctypes.c_int, int],
    mode: Union[ctypes.c_int, int],
    n_ctx: Union[ctypes.c_int, int],
) -> ggml_tensor_p:
    """Rotary position embedding (see the mode flags in the comment above)."""
    return lib.ggml_rope(ctx, a, n_past, n_dims, mode, n_ctx)

lib.ggml_rope.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_int,
]
lib.ggml_rope.restype = ctypes.POINTER(ggml_tensor)

# // in-place, returns view(a)
# GGML_API struct ggml_tensor * ggml_rope_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     int n_past,
#     int n_dims,
#     int mode,
#     int n_ctx);
def ggml_rope_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    n_past: Union[ctypes.c_int, int],
    n_dims: Union[ctypes.c_int, int],
    mode: Union[ctypes.c_int, int],
    n_ctx: Union[ctypes.c_int, int],
) -> ggml_tensor_p:
    """Rotary position embedding, in-place; returns view(a)."""
    return lib.ggml_rope_inplace(ctx, a, n_past, n_dims, mode, n_ctx)

lib.ggml_rope_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_int,
]
lib.ggml_rope_inplace.restype = ctypes.POINTER(ggml_tensor)
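
# Usage sketch (illustrative, not part of the upstream API): applying rotary
# embeddings to a query tensor during incremental decoding; mode 0 selects the
# original (non-NeoX) rotation and n_past shifts the position indices:
def _example_rope(
    ctx: ggml_context_p,
    q: ggml_tensor_p,
    n_past: int,
    head_dim: int,
    n_ctx: int,
) -> ggml_tensor_p:
    return ggml_rope_inplace(ctx, q, n_past, head_dim, 0, n_ctx)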

# // custom RoPE
# GGML_API struct ggml_tensor * ggml_rope_custom(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     int n_past,
#     int n_dims,
#     int mode,
#     int n_ctx,
#     float freq_base,
#     float freq_scale);
def ggml_rope_custom(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    n_past: Union[ctypes.c_int, int],
    n_dims: Union[ctypes.c_int, int],
    mode: Union[ctypes.c_int, int],
    n_ctx: Union[ctypes.c_int, int],
    freq_base: Union[ctypes.c_float, float],
    freq_scale: Union[ctypes.c_float, float],
) -> ggml_tensor_p:
    """Rotary position embedding with custom freq_base and freq_scale."""
    return lib.ggml_rope_custom(
        ctx, a, n_past, n_dims, mode, n_ctx, freq_base, freq_scale
    )

lib.ggml_rope_custom.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_float,
    ctypes.c_float,
]
lib.ggml_rope_custom.restype = ctypes.POINTER(ggml_tensor)

# // in-place, returns view(a)
# GGML_API struct ggml_tensor * ggml_rope_custom_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     int n_past,
#     int n_dims,
#     int mode,
#     int n_ctx,
#     float freq_base,
#     float freq_scale);
def ggml_rope_custom_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    n_past: Union[ctypes.c_int, int],
    n_dims: Union[ctypes.c_int, int],
    mode: Union[ctypes.c_int, int],
    n_ctx: Union[ctypes.c_int, int],
    freq_base: Union[ctypes.c_float, float],
    freq_scale: Union[ctypes.c_float, float],
) -> ggml_tensor_p:
    """Rotary position embedding with custom freq_base and freq_scale, in-place; returns view(a)."""
    return lib.ggml_rope_custom_inplace(
        ctx, a, n_past, n_dims, mode, n_ctx, freq_base, freq_scale
    )

lib.ggml_rope_custom_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_float,
    ctypes.c_float,
]
lib.ggml_rope_custom_inplace.restype = ctypes.POINTER(ggml_tensor)

# // xPos RoPE, in-place, returns view(a)
# GGML_API struct ggml_tensor * ggml_rope_xpos_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     int n_past,
#     int n_dims,
#     float base,
#     bool down);
def ggml_rope_xpos_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    n_past: Union[ctypes.c_int, int],
    n_dims: Union[ctypes.c_int, int],
    base: Union[ctypes.c_float, float],
    down: Union[ctypes.c_bool, bool],
) -> ggml_tensor_p:
    """xPos rotary position embedding, in-place; returns view(a)."""
    return lib.ggml_rope_xpos_inplace(ctx, a, n_past, n_dims, base, down)

lib.ggml_rope_xpos_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_float,
    ctypes.c_bool,
]
lib.ggml_rope_xpos_inplace.restype = ctypes.POINTER(ggml_tensor)

# // rotary position embedding backward, i.e. compute dx from dy
# // a - dy
# GGML_API struct ggml_tensor * ggml_rope_back(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     int n_past,
#     int n_dims,
#     int mode,
#     int n_ctx,
#     float freq_base,
#     float freq_scale,
#     float xpos_base,
#     bool xpos_down);
def ggml_rope_back(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    n_past: Union[ctypes.c_int, int],
    n_dims: Union[ctypes.c_int, int],
    mode: Union[ctypes.c_int, int],
    n_ctx: Union[ctypes.c_int, int],
    freq_base: Union[ctypes.c_float, float],
    freq_scale: Union[ctypes.c_float, float],
    xpos_base: Union[ctypes.c_float, float],
    xpos_down: Union[ctypes.c_bool, bool],
) -> ggml_tensor_p:
    """Rotary position embedding backward pass: compute dx from dy (a holds dy)."""
    return lib.ggml_rope_back(
        ctx, a, n_past, n_dims, mode, n_ctx, freq_base, freq_scale, xpos_base, xpos_down
    )

lib.ggml_rope_back.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_float,
    ctypes.c_float,
    ctypes.c_float,
    ctypes.c_bool,
]
lib.ggml_rope_back.restype = ctypes.POINTER(ggml_tensor)

# // alibi position embedding
# // in-place, returns view(a)
# struct ggml_tensor * ggml_alibi(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     int n_past,
#     int n_head,
#     float bias_max);
def ggml_alibi(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    n_past: Union[ctypes.c_int, int],
    n_head: Union[ctypes.c_int, int],
    bias_max: Union[ctypes.c_float, float],
) -> ggml_tensor_p:
    """ALiBi position embedding, in-place; returns view(a)."""
    return lib.ggml_alibi(ctx, a, n_past, n_head, bias_max)

lib.ggml_alibi.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_float,
]
lib.ggml_alibi.restype = ctypes.POINTER(ggml_tensor)

# // clamp
# // in-place, returns view(a)
# struct ggml_tensor * ggml_clamp(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     float min,
#     float max);
def ggml_clamp(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    min: Union[ctypes.c_float, float],
    max: Union[ctypes.c_float, float],
) -> ggml_tensor_p:
    """Clamp the elements of a to the range [min, max], in-place; returns view(a)."""
    return lib.ggml_clamp(ctx, a, min, max)

lib.ggml_clamp.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_float,
    ctypes.c_float,
]
lib.ggml_clamp.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_conv_1d(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b,
#     int s0, // stride
#     int p0, // padding
#     int d0); // dilation
def ggml_conv_1d(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    s0: Union[ctypes.c_int, int],
    p0: Union[ctypes.c_int, int],
    d0: Union[ctypes.c_int, int],
) -> ggml_tensor_p:
    """Convolution 1D

    Parameters:
        a: input tensor
        b: filter tensor
        s0: stride
        p0: padding
        d0: dilation

    Returns:
        output tensor"""
    return lib.ggml_conv_1d(ctx, a, b, s0, p0, d0)

lib.ggml_conv_1d.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_int,
]
lib.ggml_conv_1d.restype = ctypes.POINTER(ggml_tensor)
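
# Usage sketch (illustrative, not part of the upstream API): the output length
# follows standard convolution arithmetic for stride s0, padding p0 and
# dilation d0:
def _example_conv_1d_out_len(in_len: int, k: int, s0: int, p0: int, d0: int) -> int:
    return (in_len + 2 * p0 - d0 * (k - 1) - 1) // s0 + 1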

# // conv_1d with padding = half
# // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
# GGML_API struct ggml_tensor * ggml_conv_1d_ph(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b,
#     int s,
#     int d);
def ggml_conv_1d_ph(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    s: Union[ctypes.c_int, int],
    d: Union[ctypes.c_int, int],
) -> ggml_tensor_p:
    """Convolution 1D with padding = half

    Parameters:
        a: input tensor
        b: filter tensor
        s: stride
        d: dilation

    Returns:
        output tensor"""
    return lib.ggml_conv_1d_ph(ctx, a, b, s, d)

lib.ggml_conv_1d_ph.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
    ctypes.c_int,
]
lib.ggml_conv_1d_ph.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_conv_2d(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b,
#     int s0,
#     int s1,
#     int p0,
#     int p1,
#     int d0,
#     int d1);
def ggml_conv_2d(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    s0: Union[ctypes.c_int, int],
    s1: Union[ctypes.c_int, int],
    p0: Union[ctypes.c_int, int],
    p1: Union[ctypes.c_int, int],
    d0: Union[ctypes.c_int, int],
    d1: Union[ctypes.c_int, int],
) -> ggml_tensor_p:
    """Convolution 2D

    Parameters:
        a: input tensor
        b: filter tensor
        s0: stride (first dimension)
        s1: stride (second dimension)
        p0: padding (first dimension)
        p1: padding (second dimension)
        d0: dilation (first dimension)
        d1: dilation (second dimension)

    Returns:
        output tensor"""
    return lib.ggml_conv_2d(ctx, a, b, s0, s1, p0, p1, d0, d1)

lib.ggml_conv_2d.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_int,
]
lib.ggml_conv_2d.restype = ctypes.POINTER(ggml_tensor)

# // kernel size is a->ne[0] x a->ne[1]
# // stride is equal to kernel size
# // padding is zero
# // example:
# // a: 16 16 3 768
# // b: 1024 1024 3 1
# // res: 64 64 768 1
# // used in sam
# GGML_API struct ggml_tensor * ggml_conv_2d_sk_p0(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b);
def ggml_conv_2d_sk_p0(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    """Convolution 2D with stride = kernel size and padding = zero

    Parameters:
        a: input tensor
        b: filter tensor

    Returns:
        output tensor"""
    return lib.ggml_conv_2d_sk_p0(ctx, a, b)

lib.ggml_conv_2d_sk_p0.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_conv_2d_sk_p0.restype = ctypes.POINTER(ggml_tensor)

# // kernel size is a->ne[0] x a->ne[1]
# // stride is 1
# // padding is half
# // example:
# // a: 3 3 256 256
# // b: 64 64 256 1
# // res: 64 64 256 1
# // used in sam
# GGML_API struct ggml_tensor * ggml_conv_2d_s1_ph(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b);
def ggml_conv_2d_s1_ph(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    """Convolution 2D with stride = 1 and padding = half

    Parameters:
        a: input tensor
        b: filter tensor

    Returns:
        output tensor"""
    return lib.ggml_conv_2d_s1_ph(ctx, a, b)

lib.ggml_conv_2d_s1_ph.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_conv_2d_s1_ph.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_conv_transpose_2d_p0(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b,
#     int stride);
def ggml_conv_transpose_2d_p0(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    stride: Union[ctypes.c_int, int],
) -> ggml_tensor_p:
    """Convolution Transpose 2D with padding = zero

    Parameters:
        a: input tensor
        b: filter tensor
        stride: stride

    Returns:
        output tensor"""
    return lib.ggml_conv_transpose_2d_p0(ctx, a, b, stride)

lib.ggml_conv_transpose_2d_p0.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
]
lib.ggml_conv_transpose_2d_p0.restype = ctypes.POINTER(ggml_tensor)

# enum ggml_op_pool {
#     GGML_OP_POOL_MAX,
#     GGML_OP_POOL_AVG,
#     GGML_OP_POOL_COUNT,
# };
GGML_OP_POOL_MAX = 0
GGML_OP_POOL_AVG = 1
GGML_OP_POOL_COUNT = 2

# GGML_API struct ggml_tensor * ggml_pool_1d(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     enum ggml_op_pool op,
#     int k0, // kernel size
#     int s0, // stride
#     int p0); // padding
def ggml_pool_1d(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    op: Union[ctypes.c_int, int],
    k0: Union[ctypes.c_int, int],
    s0: Union[ctypes.c_int, int],
    p0: Union[ctypes.c_int, int],
) -> ggml_tensor_p:
    """1D Pooling

    Parameters:
        a: input tensor
        op: pooling operation
        k0: kernel size
        s0: stride
        p0: padding

    Returns:
        output tensor"""
    return lib.ggml_pool_1d(ctx, a, op, k0, s0, p0)

lib.ggml_pool_1d.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_int,
]
lib.ggml_pool_1d.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_pool_2d(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     enum ggml_op_pool op,
#     int k0,
#     int k1,
#     int s0,
#     int s1,
#     int p0,
#     int p1);
def ggml_pool_2d(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    op: Union[ctypes.c_int, int],
    k0: Union[ctypes.c_int, int],
    k1: Union[ctypes.c_int, int],
    s0: Union[ctypes.c_int, int],
    s1: Union[ctypes.c_int, int],
    p0: Union[ctypes.c_int, int],
    p1: Union[ctypes.c_int, int],
) -> ggml_tensor_p:
    """2D Pooling

    Parameters:
        a: input tensor
        op: pooling operation
        k0: kernel size (first dimension)
        k1: kernel size (second dimension)
        s0: stride (first dimension)
        s1: stride (second dimension)
        p0: padding (first dimension)
        p1: padding (second dimension)

    Returns:
        output tensor"""
    return lib.ggml_pool_2d(ctx, a, op, k0, k1, s0, s1, p0, p1)

# Note: the C function takes seven ints (op, k0, k1, s0, s1, p0, p1).
lib.ggml_pool_2d.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_int,
]
lib.ggml_pool_2d.restype = ctypes.POINTER(ggml_tensor)
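
# Usage sketch (illustrative, not part of the upstream API): per-dimension
# pooling output size, the same arithmetic as a convolution whose filter is
# the (undilated) kernel:
def _example_pool_out_len(in_len: int, k: int, s: int, p: int) -> int:
    return (in_len + 2 * p - k) // s + 1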

# // nearest interpolate
# // used in stable-diffusion
# GGML_API struct ggml_tensor * ggml_upscale(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     int scale_factor);
def ggml_upscale(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    scale_factor: Union[ctypes.c_int, int],
) -> ggml_tensor_p:
    """Upscale a by scale_factor using nearest interpolation.

    Parameters:
        a: input tensor
        scale_factor: scale factor

    Returns:
        output tensor"""
    return lib.ggml_upscale(ctx, a, scale_factor)

lib.ggml_upscale.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
]
lib.ggml_upscale.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_flash_attn(
#     struct ggml_context * ctx,
#     struct ggml_tensor * q,
#     struct ggml_tensor * k,
#     struct ggml_tensor * v,
#     bool masked);
def ggml_flash_attn(
    ctx: ggml_context_p,
    q: ggml_tensor_p,
    k: ggml_tensor_p,
    v: ggml_tensor_p,
    masked: Union[ctypes.c_bool, bool],
) -> ggml_tensor_p:
    return lib.ggml_flash_attn(ctx, q, k, v, masked)

lib.ggml_flash_attn.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_bool,
]
lib.ggml_flash_attn.restype = ctypes.POINTER(ggml_tensor)
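
# Usage sketch (illustrative, not part of the upstream API): ggml_flash_attn
# fuses the QK^T product, optional causal masking, softmax and the value
# product into a single op:
def _example_flash_attn(
    ctx: ggml_context_p, q: ggml_tensor_p, k: ggml_tensor_p, v: ggml_tensor_p
) -> ggml_tensor_p:
    return ggml_flash_attn(ctx, q, k, v, True)  # masked=True: causal mask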

# GGML_API struct ggml_tensor * ggml_flash_attn_back(
#     struct ggml_context * ctx,
#     struct ggml_tensor * q,
#     struct ggml_tensor * k,
#     struct ggml_tensor * v,
#     struct ggml_tensor * d,
#     bool masked);
def ggml_flash_attn_back(
    ctx: ggml_context_p,
    q: ggml_tensor_p,
    k: ggml_tensor_p,
    v: ggml_tensor_p,
    d: ggml_tensor_p,
    masked: Union[ctypes.c_bool, bool],
) -> ggml_tensor_p:
    return lib.ggml_flash_attn_back(ctx, q, k, v, d, masked)

lib.ggml_flash_attn_back.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_bool,
]
lib.ggml_flash_attn_back.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_flash_ff(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     struct ggml_tensor * b0,
#     struct ggml_tensor * b1,
#     struct ggml_tensor * c0,
#     struct ggml_tensor * c1);
def ggml_flash_ff(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b0: ggml_tensor_p,
    b1: ggml_tensor_p,
    c0: ggml_tensor_p,
    c1: ggml_tensor_p,
) -> ggml_tensor_p:
    return lib.ggml_flash_ff(ctx, a, b0, b1, c0, c1)

lib.ggml_flash_ff.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_flash_ff.restype = ctypes.POINTER(ggml_tensor)

# // partition into non-overlapping windows with padding if needed
# // example:
# // a: 768 64 64 1
# // w: 14
# // res: 768 14 14 25
# // used in sam
# GGML_API struct ggml_tensor * ggml_win_part(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     int w);
def ggml_win_part(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    w: Union[ctypes.c_int, int],
) -> ggml_tensor_p:
    """Partition a into non-overlapping windows of size w, padding if needed."""
    return lib.ggml_win_part(ctx, a, w)

lib.ggml_win_part.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
]
lib.ggml_win_part.restype = ctypes.POINTER(ggml_tensor)
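
# Usage sketch (illustrative, not part of the upstream API): following the
# sizes in the comment above, a 768 x 64 x 64 x 1 activation split into 14x14
# windows yields 25 padded windows; ggml_win_unpart (below) undoes the split:
def _example_win_roundtrip(ctx: ggml_context_p, x: ggml_tensor_p) -> ggml_tensor_p:
    parts = ggml_win_part(ctx, x, 14)
    return ggml_win_unpart(ctx, parts, 64, 64, 14)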

# // reverse of ggml_win_part
# // used in sam
# GGML_API struct ggml_tensor * ggml_win_unpart(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     int w0,
#     int h0,
#     int w);
def ggml_win_unpart(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    w0: Union[ctypes.c_int, int],
    h0: Union[ctypes.c_int, int],
    w: Union[ctypes.c_int, int],
) -> ggml_tensor_p:
    """Reverse of ggml_win_part."""
    return lib.ggml_win_unpart(ctx, a, w0, h0, w)

lib.ggml_win_unpart.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_int,
]
lib.ggml_win_unpart.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_unary(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     enum ggml_unary_op op);
def ggml_unary(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    op: Union[ctypes.c_int, int],
) -> ggml_tensor_p:
    """Apply the unary operator op (a ggml_unary_op value) to a."""
    return lib.ggml_unary(ctx, a, op)

lib.ggml_unary.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
]
lib.ggml_unary.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_unary_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor * a,
#     enum ggml_unary_op op);
def ggml_unary_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    op: Union[ctypes.c_int, int],
) -> ggml_tensor_p:
    """Apply the unary operator op (a ggml_unary_op value) to a, in-place."""
    return lib.ggml_unary_inplace(ctx, a, op)

lib.ggml_unary_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
]
lib.ggml_unary_inplace.restype = ctypes.POINTER(ggml_tensor)
  3708. # // used in sam
  3709. # GGML_API struct ggml_tensor * ggml_get_rel_pos(
  3710. # struct ggml_context * ctx,
  3711. # struct ggml_tensor * a,
  3712. # int qh,
  3713. # int kh);
  3714. def ggml_get_rel_pos(
  3715. ctx: ggml_context_p,
  3716. a: ggml_tensor_p,
  3717. qh: Union[ctypes.c_int, int],
  3718. kh: Union[ctypes.c_int, int],
  3719. ) -> ggml_tensor_p:
  3720. return lib.ggml_get_rel_pos(ctx, a, qh, kh)
  3721. lib.ggml_get_rel_pos.argtypes = [
  3722. ggml_context_p,
  3723. ctypes.POINTER(ggml_tensor),
  3724. ctypes.c_int,
  3725. ctypes.c_int,
  3726. ]
  3727. lib.ggml_get_rel_pos.restype = ctypes.POINTER(ggml_tensor)
  3728. # // used in sam
  3729. # GGML_API struct ggml_tensor * ggml_add_rel_pos(
  3730. # struct ggml_context * ctx,
  3731. # struct ggml_tensor * a,
  3732. # struct ggml_tensor * pw,
  3733. # struct ggml_tensor * ph);
  3734. def ggml_add_rel_pos(
  3735. ctx: ggml_context_p,
  3736. a: ggml_tensor_p,
  3737. pw: ggml_tensor_p,
  3738. ph: ggml_tensor_p,
  3739. ) -> ggml_tensor_p:
  3740. return lib.ggml_add_rel_pos(ctx, a, pw, ph)
  3741. lib.ggml_add_rel_pos.argtypes = [
  3742. ggml_context_p,
  3743. ctypes.POINTER(ggml_tensor),
  3744. ctypes.POINTER(ggml_tensor),
  3745. ctypes.POINTER(ggml_tensor),
  3746. ]
  3747. lib.ggml_add_rel_pos.restype = ctypes.POINTER(ggml_tensor)
  3748. # GGML_API struct ggml_tensor * ggml_add_rel_pos_inplace(
  3749. # struct ggml_context * ctx,
  3750. # struct ggml_tensor * a,
  3751. # struct ggml_tensor * pw,
  3752. # struct ggml_tensor * ph);
  3753. def ggml_add_rel_pos_inplace(
  3754. ctx: ggml_context_p,
  3755. a: ggml_tensor_p,
  3756. pw: ggml_tensor_p,
  3757. ph: ggml_tensor_p,
  3758. ) -> ggml_tensor_p:
  3759. return lib.ggml_add_rel_pos_inplace(ctx, a, pw, ph)
  3760. lib.ggml_add_rel_pos_inplace.argtypes = [
  3761. ggml_context_p,
  3762. ctypes.POINTER(ggml_tensor),
  3763. ctypes.POINTER(ggml_tensor),
  3764. ctypes.POINTER(ggml_tensor),
  3765. ]
  3766. lib.ggml_add_rel_pos_inplace.restype = ctypes.POINTER(ggml_tensor)

# // custom operators (DEPRECATED)

# typedef void (*ggml_unary_op_f32_t)(const int, float *, const float *);
ggml_unary_op_f32_t = ctypes.CFUNCTYPE(
    None, ctypes.c_int, ctypes.POINTER(ctypes.c_float), ctypes.POINTER(ctypes.c_float)
)

# typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
ggml_binary_op_f32_t = ctypes.CFUNCTYPE(
    None,
    ctypes.c_int,
    ctypes.POINTER(ctypes.c_float),
    ctypes.POINTER(ctypes.c_float),
    ctypes.POINTER(ctypes.c_float),
)

# typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
ggml_custom1_op_f32_t = ctypes.CFUNCTYPE(
    None, ctypes.POINTER(ggml_tensor), ctypes.POINTER(ggml_tensor)
)
"""Unary operator function type"""

# typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
ggml_custom2_op_f32_t = ctypes.CFUNCTYPE(
    None,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
)
"""Binary operator function type"""

# typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
ggml_custom3_op_f32_t = ctypes.CFUNCTYPE(
    None,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
)
"""Ternary operator function type"""

# GGML_API struct ggml_tensor * ggml_map_unary_f32(
#         struct ggml_context * ctx,
#         struct ggml_tensor  * a,
#         ggml_unary_op_f32_t   fun);
def ggml_map_unary_f32(
    ctx: ggml_context_p, a: ggml_tensor_p, fun: "ctypes._FuncPointer"  # type: ignore
) -> ggml_tensor_p:
    return lib.ggml_map_unary_f32(ctx, a, fun)

lib.ggml_map_unary_f32.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ggml_unary_op_f32_t,
]
lib.ggml_map_unary_f32.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
#         struct ggml_context * ctx,
#         struct ggml_tensor  * a,
#         ggml_unary_op_f32_t   fun);
def ggml_map_unary_inplace_f32(
    ctx: ggml_context_p, a: ggml_tensor_p, fun: "ctypes._FuncPointer"  # type: ignore
) -> ggml_tensor_p:
    return lib.ggml_map_unary_inplace_f32(ctx, a, fun)

lib.ggml_map_unary_inplace_f32.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ggml_unary_op_f32_t,
]
lib.ggml_map_unary_inplace_f32.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_map_binary_f32(
#         struct ggml_context  * ctx,
#         struct ggml_tensor   * a,
#         struct ggml_tensor   * b,
#         ggml_binary_op_f32_t   fun);
def ggml_map_binary_f32(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    fun: "ctypes._FuncPointer",  # type: ignore
) -> ggml_tensor_p:
    return lib.ggml_map_binary_f32(ctx, a, b, fun)

lib.ggml_map_binary_f32.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ggml_binary_op_f32_t,
]
lib.ggml_map_binary_f32.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
#         struct ggml_context  * ctx,
#         struct ggml_tensor   * a,
#         struct ggml_tensor   * b,
#         ggml_binary_op_f32_t   fun);
def ggml_map_binary_inplace_f32(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    fun: "ctypes._FuncPointer",  # type: ignore
) -> ggml_tensor_p:
    return lib.ggml_map_binary_inplace_f32(ctx, a, b, fun)

lib.ggml_map_binary_inplace_f32.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ggml_binary_op_f32_t,
]
lib.ggml_map_binary_inplace_f32.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_map_custom1_f32(
#         struct ggml_context   * ctx,
#         struct ggml_tensor    * a,
#         ggml_custom1_op_f32_t   fun);
def ggml_map_custom1_f32(
    ctx: ggml_context_p, a: ggml_tensor_p, fun: "ctypes._FuncPointer"  # type: ignore
) -> ggml_tensor_p:
    """Custom unary operator on a tensor.

    Example:
        ```python
        import ggml

        @ggml.ggml_custom1_op_f32_t
        def custom_op(b: ggml.ggml_tensor_p, a: ggml.ggml_tensor_p):
            # do something with a and copy to b
            return

        ...

        b = ggml.ggml_map_custom1_f32(ctx, a, custom_op)
        ```

    Parameters:
        a: input tensor
        fun (ggml.ggml_custom1_op_f32_t): function to apply to each element

    Returns:
        output tensor"""
    return lib.ggml_map_custom1_f32(ctx, a, fun)

lib.ggml_map_custom1_f32.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ggml_custom1_op_f32_t,
]
lib.ggml_map_custom1_f32.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
#         struct ggml_context   * ctx,
#         struct ggml_tensor    * a,
#         ggml_custom1_op_f32_t   fun);
def ggml_map_custom1_inplace_f32(
    ctx: ggml_context_p, a: ggml_tensor_p, fun: "ctypes._FuncPointer"  # type: ignore
) -> ggml_tensor_p:
    """Custom unary operator on a tensor inplace.

    Parameters:
        a: input tensor
        fun (ggml.ggml_custom1_op_f32_t): function to apply to each element

    Returns:
        output tensor"""
    return lib.ggml_map_custom1_inplace_f32(ctx, a, fun)

lib.ggml_map_custom1_inplace_f32.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ggml_custom1_op_f32_t,
]
lib.ggml_map_custom1_inplace_f32.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_map_custom2_f32(
#         struct ggml_context   * ctx,
#         struct ggml_tensor    * a,
#         struct ggml_tensor    * b,
#         ggml_custom2_op_f32_t   fun);
def ggml_map_custom2_f32(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    fun: "ctypes._FuncPointer",  # type: ignore
) -> ggml_tensor_p:
    """Custom binary operator on two tensors.

    Parameters:
        a: input tensor
        b: input tensor
        fun (ggml.ggml_custom2_op_f32_t): function to apply to each element

    Returns:
        output tensor"""
    return lib.ggml_map_custom2_f32(ctx, a, b, fun)

lib.ggml_map_custom2_f32.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ggml_custom2_op_f32_t,
]
lib.ggml_map_custom2_f32.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
#         struct ggml_context   * ctx,
#         struct ggml_tensor    * a,
#         struct ggml_tensor    * b,
#         ggml_custom2_op_f32_t   fun);
def ggml_map_custom2_inplace_f32(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    fun: "ctypes._FuncPointer",  # type: ignore
) -> ggml_tensor_p:
    """Custom binary operator on two tensors inplace.

    Parameters:
        a: input tensor
        b: input tensor
        fun (ggml.ggml_custom2_op_f32_t): function to apply to each element

    Returns:
        output tensor"""
    return lib.ggml_map_custom2_inplace_f32(ctx, a, b, fun)

lib.ggml_map_custom2_inplace_f32.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ggml_custom2_op_f32_t,
]
lib.ggml_map_custom2_inplace_f32.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_map_custom3_f32(
#         struct ggml_context   * ctx,
#         struct ggml_tensor    * a,
#         struct ggml_tensor    * b,
#         struct ggml_tensor    * c,
#         ggml_custom3_op_f32_t   fun);
def ggml_map_custom3_f32(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    c: ggml_tensor_p,
    fun: "ctypes._FuncPointer",  # type: ignore
) -> ggml_tensor_p:
    """Custom ternary operator on three tensors.

    Parameters:
        a: input tensor
        b: input tensor
        c: input tensor
        fun (ggml.ggml_custom3_op_f32_t): function to apply to each element

    Returns:
        output tensor"""
    return lib.ggml_map_custom3_f32(ctx, a, b, c, fun)

lib.ggml_map_custom3_f32.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ggml_custom3_op_f32_t,
]
lib.ggml_map_custom3_f32.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
#         struct ggml_context   * ctx,
#         struct ggml_tensor    * a,
#         struct ggml_tensor    * b,
#         struct ggml_tensor    * c,
#         ggml_custom3_op_f32_t   fun);
def ggml_map_custom3_inplace_f32(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    c: ggml_tensor_p,
    fun: "ctypes._FuncPointer",  # type: ignore
) -> ggml_tensor_p:
    """Custom ternary operator on three tensors inplace.

    Parameters:
        a: input tensor
        b: input tensor
        c: input tensor
        fun (ggml.ggml_custom3_op_f32_t): function to apply to each element

    Returns:
        output tensor"""
    return lib.ggml_map_custom3_inplace_f32(ctx, a, b, c, fun)

lib.ggml_map_custom3_inplace_f32.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ggml_custom3_op_f32_t,
]
lib.ggml_map_custom3_inplace_f32.restype = ctypes.POINTER(ggml_tensor)

# // custom operators v2

# typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata);
ggml_custom1_op_t = ctypes.CFUNCTYPE(
    None,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_void_p,
)
"""Custom unary operator on a tensor."""

# typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata);
ggml_custom2_op_t = ctypes.CFUNCTYPE(
    None,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_void_p,
)
"""Custom binary operator on two tensors."""

# typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata);
ggml_custom3_op_t = ctypes.CFUNCTYPE(
    None,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_void_p,
)
"""Custom ternary operator on three tensors."""

# #define GGML_N_TASKS_MAX -1
GGML_N_TASKS_MAX = -1

# GGML_API struct ggml_tensor * ggml_map_custom1(
#         struct ggml_context * ctx,
#         struct ggml_tensor  * a,
#         ggml_custom1_op_t     fun,
#         int                   n_tasks,
#         void                * userdata);
def ggml_map_custom1(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    fun: "ctypes._FuncPointer",  # type: ignore
    n_tasks: Union[ctypes.c_int, int],
    userdata: Optional[ctypes.c_void_p],
) -> ggml_tensor_p:
    return lib.ggml_map_custom1(ctx, a, fun, n_tasks, userdata)

lib.ggml_map_custom1.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ggml_custom1_op_t,
    ctypes.c_int,
    ctypes.c_void_p,
]
lib.ggml_map_custom1.restype = ctypes.POINTER(ggml_tensor)
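
# Illustrative sketch (not part of the upstream bindings): wrap a Python
# callable as a ggml_custom1_op_t and schedule it with ggml_map_custom1.
# ith/nth are the worker index and worker count; with n_tasks=1 the callback
# runs single-threaded. The callback object must be kept alive (e.g. at
# module scope) for as long as the graph may still be computed.
#
#   @ggml_custom1_op_t
#   def identity_op(dst, a, ith, nth, userdata):
#       # dst and a are ctypes pointers to ggml_tensor, so fields such as
#       # dst.contents.data are available for filling in the result
#       pass
#
#   out = ggml_map_custom1(ctx, a, identity_op, 1, None)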

# GGML_API struct ggml_tensor * ggml_map_custom1_inplace(
#         struct ggml_context * ctx,
#         struct ggml_tensor  * a,
#         ggml_custom1_op_t     fun,
#         int                   n_tasks,
#         void                * userdata);
def ggml_map_custom1_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    fun: "ctypes._FuncPointer",  # type: ignore
    n_tasks: Union[ctypes.c_int, int],
    userdata: Optional[ctypes.c_void_p],
) -> ggml_tensor_p:
    return lib.ggml_map_custom1_inplace(ctx, a, fun, n_tasks, userdata)

lib.ggml_map_custom1_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ggml_custom1_op_t,
    ctypes.c_int,
    ctypes.c_void_p,
]
lib.ggml_map_custom1_inplace.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_map_custom2(
#         struct ggml_context * ctx,
#         struct ggml_tensor  * a,
#         struct ggml_tensor  * b,
#         ggml_custom2_op_t     fun,
#         int                   n_tasks,
#         void                * userdata);
def ggml_map_custom2(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    fun: "ctypes._FuncPointer",  # type: ignore
    n_tasks: Union[ctypes.c_int, int],
    userdata: Optional[ctypes.c_void_p],
) -> ggml_tensor_p:
    return lib.ggml_map_custom2(ctx, a, b, fun, n_tasks, userdata)

lib.ggml_map_custom2.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ggml_custom2_op_t,
    ctypes.c_int,
    ctypes.c_void_p,
]
lib.ggml_map_custom2.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_map_custom2_inplace(
#         struct ggml_context * ctx,
#         struct ggml_tensor  * a,
#         struct ggml_tensor  * b,
#         ggml_custom2_op_t     fun,
#         int                   n_tasks,
#         void                * userdata);
def ggml_map_custom2_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    fun: "ctypes._FuncPointer",  # type: ignore
    n_tasks: Union[ctypes.c_int, int],
    userdata: Optional[ctypes.c_void_p],
) -> ggml_tensor_p:
    return lib.ggml_map_custom2_inplace(ctx, a, b, fun, n_tasks, userdata)

lib.ggml_map_custom2_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ggml_custom2_op_t,
    ctypes.c_int,
    ctypes.c_void_p,
]
lib.ggml_map_custom2_inplace.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_map_custom3(
#         struct ggml_context * ctx,
#         struct ggml_tensor  * a,
#         struct ggml_tensor  * b,
#         struct ggml_tensor  * c,
#         ggml_custom3_op_t     fun,
#         int                   n_tasks,
#         void                * userdata);
def ggml_map_custom3(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    c: ggml_tensor_p,
    fun: "ctypes._FuncPointer",  # type: ignore
    n_tasks: Union[ctypes.c_int, int],
    userdata: Optional[ctypes.c_void_p],
) -> ggml_tensor_p:
    return lib.ggml_map_custom3(ctx, a, b, c, fun, n_tasks, userdata)

lib.ggml_map_custom3.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ggml_custom3_op_t,
    ctypes.c_int,
    ctypes.c_void_p,
]
lib.ggml_map_custom3.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_map_custom3_inplace(
#         struct ggml_context * ctx,
#         struct ggml_tensor  * a,
#         struct ggml_tensor  * b,
#         struct ggml_tensor  * c,
#         ggml_custom3_op_t     fun,
#         int                   n_tasks,
#         void                * userdata);
def ggml_map_custom3_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    c: ggml_tensor_p,
    fun: "ctypes._FuncPointer",  # type: ignore
    n_tasks: Union[ctypes.c_int, int],
    userdata: Optional[ctypes.c_void_p],
) -> ggml_tensor_p:
    return lib.ggml_map_custom3_inplace(ctx, a, b, c, fun, n_tasks, userdata)

lib.ggml_map_custom3_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ggml_custom3_op_t,
    ctypes.c_int,
    ctypes.c_void_p,
]
lib.ggml_map_custom3_inplace.restype = ctypes.POINTER(ggml_tensor)

# // loss function

# GGML_API struct ggml_tensor * ggml_cross_entropy_loss(
#         struct ggml_context * ctx,
#         struct ggml_tensor  * a,
#         struct ggml_tensor  * b);
def ggml_cross_entropy_loss(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    return lib.ggml_cross_entropy_loss(ctx, a, b)

lib.ggml_cross_entropy_loss.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_cross_entropy_loss.restype = ctypes.POINTER(ggml_tensor)

# GGML_API struct ggml_tensor * ggml_cross_entropy_loss_back(
#         struct ggml_context * ctx,
#         struct ggml_tensor  * a,
#         struct ggml_tensor  * b,
#         struct ggml_tensor  * c);
def ggml_cross_entropy_loss_back(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    c: ggml_tensor_p,
) -> ggml_tensor_p:
    return lib.ggml_cross_entropy_loss_back(ctx, a, b, c)

lib.ggml_cross_entropy_loss_back.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_cross_entropy_loss_back.restype = ctypes.POINTER(ggml_tensor)

# //
# // automatic differentiation
# //

# GGML_API void ggml_set_param(
#         struct ggml_context * ctx,
#         struct ggml_tensor  * tensor);
def ggml_set_param(ctx: ggml_context_p, tensor: ggml_tensor_p):
    return lib.ggml_set_param(ctx, tensor)

lib.ggml_set_param.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_set_param.restype = None

# GGML_API void ggml_build_forward_expand (struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
def ggml_build_forward_expand(
    cgraph: ggml_cgraph_p,
    tensor: ggml_tensor_p,
):
    """Add a tensor to the forward computation graph. This is used to
    compute and save the value of the tensor.

    Parameters:
        cgraph: The graph.
        tensor: The tensor."""
    return lib.ggml_build_forward_expand(cgraph, tensor)

lib.ggml_build_forward_expand.argtypes = [
    ctypes.POINTER(ggml_cgraph),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_build_forward_expand.restype = None

# GGML_API void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool keep);
def ggml_build_backward_expand(
    ctx: ggml_context_p,
    gf: ggml_cgraph_p,
    gb: ggml_cgraph_p,
    keep: Union[ctypes.c_bool, bool],
):
    """Add a tensor to the backward computation graph. This is used to
    compute the gradient of the tensor.

    Parameters:
        ctx: The context.
        gf: The forward graph.
        gb: The backward graph.
        keep: Whether to keep the tensor."""
    return lib.ggml_build_backward_expand(ctx, gf, gb, keep)

lib.ggml_build_backward_expand.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_cgraph),
    ctypes.POINTER(ggml_cgraph),
    ctypes.c_bool,
]
lib.ggml_build_backward_expand.restype = None

# GGML_API struct ggml_cgraph ggml_build_forward (struct ggml_tensor * tensor);
def ggml_build_forward(
    tensor: ggml_tensor_p,
) -> ggml_cgraph:
    """Build the forward computation graph.

    Parameters:
        tensor: The tensor.

    Returns:
        The graph."""
    return lib.ggml_build_forward(tensor)

lib.ggml_build_forward.argtypes = [ctypes.POINTER(ggml_tensor)]
lib.ggml_build_forward.restype = ggml_cgraph

# GGML_API struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep);
def ggml_build_backward(
    ctx: ggml_context_p,
    gf: ggml_cgraph_p,
    keep: Union[ctypes.c_bool, bool],
) -> ggml_cgraph:
    return lib.ggml_build_backward(ctx, gf, keep)

lib.ggml_build_backward.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_cgraph),
    ctypes.c_bool,
]
lib.ggml_build_backward.restype = ggml_cgraph

# // graph allocation in a context

# GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx);
def ggml_new_graph(
    ctx: ggml_context_p,
) -> ggml_cgraph_p:
    """Create a new graph.

    Parameters:
        ctx: The context.

    Returns:
        The graph."""
    return lib.ggml_new_graph(ctx)

lib.ggml_new_graph.argtypes = [ggml_context_p]
lib.ggml_new_graph.restype = ctypes.POINTER(ggml_cgraph)

# GGML_API struct ggml_cgraph * ggml_build_forward_ctx(struct ggml_context * ctx, struct ggml_tensor * tensor);
def ggml_build_forward_ctx(
    ctx: ggml_context_p,
    tensor: ggml_tensor_p,
) -> ggml_cgraph_p:
    """Build the forward computation graph in a context.

    Parameters:
        ctx: The context.
        tensor: The tensor.

    Returns:
        The graph."""
    return lib.ggml_build_forward_ctx(ctx, tensor)

lib.ggml_build_forward_ctx.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_build_forward_ctx.restype = ctypes.POINTER(ggml_cgraph)

# GGML_API size_t ggml_graph_overhead(void);
def ggml_graph_overhead() -> int:
    """Get the overhead of the graph."""
    return lib.ggml_graph_overhead()

lib.ggml_graph_overhead.argtypes = []
lib.ggml_graph_overhead.restype = ctypes.c_size_t

# // ggml_graph_plan() has to be called before ggml_graph_compute()
# // when plan.work_size > 0, caller must allocate memory for plan.work_data
# GGML_API struct ggml_cplan ggml_graph_plan (struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);
def ggml_graph_plan(
    cgraph: ggml_cgraph_p,
    n_threads: Union[ctypes.c_int, int] = GGML_DEFAULT_N_THREADS,
) -> ggml_cplan:
    """Plan the computation graph.

    Parameters:
        cgraph: The graph.
        n_threads: The number of threads to use.

    Returns:
        The plan."""
    return lib.ggml_graph_plan(cgraph, n_threads)

lib.ggml_graph_plan.argtypes = [
    ctypes.POINTER(ggml_cgraph),
    ctypes.c_int,
]
lib.ggml_graph_plan.restype = ggml_cplan

# GGML_API int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
def ggml_graph_compute(
    cgraph: ggml_cgraph_p,
    cplan: ggml_cplan_p,
) -> int:
    """Compute the graph.

    Parameters:
        cgraph: The graph.
        cplan: The plan."""
    return lib.ggml_graph_compute(cgraph, cplan)

lib.ggml_graph_compute.argtypes = [
    ctypes.POINTER(ggml_cgraph),
    ctypes.POINTER(ggml_cplan),
]
lib.ggml_graph_compute.restype = ctypes.c_int
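
# Illustrative sketch (not part of the upstream bindings): the plan/compute
# protocol from the comments above. When plan.work_size > 0 the caller must
# allocate the work buffer and keep it alive across the compute call. This
# assumes ggml_cplan exposes work_size/work_data fields as declared earlier
# in this file.
#
#   plan = ggml_graph_plan(gf, n_threads=4)
#   if plan.work_size > 0:
#       work = (ctypes.c_uint8 * plan.work_size)()
#       plan.work_data = ctypes.cast(work, ctypes.POINTER(ctypes.c_uint8))
#   ggml_graph_compute(gf, ctypes.byref(plan))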

# GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph);
def ggml_graph_reset(
    cgraph: ggml_cgraph_p,
):
    """Reset the graph.

    Parameters:
        cgraph: The graph."""
    return lib.ggml_graph_reset(cgraph)

lib.ggml_graph_reset.argtypes = [ctypes.POINTER(ggml_cgraph)]
lib.ggml_graph_reset.restype = None

# // same as ggml_graph_compute() but the work data is allocated as a part of the context
# // note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
# GGML_API void ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
def ggml_graph_compute_with_ctx(
    ctx: ggml_context_p,
    cgraph: ggml_cgraph_p,
    n_threads: Union[ctypes.c_int, int],
):
    """Compute the graph with a context.

    Parameters:
        ctx: The context.
        cgraph: The graph.
        n_threads: The number of threads to use."""
    return lib.ggml_graph_compute_with_ctx(ctx, cgraph, n_threads)

lib.ggml_graph_compute_with_ctx.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_cgraph),
    ctypes.c_int,
]
lib.ggml_graph_compute_with_ctx.restype = None

# GGML_API struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name);
def ggml_graph_get_tensor(
    cgraph: ggml_cgraph_p,
    name: bytes,
) -> ggml_tensor_p:
    """Get a tensor from the graph by name.

    Parameters:
        cgraph: The graph.
        name: The name of the tensor.

    Returns:
        The tensor."""
    return lib.ggml_graph_get_tensor(cgraph, name)

lib.ggml_graph_get_tensor.argtypes = [
    ctypes.POINTER(ggml_cgraph),
    ctypes.c_char_p,
]
lib.ggml_graph_get_tensor.restype = ctypes.POINTER(ggml_tensor)

# GGML_API void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname);
def ggml_graph_export(
    cgraph: ggml_cgraph_p,
    fname: bytes,
):
    return lib.ggml_graph_export(cgraph, fname)

lib.ggml_graph_export.argtypes = [
    ctypes.POINTER(ggml_cgraph),
    ctypes.c_char_p,
]
lib.ggml_graph_export.restype = None

# GGML_API struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval);
def ggml_graph_import(
    fname: bytes,
    ctx_data: "ctypes._Pointer[ggml_context_p]",  # type: ignore
    ctx_eval: "ctypes._Pointer[ggml_context_p]",  # type: ignore
) -> ggml_cgraph:
    return lib.ggml_graph_import(fname, ctx_data, ctx_eval)

lib.ggml_graph_import.argtypes = [
    ctypes.c_char_p,
    ctypes.POINTER(ggml_context_p),
    ctypes.POINTER(ggml_context_p),
]
lib.ggml_graph_import.restype = ggml_cgraph

# // print info and performance information for the graph
# GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph);
def ggml_graph_print(
    cgraph: ggml_cgraph_p,
):
    return lib.ggml_graph_print(cgraph)

lib.ggml_graph_print.argtypes = [ctypes.POINTER(ggml_cgraph)]
lib.ggml_graph_print.restype = None

# // dump the graph into a file using the dot format
# GGML_API void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename);
def ggml_graph_dump_dot(
    gb: ggml_cgraph_p,
    gf: ggml_cgraph_p,
    filename: bytes,
):
    return lib.ggml_graph_dump_dot(gb, gf, filename)

lib.ggml_graph_dump_dot.argtypes = [
    ctypes.POINTER(ggml_cgraph),
    ctypes.POINTER(ggml_cgraph),
    ctypes.c_char_p,
]
lib.ggml_graph_dump_dot.restype = None

# //
# // optimization
# //

# // optimization methods
# enum ggml_opt_type {
#     GGML_OPT_ADAM,
#     GGML_OPT_LBFGS,
# };
GGML_OPT_ADAM = 0
GGML_OPT_LBFGS = 1

# // linesearch methods
# enum ggml_linesearch {
#     GGML_LINESEARCH_DEFAULT = 1,
#     GGML_LINESEARCH_BACKTRACKING_ARMIJO = 0,
#     GGML_LINESEARCH_BACKTRACKING_WOLFE = 1,
#     GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 2,
# };
GGML_LINESEARCH_DEFAULT = 1
GGML_LINESEARCH_BACKTRACKING_ARMIJO = 0
GGML_LINESEARCH_BACKTRACKING_WOLFE = 1
GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 2

# // optimization return values
# enum ggml_opt_result {
#     GGML_OPT_OK = 0,
#     GGML_OPT_DID_NOT_CONVERGE,
#     GGML_OPT_NO_CONTEXT,
#     GGML_OPT_INVALID_WOLFE,
#     GGML_OPT_FAIL,
#     GGML_LINESEARCH_FAIL = -128,
#     GGML_LINESEARCH_MINIMUM_STEP,
#     GGML_LINESEARCH_MAXIMUM_STEP,
#     GGML_LINESEARCH_MAXIMUM_ITERATIONS,
#     GGML_LINESEARCH_INVALID_PARAMETERS,
# };
GGML_OPT_OK = 0
GGML_OPT_DID_NOT_CONVERGE = 1
GGML_OPT_NO_CONTEXT = 2
GGML_OPT_INVALID_WOLFE = 3
GGML_OPT_FAIL = 4
GGML_LINESEARCH_FAIL = -128
GGML_LINESEARCH_MINIMUM_STEP = -127
GGML_LINESEARCH_MAXIMUM_STEP = -126
GGML_LINESEARCH_MAXIMUM_ITERATIONS = -125
GGML_LINESEARCH_INVALID_PARAMETERS = -124

# typedef void (*ggml_opt_callback)(void * data, float * sched);
ggml_opt_callback = ctypes.CFUNCTYPE(
    None,
    ctypes.c_void_p,
    ctypes.POINTER(ctypes.c_float),
)

# // optimization parameters
# //
# //   see ggml.c (ggml_opt_default_params) for default values
# //
# struct ggml_opt_params {
#     enum ggml_opt_type type;
#     int n_threads;
#     // delta-based convergence test
#     //
#     //   if past == 0 - disabled
#     //   if past > 0:
#     //     stop if |f(x) - f(x_past)| < delta * max(1, |f(x)|)
#     //
#     int past;
#     float delta;
#     // maximum number of iterations without improvement
#     //
#     //   if 0 - disabled
#     //   if > 0:
#     //     assume convergence if no cost improvement in this number of iterations
#     //
#     int max_no_improvement;
#     bool print_forward_graph;
#     bool print_backward_graph;
#     // ADAM parameters
#     struct {
#         int n_iter;
#         float sched; // schedule multiplier (fixed, decay or warmup)
#         float decay; // weight decay for AdamW, use 0.0f to disable
#         int decay_min_ndim; // minimum number of tensor dimension to apply weight decay
#         float alpha; // learning rate
#         float beta1;
#         float beta2;
#         float eps;   // epsilon for numerical stability
#         float eps_f; // epsilon for convergence test
#         float eps_g; // epsilon for convergence test
#         float gclip; // gradient clipping
#     } adam;
#     // LBFGS parameters
#     struct {
#         int m; // number of corrections to approximate the inv. Hessian
#         int n_iter;
#         int max_linesearch;
#         float eps;  // convergence tolerance
#         float ftol; // line search tolerance
#         float wolfe;
#         float min_step;
#         float max_step;
#         enum ggml_linesearch linesearch;
#     } lbfgs;
# };
class ggml_opt_params_adam(ctypes.Structure):
    _fields_ = [
        ("n_iter", ctypes.c_int),
        ("sched", ctypes.c_float),
        ("decay", ctypes.c_float),
        ("decay_min_ndim", ctypes.c_int),
        ("alpha", ctypes.c_float),
        ("beta1", ctypes.c_float),
        ("beta2", ctypes.c_float),
        ("eps", ctypes.c_float),
        ("eps_f", ctypes.c_float),
        ("eps_g", ctypes.c_float),
        ("gclip", ctypes.c_float),
    ]

class ggml_opt_params_lbfgs(ctypes.Structure):
    _fields_ = [
        ("m", ctypes.c_int),
        ("n_iter", ctypes.c_int),
        ("max_linesearch", ctypes.c_int),
        ("eps", ctypes.c_float),
        ("ftol", ctypes.c_float),
        ("wolfe", ctypes.c_float),
        ("min_step", ctypes.c_float),
        ("max_step", ctypes.c_float),
        ("linesearch", ctypes.c_int),
    ]

class ggml_opt_params(ctypes.Structure):
    _fields_ = [
        ("type", ctypes.c_int),
        ("n_threads", ctypes.c_int),
        ("past", ctypes.c_int),
        ("delta", ctypes.c_float),
        ("max_no_improvement", ctypes.c_int),
        ("print_forward_graph", ctypes.c_bool),
        ("print_backward_graph", ctypes.c_bool),
        ("adam", ggml_opt_params_adam),
        ("lbfgs", ggml_opt_params_lbfgs),
    ]

# struct ggml_opt_context {
#     struct ggml_context * ctx;
#     struct ggml_opt_params params;
#     int iter;
#     int64_t nx; // number of parameter elements
#     bool just_initialized;
#     float loss_before;
#     float loss_after;
#     struct {
#         struct ggml_tensor * m;  // first moment
#         struct ggml_tensor * v;  // second moment
#         struct ggml_tensor * pf; // past function values
#         float fx_best;
#         float fx_prev;
#         int n_no_improvement;
#     } adam;
#     struct {
#         struct ggml_tensor * x;    // current parameters
#         struct ggml_tensor * xp;   // previous parameters
#         struct ggml_tensor * g;    // current gradient
#         struct ggml_tensor * gp;   // previous gradient
#         struct ggml_tensor * d;    // search direction
#         struct ggml_tensor * pf;   // past function values
#         struct ggml_tensor * lmal; // the L-BFGS memory alpha
#         struct ggml_tensor * lmys; // the L-BFGS memory ys
#         struct ggml_tensor * lms;  // the L-BFGS memory s
#         struct ggml_tensor * lmy;  // the L-BFGS memory y
#         float fx_best;
#         float step;
#         int j;
#         int k;
#         int end;
#         int n_no_improvement;
#     } lbfgs;
# };
class ggml_opt_context_adam(ctypes.Structure):
    _fields_ = [
        ("m", ctypes.POINTER(ggml_tensor)),
        ("v", ctypes.POINTER(ggml_tensor)),
        ("pf", ctypes.POINTER(ggml_tensor)),
        ("fx_best", ctypes.c_float),
        ("fx_prev", ctypes.c_float),
        ("n_no_improvement", ctypes.c_int),
    ]

class ggml_opt_context_lbfgs(ctypes.Structure):
    _fields_ = [
        ("x", ctypes.POINTER(ggml_tensor)),
        ("xp", ctypes.POINTER(ggml_tensor)),
        ("g", ctypes.POINTER(ggml_tensor)),
        ("gp", ctypes.POINTER(ggml_tensor)),
        ("d", ctypes.POINTER(ggml_tensor)),
        ("pf", ctypes.POINTER(ggml_tensor)),
        ("lmal", ctypes.POINTER(ggml_tensor)),
        ("lmys", ctypes.POINTER(ggml_tensor)),
        ("lms", ctypes.POINTER(ggml_tensor)),
        ("lmy", ctypes.POINTER(ggml_tensor)),
        ("fx_best", ctypes.c_float),
        ("step", ctypes.c_float),
        ("j", ctypes.c_int),
        ("k", ctypes.c_int),
        ("end", ctypes.c_int),
        ("n_no_improvement", ctypes.c_int),
    ]

class ggml_opt_context(ctypes.Structure):
    _fields_ = [
        ("ctx", ggml_context_p),
        ("params", ggml_opt_params),
        ("iter", ctypes.c_int),
        ("nx", ctypes.c_int64),
        ("just_initialized", ctypes.c_bool),
        ("loss_before", ctypes.c_float),
        ("loss_after", ctypes.c_float),
        ("adam", ggml_opt_context_adam),
        ("lbfgs", ggml_opt_context_lbfgs),
    ]

ggml_opt_context_p = ctypes.POINTER(ggml_opt_context)

# GGML_API struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type);
def ggml_opt_default_params(type: Union[ctypes.c_int, int]) -> ggml_opt_params:
    return lib.ggml_opt_default_params(type)

lib.ggml_opt_default_params.argtypes = [ctypes.c_int]
lib.ggml_opt_default_params.restype = ggml_opt_params

# // optimize the function defined by the tensor f
# GGML_API enum ggml_opt_result ggml_opt(
#         struct ggml_context * ctx,
#         struct ggml_opt_params params,
#         struct ggml_tensor * f);
def ggml_opt(
    ctx: ggml_context_p,
    params: ggml_opt_params,
    f: ggml_tensor_p,
) -> int:
    return lib.ggml_opt(ctx, params, f)

lib.ggml_opt.argtypes = [ggml_context_p, ggml_opt_params, ctypes.POINTER(ggml_tensor)]
lib.ggml_opt.restype = ctypes.c_int
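
# Illustrative sketch (not part of the upstream bindings): minimize a scalar
# tensor f with the default ADAM settings. Assumes the trainable tensors
# were registered with ggml_set_param before f was built.
#
#   opt_params = ggml_opt_default_params(GGML_OPT_ADAM)
#   opt_params.adam.n_iter = 100
#   result = ggml_opt(ctx, opt_params, f)
#   assert result == GGML_OPT_OK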

# // initialize optimizer context
# GGML_API void ggml_opt_init(
#         struct ggml_context     * ctx,
#         struct ggml_opt_context * opt,
#         struct ggml_opt_params    params,
#         int64_t                   nx);
def ggml_opt_init(
    ctx: ggml_context_p,
    opt: "ctypes._Pointer[ggml_opt_context]",  # type: ignore
    params: ggml_opt_params,
    nx: Union[ctypes.c_int64, int],
):
    return lib.ggml_opt_init(ctx, opt, params, nx)

lib.ggml_opt_init.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_opt_context),
    ggml_opt_params,
    ctypes.c_int64,
]
lib.ggml_opt_init.restype = None

# // continue optimizing the function defined by the tensor f
# GGML_API enum ggml_opt_result ggml_opt_resume(
#         struct ggml_context     * ctx,
#         struct ggml_opt_context * opt,
#         struct ggml_tensor      * f);
def ggml_opt_resume(
    ctx: ggml_context_p,
    opt: "ctypes._Pointer[ggml_opt_context]",  # type: ignore
    f: ggml_tensor_p,
) -> int:
    return lib.ggml_opt_resume(ctx, opt, f)

lib.ggml_opt_resume.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_opt_context),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_opt_resume.restype = ctypes.c_int

# // continue optimizing the function defined by the tensor f
# GGML_API enum ggml_opt_result ggml_opt_resume_g(
#         struct ggml_context     * ctx,
#         struct ggml_opt_context * opt,
#         struct ggml_tensor      * f,
#         struct ggml_cgraph      * gf,
#         struct ggml_cgraph      * gb,
#         ggml_opt_callback         callback,
#         void                    * callback_data);
def ggml_opt_resume_g(
    ctx: ggml_context_p,
    opt: "ctypes._Pointer[ggml_opt_context]",  # type: ignore
    f: ggml_tensor_p,
    gf: ggml_cgraph_p,
    gb: ggml_cgraph_p,
    callback: ggml_opt_callback = None,
    callback_data: ctypes.c_void_p = None,
) -> int:
    return lib.ggml_opt_resume_g(ctx, opt, f, gf, gb, callback, callback_data)

lib.ggml_opt_resume_g.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_opt_context),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_cgraph),
    ctypes.POINTER(ggml_cgraph),
    ggml_opt_callback,
    ctypes.c_void_p,
]
lib.ggml_opt_resume_g.restype = ctypes.c_int

# //
# // quantization
# //

# GGML_API size_t ggml_quantize_q4_0(const float * src, void * dst, int n, int k, int64_t * hist);
def ggml_quantize_q4_0(
    src: CFloatArray,
    dst: ctypes.c_void_p,
    n: Union[ctypes.c_int, int],
    k: Union[ctypes.c_int, int],
    hist: CInt64Array,
) -> int:
    return lib.ggml_quantize_q4_0(src, dst, n, k, hist)

lib.ggml_quantize_q4_0.argtypes = [
    ctypes.POINTER(ctypes.c_float),
    ctypes.c_void_p,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.POINTER(ctypes.c_int64),
]
lib.ggml_quantize_q4_0.restype = ctypes.c_size_t

# GGML_API size_t ggml_quantize_q4_1(const float * src, void * dst, int n, int k, int64_t * hist);
def ggml_quantize_q4_1(
    src: CFloatArray,
    dst: ctypes.c_void_p,
    n: Union[ctypes.c_int, int],
    k: Union[ctypes.c_int, int],
    hist: CInt64Array,
) -> int:
    return lib.ggml_quantize_q4_1(src, dst, n, k, hist)

lib.ggml_quantize_q4_1.argtypes = [
    ctypes.POINTER(ctypes.c_float),
    ctypes.c_void_p,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.POINTER(ctypes.c_int64),
]
lib.ggml_quantize_q4_1.restype = ctypes.c_size_t

# GGML_API size_t ggml_quantize_q5_0(const float * src, void * dst, int n, int k, int64_t * hist);
def ggml_quantize_q5_0(
    src: CFloatArray,
    dst: ctypes.c_void_p,
    n: Union[ctypes.c_int, int],
    k: Union[ctypes.c_int, int],
    hist: CInt64Array,
) -> int:
    return lib.ggml_quantize_q5_0(src, dst, n, k, hist)

lib.ggml_quantize_q5_0.argtypes = [
    ctypes.POINTER(ctypes.c_float),
    ctypes.c_void_p,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.POINTER(ctypes.c_int64),
]
lib.ggml_quantize_q5_0.restype = ctypes.c_size_t

# GGML_API size_t ggml_quantize_q5_1(const float * src, void * dst, int n, int k, int64_t * hist);
def ggml_quantize_q5_1(
    src: CFloatArray,
    dst: ctypes.c_void_p,
    n: Union[ctypes.c_int, int],
    k: Union[ctypes.c_int, int],
    hist: CInt64Array,
) -> int:
    return lib.ggml_quantize_q5_1(src, dst, n, k, hist)

lib.ggml_quantize_q5_1.argtypes = [
    ctypes.POINTER(ctypes.c_float),
    ctypes.c_void_p,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.POINTER(ctypes.c_int64),
]
lib.ggml_quantize_q5_1.restype = ctypes.c_size_t

# GGML_API size_t ggml_quantize_q8_0(const float * src, void * dst, int n, int k, int64_t * hist);
def ggml_quantize_q8_0(
    src: CFloatArray,
    dst: ctypes.c_void_p,
    n: Union[ctypes.c_int, int],
    k: Union[ctypes.c_int, int],
    hist: CInt64Array,
) -> int:
    return lib.ggml_quantize_q8_0(src, dst, n, k, hist)

lib.ggml_quantize_q8_0.argtypes = [
    ctypes.POINTER(ctypes.c_float),
    ctypes.c_void_p,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.POINTER(ctypes.c_int64),
]
lib.ggml_quantize_q8_0.restype = ctypes.c_size_t

# GGML_API size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start, int n, int64_t * hist);
def ggml_quantize_chunk(
    type: Union[ctypes.c_int, int],
    src: CFloatArray,
    dst: ctypes.c_void_p,
    start: Union[ctypes.c_int, int],
    n: Union[ctypes.c_int, int],
    hist: CInt64Array,
) -> int:
    return lib.ggml_quantize_chunk(type, src, dst, start, n, hist)

lib.ggml_quantize_chunk.argtypes = [
    ctypes.c_int,
    ctypes.POINTER(ctypes.c_float),
    ctypes.c_void_p,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.POINTER(ctypes.c_int64),
]
lib.ggml_quantize_chunk.restype = ctypes.c_size_t
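
# Illustrative sketch (not part of the upstream bindings): quantize n float32
# values to Q8_0. The 16-entry int64 histogram matches the hist buffer used
# upstream; n should be a multiple of the quantization block size (32 for
# Q8_0), and dst is sized generously since the quantized output is smaller
# than the f32 input.
#
#   n = 1024
#   src = (ctypes.c_float * n)(*([0.5] * n))
#   dst = (ctypes.c_uint8 * (n * 4))()
#   hist = (ctypes.c_int64 * 16)()
#   n_bytes = ggml_quantize_q8_0(src, ctypes.cast(dst, ctypes.c_void_p), n, n, hist)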

# //
# // gguf
# //

# enum gguf_type {
#     GGUF_TYPE_UINT8   = 0,
#     GGUF_TYPE_INT8    = 1,
#     GGUF_TYPE_UINT16  = 2,
#     GGUF_TYPE_INT16   = 3,
#     GGUF_TYPE_UINT32  = 4,
#     GGUF_TYPE_INT32   = 5,
#     GGUF_TYPE_FLOAT32 = 6,
#     GGUF_TYPE_BOOL    = 7,
#     GGUF_TYPE_STRING  = 8,
#     GGUF_TYPE_ARRAY   = 9,
#     GGUF_TYPE_UINT64  = 10,
#     GGUF_TYPE_INT64   = 11,
#     GGUF_TYPE_FLOAT64 = 12,
#     GGUF_TYPE_COUNT, // marks the end of the enum
# };
GGUF_TYPE_UINT8 = 0
GGUF_TYPE_INT8 = 1
GGUF_TYPE_UINT16 = 2
GGUF_TYPE_INT16 = 3
GGUF_TYPE_UINT32 = 4
GGUF_TYPE_INT32 = 5
GGUF_TYPE_FLOAT32 = 6
GGUF_TYPE_BOOL = 7
GGUF_TYPE_STRING = 8
GGUF_TYPE_ARRAY = 9
GGUF_TYPE_UINT64 = 10
GGUF_TYPE_INT64 = 11
GGUF_TYPE_FLOAT64 = 12
GGUF_TYPE_COUNT = 13  # marks the end of the enum

# struct gguf_context;
gguf_context_p = ctypes.c_void_p

# struct gguf_init_params {
#     bool no_alloc;
#     // if not NULL, create a ggml_context and allocate the tensor data in it
#     struct ggml_context ** ctx;
# };
class gguf_init_params(ctypes.Structure):
    _fields_ = [
        ("no_alloc", ctypes.c_bool),
        ("ctx", ctypes.POINTER(ggml_context_p)),
    ]

# GGML_API struct gguf_context * gguf_init_empty(void);
def gguf_init_empty() -> gguf_context_p:
    return lib.gguf_init_empty()

lib.gguf_init_empty.argtypes = []
lib.gguf_init_empty.restype = gguf_context_p

# GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
def gguf_init_from_file(
    fname: bytes,
    params: gguf_init_params,
) -> gguf_context_p:
    return lib.gguf_init_from_file(fname, params)

lib.gguf_init_from_file.argtypes = [
    ctypes.c_char_p,
    gguf_init_params,
]
lib.gguf_init_from_file.restype = gguf_context_p

# //GGML_API struct gguf_context * gguf_init_from_buffer(..);

# GGML_API void gguf_free(struct gguf_context * ctx);
def gguf_free(
    ctx: gguf_context_p,
):
    return lib.gguf_free(ctx)

lib.gguf_free.argtypes = [
    gguf_context_p,
]
lib.gguf_free.restype = None

# GGML_API const char * gguf_type_name(enum gguf_type type);
def gguf_type_name(
    type: Union[ctypes.c_int, int],
) -> bytes:
    return lib.gguf_type_name(type)

lib.gguf_type_name.argtypes = [
    ctypes.c_int,
]
lib.gguf_type_name.restype = ctypes.c_char_p

# GGML_API int gguf_get_version (const struct gguf_context * ctx);
def gguf_get_version(
    ctx: gguf_context_p,
) -> int:
    return lib.gguf_get_version(ctx)

lib.gguf_get_version.argtypes = [
    gguf_context_p,
]
lib.gguf_get_version.restype = ctypes.c_int

# GGML_API size_t gguf_get_alignment (const struct gguf_context * ctx);
def gguf_get_alignment(
    ctx: gguf_context_p,
) -> int:
    return lib.gguf_get_alignment(ctx)

lib.gguf_get_alignment.argtypes = [
    gguf_context_p,
]
lib.gguf_get_alignment.restype = ctypes.c_size_t

# GGML_API size_t gguf_get_data_offset(const struct gguf_context * ctx);
def gguf_get_data_offset(
    ctx: gguf_context_p,
) -> int:
    return lib.gguf_get_data_offset(ctx)

lib.gguf_get_data_offset.argtypes = [
    gguf_context_p,
]
lib.gguf_get_data_offset.restype = ctypes.c_size_t

# GGML_API void * gguf_get_data (const struct gguf_context * ctx);
def gguf_get_data(
    ctx: gguf_context_p,
) -> ctypes.c_void_p:
    return lib.gguf_get_data(ctx)

lib.gguf_get_data.argtypes = [
    gguf_context_p,
]
lib.gguf_get_data.restype = ctypes.c_void_p

# GGML_API int gguf_get_n_kv(const struct gguf_context * ctx);
def gguf_get_n_kv(
    ctx: gguf_context_p,
) -> int:
    return lib.gguf_get_n_kv(ctx)

lib.gguf_get_n_kv.argtypes = [
    gguf_context_p,
]
lib.gguf_get_n_kv.restype = ctypes.c_int

# GGML_API int gguf_find_key(const struct gguf_context * ctx, const char * key);
def gguf_find_key(
    ctx: gguf_context_p,
    key: bytes,
) -> int:
    return lib.gguf_find_key(ctx, key)

lib.gguf_find_key.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
]
lib.gguf_find_key.restype = ctypes.c_int

# GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int i);
def gguf_get_key(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> bytes:
    return lib.gguf_get_key(ctx, i)

lib.gguf_get_key.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_key.restype = ctypes.c_char_p

# GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int i);
def gguf_get_kv_type(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> int:
    return lib.gguf_get_kv_type(ctx, i)

lib.gguf_get_kv_type.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_kv_type.restype = ctypes.c_int

# GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int i);
def gguf_get_arr_type(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> int:
    return lib.gguf_get_arr_type(ctx, i)

lib.gguf_get_arr_type.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_arr_type.restype = ctypes.c_int

# // results are undefined if the wrong type is used for the key
# GGML_API uint8_t gguf_get_val_u8 (const struct gguf_context * ctx, int i);
def gguf_get_val_u8(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> int:
    return lib.gguf_get_val_u8(ctx, i)

lib.gguf_get_val_u8.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_val_u8.restype = ctypes.c_uint8

# GGML_API int8_t gguf_get_val_i8 (const struct gguf_context * ctx, int i);
def gguf_get_val_i8(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> int:
    return lib.gguf_get_val_i8(ctx, i)

lib.gguf_get_val_i8.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_val_i8.restype = ctypes.c_int8

# GGML_API uint16_t gguf_get_val_u16 (const struct gguf_context * ctx, int i);
def gguf_get_val_u16(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> int:
    return lib.gguf_get_val_u16(ctx, i)

lib.gguf_get_val_u16.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_val_u16.restype = ctypes.c_uint16

# GGML_API int16_t gguf_get_val_i16 (const struct gguf_context * ctx, int i);
def gguf_get_val_i16(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> int:
    return lib.gguf_get_val_i16(ctx, i)

lib.gguf_get_val_i16.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_val_i16.restype = ctypes.c_int16

# GGML_API uint32_t gguf_get_val_u32 (const struct gguf_context * ctx, int i);
def gguf_get_val_u32(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> int:
    return lib.gguf_get_val_u32(ctx, i)

lib.gguf_get_val_u32.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_val_u32.restype = ctypes.c_uint32

# GGML_API int32_t gguf_get_val_i32 (const struct gguf_context * ctx, int i);
def gguf_get_val_i32(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> int:
    return lib.gguf_get_val_i32(ctx, i)

lib.gguf_get_val_i32.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_val_i32.restype = ctypes.c_int32

# GGML_API float gguf_get_val_f32 (const struct gguf_context * ctx, int i);
def gguf_get_val_f32(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> float:
    return lib.gguf_get_val_f32(ctx, i)

lib.gguf_get_val_f32.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_val_f32.restype = ctypes.c_float

# GGML_API uint64_t gguf_get_val_u64 (const struct gguf_context * ctx, int i);
def gguf_get_val_u64(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> int:
    return lib.gguf_get_val_u64(ctx, i)

lib.gguf_get_val_u64.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_val_u64.restype = ctypes.c_uint64

# GGML_API int64_t gguf_get_val_i64 (const struct gguf_context * ctx, int i);
def gguf_get_val_i64(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> int:
    return lib.gguf_get_val_i64(ctx, i)

lib.gguf_get_val_i64.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_val_i64.restype = ctypes.c_int64

# GGML_API double gguf_get_val_f64 (const struct gguf_context * ctx, int i);
def gguf_get_val_f64(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> float:
    return lib.gguf_get_val_f64(ctx, i)

lib.gguf_get_val_f64.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_val_f64.restype = ctypes.c_double

# GGML_API bool gguf_get_val_bool(const struct gguf_context * ctx, int i);
def gguf_get_val_bool(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> bool:
    return lib.gguf_get_val_bool(ctx, i)

lib.gguf_get_val_bool.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_val_bool.restype = ctypes.c_bool

# GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int i);
def gguf_get_val_str(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> bytes:
    return lib.gguf_get_val_str(ctx, i)

lib.gguf_get_val_str.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_val_str.restype = ctypes.c_char_p

# GGML_API int gguf_get_arr_n (const struct gguf_context * ctx, int i);
def gguf_get_arr_n(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> int:
    return lib.gguf_get_arr_n(ctx, i)

lib.gguf_get_arr_n.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_arr_n.restype = ctypes.c_int

# GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int i);
def gguf_get_arr_data(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> ctypes.c_void_p:
    return lib.gguf_get_arr_data(ctx, i)

lib.gguf_get_arr_data.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_arr_data.restype = ctypes.c_void_p

# GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i);
def gguf_get_arr_str(
    ctx: gguf_context_p,
    key_id: Union[ctypes.c_int, int],
    i: Union[ctypes.c_int, int],
) -> bytes:
    return lib.gguf_get_arr_str(ctx, key_id, i)

lib.gguf_get_arr_str.argtypes = [
    gguf_context_p,
    ctypes.c_int,
    ctypes.c_int,
]
lib.gguf_get_arr_str.restype = ctypes.c_char_p
  5233. # GGML_API int gguf_get_n_tensors (const struct gguf_context * ctx);
  5234. def gguf_get_n_tensors(
  5235. ctx: gguf_context_p,
  5236. ) -> int:
  5237. return lib.gguf_get_n_tensors(ctx)
  5238. lib.gguf_get_n_tensors.argtypes = [
  5239. gguf_context_p,
  5240. ]
  5241. lib.gguf_get_n_tensors.restype = ctypes.c_int
  5242. # GGML_API int gguf_find_tensor (const struct gguf_context * ctx, const char * name);
  5243. def gguf_find_tensor(
  5244. ctx: gguf_context_p,
  5245. name: bytes,
  5246. ) -> int:
  5247. return lib.gguf_find_tensor(ctx, name)
  5248. lib.gguf_find_tensor.argtypes = [
  5249. gguf_context_p,
  5250. ctypes.c_char_p,
  5251. ]
  5252. lib.gguf_find_tensor.restype = ctypes.c_int
  5253. # GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i);
  5254. def gguf_get_tensor_offset(
  5255. ctx: gguf_context_p,
  5256. i: Union[ctypes.c_int, int],
  5257. ) -> int:
  5258. return lib.gguf_get_tensor_offset(ctx, i)
  5259. lib.gguf_get_tensor_offset.argtypes = [
  5260. gguf_context_p,
  5261. ctypes.c_int,
  5262. ]
  5263. lib.gguf_get_tensor_offset.restype = ctypes.c_size_t
  5264. # GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i);
  5265. def gguf_get_tensor_name(
  5266. ctx: gguf_context_p,
  5267. i: Union[ctypes.c_int, int],
  5268. ) -> bytes:
  5269. return lib.gguf_get_tensor_name(ctx, i)
  5270. lib.gguf_get_tensor_name.argtypes = [
  5271. gguf_context_p,
  5272. ctypes.c_int,
  5273. ]
  5274. lib.gguf_get_tensor_name.restype = ctypes.c_char_p
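
# Usage sketch (illustrative only, never called at import time): iterate over
# the tensors of an open GGUF context and print their names and byte offsets.
# It assumes a gguf_context_p obtained from the gguf_init_from_file binding
# defined earlier in this file.
def _example_list_gguf_tensors(ctx: gguf_context_p) -> None:
    for i in range(gguf_get_n_tensors(ctx)):
        name = gguf_get_tensor_name(ctx, i).decode("utf-8")
        offset = gguf_get_tensor_offset(ctx, i)
        print(f"{name}: offset={offset}")
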
# // overrides existing values or adds a new one
# GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
def gguf_set_val_u8(
    ctx: gguf_context_p,
    key: bytes,
    val: Union[ctypes.c_uint8, int],
):
    return lib.gguf_set_val_u8(ctx, key, val)

lib.gguf_set_val_u8.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_uint8,
]
lib.gguf_set_val_u8.restype = None

# GGML_API void gguf_set_val_i8 (struct gguf_context * ctx, const char * key, int8_t val);
def gguf_set_val_i8(
    ctx: gguf_context_p,
    key: bytes,
    val: Union[ctypes.c_int8, int],
):
    return lib.gguf_set_val_i8(ctx, key, val)

lib.gguf_set_val_i8.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_int8,
]
lib.gguf_set_val_i8.restype = None

# GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t val);
def gguf_set_val_u16(
    ctx: gguf_context_p,
    key: bytes,
    val: Union[ctypes.c_uint16, int],
):
    return lib.gguf_set_val_u16(ctx, key, val)

lib.gguf_set_val_u16.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_uint16,
]
lib.gguf_set_val_u16.restype = None

# GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t val);
def gguf_set_val_i16(
    ctx: gguf_context_p,
    key: bytes,
    val: Union[ctypes.c_int16, int],
):
    return lib.gguf_set_val_i16(ctx, key, val)

lib.gguf_set_val_i16.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_int16,
]
lib.gguf_set_val_i16.restype = None

# GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val);
def gguf_set_val_u32(
    ctx: gguf_context_p,
    key: bytes,
    val: Union[ctypes.c_uint32, int],
):
    return lib.gguf_set_val_u32(ctx, key, val)

lib.gguf_set_val_u32.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_uint32,
]
lib.gguf_set_val_u32.restype = None

# GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t val);
def gguf_set_val_i32(
    ctx: gguf_context_p,
    key: bytes,
    val: Union[ctypes.c_int32, int],
):
    return lib.gguf_set_val_i32(ctx, key, val)

lib.gguf_set_val_i32.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_int32,
]
lib.gguf_set_val_i32.restype = None

# GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float val);
def gguf_set_val_f32(
    ctx: gguf_context_p,
    key: bytes,
    val: Union[ctypes.c_float, float],
):
    return lib.gguf_set_val_f32(ctx, key, val)

lib.gguf_set_val_f32.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_float,
]
lib.gguf_set_val_f32.restype = None

# GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val);
def gguf_set_val_u64(
    ctx: gguf_context_p,
    key: bytes,
    val: Union[ctypes.c_uint64, int],
):
    return lib.gguf_set_val_u64(ctx, key, val)

lib.gguf_set_val_u64.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_uint64,
]
lib.gguf_set_val_u64.restype = None

# GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t val);
def gguf_set_val_i64(
    ctx: gguf_context_p,
    key: bytes,
    val: Union[ctypes.c_int64, int],
):
    return lib.gguf_set_val_i64(ctx, key, val)

lib.gguf_set_val_i64.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_int64,
]
lib.gguf_set_val_i64.restype = None

# GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double val);
def gguf_set_val_f64(
    ctx: gguf_context_p,
    key: bytes,
    val: Union[ctypes.c_double, float],
):
    return lib.gguf_set_val_f64(ctx, key, val)

lib.gguf_set_val_f64.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_double,
]
lib.gguf_set_val_f64.restype = None

# GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val);
def gguf_set_val_bool(
    ctx: gguf_context_p,
    key: bytes,
    val: Union[ctypes.c_bool, bool],
):
    return lib.gguf_set_val_bool(ctx, key, val)

lib.gguf_set_val_bool.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_bool,
]
lib.gguf_set_val_bool.restype = None

# GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
def gguf_set_val_str(
    ctx: gguf_context_p,
    key: bytes,
    val: bytes,
):
    return lib.gguf_set_val_str(ctx, key, val)

lib.gguf_set_val_str.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_char_p,
]
lib.gguf_set_val_str.restype = None
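
# Usage sketch (illustrative only, never called at import time): write a few
# scalar key/value pairs into a GGUF context. It assumes a writable context
# created with the gguf_init_empty binding defined earlier in this file; keys
# and string values are NUL-free byte strings.
def _example_set_scalar_kv(ctx: gguf_context_p) -> None:
    gguf_set_val_u32(ctx, b"example.vocab_size", 32000)
    gguf_set_val_f32(ctx, b"example.rope_theta", 10000.0)
    gguf_set_val_bool(ctx, b"example.tied_embeddings", True)
    gguf_set_val_str(ctx, b"example.name", b"my-model")
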
# GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n);
def gguf_set_arr_data(
    ctx: gguf_context_p,
    key: bytes,
    type: Union[ctypes.c_int, int],
    data: ctypes.c_void_p,
    n: Union[ctypes.c_int, int],
):
    return lib.gguf_set_arr_data(ctx, key, type, data, n)

lib.gguf_set_arr_data.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_int,
    ctypes.c_void_p,
    ctypes.c_int,
]
lib.gguf_set_arr_data.restype = None

# GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, int n);
def gguf_set_arr_str(
    ctx: gguf_context_p,
    key: bytes,
    data: CCharPointer,
    n: Union[ctypes.c_int, int],
):
    return lib.gguf_set_arr_str(ctx, key, data, n)

lib.gguf_set_arr_str.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.POINTER(ctypes.c_char_p),
    ctypes.c_int,
]
lib.gguf_set_arr_str.restype = None
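
# Usage sketch (illustrative only, never called at import time): build ctypes
# arrays on the Python side and hand them to the two array setters above.
# GGUF_TYPE_INT32 is assumed to be the gguf_type enum value bound earlier in
# this file.
def _example_set_arrays(ctx: gguf_context_p) -> None:
    # gguf_set_arr_data copies n elements of the given gguf_type from data.
    values = (ctypes.c_int32 * 4)(1, 2, 3, 4)
    gguf_set_arr_data(
        ctx,
        b"example.layers",
        GGUF_TYPE_INT32,
        ctypes.cast(values, ctypes.c_void_p),
        len(values),
    )
    # gguf_set_arr_str copies n NUL-terminated strings from a char** array.
    strings = (ctypes.c_char_p * 2)(b"hello", b"world")
    gguf_set_arr_str(ctx, b"example.tokens", strings, len(strings))
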
# // set or add KV pairs from another context
# GGML_API void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src);
def gguf_set_kv(
    ctx: gguf_context_p,
    src: gguf_context_p,
):
    return lib.gguf_set_kv(ctx, src)

lib.gguf_set_kv.argtypes = [
    gguf_context_p,
    gguf_context_p,
]
lib.gguf_set_kv.restype = None

# // manage tensor info
# GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor);
def gguf_add_tensor(
    ctx: gguf_context_p,
    tensor: ggml_tensor_p,
):
    return lib.gguf_add_tensor(ctx, tensor)

lib.gguf_add_tensor.argtypes = [
    gguf_context_p,
    ctypes.POINTER(ggml_tensor),
]
lib.gguf_add_tensor.restype = None

# GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type);
def gguf_set_tensor_type(
    ctx: gguf_context_p,
    name: bytes,
    type: Union[ctypes.c_int, int],
):
    return lib.gguf_set_tensor_type(ctx, name, type)

lib.gguf_set_tensor_type.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_int,
]
lib.gguf_set_tensor_type.restype = None

# GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size);
def gguf_set_tensor_data(
    ctx: gguf_context_p,
    name: bytes,
    data: ctypes.c_void_p,
    size: Union[ctypes.c_size_t, int],
):
    return lib.gguf_set_tensor_data(ctx, name, data, size)

lib.gguf_set_tensor_data.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_void_p,
    ctypes.c_size_t,
]
lib.gguf_set_tensor_data.restype = None
# // writing gguf files can be done in 2 ways:
# //
# // - write the entire gguf_context to a binary file in a single pass:
# //
# //   gguf_write_to_file(ctx, fname);
# //
# // - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data:
# //
# //   FILE * f = fopen(fname, "wb");
# //   fseek(f, gguf_get_meta_size(ctx), SEEK_SET);
# //   fwrite(f, ...);
# //   void * data = gguf_get_meta_data(ctx);
# //   fseek(f, 0, SEEK_SET);
# //   fwrite(f, data, gguf_get_meta_size(ctx));
# //   free(data);
# //   fclose(f);
# //
# // write the entire context to a binary file
# GGML_API void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta);
def gguf_write_to_file(
    ctx: gguf_context_p,
    fname: bytes,
    only_meta: Union[ctypes.c_bool, bool],
):
    return lib.gguf_write_to_file(ctx, fname, only_meta)

lib.gguf_write_to_file.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_bool,
]
lib.gguf_write_to_file.restype = None

# // get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
# GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
def gguf_get_meta_size(
    ctx: gguf_context_p,
) -> int:
    return lib.gguf_get_meta_size(ctx)

lib.gguf_get_meta_size.argtypes = [
    gguf_context_p,
]
lib.gguf_get_meta_size.restype = ctypes.c_size_t

# GGML_API void gguf_get_meta_data(const struct gguf_context * ctx, void * data);
def gguf_get_meta_data(
    ctx: gguf_context_p,
    data: ctypes.c_void_p,
):
    return lib.gguf_get_meta_data(ctx, data)

lib.gguf_get_meta_data.argtypes = [
    gguf_context_p,
    ctypes.c_void_p,
]
lib.gguf_get_meta_data.restype = None
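
# Usage sketch (illustrative only, never called at import time): the two-pass
# write pattern from the header comment above, in Python. The meta data is
# rendered into a ctypes buffer with gguf_get_meta_data and back-filled at the
# start of the file once the tensor data has been laid down.
def _example_two_pass_write(ctx: gguf_context_p, fname: str, tensor_bytes: bytes) -> None:
    meta_size = gguf_get_meta_size(ctx)
    with open(fname, "wb") as f:
        f.seek(meta_size)       # leave a placeholder for the meta data
        f.write(tensor_bytes)   # write the tensor data first
        buf = ctypes.create_string_buffer(meta_size)
        gguf_get_meta_data(ctx, ctypes.cast(buf, ctypes.c_void_p))
        f.seek(0)
        f.write(buf.raw)        # back-fill the meta data
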
# //
# // system info
# //
# GGML_API int ggml_cpu_has_avx (void);
def ggml_cpu_has_avx() -> int:
    return lib.ggml_cpu_has_avx()

lib.ggml_cpu_has_avx.argtypes = []
lib.ggml_cpu_has_avx.restype = ctypes.c_int

# GGML_API int ggml_cpu_has_avx2 (void);
def ggml_cpu_has_avx2() -> int:
    return lib.ggml_cpu_has_avx2()

lib.ggml_cpu_has_avx2.argtypes = []
lib.ggml_cpu_has_avx2.restype = ctypes.c_int

# GGML_API int ggml_cpu_has_avx512 (void);
def ggml_cpu_has_avx512() -> int:
    return lib.ggml_cpu_has_avx512()

lib.ggml_cpu_has_avx512.argtypes = []
lib.ggml_cpu_has_avx512.restype = ctypes.c_int

# GGML_API int ggml_cpu_has_avx512_vbmi(void);
def ggml_cpu_has_avx512_vbmi() -> int:
    return lib.ggml_cpu_has_avx512_vbmi()

lib.ggml_cpu_has_avx512_vbmi.argtypes = []
lib.ggml_cpu_has_avx512_vbmi.restype = ctypes.c_int

# GGML_API int ggml_cpu_has_avx512_vnni(void);
def ggml_cpu_has_avx512_vnni() -> int:
    return lib.ggml_cpu_has_avx512_vnni()

lib.ggml_cpu_has_avx512_vnni.argtypes = []
lib.ggml_cpu_has_avx512_vnni.restype = ctypes.c_int

# GGML_API int ggml_cpu_has_fma (void);
def ggml_cpu_has_fma() -> int:
    return lib.ggml_cpu_has_fma()

lib.ggml_cpu_has_fma.argtypes = []
lib.ggml_cpu_has_fma.restype = ctypes.c_int

# GGML_API int ggml_cpu_has_neon (void);
def ggml_cpu_has_neon() -> int:
    return lib.ggml_cpu_has_neon()

lib.ggml_cpu_has_neon.argtypes = []
lib.ggml_cpu_has_neon.restype = ctypes.c_int

# GGML_API int ggml_cpu_has_arm_fma (void);
def ggml_cpu_has_arm_fma() -> int:
    return lib.ggml_cpu_has_arm_fma()

lib.ggml_cpu_has_arm_fma.argtypes = []
lib.ggml_cpu_has_arm_fma.restype = ctypes.c_int

# GGML_API int ggml_cpu_has_f16c (void);
def ggml_cpu_has_f16c() -> int:
    return lib.ggml_cpu_has_f16c()

lib.ggml_cpu_has_f16c.argtypes = []
lib.ggml_cpu_has_f16c.restype = ctypes.c_int

# GGML_API int ggml_cpu_has_fp16_va (void);
def ggml_cpu_has_fp16_va() -> int:
    return lib.ggml_cpu_has_fp16_va()

lib.ggml_cpu_has_fp16_va.argtypes = []
lib.ggml_cpu_has_fp16_va.restype = ctypes.c_int

# GGML_API int ggml_cpu_has_wasm_simd (void);
def ggml_cpu_has_wasm_simd() -> int:
    return lib.ggml_cpu_has_wasm_simd()

lib.ggml_cpu_has_wasm_simd.argtypes = []
lib.ggml_cpu_has_wasm_simd.restype = ctypes.c_int

# GGML_API int ggml_cpu_has_blas (void);
def ggml_cpu_has_blas() -> int:
    return lib.ggml_cpu_has_blas()

lib.ggml_cpu_has_blas.argtypes = []
lib.ggml_cpu_has_blas.restype = ctypes.c_int

# GGML_API int ggml_cpu_has_cublas (void);
def ggml_cpu_has_cublas() -> int:
    return lib.ggml_cpu_has_cublas()

lib.ggml_cpu_has_cublas.argtypes = []
lib.ggml_cpu_has_cublas.restype = ctypes.c_int

# GGML_API int ggml_cpu_has_clblast (void);
def ggml_cpu_has_clblast() -> int:
    return lib.ggml_cpu_has_clblast()

lib.ggml_cpu_has_clblast.argtypes = []
lib.ggml_cpu_has_clblast.restype = ctypes.c_int

# GGML_API int ggml_cpu_has_gpublas (void);
def ggml_cpu_has_gpublas() -> int:
    return lib.ggml_cpu_has_gpublas()

lib.ggml_cpu_has_gpublas.argtypes = []
lib.ggml_cpu_has_gpublas.restype = ctypes.c_int

# GGML_API int ggml_cpu_has_sse3 (void);
def ggml_cpu_has_sse3() -> int:
    return lib.ggml_cpu_has_sse3()

lib.ggml_cpu_has_sse3.argtypes = []
lib.ggml_cpu_has_sse3.restype = ctypes.c_int

# GGML_API int ggml_cpu_has_ssse3 (void);
def ggml_cpu_has_ssse3() -> int:
    return lib.ggml_cpu_has_ssse3()

lib.ggml_cpu_has_ssse3.argtypes = []
lib.ggml_cpu_has_ssse3.restype = ctypes.c_int

# GGML_API int ggml_cpu_has_vsx (void);
def ggml_cpu_has_vsx() -> int:
    return lib.ggml_cpu_has_vsx()

lib.ggml_cpu_has_vsx.argtypes = []
lib.ggml_cpu_has_vsx.restype = ctypes.c_int
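
# Usage sketch (illustrative only, never called at import time): each
# ggml_cpu_has_* probe returns 0 or 1, so a compact capability report is just
# a dict over the bound functions.
def _example_system_info() -> dict:
    features = {
        "avx": ggml_cpu_has_avx(),
        "avx2": ggml_cpu_has_avx2(),
        "neon": ggml_cpu_has_neon(),
        "f16c": ggml_cpu_has_f16c(),
        "blas": ggml_cpu_has_blas(),
        "cublas": ggml_cpu_has_cublas(),
        "clblast": ggml_cpu_has_clblast(),
    }
    return {name: bool(flag) for name, flag in features.items()}
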
# //
# // Internal types and functions exposed for tests and benchmarks
# //
# typedef void (*ggml_to_float_t)(const void * x, float * y, int k);
ggml_to_float_t = ctypes.CFUNCTYPE(
    None, ctypes.c_void_p, ctypes.POINTER(ctypes.c_float), ctypes.c_int
)

# typedef void (*ggml_from_float_t)(const float * x, void * y, int k);
ggml_from_float_t = ctypes.CFUNCTYPE(
    None, ctypes.POINTER(ctypes.c_float), ctypes.c_void_p, ctypes.c_int
)

# typedef void (*ggml_vec_dot_t)(const int n, float * s, const void * x, const void * y);
ggml_vec_dot_t = ctypes.CFUNCTYPE(
    None, ctypes.c_int, ctypes.POINTER(ctypes.c_float), ctypes.c_void_p, ctypes.c_void_p
)

# typedef struct {
#     const char * type_name;
#     int blck_size;
#     size_t type_size;
#     bool is_quantized;
#     ggml_to_float_t to_float;
#     ggml_from_float_t from_float;
#     ggml_from_float_t from_float_reference;
#     ggml_vec_dot_t vec_dot;
#     enum ggml_type vec_dot_type;
# } ggml_type_traits_t;
class ggml_type_traits_t(ctypes.Structure):
    _fields_ = [
        ("type_name", ctypes.c_char_p),
        ("blck_size", ctypes.c_int),
        ("type_size", ctypes.c_size_t),
        ("is_quantized", ctypes.c_bool),
        ("to_float", ggml_to_float_t),
        ("from_float", ggml_from_float_t),
        ("from_float_reference", ggml_from_float_t),
        ("vec_dot", ggml_vec_dot_t),
        ("vec_dot_type", ctypes.c_int),
    ]

# ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type);
def ggml_internal_get_type_traits(type: Union[ctypes.c_int, int]) -> ggml_type_traits_t:
    return lib.ggml_internal_get_type_traits(type)

lib.ggml_internal_get_type_traits.argtypes = [ctypes.c_int]
lib.ggml_internal_get_type_traits.restype = ggml_type_traits_t
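
# Usage sketch (illustrative only, never called at import time): inspect the
# traits of a quantized type. GGML_TYPE_Q4_0 is assumed to be the ggml_type
# enum value bound earlier in this file; for a block-quantized type, type_size
# bytes encode blck_size values.
def _example_type_traits() -> None:
    traits = ggml_internal_get_type_traits(GGML_TYPE_Q4_0)
    print("type name:", traits.type_name.decode("utf-8"))
    print("block size:", traits.blck_size)  # values per block
    print("type size:", traits.type_size)   # bytes per block
    print("quantized:", traits.is_quantized)
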
#####################################################
# GGML ALLOC API
# source: ggml-alloc.h
#####################################################

ggml_allocr_p = ctypes.c_void_p

# GGML_API struct ggml_allocr * ggml_allocr_new(void * data, size_t size, size_t alignment);
def ggml_allocr_new(
    data: ctypes.c_void_p,
    size: Union[ctypes.c_size_t, int],
    alignment: Union[ctypes.c_size_t, int],
) -> ggml_allocr_p:
    return lib.ggml_allocr_new(data, size, alignment)

lib.ggml_allocr_new.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.c_size_t]
lib.ggml_allocr_new.restype = ggml_allocr_p

# GGML_API struct ggml_allocr * ggml_allocr_new_measure(size_t alignment);
def ggml_allocr_new_measure(
    alignment: Union[ctypes.c_size_t, int],
) -> ggml_allocr_p:
    return lib.ggml_allocr_new_measure(alignment)

lib.ggml_allocr_new_measure.argtypes = [ctypes.c_size_t]
lib.ggml_allocr_new_measure.restype = ggml_allocr_p

# // tell the allocator to parse nodes following the order described in the list
# // you should call this if your graph is optimized to execute out-of-order
# GGML_API void ggml_allocr_set_parse_seq(struct ggml_allocr * alloc, const int * list, int n);
def ggml_allocr_set_parse_seq(
    alloc: ggml_allocr_p,
    list: CIntPointer,
    n: Union[ctypes.c_int, int],
):
    return lib.ggml_allocr_set_parse_seq(alloc, list, n)

lib.ggml_allocr_set_parse_seq.argtypes = [
    ggml_allocr_p,
    ctypes.POINTER(ctypes.c_int),
    ctypes.c_int,
]
lib.ggml_allocr_set_parse_seq.restype = None

# GGML_API void ggml_allocr_free(struct ggml_allocr * alloc);
def ggml_allocr_free(
    alloc: ggml_allocr_p,
):
    return lib.ggml_allocr_free(alloc)

lib.ggml_allocr_free.argtypes = [ggml_allocr_p]
lib.ggml_allocr_free.restype = None

# GGML_API bool ggml_allocr_is_measure(struct ggml_allocr * alloc);
def ggml_allocr_is_measure(
    alloc: ggml_allocr_p,
) -> bool:
    return lib.ggml_allocr_is_measure(alloc)

lib.ggml_allocr_is_measure.argtypes = [ggml_allocr_p]
lib.ggml_allocr_is_measure.restype = ctypes.c_bool

# GGML_API void ggml_allocr_reset(struct ggml_allocr * alloc);
def ggml_allocr_reset(
    alloc: ggml_allocr_p,
):
    return lib.ggml_allocr_reset(alloc)

lib.ggml_allocr_reset.argtypes = [ggml_allocr_p]
lib.ggml_allocr_reset.restype = None

# GGML_API void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor);
def ggml_allocr_alloc(
    alloc: ggml_allocr_p,
    tensor: ggml_tensor_p,
):
    return lib.ggml_allocr_alloc(alloc, tensor)

lib.ggml_allocr_alloc.argtypes = [ggml_allocr_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_allocr_alloc.restype = None

# GGML_API size_t ggml_allocr_alloc_graph(struct ggml_allocr * alloc, struct ggml_cgraph * graph);
def ggml_allocr_alloc_graph(
    alloc: ggml_allocr_p,
    graph: ggml_cgraph_p,
) -> int:
    return lib.ggml_allocr_alloc_graph(alloc, graph)

lib.ggml_allocr_alloc_graph.argtypes = [ggml_allocr_p, ctypes.POINTER(ggml_cgraph)]
lib.ggml_allocr_alloc_graph.restype = ctypes.c_size_t
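
# Usage sketch (illustrative only, never called at import time): the usual
# measure-then-allocate flow. A measure allocator sizes the worst-case buffer
# for a graph, then a real allocator backed by that buffer assigns addresses.
# build_graph is a hypothetical caller-supplied function that constructs the
# ggml_cgraph.
def _example_measure_then_alloc(build_graph, tensor_alignment: int = 32) -> None:
    measure = ggml_allocr_new_measure(tensor_alignment)
    buf_size = ggml_allocr_alloc_graph(measure, build_graph())
    ggml_allocr_free(measure)

    buf = ctypes.create_string_buffer(buf_size)  # keep a reference alive
    alloc = ggml_allocr_new(ctypes.cast(buf, ctypes.c_void_p), buf_size, tensor_alignment)
    ggml_allocr_alloc_graph(alloc, build_graph())
    ggml_allocr_free(alloc)
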
#####################################################
# GGML CUDA API
# source: ggml-cuda.h
#####################################################

GGML_USE_CUBLAS = hasattr(lib, "ggml_init_cublas")

GGML_CUDA_MAX_DEVICES = 16

# GGML_API void ggml_init_cublas(void);
def ggml_init_cublas():
    return lib.ggml_init_cublas()

if GGML_USE_CUBLAS:
    lib.ggml_init_cublas.argtypes = []
    lib.ggml_init_cublas.restype = None

# void * ggml_cuda_host_malloc(size_t size);
def ggml_cuda_host_malloc(
    size: Union[ctypes.c_size_t, int],
) -> Optional[ctypes.c_void_p]:
    return lib.ggml_cuda_host_malloc(size)

if GGML_USE_CUBLAS:
    lib.ggml_cuda_host_malloc.argtypes = [ctypes.c_size_t]
    lib.ggml_cuda_host_malloc.restype = ctypes.c_void_p

# void ggml_cuda_host_free(void * ptr);
def ggml_cuda_host_free(
    ptr: ctypes.c_void_p,
):
    return lib.ggml_cuda_host_free(ptr)

if GGML_USE_CUBLAS:
    lib.ggml_cuda_host_free.argtypes = [ctypes.c_void_p]
    lib.ggml_cuda_host_free.restype = None

# GGML_API bool ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
def ggml_cuda_can_mul_mat(
    src0: ggml_tensor_p,
    src1: ggml_tensor_p,
    dst: ggml_tensor_p,
) -> bool:
    return lib.ggml_cuda_can_mul_mat(src0, src1, dst)

if GGML_USE_CUBLAS:
    lib.ggml_cuda_can_mul_mat.argtypes = [
        ctypes.POINTER(ggml_tensor),
        ctypes.POINTER(ggml_tensor),
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cuda_can_mul_mat.restype = ctypes.c_bool

# GGML_API void ggml_cuda_set_tensor_split(const float * tensor_split);
def ggml_cuda_set_tensor_split(
    tensor_split: CFloatArray,
):
    return lib.ggml_cuda_set_tensor_split(tensor_split)

if GGML_USE_CUBLAS:
    lib.ggml_cuda_set_tensor_split.argtypes = [ctypes.POINTER(ctypes.c_float)]
    lib.ggml_cuda_set_tensor_split.restype = None

# void ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor);
def ggml_cuda_transform_tensor(
    data: ctypes.c_void_p,
    tensor: ggml_tensor_p,
):
    return lib.ggml_cuda_transform_tensor(data, tensor)

if GGML_USE_CUBLAS:
    lib.ggml_cuda_transform_tensor.argtypes = [
        ctypes.c_void_p,
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cuda_transform_tensor.restype = None

# void ggml_cuda_free_data(struct ggml_tensor * tensor);
def ggml_cuda_free_data(
    tensor: ggml_tensor_p,
):
    return lib.ggml_cuda_free_data(tensor)

if GGML_USE_CUBLAS:
    lib.ggml_cuda_free_data.argtypes = [
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cuda_free_data.restype = None

# void ggml_cuda_assign_buffers(struct ggml_tensor * tensor);
def ggml_cuda_assign_buffers(
    tensor: ggml_tensor_p,
):
    return lib.ggml_cuda_assign_buffers(tensor)

if GGML_USE_CUBLAS:
    lib.ggml_cuda_assign_buffers.argtypes = [
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cuda_assign_buffers.restype = None

# void ggml_cuda_assign_buffers_no_scratch(struct ggml_tensor * tensor);
def ggml_cuda_assign_buffers_no_scratch(
    tensor: ggml_tensor_p,
):
    return lib.ggml_cuda_assign_buffers_no_scratch(tensor)

if GGML_USE_CUBLAS:
    lib.ggml_cuda_assign_buffers_no_scratch.argtypes = [
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cuda_assign_buffers_no_scratch.restype = None

# GGML_API void ggml_cuda_assign_buffers_force_inplace(struct ggml_tensor * tensor);
def ggml_cuda_assign_buffers_force_inplace(
    tensor: ggml_tensor_p,
):
    return lib.ggml_cuda_assign_buffers_force_inplace(tensor)

if GGML_USE_CUBLAS:
    lib.ggml_cuda_assign_buffers_force_inplace.argtypes = [
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cuda_assign_buffers_force_inplace.restype = None

# GGML_API void ggml_cuda_assign_buffers_no_alloc(struct ggml_tensor * tensor);
def ggml_cuda_assign_buffers_no_alloc(
    tensor: ggml_tensor_p,
):
    return lib.ggml_cuda_assign_buffers_no_alloc(tensor)

if GGML_USE_CUBLAS:
    lib.ggml_cuda_assign_buffers_no_alloc.argtypes = [
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cuda_assign_buffers_no_alloc.restype = None

# GGML_API void ggml_cuda_assign_scratch_offset(struct ggml_tensor * tensor, size_t offset);
def ggml_cuda_assign_scratch_offset(
    tensor: ggml_tensor_p,
    offset: Union[ctypes.c_size_t, int],
):
    return lib.ggml_cuda_assign_scratch_offset(tensor, offset)

if GGML_USE_CUBLAS:
    lib.ggml_cuda_assign_scratch_offset.argtypes = [
        ctypes.POINTER(ggml_tensor),
        ctypes.c_size_t,
    ]
    lib.ggml_cuda_assign_scratch_offset.restype = None

# void ggml_cuda_set_main_device(int main_device);
def ggml_cuda_set_main_device(
    main_device: Union[ctypes.c_int, int],
):
    return lib.ggml_cuda_set_main_device(main_device)

if GGML_USE_CUBLAS:
    lib.ggml_cuda_set_main_device.argtypes = [
        ctypes.c_int,
    ]
    lib.ggml_cuda_set_main_device.restype = None

# GGML_API void ggml_cuda_set_mul_mat_q(bool mul_mat_q);
def ggml_cuda_set_mul_mat_q(
    mul_mat_q: Union[ctypes.c_bool, bool],
):
    return lib.ggml_cuda_set_mul_mat_q(mul_mat_q)

if GGML_USE_CUBLAS:
    lib.ggml_cuda_set_mul_mat_q.argtypes = [
        ctypes.c_bool,
    ]
    lib.ggml_cuda_set_mul_mat_q.restype = None

# void ggml_cuda_set_scratch_size(size_t scratch_size);
def ggml_cuda_set_scratch_size(
    scratch_size: Union[ctypes.c_size_t, int],
):
    return lib.ggml_cuda_set_scratch_size(scratch_size)

if GGML_USE_CUBLAS:
    lib.ggml_cuda_set_scratch_size.argtypes = [
        ctypes.c_size_t,
    ]
    lib.ggml_cuda_set_scratch_size.restype = None

# void ggml_cuda_free_scratch(void);
def ggml_cuda_free_scratch():
    return lib.ggml_cuda_free_scratch()

if GGML_USE_CUBLAS:
    lib.ggml_cuda_free_scratch.argtypes = []
    lib.ggml_cuda_free_scratch.restype = None

# GGML_API bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor);
def ggml_cuda_compute_forward(
    params: ggml_compute_params_p,
    tensor: ggml_tensor_p,
) -> bool:
    return lib.ggml_cuda_compute_forward(params, tensor)

if GGML_USE_CUBLAS:
    lib.ggml_cuda_compute_forward.argtypes = [
        ctypes.POINTER(ggml_compute_params),
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cuda_compute_forward.restype = ctypes.c_bool

# GGML_API int ggml_cuda_get_device_count(void);
def ggml_cuda_get_device_count() -> int:
    return lib.ggml_cuda_get_device_count()

if GGML_USE_CUBLAS:
    lib.ggml_cuda_get_device_count.argtypes = []
    lib.ggml_cuda_get_device_count.restype = ctypes.c_int

# GGML_API void ggml_cuda_get_device_description(int device, char * description, size_t description_size);
def ggml_cuda_get_device_description(
    device: Union[ctypes.c_int, int],
    description: bytes,
    description_size: Union[ctypes.c_size_t, int],
):
    return lib.ggml_cuda_get_device_description(device, description, description_size)

if GGML_USE_CUBLAS:
    lib.ggml_cuda_get_device_description.argtypes = [
        ctypes.c_int,
        ctypes.c_char_p,
        ctypes.c_size_t,
    ]
    lib.ggml_cuda_get_device_description.restype = None
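
# Usage sketch (illustrative only, never called at import time): the CUDA
# bindings above are only wired up when the loaded library exports them, so
# callers should gate on GGML_USE_CUBLAS before touching them.
def _example_cuda_devices() -> None:
    if not GGML_USE_CUBLAS:
        return
    ggml_init_cublas()
    for device in range(ggml_cuda_get_device_count()):
        desc = ctypes.create_string_buffer(128)
        ggml_cuda_get_device_description(device, desc, ctypes.sizeof(desc))
        print(device, desc.value.decode("utf-8"))
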
#####################################################
# GGML METAL API
# source: ggml-metal.h
#####################################################

GGML_USE_METAL = hasattr(lib, "ggml_metal_init")

# // max memory buffers that can be mapped to the device
# #define GGML_METAL_MAX_BUFFERS 16
GGML_METAL_MAX_BUFFERS = 16
# #define GGML_METAL_MAX_COMMAND_BUFFERS 32
GGML_METAL_MAX_COMMAND_BUFFERS = 32

# struct ggml_metal_context;
ggml_metal_context_p = ctypes.c_void_p

# struct ggml_metal_context * ggml_metal_init(int n_cb);
def ggml_metal_init(
    n_cb: Union[ctypes.c_int, int],
) -> ggml_metal_context_p:
    return lib.ggml_metal_init(n_cb)

if GGML_USE_METAL:
    lib.ggml_metal_init.argtypes = [ctypes.c_int]
    lib.ggml_metal_init.restype = ggml_metal_context_p

# void ggml_metal_free(struct ggml_metal_context * ctx);
def ggml_metal_free(
    ctx: ggml_metal_context_p,
):
    return lib.ggml_metal_free(ctx)

if GGML_USE_METAL:
    lib.ggml_metal_free.argtypes = [ggml_metal_context_p]
    lib.ggml_metal_free.restype = None

# // set the number of command buffers to use
# void ggml_metal_set_n_cb(struct ggml_metal_context * ctx, int n_cb);
def ggml_metal_set_n_cb(
    ctx: ggml_metal_context_p,
    n_cb: Union[ctypes.c_int, int],
):
    return lib.ggml_metal_set_n_cb(ctx, n_cb)

if GGML_USE_METAL:
    lib.ggml_metal_set_n_cb.argtypes = [ggml_metal_context_p, ctypes.c_int]
    lib.ggml_metal_set_n_cb.restype = None

# // creates a mapping between a host memory buffer and a device memory buffer
# // - make sure to map all buffers used in the graph before calling ggml_metal_graph_compute
# // - the mapping is used during computation to determine the arguments of the compute kernels
# // - you don't need to keep the host memory buffer allocated as it is never accessed by Metal
# // - max_size specifies the maximum size of a tensor and is used to create shared views such
# //   that it is guaranteed that the tensor will fit in at least one of the views
# //
# bool ggml_metal_add_buffer(
#     struct ggml_metal_context * ctx,
#     const char * name,
#     void * data,
#     size_t size,
#     size_t max_size);
def ggml_metal_add_buffer(
    ctx: ggml_metal_context_p,
    name: bytes,
    data: ctypes.c_void_p,
    size: Union[ctypes.c_size_t, int],
    max_size: Union[ctypes.c_size_t, int],
) -> bool:
    return lib.ggml_metal_add_buffer(ctx, name, data, size, max_size)

if GGML_USE_METAL:
    lib.ggml_metal_add_buffer.argtypes = [
        ggml_metal_context_p,
        ctypes.c_char_p,
        ctypes.c_void_p,
        ctypes.c_size_t,
        ctypes.c_size_t,
    ]
    lib.ggml_metal_add_buffer.restype = ctypes.c_bool
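
# Usage sketch (illustrative only, never called at import time): initialize a
# Metal context and map a host buffer into it before calling
# ggml_metal_graph_compute, following the comment above. The name only
# identifies the mapping.
def _example_metal_setup(data: ctypes.Array, max_tensor_size: int):
    if not GGML_USE_METAL:
        return None
    ctx = ggml_metal_init(1)  # one command buffer
    ok = ggml_metal_add_buffer(
        ctx,
        b"data",
        ctypes.cast(data, ctypes.c_void_p),
        ctypes.sizeof(data),
        max_tensor_size,
    )
    assert ok, "failed to map host buffer"
    return ctx
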
# // set data from host memory into the device
# void ggml_metal_set_tensor(struct ggml_metal_context * ctx, struct ggml_tensor * t);
def ggml_metal_set_tensor(
    ctx: ggml_metal_context_p,
    t: ggml_tensor_p,
):
    return lib.ggml_metal_set_tensor(ctx, t)

if GGML_USE_METAL:
    lib.ggml_metal_set_tensor.argtypes = [
        ggml_metal_context_p,
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_metal_set_tensor.restype = None

# // get data from the device into host memory
# void ggml_metal_get_tensor(struct ggml_metal_context * ctx, struct ggml_tensor * t);
def ggml_metal_get_tensor(
    ctx: ggml_metal_context_p,
    t: ggml_tensor_p,
):
    return lib.ggml_metal_get_tensor(ctx, t)

if GGML_USE_METAL:
    lib.ggml_metal_get_tensor.argtypes = [
        ggml_metal_context_p,
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_metal_get_tensor.restype = None

# // try to find operations that can be run concurrently in the graph
# // you should run it again if the topology of your graph changes
# void ggml_metal_graph_find_concurrency(struct ggml_metal_context * ctx, struct ggml_cgraph * gf, bool check_mem);
def ggml_metal_graph_find_concurrency(
    ctx: ggml_metal_context_p,
    gf: ggml_cgraph_p,
    check_mem: Union[ctypes.c_bool, bool],
):
    return lib.ggml_metal_graph_find_concurrency(ctx, gf, check_mem)

if GGML_USE_METAL:
    lib.ggml_metal_graph_find_concurrency.argtypes = [
        ggml_metal_context_p,
        ctypes.POINTER(ggml_cgraph),
        ctypes.c_bool,
    ]
    lib.ggml_metal_graph_find_concurrency.restype = None

# // if the graph has been optimized for concurrent dispatch, return the length of the concur_list
# int ggml_metal_if_optimized(struct ggml_metal_context * ctx);
def ggml_metal_if_optimized(
    ctx: ggml_metal_context_p,
) -> int:
    return lib.ggml_metal_if_optimized(ctx)

if GGML_USE_METAL:
    lib.ggml_metal_if_optimized.argtypes = [
        ggml_metal_context_p,
    ]
    lib.ggml_metal_if_optimized.restype = ctypes.c_int

# // output the concur_list for ggml_alloc
# int * ggml_metal_get_concur_list(struct ggml_metal_context * ctx);
def ggml_metal_get_concur_list(
    ctx: ggml_metal_context_p,
) -> CIntPointer:
    return lib.ggml_metal_get_concur_list(ctx)

if GGML_USE_METAL:
    lib.ggml_metal_get_concur_list.argtypes = [
        ggml_metal_context_p,
    ]
    lib.ggml_metal_get_concur_list.restype = ctypes.POINTER(ctypes.c_int)

# // same as ggml_graph_compute but uses Metal
# // creates gf->n_threads command buffers in parallel
# void ggml_metal_graph_compute(struct ggml_metal_context * ctx, struct ggml_cgraph * gf);
def ggml_metal_graph_compute(
    ctx: ggml_metal_context_p,
    gf: ggml_cgraph_p,
):
    return lib.ggml_metal_graph_compute(ctx, gf)

if GGML_USE_METAL:
    lib.ggml_metal_graph_compute.argtypes = [
        ggml_metal_context_p,
        ctypes.POINTER(ggml_cgraph),
    ]
    lib.ggml_metal_graph_compute.restype = None
#####################################################
# GGML OPENCL API
# source: ggml-opencl.h
#####################################################

GGML_USE_CLBLAST = hasattr(lib, "ggml_cl_init")

# void ggml_cl_init(void);
def ggml_cl_init():
    return lib.ggml_cl_init()

if GGML_USE_CLBLAST:
    lib.ggml_cl_init.argtypes = []
    lib.ggml_cl_init.restype = None

# void ggml_cl_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
def ggml_cl_mul(
    src0: ggml_tensor_p,
    src1: ggml_tensor_p,
    dst: ggml_tensor_p,
):
    return lib.ggml_cl_mul(src0, src1, dst)

if GGML_USE_CLBLAST:
    lib.ggml_cl_mul.argtypes = [
        ctypes.POINTER(ggml_tensor),
        ctypes.POINTER(ggml_tensor),
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cl_mul.restype = None

# bool ggml_cl_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
def ggml_cl_can_mul_mat(
    src0: ggml_tensor_p,
    src1: ggml_tensor_p,
    dst: ggml_tensor_p,
) -> bool:
    return lib.ggml_cl_can_mul_mat(src0, src1, dst)

if GGML_USE_CLBLAST:
    lib.ggml_cl_can_mul_mat.argtypes = [
        ctypes.POINTER(ggml_tensor),
        ctypes.POINTER(ggml_tensor),
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cl_can_mul_mat.restype = ctypes.c_bool

# size_t ggml_cl_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
def ggml_cl_mul_mat_get_wsize(
    src0: ggml_tensor_p,
    src1: ggml_tensor_p,
    dst: ggml_tensor_p,
) -> int:
    return lib.ggml_cl_mul_mat_get_wsize(src0, src1, dst)

if GGML_USE_CLBLAST:
    lib.ggml_cl_mul_mat_get_wsize.argtypes = [
        ctypes.POINTER(ggml_tensor),
        ctypes.POINTER(ggml_tensor),
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cl_mul_mat_get_wsize.restype = ctypes.c_size_t

# void ggml_cl_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, void * wdata, size_t wsize);
def ggml_cl_mul_mat(
    src0: ggml_tensor_p,
    src1: ggml_tensor_p,
    dst: ggml_tensor_p,
    wdata: ctypes.c_void_p,
    wsize: Union[ctypes.c_size_t, int],
):
    return lib.ggml_cl_mul_mat(src0, src1, dst, wdata, wsize)

if GGML_USE_CLBLAST:
    lib.ggml_cl_mul_mat.argtypes = [
        ctypes.POINTER(ggml_tensor),
        ctypes.POINTER(ggml_tensor),
        ctypes.POINTER(ggml_tensor),
        ctypes.c_void_p,
        ctypes.c_size_t,
    ]
    lib.ggml_cl_mul_mat.restype = None

# void * ggml_cl_host_malloc(size_t size);
def ggml_cl_host_malloc(
    size: Union[ctypes.c_size_t, int],
) -> Optional[ctypes.c_void_p]:
    return lib.ggml_cl_host_malloc(size)

if GGML_USE_CLBLAST:
    lib.ggml_cl_host_malloc.argtypes = [
        ctypes.c_size_t,
    ]
    lib.ggml_cl_host_malloc.restype = ctypes.c_void_p

# void ggml_cl_host_free(void * ptr);
def ggml_cl_host_free(
    ptr: ctypes.c_void_p,
):
    return lib.ggml_cl_host_free(ptr)

if GGML_USE_CLBLAST:
    lib.ggml_cl_host_free.argtypes = [
        ctypes.c_void_p,
    ]
    lib.ggml_cl_host_free.restype = None
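
# Usage sketch (illustrative only, never called at import time): memory from
# ggml_cl_host_malloc (None on failure) must be released with
# ggml_cl_host_free, never an ordinary free. By analogy with the CUDA
# counterpart above, this is assumed to be host memory set up for fast
# device transfers.
def _example_cl_host_buffer(nbytes: int) -> None:
    if not GGML_USE_CLBLAST:
        return
    ggml_cl_init()
    ptr = ggml_cl_host_malloc(nbytes)
    if ptr is not None:
        ggml_cl_host_free(ptr)
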
# void ggml_cl_free_data(const struct ggml_tensor* tensor);
def ggml_cl_free_data(
    tensor: ggml_tensor_p,
):
    return lib.ggml_cl_free_data(tensor)

if GGML_USE_CLBLAST:
    lib.ggml_cl_free_data.argtypes = [
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cl_free_data.restype = None

# void ggml_cl_transform_tensor(void * data, struct ggml_tensor * tensor);
def ggml_cl_transform_tensor(
    data: ctypes.c_void_p,
    tensor: ggml_tensor_p,
):
    return lib.ggml_cl_transform_tensor(data, tensor)

if GGML_USE_CLBLAST:
    lib.ggml_cl_transform_tensor.argtypes = [
        ctypes.c_void_p,
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cl_transform_tensor.restype = None