# third_party_ggml.py

"""This module is the core of the ggml-python library. It exposes a low-level
[ctypes](https://docs.python.org/3/library/ctypes.html)-based interface for ggml.

Structures and functions in the `ggml.ggml` module map directly to the original ggml C library and
they operate at a fairly low level.
No additional runtime checks are performed, nor is memory management handled automatically.
You've been warned :).

With that in mind, here are a few points to keep in mind:

- Functions accept both ctypes types (c_int, c_bool, c_float, etc.) and Python types (int, bool, float, etc.) as parameters.
- Functions return Python types for simple values (int, bool, float, etc.) and ctypes types for complex values ([ggml_context_p][ggml.ggml_context_p], [ggml_tensor_p][ggml.ggml_tensor_p], etc.).
- Memory management is the responsibility of the user. The user must call [ggml.ggml_free][] on the context after calling [ggml.ggml_init][].

Example:

```python
import ggml
import ctypes

# Allocate a new context with 16 MB of memory
params = ggml.ggml_init_params(mem_size=16 * 1024 * 1024, mem_buffer=None)
ctx = ggml.ggml_init(params=params)

# Instantiate tensors
x = ggml.ggml_new_tensor_1d(ctx, ggml.GGML_TYPE_F32, 1)
a = ggml.ggml_new_tensor_1d(ctx, ggml.GGML_TYPE_F32, 1)
b = ggml.ggml_new_tensor_1d(ctx, ggml.GGML_TYPE_F32, 1)

# Use ggml operations to build a computational graph
x2 = ggml.ggml_mul(ctx, x, x)
f = ggml.ggml_add(ctx, ggml.ggml_mul(ctx, a, x2), b)

gf = ggml.ggml_build_forward(f)

# Set the input values
ggml.ggml_set_f32(x, 2.0)
ggml.ggml_set_f32(a, 3.0)
ggml.ggml_set_f32(b, 4.0)

# Compute the graph
ggml.ggml_graph_compute_with_ctx(ctx, ctypes.pointer(gf), 1)

# Get the output value
output = ggml.ggml_get_f32_1d(f, 0)
assert output == 16.0

# Free the context
ggml.ggml_free(ctx)
```
"""
import ctypes
import importlib.resources
import os
import pathlib
import sys
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Type, Union

import numpy as np
from typing_extensions import TypeAlias

NULL: ctypes.c_void_p = None  # type: ignore

GGML_MEM_ALIGN = 16


# Load the library
def load_shared_library(base_path: Path, lib_base_name: str):
    # Construct the paths to the possible shared library names
    # Searching for the library in the current directory under the name "libggml" (default name
    # for ggml) and "ggml" (default name for this repo)
    lib_names: List[str] = [
        f"lib{lib_base_name}.so",
        f"lib{lib_base_name}.dylib",
        f"{lib_base_name}.dll",
    ]

    path = None
    cdll_args = dict()  # type: ignore
    # Add the library directory to the DLL search path on Windows (if needed)
    if sys.platform == "win32" and sys.version_info >= (3, 8):
        os.add_dll_directory(str(base_path))
        cdll_args["winmode"] = 0

    for lib_name in lib_names:
        # Try to load the shared library, handling potential errors
        path = base_path / lib_name
        if not path.exists():
            continue
        try:
            return ctypes.CDLL(str(path), **cdll_args)
        except Exception as e:
            raise RuntimeError(f"Failed to load shared library '{path}': {e}")

    raise FileNotFoundError(
        f"Shared library with base name '{lib_base_name}' not found in {base_path}"
    )


base_path = Path(__file__).parent.resolve() / "build/examples/unity"
lib_base_name = "fairseq2_cpp"
lib = load_shared_library(base_path, lib_base_name)
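
# Illustrative sketch (hypothetical helper, not part of the upstream API):
# `load_shared_library` only probes `base_path` for the platform-specific file
# names built above, so loading from a different build directory is just a
# matter of passing another path. The helper name below is an assumption.
def _example_load_from_directory(build_dir: Union[str, os.PathLike]) -> ctypes.CDLL:
    """Sketch: load the fairseq2_cpp shared library from an arbitrary directory."""
    return load_shared_library(Path(build_dir), "fairseq2_cpp")
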
#####################################################
# GGML Utility Types
#####################################################

CFloatArray: TypeAlias = "ctypes.Array[ctypes.c_float]"
CInt64Array: TypeAlias = "ctypes.Array[ctypes.c_int64]"
CIntPointer: TypeAlias = "ctypes._Pointer[ctypes.c_int]"  # type: ignore
CCharPointer: TypeAlias = "ctypes._Pointer[ctypes.c_char]"  # type: ignore

#####################################################
# source: ggml.h
# GGML API
#####################################################

# #define GGML_FILE_MAGIC 0x67676d6c // "ggml"
GGML_FILE_MAGIC = 0x67676D6C
# #define GGML_FILE_VERSION 1
GGML_FILE_VERSION = 1
# #define GGML_QNT_VERSION 2 // bump this on quantization format changes
GGML_QNT_VERSION = 2
# #define GGML_QNT_VERSION_FACTOR 1000 // do not change this
GGML_QNT_VERSION_FACTOR = 1000
# #define GGML_MAX_DIMS 4
GGML_MAX_DIMS = 4
# #define GGML_MAX_NODES 4096
GGML_MAX_NODES = 4096
# #define GGML_MAX_PARAMS 256
GGML_MAX_PARAMS = 256
# #define GGML_MAX_CONTEXTS 64
GGML_MAX_CONTEXTS = 64
# #define GGML_MAX_SRC 6
GGML_MAX_SRC = 6
# #define GGML_MAX_NAME 64
GGML_MAX_NAME = 64
# #define GGML_MAX_OP_PARAMS 32
GGML_MAX_OP_PARAMS = 32
# #define GGML_DEFAULT_N_THREADS 4
GGML_DEFAULT_N_THREADS = 4

# #if UINTPTR_MAX == 0XFFFFFFFF
#     #define GGML_MEMALIGN 4
# #else
#     #define GGML_MEMALIGN 16
# #endif
GGML_MEMALIGN = (
    4 if ctypes.sizeof(ctypes.c_void_p) == 4 else 16
)  # matches the C macro above: 4 on 32-bit pointers, 16 on 64-bit
# #define GGML_EXIT_SUCCESS 0
GGML_EXIT_SUCCESS = 0
# #define GGML_EXIT_ABORTED 1
GGML_EXIT_ABORTED = 1

# #define GGUF_MAGIC 0x46554747 // "GGUF"
GGUF_MAGIC = 0x46554747
# #define GGUF_VERSION 2
GGUF_VERSION = 2
# #define GGUF_DEFAULT_ALIGNMENT 32
GGUF_DEFAULT_ALIGNMENT = 32

# TODO: Check if this is correct
# typedef uint16_t ggml_fp16_t;
ggml_fp16_t = ctypes.c_uint16

CFP16Array: TypeAlias = "ctypes.Array[ggml_fp16_t]"


# GGML_API float ggml_fp16_to_fp32(ggml_fp16_t x);
def ggml_fp16_to_fp32(x: ggml_fp16_t) -> float:
    return lib.ggml_fp16_to_fp32(x)


lib.ggml_fp16_to_fp32.argtypes = [ggml_fp16_t]
lib.ggml_fp16_to_fp32.restype = ctypes.c_float


# GGML_API ggml_fp16_t ggml_fp32_to_fp16(float x);
def ggml_fp32_to_fp16(x: ctypes.c_float) -> int:
    return lib.ggml_fp32_to_fp16(x)


lib.ggml_fp32_to_fp16.argtypes = [ctypes.c_float]
lib.ggml_fp32_to_fp16.restype = ggml_fp16_t


# GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, size_t n);
def ggml_fp16_to_fp32_row(
    x: CFP16Array,
    y: CFloatArray,
    n: Union[ctypes.c_int, int],
) -> None:
    return lib.ggml_fp16_to_fp32_row(x, y, n)


lib.ggml_fp16_to_fp32_row.argtypes = [
    ctypes.POINTER(ggml_fp16_t),
    ctypes.POINTER(ctypes.c_float),
    ctypes.c_int,
]
lib.ggml_fp16_to_fp32_row.restype = None


# GGML_API void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, size_t n);
def ggml_fp32_to_fp16_row(
    x: CFloatArray,
    y: CFP16Array,
    n: Union[ctypes.c_int, int],
) -> None:
    return lib.ggml_fp32_to_fp16_row(x, y, n)


lib.ggml_fp32_to_fp16_row.argtypes = [
    ctypes.POINTER(ctypes.c_float),
    ctypes.POINTER(ggml_fp16_t),
    ctypes.c_int,
]
lib.ggml_fp32_to_fp16_row.restype = None
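
# Illustrative sketch (hypothetical helper, not part of the upstream API):
# passing ctypes arrays to the row-conversion bindings above. The declared
# argtypes are pointers to c_float / ggml_fp16_t, and ctypes accepts plain
# arrays of the pointed-to type for such parameters.
def _example_fp32_to_fp16_roundtrip(values: List[float]) -> List[float]:
    """Sketch: convert a list of floats to fp16 and back using the row helpers."""
    n = len(values)
    src = (ctypes.c_float * n)(*values)  # float32 input row
    half = (ggml_fp16_t * n)()           # fp16 output row (raw uint16 words)
    back = (ctypes.c_float * n)()        # float32 round-trip buffer
    ggml_fp32_to_fp16_row(src, half, n)
    ggml_fp16_to_fp32_row(half, back, n)
    return list(back)
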
# struct ggml_context;
ggml_context_p = ctypes.c_void_p
"""Opaque pointer to a ggml_context.

ggml_context structs are never accessed directly; instead, a context is created with
[ggml_init](ggml.ggml_init) and must be freed with [ggml_free](ggml.ggml_free)."""
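
# Illustrative sketch (hypothetical helper, not part of the upstream API):
# the context lifecycle described above. `ggml_init_params`, `ggml_init` and
# `ggml_free` are bound later in this module (see the module docstring).
def _example_context_lifecycle() -> None:
    """Sketch: create a 16 MB context and always free it, even on error."""
    params = ggml_init_params(mem_size=16 * 1024 * 1024, mem_buffer=None)
    ctx = ggml_init(params=params)
    try:
        pass  # build tensors and graphs against `ctx` here
    finally:
        ggml_free(ctx)
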
# enum ggml_type {
#     GGML_TYPE_F32 = 0,
#     GGML_TYPE_F16 = 1,
#     GGML_TYPE_Q4_0 = 2,
#     GGML_TYPE_Q4_1 = 3,
#     // GGML_TYPE_Q4_2 = 4, support has been removed
#     // GGML_TYPE_Q4_3 (5) support has been removed
#     GGML_TYPE_Q5_0 = 6,
#     GGML_TYPE_Q5_1 = 7,
#     GGML_TYPE_Q8_0 = 8,
#     GGML_TYPE_Q8_1 = 9,
#     GGML_TYPE_Q2_K = 10,
#     GGML_TYPE_Q3_K = 11,
#     GGML_TYPE_Q4_K = 12,
#     GGML_TYPE_Q5_K = 13,
#     GGML_TYPE_Q6_K = 14,
#     GGML_TYPE_Q8_K = 15,
#     GGML_TYPE_I8,
#     GGML_TYPE_I16,
#     GGML_TYPE_I32,
#     GGML_TYPE_COUNT,
# };
GGML_TYPE_F32 = 0
GGML_TYPE_F16 = 1
GGML_TYPE_Q4_0 = 2
GGML_TYPE_Q4_1 = 3
GGML_TYPE_Q5_0 = 6
GGML_TYPE_Q5_1 = 7
GGML_TYPE_Q8_0 = 8
GGML_TYPE_Q8_1 = 9
GGML_TYPE_Q2_K = 10
GGML_TYPE_Q3_K = 11
GGML_TYPE_Q4_K = 12
GGML_TYPE_Q5_K = 13
GGML_TYPE_Q6_K = 14
GGML_TYPE_Q8_K = 15
GGML_TYPE_I8 = 16
GGML_TYPE_I16 = 17
GGML_TYPE_I32 = 18
GGML_TYPE_COUNT = 19
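
# Illustrative sketch (hypothetical mapping, not part of the upstream API):
# the non-quantized ggml types have direct numpy equivalents, which is handy
# when copying data in and out of tensors. The fp16 entry assumes IEEE half
# precision, which matches numpy's float16.
GGML_TYPE_TO_NUMPY_DTYPE: Dict[int, Any] = {
    GGML_TYPE_F32: np.dtype(np.float32),
    GGML_TYPE_F16: np.dtype(np.float16),
    GGML_TYPE_I8: np.dtype(np.int8),
    GGML_TYPE_I16: np.dtype(np.int16),
    GGML_TYPE_I32: np.dtype(np.int32),
}
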
# enum ggml_backend {
#     GGML_BACKEND_CPU = 0,
#     GGML_BACKEND_GPU = 10,
#     GGML_BACKEND_GPU_SPLIT = 20,
# };
GGML_BACKEND_CPU = 0
GGML_BACKEND_GPU = 10
GGML_BACKEND_GPU_SPLIT = 20

# // model file types
# enum ggml_ftype {
#     GGML_FTYPE_UNKNOWN = -1,
#     GGML_FTYPE_ALL_F32 = 0,
#     GGML_FTYPE_MOSTLY_F16 = 1, // except 1d tensors
#     GGML_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors
#     GGML_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors
#     GGML_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16
#     GGML_FTYPE_MOSTLY_Q8_0 = 7, // except 1d tensors
#     GGML_FTYPE_MOSTLY_Q5_0 = 8, // except 1d tensors
#     GGML_FTYPE_MOSTLY_Q5_1 = 9, // except 1d tensors
#     GGML_FTYPE_MOSTLY_Q2_K = 10, // except 1d tensors
#     GGML_FTYPE_MOSTLY_Q3_K = 11, // except 1d tensors
#     GGML_FTYPE_MOSTLY_Q4_K = 12, // except 1d tensors
#     GGML_FTYPE_MOSTLY_Q5_K = 13, // except 1d tensors
#     GGML_FTYPE_MOSTLY_Q6_K = 14, // except 1d tensors
# };
GGML_FTYPE_UNKNOWN = -1
GGML_FTYPE_ALL_F32 = 0
GGML_FTYPE_MOSTLY_F16 = 1
GGML_FTYPE_MOSTLY_Q4_0 = 2
GGML_FTYPE_MOSTLY_Q4_1 = 3
GGML_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4
GGML_FTYPE_MOSTLY_Q8_0 = 7
GGML_FTYPE_MOSTLY_Q5_0 = 8
GGML_FTYPE_MOSTLY_Q5_1 = 9
GGML_FTYPE_MOSTLY_Q2_K = 10
GGML_FTYPE_MOSTLY_Q3_K = 11
GGML_FTYPE_MOSTLY_Q4_K = 12
GGML_FTYPE_MOSTLY_Q5_K = 13
GGML_FTYPE_MOSTLY_Q6_K = 14

# // available tensor operations:
# enum ggml_op {
#     GGML_OP_NONE = 0,
#     GGML_OP_DUP,
#     GGML_OP_ADD,
#     GGML_OP_ADD1,
#     GGML_OP_ACC,
#     GGML_OP_SUB,
#     GGML_OP_MUL,
#     GGML_OP_DIV,
#     GGML_OP_SQR,
#     GGML_OP_SQRT,
#     GGML_OP_LOG,
#     GGML_OP_SUM,
#     GGML_OP_SUM_ROWS,
#     GGML_OP_MEAN,
#     GGML_OP_ARGMAX,
#     GGML_OP_REPEAT,
#     GGML_OP_REPEAT_BACK,
#     GGML_OP_CONCAT,
#     GGML_OP_SILU_BACK,
#     GGML_OP_NORM, // normalize
#     GGML_OP_RMS_NORM,
#     GGML_OP_RMS_NORM_BACK,
#     GGML_OP_GROUP_NORM,
#     GGML_OP_MUL_MAT,
#     GGML_OP_OUT_PROD,
#     GGML_OP_SCALE,
#     GGML_OP_SET,
#     GGML_OP_CPY,
#     GGML_OP_CONT,
#     GGML_OP_RESHAPE,
#     GGML_OP_VIEW,
#     GGML_OP_PERMUTE,
#     GGML_OP_TRANSPOSE,
#     GGML_OP_GET_ROWS,
#     GGML_OP_GET_ROWS_BACK,
#     GGML_OP_DIAG,
#     GGML_OP_DIAG_MASK_INF,
#     GGML_OP_DIAG_MASK_ZERO,
#     GGML_OP_SOFT_MAX,
#     GGML_OP_SOFT_MAX_BACK,
#     GGML_OP_ROPE,
#     GGML_OP_ROPE_BACK,
#     GGML_OP_ALIBI,
#     GGML_OP_CLAMP,
#     GGML_OP_CONV_1D,
#     GGML_OP_CONV_2D,
#     GGML_OP_CONV_TRANSPOSE_2D,
#     GGML_OP_POOL_1D,
#     GGML_OP_POOL_2D,
#     GGML_OP_UPSCALE, // nearest interpolate
#     GGML_OP_FLASH_ATTN,
#     GGML_OP_FLASH_FF,
#     GGML_OP_FLASH_ATTN_BACK,
#     GGML_OP_WIN_PART,
#     GGML_OP_WIN_UNPART,
#     GGML_OP_GET_REL_POS,
#     GGML_OP_ADD_REL_POS,
#     GGML_OP_UNARY,
#     GGML_OP_MAP_UNARY,
#     GGML_OP_MAP_BINARY,
#     GGML_OP_MAP_CUSTOM1_F32,
#     GGML_OP_MAP_CUSTOM2_F32,
#     GGML_OP_MAP_CUSTOM3_F32,
#     GGML_OP_MAP_CUSTOM1,
#     GGML_OP_MAP_CUSTOM2,
#     GGML_OP_MAP_CUSTOM3,
#     GGML_OP_CROSS_ENTROPY_LOSS,
#     GGML_OP_CROSS_ENTROPY_LOSS_BACK,
#     GGML_OP_COUNT,
# };
GGML_OP_NONE = 0
GGML_OP_DUP = 1
GGML_OP_ADD = 2
GGML_OP_ADD1 = 3
GGML_OP_ACC = 4
GGML_OP_SUB = 5
GGML_OP_MUL = 6
GGML_OP_DIV = 7
GGML_OP_SQR = 8
GGML_OP_SQRT = 9
GGML_OP_LOG = 10
GGML_OP_SUM = 11
GGML_OP_SUM_ROWS = 12
GGML_OP_MEAN = 13
GGML_OP_ARGMAX = 14
GGML_OP_REPEAT = 15
GGML_OP_REPEAT_BACK = 16
GGML_OP_CONCAT = 17
GGML_OP_SILU_BACK = 18
GGML_OP_NORM = 19
GGML_OP_RMS_NORM = 20
GGML_OP_RMS_NORM_BACK = 21
GGML_OP_GROUP_NORM = 22
GGML_OP_MUL_MAT = 23
GGML_OP_OUT_PROD = 24
GGML_OP_SCALE = 25
GGML_OP_SET = 26
GGML_OP_CPY = 27
GGML_OP_CONT = 28
GGML_OP_RESHAPE = 29
GGML_OP_VIEW = 30
GGML_OP_PERMUTE = 31
GGML_OP_TRANSPOSE = 32
GGML_OP_GET_ROWS = 33
GGML_OP_GET_ROWS_BACK = 34
GGML_OP_DIAG = 35
GGML_OP_DIAG_MASK_INF = 36
GGML_OP_DIAG_MASK_ZERO = 37
GGML_OP_SOFT_MAX = 38
GGML_OP_SOFT_MAX_BACK = 39
GGML_OP_ROPE = 40
GGML_OP_ROPE_BACK = 41
GGML_OP_ALIBI = 42
GGML_OP_CLAMP = 43
GGML_OP_CONV_1D = 44
GGML_OP_CONV_2D = 45
GGML_OP_CONV_TRANSPOSE_2D = 46
GGML_OP_POOL_1D = 47
GGML_OP_POOL_2D = 48
GGML_OP_UPSCALE = 49
GGML_OP_FLASH_ATTN = 50
GGML_OP_FLASH_FF = 51
GGML_OP_FLASH_ATTN_BACK = 52
GGML_OP_WIN_PART = 53
GGML_OP_WIN_UNPART = 54
GGML_OP_GET_REL_POS = 55
GGML_OP_ADD_REL_POS = 56
GGML_OP_UNARY = 57
GGML_OP_MAP_UNARY = 58
GGML_OP_MAP_BINARY = 59
GGML_OP_MAP_CUSTOM1_F32 = 60
GGML_OP_MAP_CUSTOM2_F32 = 61
GGML_OP_MAP_CUSTOM3_F32 = 62
GGML_OP_MAP_CUSTOM1 = 63
GGML_OP_MAP_CUSTOM2 = 64
GGML_OP_MAP_CUSTOM3 = 65
GGML_OP_CROSS_ENTROPY_LOSS = 66
GGML_OP_CROSS_ENTROPY_LOSS_BACK = 67
GGML_OP_COUNT = 68

# enum ggml_unary_op {
#     GGML_UNARY_OP_ABS,
#     GGML_UNARY_OP_SGN,
#     GGML_UNARY_OP_NEG,
#     GGML_UNARY_OP_STEP,
#     GGML_UNARY_OP_TANH,
#     GGML_UNARY_OP_ELU,
#     GGML_UNARY_OP_RELU,
#     GGML_UNARY_OP_GELU,
#     GGML_UNARY_OP_GELU_QUICK,
#     GGML_UNARY_OP_SILU,
# };
GGML_UNARY_OP_ABS = 0
GGML_UNARY_OP_SGN = 1
GGML_UNARY_OP_NEG = 2
GGML_UNARY_OP_STEP = 3
GGML_UNARY_OP_TANH = 4
GGML_UNARY_OP_ELU = 5
GGML_UNARY_OP_RELU = 6
GGML_UNARY_OP_GELU = 7
GGML_UNARY_OP_GELU_QUICK = 8
GGML_UNARY_OP_SILU = 9

# enum ggml_object_type {
#     GGML_OBJECT_TENSOR,
#     GGML_OBJECT_GRAPH,
#     GGML_OBJECT_WORK_BUFFER
# };
GGML_OBJECT_TENSOR = 0
GGML_OBJECT_GRAPH = 1
GGML_OBJECT_WORK_BUFFER = 2


# // ggml object
# struct ggml_object {
#     size_t offs;
#     size_t size;
#     struct ggml_object * next;
#     enum ggml_object_type type;
#     char padding[4];
# };
class ggml_object(ctypes.Structure):
    pass


ggml_object._fields_ = [
    ("offs", ctypes.c_size_t),
    ("size", ctypes.c_size_t),
    ("next", ctypes.POINTER(ggml_object)),
    ("type", ctypes.c_int),
    ("padding", ctypes.c_char * 4),
]

ggml_object_p: TypeAlias = "ctypes._Pointer[ggml_object]"  # type: ignore

GGML_OBJECT_SIZE = ctypes.sizeof(ggml_object)
# // n-dimensional tensor
# struct ggml_tensor {
#     enum ggml_type type;
#     enum ggml_backend backend;
#     int n_dims;
#     int64_t ne[GGML_MAX_DIMS]; // number of elements
#     size_t nb[GGML_MAX_DIMS];  // stride in bytes:
#                                // nb[0] = sizeof(type)
#                                // nb[1] = nb[0] * ne[0] + padding
#                                // nb[i] = nb[i-1] * ne[i-1]
#     // compute data
#     enum ggml_op op;
#     // op params - allocated as int32_t for alignment
#     int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t)];
#     bool is_param;
#     struct ggml_tensor * grad;
#     struct ggml_tensor * src[GGML_MAX_SRC];
#     // performance
#     int perf_runs;
#     int64_t perf_cycles;
#     int64_t perf_time_us;
#     struct ggml_tensor * view_src;
#     size_t view_offs;
#     void * data;
#     char name[GGML_MAX_NAME];
#     void * extra; // extra things e.g. for ggml-cuda.cu
#     char padding[4];
# };
class ggml_tensor(ctypes.Structure):
    """n-dimensional tensor

    Attributes:
        type (int): ggml_type
        backend (int): ggml_backend
        n_dims (int): number of dimensions
        ne (ctypes.Array[ctypes.c_int64]): number of elements in each dimension
        nb (ctypes.Array[ctypes.c_size_t]): stride in bytes for each dimension
        op (int): ggml operation
        op_params (ctypes.Array[ctypes.c_int32]): `GGML_MAX_OP_PARAMS`-length array of operation parameters
        is_param (bool): is this a parameter tensor
        grad (ggml_tensor_p): reference to gradient tensor
        src (ctypes.Array[ggml_tensor_p]): `GGML_MAX_SRC`-length array of source tensors
        perf_runs (int): number of performance runs
        perf_cycles (int): number of cycles
        perf_time_us (int): time in microseconds
        view_src (ggml_tensor_p): pointer to the viewed tensor if this tensor is a view, None otherwise
        view_offs (ctypes.c_size_t): offset into the data pointer of the view tensor
        data (ctypes.c_void_p): reference to raw tensor data
        name (bytes): name of tensor
        extra (ctypes.c_void_p): extra data (e.g. for CUDA)
    """


ggml_tensor._fields_ = [
    ("type", ctypes.c_int),
    ("backend", ctypes.c_int),
    ("n_dims", ctypes.c_int),
    ("ne", ctypes.c_int64 * GGML_MAX_DIMS),
    ("nb", ctypes.c_size_t * GGML_MAX_DIMS),
    ("op", ctypes.c_int),
    (
        "op_params",
        ctypes.c_int32 * (GGML_MAX_OP_PARAMS // ctypes.sizeof(ctypes.c_int32)),
    ),
    ("is_param", ctypes.c_bool),
    ("grad", ctypes.POINTER(ggml_tensor)),
    ("src", ctypes.POINTER(ggml_tensor) * GGML_MAX_SRC),
    ("perf_runs", ctypes.c_int),
    ("perf_cycles", ctypes.c_int64),
    ("perf_time_us", ctypes.c_int64),
    ("view_src", ctypes.POINTER(ggml_tensor)),
    ("view_offs", ctypes.c_size_t),
    ("data", ctypes.c_void_p),
    ("name", ctypes.c_char * GGML_MAX_NAME),
    ("extra", ctypes.c_void_p),
    ("padding", ctypes.c_char * 4),
]

GGML_TENSOR_SIZE = ctypes.sizeof(ggml_tensor)

ggml_tensor_p: TypeAlias = "ctypes._Pointer[ggml_tensor]"  # type: ignore
"""ctypes pointer to a [ggml_tensor][ggml.ggml_tensor]

Can be dereferenced to a [ggml_tensor][ggml.ggml_tensor] object using
the `.contents` attribute."""
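
# Illustrative sketch (hypothetical helper, not part of the upstream API):
# a ggml_tensor_p is a plain ctypes pointer, so struct fields are reached
# through `.contents` as described above.
def _example_describe_tensor(tensor: ggml_tensor_p) -> str:
    """Sketch: summarize a tensor's name, type and shape from its pointer."""
    t = tensor.contents
    shape = [t.ne[i] for i in range(t.n_dims)]
    return f"{t.name.decode(errors='replace')}: type={t.type} shape={shape}"
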
abort_callback_t = ctypes.CFUNCTYPE(ctypes.c_bool, ctypes.c_void_p)


# // the compute plan that needs to be prepared for ggml_graph_compute()
# // since https://github.com/ggerganov/ggml/issues/287
# struct ggml_cplan {
#     size_t work_size; // size of work buffer, calculated by `ggml_graph_plan()`
#     uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
#     int n_threads;
#     // the `n_tasks` of nodes, 1:1 mapping to cgraph nodes
#     int n_tasks[GGML_MAX_NODES];
#     // abort ggml_graph_compute when true
#     bool (*abort_callback)(void * data);
#     void * abort_callback_data;
# };
class ggml_cplan(ctypes.Structure):
    """Compute plan for a ggml computation graph

    Attributes:
        work_size (int): size of work buffer
        work_data (ctypes.POINTER(ctypes.c_uint8)): work buffer
        n_threads (int): number of threads to use when computing the graph using [ggml_graph_compute][ggml.ggml_graph_compute]
        n_tasks (ctypes.Array[ctypes.c_int]): `n_tasks` of nodes, 1:1 mapping to cgraph nodes
        abort_callback (abort_callback_t): abort callback
        abort_callback_data (ctypes.c_void_p): abort callback data
    """

    _fields_ = [
        ("work_size", ctypes.c_size_t),
        ("work_data", ctypes.POINTER(ctypes.c_uint8)),
        ("n_threads", ctypes.c_int),
        ("n_tasks", ctypes.c_int * GGML_MAX_NODES),
        (
            "abort_callback",
            abort_callback_t,
        ),
        ("abort_callback_data", ctypes.c_void_p),
    ]


GGML_CPLAN_SIZE = ctypes.sizeof(ggml_cplan)

ggml_cplan_p: TypeAlias = "ctypes._Pointer[ggml_cplan]"  # type: ignore
"""ctypes pointer to a [ggml_cplan][ggml.ggml_cplan]

Can be dereferenced to a [ggml_cplan][ggml.ggml_cplan] object using
the `.contents` attribute."""
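
# Illustrative sketch (hypothetical helper, not part of the upstream API):
# wiring a Python function into the `abort_callback` slot of a compute plan.
# Per the C comment above, returning True asks ggml_graph_compute to stop.
# Keep a reference to the returned CFUNCTYPE object for as long as the plan is
# in use, otherwise it may be garbage-collected while C code still holds it.
def _example_set_abort_callback(
    cplan: ggml_cplan, should_abort: Callable[[], bool]
) -> abort_callback_t:
    """Sketch: install an abort callback on a ggml_cplan; the caller must keep the returned object alive."""

    @abort_callback_t
    def _callback(_data: ctypes.c_void_p) -> bool:
        return should_abort()

    cplan.abort_callback = _callback
    cplan.abort_callback_data = None
    return _callback
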
# // next prime after GGML_MAX_NODES
# // #define GGML_GRAPH_HASHTABLE_SIZE 4099
# // next prime after GGML_MAX_NODES * 2 (nodes + leafs)
# #define GGML_GRAPH_HASHTABLE_SIZE 8273
GGML_GRAPH_HASHTABLE_SIZE = 8273


# // computation graph
# struct ggml_cgraph {
#     int n_nodes;
#     int n_leafs;
#     struct ggml_tensor * nodes[GGML_MAX_NODES];
#     struct ggml_tensor * grads[GGML_MAX_NODES];
#     struct ggml_tensor * leafs[GGML_MAX_NODES];
#     void * visited_hash_table[GGML_GRAPH_HASHTABLE_SIZE];
#     // performance
#     int perf_runs;
#     int64_t perf_cycles;
#     int64_t perf_time_us;
# };
class ggml_cgraph(ctypes.Structure):
    """ggml computation graph

    Attributes:
        n_nodes (int): number of nodes
        n_leafs (int): number of leafs
        nodes (ctypes.Array[ggml_tensor_p]): `n_nodes`-length array of compute tensors
        grads (ctypes.Array[ggml_tensor_p]): `n_nodes`-length array of gradient tensors
        leafs (ctypes.Array[ggml_tensor_p]): `n_leafs`-length array of parameter tensors
        visited_hash_table (ctypes.Array[ctypes.c_void_p]): `GGML_GRAPH_HASHTABLE_SIZE`-length array of visited nodes
        perf_runs (int): number of runs
        perf_cycles (int): number of cycles
        perf_time_us (int): computation time in microseconds"""

    _fields_ = [
        ("n_nodes", ctypes.c_int),
        ("n_leafs", ctypes.c_int),
        ("nodes", ctypes.POINTER(ggml_tensor) * GGML_MAX_NODES),
        ("grads", ctypes.POINTER(ggml_tensor) * GGML_MAX_NODES),
        ("leafs", ctypes.POINTER(ggml_tensor) * GGML_MAX_NODES),
        ("visited_hash_table", ctypes.c_void_p * GGML_GRAPH_HASHTABLE_SIZE),
        ("perf_runs", ctypes.c_int),
        ("perf_cycles", ctypes.c_int64),
        ("perf_time_us", ctypes.c_int64),
    ]


ggml_cgraph_p: TypeAlias = "ctypes._Pointer[ggml_cgraph]"  # type: ignore
"""ctypes pointer to a [ggml_cgraph][ggml.ggml_cgraph]

Can be dereferenced to a [ggml_cgraph][ggml.ggml_cgraph] object using
the `.contents` attribute."""

# static const size_t GGML_GRAPH_SIZE = sizeof(struct ggml_cgraph);
GGML_GRAPH_SIZE = ctypes.sizeof(ggml_cgraph)
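
# Illustrative sketch (hypothetical helper, not part of the upstream API):
# walking the compute nodes of a graph, e.g. one returned by
# `ggml_build_forward` as in the module docstring example.
def _example_node_names(gf: ggml_cgraph) -> List[str]:
    """Sketch: collect the names of all compute nodes in a graph."""
    return [
        gf.nodes[i].contents.name.decode(errors="replace")
        for i in range(gf.n_nodes)
    ]
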
# struct ggml_scratch {
#     size_t offs;
#     size_t size;
#     void * data;
# };
class ggml_scratch(ctypes.Structure):
    _fields_ = [
        ("offs", ctypes.c_size_t),
        ("size", ctypes.c_size_t),
        ("data", ctypes.c_void_p),
    ]


# struct ggml_init_params {
#     // memory pool
#     size_t mem_size; // bytes
#     void * mem_buffer; // if NULL, memory will be allocated internally
#     bool no_alloc; // don't allocate memory for the tensor data
# };
class ggml_init_params(ctypes.Structure):
    """Initialization parameters for a ggml context

    **NOTE**: Reference counting does not cross into ggml. If you allocate a memory buffer
    in Python using a ctypes array or a numpy array, you must keep a reference to it until
    you free the ggml context; otherwise you will encounter a segmentation fault.

    Attributes:
        mem_size (int): size of memory pool in bytes
        mem_buffer (ctypes.c_void_p): pointer to memory pool; if None, memory will be allocated internally
        no_alloc (bool): don't allocate memory for tensor data
    """

    _fields_ = [
        ("mem_size", ctypes.c_size_t),
        ("mem_buffer", ctypes.c_void_p),
        ("no_alloc", ctypes.c_bool),
    ]
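
# Illustrative sketch (hypothetical helper, not part of the upstream API):
# supplying your own memory pool. As the NOTE above says, the buffer is not
# reference-counted on the C side, so the caller must keep `buf` alive until
# the context is freed; returning it alongside the params is one way to do so.
def _example_params_with_own_buffer(
    mem_size: int,
) -> Tuple[ggml_init_params, "ctypes.Array[ctypes.c_uint8]"]:
    """Sketch: build ggml_init_params backed by a caller-owned ctypes buffer."""
    buf = (ctypes.c_uint8 * mem_size)()
    params = ggml_init_params(
        mem_size=mem_size,
        mem_buffer=ctypes.cast(buf, ctypes.c_void_p),
        no_alloc=False,
    )
    return params, buf
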
  644. # // compute types
  645. # // NOTE: the INIT or FINALIZE pass is not scheduled unless explicitly enabled.
  646. # // This behavior was changed since https://github.com/ggerganov/llama.cpp/pull/1995.
  647. # enum ggml_task_type {
  648. # GGML_TASK_INIT = 0,
  649. # GGML_TASK_COMPUTE,
  650. # GGML_TASK_FINALIZE,
  651. # };
  652. GGML_TASK_INIT = 0
  653. GGML_TASK_COMPUTE = 1
  654. GGML_TASK_FINALIZE = 2
  655. # struct ggml_compute_params {
  656. # enum ggml_task_type type;
  657. # // ith = thread index, nth = number of threads
  658. # int ith, nth;
  659. # // work buffer for all threads
  660. # size_t wsize;
  661. # void * wdata;
  662. # };
  663. class ggml_compute_params(ctypes.Structure):
  664. _fields_ = [
  665. ("type", ctypes.c_int),
  666. ("ith", ctypes.c_int),
  667. ("nth", ctypes.c_int),
  668. ("wsize", ctypes.c_size_t),
  669. ("wdata", ctypes.c_void_p),
  670. ]
  671. ggml_compute_params_p: TypeAlias = "ctypes._Pointer[ggml_compute_params]" # type: ignore
  672. # // misc
  673. # GGML_API void ggml_time_init(void); // call this once at the beginning of the program
  674. def ggml_time_init():
  675. return lib.ggml_time_init()
  676. lib.ggml_time_init.argtypes = []
  677. lib.ggml_time_init.restype = None
  678. # GGML_API int64_t ggml_time_ms(void);
  679. def ggml_time_ms() -> int:
  680. return lib.ggml_time_ms()
  681. lib.ggml_time_ms.argtypes = []
  682. lib.ggml_time_ms.restype = ctypes.c_int64
  683. # GGML_API int64_t ggml_time_us(void);
  684. def ggml_time_us() -> int:
  685. return lib.ggml_time_us()
  686. lib.ggml_time_us.argtypes = []
  687. lib.ggml_time_us.restype = ctypes.c_int64
  688. # GGML_API int64_t ggml_cycles(void);
  689. def ggml_cycles() -> int:
  690. return lib.ggml_cycles()
  691. lib.ggml_cycles.argtypes = []
  692. lib.ggml_cycles.restype = ctypes.c_int64
  693. # GGML_API int64_t ggml_cycles_per_ms(void);
  694. def ggml_cycles_per_ms() -> int:
  695. return lib.ggml_cycles_per_ms()
  696. lib.ggml_cycles_per_ms.argtypes = []
  697. lib.ggml_cycles_per_ms.restype = ctypes.c_int64
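# Illustrative sketch (hypothetical helper): the timing functions above are plain wall-clock
# helpers; ggml_time_init() should be called once per process before reading the timers.
def _example_elapsed_us(fn) -> int:
    ggml_time_init()
    t_start = ggml_time_us()
    fn()  # run the workload being measured
    return ggml_time_us() - t_start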
  698. # GGML_API void ggml_numa_init(void); // call once for better performance on NUMA systems
  699. def ggml_numa_init():
  700. return lib.ggml_numa_init()
  701. lib.ggml_numa_init.argtypes = []
  702. lib.ggml_numa_init.restype = None
  703. # GGML_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
  704. def ggml_is_numa() -> bool:
  705. return lib.ggml_is_numa()
  706. lib.ggml_is_numa.argtypes = []
  707. lib.ggml_is_numa.restype = ctypes.c_bool
  708. # GGML_API void ggml_print_object (const struct ggml_object * obj);
  709. def ggml_print_object(obj: ggml_object_p):
  710. return lib.ggml_print_object(obj)
  711. lib.ggml_print_object.argtypes = [ctypes.POINTER(ggml_object)]
  712. lib.ggml_print_object.restype = None
  713. # GGML_API void ggml_print_objects(const struct ggml_context * ctx);
  714. def ggml_print_objects(ctx: ggml_context_p):
  715. return lib.ggml_print_objects(ctx)
  716. lib.ggml_print_objects.argtypes = [ggml_context_p]
  717. lib.ggml_print_objects.restype = None
  718. # GGML_API int64_t ggml_nelements (const struct ggml_tensor * tensor);
  719. def ggml_nelements(
  720. tensor: ggml_tensor_p,
  721. ) -> int:
  722. """Get the number of elements in a tensor
  723. Parameters:
  724. tensor: tensor
  725. Returns:
  726. number of elements"""
  727. return lib.ggml_nelements(tensor)
  728. lib.ggml_nelements.argtypes = [ctypes.POINTER(ggml_tensor)]
  729. lib.ggml_nelements.restype = ctypes.c_int64
  730. # GGML_API int64_t ggml_nrows (const struct ggml_tensor * tensor);
  731. def ggml_nrows(
  732. tensor: ggml_tensor_p,
  733. ) -> int:
  734. """Get the number of rows in a tensor
  735. Parameters:
  736. tensor: tensor
  737. Returns:
  738. number of rows"""
  739. return lib.ggml_nrows(tensor)
  740. lib.ggml_nrows.argtypes = [ctypes.POINTER(ggml_tensor)]
  741. lib.ggml_nrows.restype = ctypes.c_int64
  742. # GGML_API size_t ggml_nbytes (const struct ggml_tensor * tensor);
  743. def ggml_nbytes(
  744. tensor: ggml_tensor_p,
  745. ) -> int:
  746. """Get the number of bytes required to store tensor data
  747. Parameters:
  748. tensor: tensor
  749. Returns:
  750. number of bytes"""
  751. return lib.ggml_nbytes(tensor)
  752. lib.ggml_nbytes.argtypes = [ctypes.POINTER(ggml_tensor)]
  753. lib.ggml_nbytes.restype = ctypes.c_size_t
  754. # GGML_API size_t ggml_nbytes_pad (const struct ggml_tensor * tensor); // same as ggml_nbytes() but padded to GGML_MEM_ALIGN
  755. def ggml_nbytes_pad(
  756. tensor: ggml_tensor_p,
  757. ) -> int:
  758. """Get the number of bytes required to store tensor data, padded to GGML_MEM_ALIGN
  759. Parameters:
  760. tensor: tensor
  761. Returns:
  762. number of bytes"""
  763. return lib.ggml_nbytes_pad(tensor)
  764. lib.ggml_nbytes_pad.argtypes = [ctypes.POINTER(ggml_tensor)]
  765. lib.ggml_nbytes_pad.restype = ctypes.c_size_t
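# Illustrative sketch (hypothetical helper): the size queries above all take a tensor pointer
# and are cheap to call, e.g. when deciding how large a destination buffer must be.
def _example_tensor_sizes(tensor: ggml_tensor_p) -> dict:
    return {
        "elements": ggml_nelements(tensor),
        "rows": ggml_nrows(tensor),
        "bytes": ggml_nbytes(tensor),
        "bytes_padded": ggml_nbytes_pad(tensor),  # rounded up to GGML_MEM_ALIGN
    }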
  766. # GGML_API size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split);
  767. def ggml_nbytes_split(
  768. tensor: ggml_tensor_p,
  769. nrows_split: Union[ctypes.c_int, int],
  770. ) -> int:
  771. return lib.ggml_nbytes_split(tensor, nrows_split)
  772. lib.ggml_nbytes_split.argtypes = [ctypes.POINTER(ggml_tensor), ctypes.c_int]
  773. lib.ggml_nbytes_split.restype = ctypes.c_size_t
  774. # GGML_API int ggml_blck_size (enum ggml_type type);
  775. def ggml_blck_size(type: Union[ctypes.c_int, int]) -> int:
  776. return lib.ggml_blck_size(type)
  777. lib.ggml_blck_size.argtypes = [ctypes.c_int]
  778. lib.ggml_blck_size.restype = ctypes.c_int
  779. # GGML_API size_t ggml_type_size (enum ggml_type type); // size in bytes for all elements in a block
  780. def ggml_type_size(type: Union[ctypes.c_int, int]) -> int:
  781. return lib.ggml_type_size(type)
  782. lib.ggml_type_size.argtypes = [ctypes.c_int]
  783. lib.ggml_type_size.restype = ctypes.c_size_t
  784. # GGML_API float ggml_type_sizef(enum ggml_type type); // ggml_type_size()/ggml_blck_size() as float
  785. def ggml_type_sizef(type: Union[ctypes.c_int, int]) -> float:
  786. return lib.ggml_type_sizef(type)
  787. lib.ggml_type_sizef.argtypes = [ctypes.c_int]
  788. lib.ggml_type_sizef.restype = ctypes.c_float
  789. # GGML_API const char * ggml_type_name(enum ggml_type type);
  790. def ggml_type_name(type: Union[ctypes.c_int, int]) -> bytes:
  791. return lib.ggml_type_name(type)
  792. lib.ggml_type_name.argtypes = [ctypes.c_int]
  793. lib.ggml_type_name.restype = ctypes.c_char_p
  794. # GGML_API const char * ggml_op_name (enum ggml_op op);
  795. def ggml_op_name(op: Union[ctypes.c_int, int]) -> bytes:
  796. return lib.ggml_op_name(op)
  797. lib.ggml_op_name.argtypes = [ctypes.c_int]
  798. lib.ggml_op_name.restype = ctypes.c_char_p
  799. # GGML_API const char * ggml_op_symbol(enum ggml_op op);
  800. def ggml_op_symbol(op: Union[ctypes.c_int, int]) -> bytes:
  801. return lib.ggml_op_symbol(op)
  802. lib.ggml_op_symbol.argtypes = [ctypes.c_int]
  803. lib.ggml_op_symbol.restype = ctypes.c_char_p
  804. # GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
  805. def ggml_element_size(
  806. tensor: ggml_tensor_p,
  807. ) -> int:
  808. return lib.ggml_element_size(tensor)
  809. lib.ggml_element_size.argtypes = [ctypes.POINTER(ggml_tensor)]
  810. lib.ggml_element_size.restype = ctypes.c_size_t
  811. # GGML_API bool ggml_is_quantized(enum ggml_type type);
  812. def ggml_is_quantized(type: Union[ctypes.c_int, int]) -> bool:
  813. return lib.ggml_is_quantized(type)
  814. lib.ggml_is_quantized.argtypes = [ctypes.c_int]
  815. lib.ggml_is_quantized.restype = ctypes.c_bool
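# Illustrative sketch (hypothetical helper): combining the type queries above into a readable
# description. Note that ggml_type_name() returns bytes and must be decoded for display.
def _example_describe_type(ggml_type: int) -> str:
    name = ggml_type_name(ggml_type).decode("utf-8")
    return "%s: %d elements/block, %d bytes/block, quantized=%s" % (
        name,
        ggml_blck_size(ggml_type),
        ggml_type_size(ggml_type),
        ggml_is_quantized(ggml_type),
    )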
  816. # // TODO: temporary until model loading of ggml examples is refactored
  817. # GGML_API enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype);
  818. def ggml_ftype_to_ggml_type(ftype: Union[ctypes.c_int, int]) -> int:
  819. return lib.ggml_ftype_to_ggml_type(ftype)
  820. lib.ggml_ftype_to_ggml_type.argtypes = [ctypes.c_int]
  821. lib.ggml_ftype_to_ggml_type.restype = ctypes.c_int
  822. # GGML_API bool ggml_is_transposed(const struct ggml_tensor * tensor);
  823. def ggml_is_transposed(
  824. tensor: ggml_tensor_p,
  825. ) -> bool:
  826. """Check if a tensor is transposed
  827. Parameters:
  828. tensor: tensor
  829. Returns:
  830. True if tensor is transposed else False"""
  831. return lib.ggml_is_transposed(tensor)
  832. lib.ggml_is_transposed.argtypes = [ctypes.POINTER(ggml_tensor)]
  833. lib.ggml_is_transposed.restype = ctypes.c_bool
  834. # GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor);
  835. def ggml_is_contiguous(
  836. tensor: ggml_tensor_p,
  837. ) -> bool:
  838. """Check if a tensor is contiguous
  839. Parameters:
  840. tensor: tensor
  841. Returns:
  842. True if tensor is contiguous else False"""
  843. return lib.ggml_is_contiguous(tensor)
  844. lib.ggml_is_contiguous.argtypes = [ctypes.POINTER(ggml_tensor)]
  845. lib.ggml_is_contiguous.restype = ctypes.c_bool
  846. # GGML_API bool ggml_is_permuted (const struct ggml_tensor * tensor);
  847. def ggml_is_permuted(
  848. tensor: ggml_tensor_p,
  849. ) -> bool:
  850. """Check if a tensor is permuted
  851. Parameters:
  852. tensor: tensor
  853. Returns:
  854. True if tensor is permuted else False"""
  855. return lib.ggml_is_permuted(tensor)
  856. lib.ggml_is_permuted.argtypes = [ctypes.POINTER(ggml_tensor)]
  857. lib.ggml_is_permuted.restype = ctypes.c_bool
  858. # GGML_API bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
  859. def ggml_are_same_shape(
  860. t0: ggml_tensor_p,
  861. t1: ggml_tensor_p,
  862. ) -> bool:
  863. """Check if two tensors have the same shape
  864. Parameters:
  865. t0: tensor 0
  866. t1: tensor 1
  867. Returns:
  868. True if tensors have the same shape else False"""
  869. return lib.ggml_are_same_shape(t0, t1)
  870. lib.ggml_are_same_shape.argtypes = [
  871. ctypes.POINTER(ggml_tensor),
  872. ctypes.POINTER(ggml_tensor),
  873. ]
  874. lib.ggml_are_same_shape.restype = ctypes.c_bool
  875. # // use this to compute the memory overhead of a tensor
  876. # GGML_API size_t ggml_tensor_overhead(void);
  877. def ggml_tensor_overhead() -> int:
  878. """Overhead required for a tensor struct in bytes
  879. Returns:
  880. size of tensor struct in bytes"""
  881. return lib.ggml_tensor_overhead()
  882. lib.ggml_tensor_overhead.argtypes = []
  883. lib.ggml_tensor_overhead.restype = ctypes.c_size_t
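# Illustrative sketch (hypothetical helper): ggml_tensor_overhead() is typically used to size
# a metadata-only pool, e.g. for a context created with no_alloc=True where tensor data is
# stored elsewhere.
def _example_metadata_pool_size(n_tensors: int) -> int:
    return n_tensors * ggml_tensor_overhead()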
  884. # // main
  885. # GGML_API struct ggml_context * ggml_init(struct ggml_init_params params);
  886. def ggml_init(
  887. params: ggml_init_params,
  888. ) -> ggml_context_p:
  889. """Instantiate a new ggml context with params.
  890. You must call `ggml_free()` to free the context.
  891. Parameters:
  892. params: ggml init params
  893. Returns:
  894. Pointer to ggml_context"""
  895. return lib.ggml_init(params)
  896. lib.ggml_init.argtypes = [ggml_init_params]
  897. lib.ggml_init.restype = ggml_context_p
  898. # GGML_API void ggml_free(struct ggml_context * ctx);
  899. def ggml_free(ctx: ggml_context_p):
  900. """Free the ggml context.
  901. Parameters:
  902. ctx: ggml context"""
  903. return lib.ggml_free(ctx)
  904. lib.ggml_free.argtypes = [ggml_context_p]
  905. lib.ggml_free.restype = None
  906. # GGML_API size_t ggml_used_mem(const struct ggml_context * ctx);
  907. def ggml_used_mem(ctx: ggml_context_p) -> int:
  908. """Return the amount of memory used by the ggml context in bytes.
  909. Parameters:
  910. ctx: ggml context
  911. Returns:
  912. amount of memory used in bytes"""
  913. return lib.ggml_used_mem(ctx)
  914. lib.ggml_used_mem.argtypes = [ggml_context_p]
  915. lib.ggml_used_mem.restype = ctypes.c_size_t
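# Illustrative sketch (hypothetical helper): the basic create/use/free lifecycle of a context.
# Passing mem_buffer=None lets ggml allocate the pool internally.
def _example_context_lifecycle() -> None:
    params = ggml_init_params(mem_size=16 * 1024 * 1024, mem_buffer=None, no_alloc=False)
    ctx = ggml_init(params)
    try:
        print("used bytes:", ggml_used_mem(ctx))
    finally:
        ggml_free(ctx)  # every successful ggml_init() needs a matching ggml_free()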
  916. # GGML_API size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch);
  917. def ggml_set_scratch(ctx: ggml_context_p, scratch: ggml_scratch) -> int:
  918. """Set the scratch buffer for the ggml context."""
  919. return lib.ggml_set_scratch(ctx, scratch)
  920. lib.ggml_set_scratch.argtypes = [ggml_context_p, ggml_scratch]
  921. lib.ggml_set_scratch.restype = ctypes.c_size_t
  922. # GGML_API bool ggml_get_no_alloc(struct ggml_context * ctx);
  923. def ggml_get_no_alloc(ctx: ggml_context_p) -> bool:
  924. """Return the no_alloc flag for the ggml context."""
  925. return lib.ggml_get_no_alloc(ctx)
  926. lib.ggml_get_no_alloc.argtypes = [ggml_context_p]
  927. lib.ggml_get_no_alloc.restype = ctypes.c_bool
  928. # GGML_API void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc);
  929. def ggml_set_no_alloc(ctx: ggml_context_p, no_alloc: Union[ctypes.c_bool, bool]):
  930. """Set the no_alloc flag for the ggml context."""
  931. return lib.ggml_set_no_alloc(ctx, no_alloc)
  932. lib.ggml_set_no_alloc.argtypes = [ggml_context_p, ctypes.c_bool]
  933. lib.ggml_set_no_alloc.restype = None
  934. # GGML_API void * ggml_get_mem_buffer (struct ggml_context * ctx);
  935. def ggml_get_mem_buffer(ctx: ggml_context_p) -> Optional[ctypes.c_void_p]:
  936. """Return the memory buffer for the ggml context."""
  937. return lib.ggml_get_mem_buffer(ctx)
  938. lib.ggml_get_mem_buffer.argtypes = [ggml_context_p]
  939. lib.ggml_get_mem_buffer.restype = ctypes.c_void_p
  940. # GGML_API size_t ggml_get_mem_size (struct ggml_context * ctx);
  941. def ggml_get_mem_size(ctx: ggml_context_p) -> int:
  942. """Return the size of the memory buffer for the ggml context in bytes."""
  943. return lib.ggml_get_mem_size(ctx)
  944. lib.ggml_get_mem_size.argtypes = [ggml_context_p]
945. lib.ggml_get_mem_size.restype = ctypes.c_size_t
  946. # GGML_API size_t ggml_get_max_tensor_size(const struct ggml_context * ctx);
  947. def ggml_get_max_tensor_size(ctx: ggml_context_p) -> int:
  948. """Return the maximum size of a tensor in bytes."""
  949. return lib.ggml_get_max_tensor_size(ctx)
  950. lib.ggml_get_max_tensor_size.argtypes = [ggml_context_p]
  951. lib.ggml_get_max_tensor_size.restype = ctypes.c_size_t
  952. # GGML_API struct ggml_tensor * ggml_new_tensor(
  953. # struct ggml_context * ctx,
  954. # enum ggml_type type,
  955. # int n_dims,
  956. # const int64_t *ne);
  957. def ggml_new_tensor(
  958. ctx: ggml_context_p,
  959. type: Union[ctypes.c_int, int],
  960. n_dims: Union[ctypes.c_int, int],
  961. ne: CInt64Array,
  962. ) -> ggml_tensor_p:
  963. """Create a new tensor with the given type, number of dimensions, and number of elements in each dimension.
  964. Parameters:
  965. ctx: ggml context
  966. type: ggml type
  967. n_dims: number of dimensions
  968. ne (ctypes.Array[ctypes.c_int64]): number of elements in each dimension (array of length n_dims)
  969. Returns:
  970. Pointer to ggml_tensor"""
  971. return lib.ggml_new_tensor(ctx, type, n_dims, ne)
  972. lib.ggml_new_tensor.argtypes = [
  973. ggml_context_p,
  974. ctypes.c_int,
  975. ctypes.c_int,
  976. ctypes.POINTER(ctypes.c_int64),
  977. ]
  978. lib.ggml_new_tensor.restype = ctypes.POINTER(ggml_tensor)
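# Illustrative sketch (hypothetical helper): the n-dimensional constructor expects `ne` as a
# C array of int64, built here with a ctypes array type. GGML_TYPE_F32 is assumed to be the
# F32 type constant bound earlier in this module.
def _example_new_tensor_nd(ctx: ggml_context_p) -> ggml_tensor_p:
    ne = (ctypes.c_int64 * 2)(4, 3)  # a 4 x 3 tensor
    return ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne)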
  979. # GGML_API struct ggml_tensor * ggml_new_tensor_1d(
  980. # struct ggml_context * ctx,
  981. # enum ggml_type type,
  982. # int64_t ne0);
  983. def ggml_new_tensor_1d(
  984. ctx: ggml_context_p, type: Union[ctypes.c_int, int], ne0: Union[ctypes.c_int64, int]
  985. ) -> ggml_tensor_p:
  986. """Create a new 1-dimensional tensor with the given type and number of elements.
  987. Parameters:
  988. ctx: ggml context
  989. type: ggml type
  990. ne0: number of elements in dimension 0
  991. Returns:
  992. Pointer to ggml_tensor"""
  993. return lib.ggml_new_tensor_1d(ctx, type, ne0)
  994. lib.ggml_new_tensor_1d.argtypes = [ggml_context_p, ctypes.c_int, ctypes.c_int64]
  995. lib.ggml_new_tensor_1d.restype = ctypes.POINTER(ggml_tensor)
  996. # GGML_API struct ggml_tensor * ggml_new_tensor_2d(
  997. # struct ggml_context * ctx,
  998. # enum ggml_type type,
  999. # int64_t ne0,
  1000. # int64_t ne1);
  1001. def ggml_new_tensor_2d(
  1002. ctx: ggml_context_p,
  1003. type: Union[ctypes.c_int, int],
  1004. ne0: Union[ctypes.c_int64, int],
  1005. ne1: Union[ctypes.c_int64, int],
  1006. ) -> ggml_tensor_p:
  1007. """Create a new 2-dimensional tensor with the given type and number of elements in each dimension.
  1008. Parameters:
  1009. ctx: ggml context
  1010. type: ggml type
  1011. ne0: number of elements in dimension 0
  1012. ne1: number of elements in dimension 1
  1013. Returns:
  1014. Pointer to ggml_tensor"""
  1015. return lib.ggml_new_tensor_2d(ctx, type, ne0, ne1)
  1016. lib.ggml_new_tensor_2d.argtypes = [
  1017. ggml_context_p,
  1018. ctypes.c_int,
  1019. ctypes.c_int64,
  1020. ctypes.c_int64,
  1021. ]
  1022. lib.ggml_new_tensor_2d.restype = ctypes.POINTER(ggml_tensor)
  1023. # GGML_API struct ggml_tensor * ggml_new_tensor_3d(
  1024. # struct ggml_context * ctx,
  1025. # enum ggml_type type,
  1026. # int64_t ne0,
  1027. # int64_t ne1,
  1028. # int64_t ne2);
  1029. def ggml_new_tensor_3d(
  1030. ctx: ggml_context_p,
  1031. type: Union[ctypes.c_int, int],
  1032. ne0: Union[ctypes.c_int64, int],
  1033. ne1: Union[ctypes.c_int64, int],
  1034. ne2: Union[ctypes.c_int64, int],
  1035. ) -> ggml_tensor_p:
  1036. """Create a new 3-dimensional tensor with the given type and number of elements in each dimension.
  1037. Parameters:
  1038. ctx: ggml context
  1039. type: ggml type
  1040. ne0: number of elements in dimension 0
  1041. ne1: number of elements in dimension 1
  1042. ne2: number of elements in dimension 2
  1043. Returns:
  1044. Pointer to ggml_tensor"""
  1045. return lib.ggml_new_tensor_3d(ctx, type, ne0, ne1, ne2)
  1046. lib.ggml_new_tensor_3d.argtypes = [
  1047. ggml_context_p,
  1048. ctypes.c_int,
  1049. ctypes.c_int64,
  1050. ctypes.c_int64,
  1051. ctypes.c_int64,
  1052. ]
  1053. lib.ggml_new_tensor_3d.restype = ctypes.POINTER(ggml_tensor)
  1054. # GGML_API struct ggml_tensor * ggml_new_tensor_4d(
  1055. # struct ggml_context * ctx,
  1056. # enum ggml_type type,
  1057. # int64_t ne0,
  1058. # int64_t ne1,
  1059. # int64_t ne2,
  1060. # int64_t ne3);
  1061. def ggml_new_tensor_4d(
  1062. ctx: ggml_context_p,
  1063. type: Union[ctypes.c_int, int],
  1064. ne0: Union[ctypes.c_int64, int],
  1065. ne1: Union[ctypes.c_int64, int],
  1066. ne2: Union[ctypes.c_int64, int],
  1067. ne3: Union[ctypes.c_int64, int],
  1068. ) -> ggml_tensor_p:
  1069. """Create a new 4-dimensional tensor with the given type and number of elements in each dimension.
  1070. Parameters:
  1071. ctx: ggml context
  1072. type: ggml type
  1073. ne0: number of elements in dimension 0
  1074. ne1: number of elements in dimension 1
1075. ne2: number of elements in dimension 2
ne3: number of elements in dimension 3
  1076. Returns:
  1077. Pointer to ggml_tensor"""
  1078. return lib.ggml_new_tensor_4d(ctx, type, ne0, ne1, ne2, ne3)
  1079. lib.ggml_new_tensor_4d.argtypes = [
  1080. ggml_context_p,
  1081. ctypes.c_int,
  1082. ctypes.c_int64,
  1083. ctypes.c_int64,
  1084. ctypes.c_int64,
  1085. ctypes.c_int64,
  1086. ]
  1087. lib.ggml_new_tensor_4d.restype = ctypes.POINTER(ggml_tensor)
  1088. # GGML_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
  1089. def ggml_new_i32(
  1090. ctx: ggml_context_p, value: Union[ctypes.c_int32, int]
  1091. ) -> ggml_tensor_p:
  1092. """Create a 1 element tensor with the given integer value.
  1093. Parameters:
  1094. ctx: ggml context
  1095. value: integer value
  1096. Returns:
  1097. Pointer to ggml_tensor"""
  1098. return lib.ggml_new_i32(ctx, value)
  1099. lib.ggml_new_i32.argtypes = [ggml_context_p, ctypes.c_int32]
  1100. lib.ggml_new_i32.restype = ctypes.POINTER(ggml_tensor)
  1101. # GGML_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
  1102. def ggml_new_f32(
  1103. ctx: ggml_context_p,
  1104. value: Union[ctypes.c_float, float],
  1105. ) -> ggml_tensor_p:
  1106. """Create a 1 element tensor with the given float value.
  1107. Parameters:
  1108. ctx: ggml context
  1109. value: float value
  1110. Returns:
  1111. Pointer to ggml_tensor"""
  1112. return lib.ggml_new_f32(ctx, value)
  1113. lib.ggml_new_f32.argtypes = [ggml_context_p, ctypes.c_float]
  1114. lib.ggml_new_f32.restype = ctypes.POINTER(ggml_tensor)
  1115. # GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
  1116. def ggml_dup_tensor(ctx: ggml_context_p, src: ggml_tensor_p) -> ggml_tensor_p:
  1117. """Create a new tensor with the same type and dimensions as the source tensor.
  1118. Parameters:
  1119. ctx: ggml context
  1120. src: source tensor
  1121. Returns:
  1122. Pointer to ggml_tensor"""
  1123. return lib.ggml_dup_tensor(ctx, src)
  1124. lib.ggml_dup_tensor.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1125. lib.ggml_dup_tensor.restype = ctypes.POINTER(ggml_tensor)
  1126. # GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, struct ggml_tensor * src);
  1127. def ggml_view_tensor(ctx: ggml_context_p, src: ggml_tensor_p) -> ggml_tensor_p:
  1128. """Create a new tensor with the same type, dimensions and data as the source tensor.
  1129. Parameters:
  1130. ctx: ggml context
  1131. src: source tensor
  1132. Returns:
  1133. Pointer to ggml_tensor"""
  1134. return lib.ggml_view_tensor(ctx, src)
  1135. lib.ggml_view_tensor.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1136. lib.ggml_view_tensor.restype = ctypes.POINTER(ggml_tensor)
  1137. # GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
  1138. def ggml_get_tensor(ctx: ggml_context_p, name: bytes) -> ggml_tensor_p:
  1139. """Get a tensor from the ggml context by name.
  1140. Parameters:
  1141. ctx: ggml context
  1142. name: name of tensor
  1143. Returns:
  1144. Pointer to ggml_tensor"""
  1145. return lib.ggml_get_tensor(ctx, name)
  1146. lib.ggml_get_tensor.argtypes = [ggml_context_p, ctypes.c_char_p]
  1147. lib.ggml_get_tensor.restype = ctypes.POINTER(ggml_tensor)
  1148. # GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
  1149. def ggml_set_zero(
  1150. tensor: ggml_tensor_p,
  1151. ) -> ggml_tensor_p:
  1152. """Zero all elements in a tensor.
  1153. Parameters:
  1154. tensor: tensor
  1155. Returns:
  1156. Pointer to ggml_tensor"""
  1157. return lib.ggml_set_zero(tensor)
  1158. lib.ggml_set_zero.argtypes = [ctypes.POINTER(ggml_tensor)]
  1159. lib.ggml_set_zero.restype = ctypes.POINTER(ggml_tensor)
  1160. # GGML_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
  1161. def ggml_set_i32(
  1162. tensor: ggml_tensor_p,
  1163. value: Union[ctypes.c_int32, int],
  1164. ) -> ggml_tensor_p:
  1165. """Set all elements in a tensor to the given integer value.
  1166. Parameters:
  1167. tensor: tensor
  1168. value: integer value
  1169. Returns:
  1170. Pointer to ggml_tensor"""
  1171. return lib.ggml_set_i32(tensor, value)
  1172. lib.ggml_set_i32.argtypes = [ctypes.POINTER(ggml_tensor), ctypes.c_int32]
  1173. lib.ggml_set_i32.restype = ctypes.POINTER(ggml_tensor)
  1174. # GGML_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);
  1175. def ggml_set_f32(
  1176. tensor: ggml_tensor_p,
  1177. value: Union[ctypes.c_float, float],
  1178. ) -> ggml_tensor_p:
  1179. """Set all elements in a tensor to the given float value.
  1180. Parameters:
  1181. tensor: tensor
  1182. value: float value
  1183. Returns:
  1184. Pointer to ggml_tensor"""
  1185. return lib.ggml_set_f32(tensor, value)
  1186. lib.ggml_set_f32.argtypes = [ctypes.POINTER(ggml_tensor), ctypes.c_float]
  1187. lib.ggml_set_f32.restype = ctypes.POINTER(ggml_tensor)
  1188. # GGML_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
  1189. def ggml_get_i32_1d(
  1190. tensor: ggml_tensor_p,
  1191. i: Union[ctypes.c_int, int],
  1192. ) -> int:
  1193. """Get the integer value of the i-th element in a 1-dimensional tensor.
  1194. Parameters:
  1195. tensor: tensor
  1196. i: index of element
  1197. Returns:
  1198. integer value of element at index i"""
  1199. return lib.ggml_get_i32_1d(tensor, i)
  1200. lib.ggml_get_i32_1d.argtypes = [ctypes.POINTER(ggml_tensor), ctypes.c_int]
  1201. lib.ggml_get_i32_1d.restype = ctypes.c_int32
  1202. # GGML_API void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);
  1203. def ggml_set_i32_1d(
  1204. tensor: ggml_tensor_p,
  1205. i: Union[ctypes.c_int, int],
  1206. value: Union[ctypes.c_int32, int],
  1207. ):
  1208. """Set the integer value of the i-th element in a 1-dimensional tensor.
  1209. Parameters:
  1210. tensor: tensor
  1211. i: index of element
  1212. value: integer value to set element to"""
  1213. return lib.ggml_set_i32_1d(tensor, i, value)
  1214. lib.ggml_set_i32_1d.argtypes = [
  1215. ctypes.POINTER(ggml_tensor),
  1216. ctypes.c_int,
  1217. ctypes.c_int32,
  1218. ]
  1219. lib.ggml_set_i32_1d.restype = None
  1220. # GGML_API float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
  1221. def ggml_get_f32_1d(
  1222. tensor: ggml_tensor_p,
  1223. i: Union[ctypes.c_int, int],
  1224. ) -> float:
  1225. """Get the float value of the i-th element in a 1-dimensional tensor.
  1226. Parameters:
1227. tensor: tensor
i: index of element
  1228. Returns:
  1229. float value of element at index i"""
  1230. return lib.ggml_get_f32_1d(tensor, i)
  1231. lib.ggml_get_f32_1d.argtypes = [ctypes.POINTER(ggml_tensor), ctypes.c_int]
  1232. lib.ggml_get_f32_1d.restype = ctypes.c_float
  1233. # GGML_API void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);
  1234. def ggml_set_f32_1d(
  1235. tensor: ggml_tensor_p,
  1236. i: Union[ctypes.c_int, int],
  1237. value: Union[ctypes.c_float, float],
  1238. ):
  1239. """Set the float value of the i-th element in a 1-dimensional tensor.
  1240. Parameters:
  1241. tensor: tensor
  1242. i: index of element
  1243. value: float value to set element to"""
  1244. return lib.ggml_set_f32_1d(tensor, i, value)
  1245. lib.ggml_set_f32_1d.argtypes = [
  1246. ctypes.POINTER(ggml_tensor),
  1247. ctypes.c_int,
  1248. ctypes.c_float,
  1249. ]
  1250. lib.ggml_set_f32_1d.restype = None
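# Illustrative sketch (hypothetical helper): element-wise write/read on a 1-D F32 tensor using
# the accessors above. GGML_TYPE_F32 is assumed to be the type constant bound earlier.
def _example_fill_and_read_1d(ctx: ggml_context_p, n: int = 8) -> list:
    t = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n)
    for i in range(n):
        ggml_set_f32_1d(t, i, 0.5 * i)
    return [ggml_get_f32_1d(t, i) for i in range(n)]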
  1251. # GGML_API void * ggml_get_data (const struct ggml_tensor * tensor);
  1252. def ggml_get_data(
  1253. tensor: ggml_tensor_p,
  1254. ) -> Optional[ctypes.c_void_p]:
  1255. """Get the data pointer of a tensor.
  1256. Parameters:
  1257. tensor: tensor
  1258. Returns:
  1259. Pointer to data, or None if tensor has no data"""
  1260. return lib.ggml_get_data(tensor)
  1261. lib.ggml_get_data.argtypes = [ctypes.POINTER(ggml_tensor)]
  1262. lib.ggml_get_data.restype = ctypes.c_void_p
  1263. # GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);
  1264. def ggml_get_data_f32(
  1265. tensor: ggml_tensor_p,
  1266. ) -> Optional[CFloatArray]:
  1267. """Get the data pointer of a tensor as a float array.
  1268. Parameters:
  1269. tensor: tensor
  1270. Returns:
1271. (Optional[ctypes.Array[ctypes.c_float]]): float array view of the tensor data, or None if tensor has no data
  1272. """
  1273. return lib.ggml_get_data_f32(tensor)
  1274. lib.ggml_get_data_f32.argtypes = [ctypes.POINTER(ggml_tensor)]
  1275. lib.ggml_get_data_f32.restype = ctypes.POINTER(ctypes.c_float)
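# Illustrative sketch (hypothetical helper): copying a contiguous F32 tensor's data into a
# Python list by indexing the returned float pointer.
def _example_read_f32_data(tensor: ggml_tensor_p) -> list:
    data = ggml_get_data_f32(tensor)
    if not data:  # NULL pointer: tensor has no data
        return []
    return [data[i] for i in range(ggml_nelements(tensor))]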
  1276. # GGML_API enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);
  1277. def ggml_get_unary_op(
  1278. tensor: ggml_tensor_p,
  1279. ) -> int:
  1280. """Get the unary operation of a tensor.
  1281. Parameters:
  1282. tensor: tensor
  1283. Returns:
  1284. unary operation"""
  1285. return lib.ggml_get_unary_op(tensor)
  1286. lib.ggml_get_unary_op.argtypes = [ctypes.POINTER(ggml_tensor)]
  1287. lib.ggml_get_unary_op.restype = ctypes.c_int
  1288. # GGML_API const char * ggml_get_name(const struct ggml_tensor * tensor);
  1289. def ggml_get_name(
  1290. tensor: ggml_tensor_p,
  1291. ) -> bytes:
  1292. """Get the name of a tensor.
  1293. Parameters:
  1294. tensor: tensor
  1295. Returns:
  1296. name of tensor"""
  1297. return lib.ggml_get_name(tensor)
  1298. lib.ggml_get_name.argtypes = [ctypes.POINTER(ggml_tensor)]
  1299. lib.ggml_get_name.restype = ctypes.c_char_p
  1300. # GGML_API struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name);
  1301. def ggml_set_name(
  1302. tensor: ggml_tensor_p,
  1303. name: bytes,
  1304. ) -> ggml_tensor_p:
  1305. """Set the name of a tensor.
  1306. Parameters:
  1307. tensor: tensor
  1308. name: name to set tensor to
  1309. Returns:
  1310. Pointer to ggml_tensor"""
  1311. return lib.ggml_set_name(tensor, name)
  1312. lib.ggml_set_name.argtypes = [ctypes.POINTER(ggml_tensor), ctypes.c_char_p]
  1313. lib.ggml_set_name.restype = ctypes.POINTER(ggml_tensor)
  1314. # GGML_API struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...);
  1315. def ggml_format_name(
  1316. tensor: ggml_tensor_p,
  1317. fmt: bytes,
  1318. *args: Sequence[Union[bool, int, float, str]],
  1319. ) -> ggml_tensor_p:
  1320. """Format the name of a tensor using the given format c string and arguments.
  1321. Parameters:
  1322. tensor: tensor
  1323. fmt: format c string
  1324. args: arguments to format string
  1325. Returns:
  1326. Pointer to ggml_tensor"""
  1327. return lib.ggml_format_name(tensor, fmt, *args)
  1328. lib.ggml_format_name.argtypes = [ctypes.POINTER(ggml_tensor), ctypes.c_char_p]
  1329. lib.ggml_format_name.restype = ctypes.POINTER(ggml_tensor)
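# Illustrative sketch (hypothetical helper): tensor names are C strings (bytes) and can be used
# to look a tensor back up from its context. GGML_TYPE_F32 is assumed to be bound earlier.
def _example_name_and_lookup(ctx: ggml_context_p) -> ggml_tensor_p:
    t = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4)
    ggml_set_name(t, b"embedding")
    return ggml_get_tensor(ctx, b"embedding")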
  1330. # //
  1331. # // operations on tensors with backpropagation
  1332. # //
  1333. # GGML_API struct ggml_tensor * ggml_dup(
  1334. # struct ggml_context * ctx,
  1335. # struct ggml_tensor * a);
  1336. def ggml_dup(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  1337. return lib.ggml_dup(ctx, a)
  1338. lib.ggml_dup.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1339. lib.ggml_dup.restype = ctypes.POINTER(ggml_tensor)
  1340. # // in-place, returns view(a)
  1341. # GGML_API struct ggml_tensor * ggml_dup_inplace(
  1342. # struct ggml_context * ctx,
  1343. # struct ggml_tensor * a);
  1344. def ggml_dup_inplace(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  1345. return lib.ggml_dup_inplace(ctx, a)
  1346. lib.ggml_dup_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1347. lib.ggml_dup_inplace.restype = ctypes.POINTER(ggml_tensor)
  1348. # GGML_API struct ggml_tensor * ggml_add(
  1349. # struct ggml_context * ctx,
  1350. # struct ggml_tensor * a,
  1351. # struct ggml_tensor * b);
  1352. def ggml_add(
  1353. ctx: ggml_context_p,
  1354. a: ggml_tensor_p,
  1355. b: ggml_tensor_p,
  1356. ) -> ggml_tensor_p:
  1357. """Add two tensors together and return the result.
  1358. Parameters:
  1359. ctx: ggml context
  1360. a: first tensor
  1361. b: second tensor
  1362. Returns:
  1363. Pointer to ggml_tensor"""
  1364. return lib.ggml_add(ctx, a, b)
  1365. lib.ggml_add.argtypes = [
  1366. ggml_context_p,
  1367. ctypes.POINTER(ggml_tensor),
  1368. ctypes.POINTER(ggml_tensor),
  1369. ]
  1370. lib.ggml_add.restype = ctypes.POINTER(ggml_tensor)
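# Illustrative sketch (hypothetical helper): ggml_add() records an addition node in the context;
# the result is only filled in once the graph is computed with the graph API bound later in this
# file. GGML_TYPE_F32 is assumed to be the type constant bound earlier.
def _example_build_add(ctx: ggml_context_p) -> ggml_tensor_p:
    a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4)
    b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4)
    ggml_set_f32(a, 1.5)
    ggml_set_f32(b, 2.5)
    return ggml_add(ctx, a, b)  # result tensor describing a + b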
  1371. # GGML_API struct ggml_tensor * ggml_add_inplace(
  1372. # struct ggml_context * ctx,
  1373. # struct ggml_tensor * a,
  1374. # struct ggml_tensor * b);
  1375. def ggml_add_inplace(
  1376. ctx: ggml_context_p,
  1377. a: ggml_tensor_p,
  1378. b: ggml_tensor_p,
  1379. ) -> ggml_tensor_p:
  1380. """Add two tensors together and store the result in the first tensor.
  1381. Parameters:
  1382. ctx: ggml context
  1383. a: first tensor
  1384. b: second tensor
  1385. Returns:
  1386. Pointer to ggml_tensor"""
  1387. return lib.ggml_add_inplace(ctx, a, b)
  1388. lib.ggml_add_inplace.argtypes = [
  1389. ggml_context_p,
  1390. ctypes.POINTER(ggml_tensor),
  1391. ctypes.POINTER(ggml_tensor),
  1392. ]
  1393. lib.ggml_add_inplace.restype = ctypes.POINTER(ggml_tensor)
  1394. # GGML_API struct ggml_tensor * ggml_add1(
  1395. # struct ggml_context * ctx,
  1396. # struct ggml_tensor * a,
  1397. # struct ggml_tensor * b);
  1398. def ggml_add1(
  1399. ctx: ggml_context_p,
  1400. a: ggml_tensor_p,
  1401. b: ggml_tensor_p,
  1402. ) -> ggml_tensor_p:
  1403. return lib.ggml_add1(ctx, a, b)
  1404. lib.ggml_add1.argtypes = [
  1405. ggml_context_p,
  1406. ctypes.POINTER(ggml_tensor),
  1407. ctypes.POINTER(ggml_tensor),
  1408. ]
  1409. lib.ggml_add1.restype = ctypes.POINTER(ggml_tensor)
  1410. # GGML_API struct ggml_tensor * ggml_add1_inplace(
  1411. # struct ggml_context * ctx,
  1412. # struct ggml_tensor * a,
  1413. # struct ggml_tensor * b);
  1414. def ggml_add1_inplace(
  1415. ctx: ggml_context_p,
  1416. a: ggml_tensor_p,
  1417. b: ggml_tensor_p,
  1418. ) -> ggml_tensor_p:
  1419. return lib.ggml_add1_inplace(ctx, a, b)
  1420. lib.ggml_add1_inplace.argtypes = [
  1421. ggml_context_p,
  1422. ctypes.POINTER(ggml_tensor),
  1423. ctypes.POINTER(ggml_tensor),
  1424. ]
  1425. lib.ggml_add1_inplace.restype = ctypes.POINTER(ggml_tensor)
  1426. # GGML_API struct ggml_tensor * ggml_acc(
  1427. # struct ggml_context * ctx,
  1428. # struct ggml_tensor * a,
  1429. # struct ggml_tensor * b,
  1430. # size_t nb1,
  1431. # size_t nb2,
  1432. # size_t nb3,
  1433. # size_t offset);
  1434. def ggml_acc(
  1435. ctx: ggml_context_p,
  1436. a: ggml_tensor_p,
  1437. b: ggml_tensor_p,
  1438. nb1: Union[ctypes.c_size_t, int],
  1439. nb2: Union[ctypes.c_size_t, int],
  1440. nb3: Union[ctypes.c_size_t, int],
  1441. offset: Union[ctypes.c_size_t, int],
  1442. ) -> ggml_tensor_p:
  1443. return lib.ggml_acc(ctx, a, b, nb1, nb2, nb3, offset)
  1444. lib.ggml_acc.argtypes = [
  1445. ggml_context_p,
  1446. ctypes.POINTER(ggml_tensor),
  1447. ctypes.POINTER(ggml_tensor),
  1448. ctypes.c_size_t,
  1449. ctypes.c_size_t,
  1450. ctypes.c_size_t,
  1451. ctypes.c_size_t,
  1452. ]
  1453. lib.ggml_acc.restype = ctypes.POINTER(ggml_tensor)
  1454. # GGML_API struct ggml_tensor * ggml_acc_inplace(
  1455. # struct ggml_context * ctx,
  1456. # struct ggml_tensor * a,
  1457. # struct ggml_tensor * b,
  1458. # size_t nb1,
  1459. # size_t nb2,
  1460. # size_t nb3,
  1461. # size_t offset);
  1462. def ggml_acc_inplace(
  1463. ctx: ggml_context_p,
  1464. a: ggml_tensor_p,
  1465. b: ggml_tensor_p,
  1466. nb1: Union[ctypes.c_size_t, int],
  1467. nb2: Union[ctypes.c_size_t, int],
  1468. nb3: Union[ctypes.c_size_t, int],
  1469. offset: Union[ctypes.c_size_t, int],
  1470. ) -> ggml_tensor_p:
  1471. return lib.ggml_acc_inplace(ctx, a, b, nb1, nb2, nb3, offset)
  1472. lib.ggml_acc_inplace.argtypes = [
  1473. ggml_context_p,
  1474. ctypes.POINTER(ggml_tensor),
  1475. ctypes.POINTER(ggml_tensor),
  1476. ctypes.c_size_t,
  1477. ctypes.c_size_t,
  1478. ctypes.c_size_t,
  1479. ctypes.c_size_t,
  1480. ]
  1481. lib.ggml_acc_inplace.restype = ctypes.POINTER(ggml_tensor)
  1482. # GGML_API struct ggml_tensor * ggml_sub(
  1483. # struct ggml_context * ctx,
  1484. # struct ggml_tensor * a,
  1485. # struct ggml_tensor * b);
  1486. def ggml_sub(
  1487. ctx: ggml_context_p,
  1488. a: ggml_tensor_p,
  1489. b: ggml_tensor_p,
  1490. ) -> ggml_tensor_p:
  1491. """Subtract two tensors and return the result.
  1492. Parameters:
  1493. ctx: ggml context
  1494. a: first tensor
  1495. b: second tensor
  1496. Returns:
  1497. Pointer to ggml_tensor"""
  1498. return lib.ggml_sub(ctx, a, b)
  1499. lib.ggml_sub.argtypes = [
  1500. ggml_context_p,
  1501. ctypes.POINTER(ggml_tensor),
  1502. ctypes.POINTER(ggml_tensor),
  1503. ]
  1504. lib.ggml_sub.restype = ctypes.POINTER(ggml_tensor)
  1505. # GGML_API struct ggml_tensor * ggml_sub_inplace(
  1506. # struct ggml_context * ctx,
  1507. # struct ggml_tensor * a,
  1508. # struct ggml_tensor * b);
  1509. def ggml_sub_inplace(
  1510. ctx: ggml_context_p,
  1511. a: ggml_tensor_p,
  1512. b: ggml_tensor_p,
  1513. ) -> ggml_tensor_p:
  1514. """Subtract two tensors and store the result in the first tensor.
  1515. Parameters:
  1516. ctx: ggml context
  1517. a: first tensor
  1518. b: second tensor
  1519. Returns:
  1520. Pointer to ggml_tensor"""
  1521. return lib.ggml_sub_inplace(ctx, a, b)
  1522. lib.ggml_sub_inplace.argtypes = [
  1523. ggml_context_p,
  1524. ctypes.POINTER(ggml_tensor),
  1525. ctypes.POINTER(ggml_tensor),
  1526. ]
  1527. lib.ggml_sub_inplace.restype = ctypes.POINTER(ggml_tensor)
  1528. # GGML_API struct ggml_tensor * ggml_mul(
  1529. # struct ggml_context * ctx,
  1530. # struct ggml_tensor * a,
  1531. # struct ggml_tensor * b);
  1532. def ggml_mul(
  1533. ctx: ggml_context_p,
  1534. a: ggml_tensor_p,
  1535. b: ggml_tensor_p,
  1536. ) -> ggml_tensor_p:
  1537. """Element-wise multiply two tensors and return the result.
  1538. Parameters:
  1539. ctx: ggml context
  1540. a: first tensor
  1541. b: second tensor
  1542. Returns:
  1543. Pointer to ggml_tensor"""
  1544. return lib.ggml_mul(ctx, a, b)
  1545. lib.ggml_mul.argtypes = [
  1546. ggml_context_p,
  1547. ctypes.POINTER(ggml_tensor),
  1548. ctypes.POINTER(ggml_tensor),
  1549. ]
  1550. lib.ggml_mul.restype = ctypes.POINTER(ggml_tensor)
  1551. # GGML_API struct ggml_tensor * ggml_mul_inplace(
  1552. # struct ggml_context * ctx,
  1553. # struct ggml_tensor * a,
  1554. # struct ggml_tensor * b);
  1555. def ggml_mul_inplace(
  1556. ctx: ggml_context_p,
  1557. a: ggml_tensor_p,
  1558. b: ggml_tensor_p,
  1559. ) -> ggml_tensor_p:
  1560. """Element-wise multiply two tensors and store the result in the first tensor.
  1561. Parameters:
  1562. ctx: ggml context
  1563. a: first tensor
  1564. b: second tensor
  1565. Returns:
  1566. Pointer to ggml_tensor"""
  1567. return lib.ggml_mul_inplace(ctx, a, b)
  1568. lib.ggml_mul_inplace.argtypes = [
  1569. ggml_context_p,
  1570. ctypes.POINTER(ggml_tensor),
  1571. ctypes.POINTER(ggml_tensor),
  1572. ]
  1573. lib.ggml_mul_inplace.restype = ctypes.POINTER(ggml_tensor)
  1574. # GGML_API struct ggml_tensor * ggml_div(
  1575. # struct ggml_context * ctx,
  1576. # struct ggml_tensor * a,
  1577. # struct ggml_tensor * b);
  1578. def ggml_div(
  1579. ctx: ggml_context_p,
  1580. a: ggml_tensor_p,
  1581. b: ggml_tensor_p,
  1582. ) -> ggml_tensor_p:
  1583. """Element-wise divide two tensors and return the result.
  1584. Parameters:
  1585. ctx: ggml context
  1586. a: first tensor
  1587. b: second tensor
  1588. Returns:
  1589. Pointer to ggml_tensor"""
  1590. return lib.ggml_div(ctx, a, b)
  1591. lib.ggml_div.argtypes = [
  1592. ggml_context_p,
  1593. ctypes.POINTER(ggml_tensor),
  1594. ctypes.POINTER(ggml_tensor),
  1595. ]
  1596. lib.ggml_div.restype = ctypes.POINTER(ggml_tensor)
  1597. # GGML_API struct ggml_tensor * ggml_div_inplace(
  1598. # struct ggml_context * ctx,
  1599. # struct ggml_tensor * a,
  1600. # struct ggml_tensor * b);
  1601. def ggml_div_inplace(
  1602. ctx: ggml_context_p,
  1603. a: ggml_tensor_p,
  1604. b: ggml_tensor_p,
  1605. ) -> ggml_tensor_p:
  1606. """Element-wise divide two tensors and store the result in the first tensor.
  1607. Parameters:
  1608. ctx: ggml context
  1609. a: first tensor
  1610. b: second tensor
  1611. Returns:
  1612. Pointer to ggml_tensor"""
  1613. return lib.ggml_div_inplace(ctx, a, b)
  1614. lib.ggml_div_inplace.argtypes = [
  1615. ggml_context_p,
  1616. ctypes.POINTER(ggml_tensor),
  1617. ctypes.POINTER(ggml_tensor),
  1618. ]
  1619. lib.ggml_div_inplace.restype = ctypes.POINTER(ggml_tensor)
  1620. # GGML_API struct ggml_tensor * ggml_sqr(
  1621. # struct ggml_context * ctx,
  1622. # struct ggml_tensor * a);
  1623. def ggml_sqr(
  1624. ctx: ggml_context_p,
  1625. a: ggml_tensor_p,
  1626. ) -> ggml_tensor_p:
  1627. """Square all elements in a tensor and return the result.
  1628. Parameters:
  1629. ctx: ggml context
  1630. a: tensor
  1631. Returns:
  1632. Pointer to ggml_tensor"""
  1633. return lib.ggml_sqr(ctx, a)
  1634. lib.ggml_sqr.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1635. lib.ggml_sqr.restype = ctypes.POINTER(ggml_tensor)
  1636. # GGML_API struct ggml_tensor * ggml_sqr_inplace(
  1637. # struct ggml_context * ctx,
  1638. # struct ggml_tensor * a);
  1639. def ggml_sqr_inplace(
  1640. ctx: ggml_context_p,
  1641. a: ggml_tensor_p,
  1642. ) -> ggml_tensor_p:
  1643. """Square all elements in a tensor and store the result in the first tensor.
  1644. Parameters:
  1645. ctx: ggml context
  1646. a: tensor
  1647. Returns:
  1648. Pointer to ggml_tensor"""
  1649. return lib.ggml_sqr_inplace(ctx, a)
  1650. lib.ggml_sqr_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1651. lib.ggml_sqr_inplace.restype = ctypes.POINTER(ggml_tensor)
  1652. # GGML_API struct ggml_tensor * ggml_sqrt(
  1653. # struct ggml_context * ctx,
  1654. # struct ggml_tensor * a);
  1655. def ggml_sqrt(
  1656. ctx: ggml_context_p,
  1657. a: ggml_tensor_p,
  1658. ) -> ggml_tensor_p:
  1659. """Square root all elements in a tensor and return the result.
  1660. Parameters:
  1661. ctx: ggml context
  1662. a: tensor
  1663. Returns:
  1664. Pointer to ggml_tensor"""
  1665. return lib.ggml_sqrt(ctx, a)
  1666. lib.ggml_sqrt.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1667. lib.ggml_sqrt.restype = ctypes.POINTER(ggml_tensor)
  1668. # GGML_API struct ggml_tensor * ggml_sqrt_inplace(
  1669. # struct ggml_context * ctx,
  1670. # struct ggml_tensor * a);
  1671. def ggml_sqrt_inplace(
  1672. ctx: ggml_context_p,
  1673. a: ggml_tensor_p,
  1674. ) -> ggml_tensor_p:
  1675. """Square root all elements in a tensor and store the result in the first tensor.
  1676. Parameters:
  1677. ctx: ggml context
  1678. Returns:
  1679. Pointer to ggml_tensor"""
  1680. return lib.ggml_sqrt_inplace(ctx, a)
  1681. lib.ggml_sqrt_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1682. lib.ggml_sqrt_inplace.restype = ctypes.POINTER(ggml_tensor)
  1683. # GGML_API struct ggml_tensor * ggml_log(
  1684. # struct ggml_context * ctx,
  1685. # struct ggml_tensor * a);
  1686. def ggml_log(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  1687. """Take the natural logarithm of all elements in a tensor and return the result.
  1688. Parameters:
  1689. ctx: ggml context
  1690. a: tensor
  1691. Returns:
  1692. Pointer to ggml_tensor"""
  1693. return lib.ggml_log(ctx, a)
  1694. lib.ggml_log.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1695. lib.ggml_log.restype = ctypes.POINTER(ggml_tensor)
  1696. # GGML_API struct ggml_tensor * ggml_log_inplace(
  1697. # struct ggml_context * ctx,
  1698. # struct ggml_tensor * a);
  1699. def ggml_log_inplace(
  1700. ctx: ggml_context_p,
  1701. a: ggml_tensor_p,
  1702. ) -> ggml_tensor_p:
  1703. """Take the natural logarithm of all elements in a tensor and store the result in the first tensor.
  1704. Parameters:
  1705. ctx: ggml context
  1706. a: tensor
  1707. Returns:
  1708. Pointer to ggml_tensor"""
  1709. return lib.ggml_log_inplace(ctx, a)
  1710. lib.ggml_log_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1711. lib.ggml_log_inplace.restype = ctypes.POINTER(ggml_tensor)
  1712. # // return scalar
  1713. # GGML_API struct ggml_tensor * ggml_sum(
  1714. # struct ggml_context * ctx,
  1715. # struct ggml_tensor * a);
  1716. def ggml_sum(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  1717. """Sum all elements in a tensor and return the result.
  1718. Parameters:
  1719. ctx: ggml context
  1720. a: tensor
  1721. Returns:
  1722. Pointer to ggml_tensor"""
  1723. return lib.ggml_sum(ctx, a)
  1724. lib.ggml_sum.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1725. lib.ggml_sum.restype = ctypes.POINTER(ggml_tensor)
  1726. # // sums along rows, with input shape [a,b,c,d] return shape [1,b,c,d]
  1727. # GGML_API struct ggml_tensor * ggml_sum_rows(
  1728. # struct ggml_context * ctx,
  1729. # struct ggml_tensor * a);
  1730. def ggml_sum_rows(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  1731. """Sum all elements in a tensor along the first axis and return the result.
  1732. sums along rows, with input shape [a,b,c,d] return shape [1,b,c,d]
  1733. Parameters:
  1734. ctx: ggml context
  1735. a: tensor
  1736. Returns:
  1737. Pointer to ggml_tensor"""
  1738. return lib.ggml_sum_rows(ctx, a)
  1739. lib.ggml_sum_rows.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1740. lib.ggml_sum_rows.restype = ctypes.POINTER(ggml_tensor)
  1741. # // mean along rows
  1742. # GGML_API struct ggml_tensor * ggml_mean(
  1743. # struct ggml_context * ctx,
  1744. # struct ggml_tensor * a);
  1745. def ggml_mean(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  1746. """Take the mean of all elements in a tensor and return the result.
  1747. Parameters:
  1748. ctx: ggml context
  1749. a: tensor
  1750. Returns:
  1751. Pointer to ggml_tensor"""
  1752. return lib.ggml_mean(ctx, a)
  1753. lib.ggml_mean.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1754. lib.ggml_mean.restype = ctypes.POINTER(ggml_tensor)
  1755. # // argmax along rows
  1756. # GGML_API struct ggml_tensor * ggml_argmax(
  1757. # struct ggml_context * ctx,
  1758. # struct ggml_tensor * a);
  1759. def ggml_argmax(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  1760. """Take the argmax of all elements in a tensor and return the result.
  1761. argmax along rows
  1762. Parameters:
  1763. ctx: ggml context
  1764. a: tensor
  1765. Returns:
  1766. Pointer to ggml_tensor"""
  1767. return lib.ggml_argmax(ctx, a)
  1768. lib.ggml_argmax.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1769. lib.ggml_argmax.restype = ctypes.POINTER(ggml_tensor)
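# Illustrative sketch (hypothetical helper): the row-wise reductions above keep the trailing
# dimensions, e.g. an input of shape [a, b, c, d] reduces to [1, b, c, d].
def _example_row_reductions(ctx: ggml_context_p, a: ggml_tensor_p):
    return ggml_sum_rows(ctx, a), ggml_mean(ctx, a), ggml_argmax(ctx, a)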
  1770. # // if a is the same shape as b, and a is not parameter, return a
  1771. # // otherwise, return a new tensor: repeat(a) to fit in b
  1772. # GGML_API struct ggml_tensor * ggml_repeat(
  1773. # struct ggml_context * ctx,
  1774. # struct ggml_tensor * a,
  1775. # struct ggml_tensor * b);
  1776. def ggml_repeat(
  1777. ctx: ggml_context_p,
  1778. a: ggml_tensor_p,
  1779. b: ggml_tensor_p,
  1780. ) -> ggml_tensor_p:
  1781. """Repeat a tensor to fit the shape of another tensor.
1782. If a has the same shape as b and a is not a parameter, a is returned unchanged; otherwise a new tensor repeating a to fit b is returned.
  1783. Parameters:
  1784. ctx: ggml context
  1785. a: tensor to repeat
  1786. b: tensor to fit
  1787. Returns:
  1788. Pointer to ggml_tensor"""
  1789. return lib.ggml_repeat(ctx, a, b)
  1790. lib.ggml_repeat.argtypes = [
  1791. ggml_context_p,
  1792. ctypes.POINTER(ggml_tensor),
  1793. ctypes.POINTER(ggml_tensor),
  1794. ]
  1795. lib.ggml_repeat.restype = ctypes.POINTER(ggml_tensor)
  1796. # GGML_API struct ggml_tensor * ggml_repeat_back(
  1797. # struct ggml_context * ctx,
  1798. # struct ggml_tensor * a,
  1799. # struct ggml_tensor * b);
  1800. def ggml_repeat_back(
  1801. ctx: ggml_context_p,
  1802. a: ggml_tensor_p,
  1803. b: ggml_tensor_p,
  1804. ) -> ggml_tensor_p:
  1805. return lib.ggml_repeat_back(ctx, a, b)
  1806. lib.ggml_repeat_back.argtypes = [
  1807. ggml_context_p,
  1808. ctypes.POINTER(ggml_tensor),
  1809. ctypes.POINTER(ggml_tensor),
  1810. ]
  1811. lib.ggml_repeat_back.restype = ctypes.POINTER(ggml_tensor)
  1812. # // concat a and b on dim 2
  1813. # // used in stable-diffusion
  1814. # GGML_API struct ggml_tensor * ggml_concat(
  1815. # struct ggml_context * ctx,
  1816. # struct ggml_tensor * a,
  1817. # struct ggml_tensor * b);
  1818. def ggml_concat(
  1819. ctx: ggml_context_p,
  1820. a: ggml_tensor_p,
  1821. b: ggml_tensor_p,
  1822. ) -> ggml_tensor_p:
  1823. """Concatenate two tensors along the second axis and return the result.
  1824. Parameters:
  1825. ctx: ggml context
  1826. a: first tensor
  1827. b: second tensor
  1828. Returns:
  1829. Pointer to ggml_tensor"""
  1830. return lib.ggml_concat(ctx, a, b)
  1831. lib.ggml_concat.argtypes = [
  1832. ggml_context_p,
  1833. ctypes.POINTER(ggml_tensor),
  1834. ctypes.POINTER(ggml_tensor),
  1835. ]
  1836. lib.ggml_concat.restype = ctypes.POINTER(ggml_tensor)
  1837. # GGML_API struct ggml_tensor * ggml_abs(
  1838. # struct ggml_context * ctx,
  1839. # struct ggml_tensor * a);
  1840. def ggml_abs(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  1841. """Take the absolute value of all elements in a tensor and return the result.
  1842. Parameters:
  1843. ctx: ggml context
  1844. a: tensor
  1845. Returns:
  1846. Pointer to ggml_tensor"""
  1847. return lib.ggml_abs(ctx, a)
  1848. lib.ggml_abs.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1849. lib.ggml_abs.restype = ctypes.POINTER(ggml_tensor)
  1850. # GGML_API struct ggml_tensor * ggml_abs_inplace(
  1851. # struct ggml_context * ctx,
  1852. # struct ggml_tensor * a);
  1853. def ggml_abs_inplace(
  1854. ctx: ggml_context_p,
  1855. a: ggml_tensor_p,
  1856. ) -> ggml_tensor_p:
  1857. """Take the absolute value of all elements in a tensor and store the result in the first tensor.
  1858. Parameters:
  1859. ctx: ggml context
  1860. a: tensor
  1861. Returns:
  1862. Pointer to ggml_tensor"""
  1863. return lib.ggml_abs_inplace(ctx, a)
  1864. lib.ggml_abs_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1865. lib.ggml_abs_inplace.restype = ctypes.POINTER(ggml_tensor)
  1866. # GGML_API struct ggml_tensor * ggml_sgn(
  1867. # struct ggml_context * ctx,
  1868. # struct ggml_tensor * a);
  1869. def ggml_sgn(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  1870. """Get the sign of all elements in a tensor and return the result.
  1871. Parameters:
  1872. ctx: ggml context
  1873. a: tensor
  1874. Returns:
  1875. Pointer to ggml_tensor"""
  1876. return lib.ggml_sgn(ctx, a)
  1877. lib.ggml_sgn.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1878. lib.ggml_sgn.restype = ctypes.POINTER(ggml_tensor)
  1879. # GGML_API struct ggml_tensor * ggml_sgn_inplace(
  1880. # struct ggml_context * ctx,
  1881. # struct ggml_tensor * a);
  1882. def ggml_sgn_inplace(
  1883. ctx: ggml_context_p,
  1884. a: ggml_tensor_p,
  1885. ) -> ggml_tensor_p:
  1886. """Get the sign of all elements in a tensor and store the result in the first tensor.
  1887. Parameters:
  1888. ctx: ggml context
  1889. a: tensor
  1890. Returns:
  1891. Pointer to ggml_tensor"""
  1892. return lib.ggml_sgn_inplace(ctx, a)
  1893. lib.ggml_sgn_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1894. lib.ggml_sgn_inplace.restype = ctypes.POINTER(ggml_tensor)
  1895. # GGML_API struct ggml_tensor * ggml_neg(
  1896. # struct ggml_context * ctx,
  1897. # struct ggml_tensor * a);
  1898. def ggml_neg(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  1899. """Negate all elements in a tensor and return the result.
  1900. Parameters:
  1901. ctx: ggml context
  1902. a: tensor
  1903. Returns:
  1904. Pointer to ggml_tensor"""
  1905. return lib.ggml_neg(ctx, a)
  1906. lib.ggml_neg.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1907. lib.ggml_neg.restype = ctypes.POINTER(ggml_tensor)
  1908. # GGML_API struct ggml_tensor * ggml_neg_inplace(
  1909. # struct ggml_context * ctx,
  1910. # struct ggml_tensor * a);
  1911. def ggml_neg_inplace(
  1912. ctx: ggml_context_p,
  1913. a: ggml_tensor_p,
  1914. ) -> ggml_tensor_p:
  1915. """Negate all elements in a tensor and store the result in the first tensor.
  1916. Parameters:
  1917. ctx: ggml context
  1918. a: tensor
  1919. Returns:
  1920. Pointer to ggml_tensor"""
  1921. return lib.ggml_neg_inplace(ctx, a)
  1922. lib.ggml_neg_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1923. lib.ggml_neg_inplace.restype = ctypes.POINTER(ggml_tensor)
  1924. # GGML_API struct ggml_tensor * ggml_step(
  1925. # struct ggml_context * ctx,
  1926. # struct ggml_tensor * a);
  1927. def ggml_step(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  1928. return lib.ggml_step(ctx, a)
  1929. lib.ggml_step.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1930. lib.ggml_step.restype = ctypes.POINTER(ggml_tensor)
  1931. # GGML_API struct ggml_tensor * ggml_tanh(
  1932. # struct ggml_context * ctx,
  1933. # struct ggml_tensor * a);
  1934. def ggml_tanh(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  1935. """Apply the tanh activation function to all elements in a tensor and return the result.
  1936. Parameters:
  1937. ctx: ggml context
  1938. a: tensor
  1939. Returns:
  1940. Pointer to ggml_tensor"""
  1941. return lib.ggml_tanh(ctx, a)
  1942. lib.ggml_tanh.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1943. lib.ggml_tanh.restype = ctypes.POINTER(ggml_tensor)
  1944. # GGML_API struct ggml_tensor * ggml_tanh_inplace(
  1945. # struct ggml_context * ctx,
  1946. # struct ggml_tensor * a);
  1947. def ggml_tanh_inplace(
  1948. ctx: ggml_context_p,
  1949. a: ggml_tensor_p,
  1950. ) -> ggml_tensor_p:
  1951. """Apply the tanh activation function to all elements in a tensor and store the result in the first tensor.
  1952. Parameters:
  1953. ctx: ggml context
  1954. a: tensor
  1955. Returns:
  1956. Pointer to ggml_tensor"""
  1957. return lib.ggml_tanh_inplace(ctx, a)
  1958. lib.ggml_tanh_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1959. lib.ggml_tanh_inplace.restype = ctypes.POINTER(ggml_tensor)
  1960. # GGML_API struct ggml_tensor * ggml_elu(
  1961. # struct ggml_context * ctx,
  1962. # struct ggml_tensor * a);
  1963. def ggml_elu(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  1964. """Apply the ELU activation function to all elements in a tensor and return the result.
  1965. Parameters:
  1966. ctx: ggml context
  1967. a: tensor
  1968. Returns:
  1969. Pointer to ggml_tensor"""
  1970. return lib.ggml_elu(ctx, a)
  1971. lib.ggml_elu.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1972. lib.ggml_elu.restype = ctypes.POINTER(ggml_tensor)
  1973. # GGML_API struct ggml_tensor * ggml_elu_inplace(
  1974. # struct ggml_context * ctx,
  1975. # struct ggml_tensor * a);
  1976. def ggml_elu_inplace(
  1977. ctx: ggml_context_p,
  1978. a: ggml_tensor_p,
  1979. ) -> ggml_tensor_p:
  1980. """Apply the ELU activation function to all elements in a tensor and store the result in the first tensor.
  1981. Parameters:
  1982. ctx: ggml context
  1983. a: tensor
  1984. Returns:
  1985. Pointer to ggml_tensor"""
  1986. return lib.ggml_elu_inplace(ctx, a)
  1987. lib.ggml_elu_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1988. lib.ggml_elu_inplace.restype = ctypes.POINTER(ggml_tensor)
  1989. # GGML_API struct ggml_tensor * ggml_relu(
  1990. # struct ggml_context * ctx,
  1991. # struct ggml_tensor * a);
  1992. def ggml_relu(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  1993. """Apply the ReLU activation function to all elements in a tensor and return the result.
  1994. Parameters:
  1995. ctx: ggml context
  1996. a: tensor
  1997. Returns:
  1998. Pointer to ggml_tensor"""
  1999. return lib.ggml_relu(ctx, a)
  2000. lib.ggml_relu.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2001. lib.ggml_relu.restype = ctypes.POINTER(ggml_tensor)
  2002. # GGML_API struct ggml_tensor * ggml_relu_inplace(
  2003. # struct ggml_context * ctx,
  2004. # struct ggml_tensor * a);
  2005. def ggml_relu_inplace(
  2006. ctx: ggml_context_p,
  2007. a: ggml_tensor_p,
  2008. ) -> ggml_tensor_p:
  2009. """Apply the ReLU activation function to all elements in a tensor and store the result in the first tensor.
  2010. Parameters:
  2011. ctx: ggml context
  2012. a: tensor
  2013. Returns:
  2014. Pointer to ggml_tensor"""
  2015. return lib.ggml_relu_inplace(ctx, a)
  2016. lib.ggml_relu_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2017. lib.ggml_relu_inplace.restype = ctypes.POINTER(ggml_tensor)
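# Illustrative sketch (hypothetical helper): the element-wise operations compose by passing one
# result tensor as the input of the next, here ReLU applied to a sum.
def _example_relu_of_sum(ctx: ggml_context_p, x: ggml_tensor_p, b: ggml_tensor_p) -> ggml_tensor_p:
    return ggml_relu(ctx, ggml_add(ctx, x, b))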
  2018. # // TODO: double-check this computation is correct
  2019. # GGML_API struct ggml_tensor * ggml_gelu(
  2020. # struct ggml_context * ctx,
  2021. # struct ggml_tensor * a);
  2022. def ggml_gelu(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  2023. """Apply the Gaussian Error Linear Unit activation function to all elements in a tensor and return the result.
  2024. Parameters:
  2025. ctx: ggml context
  2026. a: tensor
  2027. Returns:
  2028. Pointer to ggml_tensor"""
  2029. return lib.ggml_gelu(ctx, a)
  2030. lib.ggml_gelu.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2031. lib.ggml_gelu.restype = ctypes.POINTER(ggml_tensor)
  2032. # GGML_API struct ggml_tensor * ggml_gelu_inplace(
  2033. # struct ggml_context * ctx,
  2034. # struct ggml_tensor * a);
  2035. def ggml_gelu_inplace(
  2036. ctx: ggml_context_p,
  2037. a: ggml_tensor_p,
  2038. ) -> ggml_tensor_p:
  2039. """Apply the Gaussian Error Linear Unit activation function to all elements in a tensor and store the result in the first tensor.
  2040. Parameters:
  2041. ctx: ggml context
  2042. a: tensor
  2043. Returns:
  2044. Pointer to ggml_tensor"""
  2045. return lib.ggml_gelu_inplace(ctx, a)
  2046. lib.ggml_gelu_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2047. lib.ggml_gelu_inplace.restype = ctypes.POINTER(ggml_tensor)
  2048. # GGML_API struct ggml_tensor * ggml_gelu_quick(
  2049. # struct ggml_context * ctx,
  2050. # struct ggml_tensor * a);
  2051. def ggml_gelu_quick(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  2052. """Apply the Gaussian Error Linear Unit activation function to all elements in a tensor and return the result.
  2053. Parameters:
  2054. ctx: ggml context
  2055. a: tensor
  2056. Returns:
  2057. Pointer to ggml_tensor"""
  2058. return lib.ggml_gelu_quick(ctx, a)
  2059. lib.ggml_gelu_quick.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2060. lib.ggml_gelu_quick.restype = ctypes.POINTER(ggml_tensor)
  2061. # GGML_API struct ggml_tensor * ggml_gelu_quick_inplace(
  2062. # struct ggml_context * ctx,
  2063. # struct ggml_tensor * a);
  2064. def ggml_gelu_quick_inplace(
  2065. ctx: ggml_context_p,
  2066. a: ggml_tensor_p,
  2067. ) -> ggml_tensor_p:
  2068. """Apply the Gaussian Error Linear Unit activation function to all elements in a tensor and store the result in the first tensor.
  2069. Parameters:
  2070. ctx: ggml context
  2071. a: tensor
  2072. Returns:
  2073. Pointer to ggml_tensor"""
  2074. return lib.ggml_gelu_quick_inplace(ctx, a)
  2075. lib.ggml_gelu_quick_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2076. lib.ggml_gelu_quick_inplace.restype = ctypes.POINTER(ggml_tensor)
  2077. # GGML_API struct ggml_tensor * ggml_silu(
  2078. # struct ggml_context * ctx,
  2079. # struct ggml_tensor * a);
  2080. def ggml_silu(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  2081. """Apply the Sigmoid Linear Unit activation function to all elements in a tensor and return the result.
  2082. Parameters:
  2083. ctx: ggml context
  2084. a: tensor
  2085. Returns:
  2086. Pointer to ggml_tensor"""
  2087. return lib.ggml_silu(ctx, a)
  2088. lib.ggml_silu.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2089. lib.ggml_silu.restype = ctypes.POINTER(ggml_tensor)
  2090. # GGML_API struct ggml_tensor * ggml_silu_inplace(
  2091. # struct ggml_context * ctx,
  2092. # struct ggml_tensor * a);
  2093. def ggml_silu_inplace(
  2094. ctx: ggml_context_p,
  2095. a: ggml_tensor_p,
  2096. ) -> ggml_tensor_p:
  2097. """Apply the Sigmoid Linear Unit activation function to all elements in a tensor and store the result in the first tensor.
  2098. Parameters:
  2099. ctx: ggml context
  2100. a: tensor
  2101. Returns:
  2102. Pointer to ggml_tensor"""
  2103. return lib.ggml_silu_inplace(ctx, a)
  2104. lib.ggml_silu_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2105. lib.ggml_silu_inplace.restype = ctypes.POINTER(ggml_tensor)
  2106. # // a - x
  2107. # // b - dy
  2108. # GGML_API struct ggml_tensor * ggml_silu_back(
  2109. # struct ggml_context * ctx,
  2110. # struct ggml_tensor * a,
  2111. # struct ggml_tensor * b);
  2112. def ggml_silu_back(
  2113. ctx: ggml_context_p,
  2114. a: ggml_tensor_p,
  2115. b: ggml_tensor_p,
  2116. ) -> ggml_tensor_p:
  2117. return lib.ggml_silu_back(ctx, a, b)
  2118. lib.ggml_silu_back.argtypes = [
  2119. ggml_context_p,
  2120. ctypes.POINTER(ggml_tensor),
  2121. ctypes.POINTER(ggml_tensor),
  2122. ]
  2123. lib.ggml_silu_back.restype = ctypes.POINTER(ggml_tensor)
  2124. # // normalize along rows
  2125. # GGML_API struct ggml_tensor * ggml_norm(
  2126. # struct ggml_context * ctx,
  2127. # struct ggml_tensor * a
  2128. # float eps);
  2129. def ggml_norm(
  2130. ctx: ggml_context_p,
  2131. a: ggml_tensor_p,
  2132. eps: Union[ctypes.c_float, float],
  2133. ) -> ggml_tensor_p:
  2134. """Normalize all elements in a tensor along the first axis and return the result.
  2135. normalize along rows.
  2136. Parameters:
  2137. ctx: ggml context
  2138. a: tensor
  2139. eps: minimum value to avoid division by zero
  2140. Returns:
  2141. Pointer to ggml_tensor"""
  2142. return lib.ggml_norm(ctx, a, eps)
  2143. lib.ggml_norm.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor), ctypes.c_float]
  2144. lib.ggml_norm.restype = ctypes.POINTER(ggml_tensor)
  2145. # GGML_API struct ggml_tensor * ggml_norm_inplace(
  2146. # struct ggml_context * ctx,
  2147. # struct ggml_tensor * a
  2148. # float eps);
  2149. def ggml_norm_inplace(
  2150. ctx: ggml_context_p,
  2151. a: ggml_tensor_p,
  2152. eps: Union[ctypes.c_float, float],
  2153. ) -> ggml_tensor_p:
  2154. """Normalize all elements in a tensor along the first axis and store the result in the first tensor.
  2155. normalize along rows.
  2156. Parameters:
  2157. ctx: ggml context
  2158. a: tensor
  2159. eps: minimum value to avoid division by zero
  2160. Returns:
  2161. Pointer to ggml_tensor"""
  2162. return lib.ggml_norm_inplace(ctx, a, eps)
  2163. lib.ggml_norm_inplace.argtypes = [
  2164. ggml_context_p,
  2165. ctypes.POINTER(ggml_tensor),
  2166. ctypes.c_float,
  2167. ]
  2168. lib.ggml_norm_inplace.restype = ctypes.POINTER(ggml_tensor)
  2169. # GGML_API struct ggml_tensor * ggml_rms_norm(
  2170. # struct ggml_context * ctx,
  2171. # struct ggml_tensor * a,
  2172. # float eps);
  2173. def ggml_rms_norm(
  2174. ctx: ggml_context_p,
  2175. a: ggml_tensor_p,
  2176. eps: Union[ctypes.c_float, float],
  2177. ) -> ggml_tensor_p:
  2178. """Compute the RMS norm of a tensor and return the result.
  2179. Parameters:
  2180. ctx: ggml context
  2181. a: tensor
  2182. eps: float
  2183. Returns:
  2184. Pointer to ggml_tensor"""
  2185. return lib.ggml_rms_norm(ctx, a, eps)
  2186. lib.ggml_rms_norm.argtypes = [
  2187. ggml_context_p,
  2188. ctypes.POINTER(ggml_tensor),
  2189. ctypes.c_float,
  2190. ]
  2191. lib.ggml_rms_norm.restype = ctypes.POINTER(ggml_tensor)
  2192. # GGML_API struct ggml_tensor * ggml_rms_norm_inplace(
  2193. # struct ggml_context * ctx,
  2194. # struct ggml_tensor * a,
  2195. # float eps);
  2196. def ggml_rms_norm_inplace(
  2197. ctx: ggml_context_p,
  2198. a: ggml_tensor_p,
  2199. eps: Union[ctypes.c_float, float],
  2200. ) -> ggml_tensor_p:
  2201. return lib.ggml_rms_norm_inplace(ctx, a, eps)
  2202. lib.ggml_rms_norm_inplace.argtypes = [
  2203. ggml_context_p,
  2204. ctypes.POINTER(ggml_tensor),
  2205. ctypes.c_float,
  2206. ]
  2207. lib.ggml_rms_norm_inplace.restype = ctypes.POINTER(ggml_tensor)
  2208. # // group normalize along ne0*ne1*n_groups
  2209. # // used in stable-diffusion
  2210. # // TODO: eps is hardcoded to 1e-6 for now
  2211. # GGML_API struct ggml_tensor * ggml_group_norm(
  2212. # struct ggml_context * ctx,
  2213. # struct ggml_tensor * a,
  2214. # int n_groups);
  2215. def ggml_group_norm(
  2216. ctx: ggml_context_p,
  2217. a: ggml_tensor_p,
  2218. n_groups: int,
  2219. ) -> ggml_tensor_p:
  2220. """Group normalize a tensor and return the result.
  2221. Parameters:
  2222. ctx: ggml context
  2223. a: tensor
  2224. n_groups: int
  2225. Returns:
  2226. Pointer to ggml_tensor"""
  2227. return lib.ggml_group_norm(ctx, a, n_groups)
  2228. lib.ggml_group_norm.argtypes = [
  2229. ggml_context_p,
  2230. ctypes.POINTER(ggml_tensor),
  2231. ctypes.c_int,
  2232. ]
  2233. lib.ggml_group_norm.restype = ctypes.POINTER(ggml_tensor)
  2234. # GGML_API struct ggml_tensor * ggml_group_norm_inplace(
  2235. # struct ggml_context * ctx,
  2236. # struct ggml_tensor * a,
  2237. # int n_groups);
  2238. def ggml_group_norm_inplace(
  2239. ctx: ggml_context_p,
  2240. a: ggml_tensor_p,
  2241. n_groups: int,
  2242. ) -> ggml_tensor_p:
  2243. """Group normalize a tensor and store the result in the first tensor.
  2244. Parameters:
  2245. ctx: ggml context
  2246. a: tensor
  2247. n_groups: int
  2248. Returns:
  2249. Pointer to ggml_tensor"""
  2250. return lib.ggml_group_norm_inplace(ctx, a, n_groups)
  2251. lib.ggml_group_norm_inplace.argtypes = [
  2252. ggml_context_p,
  2253. ctypes.POINTER(ggml_tensor),
  2254. ctypes.c_int,
  2255. ]
  2256. lib.ggml_group_norm_inplace.restype = ctypes.POINTER(ggml_tensor)
  2257. # // a - x
  2258. # // b - dy
  2259. # GGML_API struct ggml_tensor * ggml_rms_norm_back(
  2260. # struct ggml_context * ctx,
  2261. # struct ggml_tensor * a,
  2262. # struct ggml_tensor * b
  2263. # float eps);
  2264. def ggml_rms_norm_back(
  2265. ctx: ggml_context_p,
  2266. a: ggml_tensor_p,
  2267. b: ggml_tensor_p,
  2268. eps: Union[ctypes.c_float, float],
  2269. ) -> ggml_tensor_p:
  2270. return lib.ggml_rms_norm_back(ctx, a, b, eps)
  2271. lib.ggml_rms_norm_back.argtypes = [
  2272. ggml_context_p,
  2273. ctypes.POINTER(ggml_tensor),
  2274. ctypes.POINTER(ggml_tensor),
  2275. ctypes.c_float,
  2276. ]
  2277. lib.ggml_rms_norm_back.restype = ctypes.POINTER(ggml_tensor)
  2278. # // A: m rows, n columns
  2279. # // B: p rows, n columns (i.e. we transpose it internally)
  2280. # // result is m columns, p rows
  2281. # GGML_API struct ggml_tensor * ggml_mul_mat(
  2282. # struct ggml_context * ctx,
  2283. # struct ggml_tensor * a,
  2284. # struct ggml_tensor * b);
  2285. def ggml_mul_mat(
  2286. ctx: ggml_context_p,
  2287. a: ggml_tensor_p,
  2288. b: ggml_tensor_p,
  2289. ) -> ggml_tensor_p:
  2290. """Multiply two matrices and return the result.
  2291. A: m rows, n columns
  2292. B: p rows, n columns (i.e. we transpose it internally)
  2293. result is m columns, p rows
  2294. Parameters:
  2295. ctx: ggml context
  2296. a: tensor
  2297. b: tensor
  2298. Returns:
  2299. Pointer to ggml_tensor"""
  2300. return lib.ggml_mul_mat(ctx, a, b)
  2301. lib.ggml_mul_mat.argtypes = [
  2302. ggml_context_p,
  2303. ctypes.POINTER(ggml_tensor),
  2304. ctypes.POINTER(ggml_tensor),
  2305. ]
  2306. lib.ggml_mul_mat.restype = ctypes.POINTER(ggml_tensor)
  2307. # // A: m columns, n rows,
  2308. # // B: p columns, n rows,
  2309. # // result is m columns, p rows
  2310. # GGML_API struct ggml_tensor * ggml_out_prod(
  2311. # struct ggml_context * ctx,
  2312. # struct ggml_tensor * a,
  2313. # struct ggml_tensor * b);
  2314. def ggml_out_prod(
  2315. ctx: ggml_context_p,
  2316. a: ggml_tensor_p,
  2317. b: ggml_tensor_p,
  2318. ) -> ggml_tensor_p:
  2319. """Compute the outer product of two matrices and return the result.
  2320. A: m columns, n rows,
  2321. B: p columns, n rows,
  2322. result is m columns, p rows
  2323. Parameters:
  2324. ctx: ggml context
  2325. a: tensor
  2326. b: tensor
  2327. Returns:
  2328. Pointer to ggml_tensor"""
  2329. return lib.ggml_out_prod(ctx, a, b)
  2330. lib.ggml_out_prod.argtypes = [
  2331. ggml_context_p,
  2332. ctypes.POINTER(ggml_tensor),
  2333. ctypes.POINTER(ggml_tensor),
  2334. ]
  2335. lib.ggml_out_prod.restype = ctypes.POINTER(ggml_tensor)
  2336. # //
  2337. # // operations on tensors without backpropagation
  2338. # //
  2339. # GGML_API struct ggml_tensor * ggml_scale(
  2340. # struct ggml_context * ctx,
  2341. # struct ggml_tensor * a,
  2342. # struct ggml_tensor * b);
  2343. def ggml_scale(
  2344. ctx: ggml_context_p,
  2345. a: ggml_tensor_p,
  2346. b: ggml_tensor_p,
  2347. ) -> ggml_tensor_p:
  2348. """Scale a tensor by another tensor and return the result.
  2349. Parameters:
  2350. ctx: ggml context
  2351. a: tensor
  2352. b: tensor
  2353. Returns:
  2354. Pointer to ggml_tensor"""
  2355. return lib.ggml_scale(ctx, a, b)
  2356. lib.ggml_scale.argtypes = [
  2357. ggml_context_p,
  2358. ctypes.POINTER(ggml_tensor),
  2359. ctypes.POINTER(ggml_tensor),
  2360. ]
  2361. lib.ggml_scale.restype = ctypes.POINTER(ggml_tensor)
  2362. # // in-place, returns view(a)
  2363. # GGML_API struct ggml_tensor * ggml_scale_inplace(
  2364. # struct ggml_context * ctx,
  2365. # struct ggml_tensor * a,
  2366. # struct ggml_tensor * b);
  2367. def ggml_scale_inplace(
  2368. ctx: ggml_context_p,
  2369. a: ggml_tensor_p,
  2370. b: ggml_tensor_p,
  2371. ) -> ggml_tensor_p:
  2372. """Scale a tensor by another tensor and store the result in the first tensor.
  2373. Parameters:
  2374. ctx: ggml context
  2375. a: tensor
  2376. Returns:
  2377. Pointer to ggml_tensor"""
  2378. return lib.ggml_scale_inplace(ctx, a, b)
  2379. lib.ggml_scale_inplace.argtypes = [
  2380. ggml_context_p,
  2381. ctypes.POINTER(ggml_tensor),
  2382. ctypes.POINTER(ggml_tensor),
  2383. ]
  2384. lib.ggml_scale_inplace.restype = ctypes.POINTER(ggml_tensor)
  2385. # // b -> view(a,offset,nb1,nb2,3), return modified a
  2386. # GGML_API struct ggml_tensor * ggml_set(
  2387. # struct ggml_context * ctx,
  2388. # struct ggml_tensor * a,
  2389. # struct ggml_tensor * b,
  2390. # size_t nb1,
  2391. # size_t nb2,
  2392. # size_t nb3,
  2393. # size_t offset);
  2394. def ggml_set(
  2395. ctx: ggml_context_p,
  2396. a: ggml_tensor_p,
  2397. b: ggml_tensor_p,
  2398. nb1: Union[ctypes.c_size_t, int],
  2399. nb2: Union[ctypes.c_size_t, int],
  2400. nb3: Union[ctypes.c_size_t, int],
  2401. offset: Union[ctypes.c_size_t, int],
  2402. ) -> ggml_tensor_p:
  2403. return lib.ggml_set(ctx, a, b, nb1, nb2, nb3, offset)
  2404. lib.ggml_set.argtypes = [
  2405. ggml_context_p,
  2406. ctypes.POINTER(ggml_tensor),
  2407. ctypes.POINTER(ggml_tensor),
  2408. ctypes.c_size_t,
  2409. ctypes.c_size_t,
  2410. ctypes.c_size_t,
  2411. ctypes.c_size_t,
  2412. ]
  2413. lib.ggml_set.restype = ctypes.POINTER(ggml_tensor)
  2414. # // b -> view(a,offset,nb1,nb2,3), return view(a)
  2415. # GGML_API struct ggml_tensor * ggml_set_inplace(
  2416. # struct ggml_context * ctx,
  2417. # struct ggml_tensor * a,
  2418. # struct ggml_tensor * b,
  2419. # size_t nb1,
  2420. # size_t nb2,
  2421. # size_t nb3,
  2422. # size_t offset);
  2423. def ggml_set_inplace(
  2424. ctx: ggml_context_p,
  2425. a: ggml_tensor_p,
  2426. b: ggml_tensor_p,
  2427. nb1: Union[ctypes.c_size_t, int],
  2428. nb2: Union[ctypes.c_size_t, int],
  2429. nb3: Union[ctypes.c_size_t, int],
  2430. offset: Union[ctypes.c_size_t, int],
  2431. ) -> ggml_tensor_p:
  2432. return lib.ggml_set_inplace(ctx, a, b, nb1, nb2, nb3, offset)
  2433. lib.ggml_set_inplace.argtypes = [
  2434. ggml_context_p,
  2435. ctypes.POINTER(ggml_tensor),
  2436. ctypes.POINTER(ggml_tensor),
  2437. ctypes.c_size_t,
  2438. ctypes.c_size_t,
  2439. ctypes.c_size_t,
  2440. ctypes.c_size_t,
  2441. ]
  2442. lib.ggml_set_inplace.restype = ctypes.POINTER(ggml_tensor)
  2443. # GGML_API struct ggml_tensor * ggml_set_1d(
  2444. # struct ggml_context * ctx,
  2445. # struct ggml_tensor * a,
  2446. # struct ggml_tensor * b,
  2447. # size_t offset);
  2448. def ggml_set_1d(
  2449. ctx: ggml_context_p,
  2450. a: ggml_tensor_p,
  2451. b: ggml_tensor_p,
  2452. offset: Union[ctypes.c_size_t, int],
  2453. ) -> ggml_tensor_p:
  2454. return lib.ggml_set_1d(ctx, a, b, offset)
  2455. lib.ggml_set_1d.argtypes = [
  2456. ggml_context_p,
  2457. ctypes.POINTER(ggml_tensor),
  2458. ctypes.POINTER(ggml_tensor),
  2459. ctypes.c_size_t,
  2460. ]
  2461. lib.ggml_set_1d.restype = ctypes.POINTER(ggml_tensor)
  2462. # GGML_API struct ggml_tensor * ggml_set_1d_inplace(
  2463. # struct ggml_context * ctx,
  2464. # struct ggml_tensor * a,
  2465. # struct ggml_tensor * b,
  2466. # size_t offset);
  2467. def ggml_set_1d_inplace(
  2468. ctx: ggml_context_p,
  2469. a: ggml_tensor_p,
  2470. b: ggml_tensor_p,
  2471. offset: Union[ctypes.c_size_t, int],
  2472. ) -> ggml_tensor_p:
  2473. return lib.ggml_set_1d_inplace(ctx, a, b, offset)
  2474. lib.ggml_set_1d_inplace.argtypes = [
  2475. ggml_context_p,
  2476. ctypes.POINTER(ggml_tensor),
  2477. ctypes.POINTER(ggml_tensor),
  2478. ctypes.c_size_t,
  2479. ]
  2480. lib.ggml_set_1d_inplace.restype = ctypes.POINTER(ggml_tensor)
  2481. # // b -> view(a,offset,nb1,nb2,3), return modified a
  2482. # GGML_API struct ggml_tensor * ggml_set_2d(
  2483. # struct ggml_context * ctx,
  2484. # struct ggml_tensor * a,
  2485. # struct ggml_tensor * b,
  2486. # size_t nb1,
  2487. # size_t offset);
  2488. def ggml_set_2d(
  2489. ctx: ggml_context_p,
  2490. a: ggml_tensor_p,
  2491. b: ggml_tensor_p,
  2492. nb1: Union[ctypes.c_size_t, int],
  2493. offset: Union[ctypes.c_size_t, int],
  2494. ) -> ggml_tensor_p:
  2495. return lib.ggml_set_2d(ctx, a, b, nb1, offset)
  2496. lib.ggml_set_2d.argtypes = [
  2497. ggml_context_p,
  2498. ctypes.POINTER(ggml_tensor),
  2499. ctypes.POINTER(ggml_tensor),
  2500. ctypes.c_size_t,
  2501. ctypes.c_size_t,
  2502. ]
  2503. lib.ggml_set_2d.restype = ctypes.POINTER(ggml_tensor)
  2504. # // b -> view(a,offset,nb1,nb2,3), return view(a)
  2505. # GGML_API struct ggml_tensor * ggml_set_2d_inplace(
  2506. # struct ggml_context * ctx,
  2507. # struct ggml_tensor * a,
  2508. # struct ggml_tensor * b,
  2509. # size_t nb1,
  2510. # size_t offset);
  2511. def ggml_set_2d_inplace(
  2512. ctx: ggml_context_p,
  2513. a: ggml_tensor_p,
  2514. b: ggml_tensor_p,
  2515. nb1: Union[ctypes.c_size_t, int],
  2516. offset: Union[ctypes.c_size_t, int],
  2517. ) -> ggml_tensor_p:
  2518. return lib.ggml_set_2d_inplace(ctx, a, b, nb1, offset)
  2519. lib.ggml_set_2d_inplace.argtypes = [
  2520. ggml_context_p,
  2521. ctypes.POINTER(ggml_tensor),
  2522. ctypes.POINTER(ggml_tensor),
  2523. ctypes.c_size_t,
  2524. ctypes.c_size_t,
  2525. ]
  2526. lib.ggml_set_2d_inplace.restype = ctypes.POINTER(ggml_tensor)
  2527. # // a -> b, return view(b)
  2528. # GGML_API struct ggml_tensor * ggml_cpy(
  2529. # struct ggml_context * ctx,
  2530. # struct ggml_tensor * a,
  2531. # struct ggml_tensor * b);
  2532. def ggml_cpy(
  2533. ctx: ggml_context_p,
  2534. a: ggml_tensor_p,
  2535. b: ggml_tensor_p,
  2536. ) -> ggml_tensor_p:
  2537. return lib.ggml_cpy(ctx, a, b)
  2538. lib.ggml_cpy.argtypes = [
  2539. ggml_context_p,
  2540. ctypes.POINTER(ggml_tensor),
  2541. ctypes.POINTER(ggml_tensor),
  2542. ]
  2543. lib.ggml_cpy.restype = ctypes.POINTER(ggml_tensor)
  2544. # // a -> b, in-place, return view(b)
  2545. # GGML_API struct ggml_tensor * ggml_cpy_inplace(
  2546. # struct ggml_context * ctx,
  2547. # struct ggml_tensor * a,
  2548. # struct ggml_tensor * b);
  2549. def ggml_cpy_inplace(
  2550. ctx: ggml_context_p,
  2551. a: ggml_tensor_p,
  2552. b: ggml_tensor_p,
  2553. ) -> ggml_tensor_p:
  2554. return lib.ggml_cpy_inplace(ctx, a, b)
  2555. lib.ggml_cpy_inplace.argtypes = [
  2556. ggml_context_p,
  2557. ctypes.POINTER(ggml_tensor),
  2558. ctypes.POINTER(ggml_tensor),
  2559. ]
  2560. lib.ggml_cpy_inplace.restype = ctypes.POINTER(ggml_tensor)
  2561. # // make contiguous
  2562. # GGML_API struct ggml_tensor * ggml_cont(
  2563. # struct ggml_context * ctx,
  2564. # struct ggml_tensor * a);
  2565. def ggml_cont(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  2566. """Make a tensor contiguous and return the result.
  2567. Parameters:
  2568. ctx: ggml context
  2569. a: tensor
  2570. Returns:
  2571. Pointer to ggml_tensor"""
  2572. return lib.ggml_cont(ctx, a)
  2573. lib.ggml_cont.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2574. lib.ggml_cont.restype = ctypes.POINTER(ggml_tensor)
  2575. # // make contiguous, in-place
  2576. # GGML_API struct ggml_tensor * ggml_cont_inplace(
  2577. # struct ggml_context * ctx,
  2578. # struct ggml_tensor * a);
  2579. def ggml_cont_inplace(
  2580. ctx: ggml_context_p,
  2581. a: ggml_tensor_p,
  2582. ) -> ggml_tensor_p:
  2583. """Make a tensor contiguous and store the result in the first tensor.
  2584. Parameters:
  2585. ctx: ggml context
  2586. a: tensor
  2587. Returns:
  2588. Pointer to ggml_tensor"""
  2589. return lib.ggml_cont_inplace(ctx, a)
  2590. lib.ggml_cont_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2591. lib.ggml_cont_inplace.restype = ctypes.POINTER(ggml_tensor)
  2592. # // return view(a), b specifies the new shape
  2593. # // TODO: when we start computing gradient, make a copy instead of view
  2594. # GGML_API struct ggml_tensor * ggml_reshape(
  2595. # struct ggml_context * ctx,
  2596. # struct ggml_tensor * a,
  2597. # struct ggml_tensor * b);
  2598. def ggml_reshape(
  2599. ctx: ggml_context_p,
  2600. a: ggml_tensor_p,
  2601. b: ggml_tensor_p,
  2602. ) -> ggml_tensor_p:
  2603. return lib.ggml_reshape(ctx, a, b)
  2604. lib.ggml_reshape.argtypes = [
  2605. ggml_context_p,
  2606. ctypes.POINTER(ggml_tensor),
  2607. ctypes.POINTER(ggml_tensor),
  2608. ]
  2609. lib.ggml_reshape.restype = ctypes.POINTER(ggml_tensor)
  2610. # // return view(a)
  2611. # // TODO: when we start computing gradient, make a copy instead of view
  2612. # GGML_API struct ggml_tensor * ggml_reshape_1d(
  2613. # struct ggml_context * ctx,
  2614. # struct ggml_tensor * a,
  2615. # int64_t ne0);
  2616. def ggml_reshape_1d(
  2617. ctx: ggml_context_p,
  2618. a: ggml_tensor_p,
  2619. ne0: Union[ctypes.c_int64, int],
  2620. ) -> ggml_tensor_p:
  2621. return lib.ggml_reshape_1d(ctx, a, ne0)
  2622. lib.ggml_reshape_1d.argtypes = [
  2623. ggml_context_p,
  2624. ctypes.POINTER(ggml_tensor),
  2625. ctypes.c_int64,
  2626. ]
  2627. lib.ggml_reshape_1d.restype = ctypes.POINTER(ggml_tensor)
  2628. # GGML_API struct ggml_tensor * ggml_reshape_2d(
  2629. # struct ggml_context * ctx,
  2630. # struct ggml_tensor * a,
  2631. # int64_t ne0,
  2632. # int64_t ne1);
  2633. def ggml_reshape_2d(
  2634. ctx: ggml_context_p,
  2635. a: ggml_tensor_p,
  2636. ne0: Union[ctypes.c_int64, int],
  2637. ne1: Union[ctypes.c_int64, int],
  2638. ) -> ggml_tensor_p:
  2639. return lib.ggml_reshape_2d(ctx, a, ne0, ne1)
  2640. lib.ggml_reshape_2d.argtypes = [
  2641. ggml_context_p,
  2642. ctypes.POINTER(ggml_tensor),
  2643. ctypes.c_int64,
  2644. ctypes.c_int64,
  2645. ]
  2646. lib.ggml_reshape_2d.restype = ctypes.POINTER(ggml_tensor)
  2647. # // return view(a)
  2648. # // TODO: when we start computing gradient, make a copy instead of view
  2649. # GGML_API struct ggml_tensor * ggml_reshape_3d(
  2650. # struct ggml_context * ctx,
  2651. # struct ggml_tensor * a,
  2652. # int64_t ne0,
  2653. # int64_t ne1,
  2654. # int64_t ne2);
  2655. def ggml_reshape_3d(
  2656. ctx: ggml_context_p,
  2657. a: ggml_tensor_p,
  2658. ne0: Union[ctypes.c_int64, int],
  2659. ne1: Union[ctypes.c_int64, int],
  2660. ne2: Union[ctypes.c_int64, int],
  2661. ) -> ggml_tensor_p:
  2662. return lib.ggml_reshape_3d(ctx, a, ne0, ne1, ne2)
  2663. lib.ggml_reshape_3d.argtypes = [
  2664. ggml_context_p,
  2665. ctypes.POINTER(ggml_tensor),
  2666. ctypes.c_int64,
  2667. ctypes.c_int64,
  2668. ctypes.c_int64,
  2669. ]
  2670. lib.ggml_reshape_3d.restype = ctypes.POINTER(ggml_tensor)
  2671. # GGML_API struct ggml_tensor * ggml_reshape_4d(
  2672. # struct ggml_context * ctx,
  2673. # struct ggml_tensor * a,
  2674. # int64_t ne0,
  2675. # int64_t ne1,
  2676. # int64_t ne2,
  2677. # int64_t ne3);
  2678. def ggml_reshape_4d(
  2679. ctx: ggml_context_p,
  2680. a: ggml_tensor_p,
  2681. ne0: Union[ctypes.c_int64, int],
  2682. ne1: Union[ctypes.c_int64, int],
  2683. ne2: Union[ctypes.c_int64, int],
  2684. ne3: Union[ctypes.c_int64, int],
  2685. ) -> ggml_tensor_p:
  2686. return lib.ggml_reshape_4d(ctx, a, ne0, ne1, ne2, ne3)
  2687. lib.ggml_reshape_4d.argtypes = [
  2688. ggml_context_p,
  2689. ctypes.POINTER(ggml_tensor),
  2690. ctypes.c_int64,
  2691. ctypes.c_int64,
  2692. ctypes.c_int64,
  2693. ctypes.c_int64,
  2694. ]
  2695. lib.ggml_reshape_4d.restype = ctypes.POINTER(ggml_tensor)
  2696. # // offset in bytes
  2697. # GGML_API struct ggml_tensor * ggml_view_1d(
  2698. # struct ggml_context * ctx,
  2699. # struct ggml_tensor * a,
  2700. # int64_t ne0,
  2701. # size_t offset);
  2702. def ggml_view_1d(
  2703. ctx: ggml_context_p,
  2704. a: ggml_tensor_p,
  2705. ne0: Union[ctypes.c_int64, int],
  2706. offset: Union[ctypes.c_size_t, int],
  2707. ) -> ggml_tensor_p:
  2708. return lib.ggml_view_1d(ctx, a, ne0, offset)
  2709. lib.ggml_view_1d.argtypes = [
  2710. ggml_context_p,
  2711. ctypes.POINTER(ggml_tensor),
  2712. ctypes.c_int64,
  2713. ctypes.c_size_t,
  2714. ]
  2715. lib.ggml_view_1d.restype = ctypes.POINTER(ggml_tensor)
  2716. # GGML_API struct ggml_tensor * ggml_view_2d(
  2717. # struct ggml_context * ctx,
  2718. # struct ggml_tensor * a,
  2719. # int64_t ne0,
  2720. # int64_t ne1,
  2721. # size_t nb1, // row stride in bytes
  2722. # size_t offset);
  2723. def ggml_view_2d(
  2724. ctx: ggml_context_p,
  2725. a: ggml_tensor_p,
  2726. ne0: Union[ctypes.c_int64, int],
  2727. ne1: Union[ctypes.c_int64, int],
  2728. nb1: Union[ctypes.c_size_t, int],
  2729. offset: Union[ctypes.c_size_t, int],
  2730. ) -> ggml_tensor_p:
  2731. return lib.ggml_view_2d(ctx, a, ne0, ne1, nb1, offset)
  2732. lib.ggml_view_2d.argtypes = [
  2733. ggml_context_p,
  2734. ctypes.POINTER(ggml_tensor),
  2735. ctypes.c_int64,
  2736. ctypes.c_int64,
  2737. ctypes.c_size_t,
  2738. ctypes.c_size_t,
  2739. ]
  2740. lib.ggml_view_2d.restype = ctypes.POINTER(ggml_tensor)
  2741. # GGML_API struct ggml_tensor * ggml_view_3d(
  2742. # struct ggml_context * ctx,
  2743. # struct ggml_tensor * a,
  2744. # int64_t ne0,
  2745. # int64_t ne1,
  2746. # int64_t ne2,
  2747. # size_t nb1, // row stride in bytes
  2748. # size_t nb2, // slice stride in bytes
  2749. # size_t offset);
  2750. def ggml_view_3d(
  2751. ctx: ggml_context_p,
  2752. a: ggml_tensor_p,
  2753. ne0: Union[ctypes.c_int64, int],
  2754. ne1: Union[ctypes.c_int64, int],
  2755. ne2: Union[ctypes.c_int64, int],
  2756. nb1: Union[ctypes.c_size_t, int],
  2757. nb2: Union[ctypes.c_size_t, int],
  2758. offset: Union[ctypes.c_size_t, int],
  2759. ) -> ggml_tensor_p:
  2760. return lib.ggml_view_3d(ctx, a, ne0, ne1, ne2, nb1, nb2, offset)
  2761. lib.ggml_view_3d.argtypes = [
  2762. ggml_context_p,
  2763. ctypes.POINTER(ggml_tensor),
  2764. ctypes.c_int64,
  2765. ctypes.c_int64,
  2766. ctypes.c_int64,
  2767. ctypes.c_size_t,
  2768. ctypes.c_size_t,
  2769. ctypes.c_size_t,
  2770. ]
  2771. lib.ggml_view_3d.restype = ctypes.POINTER(ggml_tensor)
  2772. # GGML_API struct ggml_tensor * ggml_view_4d(
  2773. # struct ggml_context * ctx,
  2774. # struct ggml_tensor * a,
  2775. # int64_t ne0,
  2776. # int64_t ne1,
  2777. # int64_t ne2,
  2778. # int64_t ne3,
  2779. # size_t nb1, // row stride in bytes
  2780. # size_t nb2, // slice stride in bytes
  2781. # size_t nb3,
  2782. # size_t offset);
  2783. def ggml_view_4d(
  2784. ctx: ggml_context_p,
  2785. a: ggml_tensor_p,
  2786. ne0: Union[ctypes.c_int64, int],
  2787. ne1: Union[ctypes.c_int64, int],
  2788. ne2: Union[ctypes.c_int64, int],
  2789. ne3: Union[ctypes.c_int64, int],
  2790. nb1: Union[ctypes.c_size_t, int],
  2791. nb2: Union[ctypes.c_size_t, int],
  2792. nb3: Union[ctypes.c_size_t, int],
  2793. offset: Union[ctypes.c_size_t, int],
  2794. ) -> ggml_tensor_p:
  2795. return lib.ggml_view_4d(ctx, a, ne0, ne1, ne2, ne3, nb1, nb2, nb3, offset)
  2796. lib.ggml_view_4d.argtypes = [
  2797. ggml_context_p,
  2798. ctypes.POINTER(ggml_tensor),
  2799. ctypes.c_int64,
  2800. ctypes.c_int64,
  2801. ctypes.c_int64,
  2802. ctypes.c_int64,
  2803. ctypes.c_size_t,
  2804. ctypes.c_size_t,
  2805. ctypes.c_size_t,
  2806. ctypes.c_size_t,
  2807. ]
  2808. lib.ggml_view_4d.restype = ctypes.POINTER(ggml_tensor)
  2809. # GGML_API struct ggml_tensor * ggml_permute(
  2810. # struct ggml_context * ctx,
  2811. # struct ggml_tensor * a,
  2812. # int axis0,
  2813. # int axis1,
  2814. # int axis2,
  2815. # int axis3);
  2816. def ggml_permute(
  2817. ctx: ggml_context_p,
  2818. a: ggml_tensor_p,
  2819. axis0: Union[ctypes.c_int, int],
  2820. axis1: Union[ctypes.c_int, int],
  2821. axis2: Union[ctypes.c_int, int],
  2822. axis3: Union[ctypes.c_int, int],
  2823. ) -> ggml_tensor_p:
  2824. return lib.ggml_permute(ctx, a, axis0, axis1, axis2, axis3)
  2825. lib.ggml_permute.argtypes = [
  2826. ggml_context_p,
  2827. ctypes.POINTER(ggml_tensor),
  2828. ctypes.c_int,
  2829. ctypes.c_int,
  2830. ctypes.c_int,
  2831. ctypes.c_int,
  2832. ]
  2833. lib.ggml_permute.restype = ctypes.POINTER(ggml_tensor)
  2834. # // alias for ggml_permute(ctx, a, 1, 0, 2, 3)
  2835. # GGML_API struct ggml_tensor * ggml_transpose(
  2836. # struct ggml_context * ctx,
  2837. # struct ggml_tensor * a);
  2838. def ggml_transpose(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  2839. """Transpose *the first two dimensions* of a tensor and return the result.
  2840. alias for `ggml_permute(ctx, a, 1, 0, 2, 3)`
  2841. Parameters:
  2842. ctx: ggml context
  2843. a: tensor
  2844. Returns:
  2845. Pointer to ggml_tensor"""
  2846. return lib.ggml_transpose(ctx, a)
  2847. lib.ggml_transpose.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2848. lib.ggml_transpose.restype = ctypes.POINTER(ggml_tensor)
  2849. # GGML_API struct ggml_tensor * ggml_get_rows(
  2850. # struct ggml_context * ctx,
  2851. # struct ggml_tensor * a,
  2852. # struct ggml_tensor * b);
  2853. def ggml_get_rows(
  2854. ctx: ggml_context_p,
  2855. a: ggml_tensor_p,
  2856. b: ggml_tensor_p,
  2857. ) -> ggml_tensor_p:
  2858. return lib.ggml_get_rows(ctx, a, b)
  2859. lib.ggml_get_rows.argtypes = [
  2860. ggml_context_p,
  2861. ctypes.POINTER(ggml_tensor),
  2862. ctypes.POINTER(ggml_tensor),
  2863. ]
  2864. lib.ggml_get_rows.restype = ctypes.POINTER(ggml_tensor)
  2865. # GGML_API struct ggml_tensor * ggml_get_rows_back(
  2866. # struct ggml_context * ctx,
  2867. # struct ggml_tensor * a,
  2868. # struct ggml_tensor * b,
  2869. # struct ggml_tensor * c);
  2870. def ggml_get_rows_back(
  2871. ctx: ggml_context_p,
  2872. a: ggml_tensor_p,
  2873. b: ggml_tensor_p,
  2874. c: ggml_tensor_p,
  2875. ) -> ggml_tensor_p:
  2876. return lib.ggml_get_rows_back(ctx, a, b, c)
  2877. lib.ggml_get_rows_back.argtypes = [
  2878. ggml_context_p,
  2879. ctypes.POINTER(ggml_tensor),
  2880. ctypes.POINTER(ggml_tensor),
  2881. ctypes.POINTER(ggml_tensor),
  2882. ]
  2883. lib.ggml_get_rows_back.restype = ctypes.POINTER(ggml_tensor)
  2884. # GGML_API struct ggml_tensor * ggml_diag(
  2885. # struct ggml_context * ctx,
  2886. # struct ggml_tensor * a);
  2887. def ggml_diag(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  2888. return lib.ggml_diag(ctx, a)
  2889. lib.ggml_diag.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2890. lib.ggml_diag.restype = ctypes.POINTER(ggml_tensor)
  2891. # // set elements above the diagonal to -INF
  2892. # GGML_API struct ggml_tensor * ggml_diag_mask_inf(
  2893. # struct ggml_context * ctx,
  2894. # struct ggml_tensor * a,
  2895. # int n_past);
  2896. def ggml_diag_mask_inf(
  2897. ctx: ggml_context_p,
  2898. a: ggml_tensor_p,
  2899. n_past: Union[ctypes.c_int, int],
  2900. ) -> ggml_tensor_p:
  2901. return lib.ggml_diag_mask_inf(ctx, a, n_past)
  2902. lib.ggml_diag_mask_inf.argtypes = [
  2903. ggml_context_p,
  2904. ctypes.POINTER(ggml_tensor),
  2905. ctypes.c_int,
  2906. ]
  2907. lib.ggml_diag_mask_inf.restype = ctypes.POINTER(ggml_tensor)
  2908. # // in-place, returns view(a)
  2909. # GGML_API struct ggml_tensor * ggml_diag_mask_inf_inplace(
  2910. # struct ggml_context * ctx,
  2911. # struct ggml_tensor * a,
  2912. # int n_past);
  2913. def ggml_diag_mask_inf_inplace(
  2914. ctx: ggml_context_p,
  2915. a: ggml_tensor_p,
  2916. n_past: Union[ctypes.c_int, int],
  2917. ) -> ggml_tensor_p:
  2918. return lib.ggml_diag_mask_inf_inplace(ctx, a, n_past)
  2919. lib.ggml_diag_mask_inf_inplace.argtypes = [
  2920. ggml_context_p,
  2921. ctypes.POINTER(ggml_tensor),
  2922. ctypes.c_int,
  2923. ]
  2924. lib.ggml_diag_mask_inf_inplace.restype = ctypes.POINTER(ggml_tensor)
  2925. # // set elements above the diagonal to 0
  2926. # GGML_API struct ggml_tensor * ggml_diag_mask_zero(
  2927. # struct ggml_context * ctx,
  2928. # struct ggml_tensor * a,
  2929. # int n_past);
  2930. def ggml_diag_mask_zero(
  2931. ctx: ggml_context_p,
  2932. a: ggml_tensor_p,
  2933. n_past: Union[ctypes.c_int, int],
  2934. ) -> ggml_tensor_p:
  2935. return lib.ggml_diag_mask_zero(ctx, a, n_past)
  2936. lib.ggml_diag_mask_zero.argtypes = [
  2937. ggml_context_p,
  2938. ctypes.POINTER(ggml_tensor),
  2939. ctypes.c_int,
  2940. ]
  2941. lib.ggml_diag_mask_zero.restype = ctypes.POINTER(ggml_tensor)
  2942. # // in-place, returns view(a)
  2943. # GGML_API struct ggml_tensor * ggml_diag_mask_zero_inplace(
  2944. # struct ggml_context * ctx,
  2945. # struct ggml_tensor * a,
  2946. # int n_past);
  2947. def ggml_diag_mask_zero_inplace(
  2948. ctx: ggml_context_p,
  2949. a: ggml_tensor_p,
  2950. n_past: Union[ctypes.c_int, int],
  2951. ) -> ggml_tensor_p:
  2952. return lib.ggml_diag_mask_zero_inplace(ctx, a, n_past)
  2953. lib.ggml_diag_mask_zero_inplace.argtypes = [
  2954. ggml_context_p,
  2955. ctypes.POINTER(ggml_tensor),
  2956. ctypes.c_int,
  2957. ]
  2958. lib.ggml_diag_mask_zero_inplace.restype = ctypes.POINTER(ggml_tensor)
  2959. # GGML_API struct ggml_tensor * ggml_soft_max(
  2960. # struct ggml_context * ctx,
  2961. # struct ggml_tensor * a);
  2962. def ggml_soft_max(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  2963. return lib.ggml_soft_max(ctx, a)
  2964. lib.ggml_soft_max.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2965. lib.ggml_soft_max.restype = ctypes.POINTER(ggml_tensor)
  2966. # // in-place, returns view(a)
  2967. # GGML_API struct ggml_tensor * ggml_soft_max_inplace(
  2968. # struct ggml_context * ctx,
  2969. # struct ggml_tensor * a);
  2970. def ggml_soft_max_inplace(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  2971. return lib.ggml_soft_max_inplace(ctx, a)
  2972. lib.ggml_soft_max_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2973. lib.ggml_soft_max_inplace.restype = ctypes.POINTER(ggml_tensor)
  2974. # GGML_API struct ggml_tensor * ggml_soft_max_back(
  2975. # struct ggml_context * ctx,
  2976. # struct ggml_tensor * a,
  2977. # struct ggml_tensor * b);
  2978. def ggml_soft_max_back(
  2979. ctx: ggml_context_p,
  2980. a: ggml_tensor_p,
  2981. b: ggml_tensor_p,
  2982. ) -> ggml_tensor_p:
  2983. return lib.ggml_soft_max_back(ctx, a, b)
  2984. lib.ggml_soft_max_back.argtypes = [
  2985. ggml_context_p,
  2986. ctypes.POINTER(ggml_tensor),
  2987. ctypes.POINTER(ggml_tensor),
  2988. ]
  2989. lib.ggml_soft_max_back.restype = ctypes.POINTER(ggml_tensor)
  2990. # // in-place, returns view(a)
  2991. # GGML_API struct ggml_tensor * ggml_soft_max_back_inplace(
  2992. # struct ggml_context * ctx,
  2993. # struct ggml_tensor * a,
  2994. # struct ggml_tensor * b);
  2995. def ggml_soft_max_back_inplace(
  2996. ctx: ggml_context_p,
  2997. a: ggml_tensor_p,
  2998. b: ggml_tensor_p,
  2999. ) -> ggml_tensor_p:
  3000. return lib.ggml_soft_max_back_inplace(ctx, a, b)
  3001. lib.ggml_soft_max_back_inplace.argtypes = [
  3002. ggml_context_p,
  3003. ctypes.POINTER(ggml_tensor),
  3004. ctypes.POINTER(ggml_tensor),
  3005. ]
  3006. lib.ggml_soft_max_back_inplace.restype = ctypes.POINTER(ggml_tensor)
  3007. # // rotary position embedding
  3008. # // if mode & 1 == 1, skip n_past elements
  3009. # // if mode & 2 == 1, GPT-NeoX style
  3010. # // if mode & 4 == 1, ChatGLM style
  3011. # // TODO: avoid creating a new tensor every time
  3012. # GGML_API struct ggml_tensor * ggml_rope(
  3013. # struct ggml_context * ctx,
  3014. # struct ggml_tensor * a,
  3015. # int n_past,
  3016. # int n_dims,
  3017. # int mode,
  3018. # int n_ctx);
  3019. def ggml_rope(
  3020. ctx: ggml_context_p,
  3021. a: ggml_tensor_p,
  3022. n_past: Union[ctypes.c_int, int],
  3023. n_dims: Union[ctypes.c_int, int],
  3024. mode: Union[ctypes.c_int, int],
  3025. n_ctx: Union[ctypes.c_int, int],
  3026. ) -> ggml_tensor_p:
  3027. return lib.ggml_rope(ctx, a, n_past, n_dims, mode, n_ctx)
  3028. lib.ggml_rope.argtypes = [
  3029. ggml_context_p,
  3030. ctypes.POINTER(ggml_tensor),
  3031. ctypes.c_int,
  3032. ctypes.c_int,
  3033. ctypes.c_int,
  3034. ctypes.c_int,
  3035. ]
  3036. lib.ggml_rope.restype = ctypes.POINTER(ggml_tensor)
  3037. # // in-place, returns view(a)
  3038. # GGML_API struct ggml_tensor * ggml_rope_inplace(
  3039. # struct ggml_context * ctx,
  3040. # struct ggml_tensor * a,
  3041. # int n_past,
  3042. # int n_dims,
  3043. # int mode,
  3044. # int n_ctx);
  3045. def ggml_rope_inplace(
  3046. ctx: ggml_context_p,
  3047. a: ggml_tensor_p,
  3048. n_past: Union[ctypes.c_int, int],
  3049. n_dims: Union[ctypes.c_int, int],
  3050. mode: Union[ctypes.c_int, int],
  3051. n_ctx: Union[ctypes.c_int, int],
  3052. ) -> ggml_tensor_p:
  3053. return lib.ggml_rope_inplace(ctx, a, n_past, n_dims, mode, n_ctx)
  3054. lib.ggml_rope_inplace.argtypes = [
  3055. ggml_context_p,
  3056. ctypes.POINTER(ggml_tensor),
  3057. ctypes.c_int,
  3058. ctypes.c_int,
  3059. ctypes.c_int,
  3060. ctypes.c_int,
  3061. ]
  3062. lib.ggml_rope_inplace.restype = ctypes.POINTER(ggml_tensor)
  3063. # // custom RoPE
  3064. # GGML_API struct ggml_tensor * ggml_rope_custom(
  3065. # struct ggml_context * ctx,
  3066. # struct ggml_tensor * a,
  3067. # int n_past,
  3068. # int n_dims,
  3069. # int mode,
  3070. # int n_ctx,
  3071. # float freq_base,
  3072. # float freq_scale);
  3073. def ggml_rope_custom(
  3074. ctx: ggml_context_p,
  3075. a: ggml_tensor_p,
  3076. n_past: Union[ctypes.c_int, int],
  3077. n_dims: Union[ctypes.c_int, int],
  3078. mode: Union[ctypes.c_int, int],
  3079. n_ctx: Union[ctypes.c_int, int],
  3080. freq_base: Union[ctypes.c_float, float],
  3081. freq_scale: Union[ctypes.c_float, float],
  3082. ) -> ggml_tensor_p:
  3083. return lib.ggml_rope_custom(
  3084. ctx, a, n_past, n_dims, mode, n_ctx, freq_base, freq_scale
  3085. )
  3086. lib.ggml_rope_custom.argtypes = [
  3087. ggml_context_p,
  3088. ctypes.POINTER(ggml_tensor),
  3089. ctypes.c_int,
  3090. ctypes.c_int,
  3091. ctypes.c_int,
  3092. ctypes.c_int,
  3093. ctypes.c_float,
  3094. ctypes.c_float,
  3095. ]
  3096. lib.ggml_rope_custom.restype = ctypes.POINTER(ggml_tensor)
  3097. # // in-place, returns view(a)
  3098. # GGML_API struct ggml_tensor * ggml_rope_custom_inplace(
  3099. # struct ggml_context * ctx,
  3100. # struct ggml_tensor * a,
  3101. # int n_past,
  3102. # int n_dims,
  3103. # int mode,
  3104. # int n_ctx,
  3105. # float freq_base,
  3106. # float freq_scale);
  3107. def ggml_rope_custom_inplace(
  3108. ctx: ggml_context_p,
  3109. a: ggml_tensor_p,
  3110. n_past: Union[ctypes.c_int, int],
  3111. n_dims: Union[ctypes.c_int, int],
  3112. mode: Union[ctypes.c_int, int],
  3113. n_ctx: Union[ctypes.c_int, int],
  3114. freq_base: Union[ctypes.c_float, float],
  3115. freq_scale: Union[ctypes.c_float, float],
  3116. ) -> ggml_tensor_p:
  3117. return lib.ggml_rope_custom_inplace(
  3118. ctx, a, n_past, n_dims, mode, n_ctx, freq_base, freq_scale
  3119. )
  3120. lib.ggml_rope_custom_inplace.argtypes = [
  3121. ggml_context_p,
  3122. ctypes.POINTER(ggml_tensor),
  3123. ctypes.c_int,
  3124. ctypes.c_int,
  3125. ctypes.c_int,
  3126. ctypes.c_int,
  3127. ctypes.c_float,
  3128. ctypes.c_float,
  3129. ]
  3130. lib.ggml_rope_custom_inplace.restype = ctypes.POINTER(ggml_tensor)
  3131. # // xPos RoPE, in-place, returns view(a)
  3132. # GGML_API struct ggml_tensor * ggml_rope_xpos_inplace(
  3133. # struct ggml_context * ctx,
  3134. # struct ggml_tensor * a,
  3135. # int n_past,
  3136. # int n_dims,
  3137. # float base,
  3138. # bool down);
  3139. def ggml_rope_xpos_inplace(
  3140. ctx: ggml_context_p,
  3141. a: ggml_tensor_p,
  3142. n_past: Union[ctypes.c_int, int],
  3143. n_dims: Union[ctypes.c_int, int],
  3144. base: Union[ctypes.c_float, float],
  3145. down: Union[ctypes.c_bool, bool],
  3146. ) -> ggml_tensor_p:
  3147. return lib.ggml_rope_xpos_inplace(ctx, a, n_past, n_dims, base, down)
  3148. lib.ggml_rope_xpos_inplace.argtypes = [
  3149. ggml_context_p,
  3150. ctypes.POINTER(ggml_tensor),
  3151. ctypes.c_int,
  3152. ctypes.c_int,
  3153. ctypes.c_float,
  3154. ctypes.c_bool,
  3155. ]
  3156. lib.ggml_rope_xpos_inplace.restype = ctypes.POINTER(ggml_tensor)
  3157. # // rotary position embedding backward, i.e compute dx from dy
  3158. # // a - dy
  3159. # GGML_API struct ggml_tensor * ggml_rope_back(
  3160. # struct ggml_context * ctx,
  3161. # struct ggml_tensor * a,
  3162. # int n_past,
  3163. # int n_dims,
  3164. # int mode,
  3165. # int n_ctx,
  3166. # float freq_base,
  3167. # float freq_scale,
  3168. # float xpos_base,
  3169. # bool xpos_down);
  3170. def ggml_rope_back(
  3171. ctx: ggml_context_p,
  3172. a: ggml_tensor_p,
  3173. n_past: Union[ctypes.c_int, int],
  3174. n_dims: Union[ctypes.c_int, int],
  3175. mode: Union[ctypes.c_int, int],
  3176. n_ctx: Union[ctypes.c_int, int],
  3177. freq_base: Union[ctypes.c_float, float],
  3178. freq_scale: Union[ctypes.c_float, float],
  3179. xpos_base: Union[ctypes.c_float, float],
  3180. xpos_down: Union[ctypes.c_bool, bool],
  3181. ) -> ggml_tensor_p:
  3182. return lib.ggml_rope_back(
  3183. ctx, a, n_past, n_dims, mode, n_ctx, freq_base, freq_scale, xpos_base, xpos_down
  3184. )
  3185. lib.ggml_rope_back.argtypes = [
  3186. ggml_context_p,
  3187. ctypes.POINTER(ggml_tensor),
  3188. ctypes.c_int,
  3189. ctypes.c_int,
  3190. ctypes.c_int,
  3191. ctypes.c_int,
  3192. ctypes.c_float,
  3193. ctypes.c_float,
  3194. ctypes.c_float,
  3195. ctypes.c_bool,
  3196. ]
  3197. lib.ggml_rope_back.restype = ctypes.POINTER(ggml_tensor)
  3198. # // alibi position embedding
  3199. # // in-place, returns view(a)
  3200. # struct ggml_tensor * ggml_alibi(
  3201. # struct ggml_context * ctx,
  3202. # struct ggml_tensor * a,
  3203. # int n_past,
  3204. # int n_head,
  3205. # float bias_max);
  3206. def ggml_alibi(
  3207. ctx: ggml_context_p,
  3208. a: ggml_tensor_p,
  3209. n_past: Union[ctypes.c_int, int],
  3210. n_head: Union[ctypes.c_int, int],
  3211. bias_max: Union[ctypes.c_float, float],
  3212. ) -> ggml_tensor_p:
  3213. return lib.ggml_alibi(ctx, a, n_past, n_head, bias_max)
  3214. lib.ggml_alibi.argtypes = [
  3215. ggml_context_p,
  3216. ctypes.POINTER(ggml_tensor),
  3217. ctypes.c_int,
  3218. ctypes.c_int,
  3219. ctypes.c_float,
  3220. ]
  3221. lib.ggml_alibi.restype = ctypes.POINTER(ggml_tensor)
  3222. # // clamp
  3223. # // in-place, returns view(a)
  3224. # struct ggml_tensor * ggml_clamp(
  3225. # struct ggml_context * ctx,
  3226. # struct ggml_tensor * a,
  3227. # float min,
  3228. # float max);
  3229. def ggml_clamp(
  3230. ctx: ggml_context_p,
  3231. a: ggml_tensor_p,
  3232. min: Union[ctypes.c_float, float],
  3233. max: Union[ctypes.c_float, float],
  3234. ) -> ggml_tensor_p:
  3235. return lib.ggml_clamp(ctx, a, min, max)
  3236. lib.ggml_clamp.argtypes = [
  3237. ggml_context_p,
  3238. ctypes.POINTER(ggml_tensor),
  3239. ctypes.c_float,
  3240. ctypes.c_float,
  3241. ]
  3242. lib.ggml_clamp.restype = ctypes.POINTER(ggml_tensor)
  3243. # GGML_API struct ggml_tensor * ggml_conv_1d(
  3244. # struct ggml_context * ctx,
  3245. # struct ggml_tensor * a,
  3246. # struct ggml_tensor * b,
  3247. # int s0, // stride
  3248. # int p0, // padding
  3249. # int d0); // dilation
  3250. def ggml_conv_1d(
  3251. ctx: ggml_context_p,
  3252. a: ggml_tensor_p,
  3253. b: ggml_tensor_p,
  3254. s0: Union[ctypes.c_int, int],
  3255. p0: Union[ctypes.c_int, int],
  3256. d0: Union[ctypes.c_int, int],
  3257. ) -> ggml_tensor_p:
  3258. """Convolution 1D
  3259. Parameters:
  3260. a: input tensor
  3261. b: filter tensor
  3262. s0: stride
  3263. p0: padding
  3264. d0: dilation
  3265. Returns:
  3266. output tensor"""
  3267. return lib.ggml_conv_1d(ctx, a, b, s0, p0, d0)
  3268. lib.ggml_conv_1d.argtypes = [
  3269. ggml_context_p,
  3270. ctypes.POINTER(ggml_tensor),
  3271. ctypes.POINTER(ggml_tensor),
  3272. ctypes.c_int,
  3273. ctypes.c_int,
  3274. ctypes.c_int,
  3275. ]
  3276. lib.ggml_conv_1d.restype = ctypes.POINTER(ggml_tensor)
  3277. # // conv_1d with padding = half
  3278. # // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
  3279. # GGML_API struct ggml_tensor* ggml_conv_1d_ph(
  3280. # struct ggml_context * ctx,
  3281. # struct ggml_tensor * a,
  3282. # struct ggml_tensor * b,
  3283. # int s,
  3284. # int d);
  3285. def ggml_conv_1d_ph(
  3286. ctx: ggml_context_p,
  3287. a: ggml_tensor_p,
  3288. b: ggml_tensor_p,
  3289. s: Union[ctypes.c_int, int],
  3290. d: Union[ctypes.c_int, int],
  3291. ) -> ggml_tensor_p:
  3292. """Convolution 1D with padding = half
  3293. Parameters:
  3294. a: input tensor
  3295. b: filter tensor
  3296. s: stride
  3297. d: dilation
  3298. Returns:
  3299. output tensor"""
  3300. return lib.ggml_conv_1d_ph(ctx, a, b, s, d)
  3301. lib.ggml_conv_1d_ph.argtypes = [
  3302. ggml_context_p,
  3303. ctypes.POINTER(ggml_tensor),
  3304. ctypes.POINTER(ggml_tensor),
  3305. ctypes.c_int,
  3306. ctypes.c_int,
  3307. ]
  3308. lib.ggml_conv_1d_ph.restype = ctypes.POINTER(ggml_tensor)
  3309. # GGML_API struct ggml_tensor * ggml_conv_2d(
  3310. # struct ggml_context * ctx,
  3311. # struct ggml_tensor * a,
  3312. # struct ggml_tensor * b,
  3313. # int s0,
  3314. # int s1,
  3315. # int p0,
  3316. # int p1,
  3317. # int d0,
  3318. # int d1);
  3319. def ggml_conv_2d(
  3320. ctx: ggml_context_p,
  3321. a: ggml_tensor_p,
  3322. b: ggml_tensor_p,
  3323. s0: Union[ctypes.c_int, int],
  3324. s1: Union[ctypes.c_int, int],
  3325. p0: Union[ctypes.c_int, int],
  3326. p1: Union[ctypes.c_int, int],
  3327. d0: Union[ctypes.c_int, int],
  3328. d1: Union[ctypes.c_int, int],
  3329. ) -> ggml_tensor_p:
  3330. """Convolution 2D
  3331. Parameters:
  3332. a: input tensor
  3333. b: filter tensor
  3334. s0: stride
  3335. s1: stride
  3336. p0: padding
  3337. p1: padding
  3338. d0: dilation
  3339. d1: dilation
  3340. Returns:
  3341. output tensor"""
  3342. return lib.ggml_conv_2d(ctx, a, b, s0, s1, p0, p1, d0, d1)
  3343. lib.ggml_conv_2d.argtypes = [
  3344. ggml_context_p,
  3345. ctypes.POINTER(ggml_tensor),
  3346. ctypes.POINTER(ggml_tensor),
  3347. ctypes.c_int,
  3348. ctypes.c_int,
  3349. ctypes.c_int,
  3350. ctypes.c_int,
  3351. ctypes.c_int,
  3352. ctypes.c_int,
  3353. ]
  3354. lib.ggml_conv_2d.restype = ctypes.POINTER(ggml_tensor)
  3355. # // kernel size is a->ne[0] x a->ne[1]
  3356. # // stride is equal to kernel size
  3357. # // padding is zero
  3358. # // example:
  3359. # // a: 16 16 3 768
  3360. # // b: 1024 1024 3 1
  3361. # // res: 64 64 768 1
  3362. # // used in sam
  3363. # GGML_API struct ggml_tensor * ggml_conv_2d_sk_p0(
  3364. # struct ggml_context * ctx,
  3365. # struct ggml_tensor * a,
  3366. # struct ggml_tensor * b);
  3367. def ggml_conv_2d_sk_p0(
  3368. ctx: ggml_context_p,
  3369. a: ggml_tensor_p,
  3370. b: ggml_tensor_p,
  3371. ) -> ggml_tensor_p:
  3372. """Convolution 2D
  3373. Parameters:
  3374. a: input tensor
  3375. b: filter tensor
  3376. Returns:
  3377. output tensor"""
  3378. return lib.ggml_conv_2d_sk_p0(ctx, a, b)
  3379. lib.ggml_conv_2d_sk_p0.argtypes = [
  3380. ggml_context_p,
  3381. ctypes.POINTER(ggml_tensor),
  3382. ctypes.POINTER(ggml_tensor),
  3383. ]
  3384. lib.ggml_conv_2d_sk_p0.restype = ctypes.POINTER(ggml_tensor)
  3385. # // kernel size is a->ne[0] x a->ne[1]
  3386. # // stride is 1
  3387. # // padding is half
  3388. # // example:
  3389. # // a: 3 3 256 256
  3390. # // b: 64 64 256 1
  3391. # // res: 64 64 256 1
  3392. # // used in sam
  3393. # GGML_API struct ggml_tensor * ggml_conv_2d_s1_ph(
  3394. # struct ggml_context * ctx,
  3395. # struct ggml_tensor * a,
  3396. # struct ggml_tensor * b);
  3397. def ggml_conv_2d_s1_ph(
  3398. ctx: ggml_context_p,
  3399. a: ggml_tensor_p,
  3400. b: ggml_tensor_p,
  3401. ) -> ggml_tensor_p:
  3402. """Convolution 2D with stride = 1 and padding = half
  3403. Parameters:
  3404. a: input tensor
  3405. b: filter tensor
  3406. Returns:
  3407. output tensor"""
  3408. return lib.ggml_conv_2d_s1_ph(ctx, a, b)
  3409. lib.ggml_conv_2d_s1_ph.argtypes = [
  3410. ggml_context_p,
  3411. ctypes.POINTER(ggml_tensor),
  3412. ctypes.POINTER(ggml_tensor),
  3413. ]
  3414. lib.ggml_conv_2d_s1_ph.restype = ctypes.POINTER(ggml_tensor)
  3415. # GGML_API struct ggml_tensor * ggml_conv_transpose_2d_p0(
  3416. # struct ggml_context * ctx,
  3417. # struct ggml_tensor * a,
  3418. # struct ggml_tensor * b,
  3419. # int stride);
  3420. def ggml_conv_transpose_2d_p0(
  3421. ctx: ggml_context_p,
  3422. a: ggml_tensor_p,
  3423. b: ggml_tensor_p,
  3424. stride: Union[ctypes.c_int, int],
  3425. ) -> ggml_tensor_p:
  3426. """Convolution Transpose 2D with padding = zero
  3427. Parameters:
  3428. a: input tensor
  3429. b: filter tensor
  3430. stride: stride
  3431. Returns:
  3432. output tensor"""
  3433. return lib.ggml_conv_transpose_2d_p0(ctx, a, b, stride)
  3434. lib.ggml_conv_transpose_2d_p0.argtypes = [
  3435. ggml_context_p,
  3436. ctypes.POINTER(ggml_tensor),
  3437. ctypes.POINTER(ggml_tensor),
  3438. ctypes.c_int,
  3439. ]
  3440. lib.ggml_conv_transpose_2d_p0.restype = ctypes.POINTER(ggml_tensor)
  3441. # enum ggml_op_pool {
  3442. # GGML_OP_POOL_MAX,
  3443. # GGML_OP_POOL_AVG,
  3444. # GGML_OP_POOL_COUNT,
  3445. # };
  3446. GGML_OP_POOL_MAX = 0
  3447. GGML_OP_POOL_AVG = 1
  3448. GGML_OP_POOL_COUNT = 2
  3449. # GGML_API struct ggml_tensor * ggml_pool_1d(
  3450. # struct ggml_context * ctx,
  3451. # struct ggml_tensor * a,
  3452. # enum ggml_op_pool op,
  3453. # int k0, // kernel size
  3454. # int s0, // stride
  3455. # int p0); // padding
  3456. def ggml_pool_1d(
  3457. ctx: ggml_context_p,
  3458. a: ggml_tensor_p,
  3459. op: Union[ctypes.c_int, int],
  3460. k0: Union[ctypes.c_int, int],
  3461. s0: Union[ctypes.c_int, int],
  3462. p0: Union[ctypes.c_int, int],
  3463. ) -> ggml_tensor_p:
  3464. """1D Pooling
  3465. Parameters:
  3466. a: input tensor
  3467. op: pooling operation
  3468. k0: kernel size
  3469. s0: stride
  3470. p0: padding
  3471. Returns:
  3472. output tensor"""
  3473. return lib.ggml_pool_1d(ctx, a, op, k0, s0, p0)
  3474. lib.ggml_pool_1d.argtypes = [
  3475. ggml_context_p,
  3476. ctypes.POINTER(ggml_tensor),
  3477. ctypes.c_int,
  3478. ctypes.c_int,
  3479. ctypes.c_int,
  3480. ctypes.c_int,
  3481. ]
  3482. lib.ggml_pool_1d.restype = ctypes.POINTER(ggml_tensor)
  3483. # GGML_API struct ggml_tensor * ggml_pool_2d(
  3484. # struct ggml_context * ctx,
  3485. # struct ggml_tensor * a,
  3486. # enum ggml_op_pool op,
  3487. # int k0,
  3488. # int k1,
  3489. # int s0,
  3490. # int s1,
  3491. # int p0,
  3492. # int p1);
  3493. def ggml_pool_2d(
  3494. ctx: ggml_context_p,
  3495. a: ggml_tensor_p,
  3496. op: Union[ctypes.c_int, int],
  3497. k0: Union[ctypes.c_int, int],
  3498. k1: Union[ctypes.c_int, int],
  3499. s0: Union[ctypes.c_int, int],
  3500. s1: Union[ctypes.c_int, int],
  3501. p0: Union[ctypes.c_int, int],
  3502. p1: Union[ctypes.c_int, int],
  3503. ) -> ggml_tensor_p:
  3504. """2D Pooling
  3505. Parameters:
  3506. a: input tensor
  3507. op: pooling operation
  3508. k0: kernel size
  3509. k1: kernel size
  3510. s0: stride
  3511. s1: stride
  3512. p0: padding
  3513. p1: padding
  3514. Returns:
  3515. output tensor"""
  3516. return lib.ggml_pool_2d(ctx, a, op, k0, k1, s0, s1, p0, p1)
  3517. lib.ggml_pool_2d.argtypes = [
  3518. ggml_context_p,
  3519. ctypes.POINTER(ggml_tensor),
  3520. ctypes.c_int,
  3521. ctypes.c_int,
  3522. ctypes.c_int,
  3523. ctypes.c_int,
  3524. ctypes.c_int,
  3525. ctypes.c_int,
  3526. ]
  3527. lib.ggml_pool_2d.restype = ctypes.POINTER(ggml_tensor)
  3528. # // nearest interpolate
  3529. # // used in stable-diffusion
  3530. # GGML_API struct ggml_tensor * ggml_upscale(
  3531. # struct ggml_context * ctx,
  3532. # struct ggml_tensor * a,
  3533. # int scale_factor);
  3534. def ggml_upscale(
  3535. ctx: ggml_context_p,
  3536. a: ggml_tensor_p,
  3537. scale_factor: Union[ctypes.c_int, int],
  3538. ) -> ggml_tensor_p:
  3539. """Upscale
  3540. Parameters:
  3541. a: input tensor
  3542. scale_factor: scale factor
  3543. Returns:
  3544. output tensor"""
  3545. return lib.ggml_upscale(ctx, a, scale_factor)
  3546. lib.ggml_upscale.argtypes = [
  3547. ggml_context_p,
  3548. ctypes.POINTER(ggml_tensor),
  3549. ctypes.c_int,
  3550. ]
  3551. lib.ggml_upscale.restype = ctypes.POINTER(ggml_tensor)
  3552. # GGML_API struct ggml_tensor * ggml_flash_attn(
  3553. # struct ggml_context * ctx,
  3554. # struct ggml_tensor * q,
  3555. # struct ggml_tensor * k,
  3556. # struct ggml_tensor * v,
  3557. # bool masked);
  3558. def ggml_flash_attn(
  3559. ctx: ggml_context_p,
  3560. q: ggml_tensor_p,
  3561. k: ggml_tensor_p,
  3562. v: ggml_tensor_p,
  3563. masked: Union[ctypes.c_bool, bool],
  3564. ) -> ggml_tensor_p:
  3565. return lib.ggml_flash_attn(ctx, q, k, v, masked)
  3566. lib.ggml_flash_attn.argtypes = [
  3567. ggml_context_p,
  3568. ctypes.POINTER(ggml_tensor),
  3569. ctypes.POINTER(ggml_tensor),
  3570. ctypes.POINTER(ggml_tensor),
  3571. ctypes.c_bool,
  3572. ]
  3573. lib.ggml_flash_attn.restype = ctypes.POINTER(ggml_tensor)
  3574. # GGML_API struct ggml_tensor * ggml_flash_attn_back(
  3575. # struct ggml_context * ctx,
  3576. # struct ggml_tensor * q,
  3577. # struct ggml_tensor * k,
  3578. # struct ggml_tensor * v,
  3579. # struct ggml_tensor * d,
  3580. # bool masked);
  3581. def ggml_flash_attn_back(
  3582. ctx: ggml_context_p,
  3583. q: ggml_tensor_p,
  3584. k: ggml_tensor_p,
  3585. v: ggml_tensor_p,
  3586. d: ggml_tensor_p,
  3587. masked: Union[ctypes.c_bool, bool],
  3588. ) -> ggml_tensor_p:
  3589. return lib.ggml_flash_attn_back(ctx, q, k, v, d, masked)
  3590. lib.ggml_flash_attn_back.argtypes = [
  3591. ggml_context_p,
  3592. ctypes.POINTER(ggml_tensor),
  3593. ctypes.POINTER(ggml_tensor),
  3594. ctypes.POINTER(ggml_tensor),
  3595. ctypes.POINTER(ggml_tensor),
  3596. ctypes.c_bool,
  3597. ]
  3598. lib.ggml_flash_attn_back.restype = ctypes.POINTER(ggml_tensor)
  3599. # GGML_API struct ggml_tensor * ggml_flash_ff(
  3600. # struct ggml_context * ctx,
  3601. # struct ggml_tensor * a,
  3602. # struct ggml_tensor * b0,
  3603. # struct ggml_tensor * b1,
  3604. # struct ggml_tensor * c0,
  3605. # struct ggml_tensor * c1);
  3606. def ggml_flash_ff(
  3607. ctx: ggml_context_p,
  3608. a: ggml_tensor_p,
  3609. b0: ggml_tensor_p,
  3610. b1: ggml_tensor_p,
  3611. c0: ggml_tensor_p,
  3612. c1: ggml_tensor_p,
  3613. ) -> ggml_tensor_p:
  3614. return lib.ggml_flash_ff(ctx, a, b0, b1, c0, c1)
  3615. lib.ggml_flash_ff.argtypes = [
  3616. ggml_context_p,
  3617. ctypes.POINTER(ggml_tensor),
  3618. ctypes.POINTER(ggml_tensor),
  3619. ctypes.POINTER(ggml_tensor),
  3620. ctypes.POINTER(ggml_tensor),
  3621. ctypes.POINTER(ggml_tensor),
  3622. ]
  3623. lib.ggml_flash_ff.restype = ctypes.POINTER(ggml_tensor)
  3624. # // partition into non-overlapping windows with padding if needed
  3625. # // example:
  3626. # // a: 768 64 64 1
  3627. # // w: 14
  3628. # // res: 768 14 14 25
  3629. # // used in sam
  3630. # GGML_API struct ggml_tensor * ggml_win_part(
  3631. # struct ggml_context * ctx,
  3632. # struct ggml_tensor * a,
  3633. # int w);
  3634. def ggml_win_part(
  3635. ctx: ggml_context_p,
  3636. a: ggml_tensor_p,
  3637. w: Union[ctypes.c_int, int],
  3638. ) -> ggml_tensor_p:
  3639. return lib.ggml_win_part(ctx, a, w)
  3640. lib.ggml_win_part.argtypes = [
  3641. ggml_context_p,
  3642. ctypes.POINTER(ggml_tensor),
  3643. ctypes.c_int,
  3644. ]
  3645. lib.ggml_win_part.restype = ctypes.POINTER(ggml_tensor)
  3646. # // reverse of ggml_win_part
  3647. # // used in sam
  3648. # GGML_API struct ggml_tensor * ggml_win_unpart(
  3649. # struct ggml_context * ctx,
  3650. # struct ggml_tensor * a,
  3651. # int w0,
  3652. # int h0,
  3653. # int w);
  3654. def ggml_win_unpart(
  3655. ctx: ggml_context_p,
  3656. a: ggml_tensor_p,
  3657. w0: Union[ctypes.c_int, int],
  3658. h0: Union[ctypes.c_int, int],
  3659. w: Union[ctypes.c_int, int],
  3660. ) -> ggml_tensor_p:
  3661. return lib.ggml_win_unpart(ctx, a, w0, h0, w)
  3662. lib.ggml_win_unpart.argtypes = [
  3663. ggml_context_p,
  3664. ctypes.POINTER(ggml_tensor),
  3665. ctypes.c_int,
  3666. ctypes.c_int,
  3667. ctypes.c_int,
  3668. ]
  3669. lib.ggml_win_unpart.restype = ctypes.POINTER(ggml_tensor)
  3670. # GGML_API struct ggml_tensor * ggml_unary(
  3671. # struct ggml_context * ctx,
  3672. # struct ggml_tensor * a,
  3673. # enum ggml_unary_op op);
  3674. def ggml_unary(
  3675. ctx: ggml_context_p,
  3676. a: ggml_tensor_p,
  3677. op: Union[ctypes.c_int, int],
  3678. ) -> ggml_tensor_p:
  3679. return lib.ggml_unary(ctx, a, op)
  3680. lib.ggml_unary.argtypes = [
  3681. ggml_context_p,
  3682. ctypes.POINTER(ggml_tensor),
  3683. ctypes.c_int,
  3684. ]
  3685. lib.ggml_unary.restype = ctypes.POINTER(ggml_tensor)
  3686. # GGML_API struct ggml_tensor * ggml_unary_inplace(
  3687. # struct ggml_context * ctx,
  3688. # struct ggml_tensor * a,
  3689. # enum ggml_unary_op op);
  3690. def ggml_unary_inplace(
  3691. ctx: ggml_context_p,
  3692. a: ggml_tensor_p,
  3693. op: Union[ctypes.c_int, int],
  3694. ) -> ggml_tensor_p:
  3695. return lib.ggml_unary_inplace(ctx, a, op)
  3696. lib.ggml_unary_inplace.argtypes = [
  3697. ggml_context_p,
  3698. ctypes.POINTER(ggml_tensor),
  3699. ctypes.c_int,
  3700. ]
  3701. lib.ggml_unary_inplace.restype = ctypes.POINTER(ggml_tensor)
  3702. # // used in sam
  3703. # GGML_API struct ggml_tensor * ggml_get_rel_pos(
  3704. # struct ggml_context * ctx,
  3705. # struct ggml_tensor * a,
  3706. # int qh,
  3707. # int kh);
  3708. def ggml_get_rel_pos(
  3709. ctx: ggml_context_p,
  3710. a: ggml_tensor_p,
  3711. qh: Union[ctypes.c_int, int],
  3712. kh: Union[ctypes.c_int, int],
  3713. ) -> ggml_tensor_p:
  3714. return lib.ggml_get_rel_pos(ctx, a, qh, kh)
  3715. lib.ggml_get_rel_pos.argtypes = [
  3716. ggml_context_p,
  3717. ctypes.POINTER(ggml_tensor),
  3718. ctypes.c_int,
  3719. ctypes.c_int,
  3720. ]
  3721. lib.ggml_get_rel_pos.restype = ctypes.POINTER(ggml_tensor)
  3722. # // used in sam
  3723. # GGML_API struct ggml_tensor * ggml_add_rel_pos(
  3724. # struct ggml_context * ctx,
  3725. # struct ggml_tensor * a,
  3726. # struct ggml_tensor * pw,
  3727. # struct ggml_tensor * ph);
  3728. def ggml_add_rel_pos(
  3729. ctx: ggml_context_p,
  3730. a: ggml_tensor_p,
  3731. pw: ggml_tensor_p,
  3732. ph: ggml_tensor_p,
  3733. ) -> ggml_tensor_p:
  3734. return lib.ggml_add_rel_pos(ctx, a, pw, ph)
  3735. lib.ggml_add_rel_pos.argtypes = [
  3736. ggml_context_p,
  3737. ctypes.POINTER(ggml_tensor),
  3738. ctypes.POINTER(ggml_tensor),
  3739. ctypes.POINTER(ggml_tensor),
  3740. ]
  3741. lib.ggml_add_rel_pos.restype = ctypes.POINTER(ggml_tensor)
  3742. # GGML_API struct ggml_tensor * ggml_add_rel_pos_inplace(
  3743. # struct ggml_context * ctx,
  3744. # struct ggml_tensor * a,
  3745. # struct ggml_tensor * pw,
  3746. # struct ggml_tensor * ph);
  3747. def ggml_add_rel_pos_inplace(
  3748. ctx: ggml_context_p,
  3749. a: ggml_tensor_p,
  3750. pw: ggml_tensor_p,
  3751. ph: ggml_tensor_p,
  3752. ) -> ggml_tensor_p:
  3753. return lib.ggml_add_rel_pos_inplace(ctx, a, pw, ph)
  3754. lib.ggml_add_rel_pos_inplace.argtypes = [
  3755. ggml_context_p,
  3756. ctypes.POINTER(ggml_tensor),
  3757. ctypes.POINTER(ggml_tensor),
  3758. ctypes.POINTER(ggml_tensor),
  3759. ]
  3760. lib.ggml_add_rel_pos_inplace.restype = ctypes.POINTER(ggml_tensor)
  3761. # // custom operators (DEPRECATED)
  3762. # typedef void (*ggml_unary_op_f32_t)(const int, float *, const float *);
  3763. ggml_unary_op_f32_t = ctypes.CFUNCTYPE(
  3764. None, ctypes.c_int, ctypes.POINTER(ctypes.c_float), ctypes.POINTER(ctypes.c_float)
  3765. )
  3766. # typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
  3767. ggml_binary_op_f32_t = ctypes.CFUNCTYPE(
  3768. None,
  3769. ctypes.c_int,
  3770. ctypes.POINTER(ctypes.c_float),
  3771. ctypes.POINTER(ctypes.c_float),
  3772. ctypes.POINTER(ctypes.c_float),
  3773. )
  3774. # typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
  3775. ggml_custom1_op_f32_t = ctypes.CFUNCTYPE(
  3776. None, ctypes.POINTER(ggml_tensor), ctypes.POINTER(ggml_tensor)
  3777. )
  3778. """Unary operator function type"""
  3779. # typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
  3780. ggml_custom2_op_f32_t = ctypes.CFUNCTYPE(
  3781. None,
  3782. ctypes.POINTER(ggml_tensor),
  3783. ctypes.POINTER(ggml_tensor),
  3784. ctypes.POINTER(ggml_tensor),
  3785. )
  3786. """Binary operator function type"""
  3787. # typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
  3788. ggml_custom3_op_f32_t = ctypes.CFUNCTYPE(
  3789. None,
  3790. ctypes.POINTER(ggml_tensor),
  3791. ctypes.POINTER(ggml_tensor),
  3792. ctypes.POINTER(ggml_tensor),
  3793. ctypes.POINTER(ggml_tensor),
  3794. )
  3795. """Ternary operator function type"""
  3796. # GGML_API struct ggml_tensor * ggml_map_unary_f32(
  3797. # struct ggml_context * ctx,
  3798. # struct ggml_tensor * a,
  3799. # ggml_unary_op_f32_t fun);
  3800. def ggml_map_unary_f32(
  3801. ctx: ggml_context_p,
  3802. a: ggml_tensor_p,
  3803. fun: "ctypes._FuncPointer", # type: ignore
  3804. ) -> ggml_tensor_p:
  3805. return lib.ggml_map_unary_f32(ctx, a, fun)
  3806. lib.ggml_map_unary_f32.argtypes = [
  3807. ggml_context_p,
  3808. ctypes.POINTER(ggml_tensor),
  3809. ggml_unary_op_f32_t,
  3810. ]
  3811. lib.ggml_map_unary_f32.restype = ctypes.POINTER(ggml_tensor)
  3812. # GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
  3813. # struct ggml_context * ctx,
  3814. # struct ggml_tensor * a,
  3815. # ggml_unary_op_f32_t fun);
  3816. def ggml_map_unary_inplace_f32(
  3817. ctx: ggml_context_p,
  3818. a: ggml_tensor_p,
  3819. fun: "ctypes._FuncPointer", # type: ignore
  3820. ) -> ggml_tensor_p:
  3821. return lib.ggml_map_unary_inplace_f32(ctx, a, fun)
  3822. lib.ggml_map_unary_inplace_f32.argtypes = [
  3823. ggml_context_p,
  3824. ctypes.POINTER(ggml_tensor),
  3825. ggml_unary_op_f32_t,
  3826. ]
  3827. lib.ggml_map_unary_inplace_f32.restype = ctypes.POINTER(ggml_tensor)
  3828. # GGML_API struct ggml_tensor * ggml_map_binary_f32(
  3829. # struct ggml_context * ctx,
  3830. # struct ggml_tensor * a,
  3831. # struct ggml_tensor * b,
  3832. # ggml_binary_op_f32_t fun);
  3833. def ggml_map_binary_f32(
  3834. ctx: ggml_context_p,
  3835. a: ggml_tensor_p,
  3836. b: ggml_tensor_p,
  3837. fun: "ctypes._FuncPointer", # type: ignore
  3838. ) -> ggml_tensor_p:
  3839. return lib.ggml_map_binary_f32(ctx, a, b, fun)
  3840. lib.ggml_map_binary_f32.argtypes = [
  3841. ggml_context_p,
  3842. ctypes.POINTER(ggml_tensor),
  3843. ctypes.POINTER(ggml_tensor),
  3844. ggml_binary_op_f32_t,
  3845. ]
  3846. lib.ggml_map_binary_f32.restype = ctypes.POINTER(ggml_tensor)
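# Illustrative sketch (not part of the upstream bindings): using the
# deprecated f32 map API. The callback matches ggml_binary_op_f32_t above and
# receives (n, dst, x, y), where dst/x/y are raw float pointers for one row.
# Keep a reference to the CFUNCTYPE object alive for as long as the graph can
# be evaluated, otherwise the C side would call into freed memory.
@ggml_binary_op_f32_t
def _example_add_f32(n, dst, x, y):
    # naive element-wise sum over one row; runs during graph computation
    for i in range(n):
        dst[i] = x[i] + y[i]

def _example_map_binary_f32(
    ctx: ggml_context_p, a: ggml_tensor_p, b: ggml_tensor_p
) -> ggml_tensor_p:
    # a and b are assumed to be F32 tensors of the same shape
    return ggml_map_binary_f32(ctx, a, b, _example_add_f32)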
  3847. # GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
  3848. # struct ggml_context * ctx,
  3849. # struct ggml_tensor * a,
  3850. # struct ggml_tensor * b,
  3851. # ggml_binary_op_f32_t fun);
  3852. def ggml_map_binary_inplace_f32(
  3853. ctx: ggml_context_p,
  3854. a: ggml_tensor_p,
  3855. b: ggml_tensor_p,
  3856. fun: "ctypes._FuncPointer", # type: ignore
  3857. ) -> ggml_tensor_p:
  3858. return lib.ggml_map_binary_inplace_f32(ctx, a, b, fun)
  3859. lib.ggml_map_binary_inplace_f32.argtypes = [
  3860. ggml_context_p,
  3861. ctypes.POINTER(ggml_tensor),
  3862. ctypes.POINTER(ggml_tensor),
  3863. ggml_binary_op_f32_t,
  3864. ]
  3865. lib.ggml_map_binary_inplace_f32.restype = ctypes.POINTER(ggml_tensor)
  3866. # GGML_API struct ggml_tensor * ggml_map_custom1_f32(
  3867. # struct ggml_context * ctx,
  3868. # struct ggml_tensor * a,
  3869. # ggml_custom1_op_f32_t fun);
  3870. def ggml_map_custom1_f32(
  3871. ctx: ggml_context_p,
  3872. a: ggml_tensor_p,
  3873. fun: "ctypes._FuncPointer", # type: ignore
  3874. ) -> ggml_tensor_p:
  3875. """Custom unary operator on a tensor.
  3876. Example:
  3877. ```python
  3878. import ggml
  3879. @ggml.ggml_custom1_op_f32_t
3880. def custom_op(b: ggml.ggml_tensor_p, a: ggml.ggml_tensor_p):
  3881. # do something with a and copy to b
  3882. return
  3883. ...
  3884. b = ggml.ggml_map_custom1_f32(ctx, a, custom_op)
  3885. ```
  3886. Parameters:
  3887. a: input tensor
  3888. fun (ggml.ggml_custom1_op_f32_t): function to apply to each element
  3889. Returns:
  3890. output tensor"""
  3891. return lib.ggml_map_custom1_f32(ctx, a, fun)
  3892. lib.ggml_map_custom1_f32.argtypes = [
  3893. ggml_context_p,
  3894. ctypes.POINTER(ggml_tensor),
  3895. ggml_custom1_op_f32_t,
  3896. ]
  3897. lib.ggml_map_custom1_f32.restype = ctypes.POINTER(ggml_tensor)
  3898. # GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
  3899. # struct ggml_context * ctx,
  3900. # struct ggml_tensor * a,
  3901. # ggml_custom1_op_f32_t fun);
  3902. def ggml_map_custom1_inplace_f32(
  3903. ctx: ggml_context_p,
  3904. a: ggml_tensor_p,
  3905. fun: "ctypes._CFuncPtr", # type: ignore
  3906. ) -> ggml_tensor_p:
  3907. """Custom unary operator on a tensor inplace.
  3908. Parameters:
  3909. a: input tensor
  3910. fun (ggml.ggml_custom1_op_f32_t): function to apply to each element
  3911. Returns:
  3912. output tensor"""
  3913. return lib.ggml_map_custom1_inplace_f32(ctx, a, fun)
  3914. lib.ggml_map_custom1_inplace_f32.argtypes = [
  3915. ggml_context_p,
  3916. ctypes.POINTER(ggml_tensor),
  3917. ggml_custom1_op_f32_t,
  3918. ]
  3919. lib.ggml_map_custom1_inplace_f32.restype = ctypes.POINTER(ggml_tensor)
  3920. # GGML_API struct ggml_tensor * ggml_map_custom2_f32(
  3921. # struct ggml_context * ctx,
  3922. # struct ggml_tensor * a,
  3923. # struct ggml_tensor * b,
  3924. # ggml_custom2_op_f32_t fun);
  3925. def ggml_map_custom2_f32(
  3926. ctx: ggml_context_p,
  3927. a: ggml_tensor_p,
  3928. b: ggml_tensor_p,
  3929. fun: "ctypes._FuncPointer", # type: ignore
  3930. ) -> ggml_tensor_p:
  3931. """Custom binary operator on two tensors.
  3932. Parameters:
  3933. a: input tensor
  3934. b: input tensor
  3935. fun (ggml.ggml_custom2_op_f32_t): function to apply to each element
  3936. Returns:
  3937. output tensor"""
  3938. return lib.ggml_map_custom2_f32(ctx, a, b, fun)
  3939. lib.ggml_map_custom2_f32.argtypes = [
  3940. ggml_context_p,
  3941. ctypes.POINTER(ggml_tensor),
  3942. ctypes.POINTER(ggml_tensor),
  3943. ggml_custom2_op_f32_t,
  3944. ]
  3945. lib.ggml_map_custom2_f32.restype = ctypes.POINTER(ggml_tensor)
  3946. # GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
  3947. # struct ggml_context * ctx,
  3948. # struct ggml_tensor * a,
  3949. # struct ggml_tensor * b,
  3950. # ggml_custom2_op_f32_t fun);
  3951. def ggml_map_custom2_inplace_f32(
  3952. ctx: ggml_context_p,
  3953. a: ggml_tensor_p,
  3954. b: ggml_tensor_p,
  3955. fun: "ctypes._FuncPointer", # type: ignore
  3956. ) -> ggml_tensor_p:
  3957. """Custom binary operator on two tensors inplace.
  3958. Parameters:
  3959. a: input tensor
  3960. b: input tensor
  3961. fun (ggml.ggml_custom2_op_f32_t): function to apply to each element
  3962. Returns:
  3963. output tensor"""
  3964. return lib.ggml_map_custom2_inplace_f32(ctx, a, b, fun)
  3965. lib.ggml_map_custom2_inplace_f32.argtypes = [
  3966. ggml_context_p,
  3967. ctypes.POINTER(ggml_tensor),
  3968. ctypes.POINTER(ggml_tensor),
  3969. ggml_custom2_op_f32_t,
  3970. ]
  3971. lib.ggml_map_custom2_inplace_f32.restype = ctypes.POINTER(ggml_tensor)
  3972. # GGML_API struct ggml_tensor * ggml_map_custom3_f32(
  3973. # struct ggml_context * ctx,
  3974. # struct ggml_tensor * a,
  3975. # struct ggml_tensor * b,
  3976. # struct ggml_tensor * c,
  3977. # ggml_custom3_op_f32_t fun);
  3978. def ggml_map_custom3_f32(
  3979. ctx: ggml_context_p,
  3980. a: ggml_tensor_p,
  3981. b: ggml_tensor_p,
  3982. c: ggml_tensor_p,
  3983. fun: "ctypes._FuncPointer", # type: ignore
  3984. ) -> ggml_tensor_p:
  3985. """Custom ternary operator on three tensors.
  3986. Parameters:
  3987. a: input tensor
  3988. b: input tensor
  3989. c: input tensor
  3990. fun (ggml.ggml_custom3_op_f32_t): function to apply to each element
  3991. Returns:
  3992. output tensor"""
  3993. return lib.ggml_map_custom3_f32(ctx, a, b, c, fun)
  3994. lib.ggml_map_custom3_f32.argtypes = [
  3995. ggml_context_p,
  3996. ctypes.POINTER(ggml_tensor),
  3997. ctypes.POINTER(ggml_tensor),
  3998. ctypes.POINTER(ggml_tensor),
  3999. ggml_custom3_op_f32_t,
  4000. ]
  4001. lib.ggml_map_custom3_f32.restype = ctypes.POINTER(ggml_tensor)
  4002. # GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
  4003. # struct ggml_context * ctx,
  4004. # struct ggml_tensor * a,
  4005. # struct ggml_tensor * b,
  4006. # struct ggml_tensor * c,
  4007. # ggml_custom3_op_f32_t fun);
  4008. def ggml_map_custom3_inplace_f32(
  4009. ctx: ggml_context_p,
  4010. a: ggml_tensor_p,
  4011. b: ggml_tensor_p,
  4012. c: ggml_tensor_p,
  4013. fun: "ctypes._FuncPointer", # type: ignore
  4014. ) -> ggml_tensor_p:
  4015. """Custom ternary operator on three tensors inplace.
  4016. Parameters:
  4017. a: input tensor
  4018. b: input tensor
  4019. c: input tensor
  4020. fun (ggml.ggml_custom3_op_f32_t): function to apply to each element
  4021. Returns:
  4022. output tensor"""
  4023. return lib.ggml_map_custom3_inplace_f32(ctx, a, b, c, fun)
  4024. lib.ggml_map_custom3_inplace_f32.argtypes = [
  4025. ggml_context_p,
  4026. ctypes.POINTER(ggml_tensor),
  4027. ctypes.POINTER(ggml_tensor),
  4028. ctypes.POINTER(ggml_tensor),
  4029. ggml_custom3_op_f32_t,
  4030. ]
  4031. lib.ggml_map_custom3_inplace_f32.restype = ctypes.POINTER(ggml_tensor)
  4032. # // custom operators v2
  4033. # typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata);
  4034. ggml_custom1_op_t = ctypes.CFUNCTYPE(
  4035. None,
  4036. ctypes.POINTER(ggml_tensor),
  4037. ctypes.POINTER(ggml_tensor),
  4038. ctypes.c_int,
  4039. ctypes.c_int,
  4040. ctypes.c_void_p,
  4041. )
  4042. """Custom unary operator on a tensor."""
  4043. # typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata);
  4044. ggml_custom2_op_t = ctypes.CFUNCTYPE(
  4045. None,
  4046. ctypes.POINTER(ggml_tensor),
  4047. ctypes.POINTER(ggml_tensor),
  4048. ctypes.POINTER(ggml_tensor),
  4049. ctypes.c_int,
  4050. ctypes.c_int,
  4051. ctypes.c_void_p,
  4052. )
  4053. """Custom binary operator on two tensors."""
  4054. # typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata);
  4055. ggml_custom3_op_t = ctypes.CFUNCTYPE(
  4056. None,
  4057. ctypes.POINTER(ggml_tensor),
  4058. ctypes.POINTER(ggml_tensor),
  4059. ctypes.POINTER(ggml_tensor),
  4060. ctypes.POINTER(ggml_tensor),
  4061. ctypes.c_int,
  4062. ctypes.c_int,
  4063. ctypes.c_void_p,
  4064. )
  4065. """Custom ternary operator on three tensors."""
  4066. # #define GGML_N_TASKS_MAX -1
  4067. GGML_N_TASKS_MAX = -1
  4068. # GGML_API struct ggml_tensor * ggml_map_custom1(
  4069. # struct ggml_context * ctx,
  4070. # struct ggml_tensor * a,
  4071. # ggml_custom1_op_t fun,
  4072. # int n_tasks,
  4073. # void * userdata);
  4074. def ggml_map_custom1(
  4075. ctx: ggml_context_p,
  4076. a: ggml_tensor_p,
  4077. fun: "ctypes._FuncPointer", # type: ignore
  4078. n_tasks: Union[ctypes.c_int, int],
  4079. userdata: Optional[ctypes.c_void_p],
  4080. ) -> ggml_tensor_p:
  4081. return lib.ggml_map_custom1(ctx, a, fun, n_tasks, userdata)
  4082. lib.ggml_map_custom1.argtypes = [
  4083. ggml_context_p,
  4084. ctypes.POINTER(ggml_tensor),
  4085. ggml_custom1_op_t,
  4086. ctypes.c_int,
  4087. ctypes.c_void_p,
  4088. ]
  4089. lib.ggml_map_custom1.restype = ctypes.POINTER(ggml_tensor)
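# Illustrative sketch (not part of the upstream bindings): the v2 custom-op
# API. The callback receives (dst, a, ith, nth, userdata); ith/nth let the
# callback split work across threads, and n_tasks (or GGML_N_TASKS_MAX above)
# controls how many tasks ggml schedules. Assumes ggml_nelements and
# ggml_get_data_f32 are bound earlier in this module.
@ggml_custom1_op_t
def _example_scale_by_two(dst, src, ith, nth, userdata):
    if ith != 0:
        return  # this sketch does all the work on the first task
    n = ggml_nelements(dst)
    dst_data = ggml_get_data_f32(dst)
    src_data = ggml_get_data_f32(src)
    for i in range(n):
        dst_data[i] = 2.0 * src_data[i]

def _example_map_custom1(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
    return ggml_map_custom1(ctx, a, _example_scale_by_two, 1, None)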
  4090. # GGML_API struct ggml_tensor * ggml_map_custom1_inplace(
  4091. # struct ggml_context * ctx,
  4092. # struct ggml_tensor * a,
  4093. # ggml_custom1_op_t fun,
  4094. # int n_tasks,
  4095. # void * userdata);
  4096. def ggml_map_custom1_inplace(
  4097. ctx: ggml_context_p,
  4098. a: ggml_tensor_p,
  4099. fun: "ctypes._FuncPointer", # type: ignore
  4100. n_tasks: Union[ctypes.c_int, int],
  4101. userdata: Optional[ctypes.c_void_p],
  4102. ) -> ggml_tensor_p:
  4103. return lib.ggml_map_custom1_inplace(ctx, a, fun, n_tasks, userdata)
  4104. lib.ggml_map_custom1_inplace.argtypes = [
  4105. ggml_context_p,
  4106. ctypes.POINTER(ggml_tensor),
  4107. ggml_custom1_op_t,
  4108. ctypes.c_int,
  4109. ctypes.c_void_p,
  4110. ]
  4111. lib.ggml_map_custom1_inplace.restype = ctypes.POINTER(ggml_tensor)
  4112. # GGML_API struct ggml_tensor * ggml_map_custom2(
  4113. # struct ggml_context * ctx,
  4114. # struct ggml_tensor * a,
  4115. # struct ggml_tensor * b,
  4116. # ggml_custom2_op_t fun,
  4117. # int n_tasks,
  4118. # void * userdata);
  4119. def ggml_map_custom2(
  4120. ctx: ggml_context_p,
  4121. a: ggml_tensor_p,
  4122. b: ggml_tensor_p,
  4123. fun: "ctypes._FuncPointer", # type: ignore
  4124. n_tasks: Union[ctypes.c_int, int],
  4125. userdata: Optional[ctypes.c_void_p],
  4126. ) -> ggml_tensor_p:
  4127. return lib.ggml_map_custom2(ctx, a, b, fun, n_tasks, userdata)
  4128. lib.ggml_map_custom2.argtypes = [
  4129. ggml_context_p,
  4130. ctypes.POINTER(ggml_tensor),
  4131. ctypes.POINTER(ggml_tensor),
  4132. ggml_custom2_op_t,
  4133. ctypes.c_int,
  4134. ctypes.c_void_p,
  4135. ]
  4136. lib.ggml_map_custom2.restype = ctypes.POINTER(ggml_tensor)
  4137. # GGML_API struct ggml_tensor * ggml_map_custom2_inplace(
  4138. # struct ggml_context * ctx,
  4139. # struct ggml_tensor * a,
  4140. # struct ggml_tensor * b,
  4141. # ggml_custom2_op_t fun,
  4142. # int n_tasks,
  4143. # void * userdata);
  4144. def ggml_map_custom2_inplace(
  4145. ctx: ggml_context_p,
  4146. a: ggml_tensor_p,
  4147. b: ggml_tensor_p,
  4148. fun: "ctypes._FuncPointer", # type: ignore
  4149. n_tasks: Union[ctypes.c_int, int],
  4150. userdata: Optional[ctypes.c_void_p],
  4151. ) -> ggml_tensor_p:
  4152. return lib.ggml_map_custom2_inplace(ctx, a, b, fun, n_tasks, userdata)
  4153. lib.ggml_map_custom2_inplace.argtypes = [
  4154. ggml_context_p,
  4155. ctypes.POINTER(ggml_tensor),
  4156. ctypes.POINTER(ggml_tensor),
  4157. ggml_custom2_op_t,
  4158. ctypes.c_int,
  4159. ctypes.c_void_p,
  4160. ]
  4161. lib.ggml_map_custom2_inplace.restype = ctypes.POINTER(ggml_tensor)
  4162. # GGML_API struct ggml_tensor * ggml_map_custom3(
  4163. # struct ggml_context * ctx,
  4164. # struct ggml_tensor * a,
  4165. # struct ggml_tensor * b,
  4166. # struct ggml_tensor * c,
  4167. # ggml_custom3_op_t fun,
  4168. # int n_tasks,
  4169. # void * userdata);
  4170. def ggml_map_custom3(
  4171. ctx: ggml_context_p,
  4172. a: ggml_tensor_p,
  4173. b: ggml_tensor_p,
  4174. c: ggml_tensor_p,
  4175. fun: "ctypes._FuncPointer", # type: ignore
  4176. n_tasks: Union[ctypes.c_int, int],
  4177. userdata: Optional[ctypes.c_void_p],
  4178. ) -> ggml_tensor_p:
  4179. return lib.ggml_map_custom3(ctx, a, b, c, fun, n_tasks, userdata)
  4180. lib.ggml_map_custom3.argtypes = [
  4181. ggml_context_p,
  4182. ctypes.POINTER(ggml_tensor),
  4183. ctypes.POINTER(ggml_tensor),
  4184. ctypes.POINTER(ggml_tensor),
  4185. ggml_custom3_op_t,
  4186. ctypes.c_int,
  4187. ctypes.c_void_p,
  4188. ]
  4189. lib.ggml_map_custom3.restype = ctypes.POINTER(ggml_tensor)
  4190. # GGML_API struct ggml_tensor * ggml_map_custom3_inplace(
  4191. # struct ggml_context * ctx,
  4192. # struct ggml_tensor * a,
  4193. # struct ggml_tensor * b,
  4194. # struct ggml_tensor * c,
  4195. # ggml_custom3_op_t fun,
  4196. # int n_tasks,
  4197. # void * userdata);
  4198. def ggml_map_custom3_inplace(
  4199. ctx: ggml_context_p,
  4200. a: ggml_tensor_p,
  4201. b: ggml_tensor_p,
  4202. c: ggml_tensor_p,
  4203. fun: "ctypes._FuncPointer", # type: ignore
  4204. n_tasks: Union[ctypes.c_int, int],
  4205. userdata: Optional[ctypes.c_void_p],
  4206. ) -> ggml_tensor_p:
  4207. return lib.ggml_map_custom3_inplace(ctx, a, b, c, fun, n_tasks, userdata)
  4208. lib.ggml_map_custom3_inplace.argtypes = [
  4209. ggml_context_p,
  4210. ctypes.POINTER(ggml_tensor),
  4211. ctypes.POINTER(ggml_tensor),
  4212. ctypes.POINTER(ggml_tensor),
  4213. ggml_custom3_op_t,
  4214. ctypes.c_int,
  4215. ctypes.c_void_p,
  4216. ]
  4217. lib.ggml_map_custom3_inplace.restype = ctypes.POINTER(ggml_tensor)
  4218. # // loss function
  4219. # GGML_API struct ggml_tensor * ggml_cross_entropy_loss(
  4220. # struct ggml_context * ctx,
  4221. # struct ggml_tensor * a,
  4222. # struct ggml_tensor * b);
  4223. def ggml_cross_entropy_loss(
  4224. ctx: ggml_context_p,
  4225. a: ggml_tensor_p,
  4226. b: ggml_tensor_p,
  4227. ) -> ggml_tensor_p:
  4228. return lib.ggml_cross_entropy_loss(ctx, a, b)
  4229. lib.ggml_cross_entropy_loss.argtypes = [
  4230. ggml_context_p,
  4231. ctypes.POINTER(ggml_tensor),
  4232. ctypes.POINTER(ggml_tensor),
  4233. ]
  4234. lib.ggml_cross_entropy_loss.restype = ctypes.POINTER(ggml_tensor)
  4235. # GGML_API struct ggml_tensor * ggml_cross_entropy_loss_back(
  4236. # struct ggml_context * ctx,
  4237. # struct ggml_tensor * a,
  4238. # struct ggml_tensor * b,
  4239. # struct ggml_tensor * c);
  4240. def ggml_cross_entropy_loss_back(
  4241. ctx: ggml_context_p,
  4242. a: ggml_tensor_p,
  4243. b: ggml_tensor_p,
  4244. c: ggml_tensor_p,
  4245. ) -> ggml_tensor_p:
  4246. return lib.ggml_cross_entropy_loss_back(ctx, a, b, c)
  4247. lib.ggml_cross_entropy_loss_back.argtypes = [
  4248. ggml_context_p,
  4249. ctypes.POINTER(ggml_tensor),
  4250. ctypes.POINTER(ggml_tensor),
  4251. ctypes.POINTER(ggml_tensor),
  4252. ]
  4253. lib.ggml_cross_entropy_loss_back.restype = ctypes.POINTER(ggml_tensor)
  4254. # //
  4255. # // automatic differentiation
  4256. # //
  4257. # GGML_API void ggml_set_param(
  4258. # struct ggml_context * ctx,
  4259. # struct ggml_tensor * tensor);
  4260. def ggml_set_param(ctx: ggml_context_p, tensor: ggml_tensor_p):
  4261. return lib.ggml_set_param(ctx, tensor)
  4262. lib.ggml_set_param.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  4263. lib.ggml_set_param.restype = None
  4264. # GGML_API void ggml_build_forward_expand (struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
  4265. def ggml_build_forward_expand(
  4266. cgraph: ggml_cgraph_p,
  4267. tensor: ggml_tensor_p,
  4268. ):
  4269. """Add a tensor to the forward computation graph. This is used to
  4270. compute and save the value of the tensor.
  4271. Parameters:
  4272. cgraph: The graph.
  4273. tensor: The tensor."""
  4274. return lib.ggml_build_forward_expand(cgraph, tensor)
  4275. lib.ggml_build_forward_expand.argtypes = [
  4276. ctypes.POINTER(ggml_cgraph),
  4277. ctypes.POINTER(ggml_tensor),
  4278. ]
  4279. lib.ggml_build_forward_expand.restype = None
  4280. # GGML_API void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool keep);
  4281. def ggml_build_backward_expand(
  4282. ctx: ggml_context_p,
  4283. gf: ggml_cgraph_p,
  4284. gb: ggml_cgraph_p,
  4285. keep: Union[ctypes.c_bool, bool],
  4286. ):
  4287. """Add a tensor to the backward computation graph. This is used to
  4288. compute the gradient of the tensor.
  4289. Parameters:
  4290. ctx: The context.
  4291. gf: The forward graph.
  4292. gb: The backward graph.
  4293. keep: Whether to keep the tensor."""
  4294. return lib.ggml_build_backward_expand(ctx, gf, gb, keep)
  4295. lib.ggml_build_backward_expand.argtypes = [
  4296. ggml_context_p,
  4297. ctypes.POINTER(ggml_cgraph),
  4298. ctypes.POINTER(ggml_cgraph),
  4299. ctypes.c_bool,
  4300. ]
  4301. lib.ggml_build_backward_expand.restype = None
  4302. # GGML_API struct ggml_cgraph ggml_build_forward (struct ggml_tensor * tensor);
  4303. def ggml_build_forward(
  4304. tensor: ggml_tensor_p,
  4305. ) -> ggml_cgraph:
  4306. """Build the forward computation graph.
  4307. Parameters:
  4308. tensor: The tensor.
  4309. Returns:
  4310. The graph."""
  4311. return lib.ggml_build_forward(tensor)
  4312. lib.ggml_build_forward.argtypes = [ctypes.POINTER(ggml_tensor)]
  4313. lib.ggml_build_forward.restype = ggml_cgraph
  4314. # GGML_API struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep);
  4315. def ggml_build_backward(
  4316. ctx: ggml_context_p,
  4317. gf: ggml_cgraph_p,
  4318. keep: Union[ctypes.c_bool, bool],
  4319. ) -> ggml_cgraph:
  4320. return lib.ggml_build_backward(ctx, gf, keep)
  4321. lib.ggml_build_backward.argtypes = [
  4322. ggml_context_p,
  4323. ctypes.POINTER(ggml_cgraph),
  4324. ctypes.c_bool,
  4325. ]
  4326. lib.ggml_build_backward.restype = ggml_cgraph
  4327. # // graph allocation in a context
  4328. # GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx);
  4329. def ggml_new_graph(
  4330. ctx: ggml_context_p,
4331. ) -> ggml_cgraph_p:
  4332. """Create a new graph.
  4333. Parameters:
  4334. ctx: The context.
  4335. Returns:
  4336. The graph."""
  4337. return lib.ggml_new_graph(ctx)
  4338. lib.ggml_new_graph.argtypes = [ggml_context_p]
4339. lib.ggml_new_graph.restype = ctypes.POINTER(ggml_cgraph)
  4340. # GGML_API struct ggml_cgraph * ggml_build_forward_ctx(struct ggml_context * ctx, struct ggml_tensor * tensor);
  4341. def ggml_build_forward_ctx(
  4342. ctx: ggml_context_p,
  4343. tensor: ggml_tensor_p,
4344. ) -> ggml_cgraph_p:
  4345. """Build the forward computation graph in a context.
  4346. Parameters:
  4347. ctx: The context.
  4348. tensor: The tensor.
  4349. Returns:
  4350. The graph."""
  4351. return lib.ggml_build_forward_ctx(ctx, tensor)
  4352. lib.ggml_build_forward_ctx.argtypes = [
  4353. ggml_context_p,
  4354. ctypes.POINTER(ggml_tensor),
  4355. ]
4356. lib.ggml_build_forward_ctx.restype = ctypes.POINTER(ggml_cgraph)
  4357. # GGML_API size_t ggml_graph_overhead(void);
  4358. def ggml_graph_overhead() -> int:
  4359. """Get the overhead of the graph."""
  4360. return lib.ggml_graph_overhead()
  4361. lib.ggml_graph_overhead.argtypes = []
  4362. lib.ggml_graph_overhead.restype = ctypes.c_size_t
  4363. # // ggml_graph_plan() has to be called before ggml_graph_compute()
  4364. # // when plan.work_size > 0, caller must allocate memory for plan.work_data
  4365. # GGML_API struct ggml_cplan ggml_graph_plan (struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);
  4366. def ggml_graph_plan(
  4367. cgraph: ggml_cgraph_p,
  4368. n_threads: Union[ctypes.c_int, int] = GGML_DEFAULT_N_THREADS,
  4369. ) -> ggml_cplan:
  4370. """Plan the computation graph.
  4371. Parameters:
  4372. cgraph: The graph.
  4373. n_threads: The number of threads to use.
  4374. Returns:
  4375. The plan."""
  4376. return lib.ggml_graph_plan(cgraph, n_threads)
  4377. lib.ggml_graph_plan.argtypes = [
  4378. ctypes.POINTER(ggml_cgraph),
  4379. ctypes.c_int,
  4380. ]
  4381. lib.ggml_graph_plan.restype = ggml_cplan
  4382. # GGML_API int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
  4383. def ggml_graph_compute(
  4384. cgraph: ggml_cgraph_p,
  4385. cplan: ggml_cplan_p,
  4386. ) -> int:
  4387. """Compute the graph.
  4388. Parameters:
  4389. cgraph: The graph.
  4390. cplan: The plan."""
  4391. return lib.ggml_graph_compute(cgraph, cplan)
  4392. lib.ggml_graph_compute.argtypes = [
  4393. ctypes.POINTER(ggml_cgraph),
  4394. ctypes.POINTER(ggml_cplan),
  4395. ]
  4396. lib.ggml_graph_compute.restype = ctypes.c_int
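# Illustrative sketch (not part of the upstream bindings): the plan/compute
# pattern described in the comment above. When plan.work_size > 0 the caller
# allocates the work buffer and must keep it alive until ggml_graph_compute
# returns. Assumes the ggml_cplan structure bound earlier exposes work_size
# and work_data (a uint8_t pointer), as in ggml.h.
def _example_plan_and_compute(gf: ggml_cgraph, n_threads: int = 1) -> int:
    plan = ggml_graph_plan(ctypes.pointer(gf), n_threads)
    work = None
    if plan.work_size > 0:
        work = (ctypes.c_uint8 * plan.work_size)()
        plan.work_data = ctypes.cast(work, ctypes.POINTER(ctypes.c_uint8))
    # `work` stays referenced until the compute call below has returned
    return ggml_graph_compute(ctypes.pointer(gf), ctypes.pointer(plan))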
  4397. # GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph);
  4398. def ggml_graph_reset(
  4399. cgraph: ggml_cgraph_p,
  4400. ):
  4401. """Reset the graph.
  4402. Parameters:
  4403. cgraph: The graph."""
4404. return lib.ggml_graph_reset(cgraph)
lib.ggml_graph_reset.argtypes = [ctypes.POINTER(ggml_cgraph)]
lib.ggml_graph_reset.restype = None
  4405. # // same as ggml_graph_compute() but the work data is allocated as a part of the context
  4406. # // note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
  4407. # GGML_API void ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
  4408. def ggml_graph_compute_with_ctx(
  4409. ctx: ggml_context_p,
  4410. cgraph: ggml_cgraph_p,
  4411. n_threads: Union[ctypes.c_int, int],
  4412. ):
  4413. """Compute the graph with a context.
  4414. Parameters:
  4415. ctx: The context.
  4416. cgraph: The graph.
  4417. n_threads: The number of threads to use."""
  4418. return lib.ggml_graph_compute_with_ctx(ctx, cgraph, n_threads)
  4419. lib.ggml_graph_compute_with_ctx.argtypes = [
  4420. ggml_context_p,
  4421. ctypes.POINTER(ggml_cgraph),
  4422. ctypes.c_int,
  4423. ]
  4424. lib.ggml_graph_compute_with_ctx.restype = None
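# Illustrative end-to-end sketch (not part of the upstream bindings): build a
# tiny graph and evaluate it with ggml_graph_compute_with_ctx, which takes
# its work buffer from the context, so the context is created with generous
# headroom. Assumes ggml_init, ggml_init_params, ggml_new_tensor_1d,
# ggml_set_f32, ggml_get_f32_1d, ggml_mul, ggml_free and GGML_TYPE_F32 are
# bound earlier in this module.
def _example_compute_with_ctx() -> float:
    params = ggml_init_params(mem_size=16 * 1024 * 1024, mem_buffer=None)
    ctx = ggml_init(params)
    x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1)
    y = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1)
    ggml_set_f32(x, 3.0)
    ggml_set_f32(y, 4.0)
    z = ggml_mul(ctx, x, y)
    gf = ggml_build_forward(z)
    ggml_graph_compute_with_ctx(ctx, ctypes.pointer(gf), 1)
    out = ggml_get_f32_1d(z, 0)  # 12.0
    ggml_free(ctx)
    return out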
  4425. # GGML_API struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name);
  4426. def ggml_graph_get_tensor(
  4427. cgraph: ggml_cgraph_p,
  4428. name: bytes,
  4429. ) -> ggml_tensor_p:
  4430. """Get a tensor from the graph by name.
  4431. Parameters:
  4432. cgraph: The graph.
  4433. name: The name of the tensor.
  4434. Returns:
  4435. The tensor."""
  4436. return lib.ggml_graph_get_tensor(cgraph, name)
  4437. lib.ggml_graph_get_tensor.argtypes = [
  4438. ctypes.POINTER(ggml_cgraph),
  4439. ctypes.c_char_p,
  4440. ]
  4441. lib.ggml_graph_get_tensor.restype = ctypes.POINTER(ggml_tensor)
  4442. # GGML_API void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname);
  4443. def ggml_graph_export(
  4444. cgraph: ggml_cgraph_p,
  4445. fname: bytes,
  4446. ):
  4447. return lib.ggml_graph_export(cgraph, fname)
  4448. lib.ggml_graph_export.argtypes = [
  4449. ctypes.POINTER(ggml_cgraph),
  4450. ctypes.c_char_p,
  4451. ]
  4452. lib.ggml_graph_export.restype = None
  4453. # GGML_API struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval);
  4454. def ggml_graph_import(
  4455. fname: bytes,
  4456. ctx_data: "ctypes._Pointer[ggml_context_p]", # type: ignore
  4457. ctx_eval: "ctypes._Pointer[ggml_context_p]", # type: ignore
  4458. ) -> ggml_cgraph:
  4459. return lib.ggml_graph_import(fname, ctx_data, ctx_eval)
  4460. lib.ggml_graph_import.argtypes = [
  4461. ctypes.c_char_p,
  4462. ctypes.POINTER(ggml_context_p),
  4463. ctypes.POINTER(ggml_context_p),
  4464. ]
  4465. lib.ggml_graph_import.restype = ggml_cgraph
  4466. # // print info and performance information for the graph
  4467. # GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph);
  4468. def ggml_graph_print(
  4469. cgraph: ggml_cgraph_p,
  4470. ):
  4471. return lib.ggml_graph_print(cgraph)
  4472. lib.ggml_graph_print.argtypes = [ctypes.POINTER(ggml_cgraph)]
  4473. lib.ggml_graph_print.restype = None
  4474. # // dump the graph into a file using the dot format
  4475. # GGML_API void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename);
  4476. def ggml_graph_dump_dot(
  4477. gb: ggml_cgraph_p,
  4478. gf: ggml_cgraph_p,
  4479. filename: bytes,
  4480. ):
  4481. return lib.ggml_graph_dump_dot(gb, gf, filename)
  4482. lib.ggml_graph_dump_dot.argtypes = [
  4483. ctypes.POINTER(ggml_cgraph),
  4484. ctypes.POINTER(ggml_cgraph),
  4485. ctypes.c_char_p,
  4486. ]
  4487. lib.ggml_graph_dump_dot.restype = None
  4488. # //
  4489. # // optimization
  4490. # //
  4491. # // optimization methods
  4492. # enum ggml_opt_type {
  4493. # GGML_OPT_ADAM,
  4494. # GGML_OPT_LBFGS,
  4495. # };
  4496. GGML_OPT_ADAM = 0
  4497. GGML_OPT_LBFGS = 1
  4498. # // linesearch methods
  4499. # enum ggml_linesearch {
  4500. # GGML_LINESEARCH_DEFAULT = 1,
  4501. # GGML_LINESEARCH_BACKTRACKING_ARMIJO = 0,
  4502. # GGML_LINESEARCH_BACKTRACKING_WOLFE = 1,
  4503. # GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 2,
  4504. # };
  4505. GGML_LINESEARCH_DEFAULT = 1
  4506. GGML_LINESEARCH_BACKTRACKING_ARMIJO = 0
  4507. GGML_LINESEARCH_BACKTRACKING_WOLFE = 1
  4508. GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 2
  4509. # // optimization return values
  4510. # enum ggml_opt_result {
  4511. # GGML_OPT_OK = 0,
  4512. # GGML_OPT_DID_NOT_CONVERGE,
  4513. # GGML_OPT_NO_CONTEXT,
  4514. # GGML_OPT_INVALID_WOLFE,
  4515. # GGML_OPT_FAIL,
  4516. # GGML_LINESEARCH_FAIL = -128,
  4517. # GGML_LINESEARCH_MINIMUM_STEP,
  4518. # GGML_LINESEARCH_MAXIMUM_STEP,
  4519. # GGML_LINESEARCH_MAXIMUM_ITERATIONS,
  4520. # GGML_LINESEARCH_INVALID_PARAMETERS,
  4521. # };
  4522. GGML_OPT_OK = 0
  4523. GGML_OPT_DID_NOT_CONVERGE = 1
  4524. GGML_OPT_NO_CONTEXT = 2
  4525. GGML_OPT_INVALID_WOLFE = 3
  4526. GGML_OPT_FAIL = 4
  4527. GGML_LINESEARCH_FAIL = -128
  4528. GGML_LINESEARCH_MINIMUM_STEP = -127
  4529. GGML_LINESEARCH_MAXIMUM_STEP = -126
  4530. GGML_LINESEARCH_MAXIMUM_ITERATIONS = -125
  4531. GGML_LINESEARCH_INVALID_PARAMETERS = -124
  4532. # typedef void (*ggml_opt_callback)(void * data, float * sched);
  4533. ggml_opt_callback = ctypes.CFUNCTYPE(
  4534. None,
  4535. ctypes.c_void_p,
  4536. ctypes.POINTER(ctypes.c_float),
  4537. )
  4538. # // optimization parameters
  4539. # //
  4540. # // see ggml.c (ggml_opt_default_params) for default values
  4541. # //
  4542. # struct ggml_opt_params {
  4543. # enum ggml_opt_type type;
  4544. # int n_threads;
  4545. # // delta-based convergence test
  4546. # //
  4547. # // if past == 0 - disabled
  4548. # // if past > 0:
  4549. # // stop if |f(x) - f(x_past)| < delta * max(1, |f(x)|)
  4550. # //
  4551. # int past;
  4552. # float delta;
  4553. # // maximum number of iterations without improvement
  4554. # //
  4555. # // if 0 - disabled
  4556. # // if > 0:
  4557. # // assume convergence if no cost improvement in this number of iterations
  4558. # //
  4559. # int max_no_improvement;
  4560. # bool print_forward_graph;
  4561. # bool print_backward_graph;
  4562. # // ADAM parameters
  4563. # struct {
  4564. # int n_iter;
  4565. # float sched; // schedule multiplier (fixed, decay or warmup)
  4566. # float decay; // weight decay for AdamW, use 0.0f to disable
  4567. # int decay_min_ndim; // minimum number of tensor dimension to apply weight decay
  4568. # float alpha; // learning rate
  4569. # float beta1;
  4570. # float beta2;
  4571. # float eps; // epsilon for numerical stability
  4572. # float eps_f; // epsilon for convergence test
  4573. # float eps_g; // epsilon for convergence test
  4574. # float gclip; // gradient clipping
  4575. # } adam;
  4576. # // LBFGS parameters
  4577. # struct {
  4578. # int m; // number of corrections to approximate the inv. Hessian
  4579. # int n_iter;
  4580. # int max_linesearch;
  4581. # float eps; // convergence tolerance
  4582. # float ftol; // line search tolerance
  4583. # float wolfe;
  4584. # float min_step;
  4585. # float max_step;
  4586. # enum ggml_linesearch linesearch;
  4587. # } lbfgs;
  4588. # };
  4589. class ggml_opt_params_adam(ctypes.Structure):
  4590. _fields_ = [
  4591. ("n_iter", ctypes.c_int),
  4592. ("sched", ctypes.c_float),
  4593. ("decay", ctypes.c_float),
  4594. ("decay_min_ndim", ctypes.c_int),
  4595. ("alpha", ctypes.c_float),
  4596. ("beta1", ctypes.c_float),
  4597. ("beta2", ctypes.c_float),
  4598. ("eps", ctypes.c_float),
  4599. ("eps_f", ctypes.c_float),
  4600. ("eps_g", ctypes.c_float),
  4601. ("gclip", ctypes.c_float),
  4602. ]
  4603. class ggml_opt_params_lbfgs(ctypes.Structure):
  4604. _fields_ = [
  4605. ("m", ctypes.c_int),
  4606. ("n_iter", ctypes.c_int),
  4607. ("max_linesearch", ctypes.c_int),
  4608. ("eps", ctypes.c_float),
  4609. ("ftol", ctypes.c_float),
  4610. ("wolfe", ctypes.c_float),
  4611. ("min_step", ctypes.c_float),
  4612. ("max_step", ctypes.c_float),
  4613. ("linesearch", ctypes.c_int),
  4614. ]
  4615. class ggml_opt_params(ctypes.Structure):
  4616. _fields_ = [
  4617. ("type", ctypes.c_int),
  4618. ("n_threads", ctypes.c_int),
  4619. ("past", ctypes.c_int),
  4620. ("delta", ctypes.c_float),
  4621. ("max_no_improvement", ctypes.c_int),
  4622. ("print_forward_graph", ctypes.c_bool),
  4623. ("print_backward_graph", ctypes.c_bool),
  4624. ("adam", ggml_opt_params_adam),
  4625. ("lbfgs", ggml_opt_params_lbfgs),
  4626. ]
  4627. # struct ggml_opt_context {
  4628. # struct ggml_context * ctx;
  4629. # struct ggml_opt_params params;
  4630. # int iter;
  4631. # int64_t nx; // number of parameter elements
  4632. # bool just_initialized;
  4633. # float loss_before;
  4634. # float loss_after;
  4635. # struct {
  4636. # struct ggml_tensor * m; // first moment
  4637. # struct ggml_tensor * v; // second moment
  4638. # struct ggml_tensor * pf; // past function values
  4639. # float fx_best;
  4640. # float fx_prev;
  4641. # int n_no_improvement;
  4642. # } adam;
  4643. # struct {
  4644. # struct ggml_tensor * x; // current parameters
  4645. # struct ggml_tensor * xp; // previous parameters
  4646. # struct ggml_tensor * g; // current gradient
  4647. # struct ggml_tensor * gp; // previous gradient
  4648. # struct ggml_tensor * d; // search direction
  4649. # struct ggml_tensor * pf; // past function values
  4650. # struct ggml_tensor * lmal; // the L-BFGS memory alpha
  4651. # struct ggml_tensor * lmys; // the L-BFGS memory ys
  4652. # struct ggml_tensor * lms; // the L-BFGS memory s
  4653. # struct ggml_tensor * lmy; // the L-BFGS memory y
  4654. # float fx_best;
  4655. # float step;
  4656. # int j;
  4657. # int k;
  4658. # int end;
  4659. # int n_no_improvement;
  4660. # } lbfgs;
  4661. # };
  4662. class ggml_opt_context_adam(ctypes.Structure):
  4663. _fields_ = [
  4664. ("m", ctypes.POINTER(ggml_tensor)),
  4665. ("v", ctypes.POINTER(ggml_tensor)),
  4666. ("pf", ctypes.POINTER(ggml_tensor)),
  4667. ("fx_best", ctypes.c_float),
  4668. ("fx_prev", ctypes.c_float),
  4669. ("n_no_improvement", ctypes.c_int),
  4670. ]
  4671. class ggml_opt_context_lbfgs(ctypes.Structure):
  4672. _fields_ = [
  4673. ("x", ctypes.POINTER(ggml_tensor)),
  4674. ("xp", ctypes.POINTER(ggml_tensor)),
  4675. ("g", ctypes.POINTER(ggml_tensor)),
  4676. ("gp", ctypes.POINTER(ggml_tensor)),
  4677. ("d", ctypes.POINTER(ggml_tensor)),
  4678. ("pf", ctypes.POINTER(ggml_tensor)),
  4679. ("lmal", ctypes.POINTER(ggml_tensor)),
  4680. ("lmys", ctypes.POINTER(ggml_tensor)),
  4681. ("lms", ctypes.POINTER(ggml_tensor)),
  4682. ("lmy", ctypes.POINTER(ggml_tensor)),
  4683. ("fx_best", ctypes.c_float),
  4684. ("step", ctypes.c_float),
  4685. ("j", ctypes.c_int),
  4686. ("k", ctypes.c_int),
  4687. ("end", ctypes.c_int),
  4688. ("n_no_improvement", ctypes.c_int),
  4689. ]
  4690. class ggml_opt_context(ctypes.Structure):
  4691. _fields_ = [
  4692. ("ctx", ggml_context_p),
  4693. ("params", ggml_opt_params),
  4694. ("iter", ctypes.c_int),
  4695. ("nx", ctypes.c_int64),
  4696. ("just_initialized", ctypes.c_bool),
  4697. ("loss_before", ctypes.c_float),
  4698. ("loss_after", ctypes.c_float),
  4699. ("adam", ggml_opt_context_adam),
  4700. ("lbfgs", ggml_opt_context_lbfgs),
  4701. ]
  4702. ggml_opt_context_p = ctypes.POINTER(ggml_opt_context)
  4703. # GGML_API struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type);
4704. def ggml_opt_default_params(type: Union[ctypes.c_int, int]) -> ggml_opt_params:
  4705. return lib.ggml_opt_default_params(type)
  4706. lib.ggml_opt_default_params.argtypes = [ctypes.c_int]
  4707. lib.ggml_opt_default_params.restype = ggml_opt_params
  4708. # // optimize the function defined by the tensor f
  4709. # GGML_API enum ggml_opt_result ggml_opt(
  4710. # struct ggml_context * ctx,
  4711. # struct ggml_opt_params params,
  4712. # struct ggml_tensor * f);
  4713. def ggml_opt(
  4714. ctx: ggml_context_p,
  4715. params: ggml_opt_params,
  4716. f: ggml_tensor_p,
  4717. ) -> int:
  4718. return lib.ggml_opt(ctx, params, f)
  4719. lib.ggml_opt.argtypes = [ggml_context_p, ggml_opt_params, ctypes.POINTER(ggml_tensor)]
  4720. lib.ggml_opt.restype = ctypes.c_int
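# Illustrative sketch (not part of the upstream bindings): minimising
# f(x) = x * x with the ADAM optimizer and its default parameters. Assumes
# ggml_init, ggml_init_params, ggml_new_tensor_1d, ggml_set_f32,
# ggml_get_f32_1d, ggml_mul, ggml_free and GGML_TYPE_F32 are bound earlier in
# this module; ggml_set_param, ggml_opt_default_params and ggml_opt are bound
# above.
def _example_opt() -> float:
    params = ggml_init_params(mem_size=16 * 1024 * 1024, mem_buffer=None)
    ctx = ggml_init(params)
    x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1)
    ggml_set_f32(x, 2.0)
    ggml_set_param(ctx, x)      # mark x as an optimizable parameter
    f = ggml_mul(ctx, x, x)     # f(x) = x^2
    opt_params = ggml_opt_default_params(GGML_OPT_ADAM)
    result = ggml_opt(ctx, opt_params, f)  # a GGML_OPT_* result code
    x_final = ggml_get_f32_1d(x, 0)        # should have moved toward 0.0
    ggml_free(ctx)
    return x_final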
  4721. # // initialize optimizer context
  4722. # GGML_API void ggml_opt_init(
  4723. # struct ggml_context * ctx,
  4724. # struct ggml_opt_context * opt,
  4725. # struct ggml_opt_params params,
  4726. # int64_t nx);
  4727. def ggml_opt_init(
  4728. ctx: ggml_context_p,
  4729. opt: "ctypes._Pointer[ggml_opt_context]", # type: ignore
  4730. params: ggml_opt_params,
  4731. nx: Union[ctypes.c_int64, int],
  4732. ):
  4733. return lib.ggml_opt_init(ctx, opt, params, nx)
  4734. lib.ggml_opt_init.argtypes = [
  4735. ggml_context_p,
  4736. ctypes.POINTER(ggml_opt_context),
  4737. ggml_opt_params,
  4738. ctypes.c_int64,
  4739. ]
  4740. lib.ggml_opt_init.restype = None
  4741. # // continue optimizing the function defined by the tensor f
  4742. # GGML_API enum ggml_opt_result ggml_opt_resume(
  4743. # struct ggml_context * ctx,
  4744. # struct ggml_opt_context * opt,
  4745. # struct ggml_tensor * f);
  4746. def ggml_opt_resume(
  4747. ctx: ggml_context_p,
  4748. opt: "ctypes._Pointer[ggml_opt_context]", # type: ignore
  4749. f: ggml_tensor_p,
  4750. ) -> int:
  4751. return lib.ggml_opt_resume(ctx, opt, f)
  4752. lib.ggml_opt_resume.argtypes = [
  4753. ggml_context_p,
  4754. ctypes.POINTER(ggml_opt_context),
  4755. ctypes.POINTER(ggml_tensor),
  4756. ]
  4757. lib.ggml_opt_resume.restype = ctypes.c_int
  4758. # // continue optimizing the function defined by the tensor f
  4759. # GGML_API enum ggml_opt_result ggml_opt_resume_g(
  4760. # struct ggml_context * ctx,
  4761. # struct ggml_opt_context * opt,
  4762. # struct ggml_tensor * f,
  4763. # struct ggml_cgraph * gf,
  4764. # struct ggml_cgraph * gb,
  4765. # ggml_opt_callback callback,
  4766. # void * callback_data);
  4774. def ggml_opt_resume_g(
  4775. ctx: ggml_context_p,
  4776. opt: "ctypes._Pointer[ggml_opt_context]", # type: ignore
  4777. f: ggml_tensor_p,
  4778. gf: ggml_cgraph_p,
  4779. gb: ggml_cgraph_p,
  4780. callback: ggml_opt_callback = None,
  4781. callback_data: ctypes.c_void_p = None,
  4782. ) -> int:
  4783. return lib.ggml_opt_resume_g(ctx, opt, f, gf, gb, callback, callback_data)
  4784. lib.ggml_opt_resume_g.argtypes = [
  4785. ggml_context_p,
  4786. ctypes.POINTER(ggml_opt_context),
  4787. ctypes.POINTER(ggml_tensor),
  4788. ctypes.POINTER(ggml_cgraph),
  4789. ctypes.POINTER(ggml_cgraph),
  4790. ggml_opt_callback,
  4791. ctypes.c_void_p,
  4792. ]
  4793. lib.ggml_opt_resume_g.restype = ctypes.c_int
  4794. # //
  4795. # // quantization
  4796. # //
  4797. # GGML_API size_t ggml_quantize_q4_0(const float * src, void * dst, int n, int k, int64_t * hist);
  4798. def ggml_quantize_q4_0(
  4799. src: CFloatArray,
  4800. dst: ctypes.c_void_p,
  4801. n: Union[ctypes.c_int, int],
  4802. k: Union[ctypes.c_int, int],
  4803. hist: CInt64Array,
  4804. ) -> int:
  4805. return lib.ggml_quantize_q4_0(src, dst, n, k, hist)
  4806. lib.ggml_quantize_q4_0.argtypes = [
  4807. ctypes.POINTER(ctypes.c_float),
  4808. ctypes.c_void_p,
  4809. ctypes.c_int,
  4810. ctypes.c_int,
  4811. ctypes.POINTER(ctypes.c_int64),
  4812. ]
  4813. lib.ggml_quantize_q4_0.restype = ctypes.c_size_t
  4814. # GGML_API size_t ggml_quantize_q4_1(const float * src, void * dst, int n, int k, int64_t * hist);
  4815. def ggml_quantize_q4_1(
  4816. src: CFloatArray,
  4817. dst: ctypes.c_void_p,
  4818. n: Union[ctypes.c_int, int],
  4819. k: Union[ctypes.c_int, int],
  4820. hist: CInt64Array,
  4821. ) -> int:
  4822. return lib.ggml_quantize_q4_1(src, dst, n, k, hist)
  4823. lib.ggml_quantize_q4_1.argtypes = [
  4824. ctypes.POINTER(ctypes.c_float),
  4825. ctypes.c_void_p,
  4826. ctypes.c_int,
  4827. ctypes.c_int,
  4828. ctypes.POINTER(ctypes.c_int64),
  4829. ]
  4830. lib.ggml_quantize_q4_1.restype = ctypes.c_size_t
  4831. # GGML_API size_t ggml_quantize_q5_0(const float * src, void * dst, int n, int k, int64_t * hist);
  4832. def ggml_quantize_q5_0(
  4833. src: CFloatArray,
  4834. dst: ctypes.c_void_p,
  4835. n: Union[ctypes.c_int, int],
  4836. k: Union[ctypes.c_int, int],
  4837. hist: CInt64Array,
  4838. ) -> int:
  4839. return lib.ggml_quantize_q5_0(src, dst, n, k, hist)
  4840. lib.ggml_quantize_q5_0.argtypes = [
  4841. ctypes.POINTER(ctypes.c_float),
  4842. ctypes.c_void_p,
  4843. ctypes.c_int,
  4844. ctypes.c_int,
  4845. ctypes.POINTER(ctypes.c_int64),
  4846. ]
  4847. lib.ggml_quantize_q5_0.restype = ctypes.c_size_t
  4848. # GGML_API size_t ggml_quantize_q5_1(const float * src, void * dst, int n, int k, int64_t * hist);
  4849. def ggml_quantize_q5_1(
  4850. src: CFloatArray,
  4851. dst: ctypes.c_void_p,
  4852. n: Union[ctypes.c_int, int],
  4853. k: Union[ctypes.c_int, int],
  4854. hist: CInt64Array,
  4855. ) -> int:
  4856. return lib.ggml_quantize_q5_1(src, dst, n, k, hist)
  4857. lib.ggml_quantize_q5_1.argtypes = [
  4858. ctypes.POINTER(ctypes.c_float),
  4859. ctypes.c_void_p,
  4860. ctypes.c_int,
  4861. ctypes.c_int,
  4862. ctypes.POINTER(ctypes.c_int64),
  4863. ]
  4864. lib.ggml_quantize_q5_1.restype = ctypes.c_size_t
  4865. # GGML_API size_t ggml_quantize_q8_0(const float * src, void * dst, int n, int k, int64_t * hist);
  4866. def ggml_quantize_q8_0(
  4867. src: CFloatArray,
  4868. dst: ctypes.c_void_p,
  4869. n: Union[ctypes.c_int, int],
  4870. k: Union[ctypes.c_int, int],
  4871. hist: CInt64Array,
  4872. ) -> int:
  4873. return lib.ggml_quantize_q8_0(src, dst, n, k, hist)
  4874. lib.ggml_quantize_q8_0.argtypes = [
  4875. ctypes.POINTER(ctypes.c_float),
  4876. ctypes.c_void_p,
  4877. ctypes.c_int,
  4878. ctypes.c_int,
  4879. ctypes.POINTER(ctypes.c_int64),
  4880. ]
  4881. lib.ggml_quantize_q8_0.restype = ctypes.c_size_t
  4882. # GGML_API size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start, int n, int64_t * hist);
  4883. def ggml_quantize_chunk(
  4884. type: Union[ctypes.c_int, int],
  4885. src: CFloatArray,
  4886. dst: ctypes.c_void_p,
  4887. start: Union[ctypes.c_int, int],
  4888. n: Union[ctypes.c_int, int],
  4889. hist: CInt64Array,
  4890. ) -> int:
  4891. return lib.ggml_quantize_chunk(type, src, dst, start, n, hist)
  4892. lib.ggml_quantize_chunk.argtypes = [
  4893. ctypes.c_int,
  4894. ctypes.POINTER(ctypes.c_float),
  4895. ctypes.c_void_p,
  4896. ctypes.c_int,
  4897. ctypes.c_int,
  4898. ctypes.POINTER(ctypes.c_int64),
  4899. ]
  4900. lib.ggml_quantize_chunk.restype = ctypes.c_size_t
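# Illustrative sketch (not part of the upstream bindings): quantizing a flat
# float buffer to Q8_0. `n` is the total element count and `k` the row
# length; both are expected to be multiples of the 32-element block size.
# `hist` receives a 16-bucket histogram of the quantized values, and the
# return value is the number of bytes written to `dst`.
def _example_quantize_q8_0(values: "list[float]", row_size: int = 32) -> int:
    n = len(values)
    src = (ctypes.c_float * n)(*values)
    dst = ctypes.create_string_buffer(n * 4)  # generously sized output buffer
    hist = (ctypes.c_int64 * 16)()
    return ggml_quantize_q8_0(src, ctypes.cast(dst, ctypes.c_void_p), n, row_size, hist)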
  4901. # //
  4902. # // gguf
  4903. # //
  4904. # enum gguf_type {
  4905. # GGUF_TYPE_UINT8 = 0,
  4906. # GGUF_TYPE_INT8 = 1,
  4907. # GGUF_TYPE_UINT16 = 2,
  4908. # GGUF_TYPE_INT16 = 3,
  4909. # GGUF_TYPE_UINT32 = 4,
  4910. # GGUF_TYPE_INT32 = 5,
  4911. # GGUF_TYPE_FLOAT32 = 6,
  4912. # GGUF_TYPE_BOOL = 7,
  4913. # GGUF_TYPE_STRING = 8,
  4914. # GGUF_TYPE_ARRAY = 9,
  4915. # GGUF_TYPE_UINT64 = 10,
  4916. # GGUF_TYPE_INT64 = 11,
  4917. # GGUF_TYPE_FLOAT64 = 12,
  4918. # GGUF_TYPE_COUNT, // marks the end of the enum
  4919. # };
  4920. GGUF_TYPE_UINT8 = 0
  4921. GGUF_TYPE_INT8 = 1
  4922. GGUF_TYPE_UINT16 = 2
  4923. GGUF_TYPE_INT16 = 3
  4924. GGUF_TYPE_UINT32 = 4
  4925. GGUF_TYPE_INT32 = 5
  4926. GGUF_TYPE_FLOAT32 = 6
  4927. GGUF_TYPE_BOOL = 7
  4928. GGUF_TYPE_STRING = 8
  4929. GGUF_TYPE_ARRAY = 9
4930. GGUF_TYPE_UINT64 = 10
GGUF_TYPE_INT64 = 11
GGUF_TYPE_FLOAT64 = 12
GGUF_TYPE_COUNT = 13
  4931. # struct gguf_context;
  4932. gguf_context_p = ctypes.c_void_p
  4933. # struct gguf_init_params {
  4934. # bool no_alloc;
  4935. # // if not NULL, create a ggml_context and allocate the tensor data in it
  4936. # struct ggml_context ** ctx;
  4937. # };
  4938. class gguf_init_params(ctypes.Structure):
  4939. _fields_ = [
  4940. ("no_alloc", ctypes.c_bool),
  4941. ("ctx", ctypes.POINTER(ggml_context_p)),
  4942. ]
  4943. # GGML_API struct gguf_context * gguf_init_empty(void);
  4944. def gguf_init_empty() -> gguf_context_p:
  4945. return lib.gguf_init_empty()
  4946. lib.gguf_init_empty.argtypes = []
  4947. lib.gguf_init_empty.restype = gguf_context_p
  4948. # GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
  4949. def gguf_init_from_file(
  4950. fname: bytes,
  4951. params: gguf_init_params,
  4952. ) -> gguf_context_p:
  4953. return lib.gguf_init_from_file(fname, params)
  4954. lib.gguf_init_from_file.argtypes = [
  4955. ctypes.c_char_p,
  4956. gguf_init_params,
  4957. ]
  4958. lib.gguf_init_from_file.restype = gguf_context_p
  4959. # //GGML_API struct gguf_context * gguf_init_from_buffer(..);
  4960. # GGML_API void gguf_free(struct gguf_context * ctx);
  4961. def gguf_free(
  4962. ctx: gguf_context_p,
  4963. ):
  4964. return lib.gguf_free(ctx)
  4965. lib.gguf_free.argtypes = [
  4966. gguf_context_p,
  4967. ]
  4968. lib.gguf_free.restype = None
  4969. # GGML_API const char * gguf_type_name(enum gguf_type type);
  4970. def gguf_type_name(
  4971. type: Union[ctypes.c_int, int],
  4972. ) -> bytes:
  4973. return lib.gguf_type_name(type)
  4974. lib.gguf_type_name.argtypes = [
  4975. ctypes.c_int,
  4976. ]
  4977. lib.gguf_type_name.restype = ctypes.c_char_p
  4978. # GGML_API int gguf_get_version (const struct gguf_context * ctx);
  4979. def gguf_get_version(
  4980. ctx: gguf_context_p,
  4981. ) -> int:
  4982. return lib.gguf_get_version(ctx)
  4983. lib.gguf_get_version.argtypes = [
  4984. gguf_context_p,
  4985. ]
  4986. lib.gguf_get_version.restype = ctypes.c_int
  4987. # GGML_API size_t gguf_get_alignment (const struct gguf_context * ctx);
  4988. def gguf_get_alignment(
  4989. ctx: gguf_context_p,
  4990. ) -> int:
  4991. return lib.gguf_get_alignment(ctx)
  4992. lib.gguf_get_alignment.argtypes = [
  4993. gguf_context_p,
  4994. ]
  4995. lib.gguf_get_alignment.restype = ctypes.c_size_t
  4996. # GGML_API size_t gguf_get_data_offset(const struct gguf_context * ctx);
  4997. def gguf_get_data_offset(
  4998. ctx: gguf_context_p,
  4999. ) -> int:
  5000. return lib.gguf_get_data_offset(ctx)
  5001. lib.gguf_get_data_offset.argtypes = [
  5002. gguf_context_p,
  5003. ]
  5004. lib.gguf_get_data_offset.restype = ctypes.c_size_t
  5005. # GGML_API void * gguf_get_data (const struct gguf_context * ctx);
  5006. def gguf_get_data(
  5007. ctx: gguf_context_p,
  5008. ) -> ctypes.c_void_p:
  5009. return lib.gguf_get_data(ctx)
  5010. lib.gguf_get_data.argtypes = [
  5011. gguf_context_p,
  5012. ]
  5013. lib.gguf_get_data.restype = ctypes.c_void_p
  5014. # GGML_API int gguf_get_n_kv(const struct gguf_context * ctx);
  5015. def gguf_get_n_kv(
  5016. ctx: gguf_context_p,
  5017. ) -> int:
  5018. return lib.gguf_get_n_kv(ctx)
  5019. lib.gguf_get_n_kv.argtypes = [
  5020. gguf_context_p,
  5021. ]
  5022. lib.gguf_get_n_kv.restype = ctypes.c_int
  5023. # GGML_API int gguf_find_key(const struct gguf_context * ctx, const char * key);
  5024. def gguf_find_key(
  5025. ctx: gguf_context_p,
  5026. key: bytes,
  5027. ) -> int:
  5028. return lib.gguf_find_key(ctx, key)
  5029. lib.gguf_find_key.argtypes = [
  5030. gguf_context_p,
  5031. ctypes.c_char_p,
  5032. ]
  5033. lib.gguf_find_key.restype = ctypes.c_int
  5034. # GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int i);
  5035. def gguf_get_key(
  5036. ctx: gguf_context_p,
  5037. i: Union[ctypes.c_int, int],
  5038. ) -> bytes:
  5039. return lib.gguf_get_key(ctx, i)
  5040. lib.gguf_get_key.argtypes = [
  5041. gguf_context_p,
  5042. ctypes.c_int,
  5043. ]
  5044. lib.gguf_get_key.restype = ctypes.c_char_p
  5045. # GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int i);
  5046. def gguf_get_kv_type(
  5047. ctx: gguf_context_p,
  5048. i: Union[ctypes.c_int, int],
  5049. ) -> int:
  5050. return lib.gguf_get_kv_type(ctx, i)
  5051. lib.gguf_get_kv_type.argtypes = [
  5052. gguf_context_p,
  5053. ctypes.c_int,
  5054. ]
  5055. lib.gguf_get_kv_type.restype = ctypes.c_int
  5056. # GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int i);
  5057. def gguf_get_arr_type(
  5058. ctx: gguf_context_p,
  5059. i: Union[ctypes.c_int, int],
  5060. ) -> int:
  5061. return lib.gguf_get_arr_type(ctx, i)
  5062. lib.gguf_get_arr_type.argtypes = [
  5063. gguf_context_p,
  5064. ctypes.c_int,
  5065. ]
  5066. lib.gguf_get_arr_type.restype = ctypes.c_int
  5067. # // results are undefined if the wrong type is used for the key
  5068. # GGML_API uint8_t gguf_get_val_u8 (const struct gguf_context * ctx, int i);
  5069. def gguf_get_val_u8(
  5070. ctx: gguf_context_p,
  5071. i: Union[ctypes.c_int, int],
  5072. ) -> int:
  5073. return lib.gguf_get_val_u8(ctx, i)
  5074. lib.gguf_get_val_u8.argtypes = [
  5075. gguf_context_p,
  5076. ctypes.c_int,
  5077. ]
  5078. lib.gguf_get_val_u8.restype = ctypes.c_uint8
  5079. # GGML_API int8_t gguf_get_val_i8 (const struct gguf_context * ctx, int i);
  5080. def gguf_get_val_i8(
  5081. ctx: gguf_context_p,
  5082. i: Union[ctypes.c_int, int],
  5083. ) -> int:
  5084. return lib.gguf_get_val_i8(ctx, i)
  5085. lib.gguf_get_val_i8.argtypes = [
  5086. gguf_context_p,
  5087. ctypes.c_int,
  5088. ]
  5089. lib.gguf_get_val_i8.restype = ctypes.c_int8
  5090. # GGML_API uint16_t gguf_get_val_u16 (const struct gguf_context * ctx, int i);
  5091. def gguf_get_val_u16(
  5092. ctx: gguf_context_p,
  5093. i: Union[ctypes.c_int, int],
  5094. ) -> int:
  5095. return lib.gguf_get_val_u16(ctx, i)
  5096. lib.gguf_get_val_u16.argtypes = [
  5097. gguf_context_p,
  5098. ctypes.c_int,
  5099. ]
  5100. lib.gguf_get_val_u16.restype = ctypes.c_uint16
  5101. # GGML_API int16_t gguf_get_val_i16 (const struct gguf_context * ctx, int i);
  5102. def gguf_get_val_i16(
  5103. ctx: gguf_context_p,
  5104. i: Union[ctypes.c_int, int],
  5105. ) -> int:
  5106. return lib.gguf_get_val_i16(ctx, i)
lib.gguf_get_val_i16.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_val_i16.restype = ctypes.c_int16
# GGML_API uint32_t gguf_get_val_u32 (const struct gguf_context * ctx, int i);
def gguf_get_val_u32(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> int:
    return lib.gguf_get_val_u32(ctx, i)
lib.gguf_get_val_u32.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_val_u32.restype = ctypes.c_uint32
# GGML_API int32_t gguf_get_val_i32 (const struct gguf_context * ctx, int i);
def gguf_get_val_i32(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> int:
    return lib.gguf_get_val_i32(ctx, i)
lib.gguf_get_val_i32.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_val_i32.restype = ctypes.c_int32
# GGML_API float gguf_get_val_f32 (const struct gguf_context * ctx, int i);
def gguf_get_val_f32(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> float:
    return lib.gguf_get_val_f32(ctx, i)
lib.gguf_get_val_f32.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_val_f32.restype = ctypes.c_float
# GGML_API uint64_t gguf_get_val_u64 (const struct gguf_context * ctx, int i);
def gguf_get_val_u64(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> int:
    return lib.gguf_get_val_u64(ctx, i)
lib.gguf_get_val_u64.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_val_u64.restype = ctypes.c_uint64
# GGML_API int64_t gguf_get_val_i64 (const struct gguf_context * ctx, int i);
def gguf_get_val_i64(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> int:
    return lib.gguf_get_val_i64(ctx, i)
lib.gguf_get_val_i64.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_val_i64.restype = ctypes.c_int64
# GGML_API double gguf_get_val_f64 (const struct gguf_context * ctx, int i);
def gguf_get_val_f64(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> float:
    return lib.gguf_get_val_f64(ctx, i)
lib.gguf_get_val_f64.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_val_f64.restype = ctypes.c_double
# GGML_API bool gguf_get_val_bool(const struct gguf_context * ctx, int i);
def gguf_get_val_bool(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> bool:
    return lib.gguf_get_val_bool(ctx, i)
lib.gguf_get_val_bool.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_val_bool.restype = ctypes.c_bool
# GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int i);
def gguf_get_val_str(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> bytes:
    return lib.gguf_get_val_str(ctx, i)
lib.gguf_get_val_str.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_val_str.restype = ctypes.c_char_p
# GGML_API int gguf_get_arr_n (const struct gguf_context * ctx, int i);
def gguf_get_arr_n(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> int:
    return lib.gguf_get_arr_n(ctx, i)
lib.gguf_get_arr_n.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_arr_n.restype = ctypes.c_int
# GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int i);
def gguf_get_arr_data(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> ctypes.c_void_p:
    return lib.gguf_get_arr_data(ctx, i)
lib.gguf_get_arr_data.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_arr_data.restype = ctypes.c_void_p
# GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i);
def gguf_get_arr_str(
    ctx: gguf_context_p,
    key_id: Union[ctypes.c_int, int],
    i: Union[ctypes.c_int, int],
) -> bytes:
    return lib.gguf_get_arr_str(ctx, key_id, i)
lib.gguf_get_arr_str.argtypes = [
    gguf_context_p,
    ctypes.c_int,
    ctypes.c_int,
]
lib.gguf_get_arr_str.restype = ctypes.c_char_p
# GGML_API int gguf_get_n_tensors (const struct gguf_context * ctx);
def gguf_get_n_tensors(
    ctx: gguf_context_p,
) -> int:
    return lib.gguf_get_n_tensors(ctx)
lib.gguf_get_n_tensors.argtypes = [
    gguf_context_p,
]
lib.gguf_get_n_tensors.restype = ctypes.c_int
# GGML_API int gguf_find_tensor (const struct gguf_context * ctx, const char * name);
def gguf_find_tensor(
    ctx: gguf_context_p,
    name: bytes,
) -> int:
    return lib.gguf_find_tensor(ctx, name)
lib.gguf_find_tensor.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
]
lib.gguf_find_tensor.restype = ctypes.c_int
# GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i);
def gguf_get_tensor_offset(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> int:
    return lib.gguf_get_tensor_offset(ctx, i)
lib.gguf_get_tensor_offset.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_tensor_offset.restype = ctypes.c_size_t
# GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i);
def gguf_get_tensor_name(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> bytes:
    return lib.gguf_get_tensor_name(ctx, i)
lib.gguf_get_tensor_name.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_tensor_name.restype = ctypes.c_char_p
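
# Usage sketch (not part of the upstream gguf header): iterate over the tensors
# of a loaded GGUF context. This assumes a gguf_context obtained from
# gguf_init_from_file, which is bound earlier in this module.
def _example_list_gguf_tensors(ctx: gguf_context_p) -> None:
    n_tensors = gguf_get_n_tensors(ctx)
    for i in range(n_tensors):
        name = gguf_get_tensor_name(ctx, i)      # bytes, e.g. b"token_embd.weight"
        offset = gguf_get_tensor_offset(ctx, i)  # byte offset into the data section
        print(name.decode("utf-8"), offset)
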
# // overrides existing values or adds a new one
# GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
def gguf_set_val_u8(
    ctx: gguf_context_p,
    key: bytes,
    val: Union[ctypes.c_uint8, int],
):
    return lib.gguf_set_val_u8(ctx, key, val)
lib.gguf_set_val_u8.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_uint8,
]
lib.gguf_set_val_u8.restype = None
# GGML_API void gguf_set_val_i8 (struct gguf_context * ctx, const char * key, int8_t val);
def gguf_set_val_i8(
    ctx: gguf_context_p,
    key: bytes,
    val: Union[ctypes.c_int8, int],
):
    return lib.gguf_set_val_i8(ctx, key, val)
lib.gguf_set_val_i8.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_int8,
]
lib.gguf_set_val_i8.restype = None
# GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t val);
def gguf_set_val_u16(
    ctx: gguf_context_p,
    key: bytes,
    val: Union[ctypes.c_uint16, int],
):
    return lib.gguf_set_val_u16(ctx, key, val)
lib.gguf_set_val_u16.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_uint16,
]
lib.gguf_set_val_u16.restype = None
# GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t val);
def gguf_set_val_i16(
    ctx: gguf_context_p,
    key: bytes,
    val: Union[ctypes.c_int16, int],
):
    return lib.gguf_set_val_i16(ctx, key, val)
lib.gguf_set_val_i16.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_int16,
]
lib.gguf_set_val_i16.restype = None
# GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val);
def gguf_set_val_u32(
    ctx: gguf_context_p,
    key: bytes,
    val: Union[ctypes.c_uint32, int],
):
    return lib.gguf_set_val_u32(ctx, key, val)
lib.gguf_set_val_u32.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_uint32,
]
lib.gguf_set_val_u32.restype = None
# GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t val);
def gguf_set_val_i32(
    ctx: gguf_context_p,
    key: bytes,
    val: Union[ctypes.c_int32, int],
):
    return lib.gguf_set_val_i32(ctx, key, val)
lib.gguf_set_val_i32.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_int32,
]
lib.gguf_set_val_i32.restype = None
# GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float val);
def gguf_set_val_f32(
    ctx: gguf_context_p,
    key: bytes,
    val: Union[ctypes.c_float, float],
):
    return lib.gguf_set_val_f32(ctx, key, val)
lib.gguf_set_val_f32.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_float,
]
lib.gguf_set_val_f32.restype = None
# GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val);
def gguf_set_val_u64(
    ctx: gguf_context_p,
    key: bytes,
    val: Union[ctypes.c_uint64, int],
):
    return lib.gguf_set_val_u64(ctx, key, val)
lib.gguf_set_val_u64.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_uint64,
]
lib.gguf_set_val_u64.restype = None
# GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t val);
def gguf_set_val_i64(
    ctx: gguf_context_p,
    key: bytes,
    val: Union[ctypes.c_int64, int],
):
    return lib.gguf_set_val_i64(ctx, key, val)
lib.gguf_set_val_i64.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_int64,
]
lib.gguf_set_val_i64.restype = None
# GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double val);
def gguf_set_val_f64(
    ctx: gguf_context_p,
    key: bytes,
    val: Union[ctypes.c_double, float],
):
    return lib.gguf_set_val_f64(ctx, key, val)
lib.gguf_set_val_f64.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_double,
]
lib.gguf_set_val_f64.restype = None
# GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val);
def gguf_set_val_bool(
    ctx: gguf_context_p,
    key: bytes,
    val: Union[ctypes.c_bool, bool],
):
    return lib.gguf_set_val_bool(ctx, key, val)
lib.gguf_set_val_bool.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_bool,
]
lib.gguf_set_val_bool.restype = None
# GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
def gguf_set_val_str(
    ctx: gguf_context_p,
    key: bytes,
    val: bytes,
):
    return lib.gguf_set_val_str(ctx, key, val)
lib.gguf_set_val_str.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_char_p,
]
lib.gguf_set_val_str.restype = None
# GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n);
def gguf_set_arr_data(
    ctx: gguf_context_p,
    key: bytes,
    type: Union[ctypes.c_int, int],
    data: ctypes.c_void_p,
    n: Union[ctypes.c_int, int],
):
    return lib.gguf_set_arr_data(ctx, key, type, data, n)
lib.gguf_set_arr_data.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_int,
    ctypes.c_void_p,
    ctypes.c_int,
]
lib.gguf_set_arr_data.restype = None
# GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, int n);
def gguf_set_arr_str(
    ctx: gguf_context_p,
    key: bytes,
    data: CCharPointer,
    n: Union[ctypes.c_int, int],
):
    return lib.gguf_set_arr_str(ctx, key, data, n)
lib.gguf_set_arr_str.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.POINTER(ctypes.c_char_p),
    ctypes.c_int,
]
lib.gguf_set_arr_str.restype = None
# // set or add KV pairs from another context
# GGML_API void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src);
def gguf_set_kv(
    ctx: gguf_context_p,
    src: gguf_context_p,
):
    return lib.gguf_set_kv(ctx, src)
lib.gguf_set_kv.argtypes = [
    gguf_context_p,
    gguf_context_p,
]
lib.gguf_set_kv.restype = None
# // manage tensor info
# GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor);
def gguf_add_tensor(
    ctx: gguf_context_p,
    tensor: ggml_tensor_p,
):
    return lib.gguf_add_tensor(ctx, tensor)
lib.gguf_add_tensor.argtypes = [
    gguf_context_p,
    ctypes.POINTER(ggml_tensor),
]
lib.gguf_add_tensor.restype = None
# GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type);
def gguf_set_tensor_type(
    ctx: gguf_context_p,
    name: bytes,
    type: Union[ctypes.c_int, int],
):
    return lib.gguf_set_tensor_type(ctx, name, type)
lib.gguf_set_tensor_type.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_int,
]
lib.gguf_set_tensor_type.restype = None
# GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size);
def gguf_set_tensor_data(
    ctx: gguf_context_p,
    name: bytes,
    data: ctypes.c_void_p,
    size: Union[ctypes.c_size_t, int],
):
    return lib.gguf_set_tensor_data(ctx, name, data, size)
lib.gguf_set_tensor_data.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_void_p,
    ctypes.c_size_t,
]
lib.gguf_set_tensor_data.restype = None
# // writing gguf files can be done in 2 ways:
# //
# // - write the entire gguf_context to a binary file in a single pass:
# //
# //   gguf_write_to_file(ctx, fname);
# //
# // - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data:
# //
# //   FILE * f = fopen(fname, "wb");
# //   fseek(f, gguf_get_meta_size(ctx), SEEK_SET);
# //   fwrite(f, ...);
# //   void * data = gguf_meta_get_meta_data(ctx);
# //   fseek(f, 0, SEEK_SET);
# //   fwrite(f, data, gguf_get_meta_size(ctx));
# //   free(data);
# //   fclose(f);
# //
# // write the entire context to a binary file
# GGML_API void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta);
def gguf_write_to_file(
    ctx: gguf_context_p,
    fname: bytes,
    only_meta: Union[ctypes.c_bool, bool],
):
    return lib.gguf_write_to_file(ctx, fname, only_meta)
lib.gguf_write_to_file.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_bool,
]
lib.gguf_write_to_file.restype = None
# // get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
# GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
def gguf_get_meta_size(
    ctx: gguf_context_p,
) -> int:
    return lib.gguf_get_meta_size(ctx)
lib.gguf_get_meta_size.argtypes = [
    gguf_context_p,
]
lib.gguf_get_meta_size.restype = ctypes.c_size_t
# GGML_API void gguf_get_meta_data(const struct gguf_context * ctx, void * data);
def gguf_get_meta_data(
    ctx: gguf_context_p,
    data: ctypes.c_void_p,
):
    return lib.gguf_get_meta_data(ctx, data)
lib.gguf_get_meta_data.argtypes = [
    gguf_context_p,
    ctypes.c_void_p,
]
lib.gguf_get_meta_data.restype = None
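
# Usage sketch (not part of the upstream gguf header), mirroring the C comment
# above: either write the whole context in one pass with gguf_write_to_file, or
# write only the meta data and emit the tensor data yourself. gguf_init_empty
# and gguf_free are assumed to be bound earlier in this module; the key names
# below are purely illustrative.
def _example_write_gguf(fname: bytes) -> None:
    ctx = gguf_init_empty()
    gguf_set_val_u32(ctx, b"example.version", 1)
    gguf_set_val_str(ctx, b"example.author", b"me")
    # single pass: meta data followed by tensor data
    gguf_write_to_file(ctx, fname, False)
    # meta data only (only_meta=True); tensor data would be written separately
    gguf_write_to_file(ctx, b"meta-only.gguf", True)
    gguf_free(ctx)
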
# //
# // system info
# //
# GGML_API int ggml_cpu_has_avx (void);
def ggml_cpu_has_avx() -> int:
    return lib.ggml_cpu_has_avx()
lib.ggml_cpu_has_avx.argtypes = []
lib.ggml_cpu_has_avx.restype = ctypes.c_int
# GGML_API int ggml_cpu_has_avx2 (void);
def ggml_cpu_has_avx2() -> int:
    return lib.ggml_cpu_has_avx2()
lib.ggml_cpu_has_avx2.argtypes = []
lib.ggml_cpu_has_avx2.restype = ctypes.c_int
# GGML_API int ggml_cpu_has_avx512 (void);
def ggml_cpu_has_avx512() -> int:
    return lib.ggml_cpu_has_avx512()
lib.ggml_cpu_has_avx512.argtypes = []
lib.ggml_cpu_has_avx512.restype = ctypes.c_int
# GGML_API int ggml_cpu_has_avx512_vbmi(void);
def ggml_cpu_has_avx512_vbmi() -> int:
    return lib.ggml_cpu_has_avx512_vbmi()
lib.ggml_cpu_has_avx512_vbmi.argtypes = []
lib.ggml_cpu_has_avx512_vbmi.restype = ctypes.c_int
# GGML_API int ggml_cpu_has_avx512_vnni(void);
def ggml_cpu_has_avx512_vnni() -> int:
    return lib.ggml_cpu_has_avx512_vnni()
lib.ggml_cpu_has_avx512_vnni.argtypes = []
lib.ggml_cpu_has_avx512_vnni.restype = ctypes.c_int
# GGML_API int ggml_cpu_has_fma (void);
def ggml_cpu_has_fma() -> int:
    return lib.ggml_cpu_has_fma()
lib.ggml_cpu_has_fma.argtypes = []
lib.ggml_cpu_has_fma.restype = ctypes.c_int
# GGML_API int ggml_cpu_has_neon (void);
def ggml_cpu_has_neon() -> int:
    return lib.ggml_cpu_has_neon()
lib.ggml_cpu_has_neon.argtypes = []
lib.ggml_cpu_has_neon.restype = ctypes.c_int
# GGML_API int ggml_cpu_has_arm_fma (void);
def ggml_cpu_has_arm_fma() -> int:
    return lib.ggml_cpu_has_arm_fma()
lib.ggml_cpu_has_arm_fma.argtypes = []
lib.ggml_cpu_has_arm_fma.restype = ctypes.c_int
# GGML_API int ggml_cpu_has_f16c (void);
def ggml_cpu_has_f16c() -> int:
    return lib.ggml_cpu_has_f16c()
lib.ggml_cpu_has_f16c.argtypes = []
lib.ggml_cpu_has_f16c.restype = ctypes.c_int
# GGML_API int ggml_cpu_has_fp16_va (void);
def ggml_cpu_has_fp16_va() -> int:
    return lib.ggml_cpu_has_fp16_va()
lib.ggml_cpu_has_fp16_va.argtypes = []
lib.ggml_cpu_has_fp16_va.restype = ctypes.c_int
# GGML_API int ggml_cpu_has_wasm_simd (void);
def ggml_cpu_has_wasm_simd() -> int:
    return lib.ggml_cpu_has_wasm_simd()
lib.ggml_cpu_has_wasm_simd.argtypes = []
lib.ggml_cpu_has_wasm_simd.restype = ctypes.c_int
# GGML_API int ggml_cpu_has_blas (void);
def ggml_cpu_has_blas() -> int:
    return lib.ggml_cpu_has_blas()
lib.ggml_cpu_has_blas.argtypes = []
lib.ggml_cpu_has_blas.restype = ctypes.c_int
# GGML_API int ggml_cpu_has_cublas (void);
def ggml_cpu_has_cublas() -> int:
    return lib.ggml_cpu_has_cublas()
lib.ggml_cpu_has_cublas.argtypes = []
lib.ggml_cpu_has_cublas.restype = ctypes.c_int
# GGML_API int ggml_cpu_has_clblast (void);
def ggml_cpu_has_clblast() -> int:
    return lib.ggml_cpu_has_clblast()
lib.ggml_cpu_has_clblast.argtypes = []
lib.ggml_cpu_has_clblast.restype = ctypes.c_int
# GGML_API int ggml_cpu_has_gpublas (void);
def ggml_cpu_has_gpublas() -> int:
    return lib.ggml_cpu_has_gpublas()
lib.ggml_cpu_has_gpublas.argtypes = []
lib.ggml_cpu_has_gpublas.restype = ctypes.c_int
# GGML_API int ggml_cpu_has_sse3 (void);
def ggml_cpu_has_sse3() -> int:
    return lib.ggml_cpu_has_sse3()
lib.ggml_cpu_has_sse3.argtypes = []
lib.ggml_cpu_has_sse3.restype = ctypes.c_int
# GGML_API int ggml_cpu_has_ssse3 (void);
def ggml_cpu_has_ssse3() -> int:
    return lib.ggml_cpu_has_ssse3()
lib.ggml_cpu_has_ssse3.argtypes = []
lib.ggml_cpu_has_ssse3.restype = ctypes.c_int
# GGML_API int ggml_cpu_has_vsx (void);
def ggml_cpu_has_vsx() -> int:
    return lib.ggml_cpu_has_vsx()
lib.ggml_cpu_has_vsx.argtypes = []
lib.ggml_cpu_has_vsx.restype = ctypes.c_int
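
# Usage sketch (not part of the upstream header): dump which optional CPU and
# backend features this build of the ggml shared library reports. Each function
# returns an int (0 or 1).
def _example_print_system_info() -> None:
    features = {
        "AVX": ggml_cpu_has_avx(),
        "AVX2": ggml_cpu_has_avx2(),
        "AVX512": ggml_cpu_has_avx512(),
        "FMA": ggml_cpu_has_fma(),
        "NEON": ggml_cpu_has_neon(),
        "F16C": ggml_cpu_has_f16c(),
        "BLAS": ggml_cpu_has_blas(),
        "cuBLAS": ggml_cpu_has_cublas(),
        "CLBlast": ggml_cpu_has_clblast(),
    }
    for name, enabled in features.items():
        print(f"{name}: {bool(enabled)}")
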
# //
# // Internal types and functions exposed for tests and benchmarks
# //
# typedef void (*ggml_to_float_t)(const void * x, float * y, int k);
ggml_to_float_t = ctypes.CFUNCTYPE(
    None, ctypes.c_void_p, ctypes.POINTER(ctypes.c_float), ctypes.c_int
)
# typedef void (*ggml_from_float_t)(const float * x, void * y, int k);
ggml_from_float_t = ctypes.CFUNCTYPE(
    None, ctypes.POINTER(ctypes.c_float), ctypes.c_void_p, ctypes.c_int
)
# typedef void (*ggml_vec_dot_t)(const int n, float * s, const void * x, const void * y);
ggml_vec_dot_t = ctypes.CFUNCTYPE(
    None, ctypes.c_int, ctypes.POINTER(ctypes.c_float), ctypes.c_void_p, ctypes.c_void_p
)
# typedef struct {
#     const char * type_name;
#     int blck_size;
#     size_t type_size;
#     bool is_quantized;
#     ggml_to_float_t to_float;
#     ggml_from_float_t from_float;
#     ggml_from_float_t from_float_reference;
#     ggml_vec_dot_t vec_dot;
#     enum ggml_type vec_dot_type;
# } ggml_type_traits_t;
class ggml_type_traits_t(ctypes.Structure):
    _fields_ = [
        ("type_name", ctypes.c_char_p),
        ("blck_size", ctypes.c_int),
        ("type_size", ctypes.c_size_t),
        ("is_quantized", ctypes.c_bool),
        ("to_float", ggml_to_float_t),
        ("from_float", ggml_from_float_t),
        ("from_float_reference", ggml_from_float_t),
        ("vec_dot", ggml_vec_dot_t),
        ("vec_dot_type", ctypes.c_int),
    ]
# ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type);
def ggml_internal_get_type_traits(type: Union[ctypes.c_int, int]) -> ggml_type_traits_t:
    return lib.ggml_internal_get_type_traits(type)
lib.ggml_internal_get_type_traits.argtypes = [ctypes.c_int]
lib.ggml_internal_get_type_traits.restype = ggml_type_traits_t
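
# Usage sketch (not part of the upstream header): inspect the traits of a ggml
# type. Pass one of the GGML_TYPE_* enum constants defined earlier in this
# module.
def _example_inspect_type_traits(ggml_type: int) -> None:
    traits = ggml_internal_get_type_traits(ggml_type)
    print("type name:", traits.type_name.decode("utf-8"))
    print("block size:", traits.blck_size)
    print("type size (bytes):", traits.type_size)
    print("quantized:", traits.is_quantized)
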
#####################################################
# GGML ALLOC API
# source: ggml-alloc.h
#####################################################
ggml_allocr_p = ctypes.c_void_p
# GGML_API struct ggml_allocr * ggml_allocr_new(void * data, size_t size, size_t alignment);
def ggml_allocr_new(
    data: ctypes.c_void_p,
    size: Union[ctypes.c_size_t, int],
    alignment: Union[ctypes.c_size_t, int],
) -> ggml_allocr_p:
    return lib.ggml_allocr_new(data, size, alignment)
lib.ggml_allocr_new.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.c_size_t]
lib.ggml_allocr_new.restype = ggml_allocr_p
# GGML_API struct ggml_allocr * ggml_allocr_new_measure(size_t alignment);
def ggml_allocr_new_measure(
    alignment: Union[ctypes.c_size_t, int],
) -> ggml_allocr_p:
    return lib.ggml_allocr_new_measure(alignment)
lib.ggml_allocr_new_measure.argtypes = [ctypes.c_size_t]
lib.ggml_allocr_new_measure.restype = ggml_allocr_p
# // tell the allocator to parse nodes following the order described in the list
# // you should call this if your graph are optimized to execute out-of-order
# GGML_API void ggml_allocr_set_parse_seq(struct ggml_allocr * alloc, const int * list, int n);
def ggml_allocr_set_parse_seq(
    alloc: ggml_allocr_p,
    list: CIntPointer,
    n: Union[ctypes.c_int, int],
):
    return lib.ggml_allocr_set_parse_seq(alloc, list, n)
lib.ggml_allocr_set_parse_seq.argtypes = [
    ggml_allocr_p,
    ctypes.POINTER(ctypes.c_int),
    ctypes.c_int,
]
lib.ggml_allocr_set_parse_seq.restype = None
# GGML_API void ggml_allocr_free(struct ggml_allocr * alloc);
def ggml_allocr_free(
    alloc: ggml_allocr_p,
):
    return lib.ggml_allocr_free(alloc)
lib.ggml_allocr_free.argtypes = [ggml_allocr_p]
lib.ggml_allocr_free.restype = None
# GGML_API bool ggml_allocr_is_measure(struct ggml_allocr * alloc);
def ggml_allocr_is_measure(
    alloc: ggml_allocr_p,
) -> bool:
    return lib.ggml_allocr_is_measure(alloc)
lib.ggml_allocr_is_measure.argtypes = [ggml_allocr_p]
lib.ggml_allocr_is_measure.restype = ctypes.c_bool
# GGML_API void ggml_allocr_reset(struct ggml_allocr * alloc);
def ggml_allocr_reset(
    alloc: ggml_allocr_p,
):
    return lib.ggml_allocr_reset(alloc)
lib.ggml_allocr_reset.argtypes = [ggml_allocr_p]
lib.ggml_allocr_reset.restype = None
# GGML_API void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor);
def ggml_allocr_alloc(
    alloc: ggml_allocr_p,
    tensor: ggml_tensor_p,
):
    return lib.ggml_allocr_alloc(alloc, tensor)
lib.ggml_allocr_alloc.argtypes = [ggml_allocr_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_allocr_alloc.restype = None
# GGML_API size_t ggml_allocr_alloc_graph(struct ggml_allocr * alloc, struct ggml_cgraph * graph);
def ggml_allocr_alloc_graph(
    alloc: ggml_allocr_p,
    graph: ggml_cgraph_p,
) -> int:
    return lib.ggml_allocr_alloc_graph(alloc, graph)
lib.ggml_allocr_alloc_graph.argtypes = [ggml_allocr_p, ctypes.POINTER(ggml_cgraph)]
lib.ggml_allocr_alloc_graph.restype = ctypes.c_size_t
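
# Usage sketch (not part of the upstream header): measure how much memory a
# graph needs with a "measure" allocator before creating a real allocator of
# that size with ggml_allocr_new. In real code the graph is typically rebuilt
# between the measure pass and the actual allocation pass; this sketch only
# shows the measuring step.
def _example_measure_graph(graph: ggml_cgraph_p, alignment: int = 32) -> int:
    measure = ggml_allocr_new_measure(alignment)
    needed = ggml_allocr_alloc_graph(measure, graph)  # required buffer size in bytes
    ggml_allocr_free(measure)
    return needed
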
#####################################################
# GGML CUDA API
# source: ggml-cuda.h
#####################################################
GGML_USE_CUBLAS = hasattr(lib, "ggml_init_cublas")
GGML_CUDA_MAX_DEVICES = 16
# GGML_API void ggml_init_cublas(void);
def ggml_init_cublas():
    return lib.ggml_init_cublas()
if GGML_USE_CUBLAS:
    lib.ggml_init_cublas.argtypes = []
    lib.ggml_init_cublas.restype = None
# void * ggml_cuda_host_malloc(size_t size);
def ggml_cuda_host_malloc(
    size: Union[ctypes.c_size_t, int],
) -> Optional[ctypes.c_void_p]:
    return lib.ggml_cuda_host_malloc(size)
if GGML_USE_CUBLAS:
    lib.ggml_cuda_host_malloc.argtypes = [ctypes.c_size_t]
    lib.ggml_cuda_host_malloc.restype = ctypes.c_void_p
# void ggml_cuda_host_free(void * ptr);
def ggml_cuda_host_free(
    ptr: ctypes.c_void_p,
):
    return lib.ggml_cuda_host_free(ptr)
if GGML_USE_CUBLAS:
    lib.ggml_cuda_host_free.argtypes = [ctypes.c_void_p]
    lib.ggml_cuda_host_free.restype = None
# GGML_API bool ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
def ggml_cuda_can_mul_mat(
    src0: ggml_tensor_p,
    src1: ggml_tensor_p,
    dst: ggml_tensor_p,
) -> bool:
    return lib.ggml_cuda_can_mul_mat(src0, src1, dst)
if GGML_USE_CUBLAS:
    lib.ggml_cuda_can_mul_mat.argtypes = [
        ctypes.POINTER(ggml_tensor),
        ctypes.POINTER(ggml_tensor),
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cuda_can_mul_mat.restype = ctypes.c_bool
# GGML_API void ggml_cuda_set_tensor_split(const float * tensor_split);
def ggml_cuda_set_tensor_split(
    tensor_split: CFloatArray,
):
    return lib.ggml_cuda_set_tensor_split(tensor_split)
if GGML_USE_CUBLAS:
    lib.ggml_cuda_set_tensor_split.argtypes = [ctypes.POINTER(ctypes.c_float)]
    lib.ggml_cuda_set_tensor_split.restype = None
# void ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor);
def ggml_cuda_transform_tensor(
    data: ctypes.c_void_p,
    tensor: ggml_tensor_p,
):
    return lib.ggml_cuda_transform_tensor(data, tensor)
if GGML_USE_CUBLAS:
    lib.ggml_cuda_transform_tensor.argtypes = [
        ctypes.c_void_p,
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cuda_transform_tensor.restype = None
# void ggml_cuda_free_data(struct ggml_tensor * tensor);
def ggml_cuda_free_data(
    tensor: ggml_tensor_p,
):
    return lib.ggml_cuda_free_data(tensor)
if GGML_USE_CUBLAS:
    lib.ggml_cuda_free_data.argtypes = [
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cuda_free_data.restype = None
# void ggml_cuda_assign_buffers(struct ggml_tensor * tensor);
def ggml_cuda_assign_buffers(
    tensor: ggml_tensor_p,
):
    return lib.ggml_cuda_assign_buffers(tensor)
if GGML_USE_CUBLAS:
    lib.ggml_cuda_assign_buffers.argtypes = [
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cuda_assign_buffers.restype = None
# void ggml_cuda_assign_buffers_no_scratch(struct ggml_tensor * tensor);
def ggml_cuda_assign_buffers_no_scratch(
    tensor: ggml_tensor_p,
):
    return lib.ggml_cuda_assign_buffers_no_scratch(tensor)
if GGML_USE_CUBLAS:
    lib.ggml_cuda_assign_buffers_no_scratch.argtypes = [
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cuda_assign_buffers_no_scratch.restype = None
# GGML_API void ggml_cuda_assign_buffers_force_inplace(struct ggml_tensor * tensor);
def ggml_cuda_assign_buffers_force_inplace(
    tensor: ggml_tensor_p,
):
    return lib.ggml_cuda_assign_buffers_force_inplace(tensor)
if GGML_USE_CUBLAS:
    lib.ggml_cuda_assign_buffers_force_inplace.argtypes = [
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cuda_assign_buffers_force_inplace.restype = None
# GGML_API void ggml_cuda_assign_buffers_no_alloc(struct ggml_tensor * tensor);
def ggml_cuda_assign_buffers_no_alloc(
    tensor: ggml_tensor_p,
):
    return lib.ggml_cuda_assign_buffers_no_alloc(tensor)
if GGML_USE_CUBLAS:
    lib.ggml_cuda_assign_buffers_no_alloc.argtypes = [
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cuda_assign_buffers_no_alloc.restype = None
# GGML_API void ggml_cuda_assign_scratch_offset(struct ggml_tensor * tensor, size_t offset);
def ggml_cuda_assign_scratch_offset(
    tensor: ggml_tensor_p,
    offset: Union[ctypes.c_size_t, int],
):
    return lib.ggml_cuda_assign_scratch_offset(tensor, offset)
if GGML_USE_CUBLAS:
    lib.ggml_cuda_assign_scratch_offset.argtypes = [
        ctypes.POINTER(ggml_tensor),
        ctypes.c_size_t,
    ]
    lib.ggml_cuda_assign_scratch_offset.restype = None
# void ggml_cuda_set_main_device(int main_device);
def ggml_cuda_set_main_device(
    main_device: Union[ctypes.c_int, int],
):
    return lib.ggml_cuda_set_main_device(main_device)
if GGML_USE_CUBLAS:
    lib.ggml_cuda_set_main_device.argtypes = [
        ctypes.c_int,
    ]
    lib.ggml_cuda_set_main_device.restype = None
# GGML_API void ggml_cuda_set_mul_mat_q(bool mul_mat_q);
def ggml_cuda_set_mul_mat_q(
    mul_mat_q: Union[ctypes.c_bool, bool],
):
    return lib.ggml_cuda_set_mul_mat_q(mul_mat_q)
if GGML_USE_CUBLAS:
    lib.ggml_cuda_set_mul_mat_q.argtypes = [
        ctypes.c_bool,
    ]
    lib.ggml_cuda_set_mul_mat_q.restype = None
# void ggml_cuda_set_scratch_size(size_t scratch_size);
def ggml_cuda_set_scratch_size(
    scratch_size: Union[ctypes.c_size_t, int],
):
    return lib.ggml_cuda_set_scratch_size(scratch_size)
if GGML_USE_CUBLAS:
    lib.ggml_cuda_set_scratch_size.argtypes = [
        ctypes.c_size_t,
    ]
    lib.ggml_cuda_set_scratch_size.restype = None
# void ggml_cuda_free_scratch(void);
def ggml_cuda_free_scratch():
    return lib.ggml_cuda_free_scratch()
if GGML_USE_CUBLAS:
    lib.ggml_cuda_free_scratch.argtypes = []
    lib.ggml_cuda_free_scratch.restype = None
# GGML_API bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor);
def ggml_cuda_compute_forward(
    params: ggml_compute_params_p,
    tensor: ggml_tensor_p,
) -> bool:
    return lib.ggml_cuda_compute_forward(params, tensor)
if GGML_USE_CUBLAS:
    lib.ggml_cuda_compute_forward.argtypes = [
        ctypes.POINTER(ggml_compute_params),
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cuda_compute_forward.restype = ctypes.c_bool
# GGML_API int ggml_cuda_get_device_count(void);
def ggml_cuda_get_device_count() -> int:
    return lib.ggml_cuda_get_device_count()
if GGML_USE_CUBLAS:
    lib.ggml_cuda_get_device_count.argtypes = []
    lib.ggml_cuda_get_device_count.restype = ctypes.c_int
# GGML_API void ggml_cuda_get_device_description(int device, char * description, size_t description_size);
def ggml_cuda_get_device_description(
    device: Union[ctypes.c_int, int],
    description: bytes,
    description_size: Union[ctypes.c_size_t, int],
):
    return lib.ggml_cuda_get_device_description(device, description, description_size)
if GGML_USE_CUBLAS:
    lib.ggml_cuda_get_device_description.argtypes = [
        ctypes.c_int,
        ctypes.c_char_p,
        ctypes.c_size_t,
    ]
    lib.ggml_cuda_get_device_description.restype = None
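
# Usage sketch (not part of the upstream header): query the CUDA devices when
# the library was built with cuBLAS support. Note that a writable ctypes char
# buffer is passed for the description (the `bytes` annotation above only
# reflects the C prototype).
def _example_list_cuda_devices() -> None:
    if not GGML_USE_CUBLAS:
        return
    for device in range(ggml_cuda_get_device_count()):
        description = ctypes.create_string_buffer(256)
        ggml_cuda_get_device_description(device, description, ctypes.sizeof(description))
        print(device, description.value.decode("utf-8"))
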
#####################################################
# GGML METAL API
# source: ggml-metal.h
#####################################################
GGML_USE_METAL = hasattr(lib, "ggml_metal_init")
# // max memory buffers that can be mapped to the device
# #define GGML_METAL_MAX_BUFFERS 16
GGML_METAL_MAX_BUFFERS = 16
# #define GGML_METAL_MAX_COMMAND_BUFFERS 32
GGML_METAL_MAX_COMMAND_BUFFERS = 32
# struct ggml_metal_context;
ggml_metal_context_p = ctypes.c_void_p
# struct ggml_metal_context * ggml_metal_init(int n_cb);
def ggml_metal_init(
    n_cb: Union[ctypes.c_int, int],
) -> ggml_metal_context_p:
    return lib.ggml_metal_init(n_cb)
if GGML_USE_METAL:
    lib.ggml_metal_init.argtypes = [ctypes.c_int]
    lib.ggml_metal_init.restype = ggml_metal_context_p
# void ggml_metal_free(struct ggml_metal_context * ctx);
def ggml_metal_free(
    ctx: ggml_metal_context_p,
):
    return lib.ggml_metal_free(ctx)
if GGML_USE_METAL:
    lib.ggml_metal_free.argtypes = [ggml_metal_context_p]
    lib.ggml_metal_free.restype = None
# // set the number of command buffers to use
# void ggml_metal_set_n_cb(struct ggml_metal_context * ctx, int n_cb);
def ggml_metal_set_n_cb(
    ctx: ggml_metal_context_p,
    n_cb: Union[ctypes.c_int, int],
):
    return lib.ggml_metal_set_n_cb(ctx, n_cb)
if GGML_USE_METAL:
    lib.ggml_metal_set_n_cb.argtypes = [ggml_metal_context_p, ctypes.c_int]
    lib.ggml_metal_set_n_cb.restype = None
# // creates a mapping between a host memory buffer and a device memory buffer
# // - make sure to map all buffers used in the graph before calling ggml_metal_graph_compute
# // - the mapping is used during computation to determine the arguments of the compute kernels
# // - you don't need to keep the host memory buffer allocated as it is never accessed by Metal
# // - max_size specifies the maximum size of a tensor and is used to create shared views such
# //   that it is guaranteed that the tensor will fit in at least one of the views
# //
# bool ggml_metal_add_buffer(
#     struct ggml_metal_context * ctx,
#     const char * name,
#     void * data,
#     size_t size,
#     size_t max_size);
def ggml_metal_add_buffer(
    ctx: ggml_metal_context_p,
    name: bytes,
    data: ctypes.c_void_p,
    size: Union[ctypes.c_size_t, int],
    max_size: Union[ctypes.c_size_t, int],
) -> bool:
    return lib.ggml_metal_add_buffer(ctx, name, data, size, max_size)
if GGML_USE_METAL:
    lib.ggml_metal_add_buffer.argtypes = [
        ggml_metal_context_p,
        ctypes.c_char_p,
        ctypes.c_void_p,
        ctypes.c_size_t,
        ctypes.c_size_t,
    ]
    lib.ggml_metal_add_buffer.restype = ctypes.c_bool
# // set data from host memory into the device
# void ggml_metal_set_tensor(struct ggml_metal_context * ctx, struct ggml_tensor * t);
def ggml_metal_set_tensor(
    ctx: ggml_metal_context_p,
    t: ggml_tensor_p,
):
    return lib.ggml_metal_set_tensor(ctx, t)
if GGML_USE_METAL:
    lib.ggml_metal_set_tensor.argtypes = [
        ggml_metal_context_p,
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_metal_set_tensor.restype = None
# // get data from the device into host memory
# void ggml_metal_get_tensor(struct ggml_metal_context * ctx, struct ggml_tensor * t);
def ggml_metal_get_tensor(
    ctx: ggml_metal_context_p,
    t: ggml_tensor_p,
):
    return lib.ggml_metal_get_tensor(ctx, t)
if GGML_USE_METAL:
    lib.ggml_metal_get_tensor.argtypes = [
        ggml_metal_context_p,
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_metal_get_tensor.restype = None
# // try to find operations that can be run concurrently in the graph
# // you should run it again if the topology of your graph changes
# void ggml_metal_graph_find_concurrency(struct ggml_metal_context * ctx, struct ggml_cgraph * gf, bool check_mem);
def ggml_metal_graph_find_concurrency(
    ctx: ggml_metal_context_p,
    gf: ggml_cgraph_p,
    check_mem: Union[ctypes.c_bool, bool],
):
    return lib.ggml_metal_graph_find_concurrency(ctx, gf, check_mem)
if GGML_USE_METAL:
    lib.ggml_metal_graph_find_concurrency.argtypes = [
        ggml_metal_context_p,
        ctypes.POINTER(ggml_cgraph),
        ctypes.c_bool,
    ]
    lib.ggml_metal_graph_find_concurrency.restype = None
# // if the graph has been optimized for concurrently dispatch, return length of the concur_list if optimized
# int ggml_metal_if_optimized(struct ggml_metal_context * ctx);
def ggml_metal_if_optimized(
    ctx: ggml_metal_context_p,
) -> int:
    return lib.ggml_metal_if_optimized(ctx)
if GGML_USE_METAL:
    lib.ggml_metal_if_optimized.argtypes = [
        ggml_metal_context_p,
    ]
    lib.ggml_metal_if_optimized.restype = ctypes.c_int
# // output the concur_list for ggml_alloc
# int * ggml_metal_get_concur_list(struct ggml_metal_context * ctx);
def ggml_metal_get_concur_list(
    ctx: ggml_metal_context_p,
) -> CIntPointer:
    return lib.ggml_metal_get_concur_list(ctx)
if GGML_USE_METAL:
    lib.ggml_metal_get_concur_list.argtypes = [
        ggml_metal_context_p,
    ]
    lib.ggml_metal_get_concur_list.restype = ctypes.POINTER(ctypes.c_int)
# // same as ggml_graph_compute but uses Metal
# // creates gf->n_threads command buffers in parallel
# void ggml_metal_graph_compute(struct ggml_metal_context * ctx, struct ggml_cgraph * gf);
def ggml_metal_graph_compute(
    ctx: ggml_metal_context_p,
    gf: ggml_cgraph_p,
):
    return lib.ggml_metal_graph_compute(ctx, gf)
if GGML_USE_METAL:
    lib.ggml_metal_graph_compute.argtypes = [
        ggml_metal_context_p,
        ctypes.POINTER(ggml_cgraph),
    ]
    lib.ggml_metal_graph_compute.restype = None
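
# Usage sketch (not part of the upstream header): minimal Metal flow -- create a
# context, map the host buffer that backs the graph's tensors, run the graph,
# and read a result tensor back. Buffer and graph construction are assumed to
# happen elsewhere; this only shows the call order of the bindings above.
def _example_metal_compute(
    gf: ggml_cgraph_p,
    data: ctypes.c_void_p,
    size: int,
    result: ggml_tensor_p,
) -> None:
    if not GGML_USE_METAL:
        return
    ctx_metal = ggml_metal_init(1)
    ggml_metal_add_buffer(ctx_metal, b"data", data, size, size)
    ggml_metal_graph_compute(ctx_metal, gf)
    ggml_metal_get_tensor(ctx_metal, result)  # copy the result back to host memory
    ggml_metal_free(ctx_metal)
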
#####################################################
# GGML OPENCL API
# source: ggml-opencl.h
#####################################################
GGML_USE_CLBLAST = hasattr(lib, "ggml_cl_init")
# void ggml_cl_init(void);
def ggml_cl_init():
    return lib.ggml_cl_init()
if GGML_USE_CLBLAST:
    lib.ggml_cl_init.argtypes = []
    lib.ggml_cl_init.restype = None
# void ggml_cl_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
def ggml_cl_mul(
    src0: ggml_tensor_p,
    src1: ggml_tensor_p,
    dst: ggml_tensor_p,
):
    return lib.ggml_cl_mul(src0, src1, dst)
if GGML_USE_CLBLAST:
    lib.ggml_cl_mul.argtypes = [
        ctypes.POINTER(ggml_tensor),
        ctypes.POINTER(ggml_tensor),
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cl_mul.restype = None
# bool ggml_cl_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
def ggml_cl_can_mul_mat(
    src0: ggml_tensor_p,
    src1: ggml_tensor_p,
    dst: ggml_tensor_p,
) -> bool:
    return lib.ggml_cl_can_mul_mat(src0, src1, dst)
if GGML_USE_CLBLAST:
    lib.ggml_cl_can_mul_mat.argtypes = [
        ctypes.POINTER(ggml_tensor),
        ctypes.POINTER(ggml_tensor),
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cl_can_mul_mat.restype = ctypes.c_bool
# size_t ggml_cl_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
def ggml_cl_mul_mat_get_wsize(
    src0: ggml_tensor_p,
    src1: ggml_tensor_p,
    dst: ggml_tensor_p,
) -> int:
    return lib.ggml_cl_mul_mat_get_wsize(src0, src1, dst)
if GGML_USE_CLBLAST:
    lib.ggml_cl_mul_mat_get_wsize.argtypes = [
        ctypes.POINTER(ggml_tensor),
        ctypes.POINTER(ggml_tensor),
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cl_mul_mat_get_wsize.restype = ctypes.c_size_t
# void ggml_cl_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, void * wdata, size_t wsize);
def ggml_cl_mul_mat(
    src0: ggml_tensor_p,
    src1: ggml_tensor_p,
    dst: ggml_tensor_p,
    wdata: ctypes.c_void_p,
    wsize: Union[ctypes.c_size_t, int],
):
    return lib.ggml_cl_mul_mat(src0, src1, dst, wdata, wsize)
if GGML_USE_CLBLAST:
    lib.ggml_cl_mul_mat.argtypes = [
        ctypes.POINTER(ggml_tensor),
        ctypes.POINTER(ggml_tensor),
        ctypes.POINTER(ggml_tensor),
        ctypes.c_void_p,
        ctypes.c_size_t,
    ]
    lib.ggml_cl_mul_mat.restype = None
# void * ggml_cl_host_malloc(size_t size);
def ggml_cl_host_malloc(
    size: Union[ctypes.c_size_t, int],
) -> Optional[ctypes.c_void_p]:
    return lib.ggml_cl_host_malloc(size)
if GGML_USE_CLBLAST:
    lib.ggml_cl_host_malloc.argtypes = [
        ctypes.c_size_t,
    ]
    lib.ggml_cl_host_malloc.restype = ctypes.c_void_p
# void ggml_cl_host_free(void * ptr);
def ggml_cl_host_free(
    ptr: ctypes.c_void_p,
):
    return lib.ggml_cl_host_free(ptr)
if GGML_USE_CLBLAST:
    lib.ggml_cl_host_free.argtypes = [
        ctypes.c_void_p,
    ]
    lib.ggml_cl_host_free.restype = None
# void ggml_cl_free_data(const struct ggml_tensor* tensor);
def ggml_cl_free_data(
    tensor: ggml_tensor_p,
):
    return lib.ggml_cl_free_data(tensor)
if GGML_USE_CLBLAST:
    lib.ggml_cl_free_data.argtypes = [
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cl_free_data.restype = None
# void ggml_cl_transform_tensor(void * data, struct ggml_tensor * tensor);
def ggml_cl_transform_tensor(
    data: ctypes.c_void_p,
    tensor: ggml_tensor_p,
):
    return lib.ggml_cl_transform_tensor(data, tensor)
if GGML_USE_CLBLAST:
    lib.ggml_cl_transform_tensor.argtypes = [
        ctypes.c_void_p,
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cl_transform_tensor.restype = None