third_party_ggml.py 211 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054
  1. """This module is the core of the ggml-python library, it exposes a low-level [ctypes](https://docs.python.org/3/library/ctypes.html)-based interface for ggml.
  2. Structures and functions in the `ggml.ggml` module map directly to the original ggml C library and
  3. they operate at a fairly low level.
  4. No additional runtime checks are performed nor is memory management handled automatically.
  5. You've been warned :).
  6. With that in mind, here are some useful things to keep in mind:
  7. - Functions accept both ctypes types (c_int, c_bool, c_float, etc.) and Python types (int, bool, float, etc.) as parameters.
  8. - Functions return Python types for simple values (int, bool, float, etc.) and ctypes types for complex values ([ggml_context_p][ggml.ggml_context_p], [ggml_tensor_p][ggml.ggml_tensor_p], etc.).
  9. - Memory management is the responsibility of the user. The user must call [ggml.ggml_free][] on the context after calling [ggml.ggml_init][].
  10. Example
  11. ```python
  12. import ggml
  13. import ctypes
  14. # Allocate a new context with 16 MB of memory
  15. params = ggml.ggml_init_params(mem_size=16 * 1024 * 1024, mem_buffer=None)
  16. ctx = ggml.ggml_init(params=params)
  17. # Instantiate tensors
  18. x = ggml.ggml_new_tensor_1d(ctx, ggml.GGML_TYPE_F32, 1)
  19. a = ggml.ggml_new_tensor_1d(ctx, ggml.GGML_TYPE_F32, 1)
  20. b = ggml.ggml_new_tensor_1d(ctx, ggml.GGML_TYPE_F32, 1)
  21. # Use ggml operations to build a computational graph
  22. x2 = ggml.ggml_mul(ctx, x, x)
  23. f = ggml.ggml_add(ctx, ggml.ggml_mul(ctx, a, x2), b)
  24. gf = ggml.ggml_build_forward(f)
  25. # Set the input values
  26. ggml.ggml_set_f32(x, 2.0)
  27. ggml.ggml_set_f32(a, 3.0)
  28. ggml.ggml_set_f32(b, 4.0)
  29. # Compute the graph
  30. ggml.ggml_graph_compute_with_ctx(ctx, ctypes.pointer(gf), 1)
  31. # Get the output value
  32. output = ggml.ggml_get_f32_1d(f, 0)
  33. assert output == 16.0
  34. # Free the context
  35. ggml.ggml_free(ctx)
  36. ```
  37. """
  38. import os
  39. import sys
  40. import ctypes
  41. import pathlib
  42. import importlib.resources
  43. import numpy as np
  44. from typing import Union
  45. from typing import Type
  46. from typing import Callable
  47. from typing import Tuple
  48. from typing import Dict
  49. from typing_extensions import Self
  50. from typing import Any
  51. from pathlib import Path
  52. from typing import List, Optional, Sequence, Union
  53. from typing_extensions import TypeAlias
# C NULL pointer constant: ctypes marshals None as NULL for pointer arguments.
NULL: ctypes.c_void_p = None # type: ignore
# Alignment (bytes) used when sizing memory pools from Python.
GGML_MEM_ALIGN = 16
  56. # Load the library
  57. def load_shared_library(base_path: Path, lib_base_name: str):
  58. # Construct the paths to the possible shared library names
  59. # Searching for the library in the current directory under the name "libggml" (default name
  60. # for ggml) and "ggml" (default name for this repo)
  61. lib_names: List[str] = [
  62. f"lib{lib_base_name}.so",
  63. f"lib{lib_base_name}.dylib",
  64. f"{lib_base_name}.dll",
  65. ]
  66. path = None
  67. cdll_args = dict() # type: ignore
  68. # Add the library directory to the DLL search path on Windows (if needed)
  69. if sys.platform == "win32" and sys.version_info >= (3, 8):
  70. os.add_dll_directory(str(base_path))
  71. cdll_args["winmode"] = 0
  72. for lib_name in lib_names:
  73. # Try to load the shared library, handling potential errors
  74. path = base_path / lib_name
  75. if not path.exists():
  76. continue
  77. try:
  78. return ctypes.CDLL(str(path), **cdll_args)
  79. except Exception as e:
  80. raise RuntimeError(f"Failed to load shared library '{path}': {e}")
  81. raise FileNotFoundError(
  82. f"Shared library with base name '{lib_base_name}' not found in {base_path}"
  83. )
# Resolve and load the compiled ggml shared library relative to this file.
# NOTE: this runs at import time; importing the module fails if the library
# has not been built into build/src.
base_path = Path(__file__).parent.resolve() / "build/src"
lib_base_name = "ggml"
lib = load_shared_library(base_path, lib_base_name)
#####################################################
# GGML Utility Types
#####################################################
# String type aliases for common ctypes array/pointer shapes used in signatures below.
CFloatArray: TypeAlias = "ctypes.Array[ctypes.c_float]"
CInt64Array: TypeAlias = "ctypes.Array[ctypes.c_int64]"
CIntPointer: TypeAlias = "ctypes._Pointer[ctypes.c_int]" # type: ignore
CCharPointer: TypeAlias = "ctypes._Pointer[ctypes.c_char]" # type: ignore
#####################################################
# source: ggml.h
# GGML API
#####################################################
# #define GGML_FILE_MAGIC 0x67676d6c // "ggml"
GGML_FILE_MAGIC = int("0x67676d6c", 16)  # ASCII "ggml"
# #define GGML_FILE_VERSION 1
GGML_FILE_VERSION = 1
# #define GGML_QNT_VERSION 2 // bump this on quantization format changes
GGML_QNT_VERSION = 2
# #define GGML_QNT_VERSION_FACTOR 1000 // do not change this
GGML_QNT_VERSION_FACTOR = 1000
# #define GGML_MAX_DIMS 4
GGML_MAX_DIMS = 4
# #define GGML_MAX_NODES 4096
GGML_MAX_NODES = 4096
# #define GGML_MAX_PARAMS 256
GGML_MAX_PARAMS = 256
# #define GGML_MAX_CONTEXTS 64
GGML_MAX_CONTEXTS = 64
# #define GGML_MAX_SRC 6
GGML_MAX_SRC = 6
# #define GGML_MAX_NAME 64
GGML_MAX_NAME = 64
# #define GGML_MAX_OP_PARAMS 32
GGML_MAX_OP_PARAMS = 32
# #define GGML_DEFAULT_N_THREADS 4
GGML_DEFAULT_N_THREADS = 4
  122. # #if UINTPTR_MAX == 0XFFFFFFFF
  123. # #define GGML_MEMALIGN 4
  124. # #else
  125. # # define GGML_MEMALIGN 16
  126. # #endif
  127. GGML_MEMALIGN = (
  128. 16 if ctypes.sizeof(ctypes.c_void_p) == 4 else 32
  129. ) # FIXME: Check if this is correct
# #define GGML_EXIT_SUCCESS 0
GGML_EXIT_SUCCESS = 0
# #define GGML_EXIT_ABORTED 1
GGML_EXIT_ABORTED = 1
# #define GGUF_MAGIC 0x46554747 // "GGUF"
GGUF_MAGIC = int("0x46554747", 16)  # ASCII "GGUF"
# #define GGUF_VERSION 2
GGUF_VERSION = 2
# #define GGUF_DEFAULT_ALIGNMENT 32
GGUF_DEFAULT_ALIGNMENT = 32
# TODO: Check if this is correct
# typedef uint16_t ggml_fp16_t;
# Half-precision values cross the C boundary as raw uint16 bit patterns.
ggml_fp16_t = ctypes.c_uint16
CFP16Array: TypeAlias = "ctypes.Array[ggml_fp16_t]"
# GGML_API float ggml_fp16_to_fp32(ggml_fp16_t x);
def ggml_fp16_to_fp32(x: Union[ggml_fp16_t, int]) -> float:
    """Convert one half-precision value (uint16 bit pattern) to a Python float."""
    return lib.ggml_fp16_to_fp32(x)
lib.ggml_fp16_to_fp32.argtypes = [ggml_fp16_t]
lib.ggml_fp16_to_fp32.restype = ctypes.c_float
# GGML_API ggml_fp16_t ggml_fp32_to_fp16(float x);
def ggml_fp32_to_fp16(x: Union[ctypes.c_float, float]) -> int:
    """Convert one fp32 value to half precision, returned as a uint16 bit pattern."""
    return lib.ggml_fp32_to_fp16(x)
lib.ggml_fp32_to_fp16.argtypes = [ctypes.c_float]
lib.ggml_fp32_to_fp16.restype = ggml_fp16_t
  154. # GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, size_t n);
  155. def ggml_fp16_to_fp32_row(
  156. x: CFP16Array,
  157. y: CFloatArray,
  158. n: Union[ctypes.c_int, int],
  159. ) -> None:
  160. return lib.ggml_fp16_to_fp32_row(x, y, n)
  161. lib.ggml_fp16_to_fp32_row.argtypes = [
  162. ctypes.POINTER(ggml_fp16_t),
  163. ctypes.POINTER(ctypes.c_float),
  164. ctypes.c_int,
  165. ]
  166. lib.ggml_fp16_to_fp32_row.restype = None
  167. # GGML_API void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, size_t n);
  168. def ggml_fp32_to_fp16_row(
  169. x: CFloatArray,
  170. y: CFP16Array,
  171. n: Union[ctypes.c_int, int],
  172. ) -> None:
  173. return lib.ggml_fp32_to_fp16_row(x, y, n)
  174. lib.ggml_fp32_to_fp16_row.argtypes = [
  175. ctypes.POINTER(ctypes.c_float),
  176. ctypes.POINTER(ggml_fp16_t),
  177. ctypes.c_int,
  178. ]
  179. lib.ggml_fp32_to_fp16_row.restype = None
# struct ggml_context;
ggml_context_p = ctypes.c_void_p
"""Opaque pointer to a ggml_context.
ggml_context structs are not accessed directly instead they must be created using [ggml_init](ggml.ggml_init) and freed using [ggml_free](ggml.ggml_free)."""
# enum ggml_type {
# GGML_TYPE_F32 = 0,
# GGML_TYPE_F16 = 1,
# GGML_TYPE_Q4_0 = 2,
# GGML_TYPE_Q4_1 = 3,
# // GGML_TYPE_Q4_2 = 4, support has been removed
# // GGML_TYPE_Q4_3 (5) support has been removed
# GGML_TYPE_Q5_0 = 6,
# GGML_TYPE_Q5_1 = 7,
# GGML_TYPE_Q8_0 = 8,
# GGML_TYPE_Q8_1 = 9,
# GGML_TYPE_Q2_K = 10,
# GGML_TYPE_Q3_K = 11,
# GGML_TYPE_Q4_K = 12,
# GGML_TYPE_Q5_K = 13,
# GGML_TYPE_Q6_K = 14,
# GGML_TYPE_Q8_K = 15,
# GGML_TYPE_I8,
# GGML_TYPE_I16,
# GGML_TYPE_I32,
# GGML_TYPE_COUNT,
# };
# Tensor element types. Values 4 and 5 are intentionally skipped (removed types).
GGML_TYPE_F32 = 0
GGML_TYPE_F16 = 1
GGML_TYPE_Q4_0 = 2
GGML_TYPE_Q4_1 = 3
GGML_TYPE_Q5_0 = 6
GGML_TYPE_Q5_1 = 7
GGML_TYPE_Q8_0 = 8
GGML_TYPE_Q8_1 = 9
GGML_TYPE_Q2_K = 10
GGML_TYPE_Q3_K = 11
GGML_TYPE_Q4_K = 12
GGML_TYPE_Q5_K = 13
GGML_TYPE_Q6_K = 14
GGML_TYPE_Q8_K = 15
GGML_TYPE_I8 = 16
GGML_TYPE_I16 = 17
GGML_TYPE_I32 = 18
GGML_TYPE_COUNT = 19
# enum ggml_backend {
# GGML_BACKEND_CPU = 0,
# GGML_BACKEND_GPU = 10,
# GGML_BACKEND_GPU_SPLIT = 20,
# };
GGML_BACKEND_CPU = 0
GGML_BACKEND_GPU = 10
GGML_BACKEND_GPU_SPLIT = 20
# // model file types
# enum ggml_ftype {
# GGML_FTYPE_UNKNOWN = -1,
# GGML_FTYPE_ALL_F32 = 0,
# GGML_FTYPE_MOSTLY_F16 = 1, // except 1d tensors
# GGML_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors
# GGML_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors
# GGML_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16
# GGML_FTYPE_MOSTLY_Q8_0 = 7, // except 1d tensors
# GGML_FTYPE_MOSTLY_Q5_0 = 8, // except 1d tensors
# GGML_FTYPE_MOSTLY_Q5_1 = 9, // except 1d tensors
# GGML_FTYPE_MOSTLY_Q2_K = 10, // except 1d tensors
# GGML_FTYPE_MOSTLY_Q3_K = 11, // except 1d tensors
# GGML_FTYPE_MOSTLY_Q4_K = 12, // except 1d tensors
# GGML_FTYPE_MOSTLY_Q5_K = 13, // except 1d tensors
# GGML_FTYPE_MOSTLY_Q6_K = 14, // except 1d tensors
# };
GGML_FTYPE_UNKNOWN = -1
GGML_FTYPE_ALL_F32 = 0
GGML_FTYPE_MOSTLY_F16 = 1
GGML_FTYPE_MOSTLY_Q4_0 = 2
GGML_FTYPE_MOSTLY_Q4_1 = 3
GGML_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4
GGML_FTYPE_MOSTLY_Q8_0 = 7
GGML_FTYPE_MOSTLY_Q5_0 = 8
GGML_FTYPE_MOSTLY_Q5_1 = 9
GGML_FTYPE_MOSTLY_Q2_K = 10
GGML_FTYPE_MOSTLY_Q3_K = 11
GGML_FTYPE_MOSTLY_Q4_K = 12
GGML_FTYPE_MOSTLY_Q5_K = 13
GGML_FTYPE_MOSTLY_Q6_K = 14
# // available tensor operations:
# enum ggml_op {
# GGML_OP_NONE = 0,
# GGML_OP_DUP,
# GGML_OP_ADD,
# GGML_OP_ADD1,
# GGML_OP_ACC,
# GGML_OP_SUB,
# GGML_OP_MUL,
# GGML_OP_DIV,
# GGML_OP_SQR,
# GGML_OP_SQRT,
# GGML_OP_LOG,
# GGML_OP_SUM,
# GGML_OP_SUM_ROWS,
# GGML_OP_MEAN,
# GGML_OP_ARGMAX,
# GGML_OP_REPEAT,
# GGML_OP_REPEAT_BACK,
# GGML_OP_CONCAT,
# GGML_OP_SILU_BACK,
# GGML_OP_NORM, // normalize
# GGML_OP_RMS_NORM,
# GGML_OP_RMS_NORM_BACK,
# GGML_OP_GROUP_NORM,
# GGML_OP_MUL_MAT,
# GGML_OP_OUT_PROD,
# GGML_OP_SCALE,
# GGML_OP_SET,
# GGML_OP_CPY,
# GGML_OP_CONT,
# GGML_OP_RESHAPE,
# GGML_OP_VIEW,
# GGML_OP_PERMUTE,
# GGML_OP_TRANSPOSE,
# GGML_OP_GET_ROWS,
# GGML_OP_GET_ROWS_BACK,
# GGML_OP_DIAG,
# GGML_OP_DIAG_MASK_INF,
# GGML_OP_DIAG_MASK_ZERO,
# GGML_OP_SOFT_MAX,
# GGML_OP_SOFT_MAX_BACK,
# GGML_OP_ROPE,
# GGML_OP_ROPE_BACK,
# GGML_OP_ALIBI,
# GGML_OP_CLAMP,
# GGML_OP_CONV_1D,
# GGML_OP_CONV_2D,
# GGML_OP_CONV_TRANSPOSE_2D,
# GGML_OP_POOL_1D,
# GGML_OP_POOL_2D,
# GGML_OP_UPSCALE, // nearest interpolate
# GGML_OP_FLASH_ATTN,
# GGML_OP_FLASH_FF,
# GGML_OP_FLASH_ATTN_BACK,
# GGML_OP_WIN_PART,
# GGML_OP_WIN_UNPART,
# GGML_OP_GET_REL_POS,
# GGML_OP_ADD_REL_POS,
# GGML_OP_UNARY,
# GGML_OP_MAP_UNARY,
# GGML_OP_MAP_BINARY,
# GGML_OP_MAP_CUSTOM1_F32,
# GGML_OP_MAP_CUSTOM2_F32,
# GGML_OP_MAP_CUSTOM3_F32,
# GGML_OP_MAP_CUSTOM1,
# GGML_OP_MAP_CUSTOM2,
# GGML_OP_MAP_CUSTOM3,
# GGML_OP_CROSS_ENTROPY_LOSS,
# GGML_OP_CROSS_ENTROPY_LOSS_BACK,
# GGML_OP_COUNT,
# };
# Operation codes; values are sequential and must track the C enum order above.
GGML_OP_NONE = 0
GGML_OP_DUP = 1
GGML_OP_ADD = 2
GGML_OP_ADD1 = 3
GGML_OP_ACC = 4
GGML_OP_SUB = 5
GGML_OP_MUL = 6
GGML_OP_DIV = 7
GGML_OP_SQR = 8
GGML_OP_SQRT = 9
GGML_OP_LOG = 10
GGML_OP_SUM = 11
GGML_OP_SUM_ROWS = 12
GGML_OP_MEAN = 13
GGML_OP_ARGMAX = 14
GGML_OP_REPEAT = 15
GGML_OP_REPEAT_BACK = 16
GGML_OP_CONCAT = 17
GGML_OP_SILU_BACK = 18
GGML_OP_NORM = 19
GGML_OP_RMS_NORM = 20
GGML_OP_RMS_NORM_BACK = 21
GGML_OP_GROUP_NORM = 22
GGML_OP_MUL_MAT = 23
GGML_OP_OUT_PROD = 24
GGML_OP_SCALE = 25
GGML_OP_SET = 26
GGML_OP_CPY = 27
GGML_OP_CONT = 28
GGML_OP_RESHAPE = 29
GGML_OP_VIEW = 30
GGML_OP_PERMUTE = 31
GGML_OP_TRANSPOSE = 32
GGML_OP_GET_ROWS = 33
GGML_OP_GET_ROWS_BACK = 34
GGML_OP_DIAG = 35
GGML_OP_DIAG_MASK_INF = 36
GGML_OP_DIAG_MASK_ZERO = 37
GGML_OP_SOFT_MAX = 38
GGML_OP_SOFT_MAX_BACK = 39
GGML_OP_ROPE = 40
GGML_OP_ROPE_BACK = 41
GGML_OP_ALIBI = 42
GGML_OP_CLAMP = 43
GGML_OP_CONV_1D = 44
GGML_OP_CONV_2D = 45
GGML_OP_CONV_TRANSPOSE_2D = 46
GGML_OP_POOL_1D = 47
GGML_OP_POOL_2D = 48
GGML_OP_UPSCALE = 49
GGML_OP_FLASH_ATTN = 50
GGML_OP_FLASH_FF = 51
GGML_OP_FLASH_ATTN_BACK = 52
GGML_OP_WIN_PART = 53
GGML_OP_WIN_UNPART = 54
GGML_OP_GET_REL_POS = 55
GGML_OP_ADD_REL_POS = 56
GGML_OP_UNARY = 57
GGML_OP_MAP_UNARY = 58
GGML_OP_MAP_BINARY = 59
GGML_OP_MAP_CUSTOM1_F32 = 60
GGML_OP_MAP_CUSTOM2_F32 = 61
GGML_OP_MAP_CUSTOM3_F32 = 62
GGML_OP_MAP_CUSTOM1 = 63
GGML_OP_MAP_CUSTOM2 = 64
GGML_OP_MAP_CUSTOM3 = 65
GGML_OP_CROSS_ENTROPY_LOSS = 66
GGML_OP_CROSS_ENTROPY_LOSS_BACK = 67
GGML_OP_COUNT = 68
# enum ggml_unary_op {
# GGML_UNARY_OP_ABS,
# GGML_UNARY_OP_SGN,
# GGML_UNARY_OP_NEG,
# GGML_UNARY_OP_STEP,
# GGML_UNARY_OP_TANH,
# GGML_UNARY_OP_ELU,
# GGML_UNARY_OP_RELU,
# GGML_UNARY_OP_GELU,
# GGML_UNARY_OP_GELU_QUICK,
# GGML_UNARY_OP_SILU,
# };
GGML_UNARY_OP_ABS = 0
GGML_UNARY_OP_SGN = 1
GGML_UNARY_OP_NEG = 2
GGML_UNARY_OP_STEP = 3
GGML_UNARY_OP_TANH = 4
GGML_UNARY_OP_ELU = 5
GGML_UNARY_OP_RELU = 6
GGML_UNARY_OP_GELU = 7
GGML_UNARY_OP_GELU_QUICK = 8
GGML_UNARY_OP_SILU = 9
# enum ggml_object_type {
# GGML_OBJECT_TENSOR,
# GGML_OBJECT_GRAPH,
# GGML_OBJECT_WORK_BUFFER
# };
GGML_OBJECT_TENSOR = 0
GGML_OBJECT_GRAPH = 1
GGML_OBJECT_WORK_BUFFER = 2
# // ggml object
# struct ggml_object {
# size_t offs;
# size_t size;
# struct ggml_object * next;
# enum ggml_object_type type;
# char padding[4];
# };
class ggml_object(ctypes.Structure):
    """Mirror of `struct ggml_object` from ggml.h.

    Fields are assigned after the class statement because `next` is a
    self-referential pointer.
    """
    pass
ggml_object._fields_ = [
    ("offs", ctypes.c_size_t),
    ("size", ctypes.c_size_t),
    ("next", ctypes.POINTER(ggml_object)),  # self-referential linked-list pointer
    ("type", ctypes.c_int),  # enum ggml_object_type
    ("padding", ctypes.c_char * 4),
]
ggml_object_p: TypeAlias = "ctypes._Pointer[ggml_object]" # type: ignore
GGML_OBJECT_SIZE = ctypes.sizeof(ggml_object)
# // n-dimensional tensor
# struct ggml_tensor {
# enum ggml_type type;
# enum ggml_backend backend;
# int n_dims;
# int64_t ne[GGML_MAX_DIMS]; // number of elements
# size_t nb[GGML_MAX_DIMS]; // stride in bytes:
# // nb[0] = sizeof(type)
# // nb[1] = nb[0] * ne[0] + padding
# // nb[i] = nb[i-1] * ne[i-1]
# // compute data
# enum ggml_op op;
# // op params - allocated as int32_t for alignment
# int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t)];
# bool is_param;
# struct ggml_tensor * grad;
# struct ggml_tensor * src[GGML_MAX_SRC];
# // performance
# int perf_runs;
# int64_t perf_cycles;
# int64_t perf_time_us;
# struct ggml_tensor * view_src;
# size_t view_offs;
# void * data;
# char name[GGML_MAX_NAME];
# void * extra; // extra things e.g. for ggml-cuda.cu
# char padding[4];
# };
class ggml_tensor(ctypes.Structure):
    """n-dimensional tensor
    Attributes:
        type (int): ggml_type
        backend (int): ggml_backend
        n_dims (int): number of dimensions
        ne (ctypes.Array[ctypes.c_int64]): number of elements in each dimension
        nb (ctypes.Array[ctypes.c_size_t]): stride in bytes for each dimension
        op (int): ggml operation
        op_params (ctypes.Array[ctypes.c_int32]): `GGML_MAX_OP_PARAMS`-length array of operation parameters
        is_param (bool): is this a parameter tensor
        grad (ggml_tensor_p): reference to gradient tensor
        src (ctypes.Array[ggml_tensor_p]): `GGML_MAX_SRC`-length array of source tensors
        perf_runs (int): number of performance runs
        perf_cycles (int): number of cycles
        perf_time_us (int): time in microseconds
        view_src (ggml_tensor_p): pointer to tensor if this tensor is a view, None if the tensor is not a view
        view_offs (ctypes.c_size_t): offset into the data pointer of the view tensor
        data (ctypes.c_void_p): reference to raw tensor data
        name (bytes): name of tensor
        extra (ctypes.c_void_p): extra data (e.g. for CUDA)
    """
    pass
# Field layout must match `struct ggml_tensor` in ggml.h exactly; assigned
# after the class statement because `grad`/`src`/`view_src` are self-referential.
ggml_tensor._fields_ = [
    ("type", ctypes.c_int),
    ("backend", ctypes.c_int),
    ("n_dims", ctypes.c_int),
    ("ne", ctypes.c_int64 * GGML_MAX_DIMS),
    ("nb", ctypes.c_size_t * GGML_MAX_DIMS),
    ("op", ctypes.c_int),
    (
        "op_params",
        ctypes.c_int32 * (GGML_MAX_OP_PARAMS // ctypes.sizeof(ctypes.c_int32)),
    ),
    ("is_param", ctypes.c_bool),
    ("grad", ctypes.POINTER(ggml_tensor)),
    ("src", ctypes.POINTER(ggml_tensor) * GGML_MAX_SRC),
    ("perf_runs", ctypes.c_int),
    ("perf_cycles", ctypes.c_int64),
    ("perf_time_us", ctypes.c_int64),
    ("view_src", ctypes.POINTER(ggml_tensor)),
    ("view_offs", ctypes.c_size_t),
    ("data", ctypes.c_void_p),
    ("name", ctypes.c_char * GGML_MAX_NAME),
    ("extra", ctypes.c_void_p),
    ("padding", ctypes.c_char * 4),
]
GGML_TENSOR_SIZE = ctypes.sizeof(ggml_tensor)
ggml_tensor_p: TypeAlias = "ctypes._Pointer[ggml_tensor]" # type: ignore
"""ctypes pointer to a [ggml_tensor][ggml.ggml_tensor]
Can be dereferenced to a [ggml_tensor][ggml.ggml_tensor] object using
the `.contents` attribute."""
# bool (*abort_callback)(void * data): return True to abort graph computation.
abort_callback_t = ctypes.CFUNCTYPE(ctypes.c_bool, ctypes.c_void_p)
# // the compute plan that needs to be prepared for ggml_graph_compute()
# // since https://github.com/ggerganov/ggml/issues/287
# struct ggml_cplan {
# size_t work_size; // size of work buffer, calculated by `ggml_graph_plan()`
# uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
# int n_threads;
# // the `n_tasks` of nodes, 1:1 mapping to cgraph nodes
# int n_tasks[GGML_MAX_NODES];
# // abort ggml_graph_compute when true
# bool (*abort_callback)(void * data);
# void * abort_callback_data;
# };
class ggml_cplan(ctypes.Structure):
    """Compute plan for a ggml computation graph
    Attributes:
        work_size (int): size of work buffer
        work_data (ctypes.POINTER(ctypes.c_uint8)): work buffer, to be allocated by the caller
        n_threads (int): number of threads to use when computing the graph using [ggml_graph_compute][ggml.ggml_graph_compute]
        n_tasks (ctypes.Array[ctypes.c_int]): `n_tasks` of nodes, 1:1 mapping to cgraph nodes
        abort_callback (abort_callback_t): abort callback; computation stops when it returns True
        abort_callback_data (ctypes.c_void_p): abort callback data
    """
    _fields_ = [
        ("work_size", ctypes.c_size_t),
        ("work_data", ctypes.POINTER(ctypes.c_uint8)),
        ("n_threads", ctypes.c_int),
        ("n_tasks", ctypes.c_int * GGML_MAX_NODES),
        (
            "abort_callback",
            abort_callback_t,
        ),
        ("abort_callback_data", ctypes.c_void_p),
    ]
GGML_CPLAN_SIZE = ctypes.sizeof(ggml_cplan)
ggml_cplan_p: TypeAlias = "ctypes._Pointer[ggml_cplan]" # type: ignore
"""ctypes pointer to a [ggml_cplan][ggml.ggml_cplan]
Can be dereferenced to a [ggml_cplan][ggml.ggml_cplan] object using
the `.contents` attribute."""
# // next prime after GGML_MAX_NODES
# // #define GGML_GRAPH_HASHTABLE_SIZE 4099
# // next prime after GGML_MAX_NODES * 2 (nodes + leafs)
# #define GGML_GRAPH_HASHTABLE_SIZE 8273
GGML_GRAPH_HASHTABLE_SIZE = 8273
# // computation graph
# struct ggml_cgraph {
# int n_nodes;
# int n_leafs;
# struct ggml_tensor * nodes[GGML_MAX_NODES];
# struct ggml_tensor * grads[GGML_MAX_NODES];
# struct ggml_tensor * leafs[GGML_MAX_NODES];
# void * visited_hash_table[GGML_GRAPH_HASHTABLE_SIZE];
# // performance
# int perf_runs;
# int64_t perf_cycles;
# int64_t perf_time_us;
# };
class ggml_cgraph(ctypes.Structure):
    """ggml computation graph
    Attributes:
        n_nodes (int): number of nodes
        n_leafs (int): number of leafs
        nodes (ctypes.Array[ggml_tensor_p]): `n_nodes`-length array of compute tensors
        grads (ctypes.Array[ggml_tensor_p]): `n_nodes`-length array of gradient tensors
        leafs (ctypes.Array[ggml_tensor_p]): `n_leafs`-length array of parameter tensors
        visited_hash_table (ctypes.Array[ctypes.c_void_p]): `GGML_GRAPH_HASHTABLE_SIZE`-length array of visited nodes
        perf_runs (int): number of runs
        perf_cycles (int): number of cycles
        perf_time_us (int): computation time in microseconds"""
    # Layout must match `struct ggml_cgraph` in ggml.h exactly.
    _fields_ = [
        ("n_nodes", ctypes.c_int),
        ("n_leafs", ctypes.c_int),
        ("nodes", ctypes.POINTER(ggml_tensor) * GGML_MAX_NODES),
        ("grads", ctypes.POINTER(ggml_tensor) * GGML_MAX_NODES),
        ("leafs", ctypes.POINTER(ggml_tensor) * GGML_MAX_NODES),
        ("visited_hash_table", ctypes.c_void_p * GGML_GRAPH_HASHTABLE_SIZE),
        ("perf_runs", ctypes.c_int),
        ("perf_cycles", ctypes.c_int64),
        ("perf_time_us", ctypes.c_int64),
    ]
ggml_cgraph_p: TypeAlias = "ctypes._Pointer[ggml_cgraph]" # type: ignore
"""ctypes pointer to a [ggml_cgraph][ggml.ggml_cgraph]
Can be dereferenced to a [ggml_cgraph][ggml.ggml_cgraph] object using
the `.contents` attribute."""
# static const size_t GGML_GRAPH_SIZE = sizeof(struct ggml_cgraph);
GGML_GRAPH_SIZE = ctypes.sizeof(ggml_cgraph)
# struct ggml_scratch {
# size_t offs;
# size_t size;
# void * data;
# };
class ggml_scratch(ctypes.Structure):
    """Mirror of `struct ggml_scratch` from ggml.h (scratch buffer descriptor)."""
    _fields_ = [
        ("offs", ctypes.c_size_t),
        ("size", ctypes.c_size_t),
        ("data", ctypes.c_void_p),
    ]
# struct ggml_init_params {
# // memory pool
# size_t mem_size; // bytes
# void * mem_buffer; // if NULL, memory will be allocated internally
# bool no_alloc; // don't allocate memory for the tensor data
# };
class ggml_init_params(ctypes.Structure):
    """Initialization parameters for a ggml context
    **NOTE**: Reference counting does not cross into ggml, if you allocate a memory buffer
    in python using ctypes Arrays or a numpy array, you must keep a reference to it until
    you free the ggml context otherwise you will encounter a segmentation fault.
    Attributes:
        mem_size (int): size of memory pool in bytes
        mem_buffer (ctypes.c_void_p): pointer to memory pool, if None, memory will be allocated internally
        no_alloc (bool): don't allocate memory for tensor data
    """
    _fields_ = [
        ("mem_size", ctypes.c_size_t),
        ("mem_buffer", ctypes.c_void_p),
        ("no_alloc", ctypes.c_bool),
    ]
# // compute types
# // NOTE: the INIT or FINALIZE pass is not scheduled unless explicitly enabled.
# // This behavior was changed since https://github.com/ggerganov/llama.cpp/pull/1995.
# enum ggml_task_type {
# GGML_TASK_INIT = 0,
# GGML_TASK_COMPUTE,
# GGML_TASK_FINALIZE,
# };
GGML_TASK_INIT = 0
GGML_TASK_COMPUTE = 1
GGML_TASK_FINALIZE = 2
# struct ggml_compute_params {
# enum ggml_task_type type;
# // ith = thread index, nth = number of threads
# int ith, nth;
# // work buffer for all threads
# size_t wsize;
# void * wdata;
# };
class ggml_compute_params(ctypes.Structure):
    """Mirror of `struct ggml_compute_params` from ggml.h."""
    _fields_ = [
        ("type", ctypes.c_int),  # enum ggml_task_type
        ("ith", ctypes.c_int),   # thread index
        ("nth", ctypes.c_int),   # number of threads
        ("wsize", ctypes.c_size_t),  # work buffer size (shared by all threads)
        ("wdata", ctypes.c_void_p),
    ]
ggml_compute_params_p: TypeAlias = "ctypes._Pointer[ggml_compute_params]" # type: ignore
# // misc
# GGML_API void ggml_time_init(void); // call this once at the beginning of the program
def ggml_time_init() -> None:
    """Initialize ggml's internal timer; call this once at the beginning of the program."""
    return lib.ggml_time_init()
lib.ggml_time_init.argtypes = []
lib.ggml_time_init.restype = None
# GGML_API int64_t ggml_time_ms(void);
def ggml_time_ms() -> int:
    """Return the current time in milliseconds."""
    return lib.ggml_time_ms()
lib.ggml_time_ms.argtypes = []
lib.ggml_time_ms.restype = ctypes.c_int64
# GGML_API int64_t ggml_time_us(void);
def ggml_time_us() -> int:
    """Return the current time in microseconds."""
    return lib.ggml_time_us()
lib.ggml_time_us.argtypes = []
lib.ggml_time_us.restype = ctypes.c_int64
# GGML_API int64_t ggml_cycles(void);
def ggml_cycles() -> int:
    """Return the current cycle count."""
    return lib.ggml_cycles()
lib.ggml_cycles.argtypes = []
lib.ggml_cycles.restype = ctypes.c_int64
# GGML_API int64_t ggml_cycles_per_ms(void);
def ggml_cycles_per_ms() -> int:
    """Return the number of cycles per millisecond."""
    return lib.ggml_cycles_per_ms()
lib.ggml_cycles_per_ms.argtypes = []
lib.ggml_cycles_per_ms.restype = ctypes.c_int64
# GGML_API void ggml_numa_init(void); // call once for better performance on NUMA systems
def ggml_numa_init() -> None:
    """Initialize NUMA support; call once for better performance on NUMA systems."""
    return lib.ggml_numa_init()
lib.ggml_numa_init.argtypes = []
lib.ggml_numa_init.restype = None
# GGML_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
def ggml_is_numa() -> bool:
    """Return True if init detected that the system has more than one NUMA node."""
    return lib.ggml_is_numa()
lib.ggml_is_numa.argtypes = []
lib.ggml_is_numa.restype = ctypes.c_bool
# GGML_API void ggml_print_object (const struct ggml_object * obj);
def ggml_print_object(obj: ggml_object_p) -> None:
    """Print the given ggml_object (debugging aid)."""
    return lib.ggml_print_object(obj)
lib.ggml_print_object.argtypes = [ctypes.POINTER(ggml_object)]
lib.ggml_print_object.restype = None
# GGML_API void ggml_print_objects(const struct ggml_context * ctx);
def ggml_print_objects(ctx: ggml_context_p) -> None:
    """Print all objects allocated in the given context (debugging aid)."""
    return lib.ggml_print_objects(ctx)
lib.ggml_print_objects.argtypes = [ggml_context_p]
lib.ggml_print_objects.restype = None
# GGML_API int64_t ggml_nelements (const struct ggml_tensor * tensor);
def ggml_nelements(
    tensor: ggml_tensor_p,
) -> int:
    """Get the number of elements in a tensor
    Parameters:
        tensor: tensor
    Returns:
        number of elements"""
    return lib.ggml_nelements(tensor)
lib.ggml_nelements.argtypes = [ctypes.POINTER(ggml_tensor)]
lib.ggml_nelements.restype = ctypes.c_int64
# GGML_API int64_t ggml_nrows (const struct ggml_tensor * tensor);
def ggml_nrows(
    tensor: ggml_tensor_p,
) -> int:
    """Get the number of rows in a tensor
    Parameters:
        tensor: tensor
    Returns:
        number of rows"""
    return lib.ggml_nrows(tensor)
lib.ggml_nrows.argtypes = [ctypes.POINTER(ggml_tensor)]
lib.ggml_nrows.restype = ctypes.c_int64
# GGML_API size_t ggml_nbytes (const struct ggml_tensor * tensor);
def ggml_nbytes(
    tensor: ggml_tensor_p,
) -> int:
    """Get the number of bytes required to store tensor data
    Parameters:
        tensor: tensor
    Returns:
        number of bytes"""
    return lib.ggml_nbytes(tensor)
lib.ggml_nbytes.argtypes = [ctypes.POINTER(ggml_tensor)]
lib.ggml_nbytes.restype = ctypes.c_size_t
# GGML_API size_t ggml_nbytes_pad (const struct ggml_tensor * tensor); // same as ggml_nbytes() but padded to GGML_MEM_ALIGN
def ggml_nbytes_pad(
    tensor: ggml_tensor_p,
) -> int:
    """Get the number of bytes required to store tensor data, padded to GGML_MEM_ALIGN
    Parameters:
        tensor: tensor
    Returns:
        number of bytes"""
    return lib.ggml_nbytes_pad(tensor)
lib.ggml_nbytes_pad.argtypes = [ctypes.POINTER(ggml_tensor)]
lib.ggml_nbytes_pad.restype = ctypes.c_size_t
# GGML_API size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split);
def ggml_nbytes_split(
    tensor: ggml_tensor_p,
    nrows_split: Union[ctypes.c_int, int],
) -> int:
    """Get the number of bytes for a split of the tensor data.
    Parameters:
        tensor: tensor
        nrows_split: number of rows in the split
    Returns:
        number of bytes"""
    return lib.ggml_nbytes_split(tensor, nrows_split)
lib.ggml_nbytes_split.argtypes = [ctypes.POINTER(ggml_tensor), ctypes.c_int]
lib.ggml_nbytes_split.restype = ctypes.c_size_t
# GGML_API int ggml_blck_size (enum ggml_type type);
def ggml_blck_size(type: Union[ctypes.c_int, int]) -> int:
    """Get the block size (number of elements per block) of the given ggml_type."""
    return lib.ggml_blck_size(type)
lib.ggml_blck_size.argtypes = [ctypes.c_int]
lib.ggml_blck_size.restype = ctypes.c_int
# GGML_API size_t ggml_type_size (enum ggml_type type); // size in bytes for all elements in a block
def ggml_type_size(type: Union[ctypes.c_int, int]) -> int:
    """Get the size in bytes of all elements in a block of the given ggml_type."""
    return lib.ggml_type_size(type)
lib.ggml_type_size.argtypes = [ctypes.c_int]
lib.ggml_type_size.restype = ctypes.c_size_t
# GGML_API float ggml_type_sizef(enum ggml_type type); // ggml_type_size()/ggml_blck_size() as float
def ggml_type_sizef(type: Union[ctypes.c_int, int]) -> float:
    """Get ggml_type_size() / ggml_blck_size() as a float (bytes per element)."""
    return lib.ggml_type_sizef(type)
lib.ggml_type_sizef.argtypes = [ctypes.c_int]
lib.ggml_type_sizef.restype = ctypes.c_float
# GGML_API const char * ggml_type_name(enum ggml_type type);
def ggml_type_name(type: Union[ctypes.c_int, int]) -> bytes:
    """Get the name of the given ggml_type as bytes."""
    return lib.ggml_type_name(type)
lib.ggml_type_name.argtypes = [ctypes.c_int]
lib.ggml_type_name.restype = ctypes.c_char_p
# GGML_API const char * ggml_op_name (enum ggml_op op);
def ggml_op_name(op: Union[ctypes.c_int, int]) -> bytes:
    """Get the name of the given ggml_op as bytes."""
    return lib.ggml_op_name(op)
lib.ggml_op_name.argtypes = [ctypes.c_int]
lib.ggml_op_name.restype = ctypes.c_char_p
# GGML_API const char * ggml_op_symbol(enum ggml_op op);
def ggml_op_symbol(op: Union[ctypes.c_int, int]) -> bytes:
    """Get the symbol of the given ggml_op as bytes."""
    return lib.ggml_op_symbol(op)
lib.ggml_op_symbol.argtypes = [ctypes.c_int]
lib.ggml_op_symbol.restype = ctypes.c_char_p
# GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
def ggml_element_size(
    tensor: ggml_tensor_p,
) -> int:
    """Get the size in bytes of a single element of the tensor."""
    return lib.ggml_element_size(tensor)
lib.ggml_element_size.argtypes = [ctypes.POINTER(ggml_tensor)]
lib.ggml_element_size.restype = ctypes.c_size_t
# GGML_API bool ggml_is_quantized(enum ggml_type type);
def ggml_is_quantized(type: Union[ctypes.c_int, int]) -> bool:
    """Return True if the given ggml_type is a quantized type."""
    return lib.ggml_is_quantized(type)
lib.ggml_is_quantized.argtypes = [ctypes.c_int]
lib.ggml_is_quantized.restype = ctypes.c_bool
# // TODO: temporary until model loading of ggml examples is refactored
# GGML_API enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype);
def ggml_ftype_to_ggml_type(ftype: Union[ctypes.c_int, int]) -> int:
    """Convert a model file type (ggml_ftype) to the corresponding ggml_type."""
    return lib.ggml_ftype_to_ggml_type(ftype)
lib.ggml_ftype_to_ggml_type.argtypes = [ctypes.c_int]
lib.ggml_ftype_to_ggml_type.restype = ctypes.c_int
  829. # GGML_API bool ggml_is_transposed(const struct ggml_tensor * tensor);
  830. def ggml_is_transposed(
  831. tensor: ggml_tensor_p,
  832. ) -> bool:
  833. """Check if a tensor is transposed
  834. Parameters:
  835. tensor: tensor
  836. Returns:
  837. True if tensor is transposed else False"""
  838. return lib.ggml_is_transposed(tensor)
  839. lib.ggml_is_transposed.argtypes = [ctypes.POINTER(ggml_tensor)]
  840. lib.ggml_is_transposed.restype = ctypes.c_bool
  841. # GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor);
  842. def ggml_is_contiguous(
  843. tensor: ggml_tensor_p,
  844. ) -> bool:
  845. """Check if a tensor is contiguous
  846. Parameters:
  847. tensor: tensor
  848. Returns:
  849. True if tensor is contiguous else False"""
  850. return lib.ggml_is_contiguous(tensor)
  851. lib.ggml_is_contiguous.argtypes = [ctypes.POINTER(ggml_tensor)]
  852. lib.ggml_is_contiguous.restype = ctypes.c_bool
  853. # GGML_API bool ggml_is_permuted (const struct ggml_tensor * tensor);
  854. def ggml_is_permuted(
  855. tensor: ggml_tensor_p,
  856. ) -> bool:
  857. """Check if a tensor is permuted
  858. Parameters:
  859. tensor: tensor
  860. Returns:
  861. True if tensor is permuted else False"""
  862. return lib.ggml_is_permuted(tensor)
  863. lib.ggml_is_permuted.argtypes = [ctypes.POINTER(ggml_tensor)]
  864. lib.ggml_is_permuted.restype = ctypes.c_bool
  865. # GGML_API bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
  866. def ggml_are_same_shape(
  867. t0: ggml_tensor_p,
  868. t1: ggml_tensor_p,
  869. ) -> bool:
  870. """Check if two tensors have the same shape
  871. Parameters:
  872. t0: tensor 0
  873. t1: tensor 1
  874. Returns:
  875. True if tensors have the same shape else False"""
  876. return lib.ggml_are_same_shape(t0, t1)
  877. lib.ggml_are_same_shape.argtypes = [
  878. ctypes.POINTER(ggml_tensor),
  879. ctypes.POINTER(ggml_tensor),
  880. ]
  881. lib.ggml_are_same_shape.restype = ctypes.c_bool
  882. # // use this to compute the memory overhead of a tensor
  883. # GGML_API size_t ggml_tensor_overhead(void);
  884. def ggml_tensor_overhead() -> int:
  885. """Overhead required for a tensor struct in bytes
  886. Returns:
  887. size of tensor struct in bytes"""
  888. return lib.ggml_tensor_overhead()
  889. lib.ggml_tensor_overhead.argtypes = []
  890. lib.ggml_tensor_overhead.restype = ctypes.c_size_t
  891. # // main
  892. # GGML_API struct ggml_context * ggml_init(struct ggml_init_params params);
  893. def ggml_init(
  894. params: ggml_init_params,
  895. ) -> ggml_context_p:
  896. """Instantiate a new ggml context with params.
  897. You must call `ggml_free()` to free the context.
  898. Parameters:
  899. params: ggml init params
  900. Returns:
  901. Pointer to ggml_context"""
  902. return lib.ggml_init(params)
  903. lib.ggml_init.argtypes = [ggml_init_params]
  904. lib.ggml_init.restype = ggml_context_p
  905. # GGML_API void ggml_free(struct ggml_context * ctx);
  906. def ggml_free(ctx: ggml_context_p):
  907. """Free the ggml context.
  908. Parameters:
  909. ctx: ggml context"""
  910. return lib.ggml_free(ctx)
  911. lib.ggml_free.argtypes = [ggml_context_p]
  912. lib.ggml_free.restype = None
  913. # GGML_API size_t ggml_used_mem(const struct ggml_context * ctx);
  914. def ggml_used_mem(ctx: ggml_context_p) -> int:
  915. """Return the amount of memory used by the ggml context in bytes.
  916. Parameters:
  917. ctx: ggml context
  918. Returns:
  919. amount of memory used in bytes"""
  920. return lib.ggml_used_mem(ctx)
  921. lib.ggml_used_mem.argtypes = [ggml_context_p]
  922. lib.ggml_used_mem.restype = ctypes.c_size_t
  923. # GGML_API size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch);
  924. def ggml_set_scratch(ctx: ggml_context_p, scratch: ggml_scratch) -> int:
  925. """Set the scratch buffer for the ggml context."""
  926. return lib.ggml_set_scratch(ctx, scratch)
  927. lib.ggml_set_scratch.argtypes = [ggml_context_p, ggml_scratch]
  928. lib.ggml_set_scratch.restype = ctypes.c_size_t
  929. # GGML_API bool ggml_get_no_alloc(struct ggml_context * ctx);
  930. def ggml_get_no_alloc(ctx: ggml_context_p) -> bool:
  931. """Return the no_alloc flag for the ggml context."""
  932. return lib.ggml_get_no_alloc(ctx)
  933. lib.ggml_get_no_alloc.argtypes = [ggml_context_p]
  934. lib.ggml_get_no_alloc.restype = ctypes.c_bool
  935. # GGML_API void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc);
  936. def ggml_set_no_alloc(ctx: ggml_context_p, no_alloc: Union[ctypes.c_bool, bool]):
  937. """Set the no_alloc flag for the ggml context."""
  938. return lib.ggml_set_no_alloc(ctx, no_alloc)
  939. lib.ggml_set_no_alloc.argtypes = [ggml_context_p, ctypes.c_bool]
  940. lib.ggml_set_no_alloc.restype = None
  941. # GGML_API void * ggml_get_mem_buffer (struct ggml_context * ctx);
  942. def ggml_get_mem_buffer(ctx: ggml_context_p) -> Optional[ctypes.c_void_p]:
  943. """Return the memory buffer for the ggml context."""
  944. return lib.ggml_get_mem_buffer(ctx)
  945. lib.ggml_get_mem_buffer.argtypes = [ggml_context_p]
  946. lib.ggml_get_mem_buffer.restype = ctypes.c_void_p
  947. # GGML_API size_t ggml_get_mem_size (struct ggml_context * ctx);
  948. def ggml_get_mem_size(ctx: ggml_context_p) -> int:
  949. """Return the size of the memory buffer for the ggml context in bytes."""
  950. return lib.ggml_get_mem_size(ctx)
  951. lib.ggml_get_mem_size.argtypes = [ggml_context_p]
  952. lib.ggml_get_mem_size.restype = ctypes.c_size_t
  953. # GGML_API size_t ggml_get_max_tensor_size(const struct ggml_context * ctx);
  954. def ggml_get_max_tensor_size(ctx: ggml_context_p) -> int:
  955. """Return the maximum size of a tensor in bytes."""
  956. return lib.ggml_get_max_tensor_size(ctx)
  957. lib.ggml_get_max_tensor_size.argtypes = [ggml_context_p]
  958. lib.ggml_get_max_tensor_size.restype = ctypes.c_size_t
  959. # GGML_API struct ggml_tensor * ggml_new_tensor(
  960. # struct ggml_context * ctx,
  961. # enum ggml_type type,
  962. # int n_dims,
  963. # const int64_t *ne);
  964. def ggml_new_tensor(
  965. ctx: ggml_context_p,
  966. type: Union[ctypes.c_int, int],
  967. n_dims: Union[ctypes.c_int, int],
  968. ne: CInt64Array,
  969. ) -> ggml_tensor_p:
  970. """Create a new tensor with the given type, number of dimensions, and number of elements in each dimension.
  971. Parameters:
  972. ctx: ggml context
  973. type: ggml type
  974. n_dims: number of dimensions
  975. ne (ctypes.Array[ctypes.c_int64]): number of elements in each dimension (array of length n_dims)
  976. Returns:
  977. Pointer to ggml_tensor"""
  978. return lib.ggml_new_tensor(ctx, type, n_dims, ne)
  979. lib.ggml_new_tensor.argtypes = [
  980. ggml_context_p,
  981. ctypes.c_int,
  982. ctypes.c_int,
  983. ctypes.POINTER(ctypes.c_int64),
  984. ]
  985. lib.ggml_new_tensor.restype = ctypes.POINTER(ggml_tensor)
  986. # GGML_API struct ggml_tensor * ggml_new_tensor_1d(
  987. # struct ggml_context * ctx,
  988. # enum ggml_type type,
  989. # int64_t ne0);
  990. def ggml_new_tensor_1d(
  991. ctx: ggml_context_p, type: Union[ctypes.c_int, int], ne0: Union[ctypes.c_int64, int]
  992. ) -> ggml_tensor_p:
  993. """Create a new 1-dimensional tensor with the given type and number of elements.
  994. Parameters:
  995. ctx: ggml context
  996. type: ggml type
  997. ne0: number of elements in dimension 0
  998. Returns:
  999. Pointer to ggml_tensor"""
  1000. return lib.ggml_new_tensor_1d(ctx, type, ne0)
  1001. lib.ggml_new_tensor_1d.argtypes = [ggml_context_p, ctypes.c_int, ctypes.c_int64]
  1002. lib.ggml_new_tensor_1d.restype = ctypes.POINTER(ggml_tensor)
  1003. # GGML_API struct ggml_tensor * ggml_new_tensor_2d(
  1004. # struct ggml_context * ctx,
  1005. # enum ggml_type type,
  1006. # int64_t ne0,
  1007. # int64_t ne1);
  1008. def ggml_new_tensor_2d(
  1009. ctx: ggml_context_p,
  1010. type: Union[ctypes.c_int, int],
  1011. ne0: Union[ctypes.c_int64, int],
  1012. ne1: Union[ctypes.c_int64, int],
  1013. ) -> ggml_tensor_p:
  1014. """Create a new 2-dimensional tensor with the given type and number of elements in each dimension.
  1015. Parameters:
  1016. ctx: ggml context
  1017. type: ggml type
  1018. ne0: number of elements in dimension 0
  1019. ne1: number of elements in dimension 1
  1020. Returns:
  1021. Pointer to ggml_tensor"""
  1022. return lib.ggml_new_tensor_2d(ctx, type, ne0, ne1)
  1023. lib.ggml_new_tensor_2d.argtypes = [
  1024. ggml_context_p,
  1025. ctypes.c_int,
  1026. ctypes.c_int64,
  1027. ctypes.c_int64,
  1028. ]
  1029. lib.ggml_new_tensor_2d.restype = ctypes.POINTER(ggml_tensor)
  1030. # GGML_API struct ggml_tensor * ggml_new_tensor_3d(
  1031. # struct ggml_context * ctx,
  1032. # enum ggml_type type,
  1033. # int64_t ne0,
  1034. # int64_t ne1,
  1035. # int64_t ne2);
  1036. def ggml_new_tensor_3d(
  1037. ctx: ggml_context_p,
  1038. type: Union[ctypes.c_int, int],
  1039. ne0: Union[ctypes.c_int64, int],
  1040. ne1: Union[ctypes.c_int64, int],
  1041. ne2: Union[ctypes.c_int64, int],
  1042. ) -> ggml_tensor_p:
  1043. """Create a new 3-dimensional tensor with the given type and number of elements in each dimension.
  1044. Parameters:
  1045. ctx: ggml context
  1046. type: ggml type
  1047. ne0: number of elements in dimension 0
  1048. ne1: number of elements in dimension 1
  1049. ne2: number of elements in dimension 2
  1050. Returns:
  1051. Pointer to ggml_tensor"""
  1052. return lib.ggml_new_tensor_3d(ctx, type, ne0, ne1, ne2)
  1053. lib.ggml_new_tensor_3d.argtypes = [
  1054. ggml_context_p,
  1055. ctypes.c_int,
  1056. ctypes.c_int64,
  1057. ctypes.c_int64,
  1058. ctypes.c_int64,
  1059. ]
  1060. lib.ggml_new_tensor_3d.restype = ctypes.POINTER(ggml_tensor)
  1061. # GGML_API struct ggml_tensor * ggml_new_tensor_4d(
  1062. # struct ggml_context * ctx,
  1063. # enum ggml_type type,
  1064. # int64_t ne0,
  1065. # int64_t ne1,
  1066. # int64_t ne2,
  1067. # int64_t ne3);
  1068. def ggml_new_tensor_4d(
  1069. ctx: ggml_context_p,
  1070. type: Union[ctypes.c_int, int],
  1071. ne0: Union[ctypes.c_int64, int],
  1072. ne1: Union[ctypes.c_int64, int],
  1073. ne2: Union[ctypes.c_int64, int],
  1074. ne3: Union[ctypes.c_int64, int],
  1075. ) -> ggml_tensor_p:
  1076. """Create a new 4-dimensional tensor with the given type and number of elements in each dimension.
  1077. Parameters:
  1078. ctx: ggml context
  1079. type: ggml type
  1080. ne0: number of elements in dimension 0
  1081. ne1: number of elements in dimension 1
  1082. ne2: number of elements in dimension 2
  1083. Returns:
  1084. Pointer to ggml_tensor"""
  1085. return lib.ggml_new_tensor_4d(ctx, type, ne0, ne1, ne2, ne3)
  1086. lib.ggml_new_tensor_4d.argtypes = [
  1087. ggml_context_p,
  1088. ctypes.c_int,
  1089. ctypes.c_int64,
  1090. ctypes.c_int64,
  1091. ctypes.c_int64,
  1092. ctypes.c_int64,
  1093. ]
  1094. lib.ggml_new_tensor_4d.restype = ctypes.POINTER(ggml_tensor)
  1095. # GGML_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
  1096. def ggml_new_i32(
  1097. ctx: ggml_context_p, value: Union[ctypes.c_int32, int]
  1098. ) -> ggml_tensor_p:
  1099. """Create a 1 element tensor with the given integer value.
  1100. Parameters:
  1101. ctx: ggml context
  1102. value: integer value
  1103. Returns:
  1104. Pointer to ggml_tensor"""
  1105. return lib.ggml_new_i32(ctx, value)
  1106. lib.ggml_new_i32.argtypes = [ggml_context_p, ctypes.c_int32]
  1107. lib.ggml_new_i32.restype = ctypes.POINTER(ggml_tensor)
  1108. # GGML_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
  1109. def ggml_new_f32(
  1110. ctx: ggml_context_p,
  1111. value: Union[ctypes.c_float, float],
  1112. ) -> ggml_tensor_p:
  1113. """Create a 1 element tensor with the given float value.
  1114. Parameters:
  1115. ctx: ggml context
  1116. value: float value
  1117. Returns:
  1118. Pointer to ggml_tensor"""
  1119. return lib.ggml_new_f32(ctx, value)
  1120. lib.ggml_new_f32.argtypes = [ggml_context_p, ctypes.c_float]
  1121. lib.ggml_new_f32.restype = ctypes.POINTER(ggml_tensor)
  1122. # GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
  1123. def ggml_dup_tensor(ctx: ggml_context_p, src: ggml_tensor_p) -> ggml_tensor_p:
  1124. """Create a new tensor with the same type and dimensions as the source tensor.
  1125. Parameters:
  1126. ctx: ggml context
  1127. src: source tensor
  1128. Returns:
  1129. Pointer to ggml_tensor"""
  1130. return lib.ggml_dup_tensor(ctx, src)
  1131. lib.ggml_dup_tensor.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1132. lib.ggml_dup_tensor.restype = ctypes.POINTER(ggml_tensor)
  1133. # GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, struct ggml_tensor * src);
  1134. def ggml_view_tensor(ctx: ggml_context_p, src: ggml_tensor_p) -> ggml_tensor_p:
  1135. """Create a new tensor with the same type, dimensions and data as the source tensor.
  1136. Parameters:
  1137. ctx: ggml context
  1138. src: source tensor
  1139. Returns:
  1140. Pointer to ggml_tensor"""
  1141. return lib.ggml_view_tensor(ctx, src)
  1142. lib.ggml_view_tensor.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1143. lib.ggml_view_tensor.restype = ctypes.POINTER(ggml_tensor)
  1144. # GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
  1145. def ggml_get_tensor(ctx: ggml_context_p, name: bytes) -> ggml_tensor_p:
  1146. """Get a tensor from the ggml context by name.
  1147. Parameters:
  1148. ctx: ggml context
  1149. name: name of tensor
  1150. Returns:
  1151. Pointer to ggml_tensor"""
  1152. return lib.ggml_get_tensor(ctx, name)
  1153. lib.ggml_get_tensor.argtypes = [ggml_context_p, ctypes.c_char_p]
  1154. lib.ggml_get_tensor.restype = ctypes.POINTER(ggml_tensor)
  1155. # GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
  1156. def ggml_set_zero(
  1157. tensor: ggml_tensor_p,
  1158. ) -> ggml_tensor_p:
  1159. """Zero all elements in a tensor.
  1160. Parameters:
  1161. tensor: tensor
  1162. Returns:
  1163. Pointer to ggml_tensor"""
  1164. return lib.ggml_set_zero(tensor)
  1165. lib.ggml_set_zero.argtypes = [ctypes.POINTER(ggml_tensor)]
  1166. lib.ggml_set_zero.restype = ctypes.POINTER(ggml_tensor)
  1167. # GGML_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
  1168. def ggml_set_i32(
  1169. tensor: ggml_tensor_p,
  1170. value: Union[ctypes.c_int32, int],
  1171. ) -> ggml_tensor_p:
  1172. """Set all elements in a tensor to the given integer value.
  1173. Parameters:
  1174. tensor: tensor
  1175. value: integer value
  1176. Returns:
  1177. Pointer to ggml_tensor"""
  1178. return lib.ggml_set_i32(tensor, value)
  1179. lib.ggml_set_i32.argtypes = [ctypes.POINTER(ggml_tensor), ctypes.c_int32]
  1180. lib.ggml_set_i32.restype = ctypes.POINTER(ggml_tensor)
  1181. # GGML_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);
  1182. def ggml_set_f32(
  1183. tensor: ggml_tensor_p,
  1184. value: Union[ctypes.c_float, float],
  1185. ) -> ggml_tensor_p:
  1186. """Set all elements in a tensor to the given float value.
  1187. Parameters:
  1188. tensor: tensor
  1189. value: float value
  1190. Returns:
  1191. Pointer to ggml_tensor"""
  1192. return lib.ggml_set_f32(tensor, value)
  1193. lib.ggml_set_f32.argtypes = [ctypes.POINTER(ggml_tensor), ctypes.c_float]
  1194. lib.ggml_set_f32.restype = ctypes.POINTER(ggml_tensor)
  1195. # GGML_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
  1196. def ggml_get_i32_1d(
  1197. tensor: ggml_tensor_p,
  1198. i: Union[ctypes.c_int, int],
  1199. ) -> int:
  1200. """Get the integer value of the i-th element in a 1-dimensional tensor.
  1201. Parameters:
  1202. tensor: tensor
  1203. i: index of element
  1204. Returns:
  1205. integer value of element at index i"""
  1206. return lib.ggml_get_i32_1d(tensor, i)
  1207. lib.ggml_get_i32_1d.argtypes = [ctypes.POINTER(ggml_tensor), ctypes.c_int]
  1208. lib.ggml_get_i32_1d.restype = ctypes.c_int32
  1209. # GGML_API void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);
  1210. def ggml_set_i32_1d(
  1211. tensor: ggml_tensor_p,
  1212. i: Union[ctypes.c_int, int],
  1213. value: Union[ctypes.c_int32, int],
  1214. ):
  1215. """Set the integer value of the i-th element in a 1-dimensional tensor.
  1216. Parameters:
  1217. tensor: tensor
  1218. i: index of element
  1219. value: integer value to set element to"""
  1220. return lib.ggml_set_i32_1d(tensor, i, value)
  1221. lib.ggml_set_i32_1d.argtypes = [
  1222. ctypes.POINTER(ggml_tensor),
  1223. ctypes.c_int,
  1224. ctypes.c_int32,
  1225. ]
  1226. lib.ggml_set_i32_1d.restype = None
  1227. # GGML_API float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
  1228. def ggml_get_f32_1d(
  1229. tensor: ggml_tensor_p,
  1230. i: Union[ctypes.c_int, int],
  1231. ) -> float:
  1232. """Get the float value of the i-th element in a 1-dimensional tensor.
  1233. Parameters:
  1234. tensor: tensor
  1235. Returns:
  1236. float value of element at index i"""
  1237. return lib.ggml_get_f32_1d(tensor, i)
  1238. lib.ggml_get_f32_1d.argtypes = [ctypes.POINTER(ggml_tensor), ctypes.c_int]
  1239. lib.ggml_get_f32_1d.restype = ctypes.c_float
  1240. # GGML_API void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);
  1241. def ggml_set_f32_1d(
  1242. tensor: ggml_tensor_p,
  1243. i: Union[ctypes.c_int, int],
  1244. value: Union[ctypes.c_float, float],
  1245. ):
  1246. """Set the float value of the i-th element in a 1-dimensional tensor.
  1247. Parameters:
  1248. tensor: tensor
  1249. i: index of element
  1250. value: float value to set element to"""
  1251. return lib.ggml_set_f32_1d(tensor, i, value)
  1252. lib.ggml_set_f32_1d.argtypes = [
  1253. ctypes.POINTER(ggml_tensor),
  1254. ctypes.c_int,
  1255. ctypes.c_float,
  1256. ]
  1257. lib.ggml_set_f32_1d.restype = None
  1258. # GGML_API void * ggml_get_data (const struct ggml_tensor * tensor);
  1259. def ggml_get_data(
  1260. tensor: ggml_tensor_p,
  1261. ) -> Optional[ctypes.c_void_p]:
  1262. """Get the data pointer of a tensor.
  1263. Parameters:
  1264. tensor: tensor
  1265. Returns:
  1266. Pointer to data, or None if tensor has no data"""
  1267. return lib.ggml_get_data(tensor)
  1268. lib.ggml_get_data.argtypes = [ctypes.POINTER(ggml_tensor)]
  1269. lib.ggml_get_data.restype = ctypes.c_void_p
  1270. # GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);
  1271. def ggml_get_data_f32(
  1272. tensor: ggml_tensor_p,
  1273. ) -> Optional[CFloatArray]:
  1274. """Get the data pointer of a tensor as a float array.
  1275. Parameters:
  1276. tensor: tensor
  1277. Returns:
  1278. (Optional[ctypes.Array[ctypes.c_float]]): array of float to data, or None if tensor has no data
  1279. """
  1280. return lib.ggml_get_data_f32(tensor)
  1281. lib.ggml_get_data_f32.argtypes = [ctypes.POINTER(ggml_tensor)]
  1282. lib.ggml_get_data_f32.restype = ctypes.POINTER(ctypes.c_float)
  1283. # GGML_API enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);
  1284. def ggml_get_unary_op(
  1285. tensor: ggml_tensor_p,
  1286. ) -> int:
  1287. """Get the unary operation of a tensor.
  1288. Parameters:
  1289. tensor: tensor
  1290. Returns:
  1291. unary operation"""
  1292. return lib.ggml_get_unary_op(tensor)
  1293. lib.ggml_get_unary_op.argtypes = [ctypes.POINTER(ggml_tensor)]
  1294. lib.ggml_get_unary_op.restype = ctypes.c_int
  1295. # GGML_API const char * ggml_get_name(const struct ggml_tensor * tensor);
  1296. def ggml_get_name(
  1297. tensor: ggml_tensor_p,
  1298. ) -> bytes:
  1299. """Get the name of a tensor.
  1300. Parameters:
  1301. tensor: tensor
  1302. Returns:
  1303. name of tensor"""
  1304. return lib.ggml_get_name(tensor)
  1305. lib.ggml_get_name.argtypes = [ctypes.POINTER(ggml_tensor)]
  1306. lib.ggml_get_name.restype = ctypes.c_char_p
  1307. # GGML_API struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name);
  1308. def ggml_set_name(
  1309. tensor: ggml_tensor_p,
  1310. name: bytes,
  1311. ) -> ggml_tensor_p:
  1312. """Set the name of a tensor.
  1313. Parameters:
  1314. tensor: tensor
  1315. name: name to set tensor to
  1316. Returns:
  1317. Pointer to ggml_tensor"""
  1318. return lib.ggml_set_name(tensor, name)
  1319. lib.ggml_set_name.argtypes = [ctypes.POINTER(ggml_tensor), ctypes.c_char_p]
  1320. lib.ggml_set_name.restype = ctypes.POINTER(ggml_tensor)
  1321. # GGML_API struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...);
  1322. def ggml_format_name(
  1323. tensor: ggml_tensor_p,
  1324. fmt: bytes,
  1325. *args: Sequence[Union[bool, int, float, str]],
  1326. ) -> ggml_tensor_p:
  1327. """Format the name of a tensor using the given format c string and arguments.
  1328. Parameters:
  1329. tensor: tensor
  1330. fmt: format c string
  1331. args: arguments to format string
  1332. Returns:
  1333. Pointer to ggml_tensor"""
  1334. return lib.ggml_format_name(tensor, fmt, *args)
  1335. lib.ggml_format_name.argtypes = [ctypes.POINTER(ggml_tensor), ctypes.c_char_p]
  1336. lib.ggml_format_name.restype = ctypes.POINTER(ggml_tensor)
# //
# // operations on tensors with backpropagation
# //
# GGML_API struct ggml_tensor * ggml_dup(
# struct ggml_context * ctx,
# struct ggml_tensor * a);
def ggml_dup(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
    """Duplicate tensor `a` and return the new tensor.

    Parameters:
        ctx: ggml context
        a: tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_dup(ctx, a)
lib.ggml_dup.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_dup.restype = ctypes.POINTER(ggml_tensor)
# // in-place, returns view(a)
# GGML_API struct ggml_tensor * ggml_dup_inplace(
# struct ggml_context * ctx,
# struct ggml_tensor * a);
def ggml_dup_inplace(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
    """In-place variant of ggml_dup; per the C header, returns view(a).

    Parameters:
        ctx: ggml context
        a: tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_dup_inplace(ctx, a)
lib.ggml_dup_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_dup_inplace.restype = ctypes.POINTER(ggml_tensor)
  1355. # GGML_API struct ggml_tensor * ggml_add(
  1356. # struct ggml_context * ctx,
  1357. # struct ggml_tensor * a,
  1358. # struct ggml_tensor * b);
  1359. def ggml_add(
  1360. ctx: ggml_context_p,
  1361. a: ggml_tensor_p,
  1362. b: ggml_tensor_p,
  1363. ) -> ggml_tensor_p:
  1364. """Add two tensors together and return the result.
  1365. Parameters:
  1366. ctx: ggml context
  1367. a: first tensor
  1368. b: second tensor
  1369. Returns:
  1370. Pointer to ggml_tensor"""
  1371. return lib.ggml_add(ctx, a, b)
  1372. lib.ggml_add.argtypes = [
  1373. ggml_context_p,
  1374. ctypes.POINTER(ggml_tensor),
  1375. ctypes.POINTER(ggml_tensor),
  1376. ]
  1377. lib.ggml_add.restype = ctypes.POINTER(ggml_tensor)
  1378. # GGML_API struct ggml_tensor * ggml_add_inplace(
  1379. # struct ggml_context * ctx,
  1380. # struct ggml_tensor * a,
  1381. # struct ggml_tensor * b);
  1382. def ggml_add_inplace(
  1383. ctx: ggml_context_p,
  1384. a: ggml_tensor_p,
  1385. b: ggml_tensor_p,
  1386. ) -> ggml_tensor_p:
  1387. """Add two tensors together and store the result in the first tensor.
  1388. Parameters:
  1389. ctx: ggml context
  1390. a: first tensor
  1391. b: second tensor
  1392. Returns:
  1393. Pointer to ggml_tensor"""
  1394. return lib.ggml_add_inplace(ctx, a, b)
  1395. lib.ggml_add_inplace.argtypes = [
  1396. ggml_context_p,
  1397. ctypes.POINTER(ggml_tensor),
  1398. ctypes.POINTER(ggml_tensor),
  1399. ]
  1400. lib.ggml_add_inplace.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_add1(
# struct ggml_context * ctx,
# struct ggml_tensor * a,
# struct ggml_tensor * b);
def ggml_add1(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    """Wrapper for the C ggml_add1 op on tensors a and b.

    NOTE(review): presumably adds scalar tensor b to every element of a — not
    verifiable from this binding alone; confirm against ggml.h.

    Parameters:
        ctx: ggml context
        a: first tensor
        b: second tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_add1(ctx, a, b)
lib.ggml_add1.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_add1.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_add1_inplace(
# struct ggml_context * ctx,
# struct ggml_tensor * a,
# struct ggml_tensor * b);
def ggml_add1_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    """In-place variant of ggml_add1 (see that binding for semantics).

    Parameters:
        ctx: ggml context
        a: first tensor
        b: second tensor

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_add1_inplace(ctx, a, b)
lib.ggml_add1_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_add1_inplace.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_acc(
# struct ggml_context * ctx,
# struct ggml_tensor * a,
# struct ggml_tensor * b,
# size_t nb1,
# size_t nb2,
# size_t nb3,
# size_t offset);
def ggml_acc(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    nb1: Union[ctypes.c_size_t, int],
    nb2: Union[ctypes.c_size_t, int],
    nb3: Union[ctypes.c_size_t, int],
    offset: Union[ctypes.c_size_t, int],
) -> ggml_tensor_p:
    """Wrapper for the C ggml_acc op (accumulate b into a view of a).

    NOTE(review): nb1/nb2/nb3 look like byte strides and offset a byte offset
    into a — confirm against ggml.h.

    Parameters:
        ctx: ggml context
        a: destination tensor
        b: tensor accumulated into a
        nb1: stride for dimension 1
        nb2: stride for dimension 2
        nb3: stride for dimension 3
        offset: offset into a

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_acc(ctx, a, b, nb1, nb2, nb3, offset)
lib.ggml_acc.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_size_t,
    ctypes.c_size_t,
    ctypes.c_size_t,
    ctypes.c_size_t,
]
lib.ggml_acc.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_acc_inplace(
# struct ggml_context * ctx,
# struct ggml_tensor * a,
# struct ggml_tensor * b,
# size_t nb1,
# size_t nb2,
# size_t nb3,
# size_t offset);
def ggml_acc_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    nb1: Union[ctypes.c_size_t, int],
    nb2: Union[ctypes.c_size_t, int],
    nb3: Union[ctypes.c_size_t, int],
    offset: Union[ctypes.c_size_t, int],
) -> ggml_tensor_p:
    """In-place variant of ggml_acc (see that binding for parameter semantics).

    Parameters:
        ctx: ggml context
        a: destination tensor
        b: tensor accumulated into a
        nb1: stride for dimension 1
        nb2: stride for dimension 2
        nb3: stride for dimension 3
        offset: offset into a

    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_acc_inplace(ctx, a, b, nb1, nb2, nb3, offset)
lib.ggml_acc_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_size_t,
    ctypes.c_size_t,
    ctypes.c_size_t,
    ctypes.c_size_t,
]
lib.ggml_acc_inplace.restype = ctypes.POINTER(ggml_tensor)
  1489. # GGML_API struct ggml_tensor * ggml_sub(
  1490. # struct ggml_context * ctx,
  1491. # struct ggml_tensor * a,
  1492. # struct ggml_tensor * b);
  1493. def ggml_sub(
  1494. ctx: ggml_context_p,
  1495. a: ggml_tensor_p,
  1496. b: ggml_tensor_p,
  1497. ) -> ggml_tensor_p:
  1498. """Subtract two tensors and return the result.
  1499. Parameters:
  1500. ctx: ggml context
  1501. a: first tensor
  1502. b: second tensor
  1503. Returns:
  1504. Pointer to ggml_tensor"""
  1505. return lib.ggml_sub(ctx, a, b)
  1506. lib.ggml_sub.argtypes = [
  1507. ggml_context_p,
  1508. ctypes.POINTER(ggml_tensor),
  1509. ctypes.POINTER(ggml_tensor),
  1510. ]
  1511. lib.ggml_sub.restype = ctypes.POINTER(ggml_tensor)
  1512. # GGML_API struct ggml_tensor * ggml_sub_inplace(
  1513. # struct ggml_context * ctx,
  1514. # struct ggml_tensor * a,
  1515. # struct ggml_tensor * b);
  1516. def ggml_sub_inplace(
  1517. ctx: ggml_context_p,
  1518. a: ggml_tensor_p,
  1519. b: ggml_tensor_p,
  1520. ) -> ggml_tensor_p:
  1521. """Subtract two tensors and store the result in the first tensor.
  1522. Parameters:
  1523. ctx: ggml context
  1524. a: first tensor
  1525. b: second tensor
  1526. Returns:
  1527. Pointer to ggml_tensor"""
  1528. return lib.ggml_sub_inplace(ctx, a, b)
  1529. lib.ggml_sub_inplace.argtypes = [
  1530. ggml_context_p,
  1531. ctypes.POINTER(ggml_tensor),
  1532. ctypes.POINTER(ggml_tensor),
  1533. ]
  1534. lib.ggml_sub_inplace.restype = ctypes.POINTER(ggml_tensor)
  1535. # GGML_API struct ggml_tensor * ggml_mul(
  1536. # struct ggml_context * ctx,
  1537. # struct ggml_tensor * a,
  1538. # struct ggml_tensor * b);
  1539. def ggml_mul(
  1540. ctx: ggml_context_p,
  1541. a: ggml_tensor_p,
  1542. b: ggml_tensor_p,
  1543. ) -> ggml_tensor_p:
  1544. """Element-wise multiply two tensors and return the result.
  1545. Parameters:
  1546. ctx: ggml context
  1547. a: first tensor
  1548. b: second tensor
  1549. Returns:
  1550. Pointer to ggml_tensor"""
  1551. return lib.ggml_mul(ctx, a, b)
  1552. lib.ggml_mul.argtypes = [
  1553. ggml_context_p,
  1554. ctypes.POINTER(ggml_tensor),
  1555. ctypes.POINTER(ggml_tensor),
  1556. ]
  1557. lib.ggml_mul.restype = ctypes.POINTER(ggml_tensor)
  1558. # GGML_API struct ggml_tensor * ggml_mul_inplace(
  1559. # struct ggml_context * ctx,
  1560. # struct ggml_tensor * a,
  1561. # struct ggml_tensor * b);
  1562. def ggml_mul_inplace(
  1563. ctx: ggml_context_p,
  1564. a: ggml_tensor_p,
  1565. b: ggml_tensor_p,
  1566. ) -> ggml_tensor_p:
  1567. """Element-wise multiply two tensors and store the result in the first tensor.
  1568. Parameters:
  1569. ctx: ggml context
  1570. a: first tensor
  1571. b: second tensor
  1572. Returns:
  1573. Pointer to ggml_tensor"""
  1574. return lib.ggml_mul_inplace(ctx, a, b)
  1575. lib.ggml_mul_inplace.argtypes = [
  1576. ggml_context_p,
  1577. ctypes.POINTER(ggml_tensor),
  1578. ctypes.POINTER(ggml_tensor),
  1579. ]
  1580. lib.ggml_mul_inplace.restype = ctypes.POINTER(ggml_tensor)
  1581. # GGML_API struct ggml_tensor * ggml_div(
  1582. # struct ggml_context * ctx,
  1583. # struct ggml_tensor * a,
  1584. # struct ggml_tensor * b);
  1585. def ggml_div(
  1586. ctx: ggml_context_p,
  1587. a: ggml_tensor_p,
  1588. b: ggml_tensor_p,
  1589. ) -> ggml_tensor_p:
  1590. """Element-wise divide two tensors and return the result.
  1591. Parameters:
  1592. ctx: ggml context
  1593. a: first tensor
  1594. b: second tensor
  1595. Returns:
  1596. Pointer to ggml_tensor"""
  1597. return lib.ggml_div(ctx, a, b)
  1598. lib.ggml_div.argtypes = [
  1599. ggml_context_p,
  1600. ctypes.POINTER(ggml_tensor),
  1601. ctypes.POINTER(ggml_tensor),
  1602. ]
  1603. lib.ggml_div.restype = ctypes.POINTER(ggml_tensor)
  1604. # GGML_API struct ggml_tensor * ggml_div_inplace(
  1605. # struct ggml_context * ctx,
  1606. # struct ggml_tensor * a,
  1607. # struct ggml_tensor * b);
  1608. def ggml_div_inplace(
  1609. ctx: ggml_context_p,
  1610. a: ggml_tensor_p,
  1611. b: ggml_tensor_p,
  1612. ) -> ggml_tensor_p:
  1613. """Element-wise divide two tensors and store the result in the first tensor.
  1614. Parameters:
  1615. ctx: ggml context
  1616. a: first tensor
  1617. b: second tensor
  1618. Returns:
  1619. Pointer to ggml_tensor"""
  1620. return lib.ggml_div_inplace(ctx, a, b)
  1621. lib.ggml_div_inplace.argtypes = [
  1622. ggml_context_p,
  1623. ctypes.POINTER(ggml_tensor),
  1624. ctypes.POINTER(ggml_tensor),
  1625. ]
  1626. lib.ggml_div_inplace.restype = ctypes.POINTER(ggml_tensor)
  1627. # GGML_API struct ggml_tensor * ggml_sqr(
  1628. # struct ggml_context * ctx,
  1629. # struct ggml_tensor * a);
  1630. def ggml_sqr(
  1631. ctx: ggml_context_p,
  1632. a: ggml_tensor_p,
  1633. ) -> ggml_tensor_p:
  1634. """Square all elements in a tensor and return the result.
  1635. Parameters:
  1636. ctx: ggml context
  1637. a: tensor
  1638. Returns:
  1639. Pointer to ggml_tensor"""
  1640. return lib.ggml_sqr(ctx, a)
  1641. lib.ggml_sqr.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1642. lib.ggml_sqr.restype = ctypes.POINTER(ggml_tensor)
  1643. # GGML_API struct ggml_tensor * ggml_sqr_inplace(
  1644. # struct ggml_context * ctx,
  1645. # struct ggml_tensor * a);
  1646. def ggml_sqr_inplace(
  1647. ctx: ggml_context_p,
  1648. a: ggml_tensor_p,
  1649. ) -> ggml_tensor_p:
  1650. """Square all elements in a tensor and store the result in the first tensor.
  1651. Parameters:
  1652. ctx: ggml context
  1653. a: tensor
  1654. Returns:
  1655. Pointer to ggml_tensor"""
  1656. return lib.ggml_sqr_inplace(ctx, a)
  1657. lib.ggml_sqr_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1658. lib.ggml_sqr_inplace.restype = ctypes.POINTER(ggml_tensor)
  1659. # GGML_API struct ggml_tensor * ggml_sqrt(
  1660. # struct ggml_context * ctx,
  1661. # struct ggml_tensor * a);
  1662. def ggml_sqrt(
  1663. ctx: ggml_context_p,
  1664. a: ggml_tensor_p,
  1665. ) -> ggml_tensor_p:
  1666. """Square root all elements in a tensor and return the result.
  1667. Parameters:
  1668. ctx: ggml context
  1669. a: tensor
  1670. Returns:
  1671. Pointer to ggml_tensor"""
  1672. return lib.ggml_sqrt(ctx, a)
  1673. lib.ggml_sqrt.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1674. lib.ggml_sqrt.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_sqrt_inplace(
# struct ggml_context * ctx,
# struct ggml_tensor * a);
def ggml_sqrt_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
) -> ggml_tensor_p:
    """Square root all elements in a tensor and store the result in the first tensor.
    Parameters:
        ctx: ggml context
        a: tensor
    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_sqrt_inplace(ctx, a)


lib.ggml_sqrt_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_sqrt_inplace.restype = ctypes.POINTER(ggml_tensor)
  1690. # GGML_API struct ggml_tensor * ggml_log(
  1691. # struct ggml_context * ctx,
  1692. # struct ggml_tensor * a);
  1693. def ggml_log(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  1694. """Take the natural logarithm of all elements in a tensor and return the result.
  1695. Parameters:
  1696. ctx: ggml context
  1697. a: tensor
  1698. Returns:
  1699. Pointer to ggml_tensor"""
  1700. return lib.ggml_log(ctx, a)
  1701. lib.ggml_log.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1702. lib.ggml_log.restype = ctypes.POINTER(ggml_tensor)
  1703. # GGML_API struct ggml_tensor * ggml_log_inplace(
  1704. # struct ggml_context * ctx,
  1705. # struct ggml_tensor * a);
  1706. def ggml_log_inplace(
  1707. ctx: ggml_context_p,
  1708. a: ggml_tensor_p,
  1709. ) -> ggml_tensor_p:
  1710. """Take the natural logarithm of all elements in a tensor and store the result in the first tensor.
  1711. Parameters:
  1712. ctx: ggml context
  1713. a: tensor
  1714. Returns:
  1715. Pointer to ggml_tensor"""
  1716. return lib.ggml_log_inplace(ctx, a)
  1717. lib.ggml_log_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1718. lib.ggml_log_inplace.restype = ctypes.POINTER(ggml_tensor)
  1719. # // return scalar
  1720. # GGML_API struct ggml_tensor * ggml_sum(
  1721. # struct ggml_context * ctx,
  1722. # struct ggml_tensor * a);
  1723. def ggml_sum(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  1724. """Sum all elements in a tensor and return the result.
  1725. Parameters:
  1726. ctx: ggml context
  1727. a: tensor
  1728. Returns:
  1729. Pointer to ggml_tensor"""
  1730. return lib.ggml_sum(ctx, a)
  1731. lib.ggml_sum.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1732. lib.ggml_sum.restype = ctypes.POINTER(ggml_tensor)
  1733. # // sums along rows, with input shape [a,b,c,d] return shape [1,b,c,d]
  1734. # GGML_API struct ggml_tensor * ggml_sum_rows(
  1735. # struct ggml_context * ctx,
  1736. # struct ggml_tensor * a);
  1737. def ggml_sum_rows(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  1738. """Sum all elements in a tensor along the first axis and return the result.
  1739. sums along rows, with input shape [a,b,c,d] return shape [1,b,c,d]
  1740. Parameters:
  1741. ctx: ggml context
  1742. a: tensor
  1743. Returns:
  1744. Pointer to ggml_tensor"""
  1745. return lib.ggml_sum_rows(ctx, a)
  1746. lib.ggml_sum_rows.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1747. lib.ggml_sum_rows.restype = ctypes.POINTER(ggml_tensor)
  1748. # // mean along rows
  1749. # GGML_API struct ggml_tensor * ggml_mean(
  1750. # struct ggml_context * ctx,
  1751. # struct ggml_tensor * a);
  1752. def ggml_mean(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  1753. """Take the mean of all elements in a tensor and return the result.
  1754. Parameters:
  1755. ctx: ggml context
  1756. a: tensor
  1757. Returns:
  1758. Pointer to ggml_tensor"""
  1759. return lib.ggml_mean(ctx, a)
  1760. lib.ggml_mean.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1761. lib.ggml_mean.restype = ctypes.POINTER(ggml_tensor)
  1762. # // argmax along rows
  1763. # GGML_API struct ggml_tensor * ggml_argmax(
  1764. # struct ggml_context * ctx,
  1765. # struct ggml_tensor * a);
  1766. def ggml_argmax(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  1767. """Take the argmax of all elements in a tensor and return the result.
  1768. argmax along rows
  1769. Parameters:
  1770. ctx: ggml context
  1771. a: tensor
  1772. Returns:
  1773. Pointer to ggml_tensor"""
  1774. return lib.ggml_argmax(ctx, a)
  1775. lib.ggml_argmax.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1776. lib.ggml_argmax.restype = ctypes.POINTER(ggml_tensor)
  1777. # // if a is the same shape as b, and a is not parameter, return a
  1778. # // otherwise, return a new tensor: repeat(a) to fit in b
  1779. # GGML_API struct ggml_tensor * ggml_repeat(
  1780. # struct ggml_context * ctx,
  1781. # struct ggml_tensor * a,
  1782. # struct ggml_tensor * b);
  1783. def ggml_repeat(
  1784. ctx: ggml_context_p,
  1785. a: ggml_tensor_p,
  1786. b: ggml_tensor_p,
  1787. ) -> ggml_tensor_p:
  1788. """Repeat a tensor to fit the shape of another tensor.
  1789. If a is the same shape as b, and a is not parameter, return a
  1790. Parameters:
  1791. ctx: ggml context
  1792. a: tensor to repeat
  1793. b: tensor to fit
  1794. Returns:
  1795. Pointer to ggml_tensor"""
  1796. return lib.ggml_repeat(ctx, a, b)
  1797. lib.ggml_repeat.argtypes = [
  1798. ggml_context_p,
  1799. ctypes.POINTER(ggml_tensor),
  1800. ctypes.POINTER(ggml_tensor),
  1801. ]
  1802. lib.ggml_repeat.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_repeat_back(
# struct ggml_context * ctx,
# struct ggml_tensor * a,
# struct ggml_tensor * b);
def ggml_repeat_back(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    """Backward counterpart of ggml_repeat (per the ggml op name; see ggml.h for exact semantics).
    Parameters:
        ctx: ggml context
        a: tensor
        b: tensor providing the target shape
    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_repeat_back(ctx, a, b)


lib.ggml_repeat_back.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_repeat_back.restype = ctypes.POINTER(ggml_tensor)
# // concat a and b on dim 2
# // used in stable-diffusion
# GGML_API struct ggml_tensor * ggml_concat(
# struct ggml_context * ctx,
# struct ggml_tensor * a,
# struct ggml_tensor * b);
def ggml_concat(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    """Concatenate two tensors along dim 2 (the third axis) and return the result.
    Used in stable-diffusion.
    Parameters:
        ctx: ggml context
        a: first tensor
        b: second tensor
    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_concat(ctx, a, b)


lib.ggml_concat.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_concat.restype = ctypes.POINTER(ggml_tensor)
  1844. # GGML_API struct ggml_tensor * ggml_abs(
  1845. # struct ggml_context * ctx,
  1846. # struct ggml_tensor * a);
  1847. def ggml_abs(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  1848. """Take the absolute value of all elements in a tensor and return the result.
  1849. Parameters:
  1850. ctx: ggml context
  1851. a: tensor
  1852. Returns:
  1853. Pointer to ggml_tensor"""
  1854. return lib.ggml_abs(ctx, a)
  1855. lib.ggml_abs.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1856. lib.ggml_abs.restype = ctypes.POINTER(ggml_tensor)
  1857. # GGML_API struct ggml_tensor * ggml_abs_inplace(
  1858. # struct ggml_context * ctx,
  1859. # struct ggml_tensor * a);
  1860. def ggml_abs_inplace(
  1861. ctx: ggml_context_p,
  1862. a: ggml_tensor_p,
  1863. ) -> ggml_tensor_p:
  1864. """Take the absolute value of all elements in a tensor and store the result in the first tensor.
  1865. Parameters:
  1866. ctx: ggml context
  1867. a: tensor
  1868. Returns:
  1869. Pointer to ggml_tensor"""
  1870. return lib.ggml_abs_inplace(ctx, a)
  1871. lib.ggml_abs_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1872. lib.ggml_abs_inplace.restype = ctypes.POINTER(ggml_tensor)
  1873. # GGML_API struct ggml_tensor * ggml_sgn(
  1874. # struct ggml_context * ctx,
  1875. # struct ggml_tensor * a);
  1876. def ggml_sgn(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  1877. """Get the sign of all elements in a tensor and return the result.
  1878. Parameters:
  1879. ctx: ggml context
  1880. a: tensor
  1881. Returns:
  1882. Pointer to ggml_tensor"""
  1883. return lib.ggml_sgn(ctx, a)
  1884. lib.ggml_sgn.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1885. lib.ggml_sgn.restype = ctypes.POINTER(ggml_tensor)
  1886. # GGML_API struct ggml_tensor * ggml_sgn_inplace(
  1887. # struct ggml_context * ctx,
  1888. # struct ggml_tensor * a);
  1889. def ggml_sgn_inplace(
  1890. ctx: ggml_context_p,
  1891. a: ggml_tensor_p,
  1892. ) -> ggml_tensor_p:
  1893. """Get the sign of all elements in a tensor and store the result in the first tensor.
  1894. Parameters:
  1895. ctx: ggml context
  1896. a: tensor
  1897. Returns:
  1898. Pointer to ggml_tensor"""
  1899. return lib.ggml_sgn_inplace(ctx, a)
  1900. lib.ggml_sgn_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1901. lib.ggml_sgn_inplace.restype = ctypes.POINTER(ggml_tensor)
  1902. # GGML_API struct ggml_tensor * ggml_neg(
  1903. # struct ggml_context * ctx,
  1904. # struct ggml_tensor * a);
  1905. def ggml_neg(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  1906. """Negate all elements in a tensor and return the result.
  1907. Parameters:
  1908. ctx: ggml context
  1909. a: tensor
  1910. Returns:
  1911. Pointer to ggml_tensor"""
  1912. return lib.ggml_neg(ctx, a)
  1913. lib.ggml_neg.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1914. lib.ggml_neg.restype = ctypes.POINTER(ggml_tensor)
  1915. # GGML_API struct ggml_tensor * ggml_neg_inplace(
  1916. # struct ggml_context * ctx,
  1917. # struct ggml_tensor * a);
  1918. def ggml_neg_inplace(
  1919. ctx: ggml_context_p,
  1920. a: ggml_tensor_p,
  1921. ) -> ggml_tensor_p:
  1922. """Negate all elements in a tensor and store the result in the first tensor.
  1923. Parameters:
  1924. ctx: ggml context
  1925. a: tensor
  1926. Returns:
  1927. Pointer to ggml_tensor"""
  1928. return lib.ggml_neg_inplace(ctx, a)
  1929. lib.ggml_neg_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1930. lib.ggml_neg_inplace.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_step(
# struct ggml_context * ctx,
# struct ggml_tensor * a);
def ggml_step(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
    """Apply the unit step function to all elements in a tensor and return the result
    (per the ggml op name; see ggml_step in ggml.h for exact semantics).
    Parameters:
        ctx: ggml context
        a: tensor
    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_step(ctx, a)


lib.ggml_step.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_step.restype = ctypes.POINTER(ggml_tensor)
  1938. # GGML_API struct ggml_tensor * ggml_tanh(
  1939. # struct ggml_context * ctx,
  1940. # struct ggml_tensor * a);
  1941. def ggml_tanh(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  1942. """Apply the tanh activation function to all elements in a tensor and return the result.
  1943. Parameters:
  1944. ctx: ggml context
  1945. a: tensor
  1946. Returns:
  1947. Pointer to ggml_tensor"""
  1948. return lib.ggml_tanh(ctx, a)
  1949. lib.ggml_tanh.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1950. lib.ggml_tanh.restype = ctypes.POINTER(ggml_tensor)
  1951. # GGML_API struct ggml_tensor * ggml_tanh_inplace(
  1952. # struct ggml_context * ctx,
  1953. # struct ggml_tensor * a);
  1954. def ggml_tanh_inplace(
  1955. ctx: ggml_context_p,
  1956. a: ggml_tensor_p,
  1957. ) -> ggml_tensor_p:
  1958. """Apply the tanh activation function to all elements in a tensor and store the result in the first tensor.
  1959. Parameters:
  1960. ctx: ggml context
  1961. a: tensor
  1962. Returns:
  1963. Pointer to ggml_tensor"""
  1964. return lib.ggml_tanh_inplace(ctx, a)
  1965. lib.ggml_tanh_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1966. lib.ggml_tanh_inplace.restype = ctypes.POINTER(ggml_tensor)
  1967. # GGML_API struct ggml_tensor * ggml_elu(
  1968. # struct ggml_context * ctx,
  1969. # struct ggml_tensor * a);
  1970. def ggml_elu(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  1971. """Apply the ELU activation function to all elements in a tensor and return the result.
  1972. Parameters:
  1973. ctx: ggml context
  1974. a: tensor
  1975. Returns:
  1976. Pointer to ggml_tensor"""
  1977. return lib.ggml_elu(ctx, a)
  1978. lib.ggml_elu.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1979. lib.ggml_elu.restype = ctypes.POINTER(ggml_tensor)
  1980. # GGML_API struct ggml_tensor * ggml_elu_inplace(
  1981. # struct ggml_context * ctx,
  1982. # struct ggml_tensor * a);
  1983. def ggml_elu_inplace(
  1984. ctx: ggml_context_p,
  1985. a: ggml_tensor_p,
  1986. ) -> ggml_tensor_p:
  1987. """Apply the ELU activation function to all elements in a tensor and store the result in the first tensor.
  1988. Parameters:
  1989. ctx: ggml context
  1990. a: tensor
  1991. Returns:
  1992. Pointer to ggml_tensor"""
  1993. return lib.ggml_elu_inplace(ctx, a)
  1994. lib.ggml_elu_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  1995. lib.ggml_elu_inplace.restype = ctypes.POINTER(ggml_tensor)
  1996. # GGML_API struct ggml_tensor * ggml_relu(
  1997. # struct ggml_context * ctx,
  1998. # struct ggml_tensor * a);
  1999. def ggml_relu(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  2000. """Apply the ReLU activation function to all elements in a tensor and return the result.
  2001. Parameters:
  2002. ctx: ggml context
  2003. a: tensor
  2004. Returns:
  2005. Pointer to ggml_tensor"""
  2006. return lib.ggml_relu(ctx, a)
  2007. lib.ggml_relu.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2008. lib.ggml_relu.restype = ctypes.POINTER(ggml_tensor)
  2009. # GGML_API struct ggml_tensor * ggml_relu_inplace(
  2010. # struct ggml_context * ctx,
  2011. # struct ggml_tensor * a);
  2012. def ggml_relu_inplace(
  2013. ctx: ggml_context_p,
  2014. a: ggml_tensor_p,
  2015. ) -> ggml_tensor_p:
  2016. """Apply the ReLU activation function to all elements in a tensor and store the result in the first tensor.
  2017. Parameters:
  2018. ctx: ggml context
  2019. a: tensor
  2020. Returns:
  2021. Pointer to ggml_tensor"""
  2022. return lib.ggml_relu_inplace(ctx, a)
  2023. lib.ggml_relu_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2024. lib.ggml_relu_inplace.restype = ctypes.POINTER(ggml_tensor)
  2025. # // TODO: double-check this computation is correct
  2026. # GGML_API struct ggml_tensor * ggml_gelu(
  2027. # struct ggml_context * ctx,
  2028. # struct ggml_tensor * a);
  2029. def ggml_gelu(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  2030. """Apply the Gaussian Error Linear Unit activation function to all elements in a tensor and return the result.
  2031. Parameters:
  2032. ctx: ggml context
  2033. a: tensor
  2034. Returns:
  2035. Pointer to ggml_tensor"""
  2036. return lib.ggml_gelu(ctx, a)
  2037. lib.ggml_gelu.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2038. lib.ggml_gelu.restype = ctypes.POINTER(ggml_tensor)
  2039. # GGML_API struct ggml_tensor * ggml_gelu_inplace(
  2040. # struct ggml_context * ctx,
  2041. # struct ggml_tensor * a);
  2042. def ggml_gelu_inplace(
  2043. ctx: ggml_context_p,
  2044. a: ggml_tensor_p,
  2045. ) -> ggml_tensor_p:
  2046. """Apply the Gaussian Error Linear Unit activation function to all elements in a tensor and store the result in the first tensor.
  2047. Parameters:
  2048. ctx: ggml context
  2049. a: tensor
  2050. Returns:
  2051. Pointer to ggml_tensor"""
  2052. return lib.ggml_gelu_inplace(ctx, a)
  2053. lib.ggml_gelu_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2054. lib.ggml_gelu_inplace.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_gelu_quick(
# struct ggml_context * ctx,
# struct ggml_tensor * a);
def ggml_gelu_quick(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
    """Apply the "quick" (approximate) variant of the Gaussian Error Linear Unit activation
    function to all elements in a tensor and return the result.
    Parameters:
        ctx: ggml context
        a: tensor
    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_gelu_quick(ctx, a)


lib.ggml_gelu_quick.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_gelu_quick.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_gelu_quick_inplace(
# struct ggml_context * ctx,
# struct ggml_tensor * a);
def ggml_gelu_quick_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
) -> ggml_tensor_p:
    """Apply the "quick" (approximate) variant of the Gaussian Error Linear Unit activation
    function to all elements in a tensor and store the result in the first tensor.
    Parameters:
        ctx: ggml context
        a: tensor
    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_gelu_quick_inplace(ctx, a)


lib.ggml_gelu_quick_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_gelu_quick_inplace.restype = ctypes.POINTER(ggml_tensor)
  2084. # GGML_API struct ggml_tensor * ggml_silu(
  2085. # struct ggml_context * ctx,
  2086. # struct ggml_tensor * a);
  2087. def ggml_silu(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  2088. """Apply the Sigmoid Linear Unit activation function to all elements in a tensor and return the result.
  2089. Parameters:
  2090. ctx: ggml context
  2091. a: tensor
  2092. Returns:
  2093. Pointer to ggml_tensor"""
  2094. return lib.ggml_silu(ctx, a)
  2095. lib.ggml_silu.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2096. lib.ggml_silu.restype = ctypes.POINTER(ggml_tensor)
  2097. # GGML_API struct ggml_tensor * ggml_silu_inplace(
  2098. # struct ggml_context * ctx,
  2099. # struct ggml_tensor * a);
  2100. def ggml_silu_inplace(
  2101. ctx: ggml_context_p,
  2102. a: ggml_tensor_p,
  2103. ) -> ggml_tensor_p:
  2104. """Apply the Sigmoid Linear Unit activation function to all elements in a tensor and store the result in the first tensor.
  2105. Parameters:
  2106. ctx: ggml context
  2107. a: tensor
  2108. Returns:
  2109. Pointer to ggml_tensor"""
  2110. return lib.ggml_silu_inplace(ctx, a)
  2111. lib.ggml_silu_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2112. lib.ggml_silu_inplace.restype = ctypes.POINTER(ggml_tensor)
# // a - x
# // b - dy
# GGML_API struct ggml_tensor * ggml_silu_back(
# struct ggml_context * ctx,
# struct ggml_tensor * a,
# struct ggml_tensor * b);
def ggml_silu_back(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    """Backward pass of the SiLU activation.
    Parameters:
        ctx: ggml context
        a: forward input x (per the header comment)
        b: upstream gradient dy (per the header comment)
    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_silu_back(ctx, a, b)


lib.ggml_silu_back.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_silu_back.restype = ctypes.POINTER(ggml_tensor)
  2131. # // normalize along rows
  2132. # GGML_API struct ggml_tensor * ggml_norm(
  2133. # struct ggml_context * ctx,
  2134. # struct ggml_tensor * a
  2135. # float eps);
  2136. def ggml_norm(
  2137. ctx: ggml_context_p,
  2138. a: ggml_tensor_p,
  2139. eps: Union[ctypes.c_float, float],
  2140. ) -> ggml_tensor_p:
  2141. """Normalize all elements in a tensor along the first axis and return the result.
  2142. normalize along rows.
  2143. Parameters:
  2144. ctx: ggml context
  2145. a: tensor
  2146. eps: minimum value to avoid division by zero
  2147. Returns:
  2148. Pointer to ggml_tensor"""
  2149. return lib.ggml_norm(ctx, a, eps)
  2150. lib.ggml_norm.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor), ctypes.c_float]
  2151. lib.ggml_norm.restype = ctypes.POINTER(ggml_tensor)
  2152. # GGML_API struct ggml_tensor * ggml_norm_inplace(
  2153. # struct ggml_context * ctx,
  2154. # struct ggml_tensor * a
  2155. # float eps);
  2156. def ggml_norm_inplace(
  2157. ctx: ggml_context_p,
  2158. a: ggml_tensor_p,
  2159. eps: Union[ctypes.c_float, float],
  2160. ) -> ggml_tensor_p:
  2161. """Normalize all elements in a tensor along the first axis and store the result in the first tensor.
  2162. normalize along rows.
  2163. Parameters:
  2164. ctx: ggml context
  2165. a: tensor
  2166. eps: minimum value to avoid division by zero
  2167. Returns:
  2168. Pointer to ggml_tensor"""
  2169. return lib.ggml_norm_inplace(ctx, a, eps)
  2170. lib.ggml_norm_inplace.argtypes = [
  2171. ggml_context_p,
  2172. ctypes.POINTER(ggml_tensor),
  2173. ctypes.c_float,
  2174. ]
  2175. lib.ggml_norm_inplace.restype = ctypes.POINTER(ggml_tensor)
  2176. # GGML_API struct ggml_tensor * ggml_rms_norm(
  2177. # struct ggml_context * ctx,
  2178. # struct ggml_tensor * a,
  2179. # float eps);
  2180. def ggml_rms_norm(
  2181. ctx: ggml_context_p,
  2182. a: ggml_tensor_p,
  2183. eps: Union[ctypes.c_float, float],
  2184. ) -> ggml_tensor_p:
  2185. """Compute the RMS norm of a tensor and return the result.
  2186. Parameters:
  2187. ctx: ggml context
  2188. a: tensor
  2189. eps: float
  2190. Returns:
  2191. Pointer to ggml_tensor"""
  2192. return lib.ggml_rms_norm(ctx, a, eps)
  2193. lib.ggml_rms_norm.argtypes = [
  2194. ggml_context_p,
  2195. ctypes.POINTER(ggml_tensor),
  2196. ctypes.c_float,
  2197. ]
  2198. lib.ggml_rms_norm.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_rms_norm_inplace(
# struct ggml_context * ctx,
# struct ggml_tensor * a,
# float eps);
def ggml_rms_norm_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    eps: Union[ctypes.c_float, float],
) -> ggml_tensor_p:
    """Compute the RMS norm of a tensor and store the result in the first tensor.
    Parameters:
        ctx: ggml context
        a: tensor
        eps: float
    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_rms_norm_inplace(ctx, a, eps)


lib.ggml_rms_norm_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_float,
]
lib.ggml_rms_norm_inplace.restype = ctypes.POINTER(ggml_tensor)
  2215. # // group normalize along ne0*ne1*n_groups
  2216. # // used in stable-diffusion
  2217. # // TODO: eps is hardcoded to 1e-6 for now
  2218. # GGML_API struct ggml_tensor * ggml_group_norm(
  2219. # struct ggml_context * ctx,
  2220. # struct ggml_tensor * a,
  2221. # int n_groups);
  2222. def ggml_group_norm(
  2223. ctx: ggml_context_p,
  2224. a: ggml_tensor_p,
  2225. n_groups: int,
  2226. ) -> ggml_tensor_p:
  2227. """Group normalize a tensor and return the result.
  2228. Parameters:
  2229. ctx: ggml context
  2230. a: tensor
  2231. n_groups: int
  2232. Returns:
  2233. Pointer to ggml_tensor"""
  2234. return lib.ggml_group_norm(ctx, a, n_groups)
  2235. lib.ggml_group_norm.argtypes = [
  2236. ggml_context_p,
  2237. ctypes.POINTER(ggml_tensor),
  2238. ctypes.c_int,
  2239. ]
  2240. lib.ggml_group_norm.restype = ctypes.POINTER(ggml_tensor)
  2241. # GGML_API struct ggml_tensor * ggml_group_norm_inplace(
  2242. # struct ggml_context * ctx,
  2243. # struct ggml_tensor * a,
  2244. # int n_groups);
  2245. def ggml_group_norm_inplace(
  2246. ctx: ggml_context_p,
  2247. a: ggml_tensor_p,
  2248. n_groups: int,
  2249. ) -> ggml_tensor_p:
  2250. """Group normalize a tensor and store the result in the first tensor.
  2251. Parameters:
  2252. ctx: ggml context
  2253. a: tensor
  2254. n_groups: int
  2255. Returns:
  2256. Pointer to ggml_tensor"""
  2257. return lib.ggml_group_norm_inplace(ctx, a, n_groups)
  2258. lib.ggml_group_norm_inplace.argtypes = [
  2259. ggml_context_p,
  2260. ctypes.POINTER(ggml_tensor),
  2261. ctypes.c_int,
  2262. ]
  2263. lib.ggml_group_norm_inplace.restype = ctypes.POINTER(ggml_tensor)
# // a - x
# // b - dy
# GGML_API struct ggml_tensor * ggml_rms_norm_back(
# struct ggml_context * ctx,
# struct ggml_tensor * a,
# struct ggml_tensor * b
# float eps);
def ggml_rms_norm_back(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    eps: Union[ctypes.c_float, float],
) -> ggml_tensor_p:
    """Backward pass of RMS normalization.
    Parameters:
        ctx: ggml context
        a: forward input x (per the header comment)
        b: upstream gradient dy (per the header comment)
        eps: float
    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_rms_norm_back(ctx, a, b, eps)


lib.ggml_rms_norm_back.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_float,
]
lib.ggml_rms_norm_back.restype = ctypes.POINTER(ggml_tensor)
  2285. # // A: m rows, n columns
  2286. # // B: p rows, n columns (i.e. we transpose it internally)
  2287. # // result is m columns, p rows
  2288. # GGML_API struct ggml_tensor * ggml_mul_mat(
  2289. # struct ggml_context * ctx,
  2290. # struct ggml_tensor * a,
  2291. # struct ggml_tensor * b);
  2292. def ggml_mul_mat(
  2293. ctx: ggml_context_p,
  2294. a: ggml_tensor_p,
  2295. b: ggml_tensor_p,
  2296. ) -> ggml_tensor_p:
  2297. """Multiply two matrices and return the result.
  2298. A: m rows, n columns
  2299. B: p rows, n columns (i.e. we transpose it internally)
  2300. result is m columns, p rows
  2301. Parameters:
  2302. ctx: ggml context
  2303. a: tensor
  2304. b: tensor
  2305. Returns:
  2306. Pointer to ggml_tensor"""
  2307. return lib.ggml_mul_mat(ctx, a, b)
  2308. lib.ggml_mul_mat.argtypes = [
  2309. ggml_context_p,
  2310. ctypes.POINTER(ggml_tensor),
  2311. ctypes.POINTER(ggml_tensor),
  2312. ]
  2313. lib.ggml_mul_mat.restype = ctypes.POINTER(ggml_tensor)
  2314. # // A: m columns, n rows,
  2315. # // B: p columns, n rows,
  2316. # // result is m columns, p rows
  2317. # GGML_API struct ggml_tensor * ggml_out_prod(
  2318. # struct ggml_context * ctx,
  2319. # struct ggml_tensor * a,
  2320. # struct ggml_tensor * b);
  2321. def ggml_out_prod(
  2322. ctx: ggml_context_p,
  2323. a: ggml_tensor_p,
  2324. b: ggml_tensor_p,
  2325. ) -> ggml_tensor_p:
  2326. """Compute the outer product of two matrices and return the result.
  2327. A: m columns, n rows,
  2328. B: p columns, n rows,
  2329. result is m columns, p rows
  2330. Parameters:
  2331. ctx: ggml context
  2332. a: tensor
  2333. b: tensor
  2334. Returns:
  2335. Pointer to ggml_tensor"""
  2336. return lib.ggml_out_prod(ctx, a, b)
  2337. lib.ggml_out_prod.argtypes = [
  2338. ggml_context_p,
  2339. ctypes.POINTER(ggml_tensor),
  2340. ctypes.POINTER(ggml_tensor),
  2341. ]
  2342. lib.ggml_out_prod.restype = ctypes.POINTER(ggml_tensor)
  2343. # //
  2344. # // operations on tensors without backpropagation
  2345. # //
  2346. # GGML_API struct ggml_tensor * ggml_scale(
  2347. # struct ggml_context * ctx,
  2348. # struct ggml_tensor * a,
  2349. # struct ggml_tensor * b);
  2350. def ggml_scale(
  2351. ctx: ggml_context_p,
  2352. a: ggml_tensor_p,
  2353. b: ggml_tensor_p,
  2354. ) -> ggml_tensor_p:
  2355. """Scale a tensor by another tensor and return the result.
  2356. Parameters:
  2357. ctx: ggml context
  2358. a: tensor
  2359. b: tensor
  2360. Returns:
  2361. Pointer to ggml_tensor"""
  2362. return lib.ggml_scale(ctx, a, b)
  2363. lib.ggml_scale.argtypes = [
  2364. ggml_context_p,
  2365. ctypes.POINTER(ggml_tensor),
  2366. ctypes.POINTER(ggml_tensor),
  2367. ]
  2368. lib.ggml_scale.restype = ctypes.POINTER(ggml_tensor)
# // in-place, returns view(a)
# GGML_API struct ggml_tensor * ggml_scale_inplace(
# struct ggml_context * ctx,
# struct ggml_tensor * a,
# struct ggml_tensor * b);
def ggml_scale_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    """Scale a tensor by another tensor and store the result in the first tensor.
    In-place; returns view(a) per the header comment.
    Parameters:
        ctx: ggml context
        a: tensor to scale (receives the result)
        b: scale factor tensor
    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_scale_inplace(ctx, a, b)


lib.ggml_scale_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_scale_inplace.restype = ctypes.POINTER(ggml_tensor)
# // b -> view(a,offset,nb1,nb2,3), return modified a
# GGML_API struct ggml_tensor * ggml_set(
# struct ggml_context * ctx,
# struct ggml_tensor * a,
# struct ggml_tensor * b,
# size_t nb1,
# size_t nb2,
# size_t nb3,
# size_t offset);
def ggml_set(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    nb1: Union[ctypes.c_size_t, int],
    nb2: Union[ctypes.c_size_t, int],
    nb3: Union[ctypes.c_size_t, int],
    offset: Union[ctypes.c_size_t, int],
) -> ggml_tensor_p:
    """Write ``b`` into a view of ``a`` described by strides nb1/nb2/nb3 and a byte offset,
    returning the modified ``a`` (per the header comment).
    Parameters:
        ctx: ggml context
        a: destination tensor
        b: source tensor
        nb1: stride in bytes for dim 1
        nb2: stride in bytes for dim 2
        nb3: stride in bytes for dim 3
        offset: byte offset into a
    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_set(ctx, a, b, nb1, nb2, nb3, offset)


lib.ggml_set.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_size_t,
    ctypes.c_size_t,
    ctypes.c_size_t,
    ctypes.c_size_t,
]
lib.ggml_set.restype = ctypes.POINTER(ggml_tensor)
# // b -> view(a,offset,nb1,nb2,3), return view(a)
# GGML_API struct ggml_tensor * ggml_set_inplace(
# struct ggml_context * ctx,
# struct ggml_tensor * a,
# struct ggml_tensor * b,
# size_t nb1,
# size_t nb2,
# size_t nb3,
# size_t offset);
def ggml_set_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    nb1: Union[ctypes.c_size_t, int],
    nb2: Union[ctypes.c_size_t, int],
    nb3: Union[ctypes.c_size_t, int],
    offset: Union[ctypes.c_size_t, int],
) -> ggml_tensor_p:
    """In-place variant of ggml_set: write ``b`` into a view of ``a`` described by strides
    nb1/nb2/nb3 and a byte offset, returning view(a) (per the header comment).
    Parameters:
        ctx: ggml context
        a: destination tensor
        b: source tensor
        nb1: stride in bytes for dim 1
        nb2: stride in bytes for dim 2
        nb3: stride in bytes for dim 3
        offset: byte offset into a
    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_set_inplace(ctx, a, b, nb1, nb2, nb3, offset)


lib.ggml_set_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_size_t,
    ctypes.c_size_t,
    ctypes.c_size_t,
    ctypes.c_size_t,
]
lib.ggml_set_inplace.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_set_1d(
# struct ggml_context * ctx,
# struct ggml_tensor * a,
# struct ggml_tensor * b,
# size_t offset);
def ggml_set_1d(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    offset: Union[ctypes.c_size_t, int],
) -> ggml_tensor_p:
    """1-D variant of ggml_set: write ``b`` into ``a`` at a byte offset.
    Parameters:
        ctx: ggml context
        a: destination tensor
        b: source tensor
        offset: byte offset into a
    Returns:
        Pointer to ggml_tensor"""
    return lib.ggml_set_1d(ctx, a, b, offset)


lib.ggml_set_1d.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_size_t,
]
lib.ggml_set_1d.restype = ctypes.POINTER(ggml_tensor)
  2469. # GGML_API struct ggml_tensor * ggml_set_1d_inplace(
  2470. # struct ggml_context * ctx,
  2471. # struct ggml_tensor * a,
  2472. # struct ggml_tensor * b,
  2473. # size_t offset);
  2474. def ggml_set_1d_inplace(
  2475. ctx: ggml_context_p,
  2476. a: ggml_tensor_p,
  2477. b: ggml_tensor_p,
  2478. offset: Union[ctypes.c_size_t, int],
  2479. ) -> ggml_tensor_p:
  2480. return lib.ggml_set_1d_inplace(ctx, a, b, offset)
  2481. lib.ggml_set_1d_inplace.argtypes = [
  2482. ggml_context_p,
  2483. ctypes.POINTER(ggml_tensor),
  2484. ctypes.POINTER(ggml_tensor),
  2485. ctypes.c_size_t,
  2486. ]
  2487. lib.ggml_set_1d_inplace.restype = ctypes.POINTER(ggml_tensor)
  2488. # // b -> view(a,offset,nb1,nb2,3), return modified a
  2489. # GGML_API struct ggml_tensor * ggml_set_2d(
  2490. # struct ggml_context * ctx,
  2491. # struct ggml_tensor * a,
  2492. # struct ggml_tensor * b,
  2493. # size_t nb1,
  2494. # size_t offset);
  2495. def ggml_set_2d(
  2496. ctx: ggml_context_p,
  2497. a: ggml_tensor_p,
  2498. b: ggml_tensor_p,
  2499. nb1: Union[ctypes.c_size_t, int],
  2500. offset: Union[ctypes.c_size_t, int],
  2501. ) -> ggml_tensor_p:
  2502. return lib.ggml_set_2d(ctx, a, b, nb1, offset)
  2503. lib.ggml_set_2d.argtypes = [
  2504. ggml_context_p,
  2505. ctypes.POINTER(ggml_tensor),
  2506. ctypes.POINTER(ggml_tensor),
  2507. ctypes.c_size_t,
  2508. ctypes.c_size_t,
  2509. ]
  2510. lib.ggml_set_2d.restype = ctypes.POINTER(ggml_tensor)
  2511. # // b -> view(a,offset,nb1,nb2,3), return view(a)
  2512. # GGML_API struct ggml_tensor * ggml_set_2d_inplace(
  2513. # struct ggml_context * ctx,
  2514. # struct ggml_tensor * a,
  2515. # struct ggml_tensor * b,
  2516. # size_t nb1,
  2517. # size_t offset);
  2518. def ggml_set_2d_inplace(
  2519. ctx: ggml_context_p,
  2520. a: ggml_tensor_p,
  2521. b: ggml_tensor_p,
  2522. nb1: Union[ctypes.c_size_t, int],
  2523. offset: Union[ctypes.c_size_t, int],
  2524. ) -> ggml_tensor_p:
  2525. return lib.ggml_set_2d_inplace(ctx, a, b, nb1, offset)
  2526. lib.ggml_set_2d_inplace.argtypes = [
  2527. ggml_context_p,
  2528. ctypes.POINTER(ggml_tensor),
  2529. ctypes.POINTER(ggml_tensor),
  2530. ctypes.c_size_t,
  2531. ctypes.c_size_t,
  2532. ]
  2533. lib.ggml_set_2d_inplace.restype = ctypes.POINTER(ggml_tensor)
  2534. # // a -> b, return view(b)
  2535. # GGML_API struct ggml_tensor * ggml_cpy(
  2536. # struct ggml_context * ctx,
  2537. # struct ggml_tensor * a,
  2538. # struct ggml_tensor * b);
  2539. def ggml_cpy(
  2540. ctx: ggml_context_p,
  2541. a: ggml_tensor_p,
  2542. b: ggml_tensor_p,
  2543. ) -> ggml_tensor_p:
  2544. return lib.ggml_cpy(ctx, a, b)
  2545. lib.ggml_cpy.argtypes = [
  2546. ggml_context_p,
  2547. ctypes.POINTER(ggml_tensor),
  2548. ctypes.POINTER(ggml_tensor),
  2549. ]
  2550. lib.ggml_cpy.restype = ctypes.POINTER(ggml_tensor)
  2551. # // a -> b, in-place, return view(b)
  2552. # GGML_API struct ggml_tensor * ggml_cpy_inplace(
  2553. # struct ggml_context * ctx,
  2554. # struct ggml_tensor * a,
  2555. # struct ggml_tensor * b);
  2556. def ggml_cpy_inplace(
  2557. ctx: ggml_context_p,
  2558. a: ggml_tensor_p,
  2559. b: ggml_tensor_p,
  2560. ) -> ggml_tensor_p:
  2561. return lib.ggml_cpy_inplace(ctx, a, b)
  2562. lib.ggml_cpy_inplace.argtypes = [
  2563. ggml_context_p,
  2564. ctypes.POINTER(ggml_tensor),
  2565. ctypes.POINTER(ggml_tensor),
  2566. ]
  2567. lib.ggml_cpy_inplace.restype = ctypes.POINTER(ggml_tensor)
  2568. # // make contiguous
  2569. # GGML_API struct ggml_tensor * ggml_cont(
  2570. # struct ggml_context * ctx,
  2571. # struct ggml_tensor * a);
  2572. def ggml_cont(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  2573. """Make a tensor contiguous and return the result.
  2574. Parameters:
  2575. ctx: ggml context
  2576. a: tensor
  2577. Returns:
  2578. Pointer to ggml_tensor"""
  2579. return lib.ggml_cont(ctx, a)
  2580. lib.ggml_cont.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2581. lib.ggml_cont.restype = ctypes.POINTER(ggml_tensor)
  2582. # // make contiguous, in-place
  2583. # GGML_API struct ggml_tensor * ggml_cont_inplace(
  2584. # struct ggml_context * ctx,
  2585. # struct ggml_tensor * a);
  2586. def ggml_cont_inplace(
  2587. ctx: ggml_context_p,
  2588. a: ggml_tensor_p,
  2589. ) -> ggml_tensor_p:
  2590. """Make a tensor contiguous and store the result in the first tensor.
  2591. Parameters:
  2592. ctx: ggml context
  2593. a: tensor
  2594. Returns:
  2595. Pointer to ggml_tensor"""
  2596. return lib.ggml_cont_inplace(ctx, a)
  2597. lib.ggml_cont_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2598. lib.ggml_cont_inplace.restype = ctypes.POINTER(ggml_tensor)
  2599. # // return view(a), b specifies the new shape
  2600. # // TODO: when we start computing gradient, make a copy instead of view
  2601. # GGML_API struct ggml_tensor * ggml_reshape(
  2602. # struct ggml_context * ctx,
  2603. # struct ggml_tensor * a,
  2604. # struct ggml_tensor * b);
  2605. def ggml_reshape(
  2606. ctx: ggml_context_p,
  2607. a: ggml_tensor_p,
  2608. b: ggml_tensor_p,
  2609. ) -> ggml_tensor_p:
  2610. return lib.ggml_reshape(ctx, a, b)
  2611. lib.ggml_reshape.argtypes = [
  2612. ggml_context_p,
  2613. ctypes.POINTER(ggml_tensor),
  2614. ctypes.POINTER(ggml_tensor),
  2615. ]
  2616. lib.ggml_reshape.restype = ctypes.POINTER(ggml_tensor)
  2617. # // return view(a)
  2618. # // TODO: when we start computing gradient, make a copy instead of view
  2619. # GGML_API struct ggml_tensor * ggml_reshape_1d(
  2620. # struct ggml_context * ctx,
  2621. # struct ggml_tensor * a,
  2622. # int64_t ne0);
  2623. def ggml_reshape_1d(
  2624. ctx: ggml_context_p,
  2625. a: ggml_tensor_p,
  2626. ne0: Union[ctypes.c_int64, int],
  2627. ) -> ggml_tensor_p:
  2628. return lib.ggml_reshape_1d(ctx, a, ne0)
  2629. lib.ggml_reshape_1d.argtypes = [
  2630. ggml_context_p,
  2631. ctypes.POINTER(ggml_tensor),
  2632. ctypes.c_int64,
  2633. ]
  2634. lib.ggml_reshape_1d.restype = ctypes.POINTER(ggml_tensor)
  2635. # GGML_API struct ggml_tensor * ggml_reshape_2d(
  2636. # struct ggml_context * ctx,
  2637. # struct ggml_tensor * a,
  2638. # int64_t ne0,
  2639. # int64_t ne1);
  2640. def ggml_reshape_2d(
  2641. ctx: ggml_context_p,
  2642. a: ggml_tensor_p,
  2643. ne0: Union[ctypes.c_int64, int],
  2644. ne1: Union[ctypes.c_int64, int],
  2645. ) -> ggml_tensor_p:
  2646. return lib.ggml_reshape_2d(ctx, a, ne0, ne1)
  2647. lib.ggml_reshape_2d.argtypes = [
  2648. ggml_context_p,
  2649. ctypes.POINTER(ggml_tensor),
  2650. ctypes.c_int64,
  2651. ctypes.c_int64,
  2652. ]
  2653. lib.ggml_reshape_2d.restype = ctypes.POINTER(ggml_tensor)
  2654. # // return view(a)
  2655. # // TODO: when we start computing gradient, make a copy instead of view
  2656. # GGML_API struct ggml_tensor * ggml_reshape_3d(
  2657. # struct ggml_context * ctx,
  2658. # struct ggml_tensor * a,
  2659. # int64_t ne0,
  2660. # int64_t ne1,
  2661. # int64_t ne2);
  2662. def ggml_reshape_3d(
  2663. ctx: ggml_context_p,
  2664. a: ggml_tensor_p,
  2665. ne0: Union[ctypes.c_int64, int],
  2666. ne1: Union[ctypes.c_int64, int],
  2667. ne2: Union[ctypes.c_int64, int],
  2668. ) -> ggml_tensor_p:
  2669. return lib.ggml_reshape_3d(ctx, a, ne0, ne1, ne2)
  2670. lib.ggml_reshape_3d.argtypes = [
  2671. ggml_context_p,
  2672. ctypes.POINTER(ggml_tensor),
  2673. ctypes.c_int64,
  2674. ctypes.c_int64,
  2675. ctypes.c_int64,
  2676. ]
  2677. lib.ggml_reshape_3d.restype = ctypes.POINTER(ggml_tensor)
  2678. # GGML_API struct ggml_tensor * ggml_reshape_4d(
  2679. # struct ggml_context * ctx,
  2680. # struct ggml_tensor * a,
  2681. # int64_t ne0,
  2682. # int64_t ne1,
  2683. # int64_t ne2,
  2684. # int64_t ne3);
  2685. def ggml_reshape_4d(
  2686. ctx: ggml_context_p,
  2687. a: ggml_tensor_p,
  2688. ne0: Union[ctypes.c_int64, int],
  2689. ne1: Union[ctypes.c_int64, int],
  2690. ne2: Union[ctypes.c_int64, int],
  2691. ne3: Union[ctypes.c_int64, int],
  2692. ) -> ggml_tensor_p:
  2693. return lib.ggml_reshape_4d(ctx, a, ne0, ne1, ne2, ne3)
  2694. lib.ggml_reshape_4d.argtypes = [
  2695. ggml_context_p,
  2696. ctypes.POINTER(ggml_tensor),
  2697. ctypes.c_int64,
  2698. ctypes.c_int64,
  2699. ctypes.c_int64,
  2700. ctypes.c_int64,
  2701. ]
  2702. lib.ggml_reshape_4d.restype = ctypes.POINTER(ggml_tensor)
  2703. # // offset in bytes
  2704. # GGML_API struct ggml_tensor * ggml_view_1d(
  2705. # struct ggml_context * ctx,
  2706. # struct ggml_tensor * a,
  2707. # int64_t ne0,
  2708. # size_t offset);
  2709. def ggml_view_1d(
  2710. ctx: ggml_context_p,
  2711. a: ggml_tensor_p,
  2712. ne0: Union[ctypes.c_int64, int],
  2713. offset: Union[ctypes.c_size_t, int],
  2714. ) -> ggml_tensor_p:
  2715. return lib.ggml_view_1d(ctx, a, ne0, offset)
  2716. lib.ggml_view_1d.argtypes = [
  2717. ggml_context_p,
  2718. ctypes.POINTER(ggml_tensor),
  2719. ctypes.c_int64,
  2720. ctypes.c_size_t,
  2721. ]
  2722. lib.ggml_view_1d.restype = ctypes.POINTER(ggml_tensor)
  2723. # GGML_API struct ggml_tensor * ggml_view_2d(
  2724. # struct ggml_context * ctx,
  2725. # struct ggml_tensor * a,
  2726. # int64_t ne0,
  2727. # int64_t ne1,
  2728. # size_t nb1, // row stride in bytes
  2729. # size_t offset);
  2730. def ggml_view_2d(
  2731. ctx: ggml_context_p,
  2732. a: ggml_tensor_p,
  2733. ne0: Union[ctypes.c_int64, int],
  2734. ne1: Union[ctypes.c_int64, int],
  2735. nb1: Union[ctypes.c_size_t, int],
  2736. offset: Union[ctypes.c_size_t, int],
  2737. ) -> ggml_tensor_p:
  2738. return lib.ggml_view_2d(ctx, a, ne0, ne1, nb1, offset)
  2739. lib.ggml_view_2d.argtypes = [
  2740. ggml_context_p,
  2741. ctypes.POINTER(ggml_tensor),
  2742. ctypes.c_int64,
  2743. ctypes.c_int64,
  2744. ctypes.c_size_t,
  2745. ctypes.c_size_t,
  2746. ]
  2747. lib.ggml_view_2d.restype = ctypes.POINTER(ggml_tensor)
  2748. # GGML_API struct ggml_tensor * ggml_view_3d(
  2749. # struct ggml_context * ctx,
  2750. # struct ggml_tensor * a,
  2751. # int64_t ne0,
  2752. # int64_t ne1,
  2753. # int64_t ne2,
  2754. # size_t nb1, // row stride in bytes
  2755. # size_t nb2, // slice stride in bytes
  2756. # size_t offset);
  2757. def ggml_view_3d(
  2758. ctx: ggml_context_p,
  2759. a: ggml_tensor_p,
  2760. ne0: Union[ctypes.c_int64, int],
  2761. ne1: Union[ctypes.c_int64, int],
  2762. ne2: Union[ctypes.c_int64, int],
  2763. nb1: Union[ctypes.c_size_t, int],
  2764. nb2: Union[ctypes.c_size_t, int],
  2765. offset: Union[ctypes.c_size_t, int],
  2766. ) -> ggml_tensor_p:
  2767. return lib.ggml_view_3d(ctx, a, ne0, ne1, ne2, nb1, nb2, offset)
  2768. lib.ggml_view_3d.argtypes = [
  2769. ggml_context_p,
  2770. ctypes.POINTER(ggml_tensor),
  2771. ctypes.c_int64,
  2772. ctypes.c_int64,
  2773. ctypes.c_int64,
  2774. ctypes.c_size_t,
  2775. ctypes.c_size_t,
  2776. ctypes.c_size_t,
  2777. ]
  2778. lib.ggml_view_3d.restype = ctypes.POINTER(ggml_tensor)
  2779. # GGML_API struct ggml_tensor * ggml_view_4d(
  2780. # struct ggml_context * ctx,
  2781. # struct ggml_tensor * a,
  2782. # int64_t ne0,
  2783. # int64_t ne1,
  2784. # int64_t ne2,
  2785. # int64_t ne3,
  2786. # size_t nb1, // row stride in bytes
  2787. # size_t nb2, // slice stride in bytes
  2788. # size_t nb3,
  2789. # size_t offset);
  2790. def ggml_view_4d(
  2791. ctx: ggml_context_p,
  2792. a: ggml_tensor_p,
  2793. ne0: Union[ctypes.c_int64, int],
  2794. ne1: Union[ctypes.c_int64, int],
  2795. ne2: Union[ctypes.c_int64, int],
  2796. ne3: Union[ctypes.c_int64, int],
  2797. nb1: Union[ctypes.c_size_t, int],
  2798. nb2: Union[ctypes.c_size_t, int],
  2799. nb3: Union[ctypes.c_size_t, int],
  2800. offset: Union[ctypes.c_size_t, int],
  2801. ) -> ggml_tensor_p:
  2802. return lib.ggml_view_4d(ctx, a, ne0, ne1, ne2, ne3, nb1, nb2, nb3, offset)
  2803. lib.ggml_view_4d.argtypes = [
  2804. ggml_context_p,
  2805. ctypes.POINTER(ggml_tensor),
  2806. ctypes.c_int64,
  2807. ctypes.c_int64,
  2808. ctypes.c_int64,
  2809. ctypes.c_int64,
  2810. ctypes.c_size_t,
  2811. ctypes.c_size_t,
  2812. ctypes.c_size_t,
  2813. ctypes.c_size_t,
  2814. ]
  2815. lib.ggml_view_4d.restype = ctypes.POINTER(ggml_tensor)
  2816. # GGML_API struct ggml_tensor * ggml_permute(
  2817. # struct ggml_context * ctx,
  2818. # struct ggml_tensor * a,
  2819. # int axis0,
  2820. # int axis1,
  2821. # int axis2,
  2822. # int axis3);
  2823. def ggml_permute(
  2824. ctx: ggml_context_p,
  2825. a: ggml_tensor_p,
  2826. axis0: Union[ctypes.c_int, int],
  2827. axis1: Union[ctypes.c_int, int],
  2828. axis2: Union[ctypes.c_int, int],
  2829. axis3: Union[ctypes.c_int, int],
  2830. ) -> ggml_tensor_p:
  2831. return lib.ggml_permute(ctx, a, axis0, axis1, axis2, axis3)
  2832. lib.ggml_permute.argtypes = [
  2833. ggml_context_p,
  2834. ctypes.POINTER(ggml_tensor),
  2835. ctypes.c_int,
  2836. ctypes.c_int,
  2837. ctypes.c_int,
  2838. ctypes.c_int,
  2839. ]
  2840. lib.ggml_permute.restype = ctypes.POINTER(ggml_tensor)
  2841. # // alias for ggml_permute(ctx, a, 1, 0, 2, 3)
  2842. # GGML_API struct ggml_tensor * ggml_transpose(
  2843. # struct ggml_context * ctx,
  2844. # struct ggml_tensor * a);
  2845. def ggml_transpose(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  2846. """Transpose *the first two dimensions* of a tensor and return the result.
  2847. alias for `ggml_permute(ctx, a, 1, 0, 2, 3)`
  2848. Parameters:
  2849. ctx: ggml context
  2850. a: tensor
  2851. Returns:
  2852. Pointer to ggml_tensor"""
  2853. return lib.ggml_transpose(ctx, a)
  2854. lib.ggml_transpose.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2855. lib.ggml_transpose.restype = ctypes.POINTER(ggml_tensor)
  2856. # GGML_API struct ggml_tensor * ggml_get_rows(
  2857. # struct ggml_context * ctx,
  2858. # struct ggml_tensor * a,
  2859. # struct ggml_tensor * b);
  2860. def ggml_get_rows(
  2861. ctx: ggml_context_p,
  2862. a: ggml_tensor_p,
  2863. b: ggml_tensor_p,
  2864. ) -> ggml_tensor_p:
  2865. return lib.ggml_get_rows(ctx, a, b)
  2866. lib.ggml_get_rows.argtypes = [
  2867. ggml_context_p,
  2868. ctypes.POINTER(ggml_tensor),
  2869. ctypes.POINTER(ggml_tensor),
  2870. ]
  2871. lib.ggml_get_rows.restype = ctypes.POINTER(ggml_tensor)
  2872. # GGML_API struct ggml_tensor * ggml_get_rows_back(
  2873. # struct ggml_context * ctx,
  2874. # struct ggml_tensor * a,
  2875. # struct ggml_tensor * b,
  2876. # struct ggml_tensor * c);
  2877. def ggml_get_rows_back(
  2878. ctx: ggml_context_p,
  2879. a: ggml_tensor_p,
  2880. b: ggml_tensor_p,
  2881. c: ggml_tensor_p,
  2882. ) -> ggml_tensor_p:
  2883. return lib.ggml_get_rows_back(ctx, a, b, c)
  2884. lib.ggml_get_rows_back.argtypes = [
  2885. ggml_context_p,
  2886. ctypes.POINTER(ggml_tensor),
  2887. ctypes.POINTER(ggml_tensor),
  2888. ctypes.POINTER(ggml_tensor),
  2889. ]
  2890. lib.ggml_get_rows_back.restype = ctypes.POINTER(ggml_tensor)
  2891. # GGML_API struct ggml_tensor * ggml_diag(
  2892. # struct ggml_context * ctx,
  2893. # struct ggml_tensor * a);
  2894. def ggml_diag(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  2895. return lib.ggml_diag(ctx, a)
  2896. lib.ggml_diag.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2897. lib.ggml_diag.restype = ctypes.POINTER(ggml_tensor)
  2898. # // set elements above the diagonal to -INF
  2899. # GGML_API struct ggml_tensor * ggml_diag_mask_inf(
  2900. # struct ggml_context * ctx,
  2901. # struct ggml_tensor * a,
  2902. # int n_past);
  2903. def ggml_diag_mask_inf(
  2904. ctx: ggml_context_p,
  2905. a: ggml_tensor_p,
  2906. n_past: Union[ctypes.c_int, int],
  2907. ) -> ggml_tensor_p:
  2908. return lib.ggml_diag_mask_inf(ctx, a, n_past)
  2909. lib.ggml_diag_mask_inf.argtypes = [
  2910. ggml_context_p,
  2911. ctypes.POINTER(ggml_tensor),
  2912. ctypes.c_int,
  2913. ]
  2914. lib.ggml_diag_mask_inf.restype = ctypes.POINTER(ggml_tensor)
  2915. # // in-place, returns view(a)
  2916. # GGML_API struct ggml_tensor * ggml_diag_mask_inf_inplace(
  2917. # struct ggml_context * ctx,
  2918. # struct ggml_tensor * a,
  2919. # int n_past);
  2920. def ggml_diag_mask_inf_inplace(
  2921. ctx: ggml_context_p,
  2922. a: ggml_tensor_p,
  2923. n_past: Union[ctypes.c_int, int],
  2924. ) -> ggml_tensor_p:
  2925. return lib.ggml_diag_mask_inf_inplace(ctx, a, n_past)
  2926. lib.ggml_diag_mask_inf_inplace.argtypes = [
  2927. ggml_context_p,
  2928. ctypes.POINTER(ggml_tensor),
  2929. ctypes.c_int,
  2930. ]
  2931. lib.ggml_diag_mask_inf_inplace.restype = ctypes.POINTER(ggml_tensor)
  2932. # // set elements above the diagonal to 0
  2933. # GGML_API struct ggml_tensor * ggml_diag_mask_zero(
  2934. # struct ggml_context * ctx,
  2935. # struct ggml_tensor * a,
  2936. # int n_past);
  2937. def ggml_diag_mask_zero(
  2938. ctx: ggml_context_p,
  2939. a: ggml_tensor_p,
  2940. n_past: Union[ctypes.c_int, int],
  2941. ) -> ggml_tensor_p:
  2942. return lib.ggml_diag_mask_zero(ctx, a, n_past)
  2943. lib.ggml_diag_mask_zero.argtypes = [
  2944. ggml_context_p,
  2945. ctypes.POINTER(ggml_tensor),
  2946. ctypes.c_int,
  2947. ]
  2948. lib.ggml_diag_mask_zero.restype = ctypes.POINTER(ggml_tensor)
  2949. # // in-place, returns view(a)
  2950. # GGML_API struct ggml_tensor * ggml_diag_mask_zero_inplace(
  2951. # struct ggml_context * ctx,
  2952. # struct ggml_tensor * a,
  2953. # int n_past);
  2954. def ggml_diag_mask_zero_inplace(
  2955. ctx: ggml_context_p,
  2956. a: ggml_tensor_p,
  2957. n_past: Union[ctypes.c_int, int],
  2958. ) -> ggml_tensor_p:
  2959. return lib.ggml_diag_mask_zero_inplace(ctx, a, n_past)
  2960. lib.ggml_diag_mask_zero_inplace.argtypes = [
  2961. ggml_context_p,
  2962. ctypes.POINTER(ggml_tensor),
  2963. ctypes.c_int,
  2964. ]
  2965. lib.ggml_diag_mask_zero_inplace.restype = ctypes.POINTER(ggml_tensor)
  2966. # GGML_API struct ggml_tensor * ggml_soft_max(
  2967. # struct ggml_context * ctx,
  2968. # struct ggml_tensor * a);
  2969. def ggml_soft_max(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  2970. return lib.ggml_soft_max(ctx, a)
  2971. lib.ggml_soft_max.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2972. lib.ggml_soft_max.restype = ctypes.POINTER(ggml_tensor)
  2973. # // in-place, returns view(a)
  2974. # GGML_API struct ggml_tensor * ggml_soft_max_inplace(
  2975. # struct ggml_context * ctx,
  2976. # struct ggml_tensor * a);
  2977. def ggml_soft_max_inplace(ctx: ggml_context_p, a: ggml_tensor_p) -> ggml_tensor_p:
  2978. return lib.ggml_soft_max_inplace(ctx, a)
  2979. lib.ggml_soft_max_inplace.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  2980. lib.ggml_soft_max_inplace.restype = ctypes.POINTER(ggml_tensor)
  2981. # GGML_API struct ggml_tensor * ggml_soft_max_back(
  2982. # struct ggml_context * ctx,
  2983. # struct ggml_tensor * a,
  2984. # struct ggml_tensor * b);
  2985. def ggml_soft_max_back(
  2986. ctx: ggml_context_p,
  2987. a: ggml_tensor_p,
  2988. b: ggml_tensor_p,
  2989. ) -> ggml_tensor_p:
  2990. return lib.ggml_soft_max_back(ctx, a, b)
  2991. lib.ggml_soft_max_back.argtypes = [
  2992. ggml_context_p,
  2993. ctypes.POINTER(ggml_tensor),
  2994. ctypes.POINTER(ggml_tensor),
  2995. ]
  2996. lib.ggml_soft_max_back.restype = ctypes.POINTER(ggml_tensor)
  2997. # // in-place, returns view(a)
  2998. # GGML_API struct ggml_tensor * ggml_soft_max_back_inplace(
  2999. # struct ggml_context * ctx,
  3000. # struct ggml_tensor * a,
  3001. # struct ggml_tensor * b);
  3002. def ggml_soft_max_back_inplace(
  3003. ctx: ggml_context_p,
  3004. a: ggml_tensor_p,
  3005. b: ggml_tensor_p,
  3006. ) -> ggml_tensor_p:
  3007. return lib.ggml_soft_max_back_inplace(ctx, a, b)
  3008. lib.ggml_soft_max_back_inplace.argtypes = [
  3009. ggml_context_p,
  3010. ctypes.POINTER(ggml_tensor),
  3011. ctypes.POINTER(ggml_tensor),
  3012. ]
  3013. lib.ggml_soft_max_back_inplace.restype = ctypes.POINTER(ggml_tensor)
  3014. # // rotary position embedding
  3015. # // if mode & 1 == 1, skip n_past elements
  3016. # // if mode & 2 == 1, GPT-NeoX style
  3017. # // if mode & 4 == 1, ChatGLM style
  3018. # // TODO: avoid creating a new tensor every time
  3019. # GGML_API struct ggml_tensor * ggml_rope(
  3020. # struct ggml_context * ctx,
  3021. # struct ggml_tensor * a,
  3022. # int n_past,
  3023. # int n_dims,
  3024. # int mode,
  3025. # int n_ctx);
  3026. def ggml_rope(
  3027. ctx: ggml_context_p,
  3028. a: ggml_tensor_p,
  3029. n_past: Union[ctypes.c_int, int],
  3030. n_dims: Union[ctypes.c_int, int],
  3031. mode: Union[ctypes.c_int, int],
  3032. n_ctx: Union[ctypes.c_int, int],
  3033. ) -> ggml_tensor_p:
  3034. return lib.ggml_rope(ctx, a, n_past, n_dims, mode, n_ctx)
  3035. lib.ggml_rope.argtypes = [
  3036. ggml_context_p,
  3037. ctypes.POINTER(ggml_tensor),
  3038. ctypes.c_int,
  3039. ctypes.c_int,
  3040. ctypes.c_int,
  3041. ctypes.c_int,
  3042. ]
  3043. lib.ggml_rope.restype = ctypes.POINTER(ggml_tensor)
  3044. # // in-place, returns view(a)
  3045. # GGML_API struct ggml_tensor * ggml_rope_inplace(
  3046. # struct ggml_context * ctx,
  3047. # struct ggml_tensor * a,
  3048. # int n_past,
  3049. # int n_dims,
  3050. # int mode,
  3051. # int n_ctx);
  3052. def ggml_rope_inplace(
  3053. ctx: ggml_context_p,
  3054. a: ggml_tensor_p,
  3055. n_past: Union[ctypes.c_int, int],
  3056. n_dims: Union[ctypes.c_int, int],
  3057. mode: Union[ctypes.c_int, int],
  3058. n_ctx: Union[ctypes.c_int, int],
  3059. ) -> ggml_tensor_p:
  3060. return lib.ggml_rope_inplace(ctx, a, n_past, n_dims, mode, n_ctx)
  3061. lib.ggml_rope_inplace.argtypes = [
  3062. ggml_context_p,
  3063. ctypes.POINTER(ggml_tensor),
  3064. ctypes.c_int,
  3065. ctypes.c_int,
  3066. ctypes.c_int,
  3067. ctypes.c_int,
  3068. ]
  3069. lib.ggml_rope_inplace.restype = ctypes.POINTER(ggml_tensor)
  3070. # // custom RoPE
  3071. # GGML_API struct ggml_tensor * ggml_rope_custom(
  3072. # struct ggml_context * ctx,
  3073. # struct ggml_tensor * a,
  3074. # int n_past,
  3075. # int n_dims,
  3076. # int mode,
  3077. # int n_ctx,
  3078. # float freq_base,
  3079. # float freq_scale);
  3080. def ggml_rope_custom(
  3081. ctx: ggml_context_p,
  3082. a: ggml_tensor_p,
  3083. n_past: Union[ctypes.c_int, int],
  3084. n_dims: Union[ctypes.c_int, int],
  3085. mode: Union[ctypes.c_int, int],
  3086. n_ctx: Union[ctypes.c_int, int],
  3087. freq_base: Union[ctypes.c_float, float],
  3088. freq_scale: Union[ctypes.c_float, float],
  3089. ) -> ggml_tensor_p:
  3090. return lib.ggml_rope_custom(
  3091. ctx, a, n_past, n_dims, mode, n_ctx, freq_base, freq_scale
  3092. )
  3093. lib.ggml_rope_custom.argtypes = [
  3094. ggml_context_p,
  3095. ctypes.POINTER(ggml_tensor),
  3096. ctypes.c_int,
  3097. ctypes.c_int,
  3098. ctypes.c_int,
  3099. ctypes.c_int,
  3100. ctypes.c_float,
  3101. ctypes.c_float,
  3102. ]
  3103. lib.ggml_rope_custom.restype = ctypes.POINTER(ggml_tensor)
  3104. # // in-place, returns view(a)
  3105. # GGML_API struct ggml_tensor * ggml_rope_custom_inplace(
  3106. # struct ggml_context * ctx,
  3107. # struct ggml_tensor * a,
  3108. # int n_past,
  3109. # int n_dims,
  3110. # int mode,
  3111. # int n_ctx,
  3112. # float freq_base,
  3113. # float freq_scale);
  3114. def ggml_rope_custom_inplace(
  3115. ctx: ggml_context_p,
  3116. a: ggml_tensor_p,
  3117. n_past: Union[ctypes.c_int, int],
  3118. n_dims: Union[ctypes.c_int, int],
  3119. mode: Union[ctypes.c_int, int],
  3120. n_ctx: Union[ctypes.c_int, int],
  3121. freq_base: Union[ctypes.c_float, float],
  3122. freq_scale: Union[ctypes.c_float, float],
  3123. ) -> ggml_tensor_p:
  3124. return lib.ggml_rope_custom_inplace(
  3125. ctx, a, n_past, n_dims, mode, n_ctx, freq_base, freq_scale
  3126. )
  3127. lib.ggml_rope_custom_inplace.argtypes = [
  3128. ggml_context_p,
  3129. ctypes.POINTER(ggml_tensor),
  3130. ctypes.c_int,
  3131. ctypes.c_int,
  3132. ctypes.c_int,
  3133. ctypes.c_int,
  3134. ctypes.c_float,
  3135. ctypes.c_float,
  3136. ]
  3137. lib.ggml_rope_custom_inplace.restype = ctypes.POINTER(ggml_tensor)
  3138. # // xPos RoPE, in-place, returns view(a)
  3139. # GGML_API struct ggml_tensor * ggml_rope_xpos_inplace(
  3140. # struct ggml_context * ctx,
  3141. # struct ggml_tensor * a,
  3142. # int n_past,
  3143. # int n_dims,
  3144. # float base,
  3145. # bool down);
  3146. def ggml_rope_xpos_inplace(
  3147. ctx: ggml_context_p,
  3148. a: ggml_tensor_p,
  3149. n_past: Union[ctypes.c_int, int],
  3150. n_dims: Union[ctypes.c_int, int],
  3151. base: Union[ctypes.c_float, float],
  3152. down: Union[ctypes.c_bool, bool],
  3153. ) -> ggml_tensor_p:
  3154. return lib.ggml_rope_xpos_inplace(ctx, a, n_past, n_dims, base, down)
  3155. lib.ggml_rope_xpos_inplace.argtypes = [
  3156. ggml_context_p,
  3157. ctypes.POINTER(ggml_tensor),
  3158. ctypes.c_int,
  3159. ctypes.c_int,
  3160. ctypes.c_float,
  3161. ctypes.c_bool,
  3162. ]
  3163. lib.ggml_rope_xpos_inplace.restype = ctypes.POINTER(ggml_tensor)
  3164. # // rotary position embedding backward, i.e compute dx from dy
  3165. # // a - dy
  3166. # GGML_API struct ggml_tensor * ggml_rope_back(
  3167. # struct ggml_context * ctx,
  3168. # struct ggml_tensor * a,
  3169. # int n_past,
  3170. # int n_dims,
  3171. # int mode,
  3172. # int n_ctx,
  3173. # float freq_base,
  3174. # float freq_scale,
  3175. # float xpos_base,
  3176. # bool xpos_down);
  3177. def ggml_rope_back(
  3178. ctx: ggml_context_p,
  3179. a: ggml_tensor_p,
  3180. n_past: Union[ctypes.c_int, int],
  3181. n_dims: Union[ctypes.c_int, int],
  3182. mode: Union[ctypes.c_int, int],
  3183. n_ctx: Union[ctypes.c_int, int],
  3184. freq_base: Union[ctypes.c_float, float],
  3185. freq_scale: Union[ctypes.c_float, float],
  3186. xpos_base: Union[ctypes.c_float, float],
  3187. xpos_down: Union[ctypes.c_bool, bool],
  3188. ) -> ggml_tensor_p:
  3189. return lib.ggml_rope_back(
  3190. ctx, a, n_past, n_dims, mode, n_ctx, freq_base, freq_scale, xpos_base, xpos_down
  3191. )
  3192. lib.ggml_rope_back.argtypes = [
  3193. ggml_context_p,
  3194. ctypes.POINTER(ggml_tensor),
  3195. ctypes.c_int,
  3196. ctypes.c_int,
  3197. ctypes.c_int,
  3198. ctypes.c_int,
  3199. ctypes.c_float,
  3200. ctypes.c_float,
  3201. ctypes.c_float,
  3202. ctypes.c_bool,
  3203. ]
  3204. lib.ggml_rope_back.restype = ctypes.POINTER(ggml_tensor)
  3205. # // alibi position embedding
  3206. # // in-place, returns view(a)
  3207. # struct ggml_tensor * ggml_alibi(
  3208. # struct ggml_context * ctx,
  3209. # struct ggml_tensor * a,
  3210. # int n_past,
  3211. # int n_head,
  3212. # float bias_max);
  3213. def ggml_alibi(
  3214. ctx: ggml_context_p,
  3215. a: ggml_tensor_p,
  3216. n_past: Union[ctypes.c_int, int],
  3217. n_head: Union[ctypes.c_int, int],
  3218. bias_max: Union[ctypes.c_float, float],
  3219. ) -> ggml_tensor_p:
  3220. return lib.ggml_alibi(ctx, a, n_past, n_head, bias_max)
  3221. lib.ggml_alibi.argtypes = [
  3222. ggml_context_p,
  3223. ctypes.POINTER(ggml_tensor),
  3224. ctypes.c_int,
  3225. ctypes.c_int,
  3226. ctypes.c_float,
  3227. ]
  3228. lib.ggml_alibi.restype = ctypes.POINTER(ggml_tensor)
  3229. # // clamp
  3230. # // in-place, returns view(a)
  3231. # struct ggml_tensor * ggml_clamp(
  3232. # struct ggml_context * ctx,
  3233. # struct ggml_tensor * a,
  3234. # float min,
  3235. # float max);
  3236. def ggml_clamp(
  3237. ctx: ggml_context_p,
  3238. a: ggml_tensor_p,
  3239. min: Union[ctypes.c_float, float],
  3240. max: Union[ctypes.c_float, float],
  3241. ) -> ggml_tensor_p:
  3242. return lib.ggml_clamp(ctx, a, min, max)
  3243. lib.ggml_clamp.argtypes = [
  3244. ggml_context_p,
  3245. ctypes.POINTER(ggml_tensor),
  3246. ctypes.c_float,
  3247. ctypes.c_float,
  3248. ]
  3249. lib.ggml_clamp.restype = ctypes.POINTER(ggml_tensor)
  3250. # GGML_API struct ggml_tensor * ggml_conv_1d(
  3251. # struct ggml_context * ctx,
  3252. # struct ggml_tensor * a,
  3253. # struct ggml_tensor * b,
  3254. # int s0, // stride
  3255. # int p0, // padding
  3256. # int d0); // dilation
  3257. def ggml_conv_1d(
  3258. ctx: ggml_context_p,
  3259. a: ggml_tensor_p,
  3260. b: ggml_tensor_p,
  3261. s0: Union[ctypes.c_int, int],
  3262. p0: Union[ctypes.c_int, int],
  3263. d0: Union[ctypes.c_int, int],
  3264. ) -> ggml_tensor_p:
  3265. """Convolution 1D
  3266. Parameters:
  3267. a: input tensor
  3268. b: filter tensor
  3269. s0: stride
  3270. p0: padding
  3271. d0: dilation
  3272. Returns:
  3273. output tensor"""
  3274. return lib.ggml_conv_1d(ctx, a, b, s0, p0, d0)
  3275. lib.ggml_conv_1d.argtypes = [
  3276. ggml_context_p,
  3277. ctypes.POINTER(ggml_tensor),
  3278. ctypes.POINTER(ggml_tensor),
  3279. ctypes.c_int,
  3280. ctypes.c_int,
  3281. ctypes.c_int,
  3282. ]
  3283. lib.ggml_conv_1d.restype = ctypes.POINTER(ggml_tensor)
  3284. # // conv_1d with padding = half
  3285. # // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
  3286. # GGML_API struct ggml_tensor* ggml_conv_1d_ph(
  3287. # struct ggml_context * ctx,
  3288. # struct ggml_tensor * a,
  3289. # struct ggml_tensor * b,
  3290. # int s,
  3291. # int d);
  3292. def ggml_conv_1d_ph(
  3293. ctx: ggml_context_p,
  3294. a: ggml_tensor_p,
  3295. b: ggml_tensor_p,
  3296. s: Union[ctypes.c_int, int],
  3297. d: Union[ctypes.c_int, int],
  3298. ) -> ggml_tensor_p:
  3299. """Convolution 1D with padding = half
  3300. Parameters:
  3301. a: input tensor
  3302. b: filter tensor
  3303. s: stride
  3304. d: dilation
  3305. Returns:
  3306. output tensor"""
  3307. return lib.ggml_conv_1d_ph(ctx, a, b, s, d)
  3308. lib.ggml_conv_1d_ph.argtypes = [
  3309. ggml_context_p,
  3310. ctypes.POINTER(ggml_tensor),
  3311. ctypes.POINTER(ggml_tensor),
  3312. ctypes.c_int,
  3313. ctypes.c_int,
  3314. ]
  3315. lib.ggml_conv_1d_ph.restype = ctypes.POINTER(ggml_tensor)
  3316. # GGML_API struct ggml_tensor * ggml_conv_2d(
  3317. # struct ggml_context * ctx,
  3318. # struct ggml_tensor * a,
  3319. # struct ggml_tensor * b,
  3320. # int s0,
  3321. # int s1,
  3322. # int p0,
  3323. # int p1,
  3324. # int d0,
  3325. # int d1);
  3326. def ggml_conv_2d(
  3327. ctx: ggml_context_p,
  3328. a: ggml_tensor_p,
  3329. b: ggml_tensor_p,
  3330. s0: Union[ctypes.c_int, int],
  3331. s1: Union[ctypes.c_int, int],
  3332. p0: Union[ctypes.c_int, int],
  3333. p1: Union[ctypes.c_int, int],
  3334. d0: Union[ctypes.c_int, int],
  3335. d1: Union[ctypes.c_int, int],
  3336. ) -> ggml_tensor_p:
  3337. """Convolution 2D
  3338. Parameters:
  3339. a: input tensor
  3340. b: filter tensor
  3341. s0: stride
  3342. s1: stride
  3343. p0: padding
  3344. p1: padding
  3345. d0: dilation
  3346. d1: dilation
  3347. Returns:
  3348. output tensor"""
  3349. return lib.ggml_conv_2d(ctx, a, b, s0, s1, p0, p1, d0, d1)
  3350. lib.ggml_conv_2d.argtypes = [
  3351. ggml_context_p,
  3352. ctypes.POINTER(ggml_tensor),
  3353. ctypes.POINTER(ggml_tensor),
  3354. ctypes.c_int,
  3355. ctypes.c_int,
  3356. ctypes.c_int,
  3357. ctypes.c_int,
  3358. ctypes.c_int,
  3359. ctypes.c_int,
  3360. ]
  3361. lib.ggml_conv_2d.restype = ctypes.POINTER(ggml_tensor)
# // kernel size is a->ne[0] x a->ne[1]
# // stride is equal to kernel size
# // padding is zero
# // example:
# // a:     16   16    3  768
# // b:   1024 1024    3    1
# // res:   64   64  768    1
# // used in sam
# GGML_API struct ggml_tensor * ggml_conv_2d_sk_p0(
#     struct ggml_context * ctx,
#     struct ggml_tensor  * a,
#     struct ggml_tensor  * b);
def ggml_conv_2d_sk_p0(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    """Convolution 2D with stride equal to kernel size and zero padding
    (see the C header comment above for an example; used in SAM).

    Parameters:
        a: input tensor
        b: filter tensor

    Returns:
        output tensor"""
    return lib.ggml_conv_2d_sk_p0(ctx, a, b)


lib.ggml_conv_2d_sk_p0.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_conv_2d_sk_p0.restype = ctypes.POINTER(ggml_tensor)
# // kernel size is a->ne[0] x a->ne[1]
# // stride is 1
# // padding is half
# // example:
# // a:   3    3  256  256
# // b:  64   64  256    1
# // res: 64   64  256    1
# // used in sam
# GGML_API struct ggml_tensor * ggml_conv_2d_s1_ph(
#     struct ggml_context * ctx,
#     struct ggml_tensor  * a,
#     struct ggml_tensor  * b);
def ggml_conv_2d_s1_ph(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    """Convolution 2D with stride = 1 and padding = half (used in SAM).

    Parameters:
        a: input tensor
        b: filter tensor

    Returns:
        output tensor"""
    return lib.ggml_conv_2d_s1_ph(ctx, a, b)


lib.ggml_conv_2d_s1_ph.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_conv_2d_s1_ph.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_conv_transpose_2d_p0(
#     struct ggml_context * ctx,
#     struct ggml_tensor  * a,
#     struct ggml_tensor  * b,
#     int                   stride);
def ggml_conv_transpose_2d_p0(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    stride: Union[ctypes.c_int, int],
) -> ggml_tensor_p:
    """Convolution Transpose 2D with padding = zero.

    Parameters:
        a: input tensor
        b: filter tensor
        stride: stride

    Returns:
        output tensor"""
    return lib.ggml_conv_transpose_2d_p0(ctx, a, b, stride)


lib.ggml_conv_transpose_2d_p0.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,  # stride
]
lib.ggml_conv_transpose_2d_p0.restype = ctypes.POINTER(ggml_tensor)
# enum ggml_op_pool {
#     GGML_OP_POOL_MAX,
#     GGML_OP_POOL_AVG,
#     GGML_OP_POOL_COUNT,
# };
# Python mirror of the C `enum ggml_op_pool`; values must match the C header.
GGML_OP_POOL_MAX = 0
GGML_OP_POOL_AVG = 1
GGML_OP_POOL_COUNT = 2  # number of pool ops, not a usable op itself
# GGML_API struct ggml_tensor * ggml_pool_1d(
#     struct ggml_context * ctx,
#     struct ggml_tensor  * a,
#     enum ggml_op_pool     op,
#     int                   k0,  // kernel size
#     int                   s0,  // stride
#     int                   p0); // padding
def ggml_pool_1d(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    op: Union[ctypes.c_int, int],
    k0: Union[ctypes.c_int, int],
    s0: Union[ctypes.c_int, int],
    p0: Union[ctypes.c_int, int],
) -> ggml_tensor_p:
    """1D Pooling.

    Parameters:
        a: input tensor
        op: pooling operation (one of the GGML_OP_POOL_* constants)
        k0: kernel size
        s0: stride
        p0: padding

    Returns:
        output tensor"""
    return lib.ggml_pool_1d(ctx, a, op, k0, s0, p0)


lib.ggml_pool_1d.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,  # op
    ctypes.c_int,  # k0
    ctypes.c_int,  # s0
    ctypes.c_int,  # p0
]
lib.ggml_pool_1d.restype = ctypes.POINTER(ggml_tensor)
  3490. # GGML_API struct ggml_tensor * ggml_pool_2d(
  3491. # struct ggml_context * ctx,
  3492. # struct ggml_tensor * a,
  3493. # enum ggml_op_pool op,
  3494. # int k0,
  3495. # int k1,
  3496. # int s0,
  3497. # int s1,
  3498. # int p0,
  3499. # int p1);
  3500. def ggml_pool_2d(
  3501. ctx: ggml_context_p,
  3502. a: ggml_tensor_p,
  3503. op: Union[ctypes.c_int, int],
  3504. k0: Union[ctypes.c_int, int],
  3505. k1: Union[ctypes.c_int, int],
  3506. s0: Union[ctypes.c_int, int],
  3507. s1: Union[ctypes.c_int, int],
  3508. p0: Union[ctypes.c_int, int],
  3509. p1: Union[ctypes.c_int, int],
  3510. ) -> ggml_tensor_p:
  3511. """2D Pooling
  3512. Parameters:
  3513. a: input tensor
  3514. op: pooling operation
  3515. k0: kernel size
  3516. k1: kernel size
  3517. s0: stride
  3518. s1: stride
  3519. p0: padding
  3520. p1: padding
  3521. Returns:
  3522. output tensor"""
  3523. return lib.ggml_pool_2d(ctx, a, op, k0, k1, s0, s1, p0, p1)
  3524. lib.ggml_pool_2d.argtypes = [
  3525. ggml_context_p,
  3526. ctypes.POINTER(ggml_tensor),
  3527. ctypes.c_int,
  3528. ctypes.c_int,
  3529. ctypes.c_int,
  3530. ctypes.c_int,
  3531. ctypes.c_int,
  3532. ctypes.c_int,
  3533. ]
  3534. lib.ggml_pool_2d.restype = ctypes.POINTER(ggml_tensor)
# // nearest interpolate
# // used in stable-diffusion
# GGML_API struct ggml_tensor * ggml_upscale(
#     struct ggml_context * ctx,
#     struct ggml_tensor  * a,
#     int                   scale_factor);
def ggml_upscale(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    scale_factor: Union[ctypes.c_int, int],
) -> ggml_tensor_p:
    """Upscale (nearest interpolate per the C header comment; used in
    stable-diffusion).

    Parameters:
        a: input tensor
        scale_factor: scale factor

    Returns:
        output tensor"""
    return lib.ggml_upscale(ctx, a, scale_factor)


lib.ggml_upscale.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,  # scale_factor
]
lib.ggml_upscale.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_flash_attn(
#     struct ggml_context * ctx,
#     struct ggml_tensor  * q,
#     struct ggml_tensor  * k,
#     struct ggml_tensor  * v,
#     bool                  masked);
def ggml_flash_attn(
    ctx: ggml_context_p,
    q: ggml_tensor_p,
    k: ggml_tensor_p,
    v: ggml_tensor_p,
    masked: Union[ctypes.c_bool, bool],
) -> ggml_tensor_p:
    """Thin ctypes wrapper over C `ggml_flash_attn`.

    Parameters:
        q: query tensor
        k: key tensor
        v: value tensor
        masked: whether the attention is masked

    Returns:
        output tensor"""
    return lib.ggml_flash_attn(ctx, q, k, v, masked)


lib.ggml_flash_attn.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_bool,  # masked
]
lib.ggml_flash_attn.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_flash_attn_back(
#     struct ggml_context * ctx,
#     struct ggml_tensor  * q,
#     struct ggml_tensor  * k,
#     struct ggml_tensor  * v,
#     struct ggml_tensor  * d,
#     bool                  masked);
def ggml_flash_attn_back(
    ctx: ggml_context_p,
    q: ggml_tensor_p,
    k: ggml_tensor_p,
    v: ggml_tensor_p,
    d: ggml_tensor_p,
    masked: Union[ctypes.c_bool, bool],
) -> ggml_tensor_p:
    """Thin ctypes wrapper over C `ggml_flash_attn_back` (backward pass of
    ggml_flash_attn).

    Parameters:
        q: query tensor
        k: key tensor
        v: value tensor
        d: gradient tensor
        masked: whether the attention is masked

    Returns:
        output tensor"""
    return lib.ggml_flash_attn_back(ctx, q, k, v, d, masked)


lib.ggml_flash_attn_back.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_bool,  # masked
]
lib.ggml_flash_attn_back.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_flash_ff(
#     struct ggml_context * ctx,
#     struct ggml_tensor  * a,
#     struct ggml_tensor  * b0,
#     struct ggml_tensor  * b1,
#     struct ggml_tensor  * c0,
#     struct ggml_tensor  * c1);
def ggml_flash_ff(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b0: ggml_tensor_p,
    b1: ggml_tensor_p,
    c0: ggml_tensor_p,
    c1: ggml_tensor_p,
) -> ggml_tensor_p:
    """Thin ctypes wrapper over C `ggml_flash_ff`; arguments forwarded unchanged.

    Returns:
        output tensor"""
    return lib.ggml_flash_ff(ctx, a, b0, b1, c0, c1)


lib.ggml_flash_ff.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_flash_ff.restype = ctypes.POINTER(ggml_tensor)
# // partition into non-overlapping windows with padding if needed
# // example:
# // a:   768   64   64    1
# // w:    14
# // res: 768   14   14   25
# // used in sam
# GGML_API struct ggml_tensor * ggml_win_part(
#     struct ggml_context * ctx,
#     struct ggml_tensor  * a,
#     int                   w);
def ggml_win_part(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    w: Union[ctypes.c_int, int],
) -> ggml_tensor_p:
    """Partition tensor `a` into non-overlapping windows of size `w`,
    padding if needed (see C header comment above; used in SAM).

    Returns:
        output tensor"""
    return lib.ggml_win_part(ctx, a, w)


lib.ggml_win_part.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,  # w: window size
]
lib.ggml_win_part.restype = ctypes.POINTER(ggml_tensor)
# // reverse of ggml_win_part
# // used in sam
# GGML_API struct ggml_tensor * ggml_win_unpart(
#     struct ggml_context * ctx,
#     struct ggml_tensor  * a,
#     int                   w0,
#     int                   h0,
#     int                   w);
def ggml_win_unpart(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    w0: Union[ctypes.c_int, int],
    h0: Union[ctypes.c_int, int],
    w: Union[ctypes.c_int, int],
) -> ggml_tensor_p:
    """Reverse of ggml_win_part (used in SAM).

    Returns:
        output tensor"""
    return lib.ggml_win_unpart(ctx, a, w0, h0, w)


lib.ggml_win_unpart.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,  # w0
    ctypes.c_int,  # h0
    ctypes.c_int,  # w
]
lib.ggml_win_unpart.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_unary(
#     struct ggml_context * ctx,
#     struct ggml_tensor  * a,
#     enum ggml_unary_op    op);
def ggml_unary(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    op: Union[ctypes.c_int, int],
) -> ggml_tensor_p:
    """Apply a unary operator `op` (a C `enum ggml_unary_op` value) to tensor `a`.

    Returns:
        output tensor"""
    return lib.ggml_unary(ctx, a, op)


lib.ggml_unary.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,  # op: enum ggml_unary_op
]
lib.ggml_unary.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_unary_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor  * a,
#     enum ggml_unary_op    op);
def ggml_unary_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    op: Union[ctypes.c_int, int],
) -> ggml_tensor_p:
    """In-place variant of ggml_unary.

    Returns:
        output tensor"""
    return lib.ggml_unary_inplace(ctx, a, op)


lib.ggml_unary_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,  # op: enum ggml_unary_op
]
lib.ggml_unary_inplace.restype = ctypes.POINTER(ggml_tensor)
# // used in sam
# GGML_API struct ggml_tensor * ggml_get_rel_pos(
#     struct ggml_context * ctx,
#     struct ggml_tensor  * a,
#     int                   qh,
#     int                   kh);
def ggml_get_rel_pos(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    qh: Union[ctypes.c_int, int],
    kh: Union[ctypes.c_int, int],
) -> ggml_tensor_p:
    """Thin ctypes wrapper over C `ggml_get_rel_pos` (used in SAM).

    Returns:
        output tensor"""
    return lib.ggml_get_rel_pos(ctx, a, qh, kh)


lib.ggml_get_rel_pos.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,  # qh
    ctypes.c_int,  # kh
]
lib.ggml_get_rel_pos.restype = ctypes.POINTER(ggml_tensor)
# // used in sam
# GGML_API struct ggml_tensor * ggml_add_rel_pos(
#     struct ggml_context * ctx,
#     struct ggml_tensor  * a,
#     struct ggml_tensor  * pw,
#     struct ggml_tensor  * ph);
def ggml_add_rel_pos(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    pw: ggml_tensor_p,
    ph: ggml_tensor_p,
) -> ggml_tensor_p:
    """Thin ctypes wrapper over C `ggml_add_rel_pos` (used in SAM).

    Returns:
        output tensor"""
    return lib.ggml_add_rel_pos(ctx, a, pw, ph)


lib.ggml_add_rel_pos.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_add_rel_pos.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_add_rel_pos_inplace(
#     struct ggml_context * ctx,
#     struct ggml_tensor  * a,
#     struct ggml_tensor  * pw,
#     struct ggml_tensor  * ph);
def ggml_add_rel_pos_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    pw: ggml_tensor_p,
    ph: ggml_tensor_p,
) -> ggml_tensor_p:
    """In-place variant of ggml_add_rel_pos.

    Returns:
        output tensor"""
    return lib.ggml_add_rel_pos_inplace(ctx, a, pw, ph)


lib.ggml_add_rel_pos_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_add_rel_pos_inplace.restype = ctypes.POINTER(ggml_tensor)
# // custom operators (DEPRECATED)
# Callback types for the deprecated f32 custom-operator API; each mirrors
# the C typedef shown in the comment above it. Callbacks created from these
# must be kept alive by the caller for as long as the graph may execute.
# typedef void (*ggml_unary_op_f32_t)(const int, float *, const float *);
ggml_unary_op_f32_t = ctypes.CFUNCTYPE(
    None, ctypes.c_int, ctypes.POINTER(ctypes.c_float), ctypes.POINTER(ctypes.c_float)
)
# typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
ggml_binary_op_f32_t = ctypes.CFUNCTYPE(
    None,
    ctypes.c_int,
    ctypes.POINTER(ctypes.c_float),
    ctypes.POINTER(ctypes.c_float),
    ctypes.POINTER(ctypes.c_float),
)
# typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
ggml_custom1_op_f32_t = ctypes.CFUNCTYPE(
    None, ctypes.POINTER(ggml_tensor), ctypes.POINTER(ggml_tensor)
)
"""Unary operator function type"""
# typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
ggml_custom2_op_f32_t = ctypes.CFUNCTYPE(
    None,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
)
"""Binary operator function type"""
# typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
ggml_custom3_op_f32_t = ctypes.CFUNCTYPE(
    None,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
)
"""Ternary operator function type"""
# GGML_API struct ggml_tensor * ggml_map_unary_f32(
#     struct ggml_context        * ctx,
#     struct ggml_tensor         * a,
#     ggml_unary_op_f32_t          fun);
def ggml_map_unary_f32(
    ctx: ggml_context_p, a: ggml_tensor_p, fun: "ctypes._FuncPointer"  # type: ignore
) -> ggml_tensor_p:
    """Map a custom unary f32 operator over tensor `a` (deprecated API).

    Parameters:
        a: input tensor
        fun (ggml.ggml_unary_op_f32_t): callback to apply

    Returns:
        output tensor"""
    return lib.ggml_map_unary_f32(ctx, a, fun)


lib.ggml_map_unary_f32.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ggml_unary_op_f32_t,
]
lib.ggml_map_unary_f32.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
#     struct ggml_context        * ctx,
#     struct ggml_tensor         * a,
#     ggml_unary_op_f32_t          fun);
def ggml_map_unary_inplace_f32(
    ctx: ggml_context_p, a: ggml_tensor_p, fun: "ctypes._FuncPointer"  # type: ignore
) -> ggml_tensor_p:
    """In-place variant of ggml_map_unary_f32 (deprecated API).

    Parameters:
        a: input tensor
        fun (ggml.ggml_unary_op_f32_t): callback to apply

    Returns:
        output tensor"""
    return lib.ggml_map_unary_inplace_f32(ctx, a, fun)


lib.ggml_map_unary_inplace_f32.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ggml_unary_op_f32_t,
]
lib.ggml_map_unary_inplace_f32.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_map_binary_f32(
#     struct ggml_context         * ctx,
#     struct ggml_tensor          * a,
#     struct ggml_tensor          * b,
#     ggml_binary_op_f32_t          fun);
def ggml_map_binary_f32(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    fun: "ctypes._FuncPointer",  # type: ignore
) -> ggml_tensor_p:
    """Map a custom binary f32 operator over tensors `a` and `b` (deprecated API).

    Parameters:
        a: input tensor
        b: input tensor
        fun (ggml.ggml_binary_op_f32_t): callback to apply

    Returns:
        output tensor"""
    return lib.ggml_map_binary_f32(ctx, a, b, fun)


lib.ggml_map_binary_f32.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ggml_binary_op_f32_t,
]
lib.ggml_map_binary_f32.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
#     struct ggml_context         * ctx,
#     struct ggml_tensor          * a,
#     struct ggml_tensor          * b,
#     ggml_binary_op_f32_t          fun);
def ggml_map_binary_inplace_f32(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    fun: "ctypes._FuncPointer",  # type: ignore
) -> ggml_tensor_p:
    """In-place variant of ggml_map_binary_f32 (deprecated API).

    Parameters:
        a: input tensor
        b: input tensor
        fun (ggml.ggml_binary_op_f32_t): callback to apply

    Returns:
        output tensor"""
    return lib.ggml_map_binary_inplace_f32(ctx, a, b, fun)


lib.ggml_map_binary_inplace_f32.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ggml_binary_op_f32_t,
]
lib.ggml_map_binary_inplace_f32.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_map_custom1_f32(
#     struct ggml_context          * ctx,
#     struct ggml_tensor           * a,
#     ggml_custom1_op_f32_t          fun);
def ggml_map_custom1_f32(
    ctx: ggml_context_p, a: ggml_tensor_p, fun: "ctypes._FuncPointer"  # type: ignore
) -> ggml_tensor_p:
    """Custom unary operator on a tensor.

    Example:
        ```python
        import ggml

        @ggml.ggml_custom1_op_f32_t
        def custom_op(b: ggml.ggml_tensor_p, a: ggml.ggml_tensor_p):
            # do something with a and copy to b
            return

        ...

        b = ggml.ggml_map_custom1_f32(ctx, a, custom_op)
        ```

    Parameters:
        a: input tensor
        fun (ggml.ggml_custom1_op_f32_t): function to apply to each element

    Returns:
        output tensor"""
    return lib.ggml_map_custom1_f32(ctx, a, fun)


lib.ggml_map_custom1_f32.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ggml_custom1_op_f32_t,
]
lib.ggml_map_custom1_f32.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
#     struct ggml_context          * ctx,
#     struct ggml_tensor           * a,
#     ggml_custom1_op_f32_t          fun);
def ggml_map_custom1_inplace_f32(
    ctx: ggml_context_p, a: ggml_tensor_p, fun: "ctypes._FuncPointer"  # type: ignore
) -> ggml_tensor_p:
    """Custom unary operator on a tensor inplace.

    Parameters:
        a: input tensor
        fun (ggml.ggml_custom1_op_f32_t): function to apply to each element

    Returns:
        output tensor"""
    return lib.ggml_map_custom1_inplace_f32(ctx, a, fun)


lib.ggml_map_custom1_inplace_f32.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ggml_custom1_op_f32_t,
]
lib.ggml_map_custom1_inplace_f32.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_map_custom2_f32(
#     struct ggml_context          * ctx,
#     struct ggml_tensor           * a,
#     struct ggml_tensor           * b,
#     ggml_custom2_op_f32_t          fun);
def ggml_map_custom2_f32(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    fun: "ctypes._FuncPointer",  # type: ignore
) -> ggml_tensor_p:
    """Custom binary operator on two tensors.

    Parameters:
        a: input tensor
        b: input tensor
        fun (ggml.ggml_custom2_op_f32_t): function to apply to each element

    Returns:
        output tensor"""
    return lib.ggml_map_custom2_f32(ctx, a, b, fun)


lib.ggml_map_custom2_f32.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ggml_custom2_op_f32_t,
]
lib.ggml_map_custom2_f32.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
#     struct ggml_context          * ctx,
#     struct ggml_tensor           * a,
#     struct ggml_tensor           * b,
#     ggml_custom2_op_f32_t          fun);
def ggml_map_custom2_inplace_f32(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    fun: "ctypes._FuncPointer",  # type: ignore
) -> ggml_tensor_p:
    """Custom binary operator on two tensors inplace.

    Parameters:
        a: input tensor
        b: input tensor
        fun (ggml.ggml_custom2_op_f32_t): function to apply to each element

    Returns:
        output tensor"""
    return lib.ggml_map_custom2_inplace_f32(ctx, a, b, fun)


lib.ggml_map_custom2_inplace_f32.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ggml_custom2_op_f32_t,
]
lib.ggml_map_custom2_inplace_f32.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_map_custom3_f32(
#     struct ggml_context          * ctx,
#     struct ggml_tensor           * a,
#     struct ggml_tensor           * b,
#     struct ggml_tensor           * c,
#     ggml_custom3_op_f32_t          fun);
def ggml_map_custom3_f32(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    c: ggml_tensor_p,
    fun: "ctypes._FuncPointer",  # type: ignore
) -> ggml_tensor_p:
    """Custom ternary operator on three tensors.

    Parameters:
        a: input tensor
        b: input tensor
        c: input tensor
        fun (ggml.ggml_custom3_op_f32_t): function to apply to each element

    Returns:
        output tensor"""
    return lib.ggml_map_custom3_f32(ctx, a, b, c, fun)


lib.ggml_map_custom3_f32.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ggml_custom3_op_f32_t,
]
lib.ggml_map_custom3_f32.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
#     struct ggml_context          * ctx,
#     struct ggml_tensor           * a,
#     struct ggml_tensor           * b,
#     struct ggml_tensor           * c,
#     ggml_custom3_op_f32_t          fun);
def ggml_map_custom3_inplace_f32(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    c: ggml_tensor_p,
    fun: "ctypes._FuncPointer",  # type: ignore
) -> ggml_tensor_p:
    """Custom ternary operator on three tensors inplace.

    Parameters:
        a: input tensor
        b: input tensor
        c: input tensor
        fun (ggml.ggml_custom3_op_f32_t): function to apply to each element

    Returns:
        output tensor"""
    return lib.ggml_map_custom3_inplace_f32(ctx, a, b, c, fun)


lib.ggml_map_custom3_inplace_f32.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ggml_custom3_op_f32_t,
]
lib.ggml_map_custom3_inplace_f32.restype = ctypes.POINTER(ggml_tensor)
# // custom operators v2
# Callback types for the v2 custom-operator API. Each mirrors the C typedef
# in the comment above it: (dst, src tensors..., ith, nth, userdata), where
# ith/nth are the worker index and worker count for multi-threaded execution.
# typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata);
ggml_custom1_op_t = ctypes.CFUNCTYPE(
    None,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_void_p,
)
"""Custom unary operator on a tensor."""
# typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata);
ggml_custom2_op_t = ctypes.CFUNCTYPE(
    None,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_void_p,
)
"""Custom binary operator on two tensors."""
# typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata);
ggml_custom3_op_t = ctypes.CFUNCTYPE(
    None,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.c_int,
    ctypes.c_int,
    ctypes.c_void_p,
)
"""Custom ternary operator on three tensors."""
# #define GGML_N_TASKS_MAX -1
# Sentinel for n_tasks meaning "use the maximum number of tasks".
GGML_N_TASKS_MAX = -1
# GGML_API struct ggml_tensor * ggml_map_custom1(
#     struct ggml_context   * ctx,
#     struct ggml_tensor    * a,
#     ggml_custom1_op_t       fun,
#     int                     n_tasks,
#     void                  * userdata);
def ggml_map_custom1(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    fun: "ctypes._FuncPointer",  # type: ignore
    n_tasks: Union[ctypes.c_int, int],
    userdata: Optional[ctypes.c_void_p],
) -> ggml_tensor_p:
    """Custom unary operator on a tensor (v2 API).

    Parameters:
        a: input tensor
        fun (ggml.ggml_custom1_op_t): callback
        n_tasks: number of tasks, or GGML_N_TASKS_MAX
        userdata: opaque pointer passed through to the callback

    Returns:
        output tensor"""
    return lib.ggml_map_custom1(ctx, a, fun, n_tasks, userdata)


lib.ggml_map_custom1.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ggml_custom1_op_t,
    ctypes.c_int,  # n_tasks
    ctypes.c_void_p,  # userdata
]
lib.ggml_map_custom1.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_map_custom1_inplace(
#     struct ggml_context   * ctx,
#     struct ggml_tensor    * a,
#     ggml_custom1_op_t       fun,
#     int                     n_tasks,
#     void                  * userdata);
def ggml_map_custom1_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    fun: "ctypes._FuncPointer",  # type: ignore
    n_tasks: Union[ctypes.c_int, int],
    userdata: Optional[ctypes.c_void_p],
) -> ggml_tensor_p:
    """In-place variant of ggml_map_custom1.

    Returns:
        output tensor"""
    return lib.ggml_map_custom1_inplace(ctx, a, fun, n_tasks, userdata)


lib.ggml_map_custom1_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ggml_custom1_op_t,
    ctypes.c_int,  # n_tasks
    ctypes.c_void_p,  # userdata
]
lib.ggml_map_custom1_inplace.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_map_custom2(
#     struct ggml_context   * ctx,
#     struct ggml_tensor    * a,
#     struct ggml_tensor    * b,
#     ggml_custom2_op_t       fun,
#     int                     n_tasks,
#     void                  * userdata);
def ggml_map_custom2(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    fun: "ctypes._FuncPointer",  # type: ignore
    n_tasks: Union[ctypes.c_int, int],
    userdata: Optional[ctypes.c_void_p],
) -> ggml_tensor_p:
    """Custom binary operator on two tensors (v2 API).

    Parameters:
        a: input tensor
        b: input tensor
        fun (ggml.ggml_custom2_op_t): callback
        n_tasks: number of tasks, or GGML_N_TASKS_MAX
        userdata: opaque pointer passed through to the callback

    Returns:
        output tensor"""
    return lib.ggml_map_custom2(ctx, a, b, fun, n_tasks, userdata)


lib.ggml_map_custom2.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ggml_custom2_op_t,
    ctypes.c_int,  # n_tasks
    ctypes.c_void_p,  # userdata
]
lib.ggml_map_custom2.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_map_custom2_inplace(
#     struct ggml_context   * ctx,
#     struct ggml_tensor    * a,
#     struct ggml_tensor    * b,
#     ggml_custom2_op_t       fun,
#     int                     n_tasks,
#     void                  * userdata);
def ggml_map_custom2_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    fun: "ctypes._FuncPointer",  # type: ignore
    n_tasks: Union[ctypes.c_int, int],
    userdata: Optional[ctypes.c_void_p],
) -> ggml_tensor_p:
    """In-place variant of ggml_map_custom2.

    Returns:
        output tensor"""
    return lib.ggml_map_custom2_inplace(ctx, a, b, fun, n_tasks, userdata)


lib.ggml_map_custom2_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ggml_custom2_op_t,
    ctypes.c_int,  # n_tasks
    ctypes.c_void_p,  # userdata
]
lib.ggml_map_custom2_inplace.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_map_custom3(
#     struct ggml_context   * ctx,
#     struct ggml_tensor    * a,
#     struct ggml_tensor    * b,
#     struct ggml_tensor    * c,
#     ggml_custom3_op_t       fun,
#     int                     n_tasks,
#     void                  * userdata);
def ggml_map_custom3(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    c: ggml_tensor_p,
    fun: "ctypes._FuncPointer",  # type: ignore
    n_tasks: Union[ctypes.c_int, int],
    userdata: Optional[ctypes.c_void_p],
) -> ggml_tensor_p:
    """Custom ternary operator on three tensors (v2 API).

    Parameters:
        a: input tensor
        b: input tensor
        c: input tensor
        fun (ggml.ggml_custom3_op_t): callback
        n_tasks: number of tasks, or GGML_N_TASKS_MAX
        userdata: opaque pointer passed through to the callback

    Returns:
        output tensor"""
    return lib.ggml_map_custom3(ctx, a, b, c, fun, n_tasks, userdata)


lib.ggml_map_custom3.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ggml_custom3_op_t,
    ctypes.c_int,  # n_tasks
    ctypes.c_void_p,  # userdata
]
lib.ggml_map_custom3.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_map_custom3_inplace(
#     struct ggml_context   * ctx,
#     struct ggml_tensor    * a,
#     struct ggml_tensor    * b,
#     struct ggml_tensor    * c,
#     ggml_custom3_op_t       fun,
#     int                     n_tasks,
#     void                  * userdata);
def ggml_map_custom3_inplace(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    c: ggml_tensor_p,
    fun: "ctypes._FuncPointer",  # type: ignore
    n_tasks: Union[ctypes.c_int, int],
    userdata: Optional[ctypes.c_void_p],
) -> ggml_tensor_p:
    """In-place variant of ggml_map_custom3.

    Returns:
        output tensor"""
    return lib.ggml_map_custom3_inplace(ctx, a, b, c, fun, n_tasks, userdata)


lib.ggml_map_custom3_inplace.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ggml_custom3_op_t,
    ctypes.c_int,  # n_tasks
    ctypes.c_void_p,  # userdata
]
lib.ggml_map_custom3_inplace.restype = ctypes.POINTER(ggml_tensor)
# // loss function
# GGML_API struct ggml_tensor * ggml_cross_entropy_loss(
#     struct ggml_context * ctx,
#     struct ggml_tensor  * a,
#     struct ggml_tensor  * b);
def ggml_cross_entropy_loss(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
) -> ggml_tensor_p:
    """Thin ctypes wrapper over C `ggml_cross_entropy_loss`.

    Returns:
        output tensor"""
    return lib.ggml_cross_entropy_loss(ctx, a, b)


lib.ggml_cross_entropy_loss.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_cross_entropy_loss.restype = ctypes.POINTER(ggml_tensor)
# GGML_API struct ggml_tensor * ggml_cross_entropy_loss_back(
#     struct ggml_context * ctx,
#     struct ggml_tensor  * a,
#     struct ggml_tensor  * b,
#     struct ggml_tensor  * c);
def ggml_cross_entropy_loss_back(
    ctx: ggml_context_p,
    a: ggml_tensor_p,
    b: ggml_tensor_p,
    c: ggml_tensor_p,
) -> ggml_tensor_p:
    """Thin ctypes wrapper over C `ggml_cross_entropy_loss_back` (backward
    pass of ggml_cross_entropy_loss).

    Returns:
        output tensor"""
    return lib.ggml_cross_entropy_loss_back(ctx, a, b, c)


lib.ggml_cross_entropy_loss_back.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_cross_entropy_loss_back.restype = ctypes.POINTER(ggml_tensor)
  4253. # //
  4254. # // automatic differentiation
  4255. # //
  4256. # GGML_API void ggml_set_param(
  4257. # struct ggml_context * ctx,
  4258. # struct ggml_tensor * tensor);
  4259. def ggml_set_param(ctx: ggml_context_p, tensor: ggml_tensor_p):
  4260. return lib.ggml_set_param(ctx, tensor)
  4261. lib.ggml_set_param.argtypes = [ggml_context_p, ctypes.POINTER(ggml_tensor)]
  4262. lib.ggml_set_param.restype = None
  4263. # GGML_API void ggml_build_forward_expand (struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
def ggml_build_forward_expand(
    cgraph: ggml_cgraph_p,
    tensor: ggml_tensor_p,
):
    """Add a tensor to the forward computation graph. This is used to
    compute and save the value of the tensor.

    Parameters:
        cgraph: The graph.
        tensor: The tensor."""
    return lib.ggml_build_forward_expand(cgraph, tensor)
lib.ggml_build_forward_expand.argtypes = [
    ctypes.POINTER(ggml_cgraph),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_build_forward_expand.restype = None
  4279. # GGML_API void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool keep);
def ggml_build_backward_expand(
    ctx: ggml_context_p,
    gf: ggml_cgraph_p,
    gb: ggml_cgraph_p,
    keep: Union[ctypes.c_bool, bool],
):
    """Add a tensor to the backward computation graph. This is used to
    compute the gradient of the tensor.

    Parameters:
        ctx: The context.
        gf: The forward graph.
        gb: The backward graph.
        keep: Whether to keep the tensor."""
    return lib.ggml_build_backward_expand(ctx, gf, gb, keep)
lib.ggml_build_backward_expand.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_cgraph),
    ctypes.POINTER(ggml_cgraph),
    ctypes.c_bool,
]
lib.ggml_build_backward_expand.restype = None
  4301. # GGML_API struct ggml_cgraph ggml_build_forward (struct ggml_tensor * tensor);
def ggml_build_forward(
    tensor: ggml_tensor_p,
) -> ggml_cgraph:
    """Build the forward computation graph.

    NOTE: the C function returns `struct ggml_cgraph` by value, so the
    result here is a struct instance, not a pointer.

    Parameters:
        tensor: The tensor.
    Returns:
        The graph."""
    return lib.ggml_build_forward(tensor)
lib.ggml_build_forward.argtypes = [ctypes.POINTER(ggml_tensor)]
lib.ggml_build_forward.restype = ggml_cgraph
  4313. # GGML_API struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep);
def ggml_build_backward(
    ctx: ggml_context_p,
    gf: ggml_cgraph_p,
    keep: Union[ctypes.c_bool, bool],
) -> ggml_cgraph:
    """Build the backward computation graph for forward graph `gf`.

    Parameters:
        ctx: The context.
        gf: The forward graph.
        keep: Whether to keep the original graph nodes.
    Returns:
        The backward graph (struct by value)."""
    return lib.ggml_build_backward(ctx, gf, keep)
lib.ggml_build_backward.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_cgraph),
    ctypes.c_bool,
]
lib.ggml_build_backward.restype = ggml_cgraph
  4326. # // graph allocation in a context
  4327. # GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx);
  4328. def ggml_new_graph(
  4329. ctx: ggml_context_p,
  4330. ) -> ggml_cgraph:
  4331. """Create a new graph.
  4332. Parameters:
  4333. ctx: The context.
  4334. Returns:
  4335. The graph."""
  4336. return lib.ggml_new_graph(ctx)
  4337. lib.ggml_new_graph.argtypes = [ggml_context_p]
  4338. lib.ggml_new_graph.restype = ggml_cgraph
  4339. # GGML_API struct ggml_cgraph * ggml_build_forward_ctx(struct ggml_context * ctx, struct ggml_tensor * tensor);
  4340. def ggml_build_forward_ctx(
  4341. ctx: ggml_context_p,
  4342. tensor: ggml_tensor_p,
  4343. ) -> ggml_cgraph:
  4344. """Build the forward computation graph in a context.
  4345. Parameters:
  4346. ctx: The context.
  4347. tensor: The tensor.
  4348. Returns:
  4349. The graph."""
  4350. return lib.ggml_build_forward_ctx(ctx, tensor)
  4351. lib.ggml_build_forward_ctx.argtypes = [
  4352. ggml_context_p,
  4353. ctypes.POINTER(ggml_tensor),
  4354. ]
  4355. lib.ggml_build_forward_ctx.restype = ggml_cgraph
  4356. # GGML_API size_t ggml_graph_overhead(void);
def ggml_graph_overhead() -> int:
    """Get the overhead of the graph (bytes needed beyond tensor data)."""
    return lib.ggml_graph_overhead()
lib.ggml_graph_overhead.argtypes = []
lib.ggml_graph_overhead.restype = ctypes.c_size_t
  4362. # // ggml_graph_plan() has to be called before ggml_graph_compute()
  4363. # // when plan.work_size > 0, caller must allocate memory for plan.work_data
  4364. # GGML_API struct ggml_cplan ggml_graph_plan (struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);
def ggml_graph_plan(
    cgraph: ggml_cgraph_p,
    n_threads: Union[ctypes.c_int, int] = GGML_DEFAULT_N_THREADS,
) -> ggml_cplan:
    """Plan the computation graph.

    Must be called before `ggml_graph_compute`; if the returned plan has
    `work_size > 0` the caller must allocate `work_data` (see C comment above).

    Parameters:
        cgraph: The graph.
        n_threads: The number of threads to use.
    Returns:
        The plan."""
    return lib.ggml_graph_plan(cgraph, n_threads)
lib.ggml_graph_plan.argtypes = [
    ctypes.POINTER(ggml_cgraph),
    ctypes.c_int,
]
lib.ggml_graph_plan.restype = ggml_cplan
  4381. # GGML_API int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
def ggml_graph_compute(
    cgraph: ggml_cgraph_p,
    cplan: ggml_cplan_p,
) -> int:
    """Compute the graph.

    Parameters:
        cgraph: The graph.
        cplan: The plan (from `ggml_graph_plan`).
    Returns:
        Status code (int)."""
    return lib.ggml_graph_compute(cgraph, cplan)
lib.ggml_graph_compute.argtypes = [
    ctypes.POINTER(ggml_cgraph),
    ctypes.POINTER(ggml_cplan),
]
lib.ggml_graph_compute.restype = ctypes.c_int
  4396. # GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph);
  4397. def ggml_graph_reset(
  4398. cgraph: ggml_cgraph_p,
  4399. ):
  4400. """Reset the graph.
  4401. Parameters:
  4402. cgraph: The graph."""
  4403. return lib.ggml_graph_reset(cgraph)
  4404. # // same as ggml_graph_compute() but the work data is allocated as a part of the context
  4405. # // note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
  4406. # GGML_API void ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
def ggml_graph_compute_with_ctx(
    ctx: ggml_context_p,
    cgraph: ggml_cgraph_p,
    n_threads: Union[ctypes.c_int, int],
):
    """Compute the graph with a context.

    Same as `ggml_graph_compute` but the work data is allocated inside the
    context — the caller must ensure `ctx` has enough memory for it.

    Parameters:
        ctx: The context.
        cgraph: The graph.
        n_threads: The number of threads to use."""
    return lib.ggml_graph_compute_with_ctx(ctx, cgraph, n_threads)
lib.ggml_graph_compute_with_ctx.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_cgraph),
    ctypes.c_int,
]
lib.ggml_graph_compute_with_ctx.restype = None
  4424. # GGML_API struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name);
def ggml_graph_get_tensor(
    cgraph: ggml_cgraph_p,
    name: bytes,
) -> ggml_tensor_p:
    """Get a tensor from the graph by name.

    Parameters:
        cgraph: The graph.
        name: The name of the tensor (encoded bytes).
    Returns:
        The tensor."""
    return lib.ggml_graph_get_tensor(cgraph, name)
lib.ggml_graph_get_tensor.argtypes = [
    ctypes.POINTER(ggml_cgraph),
    ctypes.c_char_p,
]
lib.ggml_graph_get_tensor.restype = ctypes.POINTER(ggml_tensor)
  4441. # GGML_API void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname);
def ggml_graph_export(
    cgraph: ggml_cgraph_p,
    fname: bytes,
):
    """Export the graph to the file `fname`.

    Parameters:
        cgraph: The graph.
        fname: Destination file name (encoded bytes)."""
    return lib.ggml_graph_export(cgraph, fname)
lib.ggml_graph_export.argtypes = [
    ctypes.POINTER(ggml_cgraph),
    ctypes.c_char_p,
]
lib.ggml_graph_export.restype = None
  4452. # GGML_API struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval);
def ggml_graph_import(
    fname: bytes,
    ctx_data: "ctypes._Pointer[ggml_context_p]",  # type: ignore
    ctx_eval: "ctypes._Pointer[ggml_context_p]",  # type: ignore
) -> ggml_cgraph:
    """Import a graph previously written by `ggml_graph_export`.

    Parameters:
        fname: Source file name (encoded bytes).
        ctx_data: Out-pointer receiving the data context.
        ctx_eval: Out-pointer receiving the eval context.
    Returns:
        The graph (struct by value, per the C prototype)."""
    return lib.ggml_graph_import(fname, ctx_data, ctx_eval)
lib.ggml_graph_import.argtypes = [
    ctypes.c_char_p,
    ctypes.POINTER(ggml_context_p),
    ctypes.POINTER(ggml_context_p),
]
lib.ggml_graph_import.restype = ggml_cgraph
  4465. # // print info and performance information for the graph
  4466. # GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph);
def ggml_graph_print(
    cgraph: ggml_cgraph_p,
):
    """Print info and performance information for the graph.

    Parameters:
        cgraph: The graph."""
    return lib.ggml_graph_print(cgraph)
lib.ggml_graph_print.argtypes = [ctypes.POINTER(ggml_cgraph)]
lib.ggml_graph_print.restype = None
  4473. # // dump the graph into a file using the dot format
  4474. # GGML_API void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename);
def ggml_graph_dump_dot(
    gb: ggml_cgraph_p,
    gf: ggml_cgraph_p,
    filename: bytes,
):
    """Dump the graph into a file using the dot format.

    Parameters:
        gb: The backward graph.
        gf: The forward graph.
        filename: Destination file name (encoded bytes)."""
    return lib.ggml_graph_dump_dot(gb, gf, filename)
lib.ggml_graph_dump_dot.argtypes = [
    ctypes.POINTER(ggml_cgraph),
    ctypes.POINTER(ggml_cgraph),
    ctypes.c_char_p,
]
lib.ggml_graph_dump_dot.restype = None
  4487. # //
  4488. # // optimization
  4489. # //
  4490. # // optimization methods
  4491. # enum ggml_opt_type {
  4492. # GGML_OPT_ADAM,
  4493. # GGML_OPT_LBFGS,
  4494. # };
# Values of C `enum ggml_opt_type` (see comment above).
GGML_OPT_ADAM = 0
GGML_OPT_LBFGS = 1
  4497. # // linesearch methods
  4498. # enum ggml_linesearch {
  4499. # GGML_LINESEARCH_DEFAULT = 1,
  4500. # GGML_LINESEARCH_BACKTRACKING_ARMIJO = 0,
  4501. # GGML_LINESEARCH_BACKTRACKING_WOLFE = 1,
  4502. # GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 2,
  4503. # };
# Values of C `enum ggml_linesearch` (see comment above).
# DEFAULT aliases BACKTRACKING_WOLFE (both are 1), per the C enum.
GGML_LINESEARCH_DEFAULT = 1
GGML_LINESEARCH_BACKTRACKING_ARMIJO = 0
GGML_LINESEARCH_BACKTRACKING_WOLFE = 1
GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 2
  4508. # // optimization return values
  4509. # enum ggml_opt_result {
  4510. # GGML_OPT_OK = 0,
  4511. # GGML_OPT_DID_NOT_CONVERGE,
  4512. # GGML_OPT_NO_CONTEXT,
  4513. # GGML_OPT_INVALID_WOLFE,
  4514. # GGML_OPT_FAIL,
  4515. # GGML_LINESEARCH_FAIL = -128,
  4516. # GGML_LINESEARCH_MINIMUM_STEP,
  4517. # GGML_LINESEARCH_MAXIMUM_STEP,
  4518. # GGML_LINESEARCH_MAXIMUM_ITERATIONS,
  4519. # GGML_LINESEARCH_INVALID_PARAMETERS,
  4520. # };
# Values of C `enum ggml_opt_result` (see comment above). The GGML_LINESEARCH_*
# failure codes count upward from -128, matching the C enum.
GGML_OPT_OK = 0
GGML_OPT_DID_NOT_CONVERGE = 1
GGML_OPT_NO_CONTEXT = 2
GGML_OPT_INVALID_WOLFE = 3
GGML_OPT_FAIL = 4
GGML_LINESEARCH_FAIL = -128
GGML_LINESEARCH_MINIMUM_STEP = -127
GGML_LINESEARCH_MAXIMUM_STEP = -126
GGML_LINESEARCH_MAXIMUM_ITERATIONS = -125
GGML_LINESEARCH_INVALID_PARAMETERS = -124
  4531. # typedef void (*ggml_opt_callback)(void * data, float * sched);
# ctypes callback type for C `typedef void (*ggml_opt_callback)(void * data, float * sched)`.
ggml_opt_callback = ctypes.CFUNCTYPE(
    None,
    ctypes.c_void_p,  # data
    ctypes.POINTER(ctypes.c_float),  # sched
)
  4537. # // optimization parameters
  4538. # //
  4539. # // see ggml.c (ggml_opt_default_params) for default values
  4540. # //
  4541. # struct ggml_opt_params {
  4542. # enum ggml_opt_type type;
  4543. # int n_threads;
  4544. # // delta-based convergence test
  4545. # //
  4546. # // if past == 0 - disabled
  4547. # // if past > 0:
  4548. # // stop if |f(x) - f(x_past)| < delta * max(1, |f(x)|)
  4549. # //
  4550. # int past;
  4551. # float delta;
  4552. # // maximum number of iterations without improvement
  4553. # //
  4554. # // if 0 - disabled
  4555. # // if > 0:
  4556. # // assume convergence if no cost improvement in this number of iterations
  4557. # //
  4558. # int max_no_improvement;
  4559. # bool print_forward_graph;
  4560. # bool print_backward_graph;
  4561. # // ADAM parameters
  4562. # struct {
  4563. # int n_iter;
  4564. # float sched; // schedule multiplier (fixed, decay or warmup)
  4565. # float decay; // weight decay for AdamW, use 0.0f to disable
  4566. # int decay_min_ndim; // minimum number of tensor dimension to apply weight decay
  4567. # float alpha; // learning rate
  4568. # float beta1;
  4569. # float beta2;
  4570. # float eps; // epsilon for numerical stability
  4571. # float eps_f; // epsilon for convergence test
  4572. # float eps_g; // epsilon for convergence test
  4573. # float gclip; // gradient clipping
  4574. # } adam;
  4575. # // LBFGS parameters
  4576. # struct {
  4577. # int m; // number of corrections to approximate the inv. Hessian
  4578. # int n_iter;
  4579. # int max_linesearch;
  4580. # float eps; // convergence tolerance
  4581. # float ftol; // line search tolerance
  4582. # float wolfe;
  4583. # float min_step;
  4584. # float max_step;
  4585. # enum ggml_linesearch linesearch;
  4586. # } lbfgs;
  4587. # };
class ggml_opt_params_adam(ctypes.Structure):
    # ADAM parameters; mirrors the anonymous `adam` struct inside the C
    # `struct ggml_opt_params` field-for-field (order is ABI — do not reorder).
    _fields_ = [
        ("n_iter", ctypes.c_int),
        ("sched", ctypes.c_float),  # schedule multiplier (fixed, decay or warmup)
        ("decay", ctypes.c_float),  # weight decay for AdamW, 0.0f to disable
        ("decay_min_ndim", ctypes.c_int),  # min tensor ndim to apply weight decay
        ("alpha", ctypes.c_float),  # learning rate
        ("beta1", ctypes.c_float),
        ("beta2", ctypes.c_float),
        ("eps", ctypes.c_float),  # epsilon for numerical stability
        ("eps_f", ctypes.c_float),  # epsilon for convergence test
        ("eps_g", ctypes.c_float),  # epsilon for convergence test
        ("gclip", ctypes.c_float),  # gradient clipping
    ]
class ggml_opt_params_lbfgs(ctypes.Structure):
    # L-BFGS parameters; mirrors the anonymous `lbfgs` struct inside the C
    # `struct ggml_opt_params` field-for-field (order is ABI — do not reorder).
    _fields_ = [
        ("m", ctypes.c_int),  # number of corrections to approximate the inv. Hessian
        ("n_iter", ctypes.c_int),
        ("max_linesearch", ctypes.c_int),
        ("eps", ctypes.c_float),  # convergence tolerance
        ("ftol", ctypes.c_float),  # line search tolerance
        ("wolfe", ctypes.c_float),
        ("min_step", ctypes.c_float),
        ("max_step", ctypes.c_float),
        ("linesearch", ctypes.c_int),  # enum ggml_linesearch
    ]
class ggml_opt_params(ctypes.Structure):
    # Mirrors C `struct ggml_opt_params` (see comment above); field order is ABI.
    _fields_ = [
        ("type", ctypes.c_int),  # enum ggml_opt_type
        ("n_threads", ctypes.c_int),
        ("past", ctypes.c_int),  # delta-based convergence window; 0 disables
        ("delta", ctypes.c_float),
        ("max_no_improvement", ctypes.c_int),  # 0 disables
        ("print_forward_graph", ctypes.c_bool),
        ("print_backward_graph", ctypes.c_bool),
        ("adam", ggml_opt_params_adam),
        ("lbfgs", ggml_opt_params_lbfgs),
    ]
  4626. # struct ggml_opt_context {
  4627. # struct ggml_context * ctx;
  4628. # struct ggml_opt_params params;
  4629. # int iter;
  4630. # int64_t nx; // number of parameter elements
  4631. # bool just_initialized;
  4632. # float loss_before;
  4633. # float loss_after;
  4634. # struct {
  4635. # struct ggml_tensor * m; // first moment
  4636. # struct ggml_tensor * v; // second moment
  4637. # struct ggml_tensor * pf; // past function values
  4638. # float fx_best;
  4639. # float fx_prev;
  4640. # int n_no_improvement;
  4641. # } adam;
  4642. # struct {
  4643. # struct ggml_tensor * x; // current parameters
  4644. # struct ggml_tensor * xp; // previous parameters
  4645. # struct ggml_tensor * g; // current gradient
  4646. # struct ggml_tensor * gp; // previous gradient
  4647. # struct ggml_tensor * d; // search direction
  4648. # struct ggml_tensor * pf; // past function values
  4649. # struct ggml_tensor * lmal; // the L-BFGS memory alpha
  4650. # struct ggml_tensor * lmys; // the L-BFGS memory ys
  4651. # struct ggml_tensor * lms; // the L-BFGS memory s
  4652. # struct ggml_tensor * lmy; // the L-BFGS memory y
  4653. # float fx_best;
  4654. # float step;
  4655. # int j;
  4656. # int k;
  4657. # int end;
  4658. # int n_no_improvement;
  4659. # } lbfgs;
  4660. # };
class ggml_opt_context_adam(ctypes.Structure):
    # ADAM optimizer state; mirrors the anonymous `adam` struct inside the C
    # `struct ggml_opt_context` (order is ABI — do not reorder).
    _fields_ = [
        ("m", ctypes.POINTER(ggml_tensor)),  # first moment
        ("v", ctypes.POINTER(ggml_tensor)),  # second moment
        ("pf", ctypes.POINTER(ggml_tensor)),  # past function values
        ("fx_best", ctypes.c_float),
        ("fx_prev", ctypes.c_float),
        ("n_no_improvement", ctypes.c_int),
    ]
class ggml_opt_context_lbfgs(ctypes.Structure):
    # L-BFGS optimizer state; mirrors the anonymous `lbfgs` struct inside the
    # C `struct ggml_opt_context` (order is ABI — do not reorder).
    _fields_ = [
        ("x", ctypes.POINTER(ggml_tensor)),  # current parameters
        ("xp", ctypes.POINTER(ggml_tensor)),  # previous parameters
        ("g", ctypes.POINTER(ggml_tensor)),  # current gradient
        ("gp", ctypes.POINTER(ggml_tensor)),  # previous gradient
        ("d", ctypes.POINTER(ggml_tensor)),  # search direction
        ("pf", ctypes.POINTER(ggml_tensor)),  # past function values
        ("lmal", ctypes.POINTER(ggml_tensor)),  # the L-BFGS memory alpha
        ("lmys", ctypes.POINTER(ggml_tensor)),  # the L-BFGS memory ys
        ("lms", ctypes.POINTER(ggml_tensor)),  # the L-BFGS memory s
        ("lmy", ctypes.POINTER(ggml_tensor)),  # the L-BFGS memory y
        ("fx_best", ctypes.c_float),
        ("step", ctypes.c_float),
        ("j", ctypes.c_int),
        ("k", ctypes.c_int),
        ("end", ctypes.c_int),
        ("n_no_improvement", ctypes.c_int),
    ]
class ggml_opt_context(ctypes.Structure):
    # Mirrors C `struct ggml_opt_context` (see comment above); field order is ABI.
    _fields_ = [
        ("ctx", ggml_context_p),
        ("params", ggml_opt_params),
        ("iter", ctypes.c_int),
        ("nx", ctypes.c_int64),  # number of parameter elements
        ("just_initialized", ctypes.c_bool),
        ("loss_before", ctypes.c_float),
        ("loss_after", ctypes.c_float),
        ("adam", ggml_opt_context_adam),
        ("lbfgs", ggml_opt_context_lbfgs),
    ]


# Pointer type for `struct ggml_opt_context *` arguments.
ggml_opt_context_p = ctypes.POINTER(ggml_opt_context)
  4702. # GGML_API struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type);
def ggml_opt_default_params(type: Union[ctypes.c_int, int]) -> ggml_opt_params:
    """Return the default parameters for optimizer `type`.

    Parameters:
        type: GGML_OPT_ADAM or GGML_OPT_LBFGS.
    Returns:
        The default parameters (struct by value)."""
    return lib.ggml_opt_default_params(type)
lib.ggml_opt_default_params.argtypes = [ctypes.c_int]
lib.ggml_opt_default_params.restype = ggml_opt_params
  4707. # // optimize the function defined by the tensor f
  4708. # GGML_API enum ggml_opt_result ggml_opt(
  4709. # struct ggml_context * ctx,
  4710. # struct ggml_opt_params params,
  4711. # struct ggml_tensor * f);
def ggml_opt(
    ctx: ggml_context_p,
    params: ggml_opt_params,
    f: ggml_tensor_p,
) -> int:
    """Optimize the function defined by the tensor `f`.

    Parameters:
        ctx: The context.
        params: The optimizer parameters.
        f: The tensor defining the objective.
    Returns:
        A `ggml_opt_result` code (see GGML_OPT_* constants)."""
    return lib.ggml_opt(ctx, params, f)
lib.ggml_opt.argtypes = [ggml_context_p, ggml_opt_params, ctypes.POINTER(ggml_tensor)]
lib.ggml_opt.restype = ctypes.c_int
  4720. # // initialize optimizer context
  4721. # GGML_API void ggml_opt_init(
  4722. # struct ggml_context * ctx,
  4723. # struct ggml_opt_context * opt,
  4724. # struct ggml_opt_params params,
  4725. # int64_t nx);
def ggml_opt_init(
    ctx: ggml_context_p,
    opt: "ctypes._Pointer[ggml_opt_context]",  # type: ignore
    params: ggml_opt_params,
    nx: Union[ctypes.c_int64, int],
):
    """Initialize the optimizer context.

    Parameters:
        ctx: The context.
        opt: The optimizer context to initialize.
        params: The optimizer parameters.
        nx: The number of parameter elements."""
    return lib.ggml_opt_init(ctx, opt, params, nx)
lib.ggml_opt_init.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_opt_context),
    ggml_opt_params,
    ctypes.c_int64,
]
lib.ggml_opt_init.restype = None
  4740. # // continue optimizing the function defined by the tensor f
  4741. # GGML_API enum ggml_opt_result ggml_opt_resume(
  4742. # struct ggml_context * ctx,
  4743. # struct ggml_opt_context * opt,
  4744. # struct ggml_tensor * f);
def ggml_opt_resume(
    ctx: ggml_context_p,
    opt: "ctypes._Pointer[ggml_opt_context]",  # type: ignore
    f: ggml_tensor_p,
) -> int:
    """Continue optimizing the function defined by the tensor `f`.

    Parameters:
        ctx: The context.
        opt: The optimizer context.
        f: The tensor defining the objective.
    Returns:
        A `ggml_opt_result` code (see GGML_OPT_* constants)."""
    return lib.ggml_opt_resume(ctx, opt, f)
lib.ggml_opt_resume.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_opt_context),
    ctypes.POINTER(ggml_tensor),
]
lib.ggml_opt_resume.restype = ctypes.c_int
  4757. # // continue optimizing the function defined by the tensor f
  4758. # GGML_API enum ggml_opt_result ggml_opt_resume_g(
  4759. # struct ggml_context * ctx,
  4760. # struct ggml_opt_context * opt,
  4761. # struct ggml_tensor * f,
  4762. # struct ggml_cgraph * gf,
  4763. # struct ggml_cgraph * gb,
  4764. # ggml_opt_callback callback,
  4765. # void * callback_data);
  4766. # // continue optimizing the function defined by the tensor f
  4767. # GGML_API enum ggml_opt_result ggml_opt_resume_g(
  4768. # struct ggml_context * ctx,
  4769. # struct ggml_opt_context * opt,
  4770. # struct ggml_tensor * f,
  4771. # struct ggml_cgraph * gf,
  4772. # struct ggml_cgraph * gb);
def ggml_opt_resume_g(
    ctx: ggml_context_p,
    opt: "ctypes._Pointer[ggml_opt_context]",  # type: ignore
    f: ggml_tensor_p,
    gf: ggml_cgraph_p,
    gb: ggml_cgraph_p,
    callback: ggml_opt_callback = None,
    callback_data: ctypes.c_void_p = None,
) -> int:
    """Continue optimizing `f` using precomputed forward/backward graphs.

    Parameters:
        ctx: The context.
        opt: The optimizer context.
        f: The tensor defining the objective.
        gf: The forward graph.
        gb: The backward graph.
        callback: Optional `ggml_opt_callback`; None passes NULL.
        callback_data: Optional user data pointer for the callback.
    Returns:
        A `ggml_opt_result` code (see GGML_OPT_* constants)."""
    return lib.ggml_opt_resume_g(ctx, opt, f, gf, gb, callback, callback_data)
lib.ggml_opt_resume_g.argtypes = [
    ggml_context_p,
    ctypes.POINTER(ggml_opt_context),
    ctypes.POINTER(ggml_tensor),
    ctypes.POINTER(ggml_cgraph),
    ctypes.POINTER(ggml_cgraph),
    ggml_opt_callback,
    ctypes.c_void_p,
]
lib.ggml_opt_resume_g.restype = ctypes.c_int
  4793. # //
  4794. # // quantization
  4795. # //
  4796. # GGML_API size_t ggml_quantize_q4_0(const float * src, void * dst, int n, int k, int64_t * hist);
def ggml_quantize_q4_0(
    src: CFloatArray,
    dst: ctypes.c_void_p,
    n: Union[ctypes.c_int, int],
    k: Union[ctypes.c_int, int],
    hist: CInt64Array,
) -> int:
    """Quantize `n` floats from `src` into Q4_0 format at `dst`.

    Parameters:
        src: Source float array.
        dst: Destination buffer.
        n: Number of elements.
        k: Row size.
        hist: int64 histogram array updated with quantization statistics.
    Returns:
        Number of bytes written to `dst`."""
    return lib.ggml_quantize_q4_0(src, dst, n, k, hist)
lib.ggml_quantize_q4_0.argtypes = [
    ctypes.POINTER(ctypes.c_float),
    ctypes.c_void_p,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.POINTER(ctypes.c_int64),
]
lib.ggml_quantize_q4_0.restype = ctypes.c_size_t
  4813. # GGML_API size_t ggml_quantize_q4_1(const float * src, void * dst, int n, int k, int64_t * hist);
def ggml_quantize_q4_1(
    src: CFloatArray,
    dst: ctypes.c_void_p,
    n: Union[ctypes.c_int, int],
    k: Union[ctypes.c_int, int],
    hist: CInt64Array,
) -> int:
    """Quantize `n` floats from `src` into Q4_1 format at `dst`.

    Returns the number of bytes written to `dst`; `hist` receives
    quantization statistics."""
    return lib.ggml_quantize_q4_1(src, dst, n, k, hist)
lib.ggml_quantize_q4_1.argtypes = [
    ctypes.POINTER(ctypes.c_float),
    ctypes.c_void_p,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.POINTER(ctypes.c_int64),
]
lib.ggml_quantize_q4_1.restype = ctypes.c_size_t
  4830. # GGML_API size_t ggml_quantize_q5_0(const float * src, void * dst, int n, int k, int64_t * hist);
def ggml_quantize_q5_0(
    src: CFloatArray,
    dst: ctypes.c_void_p,
    n: Union[ctypes.c_int, int],
    k: Union[ctypes.c_int, int],
    hist: CInt64Array,
) -> int:
    """Quantize `n` floats from `src` into Q5_0 format at `dst`.

    Returns the number of bytes written to `dst`; `hist` receives
    quantization statistics."""
    return lib.ggml_quantize_q5_0(src, dst, n, k, hist)
lib.ggml_quantize_q5_0.argtypes = [
    ctypes.POINTER(ctypes.c_float),
    ctypes.c_void_p,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.POINTER(ctypes.c_int64),
]
lib.ggml_quantize_q5_0.restype = ctypes.c_size_t
  4847. # GGML_API size_t ggml_quantize_q5_1(const float * src, void * dst, int n, int k, int64_t * hist);
def ggml_quantize_q5_1(
    src: CFloatArray,
    dst: ctypes.c_void_p,
    n: Union[ctypes.c_int, int],
    k: Union[ctypes.c_int, int],
    hist: CInt64Array,
) -> int:
    """Quantize `n` floats from `src` into Q5_1 format at `dst`.

    Returns the number of bytes written to `dst`; `hist` receives
    quantization statistics."""
    return lib.ggml_quantize_q5_1(src, dst, n, k, hist)
lib.ggml_quantize_q5_1.argtypes = [
    ctypes.POINTER(ctypes.c_float),
    ctypes.c_void_p,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.POINTER(ctypes.c_int64),
]
lib.ggml_quantize_q5_1.restype = ctypes.c_size_t
  4864. # GGML_API size_t ggml_quantize_q8_0(const float * src, void * dst, int n, int k, int64_t * hist);
def ggml_quantize_q8_0(
    src: CFloatArray,
    dst: ctypes.c_void_p,
    n: Union[ctypes.c_int, int],
    k: Union[ctypes.c_int, int],
    hist: CInt64Array,
) -> int:
    """Quantize `n` floats from `src` into Q8_0 format at `dst`.

    Returns the number of bytes written to `dst`; `hist` receives
    quantization statistics."""
    return lib.ggml_quantize_q8_0(src, dst, n, k, hist)
lib.ggml_quantize_q8_0.argtypes = [
    ctypes.POINTER(ctypes.c_float),
    ctypes.c_void_p,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.POINTER(ctypes.c_int64),
]
lib.ggml_quantize_q8_0.restype = ctypes.c_size_t
  4881. # GGML_API size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start, int n, int64_t * hist);
def ggml_quantize_chunk(
    type: Union[ctypes.c_int, int],
    src: CFloatArray,
    dst: ctypes.c_void_p,
    start: Union[ctypes.c_int, int],
    n: Union[ctypes.c_int, int],
    hist: CInt64Array,
) -> int:
    """Quantize a chunk of `src` into `dst` using the quantization `type`.

    Parameters:
        type: A GGML_TYPE_* quantization type.
        src: Source float array.
        dst: Destination buffer.
        start: Start offset (elements).
        n: Number of elements.
        hist: int64 histogram array updated with quantization statistics.
    Returns:
        Number of bytes written to `dst`."""
    return lib.ggml_quantize_chunk(type, src, dst, start, n, hist)
lib.ggml_quantize_chunk.argtypes = [
    ctypes.c_int,
    ctypes.POINTER(ctypes.c_float),
    ctypes.c_void_p,
    ctypes.c_int,
    ctypes.c_int,
    ctypes.POINTER(ctypes.c_int64),
]
lib.ggml_quantize_chunk.restype = ctypes.c_size_t
  4900. # //
  4901. # // gguf
  4902. # //
  4903. # enum gguf_type {
  4904. # GGUF_TYPE_UINT8 = 0,
  4905. # GGUF_TYPE_INT8 = 1,
  4906. # GGUF_TYPE_UINT16 = 2,
  4907. # GGUF_TYPE_INT16 = 3,
  4908. # GGUF_TYPE_UINT32 = 4,
  4909. # GGUF_TYPE_INT32 = 5,
  4910. # GGUF_TYPE_FLOAT32 = 6,
  4911. # GGUF_TYPE_BOOL = 7,
  4912. # GGUF_TYPE_STRING = 8,
  4913. # GGUF_TYPE_ARRAY = 9,
  4914. # GGUF_TYPE_UINT64 = 10,
  4915. # GGUF_TYPE_INT64 = 11,
  4916. # GGUF_TYPE_FLOAT64 = 12,
  4917. # GGUF_TYPE_COUNT, // marks the end of the enum
  4918. # };
  4919. GGUF_TYPE_UINT8 = 0
  4920. GGUF_TYPE_INT8 = 1
  4921. GGUF_TYPE_UINT16 = 2
  4922. GGUF_TYPE_INT16 = 3
  4923. GGUF_TYPE_UINT32 = 4
  4924. GGUF_TYPE_INT32 = 5
  4925. GGUF_TYPE_FLOAT32 = 6
  4926. GGUF_TYPE_BOOL = 7
  4927. GGUF_TYPE_STRING = 8
  4928. GGUF_TYPE_ARRAY = 9
  4929. GGUF_TYPE_COUNT = 10
  4930. # struct gguf_context;
  4931. gguf_context_p = ctypes.c_void_p
  4932. # struct gguf_init_params {
  4933. # bool no_alloc;
  4934. # // if not NULL, create a ggml_context and allocate the tensor data in it
  4935. # struct ggml_context ** ctx;
  4936. # };
class gguf_init_params(ctypes.Structure):
    # Mirrors C `struct gguf_init_params` (see comment above).
    _fields_ = [
        ("no_alloc", ctypes.c_bool),
        # if not NULL, create a ggml_context and allocate the tensor data in it
        ("ctx", ctypes.POINTER(ggml_context_p)),
    ]
  4942. # GGML_API struct gguf_context * gguf_init_empty(void);
def gguf_init_empty() -> gguf_context_p:
    """Create a new, empty gguf context."""
    return lib.gguf_init_empty()
lib.gguf_init_empty.argtypes = []
lib.gguf_init_empty.restype = gguf_context_p
  4947. # GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
def gguf_init_from_file(
    fname: bytes,
    params: gguf_init_params,
) -> gguf_context_p:
    """Load a gguf context from the file `fname`.

    Parameters:
        fname: File name (encoded bytes).
        params: Initialization parameters.
    Returns:
        The gguf context."""
    return lib.gguf_init_from_file(fname, params)
lib.gguf_init_from_file.argtypes = [
    ctypes.c_char_p,
    gguf_init_params,
]
lib.gguf_init_from_file.restype = gguf_context_p
  4958. # //GGML_API struct gguf_context * gguf_init_from_buffer(..);
  4959. # GGML_API void gguf_free(struct gguf_context * ctx);
def gguf_free(
    ctx: gguf_context_p,
):
    """Free the gguf context."""
    return lib.gguf_free(ctx)
lib.gguf_free.argtypes = [
    gguf_context_p,
]
lib.gguf_free.restype = None
  4968. # GGML_API const char * gguf_type_name(enum gguf_type type);
def gguf_type_name(
    type: Union[ctypes.c_int, int],
) -> bytes:
    """Return the name of the gguf `type` as bytes."""
    return lib.gguf_type_name(type)
lib.gguf_type_name.argtypes = [
    ctypes.c_int,
]
lib.gguf_type_name.restype = ctypes.c_char_p
  4977. # GGML_API int gguf_get_version (const struct gguf_context * ctx);
def gguf_get_version(
    ctx: gguf_context_p,
) -> int:
    """Return the gguf format version of the context."""
    return lib.gguf_get_version(ctx)
lib.gguf_get_version.argtypes = [
    gguf_context_p,
]
lib.gguf_get_version.restype = ctypes.c_int
  4986. # GGML_API size_t gguf_get_alignment (const struct gguf_context * ctx);
def gguf_get_alignment(
    ctx: gguf_context_p,
) -> int:
    """Return the data alignment (bytes) of the gguf context."""
    return lib.gguf_get_alignment(ctx)
lib.gguf_get_alignment.argtypes = [
    gguf_context_p,
]
lib.gguf_get_alignment.restype = ctypes.c_size_t
  4995. # GGML_API size_t gguf_get_data_offset(const struct gguf_context * ctx);
def gguf_get_data_offset(
    ctx: gguf_context_p,
) -> int:
    """Return the offset (bytes) of the data section in the gguf file."""
    return lib.gguf_get_data_offset(ctx)
lib.gguf_get_data_offset.argtypes = [
    gguf_context_p,
]
lib.gguf_get_data_offset.restype = ctypes.c_size_t
  5004. # GGML_API void * gguf_get_data (const struct gguf_context * ctx);
def gguf_get_data(
    ctx: gguf_context_p,
) -> ctypes.c_void_p:
    """Return a raw pointer to the data section of the gguf context."""
    return lib.gguf_get_data(ctx)
lib.gguf_get_data.argtypes = [
    gguf_context_p,
]
lib.gguf_get_data.restype = ctypes.c_void_p
  5013. # GGML_API int gguf_get_n_kv(const struct gguf_context * ctx);
def gguf_get_n_kv(
    ctx: gguf_context_p,
) -> int:
    """Return the number of key-value pairs in the gguf context."""
    return lib.gguf_get_n_kv(ctx)
lib.gguf_get_n_kv.argtypes = [
    gguf_context_p,
]
lib.gguf_get_n_kv.restype = ctypes.c_int
  5022. # GGML_API int gguf_find_key(const struct gguf_context * ctx, const char * key);
def gguf_find_key(
    ctx: gguf_context_p,
    key: bytes,
) -> int:
    """Return the index of `key` in the gguf context's key-value pairs.

    Parameters:
        ctx: The gguf context.
        key: The key to look up (encoded bytes)."""
    return lib.gguf_find_key(ctx, key)
lib.gguf_find_key.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
]
lib.gguf_find_key.restype = ctypes.c_int
  5033. # GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int i);
def gguf_get_key(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> bytes:
    """Return the key of key-value pair `i` as bytes."""
    return lib.gguf_get_key(ctx, i)
lib.gguf_get_key.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_key.restype = ctypes.c_char_p
  5044. # GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int i);
def gguf_get_kv_type(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> int:
    """Return the GGUF_TYPE_* of key-value pair `i`."""
    return lib.gguf_get_kv_type(ctx, i)
lib.gguf_get_kv_type.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_kv_type.restype = ctypes.c_int
  5055. # GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int i);
def gguf_get_arr_type(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> int:
    """Return the GGUF_TYPE_* of the elements of array key-value pair `i`."""
    return lib.gguf_get_arr_type(ctx, i)
lib.gguf_get_arr_type.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_arr_type.restype = ctypes.c_int
  5066. # // results are undefined if the wrong type is used for the key
  5067. # GGML_API uint8_t gguf_get_val_u8 (const struct gguf_context * ctx, int i);
def gguf_get_val_u8(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> int:
    """Return the uint8 value of key-value pair `i` (undefined if wrong type)."""
    return lib.gguf_get_val_u8(ctx, i)
lib.gguf_get_val_u8.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_val_u8.restype = ctypes.c_uint8
  5078. # GGML_API int8_t gguf_get_val_i8 (const struct gguf_context * ctx, int i);
def gguf_get_val_i8(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> int:
    """Return the int8 value of key-value pair `i` (undefined if wrong type)."""
    return lib.gguf_get_val_i8(ctx, i)
lib.gguf_get_val_i8.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_val_i8.restype = ctypes.c_int8
  5089. # GGML_API uint16_t gguf_get_val_u16 (const struct gguf_context * ctx, int i);
def gguf_get_val_u16(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> int:
    """Return the uint16 value of key-value pair `i` (undefined if wrong type)."""
    return lib.gguf_get_val_u16(ctx, i)
lib.gguf_get_val_u16.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_val_u16.restype = ctypes.c_uint16
  5100. # GGML_API int16_t gguf_get_val_i16 (const struct gguf_context * ctx, int i);
def gguf_get_val_i16(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> int:
    """Return the int16 value of key-value pair `i` (undefined if wrong type)."""
    return lib.gguf_get_val_i16(ctx, i)
lib.gguf_get_val_i16.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_val_i16.restype = ctypes.c_int16
  5111. # GGML_API uint32_t gguf_get_val_u32 (const struct gguf_context * ctx, int i);
  5112. def gguf_get_val_u32(
  5113. ctx: gguf_context_p,
  5114. i: Union[ctypes.c_int, int],
  5115. ) -> int:
  5116. return lib.gguf_get_val_u32(ctx, i)
  5117. lib.gguf_get_val_u32.argtypes = [
  5118. gguf_context_p,
  5119. ctypes.c_int,
  5120. ]
  5121. lib.gguf_get_val_u32.restype = ctypes.c_uint32
  5122. # GGML_API int32_t gguf_get_val_i32 (const struct gguf_context * ctx, int i);
  5123. def gguf_get_val_i32(
  5124. ctx: gguf_context_p,
  5125. i: Union[ctypes.c_int, int],
  5126. ) -> int:
  5127. return lib.gguf_get_val_i32(ctx, i)
  5128. lib.gguf_get_val_i32.argtypes = [
  5129. gguf_context_p,
  5130. ctypes.c_int,
  5131. ]
  5132. lib.gguf_get_val_i32.restype = ctypes.c_int32
  5133. # GGML_API float gguf_get_val_f32 (const struct gguf_context * ctx, int i);
  5134. def gguf_get_val_f32(
  5135. ctx: gguf_context_p,
  5136. i: Union[ctypes.c_int, int],
  5137. ) -> float:
  5138. return lib.gguf_get_val_f32(ctx, i)
  5139. lib.gguf_get_val_f32.argtypes = [
  5140. gguf_context_p,
  5141. ctypes.c_int,
  5142. ]
  5143. lib.gguf_get_val_f32.restype = ctypes.c_float
  5144. # GGML_API uint64_t gguf_get_val_u64 (const struct gguf_context * ctx, int i);
  5145. def gguf_get_val_u64(
  5146. ctx: gguf_context_p,
  5147. i: Union[ctypes.c_int, int],
  5148. ) -> int:
  5149. return lib.gguf_get_val_u64(ctx, i)
  5150. lib.gguf_get_val_u64.argtypes = [
  5151. gguf_context_p,
  5152. ctypes.c_int,
  5153. ]
  5154. lib.gguf_get_val_u64.restype = ctypes.c_uint64
  5155. # GGML_API int64_t gguf_get_val_i64 (const struct gguf_context * ctx, int i);
  5156. def gguf_get_val_i64(
  5157. ctx: gguf_context_p,
  5158. i: Union[ctypes.c_int, int],
  5159. ) -> int:
  5160. return lib.gguf_get_val_i64(ctx, i)
  5161. lib.gguf_get_val_i64.argtypes = [
  5162. gguf_context_p,
  5163. ctypes.c_int,
  5164. ]
  5165. lib.gguf_get_val_i64.restype = ctypes.c_int64
  5166. # GGML_API double gguf_get_val_f64 (const struct gguf_context * ctx, int i);
  5167. def gguf_get_val_f64(
  5168. ctx: gguf_context_p,
  5169. i: Union[ctypes.c_int, int],
  5170. ) -> float:
  5171. return lib.gguf_get_val_f64(ctx, i)
  5172. lib.gguf_get_val_f64.argtypes = [
  5173. gguf_context_p,
  5174. ctypes.c_int,
  5175. ]
  5176. lib.gguf_get_val_f64.restype = ctypes.c_double
  5177. # GGML_API bool gguf_get_val_bool(const struct gguf_context * ctx, int i);
  5178. def gguf_get_val_bool(
  5179. ctx: gguf_context_p,
  5180. i: Union[ctypes.c_int, int],
  5181. ) -> bool:
  5182. return lib.gguf_get_val_bool(ctx, i)
  5183. lib.gguf_get_val_bool.argtypes = [
  5184. gguf_context_p,
  5185. ctypes.c_int,
  5186. ]
  5187. lib.gguf_get_val_bool.restype = ctypes.c_bool
  5188. # GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int i);
  5189. def gguf_get_val_str(
  5190. ctx: gguf_context_p,
  5191. i: Union[ctypes.c_int, int],
  5192. ) -> bytes:
  5193. return lib.gguf_get_val_str(ctx, i)
  5194. lib.gguf_get_val_str.argtypes = [
  5195. gguf_context_p,
  5196. ctypes.c_int,
  5197. ]
  5198. lib.gguf_get_val_str.restype = ctypes.c_char_p
# GGML_API int gguf_get_arr_n (const struct gguf_context * ctx, int i);
def gguf_get_arr_n(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> int:
    """Return the number of elements in the array KV pair at index *i*."""
    return lib.gguf_get_arr_n(ctx, i)


lib.gguf_get_arr_n.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_arr_n.restype = ctypes.c_int
# GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int i);
def gguf_get_arr_data(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> ctypes.c_void_p:
    """Return a raw pointer to the data of the array KV pair at index *i*.

    The caller must interpret/cast the pointer according to
    ``gguf_get_arr_type`` and ``gguf_get_arr_n``.
    """
    return lib.gguf_get_arr_data(ctx, i)


lib.gguf_get_arr_data.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_arr_data.restype = ctypes.c_void_p
# GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i);
def gguf_get_arr_str(
    ctx: gguf_context_p,
    key_id: Union[ctypes.c_int, int],
    i: Union[ctypes.c_int, int],
) -> bytes:
    """Return element *i* of the string-array KV pair *key_id* as raw bytes."""
    return lib.gguf_get_arr_str(ctx, key_id, i)


lib.gguf_get_arr_str.argtypes = [
    gguf_context_p,
    ctypes.c_int,
    ctypes.c_int,
]
lib.gguf_get_arr_str.restype = ctypes.c_char_p
# GGML_API int gguf_get_n_tensors (const struct gguf_context * ctx);
def gguf_get_n_tensors(
    ctx: gguf_context_p,
) -> int:
    """Return the number of tensors described in the GGUF context."""
    return lib.gguf_get_n_tensors(ctx)


lib.gguf_get_n_tensors.argtypes = [
    gguf_context_p,
]
lib.gguf_get_n_tensors.restype = ctypes.c_int
# GGML_API int gguf_find_tensor (const struct gguf_context * ctx, const char * name);
def gguf_find_tensor(
    ctx: gguf_context_p,
    name: bytes,
) -> int:
    """Return the index of the tensor named *name* (encoded bytes)."""
    return lib.gguf_find_tensor(ctx, name)


lib.gguf_find_tensor.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
]
lib.gguf_find_tensor.restype = ctypes.c_int
# GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i);
def gguf_get_tensor_offset(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> int:
    """Return the byte offset of tensor *i* within the GGUF data section."""
    return lib.gguf_get_tensor_offset(ctx, i)


lib.gguf_get_tensor_offset.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_tensor_offset.restype = ctypes.c_size_t
# GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i);
def gguf_get_tensor_name(
    ctx: gguf_context_p,
    i: Union[ctypes.c_int, int],
) -> bytes:
    """Return the name of tensor *i* as raw bytes."""
    return lib.gguf_get_tensor_name(ctx, i)


lib.gguf_get_tensor_name.argtypes = [
    gguf_context_p,
    ctypes.c_int,
]
lib.gguf_get_tensor_name.restype = ctypes.c_char_p
# Typed KV-pair setters. Per the upstream C header, each call overrides an
# existing value for *key* or adds a new KV pair.
# // overrides existing values or adds a new one
# GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
def gguf_set_val_u8(
    ctx: gguf_context_p,
    key: bytes,
    val: Union[ctypes.c_uint8, int],
):
    """Set KV pair *key* to the uint8 value *val*."""
    return lib.gguf_set_val_u8(ctx, key, val)


lib.gguf_set_val_u8.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_uint8,
]
lib.gguf_set_val_u8.restype = None
# GGML_API void gguf_set_val_i8 (struct gguf_context * ctx, const char * key, int8_t val);
def gguf_set_val_i8(
    ctx: gguf_context_p,
    key: bytes,
    val: Union[ctypes.c_int8, int],
):
    """Set KV pair *key* to the int8 value *val*."""
    return lib.gguf_set_val_i8(ctx, key, val)


lib.gguf_set_val_i8.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_int8,
]
lib.gguf_set_val_i8.restype = None
# GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t val);
def gguf_set_val_u16(
    ctx: gguf_context_p,
    key: bytes,
    val: Union[ctypes.c_uint16, int],
):
    """Set KV pair *key* to the uint16 value *val*."""
    return lib.gguf_set_val_u16(ctx, key, val)


lib.gguf_set_val_u16.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_uint16,
]
lib.gguf_set_val_u16.restype = None
# GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t val);
def gguf_set_val_i16(
    ctx: gguf_context_p,
    key: bytes,
    val: Union[ctypes.c_int16, int],
):
    """Set KV pair *key* to the int16 value *val*."""
    return lib.gguf_set_val_i16(ctx, key, val)


lib.gguf_set_val_i16.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_int16,
]
lib.gguf_set_val_i16.restype = None
# GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val);
def gguf_set_val_u32(
    ctx: gguf_context_p,
    key: bytes,
    val: Union[ctypes.c_uint32, int],
):
    """Set KV pair *key* to the uint32 value *val*."""
    return lib.gguf_set_val_u32(ctx, key, val)


lib.gguf_set_val_u32.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_uint32,
]
lib.gguf_set_val_u32.restype = None
# GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t val);
def gguf_set_val_i32(
    ctx: gguf_context_p,
    key: bytes,
    val: Union[ctypes.c_int32, int],
):
    """Set KV pair *key* to the int32 value *val*."""
    return lib.gguf_set_val_i32(ctx, key, val)


lib.gguf_set_val_i32.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_int32,
]
lib.gguf_set_val_i32.restype = None
# GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float val);
def gguf_set_val_f32(
    ctx: gguf_context_p,
    key: bytes,
    val: Union[ctypes.c_float, float],
):
    """Set KV pair *key* to the float32 value *val*."""
    return lib.gguf_set_val_f32(ctx, key, val)


lib.gguf_set_val_f32.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_float,
]
lib.gguf_set_val_f32.restype = None
# GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val);
def gguf_set_val_u64(
    ctx: gguf_context_p,
    key: bytes,
    val: Union[ctypes.c_uint64, int],
):
    """Set KV pair *key* to the uint64 value *val*."""
    return lib.gguf_set_val_u64(ctx, key, val)


lib.gguf_set_val_u64.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_uint64,
]
lib.gguf_set_val_u64.restype = None
# GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t val);
def gguf_set_val_i64(
    ctx: gguf_context_p,
    key: bytes,
    val: Union[ctypes.c_int64, int],
):
    """Set KV pair *key* to the int64 value *val*."""
    return lib.gguf_set_val_i64(ctx, key, val)


lib.gguf_set_val_i64.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_int64,
]
lib.gguf_set_val_i64.restype = None
# GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double val);
def gguf_set_val_f64(
    ctx: gguf_context_p,
    key: bytes,
    val: Union[ctypes.c_double, float],
):
    """Set KV pair *key* to the float64 (double) value *val*."""
    return lib.gguf_set_val_f64(ctx, key, val)


lib.gguf_set_val_f64.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_double,
]
lib.gguf_set_val_f64.restype = None
# GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val);
def gguf_set_val_bool(
    ctx: gguf_context_p,
    key: bytes,
    val: Union[ctypes.c_bool, bool],
):
    """Set KV pair *key* to the boolean value *val*."""
    return lib.gguf_set_val_bool(ctx, key, val)


lib.gguf_set_val_bool.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_bool,
]
lib.gguf_set_val_bool.restype = None
# GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
def gguf_set_val_str(
    ctx: gguf_context_p,
    key: bytes,
    val: bytes,
):
    """Set KV pair *key* to the string value *val* (encoded bytes)."""
    return lib.gguf_set_val_str(ctx, key, val)


lib.gguf_set_val_str.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_char_p,
]
lib.gguf_set_val_str.restype = None
# GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n);
def gguf_set_arr_data(
    ctx: gguf_context_p,
    key: bytes,
    type: Union[ctypes.c_int, int],  # gguf_type enum value; name shadows the builtin but mirrors the C API
    data: ctypes.c_void_p,
    n: Union[ctypes.c_int, int],
):
    """Set KV pair *key* to an array of *n* elements of gguf_type *type* read from *data*."""
    return lib.gguf_set_arr_data(ctx, key, type, data, n)


lib.gguf_set_arr_data.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_int,
    ctypes.c_void_p,
    ctypes.c_int,
]
lib.gguf_set_arr_data.restype = None
# GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, int n);
def gguf_set_arr_str(
    ctx: gguf_context_p,
    key: bytes,
    data: CCharPointer,  # pointer to an array of n C strings (char **)
    n: Union[ctypes.c_int, int],
):
    """Set KV pair *key* to an array of *n* strings read from *data*."""
    return lib.gguf_set_arr_str(ctx, key, data, n)


lib.gguf_set_arr_str.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.POINTER(ctypes.c_char_p),
    ctypes.c_int,
]
lib.gguf_set_arr_str.restype = None
# // set or add KV pairs from another context
# GGML_API void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src);
def gguf_set_kv(
    ctx: gguf_context_p,
    src: gguf_context_p,
):
    """Copy all KV pairs from context *src* into *ctx* (set-or-add semantics)."""
    return lib.gguf_set_kv(ctx, src)


lib.gguf_set_kv.argtypes = [
    gguf_context_p,
    gguf_context_p,
]
lib.gguf_set_kv.restype = None
# // manage tensor info
# GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor);
def gguf_add_tensor(
    ctx: gguf_context_p,
    tensor: ggml_tensor_p,
):
    """Register *tensor* (its info) with the GGUF context."""
    return lib.gguf_add_tensor(ctx, tensor)


lib.gguf_add_tensor.argtypes = [
    gguf_context_p,
    ctypes.POINTER(ggml_tensor),
]
lib.gguf_add_tensor.restype = None
# GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type);
def gguf_set_tensor_type(
    ctx: gguf_context_p,
    name: bytes,
    type: Union[ctypes.c_int, int],  # ggml_type enum value; name shadows the builtin but mirrors the C API
):
    """Set the ggml_type of the registered tensor called *name*."""
    return lib.gguf_set_tensor_type(ctx, name, type)


lib.gguf_set_tensor_type.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_int,
]
lib.gguf_set_tensor_type.restype = None
# GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size);
def gguf_set_tensor_data(
    ctx: gguf_context_p,
    name: bytes,
    data: ctypes.c_void_p,
    size: Union[ctypes.c_size_t, int],
):
    """Attach *size* bytes at *data* as the data of the registered tensor *name*."""
    return lib.gguf_set_tensor_data(ctx, name, data, size)


lib.gguf_set_tensor_data.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_void_p,
    ctypes.c_size_t,
]
lib.gguf_set_tensor_data.restype = None
# // writing gguf files can be done in 2 ways:
# //
# // - write the entire gguf_context to a binary file in a single pass:
# //
# // gguf_write_to_file(ctx, fname);
# //
# // - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data:
# //
# // FILE * f = fopen(fname, "wb");
# // fseek(f, gguf_get_meta_size(ctx), SEEK_SET);
# // fwrite(f, ...);
# // void * data = gguf_meta_get_meta_data(ctx);
# // fseek(f, 0, SEEK_SET);
# // fwrite(f, data, gguf_get_meta_size(ctx));
# // free(data);
# // fclose(f);
# //
# NOTE(review): the upstream comment above says `gguf_meta_get_meta_data`;
# the function bound below is `gguf_get_meta_data` — the quoted C comment
# appears to carry a typo from the original header.
# // write the entire context to a binary file
# GGML_API void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta);
def gguf_write_to_file(
    ctx: gguf_context_p,
    fname: bytes,
    only_meta: Union[ctypes.c_bool, bool],
):
    """Write the whole GGUF context to the file *fname*; when *only_meta* is true, write only the metadata."""
    return lib.gguf_write_to_file(ctx, fname, only_meta)


lib.gguf_write_to_file.argtypes = [
    gguf_context_p,
    ctypes.c_char_p,
    ctypes.c_bool,
]
lib.gguf_write_to_file.restype = None
# // get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
# GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
def gguf_get_meta_size(
    ctx: gguf_context_p,
) -> int:
    """Return the size in bytes of the metadata (header, KV pairs, tensor info), padding included."""
    return lib.gguf_get_meta_size(ctx)


lib.gguf_get_meta_size.argtypes = [
    gguf_context_p,
]
lib.gguf_get_meta_size.restype = ctypes.c_size_t
# GGML_API void gguf_get_meta_data(const struct gguf_context * ctx, void * data);
def gguf_get_meta_data(
    ctx: gguf_context_p,
    data: ctypes.c_void_p,
):
    """Copy the metadata into the caller-provided buffer *data* (must hold gguf_get_meta_size(ctx) bytes)."""
    return lib.gguf_get_meta_data(ctx, data)


lib.gguf_get_meta_data.argtypes = [
    gguf_context_p,
    ctypes.c_void_p,
]
lib.gguf_get_meta_data.restype = None
# //
# // system info
# //
# CPU/runtime feature probes. Each returns the library's int result
# (non-zero when the corresponding feature is reported as available).
# GGML_API int ggml_cpu_has_avx (void);
def ggml_cpu_has_avx() -> int:
    """Non-zero if the library reports AVX support."""
    return lib.ggml_cpu_has_avx()


lib.ggml_cpu_has_avx.argtypes = []
lib.ggml_cpu_has_avx.restype = ctypes.c_int
# GGML_API int ggml_cpu_has_avx2 (void);
def ggml_cpu_has_avx2() -> int:
    """Non-zero if the library reports AVX2 support."""
    return lib.ggml_cpu_has_avx2()


lib.ggml_cpu_has_avx2.argtypes = []
lib.ggml_cpu_has_avx2.restype = ctypes.c_int
# GGML_API int ggml_cpu_has_avx512 (void);
def ggml_cpu_has_avx512() -> int:
    """Non-zero if the library reports AVX-512 support."""
    return lib.ggml_cpu_has_avx512()


lib.ggml_cpu_has_avx512.argtypes = []
lib.ggml_cpu_has_avx512.restype = ctypes.c_int
# GGML_API int ggml_cpu_has_avx512_vbmi(void);
def ggml_cpu_has_avx512_vbmi() -> int:
    """Non-zero if the library reports AVX-512 VBMI support."""
    return lib.ggml_cpu_has_avx512_vbmi()


lib.ggml_cpu_has_avx512_vbmi.argtypes = []
lib.ggml_cpu_has_avx512_vbmi.restype = ctypes.c_int
# GGML_API int ggml_cpu_has_avx512_vnni(void);
def ggml_cpu_has_avx512_vnni() -> int:
    """Non-zero if the library reports AVX-512 VNNI support."""
    return lib.ggml_cpu_has_avx512_vnni()


lib.ggml_cpu_has_avx512_vnni.argtypes = []
lib.ggml_cpu_has_avx512_vnni.restype = ctypes.c_int
# GGML_API int ggml_cpu_has_fma (void);
def ggml_cpu_has_fma() -> int:
    """Non-zero if the library reports FMA support."""
    return lib.ggml_cpu_has_fma()


lib.ggml_cpu_has_fma.argtypes = []
lib.ggml_cpu_has_fma.restype = ctypes.c_int
# GGML_API int ggml_cpu_has_neon (void);
def ggml_cpu_has_neon() -> int:
    """Non-zero if the library reports ARM NEON support."""
    return lib.ggml_cpu_has_neon()


lib.ggml_cpu_has_neon.argtypes = []
lib.ggml_cpu_has_neon.restype = ctypes.c_int
# GGML_API int ggml_cpu_has_arm_fma (void);
def ggml_cpu_has_arm_fma() -> int:
    """Non-zero if the library reports ARM FMA support."""
    return lib.ggml_cpu_has_arm_fma()


lib.ggml_cpu_has_arm_fma.argtypes = []
lib.ggml_cpu_has_arm_fma.restype = ctypes.c_int
# GGML_API int ggml_cpu_has_f16c (void);
def ggml_cpu_has_f16c() -> int:
    """Non-zero if the library reports F16C support."""
    return lib.ggml_cpu_has_f16c()


lib.ggml_cpu_has_f16c.argtypes = []
lib.ggml_cpu_has_f16c.restype = ctypes.c_int
# GGML_API int ggml_cpu_has_fp16_va (void);
def ggml_cpu_has_fp16_va() -> int:
    """Non-zero if the library reports FP16 vector-arithmetic support."""
    return lib.ggml_cpu_has_fp16_va()


lib.ggml_cpu_has_fp16_va.argtypes = []
lib.ggml_cpu_has_fp16_va.restype = ctypes.c_int
# GGML_API int ggml_cpu_has_wasm_simd (void);
def ggml_cpu_has_wasm_simd() -> int:
    """Non-zero if the library reports WASM SIMD support."""
    return lib.ggml_cpu_has_wasm_simd()


lib.ggml_cpu_has_wasm_simd.argtypes = []
lib.ggml_cpu_has_wasm_simd.restype = ctypes.c_int
# GGML_API int ggml_cpu_has_blas (void);
def ggml_cpu_has_blas() -> int:
    """Non-zero if the library reports BLAS support."""
    return lib.ggml_cpu_has_blas()


lib.ggml_cpu_has_blas.argtypes = []
lib.ggml_cpu_has_blas.restype = ctypes.c_int
# GGML_API int ggml_cpu_has_cublas (void);
def ggml_cpu_has_cublas() -> int:
    """Non-zero if the library reports cuBLAS support."""
    return lib.ggml_cpu_has_cublas()


lib.ggml_cpu_has_cublas.argtypes = []
lib.ggml_cpu_has_cublas.restype = ctypes.c_int
# GGML_API int ggml_cpu_has_clblast (void);
def ggml_cpu_has_clblast() -> int:
    """Non-zero if the library reports CLBlast support."""
    return lib.ggml_cpu_has_clblast()


lib.ggml_cpu_has_clblast.argtypes = []
lib.ggml_cpu_has_clblast.restype = ctypes.c_int
# GGML_API int ggml_cpu_has_gpublas (void);
def ggml_cpu_has_gpublas() -> int:
    """Non-zero if the library reports GPU BLAS support."""
    return lib.ggml_cpu_has_gpublas()


lib.ggml_cpu_has_gpublas.argtypes = []
lib.ggml_cpu_has_gpublas.restype = ctypes.c_int
# GGML_API int ggml_cpu_has_sse3 (void);
def ggml_cpu_has_sse3() -> int:
    """Non-zero if the library reports SSE3 support."""
    return lib.ggml_cpu_has_sse3()


lib.ggml_cpu_has_sse3.argtypes = []
lib.ggml_cpu_has_sse3.restype = ctypes.c_int
# GGML_API int ggml_cpu_has_ssse3 (void);
def ggml_cpu_has_ssse3() -> int:
    """Non-zero if the library reports SSSE3 support."""
    return lib.ggml_cpu_has_ssse3()


lib.ggml_cpu_has_ssse3.argtypes = []
lib.ggml_cpu_has_ssse3.restype = ctypes.c_int
# GGML_API int ggml_cpu_has_vsx (void);
def ggml_cpu_has_vsx() -> int:
    """Non-zero if the library reports POWER VSX support."""
    return lib.ggml_cpu_has_vsx()


lib.ggml_cpu_has_vsx.argtypes = []
lib.ggml_cpu_has_vsx.restype = ctypes.c_int
# //
# // Internal types and functions exposed for tests and benchmarks
# //
# ctypes callback prototypes mirroring the C function-pointer typedefs below.
# typedef void (*ggml_to_float_t)(const void * x, float * y, int k);
# Dequantize k values from x into the float buffer y.
ggml_to_float_t = ctypes.CFUNCTYPE(
    None, ctypes.c_void_p, ctypes.POINTER(ctypes.c_float), ctypes.c_int
)
# typedef void (*ggml_from_float_t)(const float * x, void * y, int k);
# Quantize k floats from x into the type-specific buffer y.
ggml_from_float_t = ctypes.CFUNCTYPE(
    None, ctypes.POINTER(ctypes.c_float), ctypes.c_void_p, ctypes.c_int
)
# typedef void (*ggml_vec_dot_t)(const int n, float * s, const void * x, const void * y);
# Dot product of n elements of x and y, result written to *s.
ggml_vec_dot_t = ctypes.CFUNCTYPE(
    None, ctypes.c_int, ctypes.POINTER(ctypes.c_float), ctypes.c_void_p, ctypes.c_void_p
)
# typedef struct {
# const char * type_name;
# int blck_size;
# size_t type_size;
# bool is_quantized;
# ggml_to_float_t to_float;
# ggml_from_float_t from_float;
# ggml_from_float_t from_float_reference;
# ggml_vec_dot_t vec_dot;
# enum ggml_type vec_dot_type;
# } ggml_type_traits_t;
class ggml_type_traits_t(ctypes.Structure):
    """ctypes mirror of the C ggml_type_traits_t struct (per-type quantization traits)."""

    _fields_ = [
        ("type_name", ctypes.c_char_p),        # human-readable type name
        ("blck_size", ctypes.c_int),           # elements per block
        ("type_size", ctypes.c_size_t),        # bytes per block
        ("is_quantized", ctypes.c_bool),       # whether the type is a quantized format
        ("to_float", ggml_to_float_t),         # dequantization callback
        ("from_float", ggml_from_float_t),     # quantization callback
        ("from_float_reference", ggml_from_float_t),  # reference (non-optimized) quantizer
        ("vec_dot", ggml_vec_dot_t),           # dot-product kernel
        ("vec_dot_type", ctypes.c_int),        # ggml_type used for the other vec_dot operand
    ]


# ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type);
def ggml_internal_get_type_traits(type: Union[ctypes.c_int, int]) -> ggml_type_traits_t:
    """Return the type-traits struct for the ggml_type enum value *type* (by value)."""
    return lib.ggml_internal_get_type_traits(type)


lib.ggml_internal_get_type_traits.argtypes = [ctypes.c_int]
lib.ggml_internal_get_type_traits.restype = ggml_type_traits_t
#####################################################
# GGML ALLOC API
# source: ggml-alloc.h
#####################################################
# Opaque handle to a ggml allocator (struct ggml_allocr *).
ggml_allocr_p = ctypes.c_void_p
# GGML_API struct ggml_allocr * ggml_allocr_new(void * data, size_t size, size_t alignment);
def ggml_allocr_new(
    data: ctypes.c_void_p,
    size: Union[ctypes.c_size_t, int],
    alignment: Union[ctypes.c_size_t, int],
) -> ggml_allocr_p:
    """Create an allocator over the caller-owned buffer *data* of *size* bytes with the given *alignment*."""
    return lib.ggml_allocr_new(data, size, alignment)


lib.ggml_allocr_new.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.c_size_t]
lib.ggml_allocr_new.restype = ggml_allocr_p
# GGML_API struct ggml_allocr * ggml_allocr_new_measure(size_t alignment);
def ggml_allocr_new_measure(
    alignment: Union[ctypes.c_size_t, int],
) -> ggml_allocr_p:
    """Create a measure-mode allocator (no backing buffer) with the given *alignment*."""
    return lib.ggml_allocr_new_measure(alignment)


lib.ggml_allocr_new_measure.argtypes = [ctypes.c_size_t]
lib.ggml_allocr_new_measure.restype = ggml_allocr_p
# // tell the allocator to parse nodes following the order described in the list
# // you should call this if your graph are optimized to execute out-of-order
# GGML_API void ggml_allocr_set_parse_seq(struct ggml_allocr * alloc, const int * list, int n);
def ggml_allocr_set_parse_seq(
    alloc: ggml_allocr_p,
    list: CIntPointer,  # pointer to n node indices; name shadows the builtin but mirrors the C API
    n: Union[ctypes.c_int, int],
):
    """Tell the allocator to parse graph nodes in the order given by the *n* indices in *list*."""
    return lib.ggml_allocr_set_parse_seq(alloc, list, n)


lib.ggml_allocr_set_parse_seq.argtypes = [
    ggml_allocr_p,
    ctypes.POINTER(ctypes.c_int),
    ctypes.c_int,
]
lib.ggml_allocr_set_parse_seq.restype = None
# GGML_API void ggml_allocr_free(struct ggml_allocr * alloc);
def ggml_allocr_free(
    alloc: ggml_allocr_p,
):
    """Free the allocator *alloc* (not the caller-owned buffer it was created over)."""
    return lib.ggml_allocr_free(alloc)


lib.ggml_allocr_free.argtypes = [ggml_allocr_p]
lib.ggml_allocr_free.restype = None
# GGML_API bool ggml_allocr_is_measure(struct ggml_allocr * alloc);
def ggml_allocr_is_measure(
    alloc: ggml_allocr_p,
) -> bool:
    """Return True if *alloc* was created in measure mode."""
    return lib.ggml_allocr_is_measure(alloc)


lib.ggml_allocr_is_measure.argtypes = [ggml_allocr_p]
lib.ggml_allocr_is_measure.restype = ctypes.c_bool
# GGML_API void ggml_allocr_reset(struct ggml_allocr * alloc);
def ggml_allocr_reset(
    alloc: ggml_allocr_p,
):
    """Reset the allocator so its buffer can be reused."""
    return lib.ggml_allocr_reset(alloc)


lib.ggml_allocr_reset.argtypes = [ggml_allocr_p]
lib.ggml_allocr_reset.restype = None
# GGML_API void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor);
def ggml_allocr_alloc(
    alloc: ggml_allocr_p,
    tensor: ggml_tensor_p,
):
    """Allocate storage for *tensor* from *alloc*."""
    return lib.ggml_allocr_alloc(alloc, tensor)


lib.ggml_allocr_alloc.argtypes = [ggml_allocr_p, ctypes.POINTER(ggml_tensor)]
lib.ggml_allocr_alloc.restype = None
# GGML_API size_t ggml_allocr_alloc_graph(struct ggml_allocr * alloc, struct ggml_cgraph * graph);
def ggml_allocr_alloc_graph(
    alloc: ggml_allocr_p,
    graph: ggml_cgraph_p,
) -> int:
    """Allocate storage for all tensors in *graph*; returns the number of bytes required."""
    return lib.ggml_allocr_alloc_graph(alloc, graph)


lib.ggml_allocr_alloc_graph.argtypes = [ggml_allocr_p, ctypes.POINTER(ggml_cgraph)]
lib.ggml_allocr_alloc_graph.restype = ctypes.c_size_t
#####################################################
# GGML CUDA API
# source: ggml-cuda.h
#####################################################
# True when the loaded library was built with cuBLAS (the symbol exists).
# The wrappers below are always defined, but their argtypes/restype are
# configured only under this flag; calling one against a non-CUDA build
# will presumably fail at the `lib.<symbol>` lookup — by design.
GGML_USE_CUBLAS = hasattr(lib, "ggml_init_cublas")
GGML_CUDA_MAX_DEVICES = 16
# GGML_API void ggml_init_cublas(void);
def ggml_init_cublas():
    """Initialize the cuBLAS backend."""
    return lib.ggml_init_cublas()


if GGML_USE_CUBLAS:
    lib.ggml_init_cublas.argtypes = []
    lib.ggml_init_cublas.restype = None
# void * ggml_cuda_host_malloc(size_t size);
def ggml_cuda_host_malloc(
    size: Union[ctypes.c_size_t, int],
) -> Optional[ctypes.c_void_p]:
    """Allocate *size* bytes of pinned host memory; returns the pointer (None on NULL)."""
    return lib.ggml_cuda_host_malloc(size)


if GGML_USE_CUBLAS:
    lib.ggml_cuda_host_malloc.argtypes = [ctypes.c_size_t]
    lib.ggml_cuda_host_malloc.restype = ctypes.c_void_p
# void ggml_cuda_host_free(void * ptr);
def ggml_cuda_host_free(
    ptr: ctypes.c_void_p,
):
    """Free pinned host memory previously returned by ggml_cuda_host_malloc."""
    return lib.ggml_cuda_host_free(ptr)


if GGML_USE_CUBLAS:
    lib.ggml_cuda_host_free.argtypes = [ctypes.c_void_p]
    lib.ggml_cuda_host_free.restype = None
# GGML_API bool ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
def ggml_cuda_can_mul_mat(
    src0: ggml_tensor_p,
    src1: ggml_tensor_p,
    dst: ggml_tensor_p,
) -> bool:
    """Return True if the CUDA backend can perform this mul_mat (src0 x src1 -> dst)."""
    return lib.ggml_cuda_can_mul_mat(src0, src1, dst)


if GGML_USE_CUBLAS:
    lib.ggml_cuda_can_mul_mat.argtypes = [
        ctypes.POINTER(ggml_tensor),
        ctypes.POINTER(ggml_tensor),
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cuda_can_mul_mat.restype = ctypes.c_bool
# GGML_API void ggml_cuda_set_tensor_split(const float * tensor_split);
def ggml_cuda_set_tensor_split(
    tensor_split: CFloatArray,  # per-device split fractions (float array)
):
    """Set how tensors are split across CUDA devices."""
    return lib.ggml_cuda_set_tensor_split(tensor_split)


if GGML_USE_CUBLAS:
    lib.ggml_cuda_set_tensor_split.argtypes = [ctypes.POINTER(ctypes.c_float)]
    lib.ggml_cuda_set_tensor_split.restype = None
# void ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor);
def ggml_cuda_transform_tensor(
    data: ctypes.c_void_p,
    tensor: ggml_tensor_p,
):
    """Upload/transform *data* for *tensor* into the CUDA backend's layout."""
    return lib.ggml_cuda_transform_tensor(data, tensor)


if GGML_USE_CUBLAS:
    lib.ggml_cuda_transform_tensor.argtypes = [
        ctypes.c_void_p,
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cuda_transform_tensor.restype = None
# void ggml_cuda_free_data(struct ggml_tensor * tensor);
def ggml_cuda_free_data(
    tensor: ggml_tensor_p,
):
    """Free the CUDA-side data associated with *tensor*."""
    return lib.ggml_cuda_free_data(tensor)


if GGML_USE_CUBLAS:
    lib.ggml_cuda_free_data.argtypes = [
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cuda_free_data.restype = None
# void ggml_cuda_assign_buffers(struct ggml_tensor * tensor);
def ggml_cuda_assign_buffers(
    tensor: ggml_tensor_p,
):
    """Assign CUDA buffers to *tensor*."""
    return lib.ggml_cuda_assign_buffers(tensor)


if GGML_USE_CUBLAS:
    lib.ggml_cuda_assign_buffers.argtypes = [
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cuda_assign_buffers.restype = None
# void ggml_cuda_assign_buffers_no_scratch(struct ggml_tensor * tensor);
def ggml_cuda_assign_buffers_no_scratch(
    tensor: ggml_tensor_p,
):
    """Assign CUDA buffers to *tensor* without using the scratch buffer."""
    return lib.ggml_cuda_assign_buffers_no_scratch(tensor)


if GGML_USE_CUBLAS:
    lib.ggml_cuda_assign_buffers_no_scratch.argtypes = [
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cuda_assign_buffers_no_scratch.restype = None
# GGML_API void ggml_cuda_assign_buffers_force_inplace(struct ggml_tensor * tensor);
def ggml_cuda_assign_buffers_force_inplace(
    tensor: ggml_tensor_p,
):
    """Assign CUDA buffers to *tensor*, forcing in-place operation."""
    return lib.ggml_cuda_assign_buffers_force_inplace(tensor)


if GGML_USE_CUBLAS:
    lib.ggml_cuda_assign_buffers_force_inplace.argtypes = [
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cuda_assign_buffers_force_inplace.restype = None
# GGML_API void ggml_cuda_assign_buffers_no_alloc(struct ggml_tensor * tensor);
def ggml_cuda_assign_buffers_no_alloc(
    tensor: ggml_tensor_p,
):
    """Assign CUDA buffers to *tensor* without allocating device memory."""
    return lib.ggml_cuda_assign_buffers_no_alloc(tensor)


if GGML_USE_CUBLAS:
    lib.ggml_cuda_assign_buffers_no_alloc.argtypes = [
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cuda_assign_buffers_no_alloc.restype = None
# GGML_API void ggml_cuda_assign_scratch_offset(struct ggml_tensor * tensor, size_t offset);
def ggml_cuda_assign_scratch_offset(
    tensor: ggml_tensor_p,
    offset: Union[ctypes.c_size_t, int],
):
    """Place *tensor* at byte *offset* within the CUDA scratch buffer."""
    return lib.ggml_cuda_assign_scratch_offset(tensor, offset)


if GGML_USE_CUBLAS:
    lib.ggml_cuda_assign_scratch_offset.argtypes = [
        ctypes.POINTER(ggml_tensor),
        ctypes.c_size_t,
    ]
    lib.ggml_cuda_assign_scratch_offset.restype = None
# void ggml_cuda_set_main_device(int main_device);
def ggml_cuda_set_main_device(
    main_device: Union[ctypes.c_int, int],
):
    """Select the main CUDA device by index."""
    return lib.ggml_cuda_set_main_device(main_device)


if GGML_USE_CUBLAS:
    lib.ggml_cuda_set_main_device.argtypes = [
        ctypes.c_int,
    ]
    lib.ggml_cuda_set_main_device.restype = None
# GGML_API void ggml_cuda_set_mul_mat_q(bool mul_mat_q);
def ggml_cuda_set_mul_mat_q(
    mul_mat_q: Union[ctypes.c_bool, bool],
):
    """Enable/disable the quantized mul_mat CUDA kernels."""
    return lib.ggml_cuda_set_mul_mat_q(mul_mat_q)


if GGML_USE_CUBLAS:
    lib.ggml_cuda_set_mul_mat_q.argtypes = [
        ctypes.c_bool,
    ]
    lib.ggml_cuda_set_mul_mat_q.restype = None
# void ggml_cuda_set_scratch_size(size_t scratch_size);
def ggml_cuda_set_scratch_size(
    scratch_size: Union[ctypes.c_size_t, int],
):
    """Set the size in bytes of the CUDA scratch buffer."""
    return lib.ggml_cuda_set_scratch_size(scratch_size)


if GGML_USE_CUBLAS:
    lib.ggml_cuda_set_scratch_size.argtypes = [
        ctypes.c_size_t,
    ]
    lib.ggml_cuda_set_scratch_size.restype = None
# void ggml_cuda_free_scratch(void);
def ggml_cuda_free_scratch():
    """Free the CUDA scratch buffer."""
    return lib.ggml_cuda_free_scratch()


if GGML_USE_CUBLAS:
    lib.ggml_cuda_free_scratch.argtypes = []
    lib.ggml_cuda_free_scratch.restype = None
# GGML_API bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor);
def ggml_cuda_compute_forward(
    params: ggml_compute_params_p,
    tensor: ggml_tensor_p,
) -> bool:
    """Run the forward computation of *tensor* on CUDA; returns True if the op was handled."""
    return lib.ggml_cuda_compute_forward(params, tensor)


if GGML_USE_CUBLAS:
    lib.ggml_cuda_compute_forward.argtypes = [
        ctypes.POINTER(ggml_compute_params),
        ctypes.POINTER(ggml_tensor),
    ]
    lib.ggml_cuda_compute_forward.restype = ctypes.c_bool
# GGML_API int ggml_cuda_get_device_count(void);
def ggml_cuda_get_device_count() -> int:
    """Return the number of available CUDA devices."""
    return lib.ggml_cuda_get_device_count()


if GGML_USE_CUBLAS:
    lib.ggml_cuda_get_device_count.argtypes = []
    lib.ggml_cuda_get_device_count.restype = ctypes.c_int
  5956. # GGML_API void ggml_cuda_get_device_description(int device, char * description, size_t description_size);
  5957. def ggml_cuda_get_device_description(
  5958. device: Union[ctypes.c_int, int],
  5959. description: bytes,
  5960. description_size: Union[ctypes.c_size_t, int],
  5961. ):
  5962. return lib.ggml_cuda_get_device_description(device, description, description_size)
  5963. if GGML_USE_CUBLAS:
  5964. lib.ggml_cuda_get_device_description.argtypes = [
  5965. ctypes.c_int,
  5966. ctypes.c_char_p,
  5967. ctypes.c_size_t,
  5968. ]
  5969. lib.ggml_cuda_get_device_description.restype = None
#####################################################
# GGML METAL API
# source: ggml-metal.h
#####################################################

# The Metal bindings below are only configured (argtypes/restype) when the
# loaded shared library actually exports the Metal entry points.
GGML_USE_METAL = hasattr(lib, "ggml_metal_init")

# // max memory buffers that can be mapped to the device
# #define GGML_METAL_MAX_BUFFERS 16
GGML_METAL_MAX_BUFFERS = 16
# #define GGML_METAL_MAX_COMMAND_BUFFERS 32
GGML_METAL_MAX_COMMAND_BUFFERS = 32

# struct ggml_metal_context;
# Opaque handle: the context struct is never dereferenced from Python.
ggml_metal_context_p = ctypes.c_void_p
  5982. # struct ggml_metal_context * ggml_metal_init(int n_cb);
  5983. def ggml_metal_init(
  5984. n_cb: Union[ctypes.c_int, int],
  5985. ) -> ggml_metal_context_p:
  5986. return lib.ggml_metal_init(n_cb)
  5987. if GGML_USE_METAL:
  5988. lib.ggml_metal_init.argtypes = [ctypes.c_int]
  5989. lib.ggml_metal_init.restype = ggml_metal_context_p
  5990. # void ggml_metal_free(struct ggml_metal_context * ctx);
  5991. def ggml_metal_free(
  5992. ctx: ggml_metal_context_p,
  5993. ):
  5994. return lib.ggml_metal_free(ctx)
  5995. if GGML_USE_METAL:
  5996. lib.ggml_metal_free.argtypes = [ggml_metal_context_p]
  5997. lib.ggml_metal_free.restype = None
  5998. # // set the number of command buffers to use
  5999. # void ggml_metal_set_n_cb(struct ggml_metal_context * ctx, int n_cb);
  6000. def ggml_metal_set_n_cb(
  6001. ctx: ggml_metal_context_p,
  6002. n_cb: Union[ctypes.c_int, int],
  6003. ):
  6004. return lib.ggml_metal_set_n_cb(ctx, n_cb)
  6005. if GGML_USE_METAL:
  6006. lib.ggml_metal_set_n_cb.argtypes = [ggml_metal_context_p, ctypes.c_int]
  6007. lib.ggml_metal_set_n_cb.restype = None
  6008. # // creates a mapping between a host memory buffer and a device memory buffer
  6009. # // - make sure to map all buffers used in the graph before calling ggml_metal_graph_compute
  6010. # // - the mapping is used during computation to determine the arguments of the compute kernels
  6011. # // - you don't need to keep the host memory buffer allocated as it is never accessed by Metal
  6012. # // - max_size specifies the maximum size of a tensor and is used to create shared views such
  6013. # // that it is guaranteed that the tensor will fit in at least one of the views
  6014. # //
  6015. # bool ggml_metal_add_buffer(
  6016. # struct ggml_metal_context * ctx,
  6017. # const char * name,
  6018. # void * data,
  6019. # size_t size,
  6020. # size_t max_size);
  6021. def ggml_metal_add_buffer(
  6022. ctx: ggml_metal_context_p,
  6023. name: bytes,
  6024. data: ctypes.c_void_p,
  6025. size: Union[ctypes.c_size_t, int],
  6026. max_size: Union[ctypes.c_size_t, int],
  6027. ) -> bool:
  6028. return lib.ggml_metal_add_buffer(ctx, name, data, size, max_size)
  6029. if GGML_USE_METAL:
  6030. lib.ggml_metal_add_buffer.argtypes = [
  6031. ggml_metal_context_p,
  6032. ctypes.c_char_p,
  6033. ctypes.c_void_p,
  6034. ctypes.c_size_t,
  6035. ctypes.c_size_t,
  6036. ]
  6037. lib.ggml_metal_add_buffer.restype = ctypes.c_bool
  6038. # // set data from host memory into the device
  6039. # void ggml_metal_set_tensor(struct ggml_metal_context * ctx, struct ggml_tensor * t);
  6040. def ggml_metal_set_tensor(
  6041. ctx: ggml_metal_context_p,
  6042. t: ggml_tensor_p,
  6043. ):
  6044. return lib.ggml_metal_set_tensor(ctx, t)
  6045. if GGML_USE_METAL:
  6046. lib.ggml_metal_set_tensor.argtypes = [
  6047. ggml_metal_context_p,
  6048. ctypes.POINTER(ggml_tensor),
  6049. ]
  6050. lib.ggml_metal_set_tensor.restype = None
  6051. # // get data from the device into host memory
  6052. # void ggml_metal_get_tensor(struct ggml_metal_context * ctx, struct ggml_tensor * t);
  6053. def ggml_metal_get_tensor(
  6054. ctx: ggml_metal_context_p,
  6055. t: ggml_tensor_p,
  6056. ):
  6057. return lib.ggml_metal_get_tensor(ctx, t)
  6058. if GGML_USE_METAL:
  6059. lib.ggml_metal_get_tensor.argtypes = [
  6060. ggml_metal_context_p,
  6061. ctypes.POINTER(ggml_tensor),
  6062. ]
  6063. lib.ggml_metal_get_tensor.restype = None
  6064. # // try to find operations that can be run concurrently in the graph
  6065. # // you should run it again if the topology of your graph changes
  6066. # void ggml_metal_graph_find_concurrency(struct ggml_metal_context * ctx, struct ggml_cgraph * gf, bool check_mem);
  6067. def ggml_metal_graph_find_concurrency(
  6068. ctx: ggml_metal_context_p,
  6069. gf: ggml_cgraph_p,
  6070. check_mem: Union[ctypes.c_bool, bool],
  6071. ):
  6072. return lib.ggml_metal_graph_find_concurrency(ctx, gf, check_mem)
  6073. if GGML_USE_METAL:
  6074. lib.ggml_metal_graph_find_concurrency.argtypes = [
  6075. ggml_metal_context_p,
  6076. ctypes.POINTER(ggml_cgraph),
  6077. ctypes.c_bool,
  6078. ]
  6079. lib.ggml_metal_graph_find_concurrency.restype = None
  6080. # // if the graph has been optimized for concurrently dispatch, return length of the concur_list if optimized
  6081. # int ggml_metal_if_optimized(struct ggml_metal_context * ctx);
  6082. def ggml_metal_if_optimized(
  6083. ctx: ggml_metal_context_p,
  6084. ) -> int:
  6085. return lib.ggml_metal_if_optimized(ctx)
  6086. if GGML_USE_METAL:
  6087. lib.ggml_metal_if_optimized.argtypes = [
  6088. ggml_metal_context_p,
  6089. ]
  6090. lib.ggml_metal_if_optimized.restype = ctypes.c_int
  6091. # // output the concur_list for ggml_alloc
  6092. # int * ggml_metal_get_concur_list(struct ggml_metal_context * ctx);
  6093. def ggml_metal_get_concur_list(
  6094. ctx: ggml_metal_context_p,
  6095. ) -> CIntPointer:
  6096. return lib.ggml_metal_get_concur_list(ctx)
  6097. if GGML_USE_METAL:
  6098. lib.ggml_metal_get_concur_list.argtypes = [
  6099. ggml_metal_context_p,
  6100. ]
  6101. lib.ggml_metal_get_concur_list.restype = ctypes.POINTER(ctypes.c_int)
  6102. # // same as ggml_graph_compute but uses Metal
  6103. # // creates gf->n_threads command buffers in parallel
  6104. # void ggml_metal_graph_compute(struct ggml_metal_context * ctx, struct ggml_cgraph * gf);
  6105. def ggml_metal_graph_compute(
  6106. ctx: ggml_metal_context_p,
  6107. gf: ggml_cgraph_p,
  6108. ):
  6109. return lib.ggml_metal_graph_compute(ctx, gf)
  6110. if GGML_USE_METAL:
  6111. lib.ggml_metal_graph_compute.argtypes = [
  6112. ggml_metal_context_p,
  6113. ctypes.POINTER(ggml_cgraph),
  6114. ]
  6115. lib.ggml_metal_graph_compute.restype = None
#####################################################
# GGML OPENCL API
# source: ggml-opencl.h
#####################################################

# The OpenCL bindings below are only configured (argtypes/restype) when the
# loaded shared library actually exports the OpenCL entry points.
GGML_USE_CLBLAST = hasattr(lib, "ggml_cl_init")
  6121. # void ggml_cl_init(void);
  6122. def ggml_cl_init():
  6123. return lib.ggml_cl_init()
  6124. if GGML_USE_CLBLAST:
  6125. lib.ggml_cl_init.argtypes = []
  6126. lib.ggml_cl_init.restype = None
  6127. # void ggml_cl_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
  6128. def ggml_cl_mul(
  6129. src0: ggml_tensor_p,
  6130. src1: ggml_tensor_p,
  6131. dst: ggml_tensor_p,
  6132. ):
  6133. return lib.ggml_cl_mul(src0, src1, dst)
  6134. if GGML_USE_CLBLAST:
  6135. lib.ggml_cl_mul.argtypes = [
  6136. ctypes.POINTER(ggml_tensor),
  6137. ctypes.POINTER(ggml_tensor),
  6138. ctypes.POINTER(ggml_tensor),
  6139. ]
  6140. lib.ggml_cl_mul.restype = None
  6141. # bool ggml_cl_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
  6142. def ggml_cl_can_mul_mat(
  6143. src0: ggml_tensor_p,
  6144. src1: ggml_tensor_p,
  6145. dst: ggml_tensor_p,
  6146. ) -> bool:
  6147. return lib.ggml_cl_can_mul_mat(src0, src1, dst)
  6148. if GGML_USE_CLBLAST:
  6149. lib.ggml_cl_can_mul_mat.argtypes = [
  6150. ctypes.POINTER(ggml_tensor),
  6151. ctypes.POINTER(ggml_tensor),
  6152. ctypes.POINTER(ggml_tensor),
  6153. ]
  6154. lib.ggml_cl_can_mul_mat.restype = ctypes.c_bool
  6155. # size_t ggml_cl_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
  6156. def ggml_cl_mul_mat_get_wsize(
  6157. src0: ggml_tensor_p,
  6158. src1: ggml_tensor_p,
  6159. dst: ggml_tensor_p,
  6160. ) -> int:
  6161. return lib.ggml_cl_mul_mat_get_wsize(src0, src1, dst)
  6162. if GGML_USE_CLBLAST:
  6163. lib.ggml_cl_mul_mat_get_wsize.argtypes = [
  6164. ctypes.POINTER(ggml_tensor),
  6165. ctypes.POINTER(ggml_tensor),
  6166. ctypes.POINTER(ggml_tensor),
  6167. ]
  6168. lib.ggml_cl_mul_mat_get_wsize.restype = ctypes.c_size_t
  6169. # void ggml_cl_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, void * wdata, size_t wsize);
  6170. def ggml_cl_mul_mat(
  6171. src0: ggml_tensor_p,
  6172. src1: ggml_tensor_p,
  6173. dst: ggml_tensor_p,
  6174. wdata: ctypes.c_void_p,
  6175. wsize: Union[ctypes.c_size_t, int],
  6176. ):
  6177. return lib.ggml_cl_mul_mat(src0, src1, dst, wdata, wsize)
  6178. if GGML_USE_CLBLAST:
  6179. lib.ggml_cl_mul_mat.argtypes = [
  6180. ctypes.POINTER(ggml_tensor),
  6181. ctypes.POINTER(ggml_tensor),
  6182. ctypes.POINTER(ggml_tensor),
  6183. ctypes.c_void_p,
  6184. ctypes.c_size_t,
  6185. ]
  6186. lib.ggml_cl_mul_mat.restype = None
  6187. # void * ggml_cl_host_malloc(size_t size);
  6188. def ggml_cl_host_malloc(
  6189. size: Union[ctypes.c_size_t, int],
  6190. ) -> Optional[ctypes.c_void_p]:
  6191. return lib.ggml_cl_host_malloc(size)
  6192. if GGML_USE_CLBLAST:
  6193. lib.ggml_cl_host_malloc.argtypes = [
  6194. ctypes.c_size_t,
  6195. ]
  6196. lib.ggml_cl_host_malloc.restype = ctypes.c_void_p
  6197. # void ggml_cl_host_free(void * ptr);
  6198. def ggml_cl_host_free(
  6199. ptr: ctypes.c_void_p,
  6200. ):
  6201. return lib.ggml_cl_host_free(ptr)
  6202. if GGML_USE_CLBLAST:
  6203. lib.ggml_cl_host_free.argtypes = [
  6204. ctypes.c_void_p,
  6205. ]
  6206. lib.ggml_cl_host_free.restype = None
  6207. # void ggml_cl_free_data(const struct ggml_tensor* tensor);
  6208. def ggml_cl_free_data(
  6209. tensor: ggml_tensor_p,
  6210. ):
  6211. return lib.ggml_cl_free_data(tensor)
  6212. if GGML_USE_CLBLAST:
  6213. lib.ggml_cl_free_data.argtypes = [
  6214. ctypes.POINTER(ggml_tensor),
  6215. ]
  6216. lib.ggml_cl_free_data.restype = None
  6217. # void ggml_cl_transform_tensor(void * data, struct ggml_tensor * tensor);
  6218. def ggml_cl_transform_tensor(
  6219. data: ctypes.c_void_p,
  6220. tensor: ggml_tensor_p,
  6221. ):
  6222. return lib.ggml_cl_transform_tensor(data, tensor)
  6223. if GGML_USE_CLBLAST:
  6224. lib.ggml_cl_transform_tensor.argtypes = [
  6225. ctypes.c_void_p,
  6226. ctypes.POINTER(ggml_tensor),
  6227. ]
  6228. lib.ggml_cl_transform_tensor.restype = None