mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-05-04 16:14:06 +00:00
Compare commits
14 Commits
gg/ci-fix-
...
b4863
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4e39a3c332 | ||
|
|
be421fc429 | ||
|
|
87c2630546 | ||
|
|
2b3a25c212 | ||
|
|
8352cdc87b | ||
|
|
1e2f78a004 | ||
|
|
0fd7ca7a21 | ||
|
|
6fefc05a7a | ||
|
|
7ab364390f | ||
|
|
7c7f3b7f43 | ||
|
|
102ac1891d | ||
|
|
d6ae2fa061 | ||
|
|
68d0027f3d | ||
|
|
ea002810a2 |
61
AUTHORS
61
AUTHORS
@@ -1,4 +1,4 @@
|
||||
# date: Tue Feb 4 13:04:05 EET 2025
|
||||
# date: Sat Mar 8 18:23:52 EET 2025
|
||||
# this file is auto-generated by scripts/gen-authors.sh
|
||||
|
||||
0cc4m <picard12@live.de>
|
||||
@@ -8,10 +8,12 @@
|
||||
3ooabkhxtn <31479382+3ooabkhxtn@users.noreply.github.com>
|
||||
44670 <44670@users.noreply.github.com>
|
||||
65a <10104049+65a@users.noreply.github.com>
|
||||
708-145 <40387547+708-145@users.noreply.github.com>
|
||||
AN Long <aisk@users.noreply.github.com>
|
||||
AT <manyoso@users.noreply.github.com>
|
||||
Aarni Koskela <akx@iki.fi>
|
||||
Aaron Miller <apage43@ninjawhale.com>
|
||||
Aaron Teo <57927438+taronaeo@users.noreply.github.com>
|
||||
Aaryaman Vasishta <aaryaman.vasishta@amd.com>
|
||||
Abheek Gulati <abheekg@hotmail.com>
|
||||
Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com>
|
||||
@@ -20,6 +22,7 @@ Adithya Balaji <adithya.b94@gmail.com>
|
||||
AdithyanI <adithyan.i4internet@gmail.com>
|
||||
Adrian <smith.adriane@gmail.com>
|
||||
Adrian Hesketh <a-h@users.noreply.github.com>
|
||||
Adrian Kretz <me@akretz.com>
|
||||
Adrien Gallouët <adrien@gallouet.fr>
|
||||
Adrien Gallouët <angt@huggingface.co>
|
||||
Ahmad Tameem <113388789+Tameem-10xE@users.noreply.github.com>
|
||||
@@ -28,15 +31,18 @@ AidanBeltonS <87009434+AidanBeltonS@users.noreply.github.com>
|
||||
AidanBeltonS <aidan.belton@codeplay.com>
|
||||
Aisuko <urakiny@gmail.com>
|
||||
Akarshan Biswas <akarshan.biswas@gmail.com>
|
||||
Akarshan Biswas <akarshan@menlo.ai>
|
||||
Akarshan Biswas <akarshanbiswas@fedoraproject.org>
|
||||
Al Mochkin <14274697+amochkin@users.noreply.github.com>
|
||||
Albert Jin <albert.jin@gmail.com>
|
||||
Alberto <57916483+albbus-stack@users.noreply.github.com>
|
||||
Alberto Cabrera Pérez <alberto.cabrera@codeplay.com>
|
||||
Alberto Cabrera Pérez <alberto.cabrera@intel.com>
|
||||
Aleksei Nikiforov <103434461+AlekseiNikiforovIBM@users.noreply.github.com>
|
||||
Alex <awhill19@icloud.com>
|
||||
Alex Azarov <alex@azarov.by>
|
||||
Alex Azarov <alexander.azarov@mapbox.com>
|
||||
Alex Brooks <alex.brooks@ibm.com>
|
||||
Alex Klinkhamer <from.github.com.917@grencez.dev>
|
||||
Alex Klinkhamer <git@grencez.dev>
|
||||
Alex Nguyen <tiendung@users.noreply.github.com>
|
||||
@@ -67,6 +73,7 @@ Andrew Minh Nguyen <40281306+amqdn@users.noreply.github.com>
|
||||
Andy Salerno <andysalerno@gmail.com>
|
||||
Andy Tai <andy-tai@users.noreply.github.com>
|
||||
Anthony Van de Gejuchte <anthonyvdgent@gmail.com>
|
||||
Antoine Viallon <antoine@lesviallon.fr>
|
||||
Antonis Makropoulos <benuix@gmail.com>
|
||||
Arik Poznanski <arikpoz@users.noreply.github.com>
|
||||
Armen Kaleshian <kriation@users.noreply.github.com>
|
||||
@@ -83,6 +90,7 @@ Atsushi Tatsuma <yoshoku@outlook.com>
|
||||
Austin <77757836+teleprint-me@users.noreply.github.com>
|
||||
AustinMroz <austinmroz@utexas.edu>
|
||||
BADR <contact@pythops.com>
|
||||
BB-fat <45072480+BB-fat@users.noreply.github.com>
|
||||
Bach Le <bach@bullno1.com>
|
||||
Bailey Chittle <39804642+bachittle@users.noreply.github.com>
|
||||
BarfingLemurs <128182951+BarfingLemurs@users.noreply.github.com>
|
||||
@@ -101,6 +109,7 @@ Bert Wagner <github@bertwagner.com>
|
||||
Billel Mokeddem <billel.mokeddem.ml@gmail.com>
|
||||
Bingan <70050083+binganao@users.noreply.github.com>
|
||||
Bjarke Viksøe <164612031+bviksoe@users.noreply.github.com>
|
||||
Bodhi <3882561+BodhiHu@users.noreply.github.com>
|
||||
Bodo Graumann <mail@bodograumann.de>
|
||||
Bono Lv <lvscar@users.noreply.github.com>
|
||||
Borislav Stanimirov <b.stanimirov@abv.bg>
|
||||
@@ -128,6 +137,7 @@ CentricStorm <CentricStorm@users.noreply.github.com>
|
||||
Chad Brewbaker <crb002@gmail.com>
|
||||
Changyeon Kim <cyzero.kim@samsung.com>
|
||||
Chao Jiang <jc19chaoj@zoho.com>
|
||||
Charles Duffy <charles@dyfis.net>
|
||||
Charles Xu <63788048+chaxu01@users.noreply.github.com>
|
||||
Charles Xu <charles.xu@arm.com>
|
||||
Chen Xi <xi2.chen@intel.com>
|
||||
@@ -139,12 +149,14 @@ Chris Kuehl <ckuehl@ckuehl.me>
|
||||
Christian Demsar <christian@github.email.demsar.us>
|
||||
Christian Demsar <crasm@git.vczf.us>
|
||||
Christian Falch <875252+chrfalch@users.noreply.github.com>
|
||||
Christian Fillion <cfillion@users.noreply.github.com>
|
||||
Christian Kastner <ckk@kvr.at>
|
||||
Christian Kögler <ck3d@gmx.de>
|
||||
Christian Köhnenkamp <cvk5@me.com>
|
||||
Christian Zhou-Zheng <59622928+christianazinn@users.noreply.github.com>
|
||||
Christopher Nielsen <62156882+mascguy@users.noreply.github.com>
|
||||
Clark Saben <76020733+csaben@users.noreply.github.com>
|
||||
Clauszy <zhangyub@uniontech.com>
|
||||
Clint Herron <hanclinto@gmail.com>
|
||||
Conrad Kramer <conrad@conradkramer.com>
|
||||
Corentin REGAL <corentin.regal@gmail.com>
|
||||
@@ -163,6 +175,7 @@ Daniel Hiltgen <dhiltgen@users.noreply.github.com>
|
||||
Daniel Illescas Romero <illescas.daniel@protonmail.com>
|
||||
Daniel Kleine <53251018+d-kleine@users.noreply.github.com>
|
||||
Daniele <57776841+daniandtheweb@users.noreply.github.com>
|
||||
Danny Milosavljevic <dannym@friendly-machines.com>
|
||||
DannyDaemonic <DannyDaemonic@gmail.com>
|
||||
Dat Quoc Nguyen <2412555+datquocnguyen@users.noreply.github.com>
|
||||
Dave <dave-fl@users.noreply.github.com>
|
||||
@@ -170,6 +183,7 @@ Dave Airlie <airlied@gmail.com>
|
||||
Dave Airlie <airlied@redhat.com>
|
||||
Dave Della Costa <ddellacosta+github@gmail.com>
|
||||
David Friehs <david@friehs.info>
|
||||
David Huang <1969802+hjc4869@users.noreply.github.com>
|
||||
David Kennedy <dakennedyd@gmail.com>
|
||||
David Pflug <david@pflug.email>
|
||||
David Renshaw <dwrenshaw@gmail.com>
|
||||
@@ -236,6 +250,7 @@ Felix <stenbackfelix@gmail.com>
|
||||
Finn Voorhees <finnvoorhees@gmail.com>
|
||||
Firat <firatkiral@gmail.com>
|
||||
FirstTimeEZ <179362031+FirstTimeEZ@users.noreply.github.com>
|
||||
Florent BENOIT <fbenoit@redhat.com>
|
||||
Folko-Ven <71110216+Folko-Ven@users.noreply.github.com>
|
||||
Foul-Tarnished <107711110+Foul-Tarnished@users.noreply.github.com>
|
||||
Francisco Melo <43780565+francis2tm@users.noreply.github.com>
|
||||
@@ -254,6 +269,7 @@ Gary Mulder <gjmulder@gmail.com>
|
||||
Gavin Zhao <gavinzhaojw@protonmail.com>
|
||||
Genkagaku.GPT <hlhr202@163.com>
|
||||
Georgi Gerganov <ggerganov@gmail.com>
|
||||
Gian-Carlo Pascutto <gcp@sjeng.org>
|
||||
Gilad S <giladgd@users.noreply.github.com>
|
||||
Gilad S. <7817232+giladgd@users.noreply.github.com>
|
||||
Giuseppe Scrivano <giuseppe@scrivano.org>
|
||||
@@ -267,7 +283,9 @@ Guspan Tanadi <36249910+guspan-tanadi@users.noreply.github.com>
|
||||
Gustavo Rocha Dias <91472747+gustrd@users.noreply.github.com>
|
||||
Haggai Nuchi <h.nuchi@gmail.com>
|
||||
Halalaluyafail3 <55773281+Halalaluyafail3@users.noreply.github.com>
|
||||
Hale Chan <halechan@qq.com>
|
||||
Hamdoud Hakem <90524568+hamdoudhakem@users.noreply.github.com>
|
||||
Han Yin <han.yin@arm.com>
|
||||
HanishKVC <hanishkvc@gmail.com>
|
||||
Haohui Mai <ricetons@gmail.com>
|
||||
Haoxiang Fei <tonyfettes@tonyfettes.com>
|
||||
@@ -278,6 +296,7 @@ Haus1 <haus.xda@gmail.com>
|
||||
Henk Poley <HenkPoley@gmail.com>
|
||||
Henri Vasserman <henv@hot.ee>
|
||||
Henrik Forstén <henrik.forsten@gmail.com>
|
||||
Henry Linjamäki <henry.linjamaki@gmail.com>
|
||||
Herman Semenov <GermanAizek@yandex.ru>
|
||||
Hesen Peng <hesen.peng@gmail.com>
|
||||
HimariO <dsfhe49854@gmail.com>
|
||||
@@ -307,6 +326,7 @@ Ivan <nekotekina@gmail.com>
|
||||
Ivan Filipov <159561759+vanaka11@users.noreply.github.com>
|
||||
Ivan Komarov <Ivan.Komarov@dfyz.info>
|
||||
Ivan Stepanov <ivanstepanovftw@gmail.com>
|
||||
JC <43374599+MrSMlT@users.noreply.github.com>
|
||||
JFLFY2255 <JFLFY2255@163.com>
|
||||
JH23X <165871467+JH23X@users.noreply.github.com>
|
||||
Jack Mousseau <jack@software.inc>
|
||||
@@ -325,6 +345,7 @@ Jan Ploski <jpl@plosquare.com>
|
||||
Jannis Schönleber <joennlae@gmail.com>
|
||||
Jared Van Bortel <cebtenzzre@gmail.com>
|
||||
Jared Van Bortel <jared@nomic.ai>
|
||||
Jason C.H <ctrysbita@outlook.com>
|
||||
Jason McCartney <jmac@theroot.org>
|
||||
Jason Stillerman <jason.t.stillerman@gmail.com>
|
||||
Jean-Christophe Hoelt <hoelt@fovea.cc>
|
||||
@@ -342,6 +363,7 @@ Jiahao Li <liplus17@163.com>
|
||||
Jian Liao <jianliao@users.noreply.github.com>
|
||||
JidongZhang-THU <1119708529@qq.com>
|
||||
Jinwoo Jeong <33892306+williamjeong2@users.noreply.github.com>
|
||||
Jinyang He <hejinyang@loongson.cn>
|
||||
Jiří Podivín <66251151+jpodivin@users.noreply.github.com>
|
||||
Jiří Sejkora <Sejseloid@gmail.com>
|
||||
Joan Fontanals <jfontanalsmartinez@gmail.com>
|
||||
@@ -379,6 +401,7 @@ Justine Tunney <jtunney@mozilla.com>
|
||||
Juuso Alasuutari <juuso.alasuutari@gmail.com>
|
||||
KASR <karim.asrih@gmail.com>
|
||||
Kamil Tomšík <info@tomsik.cz>
|
||||
Kante Yin <kerthcet@gmail.com>
|
||||
Karol Kontny <82021046+kkontny@users.noreply.github.com>
|
||||
Karsten Weiss <knweiss@gmail.com>
|
||||
Karthick <j.karthic2004@gmail.com>
|
||||
@@ -419,6 +442,7 @@ LoganDark <github@logandark.mozmail.com>
|
||||
Loïc Carrère <loic.carrere@gmail.com>
|
||||
LostRuins <39025047+LostRuins@users.noreply.github.com>
|
||||
LostRuins Concedo <39025047+LostRuins@users.noreply.github.com>
|
||||
Lucas Moura Belo <lucas.belo@live.com>
|
||||
Luciano <lucianostrika44@gmail.com>
|
||||
Luo Tian <lt@basecity.com>
|
||||
Lyle Dean <dean@lyle.dev>
|
||||
@@ -463,6 +487,7 @@ Matthew Tejo <matthew.tejo@gmail.com>
|
||||
Matvey Soloviev <blackhole89@gmail.com>
|
||||
Max Krasnyansky <max.krasnyansky@gmail.com>
|
||||
Max Krasnyansky <quic_maxk@quicinc.com>
|
||||
Maxim Evtush <154841002+maximevtush@users.noreply.github.com>
|
||||
Maxime <672982+maximegmd@users.noreply.github.com>
|
||||
Maximilian Winter <maximilian.winter.91@gmail.com>
|
||||
Meng Zhang <meng@tabbyml.com>
|
||||
@@ -494,6 +519,7 @@ Miwa / Ensan <63481257+ensan-hcl@users.noreply.github.com>
|
||||
Mohammadreza Hendiani <hendiani.mohammadreza@gmail.com>
|
||||
Mohammadreza Hendiani <mohammad.r.hendiani@gmail.com>
|
||||
Molly Sophia <mollysophia379@gmail.com>
|
||||
MoonRide303 <130458190+MoonRide303@users.noreply.github.com>
|
||||
MorganRO8 <47795945+MorganRO8@users.noreply.github.com>
|
||||
Murilo Santana <mvrilo@gmail.com>
|
||||
Musab Gultekin <musabgultekin@users.noreply.github.com>
|
||||
@@ -524,6 +550,7 @@ Nikolas <127742645+nneubacher@users.noreply.github.com>
|
||||
Nindaleth <Nindaleth@users.noreply.github.com>
|
||||
Nuno <rare-magma@posteo.eu>
|
||||
OSecret <135510162+OLSecret@users.noreply.github.com>
|
||||
Oleksandr Kuvshynov <661042+okuvshynov@users.noreply.github.com>
|
||||
Oleksandr Nikitin <oleksandr@tvori.info>
|
||||
Oleksii Maryshchenko <oleksii.maryshchenko@gmail.com>
|
||||
Olivier Chafik <ochafik@users.noreply.github.com>
|
||||
@@ -533,6 +560,7 @@ PAB <pierreantoine.bannier@gmail.com>
|
||||
Pablo Duboue <pablo.duboue@gmail.com>
|
||||
Pascal Patry <ppatry@mtacitlabs.com>
|
||||
Patrice Ferlet <metal3d@gmail.com>
|
||||
Patrick Peng <retr0@retr0.blog>
|
||||
Paul Tsochantaris <ptsochantaris@icloud.com>
|
||||
Pavel Zloi <github.com@drteam.rocks>
|
||||
Pavol Rusnak <pavol@rusnak.io>
|
||||
@@ -549,6 +577,7 @@ Pieter Ouwerkerk <pieter.ouwerkerk@gmail.com>
|
||||
Plamen Minev <pacominev@gmail.com>
|
||||
Prashant Vithule <119530321+Vithulep@users.noreply.github.com>
|
||||
Przemysław Pawełczyk <przemoc@gmail.com>
|
||||
PureJourney <edward.pong@qq.com>
|
||||
Qin Yue Chen <71813199+chenqiny@users.noreply.github.com>
|
||||
Qingyou Meng <meng.qingyou@gmail.com>
|
||||
Qu Zongfu <43257352+yancaoweidaode@users.noreply.github.com>
|
||||
@@ -564,14 +593,17 @@ Rand Xie <randxiexyy29@gmail.com>
|
||||
Randall Fitzgerald <randall@dasaku.net>
|
||||
Random Fly <renfei8@live.cn>
|
||||
Reinforce-II <fate@eastal.com>
|
||||
Rémy O <remyoudompheng@gmail.com>
|
||||
Rémy Oudompheng <oudomphe@phare.normalesup.org>
|
||||
Ren Xuancheng <jklj077@users.noreply.github.com>
|
||||
Rene Leonhardt <65483435+reneleonhardt@users.noreply.github.com>
|
||||
Reza Kakhki <rezakakhki.de@gmail.com>
|
||||
Reza Rahemtola <49811529+RezaRahemtola@users.noreply.github.com>
|
||||
RhinoDevel <RhinoDevel@users.noreply.github.com>
|
||||
Riccardo Orlando <Riccorl@users.noreply.github.com>
|
||||
Riceball LEE <snowyu.lee@gmail.com>
|
||||
Rich Dougherty <rich@rd.nz>
|
||||
Richard <r-burton@hotmail.co.uk>
|
||||
Richard Kiss <him@richardkiss.com>
|
||||
Richard Roberson <richardr1126@gmail.com>
|
||||
Rick G <26732651+TheFlipbook@users.noreply.github.com>
|
||||
@@ -588,6 +620,7 @@ Robert Sung-wook Shin <edp1096@users.noreply.github.com>
|
||||
Robey Holderith <robey@flaminglunchbox.net>
|
||||
Robyn <robyngraf@users.noreply.github.com>
|
||||
Roger Meier <r.meier@siemens.com>
|
||||
Rohanjames1997 <rohan.james4@gmail.com>
|
||||
Roland <14355895+rbur0425@users.noreply.github.com>
|
||||
Romain Biessy <romain.biessy@codeplay.com>
|
||||
Romain D <90720+Artefact2@users.noreply.github.com>
|
||||
@@ -610,6 +643,7 @@ Ryan Landay <rlanday@gmail.com>
|
||||
Ryder Wishart <ryderwishart@gmail.com>
|
||||
Ryuei <louixs@users.noreply.github.com>
|
||||
Rőczey Barnabás <31726601+An0nie@users.noreply.github.com>
|
||||
SAMI <samuel.koesnadi@stud.uni-due.de>
|
||||
SRHMorris <69468379+SRHMorris@users.noreply.github.com>
|
||||
SXX <sxx1136965276@gmail.com>
|
||||
SakuraUmi <yukinon244@gmail.com>
|
||||
@@ -634,6 +668,8 @@ Shane A <shanea@allenai.org>
|
||||
Shangning Xu <32517059+xushangning@users.noreply.github.com>
|
||||
Shankar <gshankar.87@gmail.com>
|
||||
Shanshan Shen <467638484@qq.com>
|
||||
Shelby Jenkins <47464908+ShelbyJenkins@users.noreply.github.com>
|
||||
Sheldon Robinson <sheldon.robinson@live.com>
|
||||
Shijie <821898965@qq.com>
|
||||
Shintarou Okada <kokuzen@gmail.com>
|
||||
Shouzheng Liu <61452103+lshzh-ww@users.noreply.github.com>
|
||||
@@ -713,18 +749,24 @@ Victor Nogueira <felladrin@gmail.com>
|
||||
Victor Z. Peng <ziliangdotme@gmail.com>
|
||||
Viet-Anh NGUYEN (Andrew) <vietanh.dev@gmail.com>
|
||||
Vinesh Janarthanan <36610342+VJHack@users.noreply.github.com>
|
||||
Vitali Lovich <vlovich+github@gmail.com>
|
||||
Vivian <vynride@gmail.com>
|
||||
Vlad <spitfireage@gmail.com>
|
||||
Vladimir <bogdad@gmail.com>
|
||||
Vladimir Malyutin <first-leon@yandex.ru>
|
||||
Vladimir Vuksanovic <109677816+vvuksanovic@users.noreply.github.com>
|
||||
Vladimir Zorin <vladimir@deviant.guru>
|
||||
VoidIsVoid <343750470@qq.com>
|
||||
Volodymyr Vitvitskyi <72226+signalpillar@users.noreply.github.com>
|
||||
Wagner Bruna <wbruna@users.noreply.github.com>
|
||||
Wang Qin <37098874+wangqin0@users.noreply.github.com>
|
||||
Wang Ran (汪然) <wangr@smail.nju.edu.cn>
|
||||
WangHaoranRobin <56047610+WangHaoranRobin@users.noreply.github.com>
|
||||
Weird Constructor <weirdconstructor@gmail.com>
|
||||
Weizhao Ouyang <o451686892@gmail.com>
|
||||
Welby Seely <welbyseely@gmail.com>
|
||||
Wentai Zhang <rchardx@gmail.com>
|
||||
Wilken Gottwalt <12194808+wgottwalt@users.noreply.github.com>
|
||||
WillCorticesAI <150854901+WillCorticesAI@users.noreply.github.com>
|
||||
William Tambellini <william.tambellini@gmail.com>
|
||||
William Tambellini <wtambellini@sdl.com>
|
||||
@@ -816,6 +858,8 @@ chaihahaha <chai836275709@gmail.com>
|
||||
chiranko <96988916+chiranko@users.noreply.github.com>
|
||||
clibdev <52199778+clibdev@users.noreply.github.com>
|
||||
clyang <clyang@clyang.net>
|
||||
cmdr2 <secondary.cmdr2@gmail.com>
|
||||
cmdr2 <shashank.shekhar.global@gmail.com>
|
||||
cocktailpeanut <121128867+cocktailpeanut@users.noreply.github.com>
|
||||
codezjx <code.zjx@gmail.com>
|
||||
coezbek <c.oezbek@gmail.com>
|
||||
@@ -835,6 +879,7 @@ deepdiffuser <112834445+deepdiffuser@users.noreply.github.com>
|
||||
devojony <61173062+devojony@users.noreply.github.com>
|
||||
ditsuke <ditsuke@protonmail.com>
|
||||
divinity76 <divinity76@gmail.com>
|
||||
dm4 <dm4@secondstate.io>
|
||||
dm4 <sunrisedm4@gmail.com>
|
||||
dotpy314 <33351922+dotpy314@users.noreply.github.com>
|
||||
drbh <david.richard.holtz@gmail.com>
|
||||
@@ -849,6 +894,7 @@ fairydreaming <166155368+fairydreaming@users.noreply.github.com>
|
||||
fengerhu1 <2748250768@qq.com>
|
||||
fj-y-saito <85871716+fj-y-saito@users.noreply.github.com>
|
||||
fraxy-v <65565042+fraxy-v@users.noreply.github.com>
|
||||
fxzjshm <11426482+fxzjshm@users.noreply.github.com>
|
||||
github-actions[bot] <github-actions[bot]@users.noreply.github.com>
|
||||
gliptic <gliptic@users.noreply.github.com>
|
||||
gn64 <yukikaze.jp@gmail.com>
|
||||
@@ -873,6 +919,7 @@ hydai <z54981220@gmail.com>
|
||||
iSma <ismail.senhaji@gmail.com>
|
||||
iacore <74560659+iacore@users.noreply.github.com>
|
||||
icppWorld <124377669+icppWorld@users.noreply.github.com>
|
||||
igardev <49397134+igardev@users.noreply.github.com>
|
||||
igarnier <igarnier@protonmail.com>
|
||||
intelmatt <61025942+intelmatt@users.noreply.github.com>
|
||||
iohub <rickyang.pro@gmail.com>
|
||||
@@ -880,6 +927,7 @@ issixx <46835150+issixx@users.noreply.github.com>
|
||||
jacobi petrucciani <8117202+jpetrucciani@users.noreply.github.com>
|
||||
jaime-m-p <167997752+jaime-m-p@users.noreply.github.com>
|
||||
jameswu2014 <545426914@qq.com>
|
||||
jason_w <jason.wang@126.com>
|
||||
jdomke <28772296+jdomke@users.noreply.github.com>
|
||||
jiahao su <damow890@gmail.com>
|
||||
jiez <373447296@qq.com>
|
||||
@@ -891,6 +939,7 @@ jon-chuang <9093549+jon-chuang@users.noreply.github.com>
|
||||
jp-x-g <jpxg-dev@protonmail.com>
|
||||
jukofyork <69222624+jukofyork@users.noreply.github.com>
|
||||
junchao-loongson <68935141+junchao-loongson@users.noreply.github.com>
|
||||
junchao-zhao <68935141+junchao-loongson@users.noreply.github.com>
|
||||
jwj7140 <32943891+jwj7140@users.noreply.github.com>
|
||||
k.h.lai <adrian.k.h.lai@outlook.com>
|
||||
kaizau <kaizau@users.noreply.github.com>
|
||||
@@ -925,6 +974,7 @@ ltoniazzi <61414566+ltoniazzi@users.noreply.github.com>
|
||||
luoyu-intel <yu.luo@intel.com>
|
||||
m3ndax <adrian.goessl@outlook.com>
|
||||
maddes8cht <55592906+maddes8cht@users.noreply.github.com>
|
||||
magicse <magicse@users.noreply.github.com>
|
||||
mahorozte <41834471+mahorozte@users.noreply.github.com>
|
||||
makomk <makosoft@googlemail.com>
|
||||
manikbhandari <mbbhandarimanik2@gmail.com>
|
||||
@@ -935,6 +985,7 @@ matt23654 <matthew.webber@protonmail.com>
|
||||
matteo <matteogeniaccio@yahoo.it>
|
||||
mdrokz <mohammadmunshi@gmail.com>
|
||||
mgroeber9110 <45620825+mgroeber9110@users.noreply.github.com>
|
||||
midnight <midnightmagic@users.noreply.github.com>
|
||||
minarchist <minarchist@users.noreply.github.com>
|
||||
mj-shifu <77107165+mj-shifu@users.noreply.github.com>
|
||||
mmyjona <jonathan.gonse@gmail.com>
|
||||
@@ -958,10 +1009,12 @@ omahs <73983677+omahs@users.noreply.github.com>
|
||||
oobabooga <112222186+oobabooga@users.noreply.github.com>
|
||||
opparco <parco.opaai@gmail.com>
|
||||
ostix360 <55257054+ostix360@users.noreply.github.com>
|
||||
pascal-lc <49066376+pascal-lc@users.noreply.github.com>
|
||||
pculliton <phillipculliton@gmail.com>
|
||||
peidaqi <peidaqi@gmail.com>
|
||||
pengxin99 <pengxin.yuan@intel.com>
|
||||
perserk <perserk@gmail.com>
|
||||
petterreinholdtsen <pere-github@hungry.com>
|
||||
piDack <104877312+piDack@users.noreply.github.com>
|
||||
pmysl <piotr.myslinski@outlook.com>
|
||||
postmasters <namnguyen@google.com>
|
||||
@@ -983,6 +1036,7 @@ semidark <me@semidark.net>
|
||||
serhii-nakon <57632032+serhii-nakon@users.noreply.github.com>
|
||||
sharpHL <132747147+sharpHL@users.noreply.github.com>
|
||||
shibe2 <shibe@tuta.io>
|
||||
simon886212 <37953122+simon886212@users.noreply.github.com>
|
||||
singularity <12184989+singularity-s0@users.noreply.github.com>
|
||||
sjinzh <sjinzh@gmail.com>
|
||||
sjxx <63994076+ylsdamxssjxxdd@users.noreply.github.com>
|
||||
@@ -1000,10 +1054,12 @@ tarcey <cey.tarik@gmail.com>
|
||||
tc-mb <157115220+tc-mb@users.noreply.github.com>
|
||||
texmex76 <40733439+texmex76@users.noreply.github.com>
|
||||
thement <40525767+thement@users.noreply.github.com>
|
||||
theraininsky <76763719+theraininsky@users.noreply.github.com>
|
||||
thewh1teagle <61390950+thewh1teagle@users.noreply.github.com>
|
||||
tjohnman <tjohnman@users.noreply.github.com>
|
||||
toyer <2042519524@qq.com>
|
||||
tslmy <tslmy@users.noreply.github.com>
|
||||
tv1wnd <55383215+tv1wnd@users.noreply.github.com>
|
||||
ubik2 <ubik2@users.noreply.github.com>
|
||||
uint256_t <konndennsa@gmail.com>
|
||||
uint256_t <maekawatoshiki1017@gmail.com>
|
||||
@@ -1014,6 +1070,7 @@ valiray <133289098+valiray@users.noreply.github.com>
|
||||
vb <vaibhavs10@gmail.com>
|
||||
vik <vikhyatk@gmail.com>
|
||||
viric <viric@viric.name>
|
||||
vmobilis <75476228+vmobilis@users.noreply.github.com>
|
||||
vodkaslime <646329483@qq.com>
|
||||
vvhg1 <94630311+vvhg1@users.noreply.github.com>
|
||||
vxiiduu <73044267+vxiiduu@users.noreply.github.com>
|
||||
@@ -1028,6 +1085,8 @@ wzy <32936898+Freed-Wu@users.noreply.github.com>
|
||||
xaedes <xaedes@gmail.com>
|
||||
xaedes <xaedes@googlemail.com>
|
||||
xctan <axunlei@gmail.com>
|
||||
xiaobing318 <71554036+xiaobing318@users.noreply.github.com>
|
||||
xiaofei <hbuxiaofei@gmail.com>
|
||||
xloem <0xloem@gmail.com>
|
||||
yangli2 <yangli2@gmail.com>
|
||||
ymcki <84055651+ymcki@users.noreply.github.com>
|
||||
|
||||
@@ -352,10 +352,10 @@ function gg_run_open_llama_7b_v2 {
|
||||
|
||||
(time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log
|
||||
|
||||
(time ./bin/llama-save-load-state--model ${model_q4_0} -ngl 10 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||
(time ./bin/llama-save-load-state--model ${model_q4_0} -ngl 10 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||
(time ./bin/llama-save-load-state--model ${model_q4_0} -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||
(time ./bin/llama-save-load-state--model ${model_q4_0} -ngl 99 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||
|
||||
function check_ppl {
|
||||
qnt="$1"
|
||||
|
||||
@@ -2571,5 +2571,43 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER}));
|
||||
|
||||
add_opt(common_arg(
|
||||
{"--fim-qwen-7b-spec"},
|
||||
string_format("use Qwen 2.5 Coder 7B + 0.5B draft for speculative decoding (note: can download weights from the internet)"),
|
||||
[](common_params & params) {
|
||||
params.hf_repo = "ggml-org/Qwen2.5-Coder-7B-Q8_0-GGUF";
|
||||
params.hf_file = "qwen2.5-coder-7b-q8_0.gguf";
|
||||
params.speculative.hf_repo = "ggml-org/Qwen2.5-Coder-0.5B-Q8_0-GGUF";
|
||||
params.speculative.hf_file = "qwen2.5-coder-0.5b-q8_0.gguf";
|
||||
params.speculative.n_gpu_layers = 99;
|
||||
params.port = 8012;
|
||||
params.n_gpu_layers = 99;
|
||||
params.flash_attn = true;
|
||||
params.n_ubatch = 1024;
|
||||
params.n_batch = 1024;
|
||||
params.n_ctx = 0;
|
||||
params.n_cache_reuse = 256;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER}));
|
||||
|
||||
add_opt(common_arg(
|
||||
{"--fim-qwen-14b-spec"},
|
||||
string_format("use Qwen 2.5 Coder 14B + 0.5B draft for speculative decoding (note: can download weights from the internet)"),
|
||||
[](common_params & params) {
|
||||
params.hf_repo = "ggml-org/Qwen2.5-Coder-14B-Q8_0-GGUF";
|
||||
params.hf_file = "qwen2.5-coder-14b-q8_0.gguf";
|
||||
params.speculative.hf_repo = "ggml-org/Qwen2.5-Coder-0.5B-Q8_0-GGUF";
|
||||
params.speculative.hf_file = "qwen2.5-coder-0.5b-q8_0.gguf";
|
||||
params.speculative.n_gpu_layers = 99;
|
||||
params.port = 8012;
|
||||
params.n_gpu_layers = 99;
|
||||
params.flash_attn = true;
|
||||
params.n_ubatch = 1024;
|
||||
params.n_batch = 1024;
|
||||
params.n_ctx = 0;
|
||||
params.n_cache_reuse = 256;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER}));
|
||||
|
||||
return ctx_arg;
|
||||
}
|
||||
|
||||
311
common/chat.cpp
311
common/chat.cpp
@@ -60,7 +60,9 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
|
||||
}
|
||||
msg.role = message.at("role");
|
||||
|
||||
if (message.contains("content")) {
|
||||
auto has_content = message.contains("content");
|
||||
auto has_tool_calls = message.contains("tool_calls");
|
||||
if (has_content) {
|
||||
const auto & content = message.at("content");
|
||||
if (content.is_string()) {
|
||||
msg.content = content;
|
||||
@@ -81,19 +83,8 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
|
||||
} else if (!content.is_null()) {
|
||||
throw std::runtime_error("Invalid 'content' type: expected string or array, got " + content.dump() + " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)");
|
||||
}
|
||||
} else {
|
||||
throw std::runtime_error("Expected 'content' (ref: https://github.com/ggml-org/llama.cpp/issues/8367)");
|
||||
}
|
||||
if (message.contains("reasoning_content")) {
|
||||
msg.reasoning_content = message.at("reasoning_content");
|
||||
}
|
||||
if (message.contains("name")) {
|
||||
msg.tool_name = message.at("name");
|
||||
}
|
||||
if (message.contains("tool_call_id")) {
|
||||
msg.tool_call_id = message.at("tool_call_id");
|
||||
}
|
||||
if (message.contains("tool_calls")) {
|
||||
if (has_tool_calls) {
|
||||
for (const auto & tool_call : message.at("tool_calls")) {
|
||||
common_chat_tool_call tc;
|
||||
if (!tool_call.contains("type")) {
|
||||
@@ -118,6 +109,18 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
|
||||
msg.tool_calls.push_back(tc);
|
||||
}
|
||||
}
|
||||
if (!has_content && !has_tool_calls) {
|
||||
throw std::runtime_error("Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & https://github.com/ggml-org/llama.cpp/issues/12279)");
|
||||
}
|
||||
if (message.contains("reasoning_content")) {
|
||||
msg.reasoning_content = message.at("reasoning_content");
|
||||
}
|
||||
if (message.contains("name")) {
|
||||
msg.tool_name = message.at("name");
|
||||
}
|
||||
if (message.contains("tool_call_id")) {
|
||||
msg.tool_call_id = message.at("tool_call_id");
|
||||
}
|
||||
|
||||
msgs.push_back(msg);
|
||||
}
|
||||
@@ -442,6 +445,7 @@ std::string common_chat_format_name(common_chat_format format) {
|
||||
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return "Functionary v3.2";
|
||||
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return "Functionary v3.1 Llama 3.1";
|
||||
case COMMON_CHAT_FORMAT_HERMES_2_PRO: return "Hermes 2 Pro";
|
||||
case COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING: return "Hermes 2 Pro (extract reasoning)";
|
||||
case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B";
|
||||
case COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING: return "Command R7B (extract reasoning)";
|
||||
default:
|
||||
@@ -875,9 +879,9 @@ static common_chat_params common_chat_params_init_command_r7b(const common_chat_
|
||||
return data;
|
||||
}
|
||||
static common_chat_msg common_chat_parse_command_r7b(const std::string & input, bool extract_reasoning) {
|
||||
static std::regex thought_regex("(<\\|START_THINKING\\|>([\\s\\S]*?)<\\|END_THINKING\\|>)([\\s\\S]*)");
|
||||
static std::regex action_regex("<\\|START_ACTION\\|>([\\s\\S]*?)<\\|END_ACTION\\|>");
|
||||
static std::regex response_regex("(?:<\\|START_RESPONSE\\|>)?([\\s\\S]*?)<\\|END_RESPONSE\\|>");
|
||||
static const std::regex thought_regex("(<\\|START_THINKING\\|>([\\s\\S]*?)<\\|END_THINKING\\|>)([\\s\\S]*)");
|
||||
static const std::regex action_regex("<\\|START_ACTION\\|>([\\s\\S]*?)<\\|END_ACTION\\|>");
|
||||
static const std::regex response_regex("(?:<\\|START_RESPONSE\\|>)?([\\s\\S]*?)<\\|END_RESPONSE\\|>");
|
||||
|
||||
std::smatch match;
|
||||
|
||||
@@ -1009,10 +1013,10 @@ static common_chat_params common_chat_params_init_llama_3_1_tool_calls(const com
|
||||
}
|
||||
static common_chat_msg common_chat_parse_llama_3_1(const std::string & input, bool with_builtin_tools = false) {
|
||||
// TODO: tighten & simplify the parser, don't accept leading text context.
|
||||
static std::regex function_regex(
|
||||
static const std::regex function_regex(
|
||||
"\\s*\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\"([^\"]+)\"\\s*,\\s*\"parameters\"\\s*: ");
|
||||
static std::regex close_regex("\\}\\s*");
|
||||
static std::regex builtin_call_regex("<\\|python_tag\\|>\\s*([^.(]+)\\s*\\.\\s*call\\s*\\(\\s*([\\w]+)\\s*=\\s*([\\s\\S]*?)\\)");
|
||||
static const std::regex close_regex("\\}\\s*");
|
||||
static const std::regex builtin_call_regex("<\\|python_tag\\|>\\s*([^.(]+)\\s*\\.\\s*call\\s*\\(\\s*([\\w]+)\\s*=\\s*([\\s\\S]*?)\\)");
|
||||
|
||||
if (with_builtin_tools) {
|
||||
std::smatch match;
|
||||
@@ -1102,34 +1106,42 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
|
||||
data.format = inputs.extract_reasoning ? COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING : COMMON_CHAT_FORMAT_DEEPSEEK_R1;
|
||||
return data;
|
||||
}
|
||||
static common_chat_msg common_chat_parse_deepseek_r1(const std::string & input, bool extract_reasoning) {
|
||||
static std::regex function_regex("<|tool▁call▁begin|>function<|tool▁sep|>([^\n]+)\n```json\n");
|
||||
static std::regex close_regex("```[\\s\\r\\n]*<|tool▁call▁end|>");
|
||||
static std::regex reasoning_content_regex("((?:<think>)?([\\s\\S\\r\\n]*?)</think>)?([\\s\\S\\r\\n]*)");
|
||||
static std::regex tool_calls_regex("[\\s\\r\\n]*(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>)([\\s\\S\\r\\n]*?)<|tool▁calls▁end|>");
|
||||
common_chat_msg msg;
|
||||
msg.role = "assistant";
|
||||
static common_chat_msg handle_think_tag_prelude(const std::string & input, bool extract_reasoning, const std::function<common_chat_msg(const std::string &)> & rest_parser) {
|
||||
std::smatch match;
|
||||
static const std::regex reasoning_content_regex("((?:<think>)?([\\s\\S\\r\\n]*?)</think>)?([\\s\\S\\r\\n]*)");
|
||||
if (std::regex_match(input, match, reasoning_content_regex)) {
|
||||
std::string rest;
|
||||
auto rest = match[3].str();
|
||||
auto msg = rest_parser(rest);
|
||||
auto reasoning_content = string_strip(match[2].str());
|
||||
if (extract_reasoning) {
|
||||
msg.reasoning_content = string_strip(match[2].str());
|
||||
} else {
|
||||
msg.content = match[1].str();
|
||||
msg.reasoning_content = reasoning_content;
|
||||
} else if (!reasoning_content.empty()) {
|
||||
std::ostringstream content;
|
||||
content << "<think>" << reasoning_content << "</think>" << msg.content;
|
||||
msg.content = content.str();
|
||||
}
|
||||
rest = match[3].str();
|
||||
return msg;
|
||||
}
|
||||
return rest_parser(input);
|
||||
}
|
||||
static common_chat_msg common_chat_parse_deepseek_r1(const std::string & input, bool extract_reasoning) {
|
||||
return handle_think_tag_prelude(input, extract_reasoning, [](const std::string & input) {
|
||||
static const std::regex function_regex("<|tool▁call▁begin|>function<|tool▁sep|>([^\n]+)\n```json\n");
|
||||
static const std::regex close_regex("```[\\s\\r\\n]*<|tool▁call▁end|>");
|
||||
static const std::regex tool_calls_regex("[\\s\\r\\n]*(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>)([\\s\\S\\r\\n]*?)<|tool▁calls▁end|>");
|
||||
|
||||
if (std::regex_search(rest, match, tool_calls_regex)) {
|
||||
common_chat_msg msg;
|
||||
msg.role = "assistant";
|
||||
std::smatch match;
|
||||
if (std::regex_search(input, match, tool_calls_regex)) {
|
||||
auto tool_calls = match[1].str();
|
||||
auto msg2 = parse_json_tool_calls(tool_calls, std::nullopt, function_regex, close_regex);
|
||||
msg.tool_calls = std::move(msg2.tool_calls);
|
||||
} else {
|
||||
msg.content += std::string(rest.begin() + rest.find_first_not_of(" \r\n"), rest.end());
|
||||
msg.content = input;
|
||||
}
|
||||
} else {
|
||||
msg.content = input;
|
||||
}
|
||||
return msg;
|
||||
return msg;
|
||||
});
|
||||
}
|
||||
|
||||
static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
||||
@@ -1234,8 +1246,8 @@ static common_chat_params common_chat_params_init_functionary_v3_2(const common_
|
||||
}
|
||||
|
||||
static common_chat_msg common_chat_parse_functionary_v3_2(const std::string & input) {
|
||||
static std::regex function_regex(R"((?:>>>)?(?:assistant<|end_header_id|>\n)?(\w+)\n)");
|
||||
static std::regex close_regex(R"($|(?=>>>))");
|
||||
static const std::regex function_regex(R"((?:>>>)?(?:assistant<|end_header_id|>\n)?(\w+)\n)");
|
||||
static const std::regex close_regex(R"($|(?=>>>))");
|
||||
|
||||
std::string content;
|
||||
auto it = input.begin();
|
||||
@@ -1324,7 +1336,7 @@ static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(con
|
||||
}
|
||||
static common_chat_msg common_chat_parse_functionary_v3_1_llama_3_1(const std::string & input) {
|
||||
// This version of Functionary still supports the llama 3.1 tool call format for the python tool.
|
||||
static std::regex python_tag_regex(R"(<\|python_tag\|>([\s\S\n]*)$)");
|
||||
static const std::regex python_tag_regex(R"(<\|python_tag\|>([\s\S\n]*)$)");
|
||||
std::smatch match;
|
||||
if (std::regex_search(input, match, python_tag_regex)) {
|
||||
auto code = match[1].str();
|
||||
@@ -1338,8 +1350,8 @@ static common_chat_msg common_chat_parse_functionary_v3_1_llama_3_1(const std::s
|
||||
});
|
||||
return msg;
|
||||
}
|
||||
static std::regex function_regex(R"(<function=(\w+)>)");
|
||||
static std::regex close_regex(R"(</function>)");
|
||||
static const std::regex function_regex(R"(<function=(\w+)>)");
|
||||
static const std::regex close_regex(R"(</function>)");
|
||||
// TODO: tighten & simplify.
|
||||
return parse_json_tool_calls(input, std::nullopt, function_regex, close_regex);
|
||||
}
|
||||
@@ -1406,6 +1418,8 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
|
||||
"(?:```(?:json|xml)?\n\\s*)?(?:<function_call>|<tools>|<xml><json>|<response>)?\\s*\\{\\s*\"", //name\"\\s*:\\s*\"" + escaped_name + "\"",
|
||||
});
|
||||
data.preserved_tokens = {
|
||||
"<think>",
|
||||
"</think>",
|
||||
"<tool_call>",
|
||||
"</tool_call>",
|
||||
"<function",
|
||||
@@ -1426,122 +1440,123 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
|
||||
});
|
||||
|
||||
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
||||
data.format = COMMON_CHAT_FORMAT_HERMES_2_PRO;
|
||||
data.format = inputs.extract_reasoning ? COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING : COMMON_CHAT_FORMAT_HERMES_2_PRO;
|
||||
return data;
|
||||
}
|
||||
static common_chat_msg common_chat_parse_hermes_2_pro(const std::string& input) {
|
||||
const static std::regex open_regex(
|
||||
"(?:"
|
||||
"(```(?:xml|json)?\\n\\s*)?" // match 1 (block_start)
|
||||
"(<tool_call>" // match 2 (open_tag)
|
||||
"|<function_call>"
|
||||
"|<tool>"
|
||||
"|<tools>"
|
||||
"|<response>"
|
||||
"|<json>"
|
||||
"|<xml>"
|
||||
"|<JSON>"
|
||||
")?"
|
||||
"(\\s*\\{\\s*\"name\"\\s*:[\\s\\S]*)" // match 3 (named tool call + rest)
|
||||
")"
|
||||
"|"
|
||||
"(?:<function=([^>]+)>" // match 4 (function name)
|
||||
"|<function name=\"([^\"]+)\">)" // match 5 (function name again)
|
||||
"([\\s\\S]*)" // match 6 (function arguments + rest)})"
|
||||
);
|
||||
static common_chat_msg common_chat_parse_hermes_2_pro(const std::string& input, bool extract_reasoning) {
|
||||
return handle_think_tag_prelude(input, extract_reasoning, [](const std::string & input) {
|
||||
static const std::regex open_regex(
|
||||
"(?:"
|
||||
"(```(?:xml|json)?\\n\\s*)?" // match 1 (block_start)
|
||||
"(<tool_call>" // match 2 (open_tag)
|
||||
"|<function_call>"
|
||||
"|<tool>"
|
||||
"|<tools>"
|
||||
"|<response>"
|
||||
"|<json>"
|
||||
"|<xml>"
|
||||
"|<JSON>"
|
||||
")?"
|
||||
"(\\s*\\{\\s*\"name\"\\s*:[\\s\\S]*)" // match 3 (named tool call + rest)
|
||||
")"
|
||||
"|"
|
||||
"(?:<function=([^>]+)>" // match 4 (function name)
|
||||
"|<function name=\"([^\"]+)\">)" // match 5 (function name again)
|
||||
"([\\s\\S]*)" // match 6 (function arguments + rest)})"
|
||||
);
|
||||
|
||||
try {
|
||||
try {
|
||||
common_chat_msg msg;
|
||||
msg.role = "assistant";
|
||||
|
||||
common_chat_msg msg;
|
||||
msg.role = "assistant";
|
||||
std::string::const_iterator it = input.begin();
|
||||
const std::string::const_iterator end = input.end();
|
||||
std::smatch match;
|
||||
|
||||
std::string::const_iterator it = input.begin();
|
||||
const std::string::const_iterator end = input.end();
|
||||
std::smatch match;
|
||||
while (it != end) {
|
||||
if (std::regex_search(it, end, match, open_regex)) {
|
||||
// Add content before the match
|
||||
msg.content += std::string(it, match[0].first);
|
||||
|
||||
while (it != end) {
|
||||
if (std::regex_search(it, end, match, open_regex)) {
|
||||
// Add content before the match
|
||||
msg.content += std::string(it, match[0].first);
|
||||
auto block_start = match[1].str();
|
||||
std::string block_end = block_start.empty() ? "" : "```";
|
||||
|
||||
auto block_start = match[1].str();
|
||||
std::string block_end = block_start.empty() ? "" : "```";
|
||||
auto open_tag = match[2].str();
|
||||
std::string close_tag;
|
||||
|
||||
auto open_tag = match[2].str();
|
||||
std::string close_tag;
|
||||
if (match[3].matched) {
|
||||
close_tag = open_tag.empty() ? "" : "</" + open_tag.substr(1);
|
||||
auto json_it = match[3].first;
|
||||
json tool_call;
|
||||
if (parse_json(json_it, end, tool_call) && tool_call.contains("name") && tool_call.contains("arguments")) {
|
||||
|
||||
if (match[3].matched) {
|
||||
close_tag = open_tag.empty() ? "" : "</" + open_tag.substr(1);
|
||||
auto json_it = match[3].first;
|
||||
json tool_call;
|
||||
if (parse_json(json_it, end, tool_call) && tool_call.contains("name") && tool_call.contains("arguments")) {
|
||||
msg.tool_calls.emplace_back(process_tool_call(tool_call));
|
||||
it = json_it; // Move iterator past parsed JSON
|
||||
|
||||
msg.tool_calls.emplace_back(process_tool_call(tool_call));
|
||||
it = json_it; // Move iterator past parsed JSON
|
||||
|
||||
// Handle close tags
|
||||
consume_spaces(it, end);
|
||||
if (!close_tag.empty() && !parse_literal(it, end, close_tag)) {
|
||||
throw std::runtime_error("Failed to parse closing tag");
|
||||
// Handle close tags
|
||||
consume_spaces(it, end);
|
||||
if (!close_tag.empty() && !parse_literal(it, end, close_tag)) {
|
||||
throw std::runtime_error("Failed to parse closing tag");
|
||||
}
|
||||
consume_spaces(it, end);
|
||||
if (!block_end.empty() && !parse_literal(it, end, block_end)) {
|
||||
throw std::runtime_error("Failed to parse block end");
|
||||
}
|
||||
consume_spaces(it, end);
|
||||
} else {
|
||||
// Not a valid tool call, treat as content
|
||||
msg.content += std::string(match[0].first, match[0].second);
|
||||
it = match[0].second;
|
||||
}
|
||||
consume_spaces(it, end);
|
||||
if (!block_end.empty() && !parse_literal(it, end, block_end)) {
|
||||
throw std::runtime_error("Failed to parse block end");
|
||||
}
|
||||
consume_spaces(it, end);
|
||||
} else {
|
||||
// Not a valid tool call, treat as content
|
||||
msg.content += std::string(match[0].first, match[0].second);
|
||||
it = match[0].second;
|
||||
auto function_name = match[4].str();
|
||||
if (function_name.empty()) {
|
||||
function_name = match[5].str();
|
||||
}
|
||||
GGML_ASSERT(!function_name.empty());
|
||||
|
||||
close_tag = "</function>";
|
||||
// Start parsing from after the opening tags
|
||||
auto json_it = match[6].first;
|
||||
json arguments;
|
||||
if (parse_json(json_it, end, arguments)) {
|
||||
msg.tool_calls.emplace_back(process_tool_call({
|
||||
{"name", function_name},
|
||||
{"arguments", arguments},
|
||||
}));
|
||||
it = json_it; // Move iterator past parsed JSON
|
||||
|
||||
// Handle close tags
|
||||
consume_spaces(it, end);
|
||||
if (!close_tag.empty() && !parse_literal(it, end, close_tag)) {
|
||||
throw std::runtime_error("Failed to parse closing tag");
|
||||
}
|
||||
consume_spaces(it, end);
|
||||
if (!block_end.empty() && !parse_literal(it, end, block_end)) {
|
||||
throw std::runtime_error("Failed to parse block end");
|
||||
}
|
||||
consume_spaces(it, end);
|
||||
} else {
|
||||
// Not a valid tool call, treat as content
|
||||
msg.content += std::string(match[0].first, match[0].second);
|
||||
it = match[0].second;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
auto function_name = match[4].str();
|
||||
if (function_name.empty()) {
|
||||
function_name = match[5].str();
|
||||
}
|
||||
GGML_ASSERT(!function_name.empty());
|
||||
|
||||
close_tag = "</function>";
|
||||
// Start parsing from after the opening tags
|
||||
auto json_it = match[6].first;
|
||||
json arguments;
|
||||
if (parse_json(json_it, end, arguments)) {
|
||||
msg.tool_calls.emplace_back(process_tool_call({
|
||||
{"name", function_name},
|
||||
{"arguments", arguments},
|
||||
}));
|
||||
it = json_it; // Move iterator past parsed JSON
|
||||
|
||||
// Handle close tags
|
||||
consume_spaces(it, end);
|
||||
if (!close_tag.empty() && !parse_literal(it, end, close_tag)) {
|
||||
throw std::runtime_error("Failed to parse closing tag");
|
||||
}
|
||||
consume_spaces(it, end);
|
||||
if (!block_end.empty() && !parse_literal(it, end, block_end)) {
|
||||
throw std::runtime_error("Failed to parse block end");
|
||||
}
|
||||
consume_spaces(it, end);
|
||||
} else {
|
||||
// Not a valid tool call, treat as content
|
||||
msg.content += std::string(match[0].first, match[0].second);
|
||||
it = match[0].second;
|
||||
}
|
||||
// Add remaining content
|
||||
msg.content += std::string(it, end);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// Add remaining content
|
||||
msg.content += std::string(it, end);
|
||||
break;
|
||||
}
|
||||
return msg;
|
||||
} catch (const std::exception & e) {
|
||||
LOG_ERR("Failed to parse hermes 2 pro input: %s\n", e.what());
|
||||
common_chat_msg msg;
|
||||
msg.role = "assistant";
|
||||
msg.content = input;
|
||||
return msg;
|
||||
}
|
||||
return msg;
|
||||
} catch (const std::exception & e) {
|
||||
LOG_ERR("Failed to parse hermes 2 pro input: %s\n", e.what());
|
||||
common_chat_msg msg;
|
||||
msg.role = "assistant";
|
||||
msg.content = input;
|
||||
return msg;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
||||
@@ -1606,6 +1621,11 @@ static common_chat_params common_chat_templates_apply_jinja(
|
||||
return common_chat_params_init_command_r7b(tmpl, params);
|
||||
}
|
||||
|
||||
// Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
|
||||
if (src.find("<tool_call>") != std::string::npos && params.json_schema.is_null()) {
|
||||
return common_chat_params_init_hermes_2_pro(tmpl, params);
|
||||
}
|
||||
|
||||
// Use generic handler when mixing tools + JSON schema.
|
||||
// TODO: support that mix in handlers below.
|
||||
if ((params.tools.is_array() && params.json_schema.is_object())) {
|
||||
@@ -1627,11 +1647,6 @@ static common_chat_params common_chat_templates_apply_jinja(
|
||||
return common_chat_params_init_without_tools(tmpl, params);
|
||||
}
|
||||
|
||||
// Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
|
||||
if (src.find("<tool_call>") != std::string::npos) {
|
||||
return common_chat_params_init_hermes_2_pro(tmpl, params);
|
||||
}
|
||||
|
||||
// Functionary v3.1 (w/ tools)
|
||||
if (src.find("<|start_header_id|>") != std::string::npos
|
||||
&& src.find("<function=") != std::string::npos) {
|
||||
@@ -1749,7 +1764,9 @@ common_chat_msg common_chat_parse(const std::string & input, common_chat_format
|
||||
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1:
|
||||
return common_chat_parse_functionary_v3_1_llama_3_1(input);
|
||||
case COMMON_CHAT_FORMAT_HERMES_2_PRO:
|
||||
return common_chat_parse_hermes_2_pro(input);
|
||||
return common_chat_parse_hermes_2_pro(input, /* extract_reasoning= */ false);
|
||||
case COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING:
|
||||
return common_chat_parse_hermes_2_pro(input, /* extract_reasoning= */ true);
|
||||
case COMMON_CHAT_FORMAT_FIREFUNCTION_V2:
|
||||
return common_chat_parse_firefunction_v2(input);
|
||||
case COMMON_CHAT_FORMAT_COMMAND_R7B:
|
||||
|
||||
@@ -53,6 +53,7 @@ enum common_chat_format {
|
||||
COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2,
|
||||
COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
|
||||
COMMON_CHAT_FORMAT_HERMES_2_PRO,
|
||||
COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING,
|
||||
COMMON_CHAT_FORMAT_COMMAND_R7B,
|
||||
COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING,
|
||||
|
||||
|
||||
@@ -5,13 +5,25 @@ Currently, this readme only supports minicpm-omni's image capabilities, and we w
|
||||
|
||||
Download [MiniCPM-o-2_6](https://huggingface.co/openbmb/MiniCPM-o-2_6) PyTorch model from huggingface to "MiniCPM-o-2_6" folder.
|
||||
|
||||
|
||||
### Build llama.cpp
|
||||
Readme modification time: 20250206
|
||||
|
||||
If there are differences in usage, please refer to the official build [documentation](https://github.com/ggerganov/llama.cpp/blob/master/docs/build.md)
|
||||
|
||||
Clone llama.cpp:
|
||||
```bash
|
||||
git clone git@github.com:OpenBMB/llama.cpp.git
|
||||
git clone https://github.com/ggerganov/llama.cpp
|
||||
cd llama.cpp
|
||||
git checkout minicpm-omni
|
||||
```
|
||||
|
||||
Build llama.cpp using `CMake`:
|
||||
```bash
|
||||
cmake -B build
|
||||
cmake --build build --config Release
|
||||
```
|
||||
|
||||
|
||||
### Usage of MiniCPM-o 2.6
|
||||
|
||||
Convert PyTorch model to gguf files (You can also download the converted [gguf](https://huggingface.co/openbmb/MiniCPM-o-2_6-gguf) by us)
|
||||
@@ -22,25 +34,15 @@ python ./examples/llava/minicpmv-convert-image-encoder-to-gguf.py -m ../MiniCPM-
|
||||
python ./convert_hf_to_gguf.py ../MiniCPM-o-2_6/model
|
||||
|
||||
# quantize int4 version
|
||||
./llama-quantize ../MiniCPM-o-2_6/model/ggml-model-f16.gguf ../MiniCPM-o-2_6/model/ggml-model-Q4_K_M.gguf Q4_K_M
|
||||
./build/bin/llama-quantize ../MiniCPM-o-2_6/model/ggml-model-f16.gguf ../MiniCPM-o-2_6/model/ggml-model-Q4_K_M.gguf Q4_K_M
|
||||
```
|
||||
|
||||
Build llama.cpp using `CMake`:
|
||||
https://github.com/ggml-org/llama.cpp/blob/master/docs/build.md
|
||||
|
||||
```bash
|
||||
cmake -B build
|
||||
cmake --build build --config Release
|
||||
```
|
||||
|
||||
Inference on Linux or Mac
|
||||
```
|
||||
```bash
|
||||
# run f16 version
|
||||
./llama-minicpmv-cli -m ../MiniCPM-o-2_6/model/ggml-model-f16.gguf --mmproj ../MiniCPM-o-2_6/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?"
|
||||
./build/bin/llama-minicpmv-cli -m ../MiniCPM-o-2_6/model/ggml-model-f16.gguf --mmproj ../MiniCPM-o-2_6/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?"
|
||||
|
||||
# run quantized int4 version
|
||||
./llama-minicpmv-cli -m ../MiniCPM-o-2_6/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-o-2_6/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?"
|
||||
|
||||
# or run in interactive mode
|
||||
./llama-minicpmv-cli -m ../MiniCPM-o-2_6/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-o-2_6/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -i
|
||||
./build/bin/llama-minicpmv-cli -m ../MiniCPM-o-2_6/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-o-2_6/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?"
|
||||
```
|
||||
|
||||
@@ -4,13 +4,26 @@
|
||||
|
||||
Download [MiniCPM-Llama3-V-2_5](https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5) PyTorch model from huggingface to "MiniCPM-Llama3-V-2_5" folder.
|
||||
|
||||
|
||||
### Build llama.cpp
|
||||
Readme modification time: 20250206
|
||||
|
||||
If there are differences in usage, please refer to the official build [documentation](https://github.com/ggerganov/llama.cpp/blob/master/docs/build.md)
|
||||
|
||||
Clone llama.cpp:
|
||||
```bash
|
||||
git clone https://github.com/ggml-org/llama.cpp
|
||||
cd llama.cpp
|
||||
```
|
||||
|
||||
### Usage
|
||||
Build llama.cpp using `CMake`:
|
||||
```bash
|
||||
cmake -B build
|
||||
cmake --build build --config Release
|
||||
```
|
||||
|
||||
|
||||
### Usage of MiniCPM-Llama3-V 2.5
|
||||
|
||||
Convert PyTorch model to gguf files (You can also download the converted [gguf](https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5-gguf) by us)
|
||||
|
||||
@@ -20,80 +33,15 @@ python ./examples/llava/minicpmv-convert-image-encoder-to-gguf.py -m ../MiniCPM-
|
||||
python ./convert_hf_to_gguf.py ../MiniCPM-Llama3-V-2_5/model
|
||||
|
||||
# quantize int4 version
|
||||
./llama-quantize ../MiniCPM-Llama3-V-2_5/model/model-8B-F16.gguf ../MiniCPM-Llama3-V-2_5/model/ggml-model-Q4_K_M.gguf Q4_K_M
|
||||
./build/bin/llama-quantize ../MiniCPM-Llama3-V-2_5/model/model-8B-F16.gguf ../MiniCPM-Llama3-V-2_5/model/ggml-model-Q4_K_M.gguf Q4_K_M
|
||||
```
|
||||
|
||||
Build for Linux or Mac
|
||||
|
||||
```bash
|
||||
make
|
||||
make llama-minicpmv-cli
|
||||
```
|
||||
|
||||
Inference on Linux or Mac
|
||||
```
|
||||
```bash
|
||||
# run f16 version
|
||||
./llama-minicpmv-cli -m ../MiniCPM-Llama3-V-2_5/model/model-8B-F16.gguf --mmproj ../MiniCPM-Llama3-V-2_5/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?"
|
||||
./build/bin/llama-minicpmv-cli -m ../MiniCPM-Llama3-V-2_5/model/model-8B-F16.gguf --mmproj ../MiniCPM-Llama3-V-2_5/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?"
|
||||
|
||||
# run quantized int4 version
|
||||
./llama-minicpmv-cli -m ../MiniCPM-Llama3-V-2_5/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-Llama3-V-2_5/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?"
|
||||
|
||||
# or run in interactive mode
|
||||
./llama-minicpmv-cli -m ../MiniCPM-Llama3-V-2_5/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-Llama3-V-2_5/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -i
|
||||
```
|
||||
|
||||
### Android
|
||||
|
||||
#### Build on Android device using Termux
|
||||
We found that build on Android device would bring better runtime performance, so we recommend to build on device.
|
||||
|
||||
[Termux](https://github.com/termux/termux-app#installation) is a terminal app on Android device (no root required).
|
||||
|
||||
Install tools in Termux:
|
||||
```
|
||||
apt update && apt upgrade -y
|
||||
apt install git make cmake
|
||||
```
|
||||
|
||||
It's recommended to move your model inside the `~/` directory for best performance:
|
||||
```
|
||||
cd storage/downloads
|
||||
mv model.gguf ~/
|
||||
```
|
||||
|
||||
#### Building the Project using Android NDK
|
||||
Obtain the [Android NDK](https://developer.android.com/ndk) and then build with CMake.
|
||||
|
||||
Execute the following commands on your computer to avoid downloading the NDK to your mobile. Alternatively, you can also do this in Termux:
|
||||
|
||||
```bash
|
||||
mkdir build-android
|
||||
cd build-android
|
||||
export NDK=/your_ndk_path
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=$NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=android-23 -DCMAKE_C_FLAGS=-march=armv8.4a+dotprod ..
|
||||
make
|
||||
```
|
||||
|
||||
Install [termux](https://github.com/termux/termux-app#installation) on your device and run `termux-setup-storage` to get access to your SD card (if Android 11+ then run the command twice).
|
||||
|
||||
Finally, copy these built `llama` binaries and the model file to your device storage. Because the file permissions in the Android sdcard cannot be changed, you can copy the executable files to the `/data/data/com.termux/files/home/bin` path, and then execute the following commands in Termux to add executable permission:
|
||||
|
||||
(Assumed that you have pushed the built executable files to the /sdcard/llama.cpp/bin path using `adb push`)
|
||||
```
|
||||
$cp -r /sdcard/llama.cpp/bin /data/data/com.termux/files/home/
|
||||
$cd /data/data/com.termux/files/home/bin
|
||||
$chmod +x ./*
|
||||
```
|
||||
|
||||
Download models and push them to `/sdcard/llama.cpp/`, then move it to `/data/data/com.termux/files/home/model/`
|
||||
|
||||
```
|
||||
$mv /sdcard/llama.cpp/ggml-model-Q4_K_M.gguf /data/data/com.termux/files/home/model/
|
||||
$mv /sdcard/llama.cpp/mmproj-model-f16.gguf /data/data/com.termux/files/home/model/
|
||||
```
|
||||
|
||||
Now, you can start chatting:
|
||||
```
|
||||
$cd /data/data/com.termux/files/home/bin
|
||||
$./llama-minicpmv-cli -m ../model/ggml-model-Q4_K_M.gguf --mmproj ../model/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?"
|
||||
./build/bin/llama-minicpmv-cli -m ../MiniCPM-Llama3-V-2_5/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-Llama3-V-2_5/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?"
|
||||
```
|
||||
|
||||
@@ -4,13 +4,25 @@
|
||||
|
||||
Download [MiniCPM-V-2_6](https://huggingface.co/openbmb/MiniCPM-V-2_6) PyTorch model from huggingface to "MiniCPM-V-2_6" folder.
|
||||
|
||||
|
||||
### Build llama.cpp
|
||||
Readme modification time: 20250206
|
||||
|
||||
If there are differences in usage, please refer to the official build [documentation](https://github.com/ggerganov/llama.cpp/blob/master/docs/build.md)
|
||||
|
||||
Clone llama.cpp:
|
||||
```bash
|
||||
git clone git@github.com:OpenBMB/llama.cpp.git
|
||||
git clone https://github.com/ggerganov/llama.cpp
|
||||
cd llama.cpp
|
||||
git checkout minicpmv-main
|
||||
```
|
||||
|
||||
Build llama.cpp using `CMake`:
|
||||
```bash
|
||||
cmake -B build
|
||||
cmake --build build --config Release
|
||||
```
|
||||
|
||||
|
||||
### Usage of MiniCPM-V 2.6
|
||||
|
||||
Convert PyTorch model to gguf files (You can also download the converted [gguf](https://huggingface.co/openbmb/MiniCPM-V-2_6-gguf) by us)
|
||||
@@ -21,87 +33,15 @@ python ./examples/llava/minicpmv-convert-image-encoder-to-gguf.py -m ../MiniCPM-
|
||||
python ./convert_hf_to_gguf.py ../MiniCPM-V-2_6/model
|
||||
|
||||
# quantize int4 version
|
||||
./llama-quantize ../MiniCPM-V-2_6/model/ggml-model-f16.gguf ../MiniCPM-V-2_6/model/ggml-model-Q4_K_M.gguf Q4_K_M
|
||||
./build/bin/llama-quantize ../MiniCPM-V-2_6/model/ggml-model-f16.gguf ../MiniCPM-V-2_6/model/ggml-model-Q4_K_M.gguf Q4_K_M
|
||||
```
|
||||
|
||||
Build for Linux or Mac
|
||||
|
||||
```bash
|
||||
make
|
||||
make llama-minicpmv-cli
|
||||
```
|
||||
|
||||
Inference on Linux or Mac
|
||||
```
|
||||
```bash
|
||||
# run f16 version
|
||||
./llama-minicpmv-cli -m ../MiniCPM-V-2_6/model/ggml-model-f16.gguf --mmproj ../MiniCPM-V-2_6/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?"
|
||||
./build/bin/llama-minicpmv-cli -m ../MiniCPM-V-2_6/model/ggml-model-f16.gguf --mmproj ../MiniCPM-V-2_6/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?"
|
||||
|
||||
# run quantized int4 version
|
||||
./llama-minicpmv-cli -m ../MiniCPM-V-2_6/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-V-2_6/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?"
|
||||
|
||||
# or run in interactive mode
|
||||
./llama-minicpmv-cli -m ../MiniCPM-V-2_6/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-V-2_6/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -i
|
||||
```
|
||||
|
||||
### Video
|
||||
Install FFmpeg
|
||||
```
|
||||
brew install ffmpeg
|
||||
brew install pkg-config
|
||||
```
|
||||
|
||||
### Android
|
||||
|
||||
#### Build on Android device using Termux
|
||||
We found that build on Android device would bring better runtime performance, so we recommend to build on device.
|
||||
|
||||
[Termux](https://github.com/termux/termux-app#installation) is a terminal app on Android device (no root required).
|
||||
|
||||
Install tools in Termux:
|
||||
```
|
||||
apt update && apt upgrade -y
|
||||
apt install git make cmake
|
||||
```
|
||||
|
||||
It's recommended to move your model inside the `~/` directory for best performance:
|
||||
```
|
||||
cd storage/downloads
|
||||
mv model.gguf ~/
|
||||
```
|
||||
|
||||
#### Building the Project using Android NDK
|
||||
Obtain the [Android NDK](https://developer.android.com/ndk) and then build with CMake.
|
||||
|
||||
Execute the following commands on your computer to avoid downloading the NDK to your mobile. Alternatively, you can also do this in Termux:
|
||||
|
||||
```bash
|
||||
mkdir build-android
|
||||
cd build-android
|
||||
export NDK=/your_ndk_path
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=$NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=android-23 -DCMAKE_C_FLAGS=-march=armv8.4a+dotprod ..
|
||||
make
|
||||
```
|
||||
|
||||
Install [termux](https://github.com/termux/termux-app#installation) on your device and run `termux-setup-storage` to get access to your SD card (if Android 11+ then run the command twice).
|
||||
|
||||
Finally, copy these built `llama` binaries and the model file to your device storage. Because the file permissions in the Android sdcard cannot be changed, you can copy the executable files to the `/data/data/com.termux/files/home/bin` path, and then execute the following commands in Termux to add executable permission:
|
||||
|
||||
(Assumed that you have pushed the built executable files to the /sdcard/llama.cpp/bin path using `adb push`)
|
||||
```
|
||||
$cp -r /sdcard/llama.cpp/bin /data/data/com.termux/files/home/
|
||||
$cd /data/data/com.termux/files/home/bin
|
||||
$chmod +x ./*
|
||||
```
|
||||
|
||||
Download models and push them to `/sdcard/llama.cpp/`, then move it to `/data/data/com.termux/files/home/model/`
|
||||
|
||||
```
|
||||
$mv /sdcard/llama.cpp/ggml-model-Q4_K_M.gguf /data/data/com.termux/files/home/model/
|
||||
$mv /sdcard/llama.cpp/mmproj-model-f16.gguf /data/data/com.termux/files/home/model/
|
||||
```
|
||||
|
||||
Now, you can start chatting:
|
||||
```
|
||||
$cd /data/data/com.termux/files/home/bin
|
||||
$./llama-minicpmv-cli -m ../model/ggml-model-Q4_K_M.gguf --mmproj ../model/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?"
|
||||
./build/bin/llama-minicpmv-cli -m ../MiniCPM-V-2_6/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-V-2_6/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?"
|
||||
```
|
||||
|
||||
@@ -1378,6 +1378,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
|
||||
LOG_INF("%s: vision_encoder: %d\n", __func__, new_clip->has_vision_encoder);
|
||||
LOG_INF("%s: llava_projector: %d\n", __func__, new_clip->has_llava_projector);
|
||||
LOG_INF("%s: minicpmv_projector: %d\n", __func__, new_clip->has_minicpmv_projector);
|
||||
LOG_INF("%s: minicpmv_version: %d\n", __func__, new_clip->minicpmv_version);
|
||||
LOG_INF("%s: glm_projector: %d\n", __func__, new_clip->has_glm_projector);
|
||||
LOG_INF("%s: model size: %.2f MB\n", __func__, model_size / 1024.0 / 1024.0);
|
||||
LOG_INF("%s: metadata size: %.2f MB\n", __func__, ggml_get_mem_size(meta) / 1024.0 / 1024.0);
|
||||
|
||||
@@ -148,19 +148,34 @@ static void process_image(struct llava_context * ctx_llava, struct llava_image_e
|
||||
process_eval_image_embed(ctx_llava, embeds, params->n_batch, &n_past, idx++);
|
||||
eval_string(ctx_llava->ctx_llama, std::string("</image>").c_str(), params->n_batch, &n_past, false);
|
||||
if (num_image_embeds > 1) {
|
||||
size_t num_image_embeds_col = clip_uhd_num_image_embeds_col(ctx_llava->ctx_clip);
|
||||
eval_string(ctx_llava->ctx_llama, std::string("<slice>").c_str(), params->n_batch, &n_past, false);
|
||||
for (size_t i = 0; i < (num_image_embeds-1)/num_image_embeds_col; ++i) {
|
||||
for (size_t j = 0; j < num_image_embeds_col; ++j) {
|
||||
eval_string(ctx_llava->ctx_llama, std::string("<image>").c_str(), params->n_batch, &n_past, false);
|
||||
process_eval_image_embed(ctx_llava, embeds, params->n_batch, &n_past, idx++);
|
||||
eval_string(ctx_llava->ctx_llama, std::string("</image>").c_str(), params->n_batch, &n_past, false);
|
||||
if (j == num_image_embeds_col - 1) {
|
||||
eval_string(ctx_llava->ctx_llama, std::string("\n").c_str(), params->n_batch, &n_past, false);
|
||||
if (has_minicpmv_projector == 2) {
|
||||
size_t num_image_embeds_col = clip_uhd_num_image_embeds_col(ctx_llava->ctx_clip);
|
||||
eval_string(ctx_llava->ctx_llama, std::string("<slice>").c_str(), params->n_batch, &n_past, false);
|
||||
for (size_t i = 0; i < (num_image_embeds-1)/num_image_embeds_col; ++i) {
|
||||
for (size_t j = 0; j < num_image_embeds_col; ++j) {
|
||||
eval_string(ctx_llava->ctx_llama, std::string("<image>").c_str(), params->n_batch, &n_past, false);
|
||||
process_eval_image_embed(ctx_llava, embeds, params->n_batch, &n_past, idx++);
|
||||
eval_string(ctx_llava->ctx_llama, std::string("</image>").c_str(), params->n_batch, &n_past, false);
|
||||
if (j == num_image_embeds_col - 1) {
|
||||
eval_string(ctx_llava->ctx_llama, std::string("\n").c_str(), params->n_batch, &n_past, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
eval_string(ctx_llava->ctx_llama, std::string("</slice>").c_str(), params->n_batch, &n_past, false);
|
||||
}
|
||||
else if (has_minicpmv_projector == 3 || has_minicpmv_projector == 4) {
|
||||
size_t num_image_embeds_col = clip_uhd_num_image_embeds_col(ctx_llava->ctx_clip);
|
||||
for (size_t i = 0; i < (num_image_embeds-1)/num_image_embeds_col; ++i) {
|
||||
for (size_t j = 0; j < num_image_embeds_col; ++j) {
|
||||
eval_string(ctx_llava->ctx_llama, std::string("<slice>").c_str(), params->n_batch, &n_past, false);
|
||||
process_eval_image_embed(ctx_llava, embeds, params->n_batch, &n_past, idx++);
|
||||
eval_string(ctx_llava->ctx_llama, std::string("</slice>").c_str(), params->n_batch, &n_past, false);
|
||||
if (j == num_image_embeds_col - 1) {
|
||||
eval_string(ctx_llava->ctx_llama, std::string("\n").c_str(), params->n_batch, &n_past, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
eval_string(ctx_llava->ctx_llama, std::string("</slice>").c_str(), params->n_batch, &n_past, false);
|
||||
}
|
||||
LOG_INF("%s: image token past: %d\n", __func__, n_past);
|
||||
}
|
||||
|
||||
@@ -597,7 +597,6 @@ elif args.minicpmv_projector is not None:
|
||||
fname_middle = "mmproj-"
|
||||
has_text_encoder = False
|
||||
has_minicpmv_projector = True
|
||||
minicpmv_version = 4
|
||||
elif args.vision_only:
|
||||
fname_middle = "vision-"
|
||||
has_text_encoder = False
|
||||
|
||||
@@ -384,8 +384,9 @@ struct server_task {
|
||||
SRV_DBG("Grammar trigger token: %d (`%s`)\n", token, word.c_str());
|
||||
common_grammar_trigger trigger;
|
||||
trigger.type = COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN;
|
||||
trigger.value = (llama_token) token;
|
||||
params.sampling.grammar_triggers.push_back(trigger);
|
||||
trigger.value = word;
|
||||
trigger.token = token;
|
||||
params.sampling.grammar_triggers.push_back(std::move(trigger));
|
||||
} else {
|
||||
SRV_DBG("Grammar trigger word: `%s`\n", word.c_str());
|
||||
params.sampling.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, word});
|
||||
@@ -750,7 +751,10 @@ struct server_task_result_cmpl_final : server_task_result {
|
||||
{"name", tc.name},
|
||||
{"arguments", tc.arguments},
|
||||
}},
|
||||
{"id", tc.id},
|
||||
// Some templates generate and require an id (sometimes in a very specific format, e.g. Mistral Nemo).
|
||||
// We only generate a random id for the ones that don't generate one by themselves
|
||||
// (they also won't get to see it as their template likely doesn't use it, so it's all for the client)
|
||||
{"id", tc.id.empty() ? gen_tool_call_id() : tc.id},
|
||||
});
|
||||
}
|
||||
message["tool_calls"] = tool_calls;
|
||||
@@ -1312,7 +1316,7 @@ struct server_slot {
|
||||
return task_type == SERVER_TASK_TYPE_EMBEDDING || task_type == SERVER_TASK_TYPE_RERANK;
|
||||
}
|
||||
|
||||
bool can_batch_with(server_slot & other_slot) {
|
||||
bool can_batch_with(server_slot & other_slot) const {
|
||||
return is_non_causal() == other_slot.is_non_causal()
|
||||
&& are_lora_equal(lora, other_slot.lora);
|
||||
}
|
||||
@@ -2157,14 +2161,6 @@ struct server_context {
|
||||
}
|
||||
|
||||
if (slot.has_new_line) {
|
||||
// if we have already seen a new line, we stop after a certain time limit
|
||||
if (slot.params.t_max_predict_ms > 0 && (ggml_time_us() - slot.t_start_generation > 1000.0f*slot.params.t_max_predict_ms)) {
|
||||
slot.stop = STOP_TYPE_LIMIT;
|
||||
slot.has_next_token = false;
|
||||
|
||||
SLT_DBG(slot, "stopped by time limit, n_decoded = %d, t_max_predict_ms = %d ms\n", slot.n_decoded, (int) slot.params.t_max_predict_ms);
|
||||
}
|
||||
|
||||
// require that each new line has a whitespace prefix (i.e. indentation) of at least slot.params.n_indent
|
||||
if (slot.params.n_indent > 0) {
|
||||
// check the current indentation
|
||||
@@ -2203,6 +2199,14 @@ struct server_context {
|
||||
// check if there is a new line in the generated text
|
||||
if (result.text_to_send.find('\n') != std::string::npos) {
|
||||
slot.has_new_line = true;
|
||||
|
||||
// if we have seen a new line, we stop after a certain time limit, but only upon another new line
|
||||
if (slot.params.t_max_predict_ms > 0 && (ggml_time_us() - slot.t_start_generation > 1000.0f*slot.params.t_max_predict_ms)) {
|
||||
slot.stop = STOP_TYPE_LIMIT;
|
||||
slot.has_next_token = false;
|
||||
|
||||
SLT_DBG(slot, "stopped by time limit, n_decoded = %d, t_max_predict_ms = %d ms\n", slot.n_decoded, (int) slot.params.t_max_predict_ms);
|
||||
}
|
||||
}
|
||||
|
||||
// if context shift is disabled, we stop when it reaches the context limit
|
||||
|
||||
@@ -92,6 +92,7 @@ def do_test_completion_with_required_tool_tiny(server: ServerProcess, tool: dict
|
||||
assert tool_calls and len(tool_calls) == 1, f'Expected 1 tool call in {choice["message"]}'
|
||||
tool_call = tool_calls[0]
|
||||
assert choice["message"].get("content") in (None, ""), f'Expected no content in {choice["message"]}'
|
||||
assert len(tool_call.get("id", "")) > 0, f'Expected non empty tool call id in {tool_call}'
|
||||
expected_function_name = "python" if tool["type"] == "code_interpreter" else tool["function"]["name"]
|
||||
assert expected_function_name == tool_call["function"]["name"]
|
||||
actual_arguments = tool_call["function"]["arguments"]
|
||||
@@ -373,6 +374,7 @@ def do_test_weather(server: ServerProcess, **kwargs):
|
||||
tool_call = tool_calls[0]
|
||||
# assert choice["message"].get("content") in (None, ""), f'Expected no content in {choice["message"]}'
|
||||
assert tool_call["function"]["name"] == WEATHER_TOOL["function"]["name"], f'Expected weather tool call, got {tool_call["function"]["name"]}'
|
||||
assert len(tool_call.get("id", "")) > 0, f'Expected non empty tool call id in {tool_call}'
|
||||
actual_arguments = json.loads(tool_call["function"]["arguments"])
|
||||
assert 'location' in actual_arguments, f"location not found in {json.dumps(actual_arguments)}"
|
||||
location = actual_arguments["location"]
|
||||
@@ -596,6 +598,7 @@ def do_test_hello_world(server: ServerProcess, **kwargs):
|
||||
tool_call = tool_calls[0]
|
||||
# assert choice["message"].get("content") in (None, ""), f'Expected no content in {choice["message"]}'
|
||||
assert tool_call["function"]["name"] == PYTHON_TOOL["function"]["name"]
|
||||
assert len(tool_call.get("id", "")) > 0, f'Expected non empty tool call id in {tool_call}'
|
||||
actual_arguments = json.loads(tool_call["function"]["arguments"])
|
||||
assert 'code' in actual_arguments, f"code not found in {json.dumps(actual_arguments)}"
|
||||
code = actual_arguments["code"]
|
||||
|
||||
@@ -435,6 +435,10 @@ static std::string gen_chatcmplid() {
|
||||
return "chatcmpl-" + random_string();
|
||||
}
|
||||
|
||||
static std::string gen_tool_call_id() {
|
||||
return random_string();
|
||||
}
|
||||
|
||||
//
|
||||
// other common utils
|
||||
//
|
||||
|
||||
@@ -76,7 +76,14 @@ namespace fs = std::filesystem;
|
||||
static std::string path_str(const fs::path & path) {
|
||||
std::string u8path;
|
||||
try {
|
||||
#if defined(__cpp_lib_char8_t)
|
||||
// C++20 and later: u8string() returns std::u8string
|
||||
std::u8string u8str = path.u8string();
|
||||
u8path = std::string(reinterpret_cast<const char*>(u8str.c_str()));
|
||||
#else
|
||||
// C++17: u8string() returns std::string
|
||||
u8path = path.u8string();
|
||||
#endif
|
||||
} catch (...) {
|
||||
}
|
||||
return u8path;
|
||||
|
||||
@@ -11718,9 +11718,12 @@ void ggml_vec_dot_iq1_m_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const
|
||||
|
||||
#elif defined __AVX2__
|
||||
|
||||
const __m256i mask = _mm256_set1_epi16(2 * 0x7);
|
||||
const __m256i mask = _mm256_set1_epi16(0x7);
|
||||
const __m256i mone = _mm256_set1_epi16(1);
|
||||
const __m256i mone8 = _mm256_set1_epi8(1);
|
||||
const __m256i mtwo8 = _mm256_set1_epi8(2);
|
||||
// VPSHUFB cannot cross 128-bit lanes so odd shifts go to upper half.
|
||||
const __m256i scales_shift = _mm256_set_epi64x(9, 3, 6, 0);
|
||||
|
||||
__m256 accum1 = _mm256_setzero_ps();
|
||||
__m256 accum2 = _mm256_setzero_ps();
|
||||
@@ -11732,6 +11735,14 @@ void ggml_vec_dot_iq1_m_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const
|
||||
const uint16_t * sc = (const uint16_t *)x[i].scales;
|
||||
|
||||
scale.u16 = (sc[0] >> 12) | ((sc[1] >> 8) & 0x00f0) | ((sc[2] >> 4) & 0x0f00) | (sc[3] & 0xf000);
|
||||
// Extract 3-bit scales (16 values)
|
||||
__m256i scales = _mm256_set1_epi64x(*(const uint64_t*)sc);
|
||||
scales = _mm256_srlv_epi64(scales, scales_shift);
|
||||
scales = _mm256_add_epi16(_mm256_slli_epi16(_mm256_and_si256(scales, mask), 1), mone);
|
||||
|
||||
// Indices to repeat each scale 8 times.
|
||||
__m256i scales_idx1 = _mm256_set1_epi16(0x0100);
|
||||
__m256i scales_idx2 = _mm256_add_epi8(scales_idx1, _mm256_set1_epi8(8));
|
||||
|
||||
__m256i sumi1 = _mm256_setzero_si256();
|
||||
__m256i sumi2 = _mm256_setzero_si256();
|
||||
@@ -11777,11 +11788,12 @@ void ggml_vec_dot_iq1_m_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const
|
||||
const __m256i dot3 = _mm256_maddubs_epi16(mone8, _mm256_sign_epi8(q8b_1, delta1));
|
||||
const __m256i dot4 = _mm256_maddubs_epi16(mone8, _mm256_sign_epi8(q8b_2, delta2));
|
||||
|
||||
__m256i scale1 = MM256_SET_M128I(_mm_set1_epi16(sc[ib/2] >> 2), _mm_set1_epi16(sc[ib/2] << 1));
|
||||
__m256i scale2 = MM256_SET_M128I(_mm_set1_epi16(sc[ib/2] >> 8), _mm_set1_epi16(sc[ib/2] >> 5));
|
||||
__m256i scale1 = _mm256_shuffle_epi8(scales, scales_idx1);
|
||||
__m256i scale2 = _mm256_shuffle_epi8(scales, scales_idx2);
|
||||
|
||||
scales_idx1 = _mm256_add_epi8(scales_idx1, mtwo8);
|
||||
scales_idx2 = _mm256_add_epi8(scales_idx2, mtwo8);
|
||||
|
||||
scale1 = _mm256_add_epi16(_mm256_and_si256(scale1, mask), mone);
|
||||
scale2 = _mm256_add_epi16(_mm256_and_si256(scale2, mask), mone);
|
||||
const __m256i p1 = _mm256_madd_epi16(dot1, scale1);
|
||||
const __m256i p2 = _mm256_madd_epi16(dot2, scale2);
|
||||
const __m256i p3 = _mm256_madd_epi16(dot3, scale1);
|
||||
|
||||
@@ -6648,6 +6648,135 @@ static void ggml_compute_forward_repeat_back(
|
||||
|
||||
// ggml_compute_forward_concat
|
||||
|
||||
static void ggml_compute_forward_concat_any(
|
||||
const struct ggml_compute_params * params,
|
||||
struct ggml_tensor * dst) {
|
||||
|
||||
const struct ggml_tensor * src0 = dst->src[0];
|
||||
const struct ggml_tensor * src1 = dst->src[1];
|
||||
|
||||
const size_t len = ggml_type_size(src0->type);
|
||||
|
||||
const int ith = params->ith;
|
||||
const int nth = params->nth;
|
||||
|
||||
GGML_TENSOR_BINARY_OP_LOCALS
|
||||
|
||||
const int32_t dim = ggml_get_op_params_i32(dst, 0);
|
||||
|
||||
GGML_ASSERT(dim >= 0 && dim < 4);
|
||||
|
||||
int64_t o[4] = {0, 0, 0, 0};
|
||||
o[dim] = src0->ne[dim];
|
||||
|
||||
const char * x;
|
||||
|
||||
// TODO: smarter multi-theading
|
||||
for (int i3 = 0; i3 < ne3; i3++) {
|
||||
for (int i2 = ith; i2 < ne2; i2 += nth) {
|
||||
for (int i1 = 0; i1 < ne1; i1++) {
|
||||
for (int i0 = 0; i0 < ne0; i0++) {
|
||||
if (i0 < ne00 && i1 < ne01 && i2 < ne02 && i3 < ne03) {
|
||||
x = (const char *)src0->data + (i0 )*nb00 + (i1 )*nb01 + (i2 )*nb02 + (i3 )*nb03;
|
||||
} else {
|
||||
x = (const char *)src1->data + (i0 - o[0])*nb10 + (i1 - o[1])*nb11 + (i2 - o[2])*nb12 + (i3 - o[3])*nb13;
|
||||
}
|
||||
|
||||
char * y = (char *)dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3;
|
||||
|
||||
memcpy(y, x, len);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void ggml_compute_forward_concat_i8(
|
||||
const struct ggml_compute_params * params,
|
||||
struct ggml_tensor * dst) {
|
||||
|
||||
const struct ggml_tensor * src0 = dst->src[0];
|
||||
const struct ggml_tensor * src1 = dst->src[1];
|
||||
|
||||
GGML_ASSERT(ggml_type_size(src0->type) == sizeof(int8_t));
|
||||
|
||||
const int ith = params->ith;
|
||||
const int nth = params->nth;
|
||||
|
||||
GGML_TENSOR_BINARY_OP_LOCALS
|
||||
|
||||
const int32_t dim = ggml_get_op_params_i32(dst, 0);
|
||||
|
||||
GGML_ASSERT(dim >= 0 && dim < 4);
|
||||
|
||||
int64_t o[4] = {0, 0, 0, 0};
|
||||
o[dim] = src0->ne[dim];
|
||||
|
||||
const int8_t * x;
|
||||
|
||||
// TODO: smarter multi-theading
|
||||
for (int i3 = 0; i3 < ne3; i3++) {
|
||||
for (int i2 = ith; i2 < ne2; i2 += nth) {
|
||||
for (int i1 = 0; i1 < ne1; i1++) {
|
||||
for (int i0 = 0; i0 < ne0; i0++) {
|
||||
if (i0 < ne00 && i1 < ne01 && i2 < ne02 && i3 < ne03) {
|
||||
x = (const int8_t *) ((const char *)src0->data + (i0 )*nb00 + (i1 )*nb01 + (i2 )*nb02 + (i3 )*nb03);
|
||||
} else {
|
||||
x = (const int8_t *) ((const char *)src1->data + (i0 - o[0])*nb10 + (i1 - o[1])*nb11 + (i2 - o[2])*nb12 + (i3 - o[3])*nb13);
|
||||
}
|
||||
|
||||
int8_t * y = (int8_t *)((char *)dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3);
|
||||
|
||||
*y = *x;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void ggml_compute_forward_concat_f16(
|
||||
const struct ggml_compute_params * params,
|
||||
struct ggml_tensor * dst) {
|
||||
|
||||
const struct ggml_tensor * src0 = dst->src[0];
|
||||
const struct ggml_tensor * src1 = dst->src[1];
|
||||
|
||||
GGML_ASSERT(ggml_type_size(src0->type) == sizeof(ggml_fp16_t));
|
||||
|
||||
const int ith = params->ith;
|
||||
const int nth = params->nth;
|
||||
|
||||
GGML_TENSOR_BINARY_OP_LOCALS
|
||||
|
||||
const int32_t dim = ggml_get_op_params_i32(dst, 0);
|
||||
|
||||
GGML_ASSERT(dim >= 0 && dim < 4);
|
||||
|
||||
int64_t o[4] = {0, 0, 0, 0};
|
||||
o[dim] = src0->ne[dim];
|
||||
|
||||
const ggml_fp16_t * x;
|
||||
|
||||
// TODO: smarter multi-theading
|
||||
for (int i3 = 0; i3 < ne3; i3++) {
|
||||
for (int i2 = ith; i2 < ne2; i2 += nth) {
|
||||
for (int i1 = 0; i1 < ne1; i1++) {
|
||||
for (int i0 = 0; i0 < ne0; i0++) {
|
||||
if (i0 < ne00 && i1 < ne01 && i2 < ne02 && i3 < ne03) {
|
||||
x = (const ggml_fp16_t *) ((const char *)src0->data + (i0 )*nb00 + (i1 )*nb01 + (i2 )*nb02 + (i3 )*nb03);
|
||||
} else {
|
||||
x = (const ggml_fp16_t *) ((const char *)src1->data + (i0 - o[0])*nb10 + (i1 - o[1])*nb11 + (i2 - o[2])*nb12 + (i3 - o[3])*nb13);
|
||||
}
|
||||
|
||||
ggml_fp16_t * y = (ggml_fp16_t *)((char *)dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3);
|
||||
|
||||
*y = *x;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void ggml_compute_forward_concat_f32(
|
||||
const struct ggml_compute_params * params,
|
||||
struct ggml_tensor * dst) {
|
||||
@@ -6655,7 +6784,7 @@ static void ggml_compute_forward_concat_f32(
|
||||
const struct ggml_tensor * src0 = dst->src[0];
|
||||
const struct ggml_tensor * src1 = dst->src[1];
|
||||
|
||||
GGML_ASSERT(src0->nb[0] == sizeof(float));
|
||||
GGML_ASSERT(ggml_type_size(src0->type) == sizeof(float));
|
||||
|
||||
const int ith = params->ith;
|
||||
const int nth = params->nth;
|
||||
@@ -6698,6 +6827,16 @@ static void ggml_compute_forward_concat(
|
||||
const struct ggml_tensor * src0 = dst->src[0];
|
||||
|
||||
switch (src0->type) {
|
||||
case GGML_TYPE_F16:
|
||||
case GGML_TYPE_BF16:
|
||||
case GGML_TYPE_I16:
|
||||
{
|
||||
ggml_compute_forward_concat_f16(params, dst);
|
||||
} break;
|
||||
case GGML_TYPE_I8:
|
||||
{
|
||||
ggml_compute_forward_concat_i8(params, dst);
|
||||
} break;
|
||||
case GGML_TYPE_F32:
|
||||
case GGML_TYPE_I32:
|
||||
{
|
||||
@@ -6705,7 +6844,7 @@ static void ggml_compute_forward_concat(
|
||||
} break;
|
||||
default:
|
||||
{
|
||||
GGML_ABORT("fatal error");
|
||||
ggml_compute_forward_concat_any(params, dst);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -88,9 +88,8 @@ else()
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
|
||||
COMMAND xcrun -sdk macosx metal ${XC_FLAGS} -c ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air
|
||||
COMMAND xcrun -sdk macosx metallib ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
|
||||
COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air
|
||||
COMMAND xcrun -sdk macosx metal ${XC_FLAGS} -c ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal -o - |
|
||||
xcrun -sdk macosx metallib - -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
|
||||
COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h
|
||||
COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal
|
||||
DEPENDS ggml-metal.metal ${METALLIB_COMMON}
|
||||
|
||||
@@ -2332,6 +2332,7 @@ struct ggml_tensor * ggml_concat(
|
||||
struct ggml_tensor * b,
|
||||
int dim) {
|
||||
GGML_ASSERT(dim >= 0 && dim < GGML_MAX_DIMS);
|
||||
GGML_ASSERT(a->type == b->type);
|
||||
|
||||
int64_t ne[GGML_MAX_DIMS];
|
||||
for (int d = 0; d < GGML_MAX_DIMS; ++d) {
|
||||
|
||||
@@ -1 +1 @@
|
||||
58ecf6b96d887e408b6869915863fa1126483d51
|
||||
c7dfe3d174f98b14801f9ed12f129179d3e7b638
|
||||
|
||||
@@ -480,6 +480,21 @@ static void test_msgs_oaicompat_json_conversion() {
|
||||
"]"
|
||||
),
|
||||
common_chat_msgs_to_json_oaicompat<json>({message_assist_call_python}).dump(2));
|
||||
|
||||
auto res = common_chat_msgs_parse_oaicompat(json::parse("[{\"role\": \"assistant\", \"tool_calls\": []}]"));
|
||||
assert_equals<size_t>(1, res.size());
|
||||
assert_equals<std::string>(res[0].role, "assistant");
|
||||
assert_equals(true, res[0].content.empty());
|
||||
assert_equals(true, res[0].tool_calls.empty());
|
||||
|
||||
try {
|
||||
common_chat_msgs_parse_oaicompat(json::parse("[{\"role\": \"assistant\"}]"));
|
||||
throw std::runtime_error("Expected exception");
|
||||
} catch (const std::exception & e) {
|
||||
if (std::string(e.what()).find("'content'") == std::string::npos) {
|
||||
throw std::runtime_error("Expected exception about missing 'content'");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void test_tools_oaicompat_json_conversion() {
|
||||
@@ -751,6 +766,19 @@ static void test_template_output_parsers() {
|
||||
"{\n \"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}",
|
||||
COMMON_CHAT_FORMAT_HERMES_2_PRO));
|
||||
|
||||
assert_msg_equals(message_assist_thoughts_unparsed_think,
|
||||
common_chat_parse("<think>I'm thinking</think>Hello, world!\nWhat's up?",
|
||||
COMMON_CHAT_FORMAT_HERMES_2_PRO));
|
||||
assert_msg_equals(message_assist_thoughts_unparsed_think,
|
||||
common_chat_parse("I'm thinking</think>Hello, world!\nWhat's up?",
|
||||
COMMON_CHAT_FORMAT_HERMES_2_PRO));
|
||||
assert_msg_equals(message_assist_thoughts,
|
||||
common_chat_parse("<think>I'm thinking</think>Hello, world!\nWhat's up?",
|
||||
COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING));
|
||||
assert_msg_equals(message_assist_thoughts,
|
||||
common_chat_parse("I'm thinking</think>Hello, world!\nWhat's up?",
|
||||
COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING));
|
||||
|
||||
test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
|
||||
test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
|
||||
"<tool_call>\n"
|
||||
|
||||
Reference in New Issue
Block a user