# llama.cpp server and DLL build (CPU inference only)
#
# llama.cpp is an amazing project, but its build system is poor and
# growing worse. It has never properly built llama.dll under any
# compiler, and DLL builds have been unsupported by w64dk for some time.
# This makefile is a replacement build system that produces llama.dll
# and llama-server.exe using w64dk. No source file changes are needed.
#
# The DLL exports the public API and no more, and is readily usable as a
# component in another project (game engine, etc.). The server EXE is
# fully functional on Windows 7 or later. It is not linked against the
# DLL, since that's not useful, but can be made to do so with a small
# tweak to this makefile (see the commented sketch at the bottom of this
# file).
#
# Invoke this makefile in the llama.cpp source tree:
#
#   $ make -j$(nproc) -f path/to/w64devkit/contrib/llama.mak
#
# Incremental builds are unsupported, so do a clean rebuild after
# pulling. It was last tested at b5711, and an update will inevitably
# break it.

CROSS    =
CPPFLAGS = -w -O2
LDFLAGS  = -s

.SUFFIXES: .c .cpp .o

def = -DGGML_USE_CPU
inc = \
  -I. \
  -Icommon \
  -Iggml/include \
  -Iggml/src \
  -Iggml/src/ggml-cpu \
  -Iinclude \
  -Itools/mtmd \
  -Ivendor

%.c.o: %.c
	$(CROSS)gcc -c -o $@ $(inc) $(def) $(CPPFLAGS) $<
%.cpp.o: %.cpp
	$(CROSS)g++ -c -o $@ $(inc) $(def) $(CPPFLAGS) $<

dll = \
  ggml/src/ggml-alloc.c.o \
  ggml/src/ggml-backend-reg.cpp.o \
  ggml/src/ggml-backend.cpp.o \
  ggml/src/ggml-cpu/arch/x86/quants.c.o \
  ggml/src/ggml-cpu/binary-ops.cpp.o \
  ggml/src/ggml-cpu/ggml-cpu.c.o \
  ggml/src/ggml-cpu/ggml-cpu.cpp.o \
  ggml/src/ggml-cpu/llamafile/sgemm.cpp.o \
  ggml/src/ggml-cpu/ops.cpp.o \
  ggml/src/ggml-cpu/quants.c.o \
  ggml/src/ggml-cpu/traits.cpp.o \
  ggml/src/ggml-cpu/unary-ops.cpp.o \
  ggml/src/ggml-cpu/vec.cpp.o \
  ggml/src/ggml-opt.cpp.o \
  ggml/src/ggml-quants.c.o \
  ggml/src/ggml-threading.cpp.o \
  ggml/src/ggml.c.o \
  ggml/src/gguf.cpp.o \
  src/llama-adapter.cpp.o \
  src/llama-arch.cpp.o \
  src/llama-batch.cpp.o \
  src/llama-chat.cpp.o \
  src/llama-context.cpp.o \
  src/llama-grammar.cpp.o \
  src/llama-graph.cpp.o \
  src/llama-hparams.cpp.o \
  src/llama-impl.cpp.o \
  src/llama-io.cpp.o \
  src/llama-kv-cache-unified-iswa.cpp.o \
  src/llama-kv-cache-unified.cpp.o \
  src/llama-memory-hybrid.cpp.o \
  src/llama-memory-recurrent.cpp.o \
  src/llama-memory.cpp.o \
  src/llama-mmap.cpp.o \
  src/llama-model-loader.cpp.o \
  src/llama-model-saver.cpp.o \
  src/llama-model.cpp.o \
  src/llama-quant.cpp.o \
  src/llama-sampling.cpp.o \
  src/llama-vocab.cpp.o \
  src/llama.cpp.o \
  src/unicode-data.cpp.o \
  src/unicode.cpp.o

exe = \
  common/arg.cpp.o \
  common/chat-parser.cpp.o \
  common/chat.cpp.o \
  common/common.cpp.o \
  common/console.cpp.o \
  common/json-partial.cpp.o \
  common/json-schema-to-grammar.cpp.o \
  common/log.cpp.o \
  common/ngram-cache.cpp.o \
  common/regex-partial.cpp.o \
  common/sampling.cpp.o \
  common/speculative.cpp.o \
  common/w64dk-build-info.cpp.o \
  tools/mtmd/clip.cpp.o \
  tools/mtmd/mtmd-audio.cpp.o \
  tools/mtmd/mtmd-helper.cpp.o \
  tools/mtmd/mtmd.cpp.o \
  tools/server/server.cpp.o

all: llama.dll llama-server.exe

llama-server.exe: $(exe) $(dll)
	$(CROSS)g++ $(LDFLAGS) -o $@ $(exe) $(dll) -lws2_32

llama.dll: $(dll) llama.def
	$(CROSS)g++ -shared $(LDFLAGS) -o $@ $(dll) llama.def

clean:
	rm -f $(dll) $(exe) llama.def llama.dll llama-server.exe \
	  tools/server/index.html.gz.hpp tools/server/loading.html.hpp \
	  common/w64dk-build-info.cpp

.ONESHELL:  # needed for heredocs

# NOTE: produces valid C++ even if Git is unavailable
common/w64dk-build-info.cpp:
	cat >$@ <<EOF
	int LLAMA_BUILD_NUMBER = $$(git rev-list --count HEAD 2>/dev/null || echo 0);
	char const *LLAMA_COMMIT = "$$(git rev-parse --short HEAD 2>/dev/null)";
	char const *LLAMA_COMPILER = "";
	char const *LLAMA_BUILD_TARGET = "";
	EOF

tools/server/index.html.gz.hpp: tools/server/public/index.html.gz
	cd tools/server/public/ && xxd -i index.html.gz >../index.html.gz.hpp

tools/server/loading.html.hpp: tools/server/public/loading.html
	cd tools/server/public/ && xxd -i loading.html >../loading.html.hpp

tools/server/server.cpp.o: \
  tools/server/server.cpp \
  tools/server/index.html.gz.hpp \
  tools/server/loading.html.hpp

llama.def:
	@cat >$@ <<EOF
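
# The header above mentions that the server can be linked against the
# DLL with a small tweak. A minimal sketch of one such tweak (an
# untested assumption, and it relies on llama.def exporting every symbol
# the server objects reference): link the EXE against llama.dll rather
# than the raw $(dll) objects. Binutils can link directly against a DLL,
# so no import library is required.
#
# llama-server.exe: $(exe) llama.dll
# 	$(CROSS)g++ $(LDFLAGS) -o $@ $(exe) llama.dll -lws2_32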