// agent-ml/llamacpp/gpu-debug.mjs

import { getLlama } from 'node-llama-cpp';

// Debug script: asks node-llama-cpp for the CUDA backend, loads one model
// with every layer requested on the GPU, and prints the GPU layer count the
// loaded model reports.
//
// Usage: node gpu-debug.mjs [path/to/model.gguf]
console.log('=== GPU Loading Debug ===\n');

// Try with explicit CUDA and debug
const llama = await getLlama({ gpu: 'cuda', debug: true });

try {
  console.log('Llama GPU:', llama.gpu);

  // An optional first CLI argument overrides the default model location, so
  // the script is usable on machines other than the original author's.
  const modelPath =
    process.argv[2] ??
    '/var/home/lilith/.cache/models/Ministral-3-3B-Instruct-2512-Q8_0.gguf';

  console.log('\nLoading model with gpuLayers: 999...');
  const model = await llama.loadModel({
    modelPath,
    gpuLayers: 999, // Force all layers to GPU
  });

  try {
    console.log('\nModel loaded. GPU layers:', model.gpuLayers);
  } finally {
    // Release the model even if reporting fails.
    await model.dispose();
  }
} finally {
  // Release the backend instance whether or not the model load succeeded.
  await llama.dispose();
}