Settings
export OLLAMA_FLASH_ATTENTION=true
export OLLAMA_KV_CACHE_TYPE=q8_0 ### f16 (for 7B–13B), q8_0 (for 34B, 70B)
export OLLAMA_CONTEXT_LENGTH=32768 ### 65536 for large codebase work
export OLLAMA_NUM_PARALLEL=1
export OLLAMA_MAX_LOADED_MODELS=1
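export OLLAMA_NUM_THREAD=10 ### NOTE: not listed among Ollama's documented server variables — verify it takes effect; thread count is normally set via the num_thread model parameter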
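export OLLAMA_ORIGINS="*" ### allows requests from any origin; narrow to specific origins if untrusted web pages could reach this host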
export OLLAMA_HOST="127.0.0.1:11434"
Commands
ollama serve ### Starts the Ollama server in the foreground of this shell (default: http://localhost:11434)
ollama launch ### Opens the interactive menu for launching integrations (e.g. claude, opencode)
ollama run <model> ### Starts an interactive chat session with a model.
ollama run gemma4 "what is a black hole"
--------------------------------------------------------------------------------------
ollama launch claude --model qwen3.6
ollama launch opencode --model qwen3.6
--------------------------------------------------------------------------------------
ollama run gemma4:31b "what is a black hole"
ollama run deepseek-coder-v2:16b "what is a black hole"
ollama run qwen3-coder:30b-a3b-q4_K_M "what is a black hole"