Settings

export OLLAMA_FLASH_ATTENTION=true
export OLLAMA_KV_CACHE_TYPE=q8_0	### KV cache type: f16 (for 7B–13B models), q8_0 (for 34B, 70B models)
export OLLAMA_CONTEXT_LENGTH=32768	### default context window in tokens; use 65536 for large codebase work
export OLLAMA_NUM_PARALLEL=1
export OLLAMA_MAX_LOADED_MODELS=1
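export OLLAMA_NUM_THREAD=10	### NOTE: may not be read by the Ollama server; thread count is normally set via the num_thread model parameter (Modelfile PARAMETER or API options) — verify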
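export OLLAMA_ORIGINS="*"	### "*" accepts requests from any origin; list specific origins instead if the server is reachable by others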
export OLLAMA_HOST="127.0.0.1:11434"

Commands

ollama serve         ### Starts the Ollama background server (default: http://localhost:11434)
ollama launch        ### Launches the Ollama menu
ollama run <model>   ### Starts an interactive chat session with a model
ollama run gemma4 "what is a black hole"
--------------------------------------------------------------------------------------
ollama launch claude --model qwen3.6
ollama launch opencode --model qwen3.6
--------------------------------------------------------------------------------------
ollama run gemma4:31b "what is a black hole"
ollama run deepseek-coder-v2:16b "what is a black hole"
ollama run qwen3-coder:30b-a3b-q4_K_M "what is a black hole"

IT-AI-Ollama

Settings

Commands

Navigation menu

Personal tools

Namespaces

Variants

Views

More

Search

Navigation

Samer

Tools