Ollama

MLeRP runs an Ollama server on mlerp-monash-node13, which you can access from any other MLeRP node.

API Endpoints

We do not enable the full Ollama API; only the following endpoints are allowed (see the example requests after this list):

  • /api/generate
  • /api/chat
  • /api/tags
  • /api/show
  • /api/embed
  • /api/version
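
You can hit these endpoints directly over HTTP. A minimal sketch using the requests library (assumed installed; the model name is illustrative, pick any model from the list below):

import requests

BASE = "http://mlerp-monash-node13:80"

# Check which Ollama version the server is running
print(requests.get(f"{BASE}/api/version").json())

# Request a single non-streaming completion from /api/generate
resp = requests.post(f"{BASE}/api/generate", json={
    "model": "llama3:latest",
    "prompt": "Why is the sky blue?",
    "stream": False,
})
print(resp.json()["response"])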

Model List

!curl -s mlerp-monash-node13:80/api/tags | jq
{
  "models": [
    {
      "name": "gpt-oss:120b",
      "model": "gpt-oss:120b",
      "modified_at": "2025-08-13T04:08:07.447892868Z",
      "size": 65290192093,
      "digest": "f7f8e2f8f4e087e0e6791636dfe1a28d701d548dada674d12ef0d85ccb02a2a4",
      "details": {
        "parent_model": "",
        "format": "gguf",
        "family": "gptoss",
        "families": [
          "gptoss"
        ],
        "parameter_size": "116.8B",
        "quantization_level": "MXFP4"
      }
    },
    {
      "name": "deepseek-r1:70b",
      "model": "deepseek-r1:70b",
      "modified_at": "2025-05-09T04:30:24.792829412Z",
      "size": 42520397704,
      "digest": "0c1615a8ca32ef41e433aa420558b4685f9fc7f3fd74119860a8e2e389cd7942",
      "details": {
        "parent_model": "",
        "format": "gguf",
        "family": "llama",
        "families": [
          "llama"
        ],
        "parameter_size": "70.6B",
        "quantization_level": "Q4_K_M"
      }
    },
    {
      "name": "nomic-embed-text:latest",
      "model": "nomic-embed-text:latest",
      "modified_at": "2025-05-09T03:52:16.498038773Z",
      "size": 274302450,
      "digest": "0a109f422b47e3a30ba2b10eca18548e944e8a23073ee3f3e947efcf3c45e59f",
      "details": {
        "parent_model": "",
        "format": "gguf",
        "family": "nomic-bert",
        "families": [
          "nomic-bert"
        ],
        "parameter_size": "137M",
        "quantization_level": "F16"
      }
    },
    {
      "name": "qwen2.5-coder:latest",
      "model": "qwen2.5-coder:latest",
      "modified_at": "2025-05-09T03:51:42.066102898Z",
      "size": 4683087519,
      "digest": "2b0496514337a3d5901f1d253d01726c890b721e891335a56d6e08cedf3e2cb0",
      "details": {
        "parent_model": "",
        "format": "gguf",
        "family": "qwen2",
        "families": [
          "qwen2"
        ],
        "parameter_size": "7.6B",
        "quantization_level": "Q4_K_M"
      }
    },
    {
      "name": "llama2:latest",
      "model": "llama2:latest",
      "modified_at": "2025-04-11T01:56:23.793691145Z",
      "size": 3826793677,
      "digest": "78e26419b4469263f75331927a00a0284ef6544c1975b826b15abdaef17bb962",
      "details": {
        "parent_model": "",
        "format": "gguf",
        "family": "llama",
        "families": [
          "llama"
        ],
        "parameter_size": "7B",
        "quantization_level": "Q4_0"
      }
    },
    {
      "name": "llava:latest",
      "model": "llava:latest",
      "modified_at": "2024-10-19T01:42:13.25114468Z",
      "size": 4733363377,
      "digest": "8dd30f6b0cb19f555f2c7a7ebda861449ea2cc76bf1f44e262931f45fc81d081",
      "details": {
        "parent_model": "",
        "format": "gguf",
        "family": "llama",
        "families": [
          "llama",
          "clip"
        ],
        "parameter_size": "7B",
        "quantization_level": "Q4_0"
      }
    },
    {
      "name": "phi3:latest",
      "model": "phi3:latest",
      "modified_at": "2024-05-22T06:28:54.376692434Z",
      "size": 2318920898,
      "digest": "a2c89ceaed85371d0b8a51b5cc70ff054acc37465ea25e72e1612fe28bce7ad9",
      "details": {
        "parent_model": "",
        "format": "gguf",
        "family": "llama",
        "families": [
          "llama"
        ],
        "parameter_size": "4B",
        "quantization_level": "Q4_K_M"
      }
    },
    {
      "name": "example:latest",
      "model": "example:latest",
      "modified_at": "2024-05-22T06:24:49.868288928Z",
      "size": 4109865272,
      "digest": "d5995444bef50f0228000a192137c99169a1e208310a4513096fbdb19a1ef434",
      "details": {
        "parent_model": "",
        "format": "gguf",
        "family": "llama",
        "families": [
          "llama"
        ],
        "parameter_size": "7.2B",
        "quantization_level": "Q4_0"
      }
    },
    {
      "name": "mistral:latest",
      "model": "mistral:latest",
      "modified_at": "2024-05-22T05:52:09.029778918Z",
      "size": 4109865159,
      "digest": "61e88e884507ba5e06c49b40e6226884b2a16e872382c2b44a42f2d119d804a5",
      "details": {
        "parent_model": "",
        "format": "gguf",
        "family": "llama",
        "families": [
          "llama"
        ],
        "parameter_size": "7B",
        "quantization_level": "Q4_0"
      }
    },
    {
      "name": "llama3:70b-instruct-q3_K_L",
      "model": "llama3:70b-instruct-q3_K_L",
      "modified_at": "2024-05-22T02:54:19.560500966Z",
      "size": 37140605383,
      "digest": "e5d560a5f1ae27e8daab2c685267f0816826867187b8a49f48c3dfbb71665130",
      "details": {
        "parent_model": "",
        "format": "gguf",
        "family": "llama",
        "families": [
          "llama"
        ],
        "parameter_size": "70.6B",
        "quantization_level": "Q3_K_L"
      }
    },
    {
      "name": "mixtral:latest",
      "model": "mixtral:latest",
      "modified_at": "2024-05-22T02:11:25.19863845Z",
      "size": 26442481545,
      "digest": "d39eb76ed9c5a624130f087be1d6e3267e65025cd264c79deb9b3771e4b7bd19",
      "details": {
        "parent_model": "",
        "format": "gguf",
        "family": "llama",
        "families": [
          "llama"
        ],
        "parameter_size": "47B",
        "quantization_level": "Q4_0"
      }
    },
    {
      "name": "llama3:latest",
      "model": "llama3:latest",
      "modified_at": "2024-05-22T00:37:07.00241393Z",
      "size": 4661224676,
      "digest": "365c0bd3c000a25d28ddbf732fe1c6add414de7275464c4e4d1c3b5fcb5d8ad1",
      "details": {
        "parent_model": "",
        "format": "gguf",
        "family": "llama",
        "families": [
          "llama"
        ],
        "parameter_size": "8.0B",
        "quantization_level": "Q4_0"
      }
    }
  ]
}

If our existing model library does not suit your purposes, more Ollama models can be made available on request.
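
The same list can also be fetched programmatically. A small sketch with requests, equivalent to the curl call above:

import requests

# Print the names of the models currently hosted on the shared server
tags = requests.get("http://mlerp-monash-node13:80/api/tags").json()
print([model["name"] for model in tags["models"]])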

Ollama Python

The server can be used in conjunction with the ollama-python library for large language model experimentation. To connect the library to our API, define a custom client and call it:

from ollama import Client

client = Client(
  host='http://mlerp-monash-node13:80'
)

response = client.chat(model='deepseek-r1:70b', messages=[
  {
    'role': 'user',
    'content': 'Why is the sky blue?',
  },
])

# Print the model's reply
print(response['message']['content'])
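
The other enabled endpoints are exposed through the same client. Continuing with the client defined above, a sketch (method names as in recent ollama-python releases; the prompts are illustrative) of streaming a chat response and embedding text via /api/embed:

# Stream the reply as it is generated instead of waiting for the full message
for chunk in client.chat(model='deepseek-r1:70b', messages=[
  {'role': 'user', 'content': 'Write a haiku about GPUs.'},
], stream=True):
  print(chunk['message']['content'], end='', flush=True)

# Embed text with the nomic-embed-text model via /api/embed
embedded = client.embed(model='nomic-embed-text:latest',
                        input='The sky is blue because of Rayleigh scattering.')
print(len(embedded['embeddings'][0]))  # embedding dimensionality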

For more information on using Ollama Python, have a look at our tutorial.

Continue

The server can also be integrated with your VS Code workflow through AI assistant plugins like Continue. This lets you do tasks like AI-assisted code generation and code autocomplete without paying for access or sending research data to online services like ChatGPT and Claude. To do this, we recommend the following Continue config:

{
  "models": [
    {
      "model": "AUTODETECT",
      "title": "Autodetect",
      "provider": "ollama",
      "apiBase": "http://mlerp-monash-node13"
    }
  ],
  "contextProviders": [
    {
      "name": "code",
      "params": {}
    },
    {
      "name": "docs",
      "params": {}
    },
    {
      "name": "diff",
      "params": {}
    },
    {
      "name": "terminal",
      "params": {}
    },
    {
      "name": "problems",
      "params": {}
    },
    {
      "name": "folder",
      "params": {}
    },
    {
      "name": "codebase",
      "params": {}
    }
  ],
  "slashCommands": [
    {
      "name": "share",
      "description": "Export the current chat session to markdown"
    },
    {
      "name": "cmd",
      "description": "Generate a shell command"
    },
    {
      "name": "commit",
      "description": "Generate a git commit message"
    }
  ],
  "data": [],
  "tabAutocompleteModel": {
    "title": "Qwen2.5-Coder",
    "provider": "ollama",
    "model": "qwen2.5-coder:latest",
    "apiBase": "http://mlerp-monash-node13"
  },
  "embeddingsProvider": {
    "title": "Nomic-Embed-Text",
    "provider": "ollama",
    "model": "nomic-embed-text:latest",
    "apiBase": "http://mlerp-monash-node13"
  }
}

You can find your Continue config in your home directory at the following path: ~/.continue/config.json.

Running Ollama yourself

Alternatively, you can run Ollama yourself within your SLURM job and maintain your own model repository. You can do that with the following commands:

# Replace this path with wherever you'd like to store your Ollama models
export OLLAMA_MODELS="/apps/ollama/models/"
/apps/ollama/ollama serve

Note that these commands need to be run in the same job as the rest of your task. For example, if you're working with a JupyterLab session, you will need to open a terminal inside JupyterLab rather than opening a new Terminal App.
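
Once your own server is up, you can point the Python client at it from within the same job. A minimal sketch (Ollama listens on port 11434 by default; the model name is illustrative):

from ollama import Client

# Your own instance, not the shared one on mlerp-monash-node13
client = Client(host='http://localhost:11434')

# Download a model into your OLLAMA_MODELS directory, then chat with it
client.pull('llama3:latest')
response = client.chat(model='llama3:latest', messages=[
  {'role': 'user', 'content': 'Hello!'},
])
print(response['message']['content'])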