1. Generating text with the Ollama generate API

Call generate directly with the deepseek-r1 model to produce a piece of text:

from ollama import generate

response = generate('deepseek-r1', '为什么天空是蓝色的')
print(response['response'])
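
If the Ollama server is not running at the default localhost:11434, you can construct a Client explicitly and point it at your host. A minimal sketch (the host URL below is a placeholder, not part of the original example):

from ollama import Client

# The host URL is a placeholder; substitute your own server address
client = Client(host='http://127.0.0.1:11434')
response = client.generate('deepseek-r1', '为什么天空是蓝色的')
print(response['response'])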

Calling asynchronously:

import asyncio

import ollama


async def main():
  client = ollama.AsyncClient()
  response = await client.generate('deepseek-r1', '为什么天空是蓝色的')
  print(response['response'])


if __name__ == '__main__':
  try:
    asyncio.run(main())
  except KeyboardInterrupt:
    print('\nGoodbye!')

Using streaming output:

from ollama import generate

for part in generate('deepseek-r1', '为什么天空是蓝色的', stream=True):
  print(part['response'], end='', flush=True)
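
Streaming and async can also be combined: with stream=True, the awaited AsyncClient call returns an async iterator. A minimal sketch based on the two patterns above:

import asyncio

from ollama import AsyncClient


async def main():
  client = AsyncClient()
  # The awaited call yields response parts as they arrive
  async for part in await client.generate('deepseek-r1', '为什么天空是蓝色的', stream=True):
    print(part['response'], end='', flush=True)


asyncio.run(main())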

2. Using the Ollama chat API

Use the deepseek-r1 model and call chat:

from ollama import chat

messages = [
  {
    'role': 'user',
    'content': '为什么天空是蓝色的',
  },
]

response = chat('deepseek-r1', messages=messages)
print(response['message']['content'])
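
chat also accepts a format parameter for structured output (Ollama 0.5+). As a sketch, you can pass a JSON schema generated from a Pydantic model and validate the reply; the Answer model here is illustrative, and this assumes the model follows the schema:

from pydantic import BaseModel

from ollama import chat


class Answer(BaseModel):
  reason: str
  keywords: list[str]


response = chat(
  'deepseek-r1',
  messages=[{'role': 'user', 'content': '为什么天空是蓝色的'}],
  format=Answer.model_json_schema(),  # constrain the reply to this schema
)
answer = Answer.model_validate_json(response.message.content)
print(answer.reason)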

Calling asynchronously:

import asyncio

from ollama import AsyncClient


async def main():
  messages = [
    {
      'role': 'user',
      'content': '天空为什么是蓝色的',
    },
  ]

  client = AsyncClient()
  response = await client.chat('deepseek-r1', messages=messages)
  print(response['message']['content'])


if __name__ == '__main__':
  asyncio.run(main())

Calling with streaming output:

from ollama import chat

messages = [
  {
    'role': 'user',
    'content': '为什么天空是蓝色的',
  },
]

for part in chat('deepseek-r1', messages=messages, stream=True):
  print(part['message']['content'], end='', flush=True)

print()

Chat that records conversation history:

from ollama import chat

messages = [
  {
    'role': 'user',
    'content': '天空为什么是蓝色的',
  },
  {
    'role': 'assistant',
    'content': "天空是蓝色的是由于地球大气对阳光的反射",
  },
  {
    'role': 'user',
    'content': '北京的天气怎么样',
  },
  {
    'role': 'assistant',
    'content': '北京夏季的天气通常是温暖潮湿的,温度经常超过30°C(86°F)。这座城市从6月到9月是雨季,有强降雨和偶尔的大风。冬天是温和的,气温很少降到零度以下。这座城市以高科技和充满活力的文化而闻名,有许多受欢迎的旅游景点。',
  },
]

while True:
  user_input = input('Chat with history: ')
  response = chat(
    'deepseek-r1',
    messages=messages
    + [
      {'role': 'user', 'content': user_input},
    ],
  )

  # Add the response to the messages to maintain the history
  messages += [
    {'role': 'user', 'content': user_input},
    {'role': 'assistant', 'content': response.message.content},
  ]
  print(response.message.content + '\n')
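
Because messages grows on every turn, a long session will eventually exceed the model's context window. One simple mitigation (the cap of 20 messages is an arbitrary assumption, not from the original example) is to send only the most recent turns:

MAX_HISTORY = 20  # assumed cap; tune to your model's context window


def recent_history(messages):
  # Keep only the most recent MAX_HISTORY messages when calling chat
  return messages[-MAX_HISTORY:]


# e.g. response = chat('deepseek-r1', messages=recent_history(messages) + [{'role': 'user', 'content': user_input}])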

3. Calling tool functions

We define an addition function and a subtraction function, then pass them to the qwen model via the tools parameter:

from ollama import ChatResponse, chat


def add_two_numbers(a: int, b: int) -> int:
  """Add two numbers."""
  # int() guards against the model passing arguments as strings
  return int(a) + int(b)


def subtract_two_numbers(a: int, b: int) -> int:
  """Subtract two numbers."""
  return int(a) - int(b)


# Tools can still be manually defined and passed into chat
subtract_two_numbers_tool = {
  'type': 'function',
  'function': {
    'name': 'subtract_two_numbers',
    'description': '两个数字相减',
    'parameters': {
      'type': 'object',
      'required': ['a', 'b'],
      'properties': {
        'a': {'type': 'integer', 'description': '第一个数字'},
        'b': {'type': 'integer', 'description': '第二个数字'},
      },
    },
  },
}

messages = [{'role': 'user', 'content': '3减2等于几?'}]
print('Prompt:', messages[0]['content'])

available_functions = {
  'add_two_numbers': add_two_numbers,
  'subtract_two_numbers': subtract_two_numbers,
}

response: ChatResponse = chat(
  'qwen3:0.6b',
  messages=messages,
  tools=[add_two_numbers, subtract_two_numbers_tool],
)

if response.message.tool_calls:
  # There may be multiple tool calls in the response
  for tool in response.message.tool_calls:
    # Ensure the function is available, and then call it
    if function_to_call := available_functions.get(tool.function.name):
      print('Calling function:', tool.function.name)
      print('Arguments:', tool.function.arguments)
      output = function_to_call(**tool.function.arguments)
      print('Function output:', output)
    else:
      print('Function', tool.function.name, 'not found')

# Only needed to chat with the model using the tool call results
if response.message.tool_calls:
  # Add the function response to messages for the model to use
  messages.append(response.message)
  messages.append({'role': 'tool', 'content': str(output), 'name': tool.function.name})

  # Get final response from model with function outputs
  final_response = chat('qwen3:0.6b', messages=messages)
  print('Final response:', final_response.message.content)

else:
  print('No tool calls returned from model')
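
The example above performs a single round of tool calls, but a model may need several rounds (call a tool, read the result, then call another). A minimal loop sketch that reuses chat and available_functions from above; run_with_tools and max_rounds are illustrative names, not part of the ollama API:

def run_with_tools(model, messages, tools, max_rounds=5):
  # Keep chatting until the model stops requesting tools (or we hit the cap)
  for _ in range(max_rounds):
    response = chat(model, messages=messages, tools=tools)
    if not response.message.tool_calls:
      return response.message.content
    messages.append(response.message)
    for tool in response.message.tool_calls:
      fn = available_functions.get(tool.function.name)
      output = fn(**tool.function.arguments) if fn else f'{tool.function.name} not found'
      messages.append({'role': 'tool', 'content': str(output), 'name': tool.function.name})
  return response.message.content


print(run_with_tools('qwen3:0.6b', [{'role': 'user', 'content': '3减2等于几?'}], [add_two_numbers, subtract_two_numbers_tool]))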

Calling asynchronously:

import asyncio

import ollama
from ollama import ChatResponse


def add_two_numbers(a: int, b: int) -> int:
  """Add two numbers."""
  return a + b


def subtract_two_numbers(a: int, b: int) -> int:
  """Subtract two numbers."""
  return a - b


# Tools can still be manually defined and passed into chat
subtract_two_numbers_tool = {
  'type': 'function',
  'function': {
    'name': 'subtract_two_numbers',
    'description': 'Subtract two numbers',
    'parameters': {
      'type': 'object',
      'required': ['a', 'b'],
      'properties': {
        'a': {'type': 'integer', 'description': 'The first number'},
        'b': {'type': 'integer', 'description': 'The second number'},
      },
    },
  },
}

messages = [{'role': 'user', 'content': '三加一等于多少?'}]
print('Prompt:', messages[0]['content'])

available_functions = {
  'add_two_numbers': add_two_numbers,
  'subtract_two_numbers': subtract_two_numbers,
}


async def main():
  client = ollama.AsyncClient()

  response: ChatResponse = await client.chat(
    'qwen3:0.6b',
    messages=messages,
    tools=[add_two_numbers, subtract_two_numbers_tool],
  )

  if response.message.tool_calls:
    # There may be multiple tool calls in the response
    for tool in response.message.tool_calls:
      # Ensure the function is available, and then call it
      if function_to_call := available_functions.get(tool.function.name):
        print('Calling function:', tool.function.name)
        print('Arguments:', tool.function.arguments)
        output = function_to_call(**tool.function.arguments)
        print('Function output:', output)
      else:
        print('Function', tool.function.name, 'not found')

  # Only needed to chat with the model using the tool call results
  if response.message.tool_calls:
    # Add the function response to messages for the model to use
    messages.append(response.message)
    messages.append({'role': 'tool', 'content': str(output), 'name': tool.function.name})

    # Get final response from model with function outputs
    final_response = await client.chat('qwen3:0.6b', messages=messages)
    print('Final response:', final_response.message.content)

  else:
    print('No tool calls returned from model')


if __name__ == '__main__':
  try:
    asyncio.run(main())
  except KeyboardInterrupt:
    print('\nGoodbye!')