コンテンツにスキップ

Python API

注意: この API はプレビュー版であり、変更される可能性があります。

Python APIはonnxruntime-genai Pythonパッケージによって提供されます。

Terminal window
pip install onnxruntime-genai
import onnxruntime_genai
onnxruntime_genai.Model(config_path: str) -> Model
onnxruntime_genai.Model(config: onnxruntime_genai.Config) -> Model
  • type: モデルタイプを文字列として返します。

    model = onnxruntime_genai.Model("config.json")
    print(model.type)
  • device_type: デバイスタイプを文字列として返します。

    print(model.device_type)
  • create_multimodal_processor() -> MultiModalProcessor

    processor = model.create_multimodal_processor()

onnxruntime_genai.Config(config_path: str) -> Config
  • append_provider(provider: str)

    config = onnxruntime_genai.Config("config.json")
    config.append_provider("CUDAExecutionProvider")
  • set_provider_option(option: str, value: str)

    config.set_provider_option("device_id", "0")
  • clear_providers()

    config.clear_providers()

onnxruntime_genai.GeneratorParams(model: Model) -> GeneratorParams
  • set_inputs(named_tensors: NamedTensors)

    params = onnxruntime_genai.GeneratorParams(model)
    named_tensors = onnxruntime_genai.NamedTensors()
    params.set_inputs(named_tensors)
  • set_model_input(name: str, value: numpy.ndarray)

    import numpy as np
    params.set_model_input("input_ids", np.array([1, 2, 3], dtype=np.int32))
  • try_graph_capture_with_max_batch_size(max_batch_size: int)

    params.try_graph_capture_with_max_batch_size(8)
  • set_search_options(**options)

    params.set_search_options(temperature=0.7, top_p=0.9)
  • set_guidance(type: str, data: str)

    params.set_guidance("prefix", "昔々あるところに")

onnxruntime_genai.Generator(model: Model, params: GeneratorParams) -> Generator
  • is_done() -> bool

    generator = onnxruntime_genai.Generator(model, params)
    done = generator.is_done()
  • get_output(name: str) -> numpy.ndarray

    output = generator.get_output("output_ids")
  • append_tokens(tokens: numpy.ndarray[int32])

    generator.append_tokens(np.array([4, 5], dtype=np.int32))
  • append_tokens(tokens: onnxruntime_genai.Tensor)

    tensor = onnxruntime_genai.Tensor(np.array([4, 5], dtype=np.int32))
    generator.append_tokens(tensor)
  • get_logits() -> numpy.ndarray[float32]

    logits = generator.get_logits()
  • set_logits(new_logits: numpy.ndarray[float32])

    generator.set_logits(np.zeros_like(logits))
  • generate_next_token()

    generator.generate_next_token()
  • rewind_to(new_length: int)

    generator.rewind_to(2)
  • get_next_tokens() -> numpy.ndarray[int32]

    next_tokens = generator.get_next_tokens()
  • get_sequence(index: int) -> numpy.ndarray[int32]

    sequence = generator.get_sequence(0)
  • set_active_adapter(adapters: onnxruntime_genai.Adapters, adapter_name: str)

    adapters = onnxruntime_genai.Adapters(model)
    generator.set_active_adapter(adapters, "adapter_name")

onnxruntime_genai.Tokenizer(model: Model) -> Tokenizer
  • encode(text: str) -> numpy.ndarray[int32]

    tokenizer = onnxruntime_genai.Tokenizer(model)
    tokens = tokenizer.encode("こんにちは世界")
  • to_token_id(text: str) -> int

    token_id = tokenizer.to_token_id("こんにちは")
  • decode(tokens: numpy.ndarray[int32]) -> str

    text = tokenizer.decode(tokens)
  • apply_chat_template(template_str: str, messages: str, tools: str = None, add_generation_prompt: bool = False) -> str

    chat = tokenizer.apply_chat_template("{user}: {message}", messages="こんにちは!", add_generation_prompt=True)
  • encode_batch(texts: list[str]) -> onnxruntime_genai.Tensor

    batch_tensor = tokenizer.encode_batch(["こんにちは", "世界"])
  • decode_batch(tokens: onnxruntime_genai.Tensor) -> list[str]

    texts = tokenizer.decode_batch(batch_tensor)
  • create_stream() -> TokenizerStream

    stream = tokenizer.create_stream()

onnxruntime_genai.TokenizerStream(tokenizer: Tokenizer) -> TokenizerStream
  • decode(token: int32) -> str

    token_str = stream.decode(123)

onnxruntime_genai.NamedTensors() -> NamedTensors
  • __getitem__(name: str) -> onnxruntime_genai.Tensor

    tensor = named_tensors["input_ids"]
  • __setitem__(name: str, value: numpy.ndarray or onnxruntime_genai.Tensor)

    named_tensors["input_ids"] = np.array([1, 2, 3], dtype=np.int32)
  • __contains__(name: str) -> bool

    exists = "input_ids" in named_tensors
  • __delitem__(name: str)

    del named_tensors["input_ids"]
  • __len__() -> int

    length = len(named_tensors)
  • keys() -> list[str]

    keys = named_tensors.keys()

onnxruntime_genai.Tensor(array: numpy.ndarray) -> Tensor
  • shape() -> list[int]

    tensor = onnxruntime_genai.Tensor(np.array([1, 2, 3]))
    print(tensor.shape())
  • type() -> int

    print(tensor.type())
  • data() -> memoryview

    data = tensor.data()
  • as_numpy() -> numpy.ndarray

    arr = tensor.as_numpy()

onnxruntime_genai.Adapters(model: Model) -> Adapters
  • unload(adapter_name: str)

    adapters.unload("adapter_name")
  • load(file: str, name: str)

    adapters.load("adapter_file.bin", "adapter_name")

onnxruntime_genai.MultiModalProcessor(model: Model) -> MultiModalProcessor
  • __call__(prompt: str = None, images: Images = None, audios: Audios = None) -> onnxruntime_genai.Tensor

    result = processor(prompt="この画像を説明してください", images=onnxruntime_genai.Images.open("image.png"))
  • create_stream() -> TokenizerStream

    stream = processor.create_stream()
  • decode(tokens: numpy.ndarray[int32]) -> str

    text = processor.decode(tokens)

onnxruntime_genai.Images.open(*image_paths: str) -> Images
onnxruntime_genai.Images.open_bytes(*image_datas: bytes) -> Images
images = onnxruntime_genai.Images.open("image1.png", "image2.jpg")
with open("image1.png", "rb") as f:
    images_bytes = onnxruntime_genai.Images.open_bytes(f.read())

onnxruntime_genai.Audios.open(*audio_paths: str) -> Audios
onnxruntime_genai.Audios.open_bytes(*audio_datas: bytes) -> Audios
audios = onnxruntime_genai.Audios.open("audio1.wav")
with open("audio1.wav", "rb") as f:
    audios_bytes = onnxruntime_genai.Audios.open_bytes(f.read())

  • onnxruntime_genai.set_log_options(**options)

    onnxruntime_genai.set_log_options(verbose=True)
  • onnxruntime_genai.is_cuda_available() -> bool

    print(onnxruntime_genai.is_cuda_available())
  • onnxruntime_genai.is_dml_available() -> bool

    print(onnxruntime_genai.is_dml_available())
  • onnxruntime_genai.is_rocm_available() -> bool

    print(onnxruntime_genai.is_rocm_available())
  • onnxruntime_genai.is_webgpu_available() -> bool

    print(onnxruntime_genai.is_webgpu_available())
  • onnxruntime_genai.is_qnn_available() -> bool

    print(onnxruntime_genai.is_qnn_available())
  • onnxruntime_genai.is_openvino_available() -> bool

    print(onnxruntime_genai.is_openvino_available())
  • onnxruntime_genai.set_current_gpu_device_id(device_id: int)

    onnxruntime_genai.set_current_gpu_device_id(0)
  • onnxruntime_genai.get_current_gpu_device_id() -> int

    print(onnxruntime_genai.get_current_gpu_device_id())