# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: BSD-2-Clause

# Automatically generated by Numbast Static Binding Generator
# Generator Information:
# Ast_canopy version: 0.5.0
# Numbast version: 0.5.0
# Generation command: /home/wangm/numbast/numbast/src/numbast/__main__.py --cfg-path configs/cuda_bf16.yml --output-dir numba_cuda/numba/cuda/_internal/
# Static binding generator parameters: {'cfg_path': 'configs/cuda_bf16.yml', 'output_dir': 'numba_cuda/numba/cuda/_internal/', 'run_ruff_format': True}
# Config file path (relative to the path of the generated binding): ../../../../../configs/cuda_bf16.yml
# Cudatoolkit version: (12, 8)
# Default CUDA_HOME path: /home/wangm/miniforge3/envs/numbast


# Imports:
import io
import operator

import numba
from llvmlite import ir
from numba.cuda import types
from numba.cuda.datamodel import PrimitiveModel, StructModel
from numba.cuda.extending import (
    lower_cast,
    make_attribute_wrapper,
    register_model,
)
from numba.cuda.core.imputils import Registry as TargetRegistry
from numba.cuda.core.imputils import lower_cast
from numba.cuda.typing import signature
from numba.cuda.typing.builtins import (
    BinOp,
    BinOpTrueDiv,
    UnaryNegate,
    UnaryPositive,
    UnorderedCmpOp,
    OrderedCmpOp,
)
from numba.cuda.typing.templates import AttributeTemplate, ConcreteTemplate
from numba.cuda.typing.templates import Registry as TypingRegistry
from numba.cuda import CUSource, declare_device
from numba.cuda.vector_types import vector_types
from numba.cuda.extending import as_numba_type
from numba.cuda.types import (
    CPointer,
    Function,
    Number,
    Type,
    bool_,
    float16,
    float32,
    float64,
    int8,
    int16,
    int32,
    int64,
    uint8,
    uint16,
    uint32,
    uint64,
    void,
)
from numba.cuda.types.ext_types import bfloat16

# Vector type looked up by name from numba.cuda's vector type table.
float32x2 = vector_types["float32x2"]
# Alias so the CUDA spelling `__half` refers to Numba's float16 type.
__half = float16


# Module-local registries: typing templates are collected in
# `typing_registry`, lowering implementations in `target_registry`.
typing_registry = TypingRegistry()
# Short names for the typing registration entry points used below.
register = typing_registry.register
register_attr = typing_registry.register_attr
register_global = typing_registry.register_global
target_registry = TargetRegistry()
# Short names for the lowering registration entry points used below.
lower = target_registry.lower
lower_attr = target_registry.lower_getattr
lower_constant = target_registry.lower_constant

# Shim Stream:


class _KeyedStringIO(io.StringIO):
    def __init__(self, *arg, **kwarg):
        super().__init__(*arg, *kwarg)
        self._keys = set()

    def write_with_key(self, key: str, value: str):
        if key in self._keys:
            return
        self._keys.add(key)
        self.write(value)

    def reset(self):
        self._keys.clear()
        self.seek(0)


# Preamble of the generated shim source: optional defines (none here)
# followed by the include of the bound header.
shim_defines = ""
shim_include = "#include <" + "cuda_bf16.h" + ">"
shim_prefix = shim_defines + "\n" + shim_include
# Stream that accumulates shim source; the lowering functions below append
# their shim functions to it via `write_with_key`.
shim_stream = _KeyedStringIO()
shim_stream.write(shim_prefix)
# CUDA source object wrapping the stream; the lowering functions register
# it with the active code library as a linking file.
shim_obj = CUSource(shim_stream)


# Enums:


# Structs:


# Typing for unnamed1405307
class _type_class_unnamed1405307(Type):
    """Numba type for the struct bound under the name ``unnamed1405307``."""

    def __init__(self):
        size_in_bytes = 2
        super().__init__(name="unnamed1405307")
        # Width is recorded in bits; `alignof_` is read by the lowering
        # code in this file as the alignment for loads and stores.
        self.bitwidth = size_in_bytes * 8
        self.alignof_ = 2


# Module-level instance of the Numba type; used wherever a type object is
# needed (signatures, pointer types, registrations).
_type_unnamed1405307 = _type_class_unnamed1405307()


# Make Python API for struct
# (a plain class created with `type(...)` that carries the Numba type in
# its `_nbtype` attribute).
unnamed1405307 = type("unnamed1405307", (), {"_nbtype": _type_unnamed1405307})

# Associate the Python-side class with its Numba type in `as_numba_type`.
as_numba_type.register(unnamed1405307, _type_unnamed1405307)


@register_model(_type_class_unnamed1405307)
class _model_unnamed1405307(StructModel):
    """Data model: a struct with a single ``uint16`` member named ``x``."""

    def __init__(self, dmm, fe_type):
        super().__init__(dmm, fe_type, [("x", uint16)])


@register_attr
class _attr_typing_unnamed1405307(AttributeTemplate):
    """Attribute typing template for ``unnamed1405307``.

    Keyed on the module-level ``unnamed1405307`` object; each ``resolve_*``
    method returns the Numba type of the attribute with that name.
    """

    key = globals()["unnamed1405307"]

    def resolve_x(self, obj):
        # Attribute `x` is typed as uint16.
        return uint16


# Expose struct member "x" of the data model as attribute `x`.
make_attribute_wrapper(_type_class_unnamed1405307, "x", "x")


@register
class _ctor_template_unnamed1405307(ConcreteTemplate):
    """Constructor typing template for ``unnamed1405307``.

    ``cases`` is empty, so this template lists no callable signatures.
    """

    key = globals()["unnamed1405307"]
    cases = []


# Type the Python-side `unnamed1405307` object as a function described by
# the template above.
register_global(unnamed1405307, Function(_ctor_template_unnamed1405307))


# Typing for unnamed1405416
class _type_class_unnamed1405416(Type):
    """Numba type for the struct bound under the name ``unnamed1405416``."""

    def __init__(self):
        size_in_bytes = 4
        super().__init__(name="unnamed1405416")
        # Width is recorded in bits; `alignof_` is the attribute the
        # lowering code in this file reads for load/store alignment.
        self.bitwidth = size_in_bytes * 8
        self.alignof_ = 4


# Module-level instance of the Numba type.
_type_unnamed1405416 = _type_class_unnamed1405416()


# Make Python API for struct
# (a plain class created with `type(...)` that carries the Numba type in
# its `_nbtype` attribute).
unnamed1405416 = type("unnamed1405416", (), {"_nbtype": _type_unnamed1405416})

# Associate the Python-side class with its Numba type in `as_numba_type`.
as_numba_type.register(unnamed1405416, _type_unnamed1405416)


@register_model(_type_class_unnamed1405416)
class _model_unnamed1405416(StructModel):
    """Data model: a struct with two ``uint16`` members, ``x`` then ``y``."""

    def __init__(self, dmm, fe_type):
        super().__init__(dmm, fe_type, [("x", uint16), ("y", uint16)])


@register_attr
class _attr_typing_unnamed1405416(AttributeTemplate):
    """Attribute typing template for ``unnamed1405416``.

    Keyed on the module-level ``unnamed1405416`` object; each ``resolve_*``
    method returns the Numba type of the attribute with that name.
    """

    key = globals()["unnamed1405416"]

    def resolve_x(self, obj):
        # Attribute `x` is typed as uint16.
        return uint16

    def resolve_y(self, obj):
        # Attribute `y` is typed as uint16.
        return uint16


# Expose struct member "x" of the data model as attribute `x`.
make_attribute_wrapper(_type_class_unnamed1405416, "x", "x")


# Expose struct member "y" of the data model as attribute `y`.
make_attribute_wrapper(_type_class_unnamed1405416, "y", "y")


# Both the Python-facing name and the internal type name refer to the
# `bfloat16` type imported from numba.cuda.types.ext_types.
__nv_bfloat16 = _type___nv_bfloat16 = bfloat16


def _lower__ZN13__nv_bfloat16C1Ev(shim_stream, shim_obj):
    """Register lowering for the zero-argument ``__nv_bfloat16()`` call.

    Declares a device shim that placement-constructs a ``__nv_bfloat16``
    into caller-provided storage, then registers ``ctor_impl`` as the
    lowering of ``__nv_bfloat16`` called with no arguments.
    """
    shim_symbol = "_ZN13__nv_bfloat16C1Ev_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZN13__nv_bfloat16C1Ev_nbst(int &ignore, __nv_bfloat16 *self ) {
        new (self) __nv_bfloat16();
        return 0;
    }
        """

    _ctor_decl___nv_bfloat16 = declare_device(
        shim_symbol,
        int32(CPointer(_type___nv_bfloat16)),
    )

    def __nv_bfloat16_device_caller(arg_0):
        return _ctor_decl___nv_bfloat16(arg_0)

    @lower(__nv_bfloat16)
    def ctor_impl(context, builder, sig, args):
        # Register the shim object for linking and emit the shim source
        # (written only the first time this key is seen).
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_symbol, shim_source)

        # Stack slot that receives the constructed object.
        selfptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Arguments are handed to the shim by pointer: one slot per argument
        # (none for this signature).
        arg_slots = [
            builder.alloca(context.get_value_type(argty)) for argty in sig.args
        ]
        for slot, argty, argval in zip(arg_slots, sig.args, args):
            builder.store(argval, slot, align=getattr(argty, "alignof_", None))

        caller_sig = signature(int32, CPointer(_type___nv_bfloat16))
        context.compile_internal(
            builder,
            __nv_bfloat16_device_caller,
            caller_sig,
            (selfptr, *arg_slots),
        )
        return builder.load(
            selfptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )


_lower__ZN13__nv_bfloat16C1Ev(shim_stream, shim_obj)


def _lower__ZN13__nv_bfloat16C1ERK17__nv_bfloat16_raw(shim_stream, shim_obj):
    """Register lowering for ``__nv_bfloat16(unnamed1405307)``.

    The shim takes the argument as ``__nv_bfloat16_raw*`` and
    placement-constructs a ``__nv_bfloat16`` from it. A cast from the raw
    struct type to ``__nv_bfloat16`` is registered on top of the same
    lowering.
    """
    shim_symbol = "_ZN13__nv_bfloat16C1ERK17__nv_bfloat16_raw_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZN13__nv_bfloat16C1ERK17__nv_bfloat16_raw_nbst(int &ignore, __nv_bfloat16 *self , __nv_bfloat16_raw* hr) {
        new (self) __nv_bfloat16(*hr);
        return 0;
    }
        """

    _ctor_decl___nv_bfloat16 = declare_device(
        shim_symbol,
        int32(CPointer(_type___nv_bfloat16), CPointer(_type_unnamed1405307)),
    )

    def __nv_bfloat16_device_caller(arg_0, arg_1):
        return _ctor_decl___nv_bfloat16(arg_0, arg_1)

    @lower(__nv_bfloat16, _type_unnamed1405307)
    def ctor_impl(context, builder, sig, args):
        # Register the shim object for linking and emit the shim source
        # (written only the first time this key is seen).
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_symbol, shim_source)

        # Stack slot that receives the constructed object.
        selfptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Arguments are handed to the shim by pointer: spill each to a slot.
        arg_slots = [
            builder.alloca(context.get_value_type(argty)) for argty in sig.args
        ]
        for slot, argty, argval in zip(arg_slots, sig.args, args):
            builder.store(argval, slot, align=getattr(argty, "alignof_", None))

        caller_sig = signature(
            int32,
            CPointer(_type___nv_bfloat16),
            CPointer(_type_unnamed1405307),
        )
        context.compile_internal(
            builder,
            __nv_bfloat16_device_caller,
            caller_sig,
            (selfptr, *arg_slots),
        )
        return builder.load(
            selfptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

    @lower_cast(_type_unnamed1405307, _type___nv_bfloat16)
    def conversion_impl(context, builder, fromty, toty, value):
        # A cast is lowered as a one-argument constructor call.
        ctor_sig = signature(_type___nv_bfloat16, fromty)
        return ctor_impl(context, builder, ctor_sig, [value])


_lower__ZN13__nv_bfloat16C1ERK17__nv_bfloat16_raw(shim_stream, shim_obj)


def _lower__ZN13__nv_bfloat16C1E6__half(shim_stream, shim_obj):
    """Register lowering for ``__nv_bfloat16(float16)``.

    The shim takes the argument as ``__half*`` and placement-constructs a
    ``__nv_bfloat16`` from it. A ``float16`` to ``__nv_bfloat16`` cast is
    registered on top of the same lowering.
    """
    shim_symbol = "_ZN13__nv_bfloat16C1E6__half_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZN13__nv_bfloat16C1E6__half_nbst(int &ignore, __nv_bfloat16 *self , __half* f) {
        new (self) __nv_bfloat16(*f);
        return 0;
    }
        """

    _ctor_decl___nv_bfloat16 = declare_device(
        shim_symbol,
        int32(CPointer(_type___nv_bfloat16), CPointer(float16)),
    )

    def __nv_bfloat16_device_caller(arg_0, arg_1):
        return _ctor_decl___nv_bfloat16(arg_0, arg_1)

    @lower(__nv_bfloat16, float16)
    def ctor_impl(context, builder, sig, args):
        # Register the shim object for linking and emit the shim source
        # (written only the first time this key is seen).
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_symbol, shim_source)

        # Stack slot that receives the constructed object.
        selfptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Arguments are handed to the shim by pointer: spill each to a slot.
        arg_slots = [
            builder.alloca(context.get_value_type(argty)) for argty in sig.args
        ]
        for slot, argty, argval in zip(arg_slots, sig.args, args):
            builder.store(argval, slot, align=getattr(argty, "alignof_", None))

        caller_sig = signature(
            int32, CPointer(_type___nv_bfloat16), CPointer(float16)
        )
        context.compile_internal(
            builder,
            __nv_bfloat16_device_caller,
            caller_sig,
            (selfptr, *arg_slots),
        )
        return builder.load(
            selfptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

    # By default, Numbast does not generate this cast because the C++
    # conversion constructor is marked explicit. We enable it by hand here.
    @lower_cast(float16, __nv_bfloat16)
    def conversion_impl(context, builder, fromty, toty, value):
        # A cast is lowered as a one-argument constructor call.
        ctor_sig = signature(__nv_bfloat16, fromty)
        return ctor_impl(context, builder, ctor_sig, [value])


_lower__ZN13__nv_bfloat16C1E6__half(shim_stream, shim_obj)


def _lower__ZN13__nv_bfloat16C1Ef(shim_stream, shim_obj):
    """Register lowering for ``__nv_bfloat16(float32)``.

    The shim takes the argument as ``float*`` and placement-constructs a
    ``__nv_bfloat16`` from it. A ``float32`` to ``__nv_bfloat16`` cast is
    registered on top of the same lowering.
    """
    shim_symbol = "_ZN13__nv_bfloat16C1Ef_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZN13__nv_bfloat16C1Ef_nbst(int &ignore, __nv_bfloat16 *self , float* f) {
        new (self) __nv_bfloat16(*f);
        return 0;
    }
        """

    _ctor_decl___nv_bfloat16 = declare_device(
        shim_symbol,
        int32(CPointer(_type___nv_bfloat16), CPointer(float32)),
    )

    def __nv_bfloat16_device_caller(arg_0, arg_1):
        return _ctor_decl___nv_bfloat16(arg_0, arg_1)

    @lower(__nv_bfloat16, float32)
    def ctor_impl(context, builder, sig, args):
        # Register the shim object for linking and emit the shim source
        # (written only the first time this key is seen).
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_symbol, shim_source)

        # Stack slot that receives the constructed object.
        selfptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Arguments are handed to the shim by pointer: spill each to a slot.
        arg_slots = [
            builder.alloca(context.get_value_type(argty)) for argty in sig.args
        ]
        for slot, argty, argval in zip(arg_slots, sig.args, args):
            builder.store(argval, slot, align=getattr(argty, "alignof_", None))

        caller_sig = signature(
            int32, CPointer(_type___nv_bfloat16), CPointer(float32)
        )
        context.compile_internal(
            builder,
            __nv_bfloat16_device_caller,
            caller_sig,
            (selfptr, *arg_slots),
        )
        return builder.load(
            selfptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

    @lower_cast(float32, _type___nv_bfloat16)
    def conversion_impl(context, builder, fromty, toty, value):
        # A cast is lowered as a one-argument constructor call.
        ctor_sig = signature(_type___nv_bfloat16, fromty)
        return ctor_impl(context, builder, ctor_sig, [value])


_lower__ZN13__nv_bfloat16C1Ef(shim_stream, shim_obj)


def _lower__ZN13__nv_bfloat16C1Ed(shim_stream, shim_obj):
    """Register lowering for ``__nv_bfloat16(float64)``.

    The shim takes the argument as ``double*`` and placement-constructs a
    ``__nv_bfloat16`` from it. A ``float64`` to ``__nv_bfloat16`` cast is
    registered on top of the same lowering.
    """
    shim_symbol = "_ZN13__nv_bfloat16C1Ed_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZN13__nv_bfloat16C1Ed_nbst(int &ignore, __nv_bfloat16 *self , double* f) {
        new (self) __nv_bfloat16(*f);
        return 0;
    }
        """

    _ctor_decl___nv_bfloat16 = declare_device(
        shim_symbol,
        int32(CPointer(_type___nv_bfloat16), CPointer(float64)),
    )

    def __nv_bfloat16_device_caller(arg_0, arg_1):
        return _ctor_decl___nv_bfloat16(arg_0, arg_1)

    @lower(__nv_bfloat16, float64)
    def ctor_impl(context, builder, sig, args):
        # Register the shim object for linking and emit the shim source
        # (written only the first time this key is seen).
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_symbol, shim_source)

        # Stack slot that receives the constructed object.
        selfptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Arguments are handed to the shim by pointer: spill each to a slot.
        arg_slots = [
            builder.alloca(context.get_value_type(argty)) for argty in sig.args
        ]
        for slot, argty, argval in zip(arg_slots, sig.args, args):
            builder.store(argval, slot, align=getattr(argty, "alignof_", None))

        caller_sig = signature(
            int32, CPointer(_type___nv_bfloat16), CPointer(float64)
        )
        context.compile_internal(
            builder,
            __nv_bfloat16_device_caller,
            caller_sig,
            (selfptr, *arg_slots),
        )
        return builder.load(
            selfptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

    @lower_cast(float64, _type___nv_bfloat16)
    def conversion_impl(context, builder, fromty, toty, value):
        # A cast is lowered as a one-argument constructor call.
        ctor_sig = signature(_type___nv_bfloat16, fromty)
        return ctor_impl(context, builder, ctor_sig, [value])


_lower__ZN13__nv_bfloat16C1Ed(shim_stream, shim_obj)


def _lower__ZN13__nv_bfloat16C1Es(shim_stream, shim_obj):
    """Register lowering for ``__nv_bfloat16(int16)``.

    The shim takes the argument as ``short*`` and placement-constructs a
    ``__nv_bfloat16`` from it. An ``int16`` to ``__nv_bfloat16`` cast is
    registered on top of the same lowering.
    """
    shim_symbol = "_ZN13__nv_bfloat16C1Es_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZN13__nv_bfloat16C1Es_nbst(int &ignore, __nv_bfloat16 *self , short* val) {
        new (self) __nv_bfloat16(*val);
        return 0;
    }
        """

    _ctor_decl___nv_bfloat16 = declare_device(
        shim_symbol,
        int32(CPointer(_type___nv_bfloat16), CPointer(int16)),
    )

    def __nv_bfloat16_device_caller(arg_0, arg_1):
        return _ctor_decl___nv_bfloat16(arg_0, arg_1)

    @lower(__nv_bfloat16, int16)
    def ctor_impl(context, builder, sig, args):
        # Register the shim object for linking and emit the shim source
        # (written only the first time this key is seen).
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_symbol, shim_source)

        # Stack slot that receives the constructed object.
        selfptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Arguments are handed to the shim by pointer: spill each to a slot.
        arg_slots = [
            builder.alloca(context.get_value_type(argty)) for argty in sig.args
        ]
        for slot, argty, argval in zip(arg_slots, sig.args, args):
            builder.store(argval, slot, align=getattr(argty, "alignof_", None))

        caller_sig = signature(
            int32, CPointer(_type___nv_bfloat16), CPointer(int16)
        )
        context.compile_internal(
            builder,
            __nv_bfloat16_device_caller,
            caller_sig,
            (selfptr, *arg_slots),
        )
        return builder.load(
            selfptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

    @lower_cast(int16, _type___nv_bfloat16)
    def conversion_impl(context, builder, fromty, toty, value):
        # A cast is lowered as a one-argument constructor call.
        ctor_sig = signature(_type___nv_bfloat16, fromty)
        return ctor_impl(context, builder, ctor_sig, [value])


_lower__ZN13__nv_bfloat16C1Es(shim_stream, shim_obj)


def _lower__ZN13__nv_bfloat16C1Et(shim_stream, shim_obj):
    """Register lowering for ``__nv_bfloat16(uint16)``.

    The shim takes the argument as ``unsigned short*`` and
    placement-constructs a ``__nv_bfloat16`` from it. A ``uint16`` to
    ``__nv_bfloat16`` cast is registered on top of the same lowering.
    """
    shim_symbol = "_ZN13__nv_bfloat16C1Et_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZN13__nv_bfloat16C1Et_nbst(int &ignore, __nv_bfloat16 *self , unsigned short* val) {
        new (self) __nv_bfloat16(*val);
        return 0;
    }
        """

    _ctor_decl___nv_bfloat16 = declare_device(
        shim_symbol,
        int32(CPointer(_type___nv_bfloat16), CPointer(uint16)),
    )

    def __nv_bfloat16_device_caller(arg_0, arg_1):
        return _ctor_decl___nv_bfloat16(arg_0, arg_1)

    @lower(__nv_bfloat16, uint16)
    def ctor_impl(context, builder, sig, args):
        # Register the shim object for linking and emit the shim source
        # (written only the first time this key is seen).
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_symbol, shim_source)

        # Stack slot that receives the constructed object.
        selfptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Arguments are handed to the shim by pointer: spill each to a slot.
        arg_slots = [
            builder.alloca(context.get_value_type(argty)) for argty in sig.args
        ]
        for slot, argty, argval in zip(arg_slots, sig.args, args):
            builder.store(argval, slot, align=getattr(argty, "alignof_", None))

        caller_sig = signature(
            int32, CPointer(_type___nv_bfloat16), CPointer(uint16)
        )
        context.compile_internal(
            builder,
            __nv_bfloat16_device_caller,
            caller_sig,
            (selfptr, *arg_slots),
        )
        return builder.load(
            selfptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

    @lower_cast(uint16, _type___nv_bfloat16)
    def conversion_impl(context, builder, fromty, toty, value):
        # A cast is lowered as a one-argument constructor call.
        ctor_sig = signature(_type___nv_bfloat16, fromty)
        return ctor_impl(context, builder, ctor_sig, [value])


_lower__ZN13__nv_bfloat16C1Et(shim_stream, shim_obj)


def _lower__ZN13__nv_bfloat16C1Ei(shim_stream, shim_obj):
    """Register lowering for ``__nv_bfloat16(int32)``.

    The shim takes the argument as ``int*`` and placement-constructs a
    ``__nv_bfloat16`` from it. An ``int32`` to ``__nv_bfloat16`` cast is
    registered on top of the same lowering.
    """
    shim_symbol = "_ZN13__nv_bfloat16C1Ei_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZN13__nv_bfloat16C1Ei_nbst(int &ignore, __nv_bfloat16 *self , int* val) {
        new (self) __nv_bfloat16(*val);
        return 0;
    }
        """

    _ctor_decl___nv_bfloat16 = declare_device(
        shim_symbol,
        int32(CPointer(_type___nv_bfloat16), CPointer(int32)),
    )

    def __nv_bfloat16_device_caller(arg_0, arg_1):
        return _ctor_decl___nv_bfloat16(arg_0, arg_1)

    @lower(__nv_bfloat16, int32)
    def ctor_impl(context, builder, sig, args):
        # Register the shim object for linking and emit the shim source
        # (written only the first time this key is seen).
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_symbol, shim_source)

        # Stack slot that receives the constructed object.
        selfptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Arguments are handed to the shim by pointer: spill each to a slot.
        arg_slots = [
            builder.alloca(context.get_value_type(argty)) for argty in sig.args
        ]
        for slot, argty, argval in zip(arg_slots, sig.args, args):
            builder.store(argval, slot, align=getattr(argty, "alignof_", None))

        caller_sig = signature(
            int32, CPointer(_type___nv_bfloat16), CPointer(int32)
        )
        context.compile_internal(
            builder,
            __nv_bfloat16_device_caller,
            caller_sig,
            (selfptr, *arg_slots),
        )
        return builder.load(
            selfptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

    @lower_cast(int32, _type___nv_bfloat16)
    def conversion_impl(context, builder, fromty, toty, value):
        # A cast is lowered as a one-argument constructor call.
        ctor_sig = signature(_type___nv_bfloat16, fromty)
        return ctor_impl(context, builder, ctor_sig, [value])


_lower__ZN13__nv_bfloat16C1Ei(shim_stream, shim_obj)


def _lower__ZN13__nv_bfloat16C1Ej(shim_stream, shim_obj):
    """Register lowering for ``__nv_bfloat16(uint32)``.

    The shim takes the argument as ``unsigned int*`` and
    placement-constructs a ``__nv_bfloat16`` from it. A ``uint32`` to
    ``__nv_bfloat16`` cast is registered on top of the same lowering.
    """
    shim_symbol = "_ZN13__nv_bfloat16C1Ej_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZN13__nv_bfloat16C1Ej_nbst(int &ignore, __nv_bfloat16 *self , unsigned int* val) {
        new (self) __nv_bfloat16(*val);
        return 0;
    }
        """

    _ctor_decl___nv_bfloat16 = declare_device(
        shim_symbol,
        int32(CPointer(_type___nv_bfloat16), CPointer(uint32)),
    )

    def __nv_bfloat16_device_caller(arg_0, arg_1):
        return _ctor_decl___nv_bfloat16(arg_0, arg_1)

    @lower(__nv_bfloat16, uint32)
    def ctor_impl(context, builder, sig, args):
        # Register the shim object for linking and emit the shim source
        # (written only the first time this key is seen).
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_symbol, shim_source)

        # Stack slot that receives the constructed object.
        selfptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Arguments are handed to the shim by pointer: spill each to a slot.
        arg_slots = [
            builder.alloca(context.get_value_type(argty)) for argty in sig.args
        ]
        for slot, argty, argval in zip(arg_slots, sig.args, args):
            builder.store(argval, slot, align=getattr(argty, "alignof_", None))

        caller_sig = signature(
            int32, CPointer(_type___nv_bfloat16), CPointer(uint32)
        )
        context.compile_internal(
            builder,
            __nv_bfloat16_device_caller,
            caller_sig,
            (selfptr, *arg_slots),
        )
        return builder.load(
            selfptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

    @lower_cast(uint32, _type___nv_bfloat16)
    def conversion_impl(context, builder, fromty, toty, value):
        # A cast is lowered as a one-argument constructor call.
        ctor_sig = signature(_type___nv_bfloat16, fromty)
        return ctor_impl(context, builder, ctor_sig, [value])


_lower__ZN13__nv_bfloat16C1Ej(shim_stream, shim_obj)


def _lower__ZN13__nv_bfloat16C1El(shim_stream, shim_obj):
    """Register lowering for ``__nv_bfloat16(int64)`` via the ``long`` shim.

    The shim takes the argument as ``long*`` and placement-constructs a
    ``__nv_bfloat16`` from it. An ``int64`` to ``__nv_bfloat16`` cast is
    registered on top of the same lowering.

    NOTE(review): the ``long long`` constructor binding in this file
    registers the same ``(__nv_bfloat16, int64)`` lowering and cast.
    """
    shim_symbol = "_ZN13__nv_bfloat16C1El_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZN13__nv_bfloat16C1El_nbst(int &ignore, __nv_bfloat16 *self , long* val) {
        new (self) __nv_bfloat16(*val);
        return 0;
    }
        """

    _ctor_decl___nv_bfloat16 = declare_device(
        shim_symbol,
        int32(CPointer(_type___nv_bfloat16), CPointer(int64)),
    )

    def __nv_bfloat16_device_caller(arg_0, arg_1):
        return _ctor_decl___nv_bfloat16(arg_0, arg_1)

    @lower(__nv_bfloat16, int64)
    def ctor_impl(context, builder, sig, args):
        # Register the shim object for linking and emit the shim source
        # (written only the first time this key is seen).
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_symbol, shim_source)

        # Stack slot that receives the constructed object.
        selfptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Arguments are handed to the shim by pointer: spill each to a slot.
        arg_slots = [
            builder.alloca(context.get_value_type(argty)) for argty in sig.args
        ]
        for slot, argty, argval in zip(arg_slots, sig.args, args):
            builder.store(argval, slot, align=getattr(argty, "alignof_", None))

        caller_sig = signature(
            int32, CPointer(_type___nv_bfloat16), CPointer(int64)
        )
        context.compile_internal(
            builder,
            __nv_bfloat16_device_caller,
            caller_sig,
            (selfptr, *arg_slots),
        )
        return builder.load(
            selfptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

    @lower_cast(int64, _type___nv_bfloat16)
    def conversion_impl(context, builder, fromty, toty, value):
        # A cast is lowered as a one-argument constructor call.
        ctor_sig = signature(_type___nv_bfloat16, fromty)
        return ctor_impl(context, builder, ctor_sig, [value])


_lower__ZN13__nv_bfloat16C1El(shim_stream, shim_obj)


def _lower__ZN13__nv_bfloat16C1Em(shim_stream, shim_obj):
    """Register lowering for ``__nv_bfloat16(uint64)`` via the
    ``unsigned long`` shim.

    The shim takes the argument as ``unsigned long*`` and
    placement-constructs a ``__nv_bfloat16`` from it. A ``uint64`` to
    ``__nv_bfloat16`` cast is registered on top of the same lowering.

    NOTE(review): the ``unsigned long long`` constructor binding in this
    file registers the same ``(__nv_bfloat16, uint64)`` lowering and cast.
    """
    shim_symbol = "_ZN13__nv_bfloat16C1Em_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZN13__nv_bfloat16C1Em_nbst(int &ignore, __nv_bfloat16 *self , unsigned long* val) {
        new (self) __nv_bfloat16(*val);
        return 0;
    }
        """

    _ctor_decl___nv_bfloat16 = declare_device(
        shim_symbol,
        int32(CPointer(_type___nv_bfloat16), CPointer(uint64)),
    )

    def __nv_bfloat16_device_caller(arg_0, arg_1):
        return _ctor_decl___nv_bfloat16(arg_0, arg_1)

    @lower(__nv_bfloat16, uint64)
    def ctor_impl(context, builder, sig, args):
        # Register the shim object for linking and emit the shim source
        # (written only the first time this key is seen).
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_symbol, shim_source)

        # Stack slot that receives the constructed object.
        selfptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Arguments are handed to the shim by pointer: spill each to a slot.
        arg_slots = [
            builder.alloca(context.get_value_type(argty)) for argty in sig.args
        ]
        for slot, argty, argval in zip(arg_slots, sig.args, args):
            builder.store(argval, slot, align=getattr(argty, "alignof_", None))

        caller_sig = signature(
            int32, CPointer(_type___nv_bfloat16), CPointer(uint64)
        )
        context.compile_internal(
            builder,
            __nv_bfloat16_device_caller,
            caller_sig,
            (selfptr, *arg_slots),
        )
        return builder.load(
            selfptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

    @lower_cast(uint64, _type___nv_bfloat16)
    def conversion_impl(context, builder, fromty, toty, value):
        # A cast is lowered as a one-argument constructor call.
        ctor_sig = signature(_type___nv_bfloat16, fromty)
        return ctor_impl(context, builder, ctor_sig, [value])


_lower__ZN13__nv_bfloat16C1Em(shim_stream, shim_obj)


def _lower__ZN13__nv_bfloat16C1Ex(shim_stream, shim_obj):
    """Register lowering for ``__nv_bfloat16(int64)`` via the ``long long``
    shim.

    The shim takes the argument as ``long long*`` and placement-constructs
    a ``__nv_bfloat16`` from it. An ``int64`` to ``__nv_bfloat16`` cast is
    registered on top of the same lowering.

    NOTE(review): the ``long`` constructor binding in this file registers
    the same ``(__nv_bfloat16, int64)`` lowering and cast.
    """
    shim_symbol = "_ZN13__nv_bfloat16C1Ex_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZN13__nv_bfloat16C1Ex_nbst(int &ignore, __nv_bfloat16 *self , long long* val) {
        new (self) __nv_bfloat16(*val);
        return 0;
    }
        """

    _ctor_decl___nv_bfloat16 = declare_device(
        shim_symbol,
        int32(CPointer(_type___nv_bfloat16), CPointer(int64)),
    )

    def __nv_bfloat16_device_caller(arg_0, arg_1):
        return _ctor_decl___nv_bfloat16(arg_0, arg_1)

    @lower(__nv_bfloat16, int64)
    def ctor_impl(context, builder, sig, args):
        # Register the shim object for linking and emit the shim source
        # (written only the first time this key is seen).
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_symbol, shim_source)

        # Stack slot that receives the constructed object.
        selfptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Arguments are handed to the shim by pointer: spill each to a slot.
        arg_slots = [
            builder.alloca(context.get_value_type(argty)) for argty in sig.args
        ]
        for slot, argty, argval in zip(arg_slots, sig.args, args):
            builder.store(argval, slot, align=getattr(argty, "alignof_", None))

        caller_sig = signature(
            int32, CPointer(_type___nv_bfloat16), CPointer(int64)
        )
        context.compile_internal(
            builder,
            __nv_bfloat16_device_caller,
            caller_sig,
            (selfptr, *arg_slots),
        )
        return builder.load(
            selfptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

    @lower_cast(int64, _type___nv_bfloat16)
    def conversion_impl(context, builder, fromty, toty, value):
        # A cast is lowered as a one-argument constructor call.
        ctor_sig = signature(_type___nv_bfloat16, fromty)
        return ctor_impl(context, builder, ctor_sig, [value])


_lower__ZN13__nv_bfloat16C1Ex(shim_stream, shim_obj)


def _lower__ZN13__nv_bfloat16C1Ey(shim_stream, shim_obj):
    """Register lowering for ``__nv_bfloat16(uint64)`` via the
    ``unsigned long long`` shim.

    The shim takes the argument as ``unsigned long long*`` and
    placement-constructs a ``__nv_bfloat16`` from it. A ``uint64`` to
    ``__nv_bfloat16`` cast is registered on top of the same lowering.

    NOTE(review): the ``unsigned long`` constructor binding in this file
    registers the same ``(__nv_bfloat16, uint64)`` lowering and cast.
    """
    shim_symbol = "_ZN13__nv_bfloat16C1Ey_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZN13__nv_bfloat16C1Ey_nbst(int &ignore, __nv_bfloat16 *self , unsigned long long* val) {
        new (self) __nv_bfloat16(*val);
        return 0;
    }
        """

    _ctor_decl___nv_bfloat16 = declare_device(
        shim_symbol,
        int32(CPointer(_type___nv_bfloat16), CPointer(uint64)),
    )

    def __nv_bfloat16_device_caller(arg_0, arg_1):
        return _ctor_decl___nv_bfloat16(arg_0, arg_1)

    @lower(__nv_bfloat16, uint64)
    def ctor_impl(context, builder, sig, args):
        # Register the shim object for linking and emit the shim source
        # (written only the first time this key is seen).
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_symbol, shim_source)

        # Stack slot that receives the constructed object.
        selfptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Arguments are handed to the shim by pointer: spill each to a slot.
        arg_slots = [
            builder.alloca(context.get_value_type(argty)) for argty in sig.args
        ]
        for slot, argty, argval in zip(arg_slots, sig.args, args):
            builder.store(argval, slot, align=getattr(argty, "alignof_", None))

        caller_sig = signature(
            int32, CPointer(_type___nv_bfloat16), CPointer(uint64)
        )
        context.compile_internal(
            builder,
            __nv_bfloat16_device_caller,
            caller_sig,
            (selfptr, *arg_slots),
        )
        return builder.load(
            selfptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

    @lower_cast(uint64, _type___nv_bfloat16)
    def conversion_impl(context, builder, fromty, toty, value):
        # A cast is lowered as a one-argument constructor call.
        ctor_sig = signature(_type___nv_bfloat16, fromty)
        return ctor_impl(context, builder, ctor_sig, [value])


_lower__ZN13__nv_bfloat16C1Ey(shim_stream, shim_obj)


@register
class _ctor_template___nv_bfloat16(ConcreteTemplate):
    """Typing template listing the accepted ``__nv_bfloat16(...)`` calls.

    Each entry in ``cases`` is one constructor signature: the return type
    followed by the argument types.
    """

    # Looked up through globals(): a bare `__nv_bfloat16` inside a class
    # body would be subject to Python's private-name mangling.
    key = globals()["__nv_bfloat16"]
    # The `long`/`long long` constructors (and their unsigned variants) are
    # bound with the same Numba argument types, so int64 and uint64 are
    # listed once each instead of twice.
    cases = [
        signature(_type___nv_bfloat16),
        signature(_type___nv_bfloat16, _type_unnamed1405307),
        signature(_type___nv_bfloat16, float16),
        signature(_type___nv_bfloat16, float32),
        signature(_type___nv_bfloat16, float64),
        signature(_type___nv_bfloat16, int16),
        signature(_type___nv_bfloat16, uint16),
        signature(_type___nv_bfloat16, int32),
        signature(_type___nv_bfloat16, uint32),
        signature(_type___nv_bfloat16, int64),
        signature(_type___nv_bfloat16, uint64),
    ]


# Type the module-level `__nv_bfloat16` object as a function described by
# the constructor template.
register_global(__nv_bfloat16, Function(_ctor_template___nv_bfloat16))


def _from___nv_bfloat16_to__type_unnamed1405307_lower(shim_stream, shim_obj):
    """Register the cast from ``__nv_bfloat16`` to ``unnamed1405307``.

    The shim calls ``operator __nv_bfloat16_raw()`` on the object behind
    ``self`` and stores the result through its first parameter.

    NOTE(review): a later definition in this file reuses this function
    name. Each definition is invoked right after it is defined, so both
    registrations run.
    """
    shim_raw_str = """
    extern "C" __device__ int
    ____nv_bfloat16__ZNK13__nv_bfloat16cv17__nv_bfloat16_rawEv_1(__nv_bfloat16_raw &retval, __nv_bfloat16 *self) {
        retval = self->operator __nv_bfloat16_raw();
        return 0;
    }
        """

    _op_decl___nv_bfloat16 = declare_device(
        "____nv_bfloat16__ZNK13__nv_bfloat16cv17__nv_bfloat16_rawEv_1",
        _type_unnamed1405307(
            CPointer(_type___nv_bfloat16),
        ),
    )

    def _conversion_op_caller___nv_bfloat16(arg):
        return _op_decl___nv_bfloat16(arg)

    @lower_cast(_type___nv_bfloat16, _type_unnamed1405307)
    def impl(context, builder, fromty, toty, value):
        # Register the shim object for linking and emit the shim source
        # (written only the first time this key is seen).
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(
            "____nv_bfloat16__ZNK13__nv_bfloat16cv17__nv_bfloat16_rawEv_1",
            shim_raw_str,
        )
        # The shim takes `self` by pointer, so spill the value to a slot.
        ptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Use `alignof_`, the attribute the rest of this file reads for
        # alignment; an `align` attribute is never defined here, so the
        # previous lookup always fell back to None.
        builder.store(
            value, ptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

        return context.compile_internal(
            builder,
            _conversion_op_caller___nv_bfloat16,
            signature(
                _type_unnamed1405307,
                CPointer(_type___nv_bfloat16),
            ),
            (ptr,),
        )


_from___nv_bfloat16_to__type_unnamed1405307_lower(shim_stream, shim_obj)


def _from___nv_bfloat16_to__type_unnamed1405307_lower(shim_stream, shim_obj):
    """Register the cast from ``__nv_bfloat16`` to ``unnamed1405307``
    using the ``_ZNVK...`` shim symbol.

    The shim calls ``operator __nv_bfloat16_raw()`` on the object behind
    ``self`` and stores the result through its first parameter.

    NOTE(review): this rebinds the name of an earlier function in this file
    and registers the same ``lower_cast`` type pair. The symbol presumably
    corresponds to the volatile-qualified operator overload, but the shim
    calls the operator through a non-volatile pointer; confirm that the
    duplicate registration is intended.
    """
    shim_raw_str = """
    extern "C" __device__ int
    ____nv_bfloat16__ZNVK13__nv_bfloat16cv17__nv_bfloat16_rawEv_1(__nv_bfloat16_raw &retval, __nv_bfloat16 *self) {
        retval = self->operator __nv_bfloat16_raw();
        return 0;
    }
        """

    _op_decl___nv_bfloat16 = declare_device(
        "____nv_bfloat16__ZNVK13__nv_bfloat16cv17__nv_bfloat16_rawEv_1",
        _type_unnamed1405307(
            CPointer(_type___nv_bfloat16),
        ),
    )

    def _conversion_op_caller___nv_bfloat16(arg):
        return _op_decl___nv_bfloat16(arg)

    @lower_cast(_type___nv_bfloat16, _type_unnamed1405307)
    def impl(context, builder, fromty, toty, value):
        # Register the shim object for linking and emit the shim source
        # (written only the first time this key is seen).
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(
            "____nv_bfloat16__ZNVK13__nv_bfloat16cv17__nv_bfloat16_rawEv_1",
            shim_raw_str,
        )
        # The shim takes `self` by pointer, so spill the value to a slot.
        ptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Use `alignof_`, the attribute the rest of this file reads for
        # alignment; an `align` attribute is never defined here, so the
        # previous lookup always fell back to None.
        builder.store(
            value, ptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

        return context.compile_internal(
            builder,
            _conversion_op_caller___nv_bfloat16,
            signature(
                _type_unnamed1405307,
                CPointer(_type___nv_bfloat16),
            ),
            (ptr,),
        )


_from___nv_bfloat16_to__type_unnamed1405307_lower(shim_stream, shim_obj)


def _from___nv_bfloat16_to_float32_lower(shim_stream, shim_obj):
    """Register the ``__nv_bfloat16`` -> ``float32`` cast lowering.

    Lowering spills the value to a stack slot and calls a C++ shim that
    invokes ``operator float()`` on it.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source
            keyed by its symbol name.
        shim_obj: Item handed to ``add_linking_file`` on the active code
            library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    ____nv_bfloat16__ZNK13__nv_bfloat16cvfEv_1(float &retval, __nv_bfloat16 *self) {
        retval = self->operator float();
        return 0;
    }
        """

    _op_decl___nv_bfloat16 = declare_device(
        "____nv_bfloat16__ZNK13__nv_bfloat16cvfEv_1",
        float32(
            CPointer(_type___nv_bfloat16),
        ),
    )

    def _conversion_op_caller___nv_bfloat16(arg):
        return _op_decl___nv_bfloat16(arg)

    @lower_cast(_type___nv_bfloat16, float32)
    def impl(context, builder, fromty, toty, value):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(
            "____nv_bfloat16__ZNK13__nv_bfloat16cvfEv_1", shim_raw_str
        )
        # The shim takes ``self`` by pointer, so the value needs an address.
        ptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # ``alignof_`` is the alignment attribute name read by the constructor
        # lowerings in this file; the former ``"align"`` lookup did not match.
        builder.store(
            value, ptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

        return context.compile_internal(
            builder,
            _conversion_op_caller___nv_bfloat16,
            signature(
                float32,
                CPointer(_type___nv_bfloat16),
            ),
            (ptr,),
        )


_from___nv_bfloat16_to_float32_lower(shim_stream, shim_obj)


def _from___nv_bfloat16_to_int8_lower(shim_stream, shim_obj):
    """Register the ``__nv_bfloat16`` -> ``int8`` cast lowering (signed char).

    Lowering spills the value to a stack slot and calls a C++ shim that
    invokes ``operator signed char()`` on it.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source
            keyed by its symbol name.
        shim_obj: Item handed to ``add_linking_file`` on the active code
            library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    ____nv_bfloat16__ZNK13__nv_bfloat16cvaEv_1(signed char &retval, __nv_bfloat16 *self) {
        retval = self->operator signed char();
        return 0;
    }
        """

    _op_decl___nv_bfloat16 = declare_device(
        "____nv_bfloat16__ZNK13__nv_bfloat16cvaEv_1",
        int8(
            CPointer(_type___nv_bfloat16),
        ),
    )

    def _conversion_op_caller___nv_bfloat16(arg):
        return _op_decl___nv_bfloat16(arg)

    @lower_cast(_type___nv_bfloat16, int8)
    def impl(context, builder, fromty, toty, value):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(
            "____nv_bfloat16__ZNK13__nv_bfloat16cvaEv_1", shim_raw_str
        )
        # The shim takes ``self`` by pointer, so the value needs an address.
        ptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # ``alignof_`` is the alignment attribute name read by the constructor
        # lowerings in this file; the former ``"align"`` lookup did not match.
        builder.store(
            value, ptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

        return context.compile_internal(
            builder,
            _conversion_op_caller___nv_bfloat16,
            signature(
                int8,
                CPointer(_type___nv_bfloat16),
            ),
            (ptr,),
        )


_from___nv_bfloat16_to_int8_lower(shim_stream, shim_obj)


def _from___nv_bfloat16_to_uint8_lower(shim_stream, shim_obj):
    """Register the ``__nv_bfloat16`` -> ``uint8`` cast lowering.

    Lowering spills the value to a stack slot and calls a C++ shim that
    invokes ``operator unsigned char()`` on it.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source
            keyed by its symbol name.
        shim_obj: Item handed to ``add_linking_file`` on the active code
            library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    ____nv_bfloat16__ZNK13__nv_bfloat16cvhEv_1(unsigned char &retval, __nv_bfloat16 *self) {
        retval = self->operator unsigned char();
        return 0;
    }
        """

    _op_decl___nv_bfloat16 = declare_device(
        "____nv_bfloat16__ZNK13__nv_bfloat16cvhEv_1",
        uint8(
            CPointer(_type___nv_bfloat16),
        ),
    )

    def _conversion_op_caller___nv_bfloat16(arg):
        return _op_decl___nv_bfloat16(arg)

    @lower_cast(_type___nv_bfloat16, uint8)
    def impl(context, builder, fromty, toty, value):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(
            "____nv_bfloat16__ZNK13__nv_bfloat16cvhEv_1", shim_raw_str
        )
        # The shim takes ``self`` by pointer, so the value needs an address.
        ptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # ``alignof_`` is the alignment attribute name read by the constructor
        # lowerings in this file; the former ``"align"`` lookup did not match.
        builder.store(
            value, ptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

        return context.compile_internal(
            builder,
            _conversion_op_caller___nv_bfloat16,
            signature(
                uint8,
                CPointer(_type___nv_bfloat16),
            ),
            (ptr,),
        )


_from___nv_bfloat16_to_uint8_lower(shim_stream, shim_obj)


def _from___nv_bfloat16_to_int8_lower(shim_stream, shim_obj):
    """Register the ``__nv_bfloat16`` -> ``int8`` cast lowering (plain char).

    Lowering spills the value to a stack slot and calls a C++ shim that
    invokes ``operator char()`` on it.

    NOTE(review): this shares its Python name and its
    ``(_type___nv_bfloat16, int8)`` cast pair with the ``signed char``
    variant defined earlier in this file; confirm which registration is meant
    to take effect.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source
            keyed by its symbol name.
        shim_obj: Item handed to ``add_linking_file`` on the active code
            library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    ____nv_bfloat16__ZNK13__nv_bfloat16cvcEv_1(char &retval, __nv_bfloat16 *self) {
        retval = self->operator char();
        return 0;
    }
        """

    _op_decl___nv_bfloat16 = declare_device(
        "____nv_bfloat16__ZNK13__nv_bfloat16cvcEv_1",
        int8(
            CPointer(_type___nv_bfloat16),
        ),
    )

    def _conversion_op_caller___nv_bfloat16(arg):
        return _op_decl___nv_bfloat16(arg)

    @lower_cast(_type___nv_bfloat16, int8)
    def impl(context, builder, fromty, toty, value):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(
            "____nv_bfloat16__ZNK13__nv_bfloat16cvcEv_1", shim_raw_str
        )
        # The shim takes ``self`` by pointer, so the value needs an address.
        ptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # ``alignof_`` is the alignment attribute name read by the constructor
        # lowerings in this file; the former ``"align"`` lookup did not match.
        builder.store(
            value, ptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

        return context.compile_internal(
            builder,
            _conversion_op_caller___nv_bfloat16,
            signature(
                int8,
                CPointer(_type___nv_bfloat16),
            ),
            (ptr,),
        )


_from___nv_bfloat16_to_int8_lower(shim_stream, shim_obj)


def _from___nv_bfloat16_to_int16_lower(shim_stream, shim_obj):
    """Register the ``__nv_bfloat16`` -> ``int16`` cast lowering.

    Lowering spills the value to a stack slot and calls a C++ shim that
    invokes ``operator short()`` on it.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source
            keyed by its symbol name.
        shim_obj: Item handed to ``add_linking_file`` on the active code
            library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    ____nv_bfloat16__ZNK13__nv_bfloat16cvsEv_1(short &retval, __nv_bfloat16 *self) {
        retval = self->operator short();
        return 0;
    }
        """

    _op_decl___nv_bfloat16 = declare_device(
        "____nv_bfloat16__ZNK13__nv_bfloat16cvsEv_1",
        int16(
            CPointer(_type___nv_bfloat16),
        ),
    )

    def _conversion_op_caller___nv_bfloat16(arg):
        return _op_decl___nv_bfloat16(arg)

    @lower_cast(_type___nv_bfloat16, int16)
    def impl(context, builder, fromty, toty, value):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(
            "____nv_bfloat16__ZNK13__nv_bfloat16cvsEv_1", shim_raw_str
        )
        # The shim takes ``self`` by pointer, so the value needs an address.
        ptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # ``alignof_`` is the alignment attribute name read by the constructor
        # lowerings in this file; the former ``"align"`` lookup did not match.
        builder.store(
            value, ptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

        return context.compile_internal(
            builder,
            _conversion_op_caller___nv_bfloat16,
            signature(
                int16,
                CPointer(_type___nv_bfloat16),
            ),
            (ptr,),
        )


_from___nv_bfloat16_to_int16_lower(shim_stream, shim_obj)


def _from___nv_bfloat16_to_uint16_lower(shim_stream, shim_obj):
    """Register the ``__nv_bfloat16`` -> ``uint16`` cast lowering.

    Lowering spills the value to a stack slot and calls a C++ shim that
    invokes ``operator unsigned short()`` on it.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source
            keyed by its symbol name.
        shim_obj: Item handed to ``add_linking_file`` on the active code
            library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    ____nv_bfloat16__ZNK13__nv_bfloat16cvtEv_1(unsigned short &retval, __nv_bfloat16 *self) {
        retval = self->operator unsigned short();
        return 0;
    }
        """

    _op_decl___nv_bfloat16 = declare_device(
        "____nv_bfloat16__ZNK13__nv_bfloat16cvtEv_1",
        uint16(
            CPointer(_type___nv_bfloat16),
        ),
    )

    def _conversion_op_caller___nv_bfloat16(arg):
        return _op_decl___nv_bfloat16(arg)

    @lower_cast(_type___nv_bfloat16, uint16)
    def impl(context, builder, fromty, toty, value):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(
            "____nv_bfloat16__ZNK13__nv_bfloat16cvtEv_1", shim_raw_str
        )
        # The shim takes ``self`` by pointer, so the value needs an address.
        ptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # ``alignof_`` is the alignment attribute name read by the constructor
        # lowerings in this file; the former ``"align"`` lookup did not match.
        builder.store(
            value, ptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

        return context.compile_internal(
            builder,
            _conversion_op_caller___nv_bfloat16,
            signature(
                uint16,
                CPointer(_type___nv_bfloat16),
            ),
            (ptr,),
        )


_from___nv_bfloat16_to_uint16_lower(shim_stream, shim_obj)


def _from___nv_bfloat16_to_int32_lower(shim_stream, shim_obj):
    """Register the ``__nv_bfloat16`` -> ``int32`` cast lowering.

    Lowering spills the value to a stack slot and calls a C++ shim that
    invokes ``operator int()`` on it.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source
            keyed by its symbol name.
        shim_obj: Item handed to ``add_linking_file`` on the active code
            library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    ____nv_bfloat16__ZNK13__nv_bfloat16cviEv_1(int &retval, __nv_bfloat16 *self) {
        retval = self->operator int();
        return 0;
    }
        """

    _op_decl___nv_bfloat16 = declare_device(
        "____nv_bfloat16__ZNK13__nv_bfloat16cviEv_1",
        int32(
            CPointer(_type___nv_bfloat16),
        ),
    )

    def _conversion_op_caller___nv_bfloat16(arg):
        return _op_decl___nv_bfloat16(arg)

    @lower_cast(_type___nv_bfloat16, int32)
    def impl(context, builder, fromty, toty, value):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(
            "____nv_bfloat16__ZNK13__nv_bfloat16cviEv_1", shim_raw_str
        )
        # The shim takes ``self`` by pointer, so the value needs an address.
        ptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # ``alignof_`` is the alignment attribute name read by the constructor
        # lowerings in this file; the former ``"align"`` lookup did not match.
        builder.store(
            value, ptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

        return context.compile_internal(
            builder,
            _conversion_op_caller___nv_bfloat16,
            signature(
                int32,
                CPointer(_type___nv_bfloat16),
            ),
            (ptr,),
        )


_from___nv_bfloat16_to_int32_lower(shim_stream, shim_obj)


def _from___nv_bfloat16_to_uint32_lower(shim_stream, shim_obj):
    """Register the ``__nv_bfloat16`` -> ``uint32`` cast lowering.

    Lowering spills the value to a stack slot and calls a C++ shim that
    invokes ``operator unsigned int()`` on it.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source
            keyed by its symbol name.
        shim_obj: Item handed to ``add_linking_file`` on the active code
            library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    ____nv_bfloat16__ZNK13__nv_bfloat16cvjEv_1(unsigned int &retval, __nv_bfloat16 *self) {
        retval = self->operator unsigned int();
        return 0;
    }
        """

    _op_decl___nv_bfloat16 = declare_device(
        "____nv_bfloat16__ZNK13__nv_bfloat16cvjEv_1",
        uint32(
            CPointer(_type___nv_bfloat16),
        ),
    )

    def _conversion_op_caller___nv_bfloat16(arg):
        return _op_decl___nv_bfloat16(arg)

    @lower_cast(_type___nv_bfloat16, uint32)
    def impl(context, builder, fromty, toty, value):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(
            "____nv_bfloat16__ZNK13__nv_bfloat16cvjEv_1", shim_raw_str
        )
        # The shim takes ``self`` by pointer, so the value needs an address.
        ptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # ``alignof_`` is the alignment attribute name read by the constructor
        # lowerings in this file; the former ``"align"`` lookup did not match.
        builder.store(
            value, ptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

        return context.compile_internal(
            builder,
            _conversion_op_caller___nv_bfloat16,
            signature(
                uint32,
                CPointer(_type___nv_bfloat16),
            ),
            (ptr,),
        )


_from___nv_bfloat16_to_uint32_lower(shim_stream, shim_obj)


def _from___nv_bfloat16_to_int64_lower(shim_stream, shim_obj):
    """Register the ``__nv_bfloat16`` -> ``int64`` cast lowering (``long``).

    Lowering spills the value to a stack slot and calls a C++ shim that
    invokes ``operator long()`` on it.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source
            keyed by its symbol name.
        shim_obj: Item handed to ``add_linking_file`` on the active code
            library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    ____nv_bfloat16__ZNK13__nv_bfloat16cvlEv_1(long &retval, __nv_bfloat16 *self) {
        retval = self->operator long();
        return 0;
    }
        """

    _op_decl___nv_bfloat16 = declare_device(
        "____nv_bfloat16__ZNK13__nv_bfloat16cvlEv_1",
        int64(
            CPointer(_type___nv_bfloat16),
        ),
    )

    def _conversion_op_caller___nv_bfloat16(arg):
        return _op_decl___nv_bfloat16(arg)

    @lower_cast(_type___nv_bfloat16, int64)
    def impl(context, builder, fromty, toty, value):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(
            "____nv_bfloat16__ZNK13__nv_bfloat16cvlEv_1", shim_raw_str
        )
        # The shim takes ``self`` by pointer, so the value needs an address.
        ptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # ``alignof_`` is the alignment attribute name read by the constructor
        # lowerings in this file; the former ``"align"`` lookup did not match.
        builder.store(
            value, ptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

        return context.compile_internal(
            builder,
            _conversion_op_caller___nv_bfloat16,
            signature(
                int64,
                CPointer(_type___nv_bfloat16),
            ),
            (ptr,),
        )


_from___nv_bfloat16_to_int64_lower(shim_stream, shim_obj)


def _from___nv_bfloat16_to_uint64_lower(shim_stream, shim_obj):
    """Register the ``__nv_bfloat16`` -> ``uint64`` cast lowering.

    This variant wraps ``operator unsigned long()``: lowering spills the
    value to a stack slot and calls a C++ shim that invokes that operator.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source
            keyed by its symbol name.
        shim_obj: Item handed to ``add_linking_file`` on the active code
            library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    ____nv_bfloat16__ZNK13__nv_bfloat16cvmEv_1(unsigned long &retval, __nv_bfloat16 *self) {
        retval = self->operator unsigned long();
        return 0;
    }
        """

    _op_decl___nv_bfloat16 = declare_device(
        "____nv_bfloat16__ZNK13__nv_bfloat16cvmEv_1",
        uint64(
            CPointer(_type___nv_bfloat16),
        ),
    )

    def _conversion_op_caller___nv_bfloat16(arg):
        return _op_decl___nv_bfloat16(arg)

    @lower_cast(_type___nv_bfloat16, uint64)
    def impl(context, builder, fromty, toty, value):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(
            "____nv_bfloat16__ZNK13__nv_bfloat16cvmEv_1", shim_raw_str
        )
        # The shim takes ``self`` by pointer, so the value needs an address.
        ptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # ``alignof_`` is the alignment attribute name read by the constructor
        # lowerings in this file; the former ``"align"`` lookup did not match.
        builder.store(
            value, ptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

        return context.compile_internal(
            builder,
            _conversion_op_caller___nv_bfloat16,
            signature(
                uint64,
                CPointer(_type___nv_bfloat16),
            ),
            (ptr,),
        )


_from___nv_bfloat16_to_uint64_lower(shim_stream, shim_obj)


def _from___nv_bfloat16_to_int64_lower(shim_stream, shim_obj):
    """Register the ``__nv_bfloat16`` -> ``int64`` cast lowering (``long long``).

    Lowering spills the value to a stack slot and calls a C++ shim that
    invokes ``operator long long()`` on it.

    NOTE(review): this shares its Python name and its
    ``(_type___nv_bfloat16, int64)`` cast pair with the ``long`` variant
    defined earlier in this file; confirm which registration is meant to
    take effect.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source
            keyed by its symbol name.
        shim_obj: Item handed to ``add_linking_file`` on the active code
            library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    ____nv_bfloat16__ZNK13__nv_bfloat16cvxEv_1(long long &retval, __nv_bfloat16 *self) {
        retval = self->operator long long();
        return 0;
    }
        """

    _op_decl___nv_bfloat16 = declare_device(
        "____nv_bfloat16__ZNK13__nv_bfloat16cvxEv_1",
        int64(
            CPointer(_type___nv_bfloat16),
        ),
    )

    def _conversion_op_caller___nv_bfloat16(arg):
        return _op_decl___nv_bfloat16(arg)

    @lower_cast(_type___nv_bfloat16, int64)
    def impl(context, builder, fromty, toty, value):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(
            "____nv_bfloat16__ZNK13__nv_bfloat16cvxEv_1", shim_raw_str
        )
        # The shim takes ``self`` by pointer, so the value needs an address.
        ptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # ``alignof_`` is the alignment attribute name read by the constructor
        # lowerings in this file; the former ``"align"`` lookup did not match.
        builder.store(
            value, ptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

        return context.compile_internal(
            builder,
            _conversion_op_caller___nv_bfloat16,
            signature(
                int64,
                CPointer(_type___nv_bfloat16),
            ),
            (ptr,),
        )


_from___nv_bfloat16_to_int64_lower(shim_stream, shim_obj)


def _from___nv_bfloat16_to_uint64_lower(shim_stream, shim_obj):
    """Register the ``__nv_bfloat16`` -> ``uint64`` cast lowering.

    This variant wraps ``operator unsigned long long()``: lowering spills the
    value to a stack slot and calls a C++ shim that invokes that operator.

    NOTE(review): this shares its Python name and its
    ``(_type___nv_bfloat16, uint64)`` cast pair with the ``unsigned long``
    variant defined earlier in this file; confirm which registration is meant
    to take effect.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source
            keyed by its symbol name.
        shim_obj: Item handed to ``add_linking_file`` on the active code
            library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    ____nv_bfloat16__ZNK13__nv_bfloat16cvyEv_1(unsigned long long &retval, __nv_bfloat16 *self) {
        retval = self->operator unsigned long long();
        return 0;
    }
        """

    _op_decl___nv_bfloat16 = declare_device(
        "____nv_bfloat16__ZNK13__nv_bfloat16cvyEv_1",
        uint64(
            CPointer(_type___nv_bfloat16),
        ),
    )

    def _conversion_op_caller___nv_bfloat16(arg):
        return _op_decl___nv_bfloat16(arg)

    @lower_cast(_type___nv_bfloat16, uint64)
    def impl(context, builder, fromty, toty, value):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(
            "____nv_bfloat16__ZNK13__nv_bfloat16cvyEv_1", shim_raw_str
        )
        # The shim takes ``self`` by pointer, so the value needs an address.
        ptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # ``alignof_`` is the alignment attribute name read by the constructor
        # lowerings in this file; the former ``"align"`` lookup did not match.
        builder.store(
            value, ptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

        return context.compile_internal(
            builder,
            _conversion_op_caller___nv_bfloat16,
            signature(
                uint64,
                CPointer(_type___nv_bfloat16),
            ),
            (ptr,),
        )


_from___nv_bfloat16_to_uint64_lower(shim_stream, shim_obj)


def _from___nv_bfloat16_to_bool__lower(shim_stream, shim_obj):
    """Register the ``__nv_bfloat16`` -> ``bool_`` cast lowering.

    Lowering spills the value to a stack slot and calls a C++ shim that
    invokes ``operator bool()`` on it.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source
            keyed by its symbol name.
        shim_obj: Item handed to ``add_linking_file`` on the active code
            library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    ____nv_bfloat16__ZNK13__nv_bfloat16cvbEv_1(bool &retval, __nv_bfloat16 *self) {
        retval = self->operator bool();
        return 0;
    }
        """

    _op_decl___nv_bfloat16 = declare_device(
        "____nv_bfloat16__ZNK13__nv_bfloat16cvbEv_1",
        bool_(
            CPointer(_type___nv_bfloat16),
        ),
    )

    def _conversion_op_caller___nv_bfloat16(arg):
        return _op_decl___nv_bfloat16(arg)

    @lower_cast(_type___nv_bfloat16, bool_)
    def impl(context, builder, fromty, toty, value):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(
            "____nv_bfloat16__ZNK13__nv_bfloat16cvbEv_1", shim_raw_str
        )
        # The shim takes ``self`` by pointer, so the value needs an address.
        ptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # ``alignof_`` is the alignment attribute name read by the constructor
        # lowerings in this file; the former ``"align"`` lookup did not match.
        builder.store(
            value, ptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

        return context.compile_internal(
            builder,
            _conversion_op_caller___nv_bfloat16,
            signature(
                bool_,
                CPointer(_type___nv_bfloat16),
            ),
            (ptr,),
        )


_from___nv_bfloat16_to_bool__lower(shim_stream, shim_obj)


# C++ does not provide a conversion operator from bfloat16 to double, so we need to implement it manually.
def _from___nv_bfloat16_to_float64__lower():
    """Register the ``__nv_bfloat16`` -> ``float64`` cast lowering.

    No shim is involved: the cast is emitted directly as LLVM IR by widening
    the value to 32 bits, moving it into the upper half, reinterpreting the
    result as a float and extending that float to double.
    """

    @lower_cast(_type___nv_bfloat16, float64)
    def impl(context, builder, fromty, toty, value):
        i32 = ir.IntType(32)
        # Zero-extend, then shift the payload into the top 16 bits.
        widened = builder.zext(value, i32)
        f32_bits = builder.shl(widened, ir.Constant(i32, 16))
        # Reinterpret the bits as float, then widen float -> double.
        single = builder.bitcast(f32_bits, ir.FloatType())
        return builder.fpext(single, ir.DoubleType())


_from___nv_bfloat16_to_float64__lower()


def _literalint_to_bf16_lower():
    """Register the cast lowering from an integer literal to ``__nv_bfloat16``.

    The literal is first cast to ``float32`` through ``context.cast``; the
    bfloat16 payload is then taken from the upper 16 bits of that float's bit
    pattern.
    """

    @lower_cast(types.IntegerLiteral, _type___nv_bfloat16)
    def impl(context, builder, fromty, toty, value):
        f32 = context.cast(builder, value, fromty, float32)
        i32 = builder.bitcast(f32, ir.IntType(32))
        # The bfloat16 -> float64 lowering in this file rebuilds the float32
        # by shifting the 16-bit payload LEFT by 16, so the payload is the
        # high half of the float32 pattern. Truncating ``i32`` directly would
        # keep the low half instead (all zero for small integers), so shift
        # the high half down first. The discarded low bits are simply dropped
        # (no rounding).
        high_half = builder.lshr(i32, ir.Constant(ir.IntType(32), 16))
        i16 = builder.trunc(high_half, ir.IntType(16))
        return i16


_literalint_to_bf16_lower()


# Typing for __nv_bfloat162
class _type_class___nv_bfloat162(Type):
    """Numba type class for ``__nv_bfloat162``.

    Instances carry ``alignof_ = 4`` and ``bitwidth = 32``.
    """

    def __init__(self):
        super().__init__(name="__nv_bfloat162")
        self.bitwidth = 32
        self.alignof_ = 4


_type___nv_bfloat162 = _type_class___nv_bfloat162()


# Make Python API for struct: a dynamically created, empty class whose
# ``_nbtype`` attribute refers to the Numba type instance for the struct.
__nv_bfloat162 = type("__nv_bfloat162", (), {"_nbtype": _type___nv_bfloat162})

# Pair the Python-side class with its Numba type in ``as_numba_type``.
as_numba_type.register(__nv_bfloat162, _type___nv_bfloat162)


@register_model(_type_class___nv_bfloat162)
class _model___nv_bfloat162(StructModel):
    """Data model for ``__nv_bfloat162``: a struct with members ``x`` and ``y``.

    Both members have the ``__nv_bfloat16`` type.
    """

    def __init__(self, dmm, fe_type):
        super().__init__(
            dmm,
            fe_type,
            [("x", _type___nv_bfloat16), ("y", _type___nv_bfloat16)],
        )


@register_attr
class _attr_typing___nv_bfloat162(AttributeTemplate):
    """Attribute typing template: ``x`` and ``y`` resolve to ``__nv_bfloat16``."""

    # Fetched through globals(), presumably because a bare ``__nv_bfloat162``
    # inside a class body would be subject to Python's private-name mangling.
    # NOTE(review): the key is the Python-side class object rather than the
    # Numba type instance -- confirm this is the key ``register_attr`` expects.
    key = globals()["__nv_bfloat162"]

    def resolve_x(self, obj):
        # Type of the ``x`` member.
        return _type___nv_bfloat16

    def resolve_y(self, obj):
        # Type of the ``y`` member.
        return _type___nv_bfloat16


# Expose struct member ``x`` as attribute ``x`` on the type class.
make_attribute_wrapper(_type_class___nv_bfloat162, "x", "x")


# Expose struct member ``y`` as attribute ``y`` on the type class.
make_attribute_wrapper(_type_class___nv_bfloat162, "y", "y")


def _lower__ZN14__nv_bfloat162C1Ev(shim_stream, shim_obj):
    """Register the lowering of the zero-argument ``__nv_bfloat162()`` call.

    The lowering allocates a stack slot for the new object, lets a C++ shim
    placement-construct into it, and returns the loaded value.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source
            keyed by its symbol name.
        shim_obj: Item handed to ``add_linking_file`` on the active code
            library.
    """
    shim_name = "_ZN14__nv_bfloat162C1Ev_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZN14__nv_bfloat162C1Ev_nbst(int &ignore, __nv_bfloat162 *self ) {
        new (self) __nv_bfloat162();
        return 0;
    }
        """
    self_ptr_ty = CPointer(_type___nv_bfloat162)

    _ctor_decl___nv_bfloat162 = declare_device(shim_name, int32(self_ptr_ty))

    def __nv_bfloat162_device_caller(arg_0):
        return _ctor_decl___nv_bfloat162(arg_0)

    @lower(__nv_bfloat162)
    def ctor_impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Storage the shim constructs the object into.
        self_slot = builder.alloca(
            context.get_value_type(_type___nv_bfloat162), name="selfptr"
        )
        # Every argument is passed to the shim by pointer.
        arg_slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, arg_val in zip(arg_slots, sig.args, args):
            builder.store(
                arg_val, slot, align=getattr(arg_ty, "alignof_", None)
            )

        context.compile_internal(
            builder,
            __nv_bfloat162_device_caller,
            signature(int32, self_ptr_ty),
            (self_slot, *arg_slots),
        )
        self_align = getattr(_type___nv_bfloat162, "alignof_", None)
        return builder.load(self_slot, align=self_align)


_lower__ZN14__nv_bfloat162C1Ev(shim_stream, shim_obj)


def _lower__ZN14__nv_bfloat162C1EOS_(shim_stream, shim_obj):
    """Register the lowering of ``__nv_bfloat162(__nv_bfloat162)``.

    The lowering spills the source value to the stack, lets a C++ shim
    placement-construct a new object from ``*src``, and returns the loaded
    result.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source
            keyed by its symbol name.
        shim_obj: Item handed to ``add_linking_file`` on the active code
            library.
    """
    shim_name = "_ZN14__nv_bfloat162C1EOS__nbst"
    shim_source = """
    extern "C" __device__ int
    _ZN14__nv_bfloat162C1EOS__nbst(int &ignore, __nv_bfloat162 *self , __nv_bfloat162* src) {
        new (self) __nv_bfloat162(*src);
        return 0;
    }
        """
    self_ptr_ty = CPointer(_type___nv_bfloat162)

    _ctor_decl___nv_bfloat162 = declare_device(
        shim_name, int32(self_ptr_ty, self_ptr_ty)
    )

    def __nv_bfloat162_device_caller(arg_0, arg_1):
        return _ctor_decl___nv_bfloat162(arg_0, arg_1)

    @lower(__nv_bfloat162, _type___nv_bfloat162)
    def ctor_impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Storage the shim constructs the object into.
        self_slot = builder.alloca(
            context.get_value_type(_type___nv_bfloat162), name="selfptr"
        )
        # Every argument is passed to the shim by pointer.
        arg_slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, arg_val in zip(arg_slots, sig.args, args):
            builder.store(
                arg_val, slot, align=getattr(arg_ty, "alignof_", None)
            )

        context.compile_internal(
            builder,
            __nv_bfloat162_device_caller,
            signature(int32, self_ptr_ty, self_ptr_ty),
            (self_slot, *arg_slots),
        )
        self_align = getattr(_type___nv_bfloat162, "alignof_", None)
        return builder.load(self_slot, align=self_align)


_lower__ZN14__nv_bfloat162C1EOS_(shim_stream, shim_obj)


def _lower__ZN14__nv_bfloat162C1ERK13__nv_bfloat16S2_(shim_stream, shim_obj):
    """Register the lowering of ``__nv_bfloat162(__nv_bfloat16, __nv_bfloat16)``.

    The lowering spills both arguments to the stack, lets a C++ shim
    placement-construct the pair from ``*a`` and ``*b``, and returns the
    loaded result.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source
            keyed by its symbol name.
        shim_obj: Item handed to ``add_linking_file`` on the active code
            library.
    """
    shim_name = "_ZN14__nv_bfloat162C1ERK13__nv_bfloat16S2__nbst"
    shim_source = """
    extern "C" __device__ int
    _ZN14__nv_bfloat162C1ERK13__nv_bfloat16S2__nbst(int &ignore, __nv_bfloat162 *self , __nv_bfloat16* a, __nv_bfloat16* b) {
        new (self) __nv_bfloat162(*a, *b);
        return 0;
    }
        """
    self_ptr_ty = CPointer(_type___nv_bfloat162)
    elem_ptr_ty = CPointer(_type___nv_bfloat16)

    _ctor_decl___nv_bfloat162 = declare_device(
        shim_name, int32(self_ptr_ty, elem_ptr_ty, elem_ptr_ty)
    )

    def __nv_bfloat162_device_caller(arg_0, arg_1, arg_2):
        return _ctor_decl___nv_bfloat162(arg_0, arg_1, arg_2)

    @lower(__nv_bfloat162, _type___nv_bfloat16, _type___nv_bfloat16)
    def ctor_impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Storage the shim constructs the object into.
        self_slot = builder.alloca(
            context.get_value_type(_type___nv_bfloat162), name="selfptr"
        )
        # Every argument is passed to the shim by pointer.
        arg_slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, arg_val in zip(arg_slots, sig.args, args):
            builder.store(
                arg_val, slot, align=getattr(arg_ty, "alignof_", None)
            )

        context.compile_internal(
            builder,
            __nv_bfloat162_device_caller,
            signature(int32, self_ptr_ty, elem_ptr_ty, elem_ptr_ty),
            (self_slot, *arg_slots),
        )
        self_align = getattr(_type___nv_bfloat162, "alignof_", None)
        return builder.load(self_slot, align=self_align)


_lower__ZN14__nv_bfloat162C1ERK13__nv_bfloat16S2_(shim_stream, shim_obj)


def _lower__ZN14__nv_bfloat162C1ERKS_(shim_stream, shim_obj):
    """Register the lowering of ``__nv_bfloat162(__nv_bfloat162)``.

    The lowering spills the source value to the stack, lets a C++ shim
    placement-construct a new object from ``*src``, and returns the loaded
    result.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source
            keyed by its symbol name.
        shim_obj: Item handed to ``add_linking_file`` on the active code
            library.
    """
    shim_name = "_ZN14__nv_bfloat162C1ERKS__nbst"
    shim_source = """
    extern "C" __device__ int
    _ZN14__nv_bfloat162C1ERKS__nbst(int &ignore, __nv_bfloat162 *self , __nv_bfloat162* src) {
        new (self) __nv_bfloat162(*src);
        return 0;
    }
        """
    self_ptr_ty = CPointer(_type___nv_bfloat162)

    _ctor_decl___nv_bfloat162 = declare_device(
        shim_name, int32(self_ptr_ty, self_ptr_ty)
    )

    def __nv_bfloat162_device_caller(arg_0, arg_1):
        return _ctor_decl___nv_bfloat162(arg_0, arg_1)

    @lower(__nv_bfloat162, _type___nv_bfloat162)
    def ctor_impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Storage the shim constructs the object into.
        self_slot = builder.alloca(
            context.get_value_type(_type___nv_bfloat162), name="selfptr"
        )
        # Every argument is passed to the shim by pointer.
        arg_slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, arg_val in zip(arg_slots, sig.args, args):
            builder.store(
                arg_val, slot, align=getattr(arg_ty, "alignof_", None)
            )

        context.compile_internal(
            builder,
            __nv_bfloat162_device_caller,
            signature(int32, self_ptr_ty, self_ptr_ty),
            (self_slot, *arg_slots),
        )
        self_align = getattr(_type___nv_bfloat162, "alignof_", None)
        return builder.load(self_slot, align=self_align)


_lower__ZN14__nv_bfloat162C1ERKS_(shim_stream, shim_obj)


def _lower__ZN14__nv_bfloat162C1ERK18__nv_bfloat162_raw(shim_stream, shim_obj):
    """Register construction of ``__nv_bfloat162`` from ``__nv_bfloat162_raw``.

    Two lowerings are registered: the explicit constructor call, and a cast
    from the raw type that forwards to the same constructor lowering.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source
            keyed by its symbol name.
        shim_obj: Item handed to ``add_linking_file`` on the active code
            library.
    """
    shim_name = "_ZN14__nv_bfloat162C1ERK18__nv_bfloat162_raw_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZN14__nv_bfloat162C1ERK18__nv_bfloat162_raw_nbst(int &ignore, __nv_bfloat162 *self , __nv_bfloat162_raw* h2r) {
        new (self) __nv_bfloat162(*h2r);
        return 0;
    }
        """
    self_ptr_ty = CPointer(_type___nv_bfloat162)
    raw_ptr_ty = CPointer(_type_unnamed1405416)

    _ctor_decl___nv_bfloat162 = declare_device(
        shim_name, int32(self_ptr_ty, raw_ptr_ty)
    )

    def __nv_bfloat162_device_caller(arg_0, arg_1):
        return _ctor_decl___nv_bfloat162(arg_0, arg_1)

    @lower(__nv_bfloat162, _type_unnamed1405416)
    def ctor_impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Storage the shim constructs the object into.
        self_slot = builder.alloca(
            context.get_value_type(_type___nv_bfloat162), name="selfptr"
        )
        # Every argument is passed to the shim by pointer.
        arg_slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, arg_val in zip(arg_slots, sig.args, args):
            builder.store(
                arg_val, slot, align=getattr(arg_ty, "alignof_", None)
            )

        context.compile_internal(
            builder,
            __nv_bfloat162_device_caller,
            signature(int32, self_ptr_ty, raw_ptr_ty),
            (self_slot, *arg_slots),
        )
        self_align = getattr(_type___nv_bfloat162, "alignof_", None)
        return builder.load(self_slot, align=self_align)

    @lower_cast(_type_unnamed1405416, _type___nv_bfloat162)
    def conversion_impl(context, builder, fromty, toty, value):
        # A cast from the raw type is the one-argument constructor call.
        ctor_sig = signature(_type___nv_bfloat162, fromty)
        return ctor_impl(context, builder, ctor_sig, [value])


_lower__ZN14__nv_bfloat162C1ERK18__nv_bfloat162_raw(shim_stream, shim_obj)


@register
class _ctor_template___nv_bfloat162(ConcreteTemplate):
    """Typing template listing the ``__nv_bfloat162`` constructor signatures."""

    # Looked up through globals() rather than by bare name.
    key = globals()["__nv_bfloat162"]
    cases = [
        # No arguments.
        signature(_type___nv_bfloat162),
        # From another __nv_bfloat162 (this case is listed twice).
        signature(_type___nv_bfloat162, _type___nv_bfloat162),
        # From two __nv_bfloat16 values.
        signature(
            _type___nv_bfloat162, _type___nv_bfloat16, _type___nv_bfloat16
        ),
        # From another __nv_bfloat162 (second listing).
        signature(_type___nv_bfloat162, _type___nv_bfloat162),
        # From the raw representation type.
        signature(_type___nv_bfloat162, _type_unnamed1405416),
    ]


register_global(__nv_bfloat162, Function(_ctor_template___nv_bfloat162))


def _from___nv_bfloat162_to__type_unnamed1405416_lower(shim_stream, shim_obj):
    """Register the ``__nv_bfloat162`` -> ``__nv_bfloat162_raw`` cast lowering.

    Lowering spills the value to a stack slot and calls a C++ shim that
    invokes ``operator __nv_bfloat162_raw()`` on it.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source
            keyed by its symbol name.
        shim_obj: Item handed to ``add_linking_file`` on the active code
            library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    ____nv_bfloat162__ZNK14__nv_bfloat162cv18__nv_bfloat162_rawEv_1(__nv_bfloat162_raw &retval, __nv_bfloat162 *self) {
        retval = self->operator __nv_bfloat162_raw();
        return 0;
    }
        """

    _op_decl___nv_bfloat162 = declare_device(
        "____nv_bfloat162__ZNK14__nv_bfloat162cv18__nv_bfloat162_rawEv_1",
        _type_unnamed1405416(
            CPointer(_type___nv_bfloat162),
        ),
    )

    def _conversion_op_caller___nv_bfloat162(arg):
        return _op_decl___nv_bfloat162(arg)

    @lower_cast(_type___nv_bfloat162, _type_unnamed1405416)
    def impl(context, builder, fromty, toty, value):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(
            "____nv_bfloat162__ZNK14__nv_bfloat162cv18__nv_bfloat162_rawEv_1",
            shim_raw_str,
        )
        # The shim takes ``self`` by pointer, so the value needs an address.
        ptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat162), name="selfptr"
        )
        # ``alignof_`` is the attribute the ``__nv_bfloat162`` type class
        # sets; the former ``"align"`` lookup did not match it, so the store
        # carried no explicit alignment.
        builder.store(
            value, ptr, align=getattr(_type___nv_bfloat162, "alignof_", None)
        )

        return context.compile_internal(
            builder,
            _conversion_op_caller___nv_bfloat162,
            signature(
                _type_unnamed1405416,
                CPointer(_type___nv_bfloat162),
            ),
            (ptr,),
        )


_from___nv_bfloat162_to__type_unnamed1405416_lower(shim_stream, shim_obj)


# Functions:


def __double2bfloat16():
    """Placeholder used as the key for the ``__double2bfloat16`` lowering.

    The body is intentionally empty; calling it from Python returns None.
    """


def _lower__ZL17__double2bfloat16d_nbst(shim_stream, shim_obj):
    """Register the lowering of ``__double2bfloat16(float64)``.

    The lowering spills the argument to the stack and calls a C++ shim that
    forwards ``*a`` to ``__double2bfloat16``.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source
            keyed by its symbol name.
        shim_obj: Item handed to ``add_linking_file`` on the active code
            library.
    """
    shim_name = "_ZL17__double2bfloat16d_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL17__double2bfloat16d_nbst(__nv_bfloat16 &retval , double* a) {
        retval = __double2bfloat16(*a);
        return 0;
    }
        """
    arg_ptr_ty = CPointer(float64)

    _ZL17__double2bfloat16d_nbst = declare_device(
        shim_name, _type___nv_bfloat16(arg_ptr_ty)
    )

    def _ZL17__double2bfloat16d_nbst_caller(arg_0):
        return _ZL17__double2bfloat16d_nbst(arg_0)

    @lower(__double2bfloat16, float64)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Every argument is passed to the shim by pointer.
        arg_slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, arg_val in zip(arg_slots, sig.args, args):
            builder.store(
                arg_val, slot, align=getattr(arg_ty, "alignof_", None)
            )

        return context.compile_internal(
            builder,
            _ZL17__double2bfloat16d_nbst_caller,
            signature(_type___nv_bfloat16, arg_ptr_ty),
            arg_slots,
        )


_lower__ZL17__double2bfloat16d_nbst(shim_stream, shim_obj)


def __float2bfloat16():
    """Placeholder used as the key for the ``__float2bfloat16`` lowering.

    The body is intentionally empty; calling it from Python returns None.
    """


def _lower__ZL16__float2bfloat16f_nbst(shim_stream, shim_obj):
    """Install the lowering of ``__float2bfloat16`` for a ``float32`` operand.

    Declares the extern "C" shim ``_ZL16__float2bfloat16f_nbst`` as a device
    function and registers, through ``lower``, an implementation that stores
    each operand in an ``alloca`` slot and calls the shim with pointers to
    those slots. The shim writes its result through ``retval`` and returns 0.

    ``shim_stream`` receives the shim source via ``write_with_key``;
    ``shim_obj`` is passed to ``add_linking_file`` on the active code library.
    """
    shim_name = "_ZL16__float2bfloat16f_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL16__float2bfloat16f_nbst(__nv_bfloat16 &retval , float* a) {
        retval = __float2bfloat16(*a);
        return 0;
    }
        """

    _ZL16__float2bfloat16f_nbst = declare_device(
        shim_name,
        _type___nv_bfloat16(CPointer(float32)),
    )

    def _ZL16__float2bfloat16f_nbst_caller(arg_0):
        return _ZL16__float2bfloat16f_nbst(arg_0)

    @lower(__float2bfloat16, float32)
    def impl(context, builder, sig, args):
        # Attach the shim object for linking and record the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes its operands by pointer: allocate one slot per
        # operand first, then store the incoming values into the slots.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(_type___nv_bfloat16, CPointer(float32))
        return context.compile_internal(
            builder,
            _ZL16__float2bfloat16f_nbst_caller,
            shim_sig,
            slots,
        )


_lower__ZL16__float2bfloat16f_nbst(shim_stream, shim_obj)


def __float2bfloat16_rn():
    """Python-side stand-in for the C++ ``__float2bfloat16_rn`` function.

    The body is intentionally empty: this object is only the key under which
    the ``float32`` lowering is registered with ``lower``.
    """


def _lower__ZL19__float2bfloat16_rnf_nbst(shim_stream, shim_obj):
    """Install the lowering of ``__float2bfloat16_rn`` for a ``float32`` operand.

    Declares the extern "C" shim ``_ZL19__float2bfloat16_rnf_nbst`` as a device
    function and registers, through ``lower``, an implementation that stores
    each operand in an ``alloca`` slot and calls the shim with pointers to
    those slots. The shim writes its result through ``retval`` and returns 0.

    ``shim_stream`` receives the shim source via ``write_with_key``;
    ``shim_obj`` is passed to ``add_linking_file`` on the active code library.
    """
    shim_name = "_ZL19__float2bfloat16_rnf_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL19__float2bfloat16_rnf_nbst(__nv_bfloat16 &retval , float* a) {
        retval = __float2bfloat16_rn(*a);
        return 0;
    }
        """

    _ZL19__float2bfloat16_rnf_nbst = declare_device(
        shim_name,
        _type___nv_bfloat16(CPointer(float32)),
    )

    def _ZL19__float2bfloat16_rnf_nbst_caller(arg_0):
        return _ZL19__float2bfloat16_rnf_nbst(arg_0)

    @lower(__float2bfloat16_rn, float32)
    def impl(context, builder, sig, args):
        # Attach the shim object for linking and record the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes its operands by pointer: allocate one slot per
        # operand first, then store the incoming values into the slots.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(_type___nv_bfloat16, CPointer(float32))
        return context.compile_internal(
            builder,
            _ZL19__float2bfloat16_rnf_nbst_caller,
            shim_sig,
            slots,
        )


_lower__ZL19__float2bfloat16_rnf_nbst(shim_stream, shim_obj)


def __float2bfloat16_rz():
    """Python-side stand-in for the C++ ``__float2bfloat16_rz`` function.

    The body is intentionally empty: this object is only the key under which
    the ``float32`` lowering is registered with ``lower``.
    """


def _lower__ZL19__float2bfloat16_rzf_nbst(shim_stream, shim_obj):
    """Install the lowering of ``__float2bfloat16_rz`` for a ``float32`` operand.

    Declares the extern "C" shim ``_ZL19__float2bfloat16_rzf_nbst`` as a device
    function and registers, through ``lower``, an implementation that stores
    each operand in an ``alloca`` slot and calls the shim with pointers to
    those slots. The shim writes its result through ``retval`` and returns 0.

    ``shim_stream`` receives the shim source via ``write_with_key``;
    ``shim_obj`` is passed to ``add_linking_file`` on the active code library.
    """
    shim_name = "_ZL19__float2bfloat16_rzf_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL19__float2bfloat16_rzf_nbst(__nv_bfloat16 &retval , float* a) {
        retval = __float2bfloat16_rz(*a);
        return 0;
    }
        """

    _ZL19__float2bfloat16_rzf_nbst = declare_device(
        shim_name,
        _type___nv_bfloat16(CPointer(float32)),
    )

    def _ZL19__float2bfloat16_rzf_nbst_caller(arg_0):
        return _ZL19__float2bfloat16_rzf_nbst(arg_0)

    @lower(__float2bfloat16_rz, float32)
    def impl(context, builder, sig, args):
        # Attach the shim object for linking and record the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes its operands by pointer: allocate one slot per
        # operand first, then store the incoming values into the slots.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(_type___nv_bfloat16, CPointer(float32))
        return context.compile_internal(
            builder,
            _ZL19__float2bfloat16_rzf_nbst_caller,
            shim_sig,
            slots,
        )


_lower__ZL19__float2bfloat16_rzf_nbst(shim_stream, shim_obj)


def __float2bfloat16_rd():
    """Python-side stand-in for the C++ ``__float2bfloat16_rd`` function.

    The body is intentionally empty: this object is only the key under which
    the ``float32`` lowering is registered with ``lower``.
    """


def _lower__ZL19__float2bfloat16_rdf_nbst(shim_stream, shim_obj):
    """Install the lowering of ``__float2bfloat16_rd`` for a ``float32`` operand.

    Declares the extern "C" shim ``_ZL19__float2bfloat16_rdf_nbst`` as a device
    function and registers, through ``lower``, an implementation that stores
    each operand in an ``alloca`` slot and calls the shim with pointers to
    those slots. The shim writes its result through ``retval`` and returns 0.

    ``shim_stream`` receives the shim source via ``write_with_key``;
    ``shim_obj`` is passed to ``add_linking_file`` on the active code library.
    """
    shim_name = "_ZL19__float2bfloat16_rdf_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL19__float2bfloat16_rdf_nbst(__nv_bfloat16 &retval , float* a) {
        retval = __float2bfloat16_rd(*a);
        return 0;
    }
        """

    _ZL19__float2bfloat16_rdf_nbst = declare_device(
        shim_name,
        _type___nv_bfloat16(CPointer(float32)),
    )

    def _ZL19__float2bfloat16_rdf_nbst_caller(arg_0):
        return _ZL19__float2bfloat16_rdf_nbst(arg_0)

    @lower(__float2bfloat16_rd, float32)
    def impl(context, builder, sig, args):
        # Attach the shim object for linking and record the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes its operands by pointer: allocate one slot per
        # operand first, then store the incoming values into the slots.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(_type___nv_bfloat16, CPointer(float32))
        return context.compile_internal(
            builder,
            _ZL19__float2bfloat16_rdf_nbst_caller,
            shim_sig,
            slots,
        )


_lower__ZL19__float2bfloat16_rdf_nbst(shim_stream, shim_obj)


def __float2bfloat16_ru():
    """Python-side stand-in for the C++ ``__float2bfloat16_ru`` function.

    The body is intentionally empty: this object is only the key under which
    the ``float32`` lowering is registered with ``lower``.
    """


def _lower__ZL19__float2bfloat16_ruf_nbst(shim_stream, shim_obj):
    """Install the lowering of ``__float2bfloat16_ru`` for a ``float32`` operand.

    Declares the extern "C" shim ``_ZL19__float2bfloat16_ruf_nbst`` as a device
    function and registers, through ``lower``, an implementation that stores
    each operand in an ``alloca`` slot and calls the shim with pointers to
    those slots. The shim writes its result through ``retval`` and returns 0.

    ``shim_stream`` receives the shim source via ``write_with_key``;
    ``shim_obj`` is passed to ``add_linking_file`` on the active code library.
    """
    shim_name = "_ZL19__float2bfloat16_ruf_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL19__float2bfloat16_ruf_nbst(__nv_bfloat16 &retval , float* a) {
        retval = __float2bfloat16_ru(*a);
        return 0;
    }
        """

    _ZL19__float2bfloat16_ruf_nbst = declare_device(
        shim_name,
        _type___nv_bfloat16(CPointer(float32)),
    )

    def _ZL19__float2bfloat16_ruf_nbst_caller(arg_0):
        return _ZL19__float2bfloat16_ruf_nbst(arg_0)

    @lower(__float2bfloat16_ru, float32)
    def impl(context, builder, sig, args):
        # Attach the shim object for linking and record the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes its operands by pointer: allocate one slot per
        # operand first, then store the incoming values into the slots.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(_type___nv_bfloat16, CPointer(float32))
        return context.compile_internal(
            builder,
            _ZL19__float2bfloat16_ruf_nbst_caller,
            shim_sig,
            slots,
        )


_lower__ZL19__float2bfloat16_ruf_nbst(shim_stream, shim_obj)


def __bfloat162float():
    """Python-side stand-in for the C++ ``__bfloat162float`` function.

    The body is intentionally empty: this object is only the key under which
    the ``_type___nv_bfloat16`` lowering is registered with ``lower``.
    """


def _lower__ZL16__bfloat162float13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Install the lowering of ``__bfloat162float`` for a bfloat16 operand.

    Declares the extern "C" shim ``_ZL16__bfloat162float13__nv_bfloat16_nbst``
    as a device function and registers, through ``lower``, an implementation
    that stores each operand in an ``alloca`` slot and calls the shim with
    pointers to those slots. The shim writes its ``float`` result through
    ``retval`` and returns 0.

    ``shim_stream`` receives the shim source via ``write_with_key``;
    ``shim_obj`` is passed to ``add_linking_file`` on the active code library.
    """
    shim_name = "_ZL16__bfloat162float13__nv_bfloat16_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL16__bfloat162float13__nv_bfloat16_nbst(float &retval , __nv_bfloat16* a) {
        retval = __bfloat162float(*a);
        return 0;
    }
        """

    _ZL16__bfloat162float13__nv_bfloat16_nbst = declare_device(
        shim_name,
        float32(CPointer(_type___nv_bfloat16)),
    )

    def _ZL16__bfloat162float13__nv_bfloat16_nbst_caller(arg_0):
        return _ZL16__bfloat162float13__nv_bfloat16_nbst(arg_0)

    @lower(__bfloat162float, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        # Attach the shim object for linking and record the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes its operands by pointer: allocate one slot per
        # operand first, then store the incoming values into the slots.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(float32, CPointer(_type___nv_bfloat16))
        return context.compile_internal(
            builder,
            _ZL16__bfloat162float13__nv_bfloat16_nbst_caller,
            shim_sig,
            slots,
        )


_lower__ZL16__bfloat162float13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __float2bfloat162_rn():
    """Python-side stand-in for the C++ ``__float2bfloat162_rn`` function.

    The body is intentionally empty: this object is only the key under which
    the ``float32`` lowering is registered with ``lower``.
    """


def _lower__ZL20__float2bfloat162_rnf_nbst(shim_stream, shim_obj):
    """Install the lowering of ``__float2bfloat162_rn`` for a ``float32`` operand.

    Declares the extern "C" shim ``_ZL20__float2bfloat162_rnf_nbst`` as a
    device function and registers, through ``lower``, an implementation that
    stores each operand in an ``alloca`` slot and calls the shim with pointers
    to those slots. The shim writes its ``__nv_bfloat162`` result through
    ``retval`` and returns 0.

    ``shim_stream`` receives the shim source via ``write_with_key``;
    ``shim_obj`` is passed to ``add_linking_file`` on the active code library.
    """
    shim_name = "_ZL20__float2bfloat162_rnf_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL20__float2bfloat162_rnf_nbst(__nv_bfloat162 &retval , float* a) {
        retval = __float2bfloat162_rn(*a);
        return 0;
    }
        """

    _ZL20__float2bfloat162_rnf_nbst = declare_device(
        shim_name,
        _type___nv_bfloat162(CPointer(float32)),
    )

    def _ZL20__float2bfloat162_rnf_nbst_caller(arg_0):
        return _ZL20__float2bfloat162_rnf_nbst(arg_0)

    @lower(__float2bfloat162_rn, float32)
    def impl(context, builder, sig, args):
        # Attach the shim object for linking and record the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes its operands by pointer: allocate one slot per
        # operand first, then store the incoming values into the slots.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(_type___nv_bfloat162, CPointer(float32))
        return context.compile_internal(
            builder,
            _ZL20__float2bfloat162_rnf_nbst_caller,
            shim_sig,
            slots,
        )


_lower__ZL20__float2bfloat162_rnf_nbst(shim_stream, shim_obj)


def __floats2bfloat162_rn():
    """Python-side stand-in for the C++ ``__floats2bfloat162_rn`` function.

    The body is intentionally empty: this object is only the key under which
    the ``(float32, float32)`` lowering is registered with ``lower``.
    """


def _lower__ZL21__floats2bfloat162_rnff_nbst(shim_stream, shim_obj):
    """Install the lowering of ``__floats2bfloat162_rn`` for two ``float32``s.

    Declares the extern "C" shim ``_ZL21__floats2bfloat162_rnff_nbst`` as a
    device function and registers, through ``lower``, an implementation that
    stores each operand in an ``alloca`` slot and calls the shim with pointers
    to those slots. The shim writes its ``__nv_bfloat162`` result through
    ``retval`` and returns 0.

    ``shim_stream`` receives the shim source via ``write_with_key``;
    ``shim_obj`` is passed to ``add_linking_file`` on the active code library.
    """
    shim_name = "_ZL21__floats2bfloat162_rnff_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL21__floats2bfloat162_rnff_nbst(__nv_bfloat162 &retval , float* a, float* b) {
        retval = __floats2bfloat162_rn(*a, *b);
        return 0;
    }
        """

    _ZL21__floats2bfloat162_rnff_nbst = declare_device(
        shim_name,
        _type___nv_bfloat162(CPointer(float32), CPointer(float32)),
    )

    def _ZL21__floats2bfloat162_rnff_nbst_caller(arg_0, arg_1):
        return _ZL21__floats2bfloat162_rnff_nbst(arg_0, arg_1)

    @lower(__floats2bfloat162_rn, float32, float32)
    def impl(context, builder, sig, args):
        # Attach the shim object for linking and record the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes its operands by pointer: allocate one slot per
        # operand first, then store the incoming values into the slots.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(
            _type___nv_bfloat162, CPointer(float32), CPointer(float32)
        )
        return context.compile_internal(
            builder,
            _ZL21__floats2bfloat162_rnff_nbst_caller,
            shim_sig,
            slots,
        )


_lower__ZL21__floats2bfloat162_rnff_nbst(shim_stream, shim_obj)


def __low2float():
    """Python-side stand-in for the C++ ``__low2float`` function.

    The body is intentionally empty: this object is only the key under which
    the ``_type___nv_bfloat162`` lowering is registered with ``lower``.
    """


def _lower__ZL11__low2float14__nv_bfloat162_nbst(shim_stream, shim_obj):
    """Install the lowering of ``__low2float`` for a ``__nv_bfloat162`` operand.

    Declares the extern "C" shim ``_ZL11__low2float14__nv_bfloat162_nbst`` as
    a device function and registers, through ``lower``, an implementation that
    stores each operand in an ``alloca`` slot and calls the shim with pointers
    to those slots. The shim writes its ``float`` result through ``retval``
    and returns 0.

    ``shim_stream`` receives the shim source via ``write_with_key``;
    ``shim_obj`` is passed to ``add_linking_file`` on the active code library.
    """
    shim_name = "_ZL11__low2float14__nv_bfloat162_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL11__low2float14__nv_bfloat162_nbst(float &retval , __nv_bfloat162* a) {
        retval = __low2float(*a);
        return 0;
    }
        """

    _ZL11__low2float14__nv_bfloat162_nbst = declare_device(
        shim_name,
        float32(CPointer(_type___nv_bfloat162)),
    )

    def _ZL11__low2float14__nv_bfloat162_nbst_caller(arg_0):
        return _ZL11__low2float14__nv_bfloat162_nbst(arg_0)

    @lower(__low2float, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        # Attach the shim object for linking and record the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes its operands by pointer: allocate one slot per
        # operand first, then store the incoming values into the slots.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(float32, CPointer(_type___nv_bfloat162))
        return context.compile_internal(
            builder,
            _ZL11__low2float14__nv_bfloat162_nbst_caller,
            shim_sig,
            slots,
        )


_lower__ZL11__low2float14__nv_bfloat162_nbst(shim_stream, shim_obj)


def __high2float():
    """Python-side stand-in for the C++ ``__high2float`` function.

    The body is intentionally empty: this object is only the key under which
    the ``_type___nv_bfloat162`` lowering is registered with ``lower``.
    """


def _lower__ZL12__high2float14__nv_bfloat162_nbst(shim_stream, shim_obj):
    """Install the lowering of ``__high2float`` for a ``__nv_bfloat162`` operand.

    Declares the extern "C" shim ``_ZL12__high2float14__nv_bfloat162_nbst`` as
    a device function and registers, through ``lower``, an implementation that
    stores each operand in an ``alloca`` slot and calls the shim with pointers
    to those slots. The shim writes its ``float`` result through ``retval``
    and returns 0.

    ``shim_stream`` receives the shim source via ``write_with_key``;
    ``shim_obj`` is passed to ``add_linking_file`` on the active code library.
    """
    shim_name = "_ZL12__high2float14__nv_bfloat162_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL12__high2float14__nv_bfloat162_nbst(float &retval , __nv_bfloat162* a) {
        retval = __high2float(*a);
        return 0;
    }
        """

    _ZL12__high2float14__nv_bfloat162_nbst = declare_device(
        shim_name,
        float32(CPointer(_type___nv_bfloat162)),
    )

    def _ZL12__high2float14__nv_bfloat162_nbst_caller(arg_0):
        return _ZL12__high2float14__nv_bfloat162_nbst(arg_0)

    @lower(__high2float, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        # Attach the shim object for linking and record the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes its operands by pointer: allocate one slot per
        # operand first, then store the incoming values into the slots.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(float32, CPointer(_type___nv_bfloat162))
        return context.compile_internal(
            builder,
            _ZL12__high2float14__nv_bfloat162_nbst_caller,
            shim_sig,
            slots,
        )


_lower__ZL12__high2float14__nv_bfloat162_nbst(shim_stream, shim_obj)


def __float22bfloat162_rn():
    """Python-side stand-in for the C++ ``__float22bfloat162_rn`` function.

    The body is intentionally empty: this object is only the key under which
    the ``float32x2`` lowering is registered with ``lower``.
    """


def _lower__ZL21__float22bfloat162_rn6float2_nbst(shim_stream, shim_obj):
    """Install the lowering of ``__float22bfloat162_rn`` for a ``float32x2``.

    Declares the extern "C" shim ``_ZL21__float22bfloat162_rn6float2_nbst`` as
    a device function and registers, through ``lower``, an implementation that
    stores each operand in an ``alloca`` slot and calls the shim with pointers
    to those slots. The shim writes its ``__nv_bfloat162`` result through
    ``retval`` and returns 0.

    ``shim_stream`` receives the shim source via ``write_with_key``;
    ``shim_obj`` is passed to ``add_linking_file`` on the active code library.
    """
    shim_name = "_ZL21__float22bfloat162_rn6float2_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL21__float22bfloat162_rn6float2_nbst(__nv_bfloat162 &retval , float2* a) {
        retval = __float22bfloat162_rn(*a);
        return 0;
    }
        """

    _ZL21__float22bfloat162_rn6float2_nbst = declare_device(
        shim_name,
        _type___nv_bfloat162(CPointer(float32x2)),
    )

    def _ZL21__float22bfloat162_rn6float2_nbst_caller(arg_0):
        return _ZL21__float22bfloat162_rn6float2_nbst(arg_0)

    @lower(__float22bfloat162_rn, float32x2)
    def impl(context, builder, sig, args):
        # Attach the shim object for linking and record the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes its operands by pointer: allocate one slot per
        # operand first, then store the incoming values into the slots.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(_type___nv_bfloat162, CPointer(float32x2))
        return context.compile_internal(
            builder,
            _ZL21__float22bfloat162_rn6float2_nbst_caller,
            shim_sig,
            slots,
        )


_lower__ZL21__float22bfloat162_rn6float2_nbst(shim_stream, shim_obj)


def __bfloat1622float2():
    """Python-side stand-in for the C++ ``__bfloat1622float2`` function.

    The body is intentionally empty: this object is only the key under which
    the ``_type___nv_bfloat162`` lowering is registered with ``lower``.
    """


def _lower__ZL18__bfloat1622float214__nv_bfloat162_nbst(shim_stream, shim_obj):
    """Install the lowering of ``__bfloat1622float2`` for a ``__nv_bfloat162``.

    Declares the extern "C" shim
    ``_ZL18__bfloat1622float214__nv_bfloat162_nbst`` as a device function and
    registers, through ``lower``, an implementation that stores each operand
    in an ``alloca`` slot and calls the shim with pointers to those slots.
    The shim writes its ``float2`` result through ``retval`` and returns 0.

    ``shim_stream`` receives the shim source via ``write_with_key``;
    ``shim_obj`` is passed to ``add_linking_file`` on the active code library.
    """
    shim_name = "_ZL18__bfloat1622float214__nv_bfloat162_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL18__bfloat1622float214__nv_bfloat162_nbst(float2 &retval , __nv_bfloat162* a) {
        retval = __bfloat1622float2(*a);
        return 0;
    }
        """

    _ZL18__bfloat1622float214__nv_bfloat162_nbst = declare_device(
        shim_name,
        float32x2(CPointer(_type___nv_bfloat162)),
    )

    def _ZL18__bfloat1622float214__nv_bfloat162_nbst_caller(arg_0):
        return _ZL18__bfloat1622float214__nv_bfloat162_nbst(arg_0)

    @lower(__bfloat1622float2, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        # Attach the shim object for linking and record the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes its operands by pointer: allocate one slot per
        # operand first, then store the incoming values into the slots.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(float32x2, CPointer(_type___nv_bfloat162))
        return context.compile_internal(
            builder,
            _ZL18__bfloat1622float214__nv_bfloat162_nbst_caller,
            shim_sig,
            slots,
        )


_lower__ZL18__bfloat1622float214__nv_bfloat162_nbst(shim_stream, shim_obj)


def __bfloat162char_rz():
    """Python-side stand-in for the C++ ``__bfloat162char_rz`` function.

    The body is intentionally empty: this object is only the key under which
    the ``_type___nv_bfloat16`` lowering is registered with ``lower``.
    """


def _lower__ZL18__bfloat162char_rz13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Install the lowering of ``__bfloat162char_rz`` for a bfloat16 operand.

    Declares the extern "C" shim
    ``_ZL18__bfloat162char_rz13__nv_bfloat16_nbst`` as a device function and
    registers, through ``lower``, an implementation that stores each operand
    in an ``alloca`` slot and calls the shim with pointers to those slots.
    The shim writes its ``signed char`` result through ``retval`` and
    returns 0.

    ``shim_stream`` receives the shim source via ``write_with_key``;
    ``shim_obj`` is passed to ``add_linking_file`` on the active code library.
    """
    shim_name = "_ZL18__bfloat162char_rz13__nv_bfloat16_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL18__bfloat162char_rz13__nv_bfloat16_nbst(signed char &retval , __nv_bfloat16* h) {
        retval = __bfloat162char_rz(*h);
        return 0;
    }
        """

    _ZL18__bfloat162char_rz13__nv_bfloat16_nbst = declare_device(
        shim_name,
        int8(CPointer(_type___nv_bfloat16)),
    )

    def _ZL18__bfloat162char_rz13__nv_bfloat16_nbst_caller(arg_0):
        return _ZL18__bfloat162char_rz13__nv_bfloat16_nbst(arg_0)

    @lower(__bfloat162char_rz, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        # Attach the shim object for linking and record the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes its operands by pointer: allocate one slot per
        # operand first, then store the incoming values into the slots.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(int8, CPointer(_type___nv_bfloat16))
        return context.compile_internal(
            builder,
            _ZL18__bfloat162char_rz13__nv_bfloat16_nbst_caller,
            shim_sig,
            slots,
        )


_lower__ZL18__bfloat162char_rz13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __bfloat162uchar_rz():
    """Python-side stand-in for the C++ ``__bfloat162uchar_rz`` function.

    The body is intentionally empty: this object is only the key under which
    the ``_type___nv_bfloat16`` lowering is registered with ``lower``.
    """


def _lower__ZL19__bfloat162uchar_rz13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Install the lowering of ``__bfloat162uchar_rz`` for a bfloat16 operand.

    Declares the extern "C" shim
    ``_ZL19__bfloat162uchar_rz13__nv_bfloat16_nbst`` as a device function and
    registers, through ``lower``, an implementation that stores each operand
    in an ``alloca`` slot and calls the shim with pointers to those slots.
    The shim writes its ``unsigned char`` result through ``retval`` and
    returns 0.

    ``shim_stream`` receives the shim source via ``write_with_key``;
    ``shim_obj`` is passed to ``add_linking_file`` on the active code library.
    """
    shim_name = "_ZL19__bfloat162uchar_rz13__nv_bfloat16_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL19__bfloat162uchar_rz13__nv_bfloat16_nbst(unsigned char &retval , __nv_bfloat16* h) {
        retval = __bfloat162uchar_rz(*h);
        return 0;
    }
        """

    _ZL19__bfloat162uchar_rz13__nv_bfloat16_nbst = declare_device(
        shim_name,
        uint8(CPointer(_type___nv_bfloat16)),
    )

    def _ZL19__bfloat162uchar_rz13__nv_bfloat16_nbst_caller(arg_0):
        return _ZL19__bfloat162uchar_rz13__nv_bfloat16_nbst(arg_0)

    @lower(__bfloat162uchar_rz, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        # Attach the shim object for linking and record the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes its operands by pointer: allocate one slot per
        # operand first, then store the incoming values into the slots.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(uint8, CPointer(_type___nv_bfloat16))
        return context.compile_internal(
            builder,
            _ZL19__bfloat162uchar_rz13__nv_bfloat16_nbst_caller,
            shim_sig,
            slots,
        )


_lower__ZL19__bfloat162uchar_rz13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __bfloat162int_rn():
    """Python-side stand-in for the C++ ``__bfloat162int_rn`` function.

    The body is intentionally empty: this object is only the key under which
    the ``_type___nv_bfloat16`` lowering is registered with ``lower``.
    """


def _lower__ZL17__bfloat162int_rn13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Install the lowering of ``__bfloat162int_rn`` for a bfloat16 operand.

    Declares the extern "C" shim
    ``_ZL17__bfloat162int_rn13__nv_bfloat16_nbst`` as a device function and
    registers, through ``lower``, an implementation that stores each operand
    in an ``alloca`` slot and calls the shim with pointers to those slots.
    The shim writes its ``int`` result through ``retval`` and returns 0.

    ``shim_stream`` receives the shim source via ``write_with_key``;
    ``shim_obj`` is passed to ``add_linking_file`` on the active code library.
    """
    shim_name = "_ZL17__bfloat162int_rn13__nv_bfloat16_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL17__bfloat162int_rn13__nv_bfloat16_nbst(int &retval , __nv_bfloat16* h) {
        retval = __bfloat162int_rn(*h);
        return 0;
    }
        """

    _ZL17__bfloat162int_rn13__nv_bfloat16_nbst = declare_device(
        shim_name,
        int32(CPointer(_type___nv_bfloat16)),
    )

    def _ZL17__bfloat162int_rn13__nv_bfloat16_nbst_caller(arg_0):
        return _ZL17__bfloat162int_rn13__nv_bfloat16_nbst(arg_0)

    @lower(__bfloat162int_rn, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        # Attach the shim object for linking and record the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes its operands by pointer: allocate one slot per
        # operand first, then store the incoming values into the slots.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(int32, CPointer(_type___nv_bfloat16))
        return context.compile_internal(
            builder,
            _ZL17__bfloat162int_rn13__nv_bfloat16_nbst_caller,
            shim_sig,
            slots,
        )


_lower__ZL17__bfloat162int_rn13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __bfloat162int_rz():
    """Python-side stand-in for the C++ ``__bfloat162int_rz`` function.

    The body is intentionally empty: this object is only the key under which
    the ``_type___nv_bfloat16`` lowering is registered with ``lower``.
    """


def _lower__ZL17__bfloat162int_rz13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Install the lowering of ``__bfloat162int_rz`` for a bfloat16 operand.

    Declares the extern "C" shim
    ``_ZL17__bfloat162int_rz13__nv_bfloat16_nbst`` as a device function and
    registers, through ``lower``, an implementation that stores each operand
    in an ``alloca`` slot and calls the shim with pointers to those slots.
    The shim writes its ``int`` result through ``retval`` and returns 0.

    ``shim_stream`` receives the shim source via ``write_with_key``;
    ``shim_obj`` is passed to ``add_linking_file`` on the active code library.
    """
    shim_name = "_ZL17__bfloat162int_rz13__nv_bfloat16_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL17__bfloat162int_rz13__nv_bfloat16_nbst(int &retval , __nv_bfloat16* h) {
        retval = __bfloat162int_rz(*h);
        return 0;
    }
        """

    _ZL17__bfloat162int_rz13__nv_bfloat16_nbst = declare_device(
        shim_name,
        int32(CPointer(_type___nv_bfloat16)),
    )

    def _ZL17__bfloat162int_rz13__nv_bfloat16_nbst_caller(arg_0):
        return _ZL17__bfloat162int_rz13__nv_bfloat16_nbst(arg_0)

    @lower(__bfloat162int_rz, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        # Attach the shim object for linking and record the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes its operands by pointer: allocate one slot per
        # operand first, then store the incoming values into the slots.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(int32, CPointer(_type___nv_bfloat16))
        return context.compile_internal(
            builder,
            _ZL17__bfloat162int_rz13__nv_bfloat16_nbst_caller,
            shim_sig,
            slots,
        )


_lower__ZL17__bfloat162int_rz13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __bfloat162int_rd():
    """Python-side stand-in for the C++ ``__bfloat162int_rd`` function.

    The body is intentionally empty: this object is only the key under which
    the ``_type___nv_bfloat16`` lowering is registered with ``lower``.
    """


def _lower__ZL17__bfloat162int_rd13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Install the lowering of ``__bfloat162int_rd`` for a bfloat16 operand.

    Declares the extern "C" shim
    ``_ZL17__bfloat162int_rd13__nv_bfloat16_nbst`` as a device function and
    registers, through ``lower``, an implementation that stores each operand
    in an ``alloca`` slot and calls the shim with pointers to those slots.
    The shim writes its ``int`` result through ``retval`` and returns 0.

    ``shim_stream`` receives the shim source via ``write_with_key``;
    ``shim_obj`` is passed to ``add_linking_file`` on the active code library.
    """
    shim_name = "_ZL17__bfloat162int_rd13__nv_bfloat16_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL17__bfloat162int_rd13__nv_bfloat16_nbst(int &retval , __nv_bfloat16* h) {
        retval = __bfloat162int_rd(*h);
        return 0;
    }
        """

    _ZL17__bfloat162int_rd13__nv_bfloat16_nbst = declare_device(
        shim_name,
        int32(CPointer(_type___nv_bfloat16)),
    )

    def _ZL17__bfloat162int_rd13__nv_bfloat16_nbst_caller(arg_0):
        return _ZL17__bfloat162int_rd13__nv_bfloat16_nbst(arg_0)

    @lower(__bfloat162int_rd, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        # Attach the shim object for linking and record the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes its operands by pointer: allocate one slot per
        # operand first, then store the incoming values into the slots.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(int32, CPointer(_type___nv_bfloat16))
        return context.compile_internal(
            builder,
            _ZL17__bfloat162int_rd13__nv_bfloat16_nbst_caller,
            shim_sig,
            slots,
        )


_lower__ZL17__bfloat162int_rd13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __bfloat162int_ru():
    """Python-side stand-in for the C++ ``__bfloat162int_ru`` function.

    The body is intentionally empty: this object is only the key under which
    the ``_type___nv_bfloat16`` lowering is registered with ``lower``.
    """


def _lower__ZL17__bfloat162int_ru13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Install the lowering of ``__bfloat162int_ru`` for a bfloat16 operand.

    Declares the extern "C" shim
    ``_ZL17__bfloat162int_ru13__nv_bfloat16_nbst`` as a device function and
    registers, through ``lower``, an implementation that stores each operand
    in an ``alloca`` slot and calls the shim with pointers to those slots.
    The shim writes its ``int`` result through ``retval`` and returns 0.

    ``shim_stream`` receives the shim source via ``write_with_key``;
    ``shim_obj`` is passed to ``add_linking_file`` on the active code library.
    """
    shim_name = "_ZL17__bfloat162int_ru13__nv_bfloat16_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL17__bfloat162int_ru13__nv_bfloat16_nbst(int &retval , __nv_bfloat16* h) {
        retval = __bfloat162int_ru(*h);
        return 0;
    }
        """

    _ZL17__bfloat162int_ru13__nv_bfloat16_nbst = declare_device(
        shim_name,
        int32(CPointer(_type___nv_bfloat16)),
    )

    def _ZL17__bfloat162int_ru13__nv_bfloat16_nbst_caller(arg_0):
        return _ZL17__bfloat162int_ru13__nv_bfloat16_nbst(arg_0)

    @lower(__bfloat162int_ru, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        # Attach the shim object for linking and record the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes its operands by pointer: allocate one slot per
        # operand first, then store the incoming values into the slots.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(int32, CPointer(_type___nv_bfloat16))
        return context.compile_internal(
            builder,
            _ZL17__bfloat162int_ru13__nv_bfloat16_nbst_caller,
            shim_sig,
            slots,
        )


_lower__ZL17__bfloat162int_ru13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __int2bfloat16_rn():
    """Python-side stand-in for the C++ ``__int2bfloat16_rn`` function.

    The body is intentionally empty: this object is only the key under which
    the ``int32`` lowering is registered with ``lower``.
    """


def _lower__ZL17__int2bfloat16_rni_nbst(shim_stream, shim_obj):
    """Install the lowering of ``__int2bfloat16_rn`` for an ``int32`` operand.

    Declares the extern "C" shim ``_ZL17__int2bfloat16_rni_nbst`` as a device
    function and registers, through ``lower``, an implementation that stores
    each operand in an ``alloca`` slot and calls the shim with pointers to
    those slots. The shim writes its result through ``retval`` and returns 0.

    ``shim_stream`` receives the shim source via ``write_with_key``;
    ``shim_obj`` is passed to ``add_linking_file`` on the active code library.
    """
    shim_name = "_ZL17__int2bfloat16_rni_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL17__int2bfloat16_rni_nbst(__nv_bfloat16 &retval , int* i) {
        retval = __int2bfloat16_rn(*i);
        return 0;
    }
        """

    _ZL17__int2bfloat16_rni_nbst = declare_device(
        shim_name,
        _type___nv_bfloat16(CPointer(int32)),
    )

    def _ZL17__int2bfloat16_rni_nbst_caller(arg_0):
        return _ZL17__int2bfloat16_rni_nbst(arg_0)

    @lower(__int2bfloat16_rn, int32)
    def impl(context, builder, sig, args):
        # Attach the shim object for linking and record the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes its operands by pointer: allocate one slot per
        # operand first, then store the incoming values into the slots.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(_type___nv_bfloat16, CPointer(int32))
        return context.compile_internal(
            builder,
            _ZL17__int2bfloat16_rni_nbst_caller,
            shim_sig,
            slots,
        )


_lower__ZL17__int2bfloat16_rni_nbst(shim_stream, shim_obj)


def __int2bfloat16_rz():
    """Python-side stand-in for the C++ ``__int2bfloat16_rz`` function.

    The body is intentionally empty: this object is only the key under which
    the ``int32`` lowering is registered with ``lower``.
    """


def _lower__ZL17__int2bfloat16_rzi_nbst(shim_stream, shim_obj):
    """Install the lowering of ``__int2bfloat16_rz`` for an ``int32`` operand.

    Declares the extern "C" shim ``_ZL17__int2bfloat16_rzi_nbst`` as a device
    function and registers, through ``lower``, an implementation that stores
    each operand in an ``alloca`` slot and calls the shim with pointers to
    those slots. The shim writes its result through ``retval`` and returns 0.

    ``shim_stream`` receives the shim source via ``write_with_key``;
    ``shim_obj`` is passed to ``add_linking_file`` on the active code library.
    """
    shim_name = "_ZL17__int2bfloat16_rzi_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL17__int2bfloat16_rzi_nbst(__nv_bfloat16 &retval , int* i) {
        retval = __int2bfloat16_rz(*i);
        return 0;
    }
        """

    _ZL17__int2bfloat16_rzi_nbst = declare_device(
        shim_name,
        _type___nv_bfloat16(CPointer(int32)),
    )

    def _ZL17__int2bfloat16_rzi_nbst_caller(arg_0):
        return _ZL17__int2bfloat16_rzi_nbst(arg_0)

    @lower(__int2bfloat16_rz, int32)
    def impl(context, builder, sig, args):
        # Attach the shim object for linking and record the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes its operands by pointer: allocate one slot per
        # operand first, then store the incoming values into the slots.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(_type___nv_bfloat16, CPointer(int32))
        return context.compile_internal(
            builder,
            _ZL17__int2bfloat16_rzi_nbst_caller,
            shim_sig,
            slots,
        )


_lower__ZL17__int2bfloat16_rzi_nbst(shim_stream, shim_obj)


def __int2bfloat16_rd():
    """Python-side stand-in for the C++ ``__int2bfloat16_rd`` function.

    The body is intentionally empty: this object is only the key under which
    the ``int32`` lowering is registered with ``lower``.
    """


def _lower__ZL17__int2bfloat16_rdi_nbst(shim_stream, shim_obj):
    """Install the lowering of ``__int2bfloat16_rd`` for an ``int32`` operand.

    Declares the extern "C" shim ``_ZL17__int2bfloat16_rdi_nbst`` as a device
    function and registers, through ``lower``, an implementation that stores
    each operand in an ``alloca`` slot and calls the shim with pointers to
    those slots. The shim writes its result through ``retval`` and returns 0.

    ``shim_stream`` receives the shim source via ``write_with_key``;
    ``shim_obj`` is passed to ``add_linking_file`` on the active code library.
    """
    shim_name = "_ZL17__int2bfloat16_rdi_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL17__int2bfloat16_rdi_nbst(__nv_bfloat16 &retval , int* i) {
        retval = __int2bfloat16_rd(*i);
        return 0;
    }
        """

    _ZL17__int2bfloat16_rdi_nbst = declare_device(
        shim_name,
        _type___nv_bfloat16(CPointer(int32)),
    )

    def _ZL17__int2bfloat16_rdi_nbst_caller(arg_0):
        return _ZL17__int2bfloat16_rdi_nbst(arg_0)

    @lower(__int2bfloat16_rd, int32)
    def impl(context, builder, sig, args):
        # Attach the shim object for linking and record the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes its operands by pointer: allocate one slot per
        # operand first, then store the incoming values into the slots.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(_type___nv_bfloat16, CPointer(int32))
        return context.compile_internal(
            builder,
            _ZL17__int2bfloat16_rdi_nbst_caller,
            shim_sig,
            slots,
        )


_lower__ZL17__int2bfloat16_rdi_nbst(shim_stream, shim_obj)


def __int2bfloat16_ru():
    """Python-side stand-in for the C++ ``__int2bfloat16_ru`` function.

    The body is intentionally empty: this object is only the key under which
    the ``int32`` lowering is registered with ``lower``.
    """


def _lower__ZL17__int2bfloat16_rui_nbst(shim_stream, shim_obj):
    """Install the lowering of ``__int2bfloat16_ru`` for an ``int32`` operand.

    Declares the extern "C" shim ``_ZL17__int2bfloat16_rui_nbst`` as a device
    function and registers, through ``lower``, an implementation that stores
    each operand in an ``alloca`` slot and calls the shim with pointers to
    those slots. The shim writes its result through ``retval`` and returns 0.

    ``shim_stream`` receives the shim source via ``write_with_key``;
    ``shim_obj`` is passed to ``add_linking_file`` on the active code library.
    """
    shim_name = "_ZL17__int2bfloat16_rui_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL17__int2bfloat16_rui_nbst(__nv_bfloat16 &retval , int* i) {
        retval = __int2bfloat16_ru(*i);
        return 0;
    }
        """

    _ZL17__int2bfloat16_rui_nbst = declare_device(
        shim_name,
        _type___nv_bfloat16(CPointer(int32)),
    )

    def _ZL17__int2bfloat16_rui_nbst_caller(arg_0):
        return _ZL17__int2bfloat16_rui_nbst(arg_0)

    @lower(__int2bfloat16_ru, int32)
    def impl(context, builder, sig, args):
        # Attach the shim object for linking and record the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes its operands by pointer: allocate one slot per
        # operand first, then store the incoming values into the slots.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(_type___nv_bfloat16, CPointer(int32))
        return context.compile_internal(
            builder,
            _ZL17__int2bfloat16_rui_nbst_caller,
            shim_sig,
            slots,
        )


_lower__ZL17__int2bfloat16_rui_nbst(shim_stream, shim_obj)


def __bfloat162short_rn():
    """Python-side stand-in for the C++ ``__bfloat162short_rn`` function.

    The body is intentionally empty: this object is only the key under which
    the ``_type___nv_bfloat16`` lowering is registered with ``lower``.
    """


def _lower__ZL19__bfloat162short_rn13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Install the lowering of ``__bfloat162short_rn`` for a bfloat16 operand.

    Declares the extern "C" shim
    ``_ZL19__bfloat162short_rn13__nv_bfloat16_nbst`` as a device function and
    registers, through ``lower``, an implementation that stores each operand
    in an ``alloca`` slot and calls the shim with pointers to those slots.
    The shim writes its ``short`` result through ``retval`` and returns 0.

    ``shim_stream`` receives the shim source via ``write_with_key``;
    ``shim_obj`` is passed to ``add_linking_file`` on the active code library.
    """
    shim_name = "_ZL19__bfloat162short_rn13__nv_bfloat16_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL19__bfloat162short_rn13__nv_bfloat16_nbst(short &retval , __nv_bfloat16* h) {
        retval = __bfloat162short_rn(*h);
        return 0;
    }
        """

    _ZL19__bfloat162short_rn13__nv_bfloat16_nbst = declare_device(
        shim_name,
        int16(CPointer(_type___nv_bfloat16)),
    )

    def _ZL19__bfloat162short_rn13__nv_bfloat16_nbst_caller(arg_0):
        return _ZL19__bfloat162short_rn13__nv_bfloat16_nbst(arg_0)

    @lower(__bfloat162short_rn, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        # Attach the shim object for linking and record the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes its operands by pointer: allocate one slot per
        # operand first, then store the incoming values into the slots.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(int16, CPointer(_type___nv_bfloat16))
        return context.compile_internal(
            builder,
            _ZL19__bfloat162short_rn13__nv_bfloat16_nbst_caller,
            shim_sig,
            slots,
        )


_lower__ZL19__bfloat162short_rn13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __bfloat162short_rz():
    """Python-side stand-in for the C++ ``__bfloat162short_rz`` function.

    The body is intentionally empty: this object is only the key under which
    the ``_type___nv_bfloat16`` lowering is registered with ``lower``.
    """


def _lower__ZL19__bfloat162short_rz13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Install the lowering of ``__bfloat162short_rz`` for a bfloat16 operand.

    Declares the extern "C" shim
    ``_ZL19__bfloat162short_rz13__nv_bfloat16_nbst`` as a device function and
    registers, through ``lower``, an implementation that stores each operand
    in an ``alloca`` slot and calls the shim with pointers to those slots.
    The shim writes its ``short`` result through ``retval`` and returns 0.

    ``shim_stream`` receives the shim source via ``write_with_key``;
    ``shim_obj`` is passed to ``add_linking_file`` on the active code library.
    """
    shim_name = "_ZL19__bfloat162short_rz13__nv_bfloat16_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL19__bfloat162short_rz13__nv_bfloat16_nbst(short &retval , __nv_bfloat16* h) {
        retval = __bfloat162short_rz(*h);
        return 0;
    }
        """

    _ZL19__bfloat162short_rz13__nv_bfloat16_nbst = declare_device(
        shim_name,
        int16(CPointer(_type___nv_bfloat16)),
    )

    def _ZL19__bfloat162short_rz13__nv_bfloat16_nbst_caller(arg_0):
        return _ZL19__bfloat162short_rz13__nv_bfloat16_nbst(arg_0)

    @lower(__bfloat162short_rz, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        # Attach the shim object for linking and record the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes its operands by pointer: allocate one slot per
        # operand first, then store the incoming values into the slots.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(int16, CPointer(_type___nv_bfloat16))
        return context.compile_internal(
            builder,
            _ZL19__bfloat162short_rz13__nv_bfloat16_nbst_caller,
            shim_sig,
            slots,
        )


_lower__ZL19__bfloat162short_rz13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __bfloat162short_rd():
    """Stand-in for the CUDA ``__bfloat162short_rd`` intrinsic.

    The body is intentionally empty; this function object is the key under
    which the lowering for a ``_type___nv_bfloat16`` argument is registered.
    """


def _lower__ZL19__bfloat162short_rd13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Register the device lowering of ``__bfloat162short_rd``.

    Declares the ``extern "C"`` device shim and attaches, through ``lower``,
    an ``impl`` that links ``shim_obj``, records the shim source in
    ``shim_stream`` and calls the shim with a pointer to the operand.
    """
    symbol = "_ZL19__bfloat162short_rd13__nv_bfloat16_nbst"
    # CUDA C++ text of the shim; kept byte-for-byte, it is handed to
    # ``shim_stream`` below.
    shim_source = """
    extern "C" __device__ int
    _ZL19__bfloat162short_rd13__nv_bfloat16_nbst(short &retval , __nv_bfloat16* h) {
        retval = __bfloat162short_rd(*h);
        return 0;
    }
        """

    shim_fn = declare_device(symbol, int16(CPointer(_type___nv_bfloat16)))

    def _ZL19__bfloat162short_rd13__nv_bfloat16_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__bfloat162short_rd, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim receives its operands by pointer: reserve one stack slot
        # per argument, then store the incoming values into the slots.
        slots = []
        for argty in sig.args:
            slots.append(builder.alloca(context.get_value_type(argty)))
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        shim_sig = signature(int16, CPointer(_type___nv_bfloat16))
        return context.compile_internal(
            builder,
            _ZL19__bfloat162short_rd13__nv_bfloat16_nbst_caller,
            shim_sig,
            slots,
        )


# Run the registration once, at module import.
_lower__ZL19__bfloat162short_rd13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __bfloat162short_ru():
    """Stand-in for the CUDA ``__bfloat162short_ru`` intrinsic.

    The body is intentionally empty; this function object is the key under
    which the lowering for a ``_type___nv_bfloat16`` argument is registered.
    """


def _lower__ZL19__bfloat162short_ru13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Register the device lowering of ``__bfloat162short_ru``.

    Declares the ``extern "C"`` device shim and attaches, through ``lower``,
    an ``impl`` that links ``shim_obj``, records the shim source in
    ``shim_stream`` and calls the shim with a pointer to the operand.
    """
    symbol = "_ZL19__bfloat162short_ru13__nv_bfloat16_nbst"
    # CUDA C++ text of the shim; kept byte-for-byte, it is handed to
    # ``shim_stream`` below.
    shim_source = """
    extern "C" __device__ int
    _ZL19__bfloat162short_ru13__nv_bfloat16_nbst(short &retval , __nv_bfloat16* h) {
        retval = __bfloat162short_ru(*h);
        return 0;
    }
        """

    shim_fn = declare_device(symbol, int16(CPointer(_type___nv_bfloat16)))

    def _ZL19__bfloat162short_ru13__nv_bfloat16_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__bfloat162short_ru, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim receives its operands by pointer: reserve one stack slot
        # per argument, then store the incoming values into the slots.
        slots = []
        for argty in sig.args:
            slots.append(builder.alloca(context.get_value_type(argty)))
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        shim_sig = signature(int16, CPointer(_type___nv_bfloat16))
        return context.compile_internal(
            builder,
            _ZL19__bfloat162short_ru13__nv_bfloat16_nbst_caller,
            shim_sig,
            slots,
        )


# Run the registration once, at module import.
_lower__ZL19__bfloat162short_ru13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __short2bfloat16_rn():
    """Stand-in for the CUDA ``__short2bfloat16_rn`` intrinsic.

    The body is intentionally empty; this function object is the key under
    which the lowering for an ``int16`` argument is registered.
    """


def _lower__ZL19__short2bfloat16_rns_nbst(shim_stream, shim_obj):
    """Register the device lowering of ``__short2bfloat16_rn``.

    Declares the ``extern "C"`` device shim and attaches, through ``lower``,
    an ``impl`` that links ``shim_obj``, records the shim source in
    ``shim_stream`` and calls the shim with a pointer to the operand.
    """
    symbol = "_ZL19__short2bfloat16_rns_nbst"
    # CUDA C++ text of the shim; kept byte-for-byte, it is handed to
    # ``shim_stream`` below.
    shim_source = """
    extern "C" __device__ int
    _ZL19__short2bfloat16_rns_nbst(__nv_bfloat16 &retval , short* i) {
        retval = __short2bfloat16_rn(*i);
        return 0;
    }
        """

    shim_fn = declare_device(symbol, _type___nv_bfloat16(CPointer(int16)))

    def _ZL19__short2bfloat16_rns_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__short2bfloat16_rn, int16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim receives its operands by pointer: reserve one stack slot
        # per argument, then store the incoming values into the slots.
        slots = []
        for argty in sig.args:
            slots.append(builder.alloca(context.get_value_type(argty)))
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        shim_sig = signature(_type___nv_bfloat16, CPointer(int16))
        return context.compile_internal(
            builder,
            _ZL19__short2bfloat16_rns_nbst_caller,
            shim_sig,
            slots,
        )


# Run the registration once, at module import.
_lower__ZL19__short2bfloat16_rns_nbst(shim_stream, shim_obj)


def __short2bfloat16_rz():
    """Stand-in for the CUDA ``__short2bfloat16_rz`` intrinsic.

    The body is intentionally empty; this function object is the key under
    which the lowering for an ``int16`` argument is registered.
    """


def _lower__ZL19__short2bfloat16_rzs_nbst(shim_stream, shim_obj):
    """Register the device lowering of ``__short2bfloat16_rz``.

    Declares the ``extern "C"`` device shim and attaches, through ``lower``,
    an ``impl`` that links ``shim_obj``, records the shim source in
    ``shim_stream`` and calls the shim with a pointer to the operand.
    """
    symbol = "_ZL19__short2bfloat16_rzs_nbst"
    # CUDA C++ text of the shim; kept byte-for-byte, it is handed to
    # ``shim_stream`` below.
    shim_source = """
    extern "C" __device__ int
    _ZL19__short2bfloat16_rzs_nbst(__nv_bfloat16 &retval , short* i) {
        retval = __short2bfloat16_rz(*i);
        return 0;
    }
        """

    shim_fn = declare_device(symbol, _type___nv_bfloat16(CPointer(int16)))

    def _ZL19__short2bfloat16_rzs_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__short2bfloat16_rz, int16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim receives its operands by pointer: reserve one stack slot
        # per argument, then store the incoming values into the slots.
        slots = []
        for argty in sig.args:
            slots.append(builder.alloca(context.get_value_type(argty)))
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        shim_sig = signature(_type___nv_bfloat16, CPointer(int16))
        return context.compile_internal(
            builder,
            _ZL19__short2bfloat16_rzs_nbst_caller,
            shim_sig,
            slots,
        )


# Run the registration once, at module import.
_lower__ZL19__short2bfloat16_rzs_nbst(shim_stream, shim_obj)


def __short2bfloat16_rd():
    """Stand-in for the CUDA ``__short2bfloat16_rd`` intrinsic.

    The body is intentionally empty; this function object is the key under
    which the lowering for an ``int16`` argument is registered.
    """


def _lower__ZL19__short2bfloat16_rds_nbst(shim_stream, shim_obj):
    """Register the device lowering of ``__short2bfloat16_rd``.

    Declares the ``extern "C"`` device shim and attaches, through ``lower``,
    an ``impl`` that links ``shim_obj``, records the shim source in
    ``shim_stream`` and calls the shim with a pointer to the operand.
    """
    symbol = "_ZL19__short2bfloat16_rds_nbst"
    # CUDA C++ text of the shim; kept byte-for-byte, it is handed to
    # ``shim_stream`` below.
    shim_source = """
    extern "C" __device__ int
    _ZL19__short2bfloat16_rds_nbst(__nv_bfloat16 &retval , short* i) {
        retval = __short2bfloat16_rd(*i);
        return 0;
    }
        """

    shim_fn = declare_device(symbol, _type___nv_bfloat16(CPointer(int16)))

    def _ZL19__short2bfloat16_rds_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__short2bfloat16_rd, int16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim receives its operands by pointer: reserve one stack slot
        # per argument, then store the incoming values into the slots.
        slots = []
        for argty in sig.args:
            slots.append(builder.alloca(context.get_value_type(argty)))
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        shim_sig = signature(_type___nv_bfloat16, CPointer(int16))
        return context.compile_internal(
            builder,
            _ZL19__short2bfloat16_rds_nbst_caller,
            shim_sig,
            slots,
        )


# Run the registration once, at module import.
_lower__ZL19__short2bfloat16_rds_nbst(shim_stream, shim_obj)


def __short2bfloat16_ru():
    """Stand-in for the CUDA ``__short2bfloat16_ru`` intrinsic.

    The body is intentionally empty; this function object is the key under
    which the lowering for an ``int16`` argument is registered.
    """


def _lower__ZL19__short2bfloat16_rus_nbst(shim_stream, shim_obj):
    """Register the device lowering of ``__short2bfloat16_ru``.

    Declares the ``extern "C"`` device shim and attaches, through ``lower``,
    an ``impl`` that links ``shim_obj``, records the shim source in
    ``shim_stream`` and calls the shim with a pointer to the operand.
    """
    symbol = "_ZL19__short2bfloat16_rus_nbst"
    # CUDA C++ text of the shim; kept byte-for-byte, it is handed to
    # ``shim_stream`` below.
    shim_source = """
    extern "C" __device__ int
    _ZL19__short2bfloat16_rus_nbst(__nv_bfloat16 &retval , short* i) {
        retval = __short2bfloat16_ru(*i);
        return 0;
    }
        """

    shim_fn = declare_device(symbol, _type___nv_bfloat16(CPointer(int16)))

    def _ZL19__short2bfloat16_rus_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__short2bfloat16_ru, int16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim receives its operands by pointer: reserve one stack slot
        # per argument, then store the incoming values into the slots.
        slots = []
        for argty in sig.args:
            slots.append(builder.alloca(context.get_value_type(argty)))
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        shim_sig = signature(_type___nv_bfloat16, CPointer(int16))
        return context.compile_internal(
            builder,
            _ZL19__short2bfloat16_rus_nbst_caller,
            shim_sig,
            slots,
        )


# Run the registration once, at module import.
_lower__ZL19__short2bfloat16_rus_nbst(shim_stream, shim_obj)


def __bfloat162uint_rn():
    """Stand-in for the CUDA ``__bfloat162uint_rn`` intrinsic.

    The body is intentionally empty; this function object is the key under
    which the lowering for a ``_type___nv_bfloat16`` argument is registered.
    """


def _lower__ZL18__bfloat162uint_rn13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Register the device lowering of ``__bfloat162uint_rn``.

    Declares the ``extern "C"`` device shim and attaches, through ``lower``,
    an ``impl`` that links ``shim_obj``, records the shim source in
    ``shim_stream`` and calls the shim with a pointer to the operand.
    """
    symbol = "_ZL18__bfloat162uint_rn13__nv_bfloat16_nbst"
    # CUDA C++ text of the shim; kept byte-for-byte, it is handed to
    # ``shim_stream`` below.
    shim_source = """
    extern "C" __device__ int
    _ZL18__bfloat162uint_rn13__nv_bfloat16_nbst(unsigned int &retval , __nv_bfloat16* h) {
        retval = __bfloat162uint_rn(*h);
        return 0;
    }
        """

    shim_fn = declare_device(symbol, uint32(CPointer(_type___nv_bfloat16)))

    def _ZL18__bfloat162uint_rn13__nv_bfloat16_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__bfloat162uint_rn, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim receives its operands by pointer: reserve one stack slot
        # per argument, then store the incoming values into the slots.
        slots = []
        for argty in sig.args:
            slots.append(builder.alloca(context.get_value_type(argty)))
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        shim_sig = signature(uint32, CPointer(_type___nv_bfloat16))
        return context.compile_internal(
            builder,
            _ZL18__bfloat162uint_rn13__nv_bfloat16_nbst_caller,
            shim_sig,
            slots,
        )


# Run the registration once, at module import.
_lower__ZL18__bfloat162uint_rn13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __bfloat162uint_rz():
    """Stand-in for the CUDA ``__bfloat162uint_rz`` intrinsic.

    The body is intentionally empty; this function object is the key under
    which the lowering for a ``_type___nv_bfloat16`` argument is registered.
    """


def _lower__ZL18__bfloat162uint_rz13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Register the device lowering of ``__bfloat162uint_rz``.

    Declares the ``extern "C"`` device shim and attaches, through ``lower``,
    an ``impl`` that links ``shim_obj``, records the shim source in
    ``shim_stream`` and calls the shim with a pointer to the operand.
    """
    symbol = "_ZL18__bfloat162uint_rz13__nv_bfloat16_nbst"
    # CUDA C++ text of the shim; kept byte-for-byte, it is handed to
    # ``shim_stream`` below.
    shim_source = """
    extern "C" __device__ int
    _ZL18__bfloat162uint_rz13__nv_bfloat16_nbst(unsigned int &retval , __nv_bfloat16* h) {
        retval = __bfloat162uint_rz(*h);
        return 0;
    }
        """

    shim_fn = declare_device(symbol, uint32(CPointer(_type___nv_bfloat16)))

    def _ZL18__bfloat162uint_rz13__nv_bfloat16_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__bfloat162uint_rz, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim receives its operands by pointer: reserve one stack slot
        # per argument, then store the incoming values into the slots.
        slots = []
        for argty in sig.args:
            slots.append(builder.alloca(context.get_value_type(argty)))
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        shim_sig = signature(uint32, CPointer(_type___nv_bfloat16))
        return context.compile_internal(
            builder,
            _ZL18__bfloat162uint_rz13__nv_bfloat16_nbst_caller,
            shim_sig,
            slots,
        )


# Run the registration once, at module import.
_lower__ZL18__bfloat162uint_rz13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __bfloat162uint_rd():
    """Stand-in for the CUDA ``__bfloat162uint_rd`` intrinsic.

    The body is intentionally empty; this function object is the key under
    which the lowering for a ``_type___nv_bfloat16`` argument is registered.
    """


def _lower__ZL18__bfloat162uint_rd13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Register the device lowering of ``__bfloat162uint_rd``.

    Declares the ``extern "C"`` device shim and attaches, through ``lower``,
    an ``impl`` that links ``shim_obj``, records the shim source in
    ``shim_stream`` and calls the shim with a pointer to the operand.
    """
    symbol = "_ZL18__bfloat162uint_rd13__nv_bfloat16_nbst"
    # CUDA C++ text of the shim; kept byte-for-byte, it is handed to
    # ``shim_stream`` below.
    shim_source = """
    extern "C" __device__ int
    _ZL18__bfloat162uint_rd13__nv_bfloat16_nbst(unsigned int &retval , __nv_bfloat16* h) {
        retval = __bfloat162uint_rd(*h);
        return 0;
    }
        """

    shim_fn = declare_device(symbol, uint32(CPointer(_type___nv_bfloat16)))

    def _ZL18__bfloat162uint_rd13__nv_bfloat16_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__bfloat162uint_rd, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim receives its operands by pointer: reserve one stack slot
        # per argument, then store the incoming values into the slots.
        slots = []
        for argty in sig.args:
            slots.append(builder.alloca(context.get_value_type(argty)))
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        shim_sig = signature(uint32, CPointer(_type___nv_bfloat16))
        return context.compile_internal(
            builder,
            _ZL18__bfloat162uint_rd13__nv_bfloat16_nbst_caller,
            shim_sig,
            slots,
        )


# Run the registration once, at module import.
_lower__ZL18__bfloat162uint_rd13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __bfloat162uint_ru():
    """Stand-in for the CUDA ``__bfloat162uint_ru`` intrinsic.

    The body is intentionally empty; this function object is the key under
    which the lowering for a ``_type___nv_bfloat16`` argument is registered.
    """


def _lower__ZL18__bfloat162uint_ru13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Register the device lowering of ``__bfloat162uint_ru``.

    Declares the ``extern "C"`` device shim and attaches, through ``lower``,
    an ``impl`` that links ``shim_obj``, records the shim source in
    ``shim_stream`` and calls the shim with a pointer to the operand.
    """
    symbol = "_ZL18__bfloat162uint_ru13__nv_bfloat16_nbst"
    # CUDA C++ text of the shim; kept byte-for-byte, it is handed to
    # ``shim_stream`` below.
    shim_source = """
    extern "C" __device__ int
    _ZL18__bfloat162uint_ru13__nv_bfloat16_nbst(unsigned int &retval , __nv_bfloat16* h) {
        retval = __bfloat162uint_ru(*h);
        return 0;
    }
        """

    shim_fn = declare_device(symbol, uint32(CPointer(_type___nv_bfloat16)))

    def _ZL18__bfloat162uint_ru13__nv_bfloat16_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__bfloat162uint_ru, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim receives its operands by pointer: reserve one stack slot
        # per argument, then store the incoming values into the slots.
        slots = []
        for argty in sig.args:
            slots.append(builder.alloca(context.get_value_type(argty)))
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        shim_sig = signature(uint32, CPointer(_type___nv_bfloat16))
        return context.compile_internal(
            builder,
            _ZL18__bfloat162uint_ru13__nv_bfloat16_nbst_caller,
            shim_sig,
            slots,
        )


# Run the registration once, at module import.
_lower__ZL18__bfloat162uint_ru13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __uint2bfloat16_rn():
    """Stand-in for the CUDA ``__uint2bfloat16_rn`` intrinsic.

    The body is intentionally empty; this function object is the key under
    which the lowering for a ``uint32`` argument is registered.
    """


def _lower__ZL18__uint2bfloat16_rnj_nbst(shim_stream, shim_obj):
    """Register the device lowering of ``__uint2bfloat16_rn``.

    Declares the ``extern "C"`` device shim and attaches, through ``lower``,
    an ``impl`` that links ``shim_obj``, records the shim source in
    ``shim_stream`` and calls the shim with a pointer to the operand.
    """
    symbol = "_ZL18__uint2bfloat16_rnj_nbst"
    # CUDA C++ text of the shim; kept byte-for-byte, it is handed to
    # ``shim_stream`` below.
    shim_source = """
    extern "C" __device__ int
    _ZL18__uint2bfloat16_rnj_nbst(__nv_bfloat16 &retval , unsigned int* i) {
        retval = __uint2bfloat16_rn(*i);
        return 0;
    }
        """

    shim_fn = declare_device(symbol, _type___nv_bfloat16(CPointer(uint32)))

    def _ZL18__uint2bfloat16_rnj_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__uint2bfloat16_rn, uint32)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim receives its operands by pointer: reserve one stack slot
        # per argument, then store the incoming values into the slots.
        slots = []
        for argty in sig.args:
            slots.append(builder.alloca(context.get_value_type(argty)))
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        shim_sig = signature(_type___nv_bfloat16, CPointer(uint32))
        return context.compile_internal(
            builder,
            _ZL18__uint2bfloat16_rnj_nbst_caller,
            shim_sig,
            slots,
        )


# Run the registration once, at module import.
_lower__ZL18__uint2bfloat16_rnj_nbst(shim_stream, shim_obj)


def __uint2bfloat16_rz():
    """Stand-in for the CUDA ``__uint2bfloat16_rz`` intrinsic.

    The body is intentionally empty; this function object is the key under
    which the lowering for a ``uint32`` argument is registered.
    """


def _lower__ZL18__uint2bfloat16_rzj_nbst(shim_stream, shim_obj):
    """Register the device lowering of ``__uint2bfloat16_rz``.

    Declares the ``extern "C"`` device shim and attaches, through ``lower``,
    an ``impl`` that links ``shim_obj``, records the shim source in
    ``shim_stream`` and calls the shim with a pointer to the operand.
    """
    symbol = "_ZL18__uint2bfloat16_rzj_nbst"
    # CUDA C++ text of the shim; kept byte-for-byte, it is handed to
    # ``shim_stream`` below.
    shim_source = """
    extern "C" __device__ int
    _ZL18__uint2bfloat16_rzj_nbst(__nv_bfloat16 &retval , unsigned int* i) {
        retval = __uint2bfloat16_rz(*i);
        return 0;
    }
        """

    shim_fn = declare_device(symbol, _type___nv_bfloat16(CPointer(uint32)))

    def _ZL18__uint2bfloat16_rzj_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__uint2bfloat16_rz, uint32)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim receives its operands by pointer: reserve one stack slot
        # per argument, then store the incoming values into the slots.
        slots = []
        for argty in sig.args:
            slots.append(builder.alloca(context.get_value_type(argty)))
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        shim_sig = signature(_type___nv_bfloat16, CPointer(uint32))
        return context.compile_internal(
            builder,
            _ZL18__uint2bfloat16_rzj_nbst_caller,
            shim_sig,
            slots,
        )


# Run the registration once, at module import.
_lower__ZL18__uint2bfloat16_rzj_nbst(shim_stream, shim_obj)


def __uint2bfloat16_rd():
    """Stand-in for the CUDA ``__uint2bfloat16_rd`` intrinsic.

    The body is intentionally empty; this function object is the key under
    which the lowering for a ``uint32`` argument is registered.
    """


def _lower__ZL18__uint2bfloat16_rdj_nbst(shim_stream, shim_obj):
    """Register the device lowering of ``__uint2bfloat16_rd``.

    Declares the ``extern "C"`` device shim and attaches, through ``lower``,
    an ``impl`` that links ``shim_obj``, records the shim source in
    ``shim_stream`` and calls the shim with a pointer to the operand.
    """
    symbol = "_ZL18__uint2bfloat16_rdj_nbst"
    # CUDA C++ text of the shim; kept byte-for-byte, it is handed to
    # ``shim_stream`` below.
    shim_source = """
    extern "C" __device__ int
    _ZL18__uint2bfloat16_rdj_nbst(__nv_bfloat16 &retval , unsigned int* i) {
        retval = __uint2bfloat16_rd(*i);
        return 0;
    }
        """

    shim_fn = declare_device(symbol, _type___nv_bfloat16(CPointer(uint32)))

    def _ZL18__uint2bfloat16_rdj_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__uint2bfloat16_rd, uint32)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim receives its operands by pointer: reserve one stack slot
        # per argument, then store the incoming values into the slots.
        slots = []
        for argty in sig.args:
            slots.append(builder.alloca(context.get_value_type(argty)))
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        shim_sig = signature(_type___nv_bfloat16, CPointer(uint32))
        return context.compile_internal(
            builder,
            _ZL18__uint2bfloat16_rdj_nbst_caller,
            shim_sig,
            slots,
        )


# Run the registration once, at module import.
_lower__ZL18__uint2bfloat16_rdj_nbst(shim_stream, shim_obj)


def __uint2bfloat16_ru():
    """Stand-in for the CUDA ``__uint2bfloat16_ru`` intrinsic.

    The body is intentionally empty; this function object is the key under
    which the lowering for a ``uint32`` argument is registered.
    """


def _lower__ZL18__uint2bfloat16_ruj_nbst(shim_stream, shim_obj):
    """Register the device lowering of ``__uint2bfloat16_ru``.

    Declares the ``extern "C"`` device shim and attaches, through ``lower``,
    an ``impl`` that links ``shim_obj``, records the shim source in
    ``shim_stream`` and calls the shim with a pointer to the operand.
    """
    symbol = "_ZL18__uint2bfloat16_ruj_nbst"
    # CUDA C++ text of the shim; kept byte-for-byte, it is handed to
    # ``shim_stream`` below.
    shim_source = """
    extern "C" __device__ int
    _ZL18__uint2bfloat16_ruj_nbst(__nv_bfloat16 &retval , unsigned int* i) {
        retval = __uint2bfloat16_ru(*i);
        return 0;
    }
        """

    shim_fn = declare_device(symbol, _type___nv_bfloat16(CPointer(uint32)))

    def _ZL18__uint2bfloat16_ruj_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__uint2bfloat16_ru, uint32)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim receives its operands by pointer: reserve one stack slot
        # per argument, then store the incoming values into the slots.
        slots = []
        for argty in sig.args:
            slots.append(builder.alloca(context.get_value_type(argty)))
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        shim_sig = signature(_type___nv_bfloat16, CPointer(uint32))
        return context.compile_internal(
            builder,
            _ZL18__uint2bfloat16_ruj_nbst_caller,
            shim_sig,
            slots,
        )


# Run the registration once, at module import.
_lower__ZL18__uint2bfloat16_ruj_nbst(shim_stream, shim_obj)


def __bfloat162ushort_rn():
    """Stand-in for the CUDA ``__bfloat162ushort_rn`` intrinsic.

    The body is intentionally empty; this function object is the key under
    which the lowering for a ``_type___nv_bfloat16`` argument is registered.
    """


def _lower__ZL20__bfloat162ushort_rn13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Register the device lowering of ``__bfloat162ushort_rn``.

    Declares the ``extern "C"`` device shim and attaches, through ``lower``,
    an ``impl`` that links ``shim_obj``, records the shim source in
    ``shim_stream`` and calls the shim with a pointer to the operand.
    """
    symbol = "_ZL20__bfloat162ushort_rn13__nv_bfloat16_nbst"
    # CUDA C++ text of the shim; kept byte-for-byte, it is handed to
    # ``shim_stream`` below.
    shim_source = """
    extern "C" __device__ int
    _ZL20__bfloat162ushort_rn13__nv_bfloat16_nbst(unsigned short &retval , __nv_bfloat16* h) {
        retval = __bfloat162ushort_rn(*h);
        return 0;
    }
        """

    shim_fn = declare_device(symbol, uint16(CPointer(_type___nv_bfloat16)))

    def _ZL20__bfloat162ushort_rn13__nv_bfloat16_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__bfloat162ushort_rn, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim receives its operands by pointer: reserve one stack slot
        # per argument, then store the incoming values into the slots.
        slots = []
        for argty in sig.args:
            slots.append(builder.alloca(context.get_value_type(argty)))
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        shim_sig = signature(uint16, CPointer(_type___nv_bfloat16))
        return context.compile_internal(
            builder,
            _ZL20__bfloat162ushort_rn13__nv_bfloat16_nbst_caller,
            shim_sig,
            slots,
        )


# Run the registration once, at module import.
_lower__ZL20__bfloat162ushort_rn13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __bfloat162ushort_rz():
    """Stand-in for the CUDA ``__bfloat162ushort_rz`` intrinsic.

    The body is intentionally empty; this function object is the key under
    which the lowering for a ``_type___nv_bfloat16`` argument is registered.
    """


def _lower__ZL20__bfloat162ushort_rz13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Register the device lowering of ``__bfloat162ushort_rz``.

    Declares the ``extern "C"`` device shim and attaches, through ``lower``,
    an ``impl`` that links ``shim_obj``, records the shim source in
    ``shim_stream`` and calls the shim with a pointer to the operand.
    """
    symbol = "_ZL20__bfloat162ushort_rz13__nv_bfloat16_nbst"
    # CUDA C++ text of the shim; kept byte-for-byte, it is handed to
    # ``shim_stream`` below.
    shim_source = """
    extern "C" __device__ int
    _ZL20__bfloat162ushort_rz13__nv_bfloat16_nbst(unsigned short &retval , __nv_bfloat16* h) {
        retval = __bfloat162ushort_rz(*h);
        return 0;
    }
        """

    shim_fn = declare_device(symbol, uint16(CPointer(_type___nv_bfloat16)))

    def _ZL20__bfloat162ushort_rz13__nv_bfloat16_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__bfloat162ushort_rz, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim receives its operands by pointer: reserve one stack slot
        # per argument, then store the incoming values into the slots.
        slots = []
        for argty in sig.args:
            slots.append(builder.alloca(context.get_value_type(argty)))
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        shim_sig = signature(uint16, CPointer(_type___nv_bfloat16))
        return context.compile_internal(
            builder,
            _ZL20__bfloat162ushort_rz13__nv_bfloat16_nbst_caller,
            shim_sig,
            slots,
        )


# Run the registration once, at module import.
_lower__ZL20__bfloat162ushort_rz13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __bfloat162ushort_rd():
    """Stand-in for the CUDA ``__bfloat162ushort_rd`` intrinsic.

    The body is intentionally empty; this function object is the key under
    which the lowering for a ``_type___nv_bfloat16`` argument is registered.
    """


def _lower__ZL20__bfloat162ushort_rd13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Register the device lowering of ``__bfloat162ushort_rd``.

    Declares the ``extern "C"`` device shim and attaches, through ``lower``,
    an ``impl`` that links ``shim_obj``, records the shim source in
    ``shim_stream`` and calls the shim with a pointer to the operand.
    """
    symbol = "_ZL20__bfloat162ushort_rd13__nv_bfloat16_nbst"
    # CUDA C++ text of the shim; kept byte-for-byte, it is handed to
    # ``shim_stream`` below.
    shim_source = """
    extern "C" __device__ int
    _ZL20__bfloat162ushort_rd13__nv_bfloat16_nbst(unsigned short &retval , __nv_bfloat16* h) {
        retval = __bfloat162ushort_rd(*h);
        return 0;
    }
        """

    shim_fn = declare_device(symbol, uint16(CPointer(_type___nv_bfloat16)))

    def _ZL20__bfloat162ushort_rd13__nv_bfloat16_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__bfloat162ushort_rd, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim receives its operands by pointer: reserve one stack slot
        # per argument, then store the incoming values into the slots.
        slots = []
        for argty in sig.args:
            slots.append(builder.alloca(context.get_value_type(argty)))
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        shim_sig = signature(uint16, CPointer(_type___nv_bfloat16))
        return context.compile_internal(
            builder,
            _ZL20__bfloat162ushort_rd13__nv_bfloat16_nbst_caller,
            shim_sig,
            slots,
        )


# Run the registration once, at module import.
_lower__ZL20__bfloat162ushort_rd13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __bfloat162ushort_ru():
    """Stand-in for the CUDA ``__bfloat162ushort_ru`` intrinsic.

    The body is intentionally empty; this function object is the key under
    which the lowering for a ``_type___nv_bfloat16`` argument is registered.
    """


def _lower__ZL20__bfloat162ushort_ru13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Register the device lowering of ``__bfloat162ushort_ru``.

    Declares the ``extern "C"`` device shim and attaches, through ``lower``,
    an ``impl`` that links ``shim_obj``, records the shim source in
    ``shim_stream`` and calls the shim with a pointer to the operand.
    """
    symbol = "_ZL20__bfloat162ushort_ru13__nv_bfloat16_nbst"
    # CUDA C++ text of the shim; kept byte-for-byte, it is handed to
    # ``shim_stream`` below.
    shim_source = """
    extern "C" __device__ int
    _ZL20__bfloat162ushort_ru13__nv_bfloat16_nbst(unsigned short &retval , __nv_bfloat16* h) {
        retval = __bfloat162ushort_ru(*h);
        return 0;
    }
        """

    shim_fn = declare_device(symbol, uint16(CPointer(_type___nv_bfloat16)))

    def _ZL20__bfloat162ushort_ru13__nv_bfloat16_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__bfloat162ushort_ru, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim receives its operands by pointer: reserve one stack slot
        # per argument, then store the incoming values into the slots.
        slots = []
        for argty in sig.args:
            slots.append(builder.alloca(context.get_value_type(argty)))
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        shim_sig = signature(uint16, CPointer(_type___nv_bfloat16))
        return context.compile_internal(
            builder,
            _ZL20__bfloat162ushort_ru13__nv_bfloat16_nbst_caller,
            shim_sig,
            slots,
        )


# Run the registration once, at module import.
_lower__ZL20__bfloat162ushort_ru13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __ushort2bfloat16_rn():
    """Stand-in for the CUDA ``__ushort2bfloat16_rn`` intrinsic.

    The body is intentionally empty; this function object is the key under
    which the lowering for a ``uint16`` argument is registered.
    """


def _lower__ZL20__ushort2bfloat16_rnt_nbst(shim_stream, shim_obj):
    """Register the device lowering of ``__ushort2bfloat16_rn``.

    Declares the ``extern "C"`` device shim and attaches, through ``lower``,
    an ``impl`` that links ``shim_obj``, records the shim source in
    ``shim_stream`` and calls the shim with a pointer to the operand.
    """
    symbol = "_ZL20__ushort2bfloat16_rnt_nbst"
    # CUDA C++ text of the shim; kept byte-for-byte, it is handed to
    # ``shim_stream`` below.
    shim_source = """
    extern "C" __device__ int
    _ZL20__ushort2bfloat16_rnt_nbst(__nv_bfloat16 &retval , unsigned short* i) {
        retval = __ushort2bfloat16_rn(*i);
        return 0;
    }
        """

    shim_fn = declare_device(symbol, _type___nv_bfloat16(CPointer(uint16)))

    def _ZL20__ushort2bfloat16_rnt_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__ushort2bfloat16_rn, uint16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim receives its operands by pointer: reserve one stack slot
        # per argument, then store the incoming values into the slots.
        slots = []
        for argty in sig.args:
            slots.append(builder.alloca(context.get_value_type(argty)))
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        shim_sig = signature(_type___nv_bfloat16, CPointer(uint16))
        return context.compile_internal(
            builder,
            _ZL20__ushort2bfloat16_rnt_nbst_caller,
            shim_sig,
            slots,
        )


# Run the registration once, at module import.
_lower__ZL20__ushort2bfloat16_rnt_nbst(shim_stream, shim_obj)


def __ushort2bfloat16_rz():
    """Stand-in for the CUDA ``__ushort2bfloat16_rz`` intrinsic.

    The body is intentionally empty; this function object is the key under
    which the lowering for a ``uint16`` argument is registered.
    """


def _lower__ZL20__ushort2bfloat16_rzt_nbst(shim_stream, shim_obj):
    """Register the device lowering of ``__ushort2bfloat16_rz``.

    Declares the ``extern "C"`` device shim and attaches, through ``lower``,
    an ``impl`` that links ``shim_obj``, records the shim source in
    ``shim_stream`` and calls the shim with a pointer to the operand.
    """
    symbol = "_ZL20__ushort2bfloat16_rzt_nbst"
    # CUDA C++ text of the shim; kept byte-for-byte, it is handed to
    # ``shim_stream`` below.
    shim_source = """
    extern "C" __device__ int
    _ZL20__ushort2bfloat16_rzt_nbst(__nv_bfloat16 &retval , unsigned short* i) {
        retval = __ushort2bfloat16_rz(*i);
        return 0;
    }
        """

    shim_fn = declare_device(symbol, _type___nv_bfloat16(CPointer(uint16)))

    def _ZL20__ushort2bfloat16_rzt_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__ushort2bfloat16_rz, uint16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim receives its operands by pointer: reserve one stack slot
        # per argument, then store the incoming values into the slots.
        slots = []
        for argty in sig.args:
            slots.append(builder.alloca(context.get_value_type(argty)))
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        shim_sig = signature(_type___nv_bfloat16, CPointer(uint16))
        return context.compile_internal(
            builder,
            _ZL20__ushort2bfloat16_rzt_nbst_caller,
            shim_sig,
            slots,
        )


# Run the registration once, at module import.
_lower__ZL20__ushort2bfloat16_rzt_nbst(shim_stream, shim_obj)


def __ushort2bfloat16_rd():
    """Stand-in for the CUDA ``__ushort2bfloat16_rd`` intrinsic.

    The body is intentionally empty; this function object is the key under
    which the lowering for a ``uint16`` argument is registered.
    """


def _lower__ZL20__ushort2bfloat16_rdt_nbst(shim_stream, shim_obj):
    """Register the device lowering of ``__ushort2bfloat16_rd``.

    Declares the ``extern "C"`` device shim and attaches, through ``lower``,
    an ``impl`` that links ``shim_obj``, records the shim source in
    ``shim_stream`` and calls the shim with a pointer to the operand.
    """
    symbol = "_ZL20__ushort2bfloat16_rdt_nbst"
    # CUDA C++ text of the shim; kept byte-for-byte, it is handed to
    # ``shim_stream`` below.
    shim_source = """
    extern "C" __device__ int
    _ZL20__ushort2bfloat16_rdt_nbst(__nv_bfloat16 &retval , unsigned short* i) {
        retval = __ushort2bfloat16_rd(*i);
        return 0;
    }
        """

    shim_fn = declare_device(symbol, _type___nv_bfloat16(CPointer(uint16)))

    def _ZL20__ushort2bfloat16_rdt_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__ushort2bfloat16_rd, uint16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim receives its operands by pointer: reserve one stack slot
        # per argument, then store the incoming values into the slots.
        slots = []
        for argty in sig.args:
            slots.append(builder.alloca(context.get_value_type(argty)))
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        shim_sig = signature(_type___nv_bfloat16, CPointer(uint16))
        return context.compile_internal(
            builder,
            _ZL20__ushort2bfloat16_rdt_nbst_caller,
            shim_sig,
            slots,
        )


# Run the registration once, at module import.
_lower__ZL20__ushort2bfloat16_rdt_nbst(shim_stream, shim_obj)


def __ushort2bfloat16_ru():
    """Stand-in for the CUDA ``__ushort2bfloat16_ru`` intrinsic.

    The body is intentionally empty; this function object is the key under
    which the lowering for a ``uint16`` argument is registered.
    """


def _lower__ZL20__ushort2bfloat16_rut_nbst(shim_stream, shim_obj):
    """Register the device lowering of ``__ushort2bfloat16_ru``.

    Declares the ``extern "C"`` device shim and attaches, through ``lower``,
    an ``impl`` that links ``shim_obj``, records the shim source in
    ``shim_stream`` and calls the shim with a pointer to the operand.
    """
    symbol = "_ZL20__ushort2bfloat16_rut_nbst"
    # CUDA C++ text of the shim; kept byte-for-byte, it is handed to
    # ``shim_stream`` below.
    shim_source = """
    extern "C" __device__ int
    _ZL20__ushort2bfloat16_rut_nbst(__nv_bfloat16 &retval , unsigned short* i) {
        retval = __ushort2bfloat16_ru(*i);
        return 0;
    }
        """

    shim_fn = declare_device(symbol, _type___nv_bfloat16(CPointer(uint16)))

    def _ZL20__ushort2bfloat16_rut_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__ushort2bfloat16_ru, uint16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim receives its operands by pointer: reserve one stack slot
        # per argument, then store the incoming values into the slots.
        slots = []
        for argty in sig.args:
            slots.append(builder.alloca(context.get_value_type(argty)))
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        shim_sig = signature(_type___nv_bfloat16, CPointer(uint16))
        return context.compile_internal(
            builder,
            _ZL20__ushort2bfloat16_rut_nbst_caller,
            shim_sig,
            slots,
        )


# Run the registration once, at module import.
_lower__ZL20__ushort2bfloat16_rut_nbst(shim_stream, shim_obj)


def __bfloat162ull_rn():
    """Stand-in for the CUDA ``__bfloat162ull_rn`` intrinsic.

    The body is intentionally empty; this function object is the key under
    which the lowering for a ``_type___nv_bfloat16`` argument is registered.
    """


def _lower__ZL17__bfloat162ull_rn13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Register the device lowering of ``__bfloat162ull_rn``.

    Declares the ``extern "C"`` device shim and attaches, through ``lower``,
    an ``impl`` that links ``shim_obj``, records the shim source in
    ``shim_stream`` and calls the shim with a pointer to the operand.
    """
    symbol = "_ZL17__bfloat162ull_rn13__nv_bfloat16_nbst"
    # CUDA C++ text of the shim; kept byte-for-byte, it is handed to
    # ``shim_stream`` below.
    shim_source = """
    extern "C" __device__ int
    _ZL17__bfloat162ull_rn13__nv_bfloat16_nbst(unsigned long long &retval , __nv_bfloat16* h) {
        retval = __bfloat162ull_rn(*h);
        return 0;
    }
        """

    shim_fn = declare_device(symbol, uint64(CPointer(_type___nv_bfloat16)))

    def _ZL17__bfloat162ull_rn13__nv_bfloat16_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__bfloat162ull_rn, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim receives its operands by pointer: reserve one stack slot
        # per argument, then store the incoming values into the slots.
        slots = []
        for argty in sig.args:
            slots.append(builder.alloca(context.get_value_type(argty)))
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        shim_sig = signature(uint64, CPointer(_type___nv_bfloat16))
        return context.compile_internal(
            builder,
            _ZL17__bfloat162ull_rn13__nv_bfloat16_nbst_caller,
            shim_sig,
            slots,
        )


# Run the registration once, at module import.
_lower__ZL17__bfloat162ull_rn13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __bfloat162ull_rz():
    """Stand-in for the CUDA ``__bfloat162ull_rz`` intrinsic.

    The body is intentionally empty; this function object is the key under
    which the lowering for a ``_type___nv_bfloat16`` argument is registered.
    """


def _lower__ZL17__bfloat162ull_rz13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Set up the ``__bfloat162ull_rz`` lowering for a ``_type___nv_bfloat16`` argument.

    Declares the external shim and registers, via ``lower``, an
    implementation that passes the argument to that shim by pointer.

    ``shim_stream`` is given the shim's C++ source through ``write_with_key``
    and ``shim_obj`` is added to the active code library as a linking file;
    both happen whenever the registered implementation runs.
    """
    symbol = "_ZL17__bfloat162ull_rz13__nv_bfloat16_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL17__bfloat162ull_rz13__nv_bfloat16_nbst(unsigned long long &retval , __nv_bfloat16* h) {
        retval = __bfloat162ull_rz(*h);
        return 0;
    }
        """
    arg_ptr_type = CPointer(_type___nv_bfloat16)
    shim_fn = declare_device(symbol, uint64(arg_ptr_type))

    def _ZL17__bfloat162ull_rz13__nv_bfloat16_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__bfloat162ull_rz, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        # Make the shim linkable and record its source under the symbol name.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its inputs by pointer: reserve one slot per
        # argument, then copy each value into its slot.
        slots = [builder.alloca(context.get_value_type(ty)) for ty in sig.args]
        for slot, ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZL17__bfloat162ull_rz13__nv_bfloat16_nbst_caller,
            signature(uint64, arg_ptr_type),
            slots,
        )


# Register the __bfloat162ull_rz lowering as this module executes, using the
# module-level shim_stream and shim_obj.
_lower__ZL17__bfloat162ull_rz13__nv_bfloat16_nbst(shim_stream, shim_obj)


def make_bfloat162():
    """Placeholder for the CUDA C++ ``make_bfloat162`` function.

    Does nothing when called from Python; the function object is the key
    under which the two-argument ``_type___nv_bfloat16`` lowering is
    registered.
    """
    return None


def _lower__ZL14make_bfloat16213__nv_bfloat16S__nbst(shim_stream, shim_obj):
    """Set up the ``make_bfloat162`` lowering for two ``_type___nv_bfloat16`` arguments.

    Declares the external shim and registers, via ``lower``, an
    implementation that passes both arguments to that shim by pointer.

    ``shim_stream`` is given the shim's C++ source through ``write_with_key``
    and ``shim_obj`` is added to the active code library as a linking file;
    both happen whenever the registered implementation runs.
    """
    symbol = "_ZL14make_bfloat16213__nv_bfloat16S__nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL14make_bfloat16213__nv_bfloat16S__nbst(__nv_bfloat162 &retval , __nv_bfloat16* x, __nv_bfloat16* y) {
        retval = make_bfloat162(*x, *y);
        return 0;
    }
        """
    arg_ptr_type = CPointer(_type___nv_bfloat16)
    shim_fn = declare_device(
        symbol, _type___nv_bfloat162(arg_ptr_type, arg_ptr_type)
    )

    def _ZL14make_bfloat16213__nv_bfloat16S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(make_bfloat162, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        # Make the shim linkable and record its source under the symbol name.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its inputs by pointer: reserve one slot per
        # argument, then copy each value into its slot.
        slots = [builder.alloca(context.get_value_type(ty)) for ty in sig.args]
        for slot, ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZL14make_bfloat16213__nv_bfloat16S__nbst_caller,
            signature(_type___nv_bfloat162, arg_ptr_type, arg_ptr_type),
            slots,
        )


# Register the make_bfloat162 lowering as this module executes, using the
# module-level shim_stream and shim_obj.
_lower__ZL14make_bfloat16213__nv_bfloat16S__nbst(shim_stream, shim_obj)


def __bfloat162ull_rd():
    """Placeholder for the CUDA C++ ``__bfloat162ull_rd`` function.

    Does nothing when called from Python; the function object is the key
    under which the ``_type___nv_bfloat16`` lowering is registered.
    """
    return None


def _lower__ZL17__bfloat162ull_rd13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Set up the ``__bfloat162ull_rd`` lowering for a ``_type___nv_bfloat16`` argument.

    Declares the external shim and registers, via ``lower``, an
    implementation that passes the argument to that shim by pointer.

    ``shim_stream`` is given the shim's C++ source through ``write_with_key``
    and ``shim_obj`` is added to the active code library as a linking file;
    both happen whenever the registered implementation runs.
    """
    symbol = "_ZL17__bfloat162ull_rd13__nv_bfloat16_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL17__bfloat162ull_rd13__nv_bfloat16_nbst(unsigned long long &retval , __nv_bfloat16* h) {
        retval = __bfloat162ull_rd(*h);
        return 0;
    }
        """
    arg_ptr_type = CPointer(_type___nv_bfloat16)
    shim_fn = declare_device(symbol, uint64(arg_ptr_type))

    def _ZL17__bfloat162ull_rd13__nv_bfloat16_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__bfloat162ull_rd, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        # Make the shim linkable and record its source under the symbol name.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its inputs by pointer: reserve one slot per
        # argument, then copy each value into its slot.
        slots = [builder.alloca(context.get_value_type(ty)) for ty in sig.args]
        for slot, ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZL17__bfloat162ull_rd13__nv_bfloat16_nbst_caller,
            signature(uint64, arg_ptr_type),
            slots,
        )


# Register the __bfloat162ull_rd lowering as this module executes, using the
# module-level shim_stream and shim_obj.
_lower__ZL17__bfloat162ull_rd13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __bfloat162ull_ru():
    """Placeholder for the CUDA C++ ``__bfloat162ull_ru`` function.

    Does nothing when called from Python; the function object is the key
    under which the ``_type___nv_bfloat16`` lowering is registered.
    """
    return None


def _lower__ZL17__bfloat162ull_ru13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Set up the ``__bfloat162ull_ru`` lowering for a ``_type___nv_bfloat16`` argument.

    Declares the external shim and registers, via ``lower``, an
    implementation that passes the argument to that shim by pointer.

    ``shim_stream`` is given the shim's C++ source through ``write_with_key``
    and ``shim_obj`` is added to the active code library as a linking file;
    both happen whenever the registered implementation runs.
    """
    symbol = "_ZL17__bfloat162ull_ru13__nv_bfloat16_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL17__bfloat162ull_ru13__nv_bfloat16_nbst(unsigned long long &retval , __nv_bfloat16* h) {
        retval = __bfloat162ull_ru(*h);
        return 0;
    }
        """
    arg_ptr_type = CPointer(_type___nv_bfloat16)
    shim_fn = declare_device(symbol, uint64(arg_ptr_type))

    def _ZL17__bfloat162ull_ru13__nv_bfloat16_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__bfloat162ull_ru, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        # Make the shim linkable and record its source under the symbol name.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its inputs by pointer: reserve one slot per
        # argument, then copy each value into its slot.
        slots = [builder.alloca(context.get_value_type(ty)) for ty in sig.args]
        for slot, ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZL17__bfloat162ull_ru13__nv_bfloat16_nbst_caller,
            signature(uint64, arg_ptr_type),
            slots,
        )


# Register the __bfloat162ull_ru lowering as this module executes, using the
# module-level shim_stream and shim_obj.
_lower__ZL17__bfloat162ull_ru13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __ull2bfloat16_rn():
    """Placeholder for the CUDA C++ ``__ull2bfloat16_rn`` function.

    Does nothing when called from Python; the function object is the key
    under which the ``uint64`` lowering is registered.
    """
    return None


def _lower__ZL17__ull2bfloat16_rny_nbst(shim_stream, shim_obj):
    """Set up the ``__ull2bfloat16_rn`` lowering for a ``uint64`` argument.

    Declares the external shim and registers, via ``lower``, an
    implementation that passes the argument to that shim by pointer.

    ``shim_stream`` is given the shim's C++ source through ``write_with_key``
    and ``shim_obj`` is added to the active code library as a linking file;
    both happen whenever the registered implementation runs.
    """
    symbol = "_ZL17__ull2bfloat16_rny_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL17__ull2bfloat16_rny_nbst(__nv_bfloat16 &retval , unsigned long long* i) {
        retval = __ull2bfloat16_rn(*i);
        return 0;
    }
        """
    arg_ptr_type = CPointer(uint64)
    shim_fn = declare_device(symbol, _type___nv_bfloat16(arg_ptr_type))

    def _ZL17__ull2bfloat16_rny_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__ull2bfloat16_rn, uint64)
    def impl(context, builder, sig, args):
        # Make the shim linkable and record its source under the symbol name.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its inputs by pointer: reserve one slot per
        # argument, then copy each value into its slot.
        slots = [builder.alloca(context.get_value_type(ty)) for ty in sig.args]
        for slot, ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZL17__ull2bfloat16_rny_nbst_caller,
            signature(_type___nv_bfloat16, arg_ptr_type),
            slots,
        )


# Register the __ull2bfloat16_rn lowering as this module executes, using the
# module-level shim_stream and shim_obj.
_lower__ZL17__ull2bfloat16_rny_nbst(shim_stream, shim_obj)


def __ull2bfloat16_rz():
    """Placeholder for the CUDA C++ ``__ull2bfloat16_rz`` function.

    Does nothing when called from Python; the function object is the key
    under which the ``uint64`` lowering is registered.
    """
    return None


def _lower__ZL17__ull2bfloat16_rzy_nbst(shim_stream, shim_obj):
    """Set up the ``__ull2bfloat16_rz`` lowering for a ``uint64`` argument.

    Declares the external shim and registers, via ``lower``, an
    implementation that passes the argument to that shim by pointer.

    ``shim_stream`` is given the shim's C++ source through ``write_with_key``
    and ``shim_obj`` is added to the active code library as a linking file;
    both happen whenever the registered implementation runs.
    """
    symbol = "_ZL17__ull2bfloat16_rzy_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL17__ull2bfloat16_rzy_nbst(__nv_bfloat16 &retval , unsigned long long* i) {
        retval = __ull2bfloat16_rz(*i);
        return 0;
    }
        """
    arg_ptr_type = CPointer(uint64)
    shim_fn = declare_device(symbol, _type___nv_bfloat16(arg_ptr_type))

    def _ZL17__ull2bfloat16_rzy_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__ull2bfloat16_rz, uint64)
    def impl(context, builder, sig, args):
        # Make the shim linkable and record its source under the symbol name.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its inputs by pointer: reserve one slot per
        # argument, then copy each value into its slot.
        slots = [builder.alloca(context.get_value_type(ty)) for ty in sig.args]
        for slot, ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZL17__ull2bfloat16_rzy_nbst_caller,
            signature(_type___nv_bfloat16, arg_ptr_type),
            slots,
        )


# Register the __ull2bfloat16_rz lowering as this module executes, using the
# module-level shim_stream and shim_obj.
_lower__ZL17__ull2bfloat16_rzy_nbst(shim_stream, shim_obj)


def __ull2bfloat16_rd():
    """Placeholder for the CUDA C++ ``__ull2bfloat16_rd`` function.

    Does nothing when called from Python; the function object is the key
    under which the ``uint64`` lowering is registered.
    """
    return None


def _lower__ZL17__ull2bfloat16_rdy_nbst(shim_stream, shim_obj):
    """Set up the ``__ull2bfloat16_rd`` lowering for a ``uint64`` argument.

    Declares the external shim and registers, via ``lower``, an
    implementation that passes the argument to that shim by pointer.

    ``shim_stream`` is given the shim's C++ source through ``write_with_key``
    and ``shim_obj`` is added to the active code library as a linking file;
    both happen whenever the registered implementation runs.
    """
    symbol = "_ZL17__ull2bfloat16_rdy_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL17__ull2bfloat16_rdy_nbst(__nv_bfloat16 &retval , unsigned long long* i) {
        retval = __ull2bfloat16_rd(*i);
        return 0;
    }
        """
    arg_ptr_type = CPointer(uint64)
    shim_fn = declare_device(symbol, _type___nv_bfloat16(arg_ptr_type))

    def _ZL17__ull2bfloat16_rdy_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__ull2bfloat16_rd, uint64)
    def impl(context, builder, sig, args):
        # Make the shim linkable and record its source under the symbol name.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its inputs by pointer: reserve one slot per
        # argument, then copy each value into its slot.
        slots = [builder.alloca(context.get_value_type(ty)) for ty in sig.args]
        for slot, ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZL17__ull2bfloat16_rdy_nbst_caller,
            signature(_type___nv_bfloat16, arg_ptr_type),
            slots,
        )


# Register the __ull2bfloat16_rd lowering as this module executes, using the
# module-level shim_stream and shim_obj.
_lower__ZL17__ull2bfloat16_rdy_nbst(shim_stream, shim_obj)


def __ull2bfloat16_ru():
    """Placeholder for the CUDA C++ ``__ull2bfloat16_ru`` function.

    Does nothing when called from Python; the function object is the key
    under which the ``uint64`` lowering is registered.
    """
    return None


def _lower__ZL17__ull2bfloat16_ruy_nbst(shim_stream, shim_obj):
    """Set up the ``__ull2bfloat16_ru`` lowering for a ``uint64`` argument.

    Declares the external shim and registers, via ``lower``, an
    implementation that passes the argument to that shim by pointer.

    ``shim_stream`` is given the shim's C++ source through ``write_with_key``
    and ``shim_obj`` is added to the active code library as a linking file;
    both happen whenever the registered implementation runs.
    """
    symbol = "_ZL17__ull2bfloat16_ruy_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL17__ull2bfloat16_ruy_nbst(__nv_bfloat16 &retval , unsigned long long* i) {
        retval = __ull2bfloat16_ru(*i);
        return 0;
    }
        """
    arg_ptr_type = CPointer(uint64)
    shim_fn = declare_device(symbol, _type___nv_bfloat16(arg_ptr_type))

    def _ZL17__ull2bfloat16_ruy_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__ull2bfloat16_ru, uint64)
    def impl(context, builder, sig, args):
        # Make the shim linkable and record its source under the symbol name.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its inputs by pointer: reserve one slot per
        # argument, then copy each value into its slot.
        slots = [builder.alloca(context.get_value_type(ty)) for ty in sig.args]
        for slot, ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZL17__ull2bfloat16_ruy_nbst_caller,
            signature(_type___nv_bfloat16, arg_ptr_type),
            slots,
        )


# Register the __ull2bfloat16_ru lowering as this module executes, using the
# module-level shim_stream and shim_obj.
_lower__ZL17__ull2bfloat16_ruy_nbst(shim_stream, shim_obj)


def __bfloat162ll_rn():
    """Placeholder for the CUDA C++ ``__bfloat162ll_rn`` function.

    Does nothing when called from Python; the function object is the key
    under which the ``_type___nv_bfloat16`` lowering is registered.
    """
    return None


def _lower__ZL16__bfloat162ll_rn13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Set up the ``__bfloat162ll_rn`` lowering for a ``_type___nv_bfloat16`` argument.

    Declares the external shim and registers, via ``lower``, an
    implementation that passes the argument to that shim by pointer.

    ``shim_stream`` is given the shim's C++ source through ``write_with_key``
    and ``shim_obj`` is added to the active code library as a linking file;
    both happen whenever the registered implementation runs.
    """
    symbol = "_ZL16__bfloat162ll_rn13__nv_bfloat16_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL16__bfloat162ll_rn13__nv_bfloat16_nbst(long long &retval , __nv_bfloat16* h) {
        retval = __bfloat162ll_rn(*h);
        return 0;
    }
        """
    arg_ptr_type = CPointer(_type___nv_bfloat16)
    shim_fn = declare_device(symbol, int64(arg_ptr_type))

    def _ZL16__bfloat162ll_rn13__nv_bfloat16_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__bfloat162ll_rn, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        # Make the shim linkable and record its source under the symbol name.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its inputs by pointer: reserve one slot per
        # argument, then copy each value into its slot.
        slots = [builder.alloca(context.get_value_type(ty)) for ty in sig.args]
        for slot, ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZL16__bfloat162ll_rn13__nv_bfloat16_nbst_caller,
            signature(int64, arg_ptr_type),
            slots,
        )


# Register the __bfloat162ll_rn lowering as this module executes, using the
# module-level shim_stream and shim_obj.
_lower__ZL16__bfloat162ll_rn13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __bfloat162ll_rz():
    """Placeholder for the CUDA C++ ``__bfloat162ll_rz`` function.

    Does nothing when called from Python; the function object is the key
    under which the ``_type___nv_bfloat16`` lowering is registered.
    """
    return None


def _lower__ZL16__bfloat162ll_rz13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Set up the ``__bfloat162ll_rz`` lowering for a ``_type___nv_bfloat16`` argument.

    Declares the external shim and registers, via ``lower``, an
    implementation that passes the argument to that shim by pointer.

    ``shim_stream`` is given the shim's C++ source through ``write_with_key``
    and ``shim_obj`` is added to the active code library as a linking file;
    both happen whenever the registered implementation runs.
    """
    symbol = "_ZL16__bfloat162ll_rz13__nv_bfloat16_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL16__bfloat162ll_rz13__nv_bfloat16_nbst(long long &retval , __nv_bfloat16* h) {
        retval = __bfloat162ll_rz(*h);
        return 0;
    }
        """
    arg_ptr_type = CPointer(_type___nv_bfloat16)
    shim_fn = declare_device(symbol, int64(arg_ptr_type))

    def _ZL16__bfloat162ll_rz13__nv_bfloat16_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__bfloat162ll_rz, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        # Make the shim linkable and record its source under the symbol name.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its inputs by pointer: reserve one slot per
        # argument, then copy each value into its slot.
        slots = [builder.alloca(context.get_value_type(ty)) for ty in sig.args]
        for slot, ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZL16__bfloat162ll_rz13__nv_bfloat16_nbst_caller,
            signature(int64, arg_ptr_type),
            slots,
        )


# Register the __bfloat162ll_rz lowering as this module executes, using the
# module-level shim_stream and shim_obj.
_lower__ZL16__bfloat162ll_rz13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __bfloat162ll_rd():
    """Placeholder for the CUDA C++ ``__bfloat162ll_rd`` function.

    Does nothing when called from Python; the function object is the key
    under which the ``_type___nv_bfloat16`` lowering is registered.
    """
    return None


def _lower__ZL16__bfloat162ll_rd13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Set up the ``__bfloat162ll_rd`` lowering for a ``_type___nv_bfloat16`` argument.

    Declares the external shim and registers, via ``lower``, an
    implementation that passes the argument to that shim by pointer.

    ``shim_stream`` is given the shim's C++ source through ``write_with_key``
    and ``shim_obj`` is added to the active code library as a linking file;
    both happen whenever the registered implementation runs.
    """
    symbol = "_ZL16__bfloat162ll_rd13__nv_bfloat16_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL16__bfloat162ll_rd13__nv_bfloat16_nbst(long long &retval , __nv_bfloat16* h) {
        retval = __bfloat162ll_rd(*h);
        return 0;
    }
        """
    arg_ptr_type = CPointer(_type___nv_bfloat16)
    shim_fn = declare_device(symbol, int64(arg_ptr_type))

    def _ZL16__bfloat162ll_rd13__nv_bfloat16_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__bfloat162ll_rd, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        # Make the shim linkable and record its source under the symbol name.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its inputs by pointer: reserve one slot per
        # argument, then copy each value into its slot.
        slots = [builder.alloca(context.get_value_type(ty)) for ty in sig.args]
        for slot, ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZL16__bfloat162ll_rd13__nv_bfloat16_nbst_caller,
            signature(int64, arg_ptr_type),
            slots,
        )


# Register the __bfloat162ll_rd lowering as this module executes, using the
# module-level shim_stream and shim_obj.
_lower__ZL16__bfloat162ll_rd13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __bfloat162ll_ru():
    """Placeholder for the CUDA C++ ``__bfloat162ll_ru`` function.

    Does nothing when called from Python; the function object is the key
    under which the ``_type___nv_bfloat16`` lowering is registered.
    """
    return None


def _lower__ZL16__bfloat162ll_ru13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Set up the ``__bfloat162ll_ru`` lowering for a ``_type___nv_bfloat16`` argument.

    Declares the external shim and registers, via ``lower``, an
    implementation that passes the argument to that shim by pointer.

    ``shim_stream`` is given the shim's C++ source through ``write_with_key``
    and ``shim_obj`` is added to the active code library as a linking file;
    both happen whenever the registered implementation runs.
    """
    symbol = "_ZL16__bfloat162ll_ru13__nv_bfloat16_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL16__bfloat162ll_ru13__nv_bfloat16_nbst(long long &retval , __nv_bfloat16* h) {
        retval = __bfloat162ll_ru(*h);
        return 0;
    }
        """
    arg_ptr_type = CPointer(_type___nv_bfloat16)
    shim_fn = declare_device(symbol, int64(arg_ptr_type))

    def _ZL16__bfloat162ll_ru13__nv_bfloat16_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__bfloat162ll_ru, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        # Make the shim linkable and record its source under the symbol name.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its inputs by pointer: reserve one slot per
        # argument, then copy each value into its slot.
        slots = [builder.alloca(context.get_value_type(ty)) for ty in sig.args]
        for slot, ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZL16__bfloat162ll_ru13__nv_bfloat16_nbst_caller,
            signature(int64, arg_ptr_type),
            slots,
        )


# Register the __bfloat162ll_ru lowering as this module executes, using the
# module-level shim_stream and shim_obj.
_lower__ZL16__bfloat162ll_ru13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __ll2bfloat16_rn():
    """Placeholder for the CUDA C++ ``__ll2bfloat16_rn`` function.

    Does nothing when called from Python; the function object is the key
    under which the ``int64`` lowering is registered.
    """
    return None


def _lower__ZL16__ll2bfloat16_rnx_nbst(shim_stream, shim_obj):
    """Set up the ``__ll2bfloat16_rn`` lowering for an ``int64`` argument.

    Declares the external shim and registers, via ``lower``, an
    implementation that passes the argument to that shim by pointer.

    ``shim_stream`` is given the shim's C++ source through ``write_with_key``
    and ``shim_obj`` is added to the active code library as a linking file;
    both happen whenever the registered implementation runs.
    """
    symbol = "_ZL16__ll2bfloat16_rnx_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL16__ll2bfloat16_rnx_nbst(__nv_bfloat16 &retval , long long* i) {
        retval = __ll2bfloat16_rn(*i);
        return 0;
    }
        """
    arg_ptr_type = CPointer(int64)
    shim_fn = declare_device(symbol, _type___nv_bfloat16(arg_ptr_type))

    def _ZL16__ll2bfloat16_rnx_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__ll2bfloat16_rn, int64)
    def impl(context, builder, sig, args):
        # Make the shim linkable and record its source under the symbol name.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its inputs by pointer: reserve one slot per
        # argument, then copy each value into its slot.
        slots = [builder.alloca(context.get_value_type(ty)) for ty in sig.args]
        for slot, ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZL16__ll2bfloat16_rnx_nbst_caller,
            signature(_type___nv_bfloat16, arg_ptr_type),
            slots,
        )


# Register the __ll2bfloat16_rn lowering as this module executes, using the
# module-level shim_stream and shim_obj.
_lower__ZL16__ll2bfloat16_rnx_nbst(shim_stream, shim_obj)


def __ll2bfloat16_rz():
    """Placeholder for the CUDA C++ ``__ll2bfloat16_rz`` function.

    Does nothing when called from Python; the function object is the key
    under which the ``int64`` lowering is registered.
    """
    return None


def _lower__ZL16__ll2bfloat16_rzx_nbst(shim_stream, shim_obj):
    """Set up the ``__ll2bfloat16_rz`` lowering for an ``int64`` argument.

    Declares the external shim and registers, via ``lower``, an
    implementation that passes the argument to that shim by pointer.

    ``shim_stream`` is given the shim's C++ source through ``write_with_key``
    and ``shim_obj`` is added to the active code library as a linking file;
    both happen whenever the registered implementation runs.
    """
    symbol = "_ZL16__ll2bfloat16_rzx_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL16__ll2bfloat16_rzx_nbst(__nv_bfloat16 &retval , long long* i) {
        retval = __ll2bfloat16_rz(*i);
        return 0;
    }
        """
    arg_ptr_type = CPointer(int64)
    shim_fn = declare_device(symbol, _type___nv_bfloat16(arg_ptr_type))

    def _ZL16__ll2bfloat16_rzx_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__ll2bfloat16_rz, int64)
    def impl(context, builder, sig, args):
        # Make the shim linkable and record its source under the symbol name.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its inputs by pointer: reserve one slot per
        # argument, then copy each value into its slot.
        slots = [builder.alloca(context.get_value_type(ty)) for ty in sig.args]
        for slot, ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZL16__ll2bfloat16_rzx_nbst_caller,
            signature(_type___nv_bfloat16, arg_ptr_type),
            slots,
        )


# Register the __ll2bfloat16_rz lowering as this module executes, using the
# module-level shim_stream and shim_obj.
_lower__ZL16__ll2bfloat16_rzx_nbst(shim_stream, shim_obj)


def __ll2bfloat16_rd():
    """Placeholder for the CUDA C++ ``__ll2bfloat16_rd`` function.

    Does nothing when called from Python; the function object is the key
    under which the ``int64`` lowering is registered.
    """
    return None


def _lower__ZL16__ll2bfloat16_rdx_nbst(shim_stream, shim_obj):
    """Set up the ``__ll2bfloat16_rd`` lowering for an ``int64`` argument.

    Declares the external shim and registers, via ``lower``, an
    implementation that passes the argument to that shim by pointer.

    ``shim_stream`` is given the shim's C++ source through ``write_with_key``
    and ``shim_obj`` is added to the active code library as a linking file;
    both happen whenever the registered implementation runs.
    """
    symbol = "_ZL16__ll2bfloat16_rdx_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL16__ll2bfloat16_rdx_nbst(__nv_bfloat16 &retval , long long* i) {
        retval = __ll2bfloat16_rd(*i);
        return 0;
    }
        """
    arg_ptr_type = CPointer(int64)
    shim_fn = declare_device(symbol, _type___nv_bfloat16(arg_ptr_type))

    def _ZL16__ll2bfloat16_rdx_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__ll2bfloat16_rd, int64)
    def impl(context, builder, sig, args):
        # Make the shim linkable and record its source under the symbol name.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its inputs by pointer: reserve one slot per
        # argument, then copy each value into its slot.
        slots = [builder.alloca(context.get_value_type(ty)) for ty in sig.args]
        for slot, ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZL16__ll2bfloat16_rdx_nbst_caller,
            signature(_type___nv_bfloat16, arg_ptr_type),
            slots,
        )


# Register the __ll2bfloat16_rd lowering as this module executes, using the
# module-level shim_stream and shim_obj.
_lower__ZL16__ll2bfloat16_rdx_nbst(shim_stream, shim_obj)


def __ll2bfloat16_ru():
    """Placeholder for the CUDA C++ ``__ll2bfloat16_ru`` function.

    Does nothing when called from Python; the function object is the key
    under which the ``int64`` lowering is registered.
    """
    return None


def _lower__ZL16__ll2bfloat16_rux_nbst(shim_stream, shim_obj):
    """Set up the ``__ll2bfloat16_ru`` lowering for an ``int64`` argument.

    Declares the external shim and registers, via ``lower``, an
    implementation that passes the argument to that shim by pointer.

    ``shim_stream`` is given the shim's C++ source through ``write_with_key``
    and ``shim_obj`` is added to the active code library as a linking file;
    both happen whenever the registered implementation runs.
    """
    symbol = "_ZL16__ll2bfloat16_rux_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL16__ll2bfloat16_rux_nbst(__nv_bfloat16 &retval , long long* i) {
        retval = __ll2bfloat16_ru(*i);
        return 0;
    }
        """
    arg_ptr_type = CPointer(int64)
    shim_fn = declare_device(symbol, _type___nv_bfloat16(arg_ptr_type))

    def _ZL16__ll2bfloat16_rux_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__ll2bfloat16_ru, int64)
    def impl(context, builder, sig, args):
        # Make the shim linkable and record its source under the symbol name.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its inputs by pointer: reserve one slot per
        # argument, then copy each value into its slot.
        slots = [builder.alloca(context.get_value_type(ty)) for ty in sig.args]
        for slot, ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZL16__ll2bfloat16_rux_nbst_caller,
            signature(_type___nv_bfloat16, arg_ptr_type),
            slots,
        )


# Register the __ll2bfloat16_ru lowering as this module executes, using the
# module-level shim_stream and shim_obj.
_lower__ZL16__ll2bfloat16_rux_nbst(shim_stream, shim_obj)


def htrunc():
    """Placeholder for the CUDA C++ ``htrunc`` function.

    Does nothing when called from Python; the function object is the key
    under which the ``_type___nv_bfloat16`` lowering is registered.
    """
    return None


def _lower__ZL6htrunc13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Set up the ``htrunc`` lowering for a ``_type___nv_bfloat16`` argument.

    Declares the external shim and registers, via ``lower``, an
    implementation that passes the argument to that shim by pointer.

    ``shim_stream`` is given the shim's C++ source through ``write_with_key``
    and ``shim_obj`` is added to the active code library as a linking file;
    both happen whenever the registered implementation runs.
    """
    symbol = "_ZL6htrunc13__nv_bfloat16_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL6htrunc13__nv_bfloat16_nbst(__nv_bfloat16 &retval , __nv_bfloat16* h) {
        retval = htrunc(*h);
        return 0;
    }
        """
    arg_ptr_type = CPointer(_type___nv_bfloat16)
    shim_fn = declare_device(symbol, _type___nv_bfloat16(arg_ptr_type))

    def _ZL6htrunc13__nv_bfloat16_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(htrunc, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        # Make the shim linkable and record its source under the symbol name.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its inputs by pointer: reserve one slot per
        # argument, then copy each value into its slot.
        slots = [builder.alloca(context.get_value_type(ty)) for ty in sig.args]
        for slot, ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZL6htrunc13__nv_bfloat16_nbst_caller,
            signature(_type___nv_bfloat16, arg_ptr_type),
            slots,
        )


# Register the htrunc lowering as this module executes, using the
# module-level shim_stream and shim_obj.
_lower__ZL6htrunc13__nv_bfloat16_nbst(shim_stream, shim_obj)


def hceil():
    """Placeholder for the CUDA C++ ``hceil`` function.

    Does nothing when called from Python; the function object is the key
    under which the ``_type___nv_bfloat16`` lowering is registered.
    """
    return None


def _lower__ZL5hceil13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Set up the ``hceil`` lowering for a ``_type___nv_bfloat16`` argument.

    Declares the external shim and registers, via ``lower``, an
    implementation that passes the argument to that shim by pointer.

    ``shim_stream`` is given the shim's C++ source through ``write_with_key``
    and ``shim_obj`` is added to the active code library as a linking file;
    both happen whenever the registered implementation runs.
    """
    symbol = "_ZL5hceil13__nv_bfloat16_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL5hceil13__nv_bfloat16_nbst(__nv_bfloat16 &retval , __nv_bfloat16* h) {
        retval = hceil(*h);
        return 0;
    }
        """
    arg_ptr_type = CPointer(_type___nv_bfloat16)
    shim_fn = declare_device(symbol, _type___nv_bfloat16(arg_ptr_type))

    def _ZL5hceil13__nv_bfloat16_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(hceil, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        # Make the shim linkable and record its source under the symbol name.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its inputs by pointer: reserve one slot per
        # argument, then copy each value into its slot.
        slots = [builder.alloca(context.get_value_type(ty)) for ty in sig.args]
        for slot, ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZL5hceil13__nv_bfloat16_nbst_caller,
            signature(_type___nv_bfloat16, arg_ptr_type),
            slots,
        )


# Register the hceil lowering as this module executes, using the
# module-level shim_stream and shim_obj.
_lower__ZL5hceil13__nv_bfloat16_nbst(shim_stream, shim_obj)


def hfloor():
    """Placeholder for the CUDA C++ ``hfloor`` function.

    Does nothing when called from Python; the function object is the key
    under which the ``_type___nv_bfloat16`` lowering is registered.
    """
    return None


def _lower__ZL6hfloor13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Set up the ``hfloor`` lowering for a ``_type___nv_bfloat16`` argument.

    Declares the external shim and registers, via ``lower``, an
    implementation that passes the argument to that shim by pointer.

    ``shim_stream`` is given the shim's C++ source through ``write_with_key``
    and ``shim_obj`` is added to the active code library as a linking file;
    both happen whenever the registered implementation runs.
    """
    symbol = "_ZL6hfloor13__nv_bfloat16_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL6hfloor13__nv_bfloat16_nbst(__nv_bfloat16 &retval , __nv_bfloat16* h) {
        retval = hfloor(*h);
        return 0;
    }
        """
    arg_ptr_type = CPointer(_type___nv_bfloat16)
    shim_fn = declare_device(symbol, _type___nv_bfloat16(arg_ptr_type))

    def _ZL6hfloor13__nv_bfloat16_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(hfloor, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        # Make the shim linkable and record its source under the symbol name.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its inputs by pointer: reserve one slot per
        # argument, then copy each value into its slot.
        slots = [builder.alloca(context.get_value_type(ty)) for ty in sig.args]
        for slot, ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZL6hfloor13__nv_bfloat16_nbst_caller,
            signature(_type___nv_bfloat16, arg_ptr_type),
            slots,
        )


# Register the hfloor lowering as this module executes, using the
# module-level shim_stream and shim_obj.
_lower__ZL6hfloor13__nv_bfloat16_nbst(shim_stream, shim_obj)


def hrint():
    """Placeholder for the CUDA C++ ``hrint`` function.

    Does nothing when called from Python; the function object is the key
    under which the ``_type___nv_bfloat16`` lowering is registered.
    """
    return None


def _lower__ZL5hrint13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Set up the ``hrint`` lowering for a ``_type___nv_bfloat16`` argument.

    Declares the external shim and registers, via ``lower``, an
    implementation that passes the argument to that shim by pointer.

    ``shim_stream`` is given the shim's C++ source through ``write_with_key``
    and ``shim_obj`` is added to the active code library as a linking file;
    both happen whenever the registered implementation runs.
    """
    symbol = "_ZL5hrint13__nv_bfloat16_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL5hrint13__nv_bfloat16_nbst(__nv_bfloat16 &retval , __nv_bfloat16* h) {
        retval = hrint(*h);
        return 0;
    }
        """
    arg_ptr_type = CPointer(_type___nv_bfloat16)
    shim_fn = declare_device(symbol, _type___nv_bfloat16(arg_ptr_type))

    def _ZL5hrint13__nv_bfloat16_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(hrint, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        # Make the shim linkable and record its source under the symbol name.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its inputs by pointer: reserve one slot per
        # argument, then copy each value into its slot.
        slots = [builder.alloca(context.get_value_type(ty)) for ty in sig.args]
        for slot, ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZL5hrint13__nv_bfloat16_nbst_caller,
            signature(_type___nv_bfloat16, arg_ptr_type),
            slots,
        )


# Register the hrint lowering as this module executes, using the
# module-level shim_stream and shim_obj.
_lower__ZL5hrint13__nv_bfloat16_nbst(shim_stream, shim_obj)


def h2trunc():
    """Placeholder for the CUDA C++ ``h2trunc`` function.

    Does nothing when called from Python; the function object is the key
    under which the ``_type___nv_bfloat162`` lowering is registered.
    """
    return None


def _lower__ZL7h2trunc14__nv_bfloat162_nbst(shim_stream, shim_obj):
    """Set up the ``h2trunc`` lowering for a ``_type___nv_bfloat162`` argument.

    Declares the external shim and registers, via ``lower``, an
    implementation that passes the argument to that shim by pointer.

    ``shim_stream`` is given the shim's C++ source through ``write_with_key``
    and ``shim_obj`` is added to the active code library as a linking file;
    both happen whenever the registered implementation runs.
    """
    symbol = "_ZL7h2trunc14__nv_bfloat162_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL7h2trunc14__nv_bfloat162_nbst(__nv_bfloat162 &retval , __nv_bfloat162* h) {
        retval = h2trunc(*h);
        return 0;
    }
        """
    arg_ptr_type = CPointer(_type___nv_bfloat162)
    shim_fn = declare_device(symbol, _type___nv_bfloat162(arg_ptr_type))

    def _ZL7h2trunc14__nv_bfloat162_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(h2trunc, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        # Make the shim linkable and record its source under the symbol name.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its inputs by pointer: reserve one slot per
        # argument, then copy each value into its slot.
        slots = [builder.alloca(context.get_value_type(ty)) for ty in sig.args]
        for slot, ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZL7h2trunc14__nv_bfloat162_nbst_caller,
            signature(_type___nv_bfloat162, arg_ptr_type),
            slots,
        )


# Register the h2trunc lowering as this module executes, using the
# module-level shim_stream and shim_obj.
_lower__ZL7h2trunc14__nv_bfloat162_nbst(shim_stream, shim_obj)


def h2ceil():
    """Placeholder for the CUDA C++ ``h2ceil`` function.

    Does nothing when called from Python; the function object is the key
    under which the ``_type___nv_bfloat162`` lowering is registered.
    """
    return None


def _lower__ZL6h2ceil14__nv_bfloat162_nbst(shim_stream, shim_obj):
    """Set up the ``h2ceil`` lowering for a ``_type___nv_bfloat162`` argument.

    Declares the external shim and registers, via ``lower``, an
    implementation that passes the argument to that shim by pointer.

    ``shim_stream`` is given the shim's C++ source through ``write_with_key``
    and ``shim_obj`` is added to the active code library as a linking file;
    both happen whenever the registered implementation runs.
    """
    symbol = "_ZL6h2ceil14__nv_bfloat162_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL6h2ceil14__nv_bfloat162_nbst(__nv_bfloat162 &retval , __nv_bfloat162* h) {
        retval = h2ceil(*h);
        return 0;
    }
        """
    arg_ptr_type = CPointer(_type___nv_bfloat162)
    shim_fn = declare_device(symbol, _type___nv_bfloat162(arg_ptr_type))

    def _ZL6h2ceil14__nv_bfloat162_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(h2ceil, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        # Make the shim linkable and record its source under the symbol name.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its inputs by pointer: reserve one slot per
        # argument, then copy each value into its slot.
        slots = [builder.alloca(context.get_value_type(ty)) for ty in sig.args]
        for slot, ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZL6h2ceil14__nv_bfloat162_nbst_caller,
            signature(_type___nv_bfloat162, arg_ptr_type),
            slots,
        )


# Register the h2ceil lowering as this module executes, using the
# module-level shim_stream and shim_obj.
_lower__ZL6h2ceil14__nv_bfloat162_nbst(shim_stream, shim_obj)


def h2floor():
    """Placeholder for the CUDA C++ ``h2floor`` function.

    Does nothing when called from Python; the function object is the key
    under which the ``_type___nv_bfloat162`` lowering is registered.
    """
    return None


def _lower__ZL7h2floor14__nv_bfloat162_nbst(shim_stream, shim_obj):
    """Set up the ``h2floor`` lowering for a ``_type___nv_bfloat162`` argument.

    Declares the external shim and registers, via ``lower``, an
    implementation that passes the argument to that shim by pointer.

    ``shim_stream`` is given the shim's C++ source through ``write_with_key``
    and ``shim_obj`` is added to the active code library as a linking file;
    both happen whenever the registered implementation runs.
    """
    symbol = "_ZL7h2floor14__nv_bfloat162_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL7h2floor14__nv_bfloat162_nbst(__nv_bfloat162 &retval , __nv_bfloat162* h) {
        retval = h2floor(*h);
        return 0;
    }
        """
    arg_ptr_type = CPointer(_type___nv_bfloat162)
    shim_fn = declare_device(symbol, _type___nv_bfloat162(arg_ptr_type))

    def _ZL7h2floor14__nv_bfloat162_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(h2floor, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        # Make the shim linkable and record its source under the symbol name.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its inputs by pointer: reserve one slot per
        # argument, then copy each value into its slot.
        slots = [builder.alloca(context.get_value_type(ty)) for ty in sig.args]
        for slot, ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZL7h2floor14__nv_bfloat162_nbst_caller,
            signature(_type___nv_bfloat162, arg_ptr_type),
            slots,
        )


# Register the h2floor lowering as this module executes, using the
# module-level shim_stream and shim_obj.
_lower__ZL7h2floor14__nv_bfloat162_nbst(shim_stream, shim_obj)


def h2rint():
    """Placeholder for the CUDA C++ ``h2rint`` function.

    Does nothing when called from Python; the function object is the key
    under which the ``_type___nv_bfloat162`` lowering is registered.
    """
    return None


def _lower__ZL6h2rint14__nv_bfloat162_nbst(shim_stream, shim_obj):
    """Set up the ``h2rint`` lowering for a ``_type___nv_bfloat162`` argument.

    Declares the external shim and registers, via ``lower``, an
    implementation that passes the argument to that shim by pointer.

    ``shim_stream`` is given the shim's C++ source through ``write_with_key``
    and ``shim_obj`` is added to the active code library as a linking file;
    both happen whenever the registered implementation runs.
    """
    symbol = "_ZL6h2rint14__nv_bfloat162_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL6h2rint14__nv_bfloat162_nbst(__nv_bfloat162 &retval , __nv_bfloat162* h) {
        retval = h2rint(*h);
        return 0;
    }
        """
    arg_ptr_type = CPointer(_type___nv_bfloat162)
    shim_fn = declare_device(symbol, _type___nv_bfloat162(arg_ptr_type))

    def _ZL6h2rint14__nv_bfloat162_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(h2rint, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        # Make the shim linkable and record its source under the symbol name.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its inputs by pointer: reserve one slot per
        # argument, then copy each value into its slot.
        slots = [builder.alloca(context.get_value_type(ty)) for ty in sig.args]
        for slot, ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZL6h2rint14__nv_bfloat162_nbst_caller,
            signature(_type___nv_bfloat162, arg_ptr_type),
            slots,
        )


# Register the h2rint lowering as this module executes, using the
# module-level shim_stream and shim_obj.
_lower__ZL6h2rint14__nv_bfloat162_nbst(shim_stream, shim_obj)


def __bfloat162bfloat162():
    """Placeholder for the CUDA C++ ``__bfloat162bfloat162`` function.

    Does nothing when called from Python; the function object is the key
    under which the ``_type___nv_bfloat16`` lowering is registered.
    """
    return None


def _lower__ZL20__bfloat162bfloat16213__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Register the lowering of ``__bfloat162bfloat162(__nv_bfloat16)``.

    Declares the shim ``_ZL20__bfloat162bfloat16213__nv_bfloat16_nbst`` as a
    device function and attaches an ``@lower`` implementation that calls it
    with a pointer to the spilled argument.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the lowering runs.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL20__bfloat162bfloat16213__nv_bfloat16_nbst"
    return_type = _type___nv_bfloat162
    pointer_args = (CPointer(_type___nv_bfloat16),)

    # Shim text: the result is written through ``retval`` and the input
    # arrives by pointer.
    shim_source = """
    extern "C" __device__ int
    _ZL20__bfloat162bfloat16213__nv_bfloat16_nbst(__nv_bfloat162 &retval , __nv_bfloat16* a) {
        retval = __bfloat162bfloat162(*a);
        return 0;
    }
        """

    _ZL20__bfloat162bfloat16213__nv_bfloat16_nbst = declare_device(
        shim_name, return_type(*pointer_args)
    )

    # Wrapper handed to ``compile_internal``; forwards straight to the shim.
    def _ZL20__bfloat162bfloat16213__nv_bfloat16_nbst_caller(arg_0):
        return _ZL20__bfloat162bfloat16213__nv_bfloat16_nbst(arg_0)

    @lower(__bfloat162bfloat162, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Allocate one slot per argument first, then store the values.
        slots = [
            builder.alloca(llvm_type)
            for llvm_type in map(context.get_value_type, sig.args)
        ]
        for slot, arg_type, value in zip(slots, sig.args, args):
            alignment = getattr(arg_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZL20__bfloat162bfloat16213__nv_bfloat16_nbst_caller,
            signature(return_type, *pointer_args),
            slots,
        )


_lower__ZL20__bfloat162bfloat16213__nv_bfloat16_nbst(shim_stream, shim_obj)


def __lowhigh2highlow():
    """Placeholder object for the ``__lowhigh2highlow`` intrinsic.

    Takes no arguments and returns ``None`` when called from Python; the file
    uses this function as the key of an ``@lower(__lowhigh2highlow, ...)``
    registration.
    """


def _lower__ZL17__lowhigh2highlow14__nv_bfloat162_nbst(shim_stream, shim_obj):
    """Register the lowering of ``__lowhigh2highlow(__nv_bfloat162)``.

    Declares the shim ``_ZL17__lowhigh2highlow14__nv_bfloat162_nbst`` as a
    device function and attaches an ``@lower`` implementation that calls it
    with a pointer to the spilled argument.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the lowering runs.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL17__lowhigh2highlow14__nv_bfloat162_nbst"
    return_type = _type___nv_bfloat162
    pointer_args = (CPointer(_type___nv_bfloat162),)

    # Shim text: the result is written through ``retval`` and the input
    # arrives by pointer.
    shim_source = """
    extern "C" __device__ int
    _ZL17__lowhigh2highlow14__nv_bfloat162_nbst(__nv_bfloat162 &retval , __nv_bfloat162* a) {
        retval = __lowhigh2highlow(*a);
        return 0;
    }
        """

    _ZL17__lowhigh2highlow14__nv_bfloat162_nbst = declare_device(
        shim_name, return_type(*pointer_args)
    )

    # Wrapper handed to ``compile_internal``; forwards straight to the shim.
    def _ZL17__lowhigh2highlow14__nv_bfloat162_nbst_caller(arg_0):
        return _ZL17__lowhigh2highlow14__nv_bfloat162_nbst(arg_0)

    @lower(__lowhigh2highlow, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Allocate one slot per argument first, then store the values.
        slots = [
            builder.alloca(llvm_type)
            for llvm_type in map(context.get_value_type, sig.args)
        ]
        for slot, arg_type, value in zip(slots, sig.args, args):
            alignment = getattr(arg_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZL17__lowhigh2highlow14__nv_bfloat162_nbst_caller,
            signature(return_type, *pointer_args),
            slots,
        )


_lower__ZL17__lowhigh2highlow14__nv_bfloat162_nbst(shim_stream, shim_obj)


def __lows2bfloat162():
    """Placeholder object for the ``__lows2bfloat162`` intrinsic.

    Takes no arguments and returns ``None`` when called from Python; the file
    uses this function as the key of an ``@lower(__lows2bfloat162, ...)``
    registration.
    """


def _lower__ZL16__lows2bfloat16214__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__lows2bfloat162(__nv_bfloat162, __nv_bfloat162)``.

    Declares the shim ``_ZL16__lows2bfloat16214__nv_bfloat162S__nbst`` as a
    device function and attaches an ``@lower`` implementation that calls it
    with pointers to the spilled arguments.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the lowering runs.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL16__lows2bfloat16214__nv_bfloat162S__nbst"
    return_type = _type___nv_bfloat162
    pointer_args = (
        CPointer(_type___nv_bfloat162),
        CPointer(_type___nv_bfloat162),
    )

    # Shim text: the result is written through ``retval`` and the inputs
    # arrive by pointer.
    shim_source = """
    extern "C" __device__ int
    _ZL16__lows2bfloat16214__nv_bfloat162S__nbst(__nv_bfloat162 &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __lows2bfloat162(*a, *b);
        return 0;
    }
        """

    _ZL16__lows2bfloat16214__nv_bfloat162S__nbst = declare_device(
        shim_name, return_type(*pointer_args)
    )

    # Wrapper handed to ``compile_internal``; forwards straight to the shim.
    def _ZL16__lows2bfloat16214__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return _ZL16__lows2bfloat16214__nv_bfloat162S__nbst(arg_0, arg_1)

    @lower(__lows2bfloat162, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Allocate one slot per argument first, then store the values.
        slots = [
            builder.alloca(llvm_type)
            for llvm_type in map(context.get_value_type, sig.args)
        ]
        for slot, arg_type, value in zip(slots, sig.args, args):
            alignment = getattr(arg_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZL16__lows2bfloat16214__nv_bfloat162S__nbst_caller,
            signature(return_type, *pointer_args),
            slots,
        )


_lower__ZL16__lows2bfloat16214__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __highs2bfloat162():
    """Placeholder object for the ``__highs2bfloat162`` intrinsic.

    Takes no arguments and returns ``None`` when called from Python; the file
    uses this function as the key of an ``@lower(__highs2bfloat162, ...)``
    registration.
    """


def _lower__ZL17__highs2bfloat16214__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__highs2bfloat162(__nv_bfloat162, __nv_bfloat162)``.

    Declares the shim ``_ZL17__highs2bfloat16214__nv_bfloat162S__nbst`` as a
    device function and attaches an ``@lower`` implementation that calls it
    with pointers to the spilled arguments.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the lowering runs.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL17__highs2bfloat16214__nv_bfloat162S__nbst"
    return_type = _type___nv_bfloat162
    pointer_args = (
        CPointer(_type___nv_bfloat162),
        CPointer(_type___nv_bfloat162),
    )

    # Shim text: the result is written through ``retval`` and the inputs
    # arrive by pointer.
    shim_source = """
    extern "C" __device__ int
    _ZL17__highs2bfloat16214__nv_bfloat162S__nbst(__nv_bfloat162 &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __highs2bfloat162(*a, *b);
        return 0;
    }
        """

    _ZL17__highs2bfloat16214__nv_bfloat162S__nbst = declare_device(
        shim_name, return_type(*pointer_args)
    )

    # Wrapper handed to ``compile_internal``; forwards straight to the shim.
    def _ZL17__highs2bfloat16214__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return _ZL17__highs2bfloat16214__nv_bfloat162S__nbst(arg_0, arg_1)

    @lower(__highs2bfloat162, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Allocate one slot per argument first, then store the values.
        slots = [
            builder.alloca(llvm_type)
            for llvm_type in map(context.get_value_type, sig.args)
        ]
        for slot, arg_type, value in zip(slots, sig.args, args):
            alignment = getattr(arg_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZL17__highs2bfloat16214__nv_bfloat162S__nbst_caller,
            signature(return_type, *pointer_args),
            slots,
        )


_lower__ZL17__highs2bfloat16214__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __high2bfloat16():
    """Placeholder object for the ``__high2bfloat16`` intrinsic.

    Takes no arguments and returns ``None`` when called from Python; the file
    uses this function as the key of an ``@lower(__high2bfloat16, ...)``
    registration.
    """


def _lower__ZL15__high2bfloat1614__nv_bfloat162_nbst(shim_stream, shim_obj):
    """Register the lowering of ``__high2bfloat16(__nv_bfloat162)``.

    Declares the shim ``_ZL15__high2bfloat1614__nv_bfloat162_nbst`` as a
    device function and attaches an ``@lower`` implementation that calls it
    with a pointer to the spilled argument.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the lowering runs.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL15__high2bfloat1614__nv_bfloat162_nbst"
    return_type = _type___nv_bfloat16
    pointer_args = (CPointer(_type___nv_bfloat162),)

    # Shim text: the result is written through ``retval`` and the input
    # arrives by pointer.
    shim_source = """
    extern "C" __device__ int
    _ZL15__high2bfloat1614__nv_bfloat162_nbst(__nv_bfloat16 &retval , __nv_bfloat162* a) {
        retval = __high2bfloat16(*a);
        return 0;
    }
        """

    _ZL15__high2bfloat1614__nv_bfloat162_nbst = declare_device(
        shim_name, return_type(*pointer_args)
    )

    # Wrapper handed to ``compile_internal``; forwards straight to the shim.
    def _ZL15__high2bfloat1614__nv_bfloat162_nbst_caller(arg_0):
        return _ZL15__high2bfloat1614__nv_bfloat162_nbst(arg_0)

    @lower(__high2bfloat16, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Allocate one slot per argument first, then store the values.
        slots = [
            builder.alloca(llvm_type)
            for llvm_type in map(context.get_value_type, sig.args)
        ]
        for slot, arg_type, value in zip(slots, sig.args, args):
            alignment = getattr(arg_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZL15__high2bfloat1614__nv_bfloat162_nbst_caller,
            signature(return_type, *pointer_args),
            slots,
        )


_lower__ZL15__high2bfloat1614__nv_bfloat162_nbst(shim_stream, shim_obj)


def __low2bfloat16():
    """Placeholder object for the ``__low2bfloat16`` intrinsic.

    Takes no arguments and returns ``None`` when called from Python; the file
    uses this function as the key of an ``@lower(__low2bfloat16, ...)``
    registration.
    """


def _lower__ZL14__low2bfloat1614__nv_bfloat162_nbst(shim_stream, shim_obj):
    """Register the lowering of ``__low2bfloat16(__nv_bfloat162)``.

    Declares the shim ``_ZL14__low2bfloat1614__nv_bfloat162_nbst`` as a
    device function and attaches an ``@lower`` implementation that calls it
    with a pointer to the spilled argument.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the lowering runs.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL14__low2bfloat1614__nv_bfloat162_nbst"
    return_type = _type___nv_bfloat16
    pointer_args = (CPointer(_type___nv_bfloat162),)

    # Shim text: the result is written through ``retval`` and the input
    # arrives by pointer.
    shim_source = """
    extern "C" __device__ int
    _ZL14__low2bfloat1614__nv_bfloat162_nbst(__nv_bfloat16 &retval , __nv_bfloat162* a) {
        retval = __low2bfloat16(*a);
        return 0;
    }
        """

    _ZL14__low2bfloat1614__nv_bfloat162_nbst = declare_device(
        shim_name, return_type(*pointer_args)
    )

    # Wrapper handed to ``compile_internal``; forwards straight to the shim.
    def _ZL14__low2bfloat1614__nv_bfloat162_nbst_caller(arg_0):
        return _ZL14__low2bfloat1614__nv_bfloat162_nbst(arg_0)

    @lower(__low2bfloat16, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Allocate one slot per argument first, then store the values.
        slots = [
            builder.alloca(llvm_type)
            for llvm_type in map(context.get_value_type, sig.args)
        ]
        for slot, arg_type, value in zip(slots, sig.args, args):
            alignment = getattr(arg_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZL14__low2bfloat1614__nv_bfloat162_nbst_caller,
            signature(return_type, *pointer_args),
            slots,
        )


_lower__ZL14__low2bfloat1614__nv_bfloat162_nbst(shim_stream, shim_obj)


def __hisinf():
    """Placeholder object for the ``__hisinf`` intrinsic.

    Takes no arguments and returns ``None`` when called from Python; the file
    uses this function as the key of an ``@lower(__hisinf, ...)``
    registration.
    """


def _lower__ZL8__hisinf13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hisinf(__nv_bfloat16)``.

    Declares the shim ``_ZL8__hisinf13__nv_bfloat16_nbst`` as a device
    function returning ``int32`` and attaches an ``@lower`` implementation
    that calls it with a pointer to the spilled argument.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the lowering runs.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL8__hisinf13__nv_bfloat16_nbst"
    return_type = int32
    pointer_args = (CPointer(_type___nv_bfloat16),)

    # Shim text: the result is written through ``retval`` and the input
    # arrives by pointer.
    shim_source = """
    extern "C" __device__ int
    _ZL8__hisinf13__nv_bfloat16_nbst(int &retval , __nv_bfloat16* a) {
        retval = __hisinf(*a);
        return 0;
    }
        """

    _ZL8__hisinf13__nv_bfloat16_nbst = declare_device(
        shim_name, return_type(*pointer_args)
    )

    # Wrapper handed to ``compile_internal``; forwards straight to the shim.
    def _ZL8__hisinf13__nv_bfloat16_nbst_caller(arg_0):
        return _ZL8__hisinf13__nv_bfloat16_nbst(arg_0)

    @lower(__hisinf, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Allocate one slot per argument first, then store the values.
        slots = [
            builder.alloca(llvm_type)
            for llvm_type in map(context.get_value_type, sig.args)
        ]
        for slot, arg_type, value in zip(slots, sig.args, args):
            alignment = getattr(arg_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZL8__hisinf13__nv_bfloat16_nbst_caller,
            signature(return_type, *pointer_args),
            slots,
        )


_lower__ZL8__hisinf13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __halves2bfloat162():
    """Placeholder object for the ``__halves2bfloat162`` intrinsic.

    Takes no arguments and returns ``None`` when called from Python; the file
    uses this function as the key of an ``@lower(__halves2bfloat162, ...)``
    registration.
    """


def _lower__ZL18__halves2bfloat16213__nv_bfloat16S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__halves2bfloat162(__nv_bfloat16, __nv_bfloat16)``.

    Declares the shim ``_ZL18__halves2bfloat16213__nv_bfloat16S__nbst`` as a
    device function and attaches an ``@lower`` implementation that calls it
    with pointers to the spilled arguments.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the lowering runs.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL18__halves2bfloat16213__nv_bfloat16S__nbst"
    return_type = _type___nv_bfloat162
    pointer_args = (
        CPointer(_type___nv_bfloat16),
        CPointer(_type___nv_bfloat16),
    )

    # Shim text: the result is written through ``retval`` and the inputs
    # arrive by pointer.
    shim_source = """
    extern "C" __device__ int
    _ZL18__halves2bfloat16213__nv_bfloat16S__nbst(__nv_bfloat162 &retval , __nv_bfloat16* a, __nv_bfloat16* b) {
        retval = __halves2bfloat162(*a, *b);
        return 0;
    }
        """

    _ZL18__halves2bfloat16213__nv_bfloat16S__nbst = declare_device(
        shim_name, return_type(*pointer_args)
    )

    # Wrapper handed to ``compile_internal``; forwards straight to the shim.
    def _ZL18__halves2bfloat16213__nv_bfloat16S__nbst_caller(arg_0, arg_1):
        return _ZL18__halves2bfloat16213__nv_bfloat16S__nbst(arg_0, arg_1)

    @lower(__halves2bfloat162, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Allocate one slot per argument first, then store the values.
        slots = [
            builder.alloca(llvm_type)
            for llvm_type in map(context.get_value_type, sig.args)
        ]
        for slot, arg_type, value in zip(slots, sig.args, args):
            alignment = getattr(arg_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZL18__halves2bfloat16213__nv_bfloat16S__nbst_caller,
            signature(return_type, *pointer_args),
            slots,
        )


_lower__ZL18__halves2bfloat16213__nv_bfloat16S__nbst(shim_stream, shim_obj)


def __low2bfloat162():
    """Placeholder object for the ``__low2bfloat162`` intrinsic.

    Takes no arguments and returns ``None`` when called from Python; the file
    uses this function as the key of an ``@lower(__low2bfloat162, ...)``
    registration.
    """


def _lower__ZL15__low2bfloat16214__nv_bfloat162_nbst(shim_stream, shim_obj):
    """Register the lowering of ``__low2bfloat162(__nv_bfloat162)``.

    Declares the shim ``_ZL15__low2bfloat16214__nv_bfloat162_nbst`` as a
    device function and attaches an ``@lower`` implementation that calls it
    with a pointer to the spilled argument.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the lowering runs.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL15__low2bfloat16214__nv_bfloat162_nbst"
    return_type = _type___nv_bfloat162
    pointer_args = (CPointer(_type___nv_bfloat162),)

    # Shim text: the result is written through ``retval`` and the input
    # arrives by pointer.
    shim_source = """
    extern "C" __device__ int
    _ZL15__low2bfloat16214__nv_bfloat162_nbst(__nv_bfloat162 &retval , __nv_bfloat162* a) {
        retval = __low2bfloat162(*a);
        return 0;
    }
        """

    _ZL15__low2bfloat16214__nv_bfloat162_nbst = declare_device(
        shim_name, return_type(*pointer_args)
    )

    # Wrapper handed to ``compile_internal``; forwards straight to the shim.
    def _ZL15__low2bfloat16214__nv_bfloat162_nbst_caller(arg_0):
        return _ZL15__low2bfloat16214__nv_bfloat162_nbst(arg_0)

    @lower(__low2bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Allocate one slot per argument first, then store the values.
        slots = [
            builder.alloca(llvm_type)
            for llvm_type in map(context.get_value_type, sig.args)
        ]
        for slot, arg_type, value in zip(slots, sig.args, args):
            alignment = getattr(arg_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZL15__low2bfloat16214__nv_bfloat162_nbst_caller,
            signature(return_type, *pointer_args),
            slots,
        )


_lower__ZL15__low2bfloat16214__nv_bfloat162_nbst(shim_stream, shim_obj)


def __high2bfloat162():
    """Placeholder object for the ``__high2bfloat162`` intrinsic.

    Takes no arguments and returns ``None`` when called from Python; the file
    uses this function as the key of an ``@lower(__high2bfloat162, ...)``
    registration.
    """


def _lower__ZL16__high2bfloat16214__nv_bfloat162_nbst(shim_stream, shim_obj):
    """Register the lowering of ``__high2bfloat162(__nv_bfloat162)``.

    Declares the shim ``_ZL16__high2bfloat16214__nv_bfloat162_nbst`` as a
    device function and attaches an ``@lower`` implementation that calls it
    with a pointer to the spilled argument.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the lowering runs.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL16__high2bfloat16214__nv_bfloat162_nbst"
    return_type = _type___nv_bfloat162
    pointer_args = (CPointer(_type___nv_bfloat162),)

    # Shim text: the result is written through ``retval`` and the input
    # arrives by pointer.
    shim_source = """
    extern "C" __device__ int
    _ZL16__high2bfloat16214__nv_bfloat162_nbst(__nv_bfloat162 &retval , __nv_bfloat162* a) {
        retval = __high2bfloat162(*a);
        return 0;
    }
        """

    _ZL16__high2bfloat16214__nv_bfloat162_nbst = declare_device(
        shim_name, return_type(*pointer_args)
    )

    # Wrapper handed to ``compile_internal``; forwards straight to the shim.
    def _ZL16__high2bfloat16214__nv_bfloat162_nbst_caller(arg_0):
        return _ZL16__high2bfloat16214__nv_bfloat162_nbst(arg_0)

    @lower(__high2bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Allocate one slot per argument first, then store the values.
        slots = [
            builder.alloca(llvm_type)
            for llvm_type in map(context.get_value_type, sig.args)
        ]
        for slot, arg_type, value in zip(slots, sig.args, args):
            alignment = getattr(arg_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZL16__high2bfloat16214__nv_bfloat162_nbst_caller,
            signature(return_type, *pointer_args),
            slots,
        )


_lower__ZL16__high2bfloat16214__nv_bfloat162_nbst(shim_stream, shim_obj)


def __bfloat16_as_short():
    """Placeholder object for the ``__bfloat16_as_short`` intrinsic.

    Takes no arguments and returns ``None`` when called from Python; the file
    uses this function as the key of an ``@lower(__bfloat16_as_short, ...)``
    registration.
    """


def _lower__ZL19__bfloat16_as_short13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Register the lowering of ``__bfloat16_as_short(__nv_bfloat16)``.

    Declares the shim ``_ZL19__bfloat16_as_short13__nv_bfloat16_nbst`` as a
    device function returning ``int16`` and attaches an ``@lower``
    implementation that calls it with a pointer to the spilled argument.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the lowering runs.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL19__bfloat16_as_short13__nv_bfloat16_nbst"
    return_type = int16
    pointer_args = (CPointer(_type___nv_bfloat16),)

    # Shim text: the result is written through ``retval`` and the input
    # arrives by pointer.
    shim_source = """
    extern "C" __device__ int
    _ZL19__bfloat16_as_short13__nv_bfloat16_nbst(short &retval , __nv_bfloat16* h) {
        retval = __bfloat16_as_short(*h);
        return 0;
    }
        """

    _ZL19__bfloat16_as_short13__nv_bfloat16_nbst = declare_device(
        shim_name, return_type(*pointer_args)
    )

    # Wrapper handed to ``compile_internal``; forwards straight to the shim.
    def _ZL19__bfloat16_as_short13__nv_bfloat16_nbst_caller(arg_0):
        return _ZL19__bfloat16_as_short13__nv_bfloat16_nbst(arg_0)

    @lower(__bfloat16_as_short, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Allocate one slot per argument first, then store the values.
        slots = [
            builder.alloca(llvm_type)
            for llvm_type in map(context.get_value_type, sig.args)
        ]
        for slot, arg_type, value in zip(slots, sig.args, args):
            alignment = getattr(arg_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZL19__bfloat16_as_short13__nv_bfloat16_nbst_caller,
            signature(return_type, *pointer_args),
            slots,
        )


_lower__ZL19__bfloat16_as_short13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __bfloat16_as_ushort():
    """Placeholder object for the ``__bfloat16_as_ushort`` intrinsic.

    Takes no arguments and returns ``None`` when called from Python; the file
    uses this function as the key of an ``@lower(__bfloat16_as_ushort, ...)``
    registration.
    """


def _lower__ZL20__bfloat16_as_ushort13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Register the lowering of ``__bfloat16_as_ushort(__nv_bfloat16)``.

    Declares the shim ``_ZL20__bfloat16_as_ushort13__nv_bfloat16_nbst`` as a
    device function returning ``uint16`` and attaches an ``@lower``
    implementation that calls it with a pointer to the spilled argument.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the lowering runs.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL20__bfloat16_as_ushort13__nv_bfloat16_nbst"
    return_type = uint16
    pointer_args = (CPointer(_type___nv_bfloat16),)

    # Shim text: the result is written through ``retval`` and the input
    # arrives by pointer.
    shim_source = """
    extern "C" __device__ int
    _ZL20__bfloat16_as_ushort13__nv_bfloat16_nbst(unsigned short &retval , __nv_bfloat16* h) {
        retval = __bfloat16_as_ushort(*h);
        return 0;
    }
        """

    _ZL20__bfloat16_as_ushort13__nv_bfloat16_nbst = declare_device(
        shim_name, return_type(*pointer_args)
    )

    # Wrapper handed to ``compile_internal``; forwards straight to the shim.
    def _ZL20__bfloat16_as_ushort13__nv_bfloat16_nbst_caller(arg_0):
        return _ZL20__bfloat16_as_ushort13__nv_bfloat16_nbst(arg_0)

    @lower(__bfloat16_as_ushort, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Allocate one slot per argument first, then store the values.
        slots = [
            builder.alloca(llvm_type)
            for llvm_type in map(context.get_value_type, sig.args)
        ]
        for slot, arg_type, value in zip(slots, sig.args, args):
            alignment = getattr(arg_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZL20__bfloat16_as_ushort13__nv_bfloat16_nbst_caller,
            signature(return_type, *pointer_args),
            slots,
        )


_lower__ZL20__bfloat16_as_ushort13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __short_as_bfloat16():
    """Placeholder object for the ``__short_as_bfloat16`` intrinsic.

    Takes no arguments and returns ``None`` when called from Python; the file
    uses this function as the key of an ``@lower(__short_as_bfloat16, ...)``
    registration.
    """


def _lower__ZL19__short_as_bfloat16s_nbst(shim_stream, shim_obj):
    """Register the lowering of ``__short_as_bfloat16(short)``.

    Declares the shim ``_ZL19__short_as_bfloat16s_nbst`` as a device
    function taking a pointer to ``int16`` and attaches an ``@lower``
    implementation that calls it with a pointer to the spilled argument.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the lowering runs.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL19__short_as_bfloat16s_nbst"
    return_type = _type___nv_bfloat16
    pointer_args = (CPointer(int16),)

    # Shim text: the result is written through ``retval`` and the input
    # arrives by pointer.
    shim_source = """
    extern "C" __device__ int
    _ZL19__short_as_bfloat16s_nbst(__nv_bfloat16 &retval , short* i) {
        retval = __short_as_bfloat16(*i);
        return 0;
    }
        """

    _ZL19__short_as_bfloat16s_nbst = declare_device(
        shim_name, return_type(*pointer_args)
    )

    # Wrapper handed to ``compile_internal``; forwards straight to the shim.
    def _ZL19__short_as_bfloat16s_nbst_caller(arg_0):
        return _ZL19__short_as_bfloat16s_nbst(arg_0)

    @lower(__short_as_bfloat16, int16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Allocate one slot per argument first, then store the values.
        slots = [
            builder.alloca(llvm_type)
            for llvm_type in map(context.get_value_type, sig.args)
        ]
        for slot, arg_type, value in zip(slots, sig.args, args):
            alignment = getattr(arg_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZL19__short_as_bfloat16s_nbst_caller,
            signature(return_type, *pointer_args),
            slots,
        )


_lower__ZL19__short_as_bfloat16s_nbst(shim_stream, shim_obj)


def __ushort_as_bfloat16():
    """Placeholder object for the ``__ushort_as_bfloat16`` intrinsic.

    Takes no arguments and returns ``None`` when called from Python; the file
    uses this function as the key of an ``@lower(__ushort_as_bfloat16, ...)``
    registration.
    """


def _lower__ZL20__ushort_as_bfloat16t_nbst(shim_stream, shim_obj):
    """Register the lowering of ``__ushort_as_bfloat16(unsigned short)``.

    Declares the shim ``_ZL20__ushort_as_bfloat16t_nbst`` as a device
    function taking a pointer to ``uint16`` and attaches an ``@lower``
    implementation that calls it with a pointer to the spilled argument.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the lowering runs.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL20__ushort_as_bfloat16t_nbst"
    return_type = _type___nv_bfloat16
    pointer_args = (CPointer(uint16),)

    # Shim text: the result is written through ``retval`` and the input
    # arrives by pointer.
    shim_source = """
    extern "C" __device__ int
    _ZL20__ushort_as_bfloat16t_nbst(__nv_bfloat16 &retval , unsigned short* i) {
        retval = __ushort_as_bfloat16(*i);
        return 0;
    }
        """

    _ZL20__ushort_as_bfloat16t_nbst = declare_device(
        shim_name, return_type(*pointer_args)
    )

    # Wrapper handed to ``compile_internal``; forwards straight to the shim.
    def _ZL20__ushort_as_bfloat16t_nbst_caller(arg_0):
        return _ZL20__ushort_as_bfloat16t_nbst(arg_0)

    @lower(__ushort_as_bfloat16, uint16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Allocate one slot per argument first, then store the values.
        slots = [
            builder.alloca(llvm_type)
            for llvm_type in map(context.get_value_type, sig.args)
        ]
        for slot, arg_type, value in zip(slots, sig.args, args):
            alignment = getattr(arg_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZL20__ushort_as_bfloat16t_nbst_caller,
            signature(return_type, *pointer_args),
            slots,
        )


_lower__ZL20__ushort_as_bfloat16t_nbst(shim_stream, shim_obj)


def __shfl_sync():
    """Placeholder object for the ``__shfl_sync`` intrinsic.

    Takes no arguments and returns ``None`` when called from Python; the file
    uses this function as the key of ``@lower(__shfl_sync, ...)``
    registrations for both ``__nv_bfloat162`` and ``__nv_bfloat16`` values.
    """


def _lower__ZL11__shfl_syncj14__nv_bfloat162ii_nbst(shim_stream, shim_obj):
    """Register the lowering of ``__shfl_sync(unsigned int, __nv_bfloat162, int, int)``.

    Declares the shim ``_ZL11__shfl_syncj14__nv_bfloat162ii_nbst`` as a
    device function and attaches an ``@lower`` implementation that calls it
    with pointers to the spilled arguments.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the lowering runs.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL11__shfl_syncj14__nv_bfloat162ii_nbst"
    return_type = _type___nv_bfloat162
    pointer_args = (
        CPointer(uint32),
        CPointer(_type___nv_bfloat162),
        CPointer(int32),
        CPointer(int32),
    )

    # Shim text: the result is written through ``retval`` and the inputs
    # arrive by pointer.
    shim_source = """
    extern "C" __device__ int
    _ZL11__shfl_syncj14__nv_bfloat162ii_nbst(__nv_bfloat162 &retval , unsigned int* mask, __nv_bfloat162* var, int* srcLane, int* width) {
        retval = __shfl_sync(*mask, *var, *srcLane, *width);
        return 0;
    }
        """

    _ZL11__shfl_syncj14__nv_bfloat162ii_nbst = declare_device(
        shim_name, return_type(*pointer_args)
    )

    # Wrapper handed to ``compile_internal``; forwards straight to the shim.
    def _ZL11__shfl_syncj14__nv_bfloat162ii_nbst_caller(
        arg_0, arg_1, arg_2, arg_3
    ):
        return _ZL11__shfl_syncj14__nv_bfloat162ii_nbst(
            arg_0, arg_1, arg_2, arg_3
        )

    @lower(__shfl_sync, uint32, _type___nv_bfloat162, int32, int32)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Allocate one slot per argument first, then store the values.
        slots = [
            builder.alloca(llvm_type)
            for llvm_type in map(context.get_value_type, sig.args)
        ]
        for slot, arg_type, value in zip(slots, sig.args, args):
            alignment = getattr(arg_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZL11__shfl_syncj14__nv_bfloat162ii_nbst_caller,
            signature(return_type, *pointer_args),
            slots,
        )


_lower__ZL11__shfl_syncj14__nv_bfloat162ii_nbst(shim_stream, shim_obj)


def __shfl_up_sync():
    """Placeholder object for the ``__shfl_up_sync`` intrinsic.

    Takes no arguments and returns ``None`` when called from Python; the file
    uses this function as the key of ``@lower(__shfl_up_sync, ...)``
    registrations for both ``__nv_bfloat162`` and ``__nv_bfloat16`` values.
    """


def _lower__ZL14__shfl_up_syncj14__nv_bfloat162ji_nbst(shim_stream, shim_obj):
    """Register the lowering of ``__shfl_up_sync(unsigned int, __nv_bfloat162, unsigned int, int)``.

    Declares the shim ``_ZL14__shfl_up_syncj14__nv_bfloat162ji_nbst`` as a
    device function and attaches an ``@lower`` implementation that calls it
    with pointers to the spilled arguments.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the lowering runs.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL14__shfl_up_syncj14__nv_bfloat162ji_nbst"
    return_type = _type___nv_bfloat162
    pointer_args = (
        CPointer(uint32),
        CPointer(_type___nv_bfloat162),
        CPointer(uint32),
        CPointer(int32),
    )

    # Shim text: the result is written through ``retval`` and the inputs
    # arrive by pointer.
    shim_source = """
    extern "C" __device__ int
    _ZL14__shfl_up_syncj14__nv_bfloat162ji_nbst(__nv_bfloat162 &retval , unsigned int* mask, __nv_bfloat162* var, unsigned int* delta, int* width) {
        retval = __shfl_up_sync(*mask, *var, *delta, *width);
        return 0;
    }
        """

    _ZL14__shfl_up_syncj14__nv_bfloat162ji_nbst = declare_device(
        shim_name, return_type(*pointer_args)
    )

    # Wrapper handed to ``compile_internal``; forwards straight to the shim.
    def _ZL14__shfl_up_syncj14__nv_bfloat162ji_nbst_caller(
        arg_0, arg_1, arg_2, arg_3
    ):
        return _ZL14__shfl_up_syncj14__nv_bfloat162ji_nbst(
            arg_0, arg_1, arg_2, arg_3
        )

    @lower(__shfl_up_sync, uint32, _type___nv_bfloat162, uint32, int32)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Allocate one slot per argument first, then store the values.
        slots = [
            builder.alloca(llvm_type)
            for llvm_type in map(context.get_value_type, sig.args)
        ]
        for slot, arg_type, value in zip(slots, sig.args, args):
            alignment = getattr(arg_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZL14__shfl_up_syncj14__nv_bfloat162ji_nbst_caller,
            signature(return_type, *pointer_args),
            slots,
        )


_lower__ZL14__shfl_up_syncj14__nv_bfloat162ji_nbst(shim_stream, shim_obj)


def __shfl_down_sync():
    """Placeholder object for the ``__shfl_down_sync`` intrinsic.

    Takes no arguments and returns ``None`` when called from Python; the file
    uses this function as the key of ``@lower(__shfl_down_sync, ...)``
    registrations for both ``__nv_bfloat162`` and ``__nv_bfloat16`` values.
    """


def _lower__ZL16__shfl_down_syncj14__nv_bfloat162ji_nbst(shim_stream, shim_obj):
    """Register the lowering of ``__shfl_down_sync(unsigned int, __nv_bfloat162, unsigned int, int)``.

    Declares the shim ``_ZL16__shfl_down_syncj14__nv_bfloat162ji_nbst`` as a
    device function and attaches an ``@lower`` implementation that calls it
    with pointers to the spilled arguments.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the lowering runs.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL16__shfl_down_syncj14__nv_bfloat162ji_nbst"
    return_type = _type___nv_bfloat162
    pointer_args = (
        CPointer(uint32),
        CPointer(_type___nv_bfloat162),
        CPointer(uint32),
        CPointer(int32),
    )

    # Shim text: the result is written through ``retval`` and the inputs
    # arrive by pointer.
    shim_source = """
    extern "C" __device__ int
    _ZL16__shfl_down_syncj14__nv_bfloat162ji_nbst(__nv_bfloat162 &retval , unsigned int* mask, __nv_bfloat162* var, unsigned int* delta, int* width) {
        retval = __shfl_down_sync(*mask, *var, *delta, *width);
        return 0;
    }
        """

    _ZL16__shfl_down_syncj14__nv_bfloat162ji_nbst = declare_device(
        shim_name, return_type(*pointer_args)
    )

    # Wrapper handed to ``compile_internal``; forwards straight to the shim.
    def _ZL16__shfl_down_syncj14__nv_bfloat162ji_nbst_caller(
        arg_0, arg_1, arg_2, arg_3
    ):
        return _ZL16__shfl_down_syncj14__nv_bfloat162ji_nbst(
            arg_0, arg_1, arg_2, arg_3
        )

    @lower(__shfl_down_sync, uint32, _type___nv_bfloat162, uint32, int32)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Allocate one slot per argument first, then store the values.
        slots = [
            builder.alloca(llvm_type)
            for llvm_type in map(context.get_value_type, sig.args)
        ]
        for slot, arg_type, value in zip(slots, sig.args, args):
            alignment = getattr(arg_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZL16__shfl_down_syncj14__nv_bfloat162ji_nbst_caller,
            signature(return_type, *pointer_args),
            slots,
        )


_lower__ZL16__shfl_down_syncj14__nv_bfloat162ji_nbst(shim_stream, shim_obj)


def __shfl_xor_sync():
    """Placeholder object for the ``__shfl_xor_sync`` intrinsic.

    Takes no arguments and returns ``None`` when called from Python; the file
    uses this function as the key of an ``@lower(__shfl_xor_sync, ...)``
    registration.
    """


def _lower__ZL15__shfl_xor_syncj14__nv_bfloat162ii_nbst(shim_stream, shim_obj):
    """Register the lowering of ``__shfl_xor_sync(unsigned int, __nv_bfloat162, int, int)``.

    Declares the shim ``_ZL15__shfl_xor_syncj14__nv_bfloat162ii_nbst`` as a
    device function and attaches an ``@lower`` implementation that calls it
    with pointers to the spilled arguments.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the lowering runs.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL15__shfl_xor_syncj14__nv_bfloat162ii_nbst"
    return_type = _type___nv_bfloat162
    pointer_args = (
        CPointer(uint32),
        CPointer(_type___nv_bfloat162),
        CPointer(int32),
        CPointer(int32),
    )

    # Shim text: the result is written through ``retval`` and the inputs
    # arrive by pointer.
    shim_source = """
    extern "C" __device__ int
    _ZL15__shfl_xor_syncj14__nv_bfloat162ii_nbst(__nv_bfloat162 &retval , unsigned int* mask, __nv_bfloat162* var, int* laneMask, int* width) {
        retval = __shfl_xor_sync(*mask, *var, *laneMask, *width);
        return 0;
    }
        """

    _ZL15__shfl_xor_syncj14__nv_bfloat162ii_nbst = declare_device(
        shim_name, return_type(*pointer_args)
    )

    # Wrapper handed to ``compile_internal``; forwards straight to the shim.
    def _ZL15__shfl_xor_syncj14__nv_bfloat162ii_nbst_caller(
        arg_0, arg_1, arg_2, arg_3
    ):
        return _ZL15__shfl_xor_syncj14__nv_bfloat162ii_nbst(
            arg_0, arg_1, arg_2, arg_3
        )

    @lower(__shfl_xor_sync, uint32, _type___nv_bfloat162, int32, int32)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Allocate one slot per argument first, then store the values.
        slots = [
            builder.alloca(llvm_type)
            for llvm_type in map(context.get_value_type, sig.args)
        ]
        for slot, arg_type, value in zip(slots, sig.args, args):
            alignment = getattr(arg_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZL15__shfl_xor_syncj14__nv_bfloat162ii_nbst_caller,
            signature(return_type, *pointer_args),
            slots,
        )


_lower__ZL15__shfl_xor_syncj14__nv_bfloat162ii_nbst(shim_stream, shim_obj)


def _lower__ZL11__shfl_syncj13__nv_bfloat16ii_nbst(shim_stream, shim_obj):
    """Register the lowering of ``__shfl_sync(unsigned int, __nv_bfloat16, int, int)``.

    Declares the shim ``_ZL11__shfl_syncj13__nv_bfloat16ii_nbst`` as a
    device function and attaches an ``@lower`` implementation that calls it
    with pointers to the spilled arguments.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the lowering runs.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL11__shfl_syncj13__nv_bfloat16ii_nbst"
    return_type = _type___nv_bfloat16
    pointer_args = (
        CPointer(uint32),
        CPointer(_type___nv_bfloat16),
        CPointer(int32),
        CPointer(int32),
    )

    # Shim text: the result is written through ``retval`` and the inputs
    # arrive by pointer.
    shim_source = """
    extern "C" __device__ int
    _ZL11__shfl_syncj13__nv_bfloat16ii_nbst(__nv_bfloat16 &retval , unsigned int* mask, __nv_bfloat16* var, int* srcLane, int* width) {
        retval = __shfl_sync(*mask, *var, *srcLane, *width);
        return 0;
    }
        """

    _ZL11__shfl_syncj13__nv_bfloat16ii_nbst = declare_device(
        shim_name, return_type(*pointer_args)
    )

    # Wrapper handed to ``compile_internal``; forwards straight to the shim.
    def _ZL11__shfl_syncj13__nv_bfloat16ii_nbst_caller(
        arg_0, arg_1, arg_2, arg_3
    ):
        return _ZL11__shfl_syncj13__nv_bfloat16ii_nbst(
            arg_0, arg_1, arg_2, arg_3
        )

    @lower(__shfl_sync, uint32, _type___nv_bfloat16, int32, int32)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Allocate one slot per argument first, then store the values.
        slots = [
            builder.alloca(llvm_type)
            for llvm_type in map(context.get_value_type, sig.args)
        ]
        for slot, arg_type, value in zip(slots, sig.args, args):
            alignment = getattr(arg_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZL11__shfl_syncj13__nv_bfloat16ii_nbst_caller,
            signature(return_type, *pointer_args),
            slots,
        )


_lower__ZL11__shfl_syncj13__nv_bfloat16ii_nbst(shim_stream, shim_obj)


def _lower__ZL14__shfl_up_syncj13__nv_bfloat16ji_nbst(shim_stream, shim_obj):
    """Register the lowering of ``__shfl_up_sync(unsigned int, __nv_bfloat16, unsigned int, int)``.

    Declares the shim ``_ZL14__shfl_up_syncj13__nv_bfloat16ji_nbst`` as a
    device function and attaches an ``@lower`` implementation that calls it
    with pointers to the spilled arguments.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the lowering runs.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL14__shfl_up_syncj13__nv_bfloat16ji_nbst"
    return_type = _type___nv_bfloat16
    pointer_args = (
        CPointer(uint32),
        CPointer(_type___nv_bfloat16),
        CPointer(uint32),
        CPointer(int32),
    )

    # Shim text: the result is written through ``retval`` and the inputs
    # arrive by pointer.
    shim_source = """
    extern "C" __device__ int
    _ZL14__shfl_up_syncj13__nv_bfloat16ji_nbst(__nv_bfloat16 &retval , unsigned int* mask, __nv_bfloat16* var, unsigned int* delta, int* width) {
        retval = __shfl_up_sync(*mask, *var, *delta, *width);
        return 0;
    }
        """

    _ZL14__shfl_up_syncj13__nv_bfloat16ji_nbst = declare_device(
        shim_name, return_type(*pointer_args)
    )

    # Wrapper handed to ``compile_internal``; forwards straight to the shim.
    def _ZL14__shfl_up_syncj13__nv_bfloat16ji_nbst_caller(
        arg_0, arg_1, arg_2, arg_3
    ):
        return _ZL14__shfl_up_syncj13__nv_bfloat16ji_nbst(
            arg_0, arg_1, arg_2, arg_3
        )

    @lower(__shfl_up_sync, uint32, _type___nv_bfloat16, uint32, int32)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Allocate one slot per argument first, then store the values.
        slots = [
            builder.alloca(llvm_type)
            for llvm_type in map(context.get_value_type, sig.args)
        ]
        for slot, arg_type, value in zip(slots, sig.args, args):
            alignment = getattr(arg_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZL14__shfl_up_syncj13__nv_bfloat16ji_nbst_caller,
            signature(return_type, *pointer_args),
            slots,
        )


_lower__ZL14__shfl_up_syncj13__nv_bfloat16ji_nbst(shim_stream, shim_obj)


def _lower__ZL16__shfl_down_syncj13__nv_bfloat16ji_nbst(shim_stream, shim_obj):
    """Hook up the ``__shfl_down_sync`` lowering for a ``__nv_bfloat16`` value.

    An ``impl`` is decorated with
    ``@lower(__shfl_down_sync, uint32, _type___nv_bfloat16, uint32, int32)``.
    When that ``impl`` runs it passes ``shim_obj`` to the active code
    library's ``add_linking_file``, writes the C++ shim source to
    ``shim_stream`` under the shim's symbol name, and calls the shim with
    every argument passed by pointer.
    """
    shim_name = "_ZL16__shfl_down_syncj13__nv_bfloat16ji_nbst"
    # Pointer-typed parameters of the shim, shared by the device declaration
    # and by the signature used for the internal caller.
    shim_arg_types = (
        CPointer(uint32),
        CPointer(_type___nv_bfloat16),
        CPointer(uint32),
        CPointer(int32),
    )
    shim_source = """
    extern "C" __device__ int
    _ZL16__shfl_down_syncj13__nv_bfloat16ji_nbst(__nv_bfloat16 &retval , unsigned int* mask, __nv_bfloat16* var, unsigned int* delta, int* width) {
        retval = __shfl_down_sync(*mask, *var, *delta, *width);
        return 0;
    }
        """

    shim_fn = declare_device(shim_name, _type___nv_bfloat16(*shim_arg_types))

    def _ZL16__shfl_down_syncj13__nv_bfloat16ji_nbst_caller(
        arg_0, arg_1, arg_2, arg_3
    ):
        return shim_fn(arg_0, arg_1, arg_2, arg_3)

    @lower(__shfl_down_sync, uint32, _type___nv_bfloat16, uint32, int32)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Reserve one alloca'd slot per argument first, then spill each
        # incoming value into its slot so the shim can take it by pointer.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(_type___nv_bfloat16, *shim_arg_types)
        return context.compile_internal(
            builder,
            _ZL16__shfl_down_syncj13__nv_bfloat16ji_nbst_caller,
            caller_sig,
            slots,
        )


_lower__ZL16__shfl_down_syncj13__nv_bfloat16ji_nbst(shim_stream, shim_obj)


def _lower__ZL15__shfl_xor_syncj13__nv_bfloat16ii_nbst(shim_stream, shim_obj):
    """Hook up the ``__shfl_xor_sync`` lowering for a ``__nv_bfloat16`` value.

    An ``impl`` is decorated with
    ``@lower(__shfl_xor_sync, uint32, _type___nv_bfloat16, int32, int32)``.
    When that ``impl`` runs it passes ``shim_obj`` to the active code
    library's ``add_linking_file``, writes the C++ shim source to
    ``shim_stream`` under the shim's symbol name, and calls the shim with
    every argument passed by pointer.
    """
    shim_name = "_ZL15__shfl_xor_syncj13__nv_bfloat16ii_nbst"
    # Pointer-typed parameters of the shim, shared by the device declaration
    # and by the signature used for the internal caller.
    shim_arg_types = (
        CPointer(uint32),
        CPointer(_type___nv_bfloat16),
        CPointer(int32),
        CPointer(int32),
    )
    shim_source = """
    extern "C" __device__ int
    _ZL15__shfl_xor_syncj13__nv_bfloat16ii_nbst(__nv_bfloat16 &retval , unsigned int* mask, __nv_bfloat16* var, int* laneMask, int* width) {
        retval = __shfl_xor_sync(*mask, *var, *laneMask, *width);
        return 0;
    }
        """

    shim_fn = declare_device(shim_name, _type___nv_bfloat16(*shim_arg_types))

    def _ZL15__shfl_xor_syncj13__nv_bfloat16ii_nbst_caller(
        arg_0, arg_1, arg_2, arg_3
    ):
        return shim_fn(arg_0, arg_1, arg_2, arg_3)

    @lower(__shfl_xor_sync, uint32, _type___nv_bfloat16, int32, int32)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Reserve one alloca'd slot per argument first, then spill each
        # incoming value into its slot so the shim can take it by pointer.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(_type___nv_bfloat16, *shim_arg_types)
        return context.compile_internal(
            builder,
            _ZL15__shfl_xor_syncj13__nv_bfloat16ii_nbst_caller,
            caller_sig,
            slots,
        )


_lower__ZL15__shfl_xor_syncj13__nv_bfloat16ii_nbst(shim_stream, shim_obj)


def __ldg():
    """Python-side stand-in for the CUDA ``__ldg`` function.

    Calling it does nothing and yields ``None``. In this file the function
    object is the key given to ``@lower(__ldg, ...)`` for the
    ``__nv_bfloat162`` and ``__nv_bfloat16`` pointer overloads.
    """
    return None


def _lower__ZL5__ldgPK14__nv_bfloat162_nbst(shim_stream, shim_obj):
    """Hook up the ``__ldg`` lowering for a ``__nv_bfloat162`` pointer.

    An ``impl`` is decorated with
    ``@lower(__ldg, CPointer(_type___nv_bfloat162))``. When that ``impl``
    runs it passes ``shim_obj`` to the active code library's
    ``add_linking_file``, writes the C++ shim source to ``shim_stream``
    under the shim's symbol name, and calls the shim with the argument
    passed by pointer.
    """
    shim_name = "_ZL5__ldgPK14__nv_bfloat162_nbst"
    # The single argument is already a pointer; the shim takes it by pointer.
    shim_arg_types = (CPointer(CPointer(_type___nv_bfloat162)),)
    shim_source = """
    extern "C" __device__ int
    _ZL5__ldgPK14__nv_bfloat162_nbst(__nv_bfloat162 &retval , __nv_bfloat162 ** ptr) {
        retval = __ldg(*ptr);
        return 0;
    }
        """

    shim_fn = declare_device(shim_name, _type___nv_bfloat162(*shim_arg_types))

    def _ZL5__ldgPK14__nv_bfloat162_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__ldg, CPointer(_type___nv_bfloat162))
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Reserve one alloca'd slot per argument first, then spill each
        # incoming value into its slot so the shim can take it by pointer.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(_type___nv_bfloat162, *shim_arg_types)
        return context.compile_internal(
            builder,
            _ZL5__ldgPK14__nv_bfloat162_nbst_caller,
            caller_sig,
            slots,
        )


_lower__ZL5__ldgPK14__nv_bfloat162_nbst(shim_stream, shim_obj)


def _lower__ZL5__ldgPK13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Hook up the ``__ldg`` lowering for a ``__nv_bfloat16`` pointer.

    An ``impl`` is decorated with
    ``@lower(__ldg, CPointer(_type___nv_bfloat16))``. When that ``impl``
    runs it passes ``shim_obj`` to the active code library's
    ``add_linking_file``, writes the C++ shim source to ``shim_stream``
    under the shim's symbol name, and calls the shim with the argument
    passed by pointer.
    """
    shim_name = "_ZL5__ldgPK13__nv_bfloat16_nbst"
    # The single argument is already a pointer; the shim takes it by pointer.
    shim_arg_types = (CPointer(CPointer(_type___nv_bfloat16)),)
    shim_source = """
    extern "C" __device__ int
    _ZL5__ldgPK13__nv_bfloat16_nbst(__nv_bfloat16 &retval , __nv_bfloat16 ** ptr) {
        retval = __ldg(*ptr);
        return 0;
    }
        """

    shim_fn = declare_device(shim_name, _type___nv_bfloat16(*shim_arg_types))

    def _ZL5__ldgPK13__nv_bfloat16_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__ldg, CPointer(_type___nv_bfloat16))
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Reserve one alloca'd slot per argument first, then spill each
        # incoming value into its slot so the shim can take it by pointer.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(_type___nv_bfloat16, *shim_arg_types)
        return context.compile_internal(
            builder,
            _ZL5__ldgPK13__nv_bfloat16_nbst_caller,
            caller_sig,
            slots,
        )


_lower__ZL5__ldgPK13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __ldcg():
    """Python-side stand-in for the CUDA ``__ldcg`` function.

    Calling it does nothing and yields ``None``. In this file the function
    object is the key given to ``@lower(__ldcg, ...)`` for the
    ``__nv_bfloat162`` and ``__nv_bfloat16`` pointer overloads.
    """
    return None


def _lower__ZL6__ldcgPK14__nv_bfloat162_nbst(shim_stream, shim_obj):
    """Hook up the ``__ldcg`` lowering for a ``__nv_bfloat162`` pointer.

    An ``impl`` is decorated with
    ``@lower(__ldcg, CPointer(_type___nv_bfloat162))``. When that ``impl``
    runs it passes ``shim_obj`` to the active code library's
    ``add_linking_file``, writes the C++ shim source to ``shim_stream``
    under the shim's symbol name, and calls the shim with the argument
    passed by pointer.
    """
    shim_name = "_ZL6__ldcgPK14__nv_bfloat162_nbst"
    # The single argument is already a pointer; the shim takes it by pointer.
    shim_arg_types = (CPointer(CPointer(_type___nv_bfloat162)),)
    shim_source = """
    extern "C" __device__ int
    _ZL6__ldcgPK14__nv_bfloat162_nbst(__nv_bfloat162 &retval , __nv_bfloat162 ** ptr) {
        retval = __ldcg(*ptr);
        return 0;
    }
        """

    shim_fn = declare_device(shim_name, _type___nv_bfloat162(*shim_arg_types))

    def _ZL6__ldcgPK14__nv_bfloat162_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__ldcg, CPointer(_type___nv_bfloat162))
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Reserve one alloca'd slot per argument first, then spill each
        # incoming value into its slot so the shim can take it by pointer.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(_type___nv_bfloat162, *shim_arg_types)
        return context.compile_internal(
            builder,
            _ZL6__ldcgPK14__nv_bfloat162_nbst_caller,
            caller_sig,
            slots,
        )


_lower__ZL6__ldcgPK14__nv_bfloat162_nbst(shim_stream, shim_obj)


def _lower__ZL6__ldcgPK13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Hook up the ``__ldcg`` lowering for a ``__nv_bfloat16`` pointer.

    An ``impl`` is decorated with
    ``@lower(__ldcg, CPointer(_type___nv_bfloat16))``. When that ``impl``
    runs it passes ``shim_obj`` to the active code library's
    ``add_linking_file``, writes the C++ shim source to ``shim_stream``
    under the shim's symbol name, and calls the shim with the argument
    passed by pointer.
    """
    shim_name = "_ZL6__ldcgPK13__nv_bfloat16_nbst"
    # The single argument is already a pointer; the shim takes it by pointer.
    shim_arg_types = (CPointer(CPointer(_type___nv_bfloat16)),)
    shim_source = """
    extern "C" __device__ int
    _ZL6__ldcgPK13__nv_bfloat16_nbst(__nv_bfloat16 &retval , __nv_bfloat16 ** ptr) {
        retval = __ldcg(*ptr);
        return 0;
    }
        """

    shim_fn = declare_device(shim_name, _type___nv_bfloat16(*shim_arg_types))

    def _ZL6__ldcgPK13__nv_bfloat16_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__ldcg, CPointer(_type___nv_bfloat16))
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Reserve one alloca'd slot per argument first, then spill each
        # incoming value into its slot so the shim can take it by pointer.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(_type___nv_bfloat16, *shim_arg_types)
        return context.compile_internal(
            builder,
            _ZL6__ldcgPK13__nv_bfloat16_nbst_caller,
            caller_sig,
            slots,
        )


_lower__ZL6__ldcgPK13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __ldca():
    """Python-side stand-in for the CUDA ``__ldca`` function.

    Calling it does nothing and yields ``None``. In this file the function
    object is the key given to ``@lower(__ldca, ...)`` for the
    ``__nv_bfloat162`` and ``__nv_bfloat16`` pointer overloads.
    """
    return None


def _lower__ZL6__ldcaPK14__nv_bfloat162_nbst(shim_stream, shim_obj):
    """Hook up the ``__ldca`` lowering for a ``__nv_bfloat162`` pointer.

    An ``impl`` is decorated with
    ``@lower(__ldca, CPointer(_type___nv_bfloat162))``. When that ``impl``
    runs it passes ``shim_obj`` to the active code library's
    ``add_linking_file``, writes the C++ shim source to ``shim_stream``
    under the shim's symbol name, and calls the shim with the argument
    passed by pointer.
    """
    shim_name = "_ZL6__ldcaPK14__nv_bfloat162_nbst"
    # The single argument is already a pointer; the shim takes it by pointer.
    shim_arg_types = (CPointer(CPointer(_type___nv_bfloat162)),)
    shim_source = """
    extern "C" __device__ int
    _ZL6__ldcaPK14__nv_bfloat162_nbst(__nv_bfloat162 &retval , __nv_bfloat162 ** ptr) {
        retval = __ldca(*ptr);
        return 0;
    }
        """

    shim_fn = declare_device(shim_name, _type___nv_bfloat162(*shim_arg_types))

    def _ZL6__ldcaPK14__nv_bfloat162_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__ldca, CPointer(_type___nv_bfloat162))
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Reserve one alloca'd slot per argument first, then spill each
        # incoming value into its slot so the shim can take it by pointer.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(_type___nv_bfloat162, *shim_arg_types)
        return context.compile_internal(
            builder,
            _ZL6__ldcaPK14__nv_bfloat162_nbst_caller,
            caller_sig,
            slots,
        )


_lower__ZL6__ldcaPK14__nv_bfloat162_nbst(shim_stream, shim_obj)


def _lower__ZL6__ldcaPK13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Hook up the ``__ldca`` lowering for a ``__nv_bfloat16`` pointer.

    An ``impl`` is decorated with
    ``@lower(__ldca, CPointer(_type___nv_bfloat16))``. When that ``impl``
    runs it passes ``shim_obj`` to the active code library's
    ``add_linking_file``, writes the C++ shim source to ``shim_stream``
    under the shim's symbol name, and calls the shim with the argument
    passed by pointer.
    """
    shim_name = "_ZL6__ldcaPK13__nv_bfloat16_nbst"
    # The single argument is already a pointer; the shim takes it by pointer.
    shim_arg_types = (CPointer(CPointer(_type___nv_bfloat16)),)
    shim_source = """
    extern "C" __device__ int
    _ZL6__ldcaPK13__nv_bfloat16_nbst(__nv_bfloat16 &retval , __nv_bfloat16 ** ptr) {
        retval = __ldca(*ptr);
        return 0;
    }
        """

    shim_fn = declare_device(shim_name, _type___nv_bfloat16(*shim_arg_types))

    def _ZL6__ldcaPK13__nv_bfloat16_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__ldca, CPointer(_type___nv_bfloat16))
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Reserve one alloca'd slot per argument first, then spill each
        # incoming value into its slot so the shim can take it by pointer.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(_type___nv_bfloat16, *shim_arg_types)
        return context.compile_internal(
            builder,
            _ZL6__ldcaPK13__nv_bfloat16_nbst_caller,
            caller_sig,
            slots,
        )


_lower__ZL6__ldcaPK13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __ldcs():
    """Python-side stand-in for the CUDA ``__ldcs`` function.

    Calling it does nothing and yields ``None``. In this file the function
    object is the key given to ``@lower(__ldcs, ...)`` for the
    ``__nv_bfloat162`` and ``__nv_bfloat16`` pointer overloads.
    """
    return None


def _lower__ZL6__ldcsPK14__nv_bfloat162_nbst(shim_stream, shim_obj):
    """Hook up the ``__ldcs`` lowering for a ``__nv_bfloat162`` pointer.

    An ``impl`` is decorated with
    ``@lower(__ldcs, CPointer(_type___nv_bfloat162))``. When that ``impl``
    runs it passes ``shim_obj`` to the active code library's
    ``add_linking_file``, writes the C++ shim source to ``shim_stream``
    under the shim's symbol name, and calls the shim with the argument
    passed by pointer.
    """
    shim_name = "_ZL6__ldcsPK14__nv_bfloat162_nbst"
    # The single argument is already a pointer; the shim takes it by pointer.
    shim_arg_types = (CPointer(CPointer(_type___nv_bfloat162)),)
    shim_source = """
    extern "C" __device__ int
    _ZL6__ldcsPK14__nv_bfloat162_nbst(__nv_bfloat162 &retval , __nv_bfloat162 ** ptr) {
        retval = __ldcs(*ptr);
        return 0;
    }
        """

    shim_fn = declare_device(shim_name, _type___nv_bfloat162(*shim_arg_types))

    def _ZL6__ldcsPK14__nv_bfloat162_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__ldcs, CPointer(_type___nv_bfloat162))
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Reserve one alloca'd slot per argument first, then spill each
        # incoming value into its slot so the shim can take it by pointer.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(_type___nv_bfloat162, *shim_arg_types)
        return context.compile_internal(
            builder,
            _ZL6__ldcsPK14__nv_bfloat162_nbst_caller,
            caller_sig,
            slots,
        )


_lower__ZL6__ldcsPK14__nv_bfloat162_nbst(shim_stream, shim_obj)


def _lower__ZL6__ldcsPK13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Hook up the ``__ldcs`` lowering for a ``__nv_bfloat16`` pointer.

    An ``impl`` is decorated with
    ``@lower(__ldcs, CPointer(_type___nv_bfloat16))``. When that ``impl``
    runs it passes ``shim_obj`` to the active code library's
    ``add_linking_file``, writes the C++ shim source to ``shim_stream``
    under the shim's symbol name, and calls the shim with the argument
    passed by pointer.
    """
    shim_name = "_ZL6__ldcsPK13__nv_bfloat16_nbst"
    # The single argument is already a pointer; the shim takes it by pointer.
    shim_arg_types = (CPointer(CPointer(_type___nv_bfloat16)),)
    shim_source = """
    extern "C" __device__ int
    _ZL6__ldcsPK13__nv_bfloat16_nbst(__nv_bfloat16 &retval , __nv_bfloat16 ** ptr) {
        retval = __ldcs(*ptr);
        return 0;
    }
        """

    shim_fn = declare_device(shim_name, _type___nv_bfloat16(*shim_arg_types))

    def _ZL6__ldcsPK13__nv_bfloat16_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__ldcs, CPointer(_type___nv_bfloat16))
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Reserve one alloca'd slot per argument first, then spill each
        # incoming value into its slot so the shim can take it by pointer.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(_type___nv_bfloat16, *shim_arg_types)
        return context.compile_internal(
            builder,
            _ZL6__ldcsPK13__nv_bfloat16_nbst_caller,
            caller_sig,
            slots,
        )


_lower__ZL6__ldcsPK13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __ldlu():
    """Python-side stand-in for the CUDA ``__ldlu`` function.

    Calling it does nothing and yields ``None``. In this file the function
    object is the key given to ``@lower(__ldlu, ...)`` for the
    ``__nv_bfloat162`` and ``__nv_bfloat16`` pointer overloads.
    """
    return None


def _lower__ZL6__ldluPK14__nv_bfloat162_nbst(shim_stream, shim_obj):
    """Hook up the ``__ldlu`` lowering for a ``__nv_bfloat162`` pointer.

    An ``impl`` is decorated with
    ``@lower(__ldlu, CPointer(_type___nv_bfloat162))``. When that ``impl``
    runs it passes ``shim_obj`` to the active code library's
    ``add_linking_file``, writes the C++ shim source to ``shim_stream``
    under the shim's symbol name, and calls the shim with the argument
    passed by pointer.
    """
    shim_name = "_ZL6__ldluPK14__nv_bfloat162_nbst"
    # The single argument is already a pointer; the shim takes it by pointer.
    shim_arg_types = (CPointer(CPointer(_type___nv_bfloat162)),)
    shim_source = """
    extern "C" __device__ int
    _ZL6__ldluPK14__nv_bfloat162_nbst(__nv_bfloat162 &retval , __nv_bfloat162 ** ptr) {
        retval = __ldlu(*ptr);
        return 0;
    }
        """

    shim_fn = declare_device(shim_name, _type___nv_bfloat162(*shim_arg_types))

    def _ZL6__ldluPK14__nv_bfloat162_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__ldlu, CPointer(_type___nv_bfloat162))
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Reserve one alloca'd slot per argument first, then spill each
        # incoming value into its slot so the shim can take it by pointer.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(_type___nv_bfloat162, *shim_arg_types)
        return context.compile_internal(
            builder,
            _ZL6__ldluPK14__nv_bfloat162_nbst_caller,
            caller_sig,
            slots,
        )


_lower__ZL6__ldluPK14__nv_bfloat162_nbst(shim_stream, shim_obj)


def _lower__ZL6__ldluPK13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Hook up the ``__ldlu`` lowering for a ``__nv_bfloat16`` pointer.

    An ``impl`` is decorated with
    ``@lower(__ldlu, CPointer(_type___nv_bfloat16))``. When that ``impl``
    runs it passes ``shim_obj`` to the active code library's
    ``add_linking_file``, writes the C++ shim source to ``shim_stream``
    under the shim's symbol name, and calls the shim with the argument
    passed by pointer.
    """
    shim_name = "_ZL6__ldluPK13__nv_bfloat16_nbst"
    # The single argument is already a pointer; the shim takes it by pointer.
    shim_arg_types = (CPointer(CPointer(_type___nv_bfloat16)),)
    shim_source = """
    extern "C" __device__ int
    _ZL6__ldluPK13__nv_bfloat16_nbst(__nv_bfloat16 &retval , __nv_bfloat16 ** ptr) {
        retval = __ldlu(*ptr);
        return 0;
    }
        """

    shim_fn = declare_device(shim_name, _type___nv_bfloat16(*shim_arg_types))

    def _ZL6__ldluPK13__nv_bfloat16_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__ldlu, CPointer(_type___nv_bfloat16))
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Reserve one alloca'd slot per argument first, then spill each
        # incoming value into its slot so the shim can take it by pointer.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(_type___nv_bfloat16, *shim_arg_types)
        return context.compile_internal(
            builder,
            _ZL6__ldluPK13__nv_bfloat16_nbst_caller,
            caller_sig,
            slots,
        )


_lower__ZL6__ldluPK13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __ldcv():
    """Python-side stand-in for the CUDA ``__ldcv`` function.

    Calling it does nothing and yields ``None``. In this file the function
    object is the key given to ``@lower(__ldcv, ...)`` for the
    ``__nv_bfloat162`` and ``__nv_bfloat16`` pointer overloads.
    """
    return None


def _lower__ZL6__ldcvPK14__nv_bfloat162_nbst(shim_stream, shim_obj):
    """Hook up the ``__ldcv`` lowering for a ``__nv_bfloat162`` pointer.

    An ``impl`` is decorated with
    ``@lower(__ldcv, CPointer(_type___nv_bfloat162))``. When that ``impl``
    runs it passes ``shim_obj`` to the active code library's
    ``add_linking_file``, writes the C++ shim source to ``shim_stream``
    under the shim's symbol name, and calls the shim with the argument
    passed by pointer.
    """
    shim_name = "_ZL6__ldcvPK14__nv_bfloat162_nbst"
    # The single argument is already a pointer; the shim takes it by pointer.
    shim_arg_types = (CPointer(CPointer(_type___nv_bfloat162)),)
    shim_source = """
    extern "C" __device__ int
    _ZL6__ldcvPK14__nv_bfloat162_nbst(__nv_bfloat162 &retval , __nv_bfloat162 ** ptr) {
        retval = __ldcv(*ptr);
        return 0;
    }
        """

    shim_fn = declare_device(shim_name, _type___nv_bfloat162(*shim_arg_types))

    def _ZL6__ldcvPK14__nv_bfloat162_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__ldcv, CPointer(_type___nv_bfloat162))
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Reserve one alloca'd slot per argument first, then spill each
        # incoming value into its slot so the shim can take it by pointer.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(_type___nv_bfloat162, *shim_arg_types)
        return context.compile_internal(
            builder,
            _ZL6__ldcvPK14__nv_bfloat162_nbst_caller,
            caller_sig,
            slots,
        )


_lower__ZL6__ldcvPK14__nv_bfloat162_nbst(shim_stream, shim_obj)


def _lower__ZL6__ldcvPK13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Hook up the ``__ldcv`` lowering for a ``__nv_bfloat16`` pointer.

    An ``impl`` is decorated with
    ``@lower(__ldcv, CPointer(_type___nv_bfloat16))``. When that ``impl``
    runs it passes ``shim_obj`` to the active code library's
    ``add_linking_file``, writes the C++ shim source to ``shim_stream``
    under the shim's symbol name, and calls the shim with the argument
    passed by pointer.
    """
    shim_name = "_ZL6__ldcvPK13__nv_bfloat16_nbst"
    # The single argument is already a pointer; the shim takes it by pointer.
    shim_arg_types = (CPointer(CPointer(_type___nv_bfloat16)),)
    shim_source = """
    extern "C" __device__ int
    _ZL6__ldcvPK13__nv_bfloat16_nbst(__nv_bfloat16 &retval , __nv_bfloat16 ** ptr) {
        retval = __ldcv(*ptr);
        return 0;
    }
        """

    shim_fn = declare_device(shim_name, _type___nv_bfloat16(*shim_arg_types))

    def _ZL6__ldcvPK13__nv_bfloat16_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__ldcv, CPointer(_type___nv_bfloat16))
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Reserve one alloca'd slot per argument first, then spill each
        # incoming value into its slot so the shim can take it by pointer.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(_type___nv_bfloat16, *shim_arg_types)
        return context.compile_internal(
            builder,
            _ZL6__ldcvPK13__nv_bfloat16_nbst_caller,
            caller_sig,
            slots,
        )


_lower__ZL6__ldcvPK13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __stwb():
    """Python-side stand-in for the CUDA ``__stwb`` function.

    Calling it does nothing and yields ``None``. In this file the function
    object is the key given to ``@lower(__stwb, ...)`` for the
    ``__nv_bfloat162`` and ``__nv_bfloat16`` pointer/value overloads.
    """
    return None


def _lower__ZL6__stwbP14__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Hook up the ``__stwb`` lowering for ``__nv_bfloat162`` operands.

    An ``impl`` is decorated with
    ``@lower(__stwb, CPointer(_type___nv_bfloat162), _type___nv_bfloat162)``.
    When that ``impl`` runs it passes ``shim_obj`` to the active code
    library's ``add_linking_file``, writes the C++ shim source to
    ``shim_stream`` under the shim's symbol name, and calls the shim (declared
    with a ``void`` result) with every argument passed by pointer.
    """
    shim_name = "_ZL6__stwbP14__nv_bfloat162S__nbst"
    # Destination pointer and value, each taken by pointer in the shim.
    shim_arg_types = (
        CPointer(CPointer(_type___nv_bfloat162)),
        CPointer(_type___nv_bfloat162),
    )
    shim_source = """
    extern "C" __device__ int
    _ZL6__stwbP14__nv_bfloat162S__nbst(int &retval , __nv_bfloat162 ** ptr, __nv_bfloat162* value) {
        __stwb(*ptr, *value);
        return 0;
    }
        """

    shim_fn = declare_device(shim_name, void(*shim_arg_types))

    def _ZL6__stwbP14__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__stwb, CPointer(_type___nv_bfloat162), _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Reserve one alloca'd slot per argument first, then spill each
        # incoming value into its slot so the shim can take it by pointer.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(void, *shim_arg_types)
        return context.compile_internal(
            builder,
            _ZL6__stwbP14__nv_bfloat162S__nbst_caller,
            caller_sig,
            slots,
        )


_lower__ZL6__stwbP14__nv_bfloat162S__nbst(shim_stream, shim_obj)


def _lower__ZL6__stwbP13__nv_bfloat16S__nbst(shim_stream, shim_obj):
    """Hook up the ``__stwb`` lowering for ``__nv_bfloat16`` operands.

    An ``impl`` is decorated with
    ``@lower(__stwb, CPointer(_type___nv_bfloat16), _type___nv_bfloat16)``.
    When that ``impl`` runs it passes ``shim_obj`` to the active code
    library's ``add_linking_file``, writes the C++ shim source to
    ``shim_stream`` under the shim's symbol name, and calls the shim (declared
    with a ``void`` result) with every argument passed by pointer.
    """
    shim_name = "_ZL6__stwbP13__nv_bfloat16S__nbst"
    # Destination pointer and value, each taken by pointer in the shim.
    shim_arg_types = (
        CPointer(CPointer(_type___nv_bfloat16)),
        CPointer(_type___nv_bfloat16),
    )
    shim_source = """
    extern "C" __device__ int
    _ZL6__stwbP13__nv_bfloat16S__nbst(int &retval , __nv_bfloat16 ** ptr, __nv_bfloat16* value) {
        __stwb(*ptr, *value);
        return 0;
    }
        """

    shim_fn = declare_device(shim_name, void(*shim_arg_types))

    def _ZL6__stwbP13__nv_bfloat16S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__stwb, CPointer(_type___nv_bfloat16), _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Reserve one alloca'd slot per argument first, then spill each
        # incoming value into its slot so the shim can take it by pointer.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(void, *shim_arg_types)
        return context.compile_internal(
            builder,
            _ZL6__stwbP13__nv_bfloat16S__nbst_caller,
            caller_sig,
            slots,
        )


_lower__ZL6__stwbP13__nv_bfloat16S__nbst(shim_stream, shim_obj)


def __stcg():
    """Python-side stand-in for the CUDA ``__stcg`` function.

    Calling it does nothing and yields ``None``. In this file the function
    object is the key given to ``@lower(__stcg, ...)`` for the
    ``__nv_bfloat162`` and ``__nv_bfloat16`` pointer/value overloads.
    """
    return None


def _lower__ZL6__stcgP14__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Hook up the ``__stcg`` lowering for ``__nv_bfloat162`` operands.

    An ``impl`` is decorated with
    ``@lower(__stcg, CPointer(_type___nv_bfloat162), _type___nv_bfloat162)``.
    When that ``impl`` runs it passes ``shim_obj`` to the active code
    library's ``add_linking_file``, writes the C++ shim source to
    ``shim_stream`` under the shim's symbol name, and calls the shim (declared
    with a ``void`` result) with every argument passed by pointer.
    """
    shim_name = "_ZL6__stcgP14__nv_bfloat162S__nbst"
    # Destination pointer and value, each taken by pointer in the shim.
    shim_arg_types = (
        CPointer(CPointer(_type___nv_bfloat162)),
        CPointer(_type___nv_bfloat162),
    )
    shim_source = """
    extern "C" __device__ int
    _ZL6__stcgP14__nv_bfloat162S__nbst(int &retval , __nv_bfloat162 ** ptr, __nv_bfloat162* value) {
        __stcg(*ptr, *value);
        return 0;
    }
        """

    shim_fn = declare_device(shim_name, void(*shim_arg_types))

    def _ZL6__stcgP14__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__stcg, CPointer(_type___nv_bfloat162), _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Reserve one alloca'd slot per argument first, then spill each
        # incoming value into its slot so the shim can take it by pointer.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(void, *shim_arg_types)
        return context.compile_internal(
            builder,
            _ZL6__stcgP14__nv_bfloat162S__nbst_caller,
            caller_sig,
            slots,
        )


_lower__ZL6__stcgP14__nv_bfloat162S__nbst(shim_stream, shim_obj)


def _lower__ZL6__stcgP13__nv_bfloat16S__nbst(shim_stream, shim_obj):
    """Hook up the ``__stcg`` lowering for ``__nv_bfloat16`` operands.

    An ``impl`` is decorated with
    ``@lower(__stcg, CPointer(_type___nv_bfloat16), _type___nv_bfloat16)``.
    When that ``impl`` runs it passes ``shim_obj`` to the active code
    library's ``add_linking_file``, writes the C++ shim source to
    ``shim_stream`` under the shim's symbol name, and calls the shim (declared
    with a ``void`` result) with every argument passed by pointer.
    """
    shim_name = "_ZL6__stcgP13__nv_bfloat16S__nbst"
    # Destination pointer and value, each taken by pointer in the shim.
    shim_arg_types = (
        CPointer(CPointer(_type___nv_bfloat16)),
        CPointer(_type___nv_bfloat16),
    )
    shim_source = """
    extern "C" __device__ int
    _ZL6__stcgP13__nv_bfloat16S__nbst(int &retval , __nv_bfloat16 ** ptr, __nv_bfloat16* value) {
        __stcg(*ptr, *value);
        return 0;
    }
        """

    shim_fn = declare_device(shim_name, void(*shim_arg_types))

    def _ZL6__stcgP13__nv_bfloat16S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__stcg, CPointer(_type___nv_bfloat16), _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Reserve one alloca'd slot per argument first, then spill each
        # incoming value into its slot so the shim can take it by pointer.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(void, *shim_arg_types)
        return context.compile_internal(
            builder,
            _ZL6__stcgP13__nv_bfloat16S__nbst_caller,
            caller_sig,
            slots,
        )


_lower__ZL6__stcgP13__nv_bfloat16S__nbst(shim_stream, shim_obj)


def __stcs():
    """Python-side stand-in for the CUDA ``__stcs`` function.

    Calling it does nothing and yields ``None``. In this file the function
    object is the key given to ``@lower(__stcs, ...)`` for the
    ``__nv_bfloat162`` and ``__nv_bfloat16`` pointer/value overloads.
    """
    return None


def _lower__ZL6__stcsP14__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Hook up the ``__stcs`` lowering for ``__nv_bfloat162`` operands.

    An ``impl`` is decorated with
    ``@lower(__stcs, CPointer(_type___nv_bfloat162), _type___nv_bfloat162)``.
    When that ``impl`` runs it passes ``shim_obj`` to the active code
    library's ``add_linking_file``, writes the C++ shim source to
    ``shim_stream`` under the shim's symbol name, and calls the shim (declared
    with a ``void`` result) with every argument passed by pointer.
    """
    shim_name = "_ZL6__stcsP14__nv_bfloat162S__nbst"
    # Destination pointer and value, each taken by pointer in the shim.
    shim_arg_types = (
        CPointer(CPointer(_type___nv_bfloat162)),
        CPointer(_type___nv_bfloat162),
    )
    shim_source = """
    extern "C" __device__ int
    _ZL6__stcsP14__nv_bfloat162S__nbst(int &retval , __nv_bfloat162 ** ptr, __nv_bfloat162* value) {
        __stcs(*ptr, *value);
        return 0;
    }
        """

    shim_fn = declare_device(shim_name, void(*shim_arg_types))

    def _ZL6__stcsP14__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__stcs, CPointer(_type___nv_bfloat162), _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Reserve one alloca'd slot per argument first, then spill each
        # incoming value into its slot so the shim can take it by pointer.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(void, *shim_arg_types)
        return context.compile_internal(
            builder,
            _ZL6__stcsP14__nv_bfloat162S__nbst_caller,
            caller_sig,
            slots,
        )


_lower__ZL6__stcsP14__nv_bfloat162S__nbst(shim_stream, shim_obj)


def _lower__ZL6__stcsP13__nv_bfloat16S__nbst(shim_stream, shim_obj):
    """Hook up the ``__stcs`` lowering for ``__nv_bfloat16`` operands.

    An ``impl`` is decorated with
    ``@lower(__stcs, CPointer(_type___nv_bfloat16), _type___nv_bfloat16)``.
    When that ``impl`` runs it passes ``shim_obj`` to the active code
    library's ``add_linking_file``, writes the C++ shim source to
    ``shim_stream`` under the shim's symbol name, and calls the shim (declared
    with a ``void`` result) with every argument passed by pointer.
    """
    shim_name = "_ZL6__stcsP13__nv_bfloat16S__nbst"
    # Destination pointer and value, each taken by pointer in the shim.
    shim_arg_types = (
        CPointer(CPointer(_type___nv_bfloat16)),
        CPointer(_type___nv_bfloat16),
    )
    shim_source = """
    extern "C" __device__ int
    _ZL6__stcsP13__nv_bfloat16S__nbst(int &retval , __nv_bfloat16 ** ptr, __nv_bfloat16* value) {
        __stcs(*ptr, *value);
        return 0;
    }
        """

    shim_fn = declare_device(shim_name, void(*shim_arg_types))

    def _ZL6__stcsP13__nv_bfloat16S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__stcs, CPointer(_type___nv_bfloat16), _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Reserve one alloca'd slot per argument first, then spill each
        # incoming value into its slot so the shim can take it by pointer.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(void, *shim_arg_types)
        return context.compile_internal(
            builder,
            _ZL6__stcsP13__nv_bfloat16S__nbst_caller,
            caller_sig,
            slots,
        )


_lower__ZL6__stcsP13__nv_bfloat16S__nbst(shim_stream, shim_obj)


def __stwt():
    """Python-side stand-in for the CUDA ``__stwt`` function.

    Calling it does nothing and yields ``None``. In this file the function
    object is the key given to ``@lower(__stwt, ...)`` for the
    ``__nv_bfloat162`` and ``__nv_bfloat16`` pointer/value overloads.
    """
    return None


def _lower__ZL6__stwtP14__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Hook up the ``__stwt`` lowering for ``__nv_bfloat162`` operands.

    An ``impl`` is decorated with
    ``@lower(__stwt, CPointer(_type___nv_bfloat162), _type___nv_bfloat162)``.
    When that ``impl`` runs it passes ``shim_obj`` to the active code
    library's ``add_linking_file``, writes the C++ shim source to
    ``shim_stream`` under the shim's symbol name, and calls the shim (declared
    with a ``void`` result) with every argument passed by pointer.
    """
    shim_name = "_ZL6__stwtP14__nv_bfloat162S__nbst"
    # Destination pointer and value, each taken by pointer in the shim.
    shim_arg_types = (
        CPointer(CPointer(_type___nv_bfloat162)),
        CPointer(_type___nv_bfloat162),
    )
    shim_source = """
    extern "C" __device__ int
    _ZL6__stwtP14__nv_bfloat162S__nbst(int &retval , __nv_bfloat162 ** ptr, __nv_bfloat162* value) {
        __stwt(*ptr, *value);
        return 0;
    }
        """

    shim_fn = declare_device(shim_name, void(*shim_arg_types))

    def _ZL6__stwtP14__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__stwt, CPointer(_type___nv_bfloat162), _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Reserve one alloca'd slot per argument first, then spill each
        # incoming value into its slot so the shim can take it by pointer.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(void, *shim_arg_types)
        return context.compile_internal(
            builder,
            _ZL6__stwtP14__nv_bfloat162S__nbst_caller,
            caller_sig,
            slots,
        )


_lower__ZL6__stwtP14__nv_bfloat162S__nbst(shim_stream, shim_obj)


def _lower__ZL6__stwtP13__nv_bfloat16S__nbst(shim_stream, shim_obj):
    """Hook up the ``__stwt`` lowering for ``__nv_bfloat16`` operands.

    An ``impl`` is decorated with
    ``@lower(__stwt, CPointer(_type___nv_bfloat16), _type___nv_bfloat16)``.
    When that ``impl`` runs it passes ``shim_obj`` to the active code
    library's ``add_linking_file``, writes the C++ shim source to
    ``shim_stream`` under the shim's symbol name, and calls the shim (declared
    with a ``void`` result) with every argument passed by pointer.
    """
    shim_name = "_ZL6__stwtP13__nv_bfloat16S__nbst"
    # Destination pointer and value, each taken by pointer in the shim.
    shim_arg_types = (
        CPointer(CPointer(_type___nv_bfloat16)),
        CPointer(_type___nv_bfloat16),
    )
    shim_source = """
    extern "C" __device__ int
    _ZL6__stwtP13__nv_bfloat16S__nbst(int &retval , __nv_bfloat16 ** ptr, __nv_bfloat16* value) {
        __stwt(*ptr, *value);
        return 0;
    }
        """

    shim_fn = declare_device(shim_name, void(*shim_arg_types))

    def _ZL6__stwtP13__nv_bfloat16S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__stwt, CPointer(_type___nv_bfloat16), _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # Reserve one alloca'd slot per argument first, then spill each
        # incoming value into its slot so the shim can take it by pointer.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(void, *shim_arg_types)
        return context.compile_internal(
            builder,
            _ZL6__stwtP13__nv_bfloat16S__nbst_caller,
            caller_sig,
            slots,
        )


_lower__ZL6__stwtP13__nv_bfloat16S__nbst(shim_stream, shim_obj)


def __heq2():
    """Python-side placeholder for ``__heq2``.

    The function object is what this module passes to ``@lower`` when
    registering the ``__heq2`` lowering; the body does nothing and a direct
    call returns ``None``.
    """


def _lower__ZL6__heq214__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__heq2`` for two ``__nv_bfloat162`` operands.

    The lowering stores each operand into a fresh ``alloca`` slot and calls
    the ``extern "C"`` shim below with pointers to those slots.

    Args:
        shim_stream: Receives the shim symbol and source via
            ``write_with_key`` each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    symbol = "_ZL6__heq214__nv_bfloat162S__nbst"
    # Shim source, passed verbatim to ``shim_stream.write_with_key``.
    shim_source = """
    extern "C" __device__ int
    _ZL6__heq214__nv_bfloat162S__nbst(__nv_bfloat162 &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __heq2(*a, *b);
        return 0;
    }
        """
    # The shim takes every operand by pointer.
    by_pointer = (
        CPointer(_type___nv_bfloat162),
        CPointer(_type___nv_bfloat162),
    )

    _ZL6__heq214__nv_bfloat162S__nbst = declare_device(
        symbol, _type___nv_bfloat162(*by_pointer)
    )

    # Thin Python wrapper that ``compile_internal`` compiles to reach the shim.
    def _ZL6__heq214__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return _ZL6__heq214__nv_bfloat162S__nbst(arg_0, arg_1)

    @lower(__heq2, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Reserve one slot per operand first, then fill the slots in order.
        slots = []
        for operand_type in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_type)))
        for slot, operand_type, value in zip(slots, sig.args, args):
            alignment = getattr(operand_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        shim_signature = signature(_type___nv_bfloat162, *by_pointer)
        return context.compile_internal(
            builder,
            _ZL6__heq214__nv_bfloat162S__nbst_caller,
            shim_signature,
            slots,
        )


# Invoke the helper at module level so the ``@lower`` registration for
# ``__heq2`` is executed on import.
_lower__ZL6__heq214__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hne2():
    """Python-side placeholder for ``__hne2``.

    The function object is what this module passes to ``@lower`` when
    registering the ``__hne2`` lowering; the body does nothing and a direct
    call returns ``None``.
    """


def _lower__ZL6__hne214__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hne2`` for two ``__nv_bfloat162`` operands.

    The lowering stores each operand into a fresh ``alloca`` slot and calls
    the ``extern "C"`` shim below with pointers to those slots.

    Args:
        shim_stream: Receives the shim symbol and source via
            ``write_with_key`` each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    symbol = "_ZL6__hne214__nv_bfloat162S__nbst"
    # Shim source, passed verbatim to ``shim_stream.write_with_key``.
    shim_source = """
    extern "C" __device__ int
    _ZL6__hne214__nv_bfloat162S__nbst(__nv_bfloat162 &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hne2(*a, *b);
        return 0;
    }
        """
    # The shim takes every operand by pointer.
    by_pointer = (
        CPointer(_type___nv_bfloat162),
        CPointer(_type___nv_bfloat162),
    )

    _ZL6__hne214__nv_bfloat162S__nbst = declare_device(
        symbol, _type___nv_bfloat162(*by_pointer)
    )

    # Thin Python wrapper that ``compile_internal`` compiles to reach the shim.
    def _ZL6__hne214__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return _ZL6__hne214__nv_bfloat162S__nbst(arg_0, arg_1)

    @lower(__hne2, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Reserve one slot per operand first, then fill the slots in order.
        slots = []
        for operand_type in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_type)))
        for slot, operand_type, value in zip(slots, sig.args, args):
            alignment = getattr(operand_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        shim_signature = signature(_type___nv_bfloat162, *by_pointer)
        return context.compile_internal(
            builder,
            _ZL6__hne214__nv_bfloat162S__nbst_caller,
            shim_signature,
            slots,
        )


# Invoke the helper at module level so the ``@lower`` registration for
# ``__hne2`` is executed on import.
_lower__ZL6__hne214__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hle2():
    """Python-side placeholder for ``__hle2``.

    The function object is what this module passes to ``@lower`` when
    registering the ``__hle2`` lowering; the body does nothing and a direct
    call returns ``None``.
    """


def _lower__ZL6__hle214__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hle2`` for two ``__nv_bfloat162`` operands.

    The lowering stores each operand into a fresh ``alloca`` slot and calls
    the ``extern "C"`` shim below with pointers to those slots.

    Args:
        shim_stream: Receives the shim symbol and source via
            ``write_with_key`` each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    symbol = "_ZL6__hle214__nv_bfloat162S__nbst"
    # Shim source, passed verbatim to ``shim_stream.write_with_key``.
    shim_source = """
    extern "C" __device__ int
    _ZL6__hle214__nv_bfloat162S__nbst(__nv_bfloat162 &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hle2(*a, *b);
        return 0;
    }
        """
    # The shim takes every operand by pointer.
    by_pointer = (
        CPointer(_type___nv_bfloat162),
        CPointer(_type___nv_bfloat162),
    )

    _ZL6__hle214__nv_bfloat162S__nbst = declare_device(
        symbol, _type___nv_bfloat162(*by_pointer)
    )

    # Thin Python wrapper that ``compile_internal`` compiles to reach the shim.
    def _ZL6__hle214__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return _ZL6__hle214__nv_bfloat162S__nbst(arg_0, arg_1)

    @lower(__hle2, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Reserve one slot per operand first, then fill the slots in order.
        slots = []
        for operand_type in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_type)))
        for slot, operand_type, value in zip(slots, sig.args, args):
            alignment = getattr(operand_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        shim_signature = signature(_type___nv_bfloat162, *by_pointer)
        return context.compile_internal(
            builder,
            _ZL6__hle214__nv_bfloat162S__nbst_caller,
            shim_signature,
            slots,
        )


# Invoke the helper at module level so the ``@lower`` registration for
# ``__hle2`` is executed on import.
_lower__ZL6__hle214__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hge2():
    """Python-side placeholder for ``__hge2``.

    The function object is what this module passes to ``@lower`` when
    registering the ``__hge2`` lowering; the body does nothing and a direct
    call returns ``None``.
    """


def _lower__ZL6__hge214__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hge2`` for two ``__nv_bfloat162`` operands.

    The lowering stores each operand into a fresh ``alloca`` slot and calls
    the ``extern "C"`` shim below with pointers to those slots.

    Args:
        shim_stream: Receives the shim symbol and source via
            ``write_with_key`` each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    symbol = "_ZL6__hge214__nv_bfloat162S__nbst"
    # Shim source, passed verbatim to ``shim_stream.write_with_key``.
    shim_source = """
    extern "C" __device__ int
    _ZL6__hge214__nv_bfloat162S__nbst(__nv_bfloat162 &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hge2(*a, *b);
        return 0;
    }
        """
    # The shim takes every operand by pointer.
    by_pointer = (
        CPointer(_type___nv_bfloat162),
        CPointer(_type___nv_bfloat162),
    )

    _ZL6__hge214__nv_bfloat162S__nbst = declare_device(
        symbol, _type___nv_bfloat162(*by_pointer)
    )

    # Thin Python wrapper that ``compile_internal`` compiles to reach the shim.
    def _ZL6__hge214__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return _ZL6__hge214__nv_bfloat162S__nbst(arg_0, arg_1)

    @lower(__hge2, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Reserve one slot per operand first, then fill the slots in order.
        slots = []
        for operand_type in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_type)))
        for slot, operand_type, value in zip(slots, sig.args, args):
            alignment = getattr(operand_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        shim_signature = signature(_type___nv_bfloat162, *by_pointer)
        return context.compile_internal(
            builder,
            _ZL6__hge214__nv_bfloat162S__nbst_caller,
            shim_signature,
            slots,
        )


# Invoke the helper at module level so the ``@lower`` registration for
# ``__hge2`` is executed on import.
_lower__ZL6__hge214__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hlt2():
    """Python-side placeholder for ``__hlt2``.

    The function object is what this module passes to ``@lower`` when
    registering the ``__hlt2`` lowering; the body does nothing and a direct
    call returns ``None``.
    """


def _lower__ZL6__hlt214__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hlt2`` for two ``__nv_bfloat162`` operands.

    The lowering stores each operand into a fresh ``alloca`` slot and calls
    the ``extern "C"`` shim below with pointers to those slots.

    Args:
        shim_stream: Receives the shim symbol and source via
            ``write_with_key`` each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    symbol = "_ZL6__hlt214__nv_bfloat162S__nbst"
    # Shim source, passed verbatim to ``shim_stream.write_with_key``.
    shim_source = """
    extern "C" __device__ int
    _ZL6__hlt214__nv_bfloat162S__nbst(__nv_bfloat162 &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hlt2(*a, *b);
        return 0;
    }
        """
    # The shim takes every operand by pointer.
    by_pointer = (
        CPointer(_type___nv_bfloat162),
        CPointer(_type___nv_bfloat162),
    )

    _ZL6__hlt214__nv_bfloat162S__nbst = declare_device(
        symbol, _type___nv_bfloat162(*by_pointer)
    )

    # Thin Python wrapper that ``compile_internal`` compiles to reach the shim.
    def _ZL6__hlt214__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return _ZL6__hlt214__nv_bfloat162S__nbst(arg_0, arg_1)

    @lower(__hlt2, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Reserve one slot per operand first, then fill the slots in order.
        slots = []
        for operand_type in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_type)))
        for slot, operand_type, value in zip(slots, sig.args, args):
            alignment = getattr(operand_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        shim_signature = signature(_type___nv_bfloat162, *by_pointer)
        return context.compile_internal(
            builder,
            _ZL6__hlt214__nv_bfloat162S__nbst_caller,
            shim_signature,
            slots,
        )


# Invoke the helper at module level so the ``@lower`` registration for
# ``__hlt2`` is executed on import.
_lower__ZL6__hlt214__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hgt2():
    """Python-side placeholder for ``__hgt2``.

    The function object is what this module passes to ``@lower`` when
    registering the ``__hgt2`` lowering; the body does nothing and a direct
    call returns ``None``.
    """


def _lower__ZL6__hgt214__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hgt2`` for two ``__nv_bfloat162`` operands.

    The lowering stores each operand into a fresh ``alloca`` slot and calls
    the ``extern "C"`` shim below with pointers to those slots.

    Args:
        shim_stream: Receives the shim symbol and source via
            ``write_with_key`` each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    symbol = "_ZL6__hgt214__nv_bfloat162S__nbst"
    # Shim source, passed verbatim to ``shim_stream.write_with_key``.
    shim_source = """
    extern "C" __device__ int
    _ZL6__hgt214__nv_bfloat162S__nbst(__nv_bfloat162 &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hgt2(*a, *b);
        return 0;
    }
        """
    # The shim takes every operand by pointer.
    by_pointer = (
        CPointer(_type___nv_bfloat162),
        CPointer(_type___nv_bfloat162),
    )

    _ZL6__hgt214__nv_bfloat162S__nbst = declare_device(
        symbol, _type___nv_bfloat162(*by_pointer)
    )

    # Thin Python wrapper that ``compile_internal`` compiles to reach the shim.
    def _ZL6__hgt214__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return _ZL6__hgt214__nv_bfloat162S__nbst(arg_0, arg_1)

    @lower(__hgt2, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Reserve one slot per operand first, then fill the slots in order.
        slots = []
        for operand_type in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_type)))
        for slot, operand_type, value in zip(slots, sig.args, args):
            alignment = getattr(operand_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        shim_signature = signature(_type___nv_bfloat162, *by_pointer)
        return context.compile_internal(
            builder,
            _ZL6__hgt214__nv_bfloat162S__nbst_caller,
            shim_signature,
            slots,
        )


# Invoke the helper at module level so the ``@lower`` registration for
# ``__hgt2`` is executed on import.
_lower__ZL6__hgt214__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hequ2():
    """Python-side placeholder for ``__hequ2``.

    The function object is what this module passes to ``@lower`` when
    registering the ``__hequ2`` lowering; the body does nothing and a direct
    call returns ``None``.
    """


def _lower__ZL7__hequ214__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hequ2`` for two ``__nv_bfloat162`` operands.

    The lowering stores each operand into a fresh ``alloca`` slot and calls
    the ``extern "C"`` shim below with pointers to those slots.

    Args:
        shim_stream: Receives the shim symbol and source via
            ``write_with_key`` each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    symbol = "_ZL7__hequ214__nv_bfloat162S__nbst"
    # Shim source, passed verbatim to ``shim_stream.write_with_key``.
    shim_source = """
    extern "C" __device__ int
    _ZL7__hequ214__nv_bfloat162S__nbst(__nv_bfloat162 &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hequ2(*a, *b);
        return 0;
    }
        """
    # The shim takes every operand by pointer.
    by_pointer = (
        CPointer(_type___nv_bfloat162),
        CPointer(_type___nv_bfloat162),
    )

    _ZL7__hequ214__nv_bfloat162S__nbst = declare_device(
        symbol, _type___nv_bfloat162(*by_pointer)
    )

    # Thin Python wrapper that ``compile_internal`` compiles to reach the shim.
    def _ZL7__hequ214__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return _ZL7__hequ214__nv_bfloat162S__nbst(arg_0, arg_1)

    @lower(__hequ2, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Reserve one slot per operand first, then fill the slots in order.
        slots = []
        for operand_type in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_type)))
        for slot, operand_type, value in zip(slots, sig.args, args):
            alignment = getattr(operand_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        shim_signature = signature(_type___nv_bfloat162, *by_pointer)
        return context.compile_internal(
            builder,
            _ZL7__hequ214__nv_bfloat162S__nbst_caller,
            shim_signature,
            slots,
        )


# Invoke the helper at module level so the ``@lower`` registration for
# ``__hequ2`` is executed on import.
_lower__ZL7__hequ214__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hneu2():
    """Python-side placeholder for ``__hneu2``.

    The function object is what this module passes to ``@lower`` when
    registering the ``__hneu2`` lowering; the body does nothing and a direct
    call returns ``None``.
    """


def _lower__ZL7__hneu214__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hneu2`` for two ``__nv_bfloat162`` operands.

    The lowering stores each operand into a fresh ``alloca`` slot and calls
    the ``extern "C"`` shim below with pointers to those slots.

    Args:
        shim_stream: Receives the shim symbol and source via
            ``write_with_key`` each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    symbol = "_ZL7__hneu214__nv_bfloat162S__nbst"
    # Shim source, passed verbatim to ``shim_stream.write_with_key``.
    shim_source = """
    extern "C" __device__ int
    _ZL7__hneu214__nv_bfloat162S__nbst(__nv_bfloat162 &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hneu2(*a, *b);
        return 0;
    }
        """
    # The shim takes every operand by pointer.
    by_pointer = (
        CPointer(_type___nv_bfloat162),
        CPointer(_type___nv_bfloat162),
    )

    _ZL7__hneu214__nv_bfloat162S__nbst = declare_device(
        symbol, _type___nv_bfloat162(*by_pointer)
    )

    # Thin Python wrapper that ``compile_internal`` compiles to reach the shim.
    def _ZL7__hneu214__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return _ZL7__hneu214__nv_bfloat162S__nbst(arg_0, arg_1)

    @lower(__hneu2, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Reserve one slot per operand first, then fill the slots in order.
        slots = []
        for operand_type in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_type)))
        for slot, operand_type, value in zip(slots, sig.args, args):
            alignment = getattr(operand_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        shim_signature = signature(_type___nv_bfloat162, *by_pointer)
        return context.compile_internal(
            builder,
            _ZL7__hneu214__nv_bfloat162S__nbst_caller,
            shim_signature,
            slots,
        )


# Invoke the helper at module level so the ``@lower`` registration for
# ``__hneu2`` is executed on import.
_lower__ZL7__hneu214__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hleu2():
    """Python-side placeholder for ``__hleu2``.

    The function object is what this module passes to ``@lower`` when
    registering the ``__hleu2`` lowering; the body does nothing and a direct
    call returns ``None``.
    """


def _lower__ZL7__hleu214__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hleu2`` for two ``__nv_bfloat162`` operands.

    The lowering stores each operand into a fresh ``alloca`` slot and calls
    the ``extern "C"`` shim below with pointers to those slots.

    Args:
        shim_stream: Receives the shim symbol and source via
            ``write_with_key`` each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    symbol = "_ZL7__hleu214__nv_bfloat162S__nbst"
    # Shim source, passed verbatim to ``shim_stream.write_with_key``.
    shim_source = """
    extern "C" __device__ int
    _ZL7__hleu214__nv_bfloat162S__nbst(__nv_bfloat162 &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hleu2(*a, *b);
        return 0;
    }
        """
    # The shim takes every operand by pointer.
    by_pointer = (
        CPointer(_type___nv_bfloat162),
        CPointer(_type___nv_bfloat162),
    )

    _ZL7__hleu214__nv_bfloat162S__nbst = declare_device(
        symbol, _type___nv_bfloat162(*by_pointer)
    )

    # Thin Python wrapper that ``compile_internal`` compiles to reach the shim.
    def _ZL7__hleu214__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return _ZL7__hleu214__nv_bfloat162S__nbst(arg_0, arg_1)

    @lower(__hleu2, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Reserve one slot per operand first, then fill the slots in order.
        slots = []
        for operand_type in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_type)))
        for slot, operand_type, value in zip(slots, sig.args, args):
            alignment = getattr(operand_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        shim_signature = signature(_type___nv_bfloat162, *by_pointer)
        return context.compile_internal(
            builder,
            _ZL7__hleu214__nv_bfloat162S__nbst_caller,
            shim_signature,
            slots,
        )


# Invoke the helper at module level so the ``@lower`` registration for
# ``__hleu2`` is executed on import.
_lower__ZL7__hleu214__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hgeu2():
    """Python-side placeholder for ``__hgeu2``.

    The function object is what this module passes to ``@lower`` when
    registering the ``__hgeu2`` lowering; the body does nothing and a direct
    call returns ``None``.
    """


def _lower__ZL7__hgeu214__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hgeu2`` for two ``__nv_bfloat162`` operands.

    The lowering stores each operand into a fresh ``alloca`` slot and calls
    the ``extern "C"`` shim below with pointers to those slots.

    Args:
        shim_stream: Receives the shim symbol and source via
            ``write_with_key`` each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    symbol = "_ZL7__hgeu214__nv_bfloat162S__nbst"
    # Shim source, passed verbatim to ``shim_stream.write_with_key``.
    shim_source = """
    extern "C" __device__ int
    _ZL7__hgeu214__nv_bfloat162S__nbst(__nv_bfloat162 &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hgeu2(*a, *b);
        return 0;
    }
        """
    # The shim takes every operand by pointer.
    by_pointer = (
        CPointer(_type___nv_bfloat162),
        CPointer(_type___nv_bfloat162),
    )

    _ZL7__hgeu214__nv_bfloat162S__nbst = declare_device(
        symbol, _type___nv_bfloat162(*by_pointer)
    )

    # Thin Python wrapper that ``compile_internal`` compiles to reach the shim.
    def _ZL7__hgeu214__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return _ZL7__hgeu214__nv_bfloat162S__nbst(arg_0, arg_1)

    @lower(__hgeu2, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Reserve one slot per operand first, then fill the slots in order.
        slots = []
        for operand_type in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_type)))
        for slot, operand_type, value in zip(slots, sig.args, args):
            alignment = getattr(operand_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        shim_signature = signature(_type___nv_bfloat162, *by_pointer)
        return context.compile_internal(
            builder,
            _ZL7__hgeu214__nv_bfloat162S__nbst_caller,
            shim_signature,
            slots,
        )


# Invoke the helper at module level so the ``@lower`` registration for
# ``__hgeu2`` is executed on import.
_lower__ZL7__hgeu214__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hltu2():
    """Python-side placeholder for ``__hltu2``.

    The function object is what this module passes to ``@lower`` when
    registering the ``__hltu2`` lowering; the body does nothing and a direct
    call returns ``None``.
    """


def _lower__ZL7__hltu214__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hltu2`` for two ``__nv_bfloat162`` operands.

    The lowering stores each operand into a fresh ``alloca`` slot and calls
    the ``extern "C"`` shim below with pointers to those slots.

    Args:
        shim_stream: Receives the shim symbol and source via
            ``write_with_key`` each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    symbol = "_ZL7__hltu214__nv_bfloat162S__nbst"
    # Shim source, passed verbatim to ``shim_stream.write_with_key``.
    shim_source = """
    extern "C" __device__ int
    _ZL7__hltu214__nv_bfloat162S__nbst(__nv_bfloat162 &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hltu2(*a, *b);
        return 0;
    }
        """
    # The shim takes every operand by pointer.
    by_pointer = (
        CPointer(_type___nv_bfloat162),
        CPointer(_type___nv_bfloat162),
    )

    _ZL7__hltu214__nv_bfloat162S__nbst = declare_device(
        symbol, _type___nv_bfloat162(*by_pointer)
    )

    # Thin Python wrapper that ``compile_internal`` compiles to reach the shim.
    def _ZL7__hltu214__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return _ZL7__hltu214__nv_bfloat162S__nbst(arg_0, arg_1)

    @lower(__hltu2, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Reserve one slot per operand first, then fill the slots in order.
        slots = []
        for operand_type in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_type)))
        for slot, operand_type, value in zip(slots, sig.args, args):
            alignment = getattr(operand_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        shim_signature = signature(_type___nv_bfloat162, *by_pointer)
        return context.compile_internal(
            builder,
            _ZL7__hltu214__nv_bfloat162S__nbst_caller,
            shim_signature,
            slots,
        )


# Invoke the helper at module level so the ``@lower`` registration for
# ``__hltu2`` is executed on import.
_lower__ZL7__hltu214__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hgtu2():
    """Python-side placeholder for ``__hgtu2``.

    The function object is what this module passes to ``@lower`` when
    registering the ``__hgtu2`` lowering; the body does nothing and a direct
    call returns ``None``.
    """


def _lower__ZL7__hgtu214__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hgtu2`` for two ``__nv_bfloat162`` operands.

    The lowering stores each operand into a fresh ``alloca`` slot and calls
    the ``extern "C"`` shim below with pointers to those slots.

    Args:
        shim_stream: Receives the shim symbol and source via
            ``write_with_key`` each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    symbol = "_ZL7__hgtu214__nv_bfloat162S__nbst"
    # Shim source, passed verbatim to ``shim_stream.write_with_key``.
    shim_source = """
    extern "C" __device__ int
    _ZL7__hgtu214__nv_bfloat162S__nbst(__nv_bfloat162 &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hgtu2(*a, *b);
        return 0;
    }
        """
    # The shim takes every operand by pointer.
    by_pointer = (
        CPointer(_type___nv_bfloat162),
        CPointer(_type___nv_bfloat162),
    )

    _ZL7__hgtu214__nv_bfloat162S__nbst = declare_device(
        symbol, _type___nv_bfloat162(*by_pointer)
    )

    # Thin Python wrapper that ``compile_internal`` compiles to reach the shim.
    def _ZL7__hgtu214__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return _ZL7__hgtu214__nv_bfloat162S__nbst(arg_0, arg_1)

    @lower(__hgtu2, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Reserve one slot per operand first, then fill the slots in order.
        slots = []
        for operand_type in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_type)))
        for slot, operand_type, value in zip(slots, sig.args, args):
            alignment = getattr(operand_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        shim_signature = signature(_type___nv_bfloat162, *by_pointer)
        return context.compile_internal(
            builder,
            _ZL7__hgtu214__nv_bfloat162S__nbst_caller,
            shim_signature,
            slots,
        )


# Invoke the helper at module level so the ``@lower`` registration for
# ``__hgtu2`` is executed on import.
_lower__ZL7__hgtu214__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __heq2_mask():
    """Python-side placeholder for ``__heq2_mask``.

    The function object is what this module passes to ``@lower`` when
    registering the ``__heq2_mask`` lowering; the body does nothing and a
    direct call returns ``None``.
    """


def _lower__ZL11__heq2_mask14__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__heq2_mask`` for two ``__nv_bfloat162`` operands.

    The lowering stores each operand into a fresh ``alloca`` slot and calls
    the ``extern "C"`` shim below with pointers to those slots; the declared
    return type is ``uint32``.

    Args:
        shim_stream: Receives the shim symbol and source via
            ``write_with_key`` each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    symbol = "_ZL11__heq2_mask14__nv_bfloat162S__nbst"
    # Shim source, passed verbatim to ``shim_stream.write_with_key``.
    shim_source = """
    extern "C" __device__ int
    _ZL11__heq2_mask14__nv_bfloat162S__nbst(unsigned int &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __heq2_mask(*a, *b);
        return 0;
    }
        """
    # The shim takes every operand by pointer.
    by_pointer = (
        CPointer(_type___nv_bfloat162),
        CPointer(_type___nv_bfloat162),
    )

    _ZL11__heq2_mask14__nv_bfloat162S__nbst = declare_device(
        symbol, uint32(*by_pointer)
    )

    # Thin Python wrapper that ``compile_internal`` compiles to reach the shim.
    def _ZL11__heq2_mask14__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return _ZL11__heq2_mask14__nv_bfloat162S__nbst(arg_0, arg_1)

    @lower(__heq2_mask, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Reserve one slot per operand first, then fill the slots in order.
        slots = []
        for operand_type in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_type)))
        for slot, operand_type, value in zip(slots, sig.args, args):
            alignment = getattr(operand_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        shim_signature = signature(uint32, *by_pointer)
        return context.compile_internal(
            builder,
            _ZL11__heq2_mask14__nv_bfloat162S__nbst_caller,
            shim_signature,
            slots,
        )


# Invoke the helper at module level so the ``@lower`` registration for
# ``__heq2_mask`` is executed on import.
_lower__ZL11__heq2_mask14__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hne2_mask():
    """Python-side placeholder for ``__hne2_mask``.

    The function object is what this module passes to ``@lower`` when
    registering the ``__hne2_mask`` lowering; the body does nothing and a
    direct call returns ``None``.
    """


def _lower__ZL11__hne2_mask14__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hne2_mask`` for two ``__nv_bfloat162`` operands.

    The lowering stores each operand into a fresh ``alloca`` slot and calls
    the ``extern "C"`` shim below with pointers to those slots; the declared
    return type is ``uint32``.

    Args:
        shim_stream: Receives the shim symbol and source via
            ``write_with_key`` each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    symbol = "_ZL11__hne2_mask14__nv_bfloat162S__nbst"
    # Shim source, passed verbatim to ``shim_stream.write_with_key``.
    shim_source = """
    extern "C" __device__ int
    _ZL11__hne2_mask14__nv_bfloat162S__nbst(unsigned int &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hne2_mask(*a, *b);
        return 0;
    }
        """
    # The shim takes every operand by pointer.
    by_pointer = (
        CPointer(_type___nv_bfloat162),
        CPointer(_type___nv_bfloat162),
    )

    _ZL11__hne2_mask14__nv_bfloat162S__nbst = declare_device(
        symbol, uint32(*by_pointer)
    )

    # Thin Python wrapper that ``compile_internal`` compiles to reach the shim.
    def _ZL11__hne2_mask14__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return _ZL11__hne2_mask14__nv_bfloat162S__nbst(arg_0, arg_1)

    @lower(__hne2_mask, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Reserve one slot per operand first, then fill the slots in order.
        slots = []
        for operand_type in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_type)))
        for slot, operand_type, value in zip(slots, sig.args, args):
            alignment = getattr(operand_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        shim_signature = signature(uint32, *by_pointer)
        return context.compile_internal(
            builder,
            _ZL11__hne2_mask14__nv_bfloat162S__nbst_caller,
            shim_signature,
            slots,
        )


# Invoke the helper at module level so the ``@lower`` registration for
# ``__hne2_mask`` is executed on import.
_lower__ZL11__hne2_mask14__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hle2_mask():
    """Python-side placeholder for ``__hle2_mask``.

    The function object is what this module passes to ``@lower`` when
    registering the ``__hle2_mask`` lowering; the body does nothing and a
    direct call returns ``None``.
    """


def _lower__ZL11__hle2_mask14__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hle2_mask`` for two ``__nv_bfloat162`` operands.

    The lowering stores each operand into a fresh ``alloca`` slot and calls
    the ``extern "C"`` shim below with pointers to those slots; the declared
    return type is ``uint32``.

    Args:
        shim_stream: Receives the shim symbol and source via
            ``write_with_key`` each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    symbol = "_ZL11__hle2_mask14__nv_bfloat162S__nbst"
    # Shim source, passed verbatim to ``shim_stream.write_with_key``.
    shim_source = """
    extern "C" __device__ int
    _ZL11__hle2_mask14__nv_bfloat162S__nbst(unsigned int &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hle2_mask(*a, *b);
        return 0;
    }
        """
    # The shim takes every operand by pointer.
    by_pointer = (
        CPointer(_type___nv_bfloat162),
        CPointer(_type___nv_bfloat162),
    )

    _ZL11__hle2_mask14__nv_bfloat162S__nbst = declare_device(
        symbol, uint32(*by_pointer)
    )

    # Thin Python wrapper that ``compile_internal`` compiles to reach the shim.
    def _ZL11__hle2_mask14__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return _ZL11__hle2_mask14__nv_bfloat162S__nbst(arg_0, arg_1)

    @lower(__hle2_mask, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Reserve one slot per operand first, then fill the slots in order.
        slots = []
        for operand_type in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_type)))
        for slot, operand_type, value in zip(slots, sig.args, args):
            alignment = getattr(operand_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        shim_signature = signature(uint32, *by_pointer)
        return context.compile_internal(
            builder,
            _ZL11__hle2_mask14__nv_bfloat162S__nbst_caller,
            shim_signature,
            slots,
        )


# Invoke the helper at module level so the ``@lower`` registration for
# ``__hle2_mask`` is executed on import.
_lower__ZL11__hle2_mask14__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hge2_mask():
    """Python-side placeholder for ``__hge2_mask``.

    The function object is what this module passes to ``@lower`` when
    registering the ``__hge2_mask`` lowering; the body does nothing and a
    direct call returns ``None``.
    """


def _lower__ZL11__hge2_mask14__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hge2_mask`` for two ``__nv_bfloat162`` operands.

    The lowering stores each operand into a fresh ``alloca`` slot and calls
    the ``extern "C"`` shim below with pointers to those slots; the declared
    return type is ``uint32``.

    Args:
        shim_stream: Receives the shim symbol and source via
            ``write_with_key`` each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    symbol = "_ZL11__hge2_mask14__nv_bfloat162S__nbst"
    # Shim source, passed verbatim to ``shim_stream.write_with_key``.
    shim_source = """
    extern "C" __device__ int
    _ZL11__hge2_mask14__nv_bfloat162S__nbst(unsigned int &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hge2_mask(*a, *b);
        return 0;
    }
        """
    # The shim takes every operand by pointer.
    by_pointer = (
        CPointer(_type___nv_bfloat162),
        CPointer(_type___nv_bfloat162),
    )

    _ZL11__hge2_mask14__nv_bfloat162S__nbst = declare_device(
        symbol, uint32(*by_pointer)
    )

    # Thin Python wrapper that ``compile_internal`` compiles to reach the shim.
    def _ZL11__hge2_mask14__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return _ZL11__hge2_mask14__nv_bfloat162S__nbst(arg_0, arg_1)

    @lower(__hge2_mask, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Reserve one slot per operand first, then fill the slots in order.
        slots = []
        for operand_type in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_type)))
        for slot, operand_type, value in zip(slots, sig.args, args):
            alignment = getattr(operand_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        shim_signature = signature(uint32, *by_pointer)
        return context.compile_internal(
            builder,
            _ZL11__hge2_mask14__nv_bfloat162S__nbst_caller,
            shim_signature,
            slots,
        )


# Invoke the helper at module level so the ``@lower`` registration for
# ``__hge2_mask`` is executed on import.
_lower__ZL11__hge2_mask14__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hlt2_mask():
    """Python-side placeholder for ``__hlt2_mask``.

    The function object is what this module passes to ``@lower`` when
    registering the ``__hlt2_mask`` lowering; the body does nothing and a
    direct call returns ``None``.
    """


def _lower__ZL11__hlt2_mask14__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hlt2_mask`` for two ``__nv_bfloat162`` operands.

    The lowering stores each operand into a fresh ``alloca`` slot and calls
    the ``extern "C"`` shim below with pointers to those slots; the declared
    return type is ``uint32``.

    Args:
        shim_stream: Receives the shim symbol and source via
            ``write_with_key`` each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    symbol = "_ZL11__hlt2_mask14__nv_bfloat162S__nbst"
    # Shim source, passed verbatim to ``shim_stream.write_with_key``.
    shim_source = """
    extern "C" __device__ int
    _ZL11__hlt2_mask14__nv_bfloat162S__nbst(unsigned int &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hlt2_mask(*a, *b);
        return 0;
    }
        """
    # The shim takes every operand by pointer.
    by_pointer = (
        CPointer(_type___nv_bfloat162),
        CPointer(_type___nv_bfloat162),
    )

    _ZL11__hlt2_mask14__nv_bfloat162S__nbst = declare_device(
        symbol, uint32(*by_pointer)
    )

    # Thin Python wrapper that ``compile_internal`` compiles to reach the shim.
    def _ZL11__hlt2_mask14__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return _ZL11__hlt2_mask14__nv_bfloat162S__nbst(arg_0, arg_1)

    @lower(__hlt2_mask, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Reserve one slot per operand first, then fill the slots in order.
        slots = []
        for operand_type in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_type)))
        for slot, operand_type, value in zip(slots, sig.args, args):
            alignment = getattr(operand_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        shim_signature = signature(uint32, *by_pointer)
        return context.compile_internal(
            builder,
            _ZL11__hlt2_mask14__nv_bfloat162S__nbst_caller,
            shim_signature,
            slots,
        )


# Invoke the helper at module level so the ``@lower`` registration for
# ``__hlt2_mask`` is executed on import.
_lower__ZL11__hlt2_mask14__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hgt2_mask():
    """Python-side placeholder for ``__hgt2_mask``.

    The function object is what this module passes to ``@lower`` when
    registering the ``__hgt2_mask`` lowering; the body does nothing and a
    direct call returns ``None``.
    """


def _lower__ZL11__hgt2_mask14__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hgt2_mask`` for two ``__nv_bfloat162`` operands.

    The lowering stores each operand into a fresh ``alloca`` slot and calls
    the ``extern "C"`` shim below with pointers to those slots; the declared
    return type is ``uint32``.

    Args:
        shim_stream: Receives the shim symbol and source via
            ``write_with_key`` each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    symbol = "_ZL11__hgt2_mask14__nv_bfloat162S__nbst"
    # Shim source, passed verbatim to ``shim_stream.write_with_key``.
    shim_source = """
    extern "C" __device__ int
    _ZL11__hgt2_mask14__nv_bfloat162S__nbst(unsigned int &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hgt2_mask(*a, *b);
        return 0;
    }
        """
    # The shim takes every operand by pointer.
    by_pointer = (
        CPointer(_type___nv_bfloat162),
        CPointer(_type___nv_bfloat162),
    )

    _ZL11__hgt2_mask14__nv_bfloat162S__nbst = declare_device(
        symbol, uint32(*by_pointer)
    )

    # Thin Python wrapper that ``compile_internal`` compiles to reach the shim.
    def _ZL11__hgt2_mask14__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return _ZL11__hgt2_mask14__nv_bfloat162S__nbst(arg_0, arg_1)

    @lower(__hgt2_mask, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Reserve one slot per operand first, then fill the slots in order.
        slots = []
        for operand_type in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_type)))
        for slot, operand_type, value in zip(slots, sig.args, args):
            alignment = getattr(operand_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        shim_signature = signature(uint32, *by_pointer)
        return context.compile_internal(
            builder,
            _ZL11__hgt2_mask14__nv_bfloat162S__nbst_caller,
            shim_signature,
            slots,
        )


# Invoke the helper at module level so the ``@lower`` registration for
# ``__hgt2_mask`` is executed on import.
_lower__ZL11__hgt2_mask14__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hequ2_mask():
    """Python-side placeholder for ``__hequ2_mask``.

    The function object is what this module passes to ``@lower`` when
    registering the ``__hequ2_mask`` lowering; the body does nothing and a
    direct call returns ``None``.
    """


def _lower__ZL12__hequ2_mask14__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hequ2_mask`` for two ``__nv_bfloat162`` operands.

    The lowering stores each operand into a fresh ``alloca`` slot and calls
    the ``extern "C"`` shim below with pointers to those slots; the declared
    return type is ``uint32``.

    Args:
        shim_stream: Receives the shim symbol and source via
            ``write_with_key`` each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    symbol = "_ZL12__hequ2_mask14__nv_bfloat162S__nbst"
    # Shim source, passed verbatim to ``shim_stream.write_with_key``.
    shim_source = """
    extern "C" __device__ int
    _ZL12__hequ2_mask14__nv_bfloat162S__nbst(unsigned int &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hequ2_mask(*a, *b);
        return 0;
    }
        """
    # The shim takes every operand by pointer.
    by_pointer = (
        CPointer(_type___nv_bfloat162),
        CPointer(_type___nv_bfloat162),
    )

    _ZL12__hequ2_mask14__nv_bfloat162S__nbst = declare_device(
        symbol, uint32(*by_pointer)
    )

    # Thin Python wrapper that ``compile_internal`` compiles to reach the shim.
    def _ZL12__hequ2_mask14__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return _ZL12__hequ2_mask14__nv_bfloat162S__nbst(arg_0, arg_1)

    @lower(__hequ2_mask, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Reserve one slot per operand first, then fill the slots in order.
        slots = []
        for operand_type in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_type)))
        for slot, operand_type, value in zip(slots, sig.args, args):
            alignment = getattr(operand_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        shim_signature = signature(uint32, *by_pointer)
        return context.compile_internal(
            builder,
            _ZL12__hequ2_mask14__nv_bfloat162S__nbst_caller,
            shim_signature,
            slots,
        )


# Invoke the helper at module level so the ``@lower`` registration for
# ``__hequ2_mask`` is executed on import.
_lower__ZL12__hequ2_mask14__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hneu2_mask():
    """Python-side placeholder for ``__hneu2_mask``.

    The function object is what this module passes to ``@lower`` when
    registering the ``__hneu2_mask`` lowering; the body does nothing and a
    direct call returns ``None``.
    """


def _lower__ZL12__hneu2_mask14__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hneu2_mask`` for two ``__nv_bfloat162`` operands.

    The lowering stores each operand into a fresh ``alloca`` slot and calls
    the ``extern "C"`` shim below with pointers to those slots; the declared
    return type is ``uint32``.

    Args:
        shim_stream: Receives the shim symbol and source via
            ``write_with_key`` each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    symbol = "_ZL12__hneu2_mask14__nv_bfloat162S__nbst"
    # Shim source, passed verbatim to ``shim_stream.write_with_key``.
    shim_source = """
    extern "C" __device__ int
    _ZL12__hneu2_mask14__nv_bfloat162S__nbst(unsigned int &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hneu2_mask(*a, *b);
        return 0;
    }
        """
    # The shim takes every operand by pointer.
    by_pointer = (
        CPointer(_type___nv_bfloat162),
        CPointer(_type___nv_bfloat162),
    )

    _ZL12__hneu2_mask14__nv_bfloat162S__nbst = declare_device(
        symbol, uint32(*by_pointer)
    )

    # Thin Python wrapper that ``compile_internal`` compiles to reach the shim.
    def _ZL12__hneu2_mask14__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return _ZL12__hneu2_mask14__nv_bfloat162S__nbst(arg_0, arg_1)

    @lower(__hneu2_mask, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Reserve one slot per operand first, then fill the slots in order.
        slots = []
        for operand_type in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_type)))
        for slot, operand_type, value in zip(slots, sig.args, args):
            alignment = getattr(operand_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        shim_signature = signature(uint32, *by_pointer)
        return context.compile_internal(
            builder,
            _ZL12__hneu2_mask14__nv_bfloat162S__nbst_caller,
            shim_signature,
            slots,
        )


# Invoke the helper at module level so the ``@lower`` registration for
# ``__hneu2_mask`` is executed on import.
_lower__ZL12__hneu2_mask14__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hleu2_mask():
    """Python-side placeholder for ``__hleu2_mask``.

    The function object is what this module passes to ``@lower`` when
    registering the ``__hleu2_mask`` lowering; the body does nothing and a
    direct call returns ``None``.
    """


def _lower__ZL12__hleu2_mask14__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hleu2_mask`` for two ``__nv_bfloat162`` operands.

    The lowering stores each operand into a fresh ``alloca`` slot and calls
    the ``extern "C"`` shim below with pointers to those slots; the declared
    return type is ``uint32``.

    Args:
        shim_stream: Receives the shim symbol and source via
            ``write_with_key`` each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    symbol = "_ZL12__hleu2_mask14__nv_bfloat162S__nbst"
    # Shim source, passed verbatim to ``shim_stream.write_with_key``.
    shim_source = """
    extern "C" __device__ int
    _ZL12__hleu2_mask14__nv_bfloat162S__nbst(unsigned int &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hleu2_mask(*a, *b);
        return 0;
    }
        """
    # The shim takes every operand by pointer.
    by_pointer = (
        CPointer(_type___nv_bfloat162),
        CPointer(_type___nv_bfloat162),
    )

    _ZL12__hleu2_mask14__nv_bfloat162S__nbst = declare_device(
        symbol, uint32(*by_pointer)
    )

    # Thin Python wrapper that ``compile_internal`` compiles to reach the shim.
    def _ZL12__hleu2_mask14__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return _ZL12__hleu2_mask14__nv_bfloat162S__nbst(arg_0, arg_1)

    @lower(__hleu2_mask, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Reserve one slot per operand first, then fill the slots in order.
        slots = []
        for operand_type in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_type)))
        for slot, operand_type, value in zip(slots, sig.args, args):
            alignment = getattr(operand_type, "alignof_", None)
            builder.store(value, slot, align=alignment)

        shim_signature = signature(uint32, *by_pointer)
        return context.compile_internal(
            builder,
            _ZL12__hleu2_mask14__nv_bfloat162S__nbst_caller,
            shim_signature,
            slots,
        )


# Invoke the helper at module level so the ``@lower`` registration for
# ``__hleu2_mask`` is executed on import.
_lower__ZL12__hleu2_mask14__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hgeu2_mask():
    """Placeholder for the ``__hgeu2_mask`` binding.

    Does nothing when called from Python; the function object is the key under
    which the device lowering is registered via ``lower``.
    """


def _lower__ZL12__hgeu2_mask14__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hgeu2_mask`` for two ``_type___nv_bfloat162`` arguments.

    ``shim_stream`` receives the C++ shim source (keyed by the shim symbol) and
    ``shim_obj`` is added as a linking file each time the lowering runs.
    """
    symbol = "_ZL12__hgeu2_mask14__nv_bfloat162S__nbst"
    # C++ wrapper: takes the operands by pointer and writes the result into
    # ``retval``.
    shim_source = """
    extern "C" __device__ int
    _ZL12__hgeu2_mask14__nv_bfloat162S__nbst(unsigned int &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hgeu2_mask(*a, *b);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_fn = declare_device(symbol, uint32(operand_ptr, operand_ptr))
    call_sig = signature(uint32, operand_ptr, operand_ptr)

    def _ZL12__hgeu2_mask14__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__hgeu2_mask, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Allocate one slot per argument first, then store each value using
        # the alignment its type advertises through ``alignof_`` (if any).
        slots = [
            builder.alloca(context.get_value_type(argty)) for argty in sig.args
        ]
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        # The shim is called with pointers to the spilled arguments.
        return context.compile_internal(
            builder,
            _ZL12__hgeu2_mask14__nv_bfloat162S__nbst_caller,
            call_sig,
            slots,
        )


# Run the registration when the module is executed so the __hgeu2_mask
# lowering is installed.
_lower__ZL12__hgeu2_mask14__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hltu2_mask():
    """Placeholder for the ``__hltu2_mask`` binding.

    Does nothing when called from Python; the function object is the key under
    which the device lowering is registered via ``lower``.
    """


def _lower__ZL12__hltu2_mask14__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hltu2_mask`` for two ``_type___nv_bfloat162`` arguments.

    ``shim_stream`` receives the C++ shim source (keyed by the shim symbol) and
    ``shim_obj`` is added as a linking file each time the lowering runs.
    """
    symbol = "_ZL12__hltu2_mask14__nv_bfloat162S__nbst"
    # C++ wrapper: takes the operands by pointer and writes the result into
    # ``retval``.
    shim_source = """
    extern "C" __device__ int
    _ZL12__hltu2_mask14__nv_bfloat162S__nbst(unsigned int &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hltu2_mask(*a, *b);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_fn = declare_device(symbol, uint32(operand_ptr, operand_ptr))
    call_sig = signature(uint32, operand_ptr, operand_ptr)

    def _ZL12__hltu2_mask14__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__hltu2_mask, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Allocate one slot per argument first, then store each value using
        # the alignment its type advertises through ``alignof_`` (if any).
        slots = [
            builder.alloca(context.get_value_type(argty)) for argty in sig.args
        ]
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        # The shim is called with pointers to the spilled arguments.
        return context.compile_internal(
            builder,
            _ZL12__hltu2_mask14__nv_bfloat162S__nbst_caller,
            call_sig,
            slots,
        )


# Run the registration when the module is executed so the __hltu2_mask
# lowering is installed.
_lower__ZL12__hltu2_mask14__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hgtu2_mask():
    """Placeholder for the ``__hgtu2_mask`` binding.

    Does nothing when called from Python; the function object is the key under
    which the device lowering is registered via ``lower``.
    """


def _lower__ZL12__hgtu2_mask14__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hgtu2_mask`` for two ``_type___nv_bfloat162`` arguments.

    ``shim_stream`` receives the C++ shim source (keyed by the shim symbol) and
    ``shim_obj`` is added as a linking file each time the lowering runs.
    """
    symbol = "_ZL12__hgtu2_mask14__nv_bfloat162S__nbst"
    # C++ wrapper: takes the operands by pointer and writes the result into
    # ``retval``.
    shim_source = """
    extern "C" __device__ int
    _ZL12__hgtu2_mask14__nv_bfloat162S__nbst(unsigned int &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hgtu2_mask(*a, *b);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_fn = declare_device(symbol, uint32(operand_ptr, operand_ptr))
    call_sig = signature(uint32, operand_ptr, operand_ptr)

    def _ZL12__hgtu2_mask14__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__hgtu2_mask, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Allocate one slot per argument first, then store each value using
        # the alignment its type advertises through ``alignof_`` (if any).
        slots = [
            builder.alloca(context.get_value_type(argty)) for argty in sig.args
        ]
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        # The shim is called with pointers to the spilled arguments.
        return context.compile_internal(
            builder,
            _ZL12__hgtu2_mask14__nv_bfloat162S__nbst_caller,
            call_sig,
            slots,
        )


# Run the registration when the module is executed so the __hgtu2_mask
# lowering is installed.
_lower__ZL12__hgtu2_mask14__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hisnan2():
    """Placeholder for the ``__hisnan2`` binding.

    Does nothing when called from Python; the function object is the key under
    which the device lowering is registered via ``lower``.
    """


def _lower__ZL9__hisnan214__nv_bfloat162_nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hisnan2`` for one ``_type___nv_bfloat162`` argument.

    ``shim_stream`` receives the C++ shim source (keyed by the shim symbol) and
    ``shim_obj`` is added as a linking file each time the lowering runs.
    """
    symbol = "_ZL9__hisnan214__nv_bfloat162_nbst"
    # C++ wrapper: takes the operand by pointer and writes the result into
    # ``retval``.
    shim_source = """
    extern "C" __device__ int
    _ZL9__hisnan214__nv_bfloat162_nbst(__nv_bfloat162 &retval , __nv_bfloat162* a) {
        retval = __hisnan2(*a);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_fn = declare_device(symbol, _type___nv_bfloat162(operand_ptr))
    call_sig = signature(_type___nv_bfloat162, operand_ptr)

    def _ZL9__hisnan214__nv_bfloat162_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__hisnan2, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Allocate one slot per argument first, then store each value using
        # the alignment its type advertises through ``alignof_`` (if any).
        slots = [
            builder.alloca(context.get_value_type(argty)) for argty in sig.args
        ]
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        # The shim is called with a pointer to the spilled argument.
        return context.compile_internal(
            builder,
            _ZL9__hisnan214__nv_bfloat162_nbst_caller,
            call_sig,
            slots,
        )


# Run the registration when the module is executed so the __hisnan2 lowering
# is installed.
_lower__ZL9__hisnan214__nv_bfloat162_nbst(shim_stream, shim_obj)


def __hadd2():
    """Placeholder for the ``__hadd2`` binding.

    Does nothing when called from Python; the function object is the key under
    which the device lowering is registered via ``lower``.
    """


def _lower__ZL7__hadd214__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hadd2`` for two ``_type___nv_bfloat162`` arguments.

    ``shim_stream`` receives the C++ shim source (keyed by the shim symbol) and
    ``shim_obj`` is added as a linking file each time the lowering runs.
    """
    symbol = "_ZL7__hadd214__nv_bfloat162S__nbst"
    # C++ wrapper: takes the operands by pointer and writes the result into
    # ``retval``.
    shim_source = """
    extern "C" __device__ int
    _ZL7__hadd214__nv_bfloat162S__nbst(__nv_bfloat162 &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hadd2(*a, *b);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_fn = declare_device(
        symbol, _type___nv_bfloat162(operand_ptr, operand_ptr)
    )
    call_sig = signature(_type___nv_bfloat162, operand_ptr, operand_ptr)

    def _ZL7__hadd214__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__hadd2, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Allocate one slot per argument first, then store each value using
        # the alignment its type advertises through ``alignof_`` (if any).
        slots = [
            builder.alloca(context.get_value_type(argty)) for argty in sig.args
        ]
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        # The shim is called with pointers to the spilled arguments.
        return context.compile_internal(
            builder,
            _ZL7__hadd214__nv_bfloat162S__nbst_caller,
            call_sig,
            slots,
        )


# Run the registration when the module is executed so the __hadd2 lowering is
# installed.
_lower__ZL7__hadd214__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hsub2():
    """Placeholder for the ``__hsub2`` binding.

    Does nothing when called from Python; the function object is the key under
    which the device lowering is registered via ``lower``.
    """


def _lower__ZL7__hsub214__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hsub2`` for two ``_type___nv_bfloat162`` arguments.

    ``shim_stream`` receives the C++ shim source (keyed by the shim symbol) and
    ``shim_obj`` is added as a linking file each time the lowering runs.
    """
    symbol = "_ZL7__hsub214__nv_bfloat162S__nbst"
    # C++ wrapper: takes the operands by pointer and writes the result into
    # ``retval``.
    shim_source = """
    extern "C" __device__ int
    _ZL7__hsub214__nv_bfloat162S__nbst(__nv_bfloat162 &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hsub2(*a, *b);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_fn = declare_device(
        symbol, _type___nv_bfloat162(operand_ptr, operand_ptr)
    )
    call_sig = signature(_type___nv_bfloat162, operand_ptr, operand_ptr)

    def _ZL7__hsub214__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__hsub2, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Allocate one slot per argument first, then store each value using
        # the alignment its type advertises through ``alignof_`` (if any).
        slots = [
            builder.alloca(context.get_value_type(argty)) for argty in sig.args
        ]
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        # The shim is called with pointers to the spilled arguments.
        return context.compile_internal(
            builder,
            _ZL7__hsub214__nv_bfloat162S__nbst_caller,
            call_sig,
            slots,
        )


# Run the registration when the module is executed so the __hsub2 lowering is
# installed.
_lower__ZL7__hsub214__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hmul2():
    """Placeholder for the ``__hmul2`` binding.

    Does nothing when called from Python; the function object is the key under
    which the device lowering is registered via ``lower``.
    """


def _lower__ZL7__hmul214__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hmul2`` for two ``_type___nv_bfloat162`` arguments.

    ``shim_stream`` receives the C++ shim source (keyed by the shim symbol) and
    ``shim_obj`` is added as a linking file each time the lowering runs.
    """
    symbol = "_ZL7__hmul214__nv_bfloat162S__nbst"
    # C++ wrapper: takes the operands by pointer and writes the result into
    # ``retval``.
    shim_source = """
    extern "C" __device__ int
    _ZL7__hmul214__nv_bfloat162S__nbst(__nv_bfloat162 &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hmul2(*a, *b);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_fn = declare_device(
        symbol, _type___nv_bfloat162(operand_ptr, operand_ptr)
    )
    call_sig = signature(_type___nv_bfloat162, operand_ptr, operand_ptr)

    def _ZL7__hmul214__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__hmul2, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Allocate one slot per argument first, then store each value using
        # the alignment its type advertises through ``alignof_`` (if any).
        slots = [
            builder.alloca(context.get_value_type(argty)) for argty in sig.args
        ]
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        # The shim is called with pointers to the spilled arguments.
        return context.compile_internal(
            builder,
            _ZL7__hmul214__nv_bfloat162S__nbst_caller,
            call_sig,
            slots,
        )


# Run the registration when the module is executed so the __hmul2 lowering is
# installed.
_lower__ZL7__hmul214__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hadd2_rn():
    """Placeholder for the ``__hadd2_rn`` binding.

    Does nothing when called from Python; the function object is the key under
    which the device lowering is registered via ``lower``.
    """


def _lower__ZL10__hadd2_rn14__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hadd2_rn`` for two ``_type___nv_bfloat162`` arguments.

    ``shim_stream`` receives the C++ shim source (keyed by the shim symbol) and
    ``shim_obj`` is added as a linking file each time the lowering runs.
    """
    symbol = "_ZL10__hadd2_rn14__nv_bfloat162S__nbst"
    # C++ wrapper: takes the operands by pointer and writes the result into
    # ``retval``.
    shim_source = """
    extern "C" __device__ int
    _ZL10__hadd2_rn14__nv_bfloat162S__nbst(__nv_bfloat162 &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hadd2_rn(*a, *b);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_fn = declare_device(
        symbol, _type___nv_bfloat162(operand_ptr, operand_ptr)
    )
    call_sig = signature(_type___nv_bfloat162, operand_ptr, operand_ptr)

    def _ZL10__hadd2_rn14__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__hadd2_rn, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Allocate one slot per argument first, then store each value using
        # the alignment its type advertises through ``alignof_`` (if any).
        slots = [
            builder.alloca(context.get_value_type(argty)) for argty in sig.args
        ]
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        # The shim is called with pointers to the spilled arguments.
        return context.compile_internal(
            builder,
            _ZL10__hadd2_rn14__nv_bfloat162S__nbst_caller,
            call_sig,
            slots,
        )


# Run the registration when the module is executed so the __hadd2_rn lowering
# is installed.
_lower__ZL10__hadd2_rn14__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hsub2_rn():
    """Placeholder for the ``__hsub2_rn`` binding.

    Does nothing when called from Python; the function object is the key under
    which the device lowering is registered via ``lower``.
    """


def _lower__ZL10__hsub2_rn14__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hsub2_rn`` for two ``_type___nv_bfloat162`` arguments.

    ``shim_stream`` receives the C++ shim source (keyed by the shim symbol) and
    ``shim_obj`` is added as a linking file each time the lowering runs.
    """
    symbol = "_ZL10__hsub2_rn14__nv_bfloat162S__nbst"
    # C++ wrapper: takes the operands by pointer and writes the result into
    # ``retval``.
    shim_source = """
    extern "C" __device__ int
    _ZL10__hsub2_rn14__nv_bfloat162S__nbst(__nv_bfloat162 &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hsub2_rn(*a, *b);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_fn = declare_device(
        symbol, _type___nv_bfloat162(operand_ptr, operand_ptr)
    )
    call_sig = signature(_type___nv_bfloat162, operand_ptr, operand_ptr)

    def _ZL10__hsub2_rn14__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__hsub2_rn, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Allocate one slot per argument first, then store each value using
        # the alignment its type advertises through ``alignof_`` (if any).
        slots = [
            builder.alloca(context.get_value_type(argty)) for argty in sig.args
        ]
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        # The shim is called with pointers to the spilled arguments.
        return context.compile_internal(
            builder,
            _ZL10__hsub2_rn14__nv_bfloat162S__nbst_caller,
            call_sig,
            slots,
        )


# Run the registration when the module is executed so the __hsub2_rn lowering
# is installed.
_lower__ZL10__hsub2_rn14__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hmul2_rn():
    """Placeholder for the ``__hmul2_rn`` binding.

    Does nothing when called from Python; the function object is the key under
    which the device lowering is registered via ``lower``.
    """


def _lower__ZL10__hmul2_rn14__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hmul2_rn`` for two ``_type___nv_bfloat162`` arguments.

    ``shim_stream`` receives the C++ shim source (keyed by the shim symbol) and
    ``shim_obj`` is added as a linking file each time the lowering runs.
    """
    symbol = "_ZL10__hmul2_rn14__nv_bfloat162S__nbst"
    # C++ wrapper: takes the operands by pointer and writes the result into
    # ``retval``.
    shim_source = """
    extern "C" __device__ int
    _ZL10__hmul2_rn14__nv_bfloat162S__nbst(__nv_bfloat162 &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hmul2_rn(*a, *b);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_fn = declare_device(
        symbol, _type___nv_bfloat162(operand_ptr, operand_ptr)
    )
    call_sig = signature(_type___nv_bfloat162, operand_ptr, operand_ptr)

    def _ZL10__hmul2_rn14__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__hmul2_rn, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Allocate one slot per argument first, then store each value using
        # the alignment its type advertises through ``alignof_`` (if any).
        slots = [
            builder.alloca(context.get_value_type(argty)) for argty in sig.args
        ]
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        # The shim is called with pointers to the spilled arguments.
        return context.compile_internal(
            builder,
            _ZL10__hmul2_rn14__nv_bfloat162S__nbst_caller,
            call_sig,
            slots,
        )


# Run the registration when the module is executed so the __hmul2_rn lowering
# is installed.
_lower__ZL10__hmul2_rn14__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __h2div():
    """Placeholder for the ``__h2div`` binding.

    Does nothing when called from Python; the function object is the key under
    which the device lowering is registered via ``lower``.
    """


def _lower__ZL7__h2div14__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__h2div`` for two ``_type___nv_bfloat162`` arguments.

    ``shim_stream`` receives the C++ shim source (keyed by the shim symbol) and
    ``shim_obj`` is added as a linking file each time the lowering runs.
    """
    symbol = "_ZL7__h2div14__nv_bfloat162S__nbst"
    # C++ wrapper: takes the operands by pointer and writes the result into
    # ``retval``.
    shim_source = """
    extern "C" __device__ int
    _ZL7__h2div14__nv_bfloat162S__nbst(__nv_bfloat162 &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __h2div(*a, *b);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_fn = declare_device(
        symbol, _type___nv_bfloat162(operand_ptr, operand_ptr)
    )
    call_sig = signature(_type___nv_bfloat162, operand_ptr, operand_ptr)

    def _ZL7__h2div14__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__h2div, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Allocate one slot per argument first, then store each value using
        # the alignment its type advertises through ``alignof_`` (if any).
        slots = [
            builder.alloca(context.get_value_type(argty)) for argty in sig.args
        ]
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        # The shim is called with pointers to the spilled arguments.
        return context.compile_internal(
            builder,
            _ZL7__h2div14__nv_bfloat162S__nbst_caller,
            call_sig,
            slots,
        )


# Run the registration when the module is executed so the __h2div lowering is
# installed.
_lower__ZL7__h2div14__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __habs2():
    """Placeholder for the ``__habs2`` binding.

    Does nothing when called from Python; the function object is the key under
    which the device lowering is registered via ``lower``.
    """


def _lower__ZL7__habs214__nv_bfloat162_nbst(shim_stream, shim_obj):
    """Register the lowering of ``__habs2`` for one ``_type___nv_bfloat162`` argument.

    ``shim_stream`` receives the C++ shim source (keyed by the shim symbol) and
    ``shim_obj`` is added as a linking file each time the lowering runs.
    """
    symbol = "_ZL7__habs214__nv_bfloat162_nbst"
    # C++ wrapper: takes the operand by pointer and writes the result into
    # ``retval``.
    shim_source = """
    extern "C" __device__ int
    _ZL7__habs214__nv_bfloat162_nbst(__nv_bfloat162 &retval , __nv_bfloat162* a) {
        retval = __habs2(*a);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_fn = declare_device(symbol, _type___nv_bfloat162(operand_ptr))
    call_sig = signature(_type___nv_bfloat162, operand_ptr)

    def _ZL7__habs214__nv_bfloat162_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__habs2, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Allocate one slot per argument first, then store each value using
        # the alignment its type advertises through ``alignof_`` (if any).
        slots = [
            builder.alloca(context.get_value_type(argty)) for argty in sig.args
        ]
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        # The shim is called with a pointer to the spilled argument.
        return context.compile_internal(
            builder,
            _ZL7__habs214__nv_bfloat162_nbst_caller,
            call_sig,
            slots,
        )


# Run the registration when the module is executed so the __habs2 lowering is
# installed.
_lower__ZL7__habs214__nv_bfloat162_nbst(shim_stream, shim_obj)


def __hadd2_sat():
    """Placeholder for the ``__hadd2_sat`` binding.

    Does nothing when called from Python; the function object is the key under
    which the device lowering is registered via ``lower``.
    """


def _lower__ZL11__hadd2_sat14__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hadd2_sat`` for two ``_type___nv_bfloat162`` arguments.

    ``shim_stream`` receives the C++ shim source (keyed by the shim symbol) and
    ``shim_obj`` is added as a linking file each time the lowering runs.
    """
    symbol = "_ZL11__hadd2_sat14__nv_bfloat162S__nbst"
    # C++ wrapper: takes the operands by pointer and writes the result into
    # ``retval``.
    shim_source = """
    extern "C" __device__ int
    _ZL11__hadd2_sat14__nv_bfloat162S__nbst(__nv_bfloat162 &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hadd2_sat(*a, *b);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_fn = declare_device(
        symbol, _type___nv_bfloat162(operand_ptr, operand_ptr)
    )
    call_sig = signature(_type___nv_bfloat162, operand_ptr, operand_ptr)

    def _ZL11__hadd2_sat14__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__hadd2_sat, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Allocate one slot per argument first, then store each value using
        # the alignment its type advertises through ``alignof_`` (if any).
        slots = [
            builder.alloca(context.get_value_type(argty)) for argty in sig.args
        ]
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        # The shim is called with pointers to the spilled arguments.
        return context.compile_internal(
            builder,
            _ZL11__hadd2_sat14__nv_bfloat162S__nbst_caller,
            call_sig,
            slots,
        )


# Run the registration when the module is executed so the __hadd2_sat
# lowering is installed.
_lower__ZL11__hadd2_sat14__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hsub2_sat():
    """Placeholder for the ``__hsub2_sat`` binding.

    Does nothing when called from Python; the function object is the key under
    which the device lowering is registered via ``lower``.
    """


def _lower__ZL11__hsub2_sat14__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hsub2_sat`` for two ``_type___nv_bfloat162`` arguments.

    ``shim_stream`` receives the C++ shim source (keyed by the shim symbol) and
    ``shim_obj`` is added as a linking file each time the lowering runs.
    """
    symbol = "_ZL11__hsub2_sat14__nv_bfloat162S__nbst"
    # C++ wrapper: takes the operands by pointer and writes the result into
    # ``retval``.
    shim_source = """
    extern "C" __device__ int
    _ZL11__hsub2_sat14__nv_bfloat162S__nbst(__nv_bfloat162 &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hsub2_sat(*a, *b);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_fn = declare_device(
        symbol, _type___nv_bfloat162(operand_ptr, operand_ptr)
    )
    call_sig = signature(_type___nv_bfloat162, operand_ptr, operand_ptr)

    def _ZL11__hsub2_sat14__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__hsub2_sat, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Allocate one slot per argument first, then store each value using
        # the alignment its type advertises through ``alignof_`` (if any).
        slots = [
            builder.alloca(context.get_value_type(argty)) for argty in sig.args
        ]
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        # The shim is called with pointers to the spilled arguments.
        return context.compile_internal(
            builder,
            _ZL11__hsub2_sat14__nv_bfloat162S__nbst_caller,
            call_sig,
            slots,
        )


# Run the registration when the module is executed so the __hsub2_sat
# lowering is installed.
_lower__ZL11__hsub2_sat14__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hmul2_sat():
    """Placeholder for the ``__hmul2_sat`` binding.

    Does nothing when called from Python; the function object is the key under
    which the device lowering is registered via ``lower``.
    """


def _lower__ZL11__hmul2_sat14__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hmul2_sat`` for two ``_type___nv_bfloat162`` arguments.

    ``shim_stream`` receives the C++ shim source (keyed by the shim symbol) and
    ``shim_obj`` is added as a linking file each time the lowering runs.
    """
    symbol = "_ZL11__hmul2_sat14__nv_bfloat162S__nbst"
    # C++ wrapper: takes the operands by pointer and writes the result into
    # ``retval``.
    shim_source = """
    extern "C" __device__ int
    _ZL11__hmul2_sat14__nv_bfloat162S__nbst(__nv_bfloat162 &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hmul2_sat(*a, *b);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_fn = declare_device(
        symbol, _type___nv_bfloat162(operand_ptr, operand_ptr)
    )
    call_sig = signature(_type___nv_bfloat162, operand_ptr, operand_ptr)

    def _ZL11__hmul2_sat14__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__hmul2_sat, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Allocate one slot per argument first, then store each value using
        # the alignment its type advertises through ``alignof_`` (if any).
        slots = [
            builder.alloca(context.get_value_type(argty)) for argty in sig.args
        ]
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        # The shim is called with pointers to the spilled arguments.
        return context.compile_internal(
            builder,
            _ZL11__hmul2_sat14__nv_bfloat162S__nbst_caller,
            call_sig,
            slots,
        )


# Run the registration when the module is executed so the __hmul2_sat
# lowering is installed.
_lower__ZL11__hmul2_sat14__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hfma2():
    """Placeholder for the ``__hfma2`` binding.

    Does nothing when called from Python; the function object is the key under
    which the device lowering is registered via ``lower``.
    """


def _lower__ZL7__hfma214__nv_bfloat162S_S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hfma2`` for three ``_type___nv_bfloat162`` arguments.

    ``shim_stream`` receives the C++ shim source (keyed by the shim symbol) and
    ``shim_obj`` is added as a linking file each time the lowering runs.
    """
    symbol = "_ZL7__hfma214__nv_bfloat162S_S__nbst"
    # C++ wrapper: takes the operands by pointer and writes the result into
    # ``retval``.
    shim_source = """
    extern "C" __device__ int
    _ZL7__hfma214__nv_bfloat162S_S__nbst(__nv_bfloat162 &retval , __nv_bfloat162* a, __nv_bfloat162* b, __nv_bfloat162* c) {
        retval = __hfma2(*a, *b, *c);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_fn = declare_device(
        symbol, _type___nv_bfloat162(operand_ptr, operand_ptr, operand_ptr)
    )
    call_sig = signature(
        _type___nv_bfloat162, operand_ptr, operand_ptr, operand_ptr
    )

    def _ZL7__hfma214__nv_bfloat162S_S__nbst_caller(arg_0, arg_1, arg_2):
        return shim_fn(arg_0, arg_1, arg_2)

    @lower(
        __hfma2,
        _type___nv_bfloat162,
        _type___nv_bfloat162,
        _type___nv_bfloat162,
    )
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Allocate one slot per argument first, then store each value using
        # the alignment its type advertises through ``alignof_`` (if any).
        slots = [
            builder.alloca(context.get_value_type(argty)) for argty in sig.args
        ]
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        # The shim is called with pointers to the spilled arguments.
        return context.compile_internal(
            builder,
            _ZL7__hfma214__nv_bfloat162S_S__nbst_caller,
            call_sig,
            slots,
        )


# Run the registration when the module is executed so the __hfma2 lowering is
# installed.
_lower__ZL7__hfma214__nv_bfloat162S_S__nbst(shim_stream, shim_obj)


def __hfma2_sat():
    """Placeholder for the ``__hfma2_sat`` binding.

    Does nothing when called from Python; the function object is the key under
    which the device lowering is registered via ``lower``.
    """


def _lower__ZL11__hfma2_sat14__nv_bfloat162S_S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hfma2_sat`` for three ``_type___nv_bfloat162`` arguments.

    ``shim_stream`` receives the C++ shim source (keyed by the shim symbol) and
    ``shim_obj`` is added as a linking file each time the lowering runs.
    """
    symbol = "_ZL11__hfma2_sat14__nv_bfloat162S_S__nbst"
    # C++ wrapper: takes the operands by pointer and writes the result into
    # ``retval``.
    shim_source = """
    extern "C" __device__ int
    _ZL11__hfma2_sat14__nv_bfloat162S_S__nbst(__nv_bfloat162 &retval , __nv_bfloat162* a, __nv_bfloat162* b, __nv_bfloat162* c) {
        retval = __hfma2_sat(*a, *b, *c);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_fn = declare_device(
        symbol, _type___nv_bfloat162(operand_ptr, operand_ptr, operand_ptr)
    )
    call_sig = signature(
        _type___nv_bfloat162, operand_ptr, operand_ptr, operand_ptr
    )

    def _ZL11__hfma2_sat14__nv_bfloat162S_S__nbst_caller(arg_0, arg_1, arg_2):
        return shim_fn(arg_0, arg_1, arg_2)

    @lower(
        __hfma2_sat,
        _type___nv_bfloat162,
        _type___nv_bfloat162,
        _type___nv_bfloat162,
    )
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Allocate one slot per argument first, then store each value using
        # the alignment its type advertises through ``alignof_`` (if any).
        slots = [
            builder.alloca(context.get_value_type(argty)) for argty in sig.args
        ]
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        # The shim is called with pointers to the spilled arguments.
        return context.compile_internal(
            builder,
            _ZL11__hfma2_sat14__nv_bfloat162S_S__nbst_caller,
            call_sig,
            slots,
        )


# Run the registration when the module is executed so the __hfma2_sat
# lowering is installed.
_lower__ZL11__hfma2_sat14__nv_bfloat162S_S__nbst(shim_stream, shim_obj)


def __hneg2():
    """Placeholder for the ``__hneg2`` binding.

    Does nothing when called from Python; the function object is the key under
    which the device lowering is registered via ``lower``.
    """


def _lower__ZL7__hneg214__nv_bfloat162_nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hneg2`` for one ``_type___nv_bfloat162`` argument.

    ``shim_stream`` receives the C++ shim source (keyed by the shim symbol) and
    ``shim_obj`` is added as a linking file each time the lowering runs.
    """
    symbol = "_ZL7__hneg214__nv_bfloat162_nbst"
    # C++ wrapper: takes the operand by pointer and writes the result into
    # ``retval``.
    shim_source = """
    extern "C" __device__ int
    _ZL7__hneg214__nv_bfloat162_nbst(__nv_bfloat162 &retval , __nv_bfloat162* a) {
        retval = __hneg2(*a);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_fn = declare_device(symbol, _type___nv_bfloat162(operand_ptr))
    call_sig = signature(_type___nv_bfloat162, operand_ptr)

    def _ZL7__hneg214__nv_bfloat162_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__hneg2, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Allocate one slot per argument first, then store each value using
        # the alignment its type advertises through ``alignof_`` (if any).
        slots = [
            builder.alloca(context.get_value_type(argty)) for argty in sig.args
        ]
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        # The shim is called with a pointer to the spilled argument.
        return context.compile_internal(
            builder,
            _ZL7__hneg214__nv_bfloat162_nbst_caller,
            call_sig,
            slots,
        )


# Run the registration when the module is executed so the __hneg2 lowering is
# installed.
_lower__ZL7__hneg214__nv_bfloat162_nbst(shim_stream, shim_obj)


def __habs():
    """Placeholder for the ``__habs`` binding.

    Does nothing when called from Python; the function object is the key under
    which the device lowering is registered via ``lower``.
    """


def _lower__ZL6__habs13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Register the lowering of ``__habs`` for one ``_type___nv_bfloat16`` argument.

    ``shim_stream`` receives the C++ shim source (keyed by the shim symbol) and
    ``shim_obj`` is added as a linking file each time the lowering runs.
    """
    symbol = "_ZL6__habs13__nv_bfloat16_nbst"
    # C++ wrapper: takes the operand by pointer and writes the result into
    # ``retval``.
    shim_source = """
    extern "C" __device__ int
    _ZL6__habs13__nv_bfloat16_nbst(__nv_bfloat16 &retval , __nv_bfloat16* a) {
        retval = __habs(*a);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat16)
    shim_fn = declare_device(symbol, _type___nv_bfloat16(operand_ptr))
    call_sig = signature(_type___nv_bfloat16, operand_ptr)

    def _ZL6__habs13__nv_bfloat16_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__habs, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Allocate one slot per argument first, then store each value using
        # the alignment its type advertises through ``alignof_`` (if any).
        slots = [
            builder.alloca(context.get_value_type(argty)) for argty in sig.args
        ]
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        # The shim is called with a pointer to the spilled argument.
        return context.compile_internal(
            builder,
            _ZL6__habs13__nv_bfloat16_nbst_caller,
            call_sig,
            slots,
        )


# Run the registration when the module is executed so the __habs lowering is
# installed.
_lower__ZL6__habs13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __hadd():
    """Placeholder for the ``__hadd`` binding.

    Does nothing when called from Python; the function object is the key under
    which the device lowering is registered via ``lower``.
    """


def _lower__ZL6__hadd13__nv_bfloat16S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hadd`` for two ``_type___nv_bfloat16`` arguments.

    ``shim_stream`` receives the C++ shim source (keyed by the shim symbol) and
    ``shim_obj`` is added as a linking file each time the lowering runs.
    """
    symbol = "_ZL6__hadd13__nv_bfloat16S__nbst"
    # C++ wrapper: takes the operands by pointer and writes the result into
    # ``retval``.
    shim_source = """
    extern "C" __device__ int
    _ZL6__hadd13__nv_bfloat16S__nbst(__nv_bfloat16 &retval , __nv_bfloat16* a, __nv_bfloat16* b) {
        retval = __hadd(*a, *b);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat16)
    shim_fn = declare_device(
        symbol, _type___nv_bfloat16(operand_ptr, operand_ptr)
    )
    call_sig = signature(_type___nv_bfloat16, operand_ptr, operand_ptr)

    def _ZL6__hadd13__nv_bfloat16S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__hadd, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Allocate one slot per argument first, then store each value using
        # the alignment its type advertises through ``alignof_`` (if any).
        slots = [
            builder.alloca(context.get_value_type(argty)) for argty in sig.args
        ]
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        # The shim is called with pointers to the spilled arguments.
        return context.compile_internal(
            builder,
            _ZL6__hadd13__nv_bfloat16S__nbst_caller,
            call_sig,
            slots,
        )


# Run the registration when the module is executed so the __hadd lowering is
# installed.
_lower__ZL6__hadd13__nv_bfloat16S__nbst(shim_stream, shim_obj)


def __hsub():
    """Placeholder for the ``__hsub`` binding.

    Does nothing when called from Python; the function object is the key under
    which the device lowering is registered via ``lower``.
    """


def _lower__ZL6__hsub13__nv_bfloat16S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hsub`` for two ``_type___nv_bfloat16`` arguments.

    ``shim_stream`` receives the C++ shim source (keyed by the shim symbol) and
    ``shim_obj`` is added as a linking file each time the lowering runs.
    """
    symbol = "_ZL6__hsub13__nv_bfloat16S__nbst"
    # C++ wrapper: takes the operands by pointer and writes the result into
    # ``retval``.
    shim_source = """
    extern "C" __device__ int
    _ZL6__hsub13__nv_bfloat16S__nbst(__nv_bfloat16 &retval , __nv_bfloat16* a, __nv_bfloat16* b) {
        retval = __hsub(*a, *b);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat16)
    shim_fn = declare_device(
        symbol, _type___nv_bfloat16(operand_ptr, operand_ptr)
    )
    call_sig = signature(_type___nv_bfloat16, operand_ptr, operand_ptr)

    def _ZL6__hsub13__nv_bfloat16S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__hsub, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # Allocate one slot per argument first, then store each value using
        # the alignment its type advertises through ``alignof_`` (if any).
        slots = [
            builder.alloca(context.get_value_type(argty)) for argty in sig.args
        ]
        for slot, argty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(argty, "alignof_", None))

        # The shim is called with pointers to the spilled arguments.
        return context.compile_internal(
            builder,
            _ZL6__hsub13__nv_bfloat16S__nbst_caller,
            call_sig,
            slots,
        )


# Run the registration when the module is executed so the __hsub lowering is
# installed.
_lower__ZL6__hsub13__nv_bfloat16S__nbst(shim_stream, shim_obj)


def __hmul():
    """No-op Python placeholder for CUDA ``__hmul``; used as the ``@lower`` key."""


def _lower__ZL6__hmul13__nv_bfloat16S__nbst(shim_stream, shim_obj):
    """Install the lowering for ``__hmul(__nv_bfloat16, __nv_bfloat16)``.

    When the lowering runs, ``shim_obj`` is added as a linking file and the
    shim source is written to ``shim_stream`` under the shim's symbol name.
    """
    symbol = "_ZL6__hmul13__nv_bfloat16S__nbst"
    operand_ptr = CPointer(_type___nv_bfloat16)
    shim_raw_str = """
    extern "C" __device__ int
    _ZL6__hmul13__nv_bfloat16S__nbst(__nv_bfloat16 &retval , __nv_bfloat16* a, __nv_bfloat16* b) {
        retval = __hmul(*a, *b);
        return 0;
    }
        """

    shim_fn = declare_device(
        symbol, _type___nv_bfloat16(operand_ptr, operand_ptr)
    )

    def _ZL6__hmul13__nv_bfloat16S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__hmul, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_raw_str)

        # The shim takes pointers: allocate one slot per operand, then fill.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(_type___nv_bfloat16, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL6__hmul13__nv_bfloat16S__nbst_caller,
            shim_sig,
            slots,
        )


# Install the __hmul lowering with the module's shim stream and object.
_lower__ZL6__hmul13__nv_bfloat16S__nbst(shim_stream, shim_obj)


def __hadd_rn():
    """No-op Python placeholder for CUDA ``__hadd_rn``; used as the ``@lower`` key."""


def _lower__ZL9__hadd_rn13__nv_bfloat16S__nbst(shim_stream, shim_obj):
    """Install the lowering for ``__hadd_rn(__nv_bfloat16, __nv_bfloat16)``.

    When the lowering runs, ``shim_obj`` is added as a linking file and the
    shim source is written to ``shim_stream`` under the shim's symbol name.
    """
    symbol = "_ZL9__hadd_rn13__nv_bfloat16S__nbst"
    operand_ptr = CPointer(_type___nv_bfloat16)
    shim_raw_str = """
    extern "C" __device__ int
    _ZL9__hadd_rn13__nv_bfloat16S__nbst(__nv_bfloat16 &retval , __nv_bfloat16* a, __nv_bfloat16* b) {
        retval = __hadd_rn(*a, *b);
        return 0;
    }
        """

    shim_fn = declare_device(
        symbol, _type___nv_bfloat16(operand_ptr, operand_ptr)
    )

    def _ZL9__hadd_rn13__nv_bfloat16S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__hadd_rn, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_raw_str)

        # The shim takes pointers: allocate one slot per operand, then fill.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(_type___nv_bfloat16, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL9__hadd_rn13__nv_bfloat16S__nbst_caller,
            shim_sig,
            slots,
        )


# Install the __hadd_rn lowering with the module's shim stream and object.
_lower__ZL9__hadd_rn13__nv_bfloat16S__nbst(shim_stream, shim_obj)


def __hsub_rn():
    """No-op Python placeholder for CUDA ``__hsub_rn``; used as the ``@lower`` key."""


def _lower__ZL9__hsub_rn13__nv_bfloat16S__nbst(shim_stream, shim_obj):
    """Install the lowering for ``__hsub_rn(__nv_bfloat16, __nv_bfloat16)``.

    When the lowering runs, ``shim_obj`` is added as a linking file and the
    shim source is written to ``shim_stream`` under the shim's symbol name.
    """
    symbol = "_ZL9__hsub_rn13__nv_bfloat16S__nbst"
    operand_ptr = CPointer(_type___nv_bfloat16)
    shim_raw_str = """
    extern "C" __device__ int
    _ZL9__hsub_rn13__nv_bfloat16S__nbst(__nv_bfloat16 &retval , __nv_bfloat16* a, __nv_bfloat16* b) {
        retval = __hsub_rn(*a, *b);
        return 0;
    }
        """

    shim_fn = declare_device(
        symbol, _type___nv_bfloat16(operand_ptr, operand_ptr)
    )

    def _ZL9__hsub_rn13__nv_bfloat16S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__hsub_rn, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_raw_str)

        # The shim takes pointers: allocate one slot per operand, then fill.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(_type___nv_bfloat16, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL9__hsub_rn13__nv_bfloat16S__nbst_caller,
            shim_sig,
            slots,
        )


# Install the __hsub_rn lowering with the module's shim stream and object.
_lower__ZL9__hsub_rn13__nv_bfloat16S__nbst(shim_stream, shim_obj)


def __hmul_rn():
    """No-op Python placeholder for CUDA ``__hmul_rn``; used as the ``@lower`` key."""


def _lower__ZL9__hmul_rn13__nv_bfloat16S__nbst(shim_stream, shim_obj):
    """Install the lowering for ``__hmul_rn(__nv_bfloat16, __nv_bfloat16)``.

    When the lowering runs, ``shim_obj`` is added as a linking file and the
    shim source is written to ``shim_stream`` under the shim's symbol name.
    """
    symbol = "_ZL9__hmul_rn13__nv_bfloat16S__nbst"
    operand_ptr = CPointer(_type___nv_bfloat16)
    shim_raw_str = """
    extern "C" __device__ int
    _ZL9__hmul_rn13__nv_bfloat16S__nbst(__nv_bfloat16 &retval , __nv_bfloat16* a, __nv_bfloat16* b) {
        retval = __hmul_rn(*a, *b);
        return 0;
    }
        """

    shim_fn = declare_device(
        symbol, _type___nv_bfloat16(operand_ptr, operand_ptr)
    )

    def _ZL9__hmul_rn13__nv_bfloat16S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__hmul_rn, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_raw_str)

        # The shim takes pointers: allocate one slot per operand, then fill.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(_type___nv_bfloat16, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL9__hmul_rn13__nv_bfloat16S__nbst_caller,
            shim_sig,
            slots,
        )


# Install the __hmul_rn lowering with the module's shim stream and object.
_lower__ZL9__hmul_rn13__nv_bfloat16S__nbst(shim_stream, shim_obj)


def __hdiv():
    """No-op Python placeholder for CUDA ``__hdiv``; used as the ``@lower`` key."""


def _lower__ZL6__hdiv13__nv_bfloat16S__nbst(shim_stream, shim_obj):
    """Install the lowering for ``__hdiv(__nv_bfloat16, __nv_bfloat16)``.

    When the lowering runs, ``shim_obj`` is added as a linking file and the
    shim source is written to ``shim_stream`` under the shim's symbol name.
    """
    symbol = "_ZL6__hdiv13__nv_bfloat16S__nbst"
    operand_ptr = CPointer(_type___nv_bfloat16)
    shim_raw_str = """
    extern "C" __device__ int
    _ZL6__hdiv13__nv_bfloat16S__nbst(__nv_bfloat16 &retval , __nv_bfloat16* a, __nv_bfloat16* b) {
        retval = __hdiv(*a, *b);
        return 0;
    }
        """

    shim_fn = declare_device(
        symbol, _type___nv_bfloat16(operand_ptr, operand_ptr)
    )

    def _ZL6__hdiv13__nv_bfloat16S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__hdiv, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_raw_str)

        # The shim takes pointers: allocate one slot per operand, then fill.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(_type___nv_bfloat16, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL6__hdiv13__nv_bfloat16S__nbst_caller,
            shim_sig,
            slots,
        )


# Install the __hdiv lowering with the module's shim stream and object.
_lower__ZL6__hdiv13__nv_bfloat16S__nbst(shim_stream, shim_obj)


def __hadd_sat():
    """No-op Python placeholder for CUDA ``__hadd_sat``; used as the ``@lower`` key."""


def _lower__ZL10__hadd_sat13__nv_bfloat16S__nbst(shim_stream, shim_obj):
    """Install the lowering for ``__hadd_sat(__nv_bfloat16, __nv_bfloat16)``.

    When the lowering runs, ``shim_obj`` is added as a linking file and the
    shim source is written to ``shim_stream`` under the shim's symbol name.
    """
    symbol = "_ZL10__hadd_sat13__nv_bfloat16S__nbst"
    operand_ptr = CPointer(_type___nv_bfloat16)
    shim_raw_str = """
    extern "C" __device__ int
    _ZL10__hadd_sat13__nv_bfloat16S__nbst(__nv_bfloat16 &retval , __nv_bfloat16* a, __nv_bfloat16* b) {
        retval = __hadd_sat(*a, *b);
        return 0;
    }
        """

    shim_fn = declare_device(
        symbol, _type___nv_bfloat16(operand_ptr, operand_ptr)
    )

    def _ZL10__hadd_sat13__nv_bfloat16S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__hadd_sat, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_raw_str)

        # The shim takes pointers: allocate one slot per operand, then fill.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(_type___nv_bfloat16, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL10__hadd_sat13__nv_bfloat16S__nbst_caller,
            shim_sig,
            slots,
        )


# Install the __hadd_sat lowering with the module's shim stream and object.
_lower__ZL10__hadd_sat13__nv_bfloat16S__nbst(shim_stream, shim_obj)


def __hsub_sat():
    """No-op Python placeholder for CUDA ``__hsub_sat``; used as the ``@lower`` key."""


def _lower__ZL10__hsub_sat13__nv_bfloat16S__nbst(shim_stream, shim_obj):
    """Install the lowering for ``__hsub_sat(__nv_bfloat16, __nv_bfloat16)``.

    When the lowering runs, ``shim_obj`` is added as a linking file and the
    shim source is written to ``shim_stream`` under the shim's symbol name.
    """
    symbol = "_ZL10__hsub_sat13__nv_bfloat16S__nbst"
    operand_ptr = CPointer(_type___nv_bfloat16)
    shim_raw_str = """
    extern "C" __device__ int
    _ZL10__hsub_sat13__nv_bfloat16S__nbst(__nv_bfloat16 &retval , __nv_bfloat16* a, __nv_bfloat16* b) {
        retval = __hsub_sat(*a, *b);
        return 0;
    }
        """

    shim_fn = declare_device(
        symbol, _type___nv_bfloat16(operand_ptr, operand_ptr)
    )

    def _ZL10__hsub_sat13__nv_bfloat16S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__hsub_sat, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_raw_str)

        # The shim takes pointers: allocate one slot per operand, then fill.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(_type___nv_bfloat16, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL10__hsub_sat13__nv_bfloat16S__nbst_caller,
            shim_sig,
            slots,
        )


# Install the __hsub_sat lowering with the module's shim stream and object.
_lower__ZL10__hsub_sat13__nv_bfloat16S__nbst(shim_stream, shim_obj)


def __hmul_sat():
    """No-op Python placeholder for CUDA ``__hmul_sat``; used as the ``@lower`` key."""


def _lower__ZL10__hmul_sat13__nv_bfloat16S__nbst(shim_stream, shim_obj):
    """Install the lowering for ``__hmul_sat(__nv_bfloat16, __nv_bfloat16)``.

    When the lowering runs, ``shim_obj`` is added as a linking file and the
    shim source is written to ``shim_stream`` under the shim's symbol name.
    """
    symbol = "_ZL10__hmul_sat13__nv_bfloat16S__nbst"
    operand_ptr = CPointer(_type___nv_bfloat16)
    shim_raw_str = """
    extern "C" __device__ int
    _ZL10__hmul_sat13__nv_bfloat16S__nbst(__nv_bfloat16 &retval , __nv_bfloat16* a, __nv_bfloat16* b) {
        retval = __hmul_sat(*a, *b);
        return 0;
    }
        """

    shim_fn = declare_device(
        symbol, _type___nv_bfloat16(operand_ptr, operand_ptr)
    )

    def _ZL10__hmul_sat13__nv_bfloat16S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__hmul_sat, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_raw_str)

        # The shim takes pointers: allocate one slot per operand, then fill.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(_type___nv_bfloat16, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL10__hmul_sat13__nv_bfloat16S__nbst_caller,
            shim_sig,
            slots,
        )


# Install the __hmul_sat lowering with the module's shim stream and object.
_lower__ZL10__hmul_sat13__nv_bfloat16S__nbst(shim_stream, shim_obj)


def __hfma():
    """No-op Python placeholder for CUDA ``__hfma``; used as the ``@lower`` key."""


def _lower__ZL6__hfma13__nv_bfloat16S_S__nbst(shim_stream, shim_obj):
    """Install the lowering for the three-operand ``__hfma`` on ``__nv_bfloat16``.

    When the lowering runs, ``shim_obj`` is added as a linking file and the
    shim source is written to ``shim_stream`` under the shim's symbol name.
    """
    symbol = "_ZL6__hfma13__nv_bfloat16S_S__nbst"
    operand_ptr = CPointer(_type___nv_bfloat16)
    shim_raw_str = """
    extern "C" __device__ int
    _ZL6__hfma13__nv_bfloat16S_S__nbst(__nv_bfloat16 &retval , __nv_bfloat16* a, __nv_bfloat16* b, __nv_bfloat16* c) {
        retval = __hfma(*a, *b, *c);
        return 0;
    }
        """

    shim_fn = declare_device(
        symbol, _type___nv_bfloat16(operand_ptr, operand_ptr, operand_ptr)
    )

    def _ZL6__hfma13__nv_bfloat16S_S__nbst_caller(arg_0, arg_1, arg_2):
        return shim_fn(arg_0, arg_1, arg_2)

    @lower(
        __hfma, _type___nv_bfloat16, _type___nv_bfloat16, _type___nv_bfloat16
    )
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_raw_str)

        # The shim takes pointers: allocate one slot per operand, then fill.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(
            _type___nv_bfloat16, operand_ptr, operand_ptr, operand_ptr
        )
        return context.compile_internal(
            builder,
            _ZL6__hfma13__nv_bfloat16S_S__nbst_caller,
            shim_sig,
            slots,
        )


# Install the __hfma lowering with the module's shim stream and object.
_lower__ZL6__hfma13__nv_bfloat16S_S__nbst(shim_stream, shim_obj)


def __hfma_sat():
    """No-op Python placeholder for CUDA ``__hfma_sat``; used as the ``@lower`` key."""


def _lower__ZL10__hfma_sat13__nv_bfloat16S_S__nbst(shim_stream, shim_obj):
    """Install the lowering for the three-operand ``__hfma_sat`` on ``__nv_bfloat16``.

    When the lowering runs, ``shim_obj`` is added as a linking file and the
    shim source is written to ``shim_stream`` under the shim's symbol name.
    """
    symbol = "_ZL10__hfma_sat13__nv_bfloat16S_S__nbst"
    operand_ptr = CPointer(_type___nv_bfloat16)
    shim_raw_str = """
    extern "C" __device__ int
    _ZL10__hfma_sat13__nv_bfloat16S_S__nbst(__nv_bfloat16 &retval , __nv_bfloat16* a, __nv_bfloat16* b, __nv_bfloat16* c) {
        retval = __hfma_sat(*a, *b, *c);
        return 0;
    }
        """

    shim_fn = declare_device(
        symbol, _type___nv_bfloat16(operand_ptr, operand_ptr, operand_ptr)
    )

    def _ZL10__hfma_sat13__nv_bfloat16S_S__nbst_caller(arg_0, arg_1, arg_2):
        return shim_fn(arg_0, arg_1, arg_2)

    @lower(
        __hfma_sat,
        _type___nv_bfloat16,
        _type___nv_bfloat16,
        _type___nv_bfloat16,
    )
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_raw_str)

        # The shim takes pointers: allocate one slot per operand, then fill.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(
            _type___nv_bfloat16, operand_ptr, operand_ptr, operand_ptr
        )
        return context.compile_internal(
            builder,
            _ZL10__hfma_sat13__nv_bfloat16S_S__nbst_caller,
            shim_sig,
            slots,
        )


# Install the __hfma_sat lowering with the module's shim stream and object.
_lower__ZL10__hfma_sat13__nv_bfloat16S_S__nbst(shim_stream, shim_obj)


def __hneg():
    """No-op Python placeholder for CUDA ``__hneg``; used as the ``@lower`` key."""


def _lower__ZL6__hneg13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Install the lowering for ``__hneg(__nv_bfloat16)``.

    When the lowering runs, ``shim_obj`` is added as a linking file and the
    shim source is written to ``shim_stream`` under the shim's symbol name.
    """
    symbol = "_ZL6__hneg13__nv_bfloat16_nbst"
    operand_ptr = CPointer(_type___nv_bfloat16)
    shim_raw_str = """
    extern "C" __device__ int
    _ZL6__hneg13__nv_bfloat16_nbst(__nv_bfloat16 &retval , __nv_bfloat16* a) {
        retval = __hneg(*a);
        return 0;
    }
        """

    shim_fn = declare_device(symbol, _type___nv_bfloat16(operand_ptr))

    def _ZL6__hneg13__nv_bfloat16_nbst_caller(arg_0):
        return shim_fn(arg_0)

    @lower(__hneg, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_raw_str)

        # The shim takes a pointer: allocate a slot for the operand, then fill.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(_type___nv_bfloat16, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL6__hneg13__nv_bfloat16_nbst_caller,
            shim_sig,
            slots,
        )


# Install the __hneg lowering with the module's shim stream and object.
_lower__ZL6__hneg13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __hbeq2():
    """No-op Python placeholder for CUDA ``__hbeq2``; used as the ``@lower`` key."""


def _lower__ZL7__hbeq214__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Install the lowering for ``__hbeq2(__nv_bfloat162, __nv_bfloat162)`` -> bool.

    When the lowering runs, ``shim_obj`` is added as a linking file and the
    shim source is written to ``shim_stream`` under the shim's symbol name.
    """
    symbol = "_ZL7__hbeq214__nv_bfloat162S__nbst"
    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_raw_str = """
    extern "C" __device__ int
    _ZL7__hbeq214__nv_bfloat162S__nbst(bool &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hbeq2(*a, *b);
        return 0;
    }
        """

    shim_fn = declare_device(symbol, bool_(operand_ptr, operand_ptr))

    def _ZL7__hbeq214__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__hbeq2, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_raw_str)

        # The shim takes pointers: allocate one slot per operand, then fill.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(bool_, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL7__hbeq214__nv_bfloat162S__nbst_caller,
            shim_sig,
            slots,
        )


# Install the __hbeq2 lowering with the module's shim stream and object.
_lower__ZL7__hbeq214__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hbne2():
    """No-op Python placeholder for CUDA ``__hbne2``; used as the ``@lower`` key."""


def _lower__ZL7__hbne214__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Install the lowering for ``__hbne2(__nv_bfloat162, __nv_bfloat162)`` -> bool.

    When the lowering runs, ``shim_obj`` is added as a linking file and the
    shim source is written to ``shim_stream`` under the shim's symbol name.
    """
    symbol = "_ZL7__hbne214__nv_bfloat162S__nbst"
    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_raw_str = """
    extern "C" __device__ int
    _ZL7__hbne214__nv_bfloat162S__nbst(bool &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hbne2(*a, *b);
        return 0;
    }
        """

    shim_fn = declare_device(symbol, bool_(operand_ptr, operand_ptr))

    def _ZL7__hbne214__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__hbne2, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_raw_str)

        # The shim takes pointers: allocate one slot per operand, then fill.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(bool_, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL7__hbne214__nv_bfloat162S__nbst_caller,
            shim_sig,
            slots,
        )


# Install the __hbne2 lowering with the module's shim stream and object.
_lower__ZL7__hbne214__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hble2():
    """No-op Python placeholder for CUDA ``__hble2``; used as the ``@lower`` key."""


def _lower__ZL7__hble214__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Install the lowering for ``__hble2(__nv_bfloat162, __nv_bfloat162)`` -> bool.

    When the lowering runs, ``shim_obj`` is added as a linking file and the
    shim source is written to ``shim_stream`` under the shim's symbol name.
    """
    symbol = "_ZL7__hble214__nv_bfloat162S__nbst"
    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_raw_str = """
    extern "C" __device__ int
    _ZL7__hble214__nv_bfloat162S__nbst(bool &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hble2(*a, *b);
        return 0;
    }
        """

    shim_fn = declare_device(symbol, bool_(operand_ptr, operand_ptr))

    def _ZL7__hble214__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__hble2, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_raw_str)

        # The shim takes pointers: allocate one slot per operand, then fill.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(bool_, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL7__hble214__nv_bfloat162S__nbst_caller,
            shim_sig,
            slots,
        )


# Install the __hble2 lowering with the module's shim stream and object.
_lower__ZL7__hble214__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hbge2():
    """No-op Python placeholder for CUDA ``__hbge2``; used as the ``@lower`` key."""


def _lower__ZL7__hbge214__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Install the lowering for ``__hbge2(__nv_bfloat162, __nv_bfloat162)`` -> bool.

    When the lowering runs, ``shim_obj`` is added as a linking file and the
    shim source is written to ``shim_stream`` under the shim's symbol name.
    """
    symbol = "_ZL7__hbge214__nv_bfloat162S__nbst"
    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_raw_str = """
    extern "C" __device__ int
    _ZL7__hbge214__nv_bfloat162S__nbst(bool &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hbge2(*a, *b);
        return 0;
    }
        """

    shim_fn = declare_device(symbol, bool_(operand_ptr, operand_ptr))

    def _ZL7__hbge214__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__hbge2, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_raw_str)

        # The shim takes pointers: allocate one slot per operand, then fill.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(bool_, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL7__hbge214__nv_bfloat162S__nbst_caller,
            shim_sig,
            slots,
        )


# Install the __hbge2 lowering with the module's shim stream and object.
_lower__ZL7__hbge214__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hblt2():
    """No-op Python placeholder for CUDA ``__hblt2``; used as the ``@lower`` key."""


def _lower__ZL7__hblt214__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Install the lowering for ``__hblt2(__nv_bfloat162, __nv_bfloat162)`` -> bool.

    When the lowering runs, ``shim_obj`` is added as a linking file and the
    shim source is written to ``shim_stream`` under the shim's symbol name.
    """
    symbol = "_ZL7__hblt214__nv_bfloat162S__nbst"
    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_raw_str = """
    extern "C" __device__ int
    _ZL7__hblt214__nv_bfloat162S__nbst(bool &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hblt2(*a, *b);
        return 0;
    }
        """

    shim_fn = declare_device(symbol, bool_(operand_ptr, operand_ptr))

    def _ZL7__hblt214__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__hblt2, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_raw_str)

        # The shim takes pointers: allocate one slot per operand, then fill.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(bool_, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL7__hblt214__nv_bfloat162S__nbst_caller,
            shim_sig,
            slots,
        )


# Install the __hblt2 lowering with the module's shim stream and object.
_lower__ZL7__hblt214__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hbgt2():
    """No-op Python placeholder for CUDA ``__hbgt2``; used as the ``@lower`` key."""


def _lower__ZL7__hbgt214__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Install the lowering for ``__hbgt2(__nv_bfloat162, __nv_bfloat162)`` -> bool.

    When the lowering runs, ``shim_obj`` is added as a linking file and the
    shim source is written to ``shim_stream`` under the shim's symbol name.
    """
    symbol = "_ZL7__hbgt214__nv_bfloat162S__nbst"
    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_raw_str = """
    extern "C" __device__ int
    _ZL7__hbgt214__nv_bfloat162S__nbst(bool &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hbgt2(*a, *b);
        return 0;
    }
        """

    shim_fn = declare_device(symbol, bool_(operand_ptr, operand_ptr))

    def _ZL7__hbgt214__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__hbgt2, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_raw_str)

        # The shim takes pointers: allocate one slot per operand, then fill.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(bool_, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL7__hbgt214__nv_bfloat162S__nbst_caller,
            shim_sig,
            slots,
        )


# Install the __hbgt2 lowering with the module's shim stream and object.
_lower__ZL7__hbgt214__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hbequ2():
    """No-op Python placeholder for CUDA ``__hbequ2``; used as the ``@lower`` key."""


def _lower__ZL8__hbequ214__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Install the lowering for ``__hbequ2(__nv_bfloat162, __nv_bfloat162)`` -> bool.

    When the lowering runs, ``shim_obj`` is added as a linking file and the
    shim source is written to ``shim_stream`` under the shim's symbol name.
    """
    symbol = "_ZL8__hbequ214__nv_bfloat162S__nbst"
    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_raw_str = """
    extern "C" __device__ int
    _ZL8__hbequ214__nv_bfloat162S__nbst(bool &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hbequ2(*a, *b);
        return 0;
    }
        """

    shim_fn = declare_device(symbol, bool_(operand_ptr, operand_ptr))

    def _ZL8__hbequ214__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__hbequ2, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_raw_str)

        # The shim takes pointers: allocate one slot per operand, then fill.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(bool_, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL8__hbequ214__nv_bfloat162S__nbst_caller,
            shim_sig,
            slots,
        )


# Install the __hbequ2 lowering with the module's shim stream and object.
_lower__ZL8__hbequ214__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hbneu2():
    """No-op Python placeholder for CUDA ``__hbneu2``; used as the ``@lower`` key."""


def _lower__ZL8__hbneu214__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Install the lowering for ``__hbneu2(__nv_bfloat162, __nv_bfloat162)`` -> bool.

    When the lowering runs, ``shim_obj`` is added as a linking file and the
    shim source is written to ``shim_stream`` under the shim's symbol name.
    """
    symbol = "_ZL8__hbneu214__nv_bfloat162S__nbst"
    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_raw_str = """
    extern "C" __device__ int
    _ZL8__hbneu214__nv_bfloat162S__nbst(bool &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hbneu2(*a, *b);
        return 0;
    }
        """

    shim_fn = declare_device(symbol, bool_(operand_ptr, operand_ptr))

    def _ZL8__hbneu214__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__hbneu2, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_raw_str)

        # The shim takes pointers: allocate one slot per operand, then fill.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(bool_, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL8__hbneu214__nv_bfloat162S__nbst_caller,
            shim_sig,
            slots,
        )


# Install the __hbneu2 lowering with the module's shim stream and object.
_lower__ZL8__hbneu214__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hbleu2():
    """No-op Python placeholder for CUDA ``__hbleu2``; used as the ``@lower`` key."""


def _lower__ZL8__hbleu214__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Install the lowering for ``__hbleu2(__nv_bfloat162, __nv_bfloat162)`` -> bool.

    When the lowering runs, ``shim_obj`` is added as a linking file and the
    shim source is written to ``shim_stream`` under the shim's symbol name.
    """
    symbol = "_ZL8__hbleu214__nv_bfloat162S__nbst"
    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_raw_str = """
    extern "C" __device__ int
    _ZL8__hbleu214__nv_bfloat162S__nbst(bool &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hbleu2(*a, *b);
        return 0;
    }
        """

    shim_fn = declare_device(symbol, bool_(operand_ptr, operand_ptr))

    def _ZL8__hbleu214__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__hbleu2, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_raw_str)

        # The shim takes pointers: allocate one slot per operand, then fill.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(bool_, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL8__hbleu214__nv_bfloat162S__nbst_caller,
            shim_sig,
            slots,
        )


# Install the __hbleu2 lowering with the module's shim stream and object.
_lower__ZL8__hbleu214__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hbgeu2():
    """No-op Python placeholder for CUDA ``__hbgeu2``; used as the ``@lower`` key."""


def _lower__ZL8__hbgeu214__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Install the lowering for ``__hbgeu2(__nv_bfloat162, __nv_bfloat162)`` -> bool.

    When the lowering runs, ``shim_obj`` is added as a linking file and the
    shim source is written to ``shim_stream`` under the shim's symbol name.
    """
    symbol = "_ZL8__hbgeu214__nv_bfloat162S__nbst"
    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_raw_str = """
    extern "C" __device__ int
    _ZL8__hbgeu214__nv_bfloat162S__nbst(bool &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hbgeu2(*a, *b);
        return 0;
    }
        """

    shim_fn = declare_device(symbol, bool_(operand_ptr, operand_ptr))

    def _ZL8__hbgeu214__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return shim_fn(arg_0, arg_1)

    @lower(__hbgeu2, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_raw_str)

        # The shim takes pointers: allocate one slot per operand, then fill.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        shim_sig = signature(bool_, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL8__hbgeu214__nv_bfloat162S__nbst_caller,
            shim_sig,
            slots,
        )


# Install the __hbgeu2 lowering with the module's shim stream and object.
_lower__ZL8__hbgeu214__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hbltu2():
    """Python-side placeholder for ``__hbltu2``; does nothing when called.

    The function object is what ``_lower__ZL8__hbltu214__nv_bfloat162S__nbst``
    passes to ``@lower`` as the callable being lowered.
    """


def _lower__ZL8__hbltu214__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Install the lowering of ``__hbltu2`` for two ``_type___nv_bfloat162`` operands.

    The call is forwarded to an ``extern "C"`` shim that takes its operands
    by pointer, so each operand is first stored into an ``alloca`` slot.

    ``shim_stream`` receives the shim source through ``write_with_key`` and
    ``shim_obj`` is handed to ``add_linking_file``; both happen inside the
    lowering callback rather than when this function is called.
    """
    shim_key = "_ZL8__hbltu214__nv_bfloat162S__nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL8__hbltu214__nv_bfloat162S__nbst(bool &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hbltu2(*a, *b);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat162)
    _ZL8__hbltu214__nv_bfloat162S__nbst = declare_device(
        shim_key, bool_(operand_ptr, operand_ptr)
    )

    # Python-level trampoline that ``compile_internal`` compiles below.
    def _ZL8__hbltu214__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return _ZL8__hbltu214__nv_bfloat162S__nbst(arg_0, arg_1)

    @lower(__hbltu2, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Create every slot before emitting any store, then fill them in order.
        slots = []
        for operand_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_ty)))
        for slot, operand_ty, value in zip(slots, sig.args, args):
            alignment = getattr(operand_ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        pointer_sig = signature(bool_, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL8__hbltu214__nv_bfloat162S__nbst_caller,
            pointer_sig,
            slots,
        )


# Run the setup while the module executes so that the ``@lower`` decoration
# for ``__hbltu2`` is applied.
_lower__ZL8__hbltu214__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hbgtu2():
    """Python-side placeholder for ``__hbgtu2``; does nothing when called.

    The function object is what ``_lower__ZL8__hbgtu214__nv_bfloat162S__nbst``
    passes to ``@lower`` as the callable being lowered.
    """


def _lower__ZL8__hbgtu214__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Install the lowering of ``__hbgtu2`` for two ``_type___nv_bfloat162`` operands.

    The call is forwarded to an ``extern "C"`` shim that takes its operands
    by pointer, so each operand is first stored into an ``alloca`` slot.

    ``shim_stream`` receives the shim source through ``write_with_key`` and
    ``shim_obj`` is handed to ``add_linking_file``; both happen inside the
    lowering callback rather than when this function is called.
    """
    shim_key = "_ZL8__hbgtu214__nv_bfloat162S__nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL8__hbgtu214__nv_bfloat162S__nbst(bool &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hbgtu2(*a, *b);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat162)
    _ZL8__hbgtu214__nv_bfloat162S__nbst = declare_device(
        shim_key, bool_(operand_ptr, operand_ptr)
    )

    # Python-level trampoline that ``compile_internal`` compiles below.
    def _ZL8__hbgtu214__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return _ZL8__hbgtu214__nv_bfloat162S__nbst(arg_0, arg_1)

    @lower(__hbgtu2, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Create every slot before emitting any store, then fill them in order.
        slots = []
        for operand_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_ty)))
        for slot, operand_ty, value in zip(slots, sig.args, args):
            alignment = getattr(operand_ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        pointer_sig = signature(bool_, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL8__hbgtu214__nv_bfloat162S__nbst_caller,
            pointer_sig,
            slots,
        )


# Run the setup while the module executes so that the ``@lower`` decoration
# for ``__hbgtu2`` is applied.
_lower__ZL8__hbgtu214__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __heq():
    """Python-side placeholder for ``__heq``; does nothing when called.

    The function object is what ``_lower__ZL5__heq13__nv_bfloat16S__nbst``
    passes to ``@lower`` as the callable being lowered.
    """


def _lower__ZL5__heq13__nv_bfloat16S__nbst(shim_stream, shim_obj):
    """Install the lowering of ``__heq`` for two ``_type___nv_bfloat16`` operands.

    The call is forwarded to an ``extern "C"`` shim that takes its operands
    by pointer, so each operand is first stored into an ``alloca`` slot.

    ``shim_stream`` receives the shim source through ``write_with_key`` and
    ``shim_obj`` is handed to ``add_linking_file``; both happen inside the
    lowering callback rather than when this function is called.
    """
    shim_key = "_ZL5__heq13__nv_bfloat16S__nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL5__heq13__nv_bfloat16S__nbst(bool &retval , __nv_bfloat16* a, __nv_bfloat16* b) {
        retval = __heq(*a, *b);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat16)
    _ZL5__heq13__nv_bfloat16S__nbst = declare_device(
        shim_key, bool_(operand_ptr, operand_ptr)
    )

    # Python-level trampoline that ``compile_internal`` compiles below.
    def _ZL5__heq13__nv_bfloat16S__nbst_caller(arg_0, arg_1):
        return _ZL5__heq13__nv_bfloat16S__nbst(arg_0, arg_1)

    @lower(__heq, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Create every slot before emitting any store, then fill them in order.
        slots = []
        for operand_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_ty)))
        for slot, operand_ty, value in zip(slots, sig.args, args):
            alignment = getattr(operand_ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        pointer_sig = signature(bool_, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL5__heq13__nv_bfloat16S__nbst_caller,
            pointer_sig,
            slots,
        )


# Run the setup while the module executes so that the ``@lower`` decoration
# for ``__heq`` is applied.
_lower__ZL5__heq13__nv_bfloat16S__nbst(shim_stream, shim_obj)


def __hne():
    """Python-side placeholder for ``__hne``; does nothing when called.

    The function object is what ``_lower__ZL5__hne13__nv_bfloat16S__nbst``
    passes to ``@lower`` as the callable being lowered.
    """


def _lower__ZL5__hne13__nv_bfloat16S__nbst(shim_stream, shim_obj):
    """Install the lowering of ``__hne`` for two ``_type___nv_bfloat16`` operands.

    The call is forwarded to an ``extern "C"`` shim that takes its operands
    by pointer, so each operand is first stored into an ``alloca`` slot.

    ``shim_stream`` receives the shim source through ``write_with_key`` and
    ``shim_obj`` is handed to ``add_linking_file``; both happen inside the
    lowering callback rather than when this function is called.
    """
    shim_key = "_ZL5__hne13__nv_bfloat16S__nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL5__hne13__nv_bfloat16S__nbst(bool &retval , __nv_bfloat16* a, __nv_bfloat16* b) {
        retval = __hne(*a, *b);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat16)
    _ZL5__hne13__nv_bfloat16S__nbst = declare_device(
        shim_key, bool_(operand_ptr, operand_ptr)
    )

    # Python-level trampoline that ``compile_internal`` compiles below.
    def _ZL5__hne13__nv_bfloat16S__nbst_caller(arg_0, arg_1):
        return _ZL5__hne13__nv_bfloat16S__nbst(arg_0, arg_1)

    @lower(__hne, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Create every slot before emitting any store, then fill them in order.
        slots = []
        for operand_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_ty)))
        for slot, operand_ty, value in zip(slots, sig.args, args):
            alignment = getattr(operand_ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        pointer_sig = signature(bool_, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL5__hne13__nv_bfloat16S__nbst_caller,
            pointer_sig,
            slots,
        )


# Run the setup while the module executes so that the ``@lower`` decoration
# for ``__hne`` is applied.
_lower__ZL5__hne13__nv_bfloat16S__nbst(shim_stream, shim_obj)


def __hle():
    """Python-side placeholder for ``__hle``; does nothing when called.

    The function object is what ``_lower__ZL5__hle13__nv_bfloat16S__nbst``
    passes to ``@lower`` as the callable being lowered.
    """


def _lower__ZL5__hle13__nv_bfloat16S__nbst(shim_stream, shim_obj):
    """Install the lowering of ``__hle`` for two ``_type___nv_bfloat16`` operands.

    The call is forwarded to an ``extern "C"`` shim that takes its operands
    by pointer, so each operand is first stored into an ``alloca`` slot.

    ``shim_stream`` receives the shim source through ``write_with_key`` and
    ``shim_obj`` is handed to ``add_linking_file``; both happen inside the
    lowering callback rather than when this function is called.
    """
    shim_key = "_ZL5__hle13__nv_bfloat16S__nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL5__hle13__nv_bfloat16S__nbst(bool &retval , __nv_bfloat16* a, __nv_bfloat16* b) {
        retval = __hle(*a, *b);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat16)
    _ZL5__hle13__nv_bfloat16S__nbst = declare_device(
        shim_key, bool_(operand_ptr, operand_ptr)
    )

    # Python-level trampoline that ``compile_internal`` compiles below.
    def _ZL5__hle13__nv_bfloat16S__nbst_caller(arg_0, arg_1):
        return _ZL5__hle13__nv_bfloat16S__nbst(arg_0, arg_1)

    @lower(__hle, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Create every slot before emitting any store, then fill them in order.
        slots = []
        for operand_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_ty)))
        for slot, operand_ty, value in zip(slots, sig.args, args):
            alignment = getattr(operand_ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        pointer_sig = signature(bool_, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL5__hle13__nv_bfloat16S__nbst_caller,
            pointer_sig,
            slots,
        )


# Run the setup while the module executes so that the ``@lower`` decoration
# for ``__hle`` is applied.
_lower__ZL5__hle13__nv_bfloat16S__nbst(shim_stream, shim_obj)


def __hge():
    """Python-side placeholder for ``__hge``; does nothing when called.

    The function object is what ``_lower__ZL5__hge13__nv_bfloat16S__nbst``
    passes to ``@lower`` as the callable being lowered.
    """


def _lower__ZL5__hge13__nv_bfloat16S__nbst(shim_stream, shim_obj):
    """Install the lowering of ``__hge`` for two ``_type___nv_bfloat16`` operands.

    The call is forwarded to an ``extern "C"`` shim that takes its operands
    by pointer, so each operand is first stored into an ``alloca`` slot.

    ``shim_stream`` receives the shim source through ``write_with_key`` and
    ``shim_obj`` is handed to ``add_linking_file``; both happen inside the
    lowering callback rather than when this function is called.
    """
    shim_key = "_ZL5__hge13__nv_bfloat16S__nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL5__hge13__nv_bfloat16S__nbst(bool &retval , __nv_bfloat16* a, __nv_bfloat16* b) {
        retval = __hge(*a, *b);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat16)
    _ZL5__hge13__nv_bfloat16S__nbst = declare_device(
        shim_key, bool_(operand_ptr, operand_ptr)
    )

    # Python-level trampoline that ``compile_internal`` compiles below.
    def _ZL5__hge13__nv_bfloat16S__nbst_caller(arg_0, arg_1):
        return _ZL5__hge13__nv_bfloat16S__nbst(arg_0, arg_1)

    @lower(__hge, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Create every slot before emitting any store, then fill them in order.
        slots = []
        for operand_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_ty)))
        for slot, operand_ty, value in zip(slots, sig.args, args):
            alignment = getattr(operand_ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        pointer_sig = signature(bool_, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL5__hge13__nv_bfloat16S__nbst_caller,
            pointer_sig,
            slots,
        )


# Run the setup while the module executes so that the ``@lower`` decoration
# for ``__hge`` is applied.
_lower__ZL5__hge13__nv_bfloat16S__nbst(shim_stream, shim_obj)


def __hlt():
    """Python-side placeholder for ``__hlt``; does nothing when called.

    The function object is what ``_lower__ZL5__hlt13__nv_bfloat16S__nbst``
    passes to ``@lower`` as the callable being lowered.
    """


def _lower__ZL5__hlt13__nv_bfloat16S__nbst(shim_stream, shim_obj):
    """Install the lowering of ``__hlt`` for two ``_type___nv_bfloat16`` operands.

    The call is forwarded to an ``extern "C"`` shim that takes its operands
    by pointer, so each operand is first stored into an ``alloca`` slot.

    ``shim_stream`` receives the shim source through ``write_with_key`` and
    ``shim_obj`` is handed to ``add_linking_file``; both happen inside the
    lowering callback rather than when this function is called.
    """
    shim_key = "_ZL5__hlt13__nv_bfloat16S__nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL5__hlt13__nv_bfloat16S__nbst(bool &retval , __nv_bfloat16* a, __nv_bfloat16* b) {
        retval = __hlt(*a, *b);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat16)
    _ZL5__hlt13__nv_bfloat16S__nbst = declare_device(
        shim_key, bool_(operand_ptr, operand_ptr)
    )

    # Python-level trampoline that ``compile_internal`` compiles below.
    def _ZL5__hlt13__nv_bfloat16S__nbst_caller(arg_0, arg_1):
        return _ZL5__hlt13__nv_bfloat16S__nbst(arg_0, arg_1)

    @lower(__hlt, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Create every slot before emitting any store, then fill them in order.
        slots = []
        for operand_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_ty)))
        for slot, operand_ty, value in zip(slots, sig.args, args):
            alignment = getattr(operand_ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        pointer_sig = signature(bool_, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL5__hlt13__nv_bfloat16S__nbst_caller,
            pointer_sig,
            slots,
        )


# Run the setup while the module executes so that the ``@lower`` decoration
# for ``__hlt`` is applied.
_lower__ZL5__hlt13__nv_bfloat16S__nbst(shim_stream, shim_obj)


def __hgt():
    """Python-side placeholder for ``__hgt``; does nothing when called.

    The function object is what ``_lower__ZL5__hgt13__nv_bfloat16S__nbst``
    passes to ``@lower`` as the callable being lowered.
    """


def _lower__ZL5__hgt13__nv_bfloat16S__nbst(shim_stream, shim_obj):
    """Install the lowering of ``__hgt`` for two ``_type___nv_bfloat16`` operands.

    The call is forwarded to an ``extern "C"`` shim that takes its operands
    by pointer, so each operand is first stored into an ``alloca`` slot.

    ``shim_stream`` receives the shim source through ``write_with_key`` and
    ``shim_obj`` is handed to ``add_linking_file``; both happen inside the
    lowering callback rather than when this function is called.
    """
    shim_key = "_ZL5__hgt13__nv_bfloat16S__nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL5__hgt13__nv_bfloat16S__nbst(bool &retval , __nv_bfloat16* a, __nv_bfloat16* b) {
        retval = __hgt(*a, *b);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat16)
    _ZL5__hgt13__nv_bfloat16S__nbst = declare_device(
        shim_key, bool_(operand_ptr, operand_ptr)
    )

    # Python-level trampoline that ``compile_internal`` compiles below.
    def _ZL5__hgt13__nv_bfloat16S__nbst_caller(arg_0, arg_1):
        return _ZL5__hgt13__nv_bfloat16S__nbst(arg_0, arg_1)

    @lower(__hgt, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Create every slot before emitting any store, then fill them in order.
        slots = []
        for operand_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_ty)))
        for slot, operand_ty, value in zip(slots, sig.args, args):
            alignment = getattr(operand_ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        pointer_sig = signature(bool_, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL5__hgt13__nv_bfloat16S__nbst_caller,
            pointer_sig,
            slots,
        )


# Run the setup while the module executes so that the ``@lower`` decoration
# for ``__hgt`` is applied.
_lower__ZL5__hgt13__nv_bfloat16S__nbst(shim_stream, shim_obj)


def __hequ():
    """Python-side placeholder for ``__hequ``; does nothing when called.

    The function object is what ``_lower__ZL6__hequ13__nv_bfloat16S__nbst``
    passes to ``@lower`` as the callable being lowered.
    """


def _lower__ZL6__hequ13__nv_bfloat16S__nbst(shim_stream, shim_obj):
    """Install the lowering of ``__hequ`` for two ``_type___nv_bfloat16`` operands.

    The call is forwarded to an ``extern "C"`` shim that takes its operands
    by pointer, so each operand is first stored into an ``alloca`` slot.

    ``shim_stream`` receives the shim source through ``write_with_key`` and
    ``shim_obj`` is handed to ``add_linking_file``; both happen inside the
    lowering callback rather than when this function is called.
    """
    shim_key = "_ZL6__hequ13__nv_bfloat16S__nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL6__hequ13__nv_bfloat16S__nbst(bool &retval , __nv_bfloat16* a, __nv_bfloat16* b) {
        retval = __hequ(*a, *b);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat16)
    _ZL6__hequ13__nv_bfloat16S__nbst = declare_device(
        shim_key, bool_(operand_ptr, operand_ptr)
    )

    # Python-level trampoline that ``compile_internal`` compiles below.
    def _ZL6__hequ13__nv_bfloat16S__nbst_caller(arg_0, arg_1):
        return _ZL6__hequ13__nv_bfloat16S__nbst(arg_0, arg_1)

    @lower(__hequ, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Create every slot before emitting any store, then fill them in order.
        slots = []
        for operand_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_ty)))
        for slot, operand_ty, value in zip(slots, sig.args, args):
            alignment = getattr(operand_ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        pointer_sig = signature(bool_, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL6__hequ13__nv_bfloat16S__nbst_caller,
            pointer_sig,
            slots,
        )


# Run the setup while the module executes so that the ``@lower`` decoration
# for ``__hequ`` is applied.
_lower__ZL6__hequ13__nv_bfloat16S__nbst(shim_stream, shim_obj)


def __hneu():
    """Python-side placeholder for ``__hneu``; does nothing when called.

    The function object is what ``_lower__ZL6__hneu13__nv_bfloat16S__nbst``
    passes to ``@lower`` as the callable being lowered.
    """


def _lower__ZL6__hneu13__nv_bfloat16S__nbst(shim_stream, shim_obj):
    """Install the lowering of ``__hneu`` for two ``_type___nv_bfloat16`` operands.

    The call is forwarded to an ``extern "C"`` shim that takes its operands
    by pointer, so each operand is first stored into an ``alloca`` slot.

    ``shim_stream`` receives the shim source through ``write_with_key`` and
    ``shim_obj`` is handed to ``add_linking_file``; both happen inside the
    lowering callback rather than when this function is called.
    """
    shim_key = "_ZL6__hneu13__nv_bfloat16S__nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL6__hneu13__nv_bfloat16S__nbst(bool &retval , __nv_bfloat16* a, __nv_bfloat16* b) {
        retval = __hneu(*a, *b);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat16)
    _ZL6__hneu13__nv_bfloat16S__nbst = declare_device(
        shim_key, bool_(operand_ptr, operand_ptr)
    )

    # Python-level trampoline that ``compile_internal`` compiles below.
    def _ZL6__hneu13__nv_bfloat16S__nbst_caller(arg_0, arg_1):
        return _ZL6__hneu13__nv_bfloat16S__nbst(arg_0, arg_1)

    @lower(__hneu, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Create every slot before emitting any store, then fill them in order.
        slots = []
        for operand_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_ty)))
        for slot, operand_ty, value in zip(slots, sig.args, args):
            alignment = getattr(operand_ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        pointer_sig = signature(bool_, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL6__hneu13__nv_bfloat16S__nbst_caller,
            pointer_sig,
            slots,
        )


# Run the setup while the module executes so that the ``@lower`` decoration
# for ``__hneu`` is applied.
_lower__ZL6__hneu13__nv_bfloat16S__nbst(shim_stream, shim_obj)


def __hleu():
    """Python-side placeholder for ``__hleu``; does nothing when called.

    The function object is what ``_lower__ZL6__hleu13__nv_bfloat16S__nbst``
    passes to ``@lower`` as the callable being lowered.
    """


def _lower__ZL6__hleu13__nv_bfloat16S__nbst(shim_stream, shim_obj):
    """Install the lowering of ``__hleu`` for two ``_type___nv_bfloat16`` operands.

    The call is forwarded to an ``extern "C"`` shim that takes its operands
    by pointer, so each operand is first stored into an ``alloca`` slot.

    ``shim_stream`` receives the shim source through ``write_with_key`` and
    ``shim_obj`` is handed to ``add_linking_file``; both happen inside the
    lowering callback rather than when this function is called.
    """
    shim_key = "_ZL6__hleu13__nv_bfloat16S__nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL6__hleu13__nv_bfloat16S__nbst(bool &retval , __nv_bfloat16* a, __nv_bfloat16* b) {
        retval = __hleu(*a, *b);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat16)
    _ZL6__hleu13__nv_bfloat16S__nbst = declare_device(
        shim_key, bool_(operand_ptr, operand_ptr)
    )

    # Python-level trampoline that ``compile_internal`` compiles below.
    def _ZL6__hleu13__nv_bfloat16S__nbst_caller(arg_0, arg_1):
        return _ZL6__hleu13__nv_bfloat16S__nbst(arg_0, arg_1)

    @lower(__hleu, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Create every slot before emitting any store, then fill them in order.
        slots = []
        for operand_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_ty)))
        for slot, operand_ty, value in zip(slots, sig.args, args):
            alignment = getattr(operand_ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        pointer_sig = signature(bool_, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL6__hleu13__nv_bfloat16S__nbst_caller,
            pointer_sig,
            slots,
        )


# Run the setup while the module executes so that the ``@lower`` decoration
# for ``__hleu`` is applied.
_lower__ZL6__hleu13__nv_bfloat16S__nbst(shim_stream, shim_obj)


def __hgeu():
    """Python-side placeholder for ``__hgeu``; does nothing when called.

    The function object is what ``_lower__ZL6__hgeu13__nv_bfloat16S__nbst``
    passes to ``@lower`` as the callable being lowered.
    """


def _lower__ZL6__hgeu13__nv_bfloat16S__nbst(shim_stream, shim_obj):
    """Install the lowering of ``__hgeu`` for two ``_type___nv_bfloat16`` operands.

    The call is forwarded to an ``extern "C"`` shim that takes its operands
    by pointer, so each operand is first stored into an ``alloca`` slot.

    ``shim_stream`` receives the shim source through ``write_with_key`` and
    ``shim_obj`` is handed to ``add_linking_file``; both happen inside the
    lowering callback rather than when this function is called.
    """
    shim_key = "_ZL6__hgeu13__nv_bfloat16S__nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL6__hgeu13__nv_bfloat16S__nbst(bool &retval , __nv_bfloat16* a, __nv_bfloat16* b) {
        retval = __hgeu(*a, *b);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat16)
    _ZL6__hgeu13__nv_bfloat16S__nbst = declare_device(
        shim_key, bool_(operand_ptr, operand_ptr)
    )

    # Python-level trampoline that ``compile_internal`` compiles below.
    def _ZL6__hgeu13__nv_bfloat16S__nbst_caller(arg_0, arg_1):
        return _ZL6__hgeu13__nv_bfloat16S__nbst(arg_0, arg_1)

    @lower(__hgeu, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Create every slot before emitting any store, then fill them in order.
        slots = []
        for operand_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_ty)))
        for slot, operand_ty, value in zip(slots, sig.args, args):
            alignment = getattr(operand_ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        pointer_sig = signature(bool_, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL6__hgeu13__nv_bfloat16S__nbst_caller,
            pointer_sig,
            slots,
        )


# Run the setup while the module executes so that the ``@lower`` decoration
# for ``__hgeu`` is applied.
_lower__ZL6__hgeu13__nv_bfloat16S__nbst(shim_stream, shim_obj)


def __hltu():
    """Python-side placeholder for ``__hltu``; does nothing when called.

    The function object is what ``_lower__ZL6__hltu13__nv_bfloat16S__nbst``
    passes to ``@lower`` as the callable being lowered.
    """


def _lower__ZL6__hltu13__nv_bfloat16S__nbst(shim_stream, shim_obj):
    """Install the lowering of ``__hltu`` for two ``_type___nv_bfloat16`` operands.

    The call is forwarded to an ``extern "C"`` shim that takes its operands
    by pointer, so each operand is first stored into an ``alloca`` slot.

    ``shim_stream`` receives the shim source through ``write_with_key`` and
    ``shim_obj`` is handed to ``add_linking_file``; both happen inside the
    lowering callback rather than when this function is called.
    """
    shim_key = "_ZL6__hltu13__nv_bfloat16S__nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL6__hltu13__nv_bfloat16S__nbst(bool &retval , __nv_bfloat16* a, __nv_bfloat16* b) {
        retval = __hltu(*a, *b);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat16)
    _ZL6__hltu13__nv_bfloat16S__nbst = declare_device(
        shim_key, bool_(operand_ptr, operand_ptr)
    )

    # Python-level trampoline that ``compile_internal`` compiles below.
    def _ZL6__hltu13__nv_bfloat16S__nbst_caller(arg_0, arg_1):
        return _ZL6__hltu13__nv_bfloat16S__nbst(arg_0, arg_1)

    @lower(__hltu, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Create every slot before emitting any store, then fill them in order.
        slots = []
        for operand_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_ty)))
        for slot, operand_ty, value in zip(slots, sig.args, args):
            alignment = getattr(operand_ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        pointer_sig = signature(bool_, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL6__hltu13__nv_bfloat16S__nbst_caller,
            pointer_sig,
            slots,
        )


# Run the setup while the module executes so that the ``@lower`` decoration
# for ``__hltu`` is applied.
_lower__ZL6__hltu13__nv_bfloat16S__nbst(shim_stream, shim_obj)


def __hgtu():
    """Python-side placeholder for ``__hgtu``; does nothing when called.

    The function object is what ``_lower__ZL6__hgtu13__nv_bfloat16S__nbst``
    passes to ``@lower`` as the callable being lowered.
    """


def _lower__ZL6__hgtu13__nv_bfloat16S__nbst(shim_stream, shim_obj):
    """Install the lowering of ``__hgtu`` for two ``_type___nv_bfloat16`` operands.

    The call is forwarded to an ``extern "C"`` shim that takes its operands
    by pointer, so each operand is first stored into an ``alloca`` slot.

    ``shim_stream`` receives the shim source through ``write_with_key`` and
    ``shim_obj`` is handed to ``add_linking_file``; both happen inside the
    lowering callback rather than when this function is called.
    """
    shim_key = "_ZL6__hgtu13__nv_bfloat16S__nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL6__hgtu13__nv_bfloat16S__nbst(bool &retval , __nv_bfloat16* a, __nv_bfloat16* b) {
        retval = __hgtu(*a, *b);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat16)
    _ZL6__hgtu13__nv_bfloat16S__nbst = declare_device(
        shim_key, bool_(operand_ptr, operand_ptr)
    )

    # Python-level trampoline that ``compile_internal`` compiles below.
    def _ZL6__hgtu13__nv_bfloat16S__nbst_caller(arg_0, arg_1):
        return _ZL6__hgtu13__nv_bfloat16S__nbst(arg_0, arg_1)

    @lower(__hgtu, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Create every slot before emitting any store, then fill them in order.
        slots = []
        for operand_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_ty)))
        for slot, operand_ty, value in zip(slots, sig.args, args):
            alignment = getattr(operand_ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        pointer_sig = signature(bool_, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL6__hgtu13__nv_bfloat16S__nbst_caller,
            pointer_sig,
            slots,
        )


# Run the setup while the module executes so that the ``@lower`` decoration
# for ``__hgtu`` is applied.
_lower__ZL6__hgtu13__nv_bfloat16S__nbst(shim_stream, shim_obj)


def __hisnan():
    """Python-side placeholder for ``__hisnan``; does nothing when called.

    The function object is what ``_lower__ZL8__hisnan13__nv_bfloat16_nbst``
    passes to ``@lower`` as the callable being lowered.
    """


def _lower__ZL8__hisnan13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Install the lowering of ``__hisnan`` for one ``_type___nv_bfloat16`` operand.

    The call is forwarded to an ``extern "C"`` shim that takes its operand by
    pointer, so the operand is first stored into an ``alloca`` slot.

    ``shim_stream`` receives the shim source through ``write_with_key`` and
    ``shim_obj`` is handed to ``add_linking_file``; both happen inside the
    lowering callback rather than when this function is called.
    """
    shim_key = "_ZL8__hisnan13__nv_bfloat16_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL8__hisnan13__nv_bfloat16_nbst(bool &retval , __nv_bfloat16* a) {
        retval = __hisnan(*a);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat16)
    _ZL8__hisnan13__nv_bfloat16_nbst = declare_device(
        shim_key, bool_(operand_ptr)
    )

    # Python-level trampoline that ``compile_internal`` compiles below.
    def _ZL8__hisnan13__nv_bfloat16_nbst_caller(arg_0):
        return _ZL8__hisnan13__nv_bfloat16_nbst(arg_0)

    @lower(__hisnan, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Create every slot before emitting any store, then fill them in order.
        slots = []
        for operand_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_ty)))
        for slot, operand_ty, value in zip(slots, sig.args, args):
            alignment = getattr(operand_ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        pointer_sig = signature(bool_, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL8__hisnan13__nv_bfloat16_nbst_caller,
            pointer_sig,
            slots,
        )


# Run the setup while the module executes so that the ``@lower`` decoration
# for ``__hisnan`` is applied.
_lower__ZL8__hisnan13__nv_bfloat16_nbst(shim_stream, shim_obj)


def __hmax():
    """Python-side placeholder for ``__hmax``; does nothing when called.

    The function object is what ``_lower__ZL6__hmax13__nv_bfloat16S__nbst``
    passes to ``@lower`` as the callable being lowered.
    """


def _lower__ZL6__hmax13__nv_bfloat16S__nbst(shim_stream, shim_obj):
    """Install the lowering of ``__hmax`` for two ``_type___nv_bfloat16`` operands.

    The call is forwarded to an ``extern "C"`` shim that takes its operands
    by pointer, so each operand is first stored into an ``alloca`` slot. The
    declared result type is ``_type___nv_bfloat16``.

    ``shim_stream`` receives the shim source through ``write_with_key`` and
    ``shim_obj`` is handed to ``add_linking_file``; both happen inside the
    lowering callback rather than when this function is called.
    """
    shim_key = "_ZL6__hmax13__nv_bfloat16S__nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL6__hmax13__nv_bfloat16S__nbst(__nv_bfloat16 &retval , __nv_bfloat16* a, __nv_bfloat16* b) {
        retval = __hmax(*a, *b);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat16)
    _ZL6__hmax13__nv_bfloat16S__nbst = declare_device(
        shim_key, _type___nv_bfloat16(operand_ptr, operand_ptr)
    )

    # Python-level trampoline that ``compile_internal`` compiles below.
    def _ZL6__hmax13__nv_bfloat16S__nbst_caller(arg_0, arg_1):
        return _ZL6__hmax13__nv_bfloat16S__nbst(arg_0, arg_1)

    @lower(__hmax, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Create every slot before emitting any store, then fill them in order.
        slots = []
        for operand_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_ty)))
        for slot, operand_ty, value in zip(slots, sig.args, args):
            alignment = getattr(operand_ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        pointer_sig = signature(_type___nv_bfloat16, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL6__hmax13__nv_bfloat16S__nbst_caller,
            pointer_sig,
            slots,
        )


# Run the setup while the module executes so that the ``@lower`` decoration
# for ``__hmax`` is applied.
_lower__ZL6__hmax13__nv_bfloat16S__nbst(shim_stream, shim_obj)


def __hmin():
    """Python-side placeholder for ``__hmin``; does nothing when called.

    The function object is what ``_lower__ZL6__hmin13__nv_bfloat16S__nbst``
    passes to ``@lower`` as the callable being lowered.
    """


def _lower__ZL6__hmin13__nv_bfloat16S__nbst(shim_stream, shim_obj):
    """Install the lowering of ``__hmin`` for two ``_type___nv_bfloat16`` operands.

    The call is forwarded to an ``extern "C"`` shim that takes its operands
    by pointer, so each operand is first stored into an ``alloca`` slot. The
    declared result type is ``_type___nv_bfloat16``.

    ``shim_stream`` receives the shim source through ``write_with_key`` and
    ``shim_obj`` is handed to ``add_linking_file``; both happen inside the
    lowering callback rather than when this function is called.
    """
    shim_key = "_ZL6__hmin13__nv_bfloat16S__nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL6__hmin13__nv_bfloat16S__nbst(__nv_bfloat16 &retval , __nv_bfloat16* a, __nv_bfloat16* b) {
        retval = __hmin(*a, *b);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat16)
    _ZL6__hmin13__nv_bfloat16S__nbst = declare_device(
        shim_key, _type___nv_bfloat16(operand_ptr, operand_ptr)
    )

    # Python-level trampoline that ``compile_internal`` compiles below.
    def _ZL6__hmin13__nv_bfloat16S__nbst_caller(arg_0, arg_1):
        return _ZL6__hmin13__nv_bfloat16S__nbst(arg_0, arg_1)

    @lower(__hmin, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Create every slot before emitting any store, then fill them in order.
        slots = []
        for operand_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_ty)))
        for slot, operand_ty, value in zip(slots, sig.args, args):
            alignment = getattr(operand_ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        pointer_sig = signature(_type___nv_bfloat16, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL6__hmin13__nv_bfloat16S__nbst_caller,
            pointer_sig,
            slots,
        )


# Run the setup while the module executes so that the ``@lower`` decoration
# for ``__hmin`` is applied.
_lower__ZL6__hmin13__nv_bfloat16S__nbst(shim_stream, shim_obj)


def __hmax_nan():
    """Python-side placeholder for ``__hmax_nan``; does nothing when called.

    The function object is what ``_lower__ZL10__hmax_nan13__nv_bfloat16S__nbst``
    passes to ``@lower`` as the callable being lowered.
    """


def _lower__ZL10__hmax_nan13__nv_bfloat16S__nbst(shim_stream, shim_obj):
    """Install the lowering of ``__hmax_nan`` for two ``_type___nv_bfloat16`` operands.

    The call is forwarded to an ``extern "C"`` shim that takes its operands
    by pointer, so each operand is first stored into an ``alloca`` slot. The
    declared result type is ``_type___nv_bfloat16``.

    ``shim_stream`` receives the shim source through ``write_with_key`` and
    ``shim_obj`` is handed to ``add_linking_file``; both happen inside the
    lowering callback rather than when this function is called.
    """
    shim_key = "_ZL10__hmax_nan13__nv_bfloat16S__nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL10__hmax_nan13__nv_bfloat16S__nbst(__nv_bfloat16 &retval , __nv_bfloat16* a, __nv_bfloat16* b) {
        retval = __hmax_nan(*a, *b);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat16)
    _ZL10__hmax_nan13__nv_bfloat16S__nbst = declare_device(
        shim_key, _type___nv_bfloat16(operand_ptr, operand_ptr)
    )

    # Python-level trampoline that ``compile_internal`` compiles below.
    def _ZL10__hmax_nan13__nv_bfloat16S__nbst_caller(arg_0, arg_1):
        return _ZL10__hmax_nan13__nv_bfloat16S__nbst(arg_0, arg_1)

    @lower(__hmax_nan, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Create every slot before emitting any store, then fill them in order.
        slots = []
        for operand_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_ty)))
        for slot, operand_ty, value in zip(slots, sig.args, args):
            alignment = getattr(operand_ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        pointer_sig = signature(_type___nv_bfloat16, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL10__hmax_nan13__nv_bfloat16S__nbst_caller,
            pointer_sig,
            slots,
        )


# Run the setup while the module executes so that the ``@lower`` decoration
# for ``__hmax_nan`` is applied.
_lower__ZL10__hmax_nan13__nv_bfloat16S__nbst(shim_stream, shim_obj)


def __hmin_nan():
    """Python-side placeholder for ``__hmin_nan``; does nothing when called.

    The function object is what ``_lower__ZL10__hmin_nan13__nv_bfloat16S__nbst``
    passes to ``@lower`` as the callable being lowered.
    """


def _lower__ZL10__hmin_nan13__nv_bfloat16S__nbst(shim_stream, shim_obj):
    """Install the lowering of ``__hmin_nan`` for two ``_type___nv_bfloat16`` operands.

    The call is forwarded to an ``extern "C"`` shim that takes its operands
    by pointer, so each operand is first stored into an ``alloca`` slot. The
    declared result type is ``_type___nv_bfloat16``.

    ``shim_stream`` receives the shim source through ``write_with_key`` and
    ``shim_obj`` is handed to ``add_linking_file``; both happen inside the
    lowering callback rather than when this function is called.
    """
    shim_key = "_ZL10__hmin_nan13__nv_bfloat16S__nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL10__hmin_nan13__nv_bfloat16S__nbst(__nv_bfloat16 &retval , __nv_bfloat16* a, __nv_bfloat16* b) {
        retval = __hmin_nan(*a, *b);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat16)
    _ZL10__hmin_nan13__nv_bfloat16S__nbst = declare_device(
        shim_key, _type___nv_bfloat16(operand_ptr, operand_ptr)
    )

    # Python-level trampoline that ``compile_internal`` compiles below.
    def _ZL10__hmin_nan13__nv_bfloat16S__nbst_caller(arg_0, arg_1):
        return _ZL10__hmin_nan13__nv_bfloat16S__nbst(arg_0, arg_1)

    @lower(__hmin_nan, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Create every slot before emitting any store, then fill them in order.
        slots = []
        for operand_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_ty)))
        for slot, operand_ty, value in zip(slots, sig.args, args):
            alignment = getattr(operand_ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        pointer_sig = signature(_type___nv_bfloat16, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL10__hmin_nan13__nv_bfloat16S__nbst_caller,
            pointer_sig,
            slots,
        )


# Run the setup while the module executes so that the ``@lower`` decoration
# for ``__hmin_nan`` is applied.
_lower__ZL10__hmin_nan13__nv_bfloat16S__nbst(shim_stream, shim_obj)


def __hfma_relu():
    """Python-side placeholder for ``__hfma_relu``; does nothing when called.

    The function object is what
    ``_lower__ZL11__hfma_relu13__nv_bfloat16S_S__nbst`` passes to ``@lower``
    as the callable being lowered.
    """


def _lower__ZL11__hfma_relu13__nv_bfloat16S_S__nbst(shim_stream, shim_obj):
    """Install the lowering of ``__hfma_relu`` for three ``_type___nv_bfloat16`` operands.

    The call is forwarded to an ``extern "C"`` shim that takes its operands
    by pointer, so each operand is first stored into an ``alloca`` slot. The
    declared result type is ``_type___nv_bfloat16``.

    ``shim_stream`` receives the shim source through ``write_with_key`` and
    ``shim_obj`` is handed to ``add_linking_file``; both happen inside the
    lowering callback rather than when this function is called.
    """
    shim_key = "_ZL11__hfma_relu13__nv_bfloat16S_S__nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL11__hfma_relu13__nv_bfloat16S_S__nbst(__nv_bfloat16 &retval , __nv_bfloat16* a, __nv_bfloat16* b, __nv_bfloat16* c) {
        retval = __hfma_relu(*a, *b, *c);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat16)
    _ZL11__hfma_relu13__nv_bfloat16S_S__nbst = declare_device(
        shim_key,
        _type___nv_bfloat16(operand_ptr, operand_ptr, operand_ptr),
    )

    # Python-level trampoline that ``compile_internal`` compiles below.
    def _ZL11__hfma_relu13__nv_bfloat16S_S__nbst_caller(arg_0, arg_1, arg_2):
        return _ZL11__hfma_relu13__nv_bfloat16S_S__nbst(arg_0, arg_1, arg_2)

    @lower(
        __hfma_relu,
        _type___nv_bfloat16,
        _type___nv_bfloat16,
        _type___nv_bfloat16,
    )
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Create every slot before emitting any store, then fill them in order.
        slots = []
        for operand_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_ty)))
        for slot, operand_ty, value in zip(slots, sig.args, args):
            alignment = getattr(operand_ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        pointer_sig = signature(
            _type___nv_bfloat16, operand_ptr, operand_ptr, operand_ptr
        )
        return context.compile_internal(
            builder,
            _ZL11__hfma_relu13__nv_bfloat16S_S__nbst_caller,
            pointer_sig,
            slots,
        )


# Run the setup while the module executes so that the ``@lower`` decoration
# for ``__hfma_relu`` is applied.
_lower__ZL11__hfma_relu13__nv_bfloat16S_S__nbst(shim_stream, shim_obj)


def __hmax2():
    """Python-side placeholder for ``__hmax2``; does nothing when called.

    The function object is what ``_lower__ZL7__hmax214__nv_bfloat162S__nbst``
    passes to ``@lower`` as the callable being lowered.
    """


def _lower__ZL7__hmax214__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Install the lowering of ``__hmax2`` for two ``_type___nv_bfloat162`` operands.

    The call is forwarded to an ``extern "C"`` shim that takes its operands
    by pointer, so each operand is first stored into an ``alloca`` slot. The
    declared result type is ``_type___nv_bfloat162``.

    ``shim_stream`` receives the shim source through ``write_with_key`` and
    ``shim_obj`` is handed to ``add_linking_file``; both happen inside the
    lowering callback rather than when this function is called.
    """
    shim_key = "_ZL7__hmax214__nv_bfloat162S__nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL7__hmax214__nv_bfloat162S__nbst(__nv_bfloat162 &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hmax2(*a, *b);
        return 0;
    }
        """

    operand_ptr = CPointer(_type___nv_bfloat162)
    _ZL7__hmax214__nv_bfloat162S__nbst = declare_device(
        shim_key, _type___nv_bfloat162(operand_ptr, operand_ptr)
    )

    # Python-level trampoline that ``compile_internal`` compiles below.
    def _ZL7__hmax214__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return _ZL7__hmax214__nv_bfloat162S__nbst(arg_0, arg_1)

    @lower(__hmax2, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Create every slot before emitting any store, then fill them in order.
        slots = []
        for operand_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(operand_ty)))
        for slot, operand_ty, value in zip(slots, sig.args, args):
            alignment = getattr(operand_ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        pointer_sig = signature(_type___nv_bfloat162, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL7__hmax214__nv_bfloat162S__nbst_caller,
            pointer_sig,
            slots,
        )


# Run the setup while the module executes so that the ``@lower`` decoration
# for ``__hmax2`` is applied.
_lower__ZL7__hmax214__nv_bfloat162S__nbst(shim_stream, shim_obj)


def __hmin2():
    """Python-side placeholder for ``__hmin2``; does nothing when called.

    The function object is what ``_lower__ZL7__hmin214__nv_bfloat162S__nbst``
    passes to ``@lower`` as the callable being lowered.
    """


def _lower__ZL7__hmin214__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hmin2`` for two ``_type___nv_bfloat162`` operands.

    Declares the ``extern "C"`` shim as a device function taking one pointer
    per operand, then installs a lowering that stores each operand into its
    own ``alloca`` slot and calls the shim through a small Python caller.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source,
            keyed by the shim symbol, each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL7__hmin214__nv_bfloat162S__nbst"
    # The shim writes its result through `retval` and returns status 0.
    shim_source = """
    extern "C" __device__ int
    _ZL7__hmin214__nv_bfloat162S__nbst(__nv_bfloat162 &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hmin2(*a, *b);
        return 0;
    }
        """
    operand_ptr = CPointer(_type___nv_bfloat162)

    _ZL7__hmin214__nv_bfloat162S__nbst = declare_device(
        shim_name, _type___nv_bfloat162(operand_ptr, operand_ptr)
    )

    def _ZL7__hmin214__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return _ZL7__hmin214__nv_bfloat162S__nbst(arg_0, arg_1)

    @lower(__hmin2, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        # Add the shim object as a linking file and publish the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes pointers: allocate every slot first, then fill them.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(_type___nv_bfloat162, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL7__hmin214__nv_bfloat162S__nbst_caller,
            caller_sig,
            slots,
        )


# Run the registration once, as the module executes, for `__hmin2`.
_lower__ZL7__hmin214__nv_bfloat162S__nbst(
    shim_stream=shim_stream, shim_obj=shim_obj
)


def __hmax2_nan():
    """Python-side placeholder for the CUDA ``__hmax2_nan`` function.

    Calling it directly does nothing and returns ``None``; the object is the
    key under which the ``_type___nv_bfloat162`` lowering is registered.
    """


def _lower__ZL11__hmax2_nan14__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hmax2_nan`` for two ``_type___nv_bfloat162`` operands.

    Declares the ``extern "C"`` shim as a device function taking one pointer
    per operand, then installs a lowering that stores each operand into its
    own ``alloca`` slot and calls the shim through a small Python caller.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source,
            keyed by the shim symbol, each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL11__hmax2_nan14__nv_bfloat162S__nbst"
    # The shim writes its result through `retval` and returns status 0.
    shim_source = """
    extern "C" __device__ int
    _ZL11__hmax2_nan14__nv_bfloat162S__nbst(__nv_bfloat162 &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hmax2_nan(*a, *b);
        return 0;
    }
        """
    operand_ptr = CPointer(_type___nv_bfloat162)

    _ZL11__hmax2_nan14__nv_bfloat162S__nbst = declare_device(
        shim_name, _type___nv_bfloat162(operand_ptr, operand_ptr)
    )

    def _ZL11__hmax2_nan14__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return _ZL11__hmax2_nan14__nv_bfloat162S__nbst(arg_0, arg_1)

    @lower(__hmax2_nan, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        # Add the shim object as a linking file and publish the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes pointers: allocate every slot first, then fill them.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(_type___nv_bfloat162, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL11__hmax2_nan14__nv_bfloat162S__nbst_caller,
            caller_sig,
            slots,
        )


# Run the registration once, as the module executes, for `__hmax2_nan`.
_lower__ZL11__hmax2_nan14__nv_bfloat162S__nbst(
    shim_stream=shim_stream, shim_obj=shim_obj
)


def __hmin2_nan():
    """Python-side placeholder for the CUDA ``__hmin2_nan`` function.

    Calling it directly does nothing and returns ``None``; the object is the
    key under which the ``_type___nv_bfloat162`` lowering is registered.
    """


def _lower__ZL11__hmin2_nan14__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hmin2_nan`` for two ``_type___nv_bfloat162`` operands.

    Declares the ``extern "C"`` shim as a device function taking one pointer
    per operand, then installs a lowering that stores each operand into its
    own ``alloca`` slot and calls the shim through a small Python caller.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source,
            keyed by the shim symbol, each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL11__hmin2_nan14__nv_bfloat162S__nbst"
    # The shim writes its result through `retval` and returns status 0.
    shim_source = """
    extern "C" __device__ int
    _ZL11__hmin2_nan14__nv_bfloat162S__nbst(__nv_bfloat162 &retval , __nv_bfloat162* a, __nv_bfloat162* b) {
        retval = __hmin2_nan(*a, *b);
        return 0;
    }
        """
    operand_ptr = CPointer(_type___nv_bfloat162)

    _ZL11__hmin2_nan14__nv_bfloat162S__nbst = declare_device(
        shim_name, _type___nv_bfloat162(operand_ptr, operand_ptr)
    )

    def _ZL11__hmin2_nan14__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return _ZL11__hmin2_nan14__nv_bfloat162S__nbst(arg_0, arg_1)

    @lower(__hmin2_nan, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        # Add the shim object as a linking file and publish the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes pointers: allocate every slot first, then fill them.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(_type___nv_bfloat162, operand_ptr, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL11__hmin2_nan14__nv_bfloat162S__nbst_caller,
            caller_sig,
            slots,
        )


# Run the registration once, as the module executes, for `__hmin2_nan`.
_lower__ZL11__hmin2_nan14__nv_bfloat162S__nbst(
    shim_stream=shim_stream, shim_obj=shim_obj
)


def __hfma2_relu():
    """Python-side placeholder for the CUDA ``__hfma2_relu`` function.

    Calling it directly does nothing and returns ``None``; the object is the
    key under which the three-operand ``_type___nv_bfloat162`` lowering is
    registered.
    """


def _lower__ZL12__hfma2_relu14__nv_bfloat162S_S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hfma2_relu`` for three ``_type___nv_bfloat162`` operands.

    Declares the ``extern "C"`` shim as a device function taking one pointer
    per operand, then installs a lowering that stores each operand into its
    own ``alloca`` slot and calls the shim through a small Python caller.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source,
            keyed by the shim symbol, each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL12__hfma2_relu14__nv_bfloat162S_S__nbst"
    # The shim writes its result through `retval` and returns status 0.
    shim_source = """
    extern "C" __device__ int
    _ZL12__hfma2_relu14__nv_bfloat162S_S__nbst(__nv_bfloat162 &retval , __nv_bfloat162* a, __nv_bfloat162* b, __nv_bfloat162* c) {
        retval = __hfma2_relu(*a, *b, *c);
        return 0;
    }
        """
    operand_ptr = CPointer(_type___nv_bfloat162)

    _ZL12__hfma2_relu14__nv_bfloat162S_S__nbst = declare_device(
        shim_name,
        _type___nv_bfloat162(operand_ptr, operand_ptr, operand_ptr),
    )

    def _ZL12__hfma2_relu14__nv_bfloat162S_S__nbst_caller(arg_0, arg_1, arg_2):
        return _ZL12__hfma2_relu14__nv_bfloat162S_S__nbst(arg_0, arg_1, arg_2)

    @lower(
        __hfma2_relu,
        _type___nv_bfloat162,
        _type___nv_bfloat162,
        _type___nv_bfloat162,
    )
    def impl(context, builder, sig, args):
        # Add the shim object as a linking file and publish the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes pointers: allocate every slot first, then fill them.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(
            _type___nv_bfloat162, operand_ptr, operand_ptr, operand_ptr
        )
        return context.compile_internal(
            builder,
            _ZL12__hfma2_relu14__nv_bfloat162S_S__nbst_caller,
            caller_sig,
            slots,
        )


# Run the registration once, as the module executes, for `__hfma2_relu`.
_lower__ZL12__hfma2_relu14__nv_bfloat162S_S__nbst(
    shim_stream=shim_stream, shim_obj=shim_obj
)


def __hcmadd():
    """Python-side placeholder for the CUDA ``__hcmadd`` function.

    Calling it directly does nothing and returns ``None``; the object is the
    key under which the three-operand ``_type___nv_bfloat162`` lowering is
    registered.
    """


def _lower__ZL8__hcmadd14__nv_bfloat162S_S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``__hcmadd`` for three ``_type___nv_bfloat162`` operands.

    Declares the ``extern "C"`` shim as a device function taking one pointer
    per operand, then installs a lowering that stores each operand into its
    own ``alloca`` slot and calls the shim through a small Python caller.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source,
            keyed by the shim symbol, each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL8__hcmadd14__nv_bfloat162S_S__nbst"
    # The shim writes its result through `retval` and returns status 0.
    shim_source = """
    extern "C" __device__ int
    _ZL8__hcmadd14__nv_bfloat162S_S__nbst(__nv_bfloat162 &retval , __nv_bfloat162* a, __nv_bfloat162* b, __nv_bfloat162* c) {
        retval = __hcmadd(*a, *b, *c);
        return 0;
    }
        """
    operand_ptr = CPointer(_type___nv_bfloat162)

    _ZL8__hcmadd14__nv_bfloat162S_S__nbst = declare_device(
        shim_name,
        _type___nv_bfloat162(operand_ptr, operand_ptr, operand_ptr),
    )

    def _ZL8__hcmadd14__nv_bfloat162S_S__nbst_caller(arg_0, arg_1, arg_2):
        return _ZL8__hcmadd14__nv_bfloat162S_S__nbst(arg_0, arg_1, arg_2)

    @lower(
        __hcmadd,
        _type___nv_bfloat162,
        _type___nv_bfloat162,
        _type___nv_bfloat162,
    )
    def impl(context, builder, sig, args):
        # Add the shim object as a linking file and publish the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes pointers: allocate every slot first, then fill them.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(
            _type___nv_bfloat162, operand_ptr, operand_ptr, operand_ptr
        )
        return context.compile_internal(
            builder,
            _ZL8__hcmadd14__nv_bfloat162S_S__nbst_caller,
            caller_sig,
            slots,
        )


# Run the registration once, as the module executes, for `__hcmadd`.
_lower__ZL8__hcmadd14__nv_bfloat162S_S__nbst(
    shim_stream=shim_stream, shim_obj=shim_obj
)


def hsqrt():
    """Python-side placeholder for the CUDA ``hsqrt`` function.

    Calling it directly does nothing and returns ``None``; the object is the
    key under which the ``_type___nv_bfloat16`` lowering is registered.
    """


def _lower__ZL5hsqrt13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Register the lowering of ``hsqrt`` for one ``_type___nv_bfloat16`` operand.

    Declares the ``extern "C"`` shim as a device function taking a pointer to
    the operand, then installs a lowering that stores the operand into an
    ``alloca`` slot and calls the shim through a small Python caller.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source,
            keyed by the shim symbol, each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL5hsqrt13__nv_bfloat16_nbst"
    # The shim writes its result through `retval` and returns status 0.
    shim_source = """
    extern "C" __device__ int
    _ZL5hsqrt13__nv_bfloat16_nbst(__nv_bfloat16 &retval , __nv_bfloat16* a) {
        retval = hsqrt(*a);
        return 0;
    }
        """
    operand_ptr = CPointer(_type___nv_bfloat16)

    _ZL5hsqrt13__nv_bfloat16_nbst = declare_device(
        shim_name, _type___nv_bfloat16(operand_ptr)
    )

    def _ZL5hsqrt13__nv_bfloat16_nbst_caller(arg_0):
        return _ZL5hsqrt13__nv_bfloat16_nbst(arg_0)

    @lower(hsqrt, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        # Add the shim object as a linking file and publish the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes pointers: allocate every slot first, then fill them.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(_type___nv_bfloat16, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL5hsqrt13__nv_bfloat16_nbst_caller,
            caller_sig,
            slots,
        )


# Run the registration once, as the module executes, for `hsqrt`.
_lower__ZL5hsqrt13__nv_bfloat16_nbst(
    shim_stream=shim_stream, shim_obj=shim_obj
)


def hrsqrt():
    """Python-side placeholder for the CUDA ``hrsqrt`` function.

    Calling it directly does nothing and returns ``None``; the object is the
    key under which the ``_type___nv_bfloat16`` lowering is registered.
    """


def _lower__ZL6hrsqrt13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Register the lowering of ``hrsqrt`` for one ``_type___nv_bfloat16`` operand.

    Declares the ``extern "C"`` shim as a device function taking a pointer to
    the operand, then installs a lowering that stores the operand into an
    ``alloca`` slot and calls the shim through a small Python caller.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source,
            keyed by the shim symbol, each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL6hrsqrt13__nv_bfloat16_nbst"
    # The shim writes its result through `retval` and returns status 0.
    shim_source = """
    extern "C" __device__ int
    _ZL6hrsqrt13__nv_bfloat16_nbst(__nv_bfloat16 &retval , __nv_bfloat16* a) {
        retval = hrsqrt(*a);
        return 0;
    }
        """
    operand_ptr = CPointer(_type___nv_bfloat16)

    _ZL6hrsqrt13__nv_bfloat16_nbst = declare_device(
        shim_name, _type___nv_bfloat16(operand_ptr)
    )

    def _ZL6hrsqrt13__nv_bfloat16_nbst_caller(arg_0):
        return _ZL6hrsqrt13__nv_bfloat16_nbst(arg_0)

    @lower(hrsqrt, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        # Add the shim object as a linking file and publish the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes pointers: allocate every slot first, then fill them.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(_type___nv_bfloat16, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL6hrsqrt13__nv_bfloat16_nbst_caller,
            caller_sig,
            slots,
        )


# Run the registration once, as the module executes, for `hrsqrt`.
_lower__ZL6hrsqrt13__nv_bfloat16_nbst(
    shim_stream=shim_stream, shim_obj=shim_obj
)


def hrcp():
    """Python-side placeholder for the CUDA ``hrcp`` function.

    Calling it directly does nothing and returns ``None``; the object is the
    key under which the ``_type___nv_bfloat16`` lowering is registered.
    """


def _lower__ZL4hrcp13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Register the lowering of ``hrcp`` for one ``_type___nv_bfloat16`` operand.

    Declares the ``extern "C"`` shim as a device function taking a pointer to
    the operand, then installs a lowering that stores the operand into an
    ``alloca`` slot and calls the shim through a small Python caller.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source,
            keyed by the shim symbol, each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL4hrcp13__nv_bfloat16_nbst"
    # The shim writes its result through `retval` and returns status 0.
    shim_source = """
    extern "C" __device__ int
    _ZL4hrcp13__nv_bfloat16_nbst(__nv_bfloat16 &retval , __nv_bfloat16* a) {
        retval = hrcp(*a);
        return 0;
    }
        """
    operand_ptr = CPointer(_type___nv_bfloat16)

    _ZL4hrcp13__nv_bfloat16_nbst = declare_device(
        shim_name, _type___nv_bfloat16(operand_ptr)
    )

    def _ZL4hrcp13__nv_bfloat16_nbst_caller(arg_0):
        return _ZL4hrcp13__nv_bfloat16_nbst(arg_0)

    @lower(hrcp, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        # Add the shim object as a linking file and publish the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes pointers: allocate every slot first, then fill them.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(_type___nv_bfloat16, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL4hrcp13__nv_bfloat16_nbst_caller,
            caller_sig,
            slots,
        )


# Run the registration once, as the module executes, for `hrcp`.
_lower__ZL4hrcp13__nv_bfloat16_nbst(
    shim_stream=shim_stream, shim_obj=shim_obj
)


def hlog():
    """Python-side placeholder for the CUDA ``hlog`` function.

    Calling it directly does nothing and returns ``None``; the object is the
    key under which the ``_type___nv_bfloat16`` lowering is registered.
    """


def _lower__ZL4hlog13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Register the lowering of ``hlog`` for one ``_type___nv_bfloat16`` operand.

    Declares the ``extern "C"`` shim as a device function taking a pointer to
    the operand, then installs a lowering that stores the operand into an
    ``alloca`` slot and calls the shim through a small Python caller.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source,
            keyed by the shim symbol, each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL4hlog13__nv_bfloat16_nbst"
    # The shim writes its result through `retval` and returns status 0.
    shim_source = """
    extern "C" __device__ int
    _ZL4hlog13__nv_bfloat16_nbst(__nv_bfloat16 &retval , __nv_bfloat16* a) {
        retval = hlog(*a);
        return 0;
    }
        """
    operand_ptr = CPointer(_type___nv_bfloat16)

    _ZL4hlog13__nv_bfloat16_nbst = declare_device(
        shim_name, _type___nv_bfloat16(operand_ptr)
    )

    def _ZL4hlog13__nv_bfloat16_nbst_caller(arg_0):
        return _ZL4hlog13__nv_bfloat16_nbst(arg_0)

    @lower(hlog, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        # Add the shim object as a linking file and publish the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes pointers: allocate every slot first, then fill them.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(_type___nv_bfloat16, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL4hlog13__nv_bfloat16_nbst_caller,
            caller_sig,
            slots,
        )


# Run the registration once, as the module executes, for `hlog`.
_lower__ZL4hlog13__nv_bfloat16_nbst(
    shim_stream=shim_stream, shim_obj=shim_obj
)


def hlog2():
    """Python-side placeholder for the CUDA ``hlog2`` function.

    Calling it directly does nothing and returns ``None``; the object is the
    key under which the ``_type___nv_bfloat16`` lowering is registered.
    """


def _lower__ZL5hlog213__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Register the lowering of ``hlog2`` for one ``_type___nv_bfloat16`` operand.

    Declares the ``extern "C"`` shim as a device function taking a pointer to
    the operand, then installs a lowering that stores the operand into an
    ``alloca`` slot and calls the shim through a small Python caller.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source,
            keyed by the shim symbol, each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL5hlog213__nv_bfloat16_nbst"
    # The shim writes its result through `retval` and returns status 0.
    shim_source = """
    extern "C" __device__ int
    _ZL5hlog213__nv_bfloat16_nbst(__nv_bfloat16 &retval , __nv_bfloat16* a) {
        retval = hlog2(*a);
        return 0;
    }
        """
    operand_ptr = CPointer(_type___nv_bfloat16)

    _ZL5hlog213__nv_bfloat16_nbst = declare_device(
        shim_name, _type___nv_bfloat16(operand_ptr)
    )

    def _ZL5hlog213__nv_bfloat16_nbst_caller(arg_0):
        return _ZL5hlog213__nv_bfloat16_nbst(arg_0)

    @lower(hlog2, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        # Add the shim object as a linking file and publish the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes pointers: allocate every slot first, then fill them.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(_type___nv_bfloat16, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL5hlog213__nv_bfloat16_nbst_caller,
            caller_sig,
            slots,
        )


# Run the registration once, as the module executes, for `hlog2`.
_lower__ZL5hlog213__nv_bfloat16_nbst(
    shim_stream=shim_stream, shim_obj=shim_obj
)


def hlog10():
    """Python-side placeholder for the CUDA ``hlog10`` function.

    Calling it directly does nothing and returns ``None``; the object is the
    key under which the ``_type___nv_bfloat16`` lowering is registered.
    """


def _lower__ZL6hlog1013__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Register the lowering of ``hlog10`` for one ``_type___nv_bfloat16`` operand.

    Declares the ``extern "C"`` shim as a device function taking a pointer to
    the operand, then installs a lowering that stores the operand into an
    ``alloca`` slot and calls the shim through a small Python caller.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source,
            keyed by the shim symbol, each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL6hlog1013__nv_bfloat16_nbst"
    # The shim writes its result through `retval` and returns status 0.
    shim_source = """
    extern "C" __device__ int
    _ZL6hlog1013__nv_bfloat16_nbst(__nv_bfloat16 &retval , __nv_bfloat16* a) {
        retval = hlog10(*a);
        return 0;
    }
        """
    operand_ptr = CPointer(_type___nv_bfloat16)

    _ZL6hlog1013__nv_bfloat16_nbst = declare_device(
        shim_name, _type___nv_bfloat16(operand_ptr)
    )

    def _ZL6hlog1013__nv_bfloat16_nbst_caller(arg_0):
        return _ZL6hlog1013__nv_bfloat16_nbst(arg_0)

    @lower(hlog10, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        # Add the shim object as a linking file and publish the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes pointers: allocate every slot first, then fill them.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(_type___nv_bfloat16, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL6hlog1013__nv_bfloat16_nbst_caller,
            caller_sig,
            slots,
        )


# Run the registration once, as the module executes, for `hlog10`.
_lower__ZL6hlog1013__nv_bfloat16_nbst(
    shim_stream=shim_stream, shim_obj=shim_obj
)


def hexp():
    """Python-side placeholder for the CUDA ``hexp`` function.

    Calling it directly does nothing and returns ``None``; the object is the
    key under which the ``_type___nv_bfloat16`` lowering is registered.
    """


def _lower__ZL4hexp13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Register the lowering of ``hexp`` for one ``_type___nv_bfloat16`` operand.

    Declares the ``extern "C"`` shim as a device function taking a pointer to
    the operand, then installs a lowering that stores the operand into an
    ``alloca`` slot and calls the shim through a small Python caller.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source,
            keyed by the shim symbol, each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL4hexp13__nv_bfloat16_nbst"
    # The shim writes its result through `retval` and returns status 0.
    shim_source = """
    extern "C" __device__ int
    _ZL4hexp13__nv_bfloat16_nbst(__nv_bfloat16 &retval , __nv_bfloat16* a) {
        retval = hexp(*a);
        return 0;
    }
        """
    operand_ptr = CPointer(_type___nv_bfloat16)

    _ZL4hexp13__nv_bfloat16_nbst = declare_device(
        shim_name, _type___nv_bfloat16(operand_ptr)
    )

    def _ZL4hexp13__nv_bfloat16_nbst_caller(arg_0):
        return _ZL4hexp13__nv_bfloat16_nbst(arg_0)

    @lower(hexp, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        # Add the shim object as a linking file and publish the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes pointers: allocate every slot first, then fill them.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(_type___nv_bfloat16, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL4hexp13__nv_bfloat16_nbst_caller,
            caller_sig,
            slots,
        )


# Run the registration once, as the module executes, for `hexp`.
_lower__ZL4hexp13__nv_bfloat16_nbst(
    shim_stream=shim_stream, shim_obj=shim_obj
)


def htanh_approx():
    """Python-side placeholder for the CUDA ``htanh_approx`` function.

    Calling it directly does nothing and returns ``None``; the object is the
    key under which the ``_type___nv_bfloat16`` lowering is registered.
    """


def _lower__ZL12htanh_approx13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Register the lowering of ``htanh_approx`` for one ``_type___nv_bfloat16`` operand.

    Declares the ``extern "C"`` shim as a device function taking a pointer to
    the operand, then installs a lowering that stores the operand into an
    ``alloca`` slot and calls the shim through a small Python caller.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source,
            keyed by the shim symbol, each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL12htanh_approx13__nv_bfloat16_nbst"
    # The shim writes its result through `retval` and returns status 0.
    shim_source = """
    extern "C" __device__ int
    _ZL12htanh_approx13__nv_bfloat16_nbst(__nv_bfloat16 &retval , __nv_bfloat16* a) {
        retval = htanh_approx(*a);
        return 0;
    }
        """
    operand_ptr = CPointer(_type___nv_bfloat16)

    _ZL12htanh_approx13__nv_bfloat16_nbst = declare_device(
        shim_name, _type___nv_bfloat16(operand_ptr)
    )

    def _ZL12htanh_approx13__nv_bfloat16_nbst_caller(arg_0):
        return _ZL12htanh_approx13__nv_bfloat16_nbst(arg_0)

    @lower(htanh_approx, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        # Add the shim object as a linking file and publish the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes pointers: allocate every slot first, then fill them.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(_type___nv_bfloat16, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL12htanh_approx13__nv_bfloat16_nbst_caller,
            caller_sig,
            slots,
        )


# Run the registration once, as the module executes, for `htanh_approx`.
_lower__ZL12htanh_approx13__nv_bfloat16_nbst(
    shim_stream=shim_stream, shim_obj=shim_obj
)


def h2tanh_approx():
    """Python-side placeholder for the CUDA ``h2tanh_approx`` function.

    Calling it directly does nothing and returns ``None``; the object is the
    key under which the ``_type___nv_bfloat162`` lowering is registered.
    """


def _lower__ZL13h2tanh_approx14__nv_bfloat162_nbst(shim_stream, shim_obj):
    """Register the lowering of ``h2tanh_approx`` for one ``_type___nv_bfloat162`` operand.

    Declares the ``extern "C"`` shim as a device function taking a pointer to
    the operand, then installs a lowering that stores the operand into an
    ``alloca`` slot and calls the shim through a small Python caller.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source,
            keyed by the shim symbol, each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL13h2tanh_approx14__nv_bfloat162_nbst"
    # The shim writes its result through `retval` and returns status 0.
    shim_source = """
    extern "C" __device__ int
    _ZL13h2tanh_approx14__nv_bfloat162_nbst(__nv_bfloat162 &retval , __nv_bfloat162* a) {
        retval = h2tanh_approx(*a);
        return 0;
    }
        """
    operand_ptr = CPointer(_type___nv_bfloat162)

    _ZL13h2tanh_approx14__nv_bfloat162_nbst = declare_device(
        shim_name, _type___nv_bfloat162(operand_ptr)
    )

    def _ZL13h2tanh_approx14__nv_bfloat162_nbst_caller(arg_0):
        return _ZL13h2tanh_approx14__nv_bfloat162_nbst(arg_0)

    @lower(h2tanh_approx, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        # Add the shim object as a linking file and publish the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes pointers: allocate every slot first, then fill them.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(_type___nv_bfloat162, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL13h2tanh_approx14__nv_bfloat162_nbst_caller,
            caller_sig,
            slots,
        )


# Run the registration once, as the module executes, for `h2tanh_approx`.
_lower__ZL13h2tanh_approx14__nv_bfloat162_nbst(
    shim_stream=shim_stream, shim_obj=shim_obj
)


def htanh():
    """Python-side placeholder for the CUDA ``htanh`` function.

    Calling it directly does nothing and returns ``None``; the object is the
    key under which the ``_type___nv_bfloat16`` lowering is registered.
    """


def _lower__ZL5htanh13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Register the lowering of ``htanh`` for one ``_type___nv_bfloat16`` operand.

    Declares the ``extern "C"`` shim as a device function taking a pointer to
    the operand, then installs a lowering that stores the operand into an
    ``alloca`` slot and calls the shim through a small Python caller.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source,
            keyed by the shim symbol, each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL5htanh13__nv_bfloat16_nbst"
    # The shim writes its result through `retval` and returns status 0.
    shim_source = """
    extern "C" __device__ int
    _ZL5htanh13__nv_bfloat16_nbst(__nv_bfloat16 &retval , __nv_bfloat16* a) {
        retval = htanh(*a);
        return 0;
    }
        """
    operand_ptr = CPointer(_type___nv_bfloat16)

    _ZL5htanh13__nv_bfloat16_nbst = declare_device(
        shim_name, _type___nv_bfloat16(operand_ptr)
    )

    def _ZL5htanh13__nv_bfloat16_nbst_caller(arg_0):
        return _ZL5htanh13__nv_bfloat16_nbst(arg_0)

    @lower(htanh, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        # Add the shim object as a linking file and publish the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes pointers: allocate every slot first, then fill them.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(_type___nv_bfloat16, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL5htanh13__nv_bfloat16_nbst_caller,
            caller_sig,
            slots,
        )


# Run the registration once, as the module executes, for `htanh`.
_lower__ZL5htanh13__nv_bfloat16_nbst(
    shim_stream=shim_stream, shim_obj=shim_obj
)


def h2tanh():
    """Python-side placeholder for the CUDA ``h2tanh`` function.

    Calling it directly does nothing and returns ``None``; the object is the
    key under which the ``_type___nv_bfloat162`` lowering is registered.
    """


def _lower__ZL6h2tanh14__nv_bfloat162_nbst(shim_stream, shim_obj):
    """Register the lowering of ``h2tanh`` for one ``_type___nv_bfloat162`` operand.

    Declares the ``extern "C"`` shim as a device function taking a pointer to
    the operand, then installs a lowering that stores the operand into an
    ``alloca`` slot and calls the shim through a small Python caller.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source,
            keyed by the shim symbol, each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL6h2tanh14__nv_bfloat162_nbst"
    # The shim writes its result through `retval` and returns status 0.
    shim_source = """
    extern "C" __device__ int
    _ZL6h2tanh14__nv_bfloat162_nbst(__nv_bfloat162 &retval , __nv_bfloat162* a) {
        retval = h2tanh(*a);
        return 0;
    }
        """
    operand_ptr = CPointer(_type___nv_bfloat162)

    _ZL6h2tanh14__nv_bfloat162_nbst = declare_device(
        shim_name, _type___nv_bfloat162(operand_ptr)
    )

    def _ZL6h2tanh14__nv_bfloat162_nbst_caller(arg_0):
        return _ZL6h2tanh14__nv_bfloat162_nbst(arg_0)

    @lower(h2tanh, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        # Add the shim object as a linking file and publish the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes pointers: allocate every slot first, then fill them.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(_type___nv_bfloat162, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL6h2tanh14__nv_bfloat162_nbst_caller,
            caller_sig,
            slots,
        )


# Run the registration once, as the module executes, for `h2tanh`.
_lower__ZL6h2tanh14__nv_bfloat162_nbst(
    shim_stream=shim_stream, shim_obj=shim_obj
)


def hexp2():
    """Python-side placeholder for the CUDA ``hexp2`` function.

    Calling it directly does nothing and returns ``None``; the object is the
    key under which the ``_type___nv_bfloat16`` lowering is registered.
    """


def _lower__ZL5hexp213__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Register the lowering of ``hexp2`` for one ``_type___nv_bfloat16`` operand.

    Declares the ``extern "C"`` shim as a device function taking a pointer to
    the operand, then installs a lowering that stores the operand into an
    ``alloca`` slot and calls the shim through a small Python caller.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source,
            keyed by the shim symbol, each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL5hexp213__nv_bfloat16_nbst"
    # The shim writes its result through `retval` and returns status 0.
    shim_source = """
    extern "C" __device__ int
    _ZL5hexp213__nv_bfloat16_nbst(__nv_bfloat16 &retval , __nv_bfloat16* a) {
        retval = hexp2(*a);
        return 0;
    }
        """
    operand_ptr = CPointer(_type___nv_bfloat16)

    _ZL5hexp213__nv_bfloat16_nbst = declare_device(
        shim_name, _type___nv_bfloat16(operand_ptr)
    )

    def _ZL5hexp213__nv_bfloat16_nbst_caller(arg_0):
        return _ZL5hexp213__nv_bfloat16_nbst(arg_0)

    @lower(hexp2, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        # Add the shim object as a linking file and publish the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes pointers: allocate every slot first, then fill them.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(_type___nv_bfloat16, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL5hexp213__nv_bfloat16_nbst_caller,
            caller_sig,
            slots,
        )


# Run the registration once, as the module executes, for `hexp2`.
_lower__ZL5hexp213__nv_bfloat16_nbst(
    shim_stream=shim_stream, shim_obj=shim_obj
)


def hexp10():
    """Python-side placeholder for the CUDA ``hexp10`` function.

    Calling it directly does nothing and returns ``None``; the object is the
    key under which the ``_type___nv_bfloat16`` lowering is registered.
    """


def _lower__ZL6hexp1013__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Register the lowering of ``hexp10`` for one ``_type___nv_bfloat16`` operand.

    Declares the ``extern "C"`` shim as a device function taking a pointer to
    the operand, then installs a lowering that stores the operand into an
    ``alloca`` slot and calls the shim through a small Python caller.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source,
            keyed by the shim symbol, each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL6hexp1013__nv_bfloat16_nbst"
    # The shim writes its result through `retval` and returns status 0.
    shim_source = """
    extern "C" __device__ int
    _ZL6hexp1013__nv_bfloat16_nbst(__nv_bfloat16 &retval , __nv_bfloat16* a) {
        retval = hexp10(*a);
        return 0;
    }
        """
    operand_ptr = CPointer(_type___nv_bfloat16)

    _ZL6hexp1013__nv_bfloat16_nbst = declare_device(
        shim_name, _type___nv_bfloat16(operand_ptr)
    )

    def _ZL6hexp1013__nv_bfloat16_nbst_caller(arg_0):
        return _ZL6hexp1013__nv_bfloat16_nbst(arg_0)

    @lower(hexp10, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        # Add the shim object as a linking file and publish the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes pointers: allocate every slot first, then fill them.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(_type___nv_bfloat16, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL6hexp1013__nv_bfloat16_nbst_caller,
            caller_sig,
            slots,
        )


# Run the registration once, as the module executes, for `hexp10`.
_lower__ZL6hexp1013__nv_bfloat16_nbst(
    shim_stream=shim_stream, shim_obj=shim_obj
)


def hcos():
    """Python-side placeholder for the CUDA ``hcos`` function.

    Calling it directly does nothing and returns ``None``; the object is the
    key under which the ``_type___nv_bfloat16`` lowering is registered.
    """


def _lower__ZL4hcos13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Register the lowering of ``hcos`` for one ``_type___nv_bfloat16`` operand.

    Declares the ``extern "C"`` shim as a device function taking a pointer to
    the operand, then installs a lowering that stores the operand into an
    ``alloca`` slot and calls the shim through a small Python caller.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source,
            keyed by the shim symbol, each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL4hcos13__nv_bfloat16_nbst"
    # The shim writes its result through `retval` and returns status 0.
    shim_source = """
    extern "C" __device__ int
    _ZL4hcos13__nv_bfloat16_nbst(__nv_bfloat16 &retval , __nv_bfloat16* a) {
        retval = hcos(*a);
        return 0;
    }
        """
    operand_ptr = CPointer(_type___nv_bfloat16)

    _ZL4hcos13__nv_bfloat16_nbst = declare_device(
        shim_name, _type___nv_bfloat16(operand_ptr)
    )

    def _ZL4hcos13__nv_bfloat16_nbst_caller(arg_0):
        return _ZL4hcos13__nv_bfloat16_nbst(arg_0)

    @lower(hcos, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        # Add the shim object as a linking file and publish the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes pointers: allocate every slot first, then fill them.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(_type___nv_bfloat16, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL4hcos13__nv_bfloat16_nbst_caller,
            caller_sig,
            slots,
        )


# Run the registration once, as the module executes, for `hcos`.
_lower__ZL4hcos13__nv_bfloat16_nbst(
    shim_stream=shim_stream, shim_obj=shim_obj
)


def hsin():
    """Python-side placeholder for the CUDA ``hsin`` function.

    Calling it directly does nothing and returns ``None``; the object is the
    key under which the ``_type___nv_bfloat16`` lowering is registered.
    """


def _lower__ZL4hsin13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Register the lowering of ``hsin`` for one ``_type___nv_bfloat16`` operand.

    Declares the ``extern "C"`` shim as a device function taking a pointer to
    the operand, then installs a lowering that stores the operand into an
    ``alloca`` slot and calls the shim through a small Python caller.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source,
            keyed by the shim symbol, each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL4hsin13__nv_bfloat16_nbst"
    # The shim writes its result through `retval` and returns status 0.
    shim_source = """
    extern "C" __device__ int
    _ZL4hsin13__nv_bfloat16_nbst(__nv_bfloat16 &retval , __nv_bfloat16* a) {
        retval = hsin(*a);
        return 0;
    }
        """
    operand_ptr = CPointer(_type___nv_bfloat16)

    _ZL4hsin13__nv_bfloat16_nbst = declare_device(
        shim_name, _type___nv_bfloat16(operand_ptr)
    )

    def _ZL4hsin13__nv_bfloat16_nbst_caller(arg_0):
        return _ZL4hsin13__nv_bfloat16_nbst(arg_0)

    @lower(hsin, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        # Add the shim object as a linking file and publish the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes pointers: allocate every slot first, then fill them.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(_type___nv_bfloat16, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL4hsin13__nv_bfloat16_nbst_caller,
            caller_sig,
            slots,
        )


# Run the registration once, as the module executes, for `hsin`.
_lower__ZL4hsin13__nv_bfloat16_nbst(
    shim_stream=shim_stream, shim_obj=shim_obj
)


def h2sqrt():
    """Python-side placeholder for the CUDA ``h2sqrt`` function.

    Calling it directly does nothing and returns ``None``; the object is the
    key under which the ``_type___nv_bfloat162`` lowering is registered.
    """


def _lower__ZL6h2sqrt14__nv_bfloat162_nbst(shim_stream, shim_obj):
    """Register the lowering of ``h2sqrt`` for one ``_type___nv_bfloat162`` operand.

    Declares the ``extern "C"`` shim as a device function taking a pointer to
    the operand, then installs a lowering that stores the operand into an
    ``alloca`` slot and calls the shim through a small Python caller.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source,
            keyed by the shim symbol, each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL6h2sqrt14__nv_bfloat162_nbst"
    # The shim writes its result through `retval` and returns status 0.
    shim_source = """
    extern "C" __device__ int
    _ZL6h2sqrt14__nv_bfloat162_nbst(__nv_bfloat162 &retval , __nv_bfloat162* a) {
        retval = h2sqrt(*a);
        return 0;
    }
        """
    operand_ptr = CPointer(_type___nv_bfloat162)

    _ZL6h2sqrt14__nv_bfloat162_nbst = declare_device(
        shim_name, _type___nv_bfloat162(operand_ptr)
    )

    def _ZL6h2sqrt14__nv_bfloat162_nbst_caller(arg_0):
        return _ZL6h2sqrt14__nv_bfloat162_nbst(arg_0)

    @lower(h2sqrt, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        # Add the shim object as a linking file and publish the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes pointers: allocate every slot first, then fill them.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(_type___nv_bfloat162, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL6h2sqrt14__nv_bfloat162_nbst_caller,
            caller_sig,
            slots,
        )


# Run the registration once, as the module executes, for `h2sqrt`.
_lower__ZL6h2sqrt14__nv_bfloat162_nbst(
    shim_stream=shim_stream, shim_obj=shim_obj
)


def h2rsqrt():
    """Python-side placeholder for the CUDA ``h2rsqrt`` function.

    Calling it directly does nothing and returns ``None``; the object is the
    key under which the ``_type___nv_bfloat162`` lowering is registered.
    """


def _lower__ZL7h2rsqrt14__nv_bfloat162_nbst(shim_stream, shim_obj):
    """Register the lowering of ``h2rsqrt`` for one ``_type___nv_bfloat162`` operand.

    Declares the ``extern "C"`` shim as a device function taking a pointer to
    the operand, then installs a lowering that stores the operand into an
    ``alloca`` slot and calls the shim through a small Python caller.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source,
            keyed by the shim symbol, each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL7h2rsqrt14__nv_bfloat162_nbst"
    # The shim writes its result through `retval` and returns status 0.
    shim_source = """
    extern "C" __device__ int
    _ZL7h2rsqrt14__nv_bfloat162_nbst(__nv_bfloat162 &retval , __nv_bfloat162* a) {
        retval = h2rsqrt(*a);
        return 0;
    }
        """
    operand_ptr = CPointer(_type___nv_bfloat162)

    _ZL7h2rsqrt14__nv_bfloat162_nbst = declare_device(
        shim_name, _type___nv_bfloat162(operand_ptr)
    )

    def _ZL7h2rsqrt14__nv_bfloat162_nbst_caller(arg_0):
        return _ZL7h2rsqrt14__nv_bfloat162_nbst(arg_0)

    @lower(h2rsqrt, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        # Add the shim object as a linking file and publish the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes pointers: allocate every slot first, then fill them.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(_type___nv_bfloat162, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL7h2rsqrt14__nv_bfloat162_nbst_caller,
            caller_sig,
            slots,
        )


# Run the registration once, as the module executes, for `h2rsqrt`.
_lower__ZL7h2rsqrt14__nv_bfloat162_nbst(
    shim_stream=shim_stream, shim_obj=shim_obj
)


def h2rcp():
    """Python-side placeholder for the CUDA ``h2rcp`` function.

    Calling it directly does nothing and returns ``None``; the object is the
    key under which the ``_type___nv_bfloat162`` lowering is registered.
    """


def _lower__ZL5h2rcp14__nv_bfloat162_nbst(shim_stream, shim_obj):
    """Register the lowering of ``h2rcp`` for one ``_type___nv_bfloat162`` operand.

    Declares the ``extern "C"`` shim as a device function taking a pointer to
    the operand, then installs a lowering that stores the operand into an
    ``alloca`` slot and calls the shim through a small Python caller.

    Args:
        shim_stream: Object whose ``write_with_key`` receives the shim source,
            keyed by the shim symbol, each time the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            each time the lowering runs.
    """
    shim_name = "_ZL5h2rcp14__nv_bfloat162_nbst"
    # The shim writes its result through `retval` and returns status 0.
    shim_source = """
    extern "C" __device__ int
    _ZL5h2rcp14__nv_bfloat162_nbst(__nv_bfloat162 &retval , __nv_bfloat162* a) {
        retval = h2rcp(*a);
        return 0;
    }
        """
    operand_ptr = CPointer(_type___nv_bfloat162)

    _ZL5h2rcp14__nv_bfloat162_nbst = declare_device(
        shim_name, _type___nv_bfloat162(operand_ptr)
    )

    def _ZL5h2rcp14__nv_bfloat162_nbst_caller(arg_0):
        return _ZL5h2rcp14__nv_bfloat162_nbst(arg_0)

    @lower(h2rcp, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        # Add the shim object as a linking file and publish the shim source.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_source)

        # The shim takes pointers: allocate every slot first, then fill them.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        caller_sig = signature(_type___nv_bfloat162, operand_ptr)
        return context.compile_internal(
            builder,
            _ZL5h2rcp14__nv_bfloat162_nbst_caller,
            caller_sig,
            slots,
        )


# Run the registration once, as the module executes, for `h2rcp`.
_lower__ZL5h2rcp14__nv_bfloat162_nbst(
    shim_stream=shim_stream, shim_obj=shim_obj
)


def h2log():
    """Python-side placeholder for the CUDA ``h2log`` function.

    Calling it directly does nothing and returns ``None``; the object is the
    key under which the ``_type___nv_bfloat162`` lowering is registered.
    """


def _lower__ZL5h2log14__nv_bfloat162_nbst(shim_stream, shim_obj):
    """Register the lowering of ``h2log`` for one ``__nv_bfloat162`` operand.

    The registered implementation links ``shim_obj``, records the C++ shim
    source in ``shim_stream`` under the shim's symbol name, stores the operand
    into a fresh ``alloca`` slot and compiles a call to the shim with the
    slot's address.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_key = "_ZL5h2log14__nv_bfloat162_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL5h2log14__nv_bfloat162_nbst(__nv_bfloat162 &retval , __nv_bfloat162* a) {
        retval = h2log(*a);
        return 0;
    }
        """
    # Python-side view of the shim: the operand is passed by pointer.
    call_sig = signature(_type___nv_bfloat162, CPointer(_type___nv_bfloat162))

    _ZL5h2log14__nv_bfloat162_nbst = declare_device(
        shim_key,
        _type___nv_bfloat162(CPointer(_type___nv_bfloat162)),
    )

    # Thin Python wrapper handed to compile_internal below.
    def _ZL5h2log14__nv_bfloat162_nbst_caller(arg_0):
        return _ZL5h2log14__nv_bfloat162_nbst(arg_0)

    @lower(h2log, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Allocate all slots first, then fill them, so every value can be
        # passed to the shim by address.
        arg_types = sig.args
        slots = [
            builder.alloca(context.get_value_type(ty)) for ty in arg_types
        ]
        for slot, ty, value in zip(slots, arg_types, args):
            alignment = getattr(ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZL5h2log14__nv_bfloat162_nbst_caller,
            call_sig,
            slots,
        )


# Run the registration immediately so the lowering exists after import.
_lower__ZL5h2log14__nv_bfloat162_nbst(shim_stream, shim_obj)


def h2log2():
    """Placeholder for the device function ``h2log2``.

    Calling it from Python does nothing and returns ``None``; the function
    object serves as the key passed to ``lower`` when the ``__nv_bfloat162``
    implementation is registered in this module.
    """
    return None


def _lower__ZL6h2log214__nv_bfloat162_nbst(shim_stream, shim_obj):
    """Register the lowering of ``h2log2`` for one ``__nv_bfloat162`` operand.

    The registered implementation links ``shim_obj``, records the C++ shim
    source in ``shim_stream`` under the shim's symbol name, stores the operand
    into a fresh ``alloca`` slot and compiles a call to the shim with the
    slot's address.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_key = "_ZL6h2log214__nv_bfloat162_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL6h2log214__nv_bfloat162_nbst(__nv_bfloat162 &retval , __nv_bfloat162* a) {
        retval = h2log2(*a);
        return 0;
    }
        """
    # Python-side view of the shim: the operand is passed by pointer.
    call_sig = signature(_type___nv_bfloat162, CPointer(_type___nv_bfloat162))

    _ZL6h2log214__nv_bfloat162_nbst = declare_device(
        shim_key,
        _type___nv_bfloat162(CPointer(_type___nv_bfloat162)),
    )

    # Thin Python wrapper handed to compile_internal below.
    def _ZL6h2log214__nv_bfloat162_nbst_caller(arg_0):
        return _ZL6h2log214__nv_bfloat162_nbst(arg_0)

    @lower(h2log2, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Allocate all slots first, then fill them, so every value can be
        # passed to the shim by address.
        arg_types = sig.args
        slots = [
            builder.alloca(context.get_value_type(ty)) for ty in arg_types
        ]
        for slot, ty, value in zip(slots, arg_types, args):
            alignment = getattr(ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZL6h2log214__nv_bfloat162_nbst_caller,
            call_sig,
            slots,
        )


# Run the registration immediately so the lowering exists after import.
_lower__ZL6h2log214__nv_bfloat162_nbst(shim_stream, shim_obj)


def h2log10():
    """Placeholder for the device function ``h2log10``.

    Calling it from Python does nothing and returns ``None``; the function
    object serves as the key passed to ``lower`` when the ``__nv_bfloat162``
    implementation is registered in this module.
    """
    return None


def _lower__ZL7h2log1014__nv_bfloat162_nbst(shim_stream, shim_obj):
    """Register the lowering of ``h2log10`` for one ``__nv_bfloat162`` operand.

    The registered implementation links ``shim_obj``, records the C++ shim
    source in ``shim_stream`` under the shim's symbol name, stores the operand
    into a fresh ``alloca`` slot and compiles a call to the shim with the
    slot's address.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_key = "_ZL7h2log1014__nv_bfloat162_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL7h2log1014__nv_bfloat162_nbst(__nv_bfloat162 &retval , __nv_bfloat162* a) {
        retval = h2log10(*a);
        return 0;
    }
        """
    # Python-side view of the shim: the operand is passed by pointer.
    call_sig = signature(_type___nv_bfloat162, CPointer(_type___nv_bfloat162))

    _ZL7h2log1014__nv_bfloat162_nbst = declare_device(
        shim_key,
        _type___nv_bfloat162(CPointer(_type___nv_bfloat162)),
    )

    # Thin Python wrapper handed to compile_internal below.
    def _ZL7h2log1014__nv_bfloat162_nbst_caller(arg_0):
        return _ZL7h2log1014__nv_bfloat162_nbst(arg_0)

    @lower(h2log10, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Allocate all slots first, then fill them, so every value can be
        # passed to the shim by address.
        arg_types = sig.args
        slots = [
            builder.alloca(context.get_value_type(ty)) for ty in arg_types
        ]
        for slot, ty, value in zip(slots, arg_types, args):
            alignment = getattr(ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZL7h2log1014__nv_bfloat162_nbst_caller,
            call_sig,
            slots,
        )


# Run the registration immediately so the lowering exists after import.
_lower__ZL7h2log1014__nv_bfloat162_nbst(shim_stream, shim_obj)


def h2exp():
    """Placeholder for the device function ``h2exp``.

    Calling it from Python does nothing and returns ``None``; the function
    object serves as the key passed to ``lower`` when the ``__nv_bfloat162``
    implementation is registered in this module.
    """
    return None


def _lower__ZL5h2exp14__nv_bfloat162_nbst(shim_stream, shim_obj):
    """Register the lowering of ``h2exp`` for one ``__nv_bfloat162`` operand.

    The registered implementation links ``shim_obj``, records the C++ shim
    source in ``shim_stream`` under the shim's symbol name, stores the operand
    into a fresh ``alloca`` slot and compiles a call to the shim with the
    slot's address.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_key = "_ZL5h2exp14__nv_bfloat162_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL5h2exp14__nv_bfloat162_nbst(__nv_bfloat162 &retval , __nv_bfloat162* a) {
        retval = h2exp(*a);
        return 0;
    }
        """
    # Python-side view of the shim: the operand is passed by pointer.
    call_sig = signature(_type___nv_bfloat162, CPointer(_type___nv_bfloat162))

    _ZL5h2exp14__nv_bfloat162_nbst = declare_device(
        shim_key,
        _type___nv_bfloat162(CPointer(_type___nv_bfloat162)),
    )

    # Thin Python wrapper handed to compile_internal below.
    def _ZL5h2exp14__nv_bfloat162_nbst_caller(arg_0):
        return _ZL5h2exp14__nv_bfloat162_nbst(arg_0)

    @lower(h2exp, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Allocate all slots first, then fill them, so every value can be
        # passed to the shim by address.
        arg_types = sig.args
        slots = [
            builder.alloca(context.get_value_type(ty)) for ty in arg_types
        ]
        for slot, ty, value in zip(slots, arg_types, args):
            alignment = getattr(ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZL5h2exp14__nv_bfloat162_nbst_caller,
            call_sig,
            slots,
        )


# Run the registration immediately so the lowering exists after import.
_lower__ZL5h2exp14__nv_bfloat162_nbst(shim_stream, shim_obj)


def h2exp2():
    """Placeholder for the device function ``h2exp2``.

    Calling it from Python does nothing and returns ``None``; the function
    object serves as the key passed to ``lower`` when the ``__nv_bfloat162``
    implementation is registered in this module.
    """
    return None


def _lower__ZL6h2exp214__nv_bfloat162_nbst(shim_stream, shim_obj):
    """Register the lowering of ``h2exp2`` for one ``__nv_bfloat162`` operand.

    The registered implementation links ``shim_obj``, records the C++ shim
    source in ``shim_stream`` under the shim's symbol name, stores the operand
    into a fresh ``alloca`` slot and compiles a call to the shim with the
    slot's address.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_key = "_ZL6h2exp214__nv_bfloat162_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL6h2exp214__nv_bfloat162_nbst(__nv_bfloat162 &retval , __nv_bfloat162* a) {
        retval = h2exp2(*a);
        return 0;
    }
        """
    # Python-side view of the shim: the operand is passed by pointer.
    call_sig = signature(_type___nv_bfloat162, CPointer(_type___nv_bfloat162))

    _ZL6h2exp214__nv_bfloat162_nbst = declare_device(
        shim_key,
        _type___nv_bfloat162(CPointer(_type___nv_bfloat162)),
    )

    # Thin Python wrapper handed to compile_internal below.
    def _ZL6h2exp214__nv_bfloat162_nbst_caller(arg_0):
        return _ZL6h2exp214__nv_bfloat162_nbst(arg_0)

    @lower(h2exp2, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Allocate all slots first, then fill them, so every value can be
        # passed to the shim by address.
        arg_types = sig.args
        slots = [
            builder.alloca(context.get_value_type(ty)) for ty in arg_types
        ]
        for slot, ty, value in zip(slots, arg_types, args):
            alignment = getattr(ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZL6h2exp214__nv_bfloat162_nbst_caller,
            call_sig,
            slots,
        )


# Run the registration immediately so the lowering exists after import.
_lower__ZL6h2exp214__nv_bfloat162_nbst(shim_stream, shim_obj)


def h2exp10():
    """Placeholder for the device function ``h2exp10``.

    Calling it from Python does nothing and returns ``None``; the function
    object serves as the key passed to ``lower`` when the ``__nv_bfloat162``
    implementation is registered in this module.
    """
    return None


def _lower__ZL7h2exp1014__nv_bfloat162_nbst(shim_stream, shim_obj):
    """Register the lowering of ``h2exp10`` for one ``__nv_bfloat162`` operand.

    The registered implementation links ``shim_obj``, records the C++ shim
    source in ``shim_stream`` under the shim's symbol name, stores the operand
    into a fresh ``alloca`` slot and compiles a call to the shim with the
    slot's address.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_key = "_ZL7h2exp1014__nv_bfloat162_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL7h2exp1014__nv_bfloat162_nbst(__nv_bfloat162 &retval , __nv_bfloat162* a) {
        retval = h2exp10(*a);
        return 0;
    }
        """
    # Python-side view of the shim: the operand is passed by pointer.
    call_sig = signature(_type___nv_bfloat162, CPointer(_type___nv_bfloat162))

    _ZL7h2exp1014__nv_bfloat162_nbst = declare_device(
        shim_key,
        _type___nv_bfloat162(CPointer(_type___nv_bfloat162)),
    )

    # Thin Python wrapper handed to compile_internal below.
    def _ZL7h2exp1014__nv_bfloat162_nbst_caller(arg_0):
        return _ZL7h2exp1014__nv_bfloat162_nbst(arg_0)

    @lower(h2exp10, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Allocate all slots first, then fill them, so every value can be
        # passed to the shim by address.
        arg_types = sig.args
        slots = [
            builder.alloca(context.get_value_type(ty)) for ty in arg_types
        ]
        for slot, ty, value in zip(slots, arg_types, args):
            alignment = getattr(ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZL7h2exp1014__nv_bfloat162_nbst_caller,
            call_sig,
            slots,
        )


# Run the registration immediately so the lowering exists after import.
_lower__ZL7h2exp1014__nv_bfloat162_nbst(shim_stream, shim_obj)


def h2cos():
    """Placeholder for the device function ``h2cos``.

    Calling it from Python does nothing and returns ``None``; the function
    object serves as the key passed to ``lower`` when the ``__nv_bfloat162``
    implementation is registered in this module.
    """
    return None


def _lower__ZL5h2cos14__nv_bfloat162_nbst(shim_stream, shim_obj):
    """Register the lowering of ``h2cos`` for one ``__nv_bfloat162`` operand.

    The registered implementation links ``shim_obj``, records the C++ shim
    source in ``shim_stream`` under the shim's symbol name, stores the operand
    into a fresh ``alloca`` slot and compiles a call to the shim with the
    slot's address.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_key = "_ZL5h2cos14__nv_bfloat162_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL5h2cos14__nv_bfloat162_nbst(__nv_bfloat162 &retval , __nv_bfloat162* a) {
        retval = h2cos(*a);
        return 0;
    }
        """
    # Python-side view of the shim: the operand is passed by pointer.
    call_sig = signature(_type___nv_bfloat162, CPointer(_type___nv_bfloat162))

    _ZL5h2cos14__nv_bfloat162_nbst = declare_device(
        shim_key,
        _type___nv_bfloat162(CPointer(_type___nv_bfloat162)),
    )

    # Thin Python wrapper handed to compile_internal below.
    def _ZL5h2cos14__nv_bfloat162_nbst_caller(arg_0):
        return _ZL5h2cos14__nv_bfloat162_nbst(arg_0)

    @lower(h2cos, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Allocate all slots first, then fill them, so every value can be
        # passed to the shim by address.
        arg_types = sig.args
        slots = [
            builder.alloca(context.get_value_type(ty)) for ty in arg_types
        ]
        for slot, ty, value in zip(slots, arg_types, args):
            alignment = getattr(ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZL5h2cos14__nv_bfloat162_nbst_caller,
            call_sig,
            slots,
        )


# Run the registration immediately so the lowering exists after import.
_lower__ZL5h2cos14__nv_bfloat162_nbst(shim_stream, shim_obj)


def h2sin():
    """Placeholder for the device function ``h2sin``.

    Calling it from Python does nothing and returns ``None``; the function
    object serves as the key passed to ``lower`` when the ``__nv_bfloat162``
    implementation is registered in this module.
    """
    return None


def _lower__ZL5h2sin14__nv_bfloat162_nbst(shim_stream, shim_obj):
    """Register the lowering of ``h2sin`` for one ``__nv_bfloat162`` operand.

    The registered implementation links ``shim_obj``, records the C++ shim
    source in ``shim_stream`` under the shim's symbol name, stores the operand
    into a fresh ``alloca`` slot and compiles a call to the shim with the
    slot's address.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_key = "_ZL5h2sin14__nv_bfloat162_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL5h2sin14__nv_bfloat162_nbst(__nv_bfloat162 &retval , __nv_bfloat162* a) {
        retval = h2sin(*a);
        return 0;
    }
        """
    # Python-side view of the shim: the operand is passed by pointer.
    call_sig = signature(_type___nv_bfloat162, CPointer(_type___nv_bfloat162))

    _ZL5h2sin14__nv_bfloat162_nbst = declare_device(
        shim_key,
        _type___nv_bfloat162(CPointer(_type___nv_bfloat162)),
    )

    # Thin Python wrapper handed to compile_internal below.
    def _ZL5h2sin14__nv_bfloat162_nbst_caller(arg_0):
        return _ZL5h2sin14__nv_bfloat162_nbst(arg_0)

    @lower(h2sin, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Allocate all slots first, then fill them, so every value can be
        # passed to the shim by address.
        arg_types = sig.args
        slots = [
            builder.alloca(context.get_value_type(ty)) for ty in arg_types
        ]
        for slot, ty, value in zip(slots, arg_types, args):
            alignment = getattr(ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZL5h2sin14__nv_bfloat162_nbst_caller,
            call_sig,
            slots,
        )


# Run the registration immediately so the lowering exists after import.
_lower__ZL5h2sin14__nv_bfloat162_nbst(shim_stream, shim_obj)


def atomicAdd():
    """Placeholder for the device function ``atomicAdd``.

    Calling it from Python does nothing and returns ``None``; the function
    object serves as the key passed to ``lower`` for the two implementations
    registered in this module: ``(CPointer(__nv_bfloat162), __nv_bfloat162)``
    and ``(CPointer(__nv_bfloat16), __nv_bfloat16)``.
    """
    return None


def _lower__ZL9atomicAddP14__nv_bfloat162S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``atomicAdd`` on ``__nv_bfloat162``.

    The lowering is keyed on ``(CPointer(__nv_bfloat162), __nv_bfloat162)``.
    The registered implementation links ``shim_obj``, records the C++ shim
    source in ``shim_stream`` under the shim's symbol name, stores both
    arguments (the address and the value) into fresh ``alloca`` slots and
    compiles a call to the shim with the slots' addresses.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_key = "_ZL9atomicAddP14__nv_bfloat162S__nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL9atomicAddP14__nv_bfloat162S__nbst(__nv_bfloat162 &retval , __nv_bfloat162 ** address, __nv_bfloat162* val) {
        retval = atomicAdd(*address, *val);
        return 0;
    }
        """
    # Python-side view of the shim: every argument gains one pointer level.
    call_sig = signature(
        _type___nv_bfloat162,
        CPointer(CPointer(_type___nv_bfloat162)),
        CPointer(_type___nv_bfloat162),
    )

    _ZL9atomicAddP14__nv_bfloat162S__nbst = declare_device(
        shim_key,
        _type___nv_bfloat162(
            CPointer(CPointer(_type___nv_bfloat162)),
            CPointer(_type___nv_bfloat162),
        ),
    )

    # Thin Python wrapper handed to compile_internal below.
    def _ZL9atomicAddP14__nv_bfloat162S__nbst_caller(arg_0, arg_1):
        return _ZL9atomicAddP14__nv_bfloat162S__nbst(arg_0, arg_1)

    @lower(atomicAdd, CPointer(_type___nv_bfloat162), _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Allocate all slots first, then fill them, so every value can be
        # passed to the shim by address.
        arg_types = sig.args
        slots = [
            builder.alloca(context.get_value_type(ty)) for ty in arg_types
        ]
        for slot, ty, value in zip(slots, arg_types, args):
            alignment = getattr(ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZL9atomicAddP14__nv_bfloat162S__nbst_caller,
            call_sig,
            slots,
        )


# Run the registration immediately so the lowering exists after import.
_lower__ZL9atomicAddP14__nv_bfloat162S__nbst(shim_stream, shim_obj)


def _lower__ZL9atomicAddP13__nv_bfloat16S__nbst(shim_stream, shim_obj):
    """Register the lowering of ``atomicAdd`` on ``__nv_bfloat16``.

    The lowering is keyed on ``(CPointer(__nv_bfloat16), __nv_bfloat16)``.
    The registered implementation links ``shim_obj``, records the C++ shim
    source in ``shim_stream`` under the shim's symbol name, stores both
    arguments (the address and the value) into fresh ``alloca`` slots and
    compiles a call to the shim with the slots' addresses.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_key = "_ZL9atomicAddP13__nv_bfloat16S__nbst"
    shim_source = """
    extern "C" __device__ int
    _ZL9atomicAddP13__nv_bfloat16S__nbst(__nv_bfloat16 &retval , __nv_bfloat16 ** address, __nv_bfloat16* val) {
        retval = atomicAdd(*address, *val);
        return 0;
    }
        """
    # Python-side view of the shim: every argument gains one pointer level.
    call_sig = signature(
        _type___nv_bfloat16,
        CPointer(CPointer(_type___nv_bfloat16)),
        CPointer(_type___nv_bfloat16),
    )

    _ZL9atomicAddP13__nv_bfloat16S__nbst = declare_device(
        shim_key,
        _type___nv_bfloat16(
            CPointer(CPointer(_type___nv_bfloat16)),
            CPointer(_type___nv_bfloat16),
        ),
    )

    # Thin Python wrapper handed to compile_internal below.
    def _ZL9atomicAddP13__nv_bfloat16S__nbst_caller(arg_0, arg_1):
        return _ZL9atomicAddP13__nv_bfloat16S__nbst(arg_0, arg_1)

    @lower(atomicAdd, CPointer(_type___nv_bfloat16), _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Allocate all slots first, then fill them, so every value can be
        # passed to the shim by address.
        arg_types = sig.args
        slots = [
            builder.alloca(context.get_value_type(ty)) for ty in arg_types
        ]
        for slot, ty, value in zip(slots, arg_types, args):
            alignment = getattr(ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZL9atomicAddP13__nv_bfloat16S__nbst_caller,
            call_sig,
            slots,
        )


# Run the registration immediately so the lowering exists after import.
_lower__ZL9atomicAddP13__nv_bfloat16S__nbst(shim_stream, shim_obj)


def _lower__ZplRK13__nv_bfloat16S1__nbst(shim_stream, shim_obj):
    """Register the lowering of ``operator.add`` for two ``__nv_bfloat16``.

    The registered implementation links ``shim_obj``, records the C++ shim
    source in ``shim_stream`` under the shim's symbol name, stores both
    operands into fresh ``alloca`` slots and compiles a call to the shim with
    the slots' addresses.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_key = "_ZplRK13__nv_bfloat16S1__nbst"
    shim_source = """
    extern "C" __device__ int
    _ZplRK13__nv_bfloat16S1__nbst(__nv_bfloat16 &retval , __nv_bfloat16* lh, __nv_bfloat16* rh) {
        retval = operator+(*lh, *rh);
        return 0;
    }
        """
    # Python-side view of the shim: both operands are passed by pointer.
    call_sig = signature(
        _type___nv_bfloat16,
        CPointer(_type___nv_bfloat16),
        CPointer(_type___nv_bfloat16),
    )

    _ZplRK13__nv_bfloat16S1__nbst = declare_device(
        shim_key,
        _type___nv_bfloat16(
            CPointer(_type___nv_bfloat16), CPointer(_type___nv_bfloat16)
        ),
    )

    # Thin Python wrapper handed to compile_internal below.
    def _ZplRK13__nv_bfloat16S1__nbst_caller(arg_0, arg_1):
        return _ZplRK13__nv_bfloat16S1__nbst(arg_0, arg_1)

    @lower(operator.add, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Allocate all slots first, then fill them, so every value can be
        # passed to the shim by address.
        arg_types = sig.args
        slots = [
            builder.alloca(context.get_value_type(ty)) for ty in arg_types
        ]
        for slot, ty, value in zip(slots, arg_types, args):
            alignment = getattr(ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZplRK13__nv_bfloat16S1__nbst_caller,
            call_sig,
            slots,
        )


# Run the registration immediately so the lowering exists after import.
_lower__ZplRK13__nv_bfloat16S1__nbst(shim_stream, shim_obj)


def _lower__ZmiRK13__nv_bfloat16S1__nbst(shim_stream, shim_obj):
    """Register the lowering of ``operator.sub`` for two ``__nv_bfloat16``.

    The registered implementation links ``shim_obj``, records the C++ shim
    source in ``shim_stream`` under the shim's symbol name, stores both
    operands into fresh ``alloca`` slots and compiles a call to the shim with
    the slots' addresses.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_key = "_ZmiRK13__nv_bfloat16S1__nbst"
    shim_source = """
    extern "C" __device__ int
    _ZmiRK13__nv_bfloat16S1__nbst(__nv_bfloat16 &retval , __nv_bfloat16* lh, __nv_bfloat16* rh) {
        retval = operator-(*lh, *rh);
        return 0;
    }
        """
    # Python-side view of the shim: both operands are passed by pointer.
    call_sig = signature(
        _type___nv_bfloat16,
        CPointer(_type___nv_bfloat16),
        CPointer(_type___nv_bfloat16),
    )

    _ZmiRK13__nv_bfloat16S1__nbst = declare_device(
        shim_key,
        _type___nv_bfloat16(
            CPointer(_type___nv_bfloat16), CPointer(_type___nv_bfloat16)
        ),
    )

    # Thin Python wrapper handed to compile_internal below.
    def _ZmiRK13__nv_bfloat16S1__nbst_caller(arg_0, arg_1):
        return _ZmiRK13__nv_bfloat16S1__nbst(arg_0, arg_1)

    @lower(operator.sub, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Allocate all slots first, then fill them, so every value can be
        # passed to the shim by address.
        arg_types = sig.args
        slots = [
            builder.alloca(context.get_value_type(ty)) for ty in arg_types
        ]
        for slot, ty, value in zip(slots, arg_types, args):
            alignment = getattr(ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZmiRK13__nv_bfloat16S1__nbst_caller,
            call_sig,
            slots,
        )


# Run the registration immediately so the lowering exists after import.
_lower__ZmiRK13__nv_bfloat16S1__nbst(shim_stream, shim_obj)


def _lower__ZmlRK13__nv_bfloat16S1__nbst(shim_stream, shim_obj):
    """Register the lowering of ``operator.mul`` for two ``__nv_bfloat16``.

    The registered implementation links ``shim_obj``, records the C++ shim
    source in ``shim_stream`` under the shim's symbol name, stores both
    operands into fresh ``alloca`` slots and compiles a call to the shim with
    the slots' addresses.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_key = "_ZmlRK13__nv_bfloat16S1__nbst"
    shim_source = """
    extern "C" __device__ int
    _ZmlRK13__nv_bfloat16S1__nbst(__nv_bfloat16 &retval , __nv_bfloat16* lh, __nv_bfloat16* rh) {
        retval = operator*(*lh, *rh);
        return 0;
    }
        """
    # Python-side view of the shim: both operands are passed by pointer.
    call_sig = signature(
        _type___nv_bfloat16,
        CPointer(_type___nv_bfloat16),
        CPointer(_type___nv_bfloat16),
    )

    _ZmlRK13__nv_bfloat16S1__nbst = declare_device(
        shim_key,
        _type___nv_bfloat16(
            CPointer(_type___nv_bfloat16), CPointer(_type___nv_bfloat16)
        ),
    )

    # Thin Python wrapper handed to compile_internal below.
    def _ZmlRK13__nv_bfloat16S1__nbst_caller(arg_0, arg_1):
        return _ZmlRK13__nv_bfloat16S1__nbst(arg_0, arg_1)

    @lower(operator.mul, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Allocate all slots first, then fill them, so every value can be
        # passed to the shim by address.
        arg_types = sig.args
        slots = [
            builder.alloca(context.get_value_type(ty)) for ty in arg_types
        ]
        for slot, ty, value in zip(slots, arg_types, args):
            alignment = getattr(ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZmlRK13__nv_bfloat16S1__nbst_caller,
            call_sig,
            slots,
        )


# Run the registration immediately so the lowering exists after import.
_lower__ZmlRK13__nv_bfloat16S1__nbst(shim_stream, shim_obj)


def _lower__ZdvRK13__nv_bfloat16S1__nbst(shim_stream, shim_obj):
    """Register the lowering of ``operator.truediv`` for two ``__nv_bfloat16``.

    The registered implementation links ``shim_obj``, records the C++ shim
    source in ``shim_stream`` under the shim's symbol name, stores both
    operands into fresh ``alloca`` slots and compiles a call to the shim with
    the slots' addresses.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_key = "_ZdvRK13__nv_bfloat16S1__nbst"
    shim_source = """
    extern "C" __device__ int
    _ZdvRK13__nv_bfloat16S1__nbst(__nv_bfloat16 &retval , __nv_bfloat16* lh, __nv_bfloat16* rh) {
        retval = operator/(*lh, *rh);
        return 0;
    }
        """
    # Python-side view of the shim: both operands are passed by pointer.
    call_sig = signature(
        _type___nv_bfloat16,
        CPointer(_type___nv_bfloat16),
        CPointer(_type___nv_bfloat16),
    )

    _ZdvRK13__nv_bfloat16S1__nbst = declare_device(
        shim_key,
        _type___nv_bfloat16(
            CPointer(_type___nv_bfloat16), CPointer(_type___nv_bfloat16)
        ),
    )

    # Thin Python wrapper handed to compile_internal below.
    def _ZdvRK13__nv_bfloat16S1__nbst_caller(arg_0, arg_1):
        return _ZdvRK13__nv_bfloat16S1__nbst(arg_0, arg_1)

    @lower(operator.truediv, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Allocate all slots first, then fill them, so every value can be
        # passed to the shim by address.
        arg_types = sig.args
        slots = [
            builder.alloca(context.get_value_type(ty)) for ty in arg_types
        ]
        for slot, ty, value in zip(slots, arg_types, args):
            alignment = getattr(ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZdvRK13__nv_bfloat16S1__nbst_caller,
            call_sig,
            slots,
        )


# Run the registration immediately so the lowering exists after import.
_lower__ZdvRK13__nv_bfloat16S1__nbst(shim_stream, shim_obj)


def _lower__ZpLR13__nv_bfloat16RKS__nbst(shim_stream, shim_obj):
    """Register the lowering of ``operator.iadd`` for two ``__nv_bfloat16``.

    The registered implementation links ``shim_obj``, records the C++ shim
    source in ``shim_stream`` under the shim's symbol name, stores both
    operands into fresh ``alloca`` slots and compiles a call to the shim with
    the slots' addresses. The shim applies ``operator+=`` through those
    pointers and assigns the outcome to ``retval``.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_key = "_ZpLR13__nv_bfloat16RKS__nbst"
    shim_source = """
    extern "C" __device__ int
    _ZpLR13__nv_bfloat16RKS__nbst(__nv_bfloat16 &retval , __nv_bfloat16* lh, __nv_bfloat16* rh) {
        retval = operator+=(*lh, *rh);
        return 0;
    }
        """
    # Python-side view of the shim: both operands are passed by pointer.
    call_sig = signature(
        _type___nv_bfloat16,
        CPointer(_type___nv_bfloat16),
        CPointer(_type___nv_bfloat16),
    )

    _ZpLR13__nv_bfloat16RKS__nbst = declare_device(
        shim_key,
        _type___nv_bfloat16(
            CPointer(_type___nv_bfloat16), CPointer(_type___nv_bfloat16)
        ),
    )

    # Thin Python wrapper handed to compile_internal below.
    def _ZpLR13__nv_bfloat16RKS__nbst_caller(arg_0, arg_1):
        return _ZpLR13__nv_bfloat16RKS__nbst(arg_0, arg_1)

    @lower(operator.iadd, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Allocate all slots first, then fill them, so every value can be
        # passed to the shim by address.
        arg_types = sig.args
        slots = [
            builder.alloca(context.get_value_type(ty)) for ty in arg_types
        ]
        for slot, ty, value in zip(slots, arg_types, args):
            alignment = getattr(ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZpLR13__nv_bfloat16RKS__nbst_caller,
            call_sig,
            slots,
        )


# Run the registration immediately so the lowering exists after import.
_lower__ZpLR13__nv_bfloat16RKS__nbst(shim_stream, shim_obj)


def _lower__ZmIR13__nv_bfloat16RKS__nbst(shim_stream, shim_obj):
    """Register the lowering of ``operator.isub`` for two ``__nv_bfloat16``.

    The registered implementation links ``shim_obj``, records the C++ shim
    source in ``shim_stream`` under the shim's symbol name, stores both
    operands into fresh ``alloca`` slots and compiles a call to the shim with
    the slots' addresses. The shim applies ``operator-=`` through those
    pointers and assigns the outcome to ``retval``.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_key = "_ZmIR13__nv_bfloat16RKS__nbst"
    shim_source = """
    extern "C" __device__ int
    _ZmIR13__nv_bfloat16RKS__nbst(__nv_bfloat16 &retval , __nv_bfloat16* lh, __nv_bfloat16* rh) {
        retval = operator-=(*lh, *rh);
        return 0;
    }
        """
    # Python-side view of the shim: both operands are passed by pointer.
    call_sig = signature(
        _type___nv_bfloat16,
        CPointer(_type___nv_bfloat16),
        CPointer(_type___nv_bfloat16),
    )

    _ZmIR13__nv_bfloat16RKS__nbst = declare_device(
        shim_key,
        _type___nv_bfloat16(
            CPointer(_type___nv_bfloat16), CPointer(_type___nv_bfloat16)
        ),
    )

    # Thin Python wrapper handed to compile_internal below.
    def _ZmIR13__nv_bfloat16RKS__nbst_caller(arg_0, arg_1):
        return _ZmIR13__nv_bfloat16RKS__nbst(arg_0, arg_1)

    @lower(operator.isub, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Allocate all slots first, then fill them, so every value can be
        # passed to the shim by address.
        arg_types = sig.args
        slots = [
            builder.alloca(context.get_value_type(ty)) for ty in arg_types
        ]
        for slot, ty, value in zip(slots, arg_types, args):
            alignment = getattr(ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZmIR13__nv_bfloat16RKS__nbst_caller,
            call_sig,
            slots,
        )


# Run the registration immediately so the lowering exists after import.
_lower__ZmIR13__nv_bfloat16RKS__nbst(shim_stream, shim_obj)


def _lower__ZmLR13__nv_bfloat16RKS__nbst(shim_stream, shim_obj):
    """Register the lowering of ``operator.imul`` for two ``__nv_bfloat16``.

    The registered implementation links ``shim_obj``, records the C++ shim
    source in ``shim_stream`` under the shim's symbol name, stores both
    operands into fresh ``alloca`` slots and compiles a call to the shim with
    the slots' addresses. The shim applies ``operator*=`` through those
    pointers and assigns the outcome to ``retval``.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_key = "_ZmLR13__nv_bfloat16RKS__nbst"
    shim_source = """
    extern "C" __device__ int
    _ZmLR13__nv_bfloat16RKS__nbst(__nv_bfloat16 &retval , __nv_bfloat16* lh, __nv_bfloat16* rh) {
        retval = operator*=(*lh, *rh);
        return 0;
    }
        """
    # Python-side view of the shim: both operands are passed by pointer.
    call_sig = signature(
        _type___nv_bfloat16,
        CPointer(_type___nv_bfloat16),
        CPointer(_type___nv_bfloat16),
    )

    _ZmLR13__nv_bfloat16RKS__nbst = declare_device(
        shim_key,
        _type___nv_bfloat16(
            CPointer(_type___nv_bfloat16), CPointer(_type___nv_bfloat16)
        ),
    )

    # Thin Python wrapper handed to compile_internal below.
    def _ZmLR13__nv_bfloat16RKS__nbst_caller(arg_0, arg_1):
        return _ZmLR13__nv_bfloat16RKS__nbst(arg_0, arg_1)

    @lower(operator.imul, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Allocate all slots first, then fill them, so every value can be
        # passed to the shim by address.
        arg_types = sig.args
        slots = [
            builder.alloca(context.get_value_type(ty)) for ty in arg_types
        ]
        for slot, ty, value in zip(slots, arg_types, args):
            alignment = getattr(ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZmLR13__nv_bfloat16RKS__nbst_caller,
            call_sig,
            slots,
        )


# Run the registration immediately so the lowering exists after import.
_lower__ZmLR13__nv_bfloat16RKS__nbst(shim_stream, shim_obj)


def _lower__ZdVR13__nv_bfloat16RKS__nbst(shim_stream, shim_obj):
    """Register the lowering of ``operator.itruediv`` for two ``__nv_bfloat16``.

    The registered implementation links ``shim_obj``, records the C++ shim
    source in ``shim_stream`` under the shim's symbol name, stores both
    operands into fresh ``alloca`` slots and compiles a call to the shim with
    the slots' addresses. The shim applies ``operator/=`` through those
    pointers and assigns the outcome to ``retval``.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_key = "_ZdVR13__nv_bfloat16RKS__nbst"
    shim_source = """
    extern "C" __device__ int
    _ZdVR13__nv_bfloat16RKS__nbst(__nv_bfloat16 &retval , __nv_bfloat16* lh, __nv_bfloat16* rh) {
        retval = operator/=(*lh, *rh);
        return 0;
    }
        """
    # Python-side view of the shim: both operands are passed by pointer.
    call_sig = signature(
        _type___nv_bfloat16,
        CPointer(_type___nv_bfloat16),
        CPointer(_type___nv_bfloat16),
    )

    _ZdVR13__nv_bfloat16RKS__nbst = declare_device(
        shim_key,
        _type___nv_bfloat16(
            CPointer(_type___nv_bfloat16), CPointer(_type___nv_bfloat16)
        ),
    )

    # Thin Python wrapper handed to compile_internal below.
    def _ZdVR13__nv_bfloat16RKS__nbst_caller(arg_0, arg_1):
        return _ZdVR13__nv_bfloat16RKS__nbst(arg_0, arg_1)

    @lower(operator.itruediv, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Allocate all slots first, then fill them, so every value can be
        # passed to the shim by address.
        arg_types = sig.args
        slots = [
            builder.alloca(context.get_value_type(ty)) for ty in arg_types
        ]
        for slot, ty, value in zip(slots, arg_types, args):
            alignment = getattr(ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZdVR13__nv_bfloat16RKS__nbst_caller,
            call_sig,
            slots,
        )


# Run the registration immediately so the lowering exists after import.
_lower__ZdVR13__nv_bfloat16RKS__nbst(shim_stream, shim_obj)


def _lower__ZpsRK13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Register the lowering of ``operator.pos`` for one ``__nv_bfloat16``.

    The registered implementation links ``shim_obj``, records the C++ shim
    source in ``shim_stream`` under the shim's symbol name, stores the operand
    into a fresh ``alloca`` slot and compiles a call to the shim with the
    slot's address.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_key = "_ZpsRK13__nv_bfloat16_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZpsRK13__nv_bfloat16_nbst(__nv_bfloat16 &retval , __nv_bfloat16* h) {
        retval = operator+(*h);
        return 0;
    }
        """
    # Python-side view of the shim: the operand is passed by pointer.
    call_sig = signature(_type___nv_bfloat16, CPointer(_type___nv_bfloat16))

    _ZpsRK13__nv_bfloat16_nbst = declare_device(
        shim_key,
        _type___nv_bfloat16(CPointer(_type___nv_bfloat16)),
    )

    # Thin Python wrapper handed to compile_internal below.
    def _ZpsRK13__nv_bfloat16_nbst_caller(arg_0):
        return _ZpsRK13__nv_bfloat16_nbst(arg_0)

    @lower(operator.pos, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Allocate all slots first, then fill them, so every value can be
        # passed to the shim by address.
        arg_types = sig.args
        slots = [
            builder.alloca(context.get_value_type(ty)) for ty in arg_types
        ]
        for slot, ty, value in zip(slots, arg_types, args):
            alignment = getattr(ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZpsRK13__nv_bfloat16_nbst_caller,
            call_sig,
            slots,
        )


# Run the registration immediately so the lowering exists after import.
_lower__ZpsRK13__nv_bfloat16_nbst(shim_stream, shim_obj)


def _lower__ZngRK13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Register the lowering of ``operator.neg`` for one ``__nv_bfloat16``.

    The registered implementation links ``shim_obj``, records the C++ shim
    source in ``shim_stream`` under the shim's symbol name, stores the operand
    into a fresh ``alloca`` slot and compiles a call to the shim with the
    slot's address.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_key = "_ZngRK13__nv_bfloat16_nbst"
    shim_source = """
    extern "C" __device__ int
    _ZngRK13__nv_bfloat16_nbst(__nv_bfloat16 &retval , __nv_bfloat16* h) {
        retval = operator-(*h);
        return 0;
    }
        """
    # Python-side view of the shim: the operand is passed by pointer.
    call_sig = signature(_type___nv_bfloat16, CPointer(_type___nv_bfloat16))

    _ZngRK13__nv_bfloat16_nbst = declare_device(
        shim_key,
        _type___nv_bfloat16(CPointer(_type___nv_bfloat16)),
    )

    # Thin Python wrapper handed to compile_internal below.
    def _ZngRK13__nv_bfloat16_nbst_caller(arg_0):
        return _ZngRK13__nv_bfloat16_nbst(arg_0)

    @lower(operator.neg, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Allocate all slots first, then fill them, so every value can be
        # passed to the shim by address.
        arg_types = sig.args
        slots = [
            builder.alloca(context.get_value_type(ty)) for ty in arg_types
        ]
        for slot, ty, value in zip(slots, arg_types, args):
            alignment = getattr(ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZngRK13__nv_bfloat16_nbst_caller,
            call_sig,
            slots,
        )


# Run the registration immediately so the lowering exists after import.
_lower__ZngRK13__nv_bfloat16_nbst(shim_stream, shim_obj)


def _lower__ZeqRK13__nv_bfloat16S1__nbst(shim_stream, shim_obj):
    """Register the lowering of ``operator.eq`` for two ``__nv_bfloat16``.

    The shim is declared with a ``bool_`` return type. The registered
    implementation links ``shim_obj``, records the C++ shim source in
    ``shim_stream`` under the shim's symbol name, stores both operands into
    fresh ``alloca`` slots and compiles a call to the shim with the slots'
    addresses.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_key = "_ZeqRK13__nv_bfloat16S1__nbst"
    shim_source = """
    extern "C" __device__ int
    _ZeqRK13__nv_bfloat16S1__nbst(bool &retval , __nv_bfloat16* lh, __nv_bfloat16* rh) {
        retval = operator==(*lh, *rh);
        return 0;
    }
        """
    # Python-side view of the shim: both operands are passed by pointer.
    call_sig = signature(
        bool_,
        CPointer(_type___nv_bfloat16),
        CPointer(_type___nv_bfloat16),
    )

    _ZeqRK13__nv_bfloat16S1__nbst = declare_device(
        shim_key,
        bool_(CPointer(_type___nv_bfloat16), CPointer(_type___nv_bfloat16)),
    )

    # Thin Python wrapper handed to compile_internal below.
    def _ZeqRK13__nv_bfloat16S1__nbst_caller(arg_0, arg_1):
        return _ZeqRK13__nv_bfloat16S1__nbst(arg_0, arg_1)

    @lower(operator.eq, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Allocate all slots first, then fill them, so every value can be
        # passed to the shim by address.
        arg_types = sig.args
        slots = [
            builder.alloca(context.get_value_type(ty)) for ty in arg_types
        ]
        for slot, ty, value in zip(slots, arg_types, args):
            alignment = getattr(ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZeqRK13__nv_bfloat16S1__nbst_caller,
            call_sig,
            slots,
        )


# Run the registration immediately so the lowering exists after import.
_lower__ZeqRK13__nv_bfloat16S1__nbst(shim_stream, shim_obj)


def _lower__ZneRK13__nv_bfloat16S1__nbst(shim_stream, shim_obj):
    """Register the lowering of ``operator.ne`` for two ``__nv_bfloat16``.

    The shim is declared with a ``bool_`` return type. The registered
    implementation links ``shim_obj``, records the C++ shim source in
    ``shim_stream`` under the shim's symbol name, stores both operands into
    fresh ``alloca`` slots and compiles a call to the shim with the slots'
    addresses.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_key = "_ZneRK13__nv_bfloat16S1__nbst"
    shim_source = """
    extern "C" __device__ int
    _ZneRK13__nv_bfloat16S1__nbst(bool &retval , __nv_bfloat16* lh, __nv_bfloat16* rh) {
        retval = operator!=(*lh, *rh);
        return 0;
    }
        """
    # Python-side view of the shim: both operands are passed by pointer.
    call_sig = signature(
        bool_,
        CPointer(_type___nv_bfloat16),
        CPointer(_type___nv_bfloat16),
    )

    _ZneRK13__nv_bfloat16S1__nbst = declare_device(
        shim_key,
        bool_(CPointer(_type___nv_bfloat16), CPointer(_type___nv_bfloat16)),
    )

    # Thin Python wrapper handed to compile_internal below.
    def _ZneRK13__nv_bfloat16S1__nbst_caller(arg_0, arg_1):
        return _ZneRK13__nv_bfloat16S1__nbst(arg_0, arg_1)

    @lower(operator.ne, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Allocate all slots first, then fill them, so every value can be
        # passed to the shim by address.
        arg_types = sig.args
        slots = [
            builder.alloca(context.get_value_type(ty)) for ty in arg_types
        ]
        for slot, ty, value in zip(slots, arg_types, args):
            alignment = getattr(ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZneRK13__nv_bfloat16S1__nbst_caller,
            call_sig,
            slots,
        )


# Run the registration immediately so the lowering exists after import.
_lower__ZneRK13__nv_bfloat16S1__nbst(shim_stream, shim_obj)


def _lower__ZgtRK13__nv_bfloat16S1__nbst(shim_stream, shim_obj):
    """Register the lowering of ``operator.gt`` for two ``__nv_bfloat16``.

    The shim is declared with a ``bool_`` return type. The registered
    implementation links ``shim_obj``, records the C++ shim source in
    ``shim_stream`` under the shim's symbol name, stores both operands into
    fresh ``alloca`` slots and compiles a call to the shim with the slots'
    addresses.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_key = "_ZgtRK13__nv_bfloat16S1__nbst"
    shim_source = """
    extern "C" __device__ int
    _ZgtRK13__nv_bfloat16S1__nbst(bool &retval , __nv_bfloat16* lh, __nv_bfloat16* rh) {
        retval = operator>(*lh, *rh);
        return 0;
    }
        """
    # Python-side view of the shim: both operands are passed by pointer.
    call_sig = signature(
        bool_,
        CPointer(_type___nv_bfloat16),
        CPointer(_type___nv_bfloat16),
    )

    _ZgtRK13__nv_bfloat16S1__nbst = declare_device(
        shim_key,
        bool_(CPointer(_type___nv_bfloat16), CPointer(_type___nv_bfloat16)),
    )

    # Thin Python wrapper handed to compile_internal below.
    def _ZgtRK13__nv_bfloat16S1__nbst_caller(arg_0, arg_1):
        return _ZgtRK13__nv_bfloat16S1__nbst(arg_0, arg_1)

    @lower(operator.gt, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_key, shim_source)

        # Allocate all slots first, then fill them, so every value can be
        # passed to the shim by address.
        arg_types = sig.args
        slots = [
            builder.alloca(context.get_value_type(ty)) for ty in arg_types
        ]
        for slot, ty, value in zip(slots, arg_types, args):
            alignment = getattr(ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        return context.compile_internal(
            builder,
            _ZgtRK13__nv_bfloat16S1__nbst_caller,
            call_sig,
            slots,
        )


# Run the registration immediately so the lowering exists after import.
_lower__ZgtRK13__nv_bfloat16S1__nbst(shim_stream, shim_obj)


def _lower__ZltRK13__nv_bfloat16S1__nbst(shim_stream, shim_obj):
    """Install the ``operator.lt`` lowering for two ``__nv_bfloat16`` values.

    Args:
        shim_stream: Receives the C++ shim source through ``write_with_key``
            whenever the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            whenever the lowering runs.
    """
    symbol = "_ZltRK13__nv_bfloat16S1__nbst"
    operand_ptr = CPointer(_type___nv_bfloat16)
    shim_source = """
    extern "C" __device__ int
    _ZltRK13__nv_bfloat16S1__nbst(bool &retval , __nv_bfloat16* lh, __nv_bfloat16* rh) {
        retval = operator<(*lh, *rh);
        return 0;
    }
        """

    _ZltRK13__nv_bfloat16S1__nbst = declare_device(
        symbol, bool_(operand_ptr, operand_ptr)
    )

    def _ZltRK13__nv_bfloat16S1__nbst_caller(arg_0, arg_1):
        return _ZltRK13__nv_bfloat16S1__nbst(arg_0, arg_1)

    @lower(operator.lt, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its operands by pointer: allocate one stack slot
        # per argument first, then store each value into its slot.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZltRK13__nv_bfloat16S1__nbst_caller,
            signature(bool_, operand_ptr, operand_ptr),
            slots,
        )


_lower__ZltRK13__nv_bfloat16S1__nbst(shim_stream, shim_obj)


def _lower__ZgeRK13__nv_bfloat16S1__nbst(shim_stream, shim_obj):
    """Install the ``operator.ge`` lowering for two ``__nv_bfloat16`` values.

    Args:
        shim_stream: Receives the C++ shim source through ``write_with_key``
            whenever the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            whenever the lowering runs.
    """
    symbol = "_ZgeRK13__nv_bfloat16S1__nbst"
    operand_ptr = CPointer(_type___nv_bfloat16)
    shim_source = """
    extern "C" __device__ int
    _ZgeRK13__nv_bfloat16S1__nbst(bool &retval , __nv_bfloat16* lh, __nv_bfloat16* rh) {
        retval = operator>=(*lh, *rh);
        return 0;
    }
        """

    _ZgeRK13__nv_bfloat16S1__nbst = declare_device(
        symbol, bool_(operand_ptr, operand_ptr)
    )

    def _ZgeRK13__nv_bfloat16S1__nbst_caller(arg_0, arg_1):
        return _ZgeRK13__nv_bfloat16S1__nbst(arg_0, arg_1)

    @lower(operator.ge, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its operands by pointer: allocate one stack slot
        # per argument first, then store each value into its slot.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZgeRK13__nv_bfloat16S1__nbst_caller,
            signature(bool_, operand_ptr, operand_ptr),
            slots,
        )


_lower__ZgeRK13__nv_bfloat16S1__nbst(shim_stream, shim_obj)


def _lower__ZleRK13__nv_bfloat16S1__nbst(shim_stream, shim_obj):
    """Install the ``operator.le`` lowering for two ``__nv_bfloat16`` values.

    Args:
        shim_stream: Receives the C++ shim source through ``write_with_key``
            whenever the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            whenever the lowering runs.
    """
    symbol = "_ZleRK13__nv_bfloat16S1__nbst"
    operand_ptr = CPointer(_type___nv_bfloat16)
    shim_source = """
    extern "C" __device__ int
    _ZleRK13__nv_bfloat16S1__nbst(bool &retval , __nv_bfloat16* lh, __nv_bfloat16* rh) {
        retval = operator<=(*lh, *rh);
        return 0;
    }
        """

    _ZleRK13__nv_bfloat16S1__nbst = declare_device(
        symbol, bool_(operand_ptr, operand_ptr)
    )

    def _ZleRK13__nv_bfloat16S1__nbst_caller(arg_0, arg_1):
        return _ZleRK13__nv_bfloat16S1__nbst(arg_0, arg_1)

    @lower(operator.le, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its operands by pointer: allocate one stack slot
        # per argument first, then store each value into its slot.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZleRK13__nv_bfloat16S1__nbst_caller,
            signature(bool_, operand_ptr, operand_ptr),
            slots,
        )


_lower__ZleRK13__nv_bfloat16S1__nbst(shim_stream, shim_obj)


def _lower__ZplRK14__nv_bfloat162S1__nbst(shim_stream, shim_obj):
    """Install the ``operator.add`` lowering for two ``__nv_bfloat162`` values.

    Args:
        shim_stream: Receives the C++ shim source through ``write_with_key``
            whenever the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            whenever the lowering runs.
    """
    symbol = "_ZplRK14__nv_bfloat162S1__nbst"
    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_source = """
    extern "C" __device__ int
    _ZplRK14__nv_bfloat162S1__nbst(__nv_bfloat162 &retval , __nv_bfloat162* lh, __nv_bfloat162* rh) {
        retval = operator+(*lh, *rh);
        return 0;
    }
        """

    _ZplRK14__nv_bfloat162S1__nbst = declare_device(
        symbol, _type___nv_bfloat162(operand_ptr, operand_ptr)
    )

    def _ZplRK14__nv_bfloat162S1__nbst_caller(arg_0, arg_1):
        return _ZplRK14__nv_bfloat162S1__nbst(arg_0, arg_1)

    @lower(operator.add, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its operands by pointer: allocate one stack slot
        # per argument first, then store each value into its slot.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZplRK14__nv_bfloat162S1__nbst_caller,
            signature(_type___nv_bfloat162, operand_ptr, operand_ptr),
            slots,
        )


_lower__ZplRK14__nv_bfloat162S1__nbst(shim_stream, shim_obj)


def _lower__ZmiRK14__nv_bfloat162S1__nbst(shim_stream, shim_obj):
    """Install the ``operator.sub`` lowering for two ``__nv_bfloat162`` values.

    Args:
        shim_stream: Receives the C++ shim source through ``write_with_key``
            whenever the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            whenever the lowering runs.
    """
    symbol = "_ZmiRK14__nv_bfloat162S1__nbst"
    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_source = """
    extern "C" __device__ int
    _ZmiRK14__nv_bfloat162S1__nbst(__nv_bfloat162 &retval , __nv_bfloat162* lh, __nv_bfloat162* rh) {
        retval = operator-(*lh, *rh);
        return 0;
    }
        """

    _ZmiRK14__nv_bfloat162S1__nbst = declare_device(
        symbol, _type___nv_bfloat162(operand_ptr, operand_ptr)
    )

    def _ZmiRK14__nv_bfloat162S1__nbst_caller(arg_0, arg_1):
        return _ZmiRK14__nv_bfloat162S1__nbst(arg_0, arg_1)

    @lower(operator.sub, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its operands by pointer: allocate one stack slot
        # per argument first, then store each value into its slot.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZmiRK14__nv_bfloat162S1__nbst_caller,
            signature(_type___nv_bfloat162, operand_ptr, operand_ptr),
            slots,
        )


_lower__ZmiRK14__nv_bfloat162S1__nbst(shim_stream, shim_obj)


def _lower__ZmlRK14__nv_bfloat162S1__nbst(shim_stream, shim_obj):
    """Install the ``operator.mul`` lowering for two ``__nv_bfloat162`` values.

    Args:
        shim_stream: Receives the C++ shim source through ``write_with_key``
            whenever the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            whenever the lowering runs.
    """
    symbol = "_ZmlRK14__nv_bfloat162S1__nbst"
    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_source = """
    extern "C" __device__ int
    _ZmlRK14__nv_bfloat162S1__nbst(__nv_bfloat162 &retval , __nv_bfloat162* lh, __nv_bfloat162* rh) {
        retval = operator*(*lh, *rh);
        return 0;
    }
        """

    _ZmlRK14__nv_bfloat162S1__nbst = declare_device(
        symbol, _type___nv_bfloat162(operand_ptr, operand_ptr)
    )

    def _ZmlRK14__nv_bfloat162S1__nbst_caller(arg_0, arg_1):
        return _ZmlRK14__nv_bfloat162S1__nbst(arg_0, arg_1)

    @lower(operator.mul, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its operands by pointer: allocate one stack slot
        # per argument first, then store each value into its slot.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZmlRK14__nv_bfloat162S1__nbst_caller,
            signature(_type___nv_bfloat162, operand_ptr, operand_ptr),
            slots,
        )


_lower__ZmlRK14__nv_bfloat162S1__nbst(shim_stream, shim_obj)


def _lower__ZdvRK14__nv_bfloat162S1__nbst(shim_stream, shim_obj):
    """Install the ``operator.truediv`` lowering for two ``__nv_bfloat162``.

    Args:
        shim_stream: Receives the C++ shim source through ``write_with_key``
            whenever the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            whenever the lowering runs.
    """
    symbol = "_ZdvRK14__nv_bfloat162S1__nbst"
    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_source = """
    extern "C" __device__ int
    _ZdvRK14__nv_bfloat162S1__nbst(__nv_bfloat162 &retval , __nv_bfloat162* lh, __nv_bfloat162* rh) {
        retval = operator/(*lh, *rh);
        return 0;
    }
        """

    _ZdvRK14__nv_bfloat162S1__nbst = declare_device(
        symbol, _type___nv_bfloat162(operand_ptr, operand_ptr)
    )

    def _ZdvRK14__nv_bfloat162S1__nbst_caller(arg_0, arg_1):
        return _ZdvRK14__nv_bfloat162S1__nbst(arg_0, arg_1)

    @lower(operator.truediv, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its operands by pointer: allocate one stack slot
        # per argument first, then store each value into its slot.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZdvRK14__nv_bfloat162S1__nbst_caller,
            signature(_type___nv_bfloat162, operand_ptr, operand_ptr),
            slots,
        )


_lower__ZdvRK14__nv_bfloat162S1__nbst(shim_stream, shim_obj)


def _lower__ZpLR14__nv_bfloat162RKS__nbst(shim_stream, shim_obj):
    """Install the ``operator.iadd`` lowering for two ``__nv_bfloat162``.

    The shim applies C++ ``operator+=`` to the stack copies of the operands
    and hands the result back through ``retval``.

    Args:
        shim_stream: Receives the C++ shim source through ``write_with_key``
            whenever the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            whenever the lowering runs.
    """
    symbol = "_ZpLR14__nv_bfloat162RKS__nbst"
    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_source = """
    extern "C" __device__ int
    _ZpLR14__nv_bfloat162RKS__nbst(__nv_bfloat162 &retval , __nv_bfloat162* lh, __nv_bfloat162* rh) {
        retval = operator+=(*lh, *rh);
        return 0;
    }
        """

    _ZpLR14__nv_bfloat162RKS__nbst = declare_device(
        symbol, _type___nv_bfloat162(operand_ptr, operand_ptr)
    )

    def _ZpLR14__nv_bfloat162RKS__nbst_caller(arg_0, arg_1):
        return _ZpLR14__nv_bfloat162RKS__nbst(arg_0, arg_1)

    @lower(operator.iadd, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its operands by pointer: allocate one stack slot
        # per argument first, then store each value into its slot.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZpLR14__nv_bfloat162RKS__nbst_caller,
            signature(_type___nv_bfloat162, operand_ptr, operand_ptr),
            slots,
        )


_lower__ZpLR14__nv_bfloat162RKS__nbst(shim_stream, shim_obj)


def _lower__ZmIR14__nv_bfloat162RKS__nbst(shim_stream, shim_obj):
    """Install the ``operator.isub`` lowering for two ``__nv_bfloat162``.

    The shim applies C++ ``operator-=`` to the stack copies of the operands
    and hands the result back through ``retval``.

    Args:
        shim_stream: Receives the C++ shim source through ``write_with_key``
            whenever the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            whenever the lowering runs.
    """
    symbol = "_ZmIR14__nv_bfloat162RKS__nbst"
    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_source = """
    extern "C" __device__ int
    _ZmIR14__nv_bfloat162RKS__nbst(__nv_bfloat162 &retval , __nv_bfloat162* lh, __nv_bfloat162* rh) {
        retval = operator-=(*lh, *rh);
        return 0;
    }
        """

    _ZmIR14__nv_bfloat162RKS__nbst = declare_device(
        symbol, _type___nv_bfloat162(operand_ptr, operand_ptr)
    )

    def _ZmIR14__nv_bfloat162RKS__nbst_caller(arg_0, arg_1):
        return _ZmIR14__nv_bfloat162RKS__nbst(arg_0, arg_1)

    @lower(operator.isub, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its operands by pointer: allocate one stack slot
        # per argument first, then store each value into its slot.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZmIR14__nv_bfloat162RKS__nbst_caller,
            signature(_type___nv_bfloat162, operand_ptr, operand_ptr),
            slots,
        )


_lower__ZmIR14__nv_bfloat162RKS__nbst(shim_stream, shim_obj)


def _lower__ZmLR14__nv_bfloat162RKS__nbst(shim_stream, shim_obj):
    """Install the ``operator.imul`` lowering for two ``__nv_bfloat162``.

    The shim applies C++ ``operator*=`` to the stack copies of the operands
    and hands the result back through ``retval``.

    Args:
        shim_stream: Receives the C++ shim source through ``write_with_key``
            whenever the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            whenever the lowering runs.
    """
    symbol = "_ZmLR14__nv_bfloat162RKS__nbst"
    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_source = """
    extern "C" __device__ int
    _ZmLR14__nv_bfloat162RKS__nbst(__nv_bfloat162 &retval , __nv_bfloat162* lh, __nv_bfloat162* rh) {
        retval = operator*=(*lh, *rh);
        return 0;
    }
        """

    _ZmLR14__nv_bfloat162RKS__nbst = declare_device(
        symbol, _type___nv_bfloat162(operand_ptr, operand_ptr)
    )

    def _ZmLR14__nv_bfloat162RKS__nbst_caller(arg_0, arg_1):
        return _ZmLR14__nv_bfloat162RKS__nbst(arg_0, arg_1)

    @lower(operator.imul, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its operands by pointer: allocate one stack slot
        # per argument first, then store each value into its slot.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZmLR14__nv_bfloat162RKS__nbst_caller,
            signature(_type___nv_bfloat162, operand_ptr, operand_ptr),
            slots,
        )


_lower__ZmLR14__nv_bfloat162RKS__nbst(shim_stream, shim_obj)


def _lower__ZdVR14__nv_bfloat162RKS__nbst(shim_stream, shim_obj):
    """Install the ``operator.itruediv`` lowering for two ``__nv_bfloat162``.

    The shim applies C++ ``operator/=`` to the stack copies of the operands
    and hands the result back through ``retval``.

    Args:
        shim_stream: Receives the C++ shim source through ``write_with_key``
            whenever the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            whenever the lowering runs.
    """
    symbol = "_ZdVR14__nv_bfloat162RKS__nbst"
    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_source = """
    extern "C" __device__ int
    _ZdVR14__nv_bfloat162RKS__nbst(__nv_bfloat162 &retval , __nv_bfloat162* lh, __nv_bfloat162* rh) {
        retval = operator/=(*lh, *rh);
        return 0;
    }
        """

    _ZdVR14__nv_bfloat162RKS__nbst = declare_device(
        symbol, _type___nv_bfloat162(operand_ptr, operand_ptr)
    )

    def _ZdVR14__nv_bfloat162RKS__nbst_caller(arg_0, arg_1):
        return _ZdVR14__nv_bfloat162RKS__nbst(arg_0, arg_1)

    @lower(operator.itruediv, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its operands by pointer: allocate one stack slot
        # per argument first, then store each value into its slot.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZdVR14__nv_bfloat162RKS__nbst_caller,
            signature(_type___nv_bfloat162, operand_ptr, operand_ptr),
            slots,
        )


_lower__ZdVR14__nv_bfloat162RKS__nbst(shim_stream, shim_obj)


def _lower__ZpsRK14__nv_bfloat162_nbst(shim_stream, shim_obj):
    """Install the ``operator.pos`` lowering for a ``__nv_bfloat162`` value.

    Args:
        shim_stream: Receives the C++ shim source through ``write_with_key``
            whenever the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            whenever the lowering runs.
    """
    symbol = "_ZpsRK14__nv_bfloat162_nbst"
    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_source = """
    extern "C" __device__ int
    _ZpsRK14__nv_bfloat162_nbst(__nv_bfloat162 &retval , __nv_bfloat162* h) {
        retval = operator+(*h);
        return 0;
    }
        """

    _ZpsRK14__nv_bfloat162_nbst = declare_device(
        symbol, _type___nv_bfloat162(operand_ptr)
    )

    def _ZpsRK14__nv_bfloat162_nbst_caller(arg_0):
        return _ZpsRK14__nv_bfloat162_nbst(arg_0)

    @lower(operator.pos, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its operand by pointer: allocate the stack slot(s)
        # first, then store each value into its slot.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZpsRK14__nv_bfloat162_nbst_caller,
            signature(_type___nv_bfloat162, operand_ptr),
            slots,
        )


_lower__ZpsRK14__nv_bfloat162_nbst(shim_stream, shim_obj)


def _lower__ZngRK14__nv_bfloat162_nbst(shim_stream, shim_obj):
    """Install the ``operator.neg`` lowering for a ``__nv_bfloat162`` value.

    Args:
        shim_stream: Receives the C++ shim source through ``write_with_key``
            whenever the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            whenever the lowering runs.
    """
    symbol = "_ZngRK14__nv_bfloat162_nbst"
    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_source = """
    extern "C" __device__ int
    _ZngRK14__nv_bfloat162_nbst(__nv_bfloat162 &retval , __nv_bfloat162* h) {
        retval = operator-(*h);
        return 0;
    }
        """

    _ZngRK14__nv_bfloat162_nbst = declare_device(
        symbol, _type___nv_bfloat162(operand_ptr)
    )

    def _ZngRK14__nv_bfloat162_nbst_caller(arg_0):
        return _ZngRK14__nv_bfloat162_nbst(arg_0)

    @lower(operator.neg, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its operand by pointer: allocate the stack slot(s)
        # first, then store each value into its slot.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZngRK14__nv_bfloat162_nbst_caller,
            signature(_type___nv_bfloat162, operand_ptr),
            slots,
        )


_lower__ZngRK14__nv_bfloat162_nbst(shim_stream, shim_obj)


def _lower__ZeqRK14__nv_bfloat162S1__nbst(shim_stream, shim_obj):
    """Install the ``operator.eq`` lowering for two ``__nv_bfloat162`` values.

    Args:
        shim_stream: Receives the C++ shim source through ``write_with_key``
            whenever the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            whenever the lowering runs.
    """
    symbol = "_ZeqRK14__nv_bfloat162S1__nbst"
    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_source = """
    extern "C" __device__ int
    _ZeqRK14__nv_bfloat162S1__nbst(bool &retval , __nv_bfloat162* lh, __nv_bfloat162* rh) {
        retval = operator==(*lh, *rh);
        return 0;
    }
        """

    _ZeqRK14__nv_bfloat162S1__nbst = declare_device(
        symbol, bool_(operand_ptr, operand_ptr)
    )

    def _ZeqRK14__nv_bfloat162S1__nbst_caller(arg_0, arg_1):
        return _ZeqRK14__nv_bfloat162S1__nbst(arg_0, arg_1)

    @lower(operator.eq, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its operands by pointer: allocate one stack slot
        # per argument first, then store each value into its slot.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZeqRK14__nv_bfloat162S1__nbst_caller,
            signature(bool_, operand_ptr, operand_ptr),
            slots,
        )


_lower__ZeqRK14__nv_bfloat162S1__nbst(shim_stream, shim_obj)


def _lower__ZneRK14__nv_bfloat162S1__nbst(shim_stream, shim_obj):
    """Install the ``operator.ne`` lowering for two ``__nv_bfloat162`` values.

    Args:
        shim_stream: Receives the C++ shim source through ``write_with_key``
            whenever the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            whenever the lowering runs.
    """
    symbol = "_ZneRK14__nv_bfloat162S1__nbst"
    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_source = """
    extern "C" __device__ int
    _ZneRK14__nv_bfloat162S1__nbst(bool &retval , __nv_bfloat162* lh, __nv_bfloat162* rh) {
        retval = operator!=(*lh, *rh);
        return 0;
    }
        """

    _ZneRK14__nv_bfloat162S1__nbst = declare_device(
        symbol, bool_(operand_ptr, operand_ptr)
    )

    def _ZneRK14__nv_bfloat162S1__nbst_caller(arg_0, arg_1):
        return _ZneRK14__nv_bfloat162S1__nbst(arg_0, arg_1)

    @lower(operator.ne, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its operands by pointer: allocate one stack slot
        # per argument first, then store each value into its slot.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZneRK14__nv_bfloat162S1__nbst_caller,
            signature(bool_, operand_ptr, operand_ptr),
            slots,
        )


_lower__ZneRK14__nv_bfloat162S1__nbst(shim_stream, shim_obj)


def _lower__ZgtRK14__nv_bfloat162S1__nbst(shim_stream, shim_obj):
    """Install the ``operator.gt`` lowering for two ``__nv_bfloat162`` values.

    Args:
        shim_stream: Receives the C++ shim source through ``write_with_key``
            whenever the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            whenever the lowering runs.
    """
    symbol = "_ZgtRK14__nv_bfloat162S1__nbst"
    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_source = """
    extern "C" __device__ int
    _ZgtRK14__nv_bfloat162S1__nbst(bool &retval , __nv_bfloat162* lh, __nv_bfloat162* rh) {
        retval = operator>(*lh, *rh);
        return 0;
    }
        """

    _ZgtRK14__nv_bfloat162S1__nbst = declare_device(
        symbol, bool_(operand_ptr, operand_ptr)
    )

    def _ZgtRK14__nv_bfloat162S1__nbst_caller(arg_0, arg_1):
        return _ZgtRK14__nv_bfloat162S1__nbst(arg_0, arg_1)

    @lower(operator.gt, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its operands by pointer: allocate one stack slot
        # per argument first, then store each value into its slot.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZgtRK14__nv_bfloat162S1__nbst_caller,
            signature(bool_, operand_ptr, operand_ptr),
            slots,
        )


_lower__ZgtRK14__nv_bfloat162S1__nbst(shim_stream, shim_obj)


def _lower__ZltRK14__nv_bfloat162S1__nbst(shim_stream, shim_obj):
    """Install the ``operator.lt`` lowering for two ``__nv_bfloat162`` values.

    Args:
        shim_stream: Receives the C++ shim source through ``write_with_key``
            whenever the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            whenever the lowering runs.
    """
    symbol = "_ZltRK14__nv_bfloat162S1__nbst"
    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_source = """
    extern "C" __device__ int
    _ZltRK14__nv_bfloat162S1__nbst(bool &retval , __nv_bfloat162* lh, __nv_bfloat162* rh) {
        retval = operator<(*lh, *rh);
        return 0;
    }
        """

    _ZltRK14__nv_bfloat162S1__nbst = declare_device(
        symbol, bool_(operand_ptr, operand_ptr)
    )

    def _ZltRK14__nv_bfloat162S1__nbst_caller(arg_0, arg_1):
        return _ZltRK14__nv_bfloat162S1__nbst(arg_0, arg_1)

    @lower(operator.lt, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its operands by pointer: allocate one stack slot
        # per argument first, then store each value into its slot.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZltRK14__nv_bfloat162S1__nbst_caller,
            signature(bool_, operand_ptr, operand_ptr),
            slots,
        )


_lower__ZltRK14__nv_bfloat162S1__nbst(shim_stream, shim_obj)


def _lower__ZgeRK14__nv_bfloat162S1__nbst(shim_stream, shim_obj):
    """Install the ``operator.ge`` lowering for two ``__nv_bfloat162`` values.

    Args:
        shim_stream: Receives the C++ shim source through ``write_with_key``
            whenever the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            whenever the lowering runs.
    """
    symbol = "_ZgeRK14__nv_bfloat162S1__nbst"
    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_source = """
    extern "C" __device__ int
    _ZgeRK14__nv_bfloat162S1__nbst(bool &retval , __nv_bfloat162* lh, __nv_bfloat162* rh) {
        retval = operator>=(*lh, *rh);
        return 0;
    }
        """

    _ZgeRK14__nv_bfloat162S1__nbst = declare_device(
        symbol, bool_(operand_ptr, operand_ptr)
    )

    def _ZgeRK14__nv_bfloat162S1__nbst_caller(arg_0, arg_1):
        return _ZgeRK14__nv_bfloat162S1__nbst(arg_0, arg_1)

    @lower(operator.ge, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its operands by pointer: allocate one stack slot
        # per argument first, then store each value into its slot.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZgeRK14__nv_bfloat162S1__nbst_caller,
            signature(bool_, operand_ptr, operand_ptr),
            slots,
        )


_lower__ZgeRK14__nv_bfloat162S1__nbst(shim_stream, shim_obj)


def _lower__ZleRK14__nv_bfloat162S1__nbst(shim_stream, shim_obj):
    """Install the ``operator.le`` lowering for two ``__nv_bfloat162`` values.

    Args:
        shim_stream: Receives the C++ shim source through ``write_with_key``
            whenever the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            whenever the lowering runs.
    """
    symbol = "_ZleRK14__nv_bfloat162S1__nbst"
    operand_ptr = CPointer(_type___nv_bfloat162)
    shim_source = """
    extern "C" __device__ int
    _ZleRK14__nv_bfloat162S1__nbst(bool &retval , __nv_bfloat162* lh, __nv_bfloat162* rh) {
        retval = operator<=(*lh, *rh);
        return 0;
    }
        """

    _ZleRK14__nv_bfloat162S1__nbst = declare_device(
        symbol, bool_(operand_ptr, operand_ptr)
    )

    def _ZleRK14__nv_bfloat162S1__nbst_caller(arg_0, arg_1):
        return _ZleRK14__nv_bfloat162S1__nbst(arg_0, arg_1)

    @lower(operator.le, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its operands by pointer: allocate one stack slot
        # per argument first, then store each value into its slot.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZleRK14__nv_bfloat162S1__nbst_caller,
            signature(bool_, operand_ptr, operand_ptr),
            slots,
        )


_lower__ZleRK14__nv_bfloat162S1__nbst(shim_stream, shim_obj)


def __half():
    """Placeholder callable with no behaviour; always returns ``None``.

    It is used further down as the key passed to ``@lower``. Defining it
    rebinds the module-level name ``__half`` (earlier aliased to
    ``float16``) to this function.
    """


def _lower__ZN6__halfC1E13__nv_bfloat16_nbst(shim_stream, shim_obj):
    """Install the lowering of ``__half(__nv_bfloat16)``, keyed on ``__half``.

    NOTE(review): the shim constructs a temporary ``__half`` from the operand
    and discards it, never writing ``retval``; the device declaration returns
    ``void``. Confirm that this is the intended binding.

    Args:
        shim_stream: Receives the C++ shim source through ``write_with_key``
            whenever the lowering runs.
        shim_obj: Handed to ``add_linking_file`` on the active code library
            whenever the lowering runs.
    """
    symbol = "_ZN6__halfC1E13__nv_bfloat16_nbst"
    operand_ptr = CPointer(_type___nv_bfloat16)
    shim_source = """
    extern "C" __device__ int
    _ZN6__halfC1E13__nv_bfloat16_nbst(int &retval , __nv_bfloat16* f) {
        __half(*f);
        return 0;
    }
        """

    _ZN6__halfC1E13__nv_bfloat16_nbst = declare_device(
        symbol, void(operand_ptr)
    )

    def _ZN6__halfC1E13__nv_bfloat16_nbst_caller(arg_0):
        return _ZN6__halfC1E13__nv_bfloat16_nbst(arg_0)

    @lower(__half, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(symbol, shim_source)

        # The shim takes its operand by pointer: allocate the stack slot(s)
        # first, then store each value into its slot.
        slots = []
        for arg_ty in sig.args:
            slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder,
            _ZN6__halfC1E13__nv_bfloat16_nbst_caller,
            signature(void, operand_ptr),
            slots,
        )


_lower__ZN6__halfC1E13__nv_bfloat16_nbst(shim_stream, shim_obj)


@register
class _typing___double2bfloat16(ConcreteTemplate):
    """Typing template for ``__double2bfloat16``.

    Declares one case: ``(float64) -> _type___nv_bfloat16``.
    """

    cases = [signature(_type___nv_bfloat16, float64)]
    # Looked up through globals(): a bare ``__name`` reference inside a class
    # body would be name-mangled by Python.
    key = globals()["__double2bfloat16"]


# Register the module-level object with a Function type built from the
# template.
register_global(
    __double2bfloat16,
    types.Function(_typing___double2bfloat16),
)


@register
class _typing___float2bfloat16(ConcreteTemplate):
    """Typing template for ``__float2bfloat16``.

    Declares one case: ``(float32) -> _type___nv_bfloat16``.
    """

    cases = [signature(_type___nv_bfloat16, float32)]
    # Looked up through globals(): a bare ``__name`` reference inside a class
    # body would be name-mangled by Python.
    key = globals()["__float2bfloat16"]


# Register the module-level object with a Function type built from the
# template.
register_global(
    __float2bfloat16,
    types.Function(_typing___float2bfloat16),
)


@register
class _typing___float2bfloat16_rn(ConcreteTemplate):
    """Typing template for ``__float2bfloat16_rn``.

    Declares one case: ``(float32) -> _type___nv_bfloat16``.
    """

    cases = [signature(_type___nv_bfloat16, float32)]
    # Looked up through globals(): a bare ``__name`` reference inside a class
    # body would be name-mangled by Python.
    key = globals()["__float2bfloat16_rn"]


# Register the module-level object with a Function type built from the
# template.
register_global(
    __float2bfloat16_rn,
    types.Function(_typing___float2bfloat16_rn),
)


@register
class _typing___float2bfloat16_rz(ConcreteTemplate):
    """Typing template for ``__float2bfloat16_rz``.

    Declares one case: ``(float32) -> _type___nv_bfloat16``.
    """

    cases = [signature(_type___nv_bfloat16, float32)]
    # Looked up through globals(): a bare ``__name`` reference inside a class
    # body would be name-mangled by Python.
    key = globals()["__float2bfloat16_rz"]


# Register the module-level object with a Function type built from the
# template.
register_global(
    __float2bfloat16_rz,
    types.Function(_typing___float2bfloat16_rz),
)


@register
class _typing___float2bfloat16_rd(ConcreteTemplate):
    """Typing template for ``__float2bfloat16_rd``.

    Declares one case: ``(float32) -> _type___nv_bfloat16``.
    """

    cases = [signature(_type___nv_bfloat16, float32)]
    # Looked up through globals(): a bare ``__name`` reference inside a class
    # body would be name-mangled by Python.
    key = globals()["__float2bfloat16_rd"]


# Register the module-level object with a Function type built from the
# template.
register_global(
    __float2bfloat16_rd,
    types.Function(_typing___float2bfloat16_rd),
)


@register
class _typing___float2bfloat16_ru(ConcreteTemplate):
    """Typing template for ``__float2bfloat16_ru``.

    Declares one case: ``(float32) -> _type___nv_bfloat16``.
    """

    cases = [signature(_type___nv_bfloat16, float32)]
    # Looked up through globals(): a bare ``__name`` reference inside a class
    # body would be name-mangled by Python.
    key = globals()["__float2bfloat16_ru"]


# Register the module-level object with a Function type built from the
# template.
register_global(
    __float2bfloat16_ru,
    types.Function(_typing___float2bfloat16_ru),
)


@register
class _typing___bfloat162float(ConcreteTemplate):
    """Typing template for ``__bfloat162float``.

    Declares one case: ``(_type___nv_bfloat16) -> float32``.
    """

    cases = [signature(float32, _type___nv_bfloat16)]
    # Looked up through globals(): a bare ``__name`` reference inside a class
    # body would be name-mangled by Python.
    key = globals()["__bfloat162float"]


# Register the module-level object with a Function type built from the
# template.
register_global(
    __bfloat162float,
    types.Function(_typing___bfloat162float),
)


@register
class _typing___float2bfloat162_rn(ConcreteTemplate):
    """Typing template for ``__float2bfloat162_rn``.

    Declares one case: ``(float32) -> _type___nv_bfloat162``.
    """

    cases = [signature(_type___nv_bfloat162, float32)]
    # Looked up through globals(): a bare ``__name`` reference inside a class
    # body would be name-mangled by Python.
    key = globals()["__float2bfloat162_rn"]


# Register the module-level object with a Function type built from the
# template.
register_global(
    __float2bfloat162_rn,
    types.Function(_typing___float2bfloat162_rn),
)


@register
class _typing___floats2bfloat162_rn(ConcreteTemplate):
    """Typing template for ``__floats2bfloat162_rn``.

    Declares one case: ``(float32, float32) -> _type___nv_bfloat162``.
    """

    cases = [signature(_type___nv_bfloat162, float32, float32)]
    # Looked up through globals(): a bare ``__name`` reference inside a class
    # body would be name-mangled by Python.
    key = globals()["__floats2bfloat162_rn"]


# Register the module-level object with a Function type built from the
# template.
register_global(
    __floats2bfloat162_rn,
    types.Function(_typing___floats2bfloat162_rn),
)


@register
class _typing___low2float(ConcreteTemplate):
    """Typing template for ``__low2float``.

    Declares one case: ``(_type___nv_bfloat162) -> float32``.
    """

    cases = [signature(float32, _type___nv_bfloat162)]
    # Looked up through globals(): a bare ``__name`` reference inside a class
    # body would be name-mangled by Python.
    key = globals()["__low2float"]


# Register the module-level object with a Function type built from the
# template.
register_global(
    __low2float,
    types.Function(_typing___low2float),
)


@register
class _typing___high2float(ConcreteTemplate):
    """Typing template for ``__high2float``.

    Declares one case: ``(_type___nv_bfloat162) -> float32``.
    """

    cases = [signature(float32, _type___nv_bfloat162)]
    # Looked up through globals(): a bare ``__name`` reference inside a class
    # body would be name-mangled by Python.
    key = globals()["__high2float"]


# Register the module-level object with a Function type built from the
# template.
register_global(
    __high2float,
    types.Function(_typing___high2float),
)


@register
class _typing___float22bfloat162_rn(ConcreteTemplate):
    """Typing template for ``__float22bfloat162_rn``.

    Declares one case: ``(float32x2) -> _type___nv_bfloat162``.
    """

    cases = [signature(_type___nv_bfloat162, float32x2)]
    # Looked up through globals(): a bare ``__name`` reference inside a class
    # body would be name-mangled by Python.
    key = globals()["__float22bfloat162_rn"]


# Register the module-level object with a Function type built from the
# template.
register_global(
    __float22bfloat162_rn,
    types.Function(_typing___float22bfloat162_rn),
)


@register
class _typing___bfloat1622float2(ConcreteTemplate):
    """Typing template for ``__bfloat1622float2``.

    Declares one case: ``(_type___nv_bfloat162) -> float32x2``.
    """

    cases = [signature(float32x2, _type___nv_bfloat162)]
    # Looked up through globals(): a bare ``__name`` reference inside a class
    # body would be name-mangled by Python.
    key = globals()["__bfloat1622float2"]


# Register the module-level object with a Function type built from the
# template.
register_global(
    __bfloat1622float2,
    types.Function(_typing___bfloat1622float2),
)


@register
class _typing___bfloat162char_rz(ConcreteTemplate):
    """Typing template for ``__bfloat162char_rz``.

    Declares one case: ``(_type___nv_bfloat16) -> int8``.
    """

    cases = [signature(int8, _type___nv_bfloat16)]
    # Looked up through globals(): a bare ``__name`` reference inside a class
    # body would be name-mangled by Python.
    key = globals()["__bfloat162char_rz"]


# Register the module-level object with a Function type built from the
# template.
register_global(
    __bfloat162char_rz,
    types.Function(_typing___bfloat162char_rz),
)


# Typing for __bfloat162uchar_rz: (_type___nv_bfloat16) -> uint8.
@register
class _typing___bfloat162uchar_rz(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__bfloat162uchar_rz"]
    cases = [
        signature(uint8, _type___nv_bfloat16),
    ]


register_global(
    __bfloat162uchar_rz,
    Function(_typing___bfloat162uchar_rz),
)


# Typing for __bfloat162int_rn: (_type___nv_bfloat16) -> int32.
@register
class _typing___bfloat162int_rn(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__bfloat162int_rn"]
    cases = [
        signature(int32, _type___nv_bfloat16),
    ]


register_global(
    __bfloat162int_rn,
    Function(_typing___bfloat162int_rn),
)


# Typing for __bfloat162int_rz: (_type___nv_bfloat16) -> int32.
@register
class _typing___bfloat162int_rz(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__bfloat162int_rz"]
    cases = [
        signature(int32, _type___nv_bfloat16),
    ]


register_global(
    __bfloat162int_rz,
    Function(_typing___bfloat162int_rz),
)


# Typing for __bfloat162int_rd: (_type___nv_bfloat16) -> int32.
@register
class _typing___bfloat162int_rd(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__bfloat162int_rd"]
    cases = [
        signature(int32, _type___nv_bfloat16),
    ]


register_global(
    __bfloat162int_rd,
    Function(_typing___bfloat162int_rd),
)


# Typing for __bfloat162int_ru: (_type___nv_bfloat16) -> int32.
@register
class _typing___bfloat162int_ru(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__bfloat162int_ru"]
    cases = [
        signature(int32, _type___nv_bfloat16),
    ]


register_global(
    __bfloat162int_ru,
    Function(_typing___bfloat162int_ru),
)


# Typing for __int2bfloat16_rn: (int32) -> _type___nv_bfloat16.
@register
class _typing___int2bfloat16_rn(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__int2bfloat16_rn"]
    cases = [
        signature(_type___nv_bfloat16, int32),
    ]


register_global(
    __int2bfloat16_rn,
    Function(_typing___int2bfloat16_rn),
)


# Typing for __int2bfloat16_rz: (int32) -> _type___nv_bfloat16.
@register
class _typing___int2bfloat16_rz(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__int2bfloat16_rz"]
    cases = [
        signature(_type___nv_bfloat16, int32),
    ]


register_global(
    __int2bfloat16_rz,
    Function(_typing___int2bfloat16_rz),
)


# Typing for __int2bfloat16_rd: (int32) -> _type___nv_bfloat16.
@register
class _typing___int2bfloat16_rd(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__int2bfloat16_rd"]
    cases = [
        signature(_type___nv_bfloat16, int32),
    ]


register_global(
    __int2bfloat16_rd,
    Function(_typing___int2bfloat16_rd),
)


# Typing for __int2bfloat16_ru: (int32) -> _type___nv_bfloat16.
@register
class _typing___int2bfloat16_ru(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__int2bfloat16_ru"]
    cases = [
        signature(_type___nv_bfloat16, int32),
    ]


register_global(
    __int2bfloat16_ru,
    Function(_typing___int2bfloat16_ru),
)


# Typing for __bfloat162short_rn: (_type___nv_bfloat16) -> int16.
@register
class _typing___bfloat162short_rn(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__bfloat162short_rn"]
    cases = [
        signature(int16, _type___nv_bfloat16),
    ]


register_global(
    __bfloat162short_rn,
    Function(_typing___bfloat162short_rn),
)


# Typing for __bfloat162short_rz: (_type___nv_bfloat16) -> int16.
@register
class _typing___bfloat162short_rz(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__bfloat162short_rz"]
    cases = [
        signature(int16, _type___nv_bfloat16),
    ]


register_global(
    __bfloat162short_rz,
    Function(_typing___bfloat162short_rz),
)


# Typing for __bfloat162short_rd: (_type___nv_bfloat16) -> int16.
@register
class _typing___bfloat162short_rd(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__bfloat162short_rd"]
    cases = [
        signature(int16, _type___nv_bfloat16),
    ]


register_global(
    __bfloat162short_rd,
    Function(_typing___bfloat162short_rd),
)


# Typing for __bfloat162short_ru: (_type___nv_bfloat16) -> int16.
@register
class _typing___bfloat162short_ru(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__bfloat162short_ru"]
    cases = [
        signature(int16, _type___nv_bfloat16),
    ]


register_global(
    __bfloat162short_ru,
    Function(_typing___bfloat162short_ru),
)


# Typing for __short2bfloat16_rn: (int16) -> _type___nv_bfloat16.
@register
class _typing___short2bfloat16_rn(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__short2bfloat16_rn"]
    cases = [
        signature(_type___nv_bfloat16, int16),
    ]


register_global(
    __short2bfloat16_rn,
    Function(_typing___short2bfloat16_rn),
)


# Typing for __short2bfloat16_rz: (int16) -> _type___nv_bfloat16.
@register
class _typing___short2bfloat16_rz(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__short2bfloat16_rz"]
    cases = [
        signature(_type___nv_bfloat16, int16),
    ]


register_global(
    __short2bfloat16_rz,
    Function(_typing___short2bfloat16_rz),
)


# Typing for __short2bfloat16_rd: (int16) -> _type___nv_bfloat16.
@register
class _typing___short2bfloat16_rd(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__short2bfloat16_rd"]
    cases = [
        signature(_type___nv_bfloat16, int16),
    ]


register_global(
    __short2bfloat16_rd,
    Function(_typing___short2bfloat16_rd),
)


# Typing for __short2bfloat16_ru: (int16) -> _type___nv_bfloat16.
@register
class _typing___short2bfloat16_ru(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__short2bfloat16_ru"]
    cases = [
        signature(_type___nv_bfloat16, int16),
    ]


register_global(
    __short2bfloat16_ru,
    Function(_typing___short2bfloat16_ru),
)


# Typing for __bfloat162uint_rn: (_type___nv_bfloat16) -> uint32.
@register
class _typing___bfloat162uint_rn(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__bfloat162uint_rn"]
    cases = [
        signature(uint32, _type___nv_bfloat16),
    ]


register_global(
    __bfloat162uint_rn,
    Function(_typing___bfloat162uint_rn),
)


# Typing for __bfloat162uint_rz: (_type___nv_bfloat16) -> uint32.
@register
class _typing___bfloat162uint_rz(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__bfloat162uint_rz"]
    cases = [
        signature(uint32, _type___nv_bfloat16),
    ]


register_global(
    __bfloat162uint_rz,
    Function(_typing___bfloat162uint_rz),
)


# Typing for __bfloat162uint_rd: (_type___nv_bfloat16) -> uint32.
@register
class _typing___bfloat162uint_rd(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__bfloat162uint_rd"]
    cases = [
        signature(uint32, _type___nv_bfloat16),
    ]


register_global(
    __bfloat162uint_rd,
    Function(_typing___bfloat162uint_rd),
)


# Typing for __bfloat162uint_ru: (_type___nv_bfloat16) -> uint32.
@register
class _typing___bfloat162uint_ru(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__bfloat162uint_ru"]
    cases = [
        signature(uint32, _type___nv_bfloat16),
    ]


register_global(
    __bfloat162uint_ru,
    Function(_typing___bfloat162uint_ru),
)


# Typing for __uint2bfloat16_rn: (uint32) -> _type___nv_bfloat16.
@register
class _typing___uint2bfloat16_rn(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__uint2bfloat16_rn"]
    cases = [
        signature(_type___nv_bfloat16, uint32),
    ]


register_global(
    __uint2bfloat16_rn,
    Function(_typing___uint2bfloat16_rn),
)


# Typing for __uint2bfloat16_rz: (uint32) -> _type___nv_bfloat16.
@register
class _typing___uint2bfloat16_rz(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__uint2bfloat16_rz"]
    cases = [
        signature(_type___nv_bfloat16, uint32),
    ]


register_global(
    __uint2bfloat16_rz,
    Function(_typing___uint2bfloat16_rz),
)


# Typing for __uint2bfloat16_rd: (uint32) -> _type___nv_bfloat16.
@register
class _typing___uint2bfloat16_rd(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__uint2bfloat16_rd"]
    cases = [
        signature(_type___nv_bfloat16, uint32),
    ]


register_global(
    __uint2bfloat16_rd,
    Function(_typing___uint2bfloat16_rd),
)


# Typing for __uint2bfloat16_ru: (uint32) -> _type___nv_bfloat16.
@register
class _typing___uint2bfloat16_ru(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__uint2bfloat16_ru"]
    cases = [
        signature(_type___nv_bfloat16, uint32),
    ]


register_global(
    __uint2bfloat16_ru,
    Function(_typing___uint2bfloat16_ru),
)


# Typing for __bfloat162ushort_rn: (_type___nv_bfloat16) -> uint16.
@register
class _typing___bfloat162ushort_rn(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__bfloat162ushort_rn"]
    cases = [
        signature(uint16, _type___nv_bfloat16),
    ]


register_global(
    __bfloat162ushort_rn,
    Function(_typing___bfloat162ushort_rn),
)


# Typing for __bfloat162ushort_rz: (_type___nv_bfloat16) -> uint16.
@register
class _typing___bfloat162ushort_rz(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__bfloat162ushort_rz"]
    cases = [
        signature(uint16, _type___nv_bfloat16),
    ]


register_global(
    __bfloat162ushort_rz,
    Function(_typing___bfloat162ushort_rz),
)


# Typing for __bfloat162ushort_rd: (_type___nv_bfloat16) -> uint16.
@register
class _typing___bfloat162ushort_rd(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__bfloat162ushort_rd"]
    cases = [
        signature(uint16, _type___nv_bfloat16),
    ]


register_global(
    __bfloat162ushort_rd,
    Function(_typing___bfloat162ushort_rd),
)


# Typing for __bfloat162ushort_ru: (_type___nv_bfloat16) -> uint16.
@register
class _typing___bfloat162ushort_ru(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__bfloat162ushort_ru"]
    cases = [
        signature(uint16, _type___nv_bfloat16),
    ]


register_global(
    __bfloat162ushort_ru,
    Function(_typing___bfloat162ushort_ru),
)


# Typing for __ushort2bfloat16_rn: (uint16) -> _type___nv_bfloat16.
@register
class _typing___ushort2bfloat16_rn(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__ushort2bfloat16_rn"]
    cases = [
        signature(_type___nv_bfloat16, uint16),
    ]


register_global(
    __ushort2bfloat16_rn,
    Function(_typing___ushort2bfloat16_rn),
)


# Typing for __ushort2bfloat16_rz: (uint16) -> _type___nv_bfloat16.
@register
class _typing___ushort2bfloat16_rz(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__ushort2bfloat16_rz"]
    cases = [
        signature(_type___nv_bfloat16, uint16),
    ]


register_global(
    __ushort2bfloat16_rz,
    Function(_typing___ushort2bfloat16_rz),
)


# Typing for __ushort2bfloat16_rd: (uint16) -> _type___nv_bfloat16.
@register
class _typing___ushort2bfloat16_rd(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__ushort2bfloat16_rd"]
    cases = [
        signature(_type___nv_bfloat16, uint16),
    ]


register_global(
    __ushort2bfloat16_rd,
    Function(_typing___ushort2bfloat16_rd),
)


# Typing for __ushort2bfloat16_ru: (uint16) -> _type___nv_bfloat16.
@register
class _typing___ushort2bfloat16_ru(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__ushort2bfloat16_ru"]
    cases = [
        signature(_type___nv_bfloat16, uint16),
    ]


register_global(
    __ushort2bfloat16_ru,
    Function(_typing___ushort2bfloat16_ru),
)


# Typing for __bfloat162ull_rn: (_type___nv_bfloat16) -> uint64.
@register
class _typing___bfloat162ull_rn(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__bfloat162ull_rn"]
    cases = [
        signature(uint64, _type___nv_bfloat16),
    ]


register_global(
    __bfloat162ull_rn,
    Function(_typing___bfloat162ull_rn),
)


# Typing for __bfloat162ull_rz: (_type___nv_bfloat16) -> uint64.
@register
class _typing___bfloat162ull_rz(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__bfloat162ull_rz"]
    cases = [
        signature(uint64, _type___nv_bfloat16),
    ]


register_global(
    __bfloat162ull_rz,
    Function(_typing___bfloat162ull_rz),
)


# Typing for make_bfloat162: two _type___nv_bfloat16 args
# -> _type___nv_bfloat162.
@register
class _typing_make_bfloat162(ConcreteTemplate):
    # Key is the module-level binding, fetched by name from globals().
    key = globals()["make_bfloat162"]
    cases = [
        signature(
            _type___nv_bfloat162,
            _type___nv_bfloat16,
            _type___nv_bfloat16,
        ),
    ]


register_global(
    make_bfloat162,
    Function(_typing_make_bfloat162),
)


# Typing for __bfloat162ull_rd: (_type___nv_bfloat16) -> uint64.
@register
class _typing___bfloat162ull_rd(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__bfloat162ull_rd"]
    cases = [
        signature(uint64, _type___nv_bfloat16),
    ]


register_global(
    __bfloat162ull_rd,
    Function(_typing___bfloat162ull_rd),
)


# Typing for __bfloat162ull_ru: (_type___nv_bfloat16) -> uint64.
@register
class _typing___bfloat162ull_ru(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__bfloat162ull_ru"]
    cases = [
        signature(uint64, _type___nv_bfloat16),
    ]


register_global(
    __bfloat162ull_ru,
    Function(_typing___bfloat162ull_ru),
)


# Typing for __ull2bfloat16_rn: (uint64) -> _type___nv_bfloat16.
@register
class _typing___ull2bfloat16_rn(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__ull2bfloat16_rn"]
    cases = [
        signature(_type___nv_bfloat16, uint64),
    ]


register_global(
    __ull2bfloat16_rn,
    Function(_typing___ull2bfloat16_rn),
)


# Typing for __ull2bfloat16_rz: (uint64) -> _type___nv_bfloat16.
@register
class _typing___ull2bfloat16_rz(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__ull2bfloat16_rz"]
    cases = [
        signature(_type___nv_bfloat16, uint64),
    ]


register_global(
    __ull2bfloat16_rz,
    Function(_typing___ull2bfloat16_rz),
)


# Typing for __ull2bfloat16_rd: (uint64) -> _type___nv_bfloat16.
@register
class _typing___ull2bfloat16_rd(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__ull2bfloat16_rd"]
    cases = [
        signature(_type___nv_bfloat16, uint64),
    ]


register_global(
    __ull2bfloat16_rd,
    Function(_typing___ull2bfloat16_rd),
)


# Typing for __ull2bfloat16_ru: (uint64) -> _type___nv_bfloat16.
@register
class _typing___ull2bfloat16_ru(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__ull2bfloat16_ru"]
    cases = [
        signature(_type___nv_bfloat16, uint64),
    ]


register_global(
    __ull2bfloat16_ru,
    Function(_typing___ull2bfloat16_ru),
)


# Typing for __bfloat162ll_rn: (_type___nv_bfloat16) -> int64.
@register
class _typing___bfloat162ll_rn(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__bfloat162ll_rn"]
    cases = [
        signature(int64, _type___nv_bfloat16),
    ]


register_global(
    __bfloat162ll_rn,
    Function(_typing___bfloat162ll_rn),
)


# Typing for __bfloat162ll_rz: (_type___nv_bfloat16) -> int64.
@register
class _typing___bfloat162ll_rz(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__bfloat162ll_rz"]
    cases = [
        signature(int64, _type___nv_bfloat16),
    ]


register_global(
    __bfloat162ll_rz,
    Function(_typing___bfloat162ll_rz),
)


# Typing for __bfloat162ll_rd: (_type___nv_bfloat16) -> int64.
@register
class _typing___bfloat162ll_rd(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__bfloat162ll_rd"]
    cases = [
        signature(int64, _type___nv_bfloat16),
    ]


register_global(
    __bfloat162ll_rd,
    Function(_typing___bfloat162ll_rd),
)


# Typing for __bfloat162ll_ru: (_type___nv_bfloat16) -> int64.
@register
class _typing___bfloat162ll_ru(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__bfloat162ll_ru"]
    cases = [
        signature(int64, _type___nv_bfloat16),
    ]


register_global(
    __bfloat162ll_ru,
    Function(_typing___bfloat162ll_ru),
)


# Typing for __ll2bfloat16_rn: (int64) -> _type___nv_bfloat16.
@register
class _typing___ll2bfloat16_rn(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__ll2bfloat16_rn"]
    cases = [
        signature(_type___nv_bfloat16, int64),
    ]


register_global(
    __ll2bfloat16_rn,
    Function(_typing___ll2bfloat16_rn),
)


# Typing for __ll2bfloat16_rz: (int64) -> _type___nv_bfloat16.
@register
class _typing___ll2bfloat16_rz(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__ll2bfloat16_rz"]
    cases = [
        signature(_type___nv_bfloat16, int64),
    ]


register_global(
    __ll2bfloat16_rz,
    Function(_typing___ll2bfloat16_rz),
)


# Typing for __ll2bfloat16_rd: (int64) -> _type___nv_bfloat16.
@register
class _typing___ll2bfloat16_rd(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__ll2bfloat16_rd"]
    cases = [
        signature(_type___nv_bfloat16, int64),
    ]


register_global(
    __ll2bfloat16_rd,
    Function(_typing___ll2bfloat16_rd),
)


# Typing for __ll2bfloat16_ru: (int64) -> _type___nv_bfloat16.
@register
class _typing___ll2bfloat16_ru(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__ll2bfloat16_ru"]
    cases = [
        signature(_type___nv_bfloat16, int64),
    ]


register_global(
    __ll2bfloat16_ru,
    Function(_typing___ll2bfloat16_ru),
)


# Typing for htrunc: (_type___nv_bfloat16) -> _type___nv_bfloat16.
@register
class _typing_htrunc(ConcreteTemplate):
    # Key is the module-level binding, fetched by name from globals().
    key = globals()["htrunc"]
    cases = [
        signature(_type___nv_bfloat16, _type___nv_bfloat16),
    ]


register_global(
    htrunc,
    Function(_typing_htrunc),
)


# Typing for hceil: (_type___nv_bfloat16) -> _type___nv_bfloat16.
@register
class _typing_hceil(ConcreteTemplate):
    # Key is the module-level binding, fetched by name from globals().
    key = globals()["hceil"]
    cases = [
        signature(_type___nv_bfloat16, _type___nv_bfloat16),
    ]


register_global(
    hceil,
    Function(_typing_hceil),
)


# Typing for hfloor: (_type___nv_bfloat16) -> _type___nv_bfloat16.
@register
class _typing_hfloor(ConcreteTemplate):
    # Key is the module-level binding, fetched by name from globals().
    key = globals()["hfloor"]
    cases = [
        signature(_type___nv_bfloat16, _type___nv_bfloat16),
    ]


register_global(
    hfloor,
    Function(_typing_hfloor),
)


# Typing for hrint: (_type___nv_bfloat16) -> _type___nv_bfloat16.
@register
class _typing_hrint(ConcreteTemplate):
    # Key is the module-level binding, fetched by name from globals().
    key = globals()["hrint"]
    cases = [
        signature(_type___nv_bfloat16, _type___nv_bfloat16),
    ]


register_global(
    hrint,
    Function(_typing_hrint),
)


# Typing for h2trunc: (_type___nv_bfloat162) -> _type___nv_bfloat162.
@register
class _typing_h2trunc(ConcreteTemplate):
    # Key is the module-level binding, fetched by name from globals().
    key = globals()["h2trunc"]
    cases = [
        signature(_type___nv_bfloat162, _type___nv_bfloat162),
    ]


register_global(
    h2trunc,
    Function(_typing_h2trunc),
)


# Typing for h2ceil: (_type___nv_bfloat162) -> _type___nv_bfloat162.
@register
class _typing_h2ceil(ConcreteTemplate):
    # Key is the module-level binding, fetched by name from globals().
    key = globals()["h2ceil"]
    cases = [
        signature(_type___nv_bfloat162, _type___nv_bfloat162),
    ]


register_global(
    h2ceil,
    Function(_typing_h2ceil),
)


# Typing for h2floor: (_type___nv_bfloat162) -> _type___nv_bfloat162.
@register
class _typing_h2floor(ConcreteTemplate):
    # Key is the module-level binding, fetched by name from globals().
    key = globals()["h2floor"]
    cases = [
        signature(_type___nv_bfloat162, _type___nv_bfloat162),
    ]


register_global(
    h2floor,
    Function(_typing_h2floor),
)


# Typing for h2rint: (_type___nv_bfloat162) -> _type___nv_bfloat162.
@register
class _typing_h2rint(ConcreteTemplate):
    # Key is the module-level binding, fetched by name from globals().
    key = globals()["h2rint"]
    cases = [
        signature(_type___nv_bfloat162, _type___nv_bfloat162),
    ]


register_global(
    h2rint,
    Function(_typing_h2rint),
)


# Typing for __bfloat162bfloat162:
# (_type___nv_bfloat16) -> _type___nv_bfloat162.
@register
class _typing___bfloat162bfloat162(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__bfloat162bfloat162"]
    cases = [
        signature(_type___nv_bfloat162, _type___nv_bfloat16),
    ]


register_global(
    __bfloat162bfloat162,
    Function(_typing___bfloat162bfloat162),
)


# Typing for __lowhigh2highlow:
# (_type___nv_bfloat162) -> _type___nv_bfloat162.
@register
class _typing___lowhigh2highlow(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__lowhigh2highlow"]
    cases = [
        signature(_type___nv_bfloat162, _type___nv_bfloat162),
    ]


register_global(
    __lowhigh2highlow,
    Function(_typing___lowhigh2highlow),
)


# Typing for __lows2bfloat162: two _type___nv_bfloat162 args
# -> _type___nv_bfloat162.
@register
class _typing___lows2bfloat162(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__lows2bfloat162"]
    cases = [
        signature(
            _type___nv_bfloat162,
            _type___nv_bfloat162,
            _type___nv_bfloat162,
        ),
    ]


register_global(
    __lows2bfloat162,
    Function(_typing___lows2bfloat162),
)


# Typing for __highs2bfloat162: two _type___nv_bfloat162 args
# -> _type___nv_bfloat162.
@register
class _typing___highs2bfloat162(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__highs2bfloat162"]
    cases = [
        signature(
            _type___nv_bfloat162,
            _type___nv_bfloat162,
            _type___nv_bfloat162,
        ),
    ]


register_global(
    __highs2bfloat162,
    Function(_typing___highs2bfloat162),
)


# Typing for __high2bfloat16: (_type___nv_bfloat162) -> _type___nv_bfloat16.
@register
class _typing___high2bfloat16(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__high2bfloat16"]
    cases = [
        signature(_type___nv_bfloat16, _type___nv_bfloat162),
    ]


register_global(
    __high2bfloat16,
    Function(_typing___high2bfloat16),
)


# Typing for __low2bfloat16: (_type___nv_bfloat162) -> _type___nv_bfloat16.
@register
class _typing___low2bfloat16(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__low2bfloat16"]
    cases = [
        signature(_type___nv_bfloat16, _type___nv_bfloat162),
    ]


register_global(
    __low2bfloat16,
    Function(_typing___low2bfloat16),
)


# Typing for __hisinf: (_type___nv_bfloat16) -> int32.
@register
class _typing___hisinf(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__hisinf"]
    cases = [
        signature(int32, _type___nv_bfloat16),
    ]


register_global(
    __hisinf,
    Function(_typing___hisinf),
)


# Typing for __halves2bfloat162: two _type___nv_bfloat16 args
# -> _type___nv_bfloat162.
@register
class _typing___halves2bfloat162(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__halves2bfloat162"]
    cases = [
        signature(
            _type___nv_bfloat162,
            _type___nv_bfloat16,
            _type___nv_bfloat16,
        ),
    ]


register_global(
    __halves2bfloat162,
    Function(_typing___halves2bfloat162),
)


# Typing for __low2bfloat162: (_type___nv_bfloat162) -> _type___nv_bfloat162.
@register
class _typing___low2bfloat162(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__low2bfloat162"]
    cases = [
        signature(_type___nv_bfloat162, _type___nv_bfloat162),
    ]


register_global(
    __low2bfloat162,
    Function(_typing___low2bfloat162),
)


# Typing for __high2bfloat162: (_type___nv_bfloat162) -> _type___nv_bfloat162.
@register
class _typing___high2bfloat162(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__high2bfloat162"]
    cases = [
        signature(_type___nv_bfloat162, _type___nv_bfloat162),
    ]


register_global(
    __high2bfloat162,
    Function(_typing___high2bfloat162),
)


# Typing for __bfloat16_as_short: (_type___nv_bfloat16) -> int16.
@register
class _typing___bfloat16_as_short(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__bfloat16_as_short"]
    cases = [
        signature(int16, _type___nv_bfloat16),
    ]


register_global(
    __bfloat16_as_short,
    Function(_typing___bfloat16_as_short),
)


# Typing for __bfloat16_as_ushort: (_type___nv_bfloat16) -> uint16.
@register
class _typing___bfloat16_as_ushort(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__bfloat16_as_ushort"]
    cases = [
        signature(uint16, _type___nv_bfloat16),
    ]


register_global(
    __bfloat16_as_ushort,
    Function(_typing___bfloat16_as_ushort),
)


# Typing for __short_as_bfloat16: (int16) -> _type___nv_bfloat16.
@register
class _typing___short_as_bfloat16(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__short_as_bfloat16"]
    cases = [
        signature(_type___nv_bfloat16, int16),
    ]


register_global(
    __short_as_bfloat16,
    Function(_typing___short_as_bfloat16),
)


# Typing for __ushort_as_bfloat16: (uint16) -> _type___nv_bfloat16.
@register
class _typing___ushort_as_bfloat16(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__ushort_as_bfloat16"]
    cases = [
        signature(_type___nv_bfloat16, uint16),
    ]


register_global(
    __ushort_as_bfloat16,
    Function(_typing___ushort_as_bfloat16),
)


# Typing for __shfl_sync: two overloads taking (uint32, T, int32, int32)
# and returning T, for T = _type___nv_bfloat162 then _type___nv_bfloat16.
@register
class _typing___shfl_sync(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__shfl_sync"]
    cases = [
        signature(
            _type___nv_bfloat162,
            uint32,
            _type___nv_bfloat162,
            int32,
            int32,
        ),
        signature(
            _type___nv_bfloat16,
            uint32,
            _type___nv_bfloat16,
            int32,
            int32,
        ),
    ]


register_global(
    __shfl_sync,
    Function(_typing___shfl_sync),
)


# Typing for __shfl_up_sync: two overloads taking (uint32, T, uint32, int32)
# and returning T, for T = _type___nv_bfloat162 then _type___nv_bfloat16.
@register
class _typing___shfl_up_sync(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__shfl_up_sync"]
    cases = [
        signature(
            _type___nv_bfloat162,
            uint32,
            _type___nv_bfloat162,
            uint32,
            int32,
        ),
        signature(
            _type___nv_bfloat16,
            uint32,
            _type___nv_bfloat16,
            uint32,
            int32,
        ),
    ]


register_global(
    __shfl_up_sync,
    Function(_typing___shfl_up_sync),
)


# Typing for __shfl_down_sync: two overloads taking (uint32, T, uint32, int32)
# and returning T, for T = _type___nv_bfloat162 then _type___nv_bfloat16.
@register
class _typing___shfl_down_sync(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__shfl_down_sync"]
    cases = [
        signature(
            _type___nv_bfloat162,
            uint32,
            _type___nv_bfloat162,
            uint32,
            int32,
        ),
        signature(
            _type___nv_bfloat16,
            uint32,
            _type___nv_bfloat16,
            uint32,
            int32,
        ),
    ]


register_global(
    __shfl_down_sync,
    Function(_typing___shfl_down_sync),
)


# Typing for __shfl_xor_sync: two overloads taking (uint32, T, int32, int32)
# and returning T, for T = _type___nv_bfloat162 then _type___nv_bfloat16.
@register
class _typing___shfl_xor_sync(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__shfl_xor_sync"]
    cases = [
        signature(
            _type___nv_bfloat162,
            uint32,
            _type___nv_bfloat162,
            int32,
            int32,
        ),
        signature(
            _type___nv_bfloat16,
            uint32,
            _type___nv_bfloat16,
            int32,
            int32,
        ),
    ]


register_global(
    __shfl_xor_sync,
    Function(_typing___shfl_xor_sync),
)


# Typing for __ldg: two overloads, (CPointer(T)) -> T,
# for T = _type___nv_bfloat162 then _type___nv_bfloat16.
@register
class _typing___ldg(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__ldg"]
    cases = [
        signature(
            _type___nv_bfloat162,
            CPointer(_type___nv_bfloat162),
        ),
        signature(
            _type___nv_bfloat16,
            CPointer(_type___nv_bfloat16),
        ),
    ]


register_global(
    __ldg,
    Function(_typing___ldg),
)


# Typing for __ldcg: two overloads, (CPointer(T)) -> T,
# for T = _type___nv_bfloat162 then _type___nv_bfloat16.
@register
class _typing___ldcg(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__ldcg"]
    cases = [
        signature(
            _type___nv_bfloat162,
            CPointer(_type___nv_bfloat162),
        ),
        signature(
            _type___nv_bfloat16,
            CPointer(_type___nv_bfloat16),
        ),
    ]


register_global(
    __ldcg,
    Function(_typing___ldcg),
)


# Typing for __ldca: two overloads, (CPointer(T)) -> T,
# for T = _type___nv_bfloat162 then _type___nv_bfloat16.
@register
class _typing___ldca(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__ldca"]
    cases = [
        signature(
            _type___nv_bfloat162,
            CPointer(_type___nv_bfloat162),
        ),
        signature(
            _type___nv_bfloat16,
            CPointer(_type___nv_bfloat16),
        ),
    ]


register_global(
    __ldca,
    Function(_typing___ldca),
)


# Typing for __ldcs: two overloads, (CPointer(T)) -> T,
# for T = _type___nv_bfloat162 then _type___nv_bfloat16.
@register
class _typing___ldcs(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__ldcs"]
    cases = [
        signature(
            _type___nv_bfloat162,
            CPointer(_type___nv_bfloat162),
        ),
        signature(
            _type___nv_bfloat16,
            CPointer(_type___nv_bfloat16),
        ),
    ]


register_global(
    __ldcs,
    Function(_typing___ldcs),
)


# Typing for __ldlu: two overloads, (CPointer(T)) -> T,
# for T = _type___nv_bfloat162 then _type___nv_bfloat16.
@register
class _typing___ldlu(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__ldlu"]
    cases = [
        signature(
            _type___nv_bfloat162,
            CPointer(_type___nv_bfloat162),
        ),
        signature(
            _type___nv_bfloat16,
            CPointer(_type___nv_bfloat16),
        ),
    ]


register_global(
    __ldlu,
    Function(_typing___ldlu),
)


# Typing for __ldcv: two overloads, (CPointer(T)) -> T,
# for T = _type___nv_bfloat162 then _type___nv_bfloat16.
@register
class _typing___ldcv(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__ldcv"]
    cases = [
        signature(
            _type___nv_bfloat162,
            CPointer(_type___nv_bfloat162),
        ),
        signature(
            _type___nv_bfloat16,
            CPointer(_type___nv_bfloat16),
        ),
    ]


register_global(
    __ldcv,
    Function(_typing___ldcv),
)


# Typing for __stwb: two overloads, (CPointer(T), T) -> void,
# for T = _type___nv_bfloat162 then _type___nv_bfloat16.
@register
class _typing___stwb(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__stwb"]
    cases = [
        signature(
            void,
            CPointer(_type___nv_bfloat162),
            _type___nv_bfloat162,
        ),
        signature(
            void,
            CPointer(_type___nv_bfloat16),
            _type___nv_bfloat16,
        ),
    ]


register_global(
    __stwb,
    Function(_typing___stwb),
)


# Typing for __stcg: two overloads, (CPointer(T), T) -> void,
# for T = _type___nv_bfloat162 then _type___nv_bfloat16.
@register
class _typing___stcg(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__stcg"]
    cases = [
        signature(
            void,
            CPointer(_type___nv_bfloat162),
            _type___nv_bfloat162,
        ),
        signature(
            void,
            CPointer(_type___nv_bfloat16),
            _type___nv_bfloat16,
        ),
    ]


register_global(
    __stcg,
    Function(_typing___stcg),
)


# Typing for __stcs: two overloads, (CPointer(T), T) -> void,
# for T = _type___nv_bfloat162 then _type___nv_bfloat16.
@register
class _typing___stcs(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__stcs"]
    cases = [
        signature(
            void,
            CPointer(_type___nv_bfloat162),
            _type___nv_bfloat162,
        ),
        signature(
            void,
            CPointer(_type___nv_bfloat16),
            _type___nv_bfloat16,
        ),
    ]


register_global(
    __stcs,
    Function(_typing___stcs),
)


# Typing for __stwt: two overloads, (CPointer(T), T) -> void,
# for T = _type___nv_bfloat162 then _type___nv_bfloat16.
@register
class _typing___stwt(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__stwt"]
    cases = [
        signature(
            void,
            CPointer(_type___nv_bfloat162),
            _type___nv_bfloat162,
        ),
        signature(
            void,
            CPointer(_type___nv_bfloat16),
            _type___nv_bfloat16,
        ),
    ]


register_global(
    __stwt,
    Function(_typing___stwt),
)


# Typing for __heq2: two _type___nv_bfloat162 args -> _type___nv_bfloat162.
@register
class _typing___heq2(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__heq2"]
    cases = [
        signature(
            _type___nv_bfloat162,
            _type___nv_bfloat162,
            _type___nv_bfloat162,
        ),
    ]


register_global(
    __heq2,
    Function(_typing___heq2),
)


# Typing for __hne2: two _type___nv_bfloat162 args -> _type___nv_bfloat162.
@register
class _typing___hne2(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__hne2"]
    cases = [
        signature(
            _type___nv_bfloat162,
            _type___nv_bfloat162,
            _type___nv_bfloat162,
        ),
    ]


register_global(
    __hne2,
    Function(_typing___hne2),
)


# Typing for __hle2: two _type___nv_bfloat162 args -> _type___nv_bfloat162.
@register
class _typing___hle2(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__hle2"]
    cases = [
        signature(
            _type___nv_bfloat162,
            _type___nv_bfloat162,
            _type___nv_bfloat162,
        ),
    ]


register_global(
    __hle2,
    Function(_typing___hle2),
)


# Typing for __hge2: two _type___nv_bfloat162 args -> _type___nv_bfloat162.
@register
class _typing___hge2(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__hge2"]
    cases = [
        signature(
            _type___nv_bfloat162,
            _type___nv_bfloat162,
            _type___nv_bfloat162,
        ),
    ]


register_global(
    __hge2,
    Function(_typing___hge2),
)


# Typing for __hlt2: two _type___nv_bfloat162 args -> _type___nv_bfloat162.
@register
class _typing___hlt2(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__hlt2"]
    cases = [
        signature(
            _type___nv_bfloat162,
            _type___nv_bfloat162,
            _type___nv_bfloat162,
        ),
    ]


register_global(
    __hlt2,
    Function(_typing___hlt2),
)


# Typing for __hgt2: two _type___nv_bfloat162 args -> _type___nv_bfloat162.
@register
class _typing___hgt2(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__hgt2"]
    cases = [
        signature(
            _type___nv_bfloat162,
            _type___nv_bfloat162,
            _type___nv_bfloat162,
        ),
    ]


register_global(
    __hgt2,
    Function(_typing___hgt2),
)


# Typing for __hequ2: two _type___nv_bfloat162 args -> _type___nv_bfloat162.
@register
class _typing___hequ2(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__hequ2"]
    cases = [
        signature(
            _type___nv_bfloat162,
            _type___nv_bfloat162,
            _type___nv_bfloat162,
        ),
    ]


register_global(
    __hequ2,
    Function(_typing___hequ2),
)


# Typing for __hneu2: two _type___nv_bfloat162 args -> _type___nv_bfloat162.
@register
class _typing___hneu2(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__hneu2"]
    cases = [
        signature(
            _type___nv_bfloat162,
            _type___nv_bfloat162,
            _type___nv_bfloat162,
        ),
    ]


register_global(
    __hneu2,
    Function(_typing___hneu2),
)


# Typing for __hleu2: two _type___nv_bfloat162 args -> _type___nv_bfloat162.
@register
class _typing___hleu2(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__hleu2"]
    cases = [
        signature(
            _type___nv_bfloat162,
            _type___nv_bfloat162,
            _type___nv_bfloat162,
        ),
    ]


register_global(
    __hleu2,
    Function(_typing___hleu2),
)


# Typing for __hgeu2: two _type___nv_bfloat162 args -> _type___nv_bfloat162.
@register
class _typing___hgeu2(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__hgeu2"]
    cases = [
        signature(
            _type___nv_bfloat162,
            _type___nv_bfloat162,
            _type___nv_bfloat162,
        ),
    ]


register_global(
    __hgeu2,
    Function(_typing___hgeu2),
)


# Typing for __hltu2: two _type___nv_bfloat162 args -> _type___nv_bfloat162.
@register
class _typing___hltu2(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__hltu2"]
    cases = [
        signature(
            _type___nv_bfloat162,
            _type___nv_bfloat162,
            _type___nv_bfloat162,
        ),
    ]


register_global(
    __hltu2,
    Function(_typing___hltu2),
)


# Typing for __hgtu2: two _type___nv_bfloat162 args -> _type___nv_bfloat162.
@register
class _typing___hgtu2(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__hgtu2"]
    cases = [
        signature(
            _type___nv_bfloat162,
            _type___nv_bfloat162,
            _type___nv_bfloat162,
        ),
    ]


register_global(
    __hgtu2,
    Function(_typing___hgtu2),
)


# Typing for __heq2_mask: two _type___nv_bfloat162 args -> uint32.
@register
class _typing___heq2_mask(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__heq2_mask"]
    cases = [
        signature(uint32, _type___nv_bfloat162, _type___nv_bfloat162),
    ]


register_global(
    __heq2_mask,
    Function(_typing___heq2_mask),
)


# Typing for __hne2_mask: two _type___nv_bfloat162 args -> uint32.
@register
class _typing___hne2_mask(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__hne2_mask"]
    cases = [
        signature(uint32, _type___nv_bfloat162, _type___nv_bfloat162),
    ]


register_global(
    __hne2_mask,
    Function(_typing___hne2_mask),
)


# Typing for __hle2_mask: two _type___nv_bfloat162 args -> uint32.
@register
class _typing___hle2_mask(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__hle2_mask"]
    cases = [
        signature(uint32, _type___nv_bfloat162, _type___nv_bfloat162),
    ]


register_global(
    __hle2_mask,
    Function(_typing___hle2_mask),
)


# Typing for __hge2_mask: two _type___nv_bfloat162 args -> uint32.
@register
class _typing___hge2_mask(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__hge2_mask"]
    cases = [
        signature(uint32, _type___nv_bfloat162, _type___nv_bfloat162),
    ]


register_global(
    __hge2_mask,
    Function(_typing___hge2_mask),
)


# Typing for __hlt2_mask: two _type___nv_bfloat162 args -> uint32.
@register
class _typing___hlt2_mask(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__hlt2_mask"]
    cases = [
        signature(uint32, _type___nv_bfloat162, _type___nv_bfloat162),
    ]


register_global(
    __hlt2_mask,
    Function(_typing___hlt2_mask),
)


# Typing for __hgt2_mask: two _type___nv_bfloat162 args -> uint32.
@register
class _typing___hgt2_mask(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__hgt2_mask"]
    cases = [
        signature(uint32, _type___nv_bfloat162, _type___nv_bfloat162),
    ]


register_global(
    __hgt2_mask,
    Function(_typing___hgt2_mask),
)


# Typing for __hequ2_mask: two _type___nv_bfloat162 args -> uint32.
@register
class _typing___hequ2_mask(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__hequ2_mask"]
    cases = [
        signature(uint32, _type___nv_bfloat162, _type___nv_bfloat162),
    ]


register_global(
    __hequ2_mask,
    Function(_typing___hequ2_mask),
)


# Typing for __hneu2_mask: two _type___nv_bfloat162 args -> uint32.
@register
class _typing___hneu2_mask(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__hneu2_mask"]
    cases = [
        signature(uint32, _type___nv_bfloat162, _type___nv_bfloat162),
    ]


register_global(
    __hneu2_mask,
    Function(_typing___hneu2_mask),
)


# Typing for __hleu2_mask: two _type___nv_bfloat162 args -> uint32.
@register
class _typing___hleu2_mask(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__hleu2_mask"]
    cases = [
        signature(uint32, _type___nv_bfloat162, _type___nv_bfloat162),
    ]


register_global(
    __hleu2_mask,
    Function(_typing___hleu2_mask),
)


# Typing for __hgeu2_mask: two _type___nv_bfloat162 args -> uint32.
@register
class _typing___hgeu2_mask(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__hgeu2_mask"]
    cases = [
        signature(uint32, _type___nv_bfloat162, _type___nv_bfloat162),
    ]


register_global(
    __hgeu2_mask,
    Function(_typing___hgeu2_mask),
)


# Typing for __hltu2_mask: two _type___nv_bfloat162 args -> uint32.
@register
class _typing___hltu2_mask(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__hltu2_mask"]
    cases = [
        signature(uint32, _type___nv_bfloat162, _type___nv_bfloat162),
    ]


register_global(
    __hltu2_mask,
    Function(_typing___hltu2_mask),
)


# Typing for __hgtu2_mask: two _type___nv_bfloat162 args -> uint32.
@register
class _typing___hgtu2_mask(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__hgtu2_mask"]
    cases = [
        signature(uint32, _type___nv_bfloat162, _type___nv_bfloat162),
    ]


register_global(
    __hgtu2_mask,
    Function(_typing___hgtu2_mask),
)


# Typing for __hisnan2: (_type___nv_bfloat162) -> _type___nv_bfloat162.
@register
class _typing___hisnan2(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__hisnan2"]
    cases = [
        signature(_type___nv_bfloat162, _type___nv_bfloat162),
    ]


register_global(
    __hisnan2,
    Function(_typing___hisnan2),
)


# Typing for __hadd2: two _type___nv_bfloat162 args -> _type___nv_bfloat162.
@register
class _typing___hadd2(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__hadd2"]
    cases = [
        signature(
            _type___nv_bfloat162,
            _type___nv_bfloat162,
            _type___nv_bfloat162,
        ),
    ]


register_global(
    __hadd2,
    Function(_typing___hadd2),
)


# Typing for __hsub2: two _type___nv_bfloat162 args -> _type___nv_bfloat162.
@register
class _typing___hsub2(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__hsub2"]
    cases = [
        signature(
            _type___nv_bfloat162,
            _type___nv_bfloat162,
            _type___nv_bfloat162,
        ),
    ]


register_global(
    __hsub2,
    Function(_typing___hsub2),
)


# Typing for __hmul2: two _type___nv_bfloat162 args -> _type___nv_bfloat162.
@register
class _typing___hmul2(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__hmul2"]
    cases = [
        signature(
            _type___nv_bfloat162,
            _type___nv_bfloat162,
            _type___nv_bfloat162,
        ),
    ]


register_global(
    __hmul2,
    Function(_typing___hmul2),
)


# Typing for __hadd2_rn: two _type___nv_bfloat162 args
# -> _type___nv_bfloat162.
@register
class _typing___hadd2_rn(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__hadd2_rn"]
    cases = [
        signature(
            _type___nv_bfloat162,
            _type___nv_bfloat162,
            _type___nv_bfloat162,
        ),
    ]


register_global(
    __hadd2_rn,
    Function(_typing___hadd2_rn),
)


# Typing for __hsub2_rn: two _type___nv_bfloat162 args
# -> _type___nv_bfloat162.
@register
class _typing___hsub2_rn(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__hsub2_rn"]
    cases = [
        signature(
            _type___nv_bfloat162,
            _type___nv_bfloat162,
            _type___nv_bfloat162,
        ),
    ]


register_global(
    __hsub2_rn,
    Function(_typing___hsub2_rn),
)


# Typing for __hmul2_rn: two _type___nv_bfloat162 args
# -> _type___nv_bfloat162.
@register
class _typing___hmul2_rn(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__hmul2_rn"]
    cases = [
        signature(
            _type___nv_bfloat162,
            _type___nv_bfloat162,
            _type___nv_bfloat162,
        ),
    ]


register_global(
    __hmul2_rn,
    Function(_typing___hmul2_rn),
)


# Typing for __h2div: two _type___nv_bfloat162 args -> _type___nv_bfloat162.
@register
class _typing___h2div(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__h2div"]
    cases = [
        signature(
            _type___nv_bfloat162,
            _type___nv_bfloat162,
            _type___nv_bfloat162,
        ),
    ]


register_global(
    __h2div,
    Function(_typing___h2div),
)


# Typing for __habs2: (_type___nv_bfloat162) -> _type___nv_bfloat162.
@register
class _typing___habs2(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__habs2"]
    cases = [
        signature(_type___nv_bfloat162, _type___nv_bfloat162),
    ]


register_global(
    __habs2,
    Function(_typing___habs2),
)


# Typing for __hadd2_sat: two _type___nv_bfloat162 args
# -> _type___nv_bfloat162.
@register
class _typing___hadd2_sat(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__hadd2_sat"]
    cases = [
        signature(
            _type___nv_bfloat162,
            _type___nv_bfloat162,
            _type___nv_bfloat162,
        ),
    ]


register_global(
    __hadd2_sat,
    Function(_typing___hadd2_sat),
)


# Typing for __hsub2_sat: two _type___nv_bfloat162 args
# -> _type___nv_bfloat162.
@register
class _typing___hsub2_sat(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__hsub2_sat"]
    cases = [
        signature(
            _type___nv_bfloat162,
            _type___nv_bfloat162,
            _type___nv_bfloat162,
        ),
    ]


register_global(
    __hsub2_sat,
    Function(_typing___hsub2_sat),
)


# Typing for __hmul2_sat: two _type___nv_bfloat162 args
# -> _type___nv_bfloat162.
@register
class _typing___hmul2_sat(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__hmul2_sat"]
    cases = [
        signature(
            _type___nv_bfloat162,
            _type___nv_bfloat162,
            _type___nv_bfloat162,
        ),
    ]


register_global(
    __hmul2_sat,
    Function(_typing___hmul2_sat),
)


# Typing for __hfma2: three _type___nv_bfloat162 args
# -> _type___nv_bfloat162.
@register
class _typing___hfma2(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__hfma2"]
    # Return type first, then the three argument types.
    cases = [
        signature(
            _type___nv_bfloat162,
            _type___nv_bfloat162,
            _type___nv_bfloat162,
            _type___nv_bfloat162,
        ),
    ]


register_global(
    __hfma2,
    Function(_typing___hfma2),
)


# Typing for __hfma2_sat: three _type___nv_bfloat162 args
# -> _type___nv_bfloat162.
@register
class _typing___hfma2_sat(ConcreteTemplate):
    # globals() lookup: a bare __name in a class body would be name-mangled.
    key = globals()["__hfma2_sat"]
    # Return type first, then the three argument types.
    cases = [
        signature(
            _type___nv_bfloat162,
            _type___nv_bfloat162,
            _type___nv_bfloat162,
            _type___nv_bfloat162,
        ),
    ]


register_global(
    __hfma2_sat,
    Function(_typing___hfma2_sat),
)


@register
class _typing___hneg2(ConcreteTemplate):
    """Typing template: ``__hneg2(bfloat162) -> bfloat162``."""

    # Looked up by string; a bare ``__hneg2`` would be name-mangled here.
    key = globals()["__hneg2"]
    cases = [
        signature(
            _type___nv_bfloat162,  # return type
            _type___nv_bfloat162,  # argument
        ),
    ]


register_global(__hneg2, types.Function(_typing___hneg2))


@register
class _typing___habs(ConcreteTemplate):
    """Typing template: ``__habs(bfloat16) -> bfloat16``."""

    # Looked up by string; a bare ``__habs`` would be name-mangled here.
    key = globals()["__habs"]
    cases = [
        signature(
            _type___nv_bfloat16,  # return type
            _type___nv_bfloat16,  # argument
        ),
    ]


register_global(__habs, types.Function(_typing___habs))


@register
class _typing___hadd(ConcreteTemplate):
    """Typing template: ``__hadd(bfloat16, bfloat16) -> bfloat16``."""

    # Looked up by string; a bare ``__hadd`` would be name-mangled here.
    key = globals()["__hadd"]
    cases = [
        signature(
            _type___nv_bfloat16,  # return type
            _type___nv_bfloat16,  # argument 1
            _type___nv_bfloat16,  # argument 2
        ),
    ]


register_global(__hadd, types.Function(_typing___hadd))


@register
class _typing___hsub(ConcreteTemplate):
    """Typing template: ``__hsub(bfloat16, bfloat16) -> bfloat16``."""

    # Looked up by string; a bare ``__hsub`` would be name-mangled here.
    key = globals()["__hsub"]
    cases = [
        signature(
            _type___nv_bfloat16,  # return type
            _type___nv_bfloat16,  # argument 1
            _type___nv_bfloat16,  # argument 2
        ),
    ]


register_global(__hsub, types.Function(_typing___hsub))


@register
class _typing___hmul(ConcreteTemplate):
    """Typing template: ``__hmul(bfloat16, bfloat16) -> bfloat16``."""

    # Looked up by string; a bare ``__hmul`` would be name-mangled here.
    key = globals()["__hmul"]
    cases = [
        signature(
            _type___nv_bfloat16,  # return type
            _type___nv_bfloat16,  # argument 1
            _type___nv_bfloat16,  # argument 2
        ),
    ]


register_global(__hmul, types.Function(_typing___hmul))


@register
class _typing___hadd_rn(ConcreteTemplate):
    """Typing template: ``__hadd_rn(bfloat16, bfloat16) -> bfloat16``."""

    # Looked up by string; a bare ``__hadd_rn`` would be name-mangled here.
    key = globals()["__hadd_rn"]
    cases = [
        signature(
            _type___nv_bfloat16,  # return type
            _type___nv_bfloat16,  # argument 1
            _type___nv_bfloat16,  # argument 2
        ),
    ]


register_global(__hadd_rn, types.Function(_typing___hadd_rn))


@register
class _typing___hsub_rn(ConcreteTemplate):
    """Typing template: ``__hsub_rn(bfloat16, bfloat16) -> bfloat16``."""

    # Looked up by string; a bare ``__hsub_rn`` would be name-mangled here.
    key = globals()["__hsub_rn"]
    cases = [
        signature(
            _type___nv_bfloat16,  # return type
            _type___nv_bfloat16,  # argument 1
            _type___nv_bfloat16,  # argument 2
        ),
    ]


register_global(__hsub_rn, types.Function(_typing___hsub_rn))


@register
class _typing___hmul_rn(ConcreteTemplate):
    """Typing template: ``__hmul_rn(bfloat16, bfloat16) -> bfloat16``."""

    # Looked up by string; a bare ``__hmul_rn`` would be name-mangled here.
    key = globals()["__hmul_rn"]
    cases = [
        signature(
            _type___nv_bfloat16,  # return type
            _type___nv_bfloat16,  # argument 1
            _type___nv_bfloat16,  # argument 2
        ),
    ]


register_global(__hmul_rn, types.Function(_typing___hmul_rn))


@register
class _typing___hdiv(ConcreteTemplate):
    """Typing template: ``__hdiv(bfloat16, bfloat16) -> bfloat16``."""

    # Looked up by string; a bare ``__hdiv`` would be name-mangled here.
    key = globals()["__hdiv"]
    cases = [
        signature(
            _type___nv_bfloat16,  # return type
            _type___nv_bfloat16,  # argument 1
            _type___nv_bfloat16,  # argument 2
        ),
    ]


register_global(__hdiv, types.Function(_typing___hdiv))


@register
class _typing___hadd_sat(ConcreteTemplate):
    """Typing template: ``__hadd_sat(bfloat16, bfloat16) -> bfloat16``."""

    # Looked up by string; a bare ``__hadd_sat`` would be name-mangled here.
    key = globals()["__hadd_sat"]
    cases = [
        signature(
            _type___nv_bfloat16,  # return type
            _type___nv_bfloat16,  # argument 1
            _type___nv_bfloat16,  # argument 2
        ),
    ]


register_global(__hadd_sat, types.Function(_typing___hadd_sat))


@register
class _typing___hsub_sat(ConcreteTemplate):
    """Typing template: ``__hsub_sat(bfloat16, bfloat16) -> bfloat16``."""

    # Looked up by string; a bare ``__hsub_sat`` would be name-mangled here.
    key = globals()["__hsub_sat"]
    cases = [
        signature(
            _type___nv_bfloat16,  # return type
            _type___nv_bfloat16,  # argument 1
            _type___nv_bfloat16,  # argument 2
        ),
    ]


register_global(__hsub_sat, types.Function(_typing___hsub_sat))


@register
class _typing___hmul_sat(ConcreteTemplate):
    """Typing template: ``__hmul_sat(bfloat16, bfloat16) -> bfloat16``."""

    # Looked up by string; a bare ``__hmul_sat`` would be name-mangled here.
    key = globals()["__hmul_sat"]
    cases = [
        signature(
            _type___nv_bfloat16,  # return type
            _type___nv_bfloat16,  # argument 1
            _type___nv_bfloat16,  # argument 2
        ),
    ]


register_global(__hmul_sat, types.Function(_typing___hmul_sat))


@register
class _typing___hfma(ConcreteTemplate):
    """Typing template for ``__hfma``.

    Single overload: (bfloat16, bfloat16, bfloat16) -> bfloat16.
    """

    # Looked up by string; a bare ``__hfma`` would be name-mangled here.
    key = globals()["__hfma"]
    cases = [
        signature(
            _type___nv_bfloat16,  # return type
            _type___nv_bfloat16,  # argument 1
            _type___nv_bfloat16,  # argument 2
            _type___nv_bfloat16,  # argument 3
        ),
    ]


register_global(__hfma, types.Function(_typing___hfma))


@register
class _typing___hfma_sat(ConcreteTemplate):
    """Typing template for ``__hfma_sat``.

    Single overload: (bfloat16, bfloat16, bfloat16) -> bfloat16.
    """

    # Looked up by string; a bare ``__hfma_sat`` would be name-mangled here.
    key = globals()["__hfma_sat"]
    cases = [
        signature(
            _type___nv_bfloat16,  # return type
            _type___nv_bfloat16,  # argument 1
            _type___nv_bfloat16,  # argument 2
            _type___nv_bfloat16,  # argument 3
        ),
    ]


register_global(__hfma_sat, types.Function(_typing___hfma_sat))


@register
class _typing___hneg(ConcreteTemplate):
    """Typing template: ``__hneg(bfloat16) -> bfloat16``."""

    # Looked up by string; a bare ``__hneg`` would be name-mangled here.
    key = globals()["__hneg"]
    cases = [
        signature(
            _type___nv_bfloat16,  # return type
            _type___nv_bfloat16,  # argument
        ),
    ]


register_global(__hneg, types.Function(_typing___hneg))


@register
class _typing___hbeq2(ConcreteTemplate):
    """Typing template: ``__hbeq2(bfloat162, bfloat162) -> bool``."""

    # Looked up by string; a bare ``__hbeq2`` would be name-mangled here.
    key = globals()["__hbeq2"]
    cases = [
        signature(
            bool_,  # return type
            _type___nv_bfloat162,  # argument 1
            _type___nv_bfloat162,  # argument 2
        ),
    ]


register_global(__hbeq2, types.Function(_typing___hbeq2))


@register
class _typing___hbne2(ConcreteTemplate):
    """Typing template: ``__hbne2(bfloat162, bfloat162) -> bool``."""

    # Looked up by string; a bare ``__hbne2`` would be name-mangled here.
    key = globals()["__hbne2"]
    cases = [
        signature(
            bool_,  # return type
            _type___nv_bfloat162,  # argument 1
            _type___nv_bfloat162,  # argument 2
        ),
    ]


register_global(__hbne2, types.Function(_typing___hbne2))


@register
class _typing___hble2(ConcreteTemplate):
    """Typing template: ``__hble2(bfloat162, bfloat162) -> bool``."""

    # Looked up by string; a bare ``__hble2`` would be name-mangled here.
    key = globals()["__hble2"]
    cases = [
        signature(
            bool_,  # return type
            _type___nv_bfloat162,  # argument 1
            _type___nv_bfloat162,  # argument 2
        ),
    ]


register_global(__hble2, types.Function(_typing___hble2))


@register
class _typing___hbge2(ConcreteTemplate):
    """Typing template: ``__hbge2(bfloat162, bfloat162) -> bool``."""

    # Looked up by string; a bare ``__hbge2`` would be name-mangled here.
    key = globals()["__hbge2"]
    cases = [
        signature(
            bool_,  # return type
            _type___nv_bfloat162,  # argument 1
            _type___nv_bfloat162,  # argument 2
        ),
    ]


register_global(__hbge2, types.Function(_typing___hbge2))


@register
class _typing___hblt2(ConcreteTemplate):
    """Typing template: ``__hblt2(bfloat162, bfloat162) -> bool``."""

    # Looked up by string; a bare ``__hblt2`` would be name-mangled here.
    key = globals()["__hblt2"]
    cases = [
        signature(
            bool_,  # return type
            _type___nv_bfloat162,  # argument 1
            _type___nv_bfloat162,  # argument 2
        ),
    ]


register_global(__hblt2, types.Function(_typing___hblt2))


@register
class _typing___hbgt2(ConcreteTemplate):
    """Typing template: ``__hbgt2(bfloat162, bfloat162) -> bool``."""

    # Looked up by string; a bare ``__hbgt2`` would be name-mangled here.
    key = globals()["__hbgt2"]
    cases = [
        signature(
            bool_,  # return type
            _type___nv_bfloat162,  # argument 1
            _type___nv_bfloat162,  # argument 2
        ),
    ]


register_global(__hbgt2, types.Function(_typing___hbgt2))


@register
class _typing___hbequ2(ConcreteTemplate):
    """Typing template: ``__hbequ2(bfloat162, bfloat162) -> bool``."""

    # Looked up by string; a bare ``__hbequ2`` would be name-mangled here.
    key = globals()["__hbequ2"]
    cases = [
        signature(
            bool_,  # return type
            _type___nv_bfloat162,  # argument 1
            _type___nv_bfloat162,  # argument 2
        ),
    ]


register_global(__hbequ2, types.Function(_typing___hbequ2))


@register
class _typing___hbneu2(ConcreteTemplate):
    """Typing template: ``__hbneu2(bfloat162, bfloat162) -> bool``."""

    # Looked up by string; a bare ``__hbneu2`` would be name-mangled here.
    key = globals()["__hbneu2"]
    cases = [
        signature(
            bool_,  # return type
            _type___nv_bfloat162,  # argument 1
            _type___nv_bfloat162,  # argument 2
        ),
    ]


register_global(__hbneu2, types.Function(_typing___hbneu2))


@register
class _typing___hbleu2(ConcreteTemplate):
    """Typing template: ``__hbleu2(bfloat162, bfloat162) -> bool``."""

    # Looked up by string; a bare ``__hbleu2`` would be name-mangled here.
    key = globals()["__hbleu2"]
    cases = [
        signature(
            bool_,  # return type
            _type___nv_bfloat162,  # argument 1
            _type___nv_bfloat162,  # argument 2
        ),
    ]


register_global(__hbleu2, types.Function(_typing___hbleu2))


@register
class _typing___hbgeu2(ConcreteTemplate):
    """Typing template: ``__hbgeu2(bfloat162, bfloat162) -> bool``."""

    # Looked up by string; a bare ``__hbgeu2`` would be name-mangled here.
    key = globals()["__hbgeu2"]
    cases = [
        signature(
            bool_,  # return type
            _type___nv_bfloat162,  # argument 1
            _type___nv_bfloat162,  # argument 2
        ),
    ]


register_global(__hbgeu2, types.Function(_typing___hbgeu2))


@register
class _typing___hbltu2(ConcreteTemplate):
    """Typing template: ``__hbltu2(bfloat162, bfloat162) -> bool``."""

    # Looked up by string; a bare ``__hbltu2`` would be name-mangled here.
    key = globals()["__hbltu2"]
    cases = [
        signature(
            bool_,  # return type
            _type___nv_bfloat162,  # argument 1
            _type___nv_bfloat162,  # argument 2
        ),
    ]


register_global(__hbltu2, types.Function(_typing___hbltu2))


@register
class _typing___hbgtu2(ConcreteTemplate):
    """Typing template: ``__hbgtu2(bfloat162, bfloat162) -> bool``."""

    # Looked up by string; a bare ``__hbgtu2`` would be name-mangled here.
    key = globals()["__hbgtu2"]
    cases = [
        signature(
            bool_,  # return type
            _type___nv_bfloat162,  # argument 1
            _type___nv_bfloat162,  # argument 2
        ),
    ]


register_global(__hbgtu2, types.Function(_typing___hbgtu2))


@register
class _typing___heq(ConcreteTemplate):
    """Typing template: ``__heq(bfloat16, bfloat16) -> bool``."""

    # Looked up by string; a bare ``__heq`` would be name-mangled here.
    key = globals()["__heq"]
    cases = [
        signature(
            bool_,  # return type
            _type___nv_bfloat16,  # argument 1
            _type___nv_bfloat16,  # argument 2
        ),
    ]


register_global(__heq, types.Function(_typing___heq))


@register
class _typing___hne(ConcreteTemplate):
    """Typing template: ``__hne(bfloat16, bfloat16) -> bool``."""

    # Looked up by string; a bare ``__hne`` would be name-mangled here.
    key = globals()["__hne"]
    cases = [
        signature(
            bool_,  # return type
            _type___nv_bfloat16,  # argument 1
            _type___nv_bfloat16,  # argument 2
        ),
    ]


register_global(__hne, types.Function(_typing___hne))


@register
class _typing___hle(ConcreteTemplate):
    """Typing template: ``__hle(bfloat16, bfloat16) -> bool``."""

    # Looked up by string; a bare ``__hle`` would be name-mangled here.
    key = globals()["__hle"]
    cases = [
        signature(
            bool_,  # return type
            _type___nv_bfloat16,  # argument 1
            _type___nv_bfloat16,  # argument 2
        ),
    ]


register_global(__hle, types.Function(_typing___hle))


@register
class _typing___hge(ConcreteTemplate):
    """Typing template: ``__hge(bfloat16, bfloat16) -> bool``."""

    # Looked up by string; a bare ``__hge`` would be name-mangled here.
    key = globals()["__hge"]
    cases = [
        signature(
            bool_,  # return type
            _type___nv_bfloat16,  # argument 1
            _type___nv_bfloat16,  # argument 2
        ),
    ]


register_global(__hge, types.Function(_typing___hge))


@register
class _typing___hlt(ConcreteTemplate):
    """Typing template: ``__hlt(bfloat16, bfloat16) -> bool``."""

    # Looked up by string; a bare ``__hlt`` would be name-mangled here.
    key = globals()["__hlt"]
    cases = [
        signature(
            bool_,  # return type
            _type___nv_bfloat16,  # argument 1
            _type___nv_bfloat16,  # argument 2
        ),
    ]


register_global(__hlt, types.Function(_typing___hlt))


@register
class _typing___hgt(ConcreteTemplate):
    """Typing template: ``__hgt(bfloat16, bfloat16) -> bool``."""

    # Looked up by string; a bare ``__hgt`` would be name-mangled here.
    key = globals()["__hgt"]
    cases = [
        signature(
            bool_,  # return type
            _type___nv_bfloat16,  # argument 1
            _type___nv_bfloat16,  # argument 2
        ),
    ]


register_global(__hgt, types.Function(_typing___hgt))


@register
class _typing___hequ(ConcreteTemplate):
    """Typing template: ``__hequ(bfloat16, bfloat16) -> bool``."""

    # Looked up by string; a bare ``__hequ`` would be name-mangled here.
    key = globals()["__hequ"]
    cases = [
        signature(
            bool_,  # return type
            _type___nv_bfloat16,  # argument 1
            _type___nv_bfloat16,  # argument 2
        ),
    ]


register_global(__hequ, types.Function(_typing___hequ))


@register
class _typing___hneu(ConcreteTemplate):
    """Typing template: ``__hneu(bfloat16, bfloat16) -> bool``."""

    # Looked up by string; a bare ``__hneu`` would be name-mangled here.
    key = globals()["__hneu"]
    cases = [
        signature(
            bool_,  # return type
            _type___nv_bfloat16,  # argument 1
            _type___nv_bfloat16,  # argument 2
        ),
    ]


register_global(__hneu, types.Function(_typing___hneu))


@register
class _typing___hleu(ConcreteTemplate):
    """Typing template: ``__hleu(bfloat16, bfloat16) -> bool``."""

    # Looked up by string; a bare ``__hleu`` would be name-mangled here.
    key = globals()["__hleu"]
    cases = [
        signature(
            bool_,  # return type
            _type___nv_bfloat16,  # argument 1
            _type___nv_bfloat16,  # argument 2
        ),
    ]


register_global(__hleu, types.Function(_typing___hleu))


@register
class _typing___hgeu(ConcreteTemplate):
    """Typing template: ``__hgeu(bfloat16, bfloat16) -> bool``."""

    # Looked up by string; a bare ``__hgeu`` would be name-mangled here.
    key = globals()["__hgeu"]
    cases = [
        signature(
            bool_,  # return type
            _type___nv_bfloat16,  # argument 1
            _type___nv_bfloat16,  # argument 2
        ),
    ]


register_global(__hgeu, types.Function(_typing___hgeu))


@register
class _typing___hltu(ConcreteTemplate):
    """Typing template: ``__hltu(bfloat16, bfloat16) -> bool``."""

    # Looked up by string; a bare ``__hltu`` would be name-mangled here.
    key = globals()["__hltu"]
    cases = [
        signature(
            bool_,  # return type
            _type___nv_bfloat16,  # argument 1
            _type___nv_bfloat16,  # argument 2
        ),
    ]


register_global(__hltu, types.Function(_typing___hltu))


@register
class _typing___hgtu(ConcreteTemplate):
    """Typing template: ``__hgtu(bfloat16, bfloat16) -> bool``."""

    # Looked up by string; a bare ``__hgtu`` would be name-mangled here.
    key = globals()["__hgtu"]
    cases = [
        signature(
            bool_,  # return type
            _type___nv_bfloat16,  # argument 1
            _type___nv_bfloat16,  # argument 2
        ),
    ]


register_global(__hgtu, types.Function(_typing___hgtu))


@register
class _typing___hisnan(ConcreteTemplate):
    """Typing template: ``__hisnan(bfloat16) -> bool``."""

    # Looked up by string; a bare ``__hisnan`` would be name-mangled here.
    key = globals()["__hisnan"]
    cases = [
        signature(
            bool_,  # return type
            _type___nv_bfloat16,  # argument
        ),
    ]


register_global(__hisnan, types.Function(_typing___hisnan))


@register
class _typing___hmax(ConcreteTemplate):
    """Typing template: ``__hmax(bfloat16, bfloat16) -> bfloat16``."""

    # Looked up by string; a bare ``__hmax`` would be name-mangled here.
    key = globals()["__hmax"]
    cases = [
        signature(
            _type___nv_bfloat16,  # return type
            _type___nv_bfloat16,  # argument 1
            _type___nv_bfloat16,  # argument 2
        ),
    ]


register_global(__hmax, types.Function(_typing___hmax))


@register
class _typing___hmin(ConcreteTemplate):
    """Typing template: ``__hmin(bfloat16, bfloat16) -> bfloat16``."""

    # Looked up by string; a bare ``__hmin`` would be name-mangled here.
    key = globals()["__hmin"]
    cases = [
        signature(
            _type___nv_bfloat16,  # return type
            _type___nv_bfloat16,  # argument 1
            _type___nv_bfloat16,  # argument 2
        ),
    ]


register_global(__hmin, types.Function(_typing___hmin))


@register
class _typing___hmax_nan(ConcreteTemplate):
    """Typing template: ``__hmax_nan(bfloat16, bfloat16) -> bfloat16``."""

    # Looked up by string; a bare ``__hmax_nan`` would be name-mangled here.
    key = globals()["__hmax_nan"]
    cases = [
        signature(
            _type___nv_bfloat16,  # return type
            _type___nv_bfloat16,  # argument 1
            _type___nv_bfloat16,  # argument 2
        ),
    ]


register_global(__hmax_nan, types.Function(_typing___hmax_nan))


@register
class _typing___hmin_nan(ConcreteTemplate):
    """Typing template: ``__hmin_nan(bfloat16, bfloat16) -> bfloat16``."""

    # Looked up by string; a bare ``__hmin_nan`` would be name-mangled here.
    key = globals()["__hmin_nan"]
    cases = [
        signature(
            _type___nv_bfloat16,  # return type
            _type___nv_bfloat16,  # argument 1
            _type___nv_bfloat16,  # argument 2
        ),
    ]


register_global(__hmin_nan, types.Function(_typing___hmin_nan))


@register
class _typing___hfma_relu(ConcreteTemplate):
    """Typing template for ``__hfma_relu``.

    Single overload: (bfloat16, bfloat16, bfloat16) -> bfloat16.
    """

    # Looked up by string; a bare ``__hfma_relu`` would be name-mangled here.
    key = globals()["__hfma_relu"]
    cases = [
        signature(
            _type___nv_bfloat16,  # return type
            _type___nv_bfloat16,  # argument 1
            _type___nv_bfloat16,  # argument 2
            _type___nv_bfloat16,  # argument 3
        ),
    ]


register_global(__hfma_relu, types.Function(_typing___hfma_relu))


@register
class _typing___hmax2(ConcreteTemplate):
    """Typing template: ``__hmax2(bfloat162, bfloat162) -> bfloat162``."""

    # Looked up by string; a bare ``__hmax2`` would be name-mangled here.
    key = globals()["__hmax2"]
    cases = [
        signature(
            _type___nv_bfloat162,  # return type
            _type___nv_bfloat162,  # argument 1
            _type___nv_bfloat162,  # argument 2
        ),
    ]


register_global(__hmax2, types.Function(_typing___hmax2))


@register
class _typing___hmin2(ConcreteTemplate):
    """Typing template: ``__hmin2(bfloat162, bfloat162) -> bfloat162``."""

    # Looked up by string; a bare ``__hmin2`` would be name-mangled here.
    key = globals()["__hmin2"]
    cases = [
        signature(
            _type___nv_bfloat162,  # return type
            _type___nv_bfloat162,  # argument 1
            _type___nv_bfloat162,  # argument 2
        ),
    ]


register_global(__hmin2, types.Function(_typing___hmin2))


@register
class _typing___hmax2_nan(ConcreteTemplate):
    """Typing template: ``__hmax2_nan(bfloat162, bfloat162) -> bfloat162``."""

    # Looked up by string; a bare ``__hmax2_nan`` would be name-mangled here.
    key = globals()["__hmax2_nan"]
    cases = [
        signature(
            _type___nv_bfloat162,  # return type
            _type___nv_bfloat162,  # argument 1
            _type___nv_bfloat162,  # argument 2
        ),
    ]


register_global(__hmax2_nan, types.Function(_typing___hmax2_nan))


@register
class _typing___hmin2_nan(ConcreteTemplate):
    """Typing template: ``__hmin2_nan(bfloat162, bfloat162) -> bfloat162``."""

    # Looked up by string; a bare ``__hmin2_nan`` would be name-mangled here.
    key = globals()["__hmin2_nan"]
    cases = [
        signature(
            _type___nv_bfloat162,  # return type
            _type___nv_bfloat162,  # argument 1
            _type___nv_bfloat162,  # argument 2
        ),
    ]


register_global(__hmin2_nan, types.Function(_typing___hmin2_nan))


@register
class _typing___hfma2_relu(ConcreteTemplate):
    """Typing template for ``__hfma2_relu``.

    Single overload: (bfloat162, bfloat162, bfloat162) -> bfloat162.
    """

    # Looked up by string; a bare ``__hfma2_relu`` would be name-mangled here.
    key = globals()["__hfma2_relu"]
    cases = [
        signature(
            _type___nv_bfloat162,  # return type
            _type___nv_bfloat162,  # argument 1
            _type___nv_bfloat162,  # argument 2
            _type___nv_bfloat162,  # argument 3
        ),
    ]


register_global(__hfma2_relu, types.Function(_typing___hfma2_relu))


@register
class _typing___hcmadd(ConcreteTemplate):
    """Typing template for ``__hcmadd``.

    Single overload: (bfloat162, bfloat162, bfloat162) -> bfloat162.
    """

    # Looked up by string; a bare ``__hcmadd`` would be name-mangled here.
    key = globals()["__hcmadd"]
    cases = [
        signature(
            _type___nv_bfloat162,  # return type
            _type___nv_bfloat162,  # argument 1
            _type___nv_bfloat162,  # argument 2
            _type___nv_bfloat162,  # argument 3
        ),
    ]


register_global(__hcmadd, types.Function(_typing___hcmadd))


@register
class _typing_hsqrt(ConcreteTemplate):
    """Typing template: ``hsqrt(bfloat16) -> bfloat16``."""

    key = globals()["hsqrt"]
    cases = [
        signature(
            _type___nv_bfloat16,  # return type
            _type___nv_bfloat16,  # argument
        ),
    ]


register_global(hsqrt, types.Function(_typing_hsqrt))


@register
class _typing_hrsqrt(ConcreteTemplate):
    """Typing template: ``hrsqrt(bfloat16) -> bfloat16``."""

    key = globals()["hrsqrt"]
    cases = [
        signature(
            _type___nv_bfloat16,  # return type
            _type___nv_bfloat16,  # argument
        ),
    ]


register_global(hrsqrt, types.Function(_typing_hrsqrt))


@register
class _typing_hrcp(ConcreteTemplate):
    """Typing template: ``hrcp(bfloat16) -> bfloat16``."""

    key = globals()["hrcp"]
    cases = [
        signature(
            _type___nv_bfloat16,  # return type
            _type___nv_bfloat16,  # argument
        ),
    ]


register_global(hrcp, types.Function(_typing_hrcp))


@register
class _typing_hlog(ConcreteTemplate):
    """Typing template: ``hlog(bfloat16) -> bfloat16``."""

    key = globals()["hlog"]
    cases = [
        signature(
            _type___nv_bfloat16,  # return type
            _type___nv_bfloat16,  # argument
        ),
    ]


register_global(hlog, types.Function(_typing_hlog))


@register
class _typing_hlog2(ConcreteTemplate):
    """Typing template: ``hlog2(bfloat16) -> bfloat16``."""

    key = globals()["hlog2"]
    cases = [
        signature(
            _type___nv_bfloat16,  # return type
            _type___nv_bfloat16,  # argument
        ),
    ]


register_global(hlog2, types.Function(_typing_hlog2))


@register
class _typing_hlog10(ConcreteTemplate):
    """Typing template: ``hlog10(bfloat16) -> bfloat16``."""

    key = globals()["hlog10"]
    cases = [
        signature(
            _type___nv_bfloat16,  # return type
            _type___nv_bfloat16,  # argument
        ),
    ]


register_global(hlog10, types.Function(_typing_hlog10))


@register
class _typing_hexp(ConcreteTemplate):
    """Typing template: ``hexp(bfloat16) -> bfloat16``."""

    key = globals()["hexp"]
    cases = [
        signature(
            _type___nv_bfloat16,  # return type
            _type___nv_bfloat16,  # argument
        ),
    ]


register_global(hexp, types.Function(_typing_hexp))


@register
class _typing_htanh_approx(ConcreteTemplate):
    """Typing template: ``htanh_approx(bfloat16) -> bfloat16``."""

    key = globals()["htanh_approx"]
    cases = [
        signature(
            _type___nv_bfloat16,  # return type
            _type___nv_bfloat16,  # argument
        ),
    ]


register_global(htanh_approx, types.Function(_typing_htanh_approx))


@register
class _typing_h2tanh_approx(ConcreteTemplate):
    """Typing template: ``h2tanh_approx(bfloat162) -> bfloat162``."""

    key = globals()["h2tanh_approx"]
    cases = [
        signature(
            _type___nv_bfloat162,  # return type
            _type___nv_bfloat162,  # argument
        ),
    ]


register_global(h2tanh_approx, types.Function(_typing_h2tanh_approx))


@register
class _typing_htanh(ConcreteTemplate):
    """Typing template: ``htanh(bfloat16) -> bfloat16``."""

    key = globals()["htanh"]
    cases = [
        signature(
            _type___nv_bfloat16,  # return type
            _type___nv_bfloat16,  # argument
        ),
    ]


register_global(htanh, types.Function(_typing_htanh))


@register
class _typing_h2tanh(ConcreteTemplate):
    """Typing template: ``h2tanh(bfloat162) -> bfloat162``."""

    key = globals()["h2tanh"]
    cases = [
        signature(
            _type___nv_bfloat162,  # return type
            _type___nv_bfloat162,  # argument
        ),
    ]


register_global(h2tanh, types.Function(_typing_h2tanh))


@register
class _typing_hexp2(ConcreteTemplate):
    """Typing template: ``hexp2(bfloat16) -> bfloat16``."""

    key = globals()["hexp2"]
    cases = [
        signature(
            _type___nv_bfloat16,  # return type
            _type___nv_bfloat16,  # argument
        ),
    ]


register_global(hexp2, types.Function(_typing_hexp2))


@register
class _typing_hexp10(ConcreteTemplate):
    """Typing template: ``hexp10(bfloat16) -> bfloat16``."""

    key = globals()["hexp10"]
    cases = [
        signature(
            _type___nv_bfloat16,  # return type
            _type___nv_bfloat16,  # argument
        ),
    ]


register_global(hexp10, types.Function(_typing_hexp10))


@register
class _typing_hcos(ConcreteTemplate):
    """Typing template: ``hcos(bfloat16) -> bfloat16``."""

    key = globals()["hcos"]
    cases = [
        signature(
            _type___nv_bfloat16,  # return type
            _type___nv_bfloat16,  # argument
        ),
    ]


register_global(hcos, types.Function(_typing_hcos))


@register
class _typing_hsin(ConcreteTemplate):
    """Typing template: ``hsin(bfloat16) -> bfloat16``."""

    key = globals()["hsin"]
    cases = [
        signature(
            _type___nv_bfloat16,  # return type
            _type___nv_bfloat16,  # argument
        ),
    ]


register_global(hsin, types.Function(_typing_hsin))


@register
class _typing_h2sqrt(ConcreteTemplate):
    """Typing template: ``h2sqrt(bfloat162) -> bfloat162``."""

    key = globals()["h2sqrt"]
    cases = [
        signature(
            _type___nv_bfloat162,  # return type
            _type___nv_bfloat162,  # argument
        ),
    ]


register_global(h2sqrt, types.Function(_typing_h2sqrt))


@register
class _typing_h2rsqrt(ConcreteTemplate):
    """Typing template: ``h2rsqrt(bfloat162) -> bfloat162``."""

    key = globals()["h2rsqrt"]
    cases = [
        signature(
            _type___nv_bfloat162,  # return type
            _type___nv_bfloat162,  # argument
        ),
    ]


register_global(h2rsqrt, types.Function(_typing_h2rsqrt))


@register
class _typing_h2rcp(ConcreteTemplate):
    """Typing template: ``h2rcp(bfloat162) -> bfloat162``."""

    key = globals()["h2rcp"]
    cases = [
        signature(
            _type___nv_bfloat162,  # return type
            _type___nv_bfloat162,  # argument
        ),
    ]


register_global(h2rcp, types.Function(_typing_h2rcp))


@register
class _typing_h2log(ConcreteTemplate):
    """Typing template: ``h2log(bfloat162) -> bfloat162``."""

    key = globals()["h2log"]
    cases = [
        signature(
            _type___nv_bfloat162,  # return type
            _type___nv_bfloat162,  # argument
        ),
    ]


register_global(h2log, types.Function(_typing_h2log))


@register
class _typing_h2log2(ConcreteTemplate):
    """Typing template: ``h2log2(bfloat162) -> bfloat162``."""

    key = globals()["h2log2"]
    cases = [
        signature(
            _type___nv_bfloat162,  # return type
            _type___nv_bfloat162,  # argument
        ),
    ]


register_global(h2log2, types.Function(_typing_h2log2))


@register
class _typing_h2log10(ConcreteTemplate):
    """Typing template: ``h2log10(bfloat162) -> bfloat162``."""

    key = globals()["h2log10"]
    cases = [
        signature(
            _type___nv_bfloat162,  # return type
            _type___nv_bfloat162,  # argument
        ),
    ]


register_global(h2log10, types.Function(_typing_h2log10))


@register
class _typing_h2exp(ConcreteTemplate):
    """Typing template: ``h2exp(bfloat162) -> bfloat162``."""

    key = globals()["h2exp"]
    cases = [
        signature(
            _type___nv_bfloat162,  # return type
            _type___nv_bfloat162,  # argument
        ),
    ]


register_global(h2exp, types.Function(_typing_h2exp))


@register
class _typing_h2exp2(ConcreteTemplate):
    """Typing template: ``h2exp2(bfloat162) -> bfloat162``."""

    key = globals()["h2exp2"]
    cases = [
        signature(
            _type___nv_bfloat162,  # return type
            _type___nv_bfloat162,  # argument
        ),
    ]


register_global(h2exp2, types.Function(_typing_h2exp2))


@register
class _typing_h2exp10(ConcreteTemplate):
    """Typing template: ``h2exp10(bfloat162) -> bfloat162``."""

    key = globals()["h2exp10"]
    cases = [
        signature(
            _type___nv_bfloat162,  # return type
            _type___nv_bfloat162,  # argument
        ),
    ]


register_global(h2exp10, types.Function(_typing_h2exp10))


@register
class _typing_h2cos(ConcreteTemplate):
    """Typing template: ``h2cos(bfloat162) -> bfloat162``."""

    key = globals()["h2cos"]
    cases = [
        signature(
            _type___nv_bfloat162,  # return type
            _type___nv_bfloat162,  # argument
        ),
    ]


register_global(h2cos, types.Function(_typing_h2cos))


@register
class _typing_h2sin(ConcreteTemplate):
    """Typing template: ``h2sin(bfloat162) -> bfloat162``."""

    key = globals()["h2sin"]
    cases = [
        signature(
            _type___nv_bfloat162,  # return type
            _type___nv_bfloat162,  # argument
        ),
    ]


register_global(h2sin, types.Function(_typing_h2sin))


@register
class _typing_atomicAdd(ConcreteTemplate):
    """Typing template for ``atomicAdd`` with two overloads.

    For each of ``_type___nv_bfloat162`` and ``_type___nv_bfloat16`` (in
    that order) there is one case taking a ``CPointer`` to the type plus a
    value of the type, and returning the type.
    """

    key = globals()["atomicAdd"]
    cases = [
        signature(elem_ty, CPointer(elem_ty), elem_ty)
        for elem_ty in (_type___nv_bfloat162, _type___nv_bfloat16)
    ]


register_global(atomicAdd, types.Function(_typing_atomicAdd))


@register
class _typing___half(ConcreteTemplate):
    """Typing template keyed on the module-level ``__half`` object.

    Single overload: one ``_type___nv_bfloat16`` argument, ``void`` return.
    """

    # Looked up by string; a bare ``__half`` would be name-mangled here.
    # NOTE(review): the top of this file binds ``__half = float16``; confirm
    # that keying on that object with a ``void`` return type is intended.
    key = globals()["__half"]
    cases = [
        signature(
            void,  # return type
            _type___nv_bfloat16,  # argument
        ),
    ]


register_global(__half, types.Function(_typing___half))


@register_global(operator.add)
class _typing_operator_add(BinOp):
    """``operator.add`` typing: ``BinOp`` cases plus bfloat16/bfloat162.

    Each added case takes two operands of one type and returns that type.
    """

    cases = [
        *BinOp.cases,
        *(
            signature(bf_ty, bf_ty, bf_ty)
            for bf_ty in (_type___nv_bfloat16, _type___nv_bfloat162)
        ),
    ]


@register_global(operator.sub)
class _typing_operator_sub(BinOp):
    """``operator.sub`` typing: ``BinOp`` cases plus bfloat16/bfloat162.

    Each added case takes two operands of one type and returns that type.
    """

    cases = [
        *BinOp.cases,
        *(
            signature(bf_ty, bf_ty, bf_ty)
            for bf_ty in (_type___nv_bfloat16, _type___nv_bfloat162)
        ),
    ]


@register_global(operator.mul)
class _typing_operator_mul(BinOp):
    """``operator.mul`` typing: ``BinOp`` cases plus bfloat16/bfloat162.

    Each added case takes two operands of one type and returns that type.
    """

    cases = [
        *BinOp.cases,
        *(
            signature(bf_ty, bf_ty, bf_ty)
            for bf_ty in (_type___nv_bfloat16, _type___nv_bfloat162)
        ),
    ]


@register_global(operator.truediv)
class _typing_operator_truediv(BinOpTrueDiv):
    """``operator.truediv`` typing: ``BinOpTrueDiv`` cases plus bfloat16 types.

    Each added case takes two operands of one type and returns that type.
    """

    cases = [
        *BinOpTrueDiv.cases,
        *(
            signature(bf_ty, bf_ty, bf_ty)
            for bf_ty in (_type___nv_bfloat16, _type___nv_bfloat162)
        ),
    ]


@register_global(operator.iadd)
class _typing_operator_iadd(BinOp):
    """``operator.iadd`` typing: ``BinOp`` cases plus bfloat16/bfloat162.

    Each added case takes two operands of one type and returns that type.
    """

    cases = [
        *BinOp.cases,
        *(
            signature(bf_ty, bf_ty, bf_ty)
            for bf_ty in (_type___nv_bfloat16, _type___nv_bfloat162)
        ),
    ]


@register_global(operator.isub)
class _typing_operator_isub(BinOp):
    """``operator.isub`` typing: ``BinOp`` cases plus bfloat16/bfloat162.

    Each added case takes two operands of one type and returns that type.
    """

    cases = [
        *BinOp.cases,
        *(
            signature(bf_ty, bf_ty, bf_ty)
            for bf_ty in (_type___nv_bfloat16, _type___nv_bfloat162)
        ),
    ]


@register_global(operator.imul)
class _typing_operator_imul(BinOp):
    """``operator.imul`` typing: ``BinOp`` cases plus bfloat16/bfloat162.

    Each added case takes two operands of one type and returns that type.
    """

    cases = [
        *BinOp.cases,
        *(
            signature(bf_ty, bf_ty, bf_ty)
            for bf_ty in (_type___nv_bfloat16, _type___nv_bfloat162)
        ),
    ]


@register_global(operator.itruediv)
class _typing_operator_itruediv(BinOpTrueDiv):
    """``operator.itruediv`` typing: ``BinOpTrueDiv`` cases plus bfloat16 types.

    Each added case takes two operands of one type and returns that type.
    """

    # Extend the case list of the actual base class. The previous code
    # subclassed ``BinOpTrueDiv`` but started from ``BinOp.cases``, unlike
    # the ``operator.truediv`` template in this file.
    cases = BinOpTrueDiv.cases + [
        signature(
            _type___nv_bfloat16, _type___nv_bfloat16, _type___nv_bfloat16
        ),
        signature(
            _type___nv_bfloat162, _type___nv_bfloat162, _type___nv_bfloat162
        ),
    ]


@register_global(operator.pos)
class _typing_operator_pos(UnaryPositive):
    """``operator.pos`` typing: ``UnaryPositive`` cases plus bfloat16 types.

    Each added case takes one operand and returns the same type.
    """

    cases = [
        *UnaryPositive.cases,
        *(
            signature(bf_ty, bf_ty)
            for bf_ty in (_type___nv_bfloat16, _type___nv_bfloat162)
        ),
    ]


@register_global(operator.neg)
class _typing_operator_neg(UnaryNegate):
    """``operator.neg`` typing: ``UnaryNegate`` cases plus bfloat16 types.

    Each added case takes one operand and returns the same type.
    """

    cases = [
        *UnaryNegate.cases,
        *(
            signature(bf_ty, bf_ty)
            for bf_ty in (_type___nv_bfloat16, _type___nv_bfloat162)
        ),
    ]


@register_global(operator.eq)
class _typing_operator_eq(UnorderedCmpOp):
    """``operator.eq`` typing: ``UnorderedCmpOp`` cases plus bfloat16 types.

    Each added case takes two operands of one type and returns ``bool_``.
    """

    cases = [
        *UnorderedCmpOp.cases,
        *(
            signature(bool_, bf_ty, bf_ty)
            for bf_ty in (_type___nv_bfloat16, _type___nv_bfloat162)
        ),
    ]


@register_global(operator.ne)
class _typing_operator_ne(UnorderedCmpOp):
    """``operator.ne`` typing: ``UnorderedCmpOp`` cases plus bfloat16 types.

    Each added case takes two operands of one type and returns ``bool_``.
    """

    cases = [
        *UnorderedCmpOp.cases,
        *(
            signature(bool_, bf_ty, bf_ty)
            for bf_ty in (_type___nv_bfloat16, _type___nv_bfloat162)
        ),
    ]


@register_global(operator.gt)
class _typing_operator_gt(OrderedCmpOp):
    """``operator.gt`` typing: ``OrderedCmpOp`` cases plus bfloat16 types.

    Each added case takes two operands of one type and returns ``bool_``.
    """

    cases = [
        *OrderedCmpOp.cases,
        *(
            signature(bool_, bf_ty, bf_ty)
            for bf_ty in (_type___nv_bfloat16, _type___nv_bfloat162)
        ),
    ]


@register_global(operator.lt)
class _typing_operator_lt(OrderedCmpOp):
    """``operator.lt`` typing: ``OrderedCmpOp`` cases plus bfloat16 types.

    Each added case takes two operands of one type and returns ``bool_``.
    """

    cases = [
        *OrderedCmpOp.cases,
        *(
            signature(bool_, bf_ty, bf_ty)
            for bf_ty in (_type___nv_bfloat16, _type___nv_bfloat162)
        ),
    ]


@register_global(operator.ge)
class _typing_operator_ge(OrderedCmpOp):
    """``operator.ge`` typing: ``OrderedCmpOp`` cases plus bfloat16 types.

    Each added case takes two operands of one type and returns ``bool_``.
    """

    cases = [
        *OrderedCmpOp.cases,
        *(
            signature(bool_, bf_ty, bf_ty)
            for bf_ty in (_type___nv_bfloat16, _type___nv_bfloat162)
        ),
    ]


@register_global(operator.le)
class _typing_operator_le(OrderedCmpOp):
    """``operator.le`` typing: ``OrderedCmpOp`` cases plus bfloat16 types.

    Each added case takes two operands of one type and returns ``bool_``.
    """

    cases = [
        *OrderedCmpOp.cases,
        *(
            signature(bool_, bf_ty, bf_ty)
            for bf_ty in (_type___nv_bfloat16, _type___nv_bfloat162)
        ),
    ]


# Aliases: additional module-level names bound to objects defined earlier
# in this module.
# NOTE(review): none of these alias names appear in the symbol lists that
# make up ``__all__`` below - confirm that omission is intended.
__nv_bfloat16_raw = unnamed1405307
__nv_bfloat162_raw = unnamed1405416
nv_bfloat16 = __nv_bfloat16
nv_bfloat162 = __nv_bfloat162


# Symbols:


# Names of the ``_type_*`` objects exported through ``__all__``.
_NBTYPE_SYMBOLS = [
    "_type_unnamed1405307",
    "_type_unnamed1405416",
    "_type___nv_bfloat16",
    "_type___nv_bfloat162",
]


# Record names exported through ``__all__``; these are the entries of
# ``_NBTYPE_SYMBOLS`` without the ``_type_`` prefix.
_RECORD_SYMBOLS = [
    "unnamed1405307",
    "unnamed1405416",
    "__nv_bfloat16",
    "__nv_bfloat162",
]


# Function names exported through ``__all__``.
# Some names are listed twice (e.g. "__shfl_sync", "__ldg", "__stwb"), so
# this list is not free of duplicates.
_FUNCTION_SYMBOLS = [
    "__double2bfloat16",
    "__float2bfloat16",
    "__float2bfloat16_rn",
    "__float2bfloat16_rz",
    "__float2bfloat16_rd",
    "__float2bfloat16_ru",
    "__bfloat162float",
    "__float2bfloat162_rn",
    "__floats2bfloat162_rn",
    "__low2float",
    "__high2float",
    "__float22bfloat162_rn",
    "__bfloat1622float2",
    "__bfloat162char_rz",
    "__bfloat162uchar_rz",
    "__bfloat162int_rn",
    "__bfloat162int_rz",
    "__bfloat162int_rd",
    "__bfloat162int_ru",
    "__int2bfloat16_rn",
    "__int2bfloat16_rz",
    "__int2bfloat16_rd",
    "__int2bfloat16_ru",
    "__bfloat162short_rn",
    "__bfloat162short_rz",
    "__bfloat162short_rd",
    "__bfloat162short_ru",
    "__short2bfloat16_rn",
    "__short2bfloat16_rz",
    "__short2bfloat16_rd",
    "__short2bfloat16_ru",
    "__bfloat162uint_rn",
    "__bfloat162uint_rz",
    "__bfloat162uint_rd",
    "__bfloat162uint_ru",
    "__uint2bfloat16_rn",
    "__uint2bfloat16_rz",
    "__uint2bfloat16_rd",
    "__uint2bfloat16_ru",
    "__bfloat162ushort_rn",
    "__bfloat162ushort_rz",
    "__bfloat162ushort_rd",
    "__bfloat162ushort_ru",
    "__ushort2bfloat16_rn",
    "__ushort2bfloat16_rz",
    "__ushort2bfloat16_rd",
    "__ushort2bfloat16_ru",
    "__bfloat162ull_rn",
    "__bfloat162ull_rz",
    "make_bfloat162",
    "__bfloat162ull_rd",
    "__bfloat162ull_ru",
    "__ull2bfloat16_rn",
    "__ull2bfloat16_rz",
    "__ull2bfloat16_rd",
    "__ull2bfloat16_ru",
    "__bfloat162ll_rn",
    "__bfloat162ll_rz",
    "__bfloat162ll_rd",
    "__bfloat162ll_ru",
    "__ll2bfloat16_rn",
    "__ll2bfloat16_rz",
    "__ll2bfloat16_rd",
    "__ll2bfloat16_ru",
    "htrunc",
    "hceil",
    "hfloor",
    "hrint",
    "h2trunc",
    "h2ceil",
    "h2floor",
    "h2rint",
    "__bfloat162bfloat162",
    "__lowhigh2highlow",
    "__lows2bfloat162",
    "__highs2bfloat162",
    "__high2bfloat16",
    "__low2bfloat16",
    "__hisinf",
    "__halves2bfloat162",
    "__low2bfloat162",
    "__high2bfloat162",
    "__bfloat16_as_short",
    "__bfloat16_as_ushort",
    "__short_as_bfloat16",
    "__ushort_as_bfloat16",
    "__shfl_sync",
    "__shfl_sync",
    "__shfl_up_sync",
    "__shfl_up_sync",
    "__shfl_down_sync",
    "__shfl_down_sync",
    "__shfl_xor_sync",
    "__shfl_xor_sync",
    "__ldg",
    "__ldg",
    "__ldcg",
    "__ldcg",
    "__ldca",
    "__ldca",
    "__ldcs",
    "__ldcs",
    "__ldlu",
    "__ldlu",
    "__ldcv",
    "__ldcv",
    "__stwb",
    "__stwb",
    "__stcg",
    "__stcg",
    "__stcs",
    "__stcs",
    "__stwt",
    "__stwt",
    "__heq2",
    "__hne2",
    "__hle2",
    "__hge2",
    "__hlt2",
    "__hgt2",
    "__hequ2",
    "__hneu2",
    "__hleu2",
    "__hgeu2",
    "__hltu2",
    "__hgtu2",
    "__heq2_mask",
    "__hne2_mask",
    "__hle2_mask",
    "__hge2_mask",
    "__hlt2_mask",
    "__hgt2_mask",
    "__hequ2_mask",
    "__hneu2_mask",
    "__hleu2_mask",
    "__hgeu2_mask",
    "__hltu2_mask",
    "__hgtu2_mask",
    "__hisnan2",
    "__hadd2",
    "__hsub2",
    "__hmul2",
    "__hadd2_rn",
    "__hsub2_rn",
    "__hmul2_rn",
    "__h2div",
    "__habs2",
    "__hadd2_sat",
    "__hsub2_sat",
    "__hmul2_sat",
    "__hfma2",
    "__hfma2_sat",
    "__hneg2",
    "__habs",
    "__hadd",
    "__hsub",
    "__hmul",
    "__hadd_rn",
    "__hsub_rn",
    "__hmul_rn",
    "__hdiv",
    "__hadd_sat",
    "__hsub_sat",
    "__hmul_sat",
    "__hfma",
    "__hfma_sat",
    "__hneg",
    "__hbeq2",
    "__hbne2",
    "__hble2",
    "__hbge2",
    "__hblt2",
    "__hbgt2",
    "__hbequ2",
    "__hbneu2",
    "__hbleu2",
    "__hbgeu2",
    "__hbltu2",
    "__hbgtu2",
    "__heq",
    "__hne",
    "__hle",
    "__hge",
    "__hlt",
    "__hgt",
    "__hequ",
    "__hneu",
    "__hleu",
    "__hgeu",
    "__hltu",
    "__hgtu",
    "__hisnan",
    "__hmax",
    "__hmin",
    "__hmax_nan",
    "__hmin_nan",
    "__hfma_relu",
    "__hmax2",
    "__hmin2",
    "__hmax2_nan",
    "__hmin2_nan",
    "__hfma2_relu",
    "__hcmadd",
    "hsqrt",
    "hrsqrt",
    "hrcp",
    "hlog",
    "hlog2",
    "hlog10",
    "hexp",
    "htanh_approx",
    "h2tanh_approx",
    "htanh",
    "h2tanh",
    "hexp2",
    "hexp10",
    "hcos",
    "hsin",
    "h2sqrt",
    "h2rsqrt",
    "h2rcp",
    "h2log",
    "h2log2",
    "h2log10",
    "h2exp",
    "h2exp2",
    "h2exp10",
    "h2cos",
    "h2sin",
    "atomicAdd",
]


# Public export list. ``_FUNCTION_SYMBOLS`` lists some names more than once
# (e.g. "__shfl_sync"), so repeats are dropped here; ``dict.fromkeys`` keeps
# the first occurrence of each name in its original order.
__all__ = list(
    dict.fromkeys(_NBTYPE_SYMBOLS + _RECORD_SYMBOLS + _FUNCTION_SYMBOLS)
)
