Skip to content
1 change: 1 addition & 0 deletions include/infinicore/ops.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#pragma once

#include "ops/add.hpp"
#include "ops/aminmax.hpp"
#include "ops/attention.hpp"
#include "ops/causal_softmax.hpp"
#include "ops/matmul.hpp"
Expand Down
25 changes: 25 additions & 0 deletions include/infinicore/ops/aminmax.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"
#include <optional>
#include <utility>

namespace infinicore::op {
// Dispatcher entry point for the aminmax operator: computes the minimum and
// maximum of a tensor in a single pass.
class Aminmax {
public:
    // Backend kernel signature: (min_output, max_output, input, dim, keepdim).
    using schema = void (*)(Tensor, Tensor, Tensor, std::optional<int64_t>, bool);
    // Runs the kernel registered for the current device on the given tensors.
    static void execute(Tensor min_output, Tensor max_output, Tensor input,
                        std::optional<int64_t> dim, bool keepdim);
    // Per-device registry of backend implementations.
    static common::OpDispatcher<schema> &dispatcher();
};

// Returns a pair of (min_tensor, max_tensor). When `dim` is std::nullopt the
// reduction is presumably over all elements — confirm against the backend.
std::pair<Tensor, Tensor> aminmax(Tensor input,
                                  std::optional<int64_t> dim = std::nullopt,
                                  bool keepdim = false);

// In-place variant: writes the results into caller-provided output tensors.
void aminmax_(Tensor min_output, Tensor max_output, Tensor input,
              std::optional<int64_t> dim = std::nullopt,
              bool keepdim = false);
} // namespace infinicore::op
20 changes: 20 additions & 0 deletions include/infinicore/ops/diagflat.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {

// Dispatcher entry point for the diagflat operator: builds a 2-D tensor whose
// `offset` diagonal holds the (flattened) elements of the input.
class Diagflat {
public:
    // Backend kernel signature: (output, input, offset).
    using schema = void (*)(Tensor, Tensor, int64_t);
    // Runs the kernel registered for the current device.
    static void execute(Tensor output, Tensor input, int64_t offset);
    // Per-device registry of backend implementations.
    static common::OpDispatcher<schema> &dispatcher();
};

// Returns a new tensor with `input` placed on the `offset` diagonal.
Tensor diagflat(Tensor input, int64_t offset = 0);
// In-place variant: writes the result into a caller-provided output tensor.
void diagflat_(Tensor output, Tensor input, int64_t offset = 0);

} // namespace infinicore::op


17 changes: 17 additions & 0 deletions include/infinicore/ops/elu.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {
// Dispatcher entry point for the ELU (Exponential Linear Unit) activation.
class Elu {
public:
    // Backend kernel signature: (output, input, alpha).
    using schema = void (*)(Tensor, Tensor, float);
    // Runs the kernel registered for the current device.
    static void execute(Tensor output, Tensor input, float alpha);
    // Per-device registry of backend implementations.
    static common::OpDispatcher<schema> &dispatcher();
};

// Returns a new tensor with ELU applied element-wise.
Tensor elu(Tensor input, float alpha = 1.0f);
// In-place variant: writes the result into a caller-provided output tensor.
void elu_(Tensor output, Tensor input, float alpha = 1.0f);

} // namespace infinicore::op
17 changes: 17 additions & 0 deletions include/infinicore/ops/sqrt.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#pragma once

#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {
// Dispatcher entry point for the element-wise square-root operator.
class Sqrt {
public:
    // Backend kernel signature: (output, input).
    using schema = void (*)(Tensor, Tensor);
    // Runs the kernel registered for the current device.
    static void execute(Tensor output, Tensor input);
    // Per-device registry of backend implementations.
    static common::OpDispatcher<schema> &dispatcher();
};

// Returns a new tensor with the element-wise square root of `input`.
Tensor sqrt(Tensor input);
// In-place variant: writes the result into `output`.
// (Fixed parameter-name typo: "ouput" -> "output".)
void sqrt_(Tensor output, Tensor input);

} // namespace infinicore::op
3 changes: 3 additions & 0 deletions include/infiniop.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@

#include "infiniop/handle.h"
#include "infiniop/ops/add.h"
#include "infiniop/ops/aminmax.h"
#include "infiniop/ops/attention.h"
#include "infiniop/ops/causal_softmax.h"
#include "infiniop/ops/clip.h"
#include "infiniop/ops/conv.h"
#include "infiniop/ops/dequantize_awq.h"
#include "infiniop/ops/elu.h"
#include "infiniop/ops/gelu.h"
#include "infiniop/ops/gemm.h"
#include "infiniop/ops/layer_norm.h"
Expand All @@ -24,6 +26,7 @@
#include "infiniop/ops/silu.h"
#include "infiniop/ops/softmax.h"
#include "infiniop/ops/softplus.h"
#include "infiniop/ops/sqrt.h"
#include "infiniop/ops/sub.h"
#include "infiniop/ops/swiglu.h"
#include "infiniop/ops/tanh.h"
Expand Down
31 changes: 31 additions & 0 deletions include/infiniop/ops/aminmax.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#pragma once

#include "../operator_descriptor.h"
#include <cstdint>

// Opaque descriptor handle for the aminmax operator.
typedef struct InfiniopDescriptor *infiniopAminmaxDescriptor_t;

// Creates a descriptor that validates the tensor shapes/dtypes and captures
// the reduction configuration.
// `keepdim` and `has_dim` are int32_t booleans (0/1) since this is a C API;
// when `has_dim` is 0, `dim` is presumably ignored and the reduction spans all
// elements — confirm against the implementation.
__C __export infiniStatus_t infiniopCreateAminmaxDescriptor(
    infiniopHandle_t handle,
    infiniopAminmaxDescriptor_t *desc_ptr,
    infiniopTensorDescriptor_t min_output_desc,
    infiniopTensorDescriptor_t max_output_desc,
    infiniopTensorDescriptor_t input_desc,
    int64_t dim,
    int32_t keepdim,
    int32_t has_dim);

// Queries the scratch-space size (in bytes) needed by infiniopAminmax.
__C __export infiniStatus_t
infiniopGetAminmaxWorkspaceSize(infiniopAminmaxDescriptor_t desc, size_t *size);

// Executes the reduction: writes minima into `min_output` and maxima into
// `max_output`. `stream` is the device execution stream (may be backend-specific).
__C __export infiniStatus_t infiniopAminmax(
    infiniopAminmaxDescriptor_t desc,
    void *workspace,
    size_t workspace_size,
    void *min_output,
    void *max_output,
    const void *input,
    void *stream);

// Releases the descriptor and its resources.
__C __export infiniStatus_t
infiniopDestroyAminmaxDescriptor(infiniopAminmaxDescriptor_t desc);
32 changes: 32 additions & 0 deletions include/infiniop/ops/diagflat.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#ifndef __INFINIOP_DIAGFLAT_API_H__
#define __INFINIOP_DIAGFLAT_API_H__

#include "../operator_descriptor.h"
#include <cstdint>

// Opaque descriptor handle for the diagflat operator.
typedef struct InfiniopDescriptor *infiniopDiagflatDescriptor_t;

// Creates a descriptor that validates the output/input tensor descriptors and
// captures the diagonal `offset` (0 = main diagonal; sign convention
// presumably matches torch.diagflat — confirm against the implementation).
// Note: the redundant `#pragma once` was removed; the include guard alone
// matches the convention of the sibling elu.h/sqrt.h headers.
__C __export infiniStatus_t infiniopCreateDiagflatDescriptor(
    infiniopHandle_t handle,
    infiniopDiagflatDescriptor_t *desc_ptr,
    infiniopTensorDescriptor_t output,
    infiniopTensorDescriptor_t input,
    int64_t offset);

// Queries the scratch-space size (in bytes) needed by infiniopDiagflat.
__C __export infiniStatus_t
infiniopGetDiagflatWorkspaceSize(infiniopDiagflatDescriptor_t desc, size_t *size);

// Executes the operator: writes the diagonal matrix built from `input` into
// `output`. `stream` is the device execution stream.
__C __export infiniStatus_t infiniopDiagflat(
    infiniopDiagflatDescriptor_t desc,
    void *workspace,
    size_t workspace_size,
    void *output,
    const void *input,
    void *stream);

// Releases the descriptor and its resources.
__C __export infiniStatus_t
infiniopDestroyDiagflatDescriptor(infiniopDiagflatDescriptor_t desc);

#endif // __INFINIOP_DIAGFLAT_API_H__
25 changes: 25 additions & 0 deletions include/infiniop/ops/elu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#ifndef __INFINIOP_ELU_API_H__
#define __INFINIOP_ELU_API_H__

#include "../operator_descriptor.h"

// Opaque descriptor handle for the ELU operator.
typedef struct InfiniopDescriptor *infiniopEluDescriptor_t;

// Creates a descriptor that validates the output/input tensor descriptors and
// captures the `alpha` coefficient of ELU(x) = x if x >= 0 else alpha*(e^x - 1).
__C __export infiniStatus_t infiniopCreateEluDescriptor(infiniopHandle_t handle,
                                                        infiniopEluDescriptor_t *desc_ptr,
                                                        infiniopTensorDescriptor_t output,
                                                        infiniopTensorDescriptor_t input,
                                                        float alpha);

// Queries the scratch-space size (in bytes) needed by infiniopElu.
__C __export infiniStatus_t infiniopGetEluWorkspaceSize(infiniopEluDescriptor_t desc, size_t *size);

// Executes the operator: applies ELU element-wise from `input` into `output`.
// `stream` is the device execution stream.
__C __export infiniStatus_t infiniopElu(infiniopEluDescriptor_t desc,
                                        void *workspace,
                                        size_t workspace_size,
                                        void *output,
                                        const void *input,
                                        void *stream);

// Releases the descriptor and its resources.
__C __export infiniStatus_t infiniopDestroyEluDescriptor(infiniopEluDescriptor_t desc);

// Fixed: the closing comment previously said INFINIOP_OPS_ELU_H, which did not
// match the guard macro defined above.
#endif // __INFINIOP_ELU_API_H__
26 changes: 26 additions & 0 deletions include/infiniop/ops/sqrt.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#ifndef __INFINIOP_SQRT_API_H__
#define __INFINIOP_SQRT_API_H__

#include "../operator_descriptor.h"

// Opaque descriptor handle for the square-root operator.
typedef struct InfiniopDescriptor *infiniopSqrtDescriptor_t;

// Creates a descriptor that validates the output/input tensor descriptors.
__C __export infiniStatus_t infiniopCreateSqrtDescriptor(
    infiniopHandle_t handle,
    infiniopSqrtDescriptor_t *desc_ptr,
    infiniopTensorDescriptor_t output,
    infiniopTensorDescriptor_t input);

// Queries the scratch-space size (in bytes) needed by infiniopSqrt.
__C __export infiniStatus_t infiniopGetSqrtWorkspaceSize(infiniopSqrtDescriptor_t desc, size_t *size);

// Executes the operator: writes the element-wise square root of `input` into
// `output`. `stream` is the device execution stream.
__C __export infiniStatus_t infiniopSqrt(
    infiniopSqrtDescriptor_t desc,
    void *workspace,
    size_t workspace_size,
    void *output,
    const void *input,
    void *stream);

// Releases the descriptor and its resources.
__C __export infiniStatus_t infiniopDestroySqrtDescriptor(infiniopSqrtDescriptor_t desc);

#endif // __INFINIOP_SQRT_API_H__
6 changes: 6 additions & 0 deletions python/infinicore/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,14 @@
uint8,
)
from infinicore.ops.add import add
from infinicore.ops.aminmax import aminmax
from infinicore.ops.attention import attention
from infinicore.ops.matmul import matmul
from infinicore.ops.mul import mul
from infinicore.ops.narrow import narrow
from infinicore.ops.rearrange import rearrange
from .ops.sqrt import sqrt
from .ops.diagflat import diagflat
from infinicore.tensor import (
Tensor,
empty,
Expand Down Expand Up @@ -100,6 +103,7 @@
"uint8",
# Operations.
"add",
"aminmax",
"attention",
"matmul",
"mul",
Expand All @@ -115,6 +119,8 @@
"strided_empty",
"strided_from_blob",
"zeros",
"sqrt",
"diagflat",
]

use_ntops = False
Expand Down
2 changes: 2 additions & 0 deletions python/infinicore/nn/functional/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .causal_softmax import causal_softmax
from .elu import elu
from .embedding import embedding
from .linear import linear
from .random_sample import random_sample
Expand All @@ -9,6 +10,7 @@

__all__ = [
"causal_softmax",
"elu",
"random_sample",
"rms_norm",
"silu",
Expand Down
32 changes: 32 additions & 0 deletions python/infinicore/nn/functional/elu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import infinicore
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor


def elu(input: Tensor, alpha: float = 1.0, inplace: bool = False, *, out=None) -> Tensor:
    r"""Apply the Exponential Linear Unit (ELU) function, element-wise.

    ELU(x) = x if x >= 0 else alpha * (exp(x) - 1)

    Args:
        input: Input tensor
        alpha: ELU parameter (default: 1.0)
        inplace: If True, performs the operation in-place (default: False)
        out: Optional output tensor for in-place operation

    Returns:
        Output tensor with ELU applied element-wise.
    """
    # Fast path: delegate to ntops when enabled, on a supported device, and no
    # explicit output tensor was requested.
    ntops_eligible = (
        infinicore.use_ntops
        and input.device.type in ("cuda", "musa")
        and out is None
    )
    if ntops_eligible:
        return infinicore.ntops.torch.elu(input, alpha=alpha, inplace=inplace)

    if inplace:
        # In-place takes priority over `out`: overwrite the input buffer.
        destination = input
    elif out is None:
        # Neither in-place nor a target buffer: allocate a fresh result.
        return Tensor(_infinicore.elu(input._underlying, alpha))
    else:
        destination = out

    _infinicore.elu_(destination._underlying, input._underlying, alpha)
    return destination
25 changes: 25 additions & 0 deletions python/infinicore/ops/aminmax.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor
import infinicore


def aminmax(input, dim=None, keepdim=False, *, out=None):
    """Compute the minimum and maximum of ``input`` in one pass.

    Args:
        input: Input tensor.
        dim: Optional dimension to reduce over; ``None`` reduces all elements.
        keepdim: If True, retain the reduced dimension with size 1.
        out: Optional tuple/list of two pre-allocated tensors
            ``(min_tensor, max_tensor)`` to write the results into.

    Returns:
        A ``(min_tensor, max_tensor)`` pair; when ``out`` is given, ``out``
        itself is returned after being filled.

    Raises:
        ValueError: If ``out`` is not a 2-element tuple or list.
    """
    # Fast path: delegate to ntops on supported devices when enabled.
    if infinicore.use_ntops and input.device.type in ("cuda", "musa"):
        return infinicore.ntops.torch.aminmax(input, dim=dim, keepdim=keepdim, out=out)

    if out is None:
        min_tensor, max_tensor = _infinicore.aminmax(input._underlying, dim, keepdim)
        return (Tensor(min_tensor), Tensor(max_tensor))

    # Accept either a tuple or a list of output tensors.
    if not isinstance(out, (tuple, list)) or len(out) != 2:
        raise ValueError("out must be a tuple or list of (min_tensor, max_tensor)")

    min_out, max_out = out
    _infinicore.aminmax_(min_out._underlying, max_out._underlying, input._underlying, dim, keepdim)
    return out

16 changes: 16 additions & 0 deletions python/infinicore/ops/diagflat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor
import infinicore


def diagflat(input, *, offset=0, out=None):
    """Create a 2-D tensor with ``input`` placed on the ``offset`` diagonal.

    Args:
        input: Input tensor (flattened before being placed on the diagonal).
        offset: Diagonal to fill; 0 is the main diagonal (default: 0).
        out: Optional pre-allocated output tensor to write the result into.

    Returns:
        The diagonal matrix as a new ``Tensor``, or ``out`` when provided.
    """
    # Fast path: delegate to ntops on supported devices when enabled.
    if infinicore.use_ntops and input.device.type in ("cuda", "musa"):
        return infinicore.ntops.torch.diagflat(input, offset=offset)

    # Bug fix: the original returned unconditionally, leaving the in-place
    # branch unreachable and referencing an undefined `out`. An explicit
    # keyword-only `out=None` parameter (backward-compatible) restores it.
    if out is None:
        return Tensor(_infinicore.diagflat(input._underlying, offset))

    _infinicore.diagflat_(out._underlying, input._underlying, offset)
    return out
14 changes: 14 additions & 0 deletions python/infinicore/ops/sqrt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor
import infinicore


def sqrt(input, *, out=None):
if infinicore.use_ntops and input.device.type in ("cuda", "musa"):
return infinicore.ntops.torch.sqrt(input, out=out)

if out is None:
return Tensor(_infinicore.sqrt(input._underlying))

_infinicore.sqrt_(out._underlying, input._underlying)
return out
Loading