Skip to content

Commit 9faf189

Browse files
Regenerate MLIR Bindings (#1881)
Co-authored-by: enzyme-ci-bot[bot] <78882869+enzyme-ci-bot[bot]@users.noreply.github.com>
1 parent 4bde670 commit 9faf189

File tree

5 files changed

+119
-129
lines changed

5 files changed

+119
-129
lines changed

src/mlir/Dialects/EnzymeXLA.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -902,11 +902,11 @@ function subindex(source::Value, index::Value; result::IR.Type, location=Locatio
902902
end
903903

904904
"""
905-
`lapack_symm`
905+
`blas_symm`
906906
907907
C := alpha*A*B + beta*C, or C := alpha*B*A + beta*C, where alpha and beta are scalars, A is a symmetric matrix\"
908908
"""
909-
function lapack_symm(
909+
function blas_symm(
910910
A::Value,
911911
B::Value,
912912
C::Value,
@@ -924,7 +924,7 @@ function lapack_symm(
924924
attributes = NamedAttribute[namedattribute("side", side), namedattribute("uplo", uplo)]
925925

926926
return create_operation(
927-
"enzymexla.lapack.symm",
927+
"enzymexla.blas.symm",
928928
location;
929929
operands,
930930
owned_regions,

src/mlir/Dialects/MosaicGPU.jl

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,25 @@ function arrive_expect_tx(barrier::Value; expect_tx, location=Location())
3232
)
3333
end
3434

35+
function arrive(barrier::Value; orders_tensor_core, location=Location())
36+
op_ty_results = IR.Type[]
37+
operands = Value[barrier,]
38+
owned_regions = Region[]
39+
successors = Block[]
40+
attributes = NamedAttribute[namedattribute("orders_tensor_core", orders_tensor_core),]
41+
42+
return create_operation(
43+
"mosaic_gpu.arrive",
44+
location;
45+
operands,
46+
owned_regions,
47+
successors,
48+
attributes,
49+
results=op_ty_results,
50+
result_inference=false,
51+
)
52+
end
53+
3554
"""
3655
`async_load`
3756
@@ -280,6 +299,31 @@ function broadcast_in_dim(
280299
)
281300
end
282301

302+
"""
303+
`broadcasted_iota`
304+
305+
Creates an array that has the specified shape and holds values starting at
306+
zero and incrementing by one along the specified dimension.
307+
"""
308+
function broadcasted_iota(; result_0::IR.Type, dimension, location=Location())
309+
op_ty_results = IR.Type[result_0,]
310+
operands = Value[]
311+
owned_regions = Region[]
312+
successors = Block[]
313+
attributes = NamedAttribute[namedattribute("dimension", dimension),]
314+
315+
return create_operation(
316+
"mosaic_gpu.broadcasted_iota",
317+
location;
318+
operands,
319+
owned_regions,
320+
successors,
321+
attributes,
322+
results=op_ty_results,
323+
result_inference=false,
324+
)
325+
end
326+
283327
"""
284328
`custom_primitive`
285329
@@ -423,6 +467,25 @@ function optimization_barrier(
423467
)
424468
end
425469

470+
function print_layout(value::Value; format, location=Location())
471+
op_ty_results = IR.Type[]
472+
operands = Value[value,]
473+
owned_regions = Region[]
474+
successors = Block[]
475+
attributes = NamedAttribute[namedattribute("format", format),]
476+
477+
return create_operation(
478+
"mosaic_gpu.print_layout",
479+
location;
480+
operands,
481+
owned_regions,
482+
successors,
483+
attributes,
484+
results=op_ty_results,
485+
result_inference=false,
486+
)
487+
end
488+
426489
"""
427490
`return_`
428491

src/mlir/Dialects/Nvvm.jl

Lines changed: 40 additions & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -1853,8 +1853,10 @@ end
18531853
The `cp.async.mbarrier.arrive` Op makes the *mbarrier object* track
18541854
all prior cp.async operations initiated by the executing thread.
18551855
The `addr` operand specifies the address of the *mbarrier object*
1856-
in generic address space. The `noinc` attr impacts how the
1857-
mbarrier\'s state is updated.
1856+
in generic or shared::cta address space. When it is generic, the
1857+
underlying memory should fall within the shared::cta space;
1858+
otherwise the behavior is undefined. The `noinc` attr impacts
1859+
how the mbarrier\'s state is updated.
18581860
18591861
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-cp-async-mbarrier-arrive)
18601862
"""
@@ -1878,37 +1880,6 @@ function cp_async_mbarrier_arrive(addr::Value; noinc=nothing, location=Location(
18781880
)
18791881
end
18801882

1881-
"""
1882-
`cp_async_mbarrier_arrive_shared`
1883-
1884-
The `cp.async.mbarrier.arrive.shared` Op makes the *mbarrier object*
1885-
track all prior cp.async operations initiated by the executing thread.
1886-
The `addr` operand specifies the address of the *mbarrier object* in
1887-
shared memory. The `noinc` attr impacts how the mbarrier\'s state
1888-
is updated.
1889-
1890-
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-cp-async-mbarrier-arrive)
1891-
"""
1892-
function cp_async_mbarrier_arrive_shared(addr::Value; noinc=nothing, location=Location())
1893-
op_ty_results = IR.Type[]
1894-
operands = Value[addr,]
1895-
owned_regions = Region[]
1896-
successors = Block[]
1897-
attributes = NamedAttribute[]
1898-
!isnothing(noinc) && push!(attributes, namedattribute("noinc", noinc))
1899-
1900-
return create_operation(
1901-
"nvvm.cp.async.mbarrier.arrive.shared",
1902-
location;
1903-
operands,
1904-
owned_regions,
1905-
successors,
1906-
attributes,
1907-
results=op_ty_results,
1908-
result_inference=false,
1909-
)
1910-
end
1911-
19121883
function cp_async_shared_global(
19131884
dst::Value,
19141885
src::Value,
@@ -3334,8 +3305,10 @@ a result of this operation. The operation returns an opaque value that
33343305
captures the phase of the *mbarrier object* prior to the arrive-on operation.
33353306
33363307
The operation takes the following operands:
3337-
- `addr`: A pointer to the memory location of the *mbarrier object*. Uses generic
3338-
addressing, but the address must still be in the shared memory space.
3308+
- `addr`: A pointer to the memory location of the *mbarrier object*. The `addr`
3309+
must be a pointer to generic or shared::cta memory. When it is generic, the
3310+
underlying address must be within the shared::cta memory space; otherwise
3311+
the behavior is undefined.
33393312
- `count`: Integer specifying the count argument to the arrive-on operation.
33403313
Must be in the valid range as specified in the *mbarrier object* contents.
33413314
@@ -3362,35 +3335,6 @@ function mbarrier_arrive_nocomplete(
33623335
)
33633336
end
33643337

3365-
"""
3366-
`mbarrier_arrive_nocomplete_shared`
3367-
3368-
This Op is the same as `nvvm.mbarrier.arrive.nocomplete` except that the *mbarrier object*
3369-
should be accessed using a shared-memory pointer instead of a generic-memory pointer.
3370-
3371-
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive)
3372-
"""
3373-
function mbarrier_arrive_nocomplete_shared(
3374-
addr::Value, count::Value; res::IR.Type, location=Location()
3375-
)
3376-
op_ty_results = IR.Type[res,]
3377-
operands = Value[addr, count]
3378-
owned_regions = Region[]
3379-
successors = Block[]
3380-
attributes = NamedAttribute[]
3381-
3382-
return create_operation(
3383-
"nvvm.mbarrier.arrive.nocomplete.shared",
3384-
location;
3385-
operands,
3386-
owned_regions,
3387-
successors,
3388-
attributes,
3389-
results=op_ty_results,
3390-
result_inference=false,
3391-
)
3392-
end
3393-
33943338
"""
33953339
`mbarrier_arrive`
33963340
@@ -3408,8 +3352,10 @@ The operation returns an opaque value that captures the phase of the
34083352
value are implementation-specific.
34093353
34103354
The operation takes the following operand:
3411-
- `addr`: A pointer to the memory location of the *mbarrier object*. Uses generic
3412-
addressing, but the address must still be in the shared memory space.
3355+
- `addr`: A pointer to the memory location of the *mbarrier object*. The `addr`
3356+
must be a pointer to generic or shared::cta memory. When it is generic, the
3357+
underlying address must be within the shared::cta memory space; otherwise
3358+
the behavior is undefined.
34133359
34143360
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive)
34153361
"""
@@ -3432,33 +3378,6 @@ function mbarrier_arrive(addr::Value; res::IR.Type, location=Location())
34323378
)
34333379
end
34343380

3435-
"""
3436-
`mbarrier_arrive_shared`
3437-
3438-
This Op is the same as `nvvm.mbarrier.arrive` except that the *mbarrier object*
3439-
should be accessed using a shared-memory pointer instead of a generic-memory pointer.
3440-
3441-
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive)
3442-
"""
3443-
function mbarrier_arrive_shared(addr::Value; res::IR.Type, location=Location())
3444-
op_ty_results = IR.Type[res,]
3445-
operands = Value[addr,]
3446-
owned_regions = Region[]
3447-
successors = Block[]
3448-
attributes = NamedAttribute[]
3449-
3450-
return create_operation(
3451-
"nvvm.mbarrier.arrive.shared",
3452-
location;
3453-
operands,
3454-
owned_regions,
3455-
successors,
3456-
attributes,
3457-
results=op_ty_results,
3458-
result_inference=false,
3459-
)
3460-
end
3461-
34623381
"""
34633382
`mbarrier_init`
34643383
@@ -3607,35 +3526,6 @@ function mbarrier_test_wait(addr::Value, state::Value; res::IR.Type, location=Lo
36073526
)
36083527
end
36093528

3610-
"""
3611-
`mbarrier_test_wait_shared`
3612-
3613-
This Op is the same as `nvvm.mbarrier.test.wait` except that the *mbarrier object*
3614-
should be accessed using a shared-memory pointer instead of a generic-memory pointer.
3615-
3616-
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-mbarrier-test-wait-try-wait)
3617-
"""
3618-
function mbarrier_test_wait_shared(
3619-
addr::Value, state::Value; res::IR.Type, location=Location()
3620-
)
3621-
op_ty_results = IR.Type[res,]
3622-
operands = Value[addr, state]
3623-
owned_regions = Region[]
3624-
successors = Block[]
3625-
attributes = NamedAttribute[]
3626-
3627-
return create_operation(
3628-
"nvvm.mbarrier.test.wait.shared",
3629-
location;
3630-
operands,
3631-
owned_regions,
3632-
successors,
3633-
attributes,
3634-
results=op_ty_results,
3635-
result_inference=false,
3636-
)
3637-
end
3638-
36393529
"""
36403530
`mbarrier_try_wait_parity`
36413531
@@ -3793,6 +3683,34 @@ function match_sync(thread_mask::Value, val::Value; res::IR.Type, kind, location
37933683
)
37943684
end
37953685

3686+
"""
3687+
`memory_barrier`
3688+
3689+
`membar` operation guarantees that prior memory accesses requested by this
3690+
thread are performed at the specified `scope`, before later memory
3691+
operations requested by this thread following the membar instruction.
3692+
3693+
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-membar)
3694+
"""
3695+
function memory_barrier(; scope, location=Location())
3696+
op_ty_results = IR.Type[]
3697+
operands = Value[]
3698+
owned_regions = Region[]
3699+
successors = Block[]
3700+
attributes = NamedAttribute[namedattribute("scope", scope),]
3701+
3702+
return create_operation(
3703+
"nvvm.memory.barrier",
3704+
location;
3705+
operands,
3706+
owned_regions,
3707+
successors,
3708+
attributes,
3709+
results=op_ty_results,
3710+
result_inference=false,
3711+
)
3712+
end
3713+
37963714
"""
37973715
`mma_sync`
37983716

src/mlir/Dialects/TPU.jl

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -476,12 +476,15 @@ function enqueue_indirect_dma(
476476
)
477477
end
478478

479-
function erase_memref_layout(operand::Value; result::IR.Type, location=Location())
480-
op_ty_results = IR.Type[result,]
479+
function erase_memref_layout(
480+
operand::Value; result=nothing::Union{Nothing,IR.Type}, location=Location()
481+
)
482+
op_ty_results = IR.Type[]
481483
operands = Value[operand,]
482484
owned_regions = Region[]
483485
successors = Block[]
484486
attributes = NamedAttribute[]
487+
!isnothing(result) && push!(op_ty_results, result)
485488

486489
return create_operation(
487490
"tpu.erase_memref_layout",
@@ -490,8 +493,8 @@ function erase_memref_layout(operand::Value; result::IR.Type, location=Location(
490493
owned_regions,
491494
successors,
492495
attributes,
493-
results=op_ty_results,
494-
result_inference=false,
496+
results=(length(op_ty_results) == 0 ? nothing : op_ty_results),
497+
result_inference=(length(op_ty_results) == 0 ? true : false),
495498
)
496499
end
497500

src/mlir/libMLIR_h.jl

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11623,6 +11623,12 @@ function mlirGetDialectHandle__mosaic_gpu__()
1162311623
@ccall mlir_c.mlirGetDialectHandle__mosaic_gpu__()::MlirDialectHandle
1162411624
end
1162511625

11626+
function mlirDialectRegistryInsertMosaicGpuInlinerExtensions(registry)
11627+
@ccall mlir_c.mlirDialectRegistryInsertMosaicGpuInlinerExtensions(
11628+
registry::MlirDialectRegistry
11629+
)::Cvoid
11630+
end
11631+
1162611632
function enzymexlaLapackLayoutAttrGet(ctx, col_major)
1162711633
@ccall mlir_c.enzymexlaLapackLayoutAttrGet(
1162811634
ctx::MlirContext, col_major::UInt8

0 commit comments

Comments
 (0)