Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
e34a906
migrate elf utils file
katsyoshi Jan 3, 2026
4a69e2a
replace package name to Binary
katsyoshi Jan 4, 2026
3c68a19
package restructuring section header
katsyoshi Jan 4, 2026
c60fd95
migrate elf header description file
katsyoshi Jan 4, 2026
7bad6fe
remove unused file
katsyoshi Jan 4, 2026
e47df6f
adjust elf section storage
katsyoshi Jan 6, 2026
566b494
simplify section
katsyoshi Jan 6, 2026
932266d
replace linker elf with binary elf
katsyoshi Jan 6, 2026
393f6bd
remove unused files in linker
katsyoshi Jan 6, 2026
575cb46
update sig
katsyoshi Jan 6, 2026
224c93a
introduce reader for assembly file
katsyoshi Jan 7, 2026
43359a2
wire assembler and reader
katsyoshi Jan 7, 2026
c8f278c
read instructions in assembler
katsyoshi Jan 7, 2026
fa46ea4
add builder for assembler
katsyoshi Jan 7, 2026
ee10983
refactoring full class path in assembler
katsyoshi Jan 7, 2026
3fd2820
migrate elf text section class
katsyoshi Jan 7, 2026
71edbe2
refactoring section definitions
katsyoshi Jan 7, 2026
399c048
introduce builder for symtab, and strtab
katsyoshi Jan 8, 2026
8106454
introduce write for assembler
katsyoshi Jan 8, 2026
6c2aab3
write minimal writer implemnt
katsyoshi Jan 8, 2026
3254422
write sections to elf file
katsyoshi Jan 8, 2026
4a47f25
wire assembler builder and writer
katsyoshi Jan 10, 2026
251a776
assemble to binary for assembler
katsyoshi Jan 10, 2026
4f10d45
remove assembler/elf files
katsyoshi Jan 10, 2026
0c4cb2e
fix assembler elf section headers
katsyoshi Jan 10, 2026
d3ecf2f
update assembler elf tests
katsyoshi Jan 10, 2026
8d5319d
adjust compiler tests for self linker
katsyoshi Jan 10, 2026
38628b5
update linker reader expectations
katsyoshi Jan 10, 2026
b707060
fix sig for refactoring
katsyoshi Jan 10, 2026
4b39fb2
steep sensei
katsyoshi Jan 10, 2026
ac38fea
move elf reader into binary
katsyoshi Jan 10, 2026
6670e72
convert binary to module
katsyoshi Jan 10, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 11 additions & 7 deletions lib/caotral/assembler.rb
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# frozen_string_literal: true
require_relative "assembler/elf"
require_relative "assembler/elf/utils"
require_relative "assembler/elf/header"
require_relative "assembler/elf/sections"
require_relative "assembler/elf/section_header"
require "caotral/binary/elf"

require_relative "assembler/builder"
require_relative "assembler/reader"
require_relative "assembler/writer"

class Caotral::Assembler
GCC_ASSEMBLERS = ["gcc", "as"].freeze
Expand All @@ -15,7 +15,7 @@ def self.assemble!(input:, output: File.basename(input, ".*") + ".o", assembler:

# Stores the input/output paths, the external assembler name and the debug
# flag, and creates the Reader that #instructions later reads from.
# NOTE(review): this listing looks like a rendered diff with its +/- markers
# stripped; the `@elf = ELF.new(...)` line is presumably the removed line and
# `@asm_reader = ...` its replacement — confirm against the real file.
def initialize(input:, output: File.basename(input, ".*") + ".o", assembler: "as", type: :relocatable, debug: false)
@input, @output = input, output
@elf = ELF.new(type:, input:, output:, debug:)
@asm_reader = Caotral::Assembler::Reader.new(input:, debug:)
@assembler = assembler
@debug = debug
end
Expand All @@ -30,7 +30,10 @@ def assemble(assembler: @assembler, assembler_options: [], input: @input, output
output
end
# Returns the output path given to the constructor.
def obj_file = @output
# NOTE(review): the one-line definition below looks like the removed
# (pre-change) form shown by the diff; the multi-line definition after it
# redefines the same method — confirm against the real file.
def to_elf(input: @input, output: @output, debug: false) = @elf.build(input:, output:, debug:)
# Builds an ELF object from the parsed #instructions with Builder and hands it
# to Writer#write together with `output` and `debug`.
# `input` is accepted but not used by this body.
def to_elf(input: @input, output: @output, debug: false)
elf_obj = Caotral::Assembler::Builder.new(instructions:).build
Caotral::Assembler::Writer.new(elf_obj:, output:, debug:).write
end

def command(asm)
case asm
Expand All @@ -44,6 +47,7 @@ def command(asm)
end

private
# Memoized result of @asm_reader.read; the reader is only consulted on first use.
def instructions = @instructions ||= @asm_reader.read
def gcc_assembler(assembler)
case assembler
when "as", "gcc"
Expand Down
50 changes: 50 additions & 0 deletions lib/caotral/assembler/builder.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
require "caotral/binary/elf"

require_relative "builder/text"

module Caotral
  class Assembler
    # Turns parsed assembly instructions into an in-memory ELF object made of
    # a null section, .text, .strtab, .symtab and .shstrtab.
    class Builder
      # @param instructions [#each] pairs of label and source lines
      def initialize(instructions:)
        @instructions = instructions
      end

      # Assembles the instructions and wraps them, together with empty string
      # and symbol tables, into a new ELF object.
      #
      # @return [Caotral::Binary::ELF] the populated ELF object
      def build
        elf = Caotral::Binary::ELF.new
        elf.header = Caotral::Binary::ELF::Header.new

        # Name/body pairs in section-index order; the first entry is the
        # nameless, bodiless section at index 0.
        named_bodies = [
          [nil, nil],
          [".text", assemble_text(@instructions)],
          [".strtab", Caotral::Binary::ELF::Section::Strtab.new],
          [".symtab", Caotral::Binary::ELF::Section::Symtab.new],
          [".shstrtab", Caotral::Binary::ELF::Section::Strtab.new],
        ]
        named_bodies.each do |section_name, body|
          elf.sections << Caotral::Binary::ELF::Section.new(
            header: Caotral::Binary::ELF::SectionHeader.new,
            body:,
            section_name:,
          )
        end

        string_table = elf.find_by_name(".strtab")
        symbol_table = elf.find_by_name(".symtab")
        # The placeholder Symtab body is replaced by the list of symbol entries.
        symbol_table.body = build_symtab(string_table.body)
        elf
      end

      private

      # Feeds every label and its lines into a Text builder and returns the
      # assembled bytes.
      def assemble_text(instructions)
        text = Caotral::Assembler::Builder::Text.new(instructions:)
        instructions.each do |label, lines|
          # A label is recorded as a zero-sized entry ahead of its instructions.
          text.entries << { label:, size: 0 }
          lines.each do |source_line|
            text.assemble!(source_line)
          end
        end
        text.build
      end

      # Produces the symbol entries: an all-zero entry followed by an entry
      # for "main" (info 0x12, section index 1).
      def build_symtab(strtab)
        null_symbol = Caotral::Binary::ELF::Section::Symtab.new.set!(name: 0, info: 0, shndx: 0, value: 0, size: 0)
        main_name = strtab.offset_of("main")
        main_symbol = Caotral::Binary::ELF::Section::Symtab.new.set!(name: main_name, info: 0x12, other: 0, shndx: 1, value: 0, size: 0)
        [null_symbol, main_symbol]
      end
    end
  end
end
275 changes: 275 additions & 0 deletions lib/caotral/assembler/builder/text.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,275 @@
require "caotral/binary/elf"
module Caotral
class Assembler
class Builder
class Text
# Instruction prefixes. REX_W (0x48) is the REX prefix with the W bit set;
# #opecode places it in front of the 64-bit encodings.
PREFIX = {
  REX_W: 0x48,
}.freeze

# Three-bit register numbers, used when a register is folded into an opcode
# byte (#pop) or into a ModR/M byte (#parse_addressing_mode).
REGISTER_CODE = {
  RAX: 0b000,
  RCX: 0b001,
  RDX: 0b010,
  RBX: 0b011,
  RSP: 0b100,
  RBP: 0b101,
  RSI: 0b110,
  RDI: 0b111,
}.freeze

# Opcode bytes keyed by upper-cased mnemonic.
# SUB holds the immediate form (0x83); #calc emits 0x29 itself for the
# register-register form.
# MOVXZ is 0F B6, the zero-extend-from-byte form: the only user is the
# `movzb rax, al` case in #mov, whose source is an 8-bit register
# (0F B7 would zero-extend the 16-bit AX instead and leak AH into the result).
OPECODE = {
  ADD: [0x01],
  CMP: [0x39],
  CQO: [0x99],
  IDIV: [0xf7],
  IMUL: [0x0f],
  MOV: [0x89],
  MOVR: [0x8B],
  MOVXZ: [0x0f, 0xb6],
  SUB: [0x83],
  XOR: [0x31],
}.freeze

# Matches an operand written as a 0x-prefixed hexadecimal literal.
HEX_PATTERN = /\A0x[0-9a-fA-F]+\z/.freeze
# Ordered list of entries: label markers (`{ label:, size: 0 }`, pushed by the
# caller) and parsed instructions (`{ op:, operands:, size: }`, added by #assemble!).
attr_reader :entries

# Prepares an empty text builder for the given instruction listing.
#
# @param instructions [Object] the parsed assembly, stored as-is
def initialize(instructions:)
  @label_positions = {}
  @entries = []
  @instructions = instructions
end

# Parses one line of assembly and appends the resulting entry to #entries.
# Surrounding whitespace is ignored and blank lines are skipped.
#
# @param line [String] raw source line
# @return [Array<Hash>, nil] the entry list after appending, or nil for a blank line
def assemble!(line)
  stripped = line.strip
  @entries << parse_line(stripped) unless stripped.empty?
end

# Assembles all entries into the raw bytes of the section.
#
# Runs two passes over #entries: the first records the byte offset of every
# label (using each instruction's precomputed :size), the second encodes the
# instructions now that every label position is known.
#
# @return [String] the encoded bytes packed as unsigned chars
def build
  # Pass 1: label name => offset of the instruction that follows it.
  @label_positions.clear
  cursor = 0
  @entries.each do |entry|
    name = entry[:label]
    if name
      @label_positions[name] = cursor
    else
      cursor += entry[:size]
    end
  end

  # Pass 2: encode every non-label entry at its running offset.
  @bytes = []
  cursor = 0
  @entries.reject { |entry| entry[:label] }.each do |entry|
    encoded = encode(entry, cursor)
    @bytes << encoded
    cursor += encoded.size
  end

  @bytes.flatten.pack("C*")
end

# Byte length of the assembled section. Calls #build each time it is invoked.
#
# @return [Integer]
def size
  build.bytesize
end
# Appends `[0]` padding entries to `val` so that the assembled size plus the
# padding is a multiple of `bytes`.
#
# The padding count is computed once from the current #build size. Looping
# on `build.bytesize` while appending to `val` can never terminate, because
# appending to `val` does not change what #build returns.
# NOTE(review): assumes one `[0]` entry stands for one padding byte — confirm
# against the callers.
#
# @param val [#<<] collection that receives the padding entries
# @param bytes [Integer] alignment in bytes
# @return [nil]
def align(val, bytes)
  remainder = build.bytesize % bytes
  padding = remainder.zero? ? 0 : bytes - remainder
  padding.times { val << [0] }
  nil
end

private

# Encodes a parsed instruction entry located at `offset`.
#
# @param entry [Hash] entry with :op and :operands
# @param offset [Integer] byte offset of the instruction within the section
# @return [Array] whatever #opecode returns for the instruction
def encode(entry, offset)
  mnemonic, args = entry.values_at(:op, :operands)
  opecode(mnemonic, offset, *args)
end

# Splits a source line into mnemonic and operands and records the encoded size.
#
# Tokens are separated by any run of whitespace and/or commas, so
# `mov rax, rdi`, `mov rax,rdi` and `mov rax , rdi` all yield the operands
# ["rax", "rdi"]. Splitting on whitespace alone and then deleting commas
# would glue `rax,rdi` into a single operand.
#
# @param line [String] one assembly instruction
# @return [Hash] `{ op:, operands:, size: }`
def parse_line(line)
  # A leading separator produces an empty first token; drop it.
  op, *operands = line.split(/[\s,]+/).reject(&:empty?)
  size = instruction_size(op, *operands)
  { op:, operands:, size: }
end

# Encoded length of an instruction in bytes.
#
# Jumps have fixed sizes (6 for je/jne, 5 for jmp) so they can be sized
# before label positions are known; everything else is measured by encoding
# it at offset 0.
#
# @return [Integer]
def instruction_size(op, *operands)
  return 6 if %w[je jne].include?(op)
  return 5 if op == "jmp"

  opecode(op, 0, *operands).size
end

# Dispatches a mnemonic to its encoder and returns the instruction bytes.
# mov/arithmetic/xor/lea/cmp encodings are prefixed with REX.W here.
#
# @param op [String] mnemonic
# @param offset [Integer] byte offset of the instruction (used by jumps)
# @param operands [Array<String>]
# @return [Array] encoded bytes
# @raise [Caotral::Binary::ELF::Error] for a mnemonic with no encoder
def opecode(op, offset, *operands)
  case op
  when "syscall" then [0x0f, 0x05]
  when "ret" then [0xc3]
  when "push" then push(*operands)
  when "pop" then pop(*operands)
  when "sete", "setl" then sete(op, *operands)
  when "je", "jmp", "jne" then jump(op, offset, *operands)
  when "mov", "movzb" then [PREFIX[:REX_W], *mov(op, *operands)]
  when "sub", "add", "imul", "cqo", "idiv" then [PREFIX[:REX_W], *calc(op, *operands)]
  when "xor" then [PREFIX[:REX_W], *calc_bit(op, *operands)]
  when "lea" then [PREFIX[:REX_W], *calc_addr(op, *operands)]
  when "cmp" then [PREFIX[:REX_W], *cmp(op, *operands)]
  else
    raise Caotral::Binary::ELF::Error, "yet implemented operations: #{op}"
  end
end

# Encodes je/jmp/jne with a 32-bit displacement to a label.
#
# The displacement is measured from the end of the jump instruction:
# target - offset - instruction size, packed as a little-endian signed int.
#
# @param op [String] "je", "jmp" or "jne"
# @param offset [Integer] byte offset of this jump
# @param operands [Array<String>] the first element is the label name
# @return [Array<Integer>] opcode bytes followed by the four displacement bytes
# @raise [Caotral::Binary::ELF::Error] for an unknown label or jump mnemonic
def jump(op, offset, *operands)
  label = operands.first
  unless @label_positions.key?(label)
    raise Caotral::Binary::ELF::Error, "unknown label: #{label}"
  end

  target = @label_positions[label]
  length = instruction_size(op, label)
  rel32 = [Integer(target) - Integer(offset) - Integer(length)].pack("l<").bytes

  opcode = { "je" => [0x0f, 0x84], "jmp" => [0xe9], "jne" => [0x0f, 0x85] }[op]
  raise Caotral::Binary::ELF::Error, "unknown jump: #{op}" unless opcode

  [*opcode, *rel32]
end

# Encodes the supported mov/movzb forms, without the REX prefix (the caller
# adds it).
#
# Known operand pairs map to a fixed ModR/M byte; `rax, al` and `rax, [rax]`
# additionally switch the opcode table entry to MOVXZ / MOVR. A hexadecimal
# immediate into rax is emitted directly as C7 C0 imm32. Any other operands
# fall back to #reg for each operand.
#
# @return [Array] opcode bytes followed by the operand bytes
def mov(op, *operands)
  mnemonic = op
  tail =
    case operands
    in ["rax", HEX_PATTERN]
      return [0xC7, 0xC0, *immediate(operands.last)]
    in ["rax", "rbp"] then [0xe8]
    in ["rbp", "rsp"] then [0xe5]
    in ["rsp", "rbp"] then [0xec]
    in ["rdi", "rax"] then [0xC7]
    in ["[rax]", "rdi"] then [0x38]
    in ["rax", "al"]
      mnemonic = "MOVXZ"
      [0xc0]
    in ["rax", "[rax]"]
      mnemonic = "MOVR"
      [0x00]
    else
      operands.map { |operand| reg(operand) }
    end # steep:ignore
  [OPECODE[mnemonic.upcase.to_sym], tail].flatten
end

# Encodes the supported sub/add/imul/idiv/cqo forms, without the REX prefix
# (the caller adds it).
#
# The result is always a flat list of bytes. The opcode table stores each
# opcode as an array, so it is splatted into the result; nesting it would
# make `.size` count a multi-byte opcode as a single byte.
#
# @param op [String] mnemonic
# @param operands [Array<String>] registers and/or immediates written in hex
# @return [Array<Integer>] encoded bytes
# @raise [NoMatchingPatternError] for an unsupported operand combination
def calc(op, *operands)
  ope_code = OPECODE[op.upcase.to_sym]
  case [op, *operands]
  in ["sub", "rax", "rdi"]
    # Register-register form; the table entry for SUB is the immediate form.
    [0x29, 0xf8]
  in ["add", "rax", "rdi"]
    [*ope_code, 0xf8]
  in ["imul", "rax", "rdi"]
    [*ope_code, 0xaf, 0xc7]
  in ["idiv", "rdi"]
    [*ope_code, 0xff]
  in ["sub", "rsp", *num]
    [*ope_code, 0xec, *num.map { |n| n.to_i(16) }]
  in ["sub", "rax", *num]
    [*ope_code, 0xe8, *num.map { |n| n.to_i(16) }]
  in ["cqo"]
    [0x99]
  end # steep:ignore
end

# Encodes `xor rax, rax` and `xor rdi, rdi`, without the REX prefix.
#
# @return [Array<Integer>] opcode 0x31 followed by the ModR/M byte
# @raise [NoMatchingPatternError] for any other operand combination
def calc_bit(op, *operands)
  modrm =
    case [op, *operands]
    in ["xor", "rax", "rax"] then 0xc0
    in ["xor", "rdi", "rdi"] then 0xff
    end # steep:ignore
  [0x31, modrm]
end

# Encodes `lea rax, [reg+disp]`, without the REX prefix.
#
# The ModR/M byte is built with mod 0b01 and reg field 0b000, followed by the
# one-byte displacement returned by #parse_addressing_mode.
#
# @return [Array<Integer>] 0x8D, ModR/M byte, displacement byte
# @raise [NoMatchingPatternError] when the destination is not rax
def calc_addr(op, *operands)
  case [op, *operands]
  in ["lea", "rax", *rest]
    base, disp8 = parse_addressing_mode(rest.first)
    modrm = mod_rm(0b01, 0b000, base)
    [0x8D, *modrm, disp8]
  end # steep:ignore
end

# Encodes `cmp rax, rdi` and `cmp rax, 0`, without the REX prefix.
# `op` is accepted for symmetry with the other encoders and is not inspected.
#
# @return [Array<Integer>] encoded bytes
# @raise [NoMatchingPatternError] for any other operand combination
def cmp(op, *operands)
  case operands
  in ["rax", "0"] then [0x83, 0xf8, 0x00]
  in ["rax", "rdi"] then [0x39, 0xf8]
  end # steep:ignore
end

# Encodes `sete al` and `setl al`.
#
# @return [Array<Integer>] 0x0f, the condition opcode (0x94 / 0x9c), ModR/M 0xc0
# @raise [NoMatchingPatternError] for any other mnemonic or operand
def sete(op, *operands)
  condition =
    case [op, operands]
    in ["sete", ["al"]] then 0x94
    in ["setl", ["al"]] then 0x9c
    end # steep:ignore
  [0x0f, condition, 0xc0]
end

# Encodes a push of a register or an immediate.
#
# Registers use the one-byte form 0x50 + register number, so rax, rbp and rdi
# each get their own opcode (0x50, 0x55, 0x57); encoding rdi with rbp's
# opcode would push the wrong register. A 0x-prefixed literal is pushed as a
# 32-bit immediate (0x68); anything else goes through #reg and is emitted
# after 0x6a.
#
# @param operands [Array<String>] a single register name or immediate
# @return [Array<Integer>] encoded bytes
def push(*operands)
  case operands
  in ["rax"] | ["rbp"] | ["rdi"]
    [0x50 + REGISTER_CODE.fetch(operands.first.upcase.to_sym)]
  in [HEX_PATTERN]
    [0x68, *immediate(operands.first)]
  else
    [0x6a, *operands.map { |o| reg(o) }]
  end # steep:ignore
end

# Encodes `pop` for rax, rdi or rbp as 0x58 plus the register number.
#
# @param operands [Array<String>] a single register name
# @return [Array<Integer>] the one-byte encoding
# @raise [NoMatchingPatternError] for any other operand
def pop(*operands)
  case operands
  in ["rax"] | ["rdi"] | ["rbp"]
    register = operands.first.upcase.to_sym
    [0x58 + REGISTER_CODE[register]]
  end # steep:ignore
end

# Maps an operand to the single byte emitted for it by the fallback paths of
# #mov and #push.
#
# Four register names map to fixed bytes; any operand containing a digit is
# parsed as a hexadecimal number.
#
# @param r [String] operand text
# @return [Integer]
# @raise [Caotral::Binary::ELF::Error] for any other operand
def reg(r)
  fixed = { "rsp" => 0xec, "rbp" => 0x5e, "rax" => 0x29, "rdi" => 0xf8 }.fetch(r, nil)
  return fixed if fixed

  case r
  when /\d+/ then r.to_i(16)
  else
    raise Caotral::Binary::ELF::Error, "yet implemented operand address: #{r}"
  end
end
# Converts a hexadecimal operand into the four bytes of a 32-bit immediate.
#
# The bytes are packed explicitly little-endian ("L<"), as x86 immediates
# require, so the output does not depend on the byte order of the machine
# running the assembler.
#
# @param operand [String] hexadecimal text, with or without a 0x prefix
# @return [Array<Integer>] four bytes, least-significant first
def immediate(operand) = [operand.to_i(16)].pack("L<").unpack("C*")
# Packs the three ModR/M fields into one byte: mod in bits 7-6, reg in
# bits 5-3, rm in bits 2-0.
#
# @return [Integer]
def mod_rm(mod, reg, rm)
  high = mod << 6
  middle = reg << 3
  high | middle | rm
end
# Parses a memory operand of the form `[reg]`, `[reg+N]` or `[reg-N]`.
#
# @param str [String] the operand text, e.g. "[rbp-8]"
# @return [Array(Integer, Integer)] the register number and the displacement
#   reduced to one byte (two's complement, via `& 0xff`)
# @raise [Caotral::Binary::ELF::Error] when the operand is not bracketed
#   register syntax or names a register missing from REGISTER_CODE
def parse_addressing_mode(str)
  m = str.match(/\[(?<reg>\w+)(?<disp>[\+\-]\d+)?\]/)
  # Fail with the assembler's own error instead of a NoMethodError on nil.
  raise Caotral::Binary::ELF::Error, "invalid addressing mode: #{str}" unless m

  code = REGISTER_CODE[m[:reg].upcase.to_sym]
  # A nil register code would only blow up later, inside the ModR/M packing.
  raise Caotral::Binary::ELF::Error, "unknown register in addressing mode: #{str}" unless code

  # A missing displacement is nil, and nil.to_i is 0.
  [code, m[:disp].to_i & 0xff]
end
end
end
end
end

Loading