Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ lefthook-local.yml
# Binary files
server/cmd/fleetd/fleetd
server/fleetnode
server/.fleetnode/
server/fake-antminer/fake-antminer
server/fake-proto-rig/fake-proto-rig
server/miner-debug-cli
Expand Down
70 changes: 51 additions & 19 deletions justfile
Original file line number Diff line number Diff line change
Expand Up @@ -172,10 +172,25 @@ update-go-deps:

# --- Packaging ---

# build the fleetnode operator CLI (writes to server/fleetnode)
[working-directory: 'server']
build-fleetnode:
go build -o ./fleetnode ./cmd/fleetnode
# Build the fleetnode operator CLI into server/.fleetnode/ along with native
# plugins and an nmap symlink so the binary-adjacent defaults in
# `fleetnode run` resolve without flags. Kept separate from server/plugins/
# because `just dev` puts cross-compiled Linux/arm64 plugins there for the
# Docker server, and the native agent can't exec ELF binaries.
build-fleetnode: (_build-go-plugins-native "server/.fleetnode/plugins") (_asicrs-build "server/.fleetnode/plugins")
#!/usr/bin/env bash
set -euo pipefail
cd server
mkdir -p ./.fleetnode
go build -o ./.fleetnode/fleetnode ./cmd/fleetnode
if NMAP=$(command -v nmap 2>/dev/null); then
ln -sfn "$NMAP" ./.fleetnode/nmap
echo "linked server/.fleetnode/nmap -> $NMAP"
else
rm -f ./.fleetnode/nmap
echo "note: nmap not on PATH; install it (brew install nmap / apt-get install nmap) so the agent finds it at scan time"
fi
echo "agent staged at server/.fleetnode/fleetnode"

# build Windows installer
[working-directory: 'deployment-files/windows']
Expand Down Expand Up @@ -325,30 +340,47 @@ _build-go-plugins-multi-arch: _go-work-sync
(cd plugin/antminer && GOOS=linux GOARCH=arm64 go build -o ../../deployment-files/server/antminer-plugin-arm64 .)
chmod +x deployment-files/server/*-plugin-*

_asicrs-build:
_asicrs-build outdir="server/plugins":
#!/usr/bin/env bash
set -euo pipefail
BIN=server/plugins/asicrs-plugin
PLATFORM_MARKER=server/plugins/.asicrs-platform
WANT_PLATFORM="native"
BIN={{outdir}}/asicrs-plugin
PLATFORM_MARKER={{outdir}}/.asicrs-platform
HOST_OS="$(uname -s)"
# Docker on macOS produces a Linux ELF that the host can't exec. Use local
# cargo there; Linux hosts stay on the docker path so CI doesn't need Rust.
if [ "$HOST_OS" = "Darwin" ]; then
WANT_PLATFORM="darwin-native"
else
WANT_PLATFORM="native"
fi
if [ -f "$BIN" ] \
&& [ -f "$PLATFORM_MARKER" ] && [ "$(cat "$PLATFORM_MARKER")" = "$WANT_PLATFORM" ] \
&& [ -z "$(find plugin/asicrs sdk/rust server/sdk/v1/pb -newer "$BIN" -type f 2>/dev/null | head -1)" ]; then
echo "asicrs plugin up to date, skipping build."
exit 0
fi
echo "Building asicrs plugin..."
mkdir -p server/plugins
CACHE_ARGS=()
if [ -n "${GITHUB_ACTIONS:-}" ]; then
CACHE_ARGS+=(--cache-from 'type=gha,scope=asicrs-native')
CACHE_ARGS+=(--cache-to 'type=gha,mode=max,scope=asicrs-native')
echo "Building asicrs plugin ($WANT_PLATFORM)..."
mkdir -p {{outdir}}
if [ "$HOST_OS" = "Darwin" ]; then
if ! command -v cargo >/dev/null 2>&1; then
echo "cargo not on PATH; install Rust (https://rustup.rs/) to build asicrs natively on macOS" >&2
exit 1
fi
(cd plugin/asicrs && cargo build --release)
cp plugin/asicrs/target/release/asicrs-plugin "$BIN"
cp plugin/asicrs/config.yaml {{outdir}}/asicrs-config.yaml
else
CACHE_ARGS=()
if [ -n "${GITHUB_ACTIONS:-}" ]; then
CACHE_ARGS+=(--cache-from 'type=gha,scope=asicrs-native')
CACHE_ARGS+=(--cache-to 'type=gha,mode=max,scope=asicrs-native')
fi
docker buildx build \
${CACHE_ARGS[@]+"${CACHE_ARGS[@]}"} \
--file plugin/asicrs/Dockerfile.build \
--output type=local,dest={{outdir}} \
.
fi
docker buildx build \
${CACHE_ARGS[@]+"${CACHE_ARGS[@]}"} \
--file plugin/asicrs/Dockerfile.build \
--output type=local,dest=server/plugins \
.
chmod +x "$BIN"
# buildx --output type=local preserves the in-image mtime; touch so freshness checks see "now".
touch "$BIN"
Expand Down
54 changes: 54 additions & 0 deletions server/cmd/fleetnode/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# fleetnode

`fleetnode` is the on-prem agent that enrolls with a fleet server, holds a session, and (in later stack PRs) opens a `ControlStream` to execute server-issued discovery commands against the operator's local network.

## Subcommands

| Command | Purpose |
|---------|---------|
| `fleetnode enroll` | Register with a fleet server using a one-time enrollment code. Persists keys and `api_key`. See [enroll.go](enroll.go). |
Comment thread
ankitgoswami marked this conversation as resolved.
| `fleetnode status` | Print local state (server URL, fleet_node_id, fingerprint, session expiry). See [status.go](status.go). |
| `fleetnode refresh` | Renew the session token using the stored `api_key`. See [refresh.go](refresh.go). |
Comment thread
ankitgoswami marked this conversation as resolved.
| `fleetnode run` | Long-running daemon: maintain session, post heartbeats, and (in the discovery PR) consume the control stream. See [run.go](run.go). |

## State directory and lock

State lives in `state.yaml` under one of, in order:

1. `--state-dir <path>` (override; primarily for tests)
2. `$XDG_STATE_HOME/fleetnode`
3. `~/.local/state/fleetnode`

The file holds `server_url`, `fleet_node_id`, `identity_fingerprint`, both keypairs, the `api_key`, and the current `session_token`. It is created `0600` under a `0700` directory. See [state.go](../../internal/fleetnodebootstrap/state.go).

A `state.lock` file in the same directory serializes commands. Only one process may hold it at a time (`LOCK_EX | LOCK_NB`). The PID of the holder is written under the lock; if another invocation hits contention, the error includes that PID. To clear a stale lock, identify the owner with `ps -p <pid>`; remove the lock file only if the process is gone.

## Build

```bash
just build-fleetnode # produces server/.fleetnode/{fleetnode, nmap, plugins/}
go build -o fleetnode ./server/cmd/fleetnode # fast iteration
```

The staged layout reserves a `plugins/` subdirectory and a `nmap` symlink for the discovery PR. The agent does not yet exec anything from those locations.

## Enrollment flow

1. Operator mints an enrollment code in the UI and shares it.
2. `fleetnode enroll --server-url=...` prompts for the code, registers, prints a fingerprint.
3. Operator verifies the fingerprint in the UI and clicks confirm; the UI displays the `api_key`.
4. Agent prompts for the `api_key`, completes the handshake, persists session.

If anything is interrupted between Register and Complete, `fleetnode refresh` resumes from the persisted state.

## Security model

- **Transport.** `ValidateServerURL` requires `https://` for non-loopback servers; `--allow-insecure-transport` permits `http://` for testing only. The HTTP/2 transport is scheme-aware: `https://` goes through a TLS-validating `http2.Transport`, `http://` goes through h2c. See [client.go](../../internal/fleetnodebootstrap/client.go).
- **State file.** `state.yaml` is `0600` under a `0700` directory; the writer fsyncs the temp file, renames, then fsyncs the directory. Symlinks at the state dir leaf are refused.
- **Lock contention.** PID is written under the lock so contention reports are actionable.

## Development

```bash
go test ./server/internal/fleetnodebootstrap/... -race -count=1
```
6 changes: 2 additions & 4 deletions server/cmd/fleetnode/fake_gateway_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ import (

pb "github.com/block/proto-fleet/server/generated/grpc/fleetnodegateway/v1"
"github.com/block/proto-fleet/server/generated/grpc/fleetnodegateway/v1/fleetnodegatewayv1connect"

"github.com/block/proto-fleet/server/internal/fleetnodebootstrap"
"github.com/block/proto-fleet/server/internal/testutil"
)

type fakeFleetNodeGateway struct {
Expand Down Expand Up @@ -125,7 +125,5 @@ func newFakeServer(t *testing.T, fake *fakeFleetNodeGateway) *httptest.Server {
mux := http.NewServeMux()
path, h := fleetnodegatewayv1connect.NewFleetNodeGatewayServiceHandler(fake)
mux.Handle(path, h)
srv := httptest.NewServer(mux)
t.Cleanup(srv.Close)
return srv
return testutil.NewH2CServer(t, mux)
}
15 changes: 9 additions & 6 deletions server/cmd/fleetnode/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@ const (
type RunCmd struct {
HeartbeatInterval time.Duration `name:"heartbeat-interval" default:"30s" help:"interval between UploadHeartbeat calls"`

now func() time.Time `kong:"-"`
clientFactory func(serverURL string, tokenSource func() string) gatewayClient `kong:"-"`
signals []os.Signal `kong:"-"`
parentCtx context.Context `kong:"-"` //nolint:containedctx // test seam for daemon shutdown without OS signals
now func() time.Time `kong:"-"`
clientFactory func(serverURL string, tokenSource func() string) (gatewayClient, error) `kong:"-"`
signals []os.Signal `kong:"-"`
parentCtx context.Context `kong:"-"` //nolint:containedctx // test seam for daemon shutdown without OS signals
}

type gatewayClient interface {
Expand All @@ -49,7 +49,7 @@ func (r *RunCmd) run(c *Context, stderr io.Writer) error {
r.now = func() time.Time { return time.Now().UTC() }
}
if r.clientFactory == nil {
r.clientFactory = func(url string, src func() string) gatewayClient {
r.clientFactory = func(url string, src func() string) (gatewayClient, error) {
return fleetnodebootstrap.NewAuthenticatedGatewayClient(url, src)
}
}
Expand Down Expand Up @@ -107,7 +107,10 @@ func (r *RunCmd) runLocked(c *Context, logger *slog.Logger) error {
}
}

client := r.clientFactory(st.ServerURL, func() string { return st.SessionToken })
client, err := r.clientFactory(st.ServerURL, func() string { return st.SessionToken })
if err != nil {
return err
}
Comment thread
ankitgoswami marked this conversation as resolved.

logger.Info("daemon started",
"fleet_node_id", st.FleetNodeID,
Expand Down
6 changes: 3 additions & 3 deletions server/cmd/fleetnode/run_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ func TestRunCmd_HappyPathThreeTicks(t *testing.T) {
cmd := &RunCmd{
HeartbeatInterval: 5 * time.Millisecond,
parentCtx: parent,
clientFactory: func(_ string, _ func() string) gatewayClient { return stub },
clientFactory: func(_ string, _ func() string) (gatewayClient, error) { return stub, nil },
}

done := make(chan error, 1)
Expand Down Expand Up @@ -327,7 +327,7 @@ func TestRunCmd_ValidatesServerURLBeforeBuildingClient(t *testing.T) {
stub := &stubGatewayClient{}
cmd := &RunCmd{
HeartbeatInterval: time.Second,
clientFactory: func(_ string, _ func() string) gatewayClient { return stub },
clientFactory: func(_ string, _ func() string) (gatewayClient, error) { return stub, nil },
}

// Act
Expand All @@ -353,7 +353,7 @@ func TestRunCmd_ExitsOnCodeNotFoundHeartbeat(t *testing.T) {
}
cmd := &RunCmd{
HeartbeatInterval: 5 * time.Millisecond,
clientFactory: func(_ string, _ func() string) gatewayClient { return stub },
clientFactory: func(_ string, _ func() string) (gatewayClient, error) { return stub, nil },
}

// Act
Expand Down
10 changes: 7 additions & 3 deletions server/internal/fleetnodebootstrap/authclient.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,16 @@ import (
// tokenSource is invoked per-call so a daemon that mutates its own
// state.SessionToken via Refresh picks up the new value on the next
// request without rebuilding the client.
func NewAuthenticatedGatewayClient(serverURL string, tokenSource func() string) fleetnodegatewayv1connect.FleetNodeGatewayServiceClient {
func NewAuthenticatedGatewayClient(serverURL string, tokenSource func() string) (fleetnodegatewayv1connect.FleetNodeGatewayServiceClient, error) {
httpClient, err := newGatewayHTTPClient(serverURL)
if err != nil {
return nil, err
}
return fleetnodegatewayv1connect.NewFleetNodeGatewayServiceClient(
newGatewayHTTPClient(),
httpClient,
serverURL,
connect.WithInterceptors(bearerInterceptor(tokenSource)),
)
), nil
}

func bearerInterceptor(tokenSource func() string) connect.Interceptor {
Expand Down
18 changes: 9 additions & 9 deletions server/internal/fleetnodebootstrap/authclient_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package fleetnodebootstrap
import (
"context"
"net/http"
"net/http/httptest"
"sync"
"testing"
"time"
Expand All @@ -15,6 +14,7 @@ import (

pb "github.com/block/proto-fleet/server/generated/grpc/fleetnodegateway/v1"
"github.com/block/proto-fleet/server/generated/grpc/fleetnodegateway/v1/fleetnodegatewayv1connect"
"github.com/block/proto-fleet/server/internal/testutil"
)

type captureGateway struct {
Expand Down Expand Up @@ -47,15 +47,15 @@ func TestAuthenticatedClient_AttachesBearerHeaderPerCall(t *testing.T) {
mux := http.NewServeMux()
path, h := fleetnodegatewayv1connect.NewFleetNodeGatewayServiceHandler(fake)
mux.Handle(path, h)
srv := httptest.NewServer(mux)
t.Cleanup(srv.Close)
srv := testutil.NewH2CServer(t, mux)

var token string
client := NewAuthenticatedGatewayClient(srv.URL, func() string { return token })
client, err := NewAuthenticatedGatewayClient(srv.URL, func() string { return token })
require.NoError(t, err)

// Act
token = "t1"
_, err := client.UploadHeartbeat(context.Background(), connect.NewRequest(&pb.UploadHeartbeatRequest{SentAt: timestamppb.Now()}))
_, err = client.UploadHeartbeat(context.Background(), connect.NewRequest(&pb.UploadHeartbeatRequest{SentAt: timestamppb.Now()}))
require.NoError(t, err)
token = "t2"
_, err = client.UploadHeartbeat(context.Background(), connect.NewRequest(&pb.UploadHeartbeatRequest{SentAt: timestamppb.Now()}))
Expand All @@ -72,14 +72,14 @@ func TestAuthenticatedClient_RejectsEmptyToken(t *testing.T) {
t.Parallel()

// Arrange
srv := httptest.NewServer(http.NewServeMux())
t.Cleanup(srv.Close)
client := NewAuthenticatedGatewayClient(srv.URL, func() string { return "" })
srv := testutil.NewH2CServer(t, http.NewServeMux())
client, err := NewAuthenticatedGatewayClient(srv.URL, func() string { return "" })
require.NoError(t, err)

// Act
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
defer cancel()
_, err := client.UploadHeartbeat(ctx, connect.NewRequest(&pb.UploadHeartbeatRequest{SentAt: timestamppb.Now()}))
_, err = client.UploadHeartbeat(ctx, connect.NewRequest(&pb.UploadHeartbeatRequest{SentAt: timestamppb.Now()}))

// Assert
require.Error(t, err)
Expand Down
57 changes: 47 additions & 10 deletions server/internal/fleetnodebootstrap/client.go
Original file line number Diff line number Diff line change
@@ -1,29 +1,66 @@
package fleetnodebootstrap

import (
"context"
"crypto/tls"
"errors"
"fmt"
"net"
"net/http"
"net/url"
"time"

"golang.org/x/net/http2"

"github.com/block/proto-fleet/server/generated/grpc/fleetnodegateway/v1/fleetnodegatewayv1connect"
)

const httpClientTimeout = 30 * time.Second

// Refusing every 30x stops a downgrade redirect from replaying the POST body
// (enrollment token, api_key, signature) to a plaintext target; Connect-RPC
// itself never expects redirects.
// Refusing 3xx stops a downgrade redirect from replaying the signed POST
// body (enrollment token, api_key, signature) to an attacker-chosen target.
var errRedirectNotAllowed = errors.New("redirects are not allowed for connect-rpc calls")

func newGatewayHTTPClient() *http.Client {
// A shared AllowHTTP+DialTLSContext shim would silently downgrade https to
// plaintext, defeating ValidateServerURL's https-required policy. The https
// branch uses net/http's Transport so ALPN can negotiate H2 when available
// and fall back to HTTP/1.1 — packaged nginx terminates TLS and proxies
// upstream over HTTP/1.1, so a bare http2.Transport refuses to talk to it.
func newGatewayHTTPClient(serverURL string) (*http.Client, error) {
u, err := url.Parse(serverURL)
if err != nil {
return nil, fmt.Errorf("parse server-url: %w", err)
}
var tr http.RoundTripper
switch u.Scheme {
case "http":
tr = &http2.Transport{
Comment thread
ankitgoswami marked this conversation as resolved.
AllowHTTP: true,
DialTLSContext: func(ctx context.Context, network, addr string, _ *tls.Config) (net.Conn, error) {
var d net.Dialer
return d.DialContext(ctx, network, addr)
},
}
case "https":
tr = &http.Transport{
Proxy: http.ProxyFromEnvironment,
TLSHandshakeTimeout: 10 * time.Second,
ExpectContinueTimeout: 1 * time.Second,
ForceAttemptHTTP2: true,
}
default:
return nil, fmt.Errorf("server-url scheme must be http or https; got %q", u.Scheme)
}
return &http.Client{
Timeout: httpClientTimeout,
CheckRedirect: func(_ *http.Request, _ []*http.Request) error {
return errRedirectNotAllowed
},
}
Transport: tr,
}, nil
Comment thread
ankitgoswami marked this conversation as resolved.
}

func NewGatewayClient(serverURL string) fleetnodegatewayv1connect.FleetNodeGatewayServiceClient {
return fleetnodegatewayv1connect.NewFleetNodeGatewayServiceClient(newGatewayHTTPClient(), serverURL)
func NewGatewayClient(serverURL string) (fleetnodegatewayv1connect.FleetNodeGatewayServiceClient, error) {
httpClient, err := newGatewayHTTPClient(serverURL)
if err != nil {
return nil, err
}
return fleetnodegatewayv1connect.NewFleetNodeGatewayServiceClient(httpClient, serverURL), nil
}
Loading
Loading