diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
new file mode 100644
index 00000000..44376043
--- /dev/null
+++ b/.github/workflows/integration.yml
@@ -0,0 +1,34 @@
+name: "Integration Tests"
+
+on:  # triggers: pushes to main, plus manual runs via workflow_dispatch
+  push:
+    branches:
+      - 'main'
+  workflow_dispatch:
+
+permissions:  # scopes granted to the workflow's GITHUB_TOKEN
+  contents: read
+  models: read  # presumably needed so the token can call GitHub Models - TODO confirm
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}  # one group per workflow and ref
+  cancel-in-progress: true  # a newer run in the same group cancels the one still running
+
+jobs:
+  integration:
+    runs-on: ubuntu-latest
+    env:
+      GOPROXY: https://proxy.golang.org/,direct
+      GOPRIVATE: ""
+      GONOPROXY: ""
+      GONOSUMDB: github.com/github/*  # no checksum-database lookups for github.com/github modules
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup Go
+        uses: actions/setup-go@v5
+        with:
+          go-version-file: 'go.mod'  # Go version is taken from the repository-root go.mod
+      - name: Run integration tests
+        run: make integration  # Makefile target: builds the binary, then runs go test in integration/
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}  # inherited by the CLI under test via os.Environ()
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 54f9c6bc..8f1fad44 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,10 @@
 /gh-models
 /gh-models.exe
+/gh-models-test
 /gh-models-darwin-*
 /gh-models-linux-*
 /gh-models-windows-*
 /gh-models-android-*
+
+# Integration test dependencies
+integration/go.sum
diff --git a/DEV.md b/DEV.md
index 36c44fd1..fb33d9bc 100644
--- a/DEV.md
+++ b/DEV.md
@@ -34,6 +34,21 @@ make vet  # to find suspicious constructs
 make tidy # to keep dependencies up-to-date
 ```
 
+### Integration Tests
+
+In addition to unit tests, we have integration tests that use the compiled binary to test against live endpoints:
+
+```shell
+# Build the binary first
+make build
+
+# Run integration tests
+cd integration
+go test -v
+```
+
+Integration tests are located in the `integration/` directory and automatically skip tests requiring authentication when no GitHub token is available. See `integration/README.md` for more details.
+
 ## Releasing
 
 When upgrading or installing the extension using `gh extension upgrade github/gh-models` or
diff --git a/Makefile b/Makefile
index 898120db..12bde1ff 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,16 @@
 check: fmt vet tidy test
 .PHONY: check
 
+build: # compiles the binary by delegating to script/build
+	@echo "==> building gh-models binary <=="
+	script/build
+.PHONY: build
+
+integration: build # rebuilds first, then tidies the integration module and runs its tests with a 5-minute cap
+	@echo "==> running integration tests <=="
+	cd integration && go mod tidy && go test -v -timeout=5m
+.PHONY: integration
+
 fmt:
 	@echo "==> running Go format <=="
 	gofmt -s -l -w .
diff --git a/integration/README.md b/integration/README.md
new file mode 100644
index 00000000..5ebeb9a5
--- /dev/null
+++ b/integration/README.md
@@ -0,0 +1,76 @@
+# Integration Tests
+
+This directory contains integration tests for the `gh-models` CLI extension. These tests are separate from the unit tests and use the compiled binary to test actual functionality.
+
+## Overview
+
+The integration tests:
+- Use the compiled `gh-models` binary (not mocked clients)
+- Test basic functionality of the CLI commands (currently `list` and `run`; `view` and `eval` are not covered yet)
+- Are designed to work with or without GitHub authentication
+- Skip tests requiring live endpoints when authentication is unavailable
+- Keep assertions minimal to avoid brittleness
+
+## Running the Tests
+
+### Prerequisites
+
+1. Build the `gh-models` binary:
+   ```bash
+   cd ..
+   script/build
+   ```
+
+2. (Optional) Authenticate with GitHub CLI for full testing:
+   ```bash
+   gh auth login
+   ```
+
+### Running Locally
+
+From the integration directory:
+```bash
+go test -v
+```
+
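+Without authentication, tests that detect an authentication failure are skipped. The output looks similar to the following (test names and line numbers are illustrative):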
+```
+=== RUN   TestIntegrationHelp
+--- PASS: TestIntegrationHelp (0.05s)
+=== RUN   TestIntegrationList
+    integration_test.go:90: Skipping integration test - no GitHub authentication available
+--- SKIP: TestIntegrationList (0.04s)
+```
+
+With authentication, all tests should run and test live endpoints.
+
+## CI/CD
+
+The integration tests run automatically on pushes to `main` via the GitHub Actions workflow `.github/workflows/integration.yml`.
+
+The workflow:
+1. Builds the binary
+2. Runs `make integration` with the workflow's `GITHUB_TOKEN` in the environment
+3. Can also be started manually (`workflow_dispatch`); a manual run executes the same job
+
+## Test Structure
+
+Each test follows this pattern:
+- Check for binary existence (skip if not built)
+- Check for authentication (skip live endpoint tests if unavailable)
+- Execute the binary with specific arguments
+- Verify basic output format and success/failure
+
+Tests are intentionally simple and focus on:
+- Commands execute without errors
+- Help text is present and correctly formatted
+- Basic output format is as expected
+- Authentication requirements are respected
+
+## Adding New Tests
+
+When adding new commands or features:
+1. Add a corresponding integration test
+2. Follow the existing pattern of checking authentication
+3. Keep assertions minimal but meaningful
+4. Ensure tests work both with and without authentication
\ No newline at end of file
diff --git a/integration/go.mod b/integration/go.mod
new file mode 100644
index 00000000..3e104b8f
--- /dev/null
+++ b/integration/go.mod
@@ -0,0 +1,11 @@
+module github.com/github/gh-models/integration
+
+go 1.22
+
+require github.com/stretchr/testify v1.10.0
+
+require (
+	github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
+	github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
+	gopkg.in/yaml.v3 v3.0.1 // indirect
+)
diff --git a/integration/integration_test.go b/integration/integration_test.go
new file mode 100644
index 00000000..0b7bfb28
--- /dev/null
+++ b/integration/integration_test.go
@@ -0,0 +1,101 @@
+package integration
+
+import (
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/require"
+)
+
+const (
+	binaryName      = "gh-models-test" // file name of the compiled CLI; getBinaryPath looks for it in the parent directory
+	timeoutDuration = 30 * time.Second // upper bound on a single CLI invocation made by runCommand
+)
+
+// getBinaryPath returns the path of the compiled binary, expected in the parent of the
+// working directory. The test is skipped when the binary is absent; any other stat
+// failure (e.g. permission denied) fails the test instead of being silently ignored.
+func getBinaryPath(t *testing.T) string {
+	t.Helper() // attribute skips and failures to the calling test
+	wd, err := os.Getwd()
+	require.NoError(t, err)
+	binaryPath := filepath.Join(filepath.Dir(wd), binaryName)
+	_, err = os.Stat(binaryPath)
+	if os.IsNotExist(err) {
+		t.Skipf("Binary %s not found. Run 'script/build' first.", binaryPath)
+	}
+	require.NoError(t, err, "stat %s", binaryPath)
+	return binaryPath
+}
+
+// runCommand executes the gh-models binary with the given arguments and the current
+// environment, returning everything it wrote to stdout and stderr plus the error
+// from waiting on it (nil on exit status 0). A run that exceeds timeoutDuration is
+// killed and fails the calling test.
+//
+// The process is started on the calling goroutine so that cmd.Process is set before
+// the timeout branch can touch it, and results travel only through the done channel;
+// no variable is shared with the waiting goroutine.
+func runCommand(t *testing.T, args ...string) (stdout, stderr string, err error) {
+	t.Helper()
+	cmd := exec.Command(getBinaryPath(t), args...)
+	cmd.Env = os.Environ()
+	// Capture both streams so stderr is available even when the command exits 0.
+	var outBuf, errBuf strings.Builder
+	cmd.Stdout, cmd.Stderr = &outBuf, &errBuf
+	if err = cmd.Start(); err != nil {
+		return "", "", err
+	}
+
+	// Buffered so the waiter can always deliver its result and exit, even after a timeout.
+	done := make(chan error, 1)
+	go func() { done <- cmd.Wait() }()
+
+	select {
+	case err = <-done:
+		return outBuf.String(), errBuf.String(), err
+	case <-time.After(timeoutDuration):
+		_ = cmd.Process.Kill() // best effort; the process may have just exited
+		t.Fatalf("Command timed out after %v", timeoutDuration)
+		return "", "", nil // unreachable: Fatalf stops the test goroutine
+	}
+}
+
+// TestList runs "list" and expects non-empty output that mentions openai/gpt-4.1.
+// A failure whose stderr mentions "authentication" or "token" skips the test.
+func TestList(t *testing.T) {
+	out, errOut, runErr := runCommand(t, "list")
+	if runErr != nil {
+		t.Logf("List command failed. stdout: %s, stderr: %s", out, errOut)
+		for _, marker := range []string{"authentication", "token"} {
+			if strings.Contains(errOut, marker) {
+				t.Skip("Skipping - authentication issue")
+			}
+		}
+		require.NoError(t, runErr, "List command should succeed with valid auth")
+	}
+
+	require.NotEmpty(t, out, "List should produce output")
+	mentionsModel := strings.Contains(strings.ToLower(out), "openai/gpt-4.1")
+	require.True(t, mentionsModel, "List output should contain model information")
+}
+
+// TestRun sends one short prompt through "run" and checks, case-insensitively, that
+// the reply contains "pain"; the check is loose because it needs actual model inference.
+func TestRun(t *testing.T) {
+	reply, _, runErr := runCommand(t, "run", "openai/gpt-4.1-nano", "say 'pain' in french")
+	require.NoError(t, runErr, "Run should work")
+	require.Contains(t, strings.ToLower(reply), "pain")
+}
+
+// TestRunWithOrg issues the same prompt as TestRun with "--org github" appended and
+// applies the same loose check. It does invoke the model; no help-only shortcut is used.
+func TestRunWithOrg(t *testing.T) {
+	reply, _, runErr := runCommand(t, "run", "openai/gpt-4.1-nano", "say 'pain' in french", "--org", "github")
+	require.NoError(t, runErr, "Run should work")
+	require.Contains(t, strings.ToLower(reply), "pain")
+}