Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions lib/adminutils/cleanup.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ import (

func CreateRevision(changeset string, timestamp int64, isKeyRev bool, authorId *string, atext apool.AText, attribPool apool.APool) revision.Revision {
if authorId != nil {
// Work on a deep copy: the struct parameter shares the caller's maps,
// so PutAttrib on it would add entries to the caller's pool without
// bumping the caller's NextNum, corrupting the pool (Pad.Check then
// fails with "numToAttrib length does not match nextNum").
attribPool = attribPool.Clone()
attribPool.PutAttrib(apool.Attribute{
Key: "author",
Value: *authorId,
Expand Down
42 changes: 42 additions & 0 deletions lib/api/author/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"github.com/ether/etherpad-go/lib"
"github.com/ether/etherpad-go/lib/api/errors"
"github.com/ether/etherpad-go/lib/author"
"github.com/ether/etherpad-go/lib/db"
"github.com/gofiber/fiber/v3"
)

Expand Down Expand Up @@ -35,6 +36,12 @@ type PadsResponse struct {
PadIds []string `json:"padIds"`
}

// AnonymizeAuthorResponse is the JSON body the AnonymizeAuthor handler returns
// after the author manager reported a successful anonymization.
type AnonymizeAuthorResponse struct {
// AuthorId echoes the author ID taken from the request path.
AuthorId string `json:"authorId" example:"a.s8oes9dhwrvt0zif"`
// Anonymized is set to true by the handler on success.
Anonymized bool `json:"anonymized" example:"true"`
}

// CreateAuthor godoc
// @Summary Create a new author
// @Description Creates a new author with the specified name
Expand Down Expand Up @@ -213,6 +220,40 @@ func GetAuthorPads(initStore *lib.InitStore, authorManager *author.Manager) fibe
}
}

// AnonymizeAuthor godoc
// @Summary Anonymize an author (GDPR Art. 17 erasure)
// @Description Severs the token binding of an author, zeroes their display identity (name, color) and nulls their authorship on chat messages. Pad content and revisions are left intact. Idempotent.
// @Tags Authors
// @Accept json
// @Produce json
// @Param authorId path string true "Author ID"
// @Success 200 {object} AnonymizeAuthorResponse
// @Failure 400 {object} errors.Error
// @Failure 404 {object} errors.Error
// @Failure 500 {object} errors.Error
// @Security BearerAuth
// @Router /admin/api/author/{authorId}/anonymize [post]
func AnonymizeAuthor(initStore *lib.InitStore, authorManager *author.Manager) fiber.Handler {
	return func(c fiber.Ctx) error {
		authorId := c.Params("authorId")
		if authorId == "" {
			return c.Status(400).JSON(errors.NewInvalidParamError("authorId is required"))
		}

		if err := authorManager.AnonymizeAuthor(authorId); err != nil {
			// A missing author is signalled through the error message, so it
			// is matched by string rather than by error identity.
			if err.Error() == db.AuthorNotFoundError {
				return c.Status(404).JSON(errors.AuthorNotFoundError)
			}
			// The client only sees a generic 500 body; log the cause here,
			// like the other admin handlers do, so it is not lost.
			initStore.Logger.Errorf("Error anonymizing author %s: %v", authorId, err)
			return c.Status(500).JSON(errors.InternalServerError)
		}

		return c.JSON(AnonymizeAuthorResponse{
			AuthorId:   authorId,
			Anonymized: true,
		})
	}
}

func Init(initStore *lib.InitStore) {
var authorManager = author.NewManager(initStore.Store)

Expand All @@ -221,4 +262,5 @@ func Init(initStore *lib.InitStore) {
initStore.PrivateAPI.Get("/author/:authorId", GetAuthor(initStore, authorManager))
initStore.PrivateAPI.Get("/author/:authorId/name", GetAuthorName(initStore, authorManager))
initStore.PrivateAPI.Get("/author/:authorId/pads", GetAuthorPads(initStore, authorManager))
initStore.PrivateAPI.Post("/author/:authorId/anonymize", AnonymizeAuthor(initStore, authorManager))
}
178 changes: 178 additions & 0 deletions lib/api/pad/compactDiff.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
package pad

import (
"github.com/ether/etherpad-go/lib"
errors2 "github.com/ether/etherpad-go/lib/api/errors"
utils2 "github.com/ether/etherpad-go/lib/api/utils"
"github.com/ether/etherpad-go/lib/apool"
"github.com/ether/etherpad-go/lib/author"
io2 "github.com/ether/etherpad-go/lib/io"
"github.com/ether/etherpad-go/lib/paddiff"
"github.com/ether/etherpad-go/lib/utils"
"github.com/ether/etherpad-go/lib/ws"
"github.com/gofiber/fiber/v3"
)

// CompactPadRequest is the JSON request body bound by the CompactPad handler.
type CompactPadRequest struct {
// KeepRevisions is the number of most recent revisions to keep. CompactPad
// rejects values below 1 and values that are not lower than the pad's head
// revision.
KeepRevisions int `json:"keepRevisions"`
}

// CompactPadResponse is the JSON body CompactPad returns once the revision
// compaction finished without error.
type CompactPadResponse struct {
// Ok is set to true by the handler on success.
Ok bool `json:"ok"`
// KeepRevisions echoes the keepRevisions value taken from the request.
KeepRevisions int `json:"keepRevisions"`
}

// CompactPad godoc
// @Summary Compact a pad's revision history
// @Description Collapses the pad's revision history so that only the last keepRevisions revisions are kept (original API: compactPad). The revisions below the cut are composed into a single base revision; pad text is preserved. Destructive — consider exporting the pad first.
// @Tags Pads
// @Accept json
// @Produce json
// @Param padId path string true "Pad ID"
// @Param request body CompactPadRequest true "Number of recent revisions to keep (must be >= 1 and lower than the pad's head revision)"
// @Success 200 {object} CompactPadResponse
// @Failure 400 {object} errors.Error
// @Failure 404 {object} errors.Error
// @Failure 500 {object} errors.Error
// @Security BearerAuth
// @Router /admin/api/pads/{padId}/compact [post]
func CompactPad(initStore *lib.InitStore) fiber.Handler {
	return func(c fiber.Ctx) error {
		padId := c.Params("padId")

		// Decode the JSON body first; a malformed body is a client error.
		var body CompactPadRequest
		if bindErr := c.Bind().Body(&body); bindErr != nil {
			return c.Status(400).JSON(errors2.InvalidRequestError)
		}

		// The pad must exist before the requested cut can be checked against
		// its head revision.
		target, lookupErr := utils2.GetPadSafe(padId, true, nil, nil, initStore.PadManager)
		if lookupErr != nil {
			return c.Status(404).JSON(errors2.PadNotFoundError)
		}

		keep := body.KeepRevisions
		switch {
		case keep < 1:
			return c.Status(400).JSON(errors2.NewInvalidParamError("keepRevisions must be at least 1"))
		case keep >= target.Head:
			return c.Status(400).JSON(errors2.NewInvalidParamError("keepRevisions must be lower than the pad's head revision"))
		}

		// The actual compaction is the one the admin UI uses
		// (AdminMessageHandler.DeleteRevisions). A handler is built ad hoc from
		// the InitStore's store, hooks, pad manager, pad message handler and
		// logger; the hub argument is passed as nil.
		compactor := ws.NewAdminMessageHandler(initStore.Store, initStore.Hooks, initStore.PadManager, initStore.Handler, initStore.Logger, nil, initStore.C)
		compactErr := compactor.DeleteRevisions(padId, keep)
		if compactErr != nil {
			initStore.Logger.Errorf("Error compacting pad %s: %v", padId, compactErr)
			return c.Status(500).JSON(errors2.InternalServerError)
		}

		return c.JSON(CompactPadResponse{Ok: true, KeepRevisions: keep})
	}
}

// CreateDiffHTML godoc
// @Summary Create an HTML diff between two revisions
// @Description Returns the changes between startRev and endRev as HTML (original API: createDiffHTML). Insertions keep their author attribution (rendered with the author's color), deletions are re-inserted with a 'removed' attribute (rendered struck through). Also returns the list of authors involved in the changes.
// @Tags Pads
// @Accept json
// @Produce json
// @Param padId path string true "Pad ID"
// @Param startRev query int true "Start revision number"
// @Param endRev query int false "End revision number (defaults to the head revision)"
// @Success 200 {object} DiffHTMLResponse
// @Failure 400 {object} errors.Error
// @Failure 404 {object} errors.Error
// @Failure 500 {object} errors.Error
// @Security BearerAuth
// @Router /admin/api/pads/{padId}/diffHTML [get]
func CreateDiffHTML(initStore *lib.InitStore) fiber.Handler {
return func(c fiber.Ctx) error {
padId := c.Params("padId")

foundPad, err := utils2.GetPadSafe(padId, true, nil, nil, initStore.PadManager)
if err != nil {
return c.Status(404).JSON(errors2.PadNotFoundError)
}

startRevStr := c.Query("startRev")
if startRevStr == "" {
return c.Status(400).JSON(errors2.NewMissingParamError("startRev"))
}
startRev, err := utils.CheckValidRev(startRevStr)
if err != nil {
return c.Status(400).JSON(errors2.InvalidRevisionError)
}

var endRev *int
if endRevStr := c.Query("endRev"); endRevStr != "" {
endRevNum, err := utils.CheckValidRev(endRevStr)
if err != nil {
return c.Status(400).JSON(errors2.InvalidRevisionError)
}
endRev = endRevNum
}

// The original API clamps startRev to the head revision before
// validating the range; endRev is clamped inside GetValidRevisionRange.
head := foundPad.Head
start := *startRev
if start > head {
start = head
}

from, to, ok := paddiff.GetValidRevisionRange(start, endRev, head)
if !ok {
return c.Status(400).JSON(errors2.NewInvalidParamError("invalid revision range"))
}

diffAText, authors, err := paddiff.CreateDiffAText(foundPad, &foundPad.Pool, from, to)
if err != nil {
initStore.Logger.Errorf("Error creating diff atext for pad %s: %v", padId, err)
return c.Status(500).JSON(errors2.InternalServerError)
}

// Render the diff atext with the regular export-HTML pipeline (it
// understands the 'removed' attribute). GetPadHTML reads pad.AText when
// no revision is requested, so a shallow copy of the pad carrying the
// diff atext is passed.
padWithDiff := *foundPad
padWithDiff.AText = *diffAText

authorColors := buildAuthorColors(&foundPad.Pool, initStore.AuthorManager)
exporter := io2.NewExportHtml(initStore.PadManager, initStore.AuthorManager, initStore.Hooks)
Comment on lines +134 to +148

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Action required

1. Diffhtml mutates cached pool 🐞 Bug ☼ Reliability

CreateDiffHTML passes the cached pad’s Pool into paddiff.CreateDiffAText, which mutates it via
PutAttrib; this makes a read-only endpoint modify shared pad state and can introduce request-time
data races/corruption. Because pads are globally cached and reused across requests, the mutation
persists beyond the diff request.
Agent Prompt
## Issue description
`CreateDiffHTML` currently calls `paddiff.CreateDiffAText(foundPad, &foundPad.Pool, ...)`, and paddiff mutates the provided pool (`PutAttrib`). Because `foundPad` is a cached/shared pad instance, this causes a GET endpoint to mutate shared state and can create data races under concurrent requests.

## Issue Context
Pads are returned from a global cache and reused across requests; the pad struct contains an `apool.APool` with mutable maps.

## Fix
Clone the pad’s pool in `CreateDiffHTML` and use the clone consistently:
- `poolClone := foundPad.Pool.Clone()`
- call `CreateDiffAText(foundPad, &poolClone, ...)`
- set `padWithDiff.Pool = poolClone` before rendering
- build colors from the clone (`buildAuthorColors(&poolClone, ...)`)
This keeps diff generation side-effect-free and avoids concurrent mutation of cached pad state.

## Fix Focus Areas
- lib/api/pad/compactDiff.go[121-149]
- lib/paddiff/paddiff.go[51-56]
- lib/paddiff/paddiff.go[209-226]
- lib/apool/APool.go[94-108]
- lib/pad/padManager.go[69-166]

ⓘ Copy this prompt and use it to remediate the issue with your preferred AI generation tools

html, err := exporter.GetPadHTML(&padWithDiff, nil, authorColors)
if err != nil {
initStore.Logger.Errorf("Error rendering diff HTML for pad %s: %v", padId, err)
return c.Status(500).JSON(errors2.InternalServerError)
}

return c.JSON(DiffHTMLResponse{
HTML: html,
Authors: authors,
})
}
}

// buildAuthorColors returns a map from author ID to color ID for every
// non-empty "author" attribute in the given attribute pool (equivalent of the
// original pad.getAllAuthorColors; mirrors the unexported
// buildAuthorColorCache in lib/io/exportHtml.go). Each author is looked up at
// most once; authors whose lookup returns an error are left out of the result.
func buildAuthorColors(padPool *apool.APool, authorManager *author.Manager) map[string]string {
	colorsByAuthor := make(map[string]string)
	for _, entry := range padPool.NumToAttrib {
		if entry.Key != "author" || entry.Value == "" {
			continue
		}
		authorID := entry.Value
		if _, seen := colorsByAuthor[authorID]; seen {
			continue
		}
		record, err := authorManager.GetAuthor(authorID)
		if err != nil {
			// Best effort: an unresolvable author simply gets no color.
			continue
		}
		colorsByAuthor[authorID] = record.ColorId
	}
	return colorsByAuthor
}
2 changes: 2 additions & 0 deletions lib/api/pad/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,8 @@ func Init(initStore *lib.InitStore) {

// Pad operations
initStore.PrivateAPI.Post("/pads/:padId/restoreRevision", RestoreRevision(initStore))
initStore.PrivateAPI.Post("/pads/:padId/compact", CompactPad(initStore))
initStore.PrivateAPI.Get("/pads/:padId/diffHTML", CreateDiffHTML(initStore))
initStore.PrivateAPI.Get("/pads/:padId/readOnlyID", GetReadOnlyID(initStore))
initStore.PrivateAPI.Get("/pads/:padId/authors", ListAuthorsOfPad(initStore))
initStore.PrivateAPI.Get("/pads/:padId/chatHead", GetChatHead(initStore))
Expand Down
41 changes: 24 additions & 17 deletions lib/apool/APool.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,23 +172,6 @@ func (a *APool) toDBRev() db.RevPool {
}
}

// clone returns a copy of the pool that is backed by its own NumToAttrib and
// AttribToNum maps, so entries added to the copy do not show up in the
// receiver. NextNum is carried over unchanged.
func (a *APool) clone() APool {
	// The maps must be allocated before they are written to: assigning into
	// the nil maps of a zero-value APool panics.
	newPool := APool{
		NumToAttrib: make(map[int]Attribute, len(a.NumToAttrib)),
		AttribToNum: make(map[Attribute]int, len(a.AttribToNum)),
		NextNum:     a.NextNum,
	}

	// Both directions are filled from both source maps, so the copy stays
	// bidirectional even if the receiver's two maps are out of step.
	for num, attrib := range a.NumToAttrib {
		newPool.NumToAttrib[num] = attrib
		newPool.AttribToNum[attrib] = num
	}
	for attrib, num := range a.AttribToNum {
		newPool.AttribToNum[attrib] = num
		newPool.NumToAttrib[num] = attrib
	}

	return newPool
}

type AttributeIterator func(attributeKey *string, attributeValue *string)

/**
Expand All @@ -210,3 +193,27 @@ func (a *APool) GetAttrib(num int) (*Attribute, error) {
}
return &pair, nil
}

// Clone produces an independent deep copy of the pool: NumToAttrib and
// AttribToNum are copied into freshly allocated maps, NextNum is carried over,
// and NumToAttribRaw (when set) is copied including its slices. Changing the
// copy, e.g. through PutAttrib, therefore never touches the receiver, whereas
// a plain struct copy would keep sharing the maps and let the original's
// NextNum drift out of sync with them.
func (a *APool) Clone() APool {
	numToAttrib := make(map[int]Attribute, len(a.NumToAttrib))
	for n, attr := range a.NumToAttrib {
		numToAttrib[n] = attr
	}

	attribToNum := make(map[Attribute]int, len(a.AttribToNum))
	for attr, n := range a.AttribToNum {
		attribToNum[attr] = n
	}

	copied := APool{
		NumToAttrib: numToAttrib,
		AttribToNum: attribToNum,
		NextNum:     a.NextNum,
	}

	// A nil raw map stays nil in the copy.
	if a.NumToAttribRaw == nil {
		return copied
	}

	copied.NumToAttribRaw = make(map[int][]string, len(a.NumToAttribRaw))
	for n, raw := range a.NumToAttribRaw {
		// Appending to a nil slice yields a new backing array per entry.
		var dup []string
		copied.NumToAttribRaw[n] = append(dup, raw...)
	}
	return copied
}
41 changes: 41 additions & 0 deletions lib/author/authorManager.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,47 @@ func (m *Manager) GetAuthor(authorId string) (*Author, error) {
return &mappedDbAuthor, nil
}

/**
* AnonymizeAuthor performs GDPR Art. 17 erasure for an author, mirroring the
* original Etherpad's AuthorManager.anonymizeAuthor (API 1.3.1):
* - the token binding that links a person to this author id is severed
* first, so a concurrent token lookup can no longer resolve the author
* mid-erasure,
* - the display identity on the author record is zeroed (name -> null,
* colorId -> 0) while the record itself is kept,
* - authorship on chat messages the author posted is nulled; the message
* text itself survives,
* - pad content, revisions and attribute pools are left intact: changeset
* references are opaque without the identity record.
* The operation is idempotent: re-running it leaves the same erased state.
* Returns db.AuthorNotFoundError if the author does not exist.
* @param {String} authorId The id of the author
*/
func (m *Manager) AnonymizeAuthor(authorId string) error {
if _, err := m.Db.GetAuthor(authorId); err != nil {
return errors.New(db.AuthorNotFoundError)
}
Comment on lines +188 to +191

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Action required

2. Db errors returned as 404 🐞 Bug ≡ Correctness

author.Manager.AnonymizeAuthor converts any error from Db.GetAuthor into db.AuthorNotFoundError, so
DB/query failures are misreported as “not found” and the API returns 404 instead of 500. This hides
real backend failures and breaks correct error semantics.
Agent Prompt
## Issue description
`Manager.AnonymizeAuthor()` currently does:
```go
if _, err := m.Db.GetAuthor(authorId); err != nil {
  return errors.New(db.AuthorNotFoundError)
}
```
This collapses all failures (including database/query errors) into "author not found".

## Issue Context
Datastore `GetAuthor` implementations can return non-not-found errors (query/scan/driver errors). The API layer maps `db.AuthorNotFoundError` to HTTP 404.

## Fix
Return the original `GetAuthor` error, or only map to `db.AuthorNotFoundError` when the underlying error is actually the not-found case.
A minimal safe change:
- `if _, err := m.Db.GetAuthor(authorId); err != nil { return err }`
If you want to keep explicit mapping, do:
- `if err.Error() == db.AuthorNotFoundError { return err } else { return err }` (i.e., don’t replace non-not-found errors).

## Fix Focus Areas
- lib/author/authorManager.go[188-191]
- lib/db/PostgresDB.go[285-314]
- lib/api/author/init.go[236-248]

ⓘ Copy this prompt and use it to remediate the issue with your preferred AI generation tools


// Sever the token binding first, before touching anything else.
if err := m.Db.RemoveTokenOfAuthor(authorId); err != nil {
return err
}

// Zero the display identity. The token was already removed above, so
// SaveAuthor's token-preservation has nothing left to preserve.
if err := m.saveAuthor(Author{
Id: authorId,
Name: nil,
ColorId: "0",
Timestamp: time.Now().Unix(),
}); err != nil {
return err
}

// Null authorship on chat messages the author posted.
return m.Db.ClearChatAuthorship(authorId)
}

func (m *Manager) GetPadsOfAuthor(authorId string) (*[]string, error) {
padIds, err := m.Db.GetPadIdsOfAuthor(authorId)
if err != nil {
Expand Down
7 changes: 7 additions & 0 deletions lib/db/DataStore.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ type AuthorMethods interface {
SaveAuthorName(authorId string, authorName string) error
SaveAuthorColor(authorId string, authorColor string) error
GetAuthors(ids []string) (*[]db.AuthorDB, error)
// RemoveTokenOfAuthor severs the token binding that links a person to the
// given author id (GDPR erasure). It is a no-op if the author does not
// exist or has no token.
RemoveTokenOfAuthor(authorId string) error
}

type SessionMethods interface {
Expand All @@ -71,6 +75,9 @@ type ChatMethods interface {
SaveChatMessage(padId string, head int, authorId *string, timestamp int64, text string) error
GetChatsOfPad(padId string, start int, end int) (*[]db.ChatMessageDBWithDisplayName, error)
GetAuthorIdsOfPadChats(id string) (*[]string, error)
// ClearChatAuthorship nulls the authorship of all chat messages posted by
// the given author while preserving the messages themselves (GDPR erasure).
ClearChatAuthorship(authorId string) error
}

type ServerMethods interface {
Expand Down
Loading
Loading