diff --git a/lib/adminutils/cleanup.go b/lib/adminutils/cleanup.go index ca60e499..f9f0e295 100644 --- a/lib/adminutils/cleanup.go +++ b/lib/adminutils/cleanup.go @@ -7,6 +7,11 @@ import ( func CreateRevision(changeset string, timestamp int64, isKeyRev bool, authorId *string, atext apool.AText, attribPool apool.APool) revision.Revision { if authorId != nil { + // Work on a deep copy: the struct parameter shares the caller's maps, + // so PutAttrib on it would add entries to the caller's pool without + // bumping the caller's NextNum, corrupting the pool (Pad.Check then + // fails with "numToAttrib length does not match nextNum"). + attribPool = attribPool.Clone() attribPool.PutAttrib(apool.Attribute{ Key: "author", Value: *authorId, diff --git a/lib/api/author/init.go b/lib/api/author/init.go index eb1eb05a..47fd2300 100644 --- a/lib/api/author/init.go +++ b/lib/api/author/init.go @@ -6,6 +6,7 @@ import ( "github.com/ether/etherpad-go/lib" "github.com/ether/etherpad-go/lib/api/errors" "github.com/ether/etherpad-go/lib/author" + "github.com/ether/etherpad-go/lib/db" "github.com/gofiber/fiber/v3" ) @@ -35,6 +36,12 @@ type PadsResponse struct { PadIds []string `json:"padIds"` } +// AnonymizeAuthorResponse represents the response after anonymizing an author +type AnonymizeAuthorResponse struct { + AuthorId string `json:"authorId" example:"a.s8oes9dhwrvt0zif"` + Anonymized bool `json:"anonymized" example:"true"` +} + // CreateAuthor godoc // @Summary Create a new author // @Description Creates a new author with the specified name @@ -213,6 +220,40 @@ func GetAuthorPads(initStore *lib.InitStore, authorManager *author.Manager) fibe } } +// AnonymizeAuthor godoc +// @Summary Anonymize an author (GDPR Art. 17 erasure) +// @Description Severs the token binding of an author, zeroes their display identity (name, color) and nulls their authorship on chat messages. Pad content and revisions are left intact. Idempotent. 
+// @Tags Authors +// @Accept json +// @Produce json +// @Param authorId path string true "Author ID" +// @Success 200 {object} AnonymizeAuthorResponse +// @Failure 400 {object} errors.Error +// @Failure 404 {object} errors.Error +// @Failure 500 {object} errors.Error +// @Security BearerAuth +// @Router /admin/api/author/{authorId}/anonymize [post] +func AnonymizeAuthor(initStore *lib.InitStore, authorManager *author.Manager) fiber.Handler { + return func(c fiber.Ctx) error { + authorId := c.Params("authorId") + if authorId == "" { + return c.Status(400).JSON(errors.NewInvalidParamError("authorId is required")) + } + + if err := authorManager.AnonymizeAuthor(authorId); err != nil { + if err.Error() == db.AuthorNotFoundError { + return c.Status(404).JSON(errors.AuthorNotFoundError) + } + return c.Status(500).JSON(errors.InternalServerError) + } + + return c.JSON(AnonymizeAuthorResponse{ + AuthorId: authorId, + Anonymized: true, + }) + } +} + func Init(initStore *lib.InitStore) { var authorManager = author.NewManager(initStore.Store) @@ -221,4 +262,5 @@ func Init(initStore *lib.InitStore) { initStore.PrivateAPI.Get("/author/:authorId", GetAuthor(initStore, authorManager)) initStore.PrivateAPI.Get("/author/:authorId/name", GetAuthorName(initStore, authorManager)) initStore.PrivateAPI.Get("/author/:authorId/pads", GetAuthorPads(initStore, authorManager)) + initStore.PrivateAPI.Post("/author/:authorId/anonymize", AnonymizeAuthor(initStore, authorManager)) } diff --git a/lib/api/pad/compactDiff.go b/lib/api/pad/compactDiff.go new file mode 100644 index 00000000..e4c556d5 --- /dev/null +++ b/lib/api/pad/compactDiff.go @@ -0,0 +1,178 @@ +package pad + +import ( + "github.com/ether/etherpad-go/lib" + errors2 "github.com/ether/etherpad-go/lib/api/errors" + utils2 "github.com/ether/etherpad-go/lib/api/utils" + "github.com/ether/etherpad-go/lib/apool" + "github.com/ether/etherpad-go/lib/author" + io2 "github.com/ether/etherpad-go/lib/io" + 
"github.com/ether/etherpad-go/lib/paddiff" + "github.com/ether/etherpad-go/lib/utils" + "github.com/ether/etherpad-go/lib/ws" + "github.com/gofiber/fiber/v3" +) + +// CompactPadRequest represents the request to compact a pad's revision history +type CompactPadRequest struct { + KeepRevisions int `json:"keepRevisions"` +} + +// CompactPadResponse represents the response after compacting a pad +type CompactPadResponse struct { + Ok bool `json:"ok"` + KeepRevisions int `json:"keepRevisions"` +} + +// CompactPad godoc +// @Summary Compact a pad's revision history +// @Description Collapses the pad's revision history so that only the last keepRevisions revisions are kept (original API: compactPad). The revisions below the cut are composed into a single base revision; pad text is preserved. Destructive — consider exporting the pad first. +// @Tags Pads +// @Accept json +// @Produce json +// @Param padId path string true "Pad ID" +// @Param request body CompactPadRequest true "Number of recent revisions to keep (must be >= 1 and lower than the pad's head revision)" +// @Success 200 {object} CompactPadResponse +// @Failure 400 {object} errors.Error +// @Failure 404 {object} errors.Error +// @Failure 500 {object} errors.Error +// @Security BearerAuth +// @Router /admin/api/pads/{padId}/compact [post] +func CompactPad(initStore *lib.InitStore) fiber.Handler { + return func(c fiber.Ctx) error { + padId := c.Params("padId") + var request CompactPadRequest + if err := c.Bind().Body(&request); err != nil { + return c.Status(400).JSON(errors2.InvalidRequestError) + } + + foundPad, err := utils2.GetPadSafe(padId, true, nil, nil, initStore.PadManager) + if err != nil { + return c.Status(404).JSON(errors2.PadNotFoundError) + } + + if request.KeepRevisions < 1 { + return c.Status(400).JSON(errors2.NewInvalidParamError("keepRevisions must be at least 1")) + } + if request.KeepRevisions >= foundPad.Head { + return c.Status(400).JSON(errors2.NewInvalidParamError("keepRevisions must be 
lower than the pad's head revision")) + } + + // Reuse the revision compaction the admin UI uses + // (AdminMessageHandler.DeleteRevisions). DeleteRevisions only needs the + // store, pad manager, pad message handler and logger, all of which are + // available from the InitStore, so a handler is wired up on the fly + // (hub is not used by DeleteRevisions). + adminHandler := ws.NewAdminMessageHandler(initStore.Store, initStore.Hooks, initStore.PadManager, initStore.Handler, initStore.Logger, nil, initStore.C) + if err := adminHandler.DeleteRevisions(padId, request.KeepRevisions); err != nil { + initStore.Logger.Errorf("Error compacting pad %s: %v", padId, err) + return c.Status(500).JSON(errors2.InternalServerError) + } + + return c.JSON(CompactPadResponse{ + Ok: true, + KeepRevisions: request.KeepRevisions, + }) + } +} + +// CreateDiffHTML godoc +// @Summary Create an HTML diff between two revisions +// @Description Returns the changes between startRev and endRev as HTML (original API: createDiffHTML). Insertions keep their author attribution (rendered with the author's color), deletions are re-inserted with a 'removed' attribute (rendered struck through). Also returns the list of authors involved in the changes. 
+// @Tags Pads +// @Accept json +// @Produce json +// @Param padId path string true "Pad ID" +// @Param startRev query int true "Start revision number" +// @Param endRev query int false "End revision number (defaults to the head revision)" +// @Success 200 {object} DiffHTMLResponse +// @Failure 400 {object} errors.Error +// @Failure 404 {object} errors.Error +// @Failure 500 {object} errors.Error +// @Security BearerAuth +// @Router /admin/api/pads/{padId}/diffHTML [get] +func CreateDiffHTML(initStore *lib.InitStore) fiber.Handler { + return func(c fiber.Ctx) error { + padId := c.Params("padId") + + foundPad, err := utils2.GetPadSafe(padId, true, nil, nil, initStore.PadManager) + if err != nil { + return c.Status(404).JSON(errors2.PadNotFoundError) + } + + startRevStr := c.Query("startRev") + if startRevStr == "" { + return c.Status(400).JSON(errors2.NewMissingParamError("startRev")) + } + startRev, err := utils.CheckValidRev(startRevStr) + if err != nil { + return c.Status(400).JSON(errors2.InvalidRevisionError) + } + + var endRev *int + if endRevStr := c.Query("endRev"); endRevStr != "" { + endRevNum, err := utils.CheckValidRev(endRevStr) + if err != nil { + return c.Status(400).JSON(errors2.InvalidRevisionError) + } + endRev = endRevNum + } + + // The original API clamps startRev to the head revision before + // validating the range; endRev is clamped inside GetValidRevisionRange. 
+ head := foundPad.Head + start := *startRev + if start > head { + start = head + } + + from, to, ok := paddiff.GetValidRevisionRange(start, endRev, head) + if !ok { + return c.Status(400).JSON(errors2.NewInvalidParamError("invalid revision range")) + } + + diffAText, authors, err := paddiff.CreateDiffAText(foundPad, &foundPad.Pool, from, to) + if err != nil { + initStore.Logger.Errorf("Error creating diff atext for pad %s: %v", padId, err) + return c.Status(500).JSON(errors2.InternalServerError) + } + + // Render the diff atext with the regular export-HTML pipeline (it + // understands the 'removed' attribute). GetPadHTML reads pad.AText when + // no revision is requested, so a shallow copy of the pad carrying the + // diff atext is passed. + padWithDiff := *foundPad + padWithDiff.AText = *diffAText + + authorColors := buildAuthorColors(&foundPad.Pool, initStore.AuthorManager) + exporter := io2.NewExportHtml(initStore.PadManager, initStore.AuthorManager, initStore.Hooks) + html, err := exporter.GetPadHTML(&padWithDiff, nil, authorColors) + if err != nil { + initStore.Logger.Errorf("Error rendering diff HTML for pad %s: %v", padId, err) + return c.Status(500).JSON(errors2.InternalServerError) + } + + return c.JSON(DiffHTMLResponse{ + HTML: html, + Authors: authors, + }) + } +} + +// buildAuthorColors maps the author IDs found in the pad's attribute pool to +// their colors (equivalent of the original pad.getAllAuthorColors; mirrors the +// unexported buildAuthorColorCache in lib/io/exportHtml.go). 
+func buildAuthorColors(padPool *apool.APool, authorManager *author.Manager) map[string]string { + authorColors := make(map[string]string) + for _, attr := range padPool.NumToAttrib { + if attr.Key == "author" && attr.Value != "" { + if _, exists := authorColors[attr.Value]; exists { + continue + } + if authorData, err := authorManager.GetAuthor(attr.Value); err == nil { + authorColors[attr.Value] = authorData.ColorId + } + } + } + return authorColors +} diff --git a/lib/api/pad/init.go b/lib/api/pad/init.go index 0c6200b7..9a5ff2fc 100644 --- a/lib/api/pad/init.go +++ b/lib/api/pad/init.go @@ -210,6 +210,8 @@ func Init(initStore *lib.InitStore) { // Pad operations initStore.PrivateAPI.Post("/pads/:padId/restoreRevision", RestoreRevision(initStore)) + initStore.PrivateAPI.Post("/pads/:padId/compact", CompactPad(initStore)) + initStore.PrivateAPI.Get("/pads/:padId/diffHTML", CreateDiffHTML(initStore)) initStore.PrivateAPI.Get("/pads/:padId/readOnlyID", GetReadOnlyID(initStore)) initStore.PrivateAPI.Get("/pads/:padId/authors", ListAuthorsOfPad(initStore)) initStore.PrivateAPI.Get("/pads/:padId/chatHead", GetChatHead(initStore)) diff --git a/lib/apool/APool.go b/lib/apool/APool.go index 277e99d7..0363fb66 100644 --- a/lib/apool/APool.go +++ b/lib/apool/APool.go @@ -172,23 +172,6 @@ func (a *APool) toDBRev() db.RevPool { } } -func (a *APool) clone() APool { - var newPool = APool{} - - for num, attrib := range a.NumToAttrib { - newPool.NumToAttrib[num] = attrib - newPool.AttribToNum[attrib] = num - } - - for attrib, num := range a.AttribToNum { - newPool.AttribToNum[attrib] = num - newPool.NumToAttrib[num] = attrib - } - - newPool.NextNum = a.NextNum - return newPool -} - type AttributeIterator func(attributeKey *string, attributeValue *string) /** @@ -210,3 +193,27 @@ func (a *APool) GetAttrib(num int) (*Attribute, error) { } return &pair, nil } + +// Clone returns a deep copy of the pool. Mutating the clone (e.g. 
PutAttrib) +// leaves the original untouched — a plain struct copy would share the +// underlying maps and corrupt the original's NextNum/map consistency. +func (a *APool) Clone() APool { + clone := APool{ + NumToAttrib: make(map[int]Attribute, len(a.NumToAttrib)), + AttribToNum: make(map[Attribute]int, len(a.AttribToNum)), + NextNum: a.NextNum, + } + for num, attrib := range a.NumToAttrib { + clone.NumToAttrib[num] = attrib + } + for attrib, num := range a.AttribToNum { + clone.AttribToNum[attrib] = num + } + if a.NumToAttribRaw != nil { + clone.NumToAttribRaw = make(map[int][]string, len(a.NumToAttribRaw)) + for num, raw := range a.NumToAttribRaw { + clone.NumToAttribRaw[num] = append([]string(nil), raw...) + } + } + return clone +} diff --git a/lib/author/authorManager.go b/lib/author/authorManager.go index 353da813..26d63afa 100644 --- a/lib/author/authorManager.go +++ b/lib/author/authorManager.go @@ -169,6 +169,47 @@ func (m *Manager) GetAuthor(authorId string) (*Author, error) { return &mappedDbAuthor, nil } +/** + * AnonymizeAuthor performs GDPR Art. 17 erasure for an author, mirroring the + * original Etherpad's AuthorManager.anonymizeAuthor (API 1.3.1): + * - the token binding that links a person to this author id is severed + * first, so a concurrent token lookup can no longer resolve the author + * mid-erasure, + * - the display identity on the author record is zeroed (name -> null, + * colorId -> 0) while the record itself is kept, + * - authorship on chat messages the author posted is nulled; the message + * text itself survives, + * - pad content, revisions and attribute pools are left intact: changeset + * references are opaque without the identity record. + * The operation is idempotent: re-running it leaves the same erased state. + * Returns db.AuthorNotFoundError if the author does not exist. 
+ * @param {String} authorId The id of the author + */ +func (m *Manager) AnonymizeAuthor(authorId string) error { + if _, err := m.Db.GetAuthor(authorId); err != nil { + return errors.New(db.AuthorNotFoundError) + } + + // Sever the token binding first, before touching anything else. + if err := m.Db.RemoveTokenOfAuthor(authorId); err != nil { + return err + } + + // Zero the display identity. The token was already removed above, so + // SaveAuthor's token-preservation has nothing left to preserve. + if err := m.saveAuthor(Author{ + Id: authorId, + Name: nil, + ColorId: "0", + Timestamp: time.Now().Unix(), + }); err != nil { + return err + } + + // Null authorship on chat messages the author posted. + return m.Db.ClearChatAuthorship(authorId) +} + func (m *Manager) GetPadsOfAuthor(authorId string) (*[]string, error) { padIds, err := m.Db.GetPadIdsOfAuthor(authorId) if err != nil { diff --git a/lib/db/DataStore.go b/lib/db/DataStore.go index cc30a6db..e9f81f62 100644 --- a/lib/db/DataStore.go +++ b/lib/db/DataStore.go @@ -51,6 +51,10 @@ type AuthorMethods interface { SaveAuthorName(authorId string, authorName string) error SaveAuthorColor(authorId string, authorColor string) error GetAuthors(ids []string) (*[]db.AuthorDB, error) + // RemoveTokenOfAuthor severs the token binding that links a person to the + // given author id (GDPR erasure). It is a no-op if the author does not + // exist or has no token. + RemoveTokenOfAuthor(authorId string) error } type SessionMethods interface { @@ -71,6 +75,9 @@ type ChatMethods interface { SaveChatMessage(padId string, head int, authorId *string, timestamp int64, text string) error GetChatsOfPad(padId string, start int, end int) (*[]db.ChatMessageDBWithDisplayName, error) GetAuthorIdsOfPadChats(id string) (*[]string, error) + // ClearChatAuthorship nulls the authorship of all chat messages posted by + // the given author while preserving the messages themselves (GDPR erasure). 
+ ClearChatAuthorship(authorId string) error } type ServerMethods interface { diff --git a/lib/db/MemoryDataStore.go b/lib/db/MemoryDataStore.go index 8b7e4281..4d70f24a 100644 --- a/lib/db/MemoryDataStore.go +++ b/lib/db/MemoryDataStore.go @@ -296,6 +296,17 @@ func (m *MemoryDataStore) SaveAuthorColor(authorId string, authorColor string) e return nil } +func (m *MemoryDataStore) RemoveTokenOfAuthor(authorId string) error { + retrievedAuthor, ok := m.authorStore[authorId] + if !ok { + return nil + } + + retrievedAuthor.Token = nil + m.authorStore[authorId] = retrievedAuthor + return nil +} + // ============== REVISION METHODS ============== func (m *MemoryDataStore) SaveRevision( @@ -483,6 +494,16 @@ func (m *MemoryDataStore) GetAuthorIdsOfPadChats(id string) (*[]string, error) { return &authorIds, nil } +func (m *MemoryDataStore) ClearChatAuthorship(authorId string) error { + for k, chatMessage := range m.chatPads { + if chatMessage.AuthorId != nil && *chatMessage.AuthorId == authorId { + chatMessage.AuthorId = nil + m.chatPads[k] = chatMessage + } + } + return nil +} + func (m *MemoryDataStore) RemoveChat(padId string) error { for k := range m.chatPads { if strings.HasPrefix(k, padId+":") { diff --git a/lib/db/MySQLDB.go b/lib/db/MySQLDB.go index a395563c..76de7689 100644 --- a/lib/db/MySQLDB.go +++ b/lib/db/MySQLDB.go @@ -421,6 +421,21 @@ func (d MysqlDB) SetAuthorByToken(token, authorId string) error { return err } +func (d MysqlDB) RemoveTokenOfAuthor(authorId string) error { + resultedSQL, args, err := mysql. + Update("globalAuthor"). + Set("token", nil). + Where(sq.Eq{"id": authorId}). + ToSql() + + if err != nil { + return err + } + + _, err = d.sqlDB.Exec(resultedSQL, args...) + return err +} + func (d MysqlDB) GetAuthorByToken(token string) (*string, error) { resultedSQL, args, err := mysql. Select("id"). @@ -694,7 +709,7 @@ func (d MysqlDB) GetChatsOfPad( resultedSQL, args, err := mysql. 
Select("pc.padId", "pc.padHead", "pc.chatText", "pc.authorId", "pc.timestamp", "ga.name"). From("padChat pc"). - Join("globalAuthor ga ON ga.id = pc.authorId"). + LeftJoin("globalAuthor ga ON ga.id = pc.authorId"). Where(sq.Eq{"pc.padId": padId}). Where(sq.GtOrEq{"pc.padHead": start}). Where(sq.LtOrEq{"pc.padHead": end}). @@ -731,6 +746,7 @@ func (d MysqlDB) GetAuthorIdsOfPadChats(id string) (*[]string, error) { Select("DISTINCT authorId"). From("padChat"). Where(sq.Eq{"padId": id}). + Where(sq.NotEq{"authorId": nil}). ToSql() if err != nil { @@ -755,6 +771,21 @@ func (d MysqlDB) GetAuthorIdsOfPadChats(id string) (*[]string, error) { return &authorIds, query.Err() } +func (d MysqlDB) ClearChatAuthorship(authorId string) error { + resultedSQL, args, err := mysql. + Update("padChat"). + Set("authorId", nil). + Where(sq.Eq{"authorId": authorId}). + ToSql() + + if err != nil { + return err + } + + _, err = d.sqlDB.Exec(resultedSQL, args...) + return err +} + func (d MysqlDB) RemoveChat(padId string) error { resultedSQL, args, err := mysql. Delete("padChat"). 
diff --git a/lib/db/PostgresDB.go b/lib/db/PostgresDB.go index f05ccf93..450636f5 100644 --- a/lib/db/PostgresDB.go +++ b/lib/db/PostgresDB.go @@ -388,6 +388,14 @@ func (d PostgresDB) SaveAuthorColor(authorId string, authorColor string) error { return nil } +func (d PostgresDB) RemoveTokenOfAuthor(authorId string) error { + ctx := context.Background() + _, err := d.pool.Exec(ctx, + `UPDATE "globalauthor" SET token = NULL, updated_at = NOW() WHERE id = $1`, + authorId) + return err +} + // ============== REVISION METHODS ============== func (d PostgresDB) SaveRevision( @@ -551,7 +559,7 @@ func (d PostgresDB) GetChatsOfPad( `SELECT pc."padid", pc."padhead", pc."chattext", pc."authorid", pc.timestamp, ga.name FROM "padchat" pc - JOIN "globalauthor" ga ON ga.id = pc."authorid" + LEFT JOIN "globalauthor" ga ON ga.id = pc."authorid" WHERE pc."padid" = $1 AND pc."padhead" >= $2 AND pc."padhead" <= $3 ORDER BY pc."padhead" ASC`, padId, start, end) @@ -579,7 +587,7 @@ func (d PostgresDB) GetAuthorIdsOfPadChats(id string) (*[]string, error) { ctx := context.Background() rows, err := d.pool.Query(ctx, - `SELECT DISTINCT "authorid" FROM "padchat" WHERE "padid" = $1`, + `SELECT DISTINCT "authorid" FROM "padchat" WHERE "padid" = $1 AND "authorid" IS NOT NULL`, id) if err != nil { return nil, err @@ -597,6 +605,14 @@ func (d PostgresDB) GetAuthorIdsOfPadChats(id string) (*[]string, error) { return &authorIds, rows.Err() } +func (d PostgresDB) ClearChatAuthorship(authorId string) error { + ctx := context.Background() + _, err := d.pool.Exec(ctx, + `UPDATE "padchat" SET "authorid" = NULL WHERE "authorid" = $1`, + authorId) + return err +} + func (d PostgresDB) RemoveChat(padId string) error { ctx := context.Background() _, err := d.pool.Exec(ctx, `DELETE FROM "padchat" WHERE "padid" = $1`, padId) diff --git a/lib/db/SQLiteDB.go b/lib/db/SQLiteDB.go index fd8582c6..92d88cfe 100644 --- a/lib/db/SQLiteDB.go +++ b/lib/db/SQLiteDB.go @@ -522,6 +522,21 @@ func (d SQLiteDB) 
SaveAuthorColor(authorId string, authorColor string) error { return err } +func (d SQLiteDB) RemoveTokenOfAuthor(authorId string) error { + resultedSQL, args, err := sq. + Update("globalAuthor"). + Set("token", nil). + Where(sq.Eq{"id": authorId}). + ToSql() + + if err != nil { + return err + } + + _, err = d.sqlDB.Exec(resultedSQL, args...) + return err +} + // ============== REVISION METHODS ============== func (d SQLiteDB) SaveRevision( @@ -710,7 +725,7 @@ func (d SQLiteDB) GetChatsOfPad( resultedSQL, args, err := sq. Select("pc.padId", "pc.padHead", "pc.chatText", "pc.authorId", "pc.created_at", "ga.name"). From("padChat pc"). - Join("globalAuthor ga ON ga.id = pc.authorId"). + LeftJoin("globalAuthor ga ON ga.id = pc.authorId"). Where(sq.Eq{"pc.padId": padId}). Where(sq.GtOrEq{"pc.padHead": start}). Where(sq.LtOrEq{"pc.padHead": end}). @@ -747,6 +762,7 @@ func (d SQLiteDB) GetAuthorIdsOfPadChats(id string) (*[]string, error) { Select("DISTINCT authorId"). From("padChat"). Where(sq.Eq{"padId": id}). + Where(sq.NotEq{"authorId": nil}). ToSql() if err != nil { @@ -771,6 +787,21 @@ func (d SQLiteDB) GetAuthorIdsOfPadChats(id string) (*[]string, error) { return &authorIds, query.Err() } +func (d SQLiteDB) ClearChatAuthorship(authorId string) error { + resultedSQL, args, err := sq. + Update("padChat"). + Set("authorId", nil). + Where(sq.Eq{"authorId": authorId}). + ToSql() + + if err != nil { + return err + } + + _, err = d.sqlDB.Exec(resultedSQL, args...) + return err +} + func (d SQLiteDB) RemoveChat(padId string) error { resultedSQL, args, err := sq. Delete("padChat"). 
diff --git a/lib/hooks/HookConstants.go b/lib/hooks/HookConstants.go index 7fba4da6..0c8879fe 100644 --- a/lib/hooks/HookConstants.go +++ b/lib/hooks/HookConstants.go @@ -1,6 +1,8 @@ package hooks const PadDefaultContentString = "padDefaultContent" +const PreAuthorizeString = "preAuthorize" +const PreAuthzFailureString = "preAuthzFailure" const PadLoadString = "padLoad" const PadCreateString = "padCreate" const PadUpdateString = "padUpdate" diff --git a/lib/hooks/events/preAuthorize.go b/lib/hooks/events/preAuthorize.go new file mode 100644 index 00000000..32a58d90 --- /dev/null +++ b/lib/hooks/events/preAuthorize.go @@ -0,0 +1,118 @@ +package events + +// PreAuthorizeDecision is the aggregated answer of all preAuthorize hook +// callbacks for a request. +type PreAuthorizeDecision int + +const ( + // PreAuthorizeDefer means no callback answered: fall through to the regular + // authenticate/authorize steps (the original's empty result list). + PreAuthorizeDefer PreAuthorizeDecision = iota + // PreAuthorizePermit means access was explicitly granted: skip the remaining + // steps (the original's `return next()` when every result is truthy). + PreAuthorizePermit + // PreAuthorizeDeny means access was explicitly denied: respond with 403 + // unless a preAuthzFailure callback overrides the response. + PreAuthorizeDeny +) + +// PreAuthorizeContext is passed to preAuthorize hook callbacks. It is the Go +// counterpart of the original hook context {req, res, next}: callbacks inspect +// Path/RequireAdmin and answer by calling Permit or Deny; calling neither +// defers to the regular authenticate/authorize steps. The classic use case is +// permitting static resource paths so they skip authentication. +// +// Semantics adapted from the original's hooks.aCallFirst: the Go hook system +// runs every registered callback (in unspecified order) instead of stopping at +// the first one that answers, so a single Deny always wins over any number of +// Permits. 
As in the original, Permits on /admin-auth pages (RequireAdmin) are +// filtered out so plugins cannot accidentally grant admin privileges to the +// general public. +type PreAuthorizeContext struct { + // Path is the request path, e.g. "/p/mypad" (input, read-only). + Path string + // RequireAdmin is true when the request is for an /admin-auth page (input, + // read-only). + RequireAdmin bool + + results []bool +} + +// Permit explicitly grants access. On admin pages the permit is ignored (see +// the type documentation). +func (c *PreAuthorizeContext) Permit() { + c.results = append(c.results, true) +} + +// Deny explicitly denies access. +func (c *PreAuthorizeContext) Deny() { + c.results = append(c.results, false) +} + +// Decision aggregates the callbacks' answers, mirroring the original's result +// handling: admin pages drop all permits, an empty result list defers, any +// remaining false denies, and all-true permits. +func (c *PreAuthorizeContext) Decision() PreAuthorizeDecision { + answered := false + for _, r := range c.results { + if c.RequireAdmin && r { + continue // never let a plugin permit grant admin access + } + answered = true + if !r { + return PreAuthorizeDeny + } + } + if !answered { + return PreAuthorizeDefer + } + return PreAuthorizePermit +} + +// PreAuthzFailureContext is passed to preAuthzFailure hook callbacks when a +// preAuthorize callback denied access. A callback can take over the error +// response — the original's "return truthy after writing to res" — by calling +// Respond (optionally adding headers via SetHeader, e.g. a Location header for +// a login redirect). If no callback responds, the default 403 Forbidden is +// sent. +type PreAuthzFailureContext struct { + // Path is the request path (input, read-only). + Path string + // RequireAdmin is true when the request is for an /admin-auth page (input, + // read-only). 
+ RequireAdmin bool + + handled bool + status int + body string + headers map[string]string +} + +// Respond marks the failure as handled and records the response to send +// instead of the default 403 Forbidden. +func (c *PreAuthzFailureContext) Respond(status int, body string) { + c.handled = true + c.status = status + c.body = body +} + +// SetHeader records a response header to set alongside the Respond status and +// body. +func (c *PreAuthzFailureContext) SetHeader(key, value string) { + if c.headers == nil { + c.headers = make(map[string]string) + } + c.headers[key] = value +} + +// Handled reports whether a callback overrode the default error response. +func (c *PreAuthzFailureContext) Handled() bool { return c.handled } + +// Status returns the recorded response status. +func (c *PreAuthzFailureContext) Status() int { return c.status } + +// Body returns the recorded response body. +func (c *PreAuthzFailureContext) Body() string { return c.body } + +// Headers returns the recorded response headers (may be nil). +func (c *PreAuthzFailureContext) Headers() map[string]string { return c.headers } diff --git a/lib/hooks/hook.go b/lib/hooks/hook.go index 8203c715..1842348b 100644 --- a/lib/hooks/hook.go +++ b/lib/hooks/hook.go @@ -35,6 +35,36 @@ func (h *Hook) ExecuteGetLineHtmlForExportHooks(ctx any) { h.ExecuteHooks("getLineHTMLForExport", ctx) } +// EnqueuePreAuthorizeHook registers a callback for the preAuthorize hook, +// which lets plugins permit or deny a request before authentication runs (see +// events.PreAuthorizeContext). 
+func (h *Hook) EnqueuePreAuthorizeHook(cb func(ctx *events.PreAuthorizeContext)) string { + return h.EnqueueHook(PreAuthorizeString, func(ctx any) { + if preAuthorizeCtx, ok := ctx.(*events.PreAuthorizeContext); ok { + cb(preAuthorizeCtx) + } + }) +} + +func (h *Hook) ExecutePreAuthorizeHooks(ctx *events.PreAuthorizeContext) { + h.ExecuteHooks(PreAuthorizeString, ctx) +} + +// EnqueuePreAuthzFailureHook registers a callback for the preAuthzFailure +// hook, which lets plugins override the default 403 response after a +// preAuthorize deny (see events.PreAuthzFailureContext). +func (h *Hook) EnqueuePreAuthzFailureHook(cb func(ctx *events.PreAuthzFailureContext)) string { + return h.EnqueueHook(PreAuthzFailureString, func(ctx any) { + if preAuthzFailureCtx, ok := ctx.(*events.PreAuthzFailureContext); ok { + cb(preAuthzFailureCtx) + } + }) +} + +func (h *Hook) ExecutePreAuthzFailureHooks(ctx *events.PreAuthzFailureContext) { + h.ExecuteHooks(PreAuthzFailureString, ctx) +} + func (h *Hook) EnqueueHook(key string, ctx func(ctx any)) string { var uuid = utils.UUID() var _, ok = h.hooks[key] diff --git a/lib/pad/webaccess.go b/lib/pad/webaccess.go index 1533cbc3..69144e84 100644 --- a/lib/pad/webaccess.go +++ b/lib/pad/webaccess.go @@ -10,6 +10,8 @@ import ( "time" "unicode/utf8" + "github.com/ether/etherpad-go/lib/hooks" + "github.com/ether/etherpad-go/lib/hooks/events" "github.com/ether/etherpad-go/lib/models/clientVars" "github.com/ether/etherpad-go/lib/models/webaccess" "github.com/ether/etherpad-go/lib/settings" @@ -44,14 +46,43 @@ func UserCanModify(padId *string, req *webaccess.SocketClientRequest, readOnlyMa return level != nil && *level != "readOnly" } +// CheckAccess keeps the historical signature (no hook system) and runs without +// any plugin preAuthorize/preAuthzFailure hooks. New callers should prefer +// CheckAccessWithHooks so that plugins get a chance to permit or deny early. 
func CheckAccess(ctx fiber.Ctx, logger *zap.SugaredLogger, retrievedSettings *settings.Settings, readOnlyManager *ReadOnlyManager) error { + return CheckAccessWithHooks(ctx, logger, retrievedSettings, readOnlyManager, nil) +} + +func CheckAccessWithHooks(ctx fiber.Ctx, logger *zap.SugaredLogger, retrievedSettings *settings.Settings, readOnlyManager *ReadOnlyManager, hookSystem *hooks.Hook) error { var requireAdmin = strings.HasPrefix(strings.ToLower(ctx.Path()), "/admin-auth") // /////////////////////////////////////////////////////////////////////////////////////////////// - // Step 1 of the original — the preAuthorize hook for early permit/deny by plugins — is not - // implemented: the Go plugin system has no preAuthorize/preAuthzFailure hooks yet. Until it - // does, every request goes through the regular authorize/authenticate steps below. + // Step 1: Check the preAuthorize hook for early permit/deny (permit is only allowed for + // non-admin pages). If any plugin explicitly grants or denies access, skip the remaining steps. + // Plugins can use the preAuthzFailure hook to override the default 403 error. // /////////////////////////////////////////////////////////////////////////////////////////////// + if hookSystem != nil { + preAuthorizeCtx := &events.PreAuthorizeContext{Path: ctx.Path(), RequireAdmin: requireAdmin} + hookSystem.ExecutePreAuthorizeHooks(preAuthorizeCtx) + switch preAuthorizeCtx.Decision() { + case events.PreAuthorizePermit: + return ctx.Next() + case events.PreAuthorizeDeny: + preAuthzFailureCtx := &events.PreAuthzFailureContext{Path: ctx.Path(), RequireAdmin: requireAdmin} + hookSystem.ExecutePreAuthzFailureHooks(preAuthzFailureCtx) + if preAuthzFailureCtx.Handled() { + for key, value := range preAuthzFailureCtx.Headers() { + ctx.Set(key, value) + } + return ctx.Status(preAuthzFailureCtx.Status()).SendString(preAuthzFailureCtx.Body()) + } + // No plugin handled the pre-authentication authorization failure. 
+ return ctx.Status(403).SendString("Forbidden") + case events.PreAuthorizeDefer: + // No plugin answered; fall through to the regular authorize/authenticate steps below. + } + } + // This helper is used in steps 2 and 4 below, so it may be called twice per access: once before // authentication is checked and once after (if settings.requireAuthorization is true). diff --git a/lib/paddiff/builder.go b/lib/paddiff/builder.go new file mode 100644 index 00000000..f07fa25d --- /dev/null +++ b/lib/paddiff/builder.go @@ -0,0 +1,64 @@ +package paddiff + +import ( + "github.com/ether/etherpad-go/lib/changeset" +) + +// opBuilder is a tiny local replacement for changeset.Builder. It is needed +// because changeset.Builder only accepts attributes through the unexported +// fields of changeset.KeepArgs, while the padDiff port has to pass already +// encoded attribute strings (e.g. "*3*4") to keep/insert operations. It uses +// the same SmartOpAssembler/Pack primitives as changeset.Builder, so the +// produced changesets are identical. +type opBuilder struct { + oldLen int + assem *changeset.SmartOpAssembler + charBank changeset.StringAssembler +} + +func newOpBuilder(oldLen int) *opBuilder { + return &opBuilder{ + oldLen: oldLen, + assem: changeset.NewSmartOpAssembler(), + charBank: changeset.NewStringAssembler(), + } +} + +// keep appends a '=' op over n chars (l of them newlines) carrying the given +// already-encoded attribute string. +func (b *opBuilder) keep(n int, l int, attribs string) { + opCode := "=" + op := changeset.NewOp(&opCode) + op.Chars = n + if l > 0 { + op.Lines = l + } + op.Attribs = attribs + b.assem.Append(op) +} + +// keepText appends '=' ops covering the given text (which may contain +// newlines) carrying the given already-encoded attribute string. 
+func (b *opBuilder) keepText(text string, attribs string) { + for _, op := range changeset.OpsFromText("=", text, nil, nil) { + op.Attribs = attribs + b.assem.Append(op) + } +} + +// insert appends '+' ops for the given text carrying the given +// already-encoded attribute string. +func (b *opBuilder) insert(text string, attribs string) { + for _, op := range changeset.OpsFromText("+", text, nil, nil) { + op.Attribs = attribs + b.assem.Append(op) + } + b.charBank.Append(text) +} + +// toString finalizes the assembler and packs the changeset. +func (b *opBuilder) toString() string { + b.assem.EndDocument() + newLen := b.oldLen + b.assem.LengthChange() + return changeset.Pack(b.oldLen, newLen, b.assem.String(), b.charBank.String()) +} diff --git a/lib/paddiff/paddiff.go b/lib/paddiff/paddiff.go new file mode 100644 index 00000000..593aab3b --- /dev/null +++ b/lib/paddiff/paddiff.go @@ -0,0 +1,523 @@ +// Package paddiff is a port of the original Etherpad src/node/utils/padDiff.ts. +// +// It composes the changesets between two revisions of a pad into a single +// "diff" atext: all insertions of the range keep their author attribution and +// all deletions are re-inserted at the position they were deleted from, +// carrying a 'removed' attribute plus the author who deleted them. The +// resulting atext can be rendered with the regular export-HTML pipeline +// (lib/io/exportHtml.go understands the 'removed' attribute) to visualize the +// changes between the two revisions. +package paddiff + +import ( + "errors" + "fmt" + "unicode/utf8" + + "github.com/ether/etherpad-go/lib/apool" + "github.com/ether/etherpad-go/lib/changeset" + db2 "github.com/ether/etherpad-go/lib/models/db" + "github.com/ether/etherpad-go/lib/utils" +) + +// Pad is the subset of *padModel.Pad that is needed to build a diff. It is an +// interface so the diff logic can be unit tested with an in-memory fake. 
+type Pad interface { + GetInternalRevisionAText(targetRev int) *apool.AText + GetRevision(revNumber int) (*db2.PadSingleRevision, error) +} + +// GetValidRevisionRange mirrors Pad.getValidRevisionRange of the original +// Etherpad: startRev must lie within [0, head]; endRev defaults to head when +// nil, is clamped to head and must not be lower than startRev. ok is false +// when the range is invalid. +func GetValidRevisionRange(startRev int, endRev *int, head int) (from int, to int, ok bool) { + if startRev < 0 || startRev > head { + return 0, 0, false + } + end := head + if endRev != nil { + end = *endRev + } + if end < startRev { + return 0, 0, false + } + if end > head { + end = head + } + return startRev, end, true +} + +// CreateDiffAText builds the diff atext between fromRev and toRev (both +// inclusive endpoints of the revision range, fromRev <= toRev <= head) and +// returns it together with the list of authors that contributed changes in +// that range. Like the original PadDiff, it adds the needed 'author' and +// 'removed' attributes to the supplied pool (in memory only; the pad record +// itself is not saved). +func CreateDiffAText(p Pad, pool *apool.APool, fromRev int, toRev int) (*apool.AText, []string, error) { + startAText := p.GetInternalRevisionAText(fromRev) + if startAText == nil { + return nil, nil, fmt.Errorf("could not load atext of revision %d", fromRev) + } + + // Strip the authorship of the start atext so that only the changes of the + // requested range are attributed (original: _createClearStartAtext). + atext, err := createClearStartAText(*startAText, pool) + if err != nil { + return nil, nil, err + } + + authors := make([]string, 0) + var superChangeset *string + + for rev := fromRev + 1; rev <= toRev; rev++ { + revision, err := p.GetRevision(rev) + if err != nil { + return nil, nil, err + } + + // Skip clearAuthorship changesets — they would wipe the authorship + // attribution we are trying to display. 
+ if isClearAuthorship(revision.Changeset, pool) { + continue + } + + author := "" + if revision.AuthorId != nil { + author = *revision.AuthorId + } + + cs, err := extendChangesetWithAuthor(revision.Changeset, author, pool) + if err != nil { + return nil, nil, err + } + + if !contains(authors, author) { + authors = append(authors, author) + } + + if superChangeset == nil { + superChangeset = &cs + } else { + composed, err := changeset.Compose(*superChangeset, cs, pool) + if err != nil { + return nil, nil, err + } + superChangeset = composed + } + } + + // If there are only clearAuthorship changesets we don't get a + // superChangeset, so we can skip this step. + if superChangeset != nil { + deletionChangeset, err := createDeletionChangeset(*superChangeset, atext, pool) + if err != nil { + return nil, nil, err + } + + // Apply the superChangeset, which includes all the insertions. + newAText, err := changeset.ApplyToAText(*superChangeset, atext, *pool) + if err != nil { + return nil, nil, err + } + atext = *newAText + + // Apply the deletionChangeset, which re-adds the deletions. + newAText, err = changeset.ApplyToAText(deletionChangeset, atext, *pool) + if err != nil { + return nil, nil, err + } + atext = *newAText + } + + return &atext, authors, nil +} + +func contains(haystack []string, needle string) bool { + for _, s := range haystack { + if s == needle { + return true + } + } + return false +} + +// createClearAuthorship builds a changeset that keeps the whole text while +// setting the 'author' attribute to the empty string (original: +// _createClearAuthorship). 
+func createClearAuthorship(atext apool.AText, pool *apool.APool) string { + authorAttrib := pool.PutAttrib(apool.Attribute{Key: "author", Value: ""}, nil) + attribs := "*" + utils.NumToString(authorAttrib) + + builder := newOpBuilder(utf8.RuneCountInString(atext.Text)) + builder.keepText(atext.Text, attribs) + return builder.toString() +} + +// createClearStartAText returns the atext with all authorship cleared +// (original: _createClearStartAtext). +func createClearStartAText(atext apool.AText, pool *apool.APool) (apool.AText, error) { + cs := createClearAuthorship(atext, pool) + newAText, err := changeset.ApplyToAText(cs, atext, *pool) + if err != nil { + return atext, err + } + return *newAText, nil +} + +// isClearAuthorship checks whether the changeset only resets the author +// attribute of the whole text to the anonymous author (original: +// _isClearAuthorship). +func isClearAuthorship(cs string, pool *apool.APool) bool { + unpacked, err := changeset.Unpack(cs) + if err != nil { + return false + } + + // check if there is nothing in the charBank and oldLength == newLength + if unpacked.CharBank != "" || unpacked.OldLen != unpacked.NewLen { + return false + } + + ops, err := changeset.DeserializeOps(unpacked.Ops) + if err != nil || ops == nil || len(*ops) != 1 { + return false + } + clearOperator := (*ops)[0] + + // check if this operator doesn't change text + if clearOperator.OpCode != "=" { + return false + } + + // check that this operator applies to the complete text. If the text ends + // with a new line, it is exactly one character less, else it has the same + // length. + if clearOperator.Chars != unpacked.OldLen-1 && clearOperator.Chars != unpacked.OldLen { + return false + } + + // Check that the operation has exactly one attribute and that it is an + // anonymous author attribute. 
+ appliedAttribs := changeset.AttribsFromString(clearOperator.Attribs, *pool) + if len(appliedAttribs) != 1 { + return false + } + return appliedAttribs[0].Key == "author" && appliedAttribs[0].Value == "" +} + +// extendChangesetWithAuthor marks all deletions of the changeset with the +// author who performed them plus a 'removed' attribute, and attribute-only +// changes with the author (original: _extendChangesetWithAuthor). +func extendChangesetWithAuthor(cs string, author string, pool *apool.APool) (string, error) { + unpacked, err := changeset.Unpack(cs) + if err != nil { + return "", err + } + ops, err := changeset.DeserializeOps(unpacked.Ops) + if err != nil { + return "", err + } + + assem := changeset.NewOpAssembler() + + authorAttrib := pool.PutAttrib(apool.Attribute{Key: "author", Value: author}, nil) + deletedAttrib := pool.PutAttrib(apool.Attribute{Key: "removed", Value: "true"}, nil) + attribs := "*" + utils.NumToString(authorAttrib) + "*" + utils.NumToString(deletedAttrib) + + for _, operator := range *ops { + if operator.OpCode == "-" { + // this is a delete operator, extend it with the author + operator.Attribs = attribs + } else if operator.OpCode == "=" && operator.Attribs != "" { + // this operator changes only attributes, mark which author did that + operator.Attribs += "*" + utils.NumToString(authorAttrib) + } + assem.Append(operator) + } + + return changeset.Pack(unpacked.OldLen, unpacked.NewLen, assem.String(), unpacked.CharBank), nil +} + +// createDeletionChangeset builds a changeset (applying to the result of cs) +// that re-inserts all text deleted by cs, carrying the attributes the text had +// before the deletion plus the 'removed'/author attributes that +// extendChangesetWithAuthor attached to the delete ops (original: +// _createDeletionChangeset). 
+func createDeletionChangeset(cs string, startAText apool.AText, pool *apool.APool) (string, error) { + lines := changeset.SplitTextLines(startAText.Text) + alines, err := changeset.SplitAttributionLines(startAText.Attribs, startAText.Text) + if err != nil { + return "", err + } + + // lines and alines are what the changeset is meant to apply to. They + // include final newlines on lines. + linesGet := func(idx int) string { + if idx >= 0 && idx < len(lines) { + return lines[idx] + } + return "" + } + aLinesGet := func(idx int) string { + if idx >= 0 && idx < len(alines) { + return alines[idx] + } + return "" + } + + curLine := 0 + curChar := 0 + curLineOpsLoaded := false + var curLineOps []changeset.Op + curLineOpsIdx := 0 + curLineOpsLine := 0 + plus := "+" + curLineNextOp := changeset.NewOp(&plus) + // mirrors the `curLineOpsNext = curLineOps.next()` generator state of the + // original implementation + curLineOpsNextDone := true + var curLineOpsNextVal changeset.Op + + loadLineOps := func(lineIdx int) error { + ops, err := changeset.DeserializeOps(aLinesGet(lineIdx)) + if err != nil { + return err + } + if ops == nil { + curLineOps = []changeset.Op{} + } else { + curLineOps = *ops + } + curLineOpsIdx = 0 + curLineOpsLoaded = true + return nil + } + nextLineOp := func() { + if curLineOpsIdx < len(curLineOps) { + curLineOpsNextVal = curLineOps[curLineOpsIdx] + curLineOpsIdx++ + curLineOpsNextDone = false + } else { + curLineOpsNextVal = changeset.NewOp(nil) + curLineOpsNextDone = true + } + } + + unpacked, err := changeset.Unpack(cs) + if err != nil { + return "", err + } + builder := newOpBuilder(unpacked.NewLen) + + consumeAttribRuns := func(numChars int, f func(n int, attribs string, endsLine bool)) error { + if !curLineOpsLoaded || curLineOpsLine != curLine { + if err := loadLineOps(curLine); err != nil { + return err + } + nextLineOp() + curLineOpsLine = curLine + indexIntoLine := 0 + for !curLineOpsNextDone { + curLineNextOp = curLineOpsNextVal + 
nextLineOp() + if indexIntoLine+curLineNextOp.Chars >= curChar { + curLineNextOp.Chars -= curChar - indexIntoLine + break + } + indexIntoLine += curLineNextOp.Chars + } + } + + for numChars > 0 { + if curLineNextOp.Chars == 0 && curLineOpsNextDone { + curLine++ + curChar = 0 + curLineOpsLine = curLine + curLineNextOp.Chars = 0 + if err := loadLineOps(curLine); err != nil { + return err + } + nextLineOp() + } + + if curLineNextOp.Chars == 0 { + if curLineOpsNextDone { + curLineNextOp = changeset.NewOp(nil) + } else { + curLineNextOp = curLineOpsNextVal + nextLineOp() + } + } + + if curLineNextOp.Chars == 0 { + // Defensive: the original relies on well-formed attribution + // lines; bail out instead of looping forever on malformed input. + return errors.New("ran out of attribution ops while consuming attrib runs") + } + + charsToUse := numChars + if curLineNextOp.Chars < charsToUse { + charsToUse = curLineNextOp.Chars + } + + f(charsToUse, curLineNextOp.Attribs, + charsToUse == curLineNextOp.Chars && curLineNextOp.Lines > 0) + numChars -= charsToUse + curLineNextOp.Chars -= charsToUse + curChar += charsToUse + } + + if curLineNextOp.Chars == 0 && curLineOpsNextDone { + curLine++ + curChar = 0 + } + return nil + } + + skip := func(n int, l int) error { + if l > 0 { + curLine += l + curChar = 0 + } else if curLineOpsLoaded && curLineOpsLine == curLine { + return consumeAttribRuns(n, func(int, string, bool) {}) + } else { + curChar += n + } + return nil + } + + nextText := func(numChars int) string { + collected := make([]rune, 0, numChars) + firstRunes := []rune(linesGet(curLine)) + if curChar < len(firstRunes) { + collected = append(collected, firstRunes[curChar:]...) + } + + lineNum := curLine + 1 + for len(collected) < numChars && lineNum <= len(lines) { + collected = append(collected, []rune(linesGet(lineNum))...) 
+ lineNum++ + } + + if len(collected) > numChars { + collected = collected[:numChars] + } + return string(collected) + } + + csOps, err := changeset.DeserializeOps(unpacked.Ops) + if err != nil { + return "", err + } + + for _, csOp := range *csOps { + switch csOp.OpCode { + case "=": + textBank := nextText(csOp.Chars) + + // Decide whether this equal operator is an attribute change. If + // the text this operator applies to is only a star, then this is a + // false positive and should be ignored. + if csOp.Attribs != "" && textBank != "*" { + attribs := changeset.FromString(csOp.Attribs, pool) + undoCache := make(map[string]string) + undoBackToAttribs := func(oldAttribsStr string) string { + if cached, ok := undoCache[oldAttribsStr]; ok { + return cached + } + oldAttribs := changeset.FromString(oldAttribsStr, pool) + backAttribs := changeset.NewAttributeMap(pool). + Set("author", ""). + Set("removed", "true") + for key, value := range attribs.Iter() { + oldValue := "" + if v := oldAttribs.Get(key); v != nil { + oldValue = *v + } + if oldValue != value { + backAttribs.Set(key, oldValue) + } + } + result := backAttribs.String() + undoCache[oldAttribsStr] = result + return result + } + + textLeftToProcess := []rune(textBank) + for len(textLeftToProcess) > 0 { + // process till the next line break or process only one + // line break + lengthToProcess := indexOfRune(textLeftToProcess, '\n') + lineBreak := false + switch lengthToProcess { + case -1: + lengthToProcess = len(textLeftToProcess) + case 0: + lineBreak = true + lengthToProcess = 1 + } + + processText := textLeftToProcess[:lengthToProcess] + textLeftToProcess = textLeftToProcess[lengthToProcess:] + + if lineBreak { + // just skip linebreaks, don't do an insert + keep for + // a linebreak + builder.keep(1, 1, "") + + // consume the attributes of this linebreak + if err := consumeAttribRuns(1, func(int, string, bool) {}); err != nil { + return "", err + } + } else { + // add the old text via an insert, with a 
deletion + // attribute + the author attribute of the author who + // deleted it + textBankIndex := 0 + if err := consumeAttribRuns(lengthToProcess, func(n int, attribs string, endsLine bool) { + oldAttribs := undoBackToAttribs(attribs) + builder.insert(string(processText[textBankIndex:textBankIndex+n]), oldAttribs) + textBankIndex += n + }); err != nil { + return "", err + } + + builder.keep(lengthToProcess, 0, "") + } + } + } else { + if err := skip(csOp.Chars, csOp.Lines); err != nil { + return "", err + } + builder.keep(csOp.Chars, csOp.Lines, "") + } + case "+": + builder.keep(csOp.Chars, csOp.Lines, "") + case "-": + textBank := []rune(nextText(csOp.Chars)) + textBankIndex := 0 + if err := consumeAttribRuns(csOp.Chars, func(n int, attribs string, endsLine bool) { + builder.insert(string(textBank[textBankIndex:textBankIndex+n]), attribs+csOp.Attribs) + textBankIndex += n + }); err != nil { + return "", err + } + } + } + + result, err := changeset.CheckRep(builder.toString()) + if err != nil { + return "", err + } + return *result, nil +} + +func indexOfRune(runes []rune, r rune) int { + for i, c := range runes { + if c == r { + return i + } + } + return -1 +} diff --git a/lib/paddiff/paddiff_test.go b/lib/paddiff/paddiff_test.go new file mode 100644 index 00000000..f947b0ac --- /dev/null +++ b/lib/paddiff/paddiff_test.go @@ -0,0 +1,233 @@ +package paddiff + +import ( + "errors" + "testing" + + "github.com/ether/etherpad-go/lib/apool" + "github.com/ether/etherpad-go/lib/changeset" + db2 "github.com/ether/etherpad-go/lib/models/db" + "github.com/ether/etherpad-go/lib/utils" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// fakePad is an in-memory Pad implementation for unit tests. 
+type fakePad struct { + atexts map[int]apool.AText + revs map[int]db2.PadSingleRevision +} + +func (f *fakePad) GetInternalRevisionAText(targetRev int) *apool.AText { + atext, ok := f.atexts[targetRev] + if !ok { + return nil + } + return &atext +} + +func (f *fakePad) GetRevision(revNumber int) (*db2.PadSingleRevision, error) { + rev, ok := f.revs[revNumber] + if !ok { + return nil, errors.New("revision not found") + } + return &rev, nil +} + +// buildRevision creates a revision changeset by splicing the given text and +// returns the changeset plus the resulting text. +func buildRevision(t *testing.T, pool *apool.APool, orig string, start int, ndel int, ins string, author string) (string, string) { + t.Helper() + + var attribs *string + if author != "" { + authorNum := pool.PutAttrib(apool.Attribute{Key: "author", Value: author}, nil) + attribStr := "*" + utils.NumToString(authorNum) + attribs = &attribStr + } + + cs, err := changeset.MakeSplice(orig, start, ndel, ins, attribs, pool) + require.NoError(t, err) + + newText := orig[:start] + ins + orig[start+ndel:] + return cs, newText +} + +func TestGetValidRevisionRange(t *testing.T) { + intPtr := func(i int) *int { return &i } + + tests := []struct { + name string + startRev int + endRev *int + head int + wantFrom int + wantTo int + wantOk bool + }{ + {"full range", 0, intPtr(3), 3, 0, 3, true}, + {"endRev defaults to head", 1, nil, 3, 1, 3, true}, + {"endRev clamped to head", 0, intPtr(99), 3, 0, 3, true}, + {"start equals end", 2, intPtr(2), 3, 2, 2, true}, + {"negative start invalid", -1, intPtr(2), 3, 0, 0, false}, + {"start beyond head invalid", 4, nil, 3, 0, 0, false}, + {"end below start invalid", 2, intPtr(1), 3, 0, 0, false}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + from, to, ok := GetValidRevisionRange(tc.startRev, tc.endRev, tc.head) + assert.Equal(t, tc.wantOk, ok) + if tc.wantOk { + assert.Equal(t, tc.wantFrom, from) + assert.Equal(t, tc.wantTo, to) + } + }) + } +} 
+ +func TestCreateDiffATextReinsertsDeletionsWithRemovedAttribute(t *testing.T) { + pool := apool.NewAPool() + + text0 := "Hello World\n" + atext0 := changeset.MakeAText(text0, nil) + + // Revision 1 (author a1) replaces "World" with "Etherpad" + cs1, text1 := buildRevision(t, &pool, text0, 6, 5, "Etherpad", "a1") + assert.Equal(t, "Hello Etherpad\n", text1) + + a1 := "a1" + p := &fakePad{ + atexts: map[int]apool.AText{0: atext0}, + revs: map[int]db2.PadSingleRevision{ + 1: {RevNum: 1, Changeset: cs1, AuthorId: &a1}, + }, + } + + atext, authors, err := CreateDiffAText(p, &pool, 0, 1) + require.NoError(t, err) + require.NotNil(t, atext) + + // The deleted text is re-inserted before the text that replaced it + assert.Equal(t, "Hello WorldEtherpad\n", atext.Text) + assert.Equal(t, []string{"a1"}, authors) + + // The re-inserted "World" (chars 6..11) must carry the 'removed' attribute + end := 11 + attribsAtWorld, err := changeset.Subattribution(atext.Attribs, 6, &end) + require.NoError(t, err) + ops, err := changeset.DeserializeOps(*attribsAtWorld) + require.NoError(t, err) + require.NotEmpty(t, *ops) + + foundRemoved := false + for _, op := range *ops { + for _, attr := range changeset.AttribsFromString(op.Attribs, pool) { + if attr.Key == "removed" && attr.Value == "true" { + foundRemoved = true + } + } + } + assert.True(t, foundRemoved, "re-inserted deletion must carry the removed attribute") + + // The inserted "Etherpad" (chars 11..19) must be attributed to a1 and must + // not be marked as removed + end = 19 + attribsAtInsert, err := changeset.Subattribution(atext.Attribs, 11, &end) + require.NoError(t, err) + insertOps, err := changeset.DeserializeOps(*attribsAtInsert) + require.NoError(t, err) + + foundAuthor := false + for _, op := range *insertOps { + for _, attr := range changeset.AttribsFromString(op.Attribs, pool) { + assert.NotEqual(t, "removed", attr.Key, "inserted text must not be marked as removed") + if attr.Key == "author" && attr.Value == "a1" { + 
foundAuthor = true + } + } + } + assert.True(t, foundAuthor, "inserted text must be attributed to its author") +} + +func TestCreateDiffATextComposesMultipleRevisionsAndCollectsAuthors(t *testing.T) { + pool := apool.NewAPool() + + text0 := "Hello World\n" + atext0 := changeset.MakeAText(text0, nil) + + // Revision 1 (author a1) replaces "World" with "Etherpad" + cs1, text1 := buildRevision(t, &pool, text0, 6, 5, "Etherpad", "a1") + // Revision 2 (author a2) appends " Rocks" + cs2, text2 := buildRevision(t, &pool, text1, 14, 0, " Rocks", "a2") + assert.Equal(t, "Hello Etherpad Rocks\n", text2) + + a1, a2 := "a1", "a2" + p := &fakePad{ + atexts: map[int]apool.AText{0: atext0}, + revs: map[int]db2.PadSingleRevision{ + 1: {RevNum: 1, Changeset: cs1, AuthorId: &a1}, + 2: {RevNum: 2, Changeset: cs2, AuthorId: &a2}, + }, + } + + atext, authors, err := CreateDiffAText(p, &pool, 0, 2) + require.NoError(t, err) + require.NotNil(t, atext) + + assert.Equal(t, "Hello WorldEtherpad Rocks\n", atext.Text) + assert.ElementsMatch(t, []string{"a1", "a2"}, authors) +} + +func TestCreateDiffATextSkipsClearAuthorshipChangesets(t *testing.T) { + pool := apool.NewAPool() + + text0 := "Hello World\n" + atext0 := changeset.MakeAText(text0, nil) + + // Revision 1 only clears the authorship of the whole text + clearCs := createClearAuthorship(atext0, &pool) + assert.True(t, isClearAuthorship(clearCs, &pool)) + + a1 := "a1" + p := &fakePad{ + atexts: map[int]apool.AText{0: atext0}, + revs: map[int]db2.PadSingleRevision{ + 1: {RevNum: 1, Changeset: clearCs, AuthorId: &a1}, + }, + } + + atext, authors, err := CreateDiffAText(p, &pool, 0, 1) + require.NoError(t, err) + require.NotNil(t, atext) + + // No diffable change happened, so the text stays the same and no author is + // reported + assert.Equal(t, text0, atext.Text) + assert.Empty(t, authors) +} + +func TestIsClearAuthorshipRejectsRegularChangesets(t *testing.T) { + pool := apool.NewAPool() + text0 := "Hello World\n" + + cs, _ := 
buildRevision(t, &pool, text0, 6, 5, "Etherpad", "a1") + assert.False(t, isClearAuthorship(cs, &pool)) +} + +func TestCreateDiffATextEmptyRangeReturnsClearedStartAText(t *testing.T) { + pool := apool.NewAPool() + text0 := "Hello World\n" + atext0 := changeset.MakeAText(text0, nil) + + p := &fakePad{ + atexts: map[int]apool.AText{0: atext0}, + revs: map[int]db2.PadSingleRevision{}, + } + + atext, authors, err := CreateDiffAText(p, &pool, 0, 0) + require.NoError(t, err) + require.NotNil(t, atext) + assert.Equal(t, text0, atext.Text) + assert.Empty(t, authors) +} diff --git a/lib/server/server.go b/lib/server/server.go index 65c68c6e..c54c2aca 100644 --- a/lib/server/server.go +++ b/lib/server/server.go @@ -71,7 +71,7 @@ func InitServer(setupLogger *zap.SugaredLogger, uiAssets embed.FS, pluginAssets })) app.Use(func(c fiber.Ctx) error { - return pad.CheckAccess(c, setupLogger, &settings, readOnlyManager) + return pad.CheckAccessWithHooks(c, setupLogger, &settings, readOnlyManager, &retrievedHooks) }) padManager := pad.NewManager(dataStore, &retrievedHooks) diff --git a/lib/test/api/author/author_test.go b/lib/test/api/author/author_test.go index 973698fa..3c8b68cd 100644 --- a/lib/test/api/author/author_test.go +++ b/lib/test/api/author/author_test.go @@ -53,6 +53,18 @@ func TestAuthor(t *testing.T) { Name: "Get Author Name Not Found", Test: testGetAuthorNameNotFound, }, + testutils.TestRunConfig{ + Name: "Anonymize Author", + Test: testAnonymizeAuthor, + }, + testutils.TestRunConfig{ + Name: "Anonymize Author Not Found", + Test: testAnonymizeAuthorNotFound, + }, + testutils.TestRunConfig{ + Name: "Anonymize Author Idempotent", + Test: testAnonymizeAuthorIdempotent, + }, ) defer testDb.StartTestDBHandler() } @@ -262,3 +274,81 @@ func testGetAuthorNameNotFound(t *testing.T, tsStore testutils.TestDataStore) { require.NotNil(t, resp) assert.Equal(t, 404, resp.StatusCode) } + +// ========== Anonymize Author (GDPR Art. 
17 erasure) ==========
+
+func testAnonymizeAuthor(t *testing.T, tsStore testutils.TestDataStore) {
+	initStore := tsStore.ToInitStore()
+	author.Init(initStore)
+
+	// Author with a name, color, token binding and a chat message on a pad.
+	testName := "GDPR Test Author"
+	createdAuthor, err := tsStore.AuthorManager.CreateAuthor(&testName)
+	require.NoError(t, err)
+	require.NoError(t, tsStore.AuthorManager.SetAuthorColor(createdAuthor.Id, "#123abc"))
+	require.NoError(t, tsStore.DS.SetAuthorByToken("api-anonymize-token", createdAuthor.Id))
+
+	padText := "anonymize pad text\n"
+	padId := "anonymizeApiPad"
+	_, err = tsStore.PadManager.GetPad(padId, &padText, &createdAuthor.Id)
+	require.NoError(t, err)
+	require.NoError(t, tsStore.DS.SaveChatMessage(padId, 0, &createdAuthor.Id, 4711, "identifying chat text"))
+
+	req := httptest.NewRequest("POST", "/admin/api/author/"+createdAuthor.Id+"/anonymize", nil)
+	resp, err := initStore.C.Test(req)
+	require.NoError(t, err)
+	require.NotNil(t, resp)
+	assert.Equal(t, 200, resp.StatusCode)
+
+	// Name is scrubbed but the author record still exists.
+	req = httptest.NewRequest("GET", "/admin/api/author/"+createdAuthor.Id+"/name", nil)
+	resp, err = initStore.C.Test(req)
+	require.NoError(t, err)
+	require.NotNil(t, resp)
+	assert.Equal(t, 200, resp.StatusCode)
+
+	var nameResponse author.AuthorNameResponse
+	body, err := io.ReadAll(resp.Body) // an unparsable body must fail here, not pass as an empty name
+	require.NoError(t, json.Unmarshal(body, &nameResponse), "name response must be valid JSON (read error: %v)", err)
+	assert.Equal(t, "", nameResponse.AuthorName, "author name must be scrubbed")
+
+	// Token binding is severed.
+	_, err = tsStore.DS.GetAuthorByToken("api-anonymize-token")
+	assert.Error(t, err, "token must no longer resolve to the author")
+
+	// Chat message survives, authorship is nulled.
+	chats, err := tsStore.DS.GetChatsOfPad(padId, 0, 0)
+	require.NoError(t, err)
+	require.Len(t, *chats, 1)
+	assert.Nil(t, (*chats)[0].AuthorId, "chat authorship must be nulled")
+	assert.Equal(t, "identifying chat text", (*chats)[0].Message)
+}
+
+func testAnonymizeAuthorNotFound(t *testing.T, tsStore testutils.TestDataStore) {
+	initStore := tsStore.ToInitStore()
+	author.Init(initStore)
+
+	req := httptest.NewRequest("POST", "/admin/api/author/a.unknownAuthor9876/anonymize", nil)
+	resp, err := initStore.C.Test(req)
+
+	require.NoError(t, err)
+	require.NotNil(t, resp)
+	assert.Equal(t, 404, resp.StatusCode)
+}
+
+func testAnonymizeAuthorIdempotent(t *testing.T, tsStore testutils.TestDataStore) {
+	initStore := tsStore.ToInitStore()
+	author.Init(initStore)
+
+	testName := "GDPR Idempotent Author"
+	createdAuthor, err := tsStore.AuthorManager.CreateAuthor(&testName)
+	require.NoError(t, err)
+
+	for i := 0; i < 2; i++ {
+		req := httptest.NewRequest("POST", "/admin/api/author/"+createdAuthor.Id+"/anonymize", nil)
+		resp, err := initStore.C.Test(req)
+		require.NoError(t, err)
+		require.NotNil(t, resp)
+		assert.Equal(t, 200, resp.StatusCode, "anonymize call %d must succeed", i+1)
+	}
+}
diff --git a/lib/test/api/pad/pad_compact_diff_api_test.go b/lib/test/api/pad/pad_compact_diff_api_test.go
new file mode 100644
index 00000000..e911621d
--- /dev/null
+++ b/lib/test/api/pad/pad_compact_diff_api_test.go
@@ -0,0 +1,267 @@
+package pad
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http/httptest"
+	"testing"
+	"time"
+
+	"github.com/ether/etherpad-go/lib"
+	"github.com/ether/etherpad-go/lib/api/pad"
+	"github.com/ether/etherpad-go/lib/test/testutils"
+	"github.com/gofiber/fiber/v3"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestPadCompactAndDiffAPI(t *testing.T) {
+	testDb := testutils.NewTestDBHandler(t)
+
+	testDb.AddTests(
+		// compactPad
+		testutils.TestRunConfig{
+			Name: "CompactPad shrinks revision history and keeps text",
+			Test: testCompactPadSuccess,
+		},
+		testutils.TestRunConfig{
+			Name: "CompactPad pad not found returns 404",
+			Test: testCompactPadNotFound,
+		},
+		testutils.TestRunConfig{
+			Name: "CompactPad invalid keepRevisions returns 400",
+			Test: testCompactPadInvalidKeepRevisions,
+		},
+		// createDiffHTML
+		testutils.TestRunConfig{
+			Name: "CreateDiffHTML returns diff html and authors",
+			Test: testCreateDiffHTMLSuccess,
+		},
+		testutils.TestRunConfig{
+			Name: "CreateDiffHTML invalid revisions return 400",
+			Test: testCreateDiffHTMLInvalidRevs,
+		},
+		testutils.TestRunConfig{
+			Name: "CreateDiffHTML pad not found returns 404",
+			Test: testCreateDiffHTMLNotFound,
+		},
+	)
+
+	defer testDb.StartTestDBHandler()
+}
+
+// setPadTextViaAPI updates the pad text through the public admin API, creating
+// one new revision per call.
+func setPadTextViaAPI(t *testing.T, initStore *lib.InitStore, padId string, text string, authorId string) {
+	t.Helper()
+
+	reqBody := pad.SetTextRequest{
+		Text:     text,
+		AuthorId: authorId,
+	}
+	body, _ := json.Marshal(reqBody)
+
+	req := httptest.NewRequest("POST", "/admin/api/pads/"+padId+"/text", bytes.NewBuffer(body))
+	req.Header.Set("Content-Type", "application/json")
+	resp, err := initStore.C.Test(req)
+
+	require.NoError(t, err) // resp may be nil on error; abort before dereferencing it
+	assert.Equal(t, 200, resp.StatusCode)
+}
+
+// getRevisionsCountViaAPI returns the pad's head revision number via the API.
+func getRevisionsCountViaAPI(t *testing.T, initStore *lib.InitStore, padId string) int {
+	t.Helper()
+
+	req := httptest.NewRequest("GET", "/admin/api/pads/"+padId+"/revisionsCount", nil)
+	resp, err := initStore.C.Test(req)
+
+	require.NoError(t, err) // resp may be nil on error; abort before dereferencing it
+	assert.Equal(t, 200, resp.StatusCode)
+
+	var response struct {
+		Revisions int `json:"revisions"`
+	}
+	body, _ := io.ReadAll(resp.Body)
+	assert.NoError(t, json.Unmarshal(body, &response))
+	return response.Revisions
+}
+
+// getCurrentPadText returns the pad's current text via the API.
+func getCurrentPadText(t *testing.T, initStore *lib.InitStore, padId string) string {
+	t.Helper()
+
+	req := httptest.NewRequest("GET", "/admin/api/pads/"+padId+"/text", nil)
+	resp, err := initStore.C.Test(req)
+
+	require.NoError(t, err) // resp may be nil on error; abort before dereferencing it
+	assert.Equal(t, 200, resp.StatusCode)
+
+	var response pad.TextResponse
+	body, _ := io.ReadAll(resp.Body)
+	assert.NoError(t, json.Unmarshal(body, &response))
+	return response.Text
+}
+
+// compactPadViaAPI issues the compact request and returns the response status.
+func compactPadViaAPI(t *testing.T, initStore *lib.InitStore, padId string, body []byte) int {
+	t.Helper()
+
+	req := httptest.NewRequest("POST", "/admin/api/pads/"+padId+"/compact", bytes.NewBuffer(body))
+	req.Header.Set("Content-Type", "application/json")
+	resp, err := initStore.C.Test(req, fiber.TestConfig{Timeout: 30 * time.Second})
+
+	require.NoError(t, err)
+	if resp.StatusCode == 500 {
+		debugBody, _ := io.ReadAll(resp.Body)
+		t.Logf("compact returned 500: %s", string(debugBody))
+	}
+	return resp.StatusCode
+}
+
+// ========== compactPad ==========
+
+func testCompactPadSuccess(t *testing.T, tsStore testutils.TestDataStore) {
+	initStore := tsStore.ToInitStore()
+	pad.Init(initStore)
+
+	testAuthor, err := tsStore.AuthorManager.CreateAuthor(nil)
+	require.NoError(t, err) // every later step needs a valid author
+
+	createTestPad(t, tsStore, "compactpad", "Initial\n")
+	for i := 1; i <= 4; i++ {
+		setPadTextViaAPI(t, initStore, "compactpad", fmt.Sprintf("Version %d", i), testAuthor.Id)
+	}
+	assert.Equal(t, 4, getRevisionsCountViaAPI(t, initStore, "compactpad"))
+
+	body, _ := json.Marshal(pad.CompactPadRequest{KeepRevisions: 2})
+	status := compactPadViaAPI(t, initStore, "compactpad", body)
+	assert.Equal(t, 200, status)
+
+	// The revision history must have been collapsed to the last 2 revisions
+	assert.Equal(t, 2, getRevisionsCountViaAPI(t, initStore, "compactpad"))
+
+	// The pad text must be preserved
+	assert.Contains(t, getCurrentPadText(t, initStore, "compactpad"), "Version 4")
+}
+
+func testCompactPadNotFound(t *testing.T, tsStore testutils.TestDataStore) {
+	initStore := tsStore.ToInitStore()
+	pad.Init(initStore)
+
+	body, _ := json.Marshal(pad.CompactPadRequest{KeepRevisions: 1})
+	status := compactPadViaAPI(t, initStore, "compactnonexistentpad", body)
+	assert.Equal(t, 404, status)
+}
+
+func testCompactPadInvalidKeepRevisions(t *testing.T, tsStore testutils.TestDataStore) {
+	initStore := tsStore.ToInitStore()
+	pad.Init(initStore)
+
+	testAuthor, err := tsStore.AuthorManager.CreateAuthor(nil)
+	require.NoError(t, err) // every later step needs a valid author
+
+	createTestPad(t, tsStore, "compactvalpad", "Initial\n")
+	setPadTextViaAPI(t, initStore, "compactvalpad", "Version 1", testAuthor.Id)
+	setPadTextViaAPI(t, initStore, "compactvalpad", "Version 2", testAuthor.Id)
+	// head is now 2
+
+	// keepRevisions must be >= 1
+	body, _ := json.Marshal(pad.CompactPadRequest{KeepRevisions: 0})
+	assert.Equal(t, 400, compactPadViaAPI(t, initStore, "compactvalpad", body))
+
+	// keepRevisions missing defaults to 0 and is rejected
+	assert.Equal(t, 400, compactPadViaAPI(t, initStore, "compactvalpad", []byte("{}")))
+
+	// keepRevisions must be lower than the head revision
+	body, _ = json.Marshal(pad.CompactPadRequest{KeepRevisions: 2})
+	assert.Equal(t, 400, compactPadViaAPI(t, initStore, "compactvalpad", body))
+
+	body, _ = json.Marshal(pad.CompactPadRequest{KeepRevisions: 5})
+	assert.Equal(t, 400, compactPadViaAPI(t, initStore, "compactvalpad", body))
+
+	// nothing was deleted
+	assert.Equal(t, 2, getRevisionsCountViaAPI(t, initStore, "compactvalpad"))
+}
+
+// ========== createDiffHTML ==========
+
+func testCreateDiffHTMLSuccess(t *testing.T, tsStore testutils.TestDataStore) {
+	initStore := tsStore.ToInitStore()
+	pad.Init(initStore)
+
+	testAuthor, err := tsStore.AuthorManager.CreateAuthor(nil)
+	require.NoError(t, err) // every later step needs a valid author
+
+	createTestPad(t, tsStore, "diffpad", "First line\n")
+	setPadTextViaAPI(t, initStore, "diffpad", "Second version text", testAuthor.Id)
+
+	req := httptest.NewRequest("GET", "/admin/api/pads/diffpad/diffHTML?startRev=0&endRev=1", nil)
+	resp, err := initStore.C.Test(req)
+	require.NoError(t, err) // resp may be nil on error; abort before dereferencing it
+	assert.Equal(t, 200, resp.StatusCode)
+
+	var response pad.DiffHTMLResponse
+	respBody, _ := io.ReadAll(resp.Body)
+	assert.NoError(t, json.Unmarshal(respBody, &response))
+
+	assert.NotEmpty(t, response.HTML)
+	// the inserted text must show up in the diff
+	assert.Contains(t, response.HTML, "Second version text")
+	// the deleted text is re-inserted carrying the 'removed' attribute
+	assert.Contains(t, response.HTML, "First line")
+	assert.Contains(t, response.HTML, "removed")
+	// the author of the change is reported
+	assert.Contains(t, response.Authors, testAuthor.Id)
+
+	// omitting endRev defaults to the head revision
+	req = httptest.NewRequest("GET", "/admin/api/pads/diffpad/diffHTML?startRev=0", nil)
+	resp, err = initStore.C.Test(req)
+	require.NoError(t, err) // resp may be nil on error; abort before dereferencing it
+	assert.Equal(t, 200, resp.StatusCode)
+
+	var responseNoEnd pad.DiffHTMLResponse
+	respBody, _ = io.ReadAll(resp.Body)
+	assert.NoError(t, json.Unmarshal(respBody, &responseNoEnd))
+	// The CSS block ordering of the export is map-iteration dependent, so only
+	// compare the diff content itself.
+	assert.Contains(t, responseNoEnd.HTML, "Second version text")
+	assert.Contains(t, responseNoEnd.HTML, "First line")
+	assert.Contains(t, responseNoEnd.HTML, "removed")
+	assert.Contains(t, responseNoEnd.Authors, testAuthor.Id)
+}
+
+func testCreateDiffHTMLInvalidRevs(t *testing.T, tsStore testutils.TestDataStore) {
+	initStore := tsStore.ToInitStore()
+	pad.Init(initStore)
+
+	testAuthor, err := tsStore.AuthorManager.CreateAuthor(nil)
+	require.NoError(t, err) // every later step needs a valid author
+
+	createTestPad(t, tsStore, "diffvalpad", "First line\n")
+	setPadTextViaAPI(t, initStore, "diffvalpad", "Changed text", testAuthor.Id)
+
+	for _, query := range []string{
+		"",                     // startRev is required
+		"?startRev=abc",        // startRev must be a number
+		"?startRev=-1",         // startRev must not be negative
+		"?startRev=1&endRev=0", // endRev must not be lower than startRev
+	} {
+		req := httptest.NewRequest("GET", "/admin/api/pads/diffvalpad/diffHTML"+query, nil)
+		resp, err := initStore.C.Test(req)
+		require.NoError(t, err) // resp may be nil on error; abort before dereferencing it
+		assert.Equal(t, 400, resp.StatusCode, "query: %q", query)
+	}
+}
+
+func testCreateDiffHTMLNotFound(t *testing.T, tsStore testutils.TestDataStore) {
+	initStore := tsStore.ToInitStore()
+	pad.Init(initStore)
+
+	req := httptest.NewRequest("GET", "/admin/api/pads/diffnonexistentpad/diffHTML?startRev=0", nil)
+	resp, err := initStore.C.Test(req)
+	require.NoError(t, err) // resp may be nil on error; abort before dereferencing it
+	assert.Equal(t, 404, resp.StatusCode)
+}
diff --git a/lib/test/author/authorManager_test.go b/lib/test/author/authorManager_test.go
index 5eb62b69..646a16bd 100644
--- a/lib/test/author/authorManager_test.go
+++ b/lib/test/author/authorManager_test.go
@@ -40,6 +40,18 @@ func TestAuthorManager(t *testing.T) {
 			Name: "TestGetPadsOfAuthor",
 			Test: testGetPadsOfAuthor,
 		},
+		testutils.TestRunConfig{
+			Name: "TestAnonymizeAuthor",
+			Test: testAnonymizeAuthor,
+		},
+		testutils.TestRunConfig{
+			Name: "TestAnonymizeAuthor_UnknownAuthor",
+			Test: testAnonymizeAuthorUnknown,
+		},
+		testutils.TestRunConfig{
+			Name: "TestAnonymizeAuthor_Idempotent",
+			Test: testAnonymizeAuthorIdempotent,
+		},
 	)
 }
 
@@ -160,3 +172,84 @@ func testGetPadsOfAuthor(t *testing.T, dbHandler testutils.TestDataStore) {
 		t.Fatalf("unexpected pads")
 	}
 }
+
+// testAnonymizeAuthor mirrors the original Etherpad's
+// AuthorManager.anonymizeAuthor (GDPR Art. 17 erasure): the display identity
+// (name, color) is zeroed, the token binding that links a person to the
+// author id is severed, and authorship on chat messages is nulled while the
+// message text itself is preserved. Pad content and revisions stay intact.
+func testAnonymizeAuthor(t *testing.T, dbHandler testutils.TestDataStore) {
+	name := "Alice GDPR"
+	createdAuthor, err := dbHandler.AuthorManager.CreateAuthor(&name)
+	if err != nil {
+		t.Fatalf("failed to create author: %v", err)
+	}
+	assert.NoError(t, dbHandler.AuthorManager.SetAuthorColor(createdAuthor.Id, "#aabbcc"))
+
+	token := "anonymize-token-1"
+	assert.NoError(t, dbHandler.DS.SetAuthorByToken(token, createdAuthor.Id))
+
+	// Author writes to a pad and posts a chat message.
+	padText := "GDPR pad text\n"
+	padId := "anonymizePad"
+	_, err = dbHandler.PadManager.GetPad(padId, &padText, &createdAuthor.Id)
+	if err != nil {
+		t.Fatalf("failed to create pad: %v", err)
+	}
+	assert.NoError(t, dbHandler.DS.SaveChatMessage(padId, 0, &createdAuthor.Id, 12345, "my secret chat message"))
+
+	assert.NoError(t, dbHandler.AuthorManager.AnonymizeAuthor(createdAuthor.Id))
+
+	// Display identity is zeroed (name -> nil, colorId -> "0").
+	scrubbed, err := dbHandler.AuthorManager.GetAuthor(createdAuthor.Id)
+	if err != nil {
+		t.Fatalf("anonymized author record must still exist: %v", err)
+	}
+	assert.Nil(t, scrubbed.Name, "name must be scrubbed")
+	assert.Equal(t, "0", scrubbed.ColorId, "colorId must be zeroed")
+
+	// Token binding is severed, the token can no longer resolve the author.
+	resolved, err := dbHandler.DS.GetAuthorByToken(token)
+	assert.Error(t, err, "token must no longer resolve to the author, got %v", resolved)
+
+	// Chat message survives but its authorship is nulled.
+	chats, err := dbHandler.DS.GetChatsOfPad(padId, 0, 0)
+	if err != nil {
+		t.Fatalf("failed to load chats: %v", err)
+	}
+	if len(*chats) != 1 {
+		t.Fatalf("chat message must survive anonymization, got %d messages", len(*chats))
+	}
+	chat := (*chats)[0]
+	assert.Nil(t, chat.AuthorId, "chat authorship must be nulled")
+	assert.Nil(t, chat.DisplayName, "chat display name must be gone")
+	assert.Equal(t, "my secret chat message", chat.Message, "chat text is preserved")
+}
+
+func testAnonymizeAuthorUnknown(t *testing.T, dbHandler testutils.TestDataStore) {
+	err := dbHandler.AuthorManager.AnonymizeAuthor("a.doesNotExist123456")
+	if err == nil {
+		t.Fatalf("expected error for unknown author")
+	}
+	assert.Equal(t, db.AuthorNotFoundError, err.Error())
+}
+
+func testAnonymizeAuthorIdempotent(t *testing.T, dbHandler testutils.TestDataStore) {
+	name := "Bob GDPR"
+	createdAuthor, err := dbHandler.AuthorManager.CreateAuthor(&name)
+	if err != nil {
+		t.Fatalf("failed to create author: %v", err)
+	}
+
+	assert.NoError(t, dbHandler.AuthorManager.AnonymizeAuthor(createdAuthor.Id))
+	// The original is idempotent: a second call succeeds and leaves the
+	// record in the same erased state.
+	assert.NoError(t, dbHandler.AuthorManager.AnonymizeAuthor(createdAuthor.Id))
+
+	scrubbed, err := dbHandler.AuthorManager.GetAuthor(createdAuthor.Id)
+	if err != nil {
+		t.Fatalf("anonymized author record must still exist: %v", err)
+	}
+	assert.Nil(t, scrubbed.Name)
+	assert.Equal(t, "0", scrubbed.ColorId)
+}
diff --git a/lib/test/pad/webaccess_test.go b/lib/test/pad/webaccess_test.go
new file mode 100644
index 00000000..721fad90
--- /dev/null
+++ b/lib/test/pad/webaccess_test.go
@@ -0,0 +1,165 @@
+package pad
+
+import (
+	"net/http/httptest"
+	"testing"
+	"time"
+
+	"github.com/ether/etherpad-go/lib/db"
+	"github.com/ether/etherpad-go/lib/hooks"
+	"github.com/ether/etherpad-go/lib/hooks/events"
+	"github.com/ether/etherpad-go/lib/pad"
+	"github.com/ether/etherpad-go/lib/settings"
+	"github.com/gofiber/fiber/v3"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	"go.uber.org/zap"
+)
+
+// newWebAccessApp builds a minimal fiber app with the CheckAccess middleware in
+// front of a catch-all pad route, mirroring how lib/server/server.go installs it.
+func newWebAccessApp(hookSystem *hooks.Hook, retrievedSettings *settings.Settings) *fiber.App {
+	app := fiber.New()
+	readOnlyManager := pad.NewReadOnlyManager(db.NewMemoryDataStore())
+	logger := zap.NewNop().Sugar()
+	app.Use(func(c fiber.Ctx) error {
+		return pad.CheckAccessWithHooks(c, logger, retrievedSettings, readOnlyManager, hookSystem)
+	})
+	app.Get("/p/*", func(c fiber.Ctx) error {
+		return c.SendString("pad content")
+	})
+	return app
+}
+
+// The admin-auth 401 branch sleeps one second to slow down brute force attacks,
+// which is exactly fiber's default Test timeout — give those requests headroom.
+var adminTestConfig = fiber.TestConfig{Timeout: 5 * time.Second, FailOnTimeout: true}
+
+func TestCheckAccessWithoutPreAuthorizeHooksUnchanged(t *testing.T) {
+	// No registered preAuthorize hook (and even a nil hook system) must leave the
+	// existing behavior untouched.
+	runUnchanged := func(hookSystem *hooks.Hook) func(t *testing.T) {
+		return func(t *testing.T) {
+			// Without authentication requirements a pad is freely accessible.
+			app := newWebAccessApp(hookSystem, &settings.Settings{})
+			resp, err := app.Test(httptest.NewRequest("GET", "/p/testpad", nil))
+			require.NoError(t, err)
+			assert.Equal(t, 200, resp.StatusCode)
+
+			// With requireAuthentication an unauthenticated request gets a 401.
+			app = newWebAccessApp(hookSystem, &settings.Settings{RequireAuthentication: true})
+			resp, err = app.Test(httptest.NewRequest("GET", "/p/testpad", nil))
+			require.NoError(t, err)
+			assert.Equal(t, 401, resp.StatusCode)
+			assert.Contains(t, resp.Header.Get("WWW-Authenticate"), "Basic")
+		}
+	}
+
+	emptyHooks := hooks.NewHook()
+	t.Run("nilHookSystem", runUnchanged(nil))
+	t.Run("emptyHookSystem", runUnchanged(&emptyHooks))
+}
+
+func TestCheckAccessPreAuthorizeDeny(t *testing.T) {
+	hookSystem := hooks.NewHook()
+	var seenPath string
+	hookSystem.EnqueuePreAuthorizeHook(func(ctx *events.PreAuthorizeContext) {
+		seenPath = ctx.Path
+		ctx.Deny()
+	})
+
+	// Even with no authentication requirement at all, an explicit deny rejects the
+	// request before the regular steps run.
+	app := newWebAccessApp(&hookSystem, &settings.Settings{})
+	resp, err := app.Test(httptest.NewRequest("GET", "/p/testpad", nil))
+	require.NoError(t, err)
+	assert.Equal(t, 403, resp.StatusCode)
+	assert.Equal(t, "/p/testpad", seenPath)
+}
+
+func TestCheckAccessPreAuthorizeDenyWinsOverPermit(t *testing.T) {
+	hookSystem := hooks.NewHook()
+	hookSystem.EnqueuePreAuthorizeHook(func(ctx *events.PreAuthorizeContext) { ctx.Permit() })
+	hookSystem.EnqueuePreAuthorizeHook(func(ctx *events.PreAuthorizeContext) { ctx.Deny() })
+
+	app := newWebAccessApp(&hookSystem, &settings.Settings{})
+	resp, err := app.Test(httptest.NewRequest("GET", "/p/testpad", nil))
+	require.NoError(t, err)
+	assert.Equal(t, 403, resp.StatusCode)
+}
+
+func TestCheckAccessPreAuthorizePermitBypassesAuthentication(t *testing.T) {
+	hookSystem := hooks.NewHook()
+	hookSystem.EnqueuePreAuthorizeHook(func(ctx *events.PreAuthorizeContext) { ctx.Permit() })
+
+	// requireAuthentication is on and the request carries no credentials, yet the
+	// explicit permit skips the remaining steps for this non-admin page.
+	app := newWebAccessApp(&hookSystem, &settings.Settings{RequireAuthentication: true})
+	resp, err := app.Test(httptest.NewRequest("GET", "/p/testpad", nil))
+	require.NoError(t, err)
+	assert.Equal(t, 200, resp.StatusCode)
+}
+
+func TestCheckAccessPreAuthorizePermitDoesNotBypassAdmin(t *testing.T) {
+	hookSystem := hooks.NewHook()
+	hookSystem.EnqueuePreAuthorizeHook(func(ctx *events.PreAuthorizeContext) {
+		assert.True(t, ctx.RequireAdmin)
+		ctx.Permit()
+	})
+
+	// Permits are filtered out on /admin-auth pages (so plugins cannot
+	// accidentally grant admin privileges); the request falls through to the
+	// regular steps and fails authentication.
+	app := newWebAccessApp(&hookSystem, &settings.Settings{})
+	resp, err := app.Test(httptest.NewRequest("GET", "/admin-auth/", nil), adminTestConfig)
+	require.NoError(t, err)
+	assert.Equal(t, 401, resp.StatusCode)
+}
+
+func TestCheckAccessPreAuthorizeDenyAppliesToAdmin(t *testing.T) {
+	hookSystem := hooks.NewHook()
+	hookSystem.EnqueuePreAuthorizeHook(func(ctx *events.PreAuthorizeContext) { ctx.Deny() })
+
+	app := newWebAccessApp(&hookSystem, &settings.Settings{})
+	resp, err := app.Test(httptest.NewRequest("GET", "/admin-auth/", nil), adminTestConfig)
+	require.NoError(t, err)
+	assert.Equal(t, 403, resp.StatusCode)
+}
+
+func TestCheckAccessPreAuthzFailureOverridesResponse(t *testing.T) {
+	hookSystem := hooks.NewHook()
+	hookSystem.EnqueuePreAuthorizeHook(func(ctx *events.PreAuthorizeContext) { ctx.Deny() })
+	hookSystem.EnqueuePreAuthzFailureHook(func(ctx *events.PreAuthzFailureContext) {
+		ctx.SetHeader("Location", "/login")
+		ctx.Respond(302, "redirecting to login")
+	})
+
+	app := newWebAccessApp(&hookSystem, &settings.Settings{})
+	resp, err := app.Test(httptest.NewRequest("GET", "/p/testpad", nil))
+	require.NoError(t, err)
+	assert.Equal(t, 302, resp.StatusCode)
+	assert.Equal(t, "/login", resp.Header.Get("Location"))
+}
+
+func TestPreAuthorizeDecisionSemantics(t *testing.T) {
+	// No answer defers to the regular authenticate/authorize steps.
+	ctx := &events.PreAuthorizeContext{Path: "/p/x"}
+	assert.Equal(t, events.PreAuthorizeDefer, ctx.Decision())
+
+	// All permits -> permit.
+	ctx.Permit()
+	assert.Equal(t, events.PreAuthorizePermit, ctx.Decision())
+
+	// Any deny wins.
+	ctx.Deny()
+	assert.Equal(t, events.PreAuthorizeDeny, ctx.Decision())
+
+	// On admin pages permits are filtered out: a lone permit defers...
+	adminCtx := &events.PreAuthorizeContext{Path: "/admin-auth/", RequireAdmin: true}
+	adminCtx.Permit()
+	assert.Equal(t, events.PreAuthorizeDefer, adminCtx.Decision())
+
+	// ...while a deny still counts.
+	adminCtx.Deny()
+	assert.Equal(t, events.PreAuthorizeDeny, adminCtx.Decision())
+}