From 6ec4e4079105e2f22f613e917eccf32fd4490655 Mon Sep 17 00:00:00 2001 From: Rustam Gilyazov <16064414+rusq@users.noreply.github.com> Date: Sat, 11 Jan 2025 18:01:02 +1000 Subject: [PATCH 1/3] implement avatar download in export and archive --- cmd/slackdump/internal/archive/archive.go | 16 +++++- .../internal/archive/archive_wizard.go | 1 + cmd/slackdump/internal/archive/search.go | 9 ++- cmd/slackdump/internal/cfg/cfg.go | 18 ++++-- cmd/slackdump/internal/convertcmd/convert.go | 1 + cmd/slackdump/internal/dump/dump.go | 2 +- cmd/slackdump/internal/emoji/emoji.go | 2 +- cmd/slackdump/internal/export/v3.go | 4 ++ cmd/slackdump/internal/export/wizard.go | 1 + .../internal/ui/cfgui/common_params.go | 9 +++ internal/chunk/control/control.go | 12 +++- internal/chunk/control/noop.go | 7 +++ internal/chunk/control/workers.go | 8 ++- internal/chunk/dirproc/users.go | 1 + internal/chunk/transform/fileproc/avatar.go | 55 +++++++++++++++++++ .../chunk/transform/fileproc/avatar_test.go | 40 ++++++++++++++ internal/convert/chunkexp.go | 8 ++- processor/processor.go | 6 ++ stream/stream.go | 3 +- stream/stream_workers.go | 3 + 20 files changed, 187 insertions(+), 19 deletions(-) create mode 100644 internal/chunk/transform/fileproc/avatar.go create mode 100644 internal/chunk/transform/fileproc/avatar_test.go diff --git a/cmd/slackdump/internal/archive/archive.go b/cmd/slackdump/internal/archive/archive.go index 79d7b9a5..c0157c59 100644 --- a/cmd/slackdump/internal/archive/archive.go +++ b/cmd/slackdump/internal/archive/archive.go @@ -77,14 +77,26 @@ func RunArchive(ctx context.Context, cmd *base.Command, args []string) error { lg, ) defer stop() - // archive format has files stored in mattermost format. - subproc := fileproc.NewExport(fileproc.STmattermost, dl) + avdl, avstop := fileproc.NewDownloader( + ctx, + cfg.DownloadAvatars, + sess.Client(), + fsadapter.NewDirectory(cd.Name()), + lg, + ) + defer avstop() + var ( + // archive format has files stored in mattermost format. 
+ subproc = fileproc.NewExport(fileproc.STmattermost, dl) + avproc = fileproc.NewAvatarProc(avdl) + ) ctrl := control.New( cd, stream, control.WithLogger(lg), control.WithFiler(subproc), control.WithFlags(control.Flags{MemberOnly: cfg.MemberOnly, RecordFiles: cfg.RecordFiles}), + control.WithAvatarProcessor(avproc), ) if err := ctrl.Run(ctx, list); err != nil { base.SetExitStatus(base.SApplicationError) diff --git a/cmd/slackdump/internal/archive/archive_wizard.go b/cmd/slackdump/internal/archive/archive_wizard.go index 0fd3156d..ab649ada 100644 --- a/cmd/slackdump/internal/archive/archive_wizard.go +++ b/cmd/slackdump/internal/archive/archive_wizard.go @@ -35,6 +35,7 @@ func configuration() cfgui.Configuration { cfgui.ChannelIDs(&entryList, false), cfgui.MemberOnly(), cfgui.RecordFiles(), + cfgui.Avatars(), }, }, } diff --git a/cmd/slackdump/internal/archive/search.go b/cmd/slackdump/internal/archive/search.go index 032c0926..e1644491 100644 --- a/cmd/slackdump/internal/archive/search.go +++ b/cmd/slackdump/internal/archive/search.go @@ -151,7 +151,6 @@ func initController(ctx context.Context, args []string) (*control.Controller, fu fsadapter.NewDirectory(cd.Name()), lg, ) - pb := bootstrap.ProgressBar(ctx, lg, progressbar.OptionShowCount()) // progress bar stop := func() { dlstop() @@ -175,7 +174,13 @@ func initController(ctx context.Context, args []string) (*control.Controller, fu var ( subproc = fileproc.NewExport(fileproc.STmattermost, dl) stream = sess.Stream(sopts...) 
- ctrl = control.New(cd, stream, control.WithLogger(lg), control.WithFiler(subproc), control.WithFlags(control.Flags{RecordFiles: cfg.RecordFiles})) + ctrl = control.New( + cd, + stream, + control.WithLogger(lg), + control.WithFiler(subproc), + control.WithFlags(control.Flags{RecordFiles: cfg.RecordFiles}), + ) ) return ctrl, stop, nil } diff --git a/cmd/slackdump/internal/cfg/cfg.go b/cmd/slackdump/internal/cfg/cfg.go index ecdb4c83..174f75e1 100644 --- a/cmd/slackdump/internal/cfg/cfg.go +++ b/cmd/slackdump/internal/cfg/cfg.go @@ -41,9 +41,10 @@ var ( ForceEnterprise bool MachineIDOvr string // Machine ID override - MemberOnly bool - DownloadFiles bool - RecordFiles bool // record file chunks in chunk files. + MemberOnly bool + DownloadFiles bool + DownloadAvatars bool + RecordFiles bool // record file chunks in chunk files. // Oldest is the default timestamp of the oldest message to fetch, that is // used by the dump and export commands. @@ -91,6 +92,7 @@ const ( OmitChunkCacheFlag OmitMemberOnlyFlag OmitRecordFilesFlag + OmitDownloadAvatarsFlag OmitAll = OmitConfigFlag | OmitDownloadFlag | @@ -102,7 +104,8 @@ const ( OmitTimeframeFlag | OmitChunkCacheFlag | OmitMemberOnlyFlag | - OmitRecordFilesFlag + OmitRecordFilesFlag | + OmitDownloadAvatarsFlag ) // SetBaseFlags sets base flags @@ -129,9 +132,12 @@ func SetBaseFlags(fs *flag.FlagSet, mask FlagMask) { } if mask&OmitDownloadFlag == 0 { fs.BoolVar(&DownloadFiles, "files", true, "enables file attachments download (to disable, specify: -files=false)") + if mask&OmitRecordFilesFlag == 0 { + fs.BoolVar(&RecordFiles, "files-rec", false, "include file chunks in chunk files") + } } - if mask&OmitRecordFilesFlag == 0 && mask&OmitDownloadFlag == 0 { - fs.BoolVar(&RecordFiles, "files-rec", false, "include file chunks in chunk files") + if mask&OmitDownloadAvatarsFlag == 0 { + fs.BoolVar(&DownloadAvatars, "avatars", true, "enables user avatar download (placed in __avatars directory)") } if mask&OmitConfigFlag == 0 { 
fs.StringVar(&ConfigFile, "api-config", "", "configuration `file` with Slack API limits overrides.\nYou can generate one with default values with 'slackdump config new`") diff --git a/cmd/slackdump/internal/convertcmd/convert.go b/cmd/slackdump/internal/convertcmd/convert.go index 1807d151..8b7f57cd 100644 --- a/cmd/slackdump/internal/convertcmd/convert.go +++ b/cmd/slackdump/internal/convertcmd/convert.go @@ -121,6 +121,7 @@ func chunk2export(ctx context.Context, src, trg string, cflg convertflags) error cd, fsa, convert.WithIncludeFiles(cflg.withFiles), + convert.WithSrcFileLoc(sttFn), convert.WithTrgFileLoc(sttFn), convert.WithLogger(cfg.Log), ) diff --git a/cmd/slackdump/internal/dump/dump.go b/cmd/slackdump/internal/dump/dump.go index ff6cd7e4..14698d6c 100644 --- a/cmd/slackdump/internal/dump/dump.go +++ b/cmd/slackdump/internal/dump/dump.go @@ -38,7 +38,7 @@ var CmdDump = &base.Command{ Long: dumpMd, RequireAuth: true, PrintFlags: true, - FlagMask: cfg.OmitMemberOnlyFlag | cfg.OmitRecordFilesFlag, + FlagMask: cfg.OmitMemberOnlyFlag | cfg.OmitRecordFilesFlag | cfg.OmitDownloadAvatarsFlag, } func init() { diff --git a/cmd/slackdump/internal/emoji/emoji.go b/cmd/slackdump/internal/emoji/emoji.go index f2caf202..6507b94a 100644 --- a/cmd/slackdump/internal/emoji/emoji.go +++ b/cmd/slackdump/internal/emoji/emoji.go @@ -29,7 +29,7 @@ var CmdEmoji = &base.Command{ UsageLine: "slackdump emoji [flags]", Short: "download workspace emoticons ಠ_ಠ", Long: emojiMD, - FlagMask: cfg.OmitDownloadFlag | cfg.OmitConfigFlag | cfg.OmitChunkCacheFlag | cfg.OmitUserCacheFlag, + FlagMask: cfg.OmitDownloadFlag | cfg.OmitConfigFlag | cfg.OmitChunkCacheFlag | cfg.OmitUserCacheFlag | cfg.OmitRecordFilesFlag, RequireAuth: true, PrintFlags: true, } diff --git a/cmd/slackdump/internal/export/v3.go b/cmd/slackdump/internal/export/v3.go index 317085a6..bea68283 100644 --- a/cmd/slackdump/internal/export/v3.go +++ b/cmd/slackdump/internal/export/v3.go @@ -56,6 +56,9 @@ func export(ctx 
context.Context, sess *slackdump.Session, fsa fsadapter.FS, list dlEnabled := cfg.DownloadFiles && params.ExportStorageType != fileproc.STnone sdl, stop := fileproc.NewDownloader(ctx, dlEnabled, sess.Client(), fsa, lg) defer stop() + avdl, avstop := fileproc.NewDownloader(ctx, cfg.DownloadAvatars, sess.Client(), fsa, lg) + defer avstop() + avp := fileproc.NewAvatarProc(avdl) pb := bootstrap.ProgressBar(ctx, lg, progressbar.OptionShowCount()) // progress bar @@ -81,6 +84,7 @@ func export(ctx context.Context, sess *slackdump.Session, fsa fsadapter.FS, list control.WithLogger(lg), control.WithFlags(flags), control.WithTransformer(tf), + control.WithAvatarProcessor(avp), ) lg.InfoContext(ctx, "running export...") diff --git a/cmd/slackdump/internal/export/wizard.go b/cmd/slackdump/internal/export/wizard.go index a640e39e..0377d044 100644 --- a/cmd/slackdump/internal/export/wizard.go +++ b/cmd/slackdump/internal/export/wizard.go @@ -51,6 +51,7 @@ func (fl *exportFlags) configuration() cfgui.Configuration { )), }, cfgui.MemberOnly(), + cfgui.Avatars(), { Name: "Export Token", Value: fl.ExportToken, diff --git a/cmd/slackdump/internal/ui/cfgui/common_params.go b/cmd/slackdump/internal/ui/cfgui/common_params.go index acbe7c99..03b3e0cb 100644 --- a/cmd/slackdump/internal/ui/cfgui/common_params.go +++ b/cmd/slackdump/internal/ui/cfgui/common_params.go @@ -43,3 +43,12 @@ func RecordFiles() Parameter { Updater: updaters.NewBool(&cfg.RecordFiles), } } + +func Avatars() Parameter { + return Parameter{ + Name: "Download Avatars", + Value: Checkbox(cfg.DownloadAvatars), + Description: "Download avatars", + Updater: updaters.NewBool(&cfg.DownloadAvatars), + } +} diff --git a/internal/chunk/control/control.go b/internal/chunk/control/control.go index c74fa6fa..2c251fd9 100644 --- a/internal/chunk/control/control.go +++ b/internal/chunk/control/control.go @@ -35,6 +35,8 @@ type Controller struct { // files subprocessor, if not configured with options, it's a noop, as // it's not 
necessary for all use cases. filer processor.Filer + // avp is avatar downloader + avp processor.Avatars // lg is the logger lg *slog.Logger // flags @@ -51,6 +53,13 @@ func WithFiler(f processor.Filer) Option { } } +// WithAvatarProcessor configures the controller with an avatar downloader. +func WithAvatarProcessor(avp processor.Avatars) Option { + return func(c *Controller) { + c.avp = avp + } +} + // WithFlags configures the controller with flags. func WithFlags(f Flags) Option { return func(c *Controller) { @@ -83,6 +92,7 @@ func New(cd *chunk.Directory, s Streamer, opts ...Option) *Controller { s: s, filer: &noopFiler{}, tf: &noopTransformer{}, + avp: &noopAvatarProc{}, lg: slog.Default(), } for _, opt := range opts { @@ -167,7 +177,7 @@ func (c *Controller) Run(ctx context.Context, list *structures.EntityList) error wg.Add(1) go func() { defer wg.Done() - if err := userWorker(ctx, c.s, c.cd, c.tf); err != nil { + if err := userWorker(ctx, c.s, c.avp, c.cd, c.tf); err != nil { errC <- Error{"user", "worker", err} return } diff --git a/internal/chunk/control/noop.go b/internal/chunk/control/noop.go index ff2eaa64..5eb6b25e 100644 --- a/internal/chunk/control/noop.go +++ b/internal/chunk/control/noop.go @@ -7,6 +7,7 @@ import ( "context" "github.com/rusq/slack" + "github.com/rusq/slackdump/v3/internal/chunk" ) @@ -29,3 +30,9 @@ func (n *noopTransformer) Transform(ctx context.Context, id chunk.FileID) error func (n *noopTransformer) Wait() error { return nil } + +type noopAvatarProc struct{} + +func (n *noopAvatarProc) Avatars(ctx context.Context, users []slack.User) error { + return nil +} diff --git a/internal/chunk/control/workers.go b/internal/chunk/control/workers.go index f723553e..f5dd01f3 100644 --- a/internal/chunk/control/workers.go +++ b/internal/chunk/control/workers.go @@ -10,16 +10,20 @@ import ( "github.com/rusq/slackdump/v3/internal/structures" "github.com/rusq/slack" + "github.com/rusq/slackdump/v3/internal/chunk" 
"github.com/rusq/slackdump/v3/internal/chunk/dirproc" "github.com/rusq/slackdump/v3/internal/chunk/transform" "github.com/rusq/slackdump/v3/processor" ) -func userWorker(ctx context.Context, s Streamer, chunkdir *chunk.Directory, tf TransformStarter) error { - var users = make([]slack.User, 0, 100) +func userWorker(ctx context.Context, s Streamer, avp processor.Avatars, chunkdir *chunk.Directory, tf TransformStarter) error { + users := make([]slack.User, 0, 100) userproc, err := dirproc.NewUsers(chunkdir, dirproc.WithUsers(func(us []slack.User) error { users = append(users, us...) + if err := avp.Avatars(ctx, us); err != nil { + slog.Warn("error downloading avatars", "error", err) + } return nil })) if err != nil { diff --git a/internal/chunk/dirproc/users.go b/internal/chunk/dirproc/users.go index 9ecdfb98..901342ba 100644 --- a/internal/chunk/dirproc/users.go +++ b/internal/chunk/dirproc/users.go @@ -5,6 +5,7 @@ import ( "fmt" "github.com/rusq/slack" + "github.com/rusq/slackdump/v3/internal/chunk" "github.com/rusq/slackdump/v3/processor" ) diff --git a/internal/chunk/transform/fileproc/avatar.go b/internal/chunk/transform/fileproc/avatar.go new file mode 100644 index 00000000..49f06410 --- /dev/null +++ b/internal/chunk/transform/fileproc/avatar.go @@ -0,0 +1,55 @@ +package fileproc + +import ( + "context" + "path" + "path/filepath" + + "github.com/rusq/slack" +) + +type AvatarProc struct { + dl Downloader + filepath func(u *slack.User) string +} + +func NewAvatarProc(dl Downloader) AvatarProc { + return AvatarProc{ + dl: dl, + filepath: avatarPath, + } +} + +func (a AvatarProc) Avatars(ctx context.Context, users []slack.User) error { + for _, u := range users { + if u.Profile.ImageOriginal == "" { + // skip empty + continue + } + if err := a.dl.Download(a.filepath(&u), u.Profile.ImageOriginal); err != nil { + return err + } + } + return nil +} + +func avatarPath(u *slack.User) string { + filename := path.Base(u.Profile.ImageOriginal) + return filepath.Join( + 
"__avatars", + u.ID, + filename, + ) +} + +func nvl(s string, ss ...string) string { + if s != "" { + return s + } + for _, v := range ss { + if v != "" { + return v + } + } + return "" +} diff --git a/internal/chunk/transform/fileproc/avatar_test.go b/internal/chunk/transform/fileproc/avatar_test.go new file mode 100644 index 00000000..b4b866f1 --- /dev/null +++ b/internal/chunk/transform/fileproc/avatar_test.go @@ -0,0 +1,40 @@ +package fileproc + +import ( + "path/filepath" + "testing" + + "github.com/rusq/slack" +) + +func Test_avatarPath(t *testing.T) { + type args struct { + u *slack.User + } + tests := []struct { + name string + args args + want string + }{ + { + name: "name with display name", + args: args{ + u: &slack.User{ + ID: "U12345678", + Profile: slack.UserProfile{ + ImageOriginal: "https://example/image.jpg", + DisplayNameNormalized: "displayname", + }, + }, + }, + want: filepath.Join("__avatars", "U12345678", "image.jpg"), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := avatarPath(tt.args.u); got != tt.want { + t.Errorf("avatarPath() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/internal/convert/chunkexp.go b/internal/convert/chunkexp.go index cf3a7eb6..3fa21c96 100644 --- a/internal/convert/chunkexp.go +++ b/internal/convert/chunkexp.go @@ -13,6 +13,7 @@ import ( "github.com/rusq/fsadapter" "github.com/rusq/slack" + "github.com/rusq/slackdump/v3/internal/chunk" "github.com/rusq/slackdump/v3/internal/chunk/transform" "github.com/rusq/slackdump/v3/internal/chunk/transform/fileproc" @@ -36,8 +37,9 @@ type ChunkToExport struct { trg fsadapter.FS // UploadDir is the upload directory name (relative to Src) includeFiles bool - // FindFile should return the path to the file within the upload directory + // srcFileLoc should return the file location within the source directory. 
srcFileLoc func(*slack.Channel, *slack.File) string + // trgFileLoc should return the file location within the target directory trgFileLoc func(*slack.Channel, *slack.File) string lg *slog.Logger @@ -151,7 +153,7 @@ func (c *ChunkToExport) Convert(ctx context.Context) error { if err != nil { return err } - var tfopts = []transform.ExpCvtOption{ + tfopts := []transform.ExpCvtOption{ transform.ExpWithUsers(users), } if c.includeFiles { @@ -169,7 +171,7 @@ func (c *ChunkToExport) Convert(ctx context.Context) error { } // 1. generator - var chC = make(chan slack.Channel) + chC := make(chan slack.Channel) go func() { defer close(chC) for _, ch := range channels { diff --git a/processor/processor.go b/processor/processor.go index 3684bf38..09bacd84 100644 --- a/processor/processor.go +++ b/processor/processor.go @@ -75,3 +75,9 @@ type Searcher interface { MessageSearcher FileSearcher } + +// Avatars is the interface for downloading avatars. +type Avatars interface { + // Avatars should download avatars for the slice of users. + Avatars(ctx context.Context, users []slack.User) error +} diff --git a/stream/stream.go b/stream/stream.go index e6b8f27f..25a89841 100644 --- a/stream/stream.go +++ b/stream/stream.go @@ -205,7 +205,8 @@ func (cs *Stream) WorkspaceInfo(ctx context.Context, proc processor.WorkspaceInf return proc.WorkspaceInfo(ctx, atr) } -// Users returns all users in the workspace. +// Users processes all users in the workspace, calling proc for each batch of +// users returned by the API. 
func (cs *Stream) Users(ctx context.Context, proc processor.Users, opt ...slack.GetUsersOption) error { ctx, task := trace.NewTask(ctx, "Users") defer task.End() diff --git a/stream/stream_workers.go b/stream/stream_workers.go index b9a2a1ec..6238739d 100644 --- a/stream/stream_workers.go +++ b/stream/stream_workers.go @@ -30,12 +30,15 @@ func (cs *Stream) channelWorker(ctx context.Context, proc processor.Conversation results <- Result{Type: RTChannel, ChannelID: req.sl.Channel, Err: err} continue } + + // get the channel canvas if channel.Properties != nil && !channel.Properties.Canvas.IsEmpty { if err := cs.canvas(ctx, proc, channel, channel.Properties.Canvas.FileId); err != nil { // ignore canvas errors slog.Warn("canvas error: %s", "err", err) } } + if err := cs.channel(ctx, req, func(mm []slack.Message, isLast bool) error { n, err := procChanMsg(ctx, proc, threadC, channel, isLast, mm) if err != nil { From e8a63c080c7c8c9cec41b70c7f249e49b9031a27 Mon Sep 17 00:00:00 2001 From: Rustam Gilyazov <16064414+rusq@users.noreply.github.com> Date: Sun, 12 Jan 2025 07:20:06 +1000 Subject: [PATCH 2/3] commit progress --- cmd/slackdump/internal/archive/search.go | 2 +- cmd/slackdump/internal/convertcmd/convert.go | 4 +- cmd/slackdump/internal/emoji/emoji.go | 2 +- cmd/slackdump/internal/list/common.go | 2 +- internal/chunk/transform/export.go | 4 +- internal/chunk/transform/fileproc/avatar.go | 4 +- .../chunk/transform/fileproc/avatar_test.go | 2 +- internal/chunk/transform/fileproc/fileproc.go | 4 +- internal/chunk/transform/standard.go | 8 +- internal/convert/chunkexp.go | 153 +++++++++++++----- 10 files changed, 127 insertions(+), 58 deletions(-) diff --git a/cmd/slackdump/internal/archive/search.go b/cmd/slackdump/internal/archive/search.go index e1644491..935aed05 100644 --- a/cmd/slackdump/internal/archive/search.go +++ b/cmd/slackdump/internal/archive/search.go @@ -36,7 +36,7 @@ var CmdSearch = &base.Command{ //go:embed assets/search.md var searchMD string -const 
flagMask = cfg.OmitUserCacheFlag | cfg.OmitCacheDir | cfg.OmitTimeframeFlag | cfg.OmitMemberOnlyFlag +const flagMask = cfg.OmitUserCacheFlag | cfg.OmitCacheDir | cfg.OmitTimeframeFlag | cfg.OmitMemberOnlyFlag | cfg.OmitDownloadAvatarsFlag var cmdSearchMessages = &base.Command{ UsageLine: "slackdump search messages [flags] query terms", diff --git a/cmd/slackdump/internal/convertcmd/convert.go b/cmd/slackdump/internal/convertcmd/convert.go index 8b7f57cd..0bd7915a 100644 --- a/cmd/slackdump/internal/convertcmd/convert.go +++ b/cmd/slackdump/internal/convertcmd/convert.go @@ -22,8 +22,10 @@ var CmdConvert = &base.Command{ Run: runConvert, UsageLine: "slackdump convert [flags] ", Short: "convert slackdump chunks to various formats", - Long: ` + Long: `# Convert Command +Convert slackdump archive format to various formats. +Currently only "export" format is supported. `, CustomFlags: false, FlagMask: cfg.OmitAll & ^cfg.OmitDownloadFlag &^ cfg.OmitOutputFlag, diff --git a/cmd/slackdump/internal/emoji/emoji.go b/cmd/slackdump/internal/emoji/emoji.go index 6507b94a..c5c520ad 100644 --- a/cmd/slackdump/internal/emoji/emoji.go +++ b/cmd/slackdump/internal/emoji/emoji.go @@ -29,7 +29,7 @@ var CmdEmoji = &base.Command{ UsageLine: "slackdump emoji [flags]", Short: "download workspace emoticons ಠ_ಠ", Long: emojiMD, - FlagMask: cfg.OmitDownloadFlag | cfg.OmitConfigFlag | cfg.OmitChunkCacheFlag | cfg.OmitUserCacheFlag | cfg.OmitRecordFilesFlag, + FlagMask: cfg.OmitAll &^ cfg.OmitAuthFlags, RequireAuth: true, PrintFlags: true, } diff --git a/cmd/slackdump/internal/list/common.go b/cmd/slackdump/internal/list/common.go index 43a17eb9..dcab6a11 100644 --- a/cmd/slackdump/internal/list/common.go +++ b/cmd/slackdump/internal/list/common.go @@ -18,7 +18,7 @@ import ( "github.com/rusq/slackdump/v3/types" ) -const flagMask = cfg.OmitDownloadFlag | cfg.OmitMemberOnlyFlag +const flagMask = cfg.OmitAll &^ cfg.OmitAuthFlags &^ cfg.OmitCacheDir // CmdList is the list command. 
The logic is in the subcommands. var CmdList = &base.Command{ diff --git a/internal/chunk/transform/export.go b/internal/chunk/transform/export.go index f906fa79..a49b7466 100644 --- a/internal/chunk/transform/export.go +++ b/internal/chunk/transform/export.go @@ -99,7 +99,7 @@ func (e *ExpConverter) writeMessages(ctx context.Context, pl *chunk.File, ci *sl uidx := types.Users(e.users).IndexByID() trgdir := ExportChanName(ci) - var mm []export.ExportMessage = make([]export.ExportMessage, 0, 100) + mm := make([]export.ExportMessage, 0, 100) var prevDt string var currDt string if err := pl.Sorted(ctx, false, func(ts time.Time, m *slack.Message) error { @@ -117,7 +117,7 @@ func (e *ExpConverter) writeMessages(ctx context.Context, pl *chunk.File, ci *sl // the "thread" is only used to collect statistics. Thread messages // are passed by Sorted and written as a normal course of action. var thread []slack.Message - if m.ThreadTimestamp == m.Timestamp && m.LatestReply != structures.LatestReplyNoReplies { + if structures.IsThreadStart(m) && m.LatestReply != structures.LatestReplyNoReplies { // get the thread for the initial thread message only. 
var err error thread, err = pl.AllThreadMessages(ci.ID, m.ThreadTimestamp) diff --git a/internal/chunk/transform/fileproc/avatar.go b/internal/chunk/transform/fileproc/avatar.go index 49f06410..0f289d1c 100644 --- a/internal/chunk/transform/fileproc/avatar.go +++ b/internal/chunk/transform/fileproc/avatar.go @@ -16,7 +16,7 @@ type AvatarProc struct { func NewAvatarProc(dl Downloader) AvatarProc { return AvatarProc{ dl: dl, - filepath: avatarPath, + filepath: AvatarPath, } } @@ -33,7 +33,7 @@ func (a AvatarProc) Avatars(ctx context.Context, users []slack.User) error { return nil } -func avatarPath(u *slack.User) string { +func AvatarPath(u *slack.User) string { filename := path.Base(u.Profile.ImageOriginal) return filepath.Join( "__avatars", diff --git a/internal/chunk/transform/fileproc/avatar_test.go b/internal/chunk/transform/fileproc/avatar_test.go index b4b866f1..91bd48e0 100644 --- a/internal/chunk/transform/fileproc/avatar_test.go +++ b/internal/chunk/transform/fileproc/avatar_test.go @@ -32,7 +32,7 @@ func Test_avatarPath(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if got := avatarPath(tt.args.u); got != tt.want { + if got := AvatarPath(tt.args.u); got != tt.want { t.Errorf("avatarPath() = %v, want %v", got, tt.want) } }) diff --git a/internal/chunk/transform/fileproc/fileproc.go b/internal/chunk/transform/fileproc/fileproc.go index 99058bb1..4a92427d 100644 --- a/internal/chunk/transform/fileproc/fileproc.go +++ b/internal/chunk/transform/fileproc/fileproc.go @@ -124,8 +124,8 @@ type FileGetter interface { // NewDownloader initializes the downloader and returns it, along with a // function that should be called to stop it. 
-func NewDownloader(ctx context.Context, gEnabled bool, cl FileGetter, fsa fsadapter.FS, lg *slog.Logger) (sdl Downloader, stop func()) { - if !gEnabled { +func NewDownloader(ctx context.Context, enabled bool, cl FileGetter, fsa fsadapter.FS, lg *slog.Logger) (sdl Downloader, stop func()) { + if !enabled { return NoopDownloader{}, func() {} } else { dl := downloader.New(cl, fsa, downloader.WithLogger(lg)) diff --git a/internal/chunk/transform/standard.go b/internal/chunk/transform/standard.go index 9a0a3b2c..ba6cee98 100644 --- a/internal/chunk/transform/standard.go +++ b/internal/chunk/transform/standard.go @@ -60,8 +60,10 @@ func NewStandard(fsa fsadapter.FS, cd *chunk.Directory, opts ...StdOption) (*Std // pipelineFunc is a function that performs caller-defined transformations of // a slice of slack messages. The type alias is defined for brevity. -type pipelineFunc = func(channelID string, threadTS string, mm []slack.Message) error -type pipeline []pipelineFunc +type ( + pipelineFunc = func(channelID string, threadTS string, mm []slack.Message) error + pipeline []pipelineFunc +) // apply applies the pipeline functions in order. func (p pipeline) apply(channelID, threadTS string, mm []slack.Message) error { @@ -158,7 +160,7 @@ func stdConversation(cf *chunk.File, ci *slack.Channel, pipeline pipeline) ([]ty } var sdm types.Message // slackdump message sdm.Message = mm[i] - if mm[i].ThreadTimestamp != "" && mm[i].ThreadTimestamp == mm[i].Timestamp && mm[i].LatestReply != structures.LatestReplyNoReplies { + if mm[i].ThreadTimestamp != "" && structures.IsThreadStart(&mm[i]) && mm[i].LatestReply != structures.LatestReplyNoReplies { // process thread only for parent messages // if there's a thread timestamp, we need to find and add it. 
thread, err := cf.AllThreadMessages(ci.ID, mm[i].ThreadTimestamp) diff --git a/internal/convert/chunkexp.go b/internal/convert/chunkexp.go index 3fa21c96..ff57eb5d 100644 --- a/internal/convert/chunkexp.go +++ b/internal/convert/chunkexp.go @@ -35,19 +35,24 @@ type ChunkToExport struct { src *chunk.Directory // trg is the target FS for the export trg fsadapter.FS - // UploadDir is the upload directory name (relative to Src) + // includeFiles is a flag to include files in the export includeFiles bool + // includeAvatars is a flag to include avatars in the export + includeAvatars bool // srcFileLoc should return the file location within the source directory. srcFileLoc func(*slack.Channel, *slack.File) string // trgFileLoc should return the file location within the target directory trgFileLoc func(*slack.Channel, *slack.File) string + // avtrFileLoc should return the avatar file location. + avtrFileLoc func(*slack.User) string lg *slog.Logger workers int // number of workers to use to convert channels - request chan copyrequest - result chan copyresult + filerequest chan copyrequest + fileresult chan copyresult + avtrresult chan copyresult } type C2EOption func(*ChunkToExport) @@ -59,6 +64,13 @@ func WithIncludeFiles(b bool) C2EOption { } } +// WithIncludeAvatars sets the IncludeAvatars option. +func WithIncludeAvatars(b bool) C2EOption { + return func(c *ChunkToExport) { + c.includeAvatars = b + } +} + // WithSrcFileLoc sets the SrcFileLoc function. 
func WithSrcFileLoc(fn func(*slack.Channel, *slack.File) string) C2EOption { return func(c *ChunkToExport) { @@ -88,15 +100,18 @@ func WithLogger(lg *slog.Logger) C2EOption { func NewChunkToExport(src *chunk.Directory, trg fsadapter.FS, opt ...C2EOption) *ChunkToExport { c := &ChunkToExport{ - src: src, - trg: trg, - includeFiles: false, - srcFileLoc: fileproc.MattermostFilepath, - trgFileLoc: fileproc.MattermostFilepath, - lg: slog.Default(), - request: make(chan copyrequest, 1), - result: make(chan copyresult, 1), - workers: defWorkers, + src: src, + trg: trg, + includeFiles: false, + includeAvatars: false, + srcFileLoc: fileproc.MattermostFilepath, + trgFileLoc: fileproc.MattermostFilepath, + avtrFileLoc: fileproc.AvatarPath, + lg: slog.Default(), + filerequest: make(chan copyrequest, 1), + fileresult: make(chan copyresult, 1), + avtrresult: make(chan copyresult, 1), + workers: defWorkers, } for _, o := range opt { o(c) @@ -106,11 +121,11 @@ func NewChunkToExport(src *chunk.Directory, trg fsadapter.FS, opt ...C2EOption) // Validate validates the input parameters. func (c *ChunkToExport) Validate() error { + const format = "convert: internal error: %s: %w" if c.src == nil || c.trg == nil { return errors.New("convert: source and target must be set") } if c.includeFiles { - const format = "convert: internal error: %s: %w" if c.srcFileLoc == nil { return fmt.Errorf(format, "source", ErrNoLocFunction) } @@ -118,6 +133,11 @@ func (c *ChunkToExport) Validate() error { return fmt.Errorf(format, "target", ErrNoLocFunction) } } + if c.includeAvatars { + if c.avtrFileLoc == nil { + return fmt.Errorf(format, "avatar", ErrNoLocFunction) + } + } // users chunk is required if fi, err := c.src.Stat(chunk.FUsers); err != nil { return fmt.Errorf("users chunk: %w", err) @@ -130,14 +150,14 @@ func (c *ChunkToExport) Validate() error { return nil } -// Convert converts the chunk directory contents to the export format. -// It validates the input parameters. 
+// Convert converts the chunk directory contents to the export format. It +// validates the input parameters. // // # Restrictions // -// Currently, one chunk file per channel is supported. If there are multiple -// chunk files per channel, the behaviour is undefined, but I expect it to -// overwrite the previous files. +// TODO: Currently, one chunk file per channel is supported. If there are +// multiple chunk files per channel, the behaviour is undefined, but I expect +// it to overwrite the previous files. func (c *ChunkToExport) Convert(ctx context.Context) error { ctx, task := trace.NewTask(ctx, "convert.ChunkToExport") defer task.End() @@ -153,23 +173,11 @@ func (c *ChunkToExport) Convert(ctx context.Context) error { if err != nil { return err } + if c.includeFiles { + } tfopts := []transform.ExpCvtOption{ transform.ExpWithUsers(users), } - if c.includeFiles { - tfopts = append(tfopts, transform.ExpWithMsgUpdateFunc(func(ch *slack.Channel, m *slack.Message) error { - // copy in a separate goroutine to avoid blocking the transform in - // case of a synchronous fsadapter (e.g. zip file adapter can - // write only one file at a time). - c.request <- copyrequest{ - channel: ch, - message: m, - } - return nil - })) - go c.copyworker(c.result, c.request) - } - // 1. generator chC := make(chan slack.Channel) go func() { @@ -182,9 +190,36 @@ func (c *ChunkToExport) Convert(ctx context.Context) error { errC := make(chan error, c.workers) { // 2. workers + var filewg sync.WaitGroup + + if c.includeFiles { + tfopts = append(tfopts, transform.ExpWithMsgUpdateFunc(func(ch *slack.Channel, m *slack.Message) error { + // copy in a separate goroutine to avoid blocking the transform in + // case of a synchronous fsadapter (e.g. zip file adapter can write + // only one file at a time). 
+ c.filerequest <- copyrequest{ + channel: ch, + message: m, + } + return nil + })) + filewg.Add(1) + go func() { + c.copyworker(c.filerequest) + filewg.Done() + }() + } + if c.includeAvatars { + filewg.Add(1) + go func() { + c.avatarWorker(users) + filewg.Done() + }() + } + // 2.1 converter - conv := transform.NewExpConverter(c.src, c.trg, tfopts...) var wg sync.WaitGroup + conv := transform.NewExpConverter(c.src, c.trg, tfopts...) for i := 0; i < c.workers; i++ { wg.Add(1) go func() { @@ -208,24 +243,23 @@ func (c *ChunkToExport) Convert(ctx context.Context) error { errC <- err } }() - // 2.3. workers sentinel + // 2.3. workers sentinels go func() { wg.Wait() - close(c.request) + close(c.filerequest) }() } - // 3. result processor LOOP: for { select { case <-ctx.Done(): - return ctx.Err() + return context.Cause(ctx) case err := <-errC: // get rid of this shit. if err != nil { return err } - case res, more := <-c.result: + case res, more := <-c.fileresult: if !more { break LOOP } @@ -238,6 +272,27 @@ LOOP: return nil } +func merge(resC ...<-chan copyresult) chan<- copyresult { + var wg sync.WaitGroup + out := make(chan<- copyresult, 1) + + output := func(c <-chan copyresult) { + for res := range c { + out <- res + } + wg.Done() + } + wg.Add(len(resC)) + for _, c := range resC { + go output(c) + } + go func() { + wg.Wait() + close(out) + }() + return out +} + type copyerror struct { FileID string Err error @@ -276,11 +331,13 @@ func (c *ChunkToExport) fileCopy(ch *slack.Channel, msg *slack.Message) error { srcpath := filepath.Join(c.src.Name(), c.srcFileLoc(ch, &f)) trgpath := c.trgFileLoc(ch, &f) - if _, err := os.Stat(srcpath); err != nil { + sfi, err := os.Stat(srcpath) + if err != nil { return ©error{f.ID, err} } - if _, err := os.Stat(srcpath); err != nil { - return ©error{f.ID, err} + if sfi.Size() == 0 { + c.lg.Warn("skipping", "file", f.ID, "reason", "empty file") + continue } c.lg.Debug("copying", "srcpath", srcpath, "trgpath", trgpath) if err := 
copy2trg(c.trg, trgpath, srcpath); err != nil { @@ -327,12 +384,20 @@ func (cr copyresult) Unwrap() error { return cr.err } -func (c *ChunkToExport) copyworker(res chan<- copyresult, req <-chan copyrequest) { - defer close(res) +func (c *ChunkToExport) copyworker(req <-chan copyrequest) { for fr := range req { - res <- copyresult{ + c.fileresult <- copyresult{ fr: fr, err: c.fileCopy(fr.channel, fr.message), } } } + +func (c *ChunkToExport) avatarWorker(users []slack.User) { + for _, u := range users { + loc := c.avtrFileLoc(&u) + c.avtrresult <- copyresult{ + err: copy2trg(c.trg, loc, loc), + } + } +} From d5d594813e5f10fd1a1d60e569100fe0254163af Mon Sep 17 00:00:00 2001 From: Rustam Gilyazov <16064414+rusq@users.noreply.github.com> Date: Sun, 19 Jan 2025 15:03:17 +1000 Subject: [PATCH 3/3] Update converter to support avatars --- cmd/slackdump/internal/convertcmd/convert.go | 13 ++- internal/convert/chunkexp.go | 106 +++++++++++++------ slackdump.1 | 41 +++++-- 3 files changed, 118 insertions(+), 42 deletions(-) diff --git a/cmd/slackdump/internal/convertcmd/convert.go b/cmd/slackdump/internal/convertcmd/convert.go index 0bd7915a..28b033f0 100644 --- a/cmd/slackdump/internal/convertcmd/convert.go +++ b/cmd/slackdump/internal/convertcmd/convert.go @@ -28,7 +28,7 @@ Convert slackdump archive format to various formats. Currently only "export" format is supported. 
`, CustomFlags: false, - FlagMask: cfg.OmitAll & ^cfg.OmitDownloadFlag &^ cfg.OmitOutputFlag, + FlagMask: cfg.OmitAll & ^cfg.OmitDownloadFlag &^ cfg.OmitOutputFlag &^ cfg.OmitDownloadAvatarsFlag, PrintFlags: true, } @@ -65,8 +65,9 @@ func runConvert(ctx context.Context, cmd *base.Command, args []string) error { lg.InfoContext(ctx, "converting", "input_format", params.inputfmt, "source", args[0], "output_format", params.outputfmt, "output", cfg.Output) cflg := convertflags{ - withFiles: cfg.DownloadFiles, - stt: params.storageType, + withFiles: cfg.DownloadFiles, + withAvatars: cfg.DownloadAvatars, + stt: params.storageType, } start := time.Now() if err := fn(ctx, args[0], cfg.Output, cflg); err != nil { @@ -98,8 +99,9 @@ var converters = map[datafmt]map[datafmt]convertFunc{ } type convertflags struct { - withFiles bool - stt fileproc.StorageType + withFiles bool + withAvatars bool + stt fileproc.StorageType } func chunk2export(ctx context.Context, src, trg string, cflg convertflags) error { @@ -123,6 +125,7 @@ func chunk2export(ctx context.Context, src, trg string, cflg convertflags) error cd, fsa, convert.WithIncludeFiles(cflg.withFiles), + convert.WithIncludeAvatars(cflg.withAvatars), convert.WithSrcFileLoc(sttFn), convert.WithTrgFileLoc(sttFn), convert.WithLogger(cfg.Log), diff --git a/internal/convert/chunkexp.go b/internal/convert/chunkexp.go index ff57eb5d..47593e2b 100644 --- a/internal/convert/chunkexp.go +++ b/internal/convert/chunkexp.go @@ -150,6 +150,17 @@ func (c *ChunkToExport) Validate() error { return nil } +func sliceToChan[T any](s []T) <-chan T { + ch := make(chan T) + go func() { + defer close(ch) + for _, v := range s { + ch <- v + } + }() + return ch +} + // Convert converts the chunk directory contents to the export format. It // validates the input parameters. 
// @@ -173,19 +184,12 @@ func (c *ChunkToExport) Convert(ctx context.Context) error { if err != nil { return err } - if c.includeFiles { - } + tfopts := []transform.ExpCvtOption{ transform.ExpWithUsers(users), } // 1. generator - chC := make(chan slack.Channel) - go func() { - defer close(chC) - for _, ch := range channels { - chC <- ch - } - }() + chC := sliceToChan(channels) errC := make(chan error, c.workers) { @@ -208,22 +212,27 @@ func (c *ChunkToExport) Convert(ctx context.Context) error { c.copyworker(c.filerequest) filewg.Done() }() + } else { + close(c.fileresult) } + if c.includeAvatars { filewg.Add(1) go func() { c.avatarWorker(users) filewg.Done() }() + } else { + close(c.avtrresult) } // 2.1 converter - var wg sync.WaitGroup + var msgwg sync.WaitGroup conv := transform.NewExpConverter(c.src, c.trg, tfopts...) - for i := 0; i < c.workers; i++ { - wg.Add(1) + for range c.workers { + msgwg.Add(1) go func() { - defer wg.Done() + defer msgwg.Done() for ch := range chC { lg := c.lg.With("channel", ch.ID) lg.Debug("processing channel") @@ -235,9 +244,9 @@ func (c *ChunkToExport) Convert(ctx context.Context) error { }() } // 2.2 index writer - wg.Add(1) + msgwg.Add(1) go func() { - defer wg.Done() + defer msgwg.Done() c.lg.DebugContext(ctx, "writing index", "name", c.src.Name()) if err := conv.WriteIndex(); err != nil { errC <- err @@ -245,36 +254,53 @@ func (c *ChunkToExport) Convert(ctx context.Context) error { }() // 2.3. workers sentinels go func() { - wg.Wait() + msgwg.Wait() + c.lg.Debug("messages wait group done, closing file requests") close(c.filerequest) + filewg.Wait() + c.lg.Debug("file workers done, finalising") + close(errC) }() } // 3. 
result processor + fileresults := merge(c.fileresult, c.avtrresult) + go func() { + for res := range fileresults { + if res.err != nil { + if res.fr.message != nil { + c.lg.Error("file converter: error processing message", "ts", res.fr.message.Timestamp, "err", res.err) + } else { + c.lg.Error("file converter", "err", res.err) + } + errC <- res.err + } + } + }() + + var failed bool LOOP: for { select { case <-ctx.Done(): return context.Cause(ctx) - case err := <-errC: // get rid of this shit. - if err != nil { - return err - } - case res, more := <-c.fileresult: + case err, more := <-errC: if !more { break LOOP } - if res.err != nil { - return fmt.Errorf("error processing message with ts=%s: %w", res.fr.message.Timestamp, res.err) + if err != nil { + failed = true } } } - + if failed { + return errors.New("convert: there were errors") + } return nil } -func merge(resC ...<-chan copyresult) chan<- copyresult { +func merge(resC ...<-chan copyresult) <-chan copyresult { var wg sync.WaitGroup - out := make(chan<- copyresult, 1) + out := make(chan copyresult, 1) output := func(c <-chan copyresult) { for res := range c { @@ -324,7 +350,7 @@ func (c *ChunkToExport) fileCopy(ch *slack.Channel, msg *slack.Message) error { } for _, f := range msg.Files { if err := fileproc.IsValidWithReason(&f); err != nil { - c.lg.Warn("skipping", "file", f.ID, "error", err) + c.lg.Info("skipping", "file", f.ID, "error", err) continue } @@ -385,19 +411,37 @@ func (cr copyresult) Unwrap() error { } func (c *ChunkToExport) copyworker(req <-chan copyrequest) { - for fr := range req { + defer close(c.fileresult) + c.lg.Debug("copy worker started") + for r := range req { c.fileresult <- copyresult{ - fr: fr, - err: c.fileCopy(fr.channel, fr.message), + fr: r, + err: c.fileCopy(r.channel, r.message), } } + c.lg.Debug("copy worker done") } func (c *ChunkToExport) avatarWorker(users []slack.User) { + c.lg.Debug("avatar worker started") + defer close(c.avtrresult) for _, u := range users { + if 
u.Profile.ImageOriginal == "" { + continue + } + c.lg.Debug("processing avatar", "user", u.ID) loc := c.avtrFileLoc(&u) + err := copy2trg(c.trg, loc, filepath.Join(c.src.Name(), loc)) + if err != nil { + err = fmt.Errorf("error copying avatar for user %s: %w", u.ID, err) + } c.avtrresult <- copyresult{ - err: copy2trg(c.trg, loc, loc), + err: err, + } + if err != nil { + continue } + c.lg.Debug("avatar processed", "user", u.ID) } + c.lg.Debug("avatar worker done") } diff --git a/slackdump.1 b/slackdump.1 index 1c2fbb9f..15b89258 100644 --- a/slackdump.1 +++ b/slackdump.1 @@ -120,8 +120,15 @@ command. The flags are listed in alphabetical order. Use the specified API limits configuration TOML file (see the .Cm config command). -.It Fl base Ar path -Specifies the base directory or zip file where all data will be stored. +.It Fl autologin-timeout Ar duration +Headless autologin timeout, without the browser starting time, just the +interaction time. The duration must be specified in the following format: +.Dq XhYmZs , +for example, +.Dq 1h20m32s +would set the timeout to 1 hour, 20 minutes, 32 seconds. +.It Fl o Ar path +Specifies the output directory or zip file where all data will be stored. If the path ends with .Dq .zip , the data will be stored in the zip file, otherwise @@ -215,13 +222,25 @@ Authenticate in a new workspace using the command; .It Run -.Cm dump -, .Cm archive -or +, .Cm export -, depending on your requirements. +or +.Cm dump +, depending on your requirements. The +.Dq archive +format can be converted to +.Dq export +or +.Dq dump +formats using the +.Cm convert +command. .El +See also: +.Bd -literal -offset indent +.Nm Cm help Ar quickstart +.Ed .Sh AUTHENTICATION Slackdump supports multiple authentication methods listed below. .Ss Automatic login (EZ-LOGIN 3000) @@ -290,11 +309,16 @@ encrypt files for secure transmission, i.e. encrypting trace.out before posting it in Github Issues. .It Em eztest test the EZ-LOGIN 3000 method. 
+.It Em hydrate +allows you to "hydrate" the native Slack Exports with attachments. It downloads +attachments from Slack and creates a copy of the export with downloaded files. .It Em info show information about Slackdump environment .It Em obfuscate obfuscate the sensitive data in Slackdump archive. Works only on archive file format. +.It Em redownload +downloads any files that failed to download while running the archival process. .It Em uninstall uninstall Slackdump components or purge it from the system. .It Em thread @@ -318,6 +342,11 @@ enables debug output and switches the viewer output to RAW (JSON) format. enables JSON log format. .It Ev LOG_FILE Contains path to a file where log output will be written. +.It Ev MACHINE_ID_OVERRIDE +Allows overriding the Machine ID when opening or saving credentials and cache +files. See flag +.Fl machine-id +for more details. .It Ev SLACK_COOKIE Contains Slack cookie (for token+cookie-based authentication). See .Sx Authentication