From e226720cff5f1de146e4bc80750a9191575f3e28 Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Fri, 9 Jan 2026 05:37:36 +0800 Subject: [PATCH] Refactor cat-file batch operations and support `--batch-command` approach (#35775) Replace #34651 and address more problems including fix framework bugs and changing to QueryInfo and QueryContent calls. --------- Co-authored-by: Lunny Xiao --- modules/git/batch_reader.go | 57 ---- modules/git/blob_nogogit.go | 51 +--- modules/git/catfile/batch.go | 178 ----------- modules/git/catfile/reader.go | 211 ------------- modules/git/catfile_batch.go | 52 ++++ modules/git/catfile_batch_command.go | 63 ++++ modules/git/catfile_batch_legacy.go | 78 +++++ modules/git/catfile_batch_reader.go | 283 ++++++++++++++++++ modules/git/catfile_batch_test.go | 55 ++++ modules/git/commit_info_test.go | 60 +++- modules/git/git.go | 35 ++- modules/git/git_test.go | 30 +- modules/git/gitcmd/command_test.go | 2 + .../languagestats/language_stats_nogogit.go | 28 +- modules/git/languagestats/main_test.go | 28 +- modules/git/parse_treeentry.go | 3 +- modules/git/pipeline/lfs_nogogit.go | 44 +-- modules/git/pipeline/lfs_test.go | 38 +++ modules/git/pipeline/main_test.go | 14 + modules/git/repo_base_nogogit.go | 76 ++--- modules/git/repo_base_nogogit_test.go | 26 ++ modules/git/repo_branch_nogogit.go | 24 +- modules/git/repo_commit_nogogit.go | 56 +--- modules/git/repo_tag_nogogit.go | 17 +- modules/git/repo_tree_nogogit.go | 22 +- modules/git/tree_entry_nogogit.go | 10 +- modules/git/tree_nogogit.go | 19 +- modules/gitrepo/cat_file.go | 6 +- modules/graceful/manager.go | 12 - modules/indexer/code/bleve/bleve.go | 14 +- .../code/elasticsearch/elasticsearch.go | 13 +- modules/testlogger/testlogger.go | 30 +- routers/web/repo/compare.go | 4 +- services/contexttest/context_tests.go | 5 +- 34 files changed, 832 insertions(+), 812 deletions(-) delete mode 100644 modules/git/batch_reader.go delete mode 100644 modules/git/catfile/batch.go delete mode 100644 modules/git/catfile/reader.go create mode 100644 modules/git/catfile_batch.go create mode 100644 modules/git/catfile_batch_command.go create mode 100644 modules/git/catfile_batch_legacy.go create mode 100644 modules/git/catfile_batch_reader.go create mode 100644 modules/git/catfile_batch_test.go create mode 100644 modules/git/pipeline/lfs_test.go create mode 100644 modules/git/pipeline/main_test.go create mode 100644 modules/git/repo_base_nogogit_test.go diff --git a/modules/git/batch_reader.go b/modules/git/batch_reader.go deleted file mode 100644 index 3d612f5549..0000000000 --- a/modules/git/batch_reader.go +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright 2020 The Gitea Authors. All rights reserved. -// SPDX-License-Identifier: MIT - -package git - -import ( - "bufio" - "errors" - - "code.gitea.io/gitea/modules/git/catfile" -) - -// ReadBatchLine reads the header line from cat-file --batch while preserving the traditional return signature. -func ReadBatchLine(rd *bufio.Reader) (sha []byte, typ string, size int64, err error) { - sha, typ, size, err = catfile.ReadBatchLine(rd) - return sha, typ, size, convertCatfileError(err, sha) -} - -// ReadTagObjectID reads a tag object ID hash from a cat-file --batch stream, throwing away the rest of the stream. -func ReadTagObjectID(rd *bufio.Reader, size int64) (string, error) { - return catfile.ReadTagObjectID(rd, size) -} - -// ReadTreeID reads a tree ID from a cat-file --batch stream, throwing away the rest of the stream. -func ReadTreeID(rd *bufio.Reader, size int64) (string, error) { - return catfile.ReadTreeID(rd, size) -} - -// BinToHex converts a binary hash into a hex encoded one. -func BinToHex(objectFormat ObjectFormat, sha, out []byte) []byte { - return catfile.BinToHex(objectFormat, sha, out) -} - -// ParseCatFileTreeLine reads an entry from a tree in a cat-file --batch stream. -func ParseCatFileTreeLine(objectFormat ObjectFormat, rd *bufio.Reader, modeBuf, fnameBuf, shaBuf []byte) (mode, fname, sha []byte, n int, err error) { - mode, fname, sha, n, err = catfile.ParseCatFileTreeLine(objectFormat, rd, modeBuf, fnameBuf, shaBuf) - return mode, fname, sha, n, convertCatfileError(err, nil) -} - -// DiscardFull discards the requested number of bytes from the buffered reader. -func DiscardFull(rd *bufio.Reader, discard int64) error { - return catfile.DiscardFull(rd, discard) -} - -func convertCatfileError(err error, defaultID []byte) error { - if err == nil { - return nil - } - var notFound catfile.ErrObjectNotFound - if errors.As(err, ¬Found) { - if notFound.ID == "" && len(defaultID) > 0 { - notFound.ID = string(defaultID) - } - return ErrNotExist{ID: notFound.ID} - } - return err -} diff --git a/modules/git/blob_nogogit.go b/modules/git/blob_nogogit.go index 88e2be792b..837b30fd88 100644 --- a/modules/git/blob_nogogit.go +++ b/modules/git/blob_nogogit.go @@ -6,8 +6,6 @@ package git import ( - "bufio" - "bytes" "io" "code.gitea.io/gitea/modules/log" @@ -25,39 +23,28 @@ type Blob struct { // DataAsync gets a ReadCloser for the contents of a blob without reading it all. // Calling the Close function on the result will discard all unread output. -func (b *Blob) DataAsync() (io.ReadCloser, error) { +func (b *Blob) DataAsync() (_ io.ReadCloser, retErr error) { batch, cancel, err := b.repo.CatFileBatch(b.repo.Ctx) if err != nil { return nil, err } + defer func() { + // if there was an error, cancel the batch right away, + // otherwise let the caller close it + if retErr != nil { + cancel() + } + }() - rd := batch.Reader() - _, err = batch.Writer().Write([]byte(b.ID.String() + "\n")) + info, contentReader, err := batch.QueryContent(b.ID.String()) if err != nil { - cancel() - return nil, err - } - _, _, size, err := ReadBatchLine(rd) - if err != nil { - cancel() return nil, err } b.gotSize = true - b.size = size - - if size < 4096 { - bs, err := io.ReadAll(io.LimitReader(rd, size)) - defer cancel() - if err != nil { - return nil, err - } - _, err = rd.Discard(1) - return io.NopCloser(bytes.NewReader(bs)), err - } - + b.size = info.Size return &blobReader{ - rd: rd, - n: size, + rd: contentReader, + n: info.Size, cancel: cancel, }, nil } @@ -68,30 +55,24 @@ func (b *Blob) Size() int64 { return b.size } - batch, cancel, err := b.repo.CatFileBatchCheck(b.repo.Ctx) + batch, cancel, err := b.repo.CatFileBatch(b.repo.Ctx) if err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err) return 0 } defer cancel() - _, err = batch.Writer().Write([]byte(b.ID.String() + "\n")) + info, err := batch.QueryInfo(b.ID.String()) if err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err) return 0 } - _, _, b.size, err = ReadBatchLine(batch.Reader()) - if err != nil { - log.Debug("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err) - return 0 - } - b.gotSize = true - + b.size = info.Size return b.size } type blobReader struct { - rd *bufio.Reader + rd BufferedReader n int64 cancel func() } diff --git a/modules/git/catfile/batch.go b/modules/git/catfile/batch.go deleted file mode 100644 index 1facb8946e..0000000000 --- a/modules/git/catfile/batch.go +++ /dev/null @@ -1,178 +0,0 @@ -// Copyright 2024 The Gitea Authors. All rights reserved. -// SPDX-License-Identifier: MIT - -package catfile - -import ( - "bufio" - "context" - "io" - "strings" - - "code.gitea.io/gitea/modules/git/gitcmd" - - "github.com/djherbis/buffer" - "github.com/djherbis/nio/v3" -) - -// WriteCloserError wraps an io.WriteCloser with an additional CloseWithError function -type WriteCloserError interface { - io.WriteCloser - CloseWithError(err error) error -} - -type Batch interface { - Writer() WriteCloserError - Reader() *bufio.Reader - Close() -} - -// batch represents an active `git cat-file --batch` or `--batch-check` invocation -// paired with the pipes that feed/read from it. Call Close to release resources. -type batch struct { - cancel context.CancelFunc - reader *bufio.Reader - writer WriteCloserError -} - -// NewBatch creates a new cat-file --batch process for the provided repository path. -// The returned Batch must be closed once the caller has finished with it. -func NewBatch(ctx context.Context, repoPath string) (Batch, error) { - if err := EnsureValidGitRepository(ctx, repoPath); err != nil { - return nil, err - } - - var batch batch - batch.writer, batch.reader, batch.cancel = catFileBatch(ctx, repoPath) - return &batch, nil -} - -// NewBatchCheck creates a cat-file --batch-check process for the provided repository path. -// The returned Batch must be closed once the caller has finished with it. -func NewBatchCheck(ctx context.Context, repoPath string) (Batch, error) { - if err := EnsureValidGitRepository(ctx, repoPath); err != nil { - return nil, err - } - - var check batch - check.writer, check.reader, check.cancel = catFileBatchCheck(ctx, repoPath) - return &check, nil -} - -func (b *batch) Writer() WriteCloserError { - return b.writer -} - -func (b *batch) Reader() *bufio.Reader { - return b.reader -} - -// Close stops the underlying git cat-file process and releases held resources. -func (b *batch) Close() { - if b == nil || b.cancel == nil { - return - } - b.cancel() - b.reader = nil - b.writer = nil - b.cancel = nil -} - -// EnsureValidGitRepository runs `git rev-parse` in the repository path to make sure -// the directory is a valid git repository. This avoids git cat-file hanging indefinitely -// when invoked in invalid paths. -func EnsureValidGitRepository(ctx context.Context, repoPath string) error { - stder := strings.Builder{} - err := gitcmd.NewCommand("rev-parse"). - WithDir(repoPath). - WithStderr(&stder). - Run(ctx) - if err != nil { - return gitcmd.ConcatenateError(err, stder.String()) - } - return nil -} - -// catFileBatch opens git cat-file --batch in the provided repo and returns a stdin pipe, -// a stdout reader and a cancel function. -func catFileBatch(ctx context.Context, repoPath string) (WriteCloserError, *bufio.Reader, func()) { - batchStdinReader, batchStdinWriter := io.Pipe() - batchStdoutReader, batchStdoutWriter := nio.Pipe(buffer.New(32 * 1024)) - ctx, ctxCancel := context.WithCancel(ctx) - closed := make(chan struct{}) - cancel := func() { - ctxCancel() - _ = batchStdinWriter.Close() - _ = batchStdoutReader.Close() - <-closed - } - - go func() { - <-ctx.Done() - cancel() - }() - - go func() { - stder := strings.Builder{} - err := gitcmd.NewCommand("cat-file", "--batch"). - WithDir(repoPath). - WithStdin(batchStdinReader). - WithStdout(batchStdoutWriter). - WithStderr(&stder). - WithUseContextTimeout(true). - Run(ctx) - if err != nil { - _ = batchStdoutWriter.CloseWithError(gitcmd.ConcatenateError(err, stder.String())) - _ = batchStdinReader.CloseWithError(gitcmd.ConcatenateError(err, stder.String())) - } else { - _ = batchStdoutWriter.Close() - _ = batchStdinReader.Close() - } - close(closed) - }() - - batchReader := bufio.NewReaderSize(batchStdoutReader, 32*1024) - return batchStdinWriter, batchReader, cancel -} - -// catFileBatchCheck opens git cat-file --batch-check in the provided repo and returns a stdin pipe, -// a stdout reader and cancel function. -func catFileBatchCheck(ctx context.Context, repoPath string) (WriteCloserError, *bufio.Reader, func()) { - batchStdinReader, batchStdinWriter := io.Pipe() - batchStdoutReader, batchStdoutWriter := io.Pipe() - ctx, ctxCancel := context.WithCancel(ctx) - closed := make(chan struct{}) - cancel := func() { - ctxCancel() - _ = batchStdoutReader.Close() - _ = batchStdinWriter.Close() - <-closed - } - - go func() { - <-ctx.Done() - cancel() - }() - - go func() { - stder := strings.Builder{} - err := gitcmd.NewCommand("cat-file", "--batch-check"). - WithDir(repoPath). - WithStdin(batchStdinReader). - WithStdout(batchStdoutWriter). - WithStderr(&stder). - WithUseContextTimeout(true). - Run(ctx) - if err != nil { - _ = batchStdoutWriter.CloseWithError(gitcmd.ConcatenateError(err, stder.String())) - _ = batchStdinReader.CloseWithError(gitcmd.ConcatenateError(err, stder.String())) - } else { - _ = batchStdoutWriter.Close() - _ = batchStdinReader.Close() - } - close(closed) - }() - - batchReader := bufio.NewReader(batchStdoutReader) - return batchStdinWriter, batchReader, cancel -} diff --git a/modules/git/catfile/reader.go b/modules/git/catfile/reader.go deleted file mode 100644 index 1785cc4cc0..0000000000 --- a/modules/git/catfile/reader.go +++ /dev/null @@ -1,211 +0,0 @@ -// Copyright 2020 The Gitea Authors. All rights reserved. -// SPDX-License-Identifier: MIT - -package catfile - -import ( - "bufio" - "bytes" - "errors" - "fmt" - "math" - "strconv" - "strings" - - "code.gitea.io/gitea/modules/log" -) - -// ErrObjectNotFound indicates that the requested object could not be read from cat-file -type ErrObjectNotFound struct { - ID string -} - -func (err ErrObjectNotFound) Error() string { - return fmt.Sprintf("catfile: object does not exist [id: %s]", err.ID) -} - -// IsErrObjectNotFound reports whether err is an ErrObjectNotFound -func IsErrObjectNotFound(err error) bool { - var target ErrObjectNotFound - return errors.As(err, &target) -} - -// ObjectFormat abstracts the minimal information needed from git.ObjectFormat. -type ObjectFormat interface { - FullLength() int -} - -// ReadBatchLine reads the header line from cat-file --batch. It expects the format -// " SP SP LF" and leaves the reader positioned at the start of -// the object contents (which must be fully consumed by the caller). -func ReadBatchLine(rd *bufio.Reader) (sha []byte, typ string, size int64, err error) { - typ, err = rd.ReadString('\n') - if err != nil { - return sha, typ, size, err - } - if len(typ) == 1 { - typ, err = rd.ReadString('\n') - if err != nil { - return sha, typ, size, err - } - } - idx := strings.IndexByte(typ, ' ') - if idx < 0 { - return sha, typ, size, ErrObjectNotFound{} - } - sha = []byte(typ[:idx]) - typ = typ[idx+1:] - - idx = strings.IndexByte(typ, ' ') - if idx < 0 { - return sha, typ, size, ErrObjectNotFound{ID: string(sha)} - } - - sizeStr := typ[idx+1 : len(typ)-1] - typ = typ[:idx] - - size, err = strconv.ParseInt(sizeStr, 10, 64) - return sha, typ, size, err -} - -// ReadTagObjectID reads a tag object ID hash from a cat-file --batch stream, throwing away the rest. -func ReadTagObjectID(rd *bufio.Reader, size int64) (string, error) { - var id string - var n int64 -headerLoop: - for { - line, err := rd.ReadBytes('\n') - if err != nil { - return "", err - } - n += int64(len(line)) - idx := bytes.Index(line, []byte{' '}) - if idx < 0 { - continue - } - - if string(line[:idx]) == "object" { - id = string(line[idx+1 : len(line)-1]) - break headerLoop - } - } - - return id, DiscardFull(rd, size-n+1) -} - -// ReadTreeID reads a tree ID from a cat-file --batch stream, throwing away the rest of the commit content. -func ReadTreeID(rd *bufio.Reader, size int64) (string, error) { - var id string - var n int64 -headerLoop: - for { - line, err := rd.ReadBytes('\n') - if err != nil { - return "", err - } - n += int64(len(line)) - idx := bytes.Index(line, []byte{' '}) - if idx < 0 { - continue - } - - if string(line[:idx]) == "tree" { - id = string(line[idx+1 : len(line)-1]) - break headerLoop - } - } - - return id, DiscardFull(rd, size-n+1) -} - -// hextable helps quickly convert between binary and hex representation -const hextable = "0123456789abcdef" - -// BinToHex converts a binary hash into a hex encoded one. Input and output can be the -// same byte slice to support in-place conversion without allocations. -func BinToHex(objectFormat ObjectFormat, sha, out []byte) []byte { - for i := objectFormat.FullLength()/2 - 1; i >= 0; i-- { - v := sha[i] - vhi, vlo := v>>4, v&0x0f - shi, slo := hextable[vhi], hextable[vlo] - out[i*2], out[i*2+1] = shi, slo - } - return out -} - -// ParseCatFileTreeLine reads an entry from a tree in a cat-file --batch stream and avoids allocations -// where possible. Each line is composed of: -// SP NUL -func ParseCatFileTreeLine(objectFormat ObjectFormat, rd *bufio.Reader, modeBuf, fnameBuf, shaBuf []byte) (mode, fname, sha []byte, n int, err error) { - var readBytes []byte - - readBytes, err = rd.ReadSlice('\x00') - if err != nil { - return mode, fname, sha, n, err - } - idx := bytes.IndexByte(readBytes, ' ') - if idx < 0 { - log.Debug("missing space in readBytes ParseCatFileTreeLine: %s", readBytes) - return mode, fname, sha, n, ErrObjectNotFound{} - } - - n += idx + 1 - copy(modeBuf, readBytes[:idx]) - if len(modeBuf) >= idx { - modeBuf = modeBuf[:idx] - } else { - modeBuf = append(modeBuf, readBytes[len(modeBuf):idx]...) - } - mode = modeBuf - - readBytes = readBytes[idx+1:] - copy(fnameBuf, readBytes) - if len(fnameBuf) > len(readBytes) { - fnameBuf = fnameBuf[:len(readBytes)] - } else { - fnameBuf = append(fnameBuf, readBytes[len(fnameBuf):]...) - } - for err == bufio.ErrBufferFull { - readBytes, err = rd.ReadSlice('\x00') - fnameBuf = append(fnameBuf, readBytes...) - } - n += len(fnameBuf) - if err != nil { - return mode, fname, sha, n, err - } - fnameBuf = fnameBuf[:len(fnameBuf)-1] - fname = fnameBuf - - idx = 0 - length := objectFormat.FullLength() / 2 - for idx < length { - var read int - read, err = rd.Read(shaBuf[idx:length]) - n += read - if err != nil { - return mode, fname, sha, n, err - } - idx += read - } - sha = shaBuf - return mode, fname, sha, n, err -} - -// DiscardFull discards the requested amount of bytes from the buffered reader regardless of its internal limit. -func DiscardFull(rd *bufio.Reader, discard int64) error { - if discard > math.MaxInt32 { - n, err := rd.Discard(math.MaxInt32) - discard -= int64(n) - if err != nil { - return err - } - } - for discard > 0 { - n, err := rd.Discard(int(discard)) - discard -= int64(n) - if err != nil { - return err - } - } - return nil -} diff --git a/modules/git/catfile_batch.go b/modules/git/catfile_batch.go new file mode 100644 index 0000000000..d13179f3ec --- /dev/null +++ b/modules/git/catfile_batch.go @@ -0,0 +1,52 @@ +// Copyright 2026 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package git + +import ( + "context" + "io" +) + +type BufferedReader interface { + io.Reader + Buffered() int + Peek(n int) ([]byte, error) + Discard(n int) (int, error) + ReadString(sep byte) (string, error) + ReadSlice(sep byte) ([]byte, error) + ReadBytes(sep byte) ([]byte, error) +} + +type CatFileObject struct { + ID string + Type string + Size int64 +} + +type CatFileBatch interface { + // QueryInfo queries the object info from the git repository by its object name using "git cat-file --batch" family commands. + // "git cat-file" accepts "" for the object name, it can be a ref name, object id, etc. https://git-scm.com/docs/gitrevisions + // In Gitea, we only use the simple ref name or object id, no other complex rev syntax like "suffix" or "git describe" although they are supported by git. + QueryInfo(obj string) (*CatFileObject, error) + + // QueryContent is similar to QueryInfo, it queries the object info and additionally returns a reader for its content. + // FIXME: this design still follows the old pattern: the returned BufferedReader is very fragile, + // callers should carefully maintain its lifecycle and discard all unread data. + // TODO: It needs to be refactored to a fully managed Reader stream in the future, don't let callers manually Close or Discard + QueryContent(obj string) (*CatFileObject, BufferedReader, error) +} + +type CatFileBatchCloser interface { + CatFileBatch + Close() +} + +// NewBatch creates a "batch object provider (CatFileBatch)" for the given repository path to retrieve object info and content efficiently. +// The CatFileBatch and the readers create by it should only be used in the same goroutine. +func NewBatch(ctx context.Context, repoPath string) (CatFileBatchCloser, error) { + if DefaultFeatures().SupportCatFileBatchCommand { + return newCatFileBatchCommand(ctx, repoPath) + } + return newCatFileBatchLegacy(ctx, repoPath) +} diff --git a/modules/git/catfile_batch_command.go b/modules/git/catfile_batch_command.go new file mode 100644 index 0000000000..ae3fb9e129 --- /dev/null +++ b/modules/git/catfile_batch_command.go @@ -0,0 +1,63 @@ +// Copyright 2026 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package git + +import ( + "context" + + "code.gitea.io/gitea/modules/git/gitcmd" +) + +// catFileBatchCommand implements the CatFileBatch interface using the "cat-file --batch-command" command +// for git version >= 2.36 +// ref: https://git-scm.com/docs/git-cat-file#Documentation/git-cat-file.txt---batch-command +type catFileBatchCommand struct { + ctx context.Context + repoPath string + batch *catFileBatchCommunicator +} + +var _ CatFileBatch = (*catFileBatchCommand)(nil) + +func newCatFileBatchCommand(ctx context.Context, repoPath string) (*catFileBatchCommand, error) { + if err := ensureValidGitRepository(ctx, repoPath); err != nil { + return nil, err + } + return &catFileBatchCommand{ctx: ctx, repoPath: repoPath}, nil +} + +func (b *catFileBatchCommand) getBatch() *catFileBatchCommunicator { + if b.batch != nil { + return b.batch + } + b.batch = newCatFileBatch(b.ctx, b.repoPath, gitcmd.NewCommand("cat-file", "--batch-command")) + return b.batch +} + +func (b *catFileBatchCommand) QueryContent(obj string) (*CatFileObject, BufferedReader, error) { + _, err := b.getBatch().writer.Write([]byte("contents " + obj + "\n")) + if err != nil { + return nil, nil, err + } + info, err := catFileBatchParseInfoLine(b.getBatch().reader) + if err != nil { + return nil, nil, err + } + return info, b.getBatch().reader, nil +} + +func (b *catFileBatchCommand) QueryInfo(obj string) (*CatFileObject, error) { + _, err := b.getBatch().writer.Write([]byte("info " + obj + "\n")) + if err != nil { + return nil, err + } + return catFileBatchParseInfoLine(b.getBatch().reader) +} + +func (b *catFileBatchCommand) Close() { + if b.batch != nil { + b.batch.Close() + b.batch = nil + } +} diff --git a/modules/git/catfile_batch_legacy.go b/modules/git/catfile_batch_legacy.go new file mode 100644 index 0000000000..714ef022c3 --- /dev/null +++ b/modules/git/catfile_batch_legacy.go @@ -0,0 +1,78 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package git + +import ( + "context" + "io" + + "code.gitea.io/gitea/modules/git/gitcmd" +) + +// catFileBatchLegacy implements the CatFileBatch interface using the "cat-file --batch" command and "cat-file --batch-check" command +// for git version < 2.36 +// to align with "--batch-command", it creates the two commands for querying object contents and object info separately +// ref: https://git-scm.com/docs/git-cat-file#Documentation/git-cat-file.txt---batch +type catFileBatchLegacy struct { + ctx context.Context + repoPath string + batchContent *catFileBatchCommunicator + batchCheck *catFileBatchCommunicator +} + +var _ CatFileBatchCloser = (*catFileBatchLegacy)(nil) + +func newCatFileBatchLegacy(ctx context.Context, repoPath string) (*catFileBatchLegacy, error) { + if err := ensureValidGitRepository(ctx, repoPath); err != nil { + return nil, err + } + return &catFileBatchLegacy{ctx: ctx, repoPath: repoPath}, nil +} + +func (b *catFileBatchLegacy) getBatchContent() *catFileBatchCommunicator { + if b.batchContent != nil { + return b.batchContent + } + b.batchContent = newCatFileBatch(b.ctx, b.repoPath, gitcmd.NewCommand("cat-file", "--batch")) + return b.batchContent +} + +func (b *catFileBatchLegacy) getBatchCheck() *catFileBatchCommunicator { + if b.batchCheck != nil { + return b.batchCheck + } + b.batchCheck = newCatFileBatch(b.ctx, b.repoPath, gitcmd.NewCommand("cat-file", "--batch-check")) + return b.batchCheck +} + +func (b *catFileBatchLegacy) QueryContent(obj string) (*CatFileObject, BufferedReader, error) { + _, err := io.WriteString(b.getBatchContent().writer, obj+"\n") + if err != nil { + return nil, nil, err + } + info, err := catFileBatchParseInfoLine(b.getBatchContent().reader) + if err != nil { + return nil, nil, err + } + return info, b.getBatchContent().reader, nil +} + +func (b *catFileBatchLegacy) QueryInfo(obj string) (*CatFileObject, error) { + _, err := io.WriteString(b.getBatchCheck().writer, obj+"\n") + if err != nil { + return nil, err + } + return catFileBatchParseInfoLine(b.getBatchCheck().reader) +} + +func (b *catFileBatchLegacy) Close() { + if b.batchContent != nil { + b.batchContent.Close() + b.batchContent = nil + } + if b.batchCheck != nil { + b.batchCheck.Close() + b.batchCheck = nil + } +} diff --git a/modules/git/catfile_batch_reader.go b/modules/git/catfile_batch_reader.go new file mode 100644 index 0000000000..9ad49c98d9 --- /dev/null +++ b/modules/git/catfile_batch_reader.go @@ -0,0 +1,283 @@ +// Copyright 2020 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package git + +import ( + "bufio" + "bytes" + "context" + "io" + "math" + "strconv" + "strings" + + "code.gitea.io/gitea/modules/git/gitcmd" + "code.gitea.io/gitea/modules/log" +) + +// writeCloserError wraps an io.WriteCloser with an additional CloseWithError function (for nio.Pipe) +type writeCloserError interface { + io.WriteCloser + CloseWithError(err error) error +} + +type catFileBatchCommunicator struct { + cancel context.CancelFunc + reader *bufio.Reader + writer writeCloserError +} + +func (b *catFileBatchCommunicator) Close() { + if b.cancel != nil { + b.cancel() + b.reader = nil + b.writer = nil + b.cancel = nil + } +} + +// ensureValidGitRepository runs git rev-parse in the repository path - thus ensuring that the repository is a valid repository. +// Run before opening git cat-file. +// This is needed otherwise the git cat-file will hang for invalid repositories. +// FIXME: the comment is from https://github.com/go-gitea/gitea/pull/17991 but it doesn't seem to be true. +// The real problem is that Golang's Cmd.Wait hangs because it waits for the pipes to be closed, but we can't close the pipes before Wait returns +// Need to refactor to use StdinPipe and StdoutPipe +func ensureValidGitRepository(ctx context.Context, repoPath string) error { + stderr := strings.Builder{} + err := gitcmd.NewCommand("rev-parse"). + WithDir(repoPath). + WithStderr(&stderr). + Run(ctx) + if err != nil { + return gitcmd.ConcatenateError(err, (&stderr).String()) + } + return nil +} + +// newCatFileBatch opens git cat-file --batch in the provided repo and returns a stdin pipe, a stdout reader and cancel function +func newCatFileBatch(ctx context.Context, repoPath string, cmdCatFile *gitcmd.Command) *catFileBatchCommunicator { + // We often want to feed the commits in order into cat-file --batch, followed by their trees and subtrees as necessary. + + // so let's create a batch stdin and stdout + batchStdinReader, batchStdinWriter := io.Pipe() + batchStdoutReader, batchStdoutWriter := io.Pipe() + ctx, ctxCancel := context.WithCancel(ctx) + closed := make(chan struct{}) + cancel := func() { + ctxCancel() + _ = batchStdinWriter.Close() + _ = batchStdoutReader.Close() + <-closed + } + + // Ensure cancel is called as soon as the provided context is cancelled + go func() { + <-ctx.Done() + cancel() + }() + + go func() { + stderr := strings.Builder{} + err := cmdCatFile. + WithDir(repoPath). + WithStdin(batchStdinReader). + WithStdout(batchStdoutWriter). + WithStderr(&stderr). + WithUseContextTimeout(true). + Run(ctx) + if err != nil { + _ = batchStdoutWriter.CloseWithError(gitcmd.ConcatenateError(err, (&stderr).String())) + _ = batchStdinReader.CloseWithError(gitcmd.ConcatenateError(err, (&stderr).String())) + } else { + _ = batchStdoutWriter.Close() + _ = batchStdinReader.Close() + } + close(closed) + }() + + // use a buffered reader to read from the cat-file --batch (StringReader.ReadString) + batchReader := bufio.NewReaderSize(batchStdoutReader, 32*1024) + + return &catFileBatchCommunicator{ + writer: batchStdinWriter, + reader: batchReader, + cancel: cancel, + } +} + +// catFileBatchParseInfoLine reads the header line from cat-file --batch +// We expect: SP SP LF +// then leaving the rest of the stream " LF" to be read +func catFileBatchParseInfoLine(rd BufferedReader) (*CatFileObject, error) { + typ, err := rd.ReadString('\n') + if err != nil { + return nil, err + } + if len(typ) == 1 { + typ, err = rd.ReadString('\n') + if err != nil { + return nil, err + } + } + idx := strings.IndexByte(typ, ' ') + if idx < 0 { + return nil, ErrNotExist{} + } + sha := typ[:idx] + typ = typ[idx+1:] + + idx = strings.IndexByte(typ, ' ') + if idx < 0 { + return nil, ErrNotExist{ID: sha} + } + + sizeStr := typ[idx+1 : len(typ)-1] + typ = typ[:idx] + + size, err := strconv.ParseInt(sizeStr, 10, 64) + return &CatFileObject{ID: sha, Type: typ, Size: size}, err +} + +// ReadTagObjectID reads a tag object ID hash from a cat-file --batch stream, throwing away the rest of the stream. +func ReadTagObjectID(rd BufferedReader, size int64) (string, error) { + var id string + var n int64 +headerLoop: + for { + line, err := rd.ReadBytes('\n') + if err != nil { + return "", err + } + n += int64(len(line)) + idx := bytes.Index(line, []byte{' '}) + if idx < 0 { + continue + } + + if string(line[:idx]) == "object" { + id = string(line[idx+1 : len(line)-1]) + break headerLoop + } + } + + // Discard the rest of the tag + return id, DiscardFull(rd, size-n+1) +} + +// ReadTreeID reads a tree ID from a cat-file --batch stream, throwing away the rest of the stream. +func ReadTreeID(rd BufferedReader, size int64) (string, error) { + var id string + var n int64 +headerLoop: + for { + line, err := rd.ReadBytes('\n') + if err != nil { + return "", err + } + n += int64(len(line)) + idx := bytes.Index(line, []byte{' '}) + if idx < 0 { + continue + } + + if string(line[:idx]) == "tree" { + id = string(line[idx+1 : len(line)-1]) + break headerLoop + } + } + + // Discard the rest of the commit + return id, DiscardFull(rd, size-n+1) +} + +// git tree files are a list: +// SP NUL +// +// Unfortunately this 20-byte notation is somewhat in conflict to all other git tools +// Therefore we need some method to convert these binary hashes to hex hashes + +// ParseCatFileTreeLine reads an entry from a tree in a cat-file --batch stream +// This carefully avoids allocations - except where fnameBuf is too small. +// It is recommended therefore to pass in an fnameBuf large enough to avoid almost all allocations +// +// Each line is composed of: +// SP NUL +// +// We don't attempt to convert the raw HASH to save a lot of time +func ParseCatFileTreeLine(objectFormat ObjectFormat, rd BufferedReader, modeBuf, fnameBuf, shaBuf []byte) (mode, fname, sha []byte, n int, err error) { + var readBytes []byte + + // Read the Mode & fname + readBytes, err = rd.ReadSlice('\x00') + if err != nil { + return mode, fname, sha, n, err + } + idx := bytes.IndexByte(readBytes, ' ') + if idx < 0 { + log.Debug("missing space in readBytes ParseCatFileTreeLine: %s", readBytes) + return mode, fname, sha, n, &ErrNotExist{} + } + + n += idx + 1 + copy(modeBuf, readBytes[:idx]) + if len(modeBuf) >= idx { + modeBuf = modeBuf[:idx] + } else { + modeBuf = append(modeBuf, readBytes[len(modeBuf):idx]...) + } + mode = modeBuf + + readBytes = readBytes[idx+1:] + + // Deal with the fname + copy(fnameBuf, readBytes) + if len(fnameBuf) > len(readBytes) { + fnameBuf = fnameBuf[:len(readBytes)] + } else { + fnameBuf = append(fnameBuf, readBytes[len(fnameBuf):]...) + } + for err == bufio.ErrBufferFull { + readBytes, err = rd.ReadSlice('\x00') + fnameBuf = append(fnameBuf, readBytes...) + } + n += len(fnameBuf) + if err != nil { + return mode, fname, sha, n, err + } + fnameBuf = fnameBuf[:len(fnameBuf)-1] + fname = fnameBuf + + // Deal with the binary hash + idx = 0 + length := objectFormat.FullLength() / 2 + for idx < length { + var read int + read, err = rd.Read(shaBuf[idx:length]) + n += read + if err != nil { + return mode, fname, sha, n, err + } + idx += read + } + sha = shaBuf + return mode, fname, sha, n, err +} + +func DiscardFull(rd BufferedReader, discard int64) error { + if discard > math.MaxInt32 { + n, err := rd.Discard(math.MaxInt32) + discard -= int64(n) + if err != nil { + return err + } + } + for discard > 0 { + n, err := rd.Discard(int(discard)) + discard -= int64(n) + if err != nil { + return err + } + } + return nil +} diff --git a/modules/git/catfile_batch_test.go b/modules/git/catfile_batch_test.go new file mode 100644 index 0000000000..2f5ade2b4e --- /dev/null +++ b/modules/git/catfile_batch_test.go @@ -0,0 +1,55 @@ +// Copyright 2026 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package git + +import ( + "io" + "path/filepath" + "testing" + + "code.gitea.io/gitea/modules/test" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestCatFileBatch(t *testing.T) { + defer test.MockVariableValue(&DefaultFeatures().SupportCatFileBatchCommand)() + DefaultFeatures().SupportCatFileBatchCommand = false + t.Run("LegacyCheck", testCatFileBatch) + DefaultFeatures().SupportCatFileBatchCommand = true + t.Run("BatchCommand", testCatFileBatch) +} + +func testCatFileBatch(t *testing.T) { + t.Run("CorruptedGitRepo", func(t *testing.T) { + tmpDir := t.TempDir() + _, err := NewBatch(t.Context(), tmpDir) + require.Error(t, err) + }) + + batch, err := NewBatch(t.Context(), filepath.Join(testReposDir, "repo1_bare")) + require.NoError(t, err) + defer batch.Close() + + t.Run("QueryInfo", func(t *testing.T) { + info, err := batch.QueryInfo("e2129701f1a4d54dc44f03c93bca0a2aec7c5449") + require.NoError(t, err) + assert.Equal(t, "e2129701f1a4d54dc44f03c93bca0a2aec7c5449", info.ID) + assert.Equal(t, "blob", info.Type) + assert.EqualValues(t, 6, info.Size) + }) + + t.Run("QueryContent", func(t *testing.T) { + info, rd, err := batch.QueryContent("e2129701f1a4d54dc44f03c93bca0a2aec7c5449") + require.NoError(t, err) + assert.Equal(t, "e2129701f1a4d54dc44f03c93bca0a2aec7c5449", info.ID) + assert.Equal(t, "blob", info.Type) + assert.EqualValues(t, 6, info.Size) + + content, err := io.ReadAll(io.LimitReader(rd, info.Size)) + require.NoError(t, err) + require.Equal(t, "file1\n", string(content)) + }) +} diff --git a/modules/git/commit_info_test.go b/modules/git/commit_info_test.go index 14a4174544..1e1697b006 100644 --- a/modules/git/commit_info_test.go +++ b/modules/git/commit_info_test.go @@ -30,28 +30,57 @@ func cloneRepo(tb testing.TB, url string) (string, error) { } func testGetCommitsInfo(t *testing.T, repo1 *Repository) { + type expectedEntryInfo struct { + CommitID string + Size int64 + } + // these test case are specific to the repo1 test repo testCases := []struct { CommitID string Path string - ExpectedIDs map[string]string + ExpectedIDs map[string]expectedEntryInfo ExpectedTreeCommit string }{ - {"8d92fc957a4d7cfd98bc375f0b7bb189a0d6c9f2", "", map[string]string{ - "file1.txt": "95bb4d39648ee7e325106df01a621c530863a653", - "file2.txt": "8d92fc957a4d7cfd98bc375f0b7bb189a0d6c9f2", + {"8d92fc957a4d7cfd98bc375f0b7bb189a0d6c9f2", "", map[string]expectedEntryInfo{ + "file1.txt": { + CommitID: "95bb4d39648ee7e325106df01a621c530863a653", + Size: 6, + }, + "file2.txt": { + CommitID: "8d92fc957a4d7cfd98bc375f0b7bb189a0d6c9f2", + Size: 6, + }, }, "8d92fc957a4d7cfd98bc375f0b7bb189a0d6c9f2"}, - {"2839944139e0de9737a044f78b0e4b40d989a9e3", "", map[string]string{ - "file1.txt": "2839944139e0de9737a044f78b0e4b40d989a9e3", - "branch1.txt": "9c9aef8dd84e02bc7ec12641deb4c930a7c30185", + {"2839944139e0de9737a044f78b0e4b40d989a9e3", "", map[string]expectedEntryInfo{ + "file1.txt": { + CommitID: "2839944139e0de9737a044f78b0e4b40d989a9e3", + Size: 15, + }, + "branch1.txt": { + CommitID: "9c9aef8dd84e02bc7ec12641deb4c930a7c30185", + Size: 8, + }, }, "2839944139e0de9737a044f78b0e4b40d989a9e3"}, - {"5c80b0245c1c6f8343fa418ec374b13b5d4ee658", "branch2", map[string]string{ - "branch2.txt": "5c80b0245c1c6f8343fa418ec374b13b5d4ee658", + {"5c80b0245c1c6f8343fa418ec374b13b5d4ee658", "branch2", map[string]expectedEntryInfo{ + "branch2.txt": { + CommitID: "5c80b0245c1c6f8343fa418ec374b13b5d4ee658", + Size: 8, + }, }, "5c80b0245c1c6f8343fa418ec374b13b5d4ee658"}, - {"feaf4ba6bc635fec442f46ddd4512416ec43c2c2", "", map[string]string{ - "file1.txt": "95bb4d39648ee7e325106df01a621c530863a653", - "file2.txt": "8d92fc957a4d7cfd98bc375f0b7bb189a0d6c9f2", - "foo": "37991dec2c8e592043f47155ce4808d4580f9123", + {"feaf4ba6bc635fec442f46ddd4512416ec43c2c2", "", map[string]expectedEntryInfo{ + "file1.txt": { + CommitID: "95bb4d39648ee7e325106df01a621c530863a653", + Size: 6, + }, + "file2.txt": { + CommitID: "8d92fc957a4d7cfd98bc375f0b7bb189a0d6c9f2", + Size: 6, + }, + "foo": { + CommitID: "37991dec2c8e592043f47155ce4808d4580f9123", + Size: 0, + }, }, "feaf4ba6bc635fec442f46ddd4512416ec43c2c2"}, } for _, testCase := range testCases { @@ -93,11 +122,12 @@ func testGetCommitsInfo(t *testing.T, repo1 *Repository) { for _, commitInfo := range commitsInfo { entry := commitInfo.Entry commit := commitInfo.Commit - expectedID, ok := testCase.ExpectedIDs[entry.Name()] + expectedInfo, ok := testCase.ExpectedIDs[entry.Name()] if !assert.True(t, ok) { continue } - assert.Equal(t, expectedID, commit.ID.String()) + assert.Equal(t, expectedInfo.CommitID, commit.ID.String()) + assert.Equal(t, expectedInfo.Size, entry.Size(), entry.Name()) } } } diff --git a/modules/git/git.go b/modules/git/git.go index 6d2c643b33..6b93ed072f 100644 --- a/modules/git/git.go +++ b/modules/git/git.go @@ -17,6 +17,7 @@ import ( "code.gitea.io/gitea/modules/git/gitcmd" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/tempdir" "github.com/hashicorp/go-version" ) @@ -26,11 +27,12 @@ const RequiredVersion = "2.0.0" // the minimum Git version required type Features struct { gitVersion *version.Version - UsingGogit bool - SupportProcReceive bool // >= 2.29 - SupportHashSha256 bool // >= 2.42, SHA-256 repositories no longer an ‘experimental curiosity’ - SupportedObjectFormats []ObjectFormat // sha1, sha256 - SupportCheckAttrOnBare bool // >= 2.40 + UsingGogit bool + SupportProcReceive bool // >= 2.29 + SupportHashSha256 bool // >= 2.42, SHA-256 repositories no longer an ‘experimental curiosity’ + SupportedObjectFormats []ObjectFormat // sha1, sha256 + SupportCheckAttrOnBare bool // >= 2.40 + SupportCatFileBatchCommand bool // >= 2.36, support `git cat-file --batch-command` } var defaultFeatures *Features @@ -75,6 +77,7 @@ func loadGitVersionFeatures() (*Features, error) { features.SupportedObjectFormats = append(features.SupportedObjectFormats, Sha256ObjectFormat) } features.SupportCheckAttrOnBare = features.CheckVersionAtLeast("2.40") + features.SupportCatFileBatchCommand = features.CheckVersionAtLeast("2.36") return features, nil } @@ -176,3 +179,25 @@ func InitFull() (err error) { return syncGitConfig(context.Background()) } + +// RunGitTests helps to init the git module and run tests. +// FIXME: GIT-PACKAGE-DEPENDENCY: the dependency is not right, setting.Git.HomePath is initialized in this package but used in gitcmd package +func RunGitTests(m interface{ Run() int }) { + fatalf := func(exitCode int, format string, args ...any) { + _, _ = fmt.Fprintf(os.Stderr, format, args...) + os.Exit(exitCode) + } + gitHomePath, cleanup, err := tempdir.OsTempDir("gitea-test").MkdirTempRandom("git-home") + if err != nil { + fatalf(1, "unable to create temp dir: %s", err.Error()) + } + defer cleanup() + + setting.Git.HomePath = gitHomePath + if err = InitFull(); err != nil { + fatalf(1, "failed to call Init: %s", err.Error()) + } + if exitCode := m.Run(); exitCode != 0 { + fatalf(exitCode, "run test failed, ExitCode=%d", exitCode) + } +} diff --git a/modules/git/git_test.go b/modules/git/git_test.go index 7a8ca74b01..44c018dd74 100644 --- a/modules/git/git_test.go +++ b/modules/git/git_test.go @@ -4,42 +4,14 @@ package git import ( - "fmt" - "os" "testing" - "code.gitea.io/gitea/modules/setting" - "code.gitea.io/gitea/modules/tempdir" - "github.com/hashicorp/go-version" "github.com/stretchr/testify/assert" ) -func testRun(m *testing.M) error { - gitHomePath, cleanup, err := tempdir.OsTempDir("gitea-test").MkdirTempRandom("git-home") - if err != nil { - return fmt.Errorf("unable to create temp dir: %w", err) - } - defer cleanup() - - setting.Git.HomePath = gitHomePath - - if err = InitFull(); err != nil { - return fmt.Errorf("failed to call Init: %w", err) - } - - exitCode := m.Run() - if exitCode != 0 { - return fmt.Errorf("run test failed, ExitCode=%d", exitCode) - } - return nil -} - func TestMain(m *testing.M) { - if err := testRun(m); err != nil { - _, _ = fmt.Fprintf(os.Stderr, "Test failed: %v", err) - os.Exit(1) - } + RunGitTests(m) } func TestParseGitVersion(t *testing.T) { diff --git a/modules/git/gitcmd/command_test.go b/modules/git/gitcmd/command_test.go index 1ba8b2e3e4..d813ffce6d 100644 --- a/modules/git/gitcmd/command_test.go +++ b/modules/git/gitcmd/command_test.go @@ -15,6 +15,8 @@ import ( ) func TestMain(m *testing.M) { + // FIXME: GIT-PACKAGE-DEPENDENCY: the dependency is not right. + // "setting.Git.HomePath" is initialized in "git" package but really used in "gitcmd" package gitHomePath, cleanup, err := tempdir.OsTempDir("gitea-test").MkdirTempRandom("git-home") if err != nil { _, _ = fmt.Fprintf(os.Stderr, "unable to create temp dir: %v", err) diff --git a/modules/git/languagestats/language_stats_nogogit.go b/modules/git/languagestats/language_stats_nogogit.go index da291ae848..1dbf184af6 100644 --- a/modules/git/languagestats/language_stats_nogogit.go +++ b/modules/git/languagestats/language_stats_nogogit.go @@ -28,28 +28,22 @@ func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64, } defer cancel() - writeID := func(id string) error { - _, err := batch.Writer().Write([]byte(id + "\n")) - return err - } - - if err := writeID(commitID); err != nil { + commitInfo, batchReader, err := batch.QueryContent(commitID) + if err != nil { return nil, err } - batchReader := batch.Reader() - shaBytes, typ, size, err := git.ReadBatchLine(batchReader) - if typ != "commit" { + if commitInfo.Type != "commit" { log.Debug("Unable to get commit for: %s. Err: %v", commitID, err) return nil, git.ErrNotExist{ID: commitID} } - sha, err := git.NewIDFromString(string(shaBytes)) + sha, err := git.NewIDFromString(commitInfo.ID) if err != nil { log.Debug("Unable to get commit for: %s. Err: %v", commitID, err) return nil, git.ErrNotExist{ID: commitID} } - commit, err := git.CommitFromReader(repo, sha, io.LimitReader(batchReader, size)) + commit, err := git.CommitFromReader(repo, sha, io.LimitReader(batchReader, commitInfo.Size)) if err != nil { log.Debug("Unable to get commit for: %s. Err: %v", commitID, err) return nil, err @@ -145,20 +139,16 @@ func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64, // If content can not be read or file is too big just do detection by filename if f.Size() <= bigFileSize { - if err := writeID(f.ID.String()); err != nil { - return nil, err - } - _, _, size, err := git.ReadBatchLine(batchReader) + info, _, err := batch.QueryContent(f.ID.String()) if err != nil { - log.Debug("Error reading blob: %s Err: %v", f.ID.String(), err) return nil, err } - sizeToRead := size + sizeToRead := info.Size discard := int64(1) - if size > fileSizeLimit { + if info.Size > fileSizeLimit { sizeToRead = fileSizeLimit - discard = size - fileSizeLimit + 1 + discard = info.Size - fileSizeLimit + 1 } _, err = contentBuf.ReadFrom(io.LimitReader(batchReader, sizeToRead)) diff --git a/modules/git/languagestats/main_test.go b/modules/git/languagestats/main_test.go index b8f9ded005..bf860f2a18 100644 --- a/modules/git/languagestats/main_test.go +++ b/modules/git/languagestats/main_test.go @@ -4,37 +4,11 @@ package languagestats import ( - "fmt" - "os" "testing" "code.gitea.io/gitea/modules/git" - "code.gitea.io/gitea/modules/setting" - "code.gitea.io/gitea/modules/util" ) -func testRun(m *testing.M) error { - gitHomePath, err := os.MkdirTemp(os.TempDir(), "git-home") - if err != nil { - return fmt.Errorf("unable to create temp dir: %w", err) - } - defer util.RemoveAll(gitHomePath) - setting.Git.HomePath = gitHomePath - - if err = git.InitFull(); err != nil { - return fmt.Errorf("failed to call Init: %w", err) - } - - exitCode := m.Run() - if exitCode != 0 { - return fmt.Errorf("run test failed, ExitCode=%d", exitCode) - } - return nil -} - func TestMain(m *testing.M) { - if err := testRun(m); err != nil { - _, _ = fmt.Fprintf(os.Stderr, "Test failed: %v", err) - os.Exit(1) - } + git.RunGitTests(m) } diff --git a/modules/git/parse_treeentry.go b/modules/git/parse_treeentry.go index e14d9f17b5..d46cd3344d 100644 --- a/modules/git/parse_treeentry.go +++ b/modules/git/parse_treeentry.go @@ -4,7 +4,6 @@ package git import ( - "bufio" "bytes" "fmt" "io" @@ -47,7 +46,7 @@ func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) { return entries, nil } -func catBatchParseTreeEntries(objectFormat ObjectFormat, ptree *Tree, rd *bufio.Reader, sz int64) ([]*TreeEntry, error) { +func catBatchParseTreeEntries(objectFormat ObjectFormat, ptree *Tree, rd BufferedReader, sz int64) ([]*TreeEntry, error) { fnameBuf := make([]byte, 4096) modeBuf := make([]byte, 40) shaBuf := make([]byte, objectFormat.FullLength()) diff --git a/modules/git/pipeline/lfs_nogogit.go b/modules/git/pipeline/lfs_nogogit.go index 6f1a860c1d..417af27037 100644 --- a/modules/git/pipeline/lfs_nogogit.go +++ b/modules/git/pipeline/lfs_nogogit.go @@ -8,6 +8,7 @@ package pipeline import ( "bufio" "bytes" + "encoding/hex" "io" "sort" "strings" @@ -53,12 +54,9 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err } defer cancel() - batchStdinWriter := batch.Writer() - batchReader := batch.Reader() - // We'll use a scanner for the revList because it's simpler than a bufio.Reader scan := bufio.NewScanner(revListReader) - trees := [][]byte{} + trees := []string{} paths := []string{} fnameBuf := make([]byte, 4096) @@ -67,14 +65,10 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err for scan.Scan() { // Get the next commit ID - commitID := scan.Bytes() + commitID := scan.Text() // push the commit to the cat-file --batch process - _, err := batchStdinWriter.Write(commitID) - if err != nil { - return nil, err - } - _, err = batchStdinWriter.Write([]byte{'\n'}) + info, batchReader, err := batch.QueryContent(commitID) if err != nil { return nil, err } @@ -84,26 +78,20 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err commitReadingLoop: for { - _, typ, size, err := git.ReadBatchLine(batchReader) - if err != nil { - return nil, err - } - - switch typ { + switch info.Type { case "tag": // This shouldn't happen but if it does well just get the commit and try again - id, err := git.ReadTagObjectID(batchReader, size) + id, err := git.ReadTagObjectID(batchReader, info.Size) if err != nil { return nil, err } - _, err = batchStdinWriter.Write([]byte(id + "\n")) - if err != nil { + if info, batchReader, err = batch.QueryContent(id); err != nil { return nil, err } continue case "commit": // Read in the commit to get its tree and in case this is one of the last used commits - curCommit, err = git.CommitFromReader(repo, git.MustIDFromString(string(commitID)), io.LimitReader(batchReader, size)) + curCommit, err = git.CommitFromReader(repo, git.MustIDFromString(commitID), io.LimitReader(batchReader, info.Size)) if err != nil { return nil, err } @@ -111,13 +99,13 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err return nil, err } - if _, err := batchStdinWriter.Write([]byte(curCommit.Tree.ID.String() + "\n")); err != nil { + if info, _, err = batch.QueryContent(curCommit.Tree.ID.String()); err != nil { return nil, err } curPath = "" case "tree": var n int64 - for n < size { + for n < info.Size { mode, fname, binObjectID, count, err := git.ParseCatFileTreeLine(objectID.Type(), batchReader, modeBuf, fnameBuf, workingShaBuf) if err != nil { return nil, err @@ -133,9 +121,7 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err } resultsMap[curCommit.ID.String()+":"+curPath+string(fname)] = &result } else if string(mode) == git.EntryModeTree.String() { - hexObjectID := make([]byte, objectID.Type().FullLength()) - git.BinToHex(objectID.Type(), binObjectID, hexObjectID) - trees = append(trees, hexObjectID) + trees = append(trees, hex.EncodeToString(binObjectID)) paths = append(paths, curPath+string(fname)+"/") } } @@ -143,11 +129,7 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err return nil, err } if len(trees) > 0 { - _, err := batchStdinWriter.Write(trees[len(trees)-1]) - if err != nil { - return nil, err - } - _, err = batchStdinWriter.Write([]byte("\n")) + info, _, err = batch.QueryContent(trees[len(trees)-1]) if err != nil { return nil, err } @@ -158,7 +140,7 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err break commitReadingLoop } default: - if err := git.DiscardFull(batchReader, size+1); err != nil { + if err := git.DiscardFull(batchReader, info.Size+1); err != nil { return nil, err } } diff --git a/modules/git/pipeline/lfs_test.go b/modules/git/pipeline/lfs_test.go new file mode 100644 index 0000000000..30fe2f93c2 --- /dev/null +++ b/modules/git/pipeline/lfs_test.go @@ -0,0 +1,38 @@ +// Copyright 2026 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package pipeline + +import ( + "testing" + "time" + + "code.gitea.io/gitea/modules/git" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestFindLFSFile(t *testing.T) { + repoPath := "../../../tests/gitea-repositories-meta/user2/lfs.git" + gitRepo, err := git.OpenRepository(t.Context(), repoPath) + require.NoError(t, err) + defer gitRepo.Close() + + objectID := git.MustIDFromString("2b6c6c4eaefa24b22f2092c3d54b263ff26feb58") + + stats, err := FindLFSFile(gitRepo, objectID) + require.NoError(t, err) + + tm, err := time.Parse(time.RFC3339, "2022-12-21T17:56:42-05:00") + require.NoError(t, err) + + assert.Len(t, stats, 1) + assert.Equal(t, "CONTRIBUTING.md", stats[0].Name) + assert.Equal(t, "73cf03db6ece34e12bf91e8853dc58f678f2f82d", stats[0].SHA) + assert.Equal(t, "Initial commit", stats[0].Summary) + assert.Equal(t, tm, stats[0].When) + assert.Empty(t, stats[0].ParentHashes) + assert.Equal(t, "master", stats[0].BranchName) + assert.Equal(t, "master", stats[0].FullCommitName) +} diff --git a/modules/git/pipeline/main_test.go b/modules/git/pipeline/main_test.go new file mode 100644 index 0000000000..fa5832b68c --- /dev/null +++ b/modules/git/pipeline/main_test.go @@ -0,0 +1,14 @@ +// Copyright 2026 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package pipeline + +import ( + "testing" + + "code.gitea.io/gitea/modules/git" +) + +func TestMain(m *testing.M) { + git.RunGitTests(m) +} diff --git a/modules/git/repo_base_nogogit.go b/modules/git/repo_base_nogogit.go index 97a43b90fd..775bbd4a09 100644 --- a/modules/git/repo_base_nogogit.go +++ b/modules/git/repo_base_nogogit.go @@ -9,8 +9,8 @@ package git import ( "context" "path/filepath" + "sync" - "code.gitea.io/gitea/modules/git/catfile" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/util" ) @@ -23,11 +23,9 @@ type Repository struct { tagCache *ObjectCache[*Tag] - batchInUse bool - batch catfile.Batch - - checkInUse bool - check catfile.Batch + mu sync.Mutex + catFileBatchCloser CatFileBatchCloser + catFileBatchInUse bool Ctx context.Context LastCommitCache *LastCommitCache @@ -56,69 +54,47 @@ func OpenRepository(ctx context.Context, repoPath string) (*Repository, error) { }, nil } -// CatFileBatch obtains a CatFileBatch for this repository -func (repo *Repository) CatFileBatch(ctx context.Context) (catfile.Batch, func(), error) { - if repo.batch == nil { - var err error - repo.batch, err = catfile.NewBatch(ctx, repo.Path) +// CatFileBatch obtains a "batch object provider" for this repository. +// It reuses an existing one if available, otherwise creates a new one. +func (repo *Repository) CatFileBatch(ctx context.Context) (_ CatFileBatch, closeFunc func(), err error) { + repo.mu.Lock() + defer repo.mu.Unlock() + + if repo.catFileBatchCloser == nil { + repo.catFileBatchCloser, err = NewBatch(ctx, repo.Path) if err != nil { + repo.catFileBatchCloser = nil // otherwise it is "interface(nil)" and will cause wrong logic return nil, nil, err } } - if !repo.batchInUse { - repo.batchInUse = true - return repo.batch, func() { - repo.batchInUse = false + if !repo.catFileBatchInUse { + repo.catFileBatchInUse = true + return CatFileBatch(repo.catFileBatchCloser), func() { + repo.mu.Lock() + defer repo.mu.Unlock() + repo.catFileBatchInUse = false }, nil } log.Debug("Opening temporary cat file batch for: %s", repo.Path) - tempBatch, err := catfile.NewBatch(ctx, repo.Path) + tempBatch, err := NewBatch(ctx, repo.Path) if err != nil { return nil, nil, err } return tempBatch, tempBatch.Close, nil } -// CatFileBatchCheck obtains a CatFileBatchCheck for this repository -func (repo *Repository) CatFileBatchCheck(ctx context.Context) (catfile.Batch, func(), error) { - if repo.check == nil { - var err error - repo.check, err = catfile.NewBatchCheck(ctx, repo.Path) - if err != nil { - return nil, nil, err - } - } - - if !repo.checkInUse { - repo.checkInUse = true - return repo.check, func() { - repo.checkInUse = false - }, nil - } - - log.Debug("Opening temporary cat file batch-check for: %s", repo.Path) - tempBatchCheck, err := catfile.NewBatchCheck(ctx, repo.Path) - if err != nil { - return nil, nil, err - } - return tempBatchCheck, tempBatchCheck.Close, nil -} - func (repo *Repository) Close() error { if repo == nil { return nil } - if repo.batch != nil { - repo.batch.Close() - repo.batch = nil - repo.batchInUse = false - } - if repo.check != nil { - repo.check.Close() - repo.check = nil - repo.checkInUse = false + repo.mu.Lock() + defer repo.mu.Unlock() + if repo.catFileBatchCloser != nil { + repo.catFileBatchCloser.Close() + repo.catFileBatchCloser = nil + repo.catFileBatchInUse = false } repo.LastCommitCache = nil repo.tagCache = nil diff --git a/modules/git/repo_base_nogogit_test.go b/modules/git/repo_base_nogogit_test.go new file mode 100644 index 0000000000..a12bbb73c2 --- /dev/null +++ b/modules/git/repo_base_nogogit_test.go @@ -0,0 +1,26 @@ +// Copyright 2026 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +//go:build !gogit + +package git + +import ( + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestRepoCatFileBatch(t *testing.T) { + t.Run("MissingRepoAndClose", func(t *testing.T) { + repo, err := OpenRepository(t.Context(), filepath.Join(testReposDir, "repo1_bare")) + require.NoError(t, err) + repo.Path = "/no-such" // when the repo is missing (it usually occurs during testing because the fixtures are synced frequently) + _, _, err = repo.CatFileBatch(t.Context()) + require.Error(t, err) + require.NoError(t, repo.Close()) // shouldn't panic + }) + + // TODO: test more methods and concurrency queries +} diff --git a/modules/git/repo_branch_nogogit.go b/modules/git/repo_branch_nogogit.go index 09873fb2c6..7eb8c48466 100644 --- a/modules/git/repo_branch_nogogit.go +++ b/modules/git/repo_branch_nogogit.go @@ -8,7 +8,6 @@ package git import ( "bufio" - "bytes" "context" "io" "strings" @@ -18,24 +17,24 @@ import ( ) // IsObjectExist returns true if the given object exists in the repository. +// FIXME: this function doesn't seem right, it is only used by GarbageCollectLFSMetaObjectsForRepo func (repo *Repository) IsObjectExist(name string) bool { if name == "" { return false } - batch, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + batch, cancel, err := repo.CatFileBatch(repo.Ctx) if err != nil { - log.Debug("Error writing to CatFileBatchCheck %v", err) + log.Debug("Error opening CatFileBatch %v", err) return false } defer cancel() - _, err = batch.Writer().Write([]byte(name + "\n")) + info, err := batch.QueryInfo(name) if err != nil { - log.Debug("Error writing to CatFileBatchCheck %v", err) + log.Debug("Error checking object info %v", err) return false } - sha, _, _, err := ReadBatchLine(batch.Reader()) - return err == nil && bytes.HasPrefix(sha, []byte(strings.TrimSpace(name))) + return strings.HasPrefix(info.ID, name) // FIXME: this logic doesn't seem right, why "HasPrefix" } // IsReferenceExist returns true if given reference exists in the repository. @@ -44,18 +43,13 @@ func (repo *Repository) IsReferenceExist(name string) bool { return false } - batch, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + batch, cancel, err := repo.CatFileBatch(repo.Ctx) if err != nil { - log.Debug("Error writing to CatFileBatchCheck %v", err) + log.Error("Error opening CatFileBatch %v", err) return false } defer cancel() - _, err = batch.Writer().Write([]byte(name + "\n")) - if err != nil { - log.Debug("Error writing to CatFileBatchCheck %v", err) - return false - } - _, _, _, err = ReadBatchLine(batch.Reader()) + _, err = batch.QueryInfo(name) return err == nil } diff --git a/modules/git/repo_commit_nogogit.go b/modules/git/repo_commit_nogogit.go index a3d728eb6d..0147c82a01 100644 --- a/modules/git/repo_commit_nogogit.go +++ b/modules/git/repo_commit_nogogit.go @@ -10,7 +10,6 @@ import ( "io" "strings" - "code.gitea.io/gitea/modules/git/catfile" "code.gitea.io/gitea/modules/git/gitcmd" "code.gitea.io/gitea/modules/log" ) @@ -37,26 +36,21 @@ func (repo *Repository) ResolveReference(name string) (string, error) { // GetRefCommitID returns the last commit ID string of given reference (branch or tag). func (repo *Repository) GetRefCommitID(name string) (string, error) { - batch, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + batch, cancel, err := repo.CatFileBatch(repo.Ctx) if err != nil { return "", err } defer cancel() - _, err = batch.Writer().Write([]byte(name + "\n")) - if err != nil { - return "", err - } - shaBs, _, _, err := ReadBatchLine(batch.Reader()) + info, err := batch.QueryInfo(name) if IsErrNotExist(err) { return "", ErrNotExist{name, ""} } - - return string(shaBs), nil + return info.ID, nil } // IsCommitExist returns true if given commit exists in current repository. func (repo *Repository) IsCommitExist(name string) bool { - if err := catfile.EnsureValidGitRepository(repo.Ctx, repo.Path); err != nil { + if err := ensureValidGitRepository(repo.Ctx, repo.Path); err != nil { log.Error("IsCommitExist: %v", err) return false } @@ -73,15 +67,11 @@ func (repo *Repository) getCommit(id ObjectID) (*Commit, error) { return nil, err } defer cancel() - - _, _ = batch.Writer().Write([]byte(id.String() + "\n")) - - return repo.getCommitFromBatchReader(batch, id) + return repo.getCommitWithBatch(batch, id) } -func (repo *Repository) getCommitFromBatchReader(batch catfile.Batch, id ObjectID) (*Commit, error) { - rd := batch.Reader() - _, typ, size, err := ReadBatchLine(rd) +func (repo *Repository) getCommitWithBatch(batch CatFileBatch, id ObjectID) (*Commit, error) { + info, rd, err := batch.QueryContent(id.String()) if err != nil { if errors.Is(err, io.EOF) || IsErrNotExist(err) { return nil, ErrNotExist{ID: id.String()} @@ -89,13 +79,13 @@ func (repo *Repository) getCommitFromBatchReader(batch catfile.Batch, id ObjectI return nil, err } - switch typ { + switch info.Type { case "missing": return nil, ErrNotExist{ID: id.String()} case "tag": // then we need to parse the tag // and load the commit - data, err := io.ReadAll(io.LimitReader(rd, size)) + data, err := io.ReadAll(io.LimitReader(rd, info.Size)) if err != nil { return nil, err } @@ -107,19 +97,9 @@ func (repo *Repository) getCommitFromBatchReader(batch catfile.Batch, id ObjectI if err != nil { return nil, err } - - if _, err := batch.Writer().Write([]byte(tag.Object.String() + "\n")); err != nil { - return nil, err - } - - commit, err := repo.getCommitFromBatchReader(batch, tag.Object) - if err != nil { - return nil, err - } - - return commit, nil + return repo.getCommitWithBatch(batch, tag.Object) case "commit": - commit, err := CommitFromReader(repo, id, io.LimitReader(rd, size)) + commit, err := CommitFromReader(repo, id, io.LimitReader(rd, info.Size)) if err != nil { return nil, err } @@ -130,8 +110,8 @@ func (repo *Repository) getCommitFromBatchReader(batch catfile.Batch, id ObjectI return commit, nil default: - log.Debug("Unknown typ: %s", typ) - if err := DiscardFull(rd, size+1); err != nil { + log.Debug("Unknown cat-file object type: %s", info.Type) + if err := DiscardFull(rd, info.Size+1); err != nil { return nil, err } return nil, ErrNotExist{ @@ -153,16 +133,12 @@ func (repo *Repository) ConvertToGitID(commitID string) (ObjectID, error) { } } - batch, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + batch, cancel, err := repo.CatFileBatch(repo.Ctx) if err != nil { return nil, err } defer cancel() - _, err = batch.Writer().Write([]byte(commitID + "\n")) - if err != nil { - return nil, err - } - sha, _, _, err := ReadBatchLine(batch.Reader()) + info, err := batch.QueryInfo(commitID) if err != nil { if IsErrNotExist(err) { return nil, ErrNotExist{commitID, ""} @@ -170,5 +146,5 @@ func (repo *Repository) ConvertToGitID(commitID string) (ObjectID, error) { return nil, err } - return MustIDFromString(string(sha)), nil + return MustIDFromString(info.ID), nil } diff --git a/modules/git/repo_tag_nogogit.go b/modules/git/repo_tag_nogogit.go index 88d9edcbd8..a9ac040821 100644 --- a/modules/git/repo_tag_nogogit.go +++ b/modules/git/repo_tag_nogogit.go @@ -24,23 +24,19 @@ func (repo *Repository) IsTagExist(name string) bool { // GetTagType gets the type of the tag, either commit (simple) or tag (annotated) func (repo *Repository) GetTagType(id ObjectID) (string, error) { - batch, cancel, err := repo.CatFileBatchCheck(repo.Ctx) + batch, cancel, err := repo.CatFileBatch(repo.Ctx) if err != nil { return "", err } defer cancel() - _, err = batch.Writer().Write([]byte(id.String() + "\n")) - if err != nil { - return "", err - } - _, typ, _, err := ReadBatchLine(batch.Reader()) + info, err := batch.QueryInfo(id.String()) if err != nil { if IsErrNotExist(err) { return "", ErrNotExist{ID: id.String()} } return "", err } - return typ, nil + return info.Type, nil } func (repo *Repository) getTag(tagID ObjectID, name string) (*Tag, error) { @@ -94,17 +90,14 @@ func (repo *Repository) getTag(tagID ObjectID, name string) (*Tag, error) { } defer cancel() - rd := batch.Reader() - if _, err := batch.Writer().Write([]byte(tagID.String() + "\n")); err != nil { - return nil, err - } - _, typ, size, err := ReadBatchLine(rd) + info, rd, err := batch.QueryContent(tagID.String()) if err != nil { if errors.Is(err, io.EOF) || IsErrNotExist(err) { return nil, ErrNotExist{ID: tagID.String()} } return nil, err } + typ, size := info.Type, info.Size if typ != "tag" { if err := DiscardFull(rd, size+1); err != nil { return nil, err diff --git a/modules/git/repo_tree_nogogit.go b/modules/git/repo_tree_nogogit.go index e6e2ee9fa0..82a61072c9 100644 --- a/modules/git/repo_tree_nogogit.go +++ b/modules/git/repo_tree_nogogit.go @@ -16,20 +16,15 @@ func (repo *Repository) getTree(id ObjectID) (*Tree, error) { } defer cancel() - wr := batch.Writer() - rd := batch.Reader() - _, _ = wr.Write([]byte(id.String() + "\n")) - - // ignore the SHA - _, typ, size, err := ReadBatchLine(rd) + info, rd, err := batch.QueryContent(id.String()) if err != nil { return nil, err } - switch typ { + switch info.Type { case "tag": resolvedID := id - data, err := io.ReadAll(io.LimitReader(rd, size)) + data, err := io.ReadAll(io.LimitReader(rd, info.Size)) if err != nil { return nil, err } @@ -38,17 +33,14 @@ func (repo *Repository) getTree(id ObjectID) (*Tree, error) { return nil, err } - if _, err := wr.Write([]byte(tag.Object.String() + "\n")); err != nil { - return nil, err - } - commit, err := repo.getCommitFromBatchReader(batch, tag.Object) + commit, err := repo.getCommitWithBatch(batch, tag.Object) if err != nil { return nil, err } commit.Tree.ResolvedID = resolvedID return &commit.Tree, nil case "commit": - commit, err := CommitFromReader(repo, id, io.LimitReader(rd, size)) + commit, err := CommitFromReader(repo, id, io.LimitReader(rd, info.Size)) if err != nil { return nil, err } @@ -64,14 +56,14 @@ func (repo *Repository) getTree(id ObjectID) (*Tree, error) { if err != nil { return nil, err } - tree.entries, err = catBatchParseTreeEntries(objectFormat, tree, rd, size) + tree.entries, err = catBatchParseTreeEntries(objectFormat, tree, rd, info.Size) if err != nil { return nil, err } tree.entriesParsed = true return tree, nil default: - if err := DiscardFull(rd, size+1); err != nil { + if err := DiscardFull(rd, info.Size+1); err != nil { return nil, err } return nil, ErrNotExist{ diff --git a/modules/git/tree_entry_nogogit.go b/modules/git/tree_entry_nogogit.go index 0ea7aeed9d..0a19b38d3e 100644 --- a/modules/git/tree_entry_nogogit.go +++ b/modules/git/tree_entry_nogogit.go @@ -15,23 +15,19 @@ func (te *TreeEntry) Size() int64 { return te.size } - batch, cancel, err := te.ptree.repo.CatFileBatchCheck(te.ptree.repo.Ctx) + batch, cancel, err := te.ptree.repo.CatFileBatch(te.ptree.repo.Ctx) if err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", te.ID.String(), te.ptree.repo.Path, err) return 0 } defer cancel() - _, err = batch.Writer().Write([]byte(te.ID.String() + "\n")) - if err != nil { - log.Debug("error whilst reading size for %s in %s. Error: %v", te.ID.String(), te.ptree.repo.Path, err) - return 0 - } - _, _, te.size, err = ReadBatchLine(batch.Reader()) + info, err := batch.QueryInfo(te.ID.String()) if err != nil { log.Debug("error whilst reading size for %s in %s. Error: %v", te.ID.String(), te.ptree.repo.Path, err) return 0 } + te.size = info.Size te.sized = true return te.size } diff --git a/modules/git/tree_nogogit.go b/modules/git/tree_nogogit.go index b8561dd352..d50c1ad629 100644 --- a/modules/git/tree_nogogit.go +++ b/modules/git/tree_nogogit.go @@ -33,26 +33,23 @@ func (t *Tree) ListEntries() (Entries, error) { } defer cancel() - wr := batch.Writer() - rd := batch.Reader() - _, _ = wr.Write([]byte(t.ID.String() + "\n")) - _, typ, sz, err := ReadBatchLine(rd) + info, rd, err := batch.QueryContent(t.ID.String()) if err != nil { return nil, err } - if typ == "commit" { - treeID, err := ReadTreeID(rd, sz) + + if info.Type == "commit" { + treeID, err := ReadTreeID(rd, info.Size) if err != nil && err != io.EOF { return nil, err } - _, _ = wr.Write([]byte(treeID + "\n")) - _, typ, sz, err = ReadBatchLine(rd) + info, rd, err = batch.QueryContent(treeID) if err != nil { return nil, err } } - if typ == "tree" { - t.entries, err = catBatchParseTreeEntries(t.ID.Type(), t, rd, sz) + if info.Type == "tree" { + t.entries, err = catBatchParseTreeEntries(t.ID.Type(), t, rd, info.Size) if err != nil { return nil, err } @@ -61,7 +58,7 @@ func (t *Tree) ListEntries() (Entries, error) { } // Not a tree just use ls-tree instead - if err := DiscardFull(rd, sz+1); err != nil { + if err := DiscardFull(rd, info.Size+1); err != nil { return nil, err } } diff --git a/modules/gitrepo/cat_file.go b/modules/gitrepo/cat_file.go index 0e5fc9951c..42ca23acde 100644 --- a/modules/gitrepo/cat_file.go +++ b/modules/gitrepo/cat_file.go @@ -6,9 +6,9 @@ package gitrepo import ( "context" - "code.gitea.io/gitea/modules/git/catfile" + "code.gitea.io/gitea/modules/git" ) -func NewBatch(ctx context.Context, repo Repository) (catfile.Batch, error) { - return catfile.NewBatch(ctx, repoPath(repo)) +func NewBatch(ctx context.Context, repo Repository) (git.CatFileBatchCloser, error) { + return git.NewBatch(ctx, repoPath(repo)) } diff --git a/modules/graceful/manager.go b/modules/graceful/manager.go index ee1872b999..51bd5a2334 100644 --- a/modules/graceful/manager.go +++ b/modules/graceful/manager.go @@ -74,12 +74,6 @@ func (g *Manager) RunWithCancel(rc RunCanceler) { g.RunAtShutdown(context.Background(), rc.Cancel) g.runningServerWaitGroup.Add(1) defer g.runningServerWaitGroup.Done() - defer func() { - if err := recover(); err != nil { - log.Critical("PANIC during RunWithCancel: %v\nStacktrace: %s", err, log.Stack(2)) - g.doShutdown() - } - }() rc.Run() } @@ -89,12 +83,6 @@ func (g *Manager) RunWithCancel(rc RunCanceler) { func (g *Manager) RunWithShutdownContext(run func(context.Context)) { g.runningServerWaitGroup.Add(1) defer g.runningServerWaitGroup.Done() - defer func() { - if err := recover(); err != nil { - log.Critical("PANIC during RunWithShutdownContext: %v\nStacktrace: %s", err, log.Stack(2)) - g.doShutdown() - } - }() ctx := g.ShutdownContext() pprof.SetGoroutineLabels(ctx) // We don't have a label to restore back to but I think this is fine run(ctx) diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go index a3727bd0cb..7027927eb7 100644 --- a/modules/indexer/code/bleve/bleve.go +++ b/modules/indexer/code/bleve/bleve.go @@ -15,7 +15,6 @@ import ( "code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/git" - "code.gitea.io/gitea/modules/git/catfile" "code.gitea.io/gitea/modules/git/gitcmd" "code.gitea.io/gitea/modules/gitrepo" "code.gitea.io/gitea/modules/indexer" @@ -151,7 +150,7 @@ func NewIndexer(indexDir string) *Indexer { } } -func (b *Indexer) addUpdate(ctx context.Context, catfileBatch catfile.Batch, commitSha string, +func (b *Indexer) addUpdate(ctx context.Context, catFileBatch git.CatFileBatch, commitSha string, update internal.FileUpdate, repo *repo_model.Repository, batch *inner_bleve.FlushingBatch, ) error { // Ignore vendored files in code search @@ -177,17 +176,11 @@ func (b *Indexer) addUpdate(ctx context.Context, catfileBatch catfile.Batch, com return b.addDelete(update.Filename, repo, batch) } - if _, err := catfileBatch.Writer().Write([]byte(update.BlobSha + "\n")); err != nil { - return err - } - - batchReader := catfileBatch.Reader() - _, _, size, err = git.ReadBatchLine(batchReader) + info, batchReader, err := catFileBatch.QueryContent(update.BlobSha) if err != nil { return err } - - fileContents, err := io.ReadAll(io.LimitReader(batchReader, size)) + fileContents, err := io.ReadAll(io.LimitReader(batchReader, info.Size)) if err != nil { return err } else if !typesniffer.DetectContentType(fileContents).IsText() { @@ -230,7 +223,6 @@ func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha st return err } } - catfileBatch.Close() } for _, filename := range changes.RemovedFilenames { if err := b.addDelete(filename, repo, batch); err != nil { diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go index 653df0bd11..8c3a2cf508 100644 --- a/modules/indexer/code/elasticsearch/elasticsearch.go +++ b/modules/indexer/code/elasticsearch/elasticsearch.go @@ -14,7 +14,6 @@ import ( "code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/git" - "code.gitea.io/gitea/modules/git/catfile" "code.gitea.io/gitea/modules/git/gitcmd" "code.gitea.io/gitea/modules/gitrepo" "code.gitea.io/gitea/modules/indexer" @@ -139,7 +138,7 @@ const ( }` ) -func (b *Indexer) addUpdate(ctx context.Context, batch catfile.Batch, sha string, update internal.FileUpdate, repo *repo_model.Repository) ([]elastic.BulkableRequest, error) { +func (b *Indexer) addUpdate(ctx context.Context, catFileBatch git.CatFileBatch, sha string, update internal.FileUpdate, repo *repo_model.Repository) ([]elastic.BulkableRequest, error) { // Ignore vendored files in code search if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) { return nil, nil @@ -162,17 +161,12 @@ func (b *Indexer) addUpdate(ctx context.Context, batch catfile.Batch, sha string return []elastic.BulkableRequest{b.addDelete(update.Filename, repo)}, nil } - if _, err := batch.Writer().Write([]byte(update.BlobSha + "\n")); err != nil { - return nil, err - } - - batchReader := batch.Reader() - _, _, size, err = git.ReadBatchLine(batchReader) + info, batchReader, err := catFileBatch.QueryContent(update.BlobSha) if err != nil { return nil, err } - fileContents, err := io.ReadAll(io.LimitReader(batchReader, size)) + fileContents, err := io.ReadAll(io.LimitReader(batchReader, info.Size)) if err != nil { return nil, err } else if !typesniffer.DetectContentType(fileContents).IsText() { @@ -226,7 +220,6 @@ func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha st reqs = append(reqs, updateReqs...) } } - batch.Close() } for _, filename := range changes.RemovedFilenames { diff --git a/modules/testlogger/testlogger.go b/modules/testlogger/testlogger.go index 60e281d403..b0f38644a7 100644 --- a/modules/testlogger/testlogger.go +++ b/modules/testlogger/testlogger.go @@ -4,6 +4,7 @@ package testlogger import ( + "context" "fmt" "os" "runtime" @@ -108,30 +109,33 @@ func PrintCurrentTest(t testing.TB, skip ...int) func() { actualSkip := util.OptionalArg(skip) + 1 _, filename, line, _ := runtime.Caller(actualSkip) + getRuntimeStackAll := func() string { + stack := make([]byte, 1024*1024) + n := runtime.Stack(stack, true) + return util.UnsafeBytesToString(stack[:n]) + } + + deferHasRun := false + t.Cleanup(func() { + if !deferHasRun { + Printf("!!! defer function hasn't been run but Cleanup is called\n%s", getRuntimeStackAll()) + } + }) Printf("=== %s (%s:%d)\n", log.NewColoredValue(t.Name()), strings.TrimPrefix(filename, prefix), line) WriterCloser.pushT(t) timeoutChecker := time.AfterFunc(TestTimeout, func() { - l := 128 * 1024 - var stack []byte - for { - stack = make([]byte, l) - n := runtime.Stack(stack, true) - if n <= l { - stack = stack[:n] - break - } - l = n - } - Printf("!!! %s ... timeout: %v ... stacktrace:\n%s\n\n", log.NewColoredValue(t.Name(), log.Bold, log.FgRed), TestTimeout, string(stack)) + Printf("!!! %s ... timeout: %v ... stacktrace:\n%s\n\n", log.NewColoredValue(t.Name(), log.Bold, log.FgRed), TestTimeout, getRuntimeStackAll()) }) return func() { + deferHasRun = true flushStart := time.Now() slowFlushChecker := time.AfterFunc(TestSlowFlush, func() { Printf("+++ %s ... still flushing after %v ...\n", log.NewColoredValue(t.Name(), log.Bold, log.FgRed), TestSlowFlush) }) if err := queue.GetManager().FlushAll(t.Context(), -1); err != nil { - t.Errorf("Flushing queues failed with error %v", err) + // if panic occurs, then the t.Context() is also cancelled ahead, so here it shows "context canceled" error. + t.Errorf("Flushing queues failed with error %q, cause %q", err, context.Cause(t.Context())) } slowFlushChecker.Stop() timeoutChecker.Stop() diff --git a/routers/web/repo/compare.go b/routers/web/repo/compare.go index bbe1ed3b5e..6ccf0901b9 100644 --- a/routers/web/repo/compare.go +++ b/routers/web/repo/compare.go @@ -149,9 +149,9 @@ func setCsvCompareContext(ctx *context.Context) { if err != nil { return nil, nil, err } - + var closer io.Closer = reader csvReader, err := csv_module.CreateReaderAndDetermineDelimiter(ctx, charset.ToUTF8WithFallbackReader(reader, charset.ConvertOpts{})) - return csvReader, reader, err + return csvReader, closer, err } baseReader, baseBlobCloser, err := csvReaderFromCommit(markup.NewRenderContext(ctx).WithRelativePath(diffFile.OldName), baseBlob) diff --git a/services/contexttest/context_tests.go b/services/contexttest/context_tests.go index 44d9f4a70f..3a072e089a 100644 --- a/services/contexttest/context_tests.go +++ b/services/contexttest/context_tests.go @@ -143,8 +143,9 @@ func LoadRepoCommit(t *testing.T, ctx gocontext.Context) { gitRepo, err := gitrepo.OpenRepository(ctx, repo.Repository) require.NoError(t, err) - defer gitRepo.Close() - + t.Cleanup(func() { + gitRepo.Close() + }) if repo.RefFullName == "" { repo.RefFullName = git_module.RefNameFromBranch(repo.Repository.DefaultBranch) }