opengist/internal/git/output_parser.go

207 lines
4.2 KiB
Go

package git
import (
"bufio"
"bytes"
"encoding/csv"
"fmt"
"io"
"regexp"
"strings"
)
type File struct {
Filename string
OldFilename string
Content string
Truncated bool
IsCreated bool
IsDeleted bool
}
type CsvFile struct {
File
Header []string
Rows [][]string
}
type Commit struct {
Hash string
AuthorName string
AuthorEmail string
Timestamp string
Changed string
Files []File
}
func truncateCommandOutput(out io.Reader, maxBytes int64) (string, bool, error) {
var buf []byte
var err error
if maxBytes < 0 {
buf, err = io.ReadAll(out)
} else {
buf, err = io.ReadAll(io.LimitReader(out, maxBytes))
}
if err != nil {
return "", false, err
}
truncated := maxBytes > 0 && len(buf) >= int(maxBytes)
// Remove the last line if it's truncated
if truncated {
// Find the index of the last newline character
lastNewline := bytes.LastIndexByte(buf, '\n')
if lastNewline > 0 {
// Trim the data buffer up to the last newline character
buf = buf[:lastNewline]
}
}
return string(buf), truncated, nil
}
func parseLog(out io.Reader, maxBytes int) []*Commit {
scanner := bufio.NewScanner(out)
var commits []*Commit
var currentCommit *Commit
var currentFile *File
var isContent bool
var bytesRead = 0
scanNext := true
for {
if scanNext && !scanner.Scan() {
break
}
scanNext = true
// new commit found
currentFile = nil
currentCommit = &Commit{Hash: string(scanner.Bytes()[2:]), Files: []File{}}
scanner.Scan()
currentCommit.AuthorName = string(scanner.Bytes()[2:])
scanner.Scan()
currentCommit.AuthorEmail = string(scanner.Bytes()[2:])
scanner.Scan()
currentCommit.Timestamp = string(scanner.Bytes()[2:])
scanner.Scan()
// if there is no shortstat, it means that the commit is empty, we add it and move onto the next one
if scanner.Bytes()[0] != ' ' {
commits = append(commits, currentCommit)
// avoid scanning the next line, as we already did it
scanNext = false
continue
}
changed := scanner.Bytes()[1:]
changed = bytes.ReplaceAll(changed, []byte("(+)"), []byte(""))
changed = bytes.ReplaceAll(changed, []byte("(-)"), []byte(""))
currentCommit.Changed = string(changed)
// twice because --shortstat adds a new line
scanner.Scan()
scanner.Scan()
// commit header parsed
// files changes inside the commit
for {
line := scanner.Bytes()
// end of content of file
if len(line) == 0 {
isContent = false
if currentFile != nil {
currentCommit.Files = append(currentCommit.Files, *currentFile)
}
break
}
// new file found
if bytes.HasPrefix(line, []byte("diff --git")) {
// current file is finished, we can add it to the commit
if currentFile != nil {
currentCommit.Files = append(currentCommit.Files, *currentFile)
}
// create a new file
isContent = false
bytesRead = 0
currentFile = &File{}
filenameRegex := regexp.MustCompile(`^diff --git a/(.+) b/(.+)$`)
matches := filenameRegex.FindStringSubmatch(string(line))
if len(matches) == 3 {
currentFile.Filename = matches[2]
if matches[1] != matches[2] {
currentFile.OldFilename = matches[1]
}
}
scanner.Scan()
continue
}
if bytes.HasPrefix(line, []byte("new")) {
currentFile.IsCreated = true
}
if bytes.HasPrefix(line, []byte("deleted")) {
currentFile.IsDeleted = true
}
// file content found
if line[0] == '@' {
isContent = true
}
if isContent {
currentFile.Content += string(line) + "\n"
bytesRead += len(line)
if bytesRead > maxBytes {
currentFile.Truncated = true
currentFile.Content = ""
isContent = false
}
}
scanner.Scan()
}
commits = append(commits, currentCommit)
}
return commits
}
func ParseCsv(file *File) (*CsvFile, error) {
reader := csv.NewReader(strings.NewReader(file.Content))
records, err := reader.ReadAll()
if err != nil {
return nil, err
}
header := records[0]
numColumns := len(header)
for i := 1; i < len(records); i++ {
if len(records[i]) != numColumns {
return nil, fmt.Errorf("CSV file has invalid row at index %d", i)
}
}
return &CsvFile{
File: *file,
Header: header,
Rows: records[1:],
}, nil
}