Blob: commit.py

Blob id: d30b3cf6b835e0c1a25bdfd19f258420c4867875

Size: 3.9 KB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import pygit2 as git

# Human-readable label for each libgit2 delta status, keyed by the matching
# pygit2 GIT_DELTA_<LABEL> constant. Lookups for a status that is not listed
# here are expected to fall back to a caller-supplied default.
status_map = {
    getattr(git, f'GIT_DELTA_{label.upper()}'): label
    for label in (
        'added',
        'deleted',
        'modified',
        'renamed',
        'copied',
        'ignored',
        'untracked',
        'typechange',
        'unreadable',
        'conflicted',
    )
}

# retrieves commit history for given repo path and reference
def get_commits(path, ref="HEAD", max_count=None, skip=0):
    """Return one page of commit history reachable from ``ref``.

    The history is walked with ``GIT_SORT_TIME`` ordering. The first ``skip``
    commits are passed over, then up to ``max_count`` commits are collected
    (all remaining commits when ``max_count`` is None).

    Args:
        path: Location of the repository, passed to ``pygit2.Repository``.
        ref: Revision spec resolved with ``revparse_single`` (default "HEAD").
        max_count: Maximum number of commits to return, or None for no limit.
        skip: Number of commits to pass over before collecting.

    Returns:
        A list of dicts with the keys ``id``, ``message``, ``author``,
        ``committer``, ``date`` and ``diff_stats``. ``author``/``committer``
        are the pygit2 objects exposed by the commit, ``date`` is
        ``commit.commit_time`` and ``diff_stats`` holds ``insertions``,
        ``deletions`` and ``files_changed`` relative to the first parent
        (or to the empty tree for a root commit).

        NOTE(review): when ``ref`` cannot be resolved the function returns
        the 2-tuple ``([], "invalid reference")`` instead of a list. That
        shape differs from the success path; it is preserved here so that
        existing callers keep working, but it should be unified once the
        call sites have been checked.
    """
    repo = git.Repository(path)
    # TODO: accept blob oids to filter commits that touch specific blobs
    try:
        start = repo.revparse_single(ref)
    except Exception:
        # pygit2 does not surface the underlying libgit2 error code here,
        # so every resolution failure is reported the same way.
        return [], "invalid reference"

    commits = []
    walker = repo.walk(start.id, git.GIT_SORT_TIME)
    for index, commit in enumerate(walker):
        # Pagination: ignore the first `skip` commits, then collect until
        # `max_count` commits have been gathered.
        if index < skip:
            continue
        if max_count is not None and (index - skip) >= max_count:
            break

        parents = commit.parents
        if parents:
            # Diff against the first parent only; libgit2 computes the
            # aggregate stats cheaply.
            diff = repo.diff(parents[0], commit)
        else:
            # Root commit: compare against the empty tree. swap=True puts
            # the empty tree on the "old" side so files count as insertions.
            diff = commit.tree.diff_to_tree(swap=True)
        stats = diff.stats

        commits.append({
            'id': str(commit.id),
            'message': commit.message.strip(),
            'author': commit.author,
            'committer': commit.committer,
            'date': commit.commit_time,
            'diff_stats': {
                'insertions': stats.insertions,
                'deletions': stats.deletions,
                'files_changed': stats.files_changed
            }
        })
    return commits

# retrieves a single commit by its id
def get_commit(path, commit_id):
    """Return details, diff stats and per-file changes for a single commit.

    The commit is compared with its first parent. A root commit (no parents)
    is compared with the empty tree, so every file it introduces is reported
    as added instead of the commit appearing to change nothing.

    Args:
        path: Location of the repository, passed to ``pygit2.Repository``.
        commit_id: Revision spec resolved with ``revparse_single``.

    Returns:
        A dict with the keys ``id``, ``message``, ``author``, ``committer``,
        ``tree_id``, ``parent_id`` (None for a root commit), ``date``,
        ``diff_stats``, ``diff`` (patch text, or None when the diff is empty)
        and ``changed_files`` (one dict per delta with ``file``,
        ``additions``, ``deletions`` and ``status``).

    Raises:
        Whatever ``revparse_single`` raises when ``commit_id`` cannot be
        resolved; the error is not caught here.
    """
    repo = git.Repository(path)
    commit = repo.revparse_single(commit_id)
    parents = commit.parents

    if parents:
        diff = repo.diff(parents[0], commit)
    else:
        # Root commit: swap=True places the empty tree on the "old" side so
        # the commit's files show up as additions rather than deletions.
        diff = commit.tree.diff_to_tree(swap=True)

    # Aggregate stats are read before rename/copy detection, so a renamed
    # file is still counted here the way a plain parent diff counts it.
    stats = diff.stats
    diff_stats = {
        'insertions': stats.insertions,
        'deletions': stats.deletions,
        'files_changed': stats.files_changed
    }

    # Detect renames and copies before building the per-file listing.
    diff.find_similar()
    changed_files = []
    # Deltas and patches are produced in the same order, so pair them up
    # directly instead of indexing into a materialised list.
    for delta, patch in zip(diff.deltas, diff):
        # Deleted files have no new path; fall back to the old one.
        file_path = delta.new_file.path or delta.old_file.path
        _, additions, deletions = patch.line_stats
        changed_files.append({
            'file': file_path,
            'additions': additions,
            'deletions': deletions,
            'status': status_map.get(delta.status, 'unknown')
        })

    return {
        'id': str(commit.id),
        'message': commit.message.strip(),
        'author': commit.author,
        'committer': commit.committer,
        'tree_id': str(commit.tree.id),
        'parent_id': str(parents[0].id) if parents else None,
        'date': commit.commit_time,
        'diff_stats': diff_stats,
        'diff': diff.patch if diff else None,
        'changed_files': changed_files
    }