Diff between 6ecf1d655271b263e3b5f73e8d4575acf3bfdbf8 and 56c30ed7bdcbcae5c2663c09971cc173222f8424

Changed Files

File Additions Deletions Status
app.py +7 -0 modified
git/blame.py +66 -0 added
git/commit.py +1 -0 modified
templates/blame.html +35 -0 added
templates/blob.html +1 -0 modified

Full Patch

diff --git a/app.py b/app.py
index 3b87e76..7823067 100644
--- a/app.py
+++ b/app.py
@@ -7,6 +7,7 @@ from git.tree import get_tree_items
 from git.blob import get_blob
 from git.misc import get_version
 from git.diff import get_diff
+from git.blame import get_blame
 
 app = Flask(__name__)
 
@@ -53,6 +54,12 @@ def repo_blob_path(repo_name, path):
     blob = get_blob(f"{repo_path}/{repo_name}", ref, path)
     return render_template("blob.html", repo_name=repo_name, ref=ref, path=path, blob=blob)
 
+@app.route("/<repo_name>/blame/<path:path>")  # path converter: the file path may contain slashes
+def repo_blame_path(repo_name, path):  # renders the per-line blame page for one file of a repository
+    ref = request.args.get('ref', 'HEAD')  # ?ref= query parameter, defaulting to HEAD
+    blame = get_blame(f"{repo_path}/{repo_name}", ref, path)  # None when path is not a file at ref
+    return render_template("blame.html", repo_name=repo_name, ref=ref, path=path, blame=blame)
+
 @app.route("/<repo_name>/diff")
 def repo_diff(repo_name):
     id1 = request.args.get('id1', 'HEAD')
diff --git a/git/blame.py b/git/blame.py
new file mode 100644
index 0000000..a230b63
--- /dev/null
+++ b/git/blame.py
@@ -0,0 +1,66 @@
+import pygit2 as git
+
+# Blame is very expensive, especially on repos with a long commit history, so use it sparingly.
+def get_blame(repo_path, ref="HEAD", file_path=""):
+    """Return per-line blame information for a file as of a given ref.
+
+    Args:
+        repo_path: filesystem path of the repository, passed to pygit2.Repository.
+        ref: revision spec (branch, tag, commit id, ...) resolved with revparse_single.
+        file_path: slash-separated path of the file inside the tree; trailing
+            slashes are ignored.
+
+    Returns:
+        A list with one dict per line of the file, in order, with keys
+        'line_num' (1-based), 'content' (the decoded line) and 'blame' (a dict
+        holding 'commit_id' and 'author', or None when no hunk covers the line).
+        None is returned when file_path does not name a blob at ref.
+    """
+    repo = git.Repository(repo_path)
+    # peel() unwraps tags down to their commit and hands a commit back unchanged
+    commit = repo.revparse_single(ref).peel(git.GIT_OBJECT_COMMIT)
+
+    # walk the tree one path component at a time down to the blob
+    # TODO: make this common across more modules
+    path = file_path.rstrip('/')
+    parts = path.split('/')
+    tree = commit.tree
+    blob = None
+    for depth, part in enumerate(parts, start=1):
+        entry = next((e for e in tree if e.name == part), None)
+        if entry is None:
+            return None  # path not found
+        if entry.type == git.GIT_OBJECT_TREE:
+            tree = repo.get(entry.id)
+        elif entry.type == git.GIT_OBJECT_BLOB and depth == len(parts):
+            blob = repo.get(entry.id)
+        else:
+            return None  # blob in the middle of the path, or a non-file entry
+    if blob is None:
+        return None  # path is empty or names a directory
+
+    # blame as of the requested commit; libgit2 defaults newest_commit to HEAD
+    blame = repo.blame(path, newest_commit=commit.id)
+
+    # split on '\n' only, the way git counts lines, so hunk line numbers line up
+    raw_lines = blob.data.split(b'\n')
+    if raw_lines[-1] == b'':
+        raw_lines.pop()  # a trailing newline (or an empty file) adds no extra line
+    content_lines = [raw.decode('utf-8', errors='replace').rstrip('\r') for raw in raw_lines]
+
+    # blame info per line, filled hunk by hunk
+    blame_lines = [None] * len(content_lines)
+    for hunk in blame:
+        # https://libgit2.org/docs/reference/main/blame/git_blame_hunk.html
+        start = hunk.final_start_line_number - 1  # 1-based line number to 0-based index
+        end = min(start + hunk.lines_in_hunk, len(blame_lines))  # never index past the file
+        hunk_commit = repo.get(hunk.final_commit_id)  # commit that last changed these lines
+        # TODO: more info if needed
+        info = {'commit_id': str(hunk.final_commit_id), 'author': hunk_commit.author}
+        for i in range(start, end):
+            blame_lines[i] = info  # lines of one hunk share the same dict
+
+    return [
+        {'line_num': num, 'content': text, 'blame': info}
+        for num, (text, info) in enumerate(zip(content_lines, blame_lines), start=1)
+    ]
\ No newline at end of file
diff --git a/git/commit.py b/git/commit.py
index 1c13bf5..6627a7e 100644
--- a/git/commit.py
+++ b/git/commit.py
@@ -4,6 +4,7 @@ import pygit2 as git
 def get_commits(path, ref="HEAD", max_count=None, skip=0):
     repo = git.Repository(path)
     commits = []
+    # TODO: accept blob oids to filter commits that touch specific blobs
     walker = repo.walk(repo.revparse_single(ref).id, git.GIT_SORT_TIME)
 
     n = 0
diff --git a/templates/blame.html b/templates/blame.html
new file mode 100644
index 0000000..7987724
--- /dev/null
+++ b/templates/blame.html
@@ -0,0 +1,35 @@
+{% block content %}
+<h1>Blame: {{ path }}</h1>
+{% if blame %}
+<table border="1"> <!-- temp default styling -->
+    <thead>
+        <tr>
+            <th>Line</th>
+            <th>Commit</th>
+            <th>Author</th>
+            <th>Date</th>
+            <th>Content</th>
+        </tr>
+    </thead>
+    <tbody>
+        {% for line in blame %}
+        <tr>
+            <td>{{ line.line_num }}</td>
+            {% if line.blame %}
+            <td><a href="{{ url_for('commit_detail', repo_name=repo_name, commit_id=line.blame.commit_id) }}">{{ line.blame.commit_id[:8] }}</a></td>
+            <td>{{ line.blame.author.name }}</td>
+            <td>{{ line.blame.author.time }}</td>
+            {% else %}
+            <td></td>
+            <td></td>
+            <td></td>
+            {% endif %}
+            <td><pre>{{ line.content }}</pre></td>
+        </tr>
+        {% endfor %}
+    </tbody>
+</table>
+{% else %}
+<p>No blame info</p>
+{% endif %}
+{% endblock %}
\ No newline at end of file
diff --git a/templates/blob.html b/templates/blob.html
index d80ae27..f8cd857 100644
--- a/templates/blob.html
+++ b/templates/blob.html
@@ -2,6 +2,7 @@
 <h1>Blob: {{ blob.name }}</h1>
 <p>Blob id: {{ blob.id }}</p>
 <p>Size: {{ blob.size }} bytes</p>
+<p><a href="{{ url_for('repo_blame_path', repo_name=repo_name, path=path, ref=ref) }}">Blame</a></p>
 <h2>Content:</h2>
 {% if blob.is_binary %}
     <pre>Binary...</pre>