feat(diff): implement the Myers diff algorithm using a longest common subsequence table, and add printing of the diff

This commit is contained in:
lisk77 2025-08-17 18:20:04 +02:00
parent f7043cc03a
commit 190aa7cc76
4 changed files with 167 additions and 7 deletions

1
.gitignore vendored
View file

@ -1 +1,2 @@
build/ build/
test/

View file

@ -1,9 +1,108 @@
#ifndef MYERS_H #ifndef MYERS_H
#define MYERS_H #define MYERS_H
#include <string.h> #include "file.h"
#include "action_list.h" #include "action_list.h"
ActionList* myers_diff(char**, char**, uint64_t, uint64_t); // minimum helper
// Returns the smaller of the two unsigned 64-bit values (either one when equal).
static inline uint64_t min(uint64_t a, uint64_t b) {
    if (b <= a) {
        return b;
    }
    return a;
}
// maximum helper: returns the larger of the two unsigned 64-bit values
// (either one when equal).
static inline uint64_t max(uint64_t a, uint64_t b) {
    if (a > b) {
        return a;
    }
    return b;
}
// Line-based diff of two files, computed from a longest-common-subsequence
// (LCS) table.
//
// The function fills the full (old_len + 1) x (new_len + 1) LCS table over the
// lines of both files (lines are compared with strcmp), then walks the table
// backwards from the bottom-right corner. Lines that belong to the LCS produce
// no action; every other line produces an INSERT or a DELETE.
//
// Parameters:
//   old_version, new_version - files to compare; `lines` is the number of
//                              lines and `content[k]` the k-th line.
//   old_offset, new_offset   - added to the line indices stored in the
//                              emitted actions.
//
// Returns the list obtained from new_list(), filled in ascending line order:
//   INSERT: line_changed  = new_offset + index of the inserted new line,
//           line_original = old_offset + index in the old file at which the
//                           backward walk stood when the insert was found.
//   DELETE: line_original = old_offset + index of the removed old line,
//           line_changed  = 0.
// If new_list() yields NULL, NULL is returned. If the working memory cannot be
// allocated, the (empty) list is returned unchanged, which a caller cannot
// tell apart from "no differences".
//
// NOTE(review): this is a non-static definition placed inside an include
// guard; if the header is included from more than one translation unit the
// definition will be duplicated at link time - consider moving the body to a
// .c file.
ActionList* myers_diff(File* old_version, File* new_version, uint64_t old_offset, uint64_t new_offset) {
    uint64_t old_len = old_version->lines;
    uint64_t new_len = new_version->lines;

    ActionList* result = new_list();
    if (!result) return NULL;

    // old file is empty: every line of the new file is an insertion
    if (old_len == 0) {
        for (uint64_t i = 0; i < new_len; i++) {
            Action action = (Action){.type=INSERT, .line_original=old_offset, .line_changed=new_offset + i};
            add_action(result, action);
        }
        return result;
    }

    // new file is empty: every line of the old file is a deletion
    if (new_len == 0) {
        for (uint64_t i = 0; i < old_len; i++) {
            Action action = (Action){.type=DELETE, .line_original=old_offset + i, .line_changed=0};
            add_action(result, action);
        }
        return result;
    }

    uint64_t* lcs_table = NULL;   // flat, row-major: cell (i, j) is lcs_table[i * cols + j]
    Action* temp_actions = NULL;  // actions in reverse order, as found by the backward walk
    uint64_t temp_count = 0;

    // Refuse table sizes whose byte count would not fit in size_t. The first
    // test also guarantees that new_len + 1 neither wraps nor is zero.
    const uint64_t max_cells = SIZE_MAX / sizeof(uint64_t);
    if (new_len >= max_cells || old_len >= max_cells / (new_len + 1)) goto cleanup;

    const uint64_t rows = old_len + 1;
    const uint64_t cols = new_len + 1;

    // One zero-initialised block: row 0 and column 0 stay 0 (empty prefix).
    lcs_table = calloc((size_t)(rows * cols), sizeof *lcs_table);
    if (!lcs_table) goto cleanup;

    // At most one action per line of either file.
    temp_actions = calloc((size_t)(old_len + new_len), sizeof *temp_actions);
    if (!temp_actions) goto cleanup;

    // filling the LCS table
    for (uint64_t i = 1; i <= old_len; i++) {
        const uint64_t* prev = lcs_table + (i - 1) * cols;
        uint64_t* cur = lcs_table + i * cols;
        for (uint64_t j = 1; j <= new_len; j++) {
            if (strcmp(old_version->content[i-1], new_version->content[j-1]) == 0) {
                cur[j] = prev[j-1] + 1;
            } else {
                cur[j] = max(prev[j], cur[j-1]);
            }
        }
    }

    // backward walk from the bottom-right corner of the table
    uint64_t i = old_len, j = new_len;
    while (i > 0 || j > 0) {
        if (i > 0 && j > 0 && strcmp(old_version->content[i-1], new_version->content[j-1]) == 0) {
            // common line: part of the LCS, no action
            i--;
            j--;
        } else if (j > 0 && (i == 0 || lcs_table[i * cols + (j - 1)] >= lcs_table[(i - 1) * cols + j])) {
            temp_actions[temp_count].type = INSERT;
            temp_actions[temp_count].line_original = old_offset + i;
            temp_actions[temp_count].line_changed = new_offset + j - 1;
            temp_count++;
            j--;
        } else if (i > 0) {
            temp_actions[temp_count].type = DELETE;
            temp_actions[temp_count].line_original = old_offset + i - 1;
            temp_actions[temp_count].line_changed = 0;
            temp_count++;
            i--;
        }
    }

    // reversing the actions into the correct order; counting down on the
    // unsigned index avoids any signed/unsigned conversion
    for (uint64_t k = temp_count; k-- > 0; ) {
        add_action(result, temp_actions[k]);
    }

cleanup:
    free(temp_actions);
    free(lcs_table);
    return result;
}
#endif // MYERS_H #endif // MYERS_H

65
include/utilities.h Normal file
View file

@ -0,0 +1,65 @@
#ifndef UTILITIES_H
#define UTILITIES_H
#include "file.h"
#include "action_list.h"
#define RESET "\033[0m"
#define RED_BG "\033[41m"
#define GREEN_BG "\033[42m"
#define BLACK_FG "\033[30m"
// Prints a line-by-line view of a diff to stdout.
//
// Deleted lines are printed with their old line number on a red background,
// inserted lines with their new line number on a green background, and
// unchanged lines with both numbers and no colour.
//
// Parameters:
//   old_version, new_version - the two files; `lines` is the line count and
//                              `content[k]` the text of the k-th line.
//   actions                  - DELETE actions mark `line_original` as an index
//                              into the old file, INSERT actions mark
//                              `line_changed` as an index into the new file.
//                              Indices outside the respective file are ignored.
//
// Prints nothing if the working memory cannot be allocated.
//
// NOTE(review): this is a non-static definition placed inside an include
// guard; including the header from more than one translation unit would
// duplicate the definition at link time.
void visualize_diff(File* old_version, File* new_version, ActionList* actions) {
    const uint64_t old_lines = old_version->lines;
    const uint64_t new_lines = new_version->lines;

    // calloc(0, ...) may legally return NULL; always request at least one
    // element so that an empty file is not mistaken for an allocation failure.
    int* deleted_lines = calloc(old_lines ? old_lines : 1, sizeof *deleted_lines);
    int* inserted_lines = calloc(new_lines ? new_lines : 1, sizeof *inserted_lines);

    if (!deleted_lines || !inserted_lines) {
        free(deleted_lines);
        free(inserted_lines);
        return;
    }

    // Mark the affected lines; skip indices that do not fall inside the file
    // so a stray or offset line number cannot write past the arrays.
    for (uint64_t i = 0; i < actions->len; i++) {
        if (actions->actions[i].type == DELETE) {
            uint64_t line = actions->actions[i].line_original;
            if (line < old_lines) deleted_lines[line] = 1;
        } else if (actions->actions[i].type == INSERT) {
            uint64_t line = actions->actions[i].line_changed;
            if (line < new_lines) inserted_lines[line] = 1;
        }
    }

    uint64_t old_idx = 0, new_idx = 0;

    while (old_idx < old_lines || new_idx < new_lines) {
        const int has_old = old_idx < old_lines;
        const int has_new = new_idx < new_lines;

        // DELETE: marked as deleted, or left over after the new file ran out
        if (has_old && (deleted_lines[old_idx] || !has_new)) {
            printf("%s%s%4llu | %s%s\n", RED_BG, BLACK_FG, (unsigned long long)(old_idx + 1), old_version->content[old_idx], RESET);
            old_idx++;
        }
        // INSERT: marked as inserted, or left over after the old file ran out
        else if (has_new && (inserted_lines[new_idx] || !has_old)) {
            printf("%s%s %4llu | %s%s\n", GREEN_BG, BLACK_FG, (unsigned long long)(new_idx + 1), new_version->content[new_idx], RESET);
            new_idx++;
        }
        // STAYS: both files still have a line and neither is marked
        else {
            printf("%4llu %4llu | %s\n", (unsigned long long)(old_idx + 1), (unsigned long long)(new_idx + 1), old_version->content[old_idx]);
            old_idx++;
            new_idx++;
        }
    }

    free(deleted_lines);
    free(inserted_lines);
}
#endif // UTILITIES_H

View file

@ -1,5 +0,0 @@
#include "myers.h"
// Stub implementation: ignores all of its arguments and always returns NULL.
ActionList* myers_diff(char** file1, char** file2, uint64_t offset1, uint64_t offset2) {
    (void)file1;
    (void)file2;
    (void)offset1;
    (void)offset2;

    return NULL;
}