feat(diff): implemented the myers diff algorithm with longest common subsequence and printing the diff
This commit is contained in:
parent
f7043cc03a
commit
190aa7cc76
4 changed files with 167 additions and 7 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -1 +1,2 @@
|
||||||
build/
|
build/
|
||||||
|
test/
|
||||||
|
|
|
||||||
103
include/myers.h
103
include/myers.h
|
|
@ -1,9 +1,108 @@
|
||||||
#ifndef MYERS_H
|
#ifndef MYERS_H
|
||||||
#define MYERS_H
|
#define MYERS_H
|
||||||
|
|
||||||
#include <string.h>
|
#include "file.h"
|
||||||
#include "action_list.h"
|
#include "action_list.h"
|
||||||
|
|
||||||
ActionList* myers_diff(char**, char**, uint64_t, uint64_t);
|
// Returns the smaller of two unsigned 64-bit values (b when they are equal).
static inline uint64_t min(uint64_t a, uint64_t b) {
    if (a < b) {
        return a;
    }
    return b;
}
|
||||||
|
|
||||||
|
// Returns the larger of two unsigned 64-bit values (b when they are equal).
static inline uint64_t max(uint64_t a, uint64_t b) {
    if (a > b) {
        return a;
    }
    return b;
}
|
||||||
|
|
||||||
|
// diffing function based on Myers Diff Algorithm with LCS for the maximum matching
|
||||||
|
// Computes a line-based edit script that turns old_version into new_version.
//
// NOTE: despite its name this is the dynamic-programming LCS diff: it fills an
// (old_len + 1) x (new_len + 1) table of LCS lengths and walks it backwards.
// Time and memory are therefore proportional to old_len * new_len.
//
// Parameters:
//   old_version, new_version  files to compare; the first `lines` entries of
//                             `content` are compared with strcmp
//   old_offset, new_offset    added to every old / new line index that is
//                             stored in an action
//
// Returns the list created by new_list() with the actions appended from the
// start of the files towards the end:
//   DELETE  line_original = old_offset + index of the removed old line,
//           line_changed  = 0
//   INSERT  line_changed  = new_offset + index of the added new line,
//           line_original = old_offset + number of old lines that precede
//                           the insertion point
// If new_list() yields NULL, that NULL is returned. If the working memory for
// the table cannot be allocated, the list is returned without any actions,
// which callers cannot tell apart from "no differences".
//
// Declared static inline because the definition lives in a header; a plain
// external definition would be duplicated in every file that includes it.
static inline ActionList* myers_diff(File* old_version, File* new_version, uint64_t old_offset, uint64_t new_offset) {
    uint64_t old_len = old_version->lines;
    uint64_t new_len = new_version->lines;
    ActionList* result = new_list();

    // without a list there is nothing to append to
    if (!result) return result;

    // old file is empty: every new line is an insertion
    if (old_len == 0) {
        for (uint64_t i = 0; i < new_len; i++) {
            Action action = (Action){.type=INSERT, .line_original=old_offset, .line_changed=new_offset + i};
            add_action(result, action);
        }
        return result;
    }

    // new file is empty: every old line is a deletion
    if (new_len == 0) {
        for (uint64_t i = 0; i < old_len; i++) {
            Action action = (Action){.type=DELETE, .line_original=old_offset + i, .line_changed=0};
            add_action(result, action);
        }
        return result;
    }

    // the LCS table, stored row-major in a single allocation:
    // cell (i, j) holds the LCS length of the first i old and first j new lines
    uint64_t rows = old_len + 1;
    uint64_t cols = new_len + 1;

    // refuse sizes whose byte count does not fit into size_t
    if (rows == 0 || cols == 0 || rows > SIZE_MAX / sizeof(uint64_t) / cols) return result;

    uint64_t* lcs_table = calloc((size_t)(rows * cols), sizeof *lcs_table);
    if (!lcs_table) return result;

    // filling the LCS table (row 0 and column 0 stay zero from calloc)
    for (uint64_t i = 1; i <= old_len; i++) {
        uint64_t* prev_row = lcs_table + (i - 1) * cols;
        uint64_t* cur_row = lcs_table + i * cols;

        for (uint64_t j = 1; j <= new_len; j++) {
            if (strcmp(old_version->content[i-1], new_version->content[j-1]) == 0) {
                cur_row[j] = prev_row[j-1] + 1;
            } else {
                cur_row[j] = max(prev_row[j], cur_row[j-1]);
            }
        }
    }

    // actions need to be collected temporarily because the backward walk
    // produces them in reverse order; at most one action per line of either file
    Action* temp_actions = calloc((size_t)(old_len + new_len), sizeof *temp_actions);
    uint64_t temp_count = 0;

    if (!temp_actions) {
        free(lcs_table);
        return result;
    }

    // walk from the bottom-right corner back to the origin
    uint64_t i = old_len, j = new_len;

    while (i > 0 || j > 0) {
        if (i > 0 && j > 0 && strcmp(old_version->content[i-1], new_version->content[j-1]) == 0) {
            // common line: part of the LCS, no action
            i--;
            j--;
        } else if (j > 0 && (i == 0 || lcs_table[i * cols + (j - 1)] >= lcs_table[(i - 1) * cols + j])) {
            temp_actions[temp_count].type = INSERT;
            temp_actions[temp_count].line_original = old_offset + i;
            temp_actions[temp_count].line_changed = new_offset + j - 1;
            temp_count++;
            j--;
        } else if (i > 0) {
            temp_actions[temp_count].type = DELETE;
            temp_actions[temp_count].line_original = old_offset + i - 1;
            temp_actions[temp_count].line_changed = 0;
            temp_count++;
            i--;
        }
    }

    // reversing the actions into the correct order
    while (temp_count > 0) {
        add_action(result, temp_actions[--temp_count]);
    }

    free(temp_actions);
    free(lcs_table);

    return result;
}
|
||||||
|
|
||||||
#endif // MYERS_H
|
#endif // MYERS_H
|
||||||
|
|
|
||||||
65
include/utilities.h
Normal file
65
include/utilities.h
Normal file
|
|
@ -0,0 +1,65 @@
|
||||||
|
#ifndef UTILITIES_H
|
||||||
|
#define UTILITIES_H
|
||||||
|
|
||||||
|
#include "file.h"
|
||||||
|
#include "action_list.h"
|
||||||
|
|
||||||
|
#define RESET "\033[0m"
|
||||||
|
#define RED_BG "\033[41m"
|
||||||
|
#define GREEN_BG "\033[42m"
|
||||||
|
#define BLACK_FG "\033[30m"
|
||||||
|
|
||||||
|
// Prints old_version and new_version merged into one listing on stdout.
//
// Lines named by a DELETE action are printed from the old file on a red
// background with their 1-based old line number; lines named by an INSERT
// action are printed from the new file on a green background with their
// 1-based new line number; all other lines are printed once, uncoloured, with
// both numbers. Once one file is exhausted the rest of the other file is
// printed as deleted / inserted.
//
// Parameters:
//   old_version, new_version  the two files; `lines` entries of `content` are read
//   actions                   edit script; line_original of DELETE actions and
//                             line_changed of INSERT actions are used as 0-based
//                             indices into the old / new file. Must not be NULL.
//
// Actions whose index lies outside the corresponding file are ignored.
// Nothing is printed if the bookkeeping arrays cannot be allocated.
//
// NOTE(review): deleted/inserted rows print one line-number column while
// unchanged rows print two, so the '|' separators do not line up. Confirm
// whether padding inside the format strings is intended.
//
// Declared static inline because the definition lives in a header; a plain
// external definition would be duplicated in every file that includes it.
static inline void visualize_diff(File* old_version, File* new_version, ActionList* actions) {
    uint64_t old_total = old_version->lines;
    uint64_t new_total = new_version->lines;

    // calloc(0, ...) is allowed to return NULL; request at least one element
    // so that NULL always means a real allocation failure
    int* deleted_lines = calloc(old_total ? old_total : 1, sizeof *deleted_lines);
    int* inserted_lines = calloc(new_total ? new_total : 1, sizeof *inserted_lines);

    if (!deleted_lines || !inserted_lines) {
        free(deleted_lines);
        free(inserted_lines);
        return;
    }

    // mark the lines touched by the edit script, skipping indices that do not
    // belong to the file (e.g. actions produced with a non-zero offset)
    for (uint64_t i = 0; i < actions->len; i++) {
        if (actions->actions[i].type == DELETE) {
            if (actions->actions[i].line_original < old_total) {
                deleted_lines[actions->actions[i].line_original] = 1;
            }
        } else if (actions->actions[i].type == INSERT) {
            if (actions->actions[i].line_changed < new_total) {
                inserted_lines[actions->actions[i].line_changed] = 1;
            }
        }
    }

    uint64_t old_idx = 0, new_idx = 0;

    while (old_idx < old_total || new_idx < new_total) {
        // DELETE
        if (old_idx < old_total && deleted_lines[old_idx]) {
            printf("%s%s%4llu | %s%s\n", RED_BG, BLACK_FG, (unsigned long long)(old_idx + 1), old_version->content[old_idx], RESET);
            old_idx++;
        }
        // INSERT
        else if (new_idx < new_total && inserted_lines[new_idx]) {
            printf("%s%s %4llu | %s%s\n", GREEN_BG, BLACK_FG, (unsigned long long)(new_idx + 1), new_version->content[new_idx], RESET);
            new_idx++;
        }
        // STAYS
        else if (old_idx < old_total && new_idx < new_total) {
            printf("%4llu %4llu | %s\n", (unsigned long long)(old_idx + 1), (unsigned long long)(new_idx + 1), old_version->content[old_idx]);
            old_idx++;
            new_idx++;
        }
        // DELETE (new file exhausted: remaining old lines have no counterpart)
        else if (old_idx < old_total) {
            printf("%s%s%4llu | %s%s\n", RED_BG, BLACK_FG, (unsigned long long)(old_idx + 1), old_version->content[old_idx], RESET);
            old_idx++;
        }
        // INSERT (old file exhausted: remaining new lines have no counterpart)
        else if (new_idx < new_total) {
            printf("%s%s %4llu | %s%s\n", GREEN_BG, BLACK_FG, (unsigned long long)(new_idx + 1), new_version->content[new_idx], RESET);
            new_idx++;
        }
    }

    free(deleted_lines);
    free(inserted_lines);
}
|
||||||
|
|
||||||
|
#endif // UTILITIES_H
|
||||||
|
|
@ -1,5 +0,0 @@
|
||||||
#include "myers.h"
|
|
||||||
|
|
||||||
// Stub: ignores all four arguments and always returns NULL.
ActionList* myers_diff(char** file1, char** file2, uint64_t offset1, uint64_t offset2) {
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue