Skip to content

Commit

Permalink
When fetching, fetch all the relevant objects without leaving out any
Browse files Browse the repository at this point in the history
  • Loading branch information
adamziel committed Jan 7, 2025
1 parent 8dd84b3 commit 2d5da28
Show file tree
Hide file tree
Showing 6 changed files with 109 additions and 84 deletions.
1 change: 1 addition & 0 deletions packages/playground/data-liberation/bootstrap.php
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@
require_once __DIR__ . '/src/git/WP_Git_Filesystem.php';
require_once __DIR__ . '/src/git/WP_Git_Server.php';
require_once __DIR__ . '/src/git/WP_Git_Merge_Engine.php';
require_once __DIR__ . '/src/git/functions.php';

require_once __DIR__ . '/src/WP_Data_Liberation_HTML_Processor.php';
require_once __DIR__ . '/src/utf8_decoder.php';
Expand Down
5 changes: 3 additions & 2 deletions packages/playground/data-liberation/src/git/WP_Git_Client.php
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ public function force_push_one_commit() {
$pack_objects = [];
foreach($delta as $oid) {
// @TODO: just stream the saved object instead of re-reading and re-encoding it.
$this->index->read_object($oid);
$body = '';
do {
$body .= $this->index->get_body_chunk();
Expand Down Expand Up @@ -179,8 +180,8 @@ public function force_pull($branch_name=null, $path = '/') {
$all_path_related_oids = array_flip($all_path_related_oids);

// @TODO: Support "want" and "have" here
$new_oids = $remote_index->find_objects_added_in($remote_head, $local_ref, [
'old_tree_index' => $local_index,
$new_oids = $remote_index->find_objects_added_in($remote_head, $local_ref ?: null, [
'old_commit_repository' => $local_index,
]);
$objects_to_fetch = [];
foreach($new_oids as $oid) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,13 +165,22 @@ static public function encode_packet_lines(array $payloads): string {
}

static public function encode_packet_line(string $payload, $channel=''): string {
$payload = $channel . $payload;
if($payload !== '0000' && $payload !== '0001' && $payload !== '0002') {
$length = sprintf("%04x", strlen($payload) + 4);
} else {
$length = '';
// @TODO: Stream instead of buffering
if($payload === '0000' || $payload === '0001' || $payload === '0002') {
$payload = $channel . $payload;
return $payload;
}
return $length . $payload;

$chunk_size = 8000;
$offset = 0;
$lines = [];
while($offset < strlen($payload)) {
$chunk = $channel . substr($payload, $offset, $chunk_size);
$length = sprintf("%04x", strlen($chunk) + 4);
$lines[] = $length . $chunk;
$offset += $chunk_size;
}
return implode('', $lines);
}

/**
Expand Down
110 changes: 40 additions & 70 deletions packages/playground/data-liberation/src/git/WP_Git_Repository.php
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,7 @@ public function get_parsed_commit() {
if(null === $this->parsed_commit && $this->oid) {
$commit_body = $this->read_entire_object_contents();
$this->parsed_commit = WP_Git_Pack_Processor::parse_commit_body($commit_body);
$this->parsed_commit['oid'] = $this->oid;
if(!$this->parsed_commit) {
$this->last_error = 'Failed to parse commit';
$this->parsed_commit = [];
Expand Down Expand Up @@ -433,84 +434,53 @@ public function find_path_descendants($path) {
return $oids;
}

public function find_objects_added_in($new_tree_oid, $old_tree_oid=WP_Git_Repository::NULL_OID, $options=[]) {
$old_tree_index = $options['old_tree_index'] ?? $this;
if($old_tree_index === null) {
$old_tree_index = $this;
}

// Resolve the actual tree oid if $new_tree_oid is a commit
if(false === $this->read_object($new_tree_oid)) {
$this->last_error = 'Failed to read object: ' . $new_tree_oid;
return false;
}
if($this->get_type() === WP_Git_Pack_Processor::OBJECT_TYPE_COMMIT) {
// yield the commit object itself
$parsed_commit = $this->get_parsed_commit();
$new_tree_oid = $parsed_commit['tree'];
yield $this->oid;
}

// Resolve the actual tree oid if $old_tree_oid is a commit
if(!$this->is_null_oid($old_tree_oid)) {
if(false === $old_tree_index->read_object($old_tree_oid)) {
$this->last_error = 'Failed to read object: ' . $old_tree_oid;
return false;
public function find_objects_added_in($new_commit_hash, $old_commit_hash=WP_Git_Repository::NULL_OID, $options=[]) {
$new_commit = wp_git_get_parsed_commit($this, $new_commit_hash);
if(!$new_commit) {
throw new Exception('Failed to read new commit object: ' . $new_commit_hash);
}
// Resolve the actual tree oid if $old_commit_hash is a commit
$old_tree_hash = WP_Git_Repository::NULL_OID;
$old_objects_oids = [];
if(!wp_git_is_null_oid($old_commit_hash)) {
$old_commit_repository = $options['old_commit_repository'] ?? $this;
if(false === $old_commit_repository->read_object($old_commit_hash)) {
throw new Exception('Failed to read old commit object: ' . $old_commit_hash);
}
if($old_tree_index->get_type() === WP_Git_Pack_Processor::OBJECT_TYPE_COMMIT) {
$old_tree_oid = $old_tree_index->get_parsed_commit()['tree'];
if($old_commit_repository->get_type() !== WP_Git_Pack_Processor::OBJECT_TYPE_COMMIT) {
throw new Exception('Object was not a commit in find_objects_added_in: ' . $old_commit_repository->get_type());
}
$old_tree_hash = $old_commit_repository->get_parsed_commit()['tree'];
$old_objects_oids = array_flip(
wp_git_get_all_descendant_oids_in_tree($old_commit_repository, $old_tree_hash)
);
$old_objects_oids[$old_commit_hash] = true;
}

if($new_tree_oid === $old_tree_oid) {
return false;
}

$stack = [[$new_tree_oid, $old_tree_oid]];

while(!empty($stack)) {
list($current_new_oid, $current_old_oid) = array_pop($stack);
$new_objects_oids = [];
// Optimization – don't process the same tree more than once.
$processed_trees = [];

// Object is unchanged
if($current_new_oid === $current_old_oid) {
continue;
while($new_commit_hash !== $old_commit_hash && !wp_git_is_null_oid($new_commit_hash)) {
if(false === $this->read_object($new_commit_hash)) {
throw new Exception('Failed to read new commit object: ' . $new_commit_hash);
}
if($this->is_null_oid($current_new_oid)) {
continue;
}

if(false === $this->read_object($current_new_oid)) {
$this->last_error = 'Failed to read object: ' . $current_new_oid;
return false;
}
if($this->get_type() === WP_Git_Pack_Processor::OBJECT_TYPE_BLOB) {
yield $this->get_oid();
continue;
} else if($this->get_type() !== WP_Git_Pack_Processor::OBJECT_TYPE_TREE) {
_doing_it_wrong(__METHOD__, 'Invalid object type in find_objects_added_in: ' . $this->get_type(), '1.0.0');
return false;
}

$new_tree = $this->get_parsed_tree();
yield $this->get_oid();

$old_tree = [];
if(!$this->is_null_oid($current_old_oid)) {
if(false === $old_tree_index->read_object($current_old_oid)) {
$this->last_error = 'Failed to read object: ' . $current_old_oid;
return false;
$new_objects_oids[$new_commit_hash] = true;
$parsed_commit = $this->get_parsed_commit();
$tree_oid = $parsed_commit['tree'];
$new_objects_oids[$tree_oid] = true;
if(!isset($processed_trees[$tree_oid])) {
$descendants = wp_git_get_all_descendant_oids_in_tree($this, $tree_oid);
foreach($descendants as $descendant) {
$new_objects_oids[$descendant] = true;
}
$old_tree = $old_tree_index->get_parsed_tree();
}

foreach($new_tree as $name => $object) {
$stack[] = [$object['sha1'], $old_tree[$name]['sha1'] ?? null];
}
$processed_trees[$tree_oid] = true;
$new_commit_hash = $parsed_commit['parent'] ?? WP_Git_Repository::NULL_OID;
}
}

private function is_null_oid($oid) {
return $oid === null || $oid === WP_Git_Repository::NULL_OID;
$diff = array_diff_key($new_objects_oids, $old_objects_oids);
return array_keys($diff);
}

public function set_ref_head($ref, $oid) {
Expand Down Expand Up @@ -680,13 +650,13 @@ public function commit($options=[]) {
$is_amend = isset($options['amend']) && $options['amend'];

$this->read_object($this->get_ref_head('refs/heads/main'));
$old_tree_oid = $this->get_parsed_commit()['tree'];
$old_commit_hash = $this->get_parsed_commit()['tree'];

// Process trees bottom-up recursively
$root_tree_oid = $this->commit_tree('/', $changed_trees);

if(
$root_tree_oid === $old_tree_oid &&
$root_tree_oid === $old_commit_hash &&
!$is_amend
) {
// Nothing has changed, skip creating a new empty commit.
Expand Down
15 changes: 9 additions & 6 deletions packages/playground/data-liberation/src/git/WP_Git_Server.php
Original file line number Diff line number Diff line change
Expand Up @@ -282,25 +282,28 @@ public function handle_fetch_request($request_bytes, $response) {

$parsed_commit = $this->repository->get_parsed_commit();
if(!isset($parsed_commit['parent'])) {
$common_parent_hash = WP_Git_Repository::NULL_OID;
break;
}

$commit_hash = $parsed_commit['parent'];
if(isset($have_oids[$commit_hash])) {
$common_parent_hash = $commit_hash;
break;
}
}
$common_parent_hash = $commit_hash;

// For each wanted commit, find objects not present in any of the have commits
$new_objects = $this->repository->find_objects_added_in(
$want_hash,
$common_parent_hash
);
$objects_to_send = array_merge(
$objects_to_send,
iterator_to_array($new_objects)
);
if(false !== $new_objects) {
$objects_to_send = array_merge(
$objects_to_send,
$new_objects
);
}
if($common_parent_hash !== WP_Git_Repository::NULL_OID) {
$acks[] = $common_parent_hash;
}
Expand Down Expand Up @@ -358,7 +361,7 @@ public function handle_fetch_request($request_bytes, $response) {
// @TODO: Stream the pack data instead of buffering it
$pack_data = WP_Git_Pack_Processor::encode($pack_objects);

$response->write(WP_Git_Pack_Processor::encode_packet_line("\x01" . $pack_data));
$response->write(WP_Git_Pack_Processor::encode_packet_line($pack_data, "\x01"));
$response->write(WP_Git_Pack_Processor::encode_packet_line("0000"));
return true;
}
Expand Down
41 changes: 41 additions & 0 deletions packages/playground/data-liberation/src/git/functions.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
<?php

/**
 * Collects the OID of a tree and of every object reachable from it.
 *
 * The tree is walked iteratively with an explicit stack. Every entry of every
 * visited tree is appended to the result, and entries whose mode is
 * WP_Git_Pack_Processor::FILE_MODE_DIRECTORY are queued to be walked as well.
 *
 * Each distinct tree is read and expanded only once. A tree OID that is
 * referenced from several places is still listed every time it is referenced,
 * but its contents are not re-read and re-appended, so the set of returned
 * OIDs is the same as with a naive walk while the redundant work is skipped.
 *
 * Note: this calls read_object() on $repository, so whatever object the
 * repository had loaded before the call is replaced.
 *
 * @param WP_Git_Repository $repository Repository to read tree objects from.
 * @param string            $tree_oid   OID of the root tree to walk.
 * @return array|false List of OIDs, starting with $tree_oid itself, or false
 *                     when one of the trees could not be read.
 */
function wp_git_get_all_descendant_oids_in_tree(WP_Git_Repository $repository, $tree_oid) {
    // An unreadable root is reported quietly with `false`; only failures on
    // trees discovered during the walk go through _doing_it_wrong() below.
    if (false === $repository->read_object($tree_oid)) {
        return false;
    }

    $oids          = [$tree_oid];
    $pending_trees = [$tree_oid];
    // Tree OIDs that were already expanded, used as a set (oid => true).
    $walked_trees  = [];

    while (!empty($pending_trees)) {
        $tree_hash = array_pop($pending_trees);

        // The same tree may be referenced from several parents.
        // Its contents were already collected the first time around.
        if (isset($walked_trees[$tree_hash])) {
            continue;
        }
        $walked_trees[$tree_hash] = true;

        if (!$repository->read_object($tree_hash)) {
            _doing_it_wrong('wp_git_get_all_descendant_oids_in_tree', 'Failed to read object: ' . $tree_hash, '1.0.0');
            return false;
        }

        $tree = $repository->get_parsed_tree();
        foreach ($tree as $object) {
            $oids[] = $object['sha1'];
            if ($object['mode'] === WP_Git_Pack_Processor::FILE_MODE_DIRECTORY) {
                $pending_trees[] = $object['sha1'];
            }
        }
    }

    return $oids;
}

/**
 * Loads an object from the repository and returns it as a parsed commit.
 *
 * Note: this calls read_object() on $repository, so whatever object the
 * repository had loaded before the call is replaced.
 *
 * @param WP_Git_Repository $repository Repository to read the commit from.
 * @param string            $commit_oid OID of the commit to load.
 * @return array|false Whatever $repository->get_parsed_commit() returns for the
 *                     loaded commit, or false when the object could not be read
 *                     or is not a commit (both cases also trigger
 *                     _doing_it_wrong()).
 */
function wp_git_get_parsed_commit(WP_Git_Repository $repository, $commit_oid) {
    if (false === $repository->read_object($commit_oid)) {
        _doing_it_wrong('wp_git_get_parsed_commit', 'Failed to read object: ' . $commit_oid, '1.0.0');
        return false;
    }

    $object_type = $repository->get_type();
    if ($object_type !== WP_Git_Pack_Processor::OBJECT_TYPE_COMMIT) {
        // Name the offending object and this helper. The previous message
        // blamed find_objects_added_in regardless of the actual caller.
        _doing_it_wrong(
            'wp_git_get_parsed_commit',
            'Object ' . $commit_oid . ' was not a commit in wp_git_get_parsed_commit: ' . $object_type,
            '1.0.0'
        );
        return false;
    }

    return $repository->get_parsed_commit();
}

/**
 * Tells whether an OID stands for "no object".
 *
 * @param mixed $oid Value to check.
 * @return bool True when $oid is null or is identical to
 *              WP_Git_Repository::NULL_OID, false otherwise.
 */
function wp_git_is_null_oid($oid) {
    if (null === $oid) {
        return true;
    }

    return WP_Git_Repository::NULL_OID === $oid;
}

0 comments on commit 2d5da28

Please sign in to comment.