[PATCH] erofs-utils: add OCI registry support

saz97 sa.z at qq.com
Sat Aug 3 00:01:34 AEST 2024


This patch adds support for reading data from an OCI registry to erofs-utils.
The following functions are included:

 1. `erofs_oci_registry_read`: Reads data at the current file offset.
 2. `erofs_oci_registry_pread`: Reads data from a specified offset.
 3. `erofs_oci_registry_lseek`: Adjusts the file offset.
 4. `open_oci_registry`: Handles the opening of the OCI registry.

Signed-off-by: Changzhi Xie <sa.z at qq.com>
---
 lib/oci_registry.c | 579 +++++++++++++++++++++++++++++++++++++++++++++
 lib/oci_registry.h |  17 ++
 2 files changed, 596 insertions(+)
 create mode 100644 lib/oci_registry.c
 create mode 100644 lib/oci_registry.h

diff --git a/lib/oci_registry.c b/lib/oci_registry.c
new file mode 100644
index 0000000..a3886ac
--- /dev/null
+++ b/lib/oci_registry.c
@@ -0,0 +1,579 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+
+#include "oci_registry.h"
+
+/*
+ * Request kinds passed as the `mode' argument of erofs_curl_setopt().
+ * Each one selects which request headers are attached to the transfer.
+ */
+/* token request: no extra request headers */
+#define erofs_token_mode 1
+/* image index request: authorization header only */
+#define erofs_image_index_mode 2
+/* manifest request: authorization and media type headers */
+#define erofs_manifest_mode 3
+/* blob request: authorization and media type headers, redirects followed */
+#define erofs_blob_mode 4
+
+/*
+ * Token endpoint template; the single %s is replaced with the repository
+ * name.  The scope is hard-coded to repositories below "library/".
+ */
+#define DOCKER_AUTH_URL "https://auth.docker.io/token?service=registry.docker.io&scope=repository:library/%s:pull"
+
+/*
+ * Growable in-memory buffer that accumulates an HTTP response body.
+ * It is filled by erofs_oci_registry_callback() and later served to the
+ * read/pread/lseek operations of the virtual file.
+ */
+struct erofs_oci_registry_memory {
+	char *memory;	/* heap buffer; the write callback keeps it NUL-terminated */
+	size_t size;	/* number of payload bytes in `memory', excluding the NUL */
+};
+
+/*
+ * Return the CURLM handle shared by every transfer in this file.  The handle
+ * is created with curl_multi_init() the first time it is requested and is
+ * kept in a function-local static afterwards.
+ */
+static CURLM *erofs_oci_registry_multi_handle(void)
+{
+	static CURLM *shared_handle;
+
+	if (shared_handle)
+		return shared_handle;
+
+	shared_handle = curl_multi_init();
+	return shared_handle;
+}
+
+/*
+ * libcurl write callback: append the received chunk to the
+ * struct erofs_oci_registry_memory passed through @userp.
+ *
+ * @contents: chunk delivered by libcurl
+ * @size, @nmemb: the chunk is size * nmemb bytes long
+ * @userp: destination buffer descriptor
+ *
+ * The buffer is grown by one extra byte so that it always ends with a NUL.
+ * Returns the number of bytes consumed, or 0 when the buffer cannot be grown.
+ */
+static size_t erofs_oci_registry_callback(void *contents, size_t size,
+					  size_t nmemb, void *userp)
+{
+	struct erofs_oci_registry_memory *sink = userp;
+	const size_t chunk = size * nmemb;
+	char *grown;
+
+	grown = realloc(sink->memory, sink->size + chunk + 1);
+	if (grown == NULL) {
+		fprintf(stderr, "realloc failed\n");
+		return 0;
+	}
+
+	memcpy(grown + sink->size, contents, chunk);
+	sink->size += chunk;
+	grown[sink->size] = '\0';
+	sink->memory = grown;
+
+	return chunk;
+}
+
+/*
+ * Copy up to @len bytes of the downloaded blob into @buf, starting at the
+ * current position vf->offset, and advance vf->offset by the amount copied.
+ *
+ * open_oci_registry() stores a *pointer* to the blob descriptor inside
+ * vf->payload, so the payload has to be dereferenced once to reach the
+ * descriptor (casting the payload itself to the struct reads the wrong bytes).
+ *
+ * Returns the number of bytes copied; 0 at or past the end of the blob, or
+ * when no blob is attached to @vf.
+ *
+ * NOTE(review): confirm that vf->offset does not share storage with
+ * vf->payload in struct erofs_vfile; the layout is not visible from here.
+ */
+ssize_t erofs_oci_registry_read(struct erofs_vfile *vf, void *buf, size_t len)
+{
+	struct erofs_oci_registry_memory *blob =
+		*(struct erofs_oci_registry_memory **)(vf->payload);
+	size_t avail;
+
+	if (!blob || !blob->memory || vf->offset >= blob->size)
+		return 0;
+
+	/* clamp the request to what is left after the current offset */
+	avail = blob->size - vf->offset;
+	if (len > avail)
+		len = avail;
+
+	memcpy(buf, blob->memory + vf->offset, len);
+	vf->offset += len;
+
+	return len;
+}
+
+/*
+ * Copy up to @len bytes of the downloaded blob into @buf, starting at the
+ * absolute position @offset.  vf->offset is neither read nor modified.
+ *
+ * open_oci_registry() stores a *pointer* to the blob descriptor inside
+ * vf->payload, so the payload has to be dereferenced once to reach the
+ * descriptor.
+ *
+ * Returns the number of bytes copied; 0 when @offset is at or past the end of
+ * the blob, or when no blob is attached to @vf.
+ */
+ssize_t erofs_oci_registry_pread(struct erofs_vfile *vf, void *buf,
+				 u64 offset, size_t len)
+{
+	struct erofs_oci_registry_memory *blob =
+		*(struct erofs_oci_registry_memory **)(vf->payload);
+
+	if (!blob || !blob->memory || offset >= blob->size)
+		return 0;
+
+	/*
+	 * Compare against the remaining length instead of computing
+	 * offset + len, which could wrap around for huge requests.
+	 */
+	if (len > blob->size - offset)
+		len = blob->size - offset;
+
+	memcpy(buf, blob->memory + offset, len);
+
+	return len;
+}
+
+/*
+ * Reposition vf->offset inside the downloaded blob.
+ *
+ * @offset is interpreted relative to the start of the blob (SEEK_SET), the
+ * current position (SEEK_CUR) or the end of the blob (SEEK_END).  Positions
+ * beyond the end of the blob are rejected.
+ *
+ * open_oci_registry() stores a *pointer* to the blob descriptor inside
+ * vf->payload, so the payload has to be dereferenced once to reach the
+ * descriptor.
+ *
+ * Returns the new offset, or -1 for an unknown @whence, an out-of-range
+ * target, or when no blob is attached to @vf.
+ *
+ * NOTE(review): confirm that vf->offset does not share storage with
+ * vf->payload in struct erofs_vfile; the layout is not visible from here.
+ */
+off_t erofs_oci_registry_lseek(struct erofs_vfile *vf, u64 offset, int whence)
+{
+	struct erofs_oci_registry_memory *blob =
+		*(struct erofs_oci_registry_memory **)(vf->payload);
+	u64 target;
+
+	if (!blob)
+		return -1;
+
+	switch (whence) {
+	case SEEK_SET:
+		target = offset;
+		break;
+	case SEEK_CUR:
+		target = vf->offset + offset;
+		break;
+	case SEEK_END:
+		target = blob->size + offset;
+		break;
+	default:
+		return -1;
+	}
+
+	if (target > blob->size)
+		return -1;
+
+	vf->offset = target;
+
+	return target;
+}
+
+/*
+ * Build an "Authorization: Bearer <token>" header line from the JSON token
+ * response held in @data.
+ *
+ * The response buffer in @data is released and reset on every path once it
+ * has been looked at; the caller still owns the descriptor itself.
+ *
+ * Returns a malloc()ed header string that the caller must free(), or NULL
+ * when no response was received, the JSON cannot be parsed, it carries no
+ * string "token" member, or memory is exhausted.
+ */
+static char *erofs_get_authorization_header(struct erofs_oci_registry_memory *data)
+{
+	static const char prefix[] = "Authorization: Bearer ";
+	json_object *parsed_json, *token_json;
+	const char *token;
+	char *auth_header = NULL;
+	size_t header_size;
+
+	if (!data || !data->memory) {
+		fprintf(stderr, "No data received\n");
+		return NULL;
+	}
+
+	parsed_json = json_tokener_parse(data->memory);
+	if (!parsed_json) {
+		fprintf(stderr, "Failed to parse JSON\n");
+		goto out;
+	}
+
+	if (!json_object_object_get_ex(parsed_json, "token", &token_json) ||
+	    !(token = json_object_get_string(token_json))) {
+		fprintf(stderr, "Token not found in JSON\n");
+		goto out;
+	}
+
+	/* sizeof(prefix) already accounts for the terminating NUL */
+	header_size = sizeof(prefix) + strlen(token);
+	auth_header = malloc(header_size);
+	if (!auth_header) {
+		fprintf(stderr, "Failed to allocate memory for authorization header\n");
+		goto out;
+	}
+
+	/*
+	 * Size the copy by the allocation, not by sizeof(auth_header), which
+	 * is only the size of a pointer and would truncate the prefix.
+	 */
+	snprintf(auth_header, header_size, "%s%s", prefix, token);
+
+out:
+	/* `token' points into parsed_json, so drop the tree only now */
+	if (parsed_json)
+		json_object_put(parsed_json);
+	free(data->memory);
+	data->memory = NULL;
+	data->size = 0;
+
+	return auth_header;
+}
+
+/*
+ * Pick the manifest matching @arch and @os out of the image index held in
+ * @data.
+ *
+ * @data: response buffer with the image index JSON; its buffer is released
+ *        and reset before returning
+ * @arch, @os: platform to look for ("architecture" and "os" members)
+ * @media_type: output buffer that receives "Accept: <mediaType>" of the
+ *              selected manifest, or an empty string when there is none.
+ *              Writes are limited to 512 bytes, the size of the buffers
+ *              allocated by open_oci_registry().
+ *
+ * Returns a strdup()ed digest that the caller must free(), or NULL when the
+ * response is missing or malformed or no entry matches.
+ */
+static char *erofs_get_manifest_digest(struct erofs_oci_registry_memory *data,
+				       const char *arch, const char *os, char *media_type)
+{
+	enum { OCI_MEDIA_TYPE_MAX = 512 };
+	json_object *parsed_json, *manifests_array;
+	char *digest = NULL;
+	size_t count, i;
+
+	/* never leave the caller with an uninitialised header line */
+	if (media_type)
+		media_type[0] = '\0';
+
+	if (!data || !data->memory) {
+		fprintf(stderr, "No data received\n");
+		return NULL;
+	}
+
+	parsed_json = json_tokener_parse(data->memory);
+	if (!parsed_json) {
+		fprintf(stderr, "Failed to parse JSON\n");
+		goto out;
+	}
+
+	if (!json_object_object_get_ex(parsed_json, "manifests", &manifests_array) ||
+	    json_object_get_type(manifests_array) != json_type_array) {
+		fprintf(stderr, "Cannot find manifests in JSON\n");
+		goto out;
+	}
+
+	count = json_object_array_length(manifests_array);
+
+	for (i = 0; i < count; i++) {
+		json_object *manifest = json_object_array_get_idx(manifests_array, i);
+		json_object *platform_json, *arch_json, *os_json;
+		json_object *digest_json, *media_type_json;
+		const char *manifest_arch, *manifest_os, *digest_str, *type_str;
+
+		if (!json_object_object_get_ex(manifest, "platform", &platform_json) ||
+		    !json_object_object_get_ex(platform_json, "architecture", &arch_json) ||
+		    !json_object_object_get_ex(platform_json, "os", &os_json) ||
+		    !json_object_object_get_ex(manifest, "digest", &digest_json))
+			continue;
+
+		manifest_arch = json_object_get_string(arch_json);
+		manifest_os = json_object_get_string(os_json);
+		digest_str = json_object_get_string(digest_json);
+
+		/* JSON null members come back as NULL strings */
+		if (!arch || !os || !manifest_arch || !manifest_os || !digest_str)
+			continue;
+		if (strcmp(manifest_arch, arch) != 0 || strcmp(manifest_os, os) != 0)
+			continue;
+
+		digest = strdup(digest_str);
+		if (!digest) {
+			fprintf(stderr, "Failed to allocate memory for manifest digest\n");
+			goto out;
+		}
+
+		if (media_type &&
+		    json_object_object_get_ex(manifest, "mediaType", &media_type_json) &&
+		    (type_str = json_object_get_string(media_type_json)))
+			snprintf(media_type, OCI_MEDIA_TYPE_MAX, "Accept: %s", type_str);
+		break;
+	}
+
+	if (!digest)
+		fprintf(stderr, "No matching arch and os found\n");
+
+out:
+	if (parsed_json)
+		json_object_put(parsed_json);
+	free(data->memory);
+	data->memory = NULL;
+	data->size = 0;
+
+	return digest;
+}
+
+/*
+ * Look up layer number @count (0-based) in the image manifest held in @data.
+ *
+ * @data: response buffer with the manifest JSON; its buffer is released and
+ *        reset before returning, like the other response parsers in this file
+ * @media_type: output buffer that receives "Accept: <mediaType>" of the
+ *              layer, or an empty string when there is none.  Writes are
+ *              limited to 512 bytes, the size of the buffers allocated by
+ *              open_oci_registry().
+ * @count: index into the "layers" array
+ *
+ * Returns a strdup()ed digest that the caller must free(), or NULL when the
+ * response is missing or malformed, @count is out of range, or the layer has
+ * no digest.
+ */
+static char *erofs_get_layer_digest(struct erofs_oci_registry_memory *data,
+				     char *media_type, int count)
+{
+	enum { OCI_MEDIA_TYPE_MAX = 512 };
+	json_object *parsed_json, *layers_array, *layer, *digest_json, *media_type_json;
+	const char *digest_str, *type_str;
+	char *digest = NULL;
+	int len;
+
+	/* never leave the caller with an uninitialised header line */
+	if (media_type)
+		media_type[0] = '\0';
+
+	if (!data || !data->memory) {
+		fprintf(stderr, "No data received\n");
+		return NULL;
+	}
+
+	parsed_json = json_tokener_parse(data->memory);
+	if (!parsed_json) {
+		fprintf(stderr, "Failed to parse JSON\n");
+		goto out;
+	}
+
+	if (!json_object_object_get_ex(parsed_json, "layers", &layers_array) ||
+	    json_object_get_type(layers_array) != json_type_array) {
+		fprintf(stderr, "Layers key not found or is not an array in JSON\n");
+		goto out;
+	}
+
+	len = json_object_array_length(layers_array);
+	if (count < 0 || count >= len) {
+		fprintf(stderr, "Count %d is out of bounds (0-%d)\n", count, len - 1);
+		goto out;
+	}
+
+	layer = json_object_array_get_idx(layers_array, count);
+
+	if (!json_object_object_get_ex(layer, "digest", &digest_json) ||
+	    !(digest_str = json_object_get_string(digest_json))) {
+		fprintf(stderr, "Digest not found in layer #%d\n", count);
+		goto out;
+	}
+
+	digest = strdup(digest_str);
+	if (!digest) {
+		fprintf(stderr, "Failed to allocate memory for layer digest\n");
+		goto out;
+	}
+
+	if (media_type &&
+	    json_object_object_get_ex(layer, "mediaType", &media_type_json) &&
+	    (type_str = json_object_get_string(media_type_json)))
+		snprintf(media_type, OCI_MEDIA_TYPE_MAX, "Accept: %s", type_str);
+
+out:
+	if (parsed_json)
+		json_object_put(parsed_json);
+	free(data->memory);
+	data->memory = NULL;
+	data->size = 0;
+
+	return digest;
+}
+
+/*
+ * Drive the transfers attached to @multi_handle until none is running.
+ *
+ * @still_running is updated by curl_multi_perform() on every round.  While
+ * transfers remain, curl_multi_poll() waits up to 1000 ms for activity before
+ * the next round.  A failure of either call is reported on stderr and ends
+ * the loop.
+ */
+static void erofs_curl_io(CURLM *multi_handle, int *still_running)
+{
+	for (;;) {
+		CURLMcode rc = curl_multi_perform(multi_handle, still_running);
+		int ready;
+
+		if (rc != CURLM_OK) {
+			fprintf(stderr, "curl_multi_perform() failed: %s\n",
+				curl_multi_strerror(rc));
+			return;
+		}
+
+		if (*still_running == 0)
+			return;
+
+		rc = curl_multi_poll(multi_handle, NULL, 0, 1000, &ready);
+		if (rc != CURLM_OK) {
+			fprintf(stderr, "curl_multi_poll failed: %s\n",
+				curl_multi_strerror(rc));
+			return;
+		}
+
+		if (*still_running <= 0)
+			return;
+	}
+}
+
+/*
+ * Prepare @curl for one registry request and attach it to @multi_handle.
+ *
+ * @auth_header: complete "Authorization: ..." line, used by every mode except
+ *               erofs_token_mode; skipped when NULL
+ * @media_type: complete "Accept: ..." line, used by erofs_manifest_mode and
+ *              erofs_blob_mode; skipped when NULL or empty
+ * @url: request URL
+ * @mode: one of the erofs_*_mode constants; erofs_blob_mode additionally
+ *        follows redirects
+ *
+ * Returns a newly allocated, empty response buffer that the write callback
+ * fills while the transfer runs; the caller owns it.  NULL is returned only
+ * when that allocation fails.  For an unknown @mode or an unusable handle
+ * nothing is scheduled and the buffer simply stays empty.
+ *
+ * NOTE(review): the header list handed to CURLOPT_HTTPHEADER has to stay
+ * alive until the transfer has finished and is never released with
+ * curl_slist_free_all(); freeing it needs a place to keep the list.
+ */
+static struct erofs_oci_registry_memory *erofs_curl_setopt(
+	CURLM *multi_handle, CURL *curl, const char *auth_header,
+	const char *media_type, const char *url, int mode)
+{
+	struct erofs_oci_registry_memory *data = malloc(sizeof(*data));
+	struct curl_slist *headers = NULL, *grown;
+	int send_auth = 0, send_accept = 0, follow_redirects = 0;
+	CURLMcode mc;
+
+	if (!data) {
+		fprintf(stderr, "Failed to allocate memory for erofs_oci_registry_memory\n");
+		return NULL;
+	}
+	data->memory = NULL;
+	data->size = 0;
+
+	/* the modes only differ in which headers and options they add */
+	switch (mode) {
+	case erofs_token_mode:
+		break;
+	case erofs_image_index_mode:
+		send_auth = 1;
+		break;
+	case erofs_manifest_mode:
+		send_auth = 1;
+		send_accept = 1;
+		break;
+	case erofs_blob_mode:
+		send_auth = 1;
+		send_accept = 1;
+		follow_redirects = 1;
+		break;
+	default:
+		return data;
+	}
+
+	if (!multi_handle || !curl || !url) {
+		fprintf(stderr, "Invalid curl handle or URL\n");
+		return data;
+	}
+
+	/* curl_slist_append() returns NULL on failure; keep the old list then */
+	if (send_auth && auth_header) {
+		grown = curl_slist_append(headers, auth_header);
+		if (grown)
+			headers = grown;
+		else
+			fprintf(stderr, "Failed to add authorization header\n");
+	}
+	if (send_accept && media_type && media_type[0] != '\0') {
+		grown = curl_slist_append(headers, media_type);
+		if (grown)
+			headers = grown;
+		else
+			fprintf(stderr, "Failed to add media type header\n");
+	}
+
+	curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, erofs_oci_registry_callback);
+	curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)data);
+	if (headers)
+		curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
+	curl_easy_setopt(curl, CURLOPT_URL, url);
+	if (follow_redirects)
+		curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
+
+	mc = curl_multi_add_handle(multi_handle, curl);
+	if (mc != CURLM_OK)
+		fprintf(stderr, "curl_multi_add_handle() failed: %s\n",
+			curl_multi_strerror(mc));
+
+	return data;
+}
+
+/*
+ * Split @url around the repository name that follows "/library/".
+ *
+ * For "https://host/v2/library/ubuntu/arch-arm64" the outputs are
+ *   @url_front  = "https://host/v2/library/"
+ *   @repository = "ubuntu"
+ *   @repo_end   = "/arch-arm64"   (empty when nothing follows the name)
+ *
+ * The URL tail is copied into the buffer @repo_end points to: assigning to
+ * the parameter itself would be invisible to the caller.  All three output
+ * buffers must hold at least 256 bytes, which is what open_oci_registry()
+ * allocates; longer components are truncated.  When @url is NULL or has no
+ * "/library/" component every output is an empty string.
+ */
+static void erofs_extract_urlfront_repository(const char *url, char *repo_end,
+					      char *repository, char *url_front)
+{
+	enum { OCI_URL_PART_MAX = 256 };
+	static const char marker[] = "/library/";
+	const char *repo_start, *tail;
+	size_t repo_len;
+
+	/* zero the whole tail buffer so that repo_end + 1 is readable too */
+	memset(repo_end, 0, OCI_URL_PART_MAX);
+	repository[0] = '\0';
+	url_front[0] = '\0';
+
+	if (!url)
+		return;
+
+	repo_start = strstr(url, marker);
+	if (!repo_start)
+		return;
+	repo_start += sizeof(marker) - 1;
+
+	tail = strchr(repo_start, '/');
+	repo_len = tail ? (size_t)(tail - repo_start) : strlen(repo_start);
+
+	snprintf(repository, OCI_URL_PART_MAX, "%.*s", (int)repo_len, repo_start);
+	snprintf(url_front, OCI_URL_PART_MAX, "%.*s", (int)(repo_start - url), url);
+	if (tail)
+		snprintf(repo_end, OCI_URL_PART_MAX, "%s", tail);
+}
+
+/*
+ * Request a pull token for @repository from the DOCKER_AUTH_URL endpoint and
+ * turn it into an authorization header line.
+ *
+ * @repository: repository name inserted into the token URL
+ * @token_header: receives a malloc()ed "Authorization: Bearer ..." string
+ *                that the caller must free(), or NULL on any failure
+ * @still_running: scratch counter handed to erofs_curl_io(); it is passed by
+ *                 value, so the caller's copy is not updated
+ */
+static void erofs_token_header(const char *repository,
+			       char **token_header, int still_running)
+{
+	CURLM *multi = erofs_oci_registry_multi_handle();
+	struct erofs_oci_registry_memory *data_token;
+	CURL *curl_token;
+	char url_token[512];
+	int n;
+
+	if (!token_header)
+		return;
+	*token_header = NULL;
+
+	if (!repository) {
+		fprintf(stderr, "No repository given for token request\n");
+		return;
+	}
+
+	n = snprintf(url_token, sizeof(url_token), DOCKER_AUTH_URL, repository);
+	if (n < 0 || (size_t)n >= sizeof(url_token)) {
+		fprintf(stderr, "Token URL is too long\n");
+		return;
+	}
+
+	curl_token = curl_easy_init();
+	if (!curl_token) {
+		fprintf(stderr, "curl_easy_init() failed\n");
+		return;
+	}
+
+	data_token = erofs_curl_setopt(multi, curl_token, NULL, NULL,
+				       url_token, erofs_token_mode);
+	if (data_token) {
+		erofs_curl_io(multi, &still_running);
+		*token_header = erofs_get_authorization_header(data_token);
+	}
+
+	curl_multi_remove_handle(multi, curl_token);
+	curl_easy_cleanup(curl_token);
+
+	if (data_token) {
+		/* release whatever part of the response is still attached */
+		free(data_token->memory);
+		free(data_token);
+	}
+}
+
+/*
+ * Decode a ".../blobs/<digest>[/...Accept: <type>]" URL tail.
+ *
+ * @blob_start: points at the "/blobs/" component
+ * @url_front, @repository: URL prefix and repository name used to rebuild
+ *                          the blob URL
+ * @digest_value: receives <digest>
+ * @media_type_value: receives the text starting at "Accept: ", or an empty
+ *                    string when the tail contains none
+ * @url_blob: receives "<url_front><repository>/blobs/<digest>"
+ *
+ * Writes are limited to the buffer sizes used by open_oci_registry():
+ * 128 bytes for @digest_value and 512 bytes for the other two outputs.
+ * Using sizeof() on the pointer parameters would only yield the size of a
+ * pointer and cut the strings short.
+ */
+static void erofs_blob_info(char *blob_start, const char *url_front,
+			    const char *repository, char *digest_value,
+			    char *media_type_value, char *url_blob)
+{
+	enum {
+		OCI_DIGEST_MAX = 128,
+		OCI_MEDIA_TYPE_MAX = 512,
+		OCI_BLOB_URL_MAX = 512,
+	};
+	const char *digest_start = blob_start + strlen("/blobs/");
+	const char *digest_end = strchr(digest_start, '/');
+	const char *media_type_start;
+	size_t digest_len;
+
+	if (!digest_end)
+		digest_end = digest_start + strlen(digest_start);
+
+	digest_len = digest_end - digest_start;
+	if (digest_len >= OCI_DIGEST_MAX)
+		digest_len = OCI_DIGEST_MAX - 1;
+	memcpy(digest_value, digest_start, digest_len);
+	digest_value[digest_len] = '\0';
+
+	media_type_start = strstr(digest_end, "Accept: ");
+	snprintf(media_type_value, OCI_MEDIA_TYPE_MAX, "%s",
+		 media_type_start ? media_type_start : "");
+
+	snprintf(url_blob, OCI_BLOB_URL_MAX, "%s%s/blobs/%s",
+		 url_front, repository, digest_value);
+}
+
+/*
+ * Parse the "/"-separated parameters that follow the repository name.
+ *
+ * @repo_end: URL tail starting at the "/" after the repository name, e.g.
+ *            "/arch-arm64/os-linux/digest-2"
+ * @arch: overwritten by the value of an "arch-<value>" segment
+ * @os: overwritten by the value of an "os-<value>" segment
+ * @digest: set from a "digest-<n>" segment to n - 1 (0-based layer index),
+ *          or to -1 when <n> is not a usable number; parsing stops there
+ *
+ * Outputs without a matching segment keep the caller's defaults and unknown
+ * segments are skipped.  Every segment is examined in turn, so a parameter
+ * that directly follows another one is not lost.  Copies are limited to
+ * 256 bytes, the size of the buffers allocated by open_oci_registry().
+ */
+static void erofs_parse_params(const char *repo_end,
+			       char *arch, char *os, int *digest)
+{
+	enum { OCI_PARAM_MAX = 256 };
+	const char *params;
+
+	if (!repo_end || *repo_end == '\0')
+		return;
+	params = repo_end + 1;
+
+	while (*params != '\0') {
+		const char *param_end = strchr(params, '/');
+		size_t seg_len = param_end ? (size_t)(param_end - params) :
+					     strlen(params);
+
+		if (strncmp(params, "arch-", 5) == 0) {
+			snprintf(arch, OCI_PARAM_MAX, "%.*s",
+				 (int)(seg_len - 5), params + 5);
+		} else if (strncmp(params, "os-", 3) == 0) {
+			snprintf(os, OCI_PARAM_MAX, "%.*s",
+				 (int)(seg_len - 3), params + 3);
+		} else if (strncmp(params, "digest-", 7) == 0) {
+			char *num_end;
+			long value = strtol(params + 7, &num_end, 10);
+
+			/* -1 makes the later bounds check reject the request */
+			if (num_end == params + 7 || value < 0 || value > 0x7fffffffL)
+				*digest = -1;
+			else
+				*digest = (int)value - 1;
+			break;
+		}
+
+		if (!param_end)
+			break;
+		params = param_end + 1;
+	}
+}
+
+/*
+ * Resolve the blob URL of one image layer.
+ *
+ * Two requests are made: the image index of the "latest" tag is fetched to
+ * find the manifest for @arch/@os, then that manifest is fetched to find
+ * layer number *@digest.
+ *
+ * @url_front, @repository: URL prefix and repository name
+ * @token_header: "Authorization: ..." line sent with both requests
+ * @media_type: scratch buffer for the manifest's "Accept: ..." line
+ * @media_type_blob: receives the layer's "Accept: ..." line
+ * @digest: 0-based layer index
+ * @still_running: transfer counter shared with erofs_curl_io()
+ * @url_blob: receives "<url_front><repository>/blobs/<digest>" (at most
+ *            512 bytes); left as an empty string when any step fails
+ */
+static void erofs_manifest(const char *url_front, const char *repository,
+			   const char *token_header, const char *arch,
+			   const char *os, char *media_type_blob,
+			   int *digest, int *still_running, char *media_type, char *url_blob)
+{
+	CURLM *multi = erofs_oci_registry_multi_handle();
+	struct erofs_oci_registry_memory *data_image_index;
+	struct erofs_oci_registry_memory *data_manifest;
+	char *digest_image_index = NULL;
+	char *digest_manifest = NULL;
+	CURL *curl_image_index, *curl_manifest;
+	char url_image_index[512];
+	char url_manifest[512];
+
+	/* defined results even when a step below fails */
+	url_blob[0] = '\0';
+	media_type_blob[0] = '\0';
+	media_type[0] = '\0';
+
+	/* step 1: image index -> digest of the platform's manifest */
+	snprintf(url_image_index, sizeof(url_image_index),
+		 "%s%s/manifests/latest", url_front, repository);
+
+	curl_image_index = curl_easy_init();
+	if (!curl_image_index) {
+		fprintf(stderr, "curl_easy_init() failed\n");
+		return;
+	}
+
+	data_image_index = erofs_curl_setopt(multi, curl_image_index,
+					     token_header, NULL,
+					     url_image_index,
+					     erofs_image_index_mode);
+	if (data_image_index) {
+		erofs_curl_io(multi, still_running);
+		digest_image_index = erofs_get_manifest_digest(data_image_index,
+							       arch, os, media_type);
+		free(data_image_index->memory);
+		free(data_image_index);
+	}
+
+	curl_multi_remove_handle(multi, curl_image_index);
+	curl_easy_cleanup(curl_image_index);
+
+	if (!digest_image_index)
+		return;
+
+	/* step 2: manifest -> digest of the requested layer */
+	snprintf(url_manifest, sizeof(url_manifest), "%s%s/manifests/%s",
+		 url_front, repository, digest_image_index);
+	free(digest_image_index);
+
+	curl_manifest = curl_easy_init();
+	if (!curl_manifest) {
+		fprintf(stderr, "curl_easy_init() failed\n");
+		return;
+	}
+
+	data_manifest = erofs_curl_setopt(multi, curl_manifest, token_header,
+					  media_type, url_manifest,
+					  erofs_manifest_mode);
+	if (data_manifest) {
+		erofs_curl_io(multi, still_running);
+		if (data_manifest->memory)
+			digest_manifest = erofs_get_layer_digest(data_manifest,
+								 media_type_blob,
+								 *digest);
+		else
+			fprintf(stderr, "No data received\n");
+		free(data_manifest->memory);
+		free(data_manifest);
+	}
+
+	curl_multi_remove_handle(multi, curl_manifest);
+	curl_easy_cleanup(curl_manifest);
+
+	if (!digest_manifest)
+		return;
+
+	snprintf(url_blob, 512, "%s%s/blobs/%s", url_front, repository, digest_manifest);
+	free(digest_manifest);
+}
+
+/*
+ * Return the part of @url that follows the repository name (starting at the
+ * "/" after it), or NULL when @url has no "/library/<name>/" prefix.
+ */
+static const char *erofs_oci_url_tail(const char *url)
+{
+	const char *repo = strstr(url, "/library/");
+
+	if (!repo)
+		return NULL;
+	return strchr(repo + strlen("/library/"), '/');
+}
+
+/*
+ * Download one blob from an OCI registry and expose it as a virtual file.
+ *
+ * @url has the form "<front>/library/<repository>" followed by either
+ *   "/blobs/<digest>[...Accept: <type>]"          to fetch that blob, or
+ *   "[/arch-<arch>][/os-<os>][/digest-<n>]"       to fetch layer <n> of the
+ *   image for that platform (defaults: amd64, linux, first layer).
+ *
+ * The whole blob is held in memory.  A pointer to its descriptor is stored
+ * in vf->payload for the read, pread and lseek operations.
+ *
+ * Returns a newly allocated virtual file, or NULL when the URL is not
+ * understood, a request fails, or memory is exhausted.
+ *
+ * NOTE(review): confirm that vf->offset does not share storage with
+ * vf->payload in struct erofs_vfile; the layout is not visible from here.
+ */
+struct erofs_vfile *open_oci_registry(const char *url)
+{
+	CURLM *multi = erofs_oci_registry_multi_handle();
+	struct erofs_oci_registry_memory *data_blob = NULL;
+	struct erofs_vfile *vf = NULL;
+	struct erofs_vfops *ops = NULL;
+	CURL *curl_blob = NULL;
+	char *token_header = NULL;
+	char *blob_start;
+	const char *tail, *accept_header;
+	/* sizes match what the helpers in this file expect */
+	char url_front[256] = "";
+	char repository[256] = "";
+	char arch[256] = "amd64";
+	char os[256] = "linux";
+	char repo_end[1024] = "";
+	char digest_value[128] = "";
+	char media_type[512] = "";
+	char media_type_value[512] = "";
+	char media_type_blob[512] = "";
+	char url_blob[512] = "";
+	long http_code = 0;
+	int digest = 0;
+	int still_running = 0;
+
+	if (!url)
+		return NULL;
+
+	erofs_extract_urlfront_repository(url, repo_end, repository, url_front);
+	if (url_front[0] == '\0' || repository[0] == '\0') {
+		fprintf(stderr, "Unsupported OCI registry URL: %s\n", url);
+		return NULL;
+	}
+
+	/* keep a private copy of everything after the repository name */
+	tail = erofs_oci_url_tail(url);
+	snprintf(repo_end, sizeof(repo_end), "%s", tail ? tail : "");
+
+	erofs_token_header(repository, &token_header, still_running);
+	if (!token_header)
+		return NULL;
+
+	blob_start = strstr(repo_end, "/blobs/");
+	if (blob_start) {
+		/* blob_start points into repo_end and must not be freed */
+		erofs_blob_info(blob_start, url_front, repository,
+				digest_value, media_type_value, url_blob);
+		accept_header = media_type_value;
+	} else {
+		if (repo_end[0] != '\0')
+			erofs_parse_params(repo_end, arch, os, &digest);
+		erofs_manifest(url_front, repository, token_header, arch, os,
+			       media_type_blob, &digest, &still_running,
+			       media_type, url_blob);
+		accept_header = media_type_blob;
+	}
+
+	if (url_blob[0] == '\0') {
+		fprintf(stderr, "Failed to resolve blob URL\n");
+		goto err;
+	}
+
+	curl_blob = curl_easy_init();
+	if (!curl_blob) {
+		fprintf(stderr, "curl_easy_init() failed\n");
+		goto err;
+	}
+
+	data_blob = erofs_curl_setopt(multi, curl_blob, token_header,
+				      accept_header, url_blob, erofs_blob_mode);
+	if (data_blob) {
+		erofs_curl_io(multi, &still_running);
+		curl_easy_getinfo(curl_blob, CURLINFO_RESPONSE_CODE, &http_code);
+	}
+	curl_multi_remove_handle(multi, curl_blob);
+	curl_easy_cleanup(curl_blob);
+
+	/* an error page must not be served as file contents */
+	if (!data_blob || !data_blob->memory || http_code != 200) {
+		fprintf(stderr, "Failed to download blob (HTTP status %ld)\n",
+			http_code);
+		goto err;
+	}
+
+	vf = calloc(1, sizeof(*vf));
+	ops = calloc(1, sizeof(*ops));
+	if (!vf || !ops) {
+		fprintf(stderr, "Failed to allocate memory for erofs_vfile\n");
+		goto err;
+	}
+
+	ops->read = erofs_oci_registry_read;
+	ops->pread = erofs_oci_registry_pread;
+	ops->lseek = erofs_oci_registry_lseek;
+	vf->ops = ops;
+	*((struct erofs_oci_registry_memory **)(vf->payload)) = data_blob;
+
+	free(token_header);
+	return vf;
+
+err:
+	if (data_blob) {
+		free(data_blob->memory);
+		free(data_blob);
+	}
+	free(ops);
+	free(vf);
+	free(token_header);
+	return NULL;
+}
+
+/* Entry point under the name declared in oci_registry.h. */
+struct erofs_vfile *erofs_open_oci_registry(const char *url)
+{
+	return open_oci_registry(url);
+}
diff --git a/lib/oci_registry.h b/lib/oci_registry.h
new file mode 100644
index 0000000..ba6a08b
--- /dev/null
+++ b/lib/oci_registry.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */
+#ifndef EROFS_OCI_REGISTRY_H
+#define EROFS_OCI_REGISTRY_H
+
+#include <stdio.h>
+#include <curl/curl.h>
+#include <json-c/json.h>
+#include <stdlib.h>
+#include <string.h>
+#include "erofs/io.h"
+
+/*
+ * Open the blob addressed by @url as a virtual file.
+ * open_oci_registry() is the name defined in oci_registry.c.
+ */
+struct erofs_vfile *open_oci_registry(const char *url);
+struct erofs_vfile *erofs_open_oci_registry(const char *url);
+
+/* Read up to @len bytes at the current file offset and advance it. */
+ssize_t erofs_oci_registry_read(struct erofs_vfile *vf,
+				void *buf, size_t len);
+/* Read up to @len bytes at @offset without touching the file offset. */
+ssize_t erofs_oci_registry_pread(struct erofs_vfile *vf, void *buf,
+				u64 offset, size_t len);
+/* Move the file offset; returns the new offset or -1. */
+off_t erofs_oci_registry_lseek(struct erofs_vfile *vf,
+				u64 offset, int whence);
+
+#endif /* EROFS_OCI_REGISTRY_H */
-- 
2.44.0.windows.1



More information about the Linux-erofs mailing list