From 562e4329e26e43df48feca7430cdb17e2d0a5429 Mon Sep 17 00:00:00 2001 From: Ben Bierens <39762930+benbierens@users.noreply.github.com> Date: Thu, 17 Oct 2024 18:54:28 +0200 Subject: [PATCH] Download API upgrade (#955) * Adds API for fetching manifest only and downloading dataset without stream * Updates openapi.yaml * Adds tests for downloading manifest-only and without stream. * review comments by Giuliano * updates test clients --- codex/rest/api.nim | 76 +++++++++++++++++++++-- openapi.yaml | 54 +++++++++++++++- tests/integration/codexclient.nim | 24 ++++++- tests/integration/testblockexpiration.nim | 2 +- tests/integration/testupdownload.nim | 23 +++++++ 5 files changed, 170 insertions(+), 9 deletions(-) diff --git a/codex/rest/api.nim b/codex/rest/api.nim index 7fad52ae..66279385 100644 --- a/codex/rest/api.nim +++ b/codex/rest/api.nim @@ -53,14 +53,16 @@ proc validate( {.gcsafe, raises: [Defect].} = 0 +proc formatManifest(cid: Cid, manifest: Manifest): RestContent = + return RestContent.init(cid, manifest) + proc formatManifestBlocks(node: CodexNodeRef): Future[JsonNode] {.async.} = var content: seq[RestContent] - proc formatManifest(cid: Cid, manifest: Manifest) = - let restContent = RestContent.init(cid, manifest) - content.add(restContent) + proc addManifest(cid: Cid, manifest: Manifest) = + content.add(formatManifest(cid, manifest)) + await node.iterateManifests(addManifest) - await node.iterateManifests(formatManifest) return %RestContentList.init(content) proc retrieveCid( @@ -207,8 +209,45 @@ proc initDataApi(node: CodexNodeRef, repoStore: RepoStore, router: var RestRoute await node.retrieveCid(cid.get(), local = true, resp=resp) router.api( - MethodGet, + MethodPost, "/api/codex/v1/data/{cid}/network") do ( + cid: Cid, resp: HttpResponseRef) -> RestApiResponse: + ## Download a file from the network to the local node + ## + + var headers = buildCorsHeaders("GET", allowedOrigin) + + if cid.isErr: + return RestApiResponse.error( + Http400, + $cid.error(), headers = headers) + + if corsOrigin =? allowedOrigin: + resp.setCorsHeaders("GET", corsOrigin) + resp.setHeader("Access-Control-Headers", "X-Requested-With") + + without manifest =? (await node.fetchManifest(cid.get())), err: + error "Failed to fetch manifest", err = err.msg + return RestApiResponse.error( + Http404, + err.msg, headers = headers) + + proc fetchDatasetAsync(): Future[void] {.async.} = + try: + if err =? (await node.fetchBatched(manifest)).errorOption: + error "Unable to fetch dataset", cid = cid.get(), err = err.msg + except CatchableError as exc: + error "CatchableError when fetching dataset", cid = cid.get(), exc = exc.msg + discard + + asyncSpawn fetchDatasetAsync() + + let json = %formatManifest(cid.get(), manifest) + return RestApiResponse.response($json, contentType="application/json") + + router.api( + MethodGet, + "/api/codex/v1/data/{cid}/network/stream") do ( cid: Cid, resp: HttpResponseRef) -> RestApiResponse: ## Download a file from the network in a streaming ## manner @@ -227,6 +266,33 @@ proc initDataApi(node: CodexNodeRef, repoStore: RepoStore, router: var RestRoute await node.retrieveCid(cid.get(), local = false, resp=resp) + router.api( + MethodGet, + "/api/codex/v1/data/{cid}/network/manifest") do ( + cid: Cid, resp: HttpResponseRef) -> RestApiResponse: + ## Download only the manifest. + ## + + var headers = buildCorsHeaders("GET", allowedOrigin) + + if cid.isErr: + return RestApiResponse.error( + Http400, + $cid.error(), headers = headers) + + if corsOrigin =? allowedOrigin: + resp.setCorsHeaders("GET", corsOrigin) + resp.setHeader("Access-Control-Headers", "X-Requested-With") + + without manifest =? (await node.fetchManifest(cid.get())), err: + error "Failed to fetch manifest", err = err.msg + return RestApiResponse.error( + Http404, + err.msg, headers = headers) + + let json = %formatManifest(cid.get(), manifest) + return RestApiResponse.response($json, contentType="application/json") + router.api( MethodGet, "/api/codex/v1/space") do () -> RestApiResponse: diff --git a/openapi.yaml b/openapi.yaml index 0507a385..c84072a7 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -455,10 +455,36 @@ paths: description: Well it was bad-bad "/data/{cid}/network": + post: + summary: "Download a file from the network to the local node if it's not available locally. Note: Download is performed async. Call can return before download is completed." + tags: [ Data ] + operationId: downloadNetwork + parameters: + - in: path + name: cid + required: true + schema: + $ref: "#/components/schemas/Cid" + description: "File to be downloaded." + responses: + "200": + description: Manifest information for download that has been started. + content: + application/json: + schema: + $ref: "#/components/schemas/DataItem" + "400": + description: Invalid CID is specified + "404": + description: Failed to download dataset manifest + "500": + description: Well it was bad-bad + + "/data/{cid}/network/stream": get: summary: "Download a file from the network in a streaming manner. If the file is not available locally, it will be retrieved from other nodes in the network if able." tags: [ Data ] - operationId: downloadNetwork + operationId: downloadNetworkStream parameters: - in: path name: cid @@ -481,6 +507,32 @@ paths: "500": description: Well it was bad-bad + "/data/{cid}/network/manifest": + get: + summary: "Download only the dataset manifest from the network to the local node if it's not available locally." + tags: [ Data ] + operationId: downloadNetworkManifest + parameters: + - in: path + name: cid + required: true + schema: + $ref: "#/components/schemas/Cid" + description: "File for which the manifest is to be downloaded." + responses: + "200": + description: Manifest information. + content: + application/json: + schema: + $ref: "#/components/schemas/DataItem" + "400": + description: Invalid CID is specified + "404": + description: Failed to download dataset manifest + "500": + description: Well it was bad-bad + "/space": get: summary: "Gets a summary of the storage space allocation of the node." diff --git a/tests/integration/codexclient.nim b/tests/integration/codexclient.nim index 2b1e324c..69958cb2 100644 --- a/tests/integration/codexclient.nim +++ b/tests/integration/codexclient.nim @@ -46,7 +46,27 @@ proc download*(client: CodexClient, cid: Cid, local = false): ?!string = let response = client.http.get( client.baseurl & "/data/" & $cid & - (if local: "" else: "/network")) + (if local: "" else: "/network/stream")) + + if response.status != "200 OK": + return failure(response.status) + + success response.body + +proc downloadManifestOnly*(client: CodexClient, cid: Cid): ?!string = + let + response = client.http.get( + client.baseurl & "/data/" & $cid & "/network/manifest") + + if response.status != "200 OK": + return failure(response.status) + + success response.body + +proc downloadNoStream*(client: CodexClient, cid: Cid): ?!string = + let + response = client.http.post( + client.baseurl & "/data/" & $cid & "/network") if response.status != "200 OK": return failure(response.status) @@ -60,7 +80,7 @@ proc downloadBytes*( let uri = parseUri( client.baseurl & "/data/" & $cid & - (if local: "" else: "/network") + (if local: "" else: "/network/stream") ) let (status, bytes) = await client.session.fetch(uri) diff --git a/tests/integration/testblockexpiration.nim b/tests/integration/testblockexpiration.nim index fc2bd0bd..6502243a 100644 --- a/tests/integration/testblockexpiration.nim +++ b/tests/integration/testblockexpiration.nim @@ -47,7 +47,7 @@ ethersuite "Node block expiration tests": proc downloadTestFile(contentId: string, local = false): Response = let client = newHttpClient(timeout=3000) let downloadUrl = baseurl & "/data/" & - contentId & (if local: "" else: "/network") + contentId & (if local: "" else: "/network/stream") let content = client.get(downloadUrl) client.close() diff --git a/tests/integration/testupdownload.nim b/tests/integration/testupdownload.nim index 33e3dfe2..242a868e 100644 --- a/tests/integration/testupdownload.nim +++ b/tests/integration/testupdownload.nim @@ -1,3 +1,4 @@ +import pkg/codex/rest/json import ./twonodes twonodessuite "Uploads and downloads", debug1 = false, debug2 = false: @@ -37,3 +38,25 @@ twonodessuite "Uploads and downloads", debug1 = false, debug2 = false: check: resp2.error.msg == "404 Not Found" + + proc checkRestContent(content: ?!string) = + let c = content.tryGet() + # tried to JSON (very easy) and checking the resulting object (would be much nicer) + # spent an hour to try and make it work. + check: + c == "{\"cid\":\"zDvZRwzm1ePSzKSXt57D5YxHwcSDmsCyYN65wW4HT7fuX9HrzFXy\",\"manifest\":{\"treeCid\":\"zDzSvJTezk7bJNQqFq8k1iHXY84psNuUfZVusA5bBQQUSuyzDSVL\",\"datasetSize\":18,\"blockSize\":65536,\"protected\":false}}" + + test "node allows downloading only manifest": + let content1 = "some file contents" + let cid1 = client1.upload(content1).get + let resp2 = client2.downloadManifestOnly(cid1) + checkRestContent(resp2) + + test "node allows downloading content without stream": + let content1 = "some file contents" + let cid1 = client1.upload(content1).get + let resp1 = client2.downloadNoStream(cid1) + checkRestContent(resp1) + let resp2 = client2.download(cid1, local = true).get + check: + content1 == resp2