Download API upgrade (#955)

* Adds API for fetching manifest only and downloading dataset without stream

* Updates openapi.yaml

* Adds tests for downloading manifest-only and without stream.

* review comments by Giuliano

* updates test clients
This commit is contained in:
Ben Bierens 2024-10-17 18:54:28 +02:00 committed by GitHub
parent 436baef20a
commit 562e4329e2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 170 additions and 9 deletions

View File

@ -53,14 +53,16 @@ proc validate(
{.gcsafe, raises: [Defect].} = {.gcsafe, raises: [Defect].} =
0 0
proc formatManifest(cid: Cid, manifest: Manifest): RestContent =
  ## Builds the `RestContent` for `manifest` and the `cid` it was requested
  ## under, via `RestContent.init`.
  RestContent.init(cid, manifest)
proc formatManifestBlocks(node: CodexNodeRef): Future[JsonNode] {.async.} = proc formatManifestBlocks(node: CodexNodeRef): Future[JsonNode] {.async.} =
var content: seq[RestContent] var content: seq[RestContent]
proc formatManifest(cid: Cid, manifest: Manifest) = proc addManifest(cid: Cid, manifest: Manifest) =
let restContent = RestContent.init(cid, manifest) content.add(formatManifest(cid, manifest))
content.add(restContent) await node.iterateManifests(addManifest)
await node.iterateManifests(formatManifest)
return %RestContentList.init(content) return %RestContentList.init(content)
proc retrieveCid( proc retrieveCid(
@ -207,8 +209,45 @@ proc initDataApi(node: CodexNodeRef, repoStore: RepoStore, router: var RestRoute
await node.retrieveCid(cid.get(), local = true, resp=resp) await node.retrieveCid(cid.get(), local = true, resp=resp)
router.api(
  MethodPost,
  "/api/codex/v1/data/{cid}/network") do (
    cid: Cid, resp: HttpResponseRef) -> RestApiResponse:
    ## Download a file from the network to the local node.
    ##
    ## Fetches the manifest for `cid`, starts the dataset fetch with
    ## `asyncSpawn` and replies with the manifest as JSON without awaiting
    ## that fetch.
    ##
    ## Replies with Http400 when `cid` failed to parse and with Http404 when
    ## the manifest could not be fetched.

    # This route is registered for POST, so the CORS headers name POST
    # rather than GET.
    var headers = buildCorsHeaders("POST", allowedOrigin)

    if cid.isErr:
      return RestApiResponse.error(
        Http400,
        $cid.error(), headers = headers)

    if corsOrigin =? allowedOrigin:
      resp.setCorsHeaders("POST", corsOrigin)
      # NOTE(review): "Access-Control-Headers" is not a standard CORS header
      # name ("Access-Control-Allow-Headers" is). Left unchanged to stay in
      # line with the other download routes - confirm the intent.
      resp.setHeader("Access-Control-Headers", "X-Requested-With")

    without manifest =? (await node.fetchManifest(cid.get())), err:
      error "Failed to fetch manifest", err = err.msg
      return RestApiResponse.error(
        Http404,
        err.msg, headers = headers)

    proc fetchDatasetAsync(): Future[void] {.async.} =
      ## Fetches the dataset described by `manifest`. Failures are only
      ## logged: the HTTP response has already been decided by the time this
      ## runs, and a future handed to `asyncSpawn` must not fail.
      try:
        if err =? (await node.fetchBatched(manifest)).errorOption:
          error "Unable to fetch dataset", cid = cid.get(), err = err.msg
      except CatchableError as exc:
        error "CatchableError when fetching dataset", cid = cid.get(), exc = exc.msg

    asyncSpawn fetchDatasetAsync()

    let json = %formatManifest(cid.get(), manifest)
    return RestApiResponse.response($json, contentType="application/json")
router.api(
MethodGet,
"/api/codex/v1/data/{cid}/network/stream") do (
cid: Cid, resp: HttpResponseRef) -> RestApiResponse: cid: Cid, resp: HttpResponseRef) -> RestApiResponse:
## Download a file from the network in a streaming ## Download a file from the network in a streaming
## manner ## manner
@ -227,6 +266,33 @@ proc initDataApi(node: CodexNodeRef, repoStore: RepoStore, router: var RestRoute
await node.retrieveCid(cid.get(), local = false, resp=resp) await node.retrieveCid(cid.get(), local = false, resp=resp)
router.api(
  MethodGet,
  "/api/codex/v1/data/{cid}/network/manifest") do (
    cid: Cid, resp: HttpResponseRef) -> RestApiResponse:
    ## Download only the manifest.
    ##
    ## Replies with the manifest for `cid` serialized as JSON. A `cid` that
    ## failed to parse yields Http400; a manifest that cannot be fetched
    ## yields Http404.

    var headers = buildCorsHeaders("GET", allowedOrigin)

    # Reject an unparsable CID before doing any work.
    if cid.isErr:
      return RestApiResponse.error(Http400, $cid.error(), headers = headers)

    let manifestCid = cid.get()

    if corsOrigin =? allowedOrigin:
      resp.setCorsHeaders("GET", corsOrigin)
      resp.setHeader("Access-Control-Headers", "X-Requested-With")

    without manifest =? (await node.fetchManifest(manifestCid)), err:
      error "Failed to fetch manifest", err = err.msg
      return RestApiResponse.error(Http404, err.msg, headers = headers)

    let payload = $(%formatManifest(manifestCid, manifest))
    return RestApiResponse.response(payload, contentType="application/json")
router.api( router.api(
MethodGet, MethodGet,
"/api/codex/v1/space") do () -> RestApiResponse: "/api/codex/v1/space") do () -> RestApiResponse:

View File

@ -455,10 +455,36 @@ paths:
description: Well it was bad-bad description: Well it was bad-bad
"/data/{cid}/network": "/data/{cid}/network":
# POST on this path starts a download to the local node and answers with the
# manifest information (DataItem); per the summary, the call can return
# before the download itself has completed.
post:
summary: "Download a file from the network to the local node if it's not available locally. Note: Download is performed async. Call can return before download is completed."
tags: [ Data ]
operationId: downloadNetwork
parameters:
- in: path
name: cid
required: true
schema:
$ref: "#/components/schemas/Cid"
description: "File to be downloaded."
responses:
# 200 carries the manifest of the download that was started, not the file data.
"200":
description: Manifest information for download that has been started.
content:
application/json:
schema:
$ref: "#/components/schemas/DataItem"
"400":
description: Invalid CID is specified
"404":
description: Failed to download dataset manifest
"500":
description: Well it was bad-bad
"/data/{cid}/network/stream":
get: get:
summary: "Download a file from the network in a streaming manner. If the file is not available locally, it will be retrieved from other nodes in the network if able." summary: "Download a file from the network in a streaming manner. If the file is not available locally, it will be retrieved from other nodes in the network if able."
tags: [ Data ] tags: [ Data ]
operationId: downloadNetwork operationId: downloadNetworkStream
parameters: parameters:
- in: path - in: path
name: cid name: cid
@ -481,6 +507,32 @@ paths:
"500": "500":
description: Well it was bad-bad description: Well it was bad-bad
"/data/{cid}/network/manifest":
# GET on this path returns only the dataset manifest (DataItem) for the given
# CID; no file content is part of the response.
get:
summary: "Download only the dataset manifest from the network to the local node if it's not available locally."
tags: [ Data ]
operationId: downloadNetworkManifest
parameters:
- in: path
name: cid
required: true
schema:
$ref: "#/components/schemas/Cid"
description: "File for which the manifest is to be downloaded."
responses:
"200":
description: Manifest information.
content:
application/json:
schema:
$ref: "#/components/schemas/DataItem"
"400":
description: Invalid CID is specified
"404":
description: Failed to download dataset manifest
"500":
description: Well it was bad-bad
"/space": "/space":
get: get:
summary: "Gets a summary of the storage space allocation of the node." summary: "Gets a summary of the storage space allocation of the node."

View File

@ -46,7 +46,27 @@ proc download*(client: CodexClient, cid: Cid, local = false): ?!string =
let let
response = client.http.get( response = client.http.get(
client.baseurl & "/data/" & $cid & client.baseurl & "/data/" & $cid &
(if local: "" else: "/network")) (if local: "" else: "/network/stream"))
if response.status != "200 OK":
return failure(response.status)
success response.body
proc downloadManifestOnly*(client: CodexClient, cid: Cid): ?!string =
  ## Issues a GET to `/data/{cid}/network/manifest` on the node behind
  ## `client`.
  ##
  ## Returns the response body on "200 OK"; any other status is returned as
  ## a failure whose message is the status line.
  let manifestUrl = client.baseurl & "/data/" & $cid & "/network/manifest"
  let response = client.http.get(manifestUrl)

  if response.status != "200 OK":
    return failure(response.status)

  return success(response.body)
proc downloadNoStream*(client: CodexClient, cid: Cid): ?!string =
let
response = client.http.post(
client.baseurl & "/data/" & $cid & "/network")
if response.status != "200 OK": if response.status != "200 OK":
return failure(response.status) return failure(response.status)
@ -60,7 +80,7 @@ proc downloadBytes*(
let uri = parseUri( let uri = parseUri(
client.baseurl & "/data/" & $cid & client.baseurl & "/data/" & $cid &
(if local: "" else: "/network") (if local: "" else: "/network/stream")
) )
let (status, bytes) = await client.session.fetch(uri) let (status, bytes) = await client.session.fetch(uri)

View File

@ -47,7 +47,7 @@ ethersuite "Node block expiration tests":
proc downloadTestFile(contentId: string, local = false): Response = proc downloadTestFile(contentId: string, local = false): Response =
let client = newHttpClient(timeout=3000) let client = newHttpClient(timeout=3000)
let downloadUrl = baseurl & "/data/" & let downloadUrl = baseurl & "/data/" &
contentId & (if local: "" else: "/network") contentId & (if local: "" else: "/network/stream")
let content = client.get(downloadUrl) let content = client.get(downloadUrl)
client.close() client.close()

View File

@ -1,3 +1,4 @@
import pkg/codex/rest/json
import ./twonodes import ./twonodes
twonodessuite "Uploads and downloads", debug1 = false, debug2 = false: twonodessuite "Uploads and downloads", debug1 = false, debug2 = false:
@ -37,3 +38,25 @@ twonodessuite "Uploads and downloads", debug1 = false, debug2 = false:
check: check:
resp2.error.msg == "404 Not Found" resp2.error.msg == "404 Not Found"
proc checkRestContent(content: ?!string) =
  ## Unwraps `content` with `tryGet` and checks that it equals the exact JSON
  ## text expected for the 18-byte test file uploaded by the tests below.
  # The comparison is on the raw JSON text. Parsing the response and checking
  # the resulting object would be nicer, but an attempt to get that working
  # was abandoned after about an hour.
  const expectedJson = "{\"cid\":\"zDvZRwzm1ePSzKSXt57D5YxHwcSDmsCyYN65wW4HT7fuX9HrzFXy\",\"manifest\":{\"treeCid\":\"zDzSvJTezk7bJNQqFq8k1iHXY84psNuUfZVusA5bBQQUSuyzDSVL\",\"datasetSize\":18,\"blockSize\":65536,\"protected\":false}}"
  let actualJson = content.tryGet()
  check actualJson == expectedJson
test "node allows downloading only manifest":
  # Upload through the first node, then ask the second node for the manifest
  # alone and compare it against the expected JSON.
  let
    fileContents = "some file contents"
    uploadedCid = client1.upload(fileContents).get
  checkRestContent(client2.downloadManifestOnly(uploadedCid))
test "node allows downloading content without stream":
  let
    fileContents = "some file contents"
    uploadedCid = client1.upload(fileContents).get

  # The non-streaming download call answers with the manifest JSON.
  checkRestContent(client2.downloadNoStream(uploadedCid))

  # Afterwards the content must be readable from the second node's local
  # store.
  # NOTE(review): the non-streaming download presumably continues in the
  # background after the call returns; confirm this local read cannot race
  # with it.
  let localCopy = client2.download(uploadedCid, local = true).get
  check:
    fileContents == localCopy