feat: add metadata to the manifest (#960)

* Add metadata to the manifest

* Remove useless import

* Fix the openapi documentation

* Use optional fields instead of default values

* Remove testRestApi target

* Return failure when the protobuf cannot get the field

* Set download headers and fix cors headers when an error is returned

* Add tests to verify the download headers

* Try to adjust the content length header

* Fix conversion to string

* Remove the content length header

* Remove testRestApi target

* Removing debug messages
This commit is contained in:
Arnaud 2024-10-25 14:43:19 +01:00 committed by GitHub
parent bcc1468130
commit 2fb7031ec6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 344 additions and 39 deletions

View File

@ -10,6 +10,7 @@
# This module implements serialization and deserialization of Manifest
import pkg/upraises
import times
push: {.upraises: [].}
@ -59,6 +60,9 @@ proc encode*(manifest: Manifest): ?!seq[byte] =
# optional hcodec: MultiCodec = 5 # Multihash codec
# optional version: CidVersion = 6; # Cid version
# optional ErasureInfo erasure = 7; # erasure coding info
# optional filename: ?string = 8; # original filename
# optional mimetype: ?string = 9; # original mimetype
# optional uploadedAt: ?int64 = 10; # original uploadedAt
# }
# ```
#
@ -70,6 +74,7 @@ proc encode*(manifest: Manifest): ?!seq[byte] =
header.write(4, manifest.codec.uint32)
header.write(5, manifest.hcodec.uint32)
header.write(6, manifest.version.uint32)
if manifest.protected:
var erasureInfo = initProtoBuffer()
erasureInfo.write(1, manifest.ecK.uint32)
@ -90,6 +95,15 @@ proc encode*(manifest: Manifest): ?!seq[byte] =
erasureInfo.finish()
header.write(7, erasureInfo)
if manifest.filename.isSome:
header.write(8, manifest.filename.get())
if manifest.mimetype.isSome:
header.write(9, manifest.mimetype.get())
if manifest.uploadedAt.isSome:
header.write(10, manifest.uploadedAt.get().uint64)
pbNode.write(1, header) # set the treeCid as the data field
pbNode.finish()
@ -118,6 +132,9 @@ proc decode*(_: type Manifest, data: openArray[byte]): ?!Manifest =
slotRoots: seq[seq[byte]]
cellSize: uint32
verifiableStrategy: uint32
filename: string
mimetype: string
uploadedAt: uint64
# Decode `Header` message
if pbNode.getField(1, pbHeader).isErr:
@ -145,6 +162,15 @@ proc decode*(_: type Manifest, data: openArray[byte]): ?!Manifest =
if pbHeader.getField(7, pbErasureInfo).isErr:
return failure("Unable to decode `erasureInfo` from manifest!")
if pbHeader.getField(8, filename).isErr:
return failure("Unable to decode `filename` from manifest!")
if pbHeader.getField(9, mimetype).isErr:
return failure("Unable to decode `mimetype` from manifest!")
if pbHeader.getField(10, uploadedAt).isErr:
return failure("Unable to decode `uploadedAt` from manifest!")
let protected = pbErasureInfo.buffer.len > 0
var verifiable = false
if protected:
@ -183,6 +209,10 @@ proc decode*(_: type Manifest, data: openArray[byte]): ?!Manifest =
let
treeCid = ? Cid.init(treeCidBuf).mapFailure
var filenameOption = if filename.len == 0: string.none else: filename.some
var mimetypeOption = if mimetype.len == 0: string.none else: mimetype.some
var uploadedAtOption = if uploadedAt == 0: int64.none else: uploadedAt.int64.some
let
self = if protected:
Manifest.new(
@ -196,7 +226,10 @@ proc decode*(_: type Manifest, data: openArray[byte]): ?!Manifest =
ecM = ecM.int,
originalTreeCid = ? Cid.init(originalTreeCid).mapFailure,
originalDatasetSize = originalDatasetSize.NBytes,
strategy = StrategyType(protectedStrategy))
strategy = StrategyType(protectedStrategy),
filename = filenameOption,
mimetype = mimetypeOption,
uploadedAt = uploadedAtOption)
else:
Manifest.new(
treeCid = treeCid,
@ -204,7 +237,10 @@ proc decode*(_: type Manifest, data: openArray[byte]): ?!Manifest =
blockSize = blockSize.NBytes,
version = CidVersion(version),
hcodec = hcodec.MultiCodec,
codec = codec.MultiCodec)
codec = codec.MultiCodec,
filename = filenameOption,
mimetype = mimetypeOption,
uploadedAt = uploadedAtOption)
? self.verify()

View File

@ -36,6 +36,9 @@ type
codec: MultiCodec # Dataset codec
hcodec: MultiCodec # Multihash codec
version: CidVersion # Cid version
filename {.serialize.}: ?string # The filename of the content uploaded (optional)
mimetype {.serialize.}: ?string # The mimetype of the content uploaded (optional)
uploadedAt {.serialize.}: ?int64 # The UTC creation timestamp in seconds
case protected {.serialize.}: bool # Protected datasets have erasure coded info
of true:
ecK: int # Number of blocks to encode
@ -121,6 +124,14 @@ func verifiableStrategy*(self: Manifest): StrategyType =
func numSlotBlocks*(self: Manifest): int =
  ## Number of blocks per slot, obtained by applying `divUp` to the
  ## manifest's block count and its number of slots.
  let
    blocks = self.blocksCount
    slots = self.numSlots
  result = divUp(blocks, slots)
func filename*(self: Manifest): ?string =
  ## Read-only accessor for the optional filename of the uploaded content.
  result = self.filename
func mimetype*(self: Manifest): ?string =
  ## Read-only accessor for the optional mimetype of the uploaded content.
  result = self.mimetype
func uploadedAt*(self: Manifest): ?int64 =
  ## Read-only accessor for the optional upload timestamp
  ## (stored as UTC seconds).
  result = self.uploadedAt
############################################################
# Operations on block list
############################################################
@ -163,6 +174,9 @@ func `==`*(a, b: Manifest): bool =
(a.hcodec == b.hcodec) and
(a.codec == b.codec) and
(a.protected == b.protected) and
(a.filename == b.filename) and
(a.mimetype == b.mimetype) and
(a.uploadedAt == b.uploadedAt) and
(if a.protected:
(a.ecK == b.ecK) and
(a.ecM == b.ecM) and
@ -181,26 +195,38 @@ func `==`*(a, b: Manifest): bool =
true)
func `$`*(self: Manifest): string =
"treeCid: " & $self.treeCid &
result = "treeCid: " & $self.treeCid &
", datasetSize: " & $self.datasetSize &
", blockSize: " & $self.blockSize &
", version: " & $self.version &
", hcodec: " & $self.hcodec &
", codec: " & $self.codec &
", protected: " & $self.protected &
(if self.protected:
", ecK: " & $self.ecK &
", ecM: " & $self.ecM &
", originalTreeCid: " & $self.originalTreeCid &
", originalDatasetSize: " & $self.originalDatasetSize &
", verifiable: " & $self.verifiable &
(if self.verifiable:
", verifyRoot: " & $self.verifyRoot &
", slotRoots: " & $self.slotRoots
else:
"")
", protected: " & $self.protected
if self.filename.isSome:
result &= ", filename: " & $self.filename
if self.mimetype.isSome:
result &= ", mimetype: " & $self.mimetype
if self.uploadedAt.isSome:
result &= ", uploadedAt: " & $self.uploadedAt
result &= (if self.protected:
", ecK: " & $self.ecK &
", ecM: " & $self.ecM &
", originalTreeCid: " & $self.originalTreeCid &
", originalDatasetSize: " & $self.originalDatasetSize &
", verifiable: " & $self.verifiable &
(if self.verifiable:
", verifyRoot: " & $self.verifyRoot &
", slotRoots: " & $self.slotRoots
else:
"")
else:
"")
return result
############################################################
# Constructors
@ -214,7 +240,10 @@ func new*(
version: CidVersion = CIDv1,
hcodec = Sha256HashCodec,
codec = BlockCodec,
protected = false): Manifest =
protected = false,
filename: ?string = string.none,
mimetype: ?string = string.none,
uploadedAt: ?int64 = int64.none): Manifest =
T(
treeCid: treeCid,
@ -223,7 +252,10 @@ func new*(
version: version,
codec: codec,
hcodec: hcodec,
protected: protected)
protected: protected,
filename: filename,
mimetype: mimetype,
uploadedAt: uploadedAt)
func new*(
T: type Manifest,
@ -247,7 +279,11 @@ func new*(
ecK: ecK, ecM: ecM,
originalTreeCid: manifest.treeCid,
originalDatasetSize: manifest.datasetSize,
protectedStrategy: strategy)
protectedStrategy: strategy,
filename: manifest.filename,
mimetype: manifest.mimetype,
uploadedAt: manifest.uploadedAt
)
func new*(
T: type Manifest,
@ -263,7 +299,10 @@ func new*(
codec: manifest.codec,
hcodec: manifest.hcodec,
blockSize: manifest.blockSize,
protected: false)
protected: false,
filename: manifest.filename,
mimetype: manifest.mimetype,
uploadedAt: manifest.uploadedAt)
func new*(
T: type Manifest,
@ -277,7 +316,10 @@ func new*(
ecM: int,
originalTreeCid: Cid,
originalDatasetSize: NBytes,
strategy = SteppedStrategy): Manifest =
strategy = SteppedStrategy,
filename: ?string = string.none,
mimetype: ?string = string.none,
uploadedAt: ?int64 = int64.none): Manifest =
Manifest(
treeCid: treeCid,
@ -291,7 +333,10 @@ func new*(
ecM: ecM,
originalTreeCid: originalTreeCid,
originalDatasetSize: originalDatasetSize,
protectedStrategy: strategy)
protectedStrategy: strategy,
filename: filename,
mimetype: mimetype,
uploadedAt: uploadedAt)
func new*(
T: type Manifest,
@ -329,7 +374,11 @@ func new*(
verifyRoot: verifyRoot,
slotRoots: @slotRoots,
cellSize: cellSize,
verifiableStrategy: strategy)
verifiableStrategy: strategy,
filename: manifest.filename,
mimetype: manifest.mimetype,
uploadedAt: manifest.uploadedAt
)
func new*(
T: type Manifest,

View File

@ -14,6 +14,7 @@ import std/sequtils
import std/strformat
import std/sugar
import std/cpuinfo
import times
import pkg/questionable
import pkg/questionable/results
@ -297,6 +298,8 @@ proc retrieve*(
proc store*(
self: CodexNodeRef,
stream: LPStream,
filename: ?string = string.none,
mimetype: ?string = string.none,
blockSize = DefaultBlockSize): Future[?!Cid] {.async.} =
## Save stream contents as dataset with given blockSize
## to nodes's BlockStore, and return Cid of its manifest
@ -355,7 +358,10 @@ proc store*(
datasetSize = NBytes(chunker.offset),
version = CIDv1,
hcodec = hcodec,
codec = dataCodec)
codec = dataCodec,
filename = filename,
mimetype = mimetype,
uploadedAt = now().utc.toTime.toUnix.some)
without manifestBlk =? await self.storeManifest(manifest), err:
error "Unable to store manifest"
@ -364,7 +370,9 @@ proc store*(
info "Stored data", manifestCid = manifestBlk.cid,
treeCid = treeCid,
blocks = manifest.blocksCount,
datasetSize = manifest.datasetSize
datasetSize = manifest.datasetSize,
filename = manifest.filename,
mimetype = manifest.mimetype
return manifestBlk.cid.success

View File

@ -13,6 +13,8 @@ push: {.upraises: [].}
import std/sequtils
import mimetypes
import os
import pkg/questionable
import pkg/questionable/results
@ -81,11 +83,27 @@ proc retrieveCid(
try:
without stream =? (await node.retrieve(cid, local)), error:
if error of BlockNotFoundError:
return RestApiResponse.error(Http404, error.msg)
resp.status = Http404
return await resp.sendBody("")
else:
return RestApiResponse.error(Http500, error.msg)
resp.status = Http500
return await resp.sendBody(error.msg)
# It is ok to fetch again the manifest because it will hit the cache
without manifest =? (await node.fetchManifest(cid)), err:
error "Failed to fetch manifest", err = err.msg
resp.status = Http404
return await resp.sendBody(err.msg)
if manifest.mimetype.isSome:
resp.setHeader("Content-Type", manifest.mimetype.get())
else:
resp.addHeader("Content-Type", "application/octet-stream")
if manifest.filename.isSome:
resp.setHeader("Content-Disposition", "attachment; filename=\"" & manifest.filename.get() & "\"")
resp.addHeader("Content-Type", "application/octet-stream")
await resp.prepareChunked()
while not stream.atEof:
@ -98,12 +116,14 @@ proc retrieveCid(
break
bytes += buff.len
await resp.sendChunk(addr buff[0], buff.len)
await resp.finish()
codex_api_downloads.inc()
except CatchableError as exc:
warn "Excepting streaming blocks", exc = exc.msg
return RestApiResponse.error(Http500)
resp.status = Http500
return await resp.sendBody("")
finally:
info "Sent bytes", cid = cid, bytes
if not stream.isNil:
@ -124,6 +144,18 @@ proc setCorsHeaders(resp: HttpResponseRef, httpMethod: string, origin: string) =
resp.setHeader("Access-Control-Allow-Methods", httpMethod & ", OPTIONS")
resp.setHeader("Access-Control-Max-Age", "86400")
proc getFilenameFromContentDisposition(contentDisposition: string): ?string =
  ## Extracts the quoted `filename="..."` parameter from a
  ## `Content-Disposition` header value.
  ##
  ## Returns `none` when the header carries no quoted `filename` parameter or
  ## when the opening quote is never closed. Otherwise returns the text between
  ## the two quotes with surrounding whitespace stripped (this may be the empty
  ## string for `filename=""`, which callers can reject as invalid).
  const marker = "filename=\""

  let markerPos = contentDisposition.find(marker)
  if markerPos < 0:
    return string.none

  # Look for the closing quote explicitly instead of assuming it is the last
  # character: the header may be truncated (`filename="`) or carry further
  # parameters after the filename (`filename="a.txt"; size=1`).
  let
    valueStart = markerPos + marker.len
    valueEnd = contentDisposition.find('"', valueStart)
  if valueEnd < 0:
    return string.none

  return contentDisposition[valueStart ..< valueEnd].strip().some
proc initDataApi(node: CodexNodeRef, repoStore: RepoStore, router: var RestRouter) =
let allowedOrigin = router.allowedOrigin # prevents capture inside of api defintion
@ -134,7 +166,7 @@ proc initDataApi(node: CodexNodeRef, repoStore: RepoStore, router: var RestRoute
if corsOrigin =? allowedOrigin:
resp.setCorsHeaders("POST", corsOrigin)
resp.setHeader("Access-Control-Allow-Headers", "content-type")
resp.setHeader("Access-Control-Allow-Headers", "content-type, content-disposition")
resp.status = Http204
await resp.sendBody("")
@ -157,12 +189,31 @@ proc initDataApi(node: CodexNodeRef, repoStore: RepoStore, router: var RestRoute
#
await request.handleExpect()
var mimetype = request.headers.getString(ContentTypeHeader).some
if mimetype.get() != "":
var m = newMimetypes()
let extension = m.getExt(mimetype.get(), "")
if extension == "":
return RestApiResponse.error(Http422, "The MIME type is not valid.")
else:
mimetype = string.none
const ContentDispositionHeader = "Content-Disposition"
let contentDisposition = request.headers.getString(ContentDispositionHeader)
let filename = getFilenameFromContentDisposition(contentDisposition)
if filename.isSome and not isValidFilename(filename.get()):
return RestApiResponse.error(Http422, "The filename is not valid.")
# Here we could check if the extension matches the filename if needed
let
reader = bodyReader.get()
try:
without cid =? (
await node.store(AsyncStreamWrapper.new(reader = AsyncStreamReader(reader)))), error:
await node.store(AsyncStreamWrapper.new(reader = AsyncStreamReader(reader)), filename = filename, mimetype = mimetype)), error:
error "Error uploading file", exc = error.msg
return RestApiResponse.error(Http500, error.msg)
@ -537,7 +588,7 @@ proc initPurchasingApi(node: CodexNodeRef, router: var RestRouter) =
try:
without contracts =? node.contracts.client:
return RestApiResponse.error(Http503, "Persistence is not enabled", headers = headers)
without cid =? cid.tryGet.catch, error:
return RestApiResponse.error(Http400, error.msg, headers = headers)

View File

@ -357,6 +357,19 @@ components:
protected:
type: boolean
description: "Indicates if content is protected by erasure-coding"
filename:
type: string
description: "The original name of the uploaded content (optional)"
example: codex.png
mimetype:
type: string
description: "The original mimetype of the uploaded content (optional)"
example: image/png
uploadedAt:
type: integer
format: int64
description: "The UTC upload timestamp in seconds"
example: 1729244192
Space:
type: object
@ -430,6 +443,21 @@ paths:
summary: "Lists manifest CIDs stored locally in node."
tags: [ Data ]
operationId: listData
parameters:
- name: content-type
in: header
required: false
description: The content type of the file. Must be valid.
schema:
type: string
example: "image/png"
- name: content-disposition
in: header
required: false
description: The content disposition used to send the filename.
schema:
type: string
example: "attachment; filename=\"codex.png\""
responses:
"200":
description: Retrieved list of content CIDs
@ -442,6 +470,8 @@ paths:
description: Invalid CID is specified
"404":
description: Content specified by the CID is not found
"422":
description: The content type is not a valid content type or the filename is not valid
"500":
description: Well it was bad-bad
post:

View File

@ -83,6 +83,8 @@ suite "Manifest - Attribute Inheritance":
treeCid = Cid.example,
blockSize = 1.MiBs,
datasetSize = 100.MiBs,
filename = "codex.png".some,
mimetype = "image/png".some
),
treeCid = Cid.example,
datasetSize = 200.MiBs,
@ -107,3 +109,15 @@ suite "Manifest - Attribute Inheritance":
).tryGet()
check verifiable.protectedStrategy == LinearStrategy
test "Should preserve metadata for manifest in verifiable manifest":
var verifiable = Manifest.new(
manifest = makeProtectedManifest(SteppedStrategy),
verifyRoot = Cid.example,
slotRoots = @[Cid.example, Cid.example]
).tryGet()
check verifiable.filename.isSome == true
check verifiable.filename.get() == "codex.png"
check verifiable.mimetype.isSome == true
check verifiable.mimetype.get() == "image/png"

View File

@ -257,3 +257,13 @@ proc saleStateIs*(client: CodexClient, id: SlotId, state: string): bool =
proc requestId*(client: CodexClient, id: PurchaseId): ?RequestId =
  ## Looks up the purchase `id` and yields its `requestId`, or `none` when the
  ## purchase lookup produced no value.
  let purchase = client.getPurchase(id).option
  purchase.?requestId
proc uploadRaw*(
    client: CodexClient,
    contents: string,
    headers = newHttpHeaders()): Response =
  ## POSTs `contents` to the node's `/data` endpoint with the given request
  ## headers and hands back the unprocessed HTTP response.
  let url = client.baseurl & "/data"
  client.http.request(
    url,
    body = contents,
    httpMethod = HttpPost,
    headers = headers)
proc listRaw*(client: CodexClient): Response =
  ## Issues a GET on the node's `/data` endpoint and hands back the
  ## unprocessed HTTP response.
  let url = client.baseurl & "/data"
  client.http.request(url, httpMethod = HttpGet)
proc downloadRaw*(client: CodexClient, cid: string, local = false): Response =
  ## Issues a GET for `cid` and hands back the unprocessed HTTP response.
  ## With `local` set the request goes to `/data/<cid>`; otherwise it goes to
  ## `/data/<cid>/network/stream`.
  let
    suffix = if local: "" else: "/network/stream"
    url = client.baseurl & "/data/" & cid & suffix
  client.http.request(url, httpMethod = HttpGet)

View File

@ -4,9 +4,9 @@ from pkg/libp2p import `==`
import pkg/codex/units
import ./twonodes
import ../examples
import json
twonodessuite "REST API", debug1 = false, debug2 = false:
test "nodes can print their peer information":
check !client1.info() != !client2.info()
@ -16,6 +16,7 @@ twonodessuite "REST API", debug1 = false, debug2 = false:
test "node accepts file uploads":
let cid1 = client1.upload("some file contents").get
let cid2 = client1.upload("some other contents").get
check cid1 != cid2
test "node shows used and available space":
@ -25,7 +26,7 @@ twonodessuite "REST API", debug1 = false, debug2 = false:
check:
space.totalBlocks == 2
space.quotaMaxBytes == 8589934592.NBytes
space.quotaUsedBytes == 65592.NBytes
space.quotaUsedBytes == 65598.NBytes
space.quotaReservedBytes == 12.NBytes
test "node lists local files":
@ -151,3 +152,89 @@ twonodessuite "REST API", debug1 = false, debug2 = false:
tolerance.uint)
check responseBefore.status == "200 OK"
test "node accepts file uploads with content type":
  # An upload carrying a known MIME type must succeed and return a body.
  let
    headers = newHttpHeaders({"Content-Type": "text/plain"})
    response = client1.uploadRaw("some file contents", headers)

  check:
    response.status == "200 OK"
    response.body != ""
test "node accepts file uploads with content disposition":
  # An upload carrying a quoted filename must succeed and return a body.
  let
    headers = newHttpHeaders({
      "Content-Disposition": "attachment; filename=\"example.txt\""
    })
    response = client1.uploadRaw("some file contents", headers)

  check:
    response.status == "200 OK"
    response.body != ""
test "node accepts file uploads with content disposition without filename":
  # A Content-Disposition header without any filename parameter is accepted.
  let
    headers = newHttpHeaders({"Content-Disposition": "attachment"})
    response = client1.uploadRaw("some file contents", headers)

  check:
    response.status == "200 OK"
    response.body != ""
test "upload fails if content disposition contains bad filename":
  # A filename containing `*` must be rejected with a 422 and a fixed message.
  let
    headers = newHttpHeaders({
      "Content-Disposition": "attachment; filename=\"exam*ple.txt\""
    })
    response = client1.uploadRaw("some file contents", headers)

  check:
    response.status == "422 Unprocessable Entity"
    response.body == "The filename is not valid."
test "upload fails if content type is invalid":
  # An unknown MIME type must be rejected with a 422 and a fixed message.
  let
    headers = newHttpHeaders({"Content-Type": "hello/world"})
    response = client1.uploadRaw("some file contents", headers)

  check:
    response.status == "422 Unprocessable Entity"
    response.body == "The MIME type is not valid."
test "node retrieve the metadata":
  # Uploads a file with both metadata headers, then verifies that the listing
  # endpoint exposes filename, mimetype and uploadedAt in the manifest.
  let headers = newHttpHeaders({
    "Content-Type": "text/plain",
    "Content-Disposition": "attachment; filename=\"example.txt\""
  })
  let uploadResponse = client1.uploadRaw("some file contents", headers)
  # Report a failed upload explicitly instead of only through the manifest
  # checks further down.
  check uploadResponse.status == "200 OK"

  let listResponse = client1.listRaw()
  let jsonData = parseJson(listResponse.body)

  check jsonData.hasKey("content")

  let content = jsonData["content"][0]

  check content.hasKey("manifest")

  let manifest = content["manifest"]

  check manifest.hasKey("filename")
  check manifest["filename"].getStr() == "example.txt"
  check manifest.hasKey("mimetype")
  check manifest["mimetype"].getStr() == "text/plain"
  check manifest.hasKey("uploadedAt")
  check manifest["uploadedAt"].getInt() > 0
test "node set the headers when for download":
  # Uploads a file with metadata headers, then downloads it first through the
  # network stream endpoint and then through the local endpoint, expecting the
  # same Content-Type and Content-Disposition on both responses.
  let
    headers = newHttpHeaders({
      "Content-Disposition": "attachment; filename=\"example.txt\"",
      "Content-Type": "text/plain"
    })
    uploadResponse = client1.uploadRaw("some file contents", headers)
    cid = uploadResponse.body

  check uploadResponse.status == "200 OK"

  for local in [false, true]:
    let response = client1.downloadRaw(cid, local)

    check:
      response.status == "200 OK"
      response.headers.hasKey("Content-Type") == true
      response.headers["Content-Type"] == "text/plain"
      response.headers.hasKey("Content-Disposition") == true
      response.headers["Content-Disposition"] == "attachment; filename=\"example.txt\""

View File

@ -1,5 +1,7 @@
import pkg/codex/rest/json
import ./twonodes
import json
from pkg/libp2p import Cid, `$`
twonodessuite "Uploads and downloads", debug1 = false, debug2 = false:
@ -39,24 +41,42 @@ twonodessuite "Uploads and downloads", debug1 = false, debug2 = false:
check:
resp2.error.msg == "404 Not Found"
proc checkRestContent(content: ?!string) =
proc checkRestContent(cid: Cid, content: ?!string) =
let c = content.tryGet()
# tried to JSON (very easy) and checking the resulting object (would be much nicer)
# spent an hour to try and make it work.
check:
c == "{\"cid\":\"zDvZRwzm1ePSzKSXt57D5YxHwcSDmsCyYN65wW4HT7fuX9HrzFXy\",\"manifest\":{\"treeCid\":\"zDzSvJTezk7bJNQqFq8k1iHXY84psNuUfZVusA5bBQQUSuyzDSVL\",\"datasetSize\":18,\"blockSize\":65536,\"protected\":false}}"
let jsonData = parseJson(c)
check jsonData.hasKey("cid") == true
check jsonData["cid"].getStr() == $cid
check jsonData.hasKey("manifest") == true
let manifest = jsonData["manifest"]
check manifest.hasKey("treeCid") == true
check manifest["treeCid"].getStr() == "zDzSvJTezk7bJNQqFq8k1iHXY84psNuUfZVusA5bBQQUSuyzDSVL"
check manifest.hasKey("datasetSize") == true
check manifest["datasetSize"].getInt() == 18
check manifest.hasKey("blockSize") == true
check manifest["blockSize"].getInt() == 65536
check manifest.hasKey("protected") == true
check manifest["protected"].getBool() == false
test "node allows downloading only manifest":
let content1 = "some file contents"
let cid1 = client1.upload(content1).get
let resp2 = client2.downloadManifestOnly(cid1)
checkRestContent(resp2)
let resp2 = client1.downloadManifestOnly(cid1)
checkRestContent(cid1, resp2)
test "node allows downloading content without stream":
let content1 = "some file contents"
let cid1 = client1.upload(content1).get
let resp1 = client2.downloadNoStream(cid1)
checkRestContent(resp1)
checkRestContent(cid1, resp1)
let resp2 = client2.download(cid1, local = true).get
check:
content1 == resp2