mirror of
https://github.com/logos-messaging/packages.git
synced 2026-01-04 07:03:13 +00:00
Implement new package scanner (#2532)
* Implement new package scanner * Replace hardcoded repository paths in getmergebase.sh * Create branch instead of working on detached head in getmergebase.sh * Checkout with unlimited depth * Fix package_scanner command line * Merge new package scanner * Improve debug information of getmergebase.sh * Remove validate_json.js as package_scanner already validates json * Improve documentation of getmergebase.sh * Close http client at the end of the program * Address code review comments * Fix normalization of aliases
This commit is contained in:
parent
a001193eff
commit
44e7ea517b
17
.github/workflows/testpr.yml
vendored
Normal file
17
.github/workflows/testpr.yml
vendored
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
on: pull_request
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
default:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v3
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
ref: ${{ github.event.pull_request.head.sha }}
|
||||||
|
- uses: iffy/install-nim@v3.2.2
|
||||||
|
with:
|
||||||
|
version: stable
|
||||||
|
- run: ./getmergebase.sh
|
||||||
|
- run: nim r -d:ssl -d:release package_scanner.nim packages.json --old=packages_old.json --check-urls
|
||||||
|
env:
|
||||||
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
@ -1,16 +1,13 @@
|
|||||||
on:
|
on: push
|
||||||
pull_request:
|
|
||||||
push:
|
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
default:
|
default:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v3
|
||||||
- uses: iffy/install-nim@v3.2.2
|
- uses: iffy/install-nim@v3.2.2
|
||||||
with:
|
with:
|
||||||
version: stable
|
version: stable
|
||||||
- run: nim c -d:ssl -r -d:release package_scanner.nim
|
- run: nim c -d:ssl -r -d:release package_scanner.nim packages.json
|
||||||
env:
|
env:
|
||||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
- run: node ./validate_json.js
|
|
||||||
27
getmergebase.sh
Executable file
27
getmergebase.sh
Executable file
@ -0,0 +1,27 @@
|
|||||||
|
#!/bin/bash

# CI helper script that prepares the two package lists to compare:
#   packages.json      - the pull request version (left untouched)
#   packages_old.json  - the version at the merge base with the target branch
#
# Reads GITHUB_REPOSITORY and GITHUB_BASE_REF from the environment.

# -u / pipefail: fail on unset variables and on failures inside pipelines
# in addition to aborting on the first failing command.
set -euo pipefail

# Repository and branch the PR will be merged into.
# ${VAR:?} aborts with a clear message when the variable is unset or empty
# instead of fetching from a malformed URL or refspec.
targetRepository="https://github.com/${GITHUB_REPOSITORY:?GITHUB_REPOSITORY is not set}"
targetBranch="${GITHUB_BASE_REF:?GITHUB_BASE_REF is not set}"

# Create a branch of the current repository state because actions/checkout
# leaves us with a detached HEAD. -f lets the script be run again when the
# branch already exists.
git branch -f merge-branch

# Fetch the merge target branch into a branch called "base"
git fetch "$targetRepository" "$targetBranch:base"

# Determine the last common commit (the merge base)
mergeBase="$(git merge-base merge-branch base)"
echo "Comparing against packages.json at $mergeBase"

# Write the package list at the branching point directly to
# packages_old.json. Unlike "git checkout <rev> packages.json" this touches
# neither the working copy of packages.json nor the index, so no backup file
# is needed and nothing is left behind when the command fails.
# The "./" prefix makes the path relative to the current directory.
git show "$mergeBase:./packages.json" > packages_old.json
|
||||||
@ -1,194 +1,238 @@
|
|||||||
# A very simple Nim package scanner.
|
# Package scanner for the nimble package list.
|
||||||
#
|
|
||||||
# Scans the package list from this repository.
|
|
||||||
#
|
#
|
||||||
# Check the packages for:
|
# Check the packages for:
|
||||||
# * Missing name
|
# * Duplicate and invalid names
|
||||||
# * Missing/unknown method
|
# * Missing alias targets
|
||||||
# * Missing/unreachable repository
|
# * Empty tags
|
||||||
# * Missing tags
|
# * Invalid method
|
||||||
# * Empty tags
|
# * Missing description or license
|
||||||
# * Missing description
|
# * Unavailable URLs
|
||||||
# * Missing/unknown license
|
# * Insecure URLs
|
||||||
# * Insecure git:// url on GitHub
|
|
||||||
#
|
#
|
||||||
# Usage: nim r [-d:dontFetchRepos] package_scanner.nim
|
# Usage: nim r package_scanner.nim <packages.json> [--old=packages_old.json] [--check-urls]
|
||||||
#
|
#
|
||||||
# Copyright 2015 Federico Ceratto <federico.ceratto@gmail.com>
|
# Copyright 2015 Federico Ceratto <federico.ceratto@gmail.com>
|
||||||
# Released under GPLv3 License, see /usr/share/common-licenses/GPL-3
|
# Copyright 2023 Gabriel Huber <mail@gabrielhuber.at>
|
||||||
|
# Released under GPLv3 License, see LICENSE-GPLv3.txt
|
||||||
|
|
||||||
import std/[httpclient, net, json, os, sets, strutils]
|
import std/parseopt
|
||||||
|
import std/os
|
||||||
|
import std/json
|
||||||
|
import std/tables
|
||||||
|
import std/strutils
|
||||||
|
import std/httpclient
|
||||||
|
import std/streams
|
||||||
|
import std/net
|
||||||
|
|
||||||
const licenses = [
|
|
||||||
"allegro 4 giftware",
|
|
||||||
"apache license 2.0",
|
|
||||||
"apache",
|
|
||||||
"apache2",
|
|
||||||
"apache 2.0",
|
|
||||||
"apache-2.0",
|
|
||||||
"apache-2.0 license",
|
|
||||||
"apache version 2.0",
|
|
||||||
"mit or apache 2.0",
|
|
||||||
"apache license 2.0 or mit",
|
|
||||||
"mit or apache license 2.0",
|
|
||||||
"(mit or apache license 2.0) and simplified bsd",
|
|
||||||
"lxxsdt-mit",
|
|
||||||
"lgplv2.1",
|
|
||||||
"0bsd",
|
|
||||||
"bsd",
|
|
||||||
"bsd2",
|
|
||||||
"bsd-2",
|
|
||||||
"bsd-2-clause",
|
|
||||||
"bsd3",
|
|
||||||
"bsd-3",
|
|
||||||
"bsd 3-clause",
|
|
||||||
"bsd-3-clause",
|
|
||||||
"boost",
|
|
||||||
"boost-1.0",
|
|
||||||
"bsl",
|
|
||||||
"bsl-1.0",
|
|
||||||
"2-clause bsd",
|
|
||||||
"cc-by-sa 4.0",
|
|
||||||
"cc0",
|
|
||||||
"cc0-1.0",
|
|
||||||
"gpl",
|
|
||||||
"gpl2",
|
|
||||||
"gpl-2.0-only",
|
|
||||||
"gpl3",
|
|
||||||
"gplv2",
|
|
||||||
"gplv3",
|
|
||||||
"gplv3+",
|
|
||||||
"gpl-2.0",
|
|
||||||
"agpl-3.0",
|
|
||||||
"gpl-3.0",
|
|
||||||
"gpl-3.0-or-later",
|
|
||||||
"gpl-3.0-only",
|
|
||||||
"lgplv3 or gplv2",
|
|
||||||
"apache 2.0 or gplv2",
|
|
||||||
"lgpl-2.1-or-later",
|
|
||||||
"lgpl with static linking exception",
|
|
||||||
"gnu lesser general public license v2.1",
|
|
||||||
"openldap",
|
|
||||||
"lgpl",
|
|
||||||
"lgplv2",
|
|
||||||
"lgplv3",
|
|
||||||
"lgpl-2.1",
|
|
||||||
"lgpl-3.0",
|
|
||||||
"agplv3",
|
|
||||||
"mit",
|
|
||||||
"mit/isc",
|
|
||||||
"ms-pl",
|
|
||||||
"mpl",
|
|
||||||
"mplv2",
|
|
||||||
"mpl-2.0",
|
|
||||||
"mpl 2.0",
|
|
||||||
"epl-2.0",
|
|
||||||
"eupl-1.2",
|
|
||||||
"wtfpl",
|
|
||||||
"libpng",
|
|
||||||
"fontconfig",
|
|
||||||
"zlib",
|
|
||||||
"isc",
|
|
||||||
"ppl",
|
|
||||||
"hydra",
|
|
||||||
"openssl and ssleay",
|
|
||||||
"unlicense",
|
|
||||||
"public domain",
|
|
||||||
"proprietary",
|
|
||||||
]
|
|
||||||
|
|
||||||
proc canFetchNimbleRepository(name: string, urlJson: JsonNode): bool =
|
const usage = """
|
||||||
# TODO: Make this check the actual repo url and check if there is a
|
Usage: package_scanner <packages.json> [--old=packages_old.json] [--check-urls]
|
||||||
# nimble file in it
|
Scans the nimble package list for mistakes and dead packages.
|
||||||
result = true
|
Options:
|
||||||
var url: string
|
--old= Old package file, will only scan changed packages
|
||||||
var client = newHttpClient(timeout = 100_000)
|
--check-urls Try to request the package url
|
||||||
|
--help Print this help text"""
|
||||||
|
|
||||||
if not urlJson.isNil:
|
const allowedNameChars = {'a'..'z', 'A'..'Z', '0'..'9', '_', '-', '.'}
|
||||||
url = urlJson.str
|
|
||||||
if url.startsWith("https://github.com"):
|
|
||||||
if existsEnv("GITHUB_TOKEN"):
|
|
||||||
client.headers = newHttpHeaders({"authorization": "Bearer " & getEnv("GITHUB_TOKEN")})
|
|
||||||
try:
|
|
||||||
discard client.getContent(url)
|
|
||||||
except TimeoutError:
|
|
||||||
echo "W: ", name, ": Timeout error fetching repo ", url, " ", getCurrentExceptionMsg()
|
|
||||||
except HttpRequestError:
|
|
||||||
echo "W: ", name, ": HTTP error fetching repo ", url, " ", getCurrentExceptionMsg()
|
|
||||||
except AssertionDefect:
|
|
||||||
echo "W: ", name, ": httpclient error fetching repo ", url, " ", getCurrentExceptionMsg()
|
|
||||||
except:
|
|
||||||
echo "W: Unkown error fetching repo ", url, " ", getCurrentExceptionMsg()
|
|
||||||
finally:
|
|
||||||
client.close()
|
|
||||||
|
|
||||||
proc verifyAlias(pkg: JsonNode, result: var int) =
|
|
||||||
if not pkg.hasKey("name"):
|
|
||||||
echo "E: Missing alias' package name"
|
|
||||||
inc result
|
|
||||||
# TODO: Verify that 'alias' points to a known package.
|
|
||||||
|
|
||||||
proc check(): int =
|
proc checkUrlReachable(client: HttpClient, url: string): string =
  ## Requests `url` through `client` and reports whether it could be reached.
  ##
  ## Returns an empty string when the server answered with a 2xx status code,
  ## otherwise a human readable description of the problem (non-2xx status,
  ## timeout, HTTP error or any other catchable error).
  ##
  ## When the request raises, `client` is closed before returning.
  ##
  ## If the environment variable `GITHUB_TOKEN` is set it is sent as a bearer
  ## token, but only for URLs located on github.com.
  const githubPrefix = "https://github.com/"

  var headers: HttpHeaders = nil
  # The trailing slash pins the host to exactly github.com. A bare
  # "https://github.com" prefix would also match a URL such as
  # "https://github.com.example.org/" and hand the token to that server.
  if url.startsWith(githubPrefix) and existsEnv("GITHUB_TOKEN"):
    headers = newHttpHeaders({"Authorization": "Bearer " & getEnv("GITHUB_TOKEN")})

  try:
    let resp = client.request(url, headers=headers)
    # Read the whole body before looking at the status code.
    discard resp.bodyStream.readAll()
    if not resp.code.is2xx:
      result = "Server returned status " & $resp.code
  except TimeoutError:
    result = "Timeout after " & $client.timeout & "ms"
    client.close()
  except HttpRequestError:
    result = "HTTP error: " & getCurrentExceptionMsg()
    client.close()
  except AssertionDefect:
    # Kept from the original: assertion failures raised while requesting are
    # reported as an unreachable URL instead of aborting the whole scan.
    result = "httpclient error: " & getCurrentExceptionMsg()
    client.close()
  except CatchableError as e:
    result = "Unexpected exception " & $e.name & ": " & e.msg
    client.close()
|
||||||
|
|
||||||
|
template logPackageError(errorMsg: string) =
  ## Prints `errorMsg` prefixed with "E: " and marks the current package as
  ## failed.
  ##
  ## Expects a mutable boolean named `success` to be in scope at the call
  ## site; it is set to `false`.
  echo("E: ", errorMsg)
  success = false
|
||||||
|
|
||||||
|
template checkUrl(urlType: string, url: string) =
  ## Validates one URL of the package currently being checked and logs a
  ## package error for every problem found.
  ##
  ## `urlType` is only used in the messages. An empty URL and a URL that does
  ## not start with "https://" are errors; if neither applies and `checkUrls`
  ## is true, the URL is additionally requested.
  ##
  ## Expects `displayName`, `checkUrls`, `client` and (through
  ## `logPackageError`) `success` to be in scope at the call site.
  if url.len == 0:
    logPackageError(displayName & " has an empty " & urlType & " URL")
  elif not startsWith(url, "https://"):
    logPackageError(displayName & " has a non-https " & urlType & " URL: " & url)
  elif checkUrls:
    let failure = checkUrlReachable(client, url)
    if failure.len > 0:
      logPackageError(displayName & " has an unreachable " & urlType & " URL: " & url)
      logPackageError(failure)
|
||||||
|
|
||||||
|
proc getStrIfExists(n: JsonNode, name: string, default: string = ""): string =
  ## Returns the string stored under key `name` of the JSON object `n`.
  ##
  ## `default` is returned when `n` is nil or not a JSON object, when the key
  ## is missing, or when the value is not a JSON string.
  # `{}` returns nil instead of failing for nil or non-object nodes (`hasKey`
  # asserts that the node is an object), and `getStr` maps nil and
  # non-string nodes to `default`.
  n{name}.getStr(default)
|
||||||
|
|
||||||
|
proc getElemsIfExists(n: JsonNode, name: string, default: seq[JsonNode] = @[]): seq[JsonNode] =
  ## Returns the elements of the JSON array stored under key `name` of the
  ## JSON object `n`.
  ##
  ## `default` is returned when `n` is nil or not a JSON object, when the key
  ## is missing, or when the value is not a JSON array.
  # `{}` returns nil instead of failing for nil or non-object nodes (`hasKey`
  # asserts that the node is an object), and `getElems` maps nil and
  # non-array nodes to `default`.
  n{name}.getElems(default)
|
||||||
|
|
||||||
|
proc checkPackages(newPackagesPath: string, oldPackagesPath: string, checkUrls: bool = false): int =
|
||||||
|
var oldPackagesTable = initTable[string, JsonNode]()
|
||||||
|
if oldPackagesPath != "":
|
||||||
|
let oldPackagesJson = parseJson(readFile(oldPackagesPath))
|
||||||
|
for oldPkg in oldPackagesJson:
|
||||||
|
let oldNameNorm = oldPkg.getStrIfExists("name").normalize()
|
||||||
|
if oldNameNorm != "":
|
||||||
|
oldPackagesTable[oldNameNorm] = oldPkg
|
||||||
|
|
||||||
|
let newPackagesJson = parseJson(readFile(newPackagesPath))
|
||||||
|
# Do a first pass through the list to count duplicate names
|
||||||
|
var packageNameCounter = initCountTable[string]()
|
||||||
|
for pkg in newPackagesJson:
|
||||||
|
let pkgNameNorm = pkg.getStrIfExists("name").normalize()
|
||||||
|
if pkgNameNorm != "":
|
||||||
|
packageNameCounter.inc(pkgNameNorm)
|
||||||
|
|
||||||
|
var client: HttpClient = nil
|
||||||
|
if checkUrls:
|
||||||
|
client = newHttpClient(timeout=3000)
|
||||||
|
client.headers = newHttpHeaders({"User-Agent": "Nim packge_scanner/2.0"})
|
||||||
|
|
||||||
|
var modifiedPackagesCount = 0
|
||||||
|
var failedPackagesCount = 0
|
||||||
|
for pkg in newPackagesJson:
|
||||||
|
var success = true # Set to false by logPackageError
|
||||||
|
let pkgName = pkg.getStrIfExists("name")
|
||||||
|
let pkgNameNorm = pkgName.normalize()
|
||||||
|
var displayName = pkgName
|
||||||
|
if displayName == "":
|
||||||
|
displayName = "<unnamed package>"
|
||||||
|
|
||||||
|
# Start with detecting duplicates
|
||||||
|
if packageNameCounter[pkgNameNorm] > 1:
|
||||||
|
let url = pkg.getStrIfExists("url", "<no url>")
|
||||||
|
logPackageError("Duplicate package " & displayName & " from url " & url)
|
||||||
|
|
||||||
|
# isNew should be used in future versions to do a conditional inspection
|
||||||
|
# of the package contents which requires downloading the full release tarball
|
||||||
|
let isNew = not oldPackagesTable.hasKey(pkgNameNorm)
|
||||||
|
var isModified: bool
|
||||||
|
if isNew:
|
||||||
|
isModified = true
|
||||||
else:
|
else:
|
||||||
if name.len == 0:
|
isModified = oldPackagesTable[pkgNameNorm] != pkg
|
||||||
echo "E: missing package name"
|
|
||||||
inc result
|
|
||||||
elif not pkg.hasKey("method"):
|
|
||||||
echo "E: ", name, " has no method"
|
|
||||||
inc result
|
|
||||||
elif pkg["method"].str notin ["git", "hg"]:
|
|
||||||
echo "E: ", name, " has an unknown method: ", pkg["method"].str
|
|
||||||
inc result
|
|
||||||
elif not pkg.hasKey("url"):
|
|
||||||
echo "E: ", name, " has no URL"
|
|
||||||
inc result
|
|
||||||
elif not pkg.hasKey("tags"):
|
|
||||||
echo "E: ", name, " has no tags"
|
|
||||||
inc result
|
|
||||||
elif not pkg.hasKey("description"):
|
|
||||||
echo "E: ", name, " has no description"
|
|
||||||
inc result
|
|
||||||
elif pkg.hasKey("description") and pkg["description"].str == "":
|
|
||||||
echo "E: ", name, " has empty description"
|
|
||||||
inc result
|
|
||||||
elif not pkg.hasKey("license"):
|
|
||||||
echo "E: ", name, " has no license"
|
|
||||||
inc result
|
|
||||||
elif pkg["url"].str.normalize.startsWith("git://github.com/"):
|
|
||||||
echo "E: ", name, " has an insecure git:// URL instead of https://"
|
|
||||||
inc result
|
|
||||||
elif pkg["license"].str.toLowerAscii notin licenses:
|
|
||||||
echo "E: ", name, " has an unexpected license: ", pkg["license"]
|
|
||||||
inc result
|
|
||||||
elif pkg.hasKey("web"):
|
|
||||||
when not defined(dontFetchRepos):
|
|
||||||
if not canFetchNimbleRepository(name, pkg["web"]):
|
|
||||||
echo "W: Failed to fetch source code repo for ", name
|
|
||||||
elif pkg.hasKey("tags"):
|
|
||||||
var emptyTags = 0
|
|
||||||
for tag in pkg["tags"]:
|
|
||||||
if tag.getStr.len == 0:
|
|
||||||
inc emptyTags
|
|
||||||
|
|
||||||
if emptyTags > 0:
|
if isModified:
|
||||||
echo "E: ", name, " has ", emptyTags, " empty tags"
|
inc modifiedPackagesCount
|
||||||
inc result
|
|
||||||
|
|
||||||
if name.normalize notin names:
|
if pkgName == "":
|
||||||
names.incl name.normalize
|
logPackageError("Missing package name")
|
||||||
else:
|
|
||||||
echo("E: ", name, ": a package by that name already exists.")
|
|
||||||
inc result
|
|
||||||
|
|
||||||
echo "\nProblematic packages count: ", result
|
let isAlias = pkg.hasKey("alias")
|
||||||
|
if isAlias:
|
||||||
|
if packageNameCounter[pkg["alias"].getStr().normalize()] == 0:
|
||||||
|
logPackageError(displayName & " is an alias pointing to a missing package")
|
||||||
|
else:
|
||||||
|
var tags = pkg.getElemsIfExists("tags")
|
||||||
|
var isDeleted = false
|
||||||
|
if tags.len == 0:
|
||||||
|
logPackageError(displayName & " has no tags")
|
||||||
|
else:
|
||||||
|
var emptyTags = false
|
||||||
|
for tag in tags:
|
||||||
|
if tag.getStr == "":
|
||||||
|
emptyTags = true
|
||||||
|
if tag.getStr.toLowerAscii() == "deleted":
|
||||||
|
isDeleted = true
|
||||||
|
if emptyTags:
|
||||||
|
logPackageError(displayName & " has empty tags")
|
||||||
|
|
||||||
|
if not isDeleted:
|
||||||
|
if not pkgName.allCharsInSet(allowedNameChars):
|
||||||
|
logPackageError(displayName & " is not a valid package name")
|
||||||
|
|
||||||
|
if not pkg.hasKey("method"):
|
||||||
|
logPackageError(displayName & " has no method")
|
||||||
|
elif pkg["method"].kind != JString or pkg["method"].str notin ["git", "hg"]:
|
||||||
|
logPackageError(displayName & " has an invalid method")
|
||||||
|
|
||||||
|
if pkg.getStrIfExists("description") == "":
|
||||||
|
logPackageError(displayName & " has no description")
|
||||||
|
|
||||||
|
if pkg.getStrIfExists("license") == "":
|
||||||
|
logPackageError(displayName & " has no license")
|
||||||
|
|
||||||
|
var downloadUrl = pkg.getStrIfExists("url")
|
||||||
|
if not pkg.hasKey("url"):
|
||||||
|
logPackageError(displayName & " has no download URL")
|
||||||
|
else:
|
||||||
|
downloadUrl = downloadUrl
|
||||||
|
checkUrl("download", downloadUrl)
|
||||||
|
|
||||||
|
if pkg.hasKey("web"):
|
||||||
|
let webUrl = pkg["web"].getStr()
|
||||||
|
if webUrl != downloadUrl:
|
||||||
|
checkUrl("web", webUrl)
|
||||||
|
|
||||||
|
if pkg.hasKey("doc"):
|
||||||
|
let docUrl = pkg["doc"].getStr()
|
||||||
|
if docUrl != downloadUrl:
|
||||||
|
checkUrl("doc", docUrl)
|
||||||
|
|
||||||
|
|
||||||
|
if not success:
|
||||||
|
inc failedPackagesCount
|
||||||
|
|
||||||
|
|
||||||
|
if client != nil:
|
||||||
|
client.close()
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
if oldPackagesPath != "":
|
||||||
|
echo "Found ", modifiedPackagesCount, " modified package(s)"
|
||||||
|
echo "Problematic packages count: ", failedPackagesCount
|
||||||
|
if failedPackagesCount > 0:
|
||||||
|
result = 1
|
||||||
|
|
||||||
|
|
||||||
|
proc cliMain(): int =
  ## Parses the command line and runs the package check.
  ##
  ## Returns the process exit code: 0 after printing the help text, 1 for an
  ## invalid command line (no package file, more than one positional
  ## argument, unknown option), otherwise the result of `checkPackages`.
  var parser = initOptParser(os.commandLineParams())
  var newPackagesPath = ""
  var oldPackagesPath = ""
  var checkUrls = false

  for kind, key, val in parser.getopt():
    case kind
    of cmdEnd:
      break
    of cmdShortOption, cmdLongOption:
      case key
      of "old":
        oldPackagesPath = val
      of "check-urls":
        checkUrls = true
      of "help", "h":
        echo usage
        return 0
      else:
        # A misspelt option (for example --check-url) must not be dropped
        # silently: the scan would run with fewer checks than requested.
        echo "Unknown option: ", key
        echo usage
        return 1
    of cmdArgument:
      if newPackagesPath == "":
        newPackagesPath = key
      else:
        echo "Too many arguments!"
        return 1

  if newPackagesPath == "":
    echo usage
    return 1

  result = checkPackages(newPackagesPath, oldPackagesPath, checkUrls)
|
||||||
|
|
||||||
when isMainModule:
|
when isMainModule:
|
||||||
quit(check())
|
quit(cliMain())
|
||||||
|
|||||||
@ -1,2 +0,0 @@
|
|||||||
var fs = require('fs');
|
|
||||||
JSON.parse(fs.readFileSync('packages.json', 'utf8'));
|
|
||||||
Loading…
x
Reference in New Issue
Block a user