mirror of https://github.com/logos-storage/bittorrent-benchmarks.git
synced 2026-02-22 21:53:07 +00:00
fix: handle exceptions for malformed lines at vector log parser
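The gist of the fix: a malformed line in the Vector log file no longer aborts log retrieval; the parser logs the offending line and skips it. A minimal standalone sketch of the pattern (illustrative only — `iter_parsed` is a hypothetical name, not part of the module):

```python
import json
import logging
from json import JSONDecodeError
from typing import Iterable, Iterator

logger = logging.getLogger(__name__)


def iter_parsed(lines: Iterable[str]) -> Iterator[dict]:
    """Yield parsed JSON objects, skipping lines that fail to parse."""
    for line in lines:
        try:
            parsed = json.loads(line)
        except JSONDecodeError as err:
            # A bad line is logged and skipped rather than raised.
            logger.error(f"Failed to parse line from vector source: {line}", exc_info=err)
            continue
        yield parsed


# The malformed second line is skipped; the other two parse normally.
assert list(iter_parsed(['{"a": 1}', "{oops", '{"b": 2}'])) == [{"a": 1}, {"b": 2}]
```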
This commit is contained in:
parent fc0630224f
commit 84bac4594d
README.md: 38 changes

@@ -1,3 +1,39 @@
# bittorrent-benchmarks

Scaffolding and experiments for benchmarking Codex against the Deluge bittorrent client.
This is general enough that it could be extended to benchmark Codex against any content
distribution network, including IPFS.

This experimental harness leans on Kubernetes. It is entirely possible, however, to run
experiments locally using [Minikube](https://minikube.sigs.k8s.io/) (or Kind, or Docker Desktop).

## Limits

When running experiments locally on a Linux machine, you will bump into a number of
limitations. I have documented those here. I won't go into how to make those changes
permanent within your system, as there is significant variation across distributions.

**ARP cache.** The default size of the ARP cache is too small. You should bump it
significantly, e.g.:

```bash
echo 4096 | sudo tee /proc/sys/net/ipv4/neigh/default/gc_thresh1
echo 8192 | sudo tee /proc/sys/net/ipv4/neigh/default/gc_thresh2
echo 16384 | sudo tee /proc/sys/net/ipv4/neigh/default/gc_thresh3
```

**Inotify.** Kubernetes seems to enjoy watching the filesystem, so
you should increase inotify limits across the board:

```bash
sudo sysctl -w fs.inotify.max_user_instances=2099999999
sudo sysctl -w fs.inotify.max_queued_events=2099999999
sudo sysctl -w fs.inotify.max_user_watches=2099999999
```

**Kernel key retention service.** Kubernetes also places a large number of keys
within the kernel. Make sure you have enough room:

```bash
echo 10000 | sudo tee /proc/sys/kernel/keys/maxkeys
```
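One way to confirm the new values took effect is to read them back; each sysctl key below corresponds to one of the `/proc/sys` paths used above:

```bash
# Read the current values back (read-only; no sudo needed).
sysctl net.ipv4.neigh.default.gc_thresh1 net.ipv4.neigh.default.gc_thresh2 net.ipv4.neigh.default.gc_thresh3
sysctl fs.inotify.max_user_instances fs.inotify.max_queued_events fs.inotify.max_user_watches
sysctl kernel.keys.maxkeys
```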

@@ -2,9 +2,11 @@
that stores logs. Such infrastructure might be a simple file system, a service like Logstash, or a database."""

import json
import logging
from abc import ABC, abstractmethod
from collections.abc import Iterator
from contextlib import AbstractContextManager
from json import JSONDecodeError
from pathlib import Path
from typing import TextIO, Optional, Tuple, List, Dict, Type

@@ -19,6 +21,8 @@ RawLine = str
ExperimentId = str
NodeId = str

logger = logging.getLogger(__name__)


class LogSource(ABC):
    """:class:`LogSource` knows how to retrieve logs for experiments within experiment groups. A key assumption is that

@@ -131,7 +135,14 @@ class VectorFlatFileSource(LogSource):
            if app_label in line and group_label in line:
                if experiment_id is not None and experiment_label not in line:
                    continue
                try:
                    parsed = json.loads(line)
                except JSONDecodeError as err:
                    logger.error(
                        f"Failed to parse line from vector source: {line}", exc_info=err
                    )
                    continue

                k8s = parsed["kubernetes"]
                yield (
                    k8s["pod_labels"]["app.kubernetes.io/instance"],

@@ -139,6 +150,9 @@
                    parsed["message"],
                )

    def __str__(self):
        return f"VectorFlatFileSource({self.app_name})"


def split_logs_in_source(
    log_source: LogSource,

@@ -160,9 +174,12 @@ def split_logs_in_source(
    splitters: Dict[str, LogSplitter] = {}
    formats = formats if formats else []

    logger.info(f'Processing logs for group "{group_id}" from source "{log_source}"')

    for experiment_id, node_id, raw_line in log_source.logs(group_id):
        splitter = splitters.get(experiment_id)
        if splitter is None:
            logger.info(f"Found experiment {experiment_id}")
            splitter = LogSplitter(
                lambda event_type, ext: output_manager.open(
                    Path(experiment_id) / f"{event_type}.{ext.value}"
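The loop above lazily creates one `LogSplitter` per experiment the first time that experiment shows up in the stream. Reduced to its core, the bookkeeping is a create-on-first-use cache; a sketch under the assumption that construction is factored into a callable (`make_splitter` is hypothetical):

```python
from typing import Callable, Dict, TypeVar

T = TypeVar("T")


def get_or_create(cache: Dict[str, T], key: str, factory: Callable[[], T]) -> T:
    """Return cache[key], constructing it with factory() on first access."""
    value = cache.get(key)
    if value is None:
        value = factory()
        cache[key] = value
    return value


# Usage mirroring the loop above (make_splitter stands in for the LogSplitter construction):
# splitter = get_or_create(splitters, experiment_id, make_splitter)
```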