add CSV support, ad-hoc scripts and ad-hoc analysis

parent 5a855a97e7
commit b63ece0fea
.gitignore

```diff
@@ -1,4 +1,9 @@
 .idea
 data
 dist
 **/__pycache__
+.Rproj.user
+.RData
+*.html
+*.log
+*.csv
```
README.md

````diff
@@ -2,27 +2,40 @@
 
 ## Installation
 
-```
-pip install logtools
+```sh
+pip install git+https://github.com/gmega/logtools.git
 ```
 
 ## Usage
 
 ### Merge by Timestamp
 
-```
+```sh
 log-merge log1.log log2.log
 ```
 
 ### Merge by Timestamp Showing Aliases Instead of File Name
 
-```
+```sh
 log-merge log1.log log2.log --aliases bootstrap codex21
 ```
 
 ### Merge and Filter by Timestamp
 
-```
+```sh
 # If no timezone is provided, assumes UTC
 log-merge log1.log log2.log --from 2021-01-01T00:00:00 --to 2021-01-02T00:00:00
 ```
+
+### Transform Raw Logs into CSV
+
+```sh
+cat ./log1.log | log-to-csv
+```
+
+### Transform Raw Logs into CSV, Extracting Topics Into Columns
+
+```sh
+cat ./log1.log | log-to-csv --extract-fields topics
+```
````
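For reference, given the line format the parser expects (see `log_parsers.py` below), the first `log-to-csv` invocation emits rows roughly like this. The columns are the ones defined in `to_csv.py`; the sample input line is the one used in the new parser test:

```
timestamp,line_number,level,fields,count,message
2023-10-16T17:28:46.579000+00:00,1,TRC,"topics=""codex blockexcnetwork"" tid=1 peer=16U*7mogoM type=WantBlock items=1",870781,Sending want list to peer
```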
New file (ad-hoc upload-tagging script; path not shown in this view), @@ -0,0 +1,18 @@

```python
"""Ad-hoc script which tags uploads with a sequential number."""
import sys

uploading = False
upload_no = 0
for line in sys.stdin:
    if 'Handling file upload' in line:
        # A new upload begins: bump the sequence number.
        upload_no += 1
        uploading = True

    if uploading:
        # Splice an upload=<n> tag in front of the trailing count=<n> token.
        line = line.strip()
        line = line.rsplit(' ', maxsplit=1)
        line = ' '.join([line[0], f'upload={upload_no}', line[1]])
        print(line)

    if 'Uploaded file' in line:
        uploading = False
```
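A sketch of how the tagger slots into the pipeline (the committed filename is not visible here, so `tag_uploads.py` is a stand-in): the tag is appended as a chronicles-style field, so `log-to-csv --extract-fields` can lift it into its own column, which is how a CSV with the `upload` column read by the notebook below can be produced.

```sh
# hypothetical filename and input log
cat codex3-5.log | python tag_uploads.py | log-to-csv --extract-fields upload > uploads.csv
```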
New file (R console history), @@ -0,0 +1,512 @@ (the viewer shows only part of it)

```r
# The history re-runs a single plot many times, varying the upload under
# inspection and how many of the leading intervals are dropped from the
# quantile. Representative form (upload 13 shown; the sweep covers uploads
# 1-18 and the exclusions [-1:2] (an error), [c(-1,-2)], [c(-1,-2,-3)],
# [c(-1,-2,-4)], [-1], [-(1:3)], [-(1:4)], [-(1:5)], [-(1:10)], and none):
ggplot(
  interlog_intervals |>
    filter(upload == 13) |>
    mutate(bucket = floor_date(timestamp, unit = '5 seconds')) |>
    group_by(bucket) |>
    mutate(
      mean_interval = mean(log_interval),
      p_70 = quantile(log_interval[-1], probs = c(0.95))
    ) |>
    ungroup()
) +
  geom_point(aes(x = timestamp, y = log_interval)) +
  geom_line(aes(x = bucket, y = mean_interval), col = 'red', lwd = 2) +
  geom_line(aes(x = bucket, y = p_70), col = 'orange', lwd = 2) +
  theme_minimal()

# Scratch checks on negative indexing:
c(1,2,3,4)
c(1,2,3,4)[-1]
c(1,2,3,4)[-2]
c(1,2,3,4)[-(1:2)]
```
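For the record, the indexing idiom the history is probing works like this in R; the `[-1:2]` attempt fails because `-1:2` expands to `c(-1, 0, 1, 2)`, mixing negative and positive subscripts:

```r
x <- c(10, 20, 30, 40)
x[-1]      # drop the first element: 20 30 40
x[-(1:2)]  # drop the first two:     30 40
# x[-1:2]  # error: can't mix negative and positive subscripts
```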
New file (R notebook), @@ -0,0 +1,152 @@

````
---
title: "R Notebook"
output: html_notebook
---

```{r}
library(tidyverse)
library(lubridate)
```

```{r}
uploads <- read_csv('./codex-continuous-tests-0codex3-5-77bdb95dc7-j7f46_codex3-5-uploads.csv')
```

```{r}
durations <- uploads |>
  arrange(count) |>
  group_by(upload) |>
  summarise(
    start = timestamp[1],
    end = timestamp[n()],
  ) |>
  mutate(duration = end - start)
```

How long are uploads taking?

```{r}
ggplot(durations, aes(x = upload, y = duration)) +
  geom_point() +
  geom_line() +
  ylab('upload duration') +
  xlab('upload number') +
  theme_minimal()
```

Are all uploads completing?

```{r}
uploads |>
  filter(message == 'Got data from stream') |>
  group_by(upload) |>
  count(name = 'blocks')
```

Does the end of the upload coincide with the last chunk that gets stored?

```{r}
uploads |>
  filter(grepl('Got data from stream', message)) |>
  group_by(upload) |>
  summarise(
    last_store = max(timestamp)
  ) |>
  inner_join(durations, by = 'upload')
```

```{r}
durations
```

```{r}
uploads |> filter(grepl('Exception', message)) |> group_by(message) |> count() |> arrange(n)
```

```{r}
uploads |> filter(upload == 18) |> group_by(message) |> count() |> arrange(n)
```

```{r}
uploads |> filter(upload == 17) |> group_by(message) |> count() |> arrange(n)
```

```{r}
messages <- uploads |> group_by(message) |> count() |> filter(n > 100) |> pull(message)
```

```{r fig.height = 10}
uploads |> filter(message %in% messages) |> group_by(upload, message) |> count() %>% {
  ggplot(.) +
    geom_point(aes(x = message, y = n, color = as.factor(upload))) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    ylab('count') +
    scale_color_manual(values = c('18' = 'red'))
}
```

```{r}
interlog_intervals <- uploads |>
  group_by(upload) |>
  arrange(timestamp) |>
  mutate(log_interval = as.numeric(timestamp - lag(timestamp))) |>
  ungroup()
```

```{r}
interlog_intervals |>
  group_by(upload) |>
  summarise(
    mean_li = mean(log_interval, na.rm = TRUE),
    median_li = median(log_interval, na.rm = TRUE),
    max_li = max(log_interval, na.rm = TRUE),
  ) |>
  pivot_longer(-upload) %>% {
    ggplot(.) +
      geom_line(aes(x = upload, y = value, col = name)) +
      scale_y_log10() +
      theme_minimal() +
      ylab('duration (logscale, seconds)')
  }
```

```{r}
interlog_intervals |> group_by(upload) |> count() |> arrange(desc(n))
```

```{r fig.height=5}
interlog_intervals |>
  group_by(upload) |>
  arrange(log_interval) |>
  mutate(rank = seq_along(log_interval)) |> ungroup() %>% {
    ggplot(.) +
      geom_point(aes(x = rank, y = log_interval, col = as.factor(upload))) +
      theme_minimal() +
      xlab('rank') +
      ylab('time between two consecutive log messages') +
      guides(col = guide_legend(title = 'upload #'))
  }
```

```{r}
ggplot(
  interlog_intervals |>
    filter(upload == 18) |>
    mutate(bucket = floor_date(timestamp, unit = '5 seconds')) |>
    group_by(bucket) |>
    mutate(
      mean_interval = mean(log_interval),
      p_70 = quantile(log_interval[-1], probs = c(0.95))
    ) |>
    ungroup()
) +
  geom_point(aes(x = timestamp, y = log_interval)) +
  geom_line(aes(x = bucket, y = mean_interval), col = 'red', lwd = 2) +
  geom_line(aes(x = bucket, y = p_70), col = 'orange', lwd = 2) +
  theme_minimal()
```
````
New file (RStudio project settings, .Rproj), @@ -0,0 +1,13 @@

```
Version: 1.0

RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8

RnwWeave: Sweave
LaTeX: pdfLaTeX
```
logtools/cli/merge.py

```diff
@@ -57,8 +57,9 @@ def _assign_colors(names: Dict[str, str]) -> Dict[str, str]:
 def _filtering_predicate(args):
     if args.from_ or args.to:
         return timestamp_range(
-            _ensure_utc(tsparser.parse(args.from_)),
-            _ensure_utc(tsparser.parse(args.to))
+            _ensure_utc(args.from_) if args.from_ is not None else datetime(
+                year=1980, month=1, day=1, hour=0, minute=0, second=0, tzinfo=pytz.UTC),
+            _ensure_utc(args.to) if args.to is not None else datetime.utcnow().replace(tzinfo=pytz.UTC)
         )
 
     return lambda x: True
```
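The change makes `--from` and `--to` independently optional: a missing lower bound falls back to a 1980 sentinel and a missing upper bound to the current UTC time. A minimal sketch of the resulting predicate behavior; the `SimpleNamespace` stand-in only needs a `.timestamp`, since that is all `timestamp_range` inspects:

```python
from datetime import datetime
from types import SimpleNamespace

import pytz

from logtools.log.sources.filtered_source import timestamp_range

pred = timestamp_range(
    start=datetime(1980, 1, 1, tzinfo=pytz.UTC),     # default when --from is omitted
    end=datetime.utcnow().replace(tzinfo=pytz.UTC),  # default when --to is omitted
)
assert pred(SimpleNamespace(timestamp=datetime(2023, 10, 16, tzinfo=pytz.UTC)))
```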
logtools/cli/to_csv.py (new file), @@ -0,0 +1,38 @@

```python
"""Parses a log stream, possibly pre-filtered and/or merged, into a CSV file. Allows chronicles topics to be
extracted into their own columns."""
import sys
from argparse import ArgumentParser  # originally imported from traitlets.config.loader, an apparent auto-import slip
from csv import DictWriter

from logtools.log.sources.stream_log_source import StreamLogSource


def to_csv(args):
    fields = args.extract_fields
    writer = DictWriter(sys.stdout,
                        fieldnames=['timestamp', 'line_number', 'level', 'fields', 'count', 'message'] + fields)
    writer.writeheader()
    for line in StreamLogSource(sys.stdin):
        line_fields = {field: line.fields.get(field, 'NA') for field in fields}
        writer.writerow({
            'timestamp': line.timestamp.isoformat(),
            'line_number': line.location.line_number,
            'level': line.level.value,
            'fields': line.topics,
            'count': line.count,
            'message': line.message,
            **line_fields,
        })


def main():
    argparse = ArgumentParser()
    argparse.add_argument('--extract-fields', nargs='+', default=[],
                          help='Extract chronicles topics into CSV columns')

    to_csv(argparse.parse_args())


if __name__ == '__main__':
    main()
```
Deleted file, @@ -1,165 +0,0 @@

The removed file contained only commented-out remnants of the old iterator design: a `LogIterator` ABC exposing `peek()` and `context()`; `SingleLogIterator`, a file-backed iterator with one-line lookahead and optional `from_ts`/`to_ts` timestamp filtering; `CollatingLogIterator`, which drains its sources one after another; and `MergingLogIterator`, which heap-merges sources by lookahead timestamp. The same roles are covered by the live `LogSource`, `OrderedSource`, and `CollatingSource` classes exercised elsewhere in this commit.
logtools/log/log_line.py

```diff
@@ -2,7 +2,9 @@ import re
 from dataclasses import dataclass
 from datetime import datetime
 from enum import Enum
-from typing import Union, Self, Optional
+from typing import Union, Optional
+
+TOPICS = re.compile(r'(?P<key>\w+)=(?P<value>"[\w\s]+"|\S+)')
 
 
 class LogLevel(Enum):
@@ -10,35 +12,20 @@ class LogLevel(Enum):
     debug = 'DBG'
     info = 'INF'
     error = 'ERR'
+    warning = 'WRN'
+    note = 'NOT'
 
-
-LOG_LINE = re.compile(
-    r'(?P<line_type>\w{3}) (?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}.\d{3}\+\d{2}:\d{2}) (?P<message>.*) '
-    r'count=(?P<count>\d+)$'
-)
 
 
 @dataclass
 class LogLine:
     raw: str
     level: LogLevel
-    line_number: int
     timestamp: Union[str, datetime]
     message: str
+    topics: str
     count: Optional[int]
 
-    @classmethod
-    def from_str(cls, source: str, parse_datetime: bool = False) -> Self:
-        parsed = LOG_LINE.search(source)
-        if not parsed:
-            raise ValueError(f'Could not parse log line: {source}')
-
-        return cls(
-            raw=source,
-            level=LogLevel(parsed['line_type'].upper()),
-            line_number=0,
-            timestamp=(datetime.fromisoformat(parsed['timestamp']) if parse_datetime
-                       else parsed['timestamp']),
-            message=parsed['message'],
-            count=int(parsed['count']) if parsed['count'] else None,
-        )
+    @property
+    def fields(self):
+        fields = TOPICS.findall(self.topics)
+        return {key: value for key, value in fields} if fields else {}
```
logtools/log/sources/file_log_source.py (path inferred from the class name)

```diff
@@ -1,30 +1,25 @@
-import sys
 from dataclasses import dataclass
 from pathlib import Path
 
-from logtools.log.sources.log_source import TrackedLogLine, LogSource
+from logtools.log.sources.log_parsers import LineNumberLocation
+from logtools.log.sources.stream_log_source import StreamLogSource
 
 
 @dataclass
-class FileLineLocation:
+class FileLineLocation(LineNumberLocation):
     path: Path
-    line_number: int
 
 
-class FileLogSource(LogSource[TrackedLogLine[FileLineLocation]]):
+class FileLogSource(StreamLogSource):
     def __init__(self, path: Path, parse_datetime=True):
         self.path = path
-        self.parse_datetime = parse_datetime
+        super().__init__(self.path.open(encoding='utf-8'), parse_datetime=parse_datetime)
 
     def __iter__(self):
-        with self.path.open(encoding='utf-8') as f:
-            for line_number, line in enumerate(f, start=1):
-                try:
-                    parsed = TrackedLogLine.from_str(line, parse_datetime=True)
-                    parsed.location = FileLineLocation(self.path, line_number)
-                    yield parsed
-                except ValueError:
-                    # FIXME we should probably relax parsing restrictions and output
-                    # these too but for now just skip it.
-                    print(f'Skip unparseable line: {line}', file=sys.stderr)
+        try:
+            yield from super().__iter__()
+        finally:
+            self.stream.close()
+
+    def _location(self, line_number: int) -> LineNumberLocation:
+        return FileLineLocation(path=self.path, line_number=line_number)
```
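A minimal usage sketch (module path assumed from the class name above): iteration parses each line, attaches a file-aware location, and closes the underlying stream once the iterator is exhausted.

```python
from pathlib import Path

from logtools.log.sources.file_log_source import FileLogSource  # assumed module path

for line in FileLogSource(Path('log1.log')):
    print(line.location.path, line.location.line_number, line.message)
```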
logtools/log/sources/filtered_source.py

```diff
@@ -17,6 +17,6 @@ class FilteredSource(LogSource[TrackedLogLine[TLocation]]):
 
 def timestamp_range(start: datetime, end: datetime):
     def predicate(line: TrackedLogLine[TLocation]):
-        return start <= line.timestamp <= end
+        return start <= line.timestamp <= end  # type: ignore
 
     return predicate
```
logtools/log/sources/log_parsers.py (new file), @@ -0,0 +1,76 @@

```python
import re
import sys
from csv import DictReader
from dataclasses import dataclass

from typing import Callable, TextIO, Optional, cast

from dateutil import parser as tsparser

from logtools.log.log_line import LogLevel
from logtools.log.sources.log_source import TrackedLogLine, LogSource


@dataclass
class LineNumberLocation:
    line_number: int


"""A :class:`LogParser` is a function that takes a raw text stream and returns a :class:`LogSource`, which in turn
is an iterable of parsed lines."""
LogParser = Callable[[TextIO], LogSource[LineNumberLocation]]

LOG_LINE = re.compile(
    r'(?P<line_type>\w{3}) (?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}.\d{3}\+\d{2}:\d{2}) (?P<message>.*) '
    r'count=(?P<count>\d+)$'
)

TOPICS = re.compile(r'((\w+=("[\w\s]+"|\S+) )+)?\w+=("[\w\s]+"|\S+)$')


def parse_raw(line: str, parse_datetime: bool = True) -> Optional[TrackedLogLine[LineNumberLocation]]:
    parsed = LOG_LINE.search(line)
    if not parsed:
        # Bail out before touching the match: searching TOPICS on a failed
        # match would raise instead of returning None.
        return None

    topics = TOPICS.search(parsed['message'])
    if not topics:
        return None

    return TrackedLogLine(
        raw=line,
        level=LogLevel(parsed['line_type'].upper()),
        timestamp=(tsparser.parse(parsed['timestamp']) if parse_datetime
                   else parsed['timestamp']),
        message=parsed['message'][:topics.start() - 1].strip(),
        count=int(parsed['count']) if parsed['count'] else None,
        topics=topics.group()
    )


def raw_parser(stream: TextIO, parse_datetime=True) -> LogSource:
    for line_number, line in enumerate(stream, start=1):
        parsed = parse_raw(line, parse_datetime=parse_datetime)
        if not parsed:
            # FIXME we should probably relax parsing restrictions and output
            #  these too but for now just skip it.
            print(f'Skip unparseable line: {line}', file=sys.stderr)
            continue

        yield parsed


def csv_parser(stream: TextIO, parse_datetime=True) -> LogSource:
    for line_number, line in enumerate(DictReader(stream), start=1):
        try:
            line = TrackedLogLine(
                raw=line['message'],  # FIXME this is NOT the raw line...
                timestamp=line['timestamp'],
                message=line['message'],
                count=int(line['count']) if line['count'] else None,
                topics=line['topics'],
                level=LogLevel[line['level']],
            )

            if parse_datetime:
                line.timestamp = tsparser.parse(cast(str, line.timestamp))
            yield line
        except ValueError:
            print(f'Skip unparseable line: {line}', file=sys.stderr)
```
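One caveat worth flagging: `csv_parser` appears to read the topics from a `topics` column and the level by enum *name* (`trace`), while `to_csv.py` above writes topics under `fields` and the level by *value* (`TRC`), so the two don't round-trip as-is without a small column mapping. A row this parser accepts would look roughly like:

```
timestamp,level,message,count,topics
2023-10-16T17:28:46.579000+00:00,trace,Sending want list to peer,870781,"topics=""codex blockexcnetwork"" tid=1"
```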
logtools/log/sources/stream_log_source.py (new file), @@ -0,0 +1,19 @@

```python
from typing import TextIO

from logtools.log.sources.log_parsers import raw_parser, LineNumberLocation, LogParser
from logtools.log.sources.log_source import LogSource, TrackedLogLine


class StreamLogSource(LogSource[TrackedLogLine[LineNumberLocation]]):
    def __init__(self, stream: TextIO, parse_datetime=True, log_format: LogParser = raw_parser):
        self.stream = stream
        self.format = log_format
        self.parse_datetime = parse_datetime

    def __iter__(self):
        for line_number, line in enumerate(self.format(self.stream), start=1):
            line.location = self._location(line_number)
            yield line

    def _location(self, line_number: int) -> LineNumberLocation:
        return LineNumberLocation(line_number)
```
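A quick sketch of the class in isolation; the sample line reuses the format from the tests, and any text stream works, which is what lets `log-to-csv` read from stdin:

```python
import io

from logtools.log.sources.stream_log_source import StreamLogSource

stream = io.StringIO(
    'TRC 2023-10-16 20:29:24.595+00:00 Advertising block topics="codex discoveryengine" count=1\n'
)
for line in StreamLogSource(stream):
    print(line.location.line_number, line.level, line.message)  # 1 LogLevel.trace Advertising block
```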
logtools/log/sources/tests/string_log_source.py

```diff
@@ -1,22 +1,19 @@
 from dataclasses import dataclass
+from io import StringIO
 
-from logtools.log.sources.log_source import LogSource, TrackedLogLine
+from logtools.log.sources.log_parsers import LogParser
+from logtools.log.sources.stream_log_source import StreamLogSource, LineNumberLocation, raw_parser
 
 
 @dataclass
-class ParseLocation:
+class ParseLocation(LineNumberLocation):
     name: str
-    number: int
 
 
-class StringLogSource(LogSource[TrackedLogLine[ParseLocation]]):
-    def __init__(self, name: str, lines: str):
+class StringLogSource(StreamLogSource):
+    def __init__(self, name: str, lines: str, log_format: LogParser = raw_parser):
         self.name = name
-        self.lines = lines
+        super().__init__(stream=StringIO(lines), log_format=log_format)
 
-    def __iter__(self):
-        for line_number, line in enumerate(self.lines.splitlines(), start=1):
-            parsed = TrackedLogLine.from_str(line, parse_datetime=True)
-            parsed.location = ParseLocation(self.name, line_number)
-
-            yield parsed
+    def _location(self, line_number: int) -> LineNumberLocation:
+        return ParseLocation(name=self.name, line_number=line_number)
```
Collating source tests

```diff
@@ -22,7 +22,7 @@ def test_should_collate_lines_from_log_sources():
     )
 
     collated = CollatingSource(log1, log2)
-    entries = [(line.location.name, line.location.number, line.count) for line in collated]
+    entries = [(line.location.name, line.location.line_number, line.count) for line in collated]
     assert entries == [
         ('log1', 1, 1),
         ('log1', 2, 2),
```
Filtered source tests

```diff
@@ -2,6 +2,7 @@ from dateutil import parser
 
 from logtools.log.log_line import LogLine
 from logtools.log.sources.filtered_source import FilteredSource, timestamp_range
+from logtools.log.sources.log_parsers import parse_raw
 from logtools.log.sources.tests.string_log_source import StringLogSource
 
 
@@ -32,7 +33,7 @@ def test_should_generate_correct_datetime_range_predicate():
     matches = timestamp_range(start=parser.parse('2023-10-16 22:29:24.597+00:00'),
                               end=parser.parse('2023-10-18 20:29:25.597+00:00'))
 
-    lines = [LogLine.from_str(line, parse_datetime=True) for line in raw_lines]
+    lines = [parse_raw(line, parse_datetime=True) for line in raw_lines]
     filtered = [line.count for line in lines if matches(line)]
 
     assert filtered == [2, 3]
```
New test file (log parsers), @@ -0,0 +1,19 @@

```python
from datetime import datetime

import pytz

from logtools.log.log_line import LogLevel
from logtools.log.sources.log_parsers import parse_raw


def test_raw_parser_should_parse_logline_from_string():
    line = parse_raw('TRC 2023-10-16 17:28:46.579+00:00 Sending want list to peer '
                     'topics="codex blockexcnetwork" tid=1 peer=16U*7mogoM '
                     'type=WantBlock items=1 count=870781', parse_datetime=True)

    assert line.level == LogLevel.trace
    assert line.timestamp == datetime(2023, 10, 16, 17, 28, 46,
                                      579000, tzinfo=pytz.utc)
    assert line.message == 'Sending want list to peer'
    assert line.topics == 'topics="codex blockexcnetwork" tid=1 peer=16U*7mogoM type=WantBlock items=1'
    assert line.count == 870781
```
Ordered source tests

```diff
@@ -5,15 +5,12 @@ from logtools.log.sources.tests.string_log_source import StringLogSource
 
 
 def test_should_order_sources_by_lookahead_timestamp():
-    contents = StringLogSource(
-        name='log1',
-        lines="""TRC 2023-10-16 20:29:24.595+00:00 Advertising block topics="codex discoveryengine" count=1
+    lines = """TRC 2023-10-16 20:29:24.595+00:00 Advertising block topics="codex discoveryengine" count=1
 TRC 2023-10-16 20:29:24.597+00:00 Provided to nodes topics="codex discovery" tid=1 count=2
 TRC 2023-10-16 20:29:24.646+00:00 Retrieved record from repo topics="codex repostore" count=3"""
-    )
 
-    log1 = OrderedSource(contents)
-    log2 = OrderedSource(contents)
+    log1 = OrderedSource(StringLogSource(name='log1', lines=lines))
+    log2 = OrderedSource(StringLogSource(name='log2', lines=lines))
 
     next(log1)
     assert log2 < log1
@@ -24,19 +21,15 @@ def test_should_order_sources_by_lookahead_timestamp():
 
 
 def test_should_raise_error_if_comparing_empty_sources():
-    contents = StringLogSource(
-        name='log1',
-        lines="""TRC 2023-10-16 20:29:24.595+00:00 Advertising block topics="codex discoveryengine" count=1
+    lines = """TRC 2023-10-16 20:29:24.595+00:00 Advertising block topics="codex discoveryengine" count=1
 TRC 2023-10-16 20:29:24.597+00:00 Provided to nodes topics="codex discovery" tid=1 count=2
 TRC 2023-10-16 20:29:24.646+00:00 Retrieved record from repo topics="codex repostore" count=3"""
-    )
 
-    log1 = OrderedSource(contents)
-    log2 = OrderedSource(contents)
+    log1 = OrderedSource(StringLogSource(name='log1', lines=lines))
+    log2 = OrderedSource(StringLogSource(name='log2', lines=lines))
 
     for _ in log1:
         ...
 
     with pytest.raises(ValueError):
         _ = log1 < log2
```
Log line tests

```diff
@@ -1,16 +1,19 @@
-from datetime import datetime
-
-import pytz
-
 from logtools.log.log_line import LogLine, LogLevel
 
 
-def test_should_parse_logline_from_string():
-    line = LogLine.from_str('TRC 2023-10-16 17:28:46.579+00:00 Sending want list to peer '
-                            'topics="codex blockexcnetwork" tid=1 peer=16U*7mogoM '
-                            'type=WantBlock items=1 count=870781', parse_datetime=True)
+def test_should_parse_chronicles_fields():
+    line = LogLine(message='Sending want list to peer',
+                   topics='topics="codex blockexcnetwork" tid=1 peer=16U*7mogoM '
+                          'type=WantBlock items=1',
+                   timestamp='',
+                   count=0,
+                   raw='',
+                   level=LogLevel.trace)
 
-    assert line.level == LogLevel.trace
-    assert line.timestamp == datetime(2023, 10, 16, 17, 28, 46,
-                                      579000, tzinfo=pytz.utc)
-    assert line.count == 870781
+    assert line.fields == {
+        'topics': '"codex blockexcnetwork"',
+        'tid': '1',
+        'peer': '16U*7mogoM',
+        'type': 'WantBlock',
+        'items': '1',
+    }
```
pyproject.toml

```diff
@@ -24,4 +24,5 @@ requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.poetry.scripts]
 log-merge = 'logtools.cli.merge:main'
+log-to-csv = 'logtools.cli.to_csv:main'
```
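With the new script entry registered, an install per the README puts both CLIs on the PATH; assuming both entry points are argparse-based (true at least for `log-to-csv` above), `--help` is available:

```sh
pip install git+https://github.com/gmega/logtools.git
log-merge --help
log-to-csv --help
```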