Hercules packages by jakthom · Pull Request #29 · jakthom/hercules · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Hercules packages #29

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Oct 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.5.3
0.6.0
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,19 @@ globalLabels:
- env: dev
```

### Packages

Hercules includes a YAML-based package loader, which means extensions, macros, sources, and metrics can be logically grouped and distributed as a unit.

Starter packages can be found in the [hercules-packages](/hercules-packages/) directory.

**Example package registration**

```
packages:
- location: hercules-packages/snowflake-performance.yml
```


### Embedded Analytics

Expand Down
40 changes: 35 additions & 5 deletions cmd/hercules/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (

"github.com/dbecorp/hercules/pkg/config"
"github.com/dbecorp/hercules/pkg/flock"
herculespackage "github.com/dbecorp/hercules/pkg/herculesPackage"
metrics "github.com/dbecorp/hercules/pkg/metrics"
"github.com/dbecorp/hercules/pkg/middleware"
"github.com/prometheus/client_golang/prometheus"
Expand All @@ -26,6 +27,7 @@ var VERSION string
// Hercules is the application state: its configuration, database handles,
// loaded packages, and metric registry.
type Hercules struct {
// config is the application configuration.
config config.Config
// db is the database handle returned by flock.InitializeDB.
db *sql.DB
// packages holds the packages assembled by loadPackages, including the
// synthetic "core" package built from the main configuration.
packages []herculespackage.Package
// conn is the connection returned by flock.InitializeDB; it is handed to
// each package when it is initialized.
conn *sql.Conn
// metricRegistry is built by initializeRegistry from the packages' metric
// definitions.
metricRegistry *metrics.MetricRegistry
}
Expand All @@ -48,21 +50,49 @@ func (d *Hercules) initializeFlock() {
d.db, d.conn = flock.InitializeDB(d.config)
}

func (d *Hercules) initializeSources() {
for _, source := range d.config.Sources {
source.InitializeWithConnection(d.conn)
// loadPackages resolves every package listed in the configuration and stores
// the result, followed by a synthetic "core" package, on d.packages.
//
// A package whose GetPackage call fails is logged and skipped, so a failed
// entry never contributes a meaningless Package value to the list. The
// trailing "core" package wraps the extensions, macros, sources, and metrics
// declared directly in the main configuration, so they are handled the same
// way as packaged ones.
func (d *Hercules) loadPackages() {
	var pkgs []herculespackage.Package
	for _, pkgConfig := range d.config.Packages {
		pkg, err := pkgConfig.GetPackage()
		if err != nil {
			log.Error().Err(err).Msg("could not get package")
			// The returned package is not meaningful when err is non-nil.
			continue
		}
		pkgs = append(pkgs, pkg)
	}
	// Represent core configuration via a package
	pkgs = append(pkgs, herculespackage.Package{
		Name:       "core",
		Version:    "1.0.0",
		Extensions: d.config.Extensions,
		Macros:     d.config.Macros,
		Sources:    d.config.Sources,
		Metrics:    d.config.Metrics,
	})
	d.packages = pkgs
}

// initializePackages calls InitializeWithConnection on each loaded package,
// passing the shared connection d.conn.
func (d *Hercules) initializePackages() {
	for i := range d.packages {
		// Operate on a copy of the element, exactly as a value-range loop does.
		pkg := d.packages[i]
		pkg.InitializeWithConnection(d.conn)
	}
}

// initializeRegistry builds the metric registry from the metric definitions
// of every loaded package.
//
// The "core" package appended by loadPackages already carries
// d.config.Metrics, so the registry is constructed exactly once, from the
// merged definitions, rather than being built from the core metrics and then
// immediately replaced.
func (d *Hercules) initializeRegistry() {
	// Merge metric definitions from all packages
	metricDefinitions := metrics.MetricDefinitions{}
	for _, pkg := range d.packages {
		metricDefinitions.Merge(pkg.Metrics)
	}
	d.metricRegistry = metrics.NewMetricRegistry(metricDefinitions, d.config.InstanceLabels())
}

// Initialize runs the startup sequence in order: configure, initializeFlock,
// loadPackages, initializePackages, and initializeRegistry, then logs the
// resulting configuration at debug level.
//
// Sources are not initialized separately: loadPackages places
// d.config.Sources in the "core" package, which is handed to
// initializePackages along with every other package.
func (d *Hercules) Initialize() {
	log.Debug().Msg("initializing Hercules")
	d.configure()
	d.initializeFlock()
	// Packages must be loaded before they can be initialized, and before the
	// registry can merge their metric definitions.
	d.loadPackages()
	d.initializePackages()
	d.initializeRegistry()
	log.Debug().Interface("config", d.config).Msg("running with config")
}
Expand Down
18 changes: 18 additions & 0 deletions hercules-packages/example-nyc-taxi.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
name: nyc-taxi
version: 1.0.0

sources:
- name: nyc_yellow_taxi_june_2024
type: parquet
source: https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2024-07.parquet
materialize: true
refreshIntervalSeconds: 100

metrics:
gauge:
- name: nyc_pickup_location_fare_total
help: Total NYC fares for the month of July by pickup location
enabled: True
sql: select struct_pack(pickupLocation := PULocationID::text), sum(fare_amount) as val from nyc_yellow_taxi_june_2024 group by 1
labels:
- pickupLocation
2 changes: 2 additions & 0 deletions hercules-packages/example-tpch.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
name: tpch
version: 1.0.0
120 changes: 120 additions & 0 deletions hercules-packages/snowflake-performance.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
name: snowflake-performance
version: 1.0.0

extensions:
core:
- name: inet

community:
- name: chsql # Clickhouse macros and functions

macros:
- sql: create or replace macro one() AS (SELECT 1);

sources:
- name: snowflake_query_history
type: parquet
source: assets/snowflake_query_history.parquet
materialize: true
refreshIntervalSeconds: 5

metrics:
gauge:
- name: query_status_count
help: Queries executed and their associated status, by user and warehouse
enabled: true
sql: from snowflake_query_history select struct_pack(user := user_name, warehouse := warehouse_name, status := lower(execution_status)) as labels, count(*) as value group by 1;
labels:
- user
- warehouse
- status

- name: queries_this_week_total
help: Queries this week total, by user and warehouse
enabled: true
sql: select struct_pack(user := user_name, warehouse := warehouse_name) as labels, count(*) as value from snowflake_query_history group by 1;
labels:
- user
- warehouse

- name: avg_query_duration_seconds
help: The average query duration for a particular user, using a particular warehouse
enabled: true
sql: select struct_pack(user := user_name, warehouse := warehouse_name) as labels, avg(TOTAL_ELAPSED_TIME) as value from snowflake_query_history group by 1;
labels:
- user
- warehouse

- name: table_operations_count
help: The number of operations on each table over the last week
enabled: true
sql: select struct_pack(user := user_name, query_type := query_type) as labels, count(*) as value from snowflake_query_history group by 1;
labels:
- user
- query_type

- name: avg_virtual_warehouse_spill_to_local_storage_bytes
help: The average bytes spilled to disk for queries on a specific warehouse
enabled: true
sql: select struct_pack(user := user_name, warehouse := warehouse_name) as labels, avg(BYTES_SPILLED_TO_LOCAL_STORAGE) as value from snowflake_query_history group by 1;
labels:
- user
- warehouse

- name: avg_virtual_warehouse_spill_to_remote_storage_bytes
help: The average bytes spilled to remote disk for queries on a specific warehouse
enabled: true
sql: select struct_pack(user := user_name, warehouse := warehouse_name) as labels, avg(BYTES_SPILLED_TO_REMOTE_STORAGE) as value from snowflake_query_history group by 1;
labels:
- user
- warehouse

histogram:
- name: query_duration_seconds
help: Histogram of query duration seconds
sql: select struct_pack(user := user_name, warehouse := warehouse_name) as labels, total_elapsed_time as value from snowflake_query_history;
labels:
- user
- warehouse
buckets:
- 0.1
- 0.5
- 1
- 2
- 4
- 8
- 16
- 32
- 64
- 128
- 256
- 512
- 1024
- 2048
- 4096
- 8192
- 16384
- 32768

summary:
- name: virtual_warehouse_query_duration_seconds
help: Summary of query duration seconds
sql: select struct_pack(user := user_name, warehouse := warehouse_name) as labels, total_elapsed_time as value from snowflake_query_history;
labels:
- user
- warehouse
objectives:
- 0.001
- 0.05
- 0.01
- 0.5
- 0.9
- 0.99

counter:
- name: queries_executed_count
help: The count of queries executed by user and warehouse
sql: select struct_pack(user := user_name, warehouse := warehouse_name) as labels, 1 as value from snowflake_query_history;
labels:
- user
- warehouse
2 changes: 2 additions & 0 deletions hercules-packages/snowflake-security.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
name: snowflake-security
version: 1.0.0
Loading
0