Improve Dbt Docs UI and testing by ismailsimsek · Pull Request #70 · memiiso/opendbt · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Improve Dbt Docs UI and testing #70

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Apr 20, 2025
10000
8 changes: 4 additions & 4 deletions opendbt/catalog/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
import json
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Optional

import sqlglot
import tqdm
from dataclasses import dataclass
from pathlib import Path
from sqlglot import Expression
from sqlglot.lineage import lineage, SqlglotError, exp
from typing import Dict, Optional

from opendbt.logger import OpenDbtLogger
from opendbt.utils import Utils
Expand Down Expand Up @@ -35,6 +34,7 @@ def __init__(self, table_ref: OpenDbtTableRef, data: dict):
self.data["type"] = self.data["type"] if "type" in self.data else "unknown"
self.data["column_fqn"] = f"{self.table_ref.table_fqn()}.{self.name}".lower()
self.data["table_fqn"] = self.table_ref.table_fqn().lower()
self.data["table_relative_fqn"] = f"{self.table_ref.schema}.{self.table_ref.table}"
self.data["transformations"] = []
self.data["depends_on"] = []

Expand Down
231 changes: 165 additions & 66 deletions opendbt/dbt/docs/index.html

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions tests/resources/dbtcore/dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ version: '1.0.0'

profile: 'dbtcore'

# include opendbt macros
macro-paths: [ "macros", "../../../opendbt/macros/" ]
# use opendbt index.html for docs
docs-paths: [ "../../../opendbt/docs/" ]

clean-targets:
Expand All @@ -13,8 +15,7 @@ clean-targets:

models:
dbtcore:
example:
+materialized: view
+materialized: table

vars:
dbt_custom_adapter: opendbt.examples.DuckDBAdapterV2Custom
Expand Down
33 changes: 0 additions & 33 deletions tests/resources/dbtcore/models/example/schema.yml

This file was deleted.

4 changes: 3 additions & 1 deletion tests/resources/dbtcore/models/my_executedlt_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,15 @@ def model(dbt, pipeline: TPipeline):
print("========================================================")
print(f"INFO: DLT Pipeline pipeline_name:{pipeline.pipeline_name}")
print(f"INFO: DLT Pipeline dataset_name:{pipeline.dataset_name}")
print(f"INFO: DLT Pipeline dataset_name:{pipeline}")
print(f"INFO: DLT Pipeline staging:{pipeline.staging}")
print(f"INFO: DLT Pipeline destination:{pipeline.destination}")
print(f"INFO: DLT Pipeline _pipeline_storage:{pipeline._pipeline_storage}")
print(f"INFO: DLT Pipeline _schema_storage:{pipeline._schema_storage}")
print(f"INFO: DLT Pipeline state:{pipeline.state}")
print(f"INFO: DBT this:{dbt.this}")
print("========================================================")
load_info = pipeline.run(events())
load_info = pipeline.run(events(), table_name=str(str(dbt.this).split('.')[-1]).strip('"'))
print(load_info)
row_counts = pipeline.last_trace.last_normalize_info
print(row_counts)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
SELECT
t1.id AS pk_id,
t1.data_value AS data_value1,
CONCAT(t1.column_3, '-concat-1', t1.data_value, t2.row_data) AS data_value2
CONCAT(t1.column_3, '-concat-1', t1.data_value, t2.row_data) AS data_value2,
t3.event_tstamp AS event_tstamp
FROM {{ ref('my_first_dbt_model') }} AS t1
LEFT JOIN {{ ref('my_core_table1') }} AS t2 ON t1.id = t2.id
LEFT JOIN {{ ref('my_executedlt_model') }} AS t3 ON t1.id = t3.event_id
WHERE t1.id IN (1, 2)
42 changes: 41 additions & 1 deletion tests/resources/dbtcore/models/schema.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,46 @@

version: 2

models:
- name: my_first_dbt_model
description: "A starter dbt model"
columns:
- name: data_value
- name: column_3
- name: id
description: "The primary key for this table"
tests:
- unique:
config:
severity: error
error_if: ">1000"
warn_if: ">0"
- not_null:
config:
severity: error
error_if: ">1000"
warn_if: ">0"

- name: my_second_dbt_model
description: "A starter dbt model"
columns:
- name: pk_id
description: "The primary key for this table"
data_tests:
- unique
- not_null
- name: data_value1
- name: data_value2
- name: event_tstamp
- name: my_core_table1
columns:
- name: id
- name: row_data
- name: row_data
- name: my_executedlt_model
columns:
- name: event_id
- name: event_tstamp
- name: my_executepython_model
columns:
- name: event_id
- name: event_tstamp
8 changes: 1 addition & 7 deletions tests/resources/dbtcore/profiles.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,8 @@ dbtcore:
outputs:
dev:
type: duckdb
adapter: my.dbt.custom.OpenAdapterXXX
schema: core
path: ./../dev.duckdb
threads: 1

prod:
type: duckdb
adapter: my.dbt.custom.OpenAdapterXXX
path: prod.duckdb
threads: 4

target: dev
7 changes: 6 additions & 1 deletion tests/resources/dbtfinance/dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,9 @@ profile: 'dbtfinance'
clean-targets:
- "target"
- "dbt_packages"
- "logs"
- "logs"

models:
# ensure referenced models are used with correct schema
dbtcore:
schema: "core"
17 changes: 17 additions & 0 deletions tests/resources/dbtfinance/macros/generate_schema_name.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{% macro generate_schema_name(custom_schema_name, node) -%}

{%- set default_schema = target.schema -%}
{%- if custom_schema_name is none -%}

{{ default_schema }}

{%- else -%}

{# Override of the built-in `generate_schema_name` macro, which concatenates
the default schema and the custom schema name. Here the custom schema name
is used on its own (trimmed); the default schema is not prepended. #}
{{ custom_schema_name | trim }}

{%- endif -%}

{%- endmacro %}
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ select
row_data,
count(*) as num_rows
from {{ ref('dbtcore', 'my_core_table1') }}
-- fake second dependency {{ source('main', 'my_executepython_model') }}
-- fake second dependency {{ source('core', 'my_executepython_model') }}
group by 1,2
5 changes: 3 additions & 2 deletions tests/resources/dbtfinance/models/sources.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
version: 2

sources:
- name: main
schema: main
# defining `dbtcore` project models as source!
- name: core
schema: core
tables:
- name: my_executepython_model
- name: my_executepython_dlt_model
8 changes: 1 addition & 7 deletions tests/resources/dbtfinance/profiles.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,8 @@ dbtfinance:
outputs:
dev:
type: duckdb
adapter: my.dbt.custom.OpenAdapterXXX
schema: finance
path: ./../dev.duckdb
threads: 1

prod:
type: duckdb
adapter: my.dbt.custom.OpenAdapterXXX
path: prod.duckdb
threads: 4

target: dev
20 changes: 7 additions & 13 deletions tests/test_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,25 +19,19 @@ def test_catalog_loading(self):
model1 = catalog.nodes.get("model.dbtfinance.my_cross_project_ref_model")
model1_schema = model1.db_schema_dict(include_parents=True)
self.assertIn("dev", model1_schema)
self.assertIn("main", model1_schema["dev"])
self.assertIn("my_core_table1", model1_schema["dev"]["main"])
self.assertIn("finance", model1_schema["dev"])
self.assertIn("my_core_table1", model1_schema["dev"]["core"])
self.assertIn("my_cross_project_ref_model", model1_schema["dev"]["finance"])
# self.assertIn("row_data", model1_schema["dev"]["main"]['my_core_table1'])

parent_schema = model1.parent_db_schema_dict()
self.assertIn("dev", parent_schema)
self.assertIn("main", parent_schema["dev"])
self.assertIn("my_core_table1", parent_schema["dev"]["main"])

self.assertIn("num_rows", model1.populate_lineage(catalog.tables2nodes))
self.assertIn("row_data", model1.populate_lineage(catalog.tables2nodes))

@unittest.skip("reason for skipping")
def test_catalog_export(self):
dp = OpenDbtProject(project_dir=self.DBTFINANCE_DIR, profiles_dir=self.DBTFINANCE_DIR)
dp.run(command="compile")
dp.run(command="run", args=['--select', 'my_core_table1'])
dp.run(command="run", args=['--select', 'my_first_dbt_model'])
dp.run(command="run", args=['--select', 'my_second_dbt_model'])
dp.run(command="run", args=['--select', '+my_second_dbt_model'])
dp.run(command="docs", args=['generate'])
catalog = OpenDbtCatalog(
manifest_path=self.DBTFINANCE_DIR.joinpath('target/manifest.json'),
Expand All @@ -47,12 +41,12 @@ def test_catalog_export(self):
def test_catalog_export_one_node(self):
dp = OpenDbtProject(project_dir=self.DBTFINANCE_DIR, profiles_dir=self.DBTFINANCE_DIR)
dp.run(command="compile")
dp.run(command="run", args=['--select', 'my_core_table1 my_first_dbt_model my_second_dbt_model'])
dp.run(command="run", args=['--select', '+my_second_dbt_model'])
dp.run(command="docs", args=['generate'])
catalog = OpenDbtCatalog(
manifest_path=self.DBTFINANCE_DIR.joinpath('target/manifest.json'),
catalog_path=self.DBTFINANCE_DIR.joinpath('target/catalog.json'))
node = catalog.node(node_id="model.dbtcore.my_second_dbt_model")
result = node.parent_db_schema_dict()
self.assertIn("my_first_dbt_model", result["dev"]["main"])
self.assertIn("column_3", result["dev"]["main"]["my_first_dbt_model"])
self.assertIn("my_first_dbt_model", result["dev"]["core"])
self.assertIn("column_3", result["dev"]["core"]["my_first_dbt_model"])
16 changes: 7 additions & 9 deletions tests/test_opendbt_airflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from airflow.utils.dates import days_ago

from base_dbt_test import BaseDbtTest
from opendbt.airflow import OpenDbtAirflowProject, OpenDbtExecutorOperator
from opendbt.airflow import OpenDbtAirflowProject


class TestOpenDbtProject(BaseDbtTest):
Expand All @@ -25,11 +25,9 @@ def test_run_dbt_as_airflow_task(self):
include_dbt_seeds=True)

for j in dag.tasks:
# don't run the model we created to fail
if 'my_failing_dbt_model' in j.task_id:
continue

if isinstance(j, OpenDbtExecutorOperator):
# skip dbt tests which are triggering callbacks
j.command = "run" if j.command == "build" else j.command
j.execute({})
if 'my_first_dbt_model' in j.task_id:
j.execute({})
if 'my_executedlt_model' in j.task_id:
j.execute({})
if 'my_executepython_model' in j.task_id:
j.execute({})
9 changes: 3 additions & 6 deletions tests/test_opendbt_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,9 @@ def test_cli_callbacks(self):

def test_cli_run_models(self):
dp = OpenDbtCli(project_dir=self.DBTCORE_DIR)
dp.invoke(args=['run', '--select', 'my_core_table1 my_first_dbt_model+', "--exclude", "my_failing_dbt_model",
"--profiles-dir",
dp.project_dir.as_posix()])
dp.invoke(args=['run', "--exclude", "my_failing_dbt_model", "--profiles-dir", dp.project_dir.as_posix()])

def test_cli_run_cross_project_ref_models(self):
dpf = OpenDbtCli(project_dir=self.DBTFINANCE_DIR)
dpc = OpenDbtCli(project_dir=self.DBTCORE_DIR)
dpc.invoke(args=['run', '--select', 'my_core_table1', "--profiles-dir", dpc.project_dir.as_posix()])
dpf.invoke(args=['run', '--select', 'my_cross_project_ref_model', "--profiles-dir", dpf.project_dir.as_posix()])
dpf.invoke(
args=['run', '--select', '+my_cross_project_ref_model', "--profiles-dir", dpf.project_dir.as_posix()])
27 changes: 27 additions & 0 deletions tests/test_opendbt_mesh.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import json

from base_dbt_test import BaseDbtTest
from opendbt import OpenDbtProject


class TestOpenDbtMesh(BaseDbtTest):
    """Cross-project checks against the manifest produced by the dbtfinance project."""

    def test_run_cross_project(self):
        """Compile dbtcore and dbtfinance, then verify two manifest nodes.

        Reads ``target/manifest.json`` from the dbtfinance project directory and
        asserts the ``database``, ``schema`` and ``name`` recorded for the
        dbtfinance cross-project model and for the dbtcore model it references.
        """
        # Compile dbtcore first, then dbtfinance; each project directory also
        # serves as its own profiles directory.
        for project_dir in (self.DBTCORE_DIR, self.DBTFINANCE_DIR):
            project = OpenDbtProject(project_dir=project_dir, profiles_dir=project_dir)
            project.run(command="compile")

        manifest_file = self.DBTFINANCE_DIR.joinpath("target/manifest.json")
        manifest = json.loads(manifest_file.read_text())

        # The model defined in dbtfinance itself.
        finance_node = manifest.get("nodes").get("model.dbtfinance.my_cross_project_ref_model", {})
        print(finance_node)
        self.assertEqual(finance_node["database"], 'dev')
        self.assertEqual(finance_node['schema'], 'finance')
        self.assertEqual(finance_node['name'], 'my_cross_project_ref_model')

        # The dbtcore model, looked up in the dbtfinance manifest.
        core_node = manifest.get("nodes").get("model.dbtcore.my_core_table1", {})
        self.assertEqual(core_node['database'], 'dev')
        self.assertEqual(core_node['schema'], 'core')
        self.assertEqual(core_node['name'], 'my_core_table1')
        print(core_node)
2 changes: 1 addition & 1 deletion tests/test_opendbt_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def test_run_compile(self):
def test_run_run(self):
dp = OpenDbtProject(project_dir=self.DBTCORE_DIR, profiles_dir=self.DBTCORE_DIR)
dp.run(command="run",
args=['--select', 'my_core_table1 my_first_dbt_model+', "--exclude", "my_failing_dbt_model"],
args=['--select', '+my_second_dbt_model+', "--exclude", "my_failing_dbt_model"],
use_subprocess=True)

def test_project_attributes(self):
Expand Down
0