This commit is contained in:
Ubuntu 2022-05-19 14:21:25 +07:00
parent f1faad9777
commit dec59c30b0
8 changed files with 134 additions and 50 deletions

View File

@ -1,45 +1,29 @@
# This file is necessary to install dbt-utils with dbt deps name: airbyte_utils
# the content will be overwritten by the transform function version: '1.0'
# Name your package! Package names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: "airbyte_utils"
version: "1.0"
config-version: 2 config-version: 2
profile: normalize
# This setting configures which "profile" dbt uses for this project. Profiles contain model-paths:
# database connection information, and should be configured in the ~/.dbt/profiles.yml file - models
profile: "normalize" docs-paths:
- docs
# These configurations specify where dbt should look for different types of files. analysis-paths:
# The `model-paths` config, for example, states that source models can be found - analysis
# in the "models/" directory. You probably won't need to change these! test-paths:
model-paths: ["models"] - tests
docs-paths: ["docs"] seed-paths:
analysis-paths: ["analysis"] - data
test-paths: ["tests"] macro-paths:
seed-paths: ["data"] - macros
macro-paths: ["macros"] target-path: ../build
log-path: ../logs
target-path: "../build" # directory which will store compiled SQL files packages-install-path: ../dbt
log-path: "../logs" # directory which will store DBT logs clean-targets:
packages-install-path: "../dbt" # directory which will store external DBT dependencies - build
- dbt_modules
clean-targets: # directories to be removed by `dbt clean`
- "build"
- "dbt_modules"
quoting: quoting:
database: true database: true
# Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785)
# all schemas should be unquoted
schema: false schema: false
identifier: true identifier: true
# You can define configurations for models in the `model-paths` directory here.
# Using these configurations, you can enable or disable models, change how they
# are materialized, and more!
models: models:
airbyte_utils: airbyte_utils:
+materialized: table +materialized: table
@ -57,7 +41,15 @@ models:
airbyte_views: airbyte_views:
+tags: airbyte_internal_views +tags: airbyte_internal_views
+materialized: view +materialized: view
dispatch: dispatch:
- macro_namespace: dbt_utils - macro_namespace: dbt_utils
search_order: ["airbyte_utils", "dbt_utils"] search_order:
- airbyte_utils
- dbt_utils
vars:
json_column: _airbyte_data
models_to_source:
tracking_product_shares_ab1: unibag._airbyte_raw_tracking_product_shares
tracking_product_shares_ab2: unibag._airbyte_raw_tracking_product_shares
tracking_product_shares_ab3: unibag._airbyte_raw_tracking_product_shares
tracking_product_shares: unibag._airbyte_raw_tracking_product_shares

14
macros/configuration.sql Normal file
View File

@ -0,0 +1,14 @@
{%- macro redshift_super_type() -%}
{%- if not execute -%}
{{ return("") }}
{%- endif -%}
{%- set table_schema, _, table_name = var("models_to_source")[this.identifier].partition(".") -%}
{%- call statement("get_column_type", fetch_result=True) -%}
select data_type from SVV_COLUMNS where table_name = '{{ table_name }}' and column_name = '{{ var("json_column") }}' and table_schema = '{{ table_schema }}';
{%- endcall -%}
{%- set column_type = load_result("get_column_type")["data"][0][0] -%}
{{ return(column_type == "super") }}
{%- endmacro -%}

View File

@ -104,8 +104,19 @@
{% macro default__unnest_cte(from_table, stream_name, column_col) -%}{%- endmacro %} {% macro default__unnest_cte(from_table, stream_name, column_col) -%}{%- endmacro %}
{# -- based on https://blog.getdbt.com/how-to-unnest-arrays-in-redshift/ #}
{% macro redshift__unnest_cte(from_table, stream_name, column_col) -%} {% macro redshift__unnest_cte(from_table, stream_name, column_col) -%}
{# -- based on https://docs.aws.amazon.com/redshift/latest/dg/query-super.html #}
{% if redshift_super_type() -%}
with joined as (
select
table_alias._airbyte_{{ stream_name }}_hashid as _airbyte_hashid,
_airbyte_nested_data
from {{ from_table }} as table_alias, table_alias.{{ column_col }} as _airbyte_nested_data
)
{%- else -%}
{# -- based on https://blog.getdbt.com/how-to-unnest-arrays-in-redshift/ #}
{%- if not execute -%} {%- if not execute -%}
{{ return('') }} {{ return('') }}
{% endif %} {% endif %}
@ -134,6 +145,7 @@ joined as (
-- to the number of items in {{ from_table }}.{{ column_col }} -- to the number of items in {{ from_table }}.{{ column_col }}
where numbers.generated_number <= json_array_length({{ column_col }}, true) where numbers.generated_number <= json_array_length({{ column_col }}, true)
) )
{%- endif %}
{%- endmacro %} {%- endmacro %}
{% macro mysql__unnest_cte(from_table, stream_name, column_col) -%} {% macro mysql__unnest_cte(from_table, stream_name, column_col) -%}

View File

@ -0,0 +1,16 @@
{% macro redshift__alter_column_type(relation, column_name, new_column_type) -%}
{%- set tmp_column = column_name + "__dbt_alter" -%}
{% call statement('alter_column_type') %}
alter table {{ relation }} add column {{ adapter.quote(tmp_column) }} {{ new_column_type }};
{% if new_column_type.lower() == "super" %}
update {{ relation }} set {{ adapter.quote(tmp_column) }} = JSON_PARSE({{ adapter.quote(column_name) }});
{% else %}
update {{ relation }} set {{ adapter.quote(tmp_column) }} = {{ adapter.quote(column_name) }};
{% endif %}
alter table {{ relation }} drop column {{ adapter.quote(column_name) }} cascade;
alter table {{ relation }} rename column {{ adapter.quote(tmp_column) }} to {{ adapter.quote(column_name) }}
{% endcall %}
{% endmacro %}

View File

@ -9,7 +9,11 @@
{% endmacro %} {% endmacro %}
{%- macro redshift__type_json() -%} {%- macro redshift__type_json() -%}
{%- if redshift_super_type() -%}
super
{%- else -%}
varchar varchar
{%- endif -%}
{%- endmacro -%} {%- endmacro -%}
{% macro postgres__type_json() %} {% macro postgres__type_json() %}
@ -29,7 +33,7 @@
{%- endmacro -%} {%- endmacro -%}
{%- macro sqlserver__type_json() -%} {%- macro sqlserver__type_json() -%}
VARCHAR(max) NVARCHAR(max)
{%- endmacro -%} {%- endmacro -%}
{% macro clickhouse__type_json() %} {% macro clickhouse__type_json() %}
@ -48,7 +52,7 @@
{%- endmacro -%} {%- endmacro -%}
{% macro sqlserver__type_string() %} {% macro sqlserver__type_string() %}
VARCHAR(max) NVARCHAR(max)
{%- endmacro -%} {%- endmacro -%}
{%- macro clickhouse__type_string() -%} {%- macro clickhouse__type_string() -%}
@ -150,7 +154,7 @@
{%- macro sqlserver__type_timestamp_with_timezone() -%} {%- macro sqlserver__type_timestamp_with_timezone() -%}
{#-- in TSQL timestamp is really datetime or datetime2 --#} {#-- in TSQL timestamp is really datetime or datetime2 --#}
{#-- https://docs.microsoft.com/en-us/sql/t-sql/functions/date-and-time-data-types-and-functions-transact-sql?view=sql-server-ver15#DateandTimeDataTypes --#} {#-- https://docs.microsoft.com/en-us/sql/t-sql/functions/date-and-time-data-types-and-functions-transact-sql?view=sql-server-ver15#DateandTimeDataTypes --#}
datetime datetime2
{%- endmacro -%} {%- endmacro -%}
{% macro clickhouse__type_timestamp_with_timezone() %} {% macro clickhouse__type_timestamp_with_timezone() %}

View File

@ -43,11 +43,12 @@
{%- endmacro %} {%- endmacro %}
{% macro redshift__format_json_path(json_path_list) -%} {% macro redshift__format_json_path(json_path_list) -%}
{%- set quote = '"' if redshift_super_type() else "'" -%}
{%- set str_list = [] -%} {%- set str_list = [] -%}
{%- for json_path in json_path_list -%} {%- for json_path in json_path_list -%}
{%- if str_list.append(json_path.replace("'", "''")) -%} {%- endif -%} {%- if str_list.append(json_path.replace(quote, quote + quote)) -%} {%- endif -%}
{%- endfor -%} {%- endfor -%}
{{ "'" ~ str_list|join("','") ~ "'" }} {{ quote ~ str_list|join(quote + "," + quote) ~ quote }}
{%- endmacro %} {%- endmacro %}
{% macro snowflake__format_json_path(json_path_list) -%} {% macro snowflake__format_json_path(json_path_list) -%}
@ -114,11 +115,14 @@
{%- endmacro %} {%- endmacro %}
{% macro redshift__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%} {% macro redshift__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
{%- if from_table|string() == '' %} {%- if from_table|string() != '' -%}
{%- set json_column = from_table|string() + "." + json_column|string() -%}
{%- endif -%}
{%- if redshift_super_type() -%}
case when {{ json_column }}.{{ format_json_path(json_path_list) }} != '' then {{ json_column }}.{{ format_json_path(json_path_list) }} end
{%- else -%}
case when json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true) != '' then json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true) end case when json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true) != '' then json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true) end
{% else %} {%- endif -%}
case when json_extract_path_text({{ from_table }}.{{ json_column }}, {{ format_json_path(json_path_list) }}, true) != '' then json_extract_path_text({{ from_table }}.{{ json_column }}, {{ format_json_path(json_path_list) }}, true) end
{% endif -%}
{%- endmacro %} {%- endmacro %}
{% macro snowflake__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%} {% macro snowflake__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
@ -168,7 +172,11 @@
{%- endmacro %} {%- endmacro %}
{% macro redshift__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%} {% macro redshift__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
{%- if redshift_super_type() -%}
case when {{ json_column }}.{{ format_json_path(json_path_list) }} != '' then {{ json_column }}.{{ format_json_path(json_path_list) }} end
{%- else -%}
case when json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true) != '' then json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true) end case when json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true) != '' then json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true) end
{%- endif -%}
{%- endmacro %} {%- endmacro %}
{% macro snowflake__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%} {% macro snowflake__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
@ -210,7 +218,11 @@
{%- endmacro %} {%- endmacro %}
{% macro redshift__json_extract_array(json_column, json_path_list, normalized_json_path) -%} {% macro redshift__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
{%- if redshift_super_type() -%}
{{ json_column }}.{{ format_json_path(json_path_list) }}
{%- else -%}
json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true) json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true)
{%- endif -%}
{%- endmacro %} {%- endmacro %}
{% macro snowflake__json_extract_array(json_column, json_path_list, normalized_json_path) -%} {% macro snowflake__json_extract_array(json_column, json_path_list, normalized_json_path) -%}

View File

@ -33,6 +33,31 @@
cast({{ array_column }} as {{dbt_utils.type_string()}}) cast({{ array_column }} as {{dbt_utils.type_string()}})
{%- endmacro %} {%- endmacro %}
{% macro redshift__array_to_string(array_column) -%}
{% if redshift_super_type() -%}
json_serialize({{array_column}})
{%- else -%}
{{ array_column }}
{%- endif %}
{%- endmacro %}
{# object_to_string ------------------------------------------------- #}
{% macro object_to_string(object_column) -%}
{{ adapter.dispatch('object_to_string')(object_column) }}
{%- endmacro %}
{% macro default__object_to_string(object_column) -%}
{{ object_column }}
{%- endmacro %}
{% macro redshift__object_to_string(object_column) -%}
{% if redshift_super_type() -%}
json_serialize({{object_column}})
{%- else -%}
{{ object_column }}
{%- endif %}
{%- endmacro %}
{# cast_to_boolean ------------------------------------------------- #} {# cast_to_boolean ------------------------------------------------- #}
{% macro cast_to_boolean(field) -%} {% macro cast_to_boolean(field) -%}
{{ adapter.dispatch('cast_to_boolean')(field) }} {{ adapter.dispatch('cast_to_boolean')(field) }}
@ -49,7 +74,11 @@
{# -- Redshift does not support converting string directly to boolean, it must go through int first #} {# -- Redshift does not support converting string directly to boolean, it must go through int first #}
{% macro redshift__cast_to_boolean(field) -%} {% macro redshift__cast_to_boolean(field) -%}
{% if redshift_super_type() -%}
cast({{ field }} as boolean)
{%- else -%}
cast(decode({{ field }}, 'true', '1', 'false', '0')::integer as boolean) cast(decode({{ field }}, 'true', '1', 'false', '0')::integer as boolean)
{%- endif %}
{%- endmacro %} {%- endmacro %}
{# -- MS SQL Server does not support converting string directly to boolean, it must be casted as bit #} {# -- MS SQL Server does not support converting string directly to boolean, it must be casted as bit #}
@ -70,3 +99,7 @@
{%- macro default__empty_string_to_null(field) -%} {%- macro default__empty_string_to_null(field) -%}
nullif({{ field }}, '') nullif({{ field }}, '')
{%- endmacro %} {%- endmacro %}
{%- macro redshift__empty_string_to_null(field) -%}
nullif({{ field }}::varchar, '')
{%- endmacro %}

View File

@ -21,3 +21,4 @@ select
from {{ ref('tracking_product_shares_ab3') }} from {{ ref('tracking_product_shares_ab3') }}
-- tracking_product_shares from {{ source('unibag', '_airbyte_raw_tracking_product_shares') }} -- tracking_product_shares from {{ source('unibag', '_airbyte_raw_tracking_product_shares') }}
where 1 = 1 where 1 = 1