diff --git a/dbt_project.yml b/dbt_project.yml index f957879..4530686 100755 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -1,45 +1,29 @@ -# This file is necessary to install dbt-utils with dbt deps -# the content will be overwritten by the transform function - -# Name your package! Package names should contain only lowercase characters -# and underscores. A good package name should reflect your organization's -# name or the intended use of these models -name: "airbyte_utils" -version: "1.0" +name: airbyte_utils +version: '1.0' config-version: 2 - -# This setting configures which "profile" dbt uses for this project. Profiles contain -# database connection information, and should be configured in the ~/.dbt/profiles.yml file -profile: "normalize" - -# These configurations specify where dbt should look for different types of files. -# The `model-paths` config, for example, states that source models can be found -# in the "models/" directory. You probably won't need to change these! -model-paths: ["models"] -docs-paths: ["docs"] -analysis-paths: ["analysis"] -test-paths: ["tests"] -seed-paths: ["data"] -macro-paths: ["macros"] - -target-path: "../build" # directory which will store compiled SQL files -log-path: "../logs" # directory which will store DBT logs -packages-install-path: "../dbt" # directory which will store external DBT dependencies - -clean-targets: # directories to be removed by `dbt clean` - - "build" - - "dbt_modules" - +profile: normalize +model-paths: +- models +docs-paths: +- docs +analysis-paths: +- analysis +test-paths: +- tests +seed-paths: +- data +macro-paths: +- macros +target-path: ../build +log-path: ../logs +packages-install-path: ../dbt +clean-targets: +- build +- dbt_modules quoting: database: true - # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) - # all schemas should be unquoted schema: false identifier: true - -# You can define configurations for models in the `model-paths` directory here. -# Using these configurations, you can enable or disable models, change how they -# are materialized, and more! models: airbyte_utils: +materialized: table @@ -57,7 +41,15 @@ models: airbyte_views: +tags: airbyte_internal_views +materialized: view - dispatch: - - macro_namespace: dbt_utils - search_order: ["airbyte_utils", "dbt_utils"] +- macro_namespace: dbt_utils + search_order: + - airbyte_utils + - dbt_utils +vars: + json_column: _airbyte_data + models_to_source: + tracking_product_shares_ab1: unibag._airbyte_raw_tracking_product_shares + tracking_product_shares_ab2: unibag._airbyte_raw_tracking_product_shares + tracking_product_shares_ab3: unibag._airbyte_raw_tracking_product_shares + tracking_product_shares: unibag._airbyte_raw_tracking_product_shares diff --git a/macros/configuration.sql b/macros/configuration.sql new file mode 100644 index 0000000..a599ab3 --- /dev/null +++ b/macros/configuration.sql @@ -0,0 +1,14 @@ +{%- macro redshift_super_type() -%} + {%- if not execute -%} + {{ return("") }} + {%- endif -%} + + {%- set table_schema, _, table_name = var("models_to_source")[this.identifier].partition(".") -%} + + {%- call statement("get_column_type", fetch_result=True) -%} + select data_type from SVV_COLUMNS where table_name = '{{ table_name }}' and column_name = '{{ var("json_column") }}' and table_schema = '{{ table_schema }}'; + {%- endcall -%} + + {%- set column_type = load_result("get_column_type")["data"][0][0] -%} + {{ return(column_type == "super") }} +{%- endmacro -%} diff --git a/macros/cross_db_utils/array.sql b/macros/cross_db_utils/array.sql index 9072da2..35df407 100644 --- a/macros/cross_db_utils/array.sql +++ b/macros/cross_db_utils/array.sql @@ -104,8 +104,19 @@ {% macro default__unnest_cte(from_table, stream_name, column_col) -%}{%- endmacro %} -{# -- based on https://blog.getdbt.com/how-to-unnest-arrays-in-redshift/ #} {% macro redshift__unnest_cte(from_table, stream_name, column_col) -%} + + {# -- based on https://docs.aws.amazon.com/redshift/latest/dg/query-super.html #} + {% if redshift_super_type() -%} + with joined as ( + select + table_alias._airbyte_{{ stream_name }}_hashid as _airbyte_hashid, + _airbyte_nested_data + from {{ from_table }} as table_alias, table_alias.{{ column_col }} as _airbyte_nested_data + ) + {%- else -%} + + {# -- based on https://blog.getdbt.com/how-to-unnest-arrays-in-redshift/ #} {%- if not execute -%} {{ return('') }} {% endif %} @@ -134,6 +145,7 @@ joined as ( -- to the number of items in {{ from_table }}.{{ column_col }} where numbers.generated_number <= json_array_length({{ column_col }}, true) ) + {%- endif %} {%- endmacro %} {% macro mysql__unnest_cte(from_table, stream_name, column_col) -%} diff --git a/macros/cross_db_utils/columns.sql b/macros/cross_db_utils/columns.sql new file mode 100644 index 0000000..0b695c1 --- /dev/null +++ b/macros/cross_db_utils/columns.sql @@ -0,0 +1,16 @@ +{% macro redshift__alter_column_type(relation, column_name, new_column_type) -%} + + {%- set tmp_column = column_name + "__dbt_alter" -%} + + {% call statement('alter_column_type') %} + alter table {{ relation }} add column {{ adapter.quote(tmp_column) }} {{ new_column_type }}; + {% if new_column_type.lower() == "super" %} + update {{ relation }} set {{ adapter.quote(tmp_column) }} = JSON_PARSE({{ adapter.quote(column_name) }}); + {% else %} + update {{ relation }} set {{ adapter.quote(tmp_column) }} = {{ adapter.quote(column_name) }}; + {% endif %} + alter table {{ relation }} drop column {{ adapter.quote(column_name) }} cascade; + alter table {{ relation }} rename column {{ adapter.quote(tmp_column) }} to {{ adapter.quote(column_name) }} + {% endcall %} + +{% endmacro %} diff --git a/macros/cross_db_utils/datatypes.sql b/macros/cross_db_utils/datatypes.sql index 080aea5..18e2d61 100644 --- a/macros/cross_db_utils/datatypes.sql +++ b/macros/cross_db_utils/datatypes.sql @@ -9,7 +9,11 @@ {% endmacro %} {%- macro redshift__type_json() -%} + {%- if redshift_super_type() -%} + super + {%- else -%} varchar + {%- endif -%} {%- endmacro -%} {% macro postgres__type_json() %} @@ -29,7 +33,7 @@ {%- endmacro -%} {%- macro sqlserver__type_json() -%} - VARCHAR(max) + NVARCHAR(max) {%- endmacro -%} {% macro clickhouse__type_json() %} @@ -48,7 +52,7 @@ {%- endmacro -%} {% macro sqlserver__type_string() %} - VARCHAR(max) + NVARCHAR(max) {%- endmacro -%} {%- macro clickhouse__type_string() -%} @@ -150,7 +154,7 @@ {%- macro sqlserver__type_timestamp_with_timezone() -%} {#-- in TSQL timestamp is really datetime or datetime2 --#} {#-- https://docs.microsoft.com/en-us/sql/t-sql/functions/date-and-time-data-types-and-functions-transact-sql?view=sql-server-ver15#DateandTimeDataTypes --#} - datetime + datetime2 {%- endmacro -%} {% macro clickhouse__type_timestamp_with_timezone() %} diff --git a/macros/cross_db_utils/json_operations.sql b/macros/cross_db_utils/json_operations.sql index cf52f24..e1e5443 100644 --- a/macros/cross_db_utils/json_operations.sql +++ b/macros/cross_db_utils/json_operations.sql @@ -43,11 +43,12 @@ {%- endmacro %} {% macro redshift__format_json_path(json_path_list) -%} + {%- set quote = '"' if redshift_super_type() else "'" -%} {%- set str_list = [] -%} {%- for json_path in json_path_list -%} - {%- if str_list.append(json_path.replace("'", "''")) -%} {%- endif -%} + {%- if str_list.append(json_path.replace(quote, quote + quote)) -%} {%- endif -%} {%- endfor -%} - {{ "'" ~ str_list|join("','") ~ "'" }} + {{ quote ~ str_list|join(quote + "," + quote) ~ quote }} {%- endmacro %} {% macro snowflake__format_json_path(json_path_list) -%} @@ -114,11 +115,14 @@ {%- endmacro %} {% macro redshift__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%} - {%- if from_table|string() == '' %} + {%- if from_table|string() != '' -%} + {%- set json_column = from_table|string() + "." + json_column|string() -%} + {%- endif -%} + {%- if redshift_super_type() -%} + case when {{ json_column }}.{{ format_json_path(json_path_list) }} != '' then {{ json_column }}.{{ format_json_path(json_path_list) }} end + {%- else -%} case when json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true) != '' then json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true) end - {% else %} - case when json_extract_path_text({{ from_table }}.{{ json_column }}, {{ format_json_path(json_path_list) }}, true) != '' then json_extract_path_text({{ from_table }}.{{ json_column }}, {{ format_json_path(json_path_list) }}, true) end - {% endif -%} + {%- endif -%} {%- endmacro %} {% macro snowflake__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%} @@ -168,7 +172,11 @@ {%- endmacro %} {% macro redshift__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%} + {%- if redshift_super_type() -%} + case when {{ json_column }}.{{ format_json_path(json_path_list) }} != '' then {{ json_column }}.{{ format_json_path(json_path_list) }} end + {%- else -%} case when json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true) != '' then json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true) end + {%- endif -%} {%- endmacro %} {% macro snowflake__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%} @@ -210,7 +218,11 @@ {%- endmacro %} {% macro redshift__json_extract_array(json_column, json_path_list, normalized_json_path) -%} + {%- if redshift_super_type() -%} + {{ json_column }}.{{ format_json_path(json_path_list) }} + {%- else -%} json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true) + {%- endif -%} {%- endmacro %} {% macro snowflake__json_extract_array(json_column, json_path_list, normalized_json_path) -%} diff --git a/macros/cross_db_utils/type_conversions.sql b/macros/cross_db_utils/type_conversions.sql index 1ad849b..ad3d756 100644 --- a/macros/cross_db_utils/type_conversions.sql +++ b/macros/cross_db_utils/type_conversions.sql @@ -33,6 +33,31 @@ cast({{ array_column }} as {{dbt_utils.type_string()}}) {%- endmacro %} +{% macro redshift__array_to_string(array_column) -%} + {% if redshift_super_type() -%} + json_serialize({{array_column}}) + {%- else -%} + {{ array_column }} + {%- endif %} +{%- endmacro %} + +{# object_to_string ------------------------------------------------- #} +{% macro object_to_string(object_column) -%} + {{ adapter.dispatch('object_to_string')(object_column) }} +{%- endmacro %} + +{% macro default__object_to_string(object_column) -%} + {{ object_column }} +{%- endmacro %} + +{% macro redshift__object_to_string(object_column) -%} + {% if redshift_super_type() -%} + json_serialize({{object_column}}) + {%- else -%} + {{ object_column }} + {%- endif %} +{%- endmacro %} + {# cast_to_boolean ------------------------------------------------- #} {% macro cast_to_boolean(field) -%} {{ adapter.dispatch('cast_to_boolean')(field) }} @@ -49,7 +74,11 @@ {# -- Redshift does not support converting string directly to boolean, it must go through int first #} {% macro redshift__cast_to_boolean(field) -%} + {% if redshift_super_type() -%} + cast({{ field }} as boolean) + {%- else -%} cast(decode({{ field }}, 'true', '1', 'false', '0')::integer as boolean) + {%- endif %} {%- endmacro %} {# -- MS SQL Server does not support converting string directly to boolean, it must be casted as bit #} @@ -70,3 +99,7 @@ {%- macro default__empty_string_to_null(field) -%} nullif({{ field }}, '') {%- endmacro %} + +{%- macro redshift__empty_string_to_null(field) -%} + nullif({{ field }}::varchar, '') +{%- endmacro %} diff --git a/models/generated/airbyte_tables/unibag/tracking_product_shares.sql b/models/generated/airbyte_tables/unibag/tracking_product_shares.sql index 7ee5a90..a5faa93 100644 --- a/models/generated/airbyte_tables/unibag/tracking_product_shares.sql +++ b/models/generated/airbyte_tables/unibag/tracking_product_shares.sql @@ -21,3 +21,4 @@ select from {{ ref('tracking_product_shares_ab3') }} -- tracking_product_shares from {{ source('unibag', '_airbyte_raw_tracking_product_shares') }} where 1 = 1 +