From fd00e7b2f55d926f9c73814240dd3903fb3c500e Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Thu, 12 May 2022 10:27:31 +0700
Subject: [PATCH] update

---
 dbt_project.yml                            | 70 ++++++++++------
 destination_catalog.json                   |  2 +-
 macros/configuration.sql                   | 14 +++++
 macros/cross_db_utils/array.sql            | 19 +++++-
 macros/cross_db_utils/columns.sql          | 16 +++++
 macros/cross_db_utils/datatypes.sql        |  4 ++
 macros/cross_db_utils/json_operations.sql  | 32 ++++++----
 macros/cross_db_utils/type_conversions.sql | 38 ++++++++++++
 packages.yml                               |  2 +-
 9 files changed, 145 insertions(+), 52 deletions(-)
 create mode 100644 macros/configuration.sql
 create mode 100644 macros/cross_db_utils/columns.sql

diff --git a/dbt_project.yml b/dbt_project.yml
index 4869f0a..b2df7fe 100755
--- a/dbt_project.yml
+++ b/dbt_project.yml
@@ -1,45 +1,29 @@
-# This file is necessary to install dbt-utils with dbt deps
-# the content will be overwritten by the transform function
-
-# Name your package! Package names should contain only lowercase characters
-# and underscores. A good package name should reflect your organization's
-# name or the intended use of these models
-name: 'airbyte_utils'
+name: airbyte_utils
 version: '1.0'
 config-version: 2
-
-# This setting configures which "profile" dbt uses for this project. Profiles contain
-# database connection information, and should be configured in the ~/.dbt/profiles.yml file
-profile: 'normalize'
-
-# These configurations specify where dbt should look for different types of files.
-# The `source-paths` config, for example, states that source models can be found
-# in the "models/" directory. You probably won't need to change these!
-source-paths: ["models"]
-docs-paths: ["docs"]
-analysis-paths: ["analysis"]
-test-paths: ["tests"]
-data-paths: ["data"]
-macro-paths: ["macros"]
-
-target-path: "../build" # directory which will store compiled SQL files
-log-path: "../logs" # directory which will store DBT logs
-modules-path: "../dbt_modules" # directory which will store external DBT dependencies
-
-clean-targets: # directories to be removed by `dbt clean`
-  - "build"
-  - "dbt_modules"
-
+profile: normalize
+model-paths:
+- models
+docs-paths:
+- docs
+analysis-paths:
+- analysis
+test-paths:
+- tests
+seed-paths:
+- data
+macro-paths:
+- macros
+target-path: ../build
+log-path: ../logs
+packages-install-path: ../dbt
+clean-targets:
+- build
+- dbt_modules
 quoting:
   database: true
-# Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785)
-# all schemas should be unquoted
   schema: false
   identifier: true
-
-# You can define configurations for models in the `source-paths` directory here.
-# Using these configurations, you can enable or disable models, change how they
-# are materialized, and more!
 models:
   airbyte_utils:
     +materialized: table
@@ -57,7 +41,15 @@ models:
       airbyte_views:
         +tags: airbyte_internal_views
         +materialized: view
-
 dispatch:
-  - macro_namespace: dbt_utils
-    search_order: ['airbyte_utils', 'dbt_utils']
+- macro_namespace: dbt_utils
+  search_order:
+  - airbyte_utils
+  - dbt_utils
+vars:
+  json_column: _airbyte_data
+  models_to_source:
+    se_orders_ab1: selly_express._airbyte_raw_se_orders
+    se_orders_ab2: selly_express._airbyte_raw_se_orders
+    se_orders_ab3: selly_express._airbyte_raw_se_orders
+    se_orders: selly_express._airbyte_raw_se_orders
diff --git a/destination_catalog.json b/destination_catalog.json
index f51ebe6..f02509f 100644
--- a/destination_catalog.json
+++ b/destination_catalog.json
@@ -1 +1 @@
-{"streams":[{"stream":{"name":"se-orders","json_schema":{"type":"object","properties":{"to":{"type":"string"},"_id":{"type":"string"},"cod":{"type":"number"},"code":{"type":"string"},"from":{"type":"string"},"note":{"type":"string"},"value":{"type":"number"},"client":{"type":"string"},"status":{"type":"string"},"volume":{"type":"string"},"weight":{"type":"number"},"courier":{"type":"string"},"distance":{"type":"number"},"createdAt":{"type":"string"},"updatedAt":{"type":"string"},"updatedBy":{"type":"string"},"itemVolume":{"type":"number"},"searchString":{"type":"string"},"extraServices":{"type":"array"}}},"supported_sync_modes":["full_refresh","incremental"],"default_cursor_field":[],"source_defined_primary_key":[],"namespace":"unibag"},"sync_mode":"full_refresh","cursor_field":[],"destination_sync_mode":"overwrite","primary_key":[]}]}
\ No newline at end of file
+{"streams":[{"stream":{"name":"se-orders","json_schema":{"type":"object","properties":{"to":{"type":"string"},"_id":{"type":"string"},"cod":{"type":"number"},"code":{"type":"string"},"from":{"type":"string"},"note":{"type":"string"},"value":{"type":"number"},"client":{"type":"string"},"status":{"type":"string"},"volume":{"type":"string"},"weight":{"type":"number"},"courier":{"type":"string"},"distance":{"type":"number"},"createdAt":{"type":"string"},"updatedAt":{"type":"string"},"updatedBy":{"type":"string"},"itemVolume":{"type":"number"},"searchString":{"type":"string"},"extraServices":{"type":"array"}}},"supported_sync_modes":["full_refresh","incremental"],"default_cursor_field":[],"source_defined_primary_key":[],"namespace":"selly-express"},"sync_mode":"full_refresh","cursor_field":[],"destination_sync_mode":"overwrite","primary_key":[]}]}
\ No newline at end of file
diff --git a/macros/configuration.sql b/macros/configuration.sql
new file mode 100644
index 0000000..a599ab3
--- /dev/null
+++ b/macros/configuration.sql
@@ -0,0 +1,14 @@
+{%- macro redshift_super_type() -%}
+    {%- if not execute -%}
+        {{ return("") }}
+    {%- endif -%}
+
+    {%- set table_schema, _, table_name = var("models_to_source")[this.identifier].partition(".") -%}
+
+    {%- call statement("get_column_type", fetch_result=True) -%}
+        select data_type from SVV_COLUMNS where table_name = '{{ table_name }}' and column_name = '{{ var("json_column") }}' and table_schema = '{{ table_schema }}';
+    {%- endcall -%}
+
+    {%- set column_type = load_result("get_column_type")["data"][0][0] -%}
+    {{ return(column_type == "super") }}
+{%- endmacro -%}
diff --git a/macros/cross_db_utils/array.sql b/macros/cross_db_utils/array.sql
index 4bc642f..35df407 100644
--- a/macros/cross_db_utils/array.sql
+++ b/macros/cross_db_utils/array.sql
@@ -5,6 +5,7 @@
     - Redshift: -> https://blog.getdbt.com/how-to-unnest-arrays-in-redshift/
     - postgres: unnest() -> https://www.postgresqltutorial.com/postgresql-array/
     - MSSQL: openjson() –> https://docs.microsoft.com/en-us/sql/relational-databases/json/validate-query-and-change-json-data-with-built-in-functions-sql-server?view=sql-server-ver15
+    - ClickHouse: ARRAY JOIN –> https://clickhouse.com/docs/zh/sql-reference/statements/select/array-join/
 #}

 {# cross_join_unnest ------------------------------------------------- #}
@@ -21,6 +22,10 @@
     cross join unnest({{ array_col }}) as {{ array_col }}
 {%- endmacro %}

+{% macro clickhouse__cross_join_unnest(stream_name, array_col) -%}
+    ARRAY JOIN {{ array_col }}
+{%- endmacro %}
+
 {% macro oracle__cross_join_unnest(stream_name, array_col) -%}
     {% do exceptions.warn("Normalization does not support unnesting for Oracle yet.") %}
 {%- endmacro %}
@@ -99,8 +104,19 @@
 {% macro default__unnest_cte(from_table, stream_name, column_col) -%}{%- endmacro %}

-{# -- based on https://blog.getdbt.com/how-to-unnest-arrays-in-redshift/ #}
 {% macro redshift__unnest_cte(from_table, stream_name, column_col) -%}
+
+    {# -- based on https://docs.aws.amazon.com/redshift/latest/dg/query-super.html #}
+    {% if redshift_super_type() -%}
+    with joined as (
+        select
+            table_alias._airbyte_{{ stream_name }}_hashid as _airbyte_hashid,
+            _airbyte_nested_data
+        from {{ from_table }} as table_alias, table_alias.{{ column_col }} as _airbyte_nested_data
+    )
+    {%- else -%}
+
+    {# -- based on https://blog.getdbt.com/how-to-unnest-arrays-in-redshift/ #}
     {%- if not execute -%}
         {{ return('') }}
     {% endif %}
@@ -129,6 +145,7 @@ joined as (
         -- to the number of items in {{ from_table }}.{{ column_col }}
         where numbers.generated_number <= json_array_length({{ column_col }}, true)
     )
+    {%- endif %}
 {%- endmacro %}

 {% macro mysql__unnest_cte(from_table, stream_name, column_col) -%}
diff --git a/macros/cross_db_utils/columns.sql b/macros/cross_db_utils/columns.sql
new file mode 100644
index 0000000..0b695c1
--- /dev/null
+++ b/macros/cross_db_utils/columns.sql
@@ -0,0 +1,16 @@
+{% macro redshift__alter_column_type(relation, column_name, new_column_type) -%}
+
+    {%- set tmp_column = column_name + "__dbt_alter" -%}
+
+    {% call statement('alter_column_type') %}
+        alter table {{ relation }} add column {{ adapter.quote(tmp_column) }} {{ new_column_type }};
+        {% if new_column_type.lower() == "super" %}
+            update {{ relation }} set {{ adapter.quote(tmp_column) }} = JSON_PARSE({{ adapter.quote(column_name) }});
+        {% else %}
+            update {{ relation }} set {{ adapter.quote(tmp_column) }} = {{ adapter.quote(column_name) }};
+        {% endif %}
+        alter table {{ relation }} drop column {{ adapter.quote(column_name) }} cascade;
+        alter table {{ relation }} rename column {{ adapter.quote(tmp_column) }} to {{ adapter.quote(column_name) }}
+    {% endcall %}
+
+{% endmacro %}
diff --git a/macros/cross_db_utils/datatypes.sql b/macros/cross_db_utils/datatypes.sql
index 080aea5..07600de 100644
--- a/macros/cross_db_utils/datatypes.sql
+++ b/macros/cross_db_utils/datatypes.sql
@@ -9,7 +9,11 @@
 {% endmacro %}

 {%- macro redshift__type_json() -%}
+    {%- if redshift_super_type() -%}
+    super
+    {%- else -%}
     varchar
+    {%- endif -%}
 {%- endmacro -%}

 {% macro postgres__type_json() %}
diff --git a/macros/cross_db_utils/json_operations.sql b/macros/cross_db_utils/json_operations.sql
index 619eaf4..e1e5443 100644
--- a/macros/cross_db_utils/json_operations.sql
+++ b/macros/cross_db_utils/json_operations.sql
@@ -43,11 +43,12 @@
 {%- endmacro %}

 {% macro redshift__format_json_path(json_path_list) -%}
+    {%- set quote = '"' if redshift_super_type() else "'" -%}
     {%- set str_list = [] -%}
     {%- for json_path in json_path_list -%}
-        {%- if str_list.append(json_path.replace("'", "''")) -%} {%- endif -%}
+        {%- if str_list.append(json_path.replace(quote, quote + quote)) -%} {%- endif -%}
     {%- endfor -%}
-    {{ "'" ~ str_list|join("','") ~ "'" }}
+    {{ quote ~ str_list|join(quote + "," + quote) ~ quote }}
 {%- endmacro %}

 {% macro snowflake__format_json_path(json_path_list) -%}
@@ -114,11 +115,14 @@
 {%- endmacro %}

 {% macro redshift__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
-    {%- if from_table|string() == '' %}
+    {%- if from_table|string() != '' -%}
+        {%- set json_column = from_table|string() + "." + json_column|string() -%}
+    {%- endif -%}
+    {%- if redshift_super_type() -%}
+        case when {{ json_column }}.{{ format_json_path(json_path_list) }} != '' then {{ json_column }}.{{ format_json_path(json_path_list) }} end
+    {%- else -%}
         case when json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true) != '' then json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true) end
-    {% else %}
-        case when json_extract_path_text({{ from_table }}.{{ json_column }}, {{ format_json_path(json_path_list) }}, true) != '' then json_extract_path_text({{ from_table }}.{{ json_column }}, {{ format_json_path(json_path_list) }}, true) end
-    {% endif -%}
+    {%- endif -%}
 {%- endmacro %}

 {% macro snowflake__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
@@ -135,9 +139,9 @@
 {% macro clickhouse__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
     {%- if from_table|string() == '' %}
-        JSONExtractRaw({{ json_column }}, {{ format_json_path(json_path_list) }})
+        JSONExtractRaw(assumeNotNull({{ json_column }}), {{ format_json_path(json_path_list) }})
     {% else %}
-        JSONExtractRaw({{ from_table }}.{{ json_column }}, {{ format_json_path(json_path_list) }})
+        JSONExtractRaw(assumeNotNull({{ from_table }}.{{ json_column }}), {{ format_json_path(json_path_list) }})
     {% endif -%}
 {%- endmacro %}

@@ -168,7 +172,11 @@
 {%- endmacro %}

 {% macro redshift__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
+    {%- if redshift_super_type() -%}
+        case when {{ json_column }}.{{ format_json_path(json_path_list) }} != '' then {{ json_column }}.{{ format_json_path(json_path_list) }} end
+    {%- else -%}
         case when json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true) != '' then json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true) end
+    {%- endif -%}
 {%- endmacro %}

 {% macro snowflake__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
@@ -180,7 +188,7 @@
 {%- endmacro %}

 {% macro clickhouse__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
-    JSONExtractRaw({{ json_column }}, {{ format_json_path(json_path_list) }})
+    JSONExtractRaw(assumeNotNull({{ json_column }}), {{ format_json_path(json_path_list) }})
 {%- endmacro %}

 {# json_extract_array ------------------------------------------------- #}
@@ -210,7 +218,11 @@
 {%- endmacro %}

 {% macro redshift__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
+    {%- if redshift_super_type() -%}
+        {{ json_column }}.{{ format_json_path(json_path_list) }}
+    {%- else -%}
     json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true)
+    {%- endif -%}
 {%- endmacro %}

 {% macro snowflake__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
@@ -222,5 +234,5 @@
 {%- endmacro %}

 {% macro clickhouse__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
-    JSONExtractArrayRaw({{ json_column }}, {{ format_json_path(json_path_list) }})
+    JSONExtractArrayRaw(assumeNotNull({{ json_column }}), {{ format_json_path(json_path_list) }})
 {%- endmacro %}
diff --git a/macros/cross_db_utils/type_conversions.sql b/macros/cross_db_utils/type_conversions.sql
index 89dd68b..ad3d756 100644
--- a/macros/cross_db_utils/type_conversions.sql
+++ b/macros/cross_db_utils/type_conversions.sql
@@ -33,6 +33,31 @@
     cast({{ array_column }} as {{dbt_utils.type_string()}})
 {%- endmacro %}

+{% macro redshift__array_to_string(array_column) -%}
+    {% if redshift_super_type() -%}
+        json_serialize({{array_column}})
+    {%- else -%}
+        {{ array_column }}
+    {%- endif %}
+{%- endmacro %}
+
+{# object_to_string ------------------------------------------------- #}
+{% macro object_to_string(object_column) -%}
+    {{ adapter.dispatch('object_to_string')(object_column) }}
+{%- endmacro %}
+
+{% macro default__object_to_string(object_column) -%}
+    {{ object_column }}
+{%- endmacro %}
+
+{% macro redshift__object_to_string(object_column) -%}
+    {% if redshift_super_type() -%}
+        json_serialize({{object_column}})
+    {%- else -%}
+        {{ object_column }}
+    {%- endif %}
+{%- endmacro %}
+
 {# cast_to_boolean ------------------------------------------------- #}
 {% macro cast_to_boolean(field) -%}
     {{ adapter.dispatch('cast_to_boolean')(field) }}
 {%- endmacro %}
@@ -49,7 +74,11 @@
 {# -- Redshift does not support converting string directly to boolean, it must go through int first #}
 {% macro redshift__cast_to_boolean(field) -%}
+    {% if redshift_super_type() -%}
+        cast({{ field }} as boolean)
+    {%- else -%}
     cast(decode({{ field }}, 'true', '1', 'false', '0')::integer as boolean)
+    {%- endif %}
 {%- endmacro %}

 {# -- MS SQL Server does not support converting string directly to boolean, it must be casted as bit #}
 {% macro mssql__cast_to_boolean(field) -%}
     cast({{ field }} as bit)
 {%- endmacro %}

+{# -- ClickHouse does not support casting strings directly to boolean, so map 'true'/'false' to 1/0 #}
+{% macro clickhouse__cast_to_boolean(field) -%}
+    IF(lower({{ field }}) = 'true', 1, 0)
+{%- endmacro %}
+
 {# empty_string_to_null ------------------------------------------------- #}
 {% macro empty_string_to_null(field) -%}
     {{ return(adapter.dispatch('empty_string_to_null')(field)) }}
 {%- endmacro %}
@@ -65,3 +99,7 @@
 {%- macro default__empty_string_to_null(field) -%}
     nullif({{ field }}, '')
 {%- endmacro %}
+
+{%- macro redshift__empty_string_to_null(field) -%}
+    nullif({{ field }}::varchar, '')
+{%- endmacro %}
diff --git a/packages.yml b/packages.yml
index 4b74445..33b4edd 100755
--- a/packages.yml
+++ b/packages.yml
@@ -2,4 +2,4 @@

 packages:
   - git: "https://github.com/fishtown-analytics/dbt-utils.git"
-    revision: 0.7.4
+    revision: 0.8.2
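
Illustrative usage (hypothetical, not part of the patch): a minimal sketch of a generated staging model showing how the vars and macros above fit together on Redshift. The file path, the source() reference, the chosen columns, and the _airbyte_emitted_at field are assumptions for illustration only; json_extract_scalar, json_extract_array, and the json_column / models_to_source vars are the ones defined in this patch.

-- models/generated/airbyte_ctes/selly_express/se_orders_ab1.sql (hypothetical sketch)
-- this.identifier would be 'se_orders_ab1', so redshift_super_type() resolves
-- selly_express._airbyte_raw_se_orders through models_to_source and checks in
-- SVV_COLUMNS whether _airbyte_data (the configured json_column) is stored as SUPER.
select
    {{ json_extract_scalar('_airbyte_data', ['code'], ['code']) }} as code,
    {{ json_extract_scalar('_airbyte_data', ['status'], ['status']) }} as status,
    {{ json_extract_array('_airbyte_data', ['extraServices'], ['extraServices']) }} as extraServices,
    _airbyte_emitted_at
from {{ source('selly_express', '_airbyte_raw_se_orders') }}

When the raw column is SUPER, the extraction resolves to PartiQL-style navigation (_airbyte_data."code"); when it is varchar, it falls back to json_extract_path_text(_airbyte_data, 'code', true), matching the two branches of redshift__json_extract_scalar.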