This commit is contained in:
Ubuntu 2022-05-12 10:27:31 +07:00
parent 5cd245a388
commit fd00e7b2f5
9 changed files with 145 additions and 52 deletions

View File

@ -1,45 +1,29 @@
# This file is necessary to install dbt-utils with dbt deps name: airbyte_utils
# the content will be overwritten by the transform function
# Name your package! Package names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: 'airbyte_utils'
version: '1.0' version: '1.0'
config-version: 2 config-version: 2
profile: normalize
# This setting configures which "profile" dbt uses for this project. Profiles contain model-paths:
# database connection information, and should be configured in the ~/.dbt/profiles.yml file - models
profile: 'normalize' docs-paths:
- docs
# These configurations specify where dbt should look for different types of files. analysis-paths:
# The `source-paths` config, for example, states that source models can be found - analysis
# in the "models/" directory. You probably won't need to change these! test-paths:
source-paths: ["models"] - tests
docs-paths: ["docs"] seed-paths:
analysis-paths: ["analysis"] - data
test-paths: ["tests"] macro-paths:
data-paths: ["data"] - macros
macro-paths: ["macros"] target-path: ../build
log-path: ../logs
target-path: "../build" # directory which will store compiled SQL files packages-install-path: ../dbt
log-path: "../logs" # directory which will store DBT logs clean-targets:
modules-path: "../dbt_modules" # directory which will store external DBT dependencies - build
- dbt_modules
clean-targets: # directories to be removed by `dbt clean`
- "build"
- "dbt_modules"
quoting: quoting:
database: true database: true
# Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785)
# all schemas should be unquoted
schema: false schema: false
identifier: true identifier: true
# You can define configurations for models in the `source-paths` directory here.
# Using these configurations, you can enable or disable models, change how they
# are materialized, and more!
models: models:
airbyte_utils: airbyte_utils:
+materialized: table +materialized: table
@ -57,7 +41,15 @@ models:
airbyte_views: airbyte_views:
+tags: airbyte_internal_views +tags: airbyte_internal_views
+materialized: view +materialized: view
dispatch: dispatch:
- macro_namespace: dbt_utils - macro_namespace: dbt_utils
search_order: ['airbyte_utils', 'dbt_utils'] search_order:
- airbyte_utils
- dbt_utils
vars:
json_column: _airbyte_data
models_to_source:
se_orders_ab1: selly_express._airbyte_raw_se_orders
se_orders_ab2: selly_express._airbyte_raw_se_orders
se_orders_ab3: selly_express._airbyte_raw_se_orders
se_orders: selly_express._airbyte_raw_se_orders

View File

@ -1 +1 @@
{"streams":[{"stream":{"name":"se-orders","json_schema":{"type":"object","properties":{"to":{"type":"string"},"_id":{"type":"string"},"cod":{"type":"number"},"code":{"type":"string"},"from":{"type":"string"},"note":{"type":"string"},"value":{"type":"number"},"client":{"type":"string"},"status":{"type":"string"},"volume":{"type":"string"},"weight":{"type":"number"},"courier":{"type":"string"},"distance":{"type":"number"},"createdAt":{"type":"string"},"updatedAt":{"type":"string"},"updatedBy":{"type":"string"},"itemVolume":{"type":"number"},"searchString":{"type":"string"},"extraServices":{"type":"array"}}},"supported_sync_modes":["full_refresh","incremental"],"default_cursor_field":[],"source_defined_primary_key":[],"namespace":"unibag"},"sync_mode":"full_refresh","cursor_field":[],"destination_sync_mode":"overwrite","primary_key":[]}]} {"streams":[{"stream":{"name":"se-orders","json_schema":{"type":"object","properties":{"to":{"type":"string"},"_id":{"type":"string"},"cod":{"type":"number"},"code":{"type":"string"},"from":{"type":"string"},"note":{"type":"string"},"value":{"type":"number"},"client":{"type":"string"},"status":{"type":"string"},"volume":{"type":"string"},"weight":{"type":"number"},"courier":{"type":"string"},"distance":{"type":"number"},"createdAt":{"type":"string"},"updatedAt":{"type":"string"},"updatedBy":{"type":"string"},"itemVolume":{"type":"number"},"searchString":{"type":"string"},"extraServices":{"type":"array"}}},"supported_sync_modes":["full_refresh","incremental"],"default_cursor_field":[],"source_defined_primary_key":[],"namespace":"selly-express"},"sync_mode":"full_refresh","cursor_field":[],"destination_sync_mode":"overwrite","primary_key":[]}]}

14
macros/configuration.sql Normal file
View File

@ -0,0 +1,14 @@
{%- macro redshift_super_type() -%}
    {#-
        Reports whether the raw JSON column behind the current model is stored
        with the Redshift `super` data type.

        Reads two project vars:
          * models_to_source: mapping of model identifier -> "<schema>.<table>"
            of the raw table the model is built from.
          * json_column: name of the raw JSON column to inspect.

        Returns:
          * ""    at parse time (`execute` is false), because no query can run;
                  the empty string is falsy, so callers take the non-SUPER path.
          * true  when SVV_COLUMNS reports data_type = 'super' for the column.
          * false otherwise, including when the current model has no entry in
                  models_to_source or the column is not found in SVV_COLUMNS.
                  Previously both cases aborted with an opaque lookup error.
    -#}
    {%- if not execute -%}
        {{ return("") }}
    {%- endif -%}

    {#- An unmapped model has no raw table to inspect: fall back to non-SUPER. -#}
    {%- set source_ref = var("models_to_source").get(this.identifier) -%}
    {%- if not source_ref -%}
        {{ return(false) }}
    {%- endif -%}
    {%- set table_schema, _, table_name = source_ref.partition(".") -%}

    {%- call statement("get_column_type", fetch_result=True) -%}
        select data_type from SVV_COLUMNS where table_name = '{{ table_name }}' and column_name = '{{ var("json_column") }}' and table_schema = '{{ table_schema }}';
    {%- endcall -%}

    {#- No row means the table or column does not exist (yet); do not index into an empty result. -#}
    {%- set column_rows = load_result("get_column_type")["data"] -%}
    {%- if not column_rows -%}
        {{ return(false) }}
    {%- endif -%}
    {{ return(column_rows[0][0] == "super") }}
{%- endmacro -%}

View File

@ -5,6 +5,7 @@
- Redshift: -> https://blog.getdbt.com/how-to-unnest-arrays-in-redshift/ - Redshift: -> https://blog.getdbt.com/how-to-unnest-arrays-in-redshift/
- postgres: unnest() -> https://www.postgresqltutorial.com/postgresql-array/ - postgres: unnest() -> https://www.postgresqltutorial.com/postgresql-array/
- MSSQL: openjson() > https://docs.microsoft.com/en-us/sql/relational-databases/json/validate-query-and-change-json-data-with-built-in-functions-sql-server?view=sql-server-ver15 - MSSQL: openjson() > https://docs.microsoft.com/en-us/sql/relational-databases/json/validate-query-and-change-json-data-with-built-in-functions-sql-server?view=sql-server-ver15
- ClickHouse: ARRAY JOIN > https://clickhouse.com/docs/zh/sql-reference/statements/select/array-join/
#} #}
{# cross_join_unnest ------------------------------------------------- #} {# cross_join_unnest ------------------------------------------------- #}
@ -21,6 +22,10 @@
cross join unnest({{ array_col }}) as {{ array_col }} cross join unnest({{ array_col }}) as {{ array_col }}
{%- endmacro %} {%- endmacro %}
{% macro clickhouse__cross_join_unnest(stream_name, array_col) -%}
    {#- ClickHouse implementation: renders an ARRAY JOIN clause over `array_col`.
        `stream_name` is accepted for signature parity with the other adapters and is not used. -#}
    {{ "ARRAY JOIN " ~ array_col }}
{%- endmacro %}
{% macro oracle__cross_join_unnest(stream_name, array_col) -%} {% macro oracle__cross_join_unnest(stream_name, array_col) -%}
{% do exceptions.warn("Normalization does not support unnesting for Oracle yet.") %} {% do exceptions.warn("Normalization does not support unnesting for Oracle yet.") %}
{%- endmacro %} {%- endmacro %}
@ -99,8 +104,19 @@
{% macro default__unnest_cte(from_table, stream_name, column_col) -%}{%- endmacro %} {% macro default__unnest_cte(from_table, stream_name, column_col) -%}{%- endmacro %}
{# -- based on https://blog.getdbt.com/how-to-unnest-arrays-in-redshift/ #}
{% macro redshift__unnest_cte(from_table, stream_name, column_col) -%} {% macro redshift__unnest_cte(from_table, stream_name, column_col) -%}
{# -- based on https://docs.aws.amazon.com/redshift/latest/dg/query-super.html #}
{% if redshift_super_type() -%}
with joined as (
select
table_alias._airbyte_{{ stream_name }}_hashid as _airbyte_hashid,
_airbyte_nested_data
from {{ from_table }} as table_alias, table_alias.{{ column_col }} as _airbyte_nested_data
)
{%- else -%}
{# -- based on https://blog.getdbt.com/how-to-unnest-arrays-in-redshift/ #}
{%- if not execute -%} {%- if not execute -%}
{{ return('') }} {{ return('') }}
{% endif %} {% endif %}
@ -129,6 +145,7 @@ joined as (
-- to the number of items in {{ from_table }}.{{ column_col }} -- to the number of items in {{ from_table }}.{{ column_col }}
where numbers.generated_number <= json_array_length({{ column_col }}, true) where numbers.generated_number <= json_array_length({{ column_col }}, true)
) )
{%- endif %}
{%- endmacro %} {%- endmacro %}
{% macro mysql__unnest_cte(from_table, stream_name, column_col) -%} {% macro mysql__unnest_cte(from_table, stream_name, column_col) -%}

View File

@ -0,0 +1,16 @@
{% macro redshift__alter_column_type(relation, column_name, new_column_type) -%}
  {#-
      Changes the type of `column_name` on `relation` by going through a
      temporary column named "<column_name>__dbt_alter":
        1. add the temporary column with the new type,
        2. copy every row's value into it (through JSON_PARSE when the new
           type is `super`, as a plain assignment otherwise),
        3. drop the original column (cascade),
        4. rename the temporary column to the original name.
      The update intentionally has no WHERE clause: all rows are migrated.
  -#}
  {%- set quoted_original = adapter.quote(column_name) -%}
  {%- set quoted_tmp = adapter.quote(column_name + "__dbt_alter") -%}
  {%- set copy_expression = ("JSON_PARSE(" ~ quoted_original ~ ")") if new_column_type.lower() == "super" else quoted_original -%}
  {% call statement('alter_column_type') %}
    alter table {{ relation }} add column {{ quoted_tmp }} {{ new_column_type }};
    update {{ relation }} set {{ quoted_tmp }} = {{ copy_expression }};
    alter table {{ relation }} drop column {{ quoted_original }} cascade;
    alter table {{ relation }} rename column {{ quoted_tmp }} to {{ quoted_original }}
  {% endcall %}
{% endmacro %}

View File

@ -9,7 +9,11 @@
{% endmacro %} {% endmacro %}
{%- macro redshift__type_json() -%} {%- macro redshift__type_json() -%}
{%- if redshift_super_type() -%}
super
{%- else -%}
varchar varchar
{%- endif -%}
{%- endmacro -%} {%- endmacro -%}
{% macro postgres__type_json() %} {% macro postgres__type_json() %}

View File

@ -43,11 +43,12 @@
{%- endmacro %} {%- endmacro %}
{% macro redshift__format_json_path(json_path_list) -%} {% macro redshift__format_json_path(json_path_list) -%}
{%- set quote = '"' if redshift_super_type() else "'" -%}
{%- set str_list = [] -%} {%- set str_list = [] -%}
{%- for json_path in json_path_list -%} {%- for json_path in json_path_list -%}
{%- if str_list.append(json_path.replace("'", "''")) -%} {%- endif -%} {%- if str_list.append(json_path.replace(quote, quote + quote)) -%} {%- endif -%}
{%- endfor -%} {%- endfor -%}
{{ "'" ~ str_list|join("','") ~ "'" }} {{ quote ~ str_list|join(quote + "," + quote) ~ quote }}
{%- endmacro %} {%- endmacro %}
{% macro snowflake__format_json_path(json_path_list) -%} {% macro snowflake__format_json_path(json_path_list) -%}
@ -114,11 +115,14 @@
{%- endmacro %} {%- endmacro %}
{% macro redshift__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%} {% macro redshift__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
{%- if from_table|string() == '' %} {%- if from_table|string() != '' -%}
{%- set json_column = from_table|string() + "." + json_column|string() -%}
{%- endif -%}
{%- if redshift_super_type() -%}
case when {{ json_column }}.{{ format_json_path(json_path_list) }} != '' then {{ json_column }}.{{ format_json_path(json_path_list) }} end
{%- else -%}
case when json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true) != '' then json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true) end case when json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true) != '' then json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true) end
{% else %} {%- endif -%}
case when json_extract_path_text({{ from_table }}.{{ json_column }}, {{ format_json_path(json_path_list) }}, true) != '' then json_extract_path_text({{ from_table }}.{{ json_column }}, {{ format_json_path(json_path_list) }}, true) end
{% endif -%}
{%- endmacro %} {%- endmacro %}
{% macro snowflake__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%} {% macro snowflake__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
@ -135,9 +139,9 @@
{% macro clickhouse__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%} {% macro clickhouse__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
{%- if from_table|string() == '' %} {%- if from_table|string() == '' %}
JSONExtractRaw({{ json_column }}, {{ format_json_path(json_path_list) }}) JSONExtractRaw(assumeNotNull({{ json_column }}), {{ format_json_path(json_path_list) }})
{% else %} {% else %}
JSONExtractRaw({{ from_table }}.{{ json_column }}, {{ format_json_path(json_path_list) }}) JSONExtractRaw(assumeNotNull({{ from_table }}.{{ json_column }}), {{ format_json_path(json_path_list) }})
{% endif -%} {% endif -%}
{%- endmacro %} {%- endmacro %}
@ -168,7 +172,11 @@
{%- endmacro %} {%- endmacro %}
{% macro redshift__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%} {% macro redshift__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
{%- if redshift_super_type() -%}
case when {{ json_column }}.{{ format_json_path(json_path_list) }} != '' then {{ json_column }}.{{ format_json_path(json_path_list) }} end
{%- else -%}
case when json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true) != '' then json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true) end case when json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true) != '' then json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true) end
{%- endif -%}
{%- endmacro %} {%- endmacro %}
{% macro snowflake__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%} {% macro snowflake__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
@ -180,7 +188,7 @@
{%- endmacro %} {%- endmacro %}
{% macro clickhouse__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%} {% macro clickhouse__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
JSONExtractRaw({{ json_column }}, {{ format_json_path(json_path_list) }}) JSONExtractRaw(assumeNotNull({{ json_column }}), {{ format_json_path(json_path_list) }})
{%- endmacro %} {%- endmacro %}
{# json_extract_array ------------------------------------------------- #} {# json_extract_array ------------------------------------------------- #}
@ -210,7 +218,11 @@
{%- endmacro %} {%- endmacro %}
{% macro redshift__json_extract_array(json_column, json_path_list, normalized_json_path) -%} {% macro redshift__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
{%- if redshift_super_type() -%}
{{ json_column }}.{{ format_json_path(json_path_list) }}
{%- else -%}
json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true) json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true)
{%- endif -%}
{%- endmacro %} {%- endmacro %}
{% macro snowflake__json_extract_array(json_column, json_path_list, normalized_json_path) -%} {% macro snowflake__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
@ -222,5 +234,5 @@
{%- endmacro %} {%- endmacro %}
{% macro clickhouse__json_extract_array(json_column, json_path_list, normalized_json_path) -%} {% macro clickhouse__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
JSONExtractArrayRaw({{ json_column }}, {{ format_json_path(json_path_list) }}) JSONExtractArrayRaw(assumeNotNull({{ json_column }}), {{ format_json_path(json_path_list) }})
{%- endmacro %} {%- endmacro %}

View File

@ -33,6 +33,31 @@
cast({{ array_column }} as {{dbt_utils.type_string()}}) cast({{ array_column }} as {{dbt_utils.type_string()}})
{%- endmacro %} {%- endmacro %}
{% macro redshift__array_to_string(array_column) -%}
    {#- Redshift implementation: wraps the column in json_serialize() when
        redshift_super_type() is truthy, and emits the column unchanged otherwise. -#}
    {%- set serialized = "json_serialize(" ~ array_column ~ ")" -%}
    {{ serialized if redshift_super_type() else array_column }}
{%- endmacro %}
{# object_to_string ------------------------------------------------- #}
{# -- Entry point: resolves the adapter-specific `object_to_string` implementation and renders it for `object_column`. #}
{% macro object_to_string(object_column) -%}
    {%- set implementation = adapter.dispatch('object_to_string') -%}
    {{ implementation(object_column) }}
{%- endmacro %}
{# -- Default implementation: emits the column expression unchanged. #}
{% macro default__object_to_string(object_column) -%}
    {{ object_column }}
{%- endmacro %}
{% macro redshift__object_to_string(object_column) -%}
    {#- Redshift implementation: wraps the column in json_serialize() when
        redshift_super_type() is truthy, and emits the column unchanged otherwise. -#}
    {%- set serialized = "json_serialize(" ~ object_column ~ ")" -%}
    {{ serialized if redshift_super_type() else object_column }}
{%- endmacro %}
{# cast_to_boolean ------------------------------------------------- #} {# cast_to_boolean ------------------------------------------------- #}
{% macro cast_to_boolean(field) -%} {% macro cast_to_boolean(field) -%}
{{ adapter.dispatch('cast_to_boolean')(field) }} {{ adapter.dispatch('cast_to_boolean')(field) }}
@ -49,7 +74,11 @@
{# -- Redshift does not support converting string directly to boolean, it must go through int first #} {# -- Redshift does not support converting string directly to boolean, it must go through int first #}
{% macro redshift__cast_to_boolean(field) -%} {% macro redshift__cast_to_boolean(field) -%}
{% if redshift_super_type() -%}
cast({{ field }} as boolean)
{%- else -%}
cast(decode({{ field }}, 'true', '1', 'false', '0')::integer as boolean) cast(decode({{ field }}, 'true', '1', 'false', '0')::integer as boolean)
{%- endif %}
{%- endmacro %} {%- endmacro %}
{# -- MS SQL Server does not support converting string directly to boolean, it must be casted as bit #} {# -- MS SQL Server does not support converting string directly to boolean, it must be casted as bit #}
@ -57,6 +86,11 @@
cast({{ field }} as bit) cast({{ field }} as bit)
{%- endmacro %} {%- endmacro %}
{# -- ClickHouse: renders an IF() that yields 1 when the lower-cased field equals 'true', else 0 #}
{% macro clickhouse__cast_to_boolean(field) -%}
    {{ "IF(lower(" ~ field ~ ") = 'true', 1, 0)" }}
{%- endmacro %}
{# empty_string_to_null ------------------------------------------------- #} {# empty_string_to_null ------------------------------------------------- #}
{% macro empty_string_to_null(field) -%} {% macro empty_string_to_null(field) -%}
{{ return(adapter.dispatch('empty_string_to_null')(field)) }} {{ return(adapter.dispatch('empty_string_to_null')(field)) }}
@ -65,3 +99,7 @@
{%- macro default__empty_string_to_null(field) -%} {%- macro default__empty_string_to_null(field) -%}
nullif({{ field }}, '') nullif({{ field }}, '')
{%- endmacro %} {%- endmacro %}
{%- macro redshift__empty_string_to_null(field) -%}
    {#- Redshift implementation: casts the field to varchar before comparing it
        with the empty string, so nullif() returns NULL for '' values. -#}
    {{ "nullif(" ~ field ~ "::varchar, '')" }}
{%- endmacro %}

View File

@ -2,4 +2,4 @@
packages: packages:
- git: "https://github.com/fishtown-analytics/dbt-utils.git" - git: "https://github.com/fishtown-analytics/dbt-utils.git"
revision: 0.7.4 revision: 0.8.2