This commit is contained in:
Ubuntu 2022-05-12 10:27:31 +07:00
parent 5cd245a388
commit fd00e7b2f5
9 changed files with 145 additions and 52 deletions

View File

@ -1,45 +1,29 @@
# This file is necessary to install dbt-utils with dbt deps
# the content will be overwritten by the transform function
# Name your package! Package names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: 'airbyte_utils'
name: airbyte_utils
version: '1.0'
config-version: 2
# This setting configures which "profile" dbt uses for this project. Profiles contain
# database connection information, and should be configured in the ~/.dbt/profiles.yml file
profile: 'normalize'
# These configurations specify where dbt should look for different types of files.
# The `source-paths` config, for example, states that source models can be found
# in the "models/" directory. You probably won't need to change these!
source-paths: ["models"]
docs-paths: ["docs"]
analysis-paths: ["analysis"]
test-paths: ["tests"]
data-paths: ["data"]
macro-paths: ["macros"]
target-path: "../build" # directory which will store compiled SQL files
log-path: "../logs" # directory which will store DBT logs
modules-path: "../dbt_modules" # directory which will store external DBT dependencies
clean-targets: # directories to be removed by `dbt clean`
- "build"
- "dbt_modules"
profile: normalize
model-paths:
- models
docs-paths:
- docs
analysis-paths:
- analysis
test-paths:
- tests
seed-paths:
- data
macro-paths:
- macros
target-path: ../build
log-path: ../logs
packages-install-path: ../dbt
clean-targets:
- build
- dbt_modules
quoting:
database: true
# Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785)
# all schemas should be unquoted
schema: false
identifier: true
# You can define configurations for models in the `source-paths` directory here.
# Using these configurations, you can enable or disable models, change how they
# are materialized, and more!
models:
airbyte_utils:
+materialized: table
@ -57,7 +41,15 @@ models:
airbyte_views:
+tags: airbyte_internal_views
+materialized: view
dispatch:
- macro_namespace: dbt_utils
search_order: ['airbyte_utils', 'dbt_utils']
search_order:
- airbyte_utils
- dbt_utils
vars:
json_column: _airbyte_data
models_to_source:
se_orders_ab1: selly_express._airbyte_raw_se_orders
se_orders_ab2: selly_express._airbyte_raw_se_orders
se_orders_ab3: selly_express._airbyte_raw_se_orders
se_orders: selly_express._airbyte_raw_se_orders

View File

@ -1 +1 @@
{"streams":[{"stream":{"name":"se-orders","json_schema":{"type":"object","properties":{"to":{"type":"string"},"_id":{"type":"string"},"cod":{"type":"number"},"code":{"type":"string"},"from":{"type":"string"},"note":{"type":"string"},"value":{"type":"number"},"client":{"type":"string"},"status":{"type":"string"},"volume":{"type":"string"},"weight":{"type":"number"},"courier":{"type":"string"},"distance":{"type":"number"},"createdAt":{"type":"string"},"updatedAt":{"type":"string"},"updatedBy":{"type":"string"},"itemVolume":{"type":"number"},"searchString":{"type":"string"},"extraServices":{"type":"array"}}},"supported_sync_modes":["full_refresh","incremental"],"default_cursor_field":[],"source_defined_primary_key":[],"namespace":"unibag"},"sync_mode":"full_refresh","cursor_field":[],"destination_sync_mode":"overwrite","primary_key":[]}]}
{"streams":[{"stream":{"name":"se-orders","json_schema":{"type":"object","properties":{"to":{"type":"string"},"_id":{"type":"string"},"cod":{"type":"number"},"code":{"type":"string"},"from":{"type":"string"},"note":{"type":"string"},"value":{"type":"number"},"client":{"type":"string"},"status":{"type":"string"},"volume":{"type":"string"},"weight":{"type":"number"},"courier":{"type":"string"},"distance":{"type":"number"},"createdAt":{"type":"string"},"updatedAt":{"type":"string"},"updatedBy":{"type":"string"},"itemVolume":{"type":"number"},"searchString":{"type":"string"},"extraServices":{"type":"array"}}},"supported_sync_modes":["full_refresh","incremental"],"default_cursor_field":[],"source_defined_primary_key":[],"namespace":"selly-express"},"sync_mode":"full_refresh","cursor_field":[],"destination_sync_mode":"overwrite","primary_key":[]}]}

14
macros/configuration.sql Normal file
View File

@ -0,0 +1,14 @@
{%- macro redshift_super_type() -%}
{%- if not execute -%}
{{ return("") }}
{%- endif -%}
{%- set table_schema, _, table_name = var("models_to_source")[this.identifier].partition(".") -%}
{%- call statement("get_column_type", fetch_result=True) -%}
select data_type from SVV_COLUMNS where table_name = '{{ table_name }}' and column_name = '{{ var("json_column") }}' and table_schema = '{{ table_schema }}';
{%- endcall -%}
{%- set column_type = load_result("get_column_type")["data"][0][0] -%}
{{ return(column_type == "super") }}
{%- endmacro -%}

View File

@ -5,6 +5,7 @@
- Redshift: -> https://blog.getdbt.com/how-to-unnest-arrays-in-redshift/
- postgres: unnest() -> https://www.postgresqltutorial.com/postgresql-array/
- MSSQL: openjson() > https://docs.microsoft.com/en-us/sql/relational-databases/json/validate-query-and-change-json-data-with-built-in-functions-sql-server?view=sql-server-ver15
- ClickHouse: ARRAY JOIN > https://clickhouse.com/docs/zh/sql-reference/statements/select/array-join/
#}
{# cross_join_unnest ------------------------------------------------- #}
@ -21,6 +22,10 @@
cross join unnest({{ array_col }}) as {{ array_col }}
{%- endmacro %}
{% macro clickhouse__cross_join_unnest(stream_name, array_col) -%}
ARRAY JOIN {{ array_col }}
{%- endmacro %}
{% macro oracle__cross_join_unnest(stream_name, array_col) -%}
{% do exceptions.warn("Normalization does not support unnesting for Oracle yet.") %}
{%- endmacro %}
@ -99,8 +104,19 @@
{% macro default__unnest_cte(from_table, stream_name, column_col) -%}{%- endmacro %}
{# -- based on https://blog.getdbt.com/how-to-unnest-arrays-in-redshift/ #}
{% macro redshift__unnest_cte(from_table, stream_name, column_col) -%}
{# -- based on https://docs.aws.amazon.com/redshift/latest/dg/query-super.html #}
{% if redshift_super_type() -%}
with joined as (
select
table_alias._airbyte_{{ stream_name }}_hashid as _airbyte_hashid,
_airbyte_nested_data
from {{ from_table }} as table_alias, table_alias.{{ column_col }} as _airbyte_nested_data
)
{%- else -%}
{# -- based on https://blog.getdbt.com/how-to-unnest-arrays-in-redshift/ #}
{%- if not execute -%}
{{ return('') }}
{% endif %}
@ -129,6 +145,7 @@ joined as (
-- to the number of items in {{ from_table }}.{{ column_col }}
where numbers.generated_number <= json_array_length({{ column_col }}, true)
)
{%- endif %}
{%- endmacro %}
{% macro mysql__unnest_cte(from_table, stream_name, column_col) -%}

View File

@ -0,0 +1,16 @@
{% macro redshift__alter_column_type(relation, column_name, new_column_type) -%}
{%- set tmp_column = column_name + "__dbt_alter" -%}
{% call statement('alter_column_type') %}
alter table {{ relation }} add column {{ adapter.quote(tmp_column) }} {{ new_column_type }};
{% if new_column_type.lower() == "super" %}
update {{ relation }} set {{ adapter.quote(tmp_column) }} = JSON_PARSE({{ adapter.quote(column_name) }});
{% else %}
update {{ relation }} set {{ adapter.quote(tmp_column) }} = {{ adapter.quote(column_name) }};
{% endif %}
alter table {{ relation }} drop column {{ adapter.quote(column_name) }} cascade;
alter table {{ relation }} rename column {{ adapter.quote(tmp_column) }} to {{ adapter.quote(column_name) }}
{% endcall %}
{% endmacro %}

View File

@ -9,7 +9,11 @@
{% endmacro %}
{%- macro redshift__type_json() -%}
{%- if redshift_super_type() -%}
super
{%- else -%}
varchar
{%- endif -%}
{%- endmacro -%}
{% macro postgres__type_json() %}

View File

@ -43,11 +43,12 @@
{%- endmacro %}
{% macro redshift__format_json_path(json_path_list) -%}
{%- set quote = '"' if redshift_super_type() else "'" -%}
{%- set str_list = [] -%}
{%- for json_path in json_path_list -%}
{%- if str_list.append(json_path.replace("'", "''")) -%} {%- endif -%}
{%- if str_list.append(json_path.replace(quote, quote + quote)) -%} {%- endif -%}
{%- endfor -%}
{{ "'" ~ str_list|join("','") ~ "'" }}
{{ quote ~ str_list|join(quote + "," + quote) ~ quote }}
{%- endmacro %}
{% macro snowflake__format_json_path(json_path_list) -%}
@ -114,11 +115,14 @@
{%- endmacro %}
{% macro redshift__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
{%- if from_table|string() == '' %}
{%- if from_table|string() != '' -%}
{%- set json_column = from_table|string() + "." + json_column|string() -%}
{%- endif -%}
{%- if redshift_super_type() -%}
case when {{ json_column }}.{{ format_json_path(json_path_list) }} != '' then {{ json_column }}.{{ format_json_path(json_path_list) }} end
{%- else -%}
case when json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true) != '' then json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true) end
{% else %}
case when json_extract_path_text({{ from_table }}.{{ json_column }}, {{ format_json_path(json_path_list) }}, true) != '' then json_extract_path_text({{ from_table }}.{{ json_column }}, {{ format_json_path(json_path_list) }}, true) end
{% endif -%}
{%- endif -%}
{%- endmacro %}
{% macro snowflake__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
@ -135,9 +139,9 @@
{% macro clickhouse__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
{%- if from_table|string() == '' %}
JSONExtractRaw({{ json_column }}, {{ format_json_path(json_path_list) }})
JSONExtractRaw(assumeNotNull({{ json_column }}), {{ format_json_path(json_path_list) }})
{% else %}
JSONExtractRaw({{ from_table }}.{{ json_column }}, {{ format_json_path(json_path_list) }})
JSONExtractRaw(assumeNotNull({{ from_table }}.{{ json_column }}), {{ format_json_path(json_path_list) }})
{% endif -%}
{%- endmacro %}
@ -168,7 +172,11 @@
{%- endmacro %}
{% macro redshift__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
{%- if redshift_super_type() -%}
case when {{ json_column }}.{{ format_json_path(json_path_list) }} != '' then {{ json_column }}.{{ format_json_path(json_path_list) }} end
{%- else -%}
case when json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true) != '' then json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true) end
{%- endif -%}
{%- endmacro %}
{% macro snowflake__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
@ -180,7 +188,7 @@
{%- endmacro %}
{% macro clickhouse__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
JSONExtractRaw({{ json_column }}, {{ format_json_path(json_path_list) }})
JSONExtractRaw(assumeNotNull({{ json_column }}), {{ format_json_path(json_path_list) }})
{%- endmacro %}
{# json_extract_array ------------------------------------------------- #}
@ -210,7 +218,11 @@
{%- endmacro %}
{% macro redshift__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
{%- if redshift_super_type() -%}
{{ json_column }}.{{ format_json_path(json_path_list) }}
{%- else -%}
json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true)
{%- endif -%}
{%- endmacro %}
{% macro snowflake__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
@ -222,5 +234,5 @@
{%- endmacro %}
{% macro clickhouse__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
JSONExtractArrayRaw({{ json_column }}, {{ format_json_path(json_path_list) }})
JSONExtractArrayRaw(assumeNotNull({{ json_column }}), {{ format_json_path(json_path_list) }})
{%- endmacro %}

View File

@ -33,6 +33,31 @@
cast({{ array_column }} as {{dbt_utils.type_string()}})
{%- endmacro %}
{% macro redshift__array_to_string(array_column) -%}
{% if redshift_super_type() -%}
json_serialize({{array_column}})
{%- else -%}
{{ array_column }}
{%- endif %}
{%- endmacro %}
{# object_to_string ------------------------------------------------- #}
{% macro object_to_string(object_column) -%}
{{ adapter.dispatch('object_to_string')(object_column) }}
{%- endmacro %}
{% macro default__object_to_string(object_column) -%}
{{ object_column }}
{%- endmacro %}
{% macro redshift__object_to_string(object_column) -%}
{% if redshift_super_type() -%}
json_serialize({{object_column}})
{%- else -%}
{{ object_column }}
{%- endif %}
{%- endmacro %}
{# cast_to_boolean ------------------------------------------------- #}
{% macro cast_to_boolean(field) -%}
{{ adapter.dispatch('cast_to_boolean')(field) }}
@ -49,7 +74,11 @@
{# -- Redshift does not support converting string directly to boolean, it must go through int first #}
{% macro redshift__cast_to_boolean(field) -%}
{% if redshift_super_type() -%}
cast({{ field }} as boolean)
{%- else -%}
cast(decode({{ field }}, 'true', '1', 'false', '0')::integer as boolean)
{%- endif %}
{%- endmacro %}
{# -- MS SQL Server does not support converting string directly to boolean, it must be casted as bit #}
@ -57,6 +86,11 @@
cast({{ field }} as bit)
{%- endmacro %}
{# -- ClickHouse does not support converting string directly to Int8, it must go through int first #}
{% macro clickhouse__cast_to_boolean(field) -%}
IF(lower({{ field }}) = 'true', 1, 0)
{%- endmacro %}
{# empty_string_to_null ------------------------------------------------- #}
{% macro empty_string_to_null(field) -%}
{{ return(adapter.dispatch('empty_string_to_null')(field)) }}
@ -65,3 +99,7 @@
{%- macro default__empty_string_to_null(field) -%}
nullif({{ field }}, '')
{%- endmacro %}
{%- macro redshift__empty_string_to_null(field) -%}
nullif({{ field }}::varchar, '')
{%- endmacro %}

View File

@ -2,4 +2,4 @@
packages:
- git: "https://github.com/fishtown-analytics/dbt-utils.git"
revision: 0.7.4
revision: 0.8.2