update
This commit is contained in:
parent
59ac317c58
commit
250f61873c
2
Makefile
2
Makefile
|
@ -6,5 +6,5 @@ push:
|
||||||
run:
|
run:
|
||||||
dbt deps --profiles-dir=. --project-dir=.
|
dbt deps --profiles-dir=. --project-dir=.
|
||||||
#dbt run --profiles-dir=. --project-dir=. --full-refresh
|
#dbt run --profiles-dir=. --project-dir=. --full-refresh
|
||||||
dbt run --profiles-dir=. --project-dir=. --full-refresh --select order_items
|
dbt run --profiles-dir=. --project-dir=. --full-refresh --select tracking_product_shares
|
||||||
|
|
||||||
|
|
|
@ -4,27 +4,27 @@
|
||||||
# Name your package! Package names should contain only lowercase characters
|
# Name your package! Package names should contain only lowercase characters
|
||||||
# and underscores. A good package name should reflect your organization's
|
# and underscores. A good package name should reflect your organization's
|
||||||
# name or the intended use of these models
|
# name or the intended use of these models
|
||||||
name: 'airbyte_utils'
|
name: "airbyte_utils"
|
||||||
version: '1.0'
|
version: "1.0"
|
||||||
config-version: 2
|
config-version: 2
|
||||||
|
|
||||||
# This setting configures which "profile" dbt uses for this project. Profiles contain
|
# This setting configures which "profile" dbt uses for this project. Profiles contain
|
||||||
# database connection information, and should be configured in the ~/.dbt/profiles.yml file
|
# database connection information, and should be configured in the ~/.dbt/profiles.yml file
|
||||||
profile: 'normalize'
|
profile: "normalize"
|
||||||
|
|
||||||
# These configurations specify where dbt should look for different types of files.
|
# These configurations specify where dbt should look for different types of files.
|
||||||
# The `source-paths` config, for example, states that source models can be found
|
# The `model-paths` config, for example, states that models can be found
|
||||||
# in the "models/" directory. You probably won't need to change these!
|
# in the "models/" directory. You probably won't need to change these!
|
||||||
source-paths: ["models"]
|
model-paths: ["models"]
|
||||||
docs-paths: ["docs"]
|
docs-paths: ["docs"]
|
||||||
analysis-paths: ["analysis"]
|
analysis-paths: ["analysis"]
|
||||||
test-paths: ["tests"]
|
test-paths: ["tests"]
|
||||||
data-paths: ["data"]
|
seed-paths: ["data"]
|
||||||
macro-paths: ["macros"]
|
macro-paths: ["macros"]
|
||||||
|
|
||||||
target-path: "../build" # directory which will store compiled SQL files
|
target-path: "../build" # directory which will store compiled SQL files
|
||||||
log-path: "../logs" # directory which will store DBT logs
|
log-path: "../logs" # directory which will store DBT logs
|
||||||
modules-path: "../dbt_modules" # directory which will store external DBT dependencies
|
packages-install-path: "../dbt" # directory which will store external DBT dependencies
|
||||||
|
|
||||||
clean-targets: # directories to be removed by `dbt clean`
|
clean-targets: # directories to be removed by `dbt clean`
|
||||||
- "build"
|
- "build"
|
||||||
|
@ -37,7 +37,7 @@ quoting:
|
||||||
schema: false
|
schema: false
|
||||||
identifier: true
|
identifier: true
|
||||||
|
|
||||||
# You can define configurations for models in the `source-paths` directory here.
|
# You can define configurations for models in the `model-paths` directory here.
|
||||||
# Using these configurations, you can enable or disable models, change how they
|
# Using these configurations, you can enable or disable models, change how they
|
||||||
# are materialized, and more!
|
# are materialized, and more!
|
||||||
models:
|
models:
|
||||||
|
@ -60,4 +60,4 @@ models:
|
||||||
|
|
||||||
dispatch:
|
dispatch:
|
||||||
- macro_namespace: dbt_utils
|
- macro_namespace: dbt_utils
|
||||||
search_order: ['airbyte_utils', 'dbt_utils']
|
search_order: ["airbyte_utils", "dbt_utils"]
|
||||||
|
|
File diff suppressed because one or more lines are too long
|
@ -1 +1 @@
|
||||||
{"ssl":false,"host":"localhost","port":5555,"schema":"public","database":"selly_etl","password":"123","username":"selly","tunnel_method":{"tunnel_method":"NO_TUNNEL"}}
|
{"ssl":false,"host":"18.140.112.89","port":5432,"schema":"public","database":"mongo_etl","password":"bvxJaDGW2R55uyDXfODJ2a0Y","username":"selly","tunnel_method":{"tunnel_method":"NO_TUNNEL"}}
|
|
@ -5,6 +5,7 @@
|
||||||
- Redshift: -> https://blog.getdbt.com/how-to-unnest-arrays-in-redshift/
|
- Redshift: -> https://blog.getdbt.com/how-to-unnest-arrays-in-redshift/
|
||||||
- postgres: unnest() -> https://www.postgresqltutorial.com/postgresql-array/
|
- postgres: unnest() -> https://www.postgresqltutorial.com/postgresql-array/
|
||||||
- MSSQL: openjson() –> https://docs.microsoft.com/en-us/sql/relational-databases/json/validate-query-and-change-json-data-with-built-in-functions-sql-server?view=sql-server-ver15
|
- MSSQL: openjson() –> https://docs.microsoft.com/en-us/sql/relational-databases/json/validate-query-and-change-json-data-with-built-in-functions-sql-server?view=sql-server-ver15
|
||||||
|
- ClickHouse: ARRAY JOIN –> https://clickhouse.com/docs/en/sql-reference/statements/select/array-join/
|
||||||
#}
|
#}
|
||||||
|
|
||||||
{# cross_join_unnest ------------------------------------------------- #}
|
{# cross_join_unnest ------------------------------------------------- #}
|
||||||
|
@ -21,6 +22,10 @@
|
||||||
cross join unnest({{ array_col }}) as {{ array_col }}
|
cross join unnest({{ array_col }}) as {{ array_col }}
|
||||||
{%- endmacro %}
|
{%- endmacro %}
|
||||||
|
|
||||||
|
{% macro clickhouse__cross_join_unnest(stream_name, array_col) -%}
|
||||||
|
ARRAY JOIN {{ array_col }}
|
||||||
|
{%- endmacro %}
|
||||||
|
|
||||||
{% macro oracle__cross_join_unnest(stream_name, array_col) -%}
|
{% macro oracle__cross_join_unnest(stream_name, array_col) -%}
|
||||||
{% do exceptions.warn("Normalization does not support unnesting for Oracle yet.") %}
|
{% do exceptions.warn("Normalization does not support unnesting for Oracle yet.") %}
|
||||||
{%- endmacro %}
|
{%- endmacro %}
|
||||||
|
|
|
@ -135,9 +135,9 @@
|
||||||
|
|
||||||
{% macro clickhouse__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
|
{% macro clickhouse__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
|
||||||
{%- if from_table|string() == '' %}
|
{%- if from_table|string() == '' %}
|
||||||
JSONExtractRaw({{ json_column }}, {{ format_json_path(json_path_list) }})
|
JSONExtractRaw(assumeNotNull({{ json_column }}), {{ format_json_path(json_path_list) }})
|
||||||
{% else %}
|
{% else %}
|
||||||
JSONExtractRaw({{ from_table }}.{{ json_column }}, {{ format_json_path(json_path_list) }})
|
JSONExtractRaw(assumeNotNull({{ from_table }}.{{ json_column }}), {{ format_json_path(json_path_list) }})
|
||||||
{% endif -%}
|
{% endif -%}
|
||||||
{%- endmacro %}
|
{%- endmacro %}
|
||||||
|
|
||||||
|
@ -180,7 +180,7 @@
|
||||||
{%- endmacro %}
|
{%- endmacro %}
|
||||||
|
|
||||||
{% macro clickhouse__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
|
{% macro clickhouse__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
|
||||||
JSONExtractRaw({{ json_column }}, {{ format_json_path(json_path_list) }})
|
JSONExtractRaw(assumeNotNull({{ json_column }}), {{ format_json_path(json_path_list) }})
|
||||||
{%- endmacro %}
|
{%- endmacro %}
|
||||||
|
|
||||||
{# json_extract_array ------------------------------------------------- #}
|
{# json_extract_array ------------------------------------------------- #}
|
||||||
|
@ -222,5 +222,5 @@
|
||||||
{%- endmacro %}
|
{%- endmacro %}
|
||||||
|
|
||||||
{% macro clickhouse__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
|
{% macro clickhouse__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
|
||||||
JSONExtractArrayRaw({{ json_column }}, {{ format_json_path(json_path_list) }})
|
JSONExtractArrayRaw(assumeNotNull({{ json_column }}), {{ format_json_path(json_path_list) }})
|
||||||
{%- endmacro %}
|
{%- endmacro %}
|
||||||
|
|
|
@ -57,6 +57,11 @@
|
||||||
cast({{ field }} as bit)
|
cast({{ field }} as bit)
|
||||||
{%- endmacro %}
|
{%- endmacro %}
|
||||||
|
|
||||||
|
{# -- ClickHouse does not support converting string directly to Int8, so compare the lowercased value with 'true' and return 1 or 0 #}
|
||||||
|
{% macro clickhouse__cast_to_boolean(field) -%}
|
||||||
|
IF(lower({{ field }}) = 'true', 1, 0)
|
||||||
|
{%- endmacro %}
|
||||||
|
|
||||||
{# empty_string_to_null ------------------------------------------------- #}
|
{# empty_string_to_null ------------------------------------------------- #}
|
||||||
{% macro empty_string_to_null(field) -%}
|
{% macro empty_string_to_null(field) -%}
|
||||||
{{ return(adapter.dispatch('empty_string_to_null')(field)) }}
|
{{ return(adapter.dispatch('empty_string_to_null')(field)) }}
|
||||||
|
|
|
@ -0,0 +1,21 @@
|
||||||
|
{{ config(
|
||||||
|
indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
|
||||||
|
unique_key = '_airbyte_ab_id',
|
||||||
|
schema = "_airbyte_unibag",
|
||||||
|
tags = [ "top-level-intermediate" ]
|
||||||
|
) }}
|
||||||
|
-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
|
||||||
|
-- depends_on: {{ source('unibag', '_airbyte_raw_social_post_views') }}
|
||||||
|
select
|
||||||
|
{{ json_extract_scalar('_airbyte_data', ['_id'], ['_id']) }} as _id,
|
||||||
|
{{ json_extract_scalar('_airbyte_data', ['user'], ['user']) }} as {{ adapter.quote('user') }},
|
||||||
|
{{ json_extract_scalar('_airbyte_data', ['clientIP'], ['clientIP']) }} as clientip,
|
||||||
|
{{ json_extract_scalar('_airbyte_data', ['lastViewAt'], ['lastViewAt']) }} as lastviewat,
|
||||||
|
{{ json_extract_scalar('_airbyte_data', ['socialPost'], ['socialPost']) }} as socialpost,
|
||||||
|
_airbyte_ab_id,
|
||||||
|
_airbyte_emitted_at,
|
||||||
|
{{ current_timestamp() }} as _airbyte_normalized_at
|
||||||
|
from {{ source('unibag', '_airbyte_raw_social_post_views') }} as table_alias
|
||||||
|
-- social_post_views
|
||||||
|
where 1 = 1
|
||||||
|
|
|
@ -0,0 +1,21 @@
|
||||||
|
{{ config(
|
||||||
|
indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
|
||||||
|
unique_key = '_airbyte_ab_id',
|
||||||
|
schema = "_airbyte_unibag",
|
||||||
|
tags = [ "top-level-intermediate" ]
|
||||||
|
) }}
|
||||||
|
-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
|
||||||
|
-- depends_on: {{ ref('social_post_views_ab1') }}
|
||||||
|
select
|
||||||
|
cast(_id as {{ dbt_utils.type_string() }}) as _id,
|
||||||
|
cast({{ adapter.quote('user') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('user') }},
|
||||||
|
cast(clientip as {{ dbt_utils.type_string() }}) as clientip,
|
||||||
|
cast(lastviewat as {{ dbt_utils.type_string() }}) as lastviewat,
|
||||||
|
cast(socialpost as {{ dbt_utils.type_string() }}) as socialpost,
|
||||||
|
_airbyte_ab_id,
|
||||||
|
_airbyte_emitted_at,
|
||||||
|
{{ current_timestamp() }} as _airbyte_normalized_at
|
||||||
|
from {{ ref('social_post_views_ab1') }}
|
||||||
|
-- social_post_views
|
||||||
|
where 1 = 1
|
||||||
|
|
|
@ -0,0 +1,21 @@
|
||||||
|
{{ config(
|
||||||
|
indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
|
||||||
|
unique_key = '_airbyte_ab_id',
|
||||||
|
schema = "_airbyte_unibag",
|
||||||
|
tags = [ "top-level-intermediate" ]
|
||||||
|
) }}
|
||||||
|
-- SQL model to build a hash column based on the values of this record
|
||||||
|
-- depends_on: {{ ref('social_post_views_ab2') }}
|
||||||
|
select
|
||||||
|
{{ dbt_utils.surrogate_key([
|
||||||
|
'_id',
|
||||||
|
adapter.quote('user'),
|
||||||
|
'clientip',
|
||||||
|
'lastviewat',
|
||||||
|
'socialpost',
|
||||||
|
]) }} as _airbyte_social_post_views_hashid,
|
||||||
|
tmp.*
|
||||||
|
from {{ ref('social_post_views_ab2') }} tmp
|
||||||
|
-- social_post_views
|
||||||
|
where 1 = 1
|
||||||
|
|
|
@ -0,0 +1,43 @@
|
||||||
|
{{ config(
|
||||||
|
indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
|
||||||
|
unique_key = '_airbyte_ab_id',
|
||||||
|
schema = "_airbyte_unibag",
|
||||||
|
tags = [ "top-level-intermediate" ]
|
||||||
|
) }}
|
||||||
|
-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
|
||||||
|
-- depends_on: {{ source('unibag', '_airbyte_raw_social_posts') }}
|
||||||
|
select
|
||||||
|
{{ json_extract_scalar('_airbyte_data', ['_id'], ['_id']) }} as _id,
|
||||||
|
{{ json_extract_array('_airbyte_data', ['tags'], ['tags']) }} as tags,
|
||||||
|
{{ json_extract_scalar('_airbyte_data', ['isPin'], ['isPin']) }} as ispin,
|
||||||
|
{{ json_extract_scalar('_airbyte_data', ['order'], ['order']) }} as {{ adapter.quote('order') }},
|
||||||
|
{{ json_extract_scalar('_airbyte_data', ['title'], ['title']) }} as title,
|
||||||
|
{{ json_extract_scalar('_airbyte_data', ['author'], ['author']) }} as author,
|
||||||
|
{{ json_extract_array('_airbyte_data', ['cities'], ['cities']) }} as cities,
|
||||||
|
{{ json_extract_array('_airbyte_data', ['photos'], ['photos']) }} as photos,
|
||||||
|
{{ json_extract_scalar('_airbyte_data', ['reason'], ['reason']) }} as reason,
|
||||||
|
{{ json_extract_scalar('_airbyte_data', ['status'], ['status']) }} as status,
|
||||||
|
{{ json_extract_array('_airbyte_data', ['videos'], ['videos']) }} as videos,
|
||||||
|
{{ json_extract_scalar('_airbyte_data', ['content'], ['content']) }} as {{ adapter.quote('content') }},
|
||||||
|
{{ json_extract_scalar('_airbyte_data', ['isTimer'], ['isTimer']) }} as istimer,
|
||||||
|
{{ json_extract_scalar('_airbyte_data', ['startAt'], ['startAt']) }} as startat,
|
||||||
|
{{ json_extract_array('_airbyte_data', ['products'], ['products']) }} as products,
|
||||||
|
{{ json_extract_scalar('_airbyte_data', ['createdAt'], ['createdAt']) }} as createdat,
|
||||||
|
{{ json_extract_scalar('_airbyte_data', ['hasUpdate'], ['hasUpdate']) }} as hasupdate,
|
||||||
|
{{ json_extract_scalar('_airbyte_data', ['statistic'], ['statistic']) }} as statistic,
|
||||||
|
{{ json_extract_scalar('_airbyte_data', ['updatedAt'], ['updatedAt']) }} as updatedat,
|
||||||
|
{{ json_extract_array('_airbyte_data', ['categories'], ['categories']) }} as categories,
|
||||||
|
{{ json_extract_scalar('_airbyte_data', ['fromSystem'], ['fromSystem']) }} as fromsystem,
|
||||||
|
{{ json_extract_scalar('_airbyte_data', ['contributor'], ['contributor']) }} as contributor,
|
||||||
|
{{ json_extract_scalar('_airbyte_data', ['publishedAt'], ['publishedAt']) }} as publishedat,
|
||||||
|
{{ json_extract_scalar('_airbyte_data', ['searchString'], ['searchString']) }} as searchstring,
|
||||||
|
{{ json_extract_scalar('_airbyte_data', ['updatedCount'], ['updatedCount']) }} as updatedcount,
|
||||||
|
{{ json_extract_scalar('_airbyte_data', ['shareStatistic'], ['shareStatistic']) }} as sharestatistic,
|
||||||
|
{{ json_extract_scalar('_airbyte_data', ['enableNotificationForContributor'], ['enableNotificationForContributor']) }} as enablenotificationforcontributor,
|
||||||
|
_airbyte_ab_id,
|
||||||
|
_airbyte_emitted_at,
|
||||||
|
{{ current_timestamp() }} as _airbyte_normalized_at
|
||||||
|
from {{ source('unibag', '_airbyte_raw_social_posts') }} as table_alias
|
||||||
|
-- social_posts
|
||||||
|
where 1 = 1
|
||||||
|
|
|
@ -0,0 +1,43 @@
|
||||||
|
{{ config(
|
||||||
|
indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
|
||||||
|
unique_key = '_airbyte_ab_id',
|
||||||
|
schema = "_airbyte_unibag",
|
||||||
|
tags = [ "top-level-intermediate" ]
|
||||||
|
) }}
|
||||||
|
-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
|
||||||
|
-- depends_on: {{ ref('social_posts_ab1') }}
|
||||||
|
select
|
||||||
|
cast(_id as {{ dbt_utils.type_string() }}) as _id,
|
||||||
|
tags,
|
||||||
|
{{ cast_to_boolean('ispin') }} as ispin,
|
||||||
|
cast({{ adapter.quote('order') }} as {{ dbt_utils.type_float() }}) as {{ adapter.quote('order') }},
|
||||||
|
cast(title as {{ dbt_utils.type_string() }}) as title,
|
||||||
|
cast(author as {{ dbt_utils.type_string() }}) as author,
|
||||||
|
cities,
|
||||||
|
photos,
|
||||||
|
cast(reason as {{ dbt_utils.type_string() }}) as reason,
|
||||||
|
cast(status as {{ dbt_utils.type_string() }}) as status,
|
||||||
|
videos,
|
||||||
|
cast({{ adapter.quote('content') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('content') }},
|
||||||
|
{{ cast_to_boolean('istimer') }} as istimer,
|
||||||
|
cast(startat as {{ dbt_utils.type_string() }}) as startat,
|
||||||
|
products,
|
||||||
|
cast(createdat as {{ dbt_utils.type_string() }}) as createdat,
|
||||||
|
{{ cast_to_boolean('hasupdate') }} as hasupdate,
|
||||||
|
cast(statistic as {{ dbt_utils.type_string() }}) as statistic,
|
||||||
|
cast(updatedat as {{ dbt_utils.type_string() }}) as updatedat,
|
||||||
|
categories,
|
||||||
|
{{ cast_to_boolean('fromsystem') }} as fromsystem,
|
||||||
|
cast(contributor as {{ dbt_utils.type_string() }}) as contributor,
|
||||||
|
cast(publishedat as {{ dbt_utils.type_string() }}) as publishedat,
|
||||||
|
cast(searchstring as {{ dbt_utils.type_string() }}) as searchstring,
|
||||||
|
cast(updatedcount as {{ dbt_utils.type_float() }}) as updatedcount,
|
||||||
|
cast(sharestatistic as {{ dbt_utils.type_string() }}) as sharestatistic,
|
||||||
|
{{ cast_to_boolean('enablenotificationforcontributor') }} as enablenotificationforcontributor,
|
||||||
|
_airbyte_ab_id,
|
||||||
|
_airbyte_emitted_at,
|
||||||
|
{{ current_timestamp() }} as _airbyte_normalized_at
|
||||||
|
from {{ ref('social_posts_ab1') }}
|
||||||
|
-- social_posts
|
||||||
|
where 1 = 1
|
||||||
|
|
|
@ -0,0 +1,43 @@
|
||||||
|
{{ config(
|
||||||
|
indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
|
||||||
|
unique_key = '_airbyte_ab_id',
|
||||||
|
schema = "_airbyte_unibag",
|
||||||
|
tags = [ "top-level-intermediate" ]
|
||||||
|
) }}
|
||||||
|
-- SQL model to build a hash column based on the values of this record
|
||||||
|
-- depends_on: {{ ref('social_posts_ab2') }}
|
||||||
|
select
|
||||||
|
{{ dbt_utils.surrogate_key([
|
||||||
|
'_id',
|
||||||
|
array_to_string('tags'),
|
||||||
|
boolean_to_string('ispin'),
|
||||||
|
adapter.quote('order'),
|
||||||
|
'title',
|
||||||
|
'author',
|
||||||
|
array_to_string('cities'),
|
||||||
|
array_to_string('photos'),
|
||||||
|
'reason',
|
||||||
|
'status',
|
||||||
|
array_to_string('videos'),
|
||||||
|
adapter.quote('content'),
|
||||||
|
boolean_to_string('istimer'),
|
||||||
|
'startat',
|
||||||
|
array_to_string('products'),
|
||||||
|
'createdat',
|
||||||
|
boolean_to_string('hasupdate'),
|
||||||
|
'statistic',
|
||||||
|
'updatedat',
|
||||||
|
array_to_string('categories'),
|
||||||
|
boolean_to_string('fromsystem'),
|
||||||
|
'contributor',
|
||||||
|
'publishedat',
|
||||||
|
'searchstring',
|
||||||
|
'updatedcount',
|
||||||
|
'sharestatistic',
|
||||||
|
boolean_to_string('enablenotificationforcontributor'),
|
||||||
|
]) }} as _airbyte_social_posts_hashid,
|
||||||
|
tmp.*
|
||||||
|
from {{ ref('social_posts_ab2') }} tmp
|
||||||
|
-- social_posts
|
||||||
|
where 1 = 1
|
||||||
|
|
|
@ -0,0 +1,22 @@
|
||||||
|
{{ config(
|
||||||
|
indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
|
||||||
|
unique_key = '_airbyte_ab_id',
|
||||||
|
schema = "_airbyte_unibag",
|
||||||
|
tags = [ "top-level-intermediate" ]
|
||||||
|
) }}
|
||||||
|
-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
|
||||||
|
-- depends_on: {{ source('unibag', '_airbyte_raw_tracking_product_shares') }}
|
||||||
|
select
|
||||||
|
{{ json_extract_scalar('_airbyte_data', ['_id'], ['_id']) }} as _id,
|
||||||
|
{{ json_extract_scalar('_airbyte_data', ['user'], ['user']) }} as {{ adapter.quote('user') }},
|
||||||
|
{{ json_extract_scalar('_airbyte_data', ['source'], ['source']) }} as {{ adapter.quote('source') }},
|
||||||
|
{{ json_extract_scalar('_airbyte_data', ['options'], ['options']) }} as {{ adapter.quote('options') }},
|
||||||
|
{{ json_extract_scalar('_airbyte_data', ['products'], ['products']) }} as products,
|
||||||
|
{{ json_extract_scalar('_airbyte_data', ['createdAt'], ['createdAt']) }} as createdat,
|
||||||
|
_airbyte_ab_id,
|
||||||
|
_airbyte_emitted_at,
|
||||||
|
{{ current_timestamp() }} as _airbyte_normalized_at
|
||||||
|
from {{ source('unibag', '_airbyte_raw_tracking_product_shares') }} as table_alias
|
||||||
|
-- tracking_product_shares
|
||||||
|
where 1 = 1
|
||||||
|
|
|
@ -0,0 +1,22 @@
|
||||||
|
{{ config(
|
||||||
|
indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
|
||||||
|
unique_key = '_airbyte_ab_id',
|
||||||
|
schema = "_airbyte_unibag",
|
||||||
|
tags = [ "top-level-intermediate" ]
|
||||||
|
) }}
|
||||||
|
-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
|
||||||
|
-- depends_on: {{ ref('tracking_product_shares_ab1') }}
|
||||||
|
select
|
||||||
|
cast(_id as {{ dbt_utils.type_string() }}) as _id,
|
||||||
|
cast({{ adapter.quote('user') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('user') }},
|
||||||
|
cast({{ adapter.quote('source') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('source') }},
|
||||||
|
cast({{ adapter.quote('options') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('options') }},
|
||||||
|
cast(products as {{ dbt_utils.type_string() }}) as products,
|
||||||
|
cast(createdat as {{ dbt_utils.type_string() }}) as createdat,
|
||||||
|
_airbyte_ab_id,
|
||||||
|
_airbyte_emitted_at,
|
||||||
|
{{ current_timestamp() }} as _airbyte_normalized_at
|
||||||
|
from {{ ref('tracking_product_shares_ab1') }}
|
||||||
|
-- tracking_product_shares
|
||||||
|
where 1 = 1
|
||||||
|
|
|
@ -0,0 +1,22 @@
|
||||||
|
{{ config(
|
||||||
|
indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
|
||||||
|
unique_key = '_airbyte_ab_id',
|
||||||
|
schema = "_airbyte_unibag",
|
||||||
|
tags = [ "top-level-intermediate" ]
|
||||||
|
) }}
|
||||||
|
-- SQL model to build a hash column based on the values of this record
|
||||||
|
-- depends_on: {{ ref('tracking_product_shares_ab2') }}
|
||||||
|
select
|
||||||
|
{{ dbt_utils.surrogate_key([
|
||||||
|
'_id',
|
||||||
|
adapter.quote('user'),
|
||||||
|
adapter.quote('source'),
|
||||||
|
adapter.quote('options'),
|
||||||
|
'products',
|
||||||
|
'createdat',
|
||||||
|
]) }} as _airbyte_tracking_product_shares_hashid,
|
||||||
|
tmp.*
|
||||||
|
from {{ ref('tracking_product_shares_ab2') }} tmp
|
||||||
|
-- tracking_product_shares
|
||||||
|
where 1 = 1
|
||||||
|
|
|
@ -0,0 +1,20 @@
|
||||||
|
{{ config(
|
||||||
|
indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
|
||||||
|
unique_key = '_airbyte_ab_id',
|
||||||
|
schema = "unibag",
|
||||||
|
tags = [ "top-level" ]
|
||||||
|
) }}
|
||||||
|
-- Final base SQL model
|
||||||
|
-- depends_on: {{ ref('social_post_views_ab3') }}
|
||||||
|
select
|
||||||
|
_id,
|
||||||
|
{{ adapter.quote('user') }} AS seller_id,
|
||||||
|
socialpost AS social_post_id,
|
||||||
|
_airbyte_ab_id,
|
||||||
|
_airbyte_emitted_at,
|
||||||
|
{{ current_timestamp() }} as _airbyte_normalized_at,
|
||||||
|
_airbyte_social_post_views_hashid
|
||||||
|
from {{ ref('social_post_views_ab3') }}
|
||||||
|
-- social_post_views from {{ source('unibag', '_airbyte_raw_social_post_views') }}
|
||||||
|
where 1 = 1
|
||||||
|
|
|
@ -0,0 +1,38 @@
|
||||||
|
{{ config(
|
||||||
|
indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
|
||||||
|
unique_key = '_airbyte_ab_id',
|
||||||
|
schema = "unibag",
|
||||||
|
tags = [ "top-level" ]
|
||||||
|
) }}
|
||||||
|
-- Final base SQL model
|
||||||
|
-- depends_on: {{ ref('social_posts_ab3') }}
|
||||||
|
select
|
||||||
|
_id,
|
||||||
|
title,
|
||||||
|
author AS author_id,
|
||||||
|
{{ adapter.quote('content') }},
|
||||||
|
status,
|
||||||
|
cast({{ adapter.quote('photos') }} AS jsonb) AS photos,
|
||||||
|
cast({{ adapter.quote('videos') }} AS jsonb) AS videos,
|
||||||
|
string_to_array(REPLACE(REPLACE(REPLACE(tags::text, '[', ''), ']', ''), '"', ''), ',') AS tags,
|
||||||
|
COALESCE(cast({{ adapter.quote('statistic') }}::json->>'views' AS integer), 0) AS statistic_views,
|
||||||
|
COALESCE(cast({{ adapter.quote('statistic') }}::json->>'uniqueViews' AS integer), 0) AS statistic_unique_views,
|
||||||
|
COALESCE(cast({{ adapter.quote('statistic') }}::json->>'likes' AS integer), 0) AS statistic_likes,
|
||||||
|
COALESCE(cast({{ adapter.quote('statistic') }}::json->>'shares' AS integer), 0) AS statistic_shares,
|
||||||
|
COALESCE(cast({{ adapter.quote('statistic') }}::json->>'comments' AS integer), 0) AS statistic_comments,
|
||||||
|
string_to_array(REPLACE(REPLACE(REPLACE(products::text, '[', ''), ']', ''), '"', ''), ',') AS products,
|
||||||
|
string_to_array(REPLACE(REPLACE(REPLACE(categories::text, '[', ''), ']', ''), '"', ''), ',') AS categories,
|
||||||
|
string_to_array(REPLACE(REPLACE(REPLACE(cities::text, '[', ''), ']', ''), '"', ''), ',') AS cities,
|
||||||
|
{{ adapter.quote('order') }},
|
||||||
|
contributor AS contributor_id,
|
||||||
|
createdat::timestamp AS created_at,
|
||||||
|
updatedat::timestamp AS updated_at,
|
||||||
|
publishedat::timestamp AS published_at,
|
||||||
|
_airbyte_ab_id,
|
||||||
|
_airbyte_emitted_at,
|
||||||
|
{{ current_timestamp() }} as _airbyte_normalized_at,
|
||||||
|
_airbyte_social_posts_hashid
|
||||||
|
from {{ ref('social_posts_ab3') }}
|
||||||
|
-- social_posts from {{ source('unibag', '_airbyte_raw_social_posts') }}
|
||||||
|
where 1 = 1
|
||||||
|
|
|
@ -0,0 +1,23 @@
|
||||||
|
{{ config(
|
||||||
|
indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
|
||||||
|
unique_key = '_airbyte_ab_id',
|
||||||
|
schema = "unibag",
|
||||||
|
tags = [ "top-level" ]
|
||||||
|
) }}
|
||||||
|
-- Final base SQL model
|
||||||
|
-- depends_on: {{ ref('tracking_product_shares_ab3') }}
|
||||||
|
select
|
||||||
|
_id,
|
||||||
|
{{ adapter.quote('source') }},
|
||||||
|
{{ adapter.quote('user') }} AS seller_id,
|
||||||
|
products AS product_id,
|
||||||
|
{{ adapter.quote('options') }}::json->>'action' AS options_action,
|
||||||
|
createdat::timestamp AS created_at,
|
||||||
|
_airbyte_ab_id,
|
||||||
|
_airbyte_emitted_at,
|
||||||
|
{{ current_timestamp() }} as _airbyte_normalized_at,
|
||||||
|
_airbyte_tracking_product_shares_hashid
|
||||||
|
from {{ ref('tracking_product_shares_ab3') }}
|
||||||
|
-- tracking_product_shares from {{ source('unibag', '_airbyte_raw_tracking_product_shares') }}
|
||||||
|
where 1 = 1
|
||||||
|
|
|
@ -29,11 +29,14 @@ sources:
|
||||||
- name: _airbyte_raw_promotion_orders
|
- name: _airbyte_raw_promotion_orders
|
||||||
- name: _airbyte_raw_promotions
|
- name: _airbyte_raw_promotions
|
||||||
- name: _airbyte_raw_referrals
|
- name: _airbyte_raw_referrals
|
||||||
|
- name: _airbyte_raw_social_post_views
|
||||||
|
- name: _airbyte_raw_social_posts
|
||||||
- name: _airbyte_raw_suppliers
|
- name: _airbyte_raw_suppliers
|
||||||
- name: _airbyte_raw_team_activities
|
- name: _airbyte_raw_team_activities
|
||||||
- name: _airbyte_raw_team_bonus
|
- name: _airbyte_raw_team_bonus
|
||||||
- name: _airbyte_raw_team_members
|
- name: _airbyte_raw_team_members
|
||||||
- name: _airbyte_raw_teams
|
- name: _airbyte_raw_teams
|
||||||
|
- name: _airbyte_raw_tracking_product_shares
|
||||||
- name: _airbyte_raw_user_identifications
|
- name: _airbyte_raw_user_identifications
|
||||||
- name: _airbyte_raw_users
|
- name: _airbyte_raw_users
|
||||||
- name: _airbyte_raw_wards
|
- name: _airbyte_raw_wards
|
||||||
|
|
|
@ -2,4 +2,4 @@
|
||||||
|
|
||||||
packages:
|
packages:
|
||||||
- git: "https://github.com/fishtown-analytics/dbt-utils.git"
|
- git: "https://github.com/fishtown-analytics/dbt-utils.git"
|
||||||
revision: 0.7.4
|
revision: 0.8.2
|
||||||
|
|
|
@ -6,10 +6,10 @@ config:
|
||||||
normalize:
|
normalize:
|
||||||
outputs:
|
outputs:
|
||||||
prod:
|
prod:
|
||||||
dbname: selly_etl
|
dbname: mongo_etl
|
||||||
host: localhost
|
host: 18.140.112.89
|
||||||
pass: '123'
|
pass: bvxJaDGW2R55uyDXfODJ2a0Y
|
||||||
port: 5555
|
port: 5432
|
||||||
schema: public
|
schema: public
|
||||||
threads: 8
|
threads: 8
|
||||||
type: postgres
|
type: postgres
|
||||||
|
|
Loading…
Reference in New Issue