-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdbt_project.yml
More file actions
125 lines (118 loc) · 8.06 KB
/
dbt_project.yml
File metadata and controls
125 lines (118 loc) · 8.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
name: 'ta_firebase'
version: '0.9.13'
config-version: 2

# This setting configures which "profile" dbt uses for this project.
profile: 'default'

# Resource paths: where dbt looks for each resource type.
model-paths: ["models"]
analysis-paths: ["analyses"]
test-paths: ["tests"]
seed-paths: ["seeds"]
macro-paths: ["macros"]
snapshot-paths: ["snapshots"]

target-path: "target"  # directory which will store compiled SQL files
clean-targets:  # directories to be removed by `dbt clean`
  - "target"
  - "dbt_packages"

models:
  ta_firebase:
    # Applies to all models in this package.
    materialized: table
    schema: ta_firebase
    +persist_docs:
      relation: true
      columns: true

seeds:
  ta_firebase:
    schema: ta

vars:
  ta_firebase:
    "TA:DONT_CARE": "MAKE_YAML_WORK"  # optional
    "TA:SOURCES_MULTIPLE_PROJECTS_GENERATED": false
    # "TA:SOURCES":
    #   - {project_id: watermark-maker,
    #      analytics_dataset_id: analytics_150733022,
    #      events_table: events_*,
    #      crashlytics_dataset_id: firebase_crashlytics,
    #      crashlytics_table: com_kobe_watermarkmaker_*}
    #   # add more projects here
    #   - {project_id: logo-maker-35628,
    #      analytics_dataset_id: analytics_153578578,
    #      events_table: events_*,
    #      crashlytics_dataset_id: firebase_crashlytics,
    #      crashlytics_table: com_dave_logomaker_*}
    # "TA:SQLMESH_LOOKBACK"

    # Don't define variables here, because if they aren't overwritten in the
    # client yml, then they will resolve to "None". Instead, use them in the
    # package where you need them, with the appropriate default value.
    # Furthermore, whatever we use in sources.yml, we can't define in this
    # package's dbt_project.yml, or else they won't be able to be overwritten
    # by the client.
    # Any mandatory variables you want to be enforced should go into
    # ta_mandatory_vars.sql.
    # The comments listed here are just for our sanity & easier c/p in client configs.
    # "TA:FIREBASE_ANALYTICS_CUSTOM_INSTALL_EVENT": "" # optional e.g. attributed_first_open
    # "TA:CUSTOM_PLATFORM_PREPROCESSOR": "IF(app_info.version LIKE '%nexus', 'nexus', platform)"
    # "TA:FIREBASE_ANALYTICS_DEFAULT_INCREMENTAL_DAYS": # optional
    # "TA:FIREBASE_CRASHLYTICS_DEFAULT_INCREMENTAL_DAYS": # optional
    # "TA:FIREBASE_ANALYTICS_FULL_REFRESH_START_DATE": "2018-01-01" # mandatory
    # "TA:FIREBASE_ANALYTICS_FULL_REFRESH_END_DATE": "2019-01-05" # optional, if you want to make
    # "TA:FIREBASE_CRASHLYTICS_FULL_REFRESH_START_DATE": "2018-01-01" # mandatory
    # "TA:FIREBASE_CRASHLYTICS_FULL_REFRESH_END_DATE": "2019-01-05" # optional, if you want to make
    # "TA:DIMENSION_TO_EXCLUDE_IN_ROLLUPS": # optional, what built-in TA dimensions to exclude from bubbling up in the rollups (& save the row count)
    #   - geo.city
    #   - geo.metro
    # "TA:CUSTOM_USER_PROPERTIES": # data_types : "string", "int", "double"
    #   - { key_name: up_just_in_raw, data_type: 'string'}
    #   - { key_name: first_open_time, data_type: 'int'}
    #   - { key_name: poorly_set_variable, data_type: 'double'}
    # "TA:CUSTOM_EVENT_PARAMETERS": # data_types : "string", "int", "double"
    #   - { key_name: quantity, data_type: 'int', rollup_type: 'metric'}
    #   - { key_name: quantity, data_type: 'int', rollup_type: 'metric', metric_rollup_transformation: 'SUM(##)', struct_field_name: 'quantity_int2' }
    #   - { key_name: quantity, data_type: 'int', rollup_type: 'metric', metric_rollup_transformation: 'AVG(##)', struct_field_name: 'quantity_int3', rollup_struct_field_name: 'cm_quantity_avg' }
    #   - { key_name: quantity, data_type: 'int', rollup_type: 'metricOnly', metric_rollup_transformation: 'MIN(##)', struct_field_name: 'quantity_int', rollup_struct_field_name: 'cm_quantity_int_min' }
    #   - { key_name: myDimension, data_type: 'string', rollup_type: 'alsoForceNullDimension'}
    #   - { key_name: server_sent_ts, data_type: 'string', rollup_type: 'dimension', extract_transformation: 'TIMESTAMP(##)', output_data_type: 'TIMESTAMP', struct_field_name: 'server_sent_ts' }
    # Field reference for a TA:CUSTOM_EVENT_PARAMETERS entry:
    #   key_name: server_sent_ts
    #   data_type: string
    #   rollup_type: raw/dimension/alsoForceNullDimension/metric/metricOnly
    #     raw: it will just be extracted and kept in google_analytics_events_raw. It won't be propagated upwards
    #     dimension: raw + it will be propagated upwards as a dimension
    #     alsoForceNullDimension: dimension + it will be also propagated in the "forced_nulls" table with this dimension set to NULL
    #     metric: raw + it will be propagated upwards as a metric
    #     metricOnly: no raw, it assumes there is already a raw and will use that
    #   metric_rollup_transformation: 'SUM(##)' # optional, default is SUM()
    #   struct_field_name: quantity_ts # optional; default is key_name + '_' + data_type
    #   rollup_struct_field_name: cm_quantity_sum # optional; default is cm_ + struct_field_name + metric_rollup_transformation function
    #   extract_transformation: "PARSE_DATE('%Y%m%d', ##)" # optional, becomes PARSE_DATE('%Y%m%d', value.string_value); default is just the int/string value
    #   output_data_type: TIMESTAMP # optional, default is the same as what was extracted, e.g. string
    #   event_name_filter: ["notification_received"] # optional, default is to extract for all events
    #   force_null_dimension_event_name_filter: [] # optional, default is to extract for all events
    # By default, metrics get aggregated as "cm_foo_int". So in the raw we have "foo_int", in the rollup we have "cm_foo_int".
    # Changing the "struct_field_name" will change both.
    #   - { key_name: quantity, data_type: 'int', rollup_type: 'metric'}
    #   - { key_name: quantity, data_type: 'int', rollup_type: 'metric', metric_rollup_transformation: 'SUM(##)', struct_field_name: 'quantity_int2' }
    #   - { key_name: quantity, data_type: 'int', rollup_type: 'metric', metric_rollup_transformation: 'AVG(##)', struct_field_name: 'quantity_avg' }
    #   - { key_name: quantity, data_type: 'int', rollup_type: 'metricOnly', metric_rollup_transformation: 'SUM(##)', struct_field_name: 'quantity_int_sum2' }
    #   - { key_name: server_sent_ts, data_type: 'string', rollup_type: 'raw', extract_transformation: 'TIMESTAMP(##)', output_data_type: 'TIMESTAMP', struct_field_name: 'server_sent_ts' }
    # "TA:CUSTOM_CRASHLYTICS_KEYS": # data_types : "string", "int", "float", "double"
    #   - { key_name: network_state, data_type: 'string'}
    # "TA:CUSTOM_APP_HEALTH_MEASURES": # optional, any custom measures that you want rolled up in the app_health model
    #   - name: signup_step_1 # no spaces, needs to be a proper column name
    #     model: analytics_forced_nulls # optional. It can either be analytics/analytics-forced-nulls/crashlytics. Default is analytics
    #     agg: SUM(##)
    #     event_name: ui_view_show
    #     # mini_measures: ["cnt", "users"] # optional, the default it aggregates over if unspecified
    #     additional_filter: event_parameters.view_name_string = 'signup step 1'
    #   - name: svc_inbound_duration
    #     agg: SUM(##)
    #     event_name: spend_virtual_currency
    #     mini_measures: ["cm_quantity_int"]
    #     additional_filter: event_parameters.direction_string = 'inbound'
    #   - name: fatal_crashes_during_calls
    #     model: crashlytics
    #     agg: SUM(##)
    #     additional_filter: error_type = 'FATAL' AND custom_keys.call_state = 'duringCall'
    # "TA:CUSTOM_APP_HEALTH_METRICS": # TODO later on, forward to the BI tool as the semantic layer
    #   # NOT IMPLEMENTED ATM
    #   - name: signup_ratio
    #     type: ratio
    #     numerator_agg: SUM(##)
    #     numerator_filter: event_name LIKE '%view_show' AND event_parameters.view_name_string = 'signup step 1' AND event_parameters.view_type_string = 'ta-forced-null'
    #     denominator_agg: SUM(##)
    #     denominator_filter: event_name LIKE '%view_show' AND event_parameters.view_name_string = 'signup step 5' AND event_parameters.view_type_string = 'ta-forced-null'