Skip to content

Commit 3925dd6

Browse files
authored
Add files via upload
1 parent 420b6b6 commit 3925dd6

41 files changed

Lines changed: 2577 additions & 1 deletion

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

README.md

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,21 @@
1-
# firebase-analytics-dbt
1+
Welcome to your new dbt project!
2+
3+
### Using the starter project
4+
5+
Try running the following commands:
6+
- dbt run
7+
- dbt test
8+
9+
10+
### Resources:
11+
- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction)
12+
- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers
13+
- Join the [dbt community](http://community.getdbt.com/) to learn from other analytics engineers
14+
- Find [dbt events](https://events.getdbt.com) near you
15+
- Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices
16+
17+
18+
## TODO
19+
20+
- tests for counts from _raw to _events
21+
- investigate why the DAU counts in app_health (aka _events) don't match the ones from raw. There's a dimension in there that's not fully disjoint; maybe make a _events_disjunct table as well
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
{# Analysis query: for every "mini column" (leaf dimension) of fb_analytics_events_raw,
   count its distinct values on a single day, so we can see which dimensions blow up
   the rollup row counts. Emits one CTE per dimension plus a UNION ALL over them,
   ordered by cardinality descending. #}

{%- set columnNamesEventDimensions = ["event_dates", "install_dates", "app_id", "event_name", "platform", "appstore", "app_version", "platform_version",
                                      "user_properties", "event_parameters",
                                      "geo", "device_hardware", "device_language", "device_time_zone_offset",
                                      "traffic_source"
                                     ] -%}

{%- set miniColumnsToIgnoreInGroupBy = overbase_firebase.get_mini_columns_to_ignore_when_rolling_up() -%}

{# Ignore all time zones except the first & last (they're all the same, just save the computational effort) #}
{%- set timezones = overbase_firebase.generate_date_timezone_age_struct('dont care') | map(attribute=0) | list -%}
{%- set timezones = timezones[1:-1] -%}
{%- set miniColumnsToIgnoreInGroupBy = miniColumnsToIgnoreInGroupBy + overbase_firebase.list_map_and_add_prefix(timezones, 'event_dates.') + overbase_firebase.list_map_and_add_prefix(timezones, 'install_dates.') -%}

{%- set tmp_res = overbase_firebase.get_filtered_columns_for_table("fb_analytics_events_raw", columnNamesEventDimensions, miniColumnsToIgnoreInGroupBy) -%}
{%- set columnsForEventDimensions = tmp_res[0] -%}

{%- set minicolumns = overbase_firebase.unpack_columns_into_minicolumns_array(columnsForEventDimensions, miniColumnsToIgnoreInGroupBy, [], "", "") -%}
{%- set unionAllSelects = [] -%}

{# The day to analyze is configurable; the default preserves the previously hard-coded date. #}
{%- set analysisDate = var('OVERBASE:CARDINALITY_ANALYSIS_DATE', '2023-10-10') -%}

WITH
{%- for column in minicolumns -%}
    {# column[0] is the SQL expression for the mini column, column[1] its display name #}
    {{ ", " if not loop.first else "" }} dim_{{ loop.index }} AS ( SELECT COUNT(DISTINCT {{ column[0] }}) AS dist_cnt FROM {{ ref("fb_analytics_events_raw") }} WHERE DATE(event_date) = '{{ analysisDate }}')
    {% set _ = unionAllSelects.append("SELECT '" ~ column[1] ~ "' AS dim_name, dist_cnt FROM dim_" ~ loop.index) -%}
{% endfor -%}

{{ unionAllSelects | join('\n UNION ALL ') }}
ORDER BY 2 DESC

dbt_project.yml

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
name: 'overbase_firebase'
version: '0.9.10'
config-version: 2

# This setting configures which "profile" dbt uses for this project.
profile: 'default'

model-paths: ["models"]
analysis-paths: ["analyses"]
test-paths: ["tests"]
seed-paths: ["seeds"]
macro-paths: ["macros"]
snapshot-paths: ["snapshots"]

target-path: "target" # directory which will store compiled SQL files
clean-targets: # directories to be removed by `dbt clean`
  - "target"
  - "dbt_packages"

models:
  overbase_firebase:
    # Applies to all models in this package. Config keys use the `+` prefix
    # consistently (config-version 2) so they cannot be mistaken for
    # sub-directory names.
    +materialized: table
    +schema: overbase
    +persist_docs:
      relation: true
      columns: true

seeds:
  overbase_firebase:
    +schema: overbase

vars:
  overbase_firebase:
    "OVERBASE:DONT_CARE": "MAKE_YAML_WORK" # optional
    # don't define variables here, because if they aren't overwritten in the client yml, then they will resolve to "None".
    # Instead, use them in the package where you need them, with the appropriate default value.
    # Furthermore, whatever we use in sources.yml, we can't define in this package's dbt_project.yml, or else they won't be able to be overwritten by the client.
    # Any mandatory variables you want to be enforced should go into overbase_mandatory_vars.sql
    # The comments listed here are just for our sanity & easier c/p in client configs
    # "OVERBASE:FIREBASE_PROJECT_ID": "overbase" # mandatory, but used in sources.yml
    # "OVERBASE:FIREBASE_ANALYTICS_DATASET_ID": "firebase_analytics_raw_test" # mandatory, but used in sources.yml
    # "OVERBASE:FIREBASE_ANALYTICS_EVENTS_TABLE_NAME": "events_*" # optional, but used in sources.yml
    # "OVERBASE:FIREBASE_ANALYTICS_EVENTS_INTRADAY_TABLE_NAME": "events_intraday_*" # optional, but used in sources.yml
    # "OVERBASE:FIREBASE_ANALYTICS_CUSTOM_INSTALL_EVENT": "" # optional e.g. attributed_first_open
    # "OVERBASE:CUSTOM_PLATFORM_PREPROCESSOR": "IF(app_info.version LIKE '%nexus', 'nexus', platform)"
    # "OVERBASE:FIREBASE_ANALYTICS_DEFAULT_INCREMENTAL_DAYS": # optional
    # "OVERBASE:FIREBASE_CRASHLYTICS_DEFAULT_INCREMENTAL_DAYS": # optional
    # "OVERBASE:FIREBASE_ANALYTICS_FULL_REFRESH_START_DATE": "2018-01-01" # mandatory
    # "OVERBASE:FIREBASE_ANALYTICS_FULL_REFRESH_END_DATE": "2019-01-05" # optional, if you want to cap the refresh window
    # "OVERBASE:FIREBASE_CRASHLYTICS_FULL_REFRESH_START_DATE": "2018-01-01" # mandatory
    # "OVERBASE:FIREBASE_CRASHLYTICS_FULL_REFRESH_END_DATE": "2019-01-05" # optional, if you want to cap the refresh window
    # "OVERBASE:OB_DIMENSION_TO_EXCLUDE_IN_ROLLUPS": # optional, what built-in OB dimensions to exclude from bubbling up in the rollups (& save the row count)
    #   - geo.city
    #   - geo.metro
    # "OVERBASE:CUSTOM_USER_PROPERTIES": # data_types : "string", "int", "double"
    #   - { key_name: up_just_in_raw, data_type: 'string'}
    #   - { key_name: first_open_time, data_type: 'int'}
    #   - { key_name: poorly_set_variable, data_type: 'double'}
    # "OVERBASE:CUSTOM_EVENT_PARAMETERS": # data_types : "string", "int", "double"
    #   - { key_name: quantity, data_type: 'int', rollup_type: 'metric'}
    #   - { key_name: quantity, data_type: 'int', rollup_type: 'metric', metric_rollup_transformation: 'SUM(##)', struct_field_name: 'quantity_int2' }
    #   - { key_name: quantity, data_type: 'int', rollup_type: 'metric', metric_rollup_transformation: 'AVG(##)', struct_field_name: 'quantity_int3', rollup_struct_field_name: 'cm_quantity_avg' }
    #   - { key_name: quantity, data_type: 'int', rollup_type: 'metricOnly', metric_rollup_transformation: 'MIN(##)', struct_field_name: 'quantity_int', rollup_struct_field_name: 'cm_quantity_int_min' }
    #   - { key_name: myDimension, data_type: 'string', rollup_type: 'alsoForceNullDimension'}
    #   - { key_name: server_sent_ts, data_type: 'string', rollup_type: 'dimension', extract_transformation: 'TIMESTAMP(##)', output_data_type: 'TIMESTAMP', struct_field_name: 'server_sent_ts' }
    # key_name: server_sent_ts
    # data_type: string
    # rollup_type: raw/dimension/alsoForceNullDimension/metric/metricOnly
    #   raw: it will just be extracted and kept in fb_analytics_events_raw. It won't be propagated upwards
    #   dimension: raw + it will be propagated upwards as a dimension
    #   alsoForceNullDimension: dimension + it will be also propagated in the "forced_nulls" table with this dimension set to NULL
    #   metric: raw + it will be propagated upwards as a metric
    #   metricOnly: no raw, it assumes there is already a raw and will use that
    #
    # metric_rollup_transformation: 'SUM(##)' # optional, default is SUM()
    # struct_field_name: quantity_ts # optional; default is key_name + '_' + data_type
    # rollup_struct_field_name: cm_quantity_sum # optional; default is cm_ + struct_field_name + metric_rollup_transformation function
    # extract_transformation: "PARSE_DATE('%Y%m%d', ##)" # optional, becomes PARSE_DATE('%Y%m%d', value.string_value); default is just the int/string value
    # output_data_type: TIMESTAMP # optional, default is the same as what was extracted, e.g. string
    # event_name_filter: ["notification_received"] # optional, default is to extract for all events
    # force_null_dimension_event_name_filter: [] # optional, default is to extract for all events
    # by default, metrics get aggregated as "cm_foo_int". So in the raw we have "foo_int", in the rollup we have "cm_foo_int".
    # Changing the "struct_field_name" will change both.
    #   - { key_name: quantity, data_type: 'int', rollup_type: 'metric'}
    #   - { key_name: quantity, data_type: 'int', rollup_type: 'metric', metric_rollup_transformation: 'SUM(##)', struct_field_name: 'quantity_int2' }
    #   - { key_name: quantity, data_type: 'int', rollup_type: 'metric', metric_rollup_transformation: 'AVG(##)', struct_field_name: 'quantity_avg' }
    #   - { key_name: quantity, data_type: 'int', rollup_type: 'metricOnly', metric_rollup_transformation: 'SUM(##)', struct_field_name: 'quantity_int_sum2' }
    #   - { key_name: server_sent_ts, data_type: 'string', rollup_type: 'raw', extract_transformation: 'TIMESTAMP(##)', output_data_type: 'TIMESTAMP', struct_field_name: 'server_sent_ts' }
    # "OVERBASE:CUSTOM_CRASHLYTICS_KEYS": # data_types : "string", "int", "float", "double"
    #   - { key_name: network_state, data_type: 'string'}
    # "OVERBASE:CUSTOM_APP_HEALTH_MEASURES": # optional, any custom measures that you want rolled up in the app_health model
    #   - name: signup_step_1 # no spaces, needs to be a proper column name
    #     model: analytics_forced_nulls # optional. It can either be analytics/analytics-forced-nulls/crashlytics. Default is analytics
    #     agg: SUM(##)
    #     event_name: ui_view_shown
    #     # mini_measures: ["cnt", "users"] # optional, the default it aggregates over if unspecified
    #     additional_filter : event_parameters.view_name_string = 'signup step 1'
    #   - name: svc_inbound_duration
    #     agg: SUM(##)
    #     event_name: spend_virtual_currency
    #     mini_measures: ["cm_quantity_int"]
    #     additional_filter : event_parameters.direction_string = 'inbound'
    #   - name: fatal_crashes_during_calls
    #     model: crashlytics
    #     agg: SUM(##)
    #     additional_filter: error_type = 'FATAL' AND custom_keys.call_state = 'duringCall'
    # "OVERBASE:CUSTOM_APP_HEALTH_METRICS": # TODO later on, forward to the BI tool as the semantic layer
    # NOT IMPLEMENTED ATM
    #   - name: signup_ratio
    #     type: ratio
    #     numerator_agg: SUM(##)
    #     numerator_filter : event_name LIKE '%view_shown' AND event_parameters.view_name_string = 'signup step 1' AND event_parameters.view_type_string = 'ob-forced-null'
    #     denominator_agg: SUM(##)
    #     denominator_filter: event_name LIKE '%view_shown' AND event_parameters.view_name_string = 'signup step 5' AND event_parameters.view_type_string = 'ob-forced-null'

macros/calculate_age_between.sql

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
{# Helpers returning the whole number of days ("age", as INT64) between two
   BigQuery TIMESTAMPs or DATETIMEs. #}

{# TIMESTAMP_DIFF(.., HOUR) returns the floored hour, so an interval that is
   1386 minutes long (54 minutes short of a day) comes back as 23 hours.
   We therefore divide by 24.0, floor, and cast to INT64 to get whole days. #}

{# Observed reference values:
   TIMESTAMP_DIFF('2023-10-13 19:18:56.955001', '2023-10-12 19:20:28.013000', MINUTE) -> 1438
   TIMESTAMP_DIFF('2023-10-13 19:18:56.955001', '2023-10-12 19:20:28.013000', HOUR)   -> 23
   DATETIME_DIFF ('2023-10-13 19:18:56.955001', '2023-10-12 19:20:28.013000', MINUTE) -> 1438
   DATETIME_DIFF ('2023-10-13 19:18:56.955001', '2023-10-12 19:20:28.013000', HOUR)   -> 24 #}

{# Per the examples above, HOUR granularity gives the correct answer only for
   TIMESTAMPs; DATETIME_DIFF(.., HOUR) over-counts, so the DATETIME variant
   below uses MINUTE granularity instead. #}
{%- macro calculate_age_between_timestamps(ts1, ts2) -%}
CAST(FLOOR(SAFE_DIVIDE(TIMESTAMP_DIFF({{ ts1 }}, {{ ts2 }}, HOUR), 24.0)) AS INT64)
{%- endmacro -%}

{# Equivalent MINUTE-based variant of the TIMESTAMP macro, kept for reference:
{%- macro calculate_age_between_timestamps(ts1, ts2) -%}
CAST(FLOOR(SAFE_DIVIDE(TIMESTAMP_DIFF({{ ts1 }}, {{ ts2 }}, MINUTE), 1440.0)) AS INT64)
{%- endmacro -%} #}

{# DATETIME variant: MINUTE granularity divided by 1440 minutes/day,
   because DATETIME_DIFF(.., HOUR) would over-count (see examples above). #}
{%- macro calculate_age_between_datetimes(dt1, dt2) -%}
CAST(FLOOR(SAFE_DIVIDE(DATETIME_DIFF({{ dt1 }}, {{ dt2 }}, MINUTE), 1440.0)) AS INT64)
{%- endmacro -%}

0 commit comments

Comments
 (0)