Skip to content

Commit c3a9e83

Browse files
committed
add features
make it work without generating sources file for one project. add support for multiple datasets on the same project
1 parent 41c3155 commit c3a9e83

8 files changed

Lines changed: 293 additions & 65 deletions

File tree

README.md

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,35 @@ Welcome to your new dbt project!
22

33
### Using the starter project
44

5-
Try running the following commands:
6-
- dbt run
7-
- dbt test
5+
6+
This library works by default for one google cloud project:
7+
# "OVERBASE:SOURCES":
8+
# - {project_id: google_cloud_project_id,
9+
# analytics_dataset_id: schema_id,
10+
# events_table: events_table_prefix*,
11+
# crashlytics_dataset_id: crashlytics_dataset,
12+
# crashlytics_table: crashlytics_table_prefix*}
13+
14+
Adding more project_ids, and multiple analytics dataset_ids for a given project, is also possible.
15+
A few additional steps are required to register multiple projects or datasets as dbt sources.
16+
17+
1) OVERBASE:SOURCES_READY must be set to false (default).
18+
2) add projects and datasets to the OVERBASE:SOURCES variable in your dbt project
19+
3) run the following command to generate sources for all projects:
20+
dbt run-operation -q generate_firebase_sources > models/firebase_sources.yml
21+
4) change OVERBASE:SOURCES_READY to true
22+
# "OVERBASE:SOURCES":
23+
# - {project_id: google_cloud_project_id,
24+
# analytics_dataset_id: schema_id,
25+
# events_table: events_table_prefix*,
26+
# crashlytics_dataset_id: crashlytics_dataset,
27+
# crashlytics_table: crashlytics_table_prefix*}
28+
# - {project_id: google_cloud_project_id2,
29+
# analytics_dataset_ids: [schema_id, schema_id2],
30+
# events_table: events_table_prefix*,
31+
# crashlytics_dataset_id: crashlytics_dataset,
32+
# crashlytics_table: crashlytics_table_prefix*}
33+
834

935

1036
### Resources:
@@ -17,5 +43,4 @@ Try running the following commands:
1743

1844
## TODO
1945

20-
- tests for counts from _raw to _events
2146
- why the DAU counts in app_health (aka _events) don't match the ones from raw. There's a dimension in there that's not fully disjoint; maybe make a _events_disjunct table as well

dbt_project.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ seeds:
3333
vars:
3434
overbase_firebase:
3535
"OVERBASE:DONT_CARE": "MAKE_YAML_WORK" # optional
36+
"OVERBASE:SOURCES_READY" : false
3637
# "OVERBASE:SOURCES":
3738
# - {project_id: watermark-maker,
3839
# analytics_dataset_id: analytics_150733022,

macros/overbase_mandatory_vars.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88
{%- endmacro %}
99

1010
{% macro verify_all_overbase_mandatory_variables() -%}
11-
{{- overbase_firebase.compile_time_mandatory_var("OVERBASE:FIREBASE_PROJECT_ID", "overbase") -}}
12-
{{- overbase_firebase.compile_time_mandatory_var("OVERBASE:FIREBASE_ANALYTICS_DATASET_ID", "firebase_analytics_raw_test") -}}
11+
{{- overbase_firebase.compile_time_mandatory_var("OVERBASE:SOURCES", "overbase") -}}
12+
{{- overbase_firebase.compile_time_mandatory_var("OVERBASE:SOURCES_READY", "firebase_analytics_raw_test") -}}
1313
{{- overbase_firebase.compile_time_mandatory_var("OVERBASE:FIREBASE_ANALYTICS_FULL_REFRESH_START_DATE", "2018-01-01") -}}
1414
{{- overbase_firebase.compile_time_mandatory_var("OVERBASE:FIREBASE_CRASHLYTICS_FULL_REFRESH_START_DATE", "2018-01-01") -}}
1515

models/analytics/fb_analytics_events_raw.sql

Lines changed: 54 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,10 @@
1212
) }}
1313

1414
-- https://support.google.com/firebase/answer/7029846
15-
SELECT TIMESTAMP_MICROS(event_timestamp) as event_ts
15+
SELECT
16+
project_id,
17+
dataset_id,
18+
TIMESTAMP_MICROS(event_timestamp) as event_ts
1619
, DATE(TIMESTAMP_MICROS(event_timestamp)) as event_date
1720
, TIMESTAMP_MICROS(user_first_touch_timestamp) as install_ts
1821
, {{ overbase_firebase.calculate_age_between_timestamps("TIMESTAMP_MICROS(event_timestamp)", "TIMESTAMP_MICROS(user_first_touch_timestamp)") }} as install_age
@@ -57,22 +60,62 @@ SELECT TIMESTAMP_MICROS(event_timestamp) as event_ts
5760
) AS users_ltv
5861
, STRUCT<firebase_app_id STRING, stream_id STRING, advertising_id STRING>(
5962
LOWER(app_info.firebase_app_id), LOWER(stream_id), LOWER({{ null_if_length_zero('device.advertising_id') }})
63+
) as other_ids
6064
, {{ overbase_firebase.generate_date_timezone_struct('TIMESTAMP_MICROS(event_timestamp)') }} as event_dates
6165
, {{ overbase_firebase.generate_date_timezone_struct('TIMESTAMP_MICROS(user_first_touch_timestamp)') }} as install_dates
6266
, COUNT(1) OVER (PARTITION BY user_pseudo_id, event_bundle_sequence_id, event_name, event_timestamp, event_previous_timestamp) as duplicates_cnt
63-
-- FROM {{ source("firebase_analytics", "events") }} as events
6467
FROM
6568
(
66-
{% set projects = var('OVERBASE:SOURCES', []) %}
69+
{%- set projects = var('OVERBASE:SOURCES', []) -%}
70+
{%- set ready = var('OVERBASE:SOURCES_READY', false) -%}
6771

68-
{% for p in projects %}
69-
{% if not loop.first %}UNION ALL{% endif %}
70-
select
71-
'{{ p.project_id }}' as project_id,
72-
*
73-
from {{ source('firebase_analytics__' ~ p.project_id, 'events') }}
74-
WHERE {{ overbase_firebase.analyticsTableSuffixFilter() }}
75-
{% endfor %}
72+
{%- set first = (projects[0] if projects and (projects[0] is mapping) else {}) -%}
73+
{%- set pid0 = first.get('project_id', 'fallback_project') -%}
74+
{%- set ads_raw0 = first.get('analytics_dataset_ids') if first.get('analytics_dataset_ids') is not none else first.get('analytics_dataset_id') -%}
75+
{%- if ads_raw0 is string -%}
76+
{%- set ds0 = ads_raw0 -%}
77+
{%- elif ads_raw0 is iterable and (ads_raw0 | length) > 0 -%}
78+
{%- set ds0 = ads_raw0[0] -%}
79+
{%- else -%}
80+
{%- set ds0 = 'fallback_dataset' -%}
81+
{%- endif -%}
82+
83+
{%- if not ready -%}
84+
-- FALLBACK: use the single parse-safe source until generated sources are ready
85+
SELECT
86+
'{{ pid0 }}' as project_id,
87+
'{{ ds0 }}' as dataset_id,
88+
*
89+
FROM {{ source('firebase_analytics__fallback', 'events') }}
90+
WHERE {{ overbase_firebase.analyticsTableSuffixFilter() }}
91+
{%- else -%}
92+
{%- set ns = namespace(first=true) -%}
93+
{%- for p in projects -%}
94+
{%- set pid = p.get('project_id') -%}
95+
{%- if not pid %}{% continue %}{% endif -%}
96+
{%- set ads_raw = p.get('analytics_dataset_ids') if p.get('analytics_dataset_ids') is not none else p.get('analytics_dataset_id') -%}
97+
{%- if ads_raw is string -%}
98+
{%- set ads_list = [ads_raw] -%}
99+
{%- elif ads_raw is iterable -%}
100+
{%- set ads_list = ads_raw -%}
101+
{%- else -%}
102+
{%- set ads_list = [] -%}
103+
{%- endif -%}
104+
105+
{%- for ds in ads_list -%}
106+
{%- if pid and ds -%}
107+
{% if not ns.first %} UNION ALL {% endif %}{# spaces kept inside the tag body: the surrounding "-" whitespace control strips everything between the previous branch's WHERE filter and this keyword #}
108+
{% set ns.first = false %}
109+
SELECT
110+
'{{ pid }}' as project_id,
111+
'{{ ds }}' as dataset_id,
112+
*
113+
FROM {{ source('firebase_analytics__' ~ pid ~ '__' ~ ds, 'events') }}
114+
WHERE {{ overbase_firebase.analyticsTableSuffixFilter() }}
115+
{%- endif -%}
116+
{%- endfor -%}
117+
{%- endfor -%}
118+
{%- endif -%}
76119
) as events
77120
LEFT JOIN {{ref('ob_iso_country')}} as country_codes
78121
ON LOWER(events.geo.country) = LOWER(country_codes.firebase_name)

models/crashlytics/fb_crashlytics_events_raw.sql

Lines changed: 104 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,19 @@
1414
-- https://firebase.google.com/docs/crashlytics/bigquery-export#without_stack_traces
1515

1616
{% set projects = var('OVERBASE:SOURCES', []) %}
17+
{% set ready = var('OVERBASE:SOURCES_READY', false) %}
1718

18-
{% for p in projects %}
19-
{% if not loop.first %}UNION ALL{% endif %}
20-
SELECT
21-
'{{ p.project_id }}' as project_id,
19+
{% set first = projects[0] if projects and (projects[0] is mapping) else {} %}
20+
{% set pid0 = first.get('project_id', 'fallback_project') %}
21+
{% set ds0 = first.get('crashlytics_dataset_id', 'fallback_crashlytics_dataset') %}
22+
23+
{%- set ns = namespace(first=true) -%}
24+
25+
(
26+
{% if not ready %}
27+
SELECT
28+
'{{ pid0 }}' AS project_id
29+
,'{{ ds0 }}' AS dataset_id
2230
, DATE(event_timestamp) as event_date
2331
, received_timestamp as received_ts
2432
, installation_uuid as crashlytics_user_pseudo_id
@@ -84,7 +92,95 @@ SELECT
8492
) AS unity_metadata
8593
, COUNT(1) OVER (PARTITION BY installation_uuid, event_id, variant_id) as duplicates_cnt
8694
87-
FROM {{ source('firebase_crashlytics__' ~ p.project_id, 'events') }}
88-
WHERE {{ overbase_firebase.crashlyticsTSFilterFor("event_timestamp") }}
89-
QUALIFY ROW_NUMBER() OVER (PARTITION BY installation_uuid, event_id, variant_id ORDER BY received_ts) = 1
90-
{% endfor %}
95+
FROM {{ source('firebase_crashlytics__fallback', 'events') }}
96+
WHERE {{ overbase_firebase.crashlyticsTSFilterFor("event_timestamp") }}
97+
QUALIFY ROW_NUMBER() OVER (PARTITION BY crashlytics_user_pseudo_id, event_id, variant_id ORDER BY received_ts) = 1
98+
99+
{% else %}
100+
101+
{% for p in projects %}
102+
{% set pid = p.get('project_id') %}
103+
{% set ds = p.get('crashlytics_dataset_id') %}
104+
105+
{% if pid and ds %}
106+
{% if not ns.first %}UNION ALL{% endif %}
107+
{% set ns.first = false %}
108+
109+
SELECT
110+
'{{ pid }}' AS project_id,
111+
'{{ ds }}' AS dataset_id,
112+
DATE(event_timestamp) as event_date
113+
, received_timestamp as received_ts
114+
, installation_uuid as crashlytics_user_pseudo_id
115+
, (SELECT value FROM UNNEST(custom_keys) WHERE key = 'fb_user_pseudo_id') as firebase_analytics_user_pseudo_id
116+
, COALESCE(user.id, (SELECT value FROM UNNEST(custom_keys) WHERE key = 'app_user_id')) as user_id
117+
, bundle_identifier as app_id
118+
, ARRAY_TO_STRING(ARRAY_REVERSE(SPLIT(bundle_identifier, '.')), '.') as reverse_app_id
119+
, event_id
120+
-- the platform we get in operating_system.type is not populated for Android, only for iOS. So rely on _TABLE_SUFFIX instead
121+
, CASE WHEN _TABLE_SUFFIX LIKE '%ANDROID%' THEN'ANDROID'
122+
WHEN _TABLE_SUFFIX LIKE '%IOS%' THEN'IOS'
123+
ELSE 'UNKNOWN' -- TODO: unit test for this
124+
END as platform
125+
, STRUCT<id STRING, title STRING, subtitle STRING, variant_id STRING>(
126+
issue_id, issue_title, issue_subtitle, variant_id
127+
) as issue
128+
, error_type
129+
, process_state
130+
, STRUCT<app STRING, device STRING>(
131+
app_orientation, device_orientation
132+
) as orientation
133+
, STRUCT<firebase_value STRING, build_no STRING, major INT64, minor INT64, bugfix INT64, major_minor FLOAT64, major_minor_bugfix STRING, normalized INT64, join_value STRING>(
134+
{%- set v = "application.display_version" -%}
135+
{{ v }}, application.build_version, {{ overbase_firebase.get_version(v, "major") }}, {{ overbase_firebase.get_version(v, "minor") }}, {{ overbase_firebase.get_version(v, "bugfix") }}, {{ overbase_firebase.get_version(v, "major.minor") }}, {{ overbase_firebase.get_version(v, "major.minor.bugfix") }}, {{ overbase_firebase.get_version(v, "normalized") }}, COALESCE(CAST({{ overbase_firebase.get_version(v, "normalized") }} AS STRING), {{ v }} )
136+
) AS app_version
137+
, STRUCT<firebase_value STRING, name STRING, major INT64, minor INT64, bugfix INT64, major_minor FLOAT64, major_minor_bugfix STRING, normalized INT64, join_value STRING>(
138+
{%- set v = "operating_system.display_version" -%}
139+
{{ v }}, operating_system.name, {{ overbase_firebase.get_version(v, "major") }}, {{ overbase_firebase.get_version(v, "minor") }}, {{ overbase_firebase.get_version(v, "bugfix") }}, {{ overbase_firebase.get_version(v, "major.minor") }}, {{ overbase_firebase.get_version(v, "major.minor.bugfix") }}, {{ overbase_firebase.get_version(v, "normalized") }}, COALESCE(CAST( {{ overbase_firebase.get_version(v, "normalized") }} AS STRING), {{ v }} )
140+
) AS platform_version
141+
, operating_system.modification_state as jailbroken_state
142+
, STRUCT<type STRING, manufacturer STRING, os_model STRING, architecture STRING>(
143+
LOWER(operating_system.device_type), LOWER(device.manufacturer), LOWER(device.model), device.architecture
144+
) AS device_hardware
145+
, {{ overbase_firebase.generate_struct_for_raw_crashlytics_custom_keys() }} as custom_keys
146+
, custom_keys as custom_keys_raw
147+
, STRUCT<used_bytes INT64, free_bytes INT64>(memory.used, memory.free) as memory
148+
, STRUCT<used_bytes INT64, free_bytes INT64>(storage.used, storage.free) as storage
149+
, STRUCT<name STRING, email STRING>(user.name, user.email) as user
150+
, crashlytics_sdk_version AS crashlytics_sdk_version_string
151+
, logs
152+
, breadcrumbs
153+
, blame_frame
154+
, exceptions as android_exceptions
155+
, errors as ios_non_fatal
156+
, threads
157+
, STRUCT<unity_version STRING, debug_build BOOLEAN, processor_type STRING, processor_count INTEGER, processor_frequency INTEGER, system_memory_size INTEGER, graphics_memory_size INTEGER, graphics_device_id INTEGER, graphics_device_vendor_id INTEGER, graphics_device_name STRING, graphics_device_vendor STRING, graphics_device_version STRING, graphics_device_type STRING, graphics_shader_level INTEGER, graphics_render_target_count INTEGER, graphics_copy_texture_support STRING, graphics_max_texture_size INTEGER, screen_size STRING, screen_dpi FLOAT64, screen_refresh_rate INTEGER, processor_frequency_mhz INTEGER, system_memory_size_mb INTEGER, graphics_memory_size_mb INTEGER, screen_size_px STRING, screen_refresh_rate_hz INTEGER, screen_resolution_dpi STRING>(
158+
{# it has a short form of 20 columns (iOS REALTIME only) and a long form of 26 columns (Android historic, Android realtime & iOS historic )
159+
20:unity_version STRING,debug_build BOOLEAN,processor_type STRING,processor_count INTEGER,processor_frequency_mhz INTEGER,system_memory_size_mb INTEGER,graphics_memory_size_mb INTEGER,graphics_device_id INTEGER,graphics_device_vendor_id INTEGER,graphics_device_name STRING,graphics_device_vendor STRING,graphics_device_version STRING,graphics_device_type STRING,graphics_shader_level INTEGER,graphics_render_target_count INTEGER,graphics_copy_texture_support STRING,graphics_max_texture_size INTEGER,screen_size_px STRING,screen_refresh_rate_hz INTEGER,screen_resolution_dpi STRING,
160+
sometimes it's processor_frequency_mhz
161+
26: unity_version STRING, debug_build BOOLEAN, processor_type STRING, processor_count INTEGER, processor_frequency INTEGER, system_memory_size INTEGER, graphics_memory_size INTEGER, graphics_device_id INTEGER, graphics_device_vendor_id INTEGER, graphics_device_name STRING, graphics_device_vendor STRING, graphics_device_version STRING, graphics_device_type STRING, graphics_shader_level INTEGER, graphics_render_target_count INTEGER, graphics_copy_texture_support STRING, graphics_max_texture_size INTEGER, screen_size STRING, screen_dpi FLOAT, screen_refresh_rate INTEGER, processor_frequency_mhz INTEGER, system_memory_size_mb INTEGER, graphics_memory_size_mb INTEGER, screen_size_px STRING, screen_refresh_rate_hz INTEGER, screen_resolution_dpi STRING
162+
Differences:
163+
+ processor_frequency (but both also have processor_frequency_mhz)
164+
+ system_memory_size (but both also have system_memory_size_mb)
165+
+ graphics_memory_size (but both also have graphics_memory_size_mb)
166+
+ screen_size
167+
+ screen_dpi
168+
+ screen_refresh_rate
169+
Those values are NULLed for the time being
170+
#}
171+
{{ overbase_firebase.list_map_and_add_prefix([
172+
"unity_version","debug_build","processor_type","processor_count",none,none,none,"graphics_device_id","graphics_device_vendor_id","graphics_device_name","graphics_device_vendor","graphics_device_version","graphics_device_type","graphics_shader_level","graphics_render_target_count","graphics_copy_texture_support","graphics_max_texture_size",none,none,none,"processor_frequency_mhz","system_memory_size_mb","graphics_memory_size_mb","screen_size_px","screen_refresh_rate_hz","screen_resolution_dpi"
173+
], "unity_metadata." )| join(", ") }}
174+
) AS unity_metadata
175+
, COUNT(1) OVER (PARTITION BY installation_uuid, event_id, variant_id) as duplicates_cnt
176+
177+
FROM {{ source('firebase_crashlytics__' ~ pid, 'events') }}
178+
WHERE {{ overbase_firebase.crashlyticsTSFilterFor("event_timestamp") }}
179+
180+
QUALIFY ROW_NUMBER() OVER (PARTITION BY crashlytics_user_pseudo_id, event_id, variant_id ORDER BY received_ts) = 1
181+
{% endif %}{# QUALIFY is emitted inside the pid/ds guard so a skipped source entry leaves no dangling clause #}
182+
{% endfor %}
183+
{% endif %}
184+
)
185+
186+

models/safe_sources.yml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
version: 2
2+
3+
sources:
4+
  # Firebase Analytics fallback source, referenced by the analytics model while OVERBASE:SOURCES_READY is false. Each default is a one-element list of mappings so that [0]['key'] still resolves when OVERBASE:SOURCES is unset.
5+
  - name: "firebase_analytics__fallback"
6+
    database: "{{ var('OVERBASE:SOURCES', [{'project_id': 'ta'}])[0]['project_id'] }}"
7+
    schema: "{{ var('OVERBASE:SOURCES', [{'analytics_dataset_id': 'firebase_analytics_raw_test'}])[0]['analytics_dataset_id'] }}"
8+
    tables:
9+
      - name: events
10+
        identifier: "{{ var('OVERBASE:SOURCES', [{'events_table': 'events_*'}])[0]['events_table'] }}"
11+
12+
  # Firebase Crashlytics fallback source, referenced by the crashlytics model while OVERBASE:SOURCES_READY is false. Defaults follow the same list-of-mappings shape as above.
13+
  - name: firebase_crashlytics__fallback
14+
    database: "{{ var('OVERBASE:SOURCES', [{'project_id': 'ta'}])[0]['project_id'] }}"
15+
    schema: "{{ var('OVERBASE:SOURCES', [{'crashlytics_dataset_id': 'firebase_crashlytics__fallback'}])[0]['crashlytics_dataset_id'] }}"
16+
    tables:
17+
      - name: events
18+
        identifier: "{{ var('OVERBASE:SOURCES', [{'crashlytics_table': 'com_app_*'}])[0]['crashlytics_table'] }}"

models/sources.yml

Lines changed: 0 additions & 15 deletions
This file was deleted.

0 commit comments

Comments
 (0)