-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdbt_project.yml
More file actions
125 lines (118 loc) · 8.06 KB
/
dbt_project.yml
File metadata and controls
125 lines (118 loc) · 8.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
name: 'ta_firebase'
version: '0.9.13'
config-version: 2

# This setting configures which "profile" dbt uses for this project.
profile: 'default'

# Resource paths: where dbt looks for each resource type.
model-paths: ["models"]
analysis-paths: ["analyses"]
test-paths: ["tests"]
seed-paths: ["seeds"]
macro-paths: ["macros"]
snapshot-paths: ["snapshots"]

target-path: "target"  # directory which will store compiled SQL files
clean-targets:  # directories to be removed by `dbt clean`
  - "target"
  - "dbt_packages"

models:
  ta_firebase:
    # Applies to all models in this package.
    materialized: table
    schema: ta_firebase
    +persist_docs:
      relation: true
      columns: true

seeds:
  ta_firebase:
    schema: ta

vars:
  ta_firebase:
    "TA:DONT_CARE": "MAKE_YAML_WORK"  # optional
    "TA:SOURCES_MULTIPLE_PROJECTS_GENERATED": false
    # "TA:SOURCES":
    #   - {project_id: watermark-maker,
    #      analytics_dataset_id: analytics_150733022,
    #      events_table: events_*,
    #      crashlytics_dataset_id: firebase_crashlytics,
    #      crashlytics_table: com_kobe_watermarkmaker_*}
    #   # add more projects here
    #   - {project_id: logo-maker-35628,
    #      analytics_dataset_id: analytics_153578578,
    #      events_table: events_*,
    #      crashlytics_dataset_id: firebase_crashlytics,
    #      crashlytics_table: com_dave_logomaker_*}
    # "TA:SQLMESH_LOOKBACK"

    # Don't define variables here, because if they aren't overwritten in the
    # client yml, then they will resolve to "None". Instead, use them in the
    # package where you need them, with the appropriate default value.
    # Furthermore, whatever we use in sources.yml, we can't define in this
    # package's dbt_project.yml, or else they won't be able to be overwritten
    # by the client.
    # Any mandatory variables you want to be enforced should go into
    # ta_mandatory_vars.sql.
    # The comments listed here are just for our sanity & easier c/p in client configs.
    # "TA:FIREBASE_ANALYTICS_CUSTOM_INSTALL_EVENT": "" # optional e.g. attributed_first_open
    # "TA:CUSTOM_PLATFORM_PREPROCESSOR": "IF(app_info.version LIKE '%nexus', 'nexus', platform)"
    # "TA:FIREBASE_ANALYTICS_DEFAULT_INCREMENTAL_DAYS": # optional
    # "TA:FIREBASE_CRASHLYTICS_DEFAULT_INCREMENTAL_DAYS": # optional
    # "TA:FIREBASE_ANALYTICS_FULL_REFRESH_START_DATE": "2018-01-01" # mandatory
    # "TA:FIREBASE_ANALYTICS_FULL_REFRESH_END_DATE": "2019-01-05" # optional, if you want to make
    # "TA:FIREBASE_CRASHLYTICS_FULL_REFRESH_START_DATE": "2018-01-01" # mandatory
    # "TA:FIREBASE_CRASHLYTICS_FULL_REFRESH_END_DATE": "2019-01-05" # optional, if you want to make
    # "TA:DIMENSION_TO_EXCLUDE_IN_ROLLUPS": # optional, what built-in TA dimensions to exclude from bubbling up in the rollups (& save the row count)
    #   - geo.city
    #   - geo.metro
    # "TA:CUSTOM_USER_PROPERTIES": # data_types : "string", "int", "double"
    #   - { key_name: up_just_in_raw, data_type: 'string'}
    #   - { key_name: first_open_time, data_type: 'int'}
    #   - { key_name: poorly_set_variable, data_type: 'double'}
    # "TA:CUSTOM_EVENT_PARAMETERS": # data_types : "string", "int", "double"
    #   - { key_name: quantity, data_type: 'int', rollup_type: 'metric'}
    #   - { key_name: quantity, data_type: 'int', rollup_type: 'metric', metric_rollup_transformation: 'SUM(##)', struct_field_name: 'quantity_int2' }
    #   - { key_name: quantity, data_type: 'int', rollup_type: 'metric', metric_rollup_transformation: 'AVG(##)', struct_field_name: 'quantity_int3', rollup_struct_field_name: 'cm_quantity_avg' }
    #   - { key_name: quantity, data_type: 'int', rollup_type: 'metricOnly', metric_rollup_transformation: 'MIN(##)', struct_field_name: 'quantity_int', rollup_struct_field_name: 'cm_quantity_int_min' }
    #   - { key_name: myDimension, data_type: 'string', rollup_type: 'alsoForceNullDimension'}
    #   - { key_name: server_sent_ts, data_type: 'string', rollup_type: 'dimension', extract_transformation: 'TIMESTAMP(##)', output_data_type: 'TIMESTAMP', struct_field_name: 'server_sent_ts' }
    # Field reference for a TA:CUSTOM_EVENT_PARAMETERS entry:
    #   key_name: server_sent_ts
    #   data_type: string
    #   rollup_type: raw/dimension/alsoForceNullDimension/metric/metricOnly
    #     raw: it will just be extracted and kept in google_analytics_events_raw. It won't be propagated upwards
    #     dimension: raw + it will be propagated upwards as a dimension
    #     alsoForceNullDimension: dimension + it will be also propagated in the "forced_nulls" table with this dimension set to NULL
    #     metric: raw + it will be propagated upwards as a metric
    #     metricOnly: no raw, it assumes there is already a raw and will use that
    #   metric_rollup_transformation: 'SUM(##)' # optional, default is SUM()
    #   struct_field_name: quantity_ts # optional; default is key_name + '_' + data_type
    #   rollup_struct_field_name: cm_quantity_sum # optional; default is cm_ + struct_field_name + metric_rollup_transformation function
    #   extract_transformation: "PARSE_DATE('%Y%m%d', ##)" # optional, becomes PARSE_DATE('%Y%m%d', value.string_value); default is just the int/string value
    #   output_data_type: TIMESTAMP # optional, default is the same as what was extracted, e.g. string
    #   event_name_filter: ["notification_received"] # optional, default is to extract for all events
    #   force_null_dimension_event_name_filter: [] # optional, default is to extract for all events
    # By default, metrics get aggregated as "cm_foo_int". So in the raw we have "foo_int", in the rollup we have "cm_foo_int".
    # Changing the "struct_field_name" will change both.
    #   - { key_name: quantity, data_type: 'int', rollup_type: 'metric'}
    #   - { key_name: quantity, data_type: 'int', rollup_type: 'metric', metric_rollup_transformation: 'SUM(##)', struct_field_name: 'quantity_int2' }
    #   - { key_name: quantity, data_type: 'int', rollup_type: 'metric', metric_rollup_transformation: 'AVG(##)', struct_field_name: 'quantity_avg' }
    #   - { key_name: quantity, data_type: 'int', rollup_type: 'metricOnly', metric_rollup_transformation: 'SUM(##)', struct_field_name: 'quantity_int_sum2' }
    #   - { key_name: server_sent_ts, data_type: 'string', rollup_type: 'raw', extract_transformation: 'TIMESTAMP(##)', output_data_type: 'TIMESTAMP', struct_field_name: 'server_sent_ts' }
    # "TA:CUSTOM_CRASHLYTICS_KEYS": # data_types : "string", "int", "float", "double"
    #   - { key_name: network_state, data_type: 'string'}
    # "TA:CUSTOM_APP_HEALTH_MEASURES": # optional, any custom measures that you want rolled up in the app_health model
    #   - name: signup_step_1 # no spaces, needs to be a proper column name
    #     model: analytics_forced_nulls # optional. It can either be analytics/analytics-forced-nulls/crashlytics. Default is analytics
    #     agg: SUM(##)
    #     event_name: ui_view_show
    #     # mini_measures: ["cnt", "users"] # optional, the default it aggregates over if unspecified
    #     additional_filter: event_parameters.view_name_string = 'signup step 1'
    #   - name: svc_inbound_duration
    #     agg: SUM(##)
    #     event_name: spend_virtual_currency
    #     mini_measures: ["cm_quantity_int"]
    #     additional_filter: event_parameters.direction_string = 'inbound'
    #   - name: fatal_crashes_during_calls
    #     model: crashlytics
    #     agg: SUM(##)
    #     additional_filter: error_type = 'FATAL' AND custom_keys.call_state = 'duringCall'
    # "TA:CUSTOM_APP_HEALTH_METRICS": # TODO later on, forward to the BI tool as the semantic layer
    #   # NOT IMPLEMENTED ATM
    #   - name: signup_ratio
    #     type: ratio
    #     numerator_agg: SUM(##)
    #     numerator_filter: event_name LIKE '%view_show' AND event_parameters.view_name_string = 'signup step 1' AND event_parameters.view_type_string = 'ta-forced-null'
    #     denominator_agg: SUM(##)
    #     denominator_filter: event_name LIKE '%view_show' AND event_parameters.view_name_string = 'signup step 5' AND event_parameters.view_type_string = 'ta-forced-null'