User Activity & Engagement
Daily Active Users (DAU)
Daily Active Users (DAU)
Grain: Daily aggregation
Use Case: Track overall platform engagement and user activity trends
DECLARE project_id STRING DEFAULT NULL; -- Replace with your project ID or leave as NULL
WITH
glean_customer_event AS (
SELECT *
FROM
-- replace <glean_customer_event_table> with your firm's table name
`<glean_customer_event_table>`
WHERE
-- change date ranges as needed
CAST(DATE_TRUNC(`timestamp`, DAY) AS DATE) >= CURRENT_DATE - 30
-- Optional project filtering - only filter if project_id is specified
AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
, processed_ca_logs AS (
SELECT
TIMESTAMP_TRUNC(`timestamp`, DAY) AS utc_day
, jsonPayload.clientevent.event AS event_name
, timestamp AS event_timestamp
, jsonPayload.user.userid AS user_id
, jsonPayload.clientevent.category AS category
, jsonPayload.clientevent.label AS label
, jsonPayload.clientevent.pagepath AS page_path
, jsonPayload.clientevent.SessionTrackingToken AS stt
FROM
glean_customer_event
WHERE jsonPayload.Type = 'CLIENT_EVENT'
)
, -- We do not attribute most of the events fired to DAU unless that event correspond to a
-- user action that leads to some level of value creation. The below filter capture workflows
-- corresponding to such greenlisted user actions
filtered_ca_logs AS (
SELECT
*
FROM
processed_ca_logs
WHERE
(
-- Depending on category, opening links/documents counts towards DAU
event_name IN (
'OpenDocument'
, 'OpenLink'
)
AND category IN (
-- Autocomplete and Search interactions count towards DAU
'Search Result'
, 'Autocomplete'
-- New tab page(NTP) and Homepage interactions count towards DAU
, 'Feed'
, 'Calendar'
, 'New Tab Page'
-- Directory tab interactions count towards DAU
, 'Org Chart'
, 'Person Card'
, 'Teams'
, 'Profile'
, 'People Celebrations'
, 'Person Attribution'
-- User Generated Content(UGC) interactions count towards DAU
, 'Announcements'
, 'Answers'
, 'Collections'
, 'Featured Question and Answer'
, 'Generated Question and Answer'
, 'Pins'
-- Golinks interactions counts towards DAU
, 'Shortcuts'
-- Admin and Setup page interactions count towards DAU
, 'Verification'
, 'Datasource Auth'
, 'Insights'
-- Feature related interactions count towards DAU
, 'Chat'
, 'Result Preview'
, 'App Card'
, 'Customer Card'
, 'Search'
-- Other tangible user interactions that count towards DAU
, 'Feedback'
)
)
OR (
-- Depending on category, certain feature related clicks count towards DAU
event_name IN (
'Click'
)
AND category IN (
-- Autocomplete and Search interactions count towards DAU
'Autocomplete'
, 'Search Result'
, 'Datasource Filter'
, 'Facets'
-- New tab page(NTP) and Homepage interactions count towards DAU
, 'Feed'
, 'Calendar'
-- Directory tab interactions count towards DAU
, 'Org Chart'
, 'Person Card'
, 'Teams'
, 'Profile'
, 'People Celebrations'
, 'Person Attribution'
-- Sidebar interactions count towards DAU
, 'Sidebar Tabs'
-- User Generated Content(UGC) interactions count towards DAU
, 'Announcements'
, 'Answers'
, 'Collections'
-- Golinks interactions counts towards DAU
, 'Shortcuts'
-- Admin and Setup page interactions count towards DAU
, 'Datasource Auth'
, 'User Menu'
, 'Admin Console'
-- Other tangible user interactions that count towards DAU
, 'Feedback'
)
)
-- CRUD operations on User Generated Content(UGC) always count always count towards DAU
OR (
event_name IN (
'Add'
, 'Create'
, 'Delete'
) AND category IN (
'Announcements'
, 'Answers'
, 'Collections'
)
)
OR (
event_name IN
(
'View'
)
AND category IN
(
-- User Generated Content(UGC) interactions count towards DAU
'Announcements',
'Answers',
'Collections',
-- Directory tab interactions count towards DAU
'Person Card',
'Team Card',
'Org Chart'
) and page_path not in ('/', '/ntp', '/search')
)
)
, feature_logs AS (
SELECT
DISTINCT jsonPayload.user.userid AS user_id
, TIMESTAMP_TRUNC(`timestamp`, DAY) AS utc_day
FROM
glean_customer_event
WHERE
((
jsonPayload.Type IN ('SEARCH')
-- User initiated search queries only
AND jsonpayload.Search.initiator = 'USER'
)
OR (
jsonPayload.Type IN ('CHAT')
-- User initiated chat queries only
AND jsonpayload.Chat.initiator = 'USER'
)
OR (
jsonPayload.Type IN ('SEARCH_CLICK','CHAT_FEEDBACK','AI_SUMMARY','AI_ANSWER','SEARCH_FEEDBACK')
)
OR (
jsonPayload.Type IN ('SHORTCUT')
AND jsonPayload.shortcut.event IN ('REDIRECT', 'CREATE', 'DELETE', 'UPDATE')
)
OR (
jsonPayload.Type IN ('AUTOCOMPLETE')
-- Filtering autocomplete queries to include only those with a query length greater than 1 to ensure only intentional engagement is considered.
AND jsonPayload.autocomplete.querylength>=1.0
))
)
SELECT
utc_day
, COUNT(DISTINCT user_id) AS DAU
FROM (
SELECT
utc_day
, user_id
FROM
filtered_ca_logs
UNION ALL
SELECT
utc_day
, user_id
FROM
feature_logs
)
GROUP BY
1
ORDER BY
1 DESC
Number of Glean Webapp Searches and Distinct Searchers per Day
Number of Glean Webapp Searches and Distinct Searchers per Day
Grain: Daily aggregation
Use Case: Monitor webapp search adoption and usage patterns
DECLARE project_id STRING DEFAULT NULL; -- Replace with your project ID or leave as NULL
WITH glean_customer_event AS (
SELECT *
FROM
-- replace <glean_customer_event_table> with your firm's table name
`<glean_customer_event_table>`
WHERE
-- Optional project filtering - only filter if project_id is specified
(project_id IS NULL OR resource.labels.project_id = project_id)
)
SELECT
TIMESTAMP_TRUNC(`timestamp`, DAY) AS utc_day
, COUNT(DISTINCT jsonPayload.User.UserId) AS num_users
, COUNT(DISTINCT jsonPayload.Search.TrackingToken) AS num_searches
FROM glean_customer_event
WHERE jsonPayload.Type = 'SEARCH'
AND jsonPayload.Search.Initiator = 'USER'
AND jsonPayload.Search.Modality = 'FULLPAGE'
GROUP BY 1
ORDER BY 1
Number of Searches and Searchers from Embedded Search Component per Day, per Embedding Domain
Number of Searches and Searchers from Embedded Search Component per Day, per Embedding Domain
Grain: Daily aggregation by embedding domain
Use Case: Track embedded search component adoption across different platforms and domains
DECLARE project_id STRING DEFAULT NULL; -- Replace with your project ID or leave as NULL
WITH glean_customer_event AS (
SELECT *
FROM
-- replace <glean_customer_event_table> with your firm's table name
`<glean_customer_event_table>`
WHERE
-- Optional project filtering - only filter if project_id is specified
(project_id IS NULL OR resource.labels.project_id = project_id)
)
SELECT
TIMESTAMP_TRUNC(`timestamp`, DAY) AS utc_day
, jsonPayload.Search.HostDomain
, COUNT(DISTINCT jsonPayload.User.UserId) AS num_users
, COUNT(DISTINCT jsonPayload.Search.TrackingToken) AS num_searches
FROM glean_customer_event
WHERE jsonPayload.Type = 'SEARCH'
AND jsonPayload.Search.Initiator = 'USER'
AND jsonPayload.Search.Modality = 'EMBEDDED_SEARCH'
GROUP BY 1, 2
ORDER BY 1, 2
Search Analytics
Number of Queries, Sessions, Queries per Session and Searchers in Last 7d
Number of Queries, Sessions, Queries per Session and Searchers in Last 7d
Grain: Daily aggregation over 7-day period
Use Case: Analyze search engagement depth and session patterns
DECLARE project_id STRING DEFAULT NULL; -- Replace with your project ID or leave as NULL
WITH glean_customer_event AS (
SELECT *
FROM
-- replace <glean_customer_event_table> with your firm's table name
`<glean_customer_event_table>`
WHERE
-- filter for data from the last 7 days (adjust as needed)
`timestamp` >= TIMESTAMP(DATE_ADD(CURRENT_DATE, INTERVAL -7 DAY))
-- Optional project filtering - only filter if project_id is specified
AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
SELECT
TIMESTAMP_TRUNC(`timestamp`, DAY) AS utc_day
,COUNT(DISTINCT jsonPayload.User.UserId) AS num_users
,COUNT(DISTINCT jsonPayload.Search.TrackingToken) AS num_queries
,COUNT(DISTINCT jsonPayload.Search.SessionTrackingToken) AS num_sessions
,COUNT(DISTINCT jsonPayload.Search.TrackingToken)/COUNT(DISTINCT jsonPayload.Search.SessionTrackingToken) AS Queries_per_session
FROM
glean_customer_event
WHERE
jsonPayload.Type = 'SEARCH'
-- filter for user queries (exclude Gleanbot requests)
AND LOWER(jsonPayload.Search.Initiator) IN ('user')
AND LOWER(jsonPayload.Search.Modality) NOT IN ('gleanbot', '')
GROUP BY 1
ORDER BY 1;
Number of Clicks and Queries with Clicks in Last 7d
Number of Clicks and Queries with Clicks in Last 7d
Grain: Daily aggregation over 7-day period
Use Case: Measure search result relevance and click-through rates
DECLARE lookback_period INT64 DEFAULT 7;
DECLARE lookback_unit STRING DEFAULT 'DAY';
DECLARE project_id STRING DEFAULT NULL; -- Replace with your project ID or leave as NULL -- Set to NULL to include all projects, or specify a project ID
WITH
-- Base CTE to centralize table reference with parameterized lookback period and optional project filtering
glean_customer_event AS (
SELECT
DATE(timestamp) AS datepartition
, resource.labels.project_id AS project_id
, jsonPayload.User.UserId AS userid
, jsonPayload
, timestamp
FROM
-- replace <glean_customer_event_table> with your firm's table name
<glean_customer_event_table>
WHERE
timestamp >= CASE
WHEN lookback_unit = 'DAY' THEN TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period DAY))
WHEN lookback_unit = 'WEEK' THEN TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period WEEK))
WHEN lookback_unit = 'MONTH' THEN TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period MONTH))
WHEN lookback_unit = 'HOUR' THEN TIMESTAMP(DATETIME_SUB(CURRENT_DATETIME(), INTERVAL lookback_period HOUR))
ELSE TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period DAY)) -- Default to DAY
END
-- Optional project filtering - only filter if project_id is specified
AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
SELECT
TIMESTAMP_TRUNC(`timestamp`, DAY) AS utc_day
,count(jsonPayload.SearchClick.TrackingToken) AS num_clicks
,count(DISTINCT jsonPayload.SearchClick.TrackingToken) AS num_queries_with_clicks
FROM
glean_customer_event
WHERE
jsonPayload.Type = 'SEARCH_CLICK'
GROUP BY 1
ORDER BY 1
Number of Clicks and Queries with Clicks for Each Datasource in Last 7d
Number of Clicks and Queries with Clicks for Each Datasource in Last 7d
Grain: Daily aggregation by datasource over 7-day period
Use Case: Evaluate datasource performance and result positioning effectiveness
DECLARE project_id STRING DEFAULT NULL; -- Replace with your project ID or leave as NULL
WITH glean_customer_event AS (
SELECT *
FROM
-- replace <glean_customer_event_table> with your firm's table name
`<glean_customer_event_table>`
WHERE
-- filter for data from the last 7 days (adjust as needed)
`timestamp` >= TIMESTAMP(DATE_ADD(CURRENT_DATE, INTERVAL -7 DAY))
-- Optional project filtering - only filter if project_id is specified
AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
SELECT
TIMESTAMP_TRUNC(`timestamp`, DAY) AS utc_day
,jsonPayload.searchclick.datasource
,count(jsonPayload.SearchClick.TrackingToken) AS num_clicks
,count(DISTINCT jsonPayload.SearchClick.TrackingToken) AS num_queries_with_clicks
,avg(jsonPayload.searchclick.position)+1 as average_position
FROM
glean_customer_event
WHERE
jsonPayload.Type = 'SEARCH_CLICK'
GROUP BY 1,2
ORDER BY 1,3 desc;
Top Search Queries
Top Search Queries
Grain: Query-level aggregation over 14-day period
Use Case: Identify popular search topics and content gaps
DECLARE project_id STRING DEFAULT NULL; -- Replace with your project ID or leave as NULL
WITH glean_customer_event AS (
SELECT *
FROM
-- replace <glean_customer_event_table> with your firm's table name
`<glean_customer_event_table>`
WHERE
-- filter for data from the last 14 days (adjust as needed)
`timestamp` >= TIMESTAMP(DATE_ADD(CURRENT_DATE, INTERVAL -14 DAY))
-- Optional project filtering - only filter if project_id is specified
AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
, queries_data AS (
SELECT
DISTINCT `timestamp`
, jsonPayload.Search.SessionTrackingToken AS stt
, jsonPayload.Search.TrackingToken AS qtt --Query Tracking Token
, jsonPayload.Search.Query AS search_query
, jsonPayload.user.userid
, jsonPayload.user.useremail
FROM
glean_customer_event
WHERE
jsonPayload.Type = 'SEARCH'
-- filter for user queries (exclude Gleanbot requests)
AND LOWER(jsonPayload.Search.Initiator) IN ('user')
AND LOWER(jsonPayload.Search.Modality) NOT IN ('gleanbot', '')
)
SELECT
search_query
, COUNT(DISTINCT qtt) AS frequency
, COUNT(DISTINCT userid) AS user_count
FROM
queries_data
GROUP BY
1
ORDER BY
2 DESC
Top Search Terms
Top Search Terms
Grain: Search term-level aggregation over 14-day period
Use Case: Understand user vocabulary and optimize search suggestions
DECLARE project_id STRING DEFAULT NULL; -- Replace with your project ID or leave as NULL
WITH glean_customer_event AS (
SELECT *
FROM
-- replace <glean_customer_event_table> with your firm's table name
`<glean_customer_event_table>`
WHERE
-- filter for data from the last 14 days (adjust as needed)
`timestamp` >= TIMESTAMP(DATE_ADD(CURRENT_DATE, INTERVAL -14 DAY))
-- Optional project filtering - only filter if project_id is specified
AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
, queries_data AS (
SELECT
DISTINCT `timestamp`
, jsonPayload.Search.SessionTrackingToken AS stt
, jsonPayload.Search.TrackingToken AS qtt --Query Tracking Token
, jsonPayload.Search.Query AS search_query
, jsonPayload.user.userid
, jsonPayload.user.useremail
FROM
glean_customer_event
WHERE
jsonPayload.Type = 'SEARCH'
-- filter for user queries (exclude Gleanbot requests)
AND LOWER(jsonPayload.Search.Initiator) IN ('user')
AND LOWER(jsonPayload.Search.Modality) NOT IN ('gleanbot', '')
)
, query_terms_list AS (
SELECT
*
, SPLIT(search_query, ' ') AS query_terms
FROM
queries_data
)
, query_terms_flattened AS (
SELECT
timestamp
, stt
, qtt
, userid
, useremail
, query_term
FROM
query_terms_list
, UNNEST(query_terms) AS query_term
)
SELECT
query_term
, COUNT(*) AS frequency
, COUNT(DISTINCT userid) AS user_count
FROM
query_terms_flattened
GROUP BY
1
ORDER BY
2 DESC
Search Quality & Performance
Search Session Satisfaction (SSAT)
Search Session Satisfaction (SSAT)
Grain: Daily aggregation over 14-day period
Use Case: Measure search quality and user satisfaction trends
DECLARE project_id STRING DEFAULT NULL; -- Replace with your project ID or leave as NULL
WITH glean_customer_event AS (
SELECT *
FROM
-- replace <glean_customer_event_table> with your firm's table name
`<glean_customer_event_table>`
WHERE
-- filter for data from the last 14 days (adjust as needed)
`timestamp` >= TIMESTAMP(DATE_ADD(CURRENT_DATE, INTERVAL -14 DAY))
-- Optional project filtering - only filter if project_id is specified
AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
, queries_data AS (
SELECT
DISTINCT TIMESTAMP_TRUNC(`timestamp`, DAY) AS utc_day
, jsonPayload.Search.SessionTrackingToken AS stt
, jsonPayload.Search.TrackingToken AS qtt --Query Tracking Token
FROM
glean_customer_event
WHERE
jsonPayload.Type = 'SEARCH'
-- filter for user queries (exclude Gleanbot requests)
AND LOWER(jsonPayload.Search.Initiator) IN ('user')
AND LOWER(jsonPayload.Search.Modality) NOT IN ('gleanbot', '')
)
, clicks_data AS (
SELECT
DISTINCT jsonPayload.SearchClick.TrackingToken AS qtt
FROM
glean_customer_event
WHERE
jsonPayload.Type = 'SEARCH_CLICK'
)
SELECT
q.utc_day --metric date
, COUNT(DISTINCT IF(c.qtt IS NOT NULL, q.stt, NULL)) / GREATEST(COUNT(DISTINCT q.stt), 1) AS session_satisfaction --Ratio of satisfied sessions to all sessions
FROM
queries_data q
LEFT JOIN
clicks_data c
ON
q.qtt = c.qtt
GROUP BY 1
ORDER BY 1;
Mean Average Precision (MAP)
Mean Average Precision (MAP)
Grain: Daily aggregation over 14-day period
Use Case: Evaluate search result ranking quality and relevance
DECLARE project_id STRING DEFAULT NULL; -- Replace with your project ID or leave as NULL
WITH glean_customer_event AS (
SELECT *
FROM
-- replace <glean_customer_event_table> with your firm's table name
`<glean_customer_event_table>`
WHERE
-- filter for data from the last 14 days (adjust as needed)
`timestamp` >= TIMESTAMP(DATE_ADD(CURRENT_DATE, INTERVAL -14 DAY))
-- Optional project filtering - only filter if project_id is specified
AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
, clicks_data AS (
SELECT
jsonPayload.SearchClick.TrackingToken AS qtt
,jsonPayload.searchclick.position AS position
,MAX(TIMESTAMP_TRUNC(`timestamp`, DAY)) AS utc_day
FROM
glean_customer_event
WHERE
jsonPayload.Type = 'SEARCH_CLICK'
GROUP BY
1,2
)
, precision_data AS (
SELECT
utc_day
, qtt
, position
, COUNT(*) OVER (PARTITION BY qtt ORDER BY position) / (1 + position) AS precision
FROM
clicks_data
)
, average_precision_data AS (
SELECT
utc_day
, qtt
, AVG(precision) AS average_precision
FROM
precision_data
GROUP BY
1, 2
)
SELECT
utc_day --metric_date
, AVG(average_precision) AS mean_average_precision
FROM
average_precision_data
GROUP BY
1
ORDER BY
1;
Time(milliseconds) from First Query to Last Click (p50)
Time(milliseconds) from First Query to Last Click (p50)
Grain: Daily aggregation over 14-day period
Use Case: Measure search session efficiency and time-to-information
DECLARE project_id STRING DEFAULT NULL; -- Replace with your project ID or leave as NULL
WITH glean_customer_event AS (
SELECT *
FROM
-- replace <glean_customer_event_table> with your firm's table name
`<glean_customer_event_table>`
WHERE
-- filter for data from the last 14 days (adjust as needed)
`timestamp` >= TIMESTAMP(DATE_ADD(CURRENT_DATE, INTERVAL -14 DAY))
-- Optional project filtering - only filter if project_id is specified
AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
, queries_data AS (
SELECT
DISTINCT TIMESTAMP_TRUNC(`timestamp`, DAY) AS utc_day
, jsonPayload.Search.SessionTrackingToken AS stt
, jsonPayload.Search.TrackingToken AS qtt
, timestamp
FROM
glean_customer_event
WHERE
jsonPayload.Type = 'SEARCH'
-- filter for user queries (exclude background and slackbot requests)
AND LOWER(jsonPayload.Search.Initiator) IN ('user')
AND LOWER(jsonPayload.Search.Modality) NOT IN ('gleanbot', '')
)
, clicks_data AS (
SELECT
jsonPayload.SearchClick.TrackingToken AS qtt
, MAX(timestamp) as timestamp
FROM
glean_customer_event
WHERE
jsonPayload.Type = 'SEARCH_CLICK'
GROUP BY 1
)
, sessions_w_clicks AS (
SELECT
utc_day --metric_date
, stt
, first_query_ts
, last_click_ts
, timestamp_diff(last_click_ts, first_query_ts, millisecond) AS time_from_first_query_to_last_click
FROM (
SELECT
stt
, MIN(utc_day) AS utc_day
, MIN(q.timestamp) AS first_query_ts
, MIN(c.timestamp) AS first_click_ts
, MAX(c.timestamp) AS last_click_ts
, COUNTIF(c.qtt IS NOT NULL) AS num_clicks
FROM
queries_data q
LEFT JOIN
clicks_data c
ON
q.qtt = c.qtt
GROUP BY
1
)
WHERE
num_clicks > 0
)
SELECT
utc_day --metric_date
-- , avg(time_from_first_query_to_last_click) as mean_time_from_first_query_to_last_click -- surface average if desired
, approx_quantiles(time_from_first_query_to_last_click, 100)[offset(50)] AS p50_time_first_query_to_last_click_ms
FROM
sessions_w_clicks
GROUP BY
1
ORDER BY
1
Search with AI Answers
Search with AI Answers
Grain: Individual query-answer pairs with configurable time periods
Use Case: Understand AI answer usage patterns, track which searches trigger AI responses, and analyze AI answer effectiveness
-- Parameterized lookback period using variables
DECLARE lookback_period INT64 DEFAULT 1;
DECLARE lookback_unit STRING DEFAULT 'DAY';
DECLARE project_id STRING DEFAULT NULL; -- Replace with your project ID or leave as NULL
WITH
-- Base CTE to centralize table reference with parameterized lookback period and optional project filtering
glean_customer_event AS (
SELECT
DATE(timestamp) AS datepartition
, resource.labels.project_id AS project_id
, jsonPayload.User.UserId AS userid
, jsonPayload
, timestamp
FROM
-- replace <glean_customer_event_table> with your firm's table name
<glean_customer_event_table>
WHERE
timestamp >= CASE
WHEN lookback_unit = 'DAY' THEN TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period DAY))
WHEN lookback_unit = 'WEEK' THEN TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period WEEK))
WHEN lookback_unit = 'MONTH' THEN TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period MONTH))
WHEN lookback_unit = 'HOUR' THEN TIMESTAMP(DATETIME_SUB(CURRENT_DATETIME(), INTERVAL lookback_period HOUR))
ELSE TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period DAY)) -- Default to DAY
END
-- Optional project filtering - only filter if project_id is specified
AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
, search_events AS (
SELECT
datepartition
, project_id
, userid
, jsonPayload.Search.Query AS query
, jsonPayload.Search.Modality AS modality
, jsonPayload.Search.Initiator AS initiator
, jsonPayload.Search.TrackingToken AS trackingtoken
FROM
glean_customer_event
WHERE
jsonPayload.Type = 'SEARCH'
AND LOWER(jsonPayload.Search.Initiator) IN ('user')
)
, -- Direct AI Answer events
direct_ai_answers AS (
SELECT
datepartition
, project_id
, userid
, jsonPayload.AIAnswer.TrackingToken AS trackingtoken
, jsonPayload.AIAnswer.Response AS response
FROM
glean_customer_event
WHERE
jsonPayload.Type = 'AI_ANSWER'
AND jsonPayload.AIAnswer.TrackingToken != ''
)
, workflow_conversation AS (
SELECT
jsonPayload.workflowconversation.workflowrunid as runid
, MAX(CASE WHEN message.author='USER' THEN text END) as user_query
, MAX(CASE WHEN message.author='GLEAN' THEN text END) as response
FROM
glean_customer_event
, UNNEST(jsonPayload.workflowconversation.messages) message
WHERE
jsonPayload.type='WORKFLOW_CONVERSATION'
GROUP BY
runid
)
, -- Workflow-based AI Answer events
workflow_ai_answers AS (
SELECT
datepartition
, project_id
, userid
, jsonPayload.WorkflowRun.SourceTrackingToken AS trackingtoken
, wc.response
FROM
glean_customer_event ai_answer
LEFT JOIN
workflow_conversation wc
ON
ai_answer.jsonPayload.workflowrun.runid = wc.runid
WHERE
ai_answer.jsonPayload.Type = 'WORKFLOW_RUN'
AND ai_answer.jsonPayload.WorkflowRun.Feature = 'AI_ANSWER'
AND ai_answer.jsonPayload.WorkflowRun.Initiator = 'GLEAN'
AND ai_answer.jsonPayload.WorkflowRun.Platform = 'WEB'
)
, -- Combine both AI answer types
all_ai_answers AS (
SELECT * FROM direct_ai_answers
UNION ALL
SELECT * FROM workflow_ai_answers
)
SELECT
-- Search Information (from original queries)
s.datepartition
, s.trackingtoken
, s.query
, s.modality
, s.userid
-- AI Answer Information
, aa.response
, CASE WHEN aa.trackingtoken IS NOT NULL THEN TRUE ELSE FALSE END AS is_ai_answer_present
FROM
search_events s
LEFT JOIN
all_ai_answers aa
ON
aa.datepartition = s.datepartition
AND aa.project_id = s.project_id
AND aa.userid = s.userid
AND aa.trackingtoken = s.trackingtoken
ORDER BY
aa.datepartition DESC
, aa.project_id
, aa.userid
Search with AI Answer Votes
Search with AI Answer Votes
Grain: Individual query-answer pairs with configurable time periods
Use Case: Understand distribution of AI answer feedback (upvote/downvote), and analyze AI answer feedback rate
-- Parameterized lookback period using variables
DECLARE lookback_period INT64 DEFAULT 1;
DECLARE lookback_unit STRING DEFAULT 'DAY';
DECLARE project_id STRING DEFAULT NULL; -- Replace with your project ID or leave as NULL -- Set to NULL to include all projects, or specify a project ID
BEGIN
WITH
-- Base CTE to centralize table reference with parameterized lookback period and optional project filtering
glean_customer_event AS (
SELECT
DATE(timestamp) AS datepartition
, resource.labels.project_id AS project_id
, jsonPayload.User.UserId AS userid
, jsonPayload
, timestamp
FROM
-- replace <glean_customer_event_table> with your firm's table name
<glean_customer_event_table>
WHERE
timestamp >= CASE
WHEN lookback_unit = 'DAY' THEN TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period DAY))
WHEN lookback_unit = 'WEEK' THEN TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period WEEK))
WHEN lookback_unit = 'MONTH' THEN TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period MONTH))
WHEN lookback_unit = 'HOUR' THEN TIMESTAMP(DATETIME_SUB(CURRENT_DATETIME(), INTERVAL lookback_period HOUR))
ELSE TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period DAY)) -- Default to DAY
END
-- Optional project filtering - only filter if project_id is specified
AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
, search_events AS (
SELECT
datepartition
, project_id
, userid
, jsonPayload.Search.Query AS query
, jsonPayload.Search.Modality AS modality
, jsonPayload.Search.Initiator AS initiator
, jsonPayload.Search.TrackingToken AS trackingtoken
FROM
glean_customer_event
WHERE
jsonPayload.Type = 'SEARCH'
)
, -- Direct AI Answer events
direct_ai_answers AS (
SELECT
datepartition
, project_id
, userid
, jsonPayload.AIAnswer.TrackingToken AS trackingtoken
FROM
glean_customer_event
WHERE
jsonPayload.Type = 'AI_ANSWER'
AND jsonPayload.AIAnswer.TrackingToken != ''
)
, -- Workflow-based AI Answer events
workflow_ai_answers AS (
SELECT
datepartition
, project_id
, userid
, jsonPayload.WorkflowRun.SourceTrackingToken AS trackingtoken
FROM
glean_customer_event ai_answer
WHERE
ai_answer.jsonPayload.Type = 'WORKFLOW_RUN'
AND ai_answer.jsonPayload.WorkflowRun.Feature = 'AI_ANSWER'
AND ai_answer.jsonPayload.WorkflowRun.Initiator = 'GLEAN'
AND ai_answer.jsonPayload.WorkflowRun.Platform = 'WEB'
)
, -- Combine both AI answer types
all_ai_answers AS (
SELECT * FROM direct_ai_answers
UNION ALL
SELECT * FROM workflow_ai_answers
)
, -- Get all AI answers that have an upvote (thumbs up) or downvote (thumbs down)
ai_answer_votes AS (
SELECT
jsonpayload.aianswervote.trackingtoken
,jsonpayload.aianswervote.vote
FROM glean_customer_event
WHERE
jsonpayload.type = 'AI_ANSWER_VOTE'
)
SELECT
-- Dimensional Context
aa.datepartition
, aa.project_id
, aa.userid
-- Search Information (from original queries)
, s.query AS search_query
, s.modality AS search_modality
, s.initiator AS search_initiator
-- AI Answer Vote Information
, aav.vote AS ai_answer_vote
FROM
all_ai_answers aa
LEFT JOIN
search_events s
ON
aa.datepartition = s.datepartition
AND aa.project_id = s.project_id
AND aa.userid = s.userid
AND aa.trackingtoken = s.trackingtoken
LEFT JOIN
ai_answer_votes aav
ON
aa.trackingtoken = aav.trackingtoken
WHERE
-- Filter out AI answers without corresponding search events or missing initiators
s.initiator != ''
-- Filter out search queries without an AI answer.
-- Note: If you want to get the % of searches that have an AI Answer, commenet this field, so you'll get the entire list of searches along with their AI Answer Vote status
AND aav.vote != ''
ORDER BY
aa.datepartition DESC
, aa.project_id
, aa.userid;
END;
Chat & AI Analytics
Chat Metrics Across CHAT, WORKFLOW, and WORKFLOW_RUN
Chat Metrics Across CHAT, WORKFLOW, and WORKFLOW_RUN
Grain: Per Day, Per UserThis query identifies users who had at least one chat interaction on a given date; one row per user per day.
DECLARE project_id STRING DEFAULT NULL; -- Replace with your project ID or leave as NULL
DECLARE workflow_ids ARRAY<STRING> DEFAULT [
'DOC_CONTEXT_READER',
'ORIGINAL_MESSAGE_SEARCH',
'DIRECT_LLM_RESPONSE',
'REACT_TOOLS_2_HOPS',
'DEFAULT_CHAT',
'WORLD_MODE_V2',
'DEEP_RESEARCH_PYAGENT',
'DEEP_RESEARCH_PREVIEW'
];
WITH glean_customer_event AS (
SELECT *
FROM
-- replace <glean_customer_event_table> with your firm's table name
`<glean_customer_event_table>`
WHERE
-- Optional project filtering - only filter if project_id is specified
(project_id IS NULL OR resource.labels.project_id = project_id)
)
, -- Extract the user level data from the most recent date to get most recent user attributes
-- Concatenate aliasids at a userid level
product_snapshot_agg AS (
SELECT
jsonPayload.productsnapshot.user.id AS userid,
MAX(jsonPayload.productsnapshot.user.signuptime) AS signuptime,
ARRAY_CONCAT_AGG(jsonPayload.productsnapshot.user.aliasids) AS aliasids,
MAX(jsonPayload.productsnapshot.user.department) AS department
FROM glean_customer_event
WHERE
-- Adjust as needed
DATE(timestamp) = CURRENT_DATE - INTERVAL 1 DAY
AND jsonPayload.type = 'PRODUCT_SNAPSHOT'
GROUP BY 1
)
, -- [Optional]: For users belonging to departments with less than 5 members, assign them a single bucket
latest_orgchart_data AS (
SELECT
userid,
signuptime,
aliasids,
CASE WHEN COUNT(userid) OVER (PARTITION BY department) < 5
THEN "Departments less than 5 Users"
ELSE department
END AS department
FROM product_snapshot_agg
)
, -- Map each alias to the canonical UserID
id_to_alias AS (
SELECT
DISTINCT aliasid,
userid AS canonicalid
FROM
latest_orgchart_data, UNNEST(aliasids) AS aliasid
)
, -- [IMP]: Calculate chat usage metrics.
/*
This CTE merges data across three event types: CHAT, WORKFLOW, and WORKFLOW_RUN.
The WORKFLOW event type was active from mid-February to mid-June 2025, after which it was replaced by WORKFLOW_RUN.
Before that period, all chat-related activity was logged under the CHAT event type.
To calculate accurate Chat usage metrics, we first UNION WORKFLOW and WORKFLOW_RUN. Since only one is ever populated
at a given time, this is a safe operation. We then perform a FULL OUTER JOIN with CHAT to deduplicate any overlapping
runs that appear in both CHAT and WORKFLOW/WORKFLOW_RUN logs.
*/
chat_usage as (
-- Workflow data + Workflow run data
SELECT
COALESCE(wf.datepartition, chat.datepartition) AS datepartition,
COALESCE(wf.userid, chat.userid) AS userid,
COUNT(DISTINCT wf.run_id) + COUNT(DISTINCT CASE WHEN wf.run_id IS NULL THEN chat.qtt END) AS num_chat_queries
FROM (
SELECT
jsonPayload.workflow.runid AS run_id,
jsonPayload.user.userid,
MIN(DATE(timestamp)) AS datepartition
FROM glean_customer_event
WHERE
jsonPayload.type = 'WORKFLOW'
-- These workflowIDs correspond to Chats
AND jsonPayload.workflow.workflowid IN UNNEST(workflow_ids)
AND jsonPayload.workflow.initiator = 'USER'
GROUP BY 1,2
UNION ALL
SELECT
DISTINCT jsonPayload.workflowrun.runid AS run_id,
jsonPayload.user.userid,
DATE(timestamp) AS datepartition
FROM glean_customer_event,
UNNEST(jsonPayload.workflowrun.workflowexecutions) AS wfe
WHERE jsonPayload.type = 'WORKFLOW_RUN'
AND jsonPayload.workflowrun.initiator = 'USER'
AND wfe.workflowid IN UNNEST(workflow_ids)
) wf
-- Join the unioned workflow data with chat data
FULL OUTER JOIN (
SELECT
DATE(timestamp) AS datepartition,
jsonPayload.user.userid,
jsonPayload.chat.qtt AS qtt,
jsonPayload.chat.workflowrunid AS workflowrunid
FROM glean_customer_event
WHERE jsonPayload.type = 'CHAT'
AND jsonPayload.chat.initiator = 'USER'
) chat
ON wf.run_id = chat.workflowrunid
GROUP BY 1, 2
)
, feature_usage AS (
SELECT
datepartition,
userid,
COALESCE(chat_usage.num_chat_queries, 0) AS _num_chats
FROM chat_usage
)
, canonicalized AS (
SELECT
datepartition,
COALESCE(id_to_alias.canonicalid, feature_usage.userid) AS userid,
COALESCE(SUM(_num_chats), 0) AS num_chats
FROM feature_usage
LEFT JOIN id_to_alias
ON feature_usage.userid = id_to_alias.aliasid
WHERE
COALESCE(id_to_alias.canonicalid, feature_usage.userid) IS NOT NULL
GROUP BY 1, 2
HAVING COALESCE(SUM(_num_chats), 0) > 0
)
SELECT * FROM canonicalized ORDER BY datepartition DESC, userid
User Feedback Rates on Glean Chat Custom Apps per AI Application
User Feedback Rates on Glean Chat Custom Apps per AI Application
Grain: Daily aggregation by AI application
Use Case: Monitor custom AI app performance and user satisfaction
DECLARE project_id STRING DEFAULT NULL; -- Replace with your project ID or leave as NULL
DECLARE lookback_period INT64 DEFAULT 28;
WITH glean_customer_event AS (
SELECT *
FROM
-- replace <glean_customer_event_table> with your firm's table name
`<glean_customer_event_table>`
WHERE
-- filter for data from the last 28 days (adjust as needed)
timestamp >= TIMESTAMP(DATE_ADD(CURRENT_DATE, INTERVAL -lookback_period DAY))
-- Optional project filtering - only filter if project_id is specified
AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
, feedback_events AS (
SELECT
DISTINCT TIMESTAMP_TRUNC(m.`timestamp`, DAY) AS utc_day
, m.jsonPayload.Chat.Qtt AS tracking_token --Query Tracking Token
, COALESCE(m.jsonPayload.Chat.ApplicationId, 'Default Gleanchat') AS application_id
, f.jsonPayload.ChatFeedback.Event AS event
FROM
glean_customer_event m
LEFT JOIN glean_customer_event f
ON m.jsonPayload.Chat.ResponseMessageId = f.jsonPayload.ChatFeedback.MessageId
WHERE
m.jsonPayload.Type = 'CHAT'
AND f.jsonPayload.Type = 'CHAT_FEEDBACK'
UNION ALL
SELECT
DISTINCT TIMESTAMP_TRUNC(m.`timestamp`, DAY) AS utc_day
, m.jsonPayload.WorkflowRun.RunId AS tracking_token
, COALESCE(m.jsonPayload.WorkflowRun.ApplicationId, 'Default Gleanchat') AS application_id
, f.jsonPayload.ChatFeedback.Event AS event
FROM
glean_customer_event m
LEFT JOIN glean_customer_event f
ON m.jsonPayload.WorkflowRun.RunId = f.jsonPayload.ChatFeedback.RunId
WHERE
m.jsonPayload.Type = 'WORKFLOW_RUN'
AND f.jsonPayload.Type = 'CHAT_FEEDBACK'
)
SELECT
application_id
, utc_day --dateparititon
, COUNT(DISTINCT tracking_token) AS num_interactions
, COUNT(DISTINCT CASE WHEN event = 'UPVOTE' THEN tracking_token END) AS upvotes
, COUNT(DISTINCT CASE WHEN event = 'DOWNVOTE' THEN tracking_token END) AS downvotes
, COUNT(DISTINCT CASE WHEN event = 'MANUAL_FEEDBACK' THEN tracking_token END) AS shared_feedback
FROM
feedback_events
WHERE
COALESCE(NULLIF(application_id,''), 'Default Gleanchat') != 'Default Gleanchat'
GROUP BY
1, 2
Upvoted/Downvoted Chat Messages with Comments
Upvoted/Downvoted Chat Messages with Comments
Grain: Individual chat message level showing ratings and comments if exists
Use Case: Analyze chat response quality and gather detailed user feedback for improvements
Note: Requires raw logs.
-- Parameterized lookback period using variables
DECLARE lookback_period INT64 DEFAULT 14;
DECLARE lookback_unit STRING DEFAULT 'DAY';
DECLARE project_id STRING DEFAULT NULL; -- Replace with your project ID or leave as NULL
BEGIN
WITH
-- Base CTE to centralize table reference with parameterized lookback period and optional project filtering
glean_customer_event AS (
SELECT
DATE(timestamp) AS datepartition
, resource.labels.project_id AS project_id
, jsonPayload.User.UserId AS userid
, jsonPayload
, timestamp
FROM
-- replace <glean_customer_event_table> with your firm's table name
`<glean_customer_event_table>`
WHERE
timestamp >= CASE
WHEN lookback_unit = 'DAY' THEN TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period DAY))
WHEN lookback_unit = 'WEEK' THEN TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period WEEK))
WHEN lookback_unit = 'MONTH' THEN TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period MONTH))
WHEN lookback_unit = 'HOUR' THEN TIMESTAMP(DATETIME_SUB(CURRENT_DATETIME(), INTERVAL lookback_period HOUR))
ELSE TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period DAY)) -- Default to DAY
END
-- Optional project filtering - only filter if project_id is specified
AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
,chat_events AS (
SELECT DISTINCT
m.`timestamp` AS message_time
, m.jsonPayload.User.Userid
, f.`timestamp` AS feedback_time
, f.jsonPayload.ChatFeedback.Event AS rating
, fc.jsonPayload.ChatFeedback.comments AS comments
, m.jsonPayload.Chat.Qtt AS run_id
FROM
glean_customer_event m
LEFT JOIN
glean_customer_event f
ON m.jsonPayload.Chat.ResponseMessageId = f.jsonPayload.ChatFeedback.MessageId
LEFT JOIN
-- For the same chat message we might receive both upvote/downvote and manual feedback. This is to collect comments from the user.
glean_customer_event fc
ON m.jsonPayload.Chat.ResponseMessageId = fc.jsonPayload.ChatFeedback.MessageId
WHERE
m.jsonPayload.Type = 'CHAT'
AND f.jsonPayload.Type = 'CHAT_FEEDBACK'
AND f.jsonPayload.ChatFeedback.Event IN ('UPVOTE', 'DOWNVOTE')
AND fc.jsonPayload.Type = 'CHAT_FEEDBACK'
AND fc.jsonPayload.ChatFeedback.Event = ('MANUAL_FEEDBACK')
)
,workflow_events AS (
SELECT DISTINCT
m.`timestamp` AS message_time
, m.jsonPayload.User.Userid
, f.`timestamp` AS feedback_time
, f.jsonPayload.ChatFeedback.Event AS rating
, fc.jsonPayload.ChatFeedback.comments AS comments
, m.jsonPayload.workflow.RunID AS run_id
FROM
glean_customer_event m
LEFT JOIN
glean_customer_event f
ON m.jsonPayload.Workflow.RunId = f.jsonPayload.ChatFeedback.RunID
LEFT JOIN
-- For the same chat message we might receive both upvote/downvote and manual feedback. This is to collect comments from the user.
glean_customer_event fc
ON m.jsonPayload.Workflow.RunId = fc.jsonPayload.ChatFeedback.RunID
WHERE
m.jsonPayload.Type = 'WORKFLOW'
AND f.jsonPayload.Type = 'CHAT_FEEDBACK'
AND f.jsonPayload.ChatFeedback.Event IN ('UPVOTE', 'DOWNVOTE')
AND fc.jsonPayload.Type = 'CHAT_FEEDBACK'
AND fc.jsonPayload.ChatFeedback.Event = ('MANUAL_FEEDBACK')
)
,workflowrun_events AS (
SELECT DISTINCT
m.`timestamp` AS message_time
, m.jsonPayload.User.Userid
, f.`timestamp` AS feedback_time
, f.jsonPayload.ChatFeedback.Event AS rating
, fc.jsonPayload.ChatFeedback.comments AS comments
, m.jsonPayload.workflowRun.RunID AS run_id
FROM
glean_customer_event m
LEFT JOIN
glean_customer_event f
ON m.jsonPayload.WorkflowRun.RunId = f.jsonPayload.ChatFeedback.RunID
LEFT JOIN
-- For the same chat message we might receive both upvote/downvote and manual feedback. This is to collect comments from the user.
glean_customer_event fc
ON m.jsonPayload.WorkflowRun.RunId = fc.jsonPayload.ChatFeedback.RunID
WHERE
m.jsonPayload.Type = 'WORKFLOW_RUN'
AND f.jsonPayload.Type = 'CHAT_FEEDBACK'
AND f.jsonPayload.ChatFeedback.Event IN ('UPVOTE', 'DOWNVOTE')
AND fc.jsonPayload.Type = 'CHAT_FEEDBACK'
AND fc.jsonPayload.ChatFeedback.Event = ('MANUAL_FEEDBACK')
)
SELECT *
FROM chat_events
UNION ALL
SELECT *
FROM workflow_events
UNION ALL
SELECT *
FROM workflowrun_events;
END
All Chat Citations
All Chat Citations
Grain: Individual citation level over configurable time period
Use Case: Extract all chat citations for analysis, including source documents, files, and people cited in chat responses across both legacy and modern workflow implementations
DECLARE lookback_period INT64 DEFAULT 7;
DECLARE lookback_unit STRING DEFAULT 'DAY';
DECLARE project_id STRING DEFAULT NULL; -- Replace with your project ID or leave as NULL -- Set to NULL to include all projects, or specify a project ID
DECLARE workflow_ids ARRAY<STRING> DEFAULT [
'DOC_CONTEXT_READER',
'ORIGINAL_MESSAGE_SEARCH',
'DIRECT_LLM_RESPONSE',
'REACT_TOOLS_2_HOPS',
'DEFAULT_CHAT',
'WORLD_MODE_V2',
'DEEP_RESEARCH_PYAGENT',
'DEEP_RESEARCH_PREVIEW'
];
WITH
-- Base CTE to centralize table reference with parameterized lookback period and optional project filtering
glean_customer_event AS (
SELECT
DATE(timestamp) AS datepartition
, resource.labels.project_id AS project_id
, jsonPayload.User.UserId AS userid
, jsonPayload
, timestamp
FROM
-- replace <glean_customer_event_table> with your firm's table name
<glean_customer_event_table>
WHERE
timestamp >= CASE
WHEN lookback_unit = 'DAY' THEN TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period DAY))
WHEN lookback_unit = 'WEEK' THEN TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period WEEK))
WHEN lookback_unit = 'MONTH' THEN TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period MONTH))
WHEN lookback_unit = 'HOUR' THEN TIMESTAMP(DATETIME_SUB(CURRENT_DATETIME(), INTERVAL lookback_period HOUR))
ELSE TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period DAY)) -- Default to DAY
END
-- Optional project filtering - only filter if project_id is specified
AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
-- Legacy CHAT_CITATIONS events
, chat_citations_events AS (
SELECT
DATE(timestamp) as datepartition,
jsonPayload.user.userid,
jsonPayload.user.useremail,
jsonPayload.chatcitations.chatsessionid,
jsonPayload.chatcitations.workflowrunid,
citation.trackingtoken,
citation.sourcedocument.datasource,
citation.sourcedocument.doctype,
citation.sourcedocument.id AS source_document_id,
citation.sourcedocument.title AS source_document_title,
citation.sourcedocument.url AS source_document_url,
citation.sourcefile.id AS source_file_id,
citation.sourcefile.name AS source_file_name,
citation.sourceperson.id AS source_person_id,
citation.sourceperson.name AS source_person_name,
'CHAT_CITATIONS' AS event_source
FROM glean_customer_event,
UNNEST(jsonPayload.chatcitations.citations) AS citation
WHERE jsonPayload.Type = 'CHAT_CITATIONS'
)
-- WORKFLOW_RUN citations for chat workflows
, workflow_run_citations AS (
SELECT
DATE(timestamp) as datepartition,
jsonPayload.user.userid,
jsonPayload.user.useremail,
jsonPayload.workflowrun.chatsessionid,
jsonPayload.workflowrun.runid AS workflowrunid,
citation.trackingtoken,
citation.sourcedocument.datasource,
citation.sourcedocument.doctype,
citation.sourcedocument.id AS source_document_id,
citation.sourcedocument.title AS source_document_title,
citation.sourcedocument.url AS source_document_url,
citation.sourcefile.id AS source_file_id,
citation.sourcefile.name AS source_file_name,
citation.sourceperson.id AS source_person_id,
citation.sourceperson.name AS source_person_name,
'WORKFLOW_RUN' AS event_source
FROM glean_customer_event,
UNNEST(jsonPayload.workflowrun.workflowexecutions) AS workflow_execution,
UNNEST(jsonPayload.workflowrun.stepexecutions) AS step_execution,
UNNEST(step_execution.citations) AS citation
WHERE jsonPayload.Type = 'WORKFLOW_RUN'
AND jsonPayload.workflowrun.initiator = 'USER'
-- Filter for chat-related workflows
AND workflow_execution.workflowid IN UNNEST(workflow_ids)
)
-- Combine all chat citations
SELECT * FROM chat_citations_events
UNION DISTINCT
SELECT * FROM workflow_run_citations
ORDER BY datepartition DESC
Chat Message and Response Analysis
Chat Message and Response Analysis
Grain: Individual chat run level over configurable time period
Use Case: Analyze chat conversation patterns, response quality, and user engagement across departments and user segments
DECLARE project_id STRING DEFAULT NULL; -- Replace with your project ID or leave as NULL
DECLARE start_date DATE DEFAULT DATE_SUB(CURRENT_DATE(), INTERVAL 28 DAY);
DECLARE end_date DATE DEFAULT CURRENT_DATE();
DECLARE chat_workflow_ids ARRAY<STRING> DEFAULT [
'DOC_CONTEXT_READER',
'ORIGINAL_MESSAGE_SEARCH',
'DIRECT_LLM_RESPONSE',
'REACT_TOOLS_2_HOPS',
'DEFAULT_CHAT',
'WORLD_MODE_V2',
'DEEP_RESEARCH_PYAGENT',
'DEEP_RESEARCH_PREVIEW'
];
WITH glean_customer_event AS (
SELECT *
FROM
-- replace <glean_customer_event_table> with your firm's table name
<glean_customer_event_table>
WHERE
-- Optional project filtering - only filter if project_id is specified
(project_id IS NULL OR resource.labels.project_id = project_id)
AND timestamp BETWEEN TIMESTAMP(start_date) and TIMESTAMP(end_date)
)
, chat_usage as (
-- Workflow data + Workflow run data
SELECT
COALESCE(wf.datepartition, chat.datepartition) AS date_partition
, COALESCE(wf.run_id, chat.workflow_run_id) as run_id
, COALESCE(wf.useremail, chat.useremail) as user_email
, COALESCE(wf.department, chat.department) as department
FROM (
SELECT
DISTINCT jsonPayload.workflow.runid AS run_id
, jsonPayload.user.*
, DATE(timestamp) AS datepartition
FROM glean_customer_event
WHERE
jsonPayload.workflow.workflowid IN UNNEST(chat_workflow_ids)
AND jsonPayload.workflow.initiator = 'USER'
UNION ALL
SELECT
DISTINCT jsonPayload.workflowrun.runid AS run_id
, jsonPayload.user.*
, DATE(timestamp) AS datepartition
FROM glean_customer_event, UNNEST(jsonPayload.workflowrun.workflowexecutions) AS wfe
WHERE jsonPayload.type = 'WORKFLOW_RUN'
AND jsonPayload.workflowrun.initiator = 'USER'
AND wfe.workflowid IN UNNEST(chat_workflow_ids)
) wf
-- Join the unioned workflow data with chat data
FULL OUTER JOIN (
SELECT
DATE(timestamp) AS datepartition
, jsonPayload.user.*
, jsonPayload.chat.workflowrunid AS workflow_run_id
FROM glean_customer_event
WHERE jsonPayload.type = 'CHAT'
AND jsonPayload.chat.initiator = 'USER'
) chat
ON wf.run_id = chat.workflow_run_id
)
, workflow_conversation AS(
SELECT
jsonPayload.workflowconversation.workflowrunid as run_id
, MAX(CASE WHEN message.author='USER' THEN text END) as user_query
, MAX(CASE WHEN message.author='GLEAN' THEN text END) as response
FROM
glean_customer_event, UNNEST(jsonPayload.workflowconversation.messages) as message
WHERE
COALESCE(jsonPayload.workflowconversation.workflowrunid, '') != ''
GROUP BY 1
)
SELECT
chat_usage.* except(run_id)
, user_query
, response
, chat_usage.run_id
FROM chat_usage
LEFT JOIN workflow_conversation
USING(run_id)
ORDER by date_partition DESC
Chat Citation Clicks
Chat Citation Clicks
Grain: Individual chat run level over configurable time period
Use Case: Analyze chat citation click data
DECLARE lookback_period INT64 DEFAULT 25;
DECLARE lookback_unit STRING DEFAULT 'DAY';
DECLARE project_id STRING DEFAULT NULL; -- Replace with your project ID or leave as NULL -- Set to NULL to include all projects, or specify a project ID
WITH
-- Base CTE to centralize table reference with parameterized lookback period and optional project filtering
glean_customer_event AS (
SELECT
DATE(timestamp) AS datepartition
, resource.labels.project_id AS project_id
, jsonPayload.User.UserId AS userid
, jsonPayload
, timestamp
FROM
-- replace <glean_customer_event_table> with your firm's table name
<glean_customer_event_table>
WHERE
timestamp >= CASE
WHEN lookback_unit = 'DAY' THEN TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period DAY))
WHEN lookback_unit = 'WEEK' THEN TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period WEEK))
WHEN lookback_unit = 'MONTH' THEN TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period MONTH))
WHEN lookback_unit = 'HOUR' THEN TIMESTAMP(DATETIME_SUB(CURRENT_DATETIME(), INTERVAL lookback_period HOUR))
ELSE TIMESTAMP(DATE_SUB(CURRENT_DATE(), INTERVAL lookback_period DAY)) -- Default to DAY
END
-- Optional project filtering - only filter if project_id is specified
AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
,ccc AS (
SELECT
project_id,
jsonPayload.user.userid AS user_id,
jsonPayload.chatcitationclick.datasource AS citation_datasource,
jsonPayload.chatcitationclick.trackingtoken AS citation_click_tracking_token,
timestamp
FROM glean_customer_event
WHERE jsonPayload.Type = 'CHAT_CITATION_CLICK'
),
wr AS (
SELECT
project_id,
jsonPayload.user.userid AS user_id,
jsonPayload.workflowrun.chatsessionid AS chat_session_id,
jsonPayload.workflowrun.runid AS run_id,
search_tracking_token
FROM glean_customer_event, UNNEST(jsonPayload.workflowrun.actionexecutions) AS ae,
UNNEST(ae.searchtrackingtokens) AS search_tracking_token
WHERE jsonPayload.Type = 'WORKFLOW_RUN'
),
cc AS (
SELECT
jsonPayload.chatcitations.workflowrunid AS run_id,
jsonPayload.chatcitations.chatsessionid AS chat_session_id,
c.trackingtoken AS citation_tracking_token,
c.sourcedocument.datasource AS cited_datasource,
c.sourcedocument.id AS doc_id,
c.sourcedocument.title AS doc_title, -- NULL in scrubbed logs
c.sourcedocument.url AS doc_url -- NULL in scrubbed logs
FROM glean_customer_event,
UNNEST(jsonPayload.chatcitations.citations) AS c
WHERE jsonPayload.Type = 'CHAT_CITATIONS'
)
SELECT
ccc.timestamp,
ccc.user_id,
ccc.citation_datasource,
wr.chat_session_id,
wr.run_id,
cc.doc_id,
cc.doc_title,
cc.doc_url
FROM ccc
LEFT JOIN wr
ON ccc.project_id = wr.project_id
AND ccc.user_id = wr.user_id
AND ccc.citation_click_tracking_token = wr.search_tracking_token
LEFT JOIN cc
ON wr.run_id = cc.run_id
AND wr.chat_session_id = cc.chat_session_id
AND ccc.citation_click_tracking_token = cc.citation_tracking_token
Agent Metrics
Agent Types Explained: When viewing query results in this section, you’ll see agents categorized by type:-
AGENT: Standard conversational agents created and managed by users. These are the primary agents that end-users interact with directly. -
SUBAGENT: Internal helper agents that are called by other agents to perform specialized tasks. These run behind the scenes and are not directly invoked by users. Common examples include search agents, data retrieval agents, and tool execution agents. Note: In production environments, you may see a high volume of subagent runs as they are triggered by parent agents during workflow execution. -
AGENT_TEMPLATE: Pre-built agent configurations provided by Glean that users can clone, customize, and deploy as their own agents. These serve as starting points for common use cases.
Agent Usage and Lifecycle Metrics
Agent Usage and Lifecycle Metrics
Grain: Per agent aggregation over configurable time period (default: 185 days)
Use Case: Track agent lifecycle from creation through usage, analyze agent popularity and engagement trends, identify stale agentsAgent Types Explained:When viewing query results, you’ll see agents categorized by type:
-
AGENT: Standard conversational agents created and managed by users. These are the primary agents that end-users interact with directly. -
SUBAGENT: Internal helper agents that are called by other agents to perform specialized tasks. These run behind the scenes and are not directly invoked by users. Common examples include search agents, data retrieval agents, and tool execution agents. Note: In production environments, you may see a high volume of subagent runs as they are triggered by parent agents during workflow execution. -
AGENT_TEMPLATE: Pre-built agent configurations provided by Glean that users can clone, customize, and deploy as their own agents. These serve as starting points for common use cases.
DECLARE project_id STRING DEFAULT NULL; -- Replace with your project ID or leave as NULL
DECLARE lookback_days INT64 DEFAULT 185;
WITH glean_customer_event AS (
SELECT
resource,
DATE(DATE_TRUNC(timestamp, DAY)) AS datepartition,
jsonPayload,
timestamp,
FROM
-- replace <glean_customer_event_table> with your firm's table name
`<glean_customer_event_table>`
WHERE
timestamp >= TIMESTAMP(DATE_SUB(CURRENT_DATE, INTERVAL lookback_days DAY))
-- Optional project filtering - only filter if project_id is specified
AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
-- Get agent metadata from latest snapshots
, agent_info AS (
SELECT
resource.labels.project_id,
jsonPayload.productsnapshot.workflow.workflowid AS workflow_id,
ARRAY_AGG(jsonPayload.productsnapshot.workflow.name ORDER BY timestamp DESC LIMIT 1)[OFFSET(0)] AS name,
ARRAY_AGG(jsonPayload.productsnapshot.workflow.namespaceenum ORDER BY timestamp DESC LIMIT 1)[OFFSET(0)] AS workflow_type,
ARRAY_AGG(
PARSE_DATE('%Y-%m-%d', SUBSTR(jsonPayload.productsnapshot.workflow.createdat, 1, 10))
ORDER BY timestamp DESC LIMIT 1
)[OFFSET(0)] AS created_date,
ARRAY_AGG(jsonPayload.productsnapshot.workflow.createdby ORDER BY timestamp DESC LIMIT 1)[OFFSET(0)] AS createdby
FROM glean_customer_event
WHERE
jsonPayload.productsnapshot.workflow.namespaceenum IN ('AGENT', 'SUBAGENT', 'AGENT_TEMPLATE')
-- Filter out draft-only agents that were never published/deployed for actual use
AND jsonPayload.productsnapshot.workflow.isdraftonly = FALSE
GROUP BY 1, 2
)
-- Agent runs from WORKFLOW_RUN (must be in agent_info)
, agent_runs AS (
SELECT
DATE(DATE_TRUNC(timestamp, DAY)) AS datepartition,
resource.labels.project_id,
workflow_execution.workflowid AS workflow_id,
jsonpayload.workflowrun.runid AS run_id
FROM glean_customer_event, UNNEST(jsonpayload.workflowrun.workflowexecutions) AS workflow_execution
WHERE
jsonPayload.type = 'WORKFLOW_RUN'
AND workflow_execution.status = 'SUCCESS'
)
-- Filter agent runs to only those in agent_info
, agent_runs_filtered AS (
SELECT ar.*
FROM agent_runs ar
INNER JOIN agent_info ai ON ar.workflow_id = ai.workflow_id
)
-- App runs from CHAT (don't require agent_info)
, app_runs_from_chat AS (
SELECT
DATE(DATE_TRUNC(timestamp, DAY)) AS datepartition,
resource.labels.project_id,
jsonpayload.chat.applicationid AS workflow_id,
IF(
jsonpayload.chat.workflowrunid IS NOT NULL AND jsonpayload.chat.workflowrunid != '',
jsonpayload.chat.workflowrunid,
jsonpayload.chat.qtt
) AS run_id
FROM glean_customer_event
WHERE
jsonPayload.type = 'CHAT'
-- Filter for valid agent/app IDs: Glean assigns 16-character IDs to custom apps/agents
AND LENGTH(jsonpayload.chat.applicationid) = 16
)
-- App runs from WORKFLOW_RUN.applicationid (don't require agent_info)
, app_runs_from_workflow AS (
SELECT
DATE(DATE_TRUNC(timestamp, DAY)) AS datepartition,
resource.labels.project_id,
jsonPayload.workflowrun.applicationid AS workflow_id,
jsonpayload.workflowrun.runid AS run_id
FROM glean_customer_event, UNNEST(jsonpayload.workflowrun.workflowexecutions) AS workflow_execution
WHERE
jsonPayload.type = 'WORKFLOW_RUN'
AND jsonPayload.workflowrun.applicationid IS NOT NULL
AND jsonPayload.workflowrun.applicationid != ''
AND workflow_execution.status = 'SUCCESS'
GROUP BY 1, 2, 3, 4
)
-- Combine all runs (both agent and app)
, all_runs AS (
SELECT * FROM agent_runs_filtered
UNION ALL
SELECT * FROM app_runs_from_chat
UNION ALL
SELECT * FROM app_runs_from_workflow
)
-- Add creation dates as activity (only for agents with metadata)
, all_activity AS (
SELECT datepartition, project_id, workflow_id, run_id
FROM all_runs
UNION ALL
SELECT
created_date AS datepartition,
project_id,
workflow_id,
CONCAT('creation_', workflow_id) AS run_id
FROM agent_info
WHERE
created_date >= CURRENT_DATE - INTERVAL lookback_days DAY
AND created_date <= CURRENT_DATE - INTERVAL '1' DAY
AND created_date != DATE('0001-01-01')
AND createdby IS NOT NULL
)
-- Final aggregation with LEFT JOIN to include apps without metadata
SELECT
aa.project_id,
aa.workflow_id AS agent_id,
COALESCE(ai.name, aa.workflow_id) AS agent_name,
ai.workflow_type AS agent_type,
COUNT(DISTINCT CASE WHEN aa.run_id NOT LIKE 'creation_%' THEN aa.run_id END) AS num_agent_runs,
MAX(aa.datepartition) AS last_usage_date,
MIN(aa.datepartition) AS first_usage_date
FROM all_activity aa
LEFT JOIN agent_info ai ON aa.workflow_id = ai.workflow_id
GROUP BY 1, 2, 3, 4
ORDER BY 5 DESC;
Agent Creation Metrics
Agent Creation Metrics
Grain: Individual agent level for agents created within the lookback period (default: 185 days)
Use Case: Track who is creating agents, analyze permission settings, identify agent creation trends, and monitor agent lifecycle from creation
DECLARE project_id STRING DEFAULT NULL; -- Replace with your project ID or leave as NULL
DECLARE lookback_days INT64 DEFAULT 185;
WITH glean_customer_event AS (
SELECT
resource,
DATE(DATE_TRUNC(timestamp, DAY)) AS datepartition,
jsonPayload,
timestamp,
FROM
-- replace <glean_customer_event_table> with your firm's table name
`<glean_customer_event_table>`
WHERE
timestamp >= TIMESTAMP(DATE_SUB(CURRENT_DATE, INTERVAL lookback_days DAY))
-- Optional project filtering - only filter if project_id is specified
AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
-- Get latest workflow snapshot info (filter to agents only)
, workflows_last_snapshot_date AS (
SELECT
resource.labels.project_id,
jsonPayload.productsnapshot.workflow.workflowid AS agent_id,
MAX(DATE(DATE_TRUNC(timestamp, DAY))) AS latest_snapshot_date
FROM glean_customer_event
WHERE
jsonPayload.type = 'PRODUCT_SNAPSHOT'
AND jsonPayload.productsnapshot.type = 'WORKFLOW'
AND jsonPayload.productsnapshot.workflow.namespaceenum IN ('AGENT', 'SUBAGENT', 'AGENT_TEMPLATE')
-- Filter out draft-only agents that were never published/deployed for actual use
AND jsonPayload.productsnapshot.workflow.isdraftonly = FALSE
GROUP BY 1, 2
)
-- Extract all workflow snapshot data with parsed dates (agents only)
, all_workflow_snapshots AS (
SELECT
resource.labels.project_id,
jsonPayload.productsnapshot.workflow.workflowid AS agent_id,
DATE(DATE_TRUNC(timestamp, DAY)) AS snapshot_date,
jsonPayload.productsnapshot.workflow.name AS name,
-- Parse dates once to reuse
PARSE_DATE('%Y-%m-%d', SUBSTR(jsonPayload.productsnapshot.workflow.createdat, 1, 10)) AS created_date,
jsonPayload.productsnapshot.workflow.createdby AS createdby,
jsonPayload.productsnapshot.workflow.trigger.type AS trigger_type,
jsonPayload.productsnapshot.workflow.namespaceenum AS workflow_type,
permissions.role AS role,
permissions.id AS permission_id,
permissions.type AS permission_type
FROM glean_customer_event, UNNEST(jsonPayload.productsnapshot.workflow.roles) AS permissions
WHERE
jsonPayload.type = 'PRODUCT_SNAPSHOT'
AND jsonPayload.productsnapshot.type = 'WORKFLOW'
AND jsonPayload.productsnapshot.workflow.namespaceenum IN ('AGENT', 'SUBAGENT', 'AGENT_TEMPLATE')
)
-- Get most recent workflow metadata
, all_workflows_info AS (
SELECT
ws.project_id,
ws.agent_id,
ws.name,
ws.created_date,
ws.createdby,
ws.trigger_type,
ws.workflow_type,
ws.role,
ws.permission_id,
ws.permission_type,
-- Mark as deleted if snapshot is older than yesterday
ws.snapshot_date < CURRENT_DATE - INTERVAL '1' DAY AS is_deleted
FROM all_workflow_snapshots ws
INNER JOIN workflows_last_snapshot_date lsd
ON ws.agent_id = lsd.agent_id
AND ws.snapshot_date = lsd.latest_snapshot_date
)
-- Aggregate workflow metadata with permission levels
, all_workflows AS (
SELECT DISTINCT
created_date,
project_id,
-- Handle system-created workflows (date = 0001-01-01)
CASE
WHEN created_date = DATE('0001-01-01') THEN NULL
ELSE createdby
END AS creator_id,
agent_id,
name,
trigger_type,
workflow_type,
created_date = DATE('0001-01-01') AS is_system_created_workflow,
is_deleted,
-- Determine viewer audience based on permissions
MAX(CASE
WHEN role IN ('VIEWER', 'EDITOR') AND permission_type = 'ALL' THEN '4) All users'
WHEN role IN ('VIEWER', 'EDITOR') AND permission_type = 'DEPARTMENT' THEN '3) Users in select departments'
WHEN role IN ('VIEWER', 'EDITOR') AND permission_type = 'USER' THEN '2) Select users'
ELSE '1) Owner only'
END) AS viewer_audience,
-- Determine editor audience
MAX(CASE
WHEN role = 'EDITOR' AND permission_type = 'ALL' THEN '4) All users'
WHEN role = 'EDITOR' AND permission_type = 'DEPARTMENT' THEN '3) Users in select departments'
WHEN role = 'EDITOR' AND permission_type = 'USER' THEN '2) Select users'
ELSE '1) Owner only'
END) AS editor_audience
FROM all_workflows_info
GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9
)
-- Agent creation metrics
SELECT
created_date,
creator_id,
project_id,
agent_id,
name,
workflow_type AS agent_type,
trigger_type,
viewer_audience,
editor_audience,
is_deleted
FROM all_workflows
WHERE
workflow_type IN ('AGENT', 'SUBAGENT', 'AGENT_TEMPLATE')
-- Exclude system-created workflows
AND is_system_created_workflow = FALSE
AND creator_id IS NOT NULL
-- Only include creations within our time window
AND created_date >= CURRENT_DATE - INTERVAL lookback_days DAY
AND created_date <= CURRENT_DATE - INTERVAL '1' DAY
ORDER BY
created_date DESC,
creator_id;
Agent Feedback Metrics
Agent Feedback Metrics
Grain: Per user per agent aggregation over configurable time period (default: 28 days)
Use Case: Track agent response quality through user feedback, identify poorly performing agents, measure user satisfaction trends
DECLARE project_id STRING DEFAULT NULL; -- Replace with your project ID or leave as NULL
DECLARE lookback_days INT64 DEFAULT 28; -- Number of days to look back (default: 28 days)
WITH glean_customer_event AS (
SELECT
resource,
DATE(DATE_TRUNC(timestamp, DAY)) AS datepartition,
jsonPayload,
timestamp,
FROM
-- replace <glean_customer_event_table> with your firm's table name
`<glean_customer_event_table>`
WHERE
timestamp >= TIMESTAMP(DATE_SUB(CURRENT_DATE, INTERVAL lookback_days DAY))
-- Optional project filtering - only filter if project_id is specified
AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
SELECT
resource,
DATE(DATE_TRUNC(timestamp, DAY)) AS datepartition,
jsonPayload,
timestamp,
FROM
-- replace <glean_customer_event_table> with your firm's table name
`<glean_customer_event_table>`
WHERE
timestamp >= TIMESTAMP(DATE_SUB(CURRENT_DATE, INTERVAL lookback_days DAY))
-- Optional project filtering - only filter if project_id is specified
AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
-- Get latest workflow snapshot info (filter to agents only)
, workflows_last_snapshot_date AS (
SELECT
resource.labels.project_id,
jsonPayload.productsnapshot.workflow.workflowid AS agent_id,
MAX(DATE(DATE_TRUNC(timestamp, DAY))) AS latest_snapshot_date
FROM glean_customer_event
WHERE
jsonPayload.productsnapshot.workflow.namespaceenum IN ('AGENT', 'SUBAGENT', 'AGENT_TEMPLATE')
-- Filter out draft-only agents that were never published/deployed for actual use
AND jsonPayload.productsnapshot.workflow.isdraftonly = FALSE
GROUP BY 1, 2
)
-- Extract workflow snapshot data (agents only)
, all_workflow_snapshots AS (
SELECT
resource.labels.project_id,
jsonPayload.productsnapshot.workflow.workflowid AS agent_id,
DATE(DATE_TRUNC(timestamp, DAY)) AS snapshot_date,
jsonPayload.productsnapshot.workflow.name AS name,
jsonPayload.productsnapshot.workflow.namespaceenum AS workflow_type
FROM glean_customer_event
WHERE
jsonPayload.productsnapshot.workflow.namespaceenum IN ('AGENT', 'SUBAGENT', 'AGENT_TEMPLATE')
-- Filter out draft-only agents that were never published/deployed for actual use
AND jsonPayload.productsnapshot.workflow.isdraftonly = FALSE
)
-- Get most recent agent metadata
, agent_info AS (
SELECT
ws.project_id,
ws.agent_id,
ws.name,
ws.workflow_type
FROM all_workflow_snapshots ws
INNER JOIN workflows_last_snapshot_date lsd
ON ws.agent_id = lsd.agent_id
AND ws.snapshot_date = lsd.latest_snapshot_date
)
-- Agent feedback from CHAT_FEEDBACK events
, agent_feedback_raw AS (
SELECT
gce.jsonPayload.chatfeedback.runid AS run_id,
gce.jsonPayload.chatfeedback.agentid AS agent_id,
gce.jsonPayload.user.userid AS user_id,
gce.resource.labels.project_id AS project_id,
MAX(ai.name) AS agent_name,
MAX(ai.workflow_type) AS agent_type,
-- Take latest vote, disregard manual feedback
MAX_BY(gce.jsonPayload.chatfeedback.event, gce.timestamp) AS vote
FROM agent_info ai
INNER JOIN glean_customer_event gce
ON gce.jsonPayload.chatfeedback.agentid = ai.agent_id
WHERE
gce.jsonPayload.type = 'CHAT_FEEDBACK'
AND gce.jsonPayload.chatfeedback.event IN ('UPVOTE', 'DOWNVOTE')
GROUP BY 1, 2, 3, 4
)
-- Aggregate feedback by date, agent, and user
, agent_feedback AS (
SELECT
agent_id,
user_id,
project_id,
agent_name,
agent_type,
SUM(CASE WHEN vote = 'UPVOTE' THEN 1 ELSE 0 END) AS num_upvotes,
SUM(CASE WHEN vote = 'DOWNVOTE' THEN 1 ELSE 0 END) AS num_downvotes,
COUNT(DISTINCT run_id) AS num_feedback_instances
FROM agent_feedback_raw
GROUP BY 1, 2, 3, 4, 5
)
-- Final output with feedback metrics
SELECT
project_id,
agent_id,
agent_name,
agent_type,
user_id,
num_upvotes,
num_downvotes,
num_feedback_instances,
-- Calculate net sentiment
num_upvotes - num_downvotes AS net_sentiment,
-- Calculate upvote rate
CASE
WHEN (num_upvotes + num_downvotes) > 0
THEN CAST(num_upvotes AS FLOAT64) / (num_upvotes + num_downvotes)
ELSE NULL
END AS upvote_rate
FROM agent_feedback
WHERE
-- Only include agents with actual feedback
(num_upvotes + num_downvotes) > 0
ORDER BY
num_feedback_instances DESC;
Action Metric
Web Search Action Usage in Workflows
Web Search Action Usage in Workflows
Grain: Individual workflow run level with associated action information
Use Case: Track usage of web search actions across agent and assistant workflows, analyze which workflows leverage external web search capabilities
DECLARE project_id STRING DEFAULT NULL; -- Replace with your project ID or leave as NULL
DECLARE lookback_days INT64 DEFAULT 30;
WITH glean_customer_event AS (
SELECT *
FROM
-- replace <glean_customer_event_table> with your firm's table name
`<glean_customer_event_table>`
WHERE
-- change date ranges as needed
timestamp >= TIMESTAMP(DATE_SUB(CURRENT_DATE, INTERVAL lookback_days DAY))
-- Optional project filtering - only filter if project_id is specified
AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
, web_search_actions AS(
SELECT
jsonPayload.productsnapshot.actioninstance.*
FROM glean_customer_event
WHERE
jsonPayload.productsnapshot.actioninstance.actiontype LIKE '%WEB_SEARCH'
OR LOWER(jsonPayload.productsnapshot.actioninstance.actionpackname) = 'microsoftbing'
)
-- For actions in assistant workflows it is not required to enable the action therefore
-- actionid is used as actioninstanceid
-- For agent workflows the action needs to be enabled therefore we use action_instance_id
, web_search_action_ids AS(
SELECT
id, actionname
FROM web_search_actions
UNION DISTINCT
SELECT
actionid as id, actionname
FROM web_search_actions
)
SELECT
DISTINCT
jsonPayload.workflowrun.runid
, wsa.actionname
FROM
glean_customer_event, UNNEST(jsonPayload.workflowrun.actionexecutions) ae
JOIN
web_search_action_ids wsa
ON
ae.actioninstanceid = wsa.id
GleanBot User Level Activity
GleanBot User Level Activity
GleanBot User Level Activity
Grain: Individual message level with user interaction data
Use Case: Analyze bot effectiveness, user engagement patterns, and proactive messaging success rates
DECLARE project_id STRING DEFAULT NULL; -- Replace with your project ID or leave as NULL
WITH glean_customer_event AS (
SELECT *
FROM
-- replace <glean_customer_event_table> with your firm's table name
`<glean_customer_event_table>`
WHERE
timestamp > TIMESTAMP(CURRENT_DATE() - INTERVAL 1 DAY)
-- Optional project filtering - only filter if project_id is specified
AND (project_id IS NULL OR resource.labels.project_id = project_id)
)
, gleanbot_activity AS (
SELECT
jsonPayload.user.userid AS user_id
,jsonPayload.gleanbotactivity.eventtrackingtoken AS event_tracking_token
,jsonPayload.gleanbotactivity.stt AS stt
,DATE(timestamp) AS datepartition
,jsonPayload.gleanbotactivity.latenciesmillismap.proactive_message AS latency_proactive_message
,jsonPayload.gleanbotactivity.eventtype AS event_type_verbose
,jsonPayload.gleanbotactivity.responseevents AS response_events_verbose
,jsonPayload.gleanbotactivity.workflowexecutionpoints AS workflow_execution_points_verbose
,(jsonPayload.gleanbotactivity.applicationid is NOT NULL AND LENGTH(jsonPayload.gleanbotactivity.applicationid) != 0) AS is_app_session
FROM
glean_customer_event
WHERE
jsonPayload.type = 'GLEAN_BOT_ACTIVITY'
)
, message_data AS (
SELECT *
FROM gleanbot_activity
WHERE
(
-- Identify messages where proactive workflow started
'PROACTIVE_WORKFLOW_START' IN UNNEST(workflow_execution_points_verbose)
-- Identify messages where proactive workflow for assistant type started
OR 'PROACTIVE_CHAT_WORKFLOW_START' IN UNNEST(workflow_execution_points_verbose)
-- Identify messages where proactive workflow for search type started
OR 'PROACTIVE_SEARCH_WORKFLOW_START' IN UNNEST(workflow_execution_points_verbose)
-- All messages that were not DMs and did not mention @Glean or \glean fall in this category
OR event_type_verbose IN (
'NON_TAGGED_MESSAGE'
-- All events for which proactive thread summarizer workflow is initiated
, 'PROACTIVE_DISCUSSION_SUMMARIZER'
-- All events where proactive digest was clicked
'VIEW_DIGEST_CLICK'
-- All events where digest was subscribed to and thus proactively sent over DM
, 'DAILY_DIGEST_REMINDER'
)
)
-- Remove the events where proactive workflow cannot be triggered. Eg: Invalid Channel
AND 'PROACTIVE_BOT_DISABLED_FAILURE' NOT IN UNNEST(workflow_execution_points_verbose)
-- Remove the events which represent messages sent by other bots, so we don't converse with slack automations
AND 'DROP_BOT_MESSAGE' NOT IN UNNEST(workflow_execution_points_verbose)
AND 'DROP_EXTERNAL_CHANNEL_MESSAGE' NOT IN UNNEST(workflow_execution_points_verbose)
)
, interaction_data AS (
SELECT *
FROM gleanbot_activity
WHERE
event_type_verbose IN (
-- All events where a user clicked on show sources modal
'SHOW_SOURCES',
-- All events for which helpful/not helpful buttons click was initiated
'SHARE_HELPFULNESS',
-- All events for which sharing workflow was started
'SHARE_CLICK',
-- All events for which manual feedback workflow was started
'SUBMIT_FEEDBACK',
-- All events for which dismiss suggestion workflow was initiated
'DISMISS_SUGGESTION'
)
)
SELECT
message_data.datepartition
-- ID of the user who received the proactive message
,message_data.user_id
-- Unique identifier for the proactive message
,message_data.event_tracking_token
-- Whether the user was in an active app session.
,message_data.is_app_session
-- Latency in milliseconds for the proactive message delivery
,message_data.latency_proactive_message
-- Tracking token of the user's interaction with the proactive message
,interaction_data.event_tracking_token AS interaction_event_tracking_token
-- ID of the user who interacted with the proactive message
,interaction_data.user_id AS interacting_user_id
-- Whether the workflow was triggered (via non-tagged or summarizer event)
,(CASE
WHEN message_data.event_type_verbose = 'NON_TAGGED_MESSAGE' THEN TRUE
WHEN message_data.event_type_verbose = 'PROACTIVE_DISCUSSION_SUMMARIZER' THEN TRUE
ELSE FALSE
END) AS workflow_started
-- Whether the message was classified as a question. Commented out for now
-- ,IF(question_detection_data.isquestion = TRUE, TRUE, FALSE) AS question_detected
-- Bot responded: includes proactive message, or summarization thread/DM
,(CASE
WHEN 'DISCUSSION_SUMMARY_THREAD_SUMMARY_SENT' IN UNNEST(message_data.response_events_verbose) THEN TRUE
WHEN 'DISCUSSION_SUMMARY_DM_RESPONSE_SENT' IN UNNEST(message_data.response_events_verbose) THEN TRUE
WHEN 'PROACTIVE_MESSAGE' IN UNNEST(message_data.response_events_verbose) THEN TRUE
ELSE FALSE
END) AS bot_responded
-- Bot response was explicitly triggered by user (e.g., digest opened or summary clicked)
,(CASE
WHEN 'DISCUSSION_SUMMARY_THREAD_SUMMARY_SENT' IN UNNEST(message_data.response_events_verbose) THEN TRUE
WHEN 'DISCUSSION_SUMMARY_DM_RESPONSE_SENT' IN UNNEST(message_data.response_events_verbose) THEN TRUE
WHEN 'VIEW_DIGEST' IN UNNEST(message_data.response_events_verbose) THEN TRUE
ELSE FALSE
END) AS bot_responded_on_users_request
-- Response was shared by the user
,IF(interaction_data.event_type_verbose = 'SHARE_CLICK', TRUE, FALSE) AS response_shared
-- Response was dismissed or downvoted
,(CASE
WHEN 'SUBMIT_FEEDBACK_WORKFLOW_DOWNVOTE' IN UNNEST(interaction_data.workflow_execution_points_verbose) THEN TRUE
WHEN interaction_data.event_type_verbose = 'DISMISS_SUGGESTION' THEN TRUE
ELSE FALSE
END) AS response_downvoted
-- Citations shown to the user
,IF(interaction_data.event_type_verbose = 'SHOW_SOURCES', TRUE, FALSE) AS citations_shown
-- Response was marked helpful
,IF('SHARE_HELPFUL' IN UNNEST(interaction_data.response_events_verbose), TRUE, FALSE) AS voted_helpful
-- Response was marked not helpful
,IF('SHARE_NOT_HELPFUL' IN UNNEST(interaction_data.response_events_verbose), TRUE, FALSE) AS voted_not_helpful
-- Daily digest proactively sent to the user
,IF('DAILY_DIGEST_REMINDER_SENT' IN UNNEST(message_data.response_events_verbose), TRUE, FALSE) AS digest_sent
-- User consumed/viewed the digest
,IF('VIEW_DIGEST' IN UNNEST(message_data.response_events_verbose), TRUE, FALSE) AS digest_consumed
-- Bot sent a proactive thread or DM summary
,(CASE
WHEN 'DISCUSSION_SUMMARY_THREAD_SUMMARY_SENT' IN UNNEST(message_data.response_events_verbose) THEN TRUE
WHEN 'DISCUSSION_SUMMARY_DM_RESPONSE_SENT' IN UNNEST(message_data.response_events_verbose) THEN TRUE
ELSE FALSE
END) AS proactive_summary_sent
FROM message_data
LEFT JOIN interaction_data
ON message_data.event_tracking_token = interaction_data.stt