chore: metric name and group by extractor with CH and PromQL support (#9543)
parent 096e38ee91
commit 09cbe4aa0d

go.mod (2 changed lines)
@@ -4,7 +4,7 @@ go 1.24.0
 
 require (
 	dario.cat/mergo v1.0.1
-	github.com/AfterShip/clickhouse-sql-parser v0.4.11
+	github.com/AfterShip/clickhouse-sql-parser v0.4.16
 	github.com/ClickHouse/clickhouse-go/v2 v2.40.1
 	github.com/DATA-DOG/go-sqlmock v1.5.2
 	github.com/SigNoz/govaluate v0.0.0-20240203125216-988004ccc7fd
go.sum (4 changed lines)
@@ -66,8 +66,8 @@ dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk=
 dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
 filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA=
 filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4=
-github.com/AfterShip/clickhouse-sql-parser v0.4.11 h1:fZMKAjRmgzW44+hEhF6ywi4VjFZQjJ8QrFBbgBsjmF4=
-github.com/AfterShip/clickhouse-sql-parser v0.4.11/go.mod h1:W0Z82wJWkJxz2RVun/RMwxue3g7ut47Xxl+SFqdJGus=
+github.com/AfterShip/clickhouse-sql-parser v0.4.16 h1:gpl+wXclYUKT0p4+gBq22XeRYWwEoZ9f35vogqMvkLQ=
+github.com/AfterShip/clickhouse-sql-parser v0.4.16/go.mod h1:W0Z82wJWkJxz2RVun/RMwxue3g7ut47Xxl+SFqdJGus=
 github.com/Azure/azure-sdk-for-go v68.0.0+incompatible h1:fcYLmCpyNYRnvJbPerq7U0hS+6+I79yEDJBqVNcqUzU=
 github.com/Azure/azure-sdk-for-go/sdk/azcore v1.18.0 h1:Gt0j3wceWMwPmiazCa8MzMA0MfhmPIz0Qp0FJ6qcM0U=
 github.com/Azure/azure-sdk-for-go/sdk/azcore v1.18.0/go.mod h1:Ot/6aikWnKWi4l9QB7qVSwa8iMphQNqkWALMoNT3rzM=
pkg/parser/queryfilterextractor/clickhouse.go (new file, 687 lines)
@@ -0,0 +1,687 @@
package queryfilterextractor

import (
    "fmt"
    "strings"

    clickhouse "github.com/AfterShip/clickhouse-sql-parser/parser"
    "github.com/SigNoz/signoz/pkg/errors"
)

const (
    // MetricNameColumn is the column name used for filtering metrics
    MetricNameColumn = "metric_name"
)

// ClickHouseFilterExtractor extracts metric names and grouping keys from ClickHouse SQL queries
type ClickHouseFilterExtractor struct{}

// NewClickHouseFilterExtractor creates a new ClickHouse filter extractor
func NewClickHouseFilterExtractor() *ClickHouseFilterExtractor {
    return &ClickHouseFilterExtractor{}
}

// Extract parses a ClickHouse query and extracts metric names and grouping keys
func (e *ClickHouseFilterExtractor) Extract(query string) (*FilterResult, error) {
    p := clickhouse.NewParser(query)
    stmts, err := p.ParseStmts()
    if err != nil {
        return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "failed to parse clickhouse query: %s", err.Error())
    }

    result := &FilterResult{MetricNames: []string{}, GroupByColumns: []ColumnInfo{}}

    metricNames := make(map[string]bool)

    // Track top-level queries for GROUP BY extraction
    topLevelQueries := make(map[*clickhouse.SelectQuery]bool)

    // Process all statements
    for _, stmt := range stmts {
        selectQuery, ok := stmt.(*clickhouse.SelectQuery)
        if !ok {
            continue
        }

        // Mark as top-level
        topLevelQueries[selectQuery] = true

        // Walk the AST to extract metrics
        clickhouse.Walk(selectQuery, func(node clickhouse.Expr) bool {
            e.fillMetricNamesFromExpr(node, metricNames)
            return true // Continue traversal
        })
    }

    // Extract GROUP BY from the top-level queries by first building a map of CTEs and
    // then recursively extracting the GROUP BY from the CTEs and subqueries.

    // Build the CTE map for all top-level queries
    cteMap := make(map[string]*clickhouse.SelectQuery)
    for query := range topLevelQueries {
        e.buildCTEMap(query, cteMap)
    }

    // Extract GROUP BY columns with aliases and origins from the CTEs and subqueries using a recursive approach.
    // Use a map to handle duplicates (the last ColumnInfo wins across queries).
    groupByColumnsMap := make(map[string]ColumnInfo) // column name -> ColumnInfo
    visited := make(map[*clickhouse.SelectQuery]bool)
    for query := range topLevelQueries {
        columns, err := e.extractGroupByColumns(query, cteMap, visited)
        if err != nil {
            return nil, err
        }
        for _, col := range columns {
            // The last column info wins for duplicate columns across multiple queries
            groupByColumnsMap[col.Name] = col
        }
    }

    // Convert sets to slices
    for metric := range metricNames {
        result.MetricNames = append(result.MetricNames, metric)
    }

    // Build GroupByColumns from the map
    for _, colInfo := range groupByColumnsMap {
        result.GroupByColumns = append(result.GroupByColumns, colInfo)
    }

    return result, nil
}

// ========================================
// Metric Name Extraction
// ========================================

// fillMetricNamesFromExpr extracts metric names from various node types
func (e *ClickHouseFilterExtractor) fillMetricNamesFromExpr(node clickhouse.Expr, metricNames map[string]bool) {
    switch n := node.(type) {
    case *clickhouse.BinaryOperation:
        e.fillMetricFromBinaryOp(n, metricNames)
    }
}

// fillMetricFromBinaryOp extracts metrics from binary operations
func (e *ClickHouseFilterExtractor) fillMetricFromBinaryOp(op *clickhouse.BinaryOperation, metricNames map[string]bool) {
    // Check whether either side of the operation is the metric_name column
    leftCol := e.getColumnName(op.LeftExpr)
    rightCol := e.getColumnName(op.RightExpr)

    // Handle metric_name on the left side: metric_name = 'value'
    if leftCol == MetricNameColumn {
        e.fillMetricWithBinaryOpConditions(op, op.RightExpr, metricNames)
        return
    }

    // Handle metric_name on the right side: 'value' = metric_name
    if rightCol == MetricNameColumn {
        e.fillMetricWithBinaryOpConditions(op, op.LeftExpr, metricNames)
        return
    }
}

// fillMetricWithBinaryOpConditions extracts metric names from the value side of a binary operation.
//
// Supported operators:
//   - "=", "==": extracts literal string values or values from an any() function
//   - "IN", "GLOBAL IN": extracts all literal string values from the list
//
// Unsupported operators (can be added later if needed):
//   - "!=", "<>", "NOT IN": negative filters (e.g., metric_name != 'a')
//   - "LIKE", "ILIKE": pattern-matching filters
//   - "NOT LIKE", "NOT ILIKE": negative pattern-matching filters
//   - "OR", "AND": no special handling is needed, because Walk automatically traverses both sides
//     of OR/AND operations and extracts metrics from each branch (e.g., metric_name='a' OR metric_name='b')
func (e *ClickHouseFilterExtractor) fillMetricWithBinaryOpConditions(op *clickhouse.BinaryOperation, valueExpr clickhouse.Expr, metricNames map[string]bool) {
    switch op.Operation {
    case clickhouse.TokenKindSingleEQ, clickhouse.TokenKindDoubleEQ:
        // metric_name = 'value' or metric_name = any(['a', 'b'])
        // Skip if the value side is a function call (function-wrapped literals are ignored, test case: CH59)
        if fn, ok := valueExpr.(*clickhouse.FunctionExpr); ok {
            // Only handle the any() function; skip others like lowercase('cpu')
            if fn.Name != nil && fn.Name.Name == "any" {
                e.extractInValues(valueExpr, metricNames)
            }
            // Otherwise skip function-wrapped literals
        } else if val := e.extractStringLiteral(valueExpr); val != "" {
            metricNames[val] = true
        }
    case "IN", "GLOBAL IN":
        // metric_name IN ('a', 'b', 'c')
        // GLOBAL IN behaves the same as IN for metric extraction purposes.
        // Skip if the value side is a function call (function-wrapped literals are ignored, test case: CH59)
        if _, ok := valueExpr.(*clickhouse.FunctionExpr); !ok {
            e.extractInValues(valueExpr, metricNames)
        }
    }
}

// extractStringLiteral extracts a string literal value from an expression
func (e *ClickHouseFilterExtractor) extractStringLiteral(expr clickhouse.Expr) string {
    switch ex := expr.(type) {
    case *clickhouse.StringLiteral:
        return ex.Literal
    }
    return ""
}

// extractInValues extracts values from IN expressions
func (e *ClickHouseFilterExtractor) extractInValues(expr clickhouse.Expr, metricNames map[string]bool) {
    // Find all string literals in the expression
    strLits := clickhouse.FindAll(expr, func(node clickhouse.Expr) bool {
        // Metric names passed in an IN condition are string literals.
        _, ok := node.(*clickhouse.StringLiteral)
        return ok
    })

    for _, strLitNode := range strLits {
        if strLit, ok := strLitNode.(*clickhouse.StringLiteral); ok {
            // Unquote the string literal
            val := e.extractStringLiteral(strLit)
            if val != "" {
                metricNames[val] = true
            }
        }
    }
}

// ========================================
// GROUP BY Column Extraction
// ========================================

// extractGroupByColumns extracts the GROUP BY columns from a query.
// It follows a top-down approach where an outer GROUP BY overrides inner GROUP BYs in subqueries and CTEs.
// Returns a slice of ColumnInfo with column names, aliases, and origins.
func (e *ClickHouseFilterExtractor) extractGroupByColumns(query *clickhouse.SelectQuery, cteMap map[string]*clickhouse.SelectQuery, visited map[*clickhouse.SelectQuery]bool) ([]ColumnInfo, error) {
    if visited[query] {
        return nil, nil
    }

    // Mark this query as visited to prevent cycles
    visited[query] = true

    // First, check whether this query has its own GROUP BY using direct field access
    hasGroupBy := query.GroupBy != nil

    // If this query has a GROUP BY, use it (outer overrides inner)
    if hasGroupBy {
        // Extract GROUP BY columns
        tempGroupBy := make(map[string]bool)
        e.fillGroupsFromGroupByClause(query.GroupBy, tempGroupBy)

        // Extract SELECT columns and their aliases from the same query level
        selectAliases := e.extractSelectColumns(query)

        // Build the ColumnInfo slice by matching GROUP BY columns with SELECT aliases and origins
        result := []ColumnInfo{}

        for groupByCol := range tempGroupBy {
            alias := selectAliases[groupByCol] // Will be "" if not in SELECT

            // Extract originExpr by tracing back through queries
            originVisited := make(map[*clickhouse.SelectQuery]bool)
            originExpr := e.extractColumnOrigin(groupByCol, query, cteMap, originVisited)
            originField, err := extractCHOriginFieldFromQuery(fmt.Sprintf("SELECT %s", originExpr))
            if err != nil {
                return nil, err
            }

            result = append(result, ColumnInfo{
                Name:        groupByCol,
                Alias:       alias,
                OriginExpr:  originExpr,
                OriginField: originField,
            })
        }
        return result, nil
    }

    // If this query has no GROUP BY, follow CTE/subquery references:
    // there might be grouping inside the CTE/subquery.
    sourceQuery := e.extractSourceQuery(query, cteMap)
    if sourceQuery != nil {
        return e.extractGroupByColumns(sourceQuery, cteMap, visited)
    }

    return nil, nil
}

// fillGroupsFromGroupByClause extracts GROUP BY columns from a specific GroupByClause and fills the map with the column names
func (e *ClickHouseFilterExtractor) fillGroupsFromGroupByClause(groupByClause *clickhouse.GroupByClause, groupBy map[string]bool) {
    // Extract GROUP BY expressions properly.
    // Find only the direct child ColumnExprList, not nested ones.
    // We use Find instead of FindAll to get only the first (direct child) ColumnExprList.
    exprListNode, foundList := clickhouse.Find(groupByClause, func(node clickhouse.Expr) bool {
        _, ok := node.(*clickhouse.ColumnExprList)
        return ok
    })

    if !foundList {
        return
    }

    // Note: We only extract from the top-level ColumnExprList.Items to avoid extracting nested parts.
    // This prevents extracting 'timestamp' from 'toDate(timestamp)' - we only get 'toDate(timestamp)'.
    if exprList, ok := exprListNode.(*clickhouse.ColumnExprList); ok {
        // Extract each expression from the list - these are top-level only
        if exprList.Items != nil {
            for _, item := range exprList.Items {
                groupKey := e.extractColumnStrByExpr(item)
                if groupKey != "" {
                    // Strip the table alias if present (e.g., "m.region" -> "region")
                    groupKey = e.stripTableAlias(groupKey)
                    groupBy[groupKey] = true
                }
            }
        }
    }
}

// extractColumnStrByExpr extracts the complete string representation of different expression types.
// Supports:
//   - Ident: simple identifier like "region" or "timestamp"
//   - FunctionExpr: function call like "toDate(timestamp)"
//   - ColumnExpr: column expression like "m.region" or "toDate(timestamp)"
//   - Other expression types: returns the string representation of the expression
//
// For example:
//   - "region" -> "region"
//   - "toDate(timestamp)" -> "toDate(timestamp)"
//   - "`m.region`" -> "`m.region`"
func (e *ClickHouseFilterExtractor) extractColumnStrByExpr(expr clickhouse.Expr) string {
    if expr == nil {
        return ""
    }

    switch ex := expr.(type) {
    // Ident is a simple identifier like "region" or "timestamp"
    case *clickhouse.Ident:
        // Handling for backticks, which are native to ClickHouse and used for literal names.
        // The CH parser removes the backticks from the identifier, so we need to add them back.
        if ex.QuoteType == clickhouse.BackTicks {
            return "`" + ex.Name + "`"
        }
        return ex.Name
    // FunctionExpr is a function call like "toDate(timestamp)"
    case *clickhouse.FunctionExpr:
        // For function expressions, return the complete function call string
        return ex.String()
    // ColumnExpr is a column expression like "m.region" or "toDate(timestamp)"
    case *clickhouse.ColumnExpr:
        // ColumnExpr wraps another expression - extract the underlying expression
        if ex.Expr != nil {
            return e.extractColumnStrByExpr(ex.Expr)
        }
        return ex.String()
    default:
        // For other expression types, return the string representation
        return expr.String()
    }
}

// stripTableAlias removes a table alias prefix from a column name (e.g., "m.region" -> "region"),
// but for backticked literals the entire string must be preserved (e.g., `os.type` -> "os.type").
func (e *ClickHouseFilterExtractor) stripTableAlias(name string) string {
    // Handling for backticks, which are native to ClickHouse and used for literal names.
    if strings.HasPrefix(name, "`") && strings.HasSuffix(name, "`") {
        return strings.Trim(name, "`")
    }

    // Split the name by dot and return the last part
    parts := strings.Split(name, ".")
    if len(parts) > 1 {
        return parts[len(parts)-1]
    }
    return name
}

// getColumnName extracts a column name from an expression
func (e *ClickHouseFilterExtractor) getColumnName(expr clickhouse.Expr) string {
    switch ex := expr.(type) {
    case *clickhouse.Ident:
        return ex.Name
    case *clickhouse.Path:
        // Handle the Path type for qualified column names like "m.metric_name".
        // Extract the last field, which is the column name.
        if len(ex.Fields) > 0 {
            return ex.Fields[len(ex.Fields)-1].Name
        }
        return ""
    }
    return ""
}

// extractSourceQuery extracts the SelectQuery from FROM expressions.
// Handles CTE references, subqueries, and table expressions.
// For example, in the query below we try to extract the name of the source query,
// which is "aggregated". Once found, we return the SelectQuery node
// from the cteMap, which acts as the source for GROUP BY extraction.
//
//	WITH aggregated AS (
//	    SELECT region as region_alias, sum(value) AS total
//	    FROM metrics
//	    WHERE metric_name = 'cpu_usage'
//	    GROUP BY region
//	)
//	SELECT * FROM aggregated
func (e *ClickHouseFilterExtractor) extractSourceQuery(query *clickhouse.SelectQuery, cteMap map[string]*clickhouse.SelectQuery) *clickhouse.SelectQuery {
    if query.From == nil {
        return nil
    }

    // Find the FROM clause and extract the source
    fromExprs := clickhouse.FindAll(query.From, func(node clickhouse.Expr) bool {
        switch node.(type) {
        case *clickhouse.Ident, *clickhouse.SelectQuery:
            return true
        }
        return false
    })

    for _, fromExpr := range fromExprs {
        switch expr := fromExpr.(type) {
        case *clickhouse.Ident:
            // CTE reference by simple name
            if cteQuery, exists := cteMap[expr.Name]; exists {
                return cteQuery
            }
        case *clickhouse.SelectQuery:
            // Direct subquery
            return expr
        }
    }

    return nil
}

// ========================================
// Column Origin Tracing
// ========================================

// extractColumnOrigin recursively traces a column back to its original expression.
// Returns the original expression string (e.g., "JSONExtractString(labels, 'service.name')")
// or the column name itself if it is a direct column reference.
func (e *ClickHouseFilterExtractor) extractColumnOrigin(
    columnName string,
    query *clickhouse.SelectQuery,
    cteMap map[string]*clickhouse.SelectQuery,
    visited map[*clickhouse.SelectQuery]bool,
) string {
    if query == nil {
        return columnName
    }

    // Prevent infinite recursion and redundant work.
    // Once a query is visited, we don't need to check it again.
    if visited[query] {
        return columnName
    }
    visited[query] = true
    // This prevents infinite recursion within a single query search,
    // but we don't want it to affect searches of other queries,
    // so we delete the entry once the search for the current query is done.
    defer delete(visited, query)

    // Step 1: Search in CTEs and joins; this takes us to the very end of the subqueries and CTEs.
    sourceQuery := e.extractSourceQuery(query, cteMap)
    if sourceQuery != nil {
        returningOrigin := e.extractColumnOrigin(columnName, sourceQuery, cteMap, visited)
        if returningOrigin != columnName {
            return returningOrigin
        }
    }

    // Step 2: Once we're sure there are no subqueries or CTEs left, find all SelectItems
    // and get their column origin values.
    selectItems := clickhouse.FindAll(query, func(node clickhouse.Expr) bool {
        _, ok := node.(*clickhouse.SelectItem)
        return ok
    })

    // extractOriginFromSelectItem extracts the origin from a SelectItem
    extractOriginFromSelectItem := func(selectItem *clickhouse.SelectItem) *string {
        // Check whether this SelectItem matches our column (by alias or by name)
        alias := e.extractSelectItemAlias(selectItem)
        exprStr := e.extractSelectItemName(selectItem)
        normalizedExpr := e.stripTableAlias(exprStr)

        // Case 1: Column matches an alias in SELECT
        if alias == columnName {
            // This is an alias - get the expression it aliases
            if selectItem.Expr != nil {
                originExpr := e.extractFullExpression(selectItem.Expr)
                // If the expression is just a column name, trace it back further
                if normalizedExpr == columnName || e.isSimpleColumnReference(selectItem.Expr) {
                    // It references another column - trace back through the source query
                    sourceQuery := e.extractSourceQuery(query, cteMap)
                    if sourceQuery != nil {
                        originExpr := e.extractColumnOrigin(normalizedExpr, sourceQuery, cteMap, visited)
                        return &originExpr
                    }
                }
                return &originExpr
            }
        }

        // Case 2: Column matches the expression itself (no alias)
        if normalizedExpr == columnName {
            // Check whether this is a simple column reference or a complex expression
            if e.isSimpleColumnReference(selectItem.Expr) {
                // Simple column - trace back through the source query
                sourceQuery := e.extractSourceQuery(query, cteMap)
                if sourceQuery != nil {
                    originExpr := e.extractColumnOrigin(columnName, sourceQuery, cteMap, visited)
                    return &originExpr
                }
                return &columnName
            } else {
                // Complex expression - return it as the origin
                originExpr := e.extractFullExpression(selectItem.Expr)
                return &originExpr
            }
        }
        return nil
    }

    var finalColumnOrigin string
    for _, itemNode := range selectItems {
        if selectItem, ok := itemNode.(*clickhouse.SelectItem); ok {
            // Call extractOriginFromSelectItem for each SelectItem,
            // and if the origin is not nil, set finalColumnOrigin to it.
            // This is needed to reach the most nested origin of the column where a SelectItem is present.
            origin := extractOriginFromSelectItem(selectItem)
            if origin != nil {
                finalColumnOrigin = *origin
            }
        }
    }
    if finalColumnOrigin != "" {
        return finalColumnOrigin
    }

    return columnName
}

// extractFullExpression extracts the complete string representation of an expression
func (e *ClickHouseFilterExtractor) extractFullExpression(expr clickhouse.Expr) string {
    if expr == nil {
        return ""
    }
    return expr.String()
}

// isSimpleColumnReference checks whether an expression is just a simple column reference
// (not a function call or complex expression)
func (e *ClickHouseFilterExtractor) isSimpleColumnReference(expr clickhouse.Expr) bool {
    if expr == nil {
        return false
    }
    switch ex := expr.(type) {
    case *clickhouse.Ident:
        // Backticked identifiers are treated as non-simple column references
        // so that we return the origin expression with backticks;
        // the origin parser will handle the backticks and extract the column name from them.
        if ex.QuoteType == clickhouse.BackTicks {
            return false
        }
        return true
    case *clickhouse.Path:
        return true
    case *clickhouse.ColumnExpr:
        // Check whether it wraps a simple reference
        if ex.Expr != nil {
            return e.isSimpleColumnReference(ex.Expr)
        }
    }
    return false
}

// ========================================
// SELECT Column Alias Extraction
// ========================================

// extractSelectColumns extracts column names and their aliases from the SELECT clause of a specific query.
// Returns a map where the key is the normalized column name and the value is the alias.
// For duplicate columns with different aliases, the last alias wins.
// This follows the same pattern as fillGroupsFromGroupByClause - finding direct children only.
func (e *ClickHouseFilterExtractor) extractSelectColumns(query *clickhouse.SelectQuery) map[string]string {
    aliasMap := make(map[string]string)

    if query == nil {
        return aliasMap
    }

    // Find SelectItem nodes, which represent columns in the SELECT clause.
    // SelectItem has an Expr field (the column/expression) and an Alias field.
    selectItems := clickhouse.FindAll(query, func(node clickhouse.Expr) bool {
        _, ok := node.(*clickhouse.SelectItem)
        return ok
    })

    // Process each SelectItem and extract the column name and alias
    for _, itemNode := range selectItems {
        if selectItem, ok := itemNode.(*clickhouse.SelectItem); ok {
            // Extract the column name/expression from SelectItem.Expr
            columnName := e.extractSelectItemName(selectItem)
            if columnName == "" {
                continue
            }

            // Normalize the column name (strip the table alias)
            normalizedName := e.stripTableAlias(columnName)

            // Extract the alias from SelectItem.Alias
            alias := e.extractSelectItemAlias(selectItem)

            // Store in the map - the last alias wins for duplicates
            aliasMap[normalizedName] = alias
        }
    }

    return aliasMap
}

// extractSelectItemName extracts the column name or expression from a SelectItem
func (e *ClickHouseFilterExtractor) extractSelectItemName(selectItem *clickhouse.SelectItem) string {
    if selectItem == nil || selectItem.Expr == nil {
        return ""
    }

    return e.extractColumnStrByExpr(selectItem.Expr)
}

// extractSelectItemAlias extracts the alias from a SelectItem.
// Returns an empty string if no alias is present.
func (e *ClickHouseFilterExtractor) extractSelectItemAlias(selectItem *clickhouse.SelectItem) string {
    if selectItem == nil || selectItem.Alias == nil {
        return ""
    }

    // The Alias field is an *Ident (pointer type)
    if selectItem.Alias.Name != "" {
        return selectItem.Alias.Name
    }

    return ""
}

// ========================================
// CTE and Subquery Extraction
// ========================================

// buildCTEMap builds a map of CTE names to their SelectQuery nodes by recursively
// traversing all queries and their nested expressions
func (e *ClickHouseFilterExtractor) buildCTEMap(query *clickhouse.SelectQuery, cteMap map[string]*clickhouse.SelectQuery) {
    // Access CTEs directly from the WithClause if it exists
    if query.With != nil && query.With.CTEs != nil {
        for _, cte := range query.With.CTEs {
            cteName := e.extractCTEName(cte)
            cteQuery := e.extractCTEQuery(cte)
            if cteName != "" && cteQuery != nil {
                cteMap[cteName] = cteQuery
                // Recursively build the CTE map for nested CTEs
                e.buildCTEMap(cteQuery, cteMap)
            }
        }
    }

    // Also check for CTEs in subqueries and other expressions
    e.buildCTEMapFromExpr(query, cteMap)
}

// extractCTEName extracts the CTE name from a CTEStmt; the Expr field is the name of the CTE
func (e *ClickHouseFilterExtractor) extractCTEName(cte *clickhouse.CTEStmt) string {
    if cte == nil || cte.Expr == nil {
        return ""
    }

    switch name := cte.Expr.(type) {
    case *clickhouse.Ident:
        return name.Name
    default:
        return cte.Expr.String()
    }
}

// extractCTEQuery extracts the SelectQuery from a CTEStmt; the Alias field is the SelectQuery
func (e *ClickHouseFilterExtractor) extractCTEQuery(cte *clickhouse.CTEStmt) *clickhouse.SelectQuery {
    if cte == nil || cte.Alias == nil {
        return nil
    }

    // The Alias field should contain a SelectQuery
    if selectQuery, ok := cte.Alias.(*clickhouse.SelectQuery); ok {
        return selectQuery
    }

    return nil
}

// buildCTEMapFromExpr recursively extracts CTEs from various expression types
func (e *ClickHouseFilterExtractor) buildCTEMapFromExpr(expr clickhouse.Expr, cteMap map[string]*clickhouse.SelectQuery) {
    // Walk through all nodes to find SelectQuery nodes that might contain CTEs
    clickhouse.Walk(expr, func(node clickhouse.Expr) bool {
        switch n := node.(type) {
        case *clickhouse.SelectQuery:
            // Don't process the same query we started with, to avoid infinite recursion
            if n != expr {
                e.buildCTEMap(n, cteMap)
            }
        case *clickhouse.TableExpr:
            if n.Expr != nil {
                e.buildCTEMapFromExpr(n.Expr, cteMap)
            }
        case *clickhouse.JoinTableExpr:
            if n.Table != nil {
                e.buildCTEMapFromExpr(n.Table, cteMap)
            }
        }
        return true // Continue traversal
    })
}
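For reference, a minimal usage sketch of the extractor above (not part of the diff; it assumes the package import path `github.com/SigNoz/signoz/pkg/parser/queryfilterextractor` and the `FilterResult`/`ColumnInfo` types defined elsewhere in the package). The query mirrors the CTE example from the extractSourceQuery doc comment:

package main

import (
    "fmt"

    "github.com/SigNoz/signoz/pkg/parser/queryfilterextractor"
)

func main() {
    extractor := queryfilterextractor.NewClickHouseFilterExtractor()

    // The metric name comes from the WHERE filter inside the CTE;
    // the grouping key comes from the innermost GROUP BY.
    result, err := extractor.Extract(`
        WITH aggregated AS (
            SELECT region, sum(value) AS total
            FROM metrics
            WHERE metric_name = 'cpu_usage'
            GROUP BY region
        )
        SELECT * FROM aggregated`)
    if err != nil {
        panic(err)
    }

    fmt.Println(result.MetricNames) // [cpu_usage] (order not guaranteed: built from a set)
    for _, col := range result.GroupByColumns {
        fmt.Println(col.Name, col.OriginExpr, col.OriginField) // region region region
    }
}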
pkg/parser/queryfilterextractor/clickhouse_originparser.go (new file, 305 lines)
@@ -0,0 +1,305 @@
package queryfilterextractor

import (
    "strings"

    "github.com/AfterShip/clickhouse-sql-parser/parser"
    "github.com/SigNoz/signoz/pkg/errors"
)

// excludedFunctions contains functions that should cause extractCHOriginFieldFromQuery to return an empty string.
// The map key is the function name in lowercase; the value is the original function name.
var excludedFunctions = map[string]string{
    // Time functions
    "now":                     "now",
    "today":                   "today",
    "yesterday":               "yesterday",
    "todatetime":              "toDateTime",
    "todatetime64":            "toDateTime64",
    "todate":                  "toDate",
    "todate32":                "toDate32",
    "tostartofinterval":       "toStartOfInterval",
    "tostartofday":            "toStartOfDay",
    "tostartofweek":           "toStartOfWeek",
    "tostartofmonth":          "toStartOfMonth",
    "tostartofquarter":        "toStartOfQuarter",
    "tostartofyear":           "toStartOfYear",
    "tostartofhour":           "toStartOfHour",
    "tostartofminute":         "toStartOfMinute",
    "tostartofsecond":         "toStartOfSecond",
    "tostartoffiveminutes":    "toStartOfFiveMinutes",
    "tostartoftenminutes":     "toStartOfTenMinutes",
    "tostartoffifteenminutes": "toStartOfFifteenMinutes",
    "tointervalsecond":        "toIntervalSecond",
    "tointervalminute":        "toIntervalMinute",
    "tointervalhour":          "toIntervalHour",
    "tointervalday":           "toIntervalDay",
    "tointervalweek":          "toIntervalWeek",
    "tointervalmonth":         "toIntervalMonth",
    "tointervalquarter":       "toIntervalQuarter",
    "tointervalyear":          "toIntervalYear",
    "parsedatetime":           "parseDateTime",
    "parsedatetimebesteffort": "parseDateTimeBestEffort",

    // Aggregate functions
    "count":          "count",
    "sum":            "sum",
    "avg":            "avg",
    "min":            "min",
    "max":            "max",
    "any":            "any",
    "stddevpop":      "stddevPop",
    "stddevsamp":     "stddevSamp",
    "varpop":         "varPop",
    "varsamp":        "varSamp",
    "grouparray":     "groupArray",
    "groupuniqarray": "groupUniqArray",
    "quantile":       "quantile",
    "quantiles":      "quantiles",
    "quantileexact":  "quantileExact",
    "quantiletiming": "quantileTiming",
    "median":         "median",
    "uniq":           "uniq",
    "uniqexact":      "uniqExact",
    "uniqcombined":   "uniqCombined",
    "uniqhll12":      "uniqHLL12",
    "topk":           "topK",
    "first":          "first",
    "last":           "last",
}

// jsonExtractFunctions contains functions that extract from JSON columns.
// The map key is the function name in lowercase; the value is the original function name.
var jsonExtractFunctions = map[string]string{
    "jsonextractstring":        "JSONExtractString",
    "jsonextractint":           "JSONExtractInt",
    "jsonextractuint":          "JSONExtractUInt",
    "jsonextractfloat":         "JSONExtractFloat",
    "jsonextractbool":          "JSONExtractBool",
    "jsonextract":              "JSONExtract",
    "jsonextractraw":           "JSONExtractRaw",
    "jsonextractarrayraw":      "JSONExtractArrayRaw",
    "jsonextractkeysandvalues": "JSONExtractKeysAndValues",
}

// isFunctionPresentInStore checks whether a function name exists in the function store map
func isFunctionPresentInStore(funcName string, funcStore map[string]string) bool {
    _, exists := funcStore[strings.ToLower(funcName)]
    return exists
}

// isReservedSelectKeyword checks whether a keyword is a reserved keyword for the SELECT statement.
// We only include keywords that can appear in the SELECT statement without being quoted.
func isReservedSelectKeyword(keyword string) bool {
    return strings.ToUpper(keyword) == parser.KeywordSelect || strings.ToUpper(keyword) == parser.KeywordFrom
}

// extractCHOriginFieldFromQuery extracts the origin field (column name) from a query string,
// or the field being extracted in the case of JSON extraction functions.
func extractCHOriginFieldFromQuery(query string) (string, error) {
    // Parse the query string
    p := parser.NewParser(query)
    stmts, err := p.ParseStmts()
    if err != nil {
        return "", errors.NewInternalf(errors.CodeInternal, "failed to parse origin field from query: %s", err.Error())
    }

    // Get the first statement, which should be a SELECT
    selectStmt := stmts[0].(*parser.SelectQuery)

    // If the query has multiple select items, return a blank string, as we don't expect multiple select items
    if len(selectStmt.SelectItems) > 1 {
        return "", nil
    }

    if len(selectStmt.SelectItems) == 0 {
        return "", errors.NewInternalf(errors.CodeInternal, "SELECT query has no select items")
    }

    // Extract the origin field from the first (and only) select item's expression
    return extractOriginFieldFromExpr(selectStmt.SelectItems[0].Expr)
}

// extractOriginFieldFromExpr extracts the origin field (column name) from an expression.
// This is the internal helper function that contains the core logic.
func extractOriginFieldFromExpr(expr parser.Expr) (string, error) {
    // Check whether the expression contains excluded functions or IF/CASE
    hasExcludedExpressions := false
    hasReservedKeyword := false

    parser.Walk(expr, func(node parser.Expr) bool {
        // Exclude reserved keywords, because the parser will treat them as valid SQL.
        // Example: in "SELECT FROM table", "FROM" is a reserved keyword,
        // but the parser treats it as a valid column to be extracted.
        if ident, ok := node.(*parser.Ident); ok {
            if ident.QuoteType == parser.Unquoted && isReservedSelectKeyword(ident.Name) {
                hasReservedKeyword = true
                return false
            }
        }
        // For functions, check whether the function is an excluded function
        // or a JSON extraction function with nested JSON extraction
        if funcExpr, ok := node.(*parser.FunctionExpr); ok {
            if isFunctionPresentInStore(funcExpr.Name.Name, excludedFunctions) {
                hasExcludedExpressions = true
                return false
            }
            // Check for nested JSON extraction functions
            if isFunctionPresentInStore(funcExpr.Name.Name, jsonExtractFunctions) {
                // Check whether any argument contains another JSON extraction function
                if funcExpr.Params != nil && funcExpr.Params.Items != nil {
                    for _, arg := range funcExpr.Params.Items.Items {
                        if containsJSONExtractFunction(arg) {
                            hasExcludedExpressions = true
                            return false
                        }
                    }
                }
            }
        }
        if _, ok := node.(*parser.CaseExpr); ok {
            hasExcludedExpressions = true
            return false
        }
        return true
    })

    // If the expression contains reserved keywords, return an error
    if hasReservedKeyword {
        return "", errors.New(errors.TypeUnsupported, errors.CodeUnsupported, "reserved keyword found in select clause")
    }

    // If the expression contains excluded expressions, return an empty string
    if hasExcludedExpressions {
        return "", nil
    }

    // Extract all column names from the expression
    columns := extractColumns(expr)

    // If we found exactly one unique column, return it
    if len(columns) == 1 {
        return columns[0], nil
    }

    // Multiple columns or no columns - return an empty string
    return "", nil
}

// containsJSONExtractFunction checks whether an expression contains a JSON extraction function
func containsJSONExtractFunction(expr parser.Expr) bool {
    if expr == nil {
        return false
    }

    found := false
    parser.Walk(expr, func(node parser.Expr) bool {
        if funcExpr, ok := node.(*parser.FunctionExpr); ok {
            if isFunctionPresentInStore(funcExpr.Name.Name, jsonExtractFunctions) {
                found = true
                return false
            }
        }
        return true
    })

    return found
}

// extractColumns recursively extracts all unique column names from an expression.
// Note: String literals are also considered origin fields and will be included in the result.
func extractColumns(expr parser.Expr) []string {
    columnMap := make(map[string]bool)
    extractColumnsHelper(expr, columnMap)

    // Convert the map to a slice
    columns := make([]string, 0, len(columnMap))
    for col := range columnMap {
        columns = append(columns, col)
    }

    return columns
}

// extractColumnsHelper is a recursive helper that finds all column references.
// Note: String literals are also considered origin fields and will be added to the columnMap.
func extractColumnsHelper(expr parser.Expr, columnMap map[string]bool) {
    switch n := expr.(type) {
    // Ident is a simple identifier like "region" or "timestamp"
    case *parser.Ident:
        // Add identifiers as column references
        columnMap[n.Name] = true

    // FunctionExpr is a function call like "toDate(timestamp)" or "JSONExtractString(labels, 'service.name')"
    case *parser.FunctionExpr:
        // Special handling for JSON extraction functions.
        // In the case of nested JSON extraction, we return blank values (handled at the top level).
        if isFunctionPresentInStore(n.Name.Name, jsonExtractFunctions) {
            // For JSON functions, extract from the second argument (the JSON path/key being extracted).
            // The first argument is the column name; the second is the exact data being extracted.
            // The extracted data (second argument) is treated as the origin field.
            if n.Params != nil && n.Params.Items != nil && len(n.Params.Items.Items) >= 2 {
                secondArg := n.Params.Items.Items[1]
                // If the second argument is a string literal, use its value as the origin field.
                // String literals are considered origin fields.
                if strLit, ok := secondArg.(*parser.StringLiteral); ok {
                    columnMap[strLit.Literal] = true
                } else {
                    // Otherwise, try to extract columns from it
                    extractColumnsHelper(secondArg, columnMap)
                }
            }
            return
        }

        // For regular functions, recursively process all arguments, e.g., lower(name)
        if n.Params != nil && n.Params.Items != nil {
            for _, item := range n.Params.Items.Items {
                extractColumnsHelper(item, columnMap)
            }
        }

    // BinaryOperation is a binary operation like "region = 'us-east-1'" or "unix_milli / 1000"
    case *parser.BinaryOperation:
        extractColumnsHelper(n.LeftExpr, columnMap)
        extractColumnsHelper(n.RightExpr, columnMap)

    // ColumnExpr is a column expression like "m.region" or "service.name"
    case *parser.ColumnExpr:
        extractColumnsHelper(n.Expr, columnMap)

    // CastExpr is a cast expression like "CAST(unix_milli AS String)"
    case *parser.CastExpr:
        extractColumnsHelper(n.Expr, columnMap)

    case *parser.ParamExprList:
        if n.Items != nil {
            extractColumnsHelper(n.Items, columnMap)
        }

    // Ex: coalesce(cpu_usage, 0) + coalesce(mem_usage, 0)
    case *parser.ColumnExprList:
        for _, item := range n.Items {
            extractColumnsHelper(item, columnMap)
        }

    // StringLiteral is a string literal like "us-east-1" or "cpu.usage"
    case *parser.StringLiteral:
        // String literals are considered origin fields
        columnMap[n.Literal] = true
        return

    // Support for qualified columns like table.column_name
    case *parser.Path:
        if len(n.Fields) > 0 {
            extractColumnsHelper(n.Fields[len(n.Fields)-1], columnMap)
        }
        return

    // Add more cases as needed for other expression types

    default:
        // For unknown types, return without extracting columns
        return
    }
}
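As a quick illustration of the contract (a sketch only — the function is unexported, so it would have to live inside the queryfilterextractor package), the expected outputs mirror the test table in the next file:

package queryfilterextractor

import "fmt"

// demoOriginField shows the three main outcomes of extractCHOriginFieldFromQuery:
// a single traceable column, a JSON-extracted key, and a blank result for
// excluded functions such as aggregates.
func demoOriginField() {
    queries := []string{
        "SELECT lower(unix_milli)",                          // -> "unix_milli" (single column)
        "SELECT JSONExtractString(labels, 'service.name')",  // -> "service.name" (JSON key)
        "SELECT sum(amount)",                                // -> "" (excluded aggregate)
    }
    for _, q := range queries {
        field, err := extractCHOriginFieldFromQuery(q)
        fmt.Printf("%q -> %q (err=%v)\n", q, field, err)
    }
}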
pkg/parser/queryfilterextractor/clickhouse_originparser_test.go (new file, 237 lines)
@@ -0,0 +1,237 @@
package queryfilterextractor

import (
    "testing"
)

func TestExtractOriginField(t *testing.T) {
    tests := []struct {
        name        string
        query       string
        expected    string
        expectError bool
    }{
        // JSON extraction functions - should return the second argument (JSON path/key) as the origin field
        {
            name:     "JSONExtractString simple",
            query:    `SELECT JSONExtractString(labels, 'service.name')`,
            expected: "service.name",
        },
        {
            name:     "JSONExtractInt",
            query:    `SELECT JSONExtractInt(labels, 'status.code')`,
            expected: "status.code",
        },
        {
            name:     "JSONExtractFloat",
            query:    `SELECT JSONExtractFloat(labels, 'cpu.usage')`,
            expected: "cpu.usage",
        },
        {
            name:     "JSONExtractBool",
            query:    `SELECT JSONExtractBool(labels, 'feature.enabled')`,
            expected: "feature.enabled",
        },
        {
            name:     "JSONExtractString with function wrapper",
            query:    `SELECT lower(JSONExtractString(labels, 'user.email'))`,
            expected: "user.email",
        },
        {
            name:     "Nested JSON extraction",
            query:    `SELECT JSONExtractInt(JSONExtractRaw(labels, 'meta'), 'status.code')`,
            expected: "", // Nested JSON extraction should return blank
        },

        // Nested functions - should return the deepest column
        {
            name:     "Nested time functions with column",
            query:    `SELECT toStartOfInterval(toDateTime(intDiv(unix_milli, 1000)), toIntervalSecond(60))`,
            expected: "", // Contains toStartOfInterval and toDateTime, which are excluded
        },
        {
            name:     "Division with column",
            query:    `SELECT unix_milli / 1000`,
            expected: "unix_milli",
        },
        {
            name:     "Function with single column",
            query:    `SELECT lower(unix_milli)`,
            expected: "unix_milli",
        },
        {
            name:     "CAST with single column",
            query:    `SELECT CAST(unix_milli AS String)`,
            expected: "unix_milli",
        },
        {
            name:     "intDiv with single column",
            query:    `SELECT intDiv(unix_milli, 1000)`,
            expected: "unix_milli",
        },

        // Multiple columns - should return blank
        {
            name:     "Multiple columns in coalesce",
            query:    `SELECT (coalesce(cpu_usage, 0) + coalesce(mem_usage, 0)) / 2`,
            expected: "",
        },
        {
            name:     "Multiple columns in arithmetic",
            query:    `SELECT cpu_usage + mem_usage`,
            expected: "",
        },
        {
            name:     "Multiple columns in function",
            query:    `SELECT concat(first_name, last_name)`,
            expected: "",
        },

        // IF/CASE conditions - should return blank
        {
            name:     "IF with single column in condition",
            query:    `SELECT IF(error_count > 0, service, 'healthy')`,
            expected: "", // Multiple columns: error_count and service
        },
        {
            name:     "IF with JSON and multiple columns",
            query:    `SELECT if(JSONExtractInt(metadata, 'retry.count') > 3, toLower(JSONExtractString(metadata, 'user.id')), hostname)`,
            expected: "", // Multiple columns: metadata and hostname
        },
        {
            name:     "String literal should return string",
            query:    `SELECT 'constant'`,
            expected: "constant",
        },

        // No columns - should return blank
        {
            name:     "Number literal",
            query:    `SELECT 42`,
            expected: "",
        },
        {
            name:     "Multiple literals",
            query:    `SELECT 'constant', 42`,
            expected: "",
        },
        {
            name:     "Multiple string literals",
            query:    `SELECT 'constant', '42'`,
            expected: "",
        },

        // Excluded functions - should return blank
        {
            name:     "now() function",
            query:    `SELECT now()`,
            expected: "",
        },
        {
            name:     "today() function",
            query:    `SELECT today()`,
            expected: "",
        },
        {
            name:     "count aggregate",
            query:    `SELECT count(user_id)`,
            expected: "",
        },
        {
            name:     "sum aggregate",
            query:    `SELECT sum(amount)`,
            expected: "",
        },

        // Single column simple cases
        {
            name:     "Simple column reference",
            query:    `SELECT user_id`,
            expected: "user_id",
        },
        {
            name:     "Column with alias",
            query:    `SELECT user_id AS id`,
            expected: "user_id",
        },
        {
            name:     "Column in arithmetic with literals (multiplication)",
            query:    `SELECT unix_milli * 1000`,
            expected: "unix_milli",
        },

        // Edge cases
        {
            name:     "Nested functions with single column deep",
            query:    `SELECT upper(lower(trim(column_name)))`,
            expected: "column_name",
        },
        // Qualified column names (Path)
        {
            name:     "Column with table prefix",
            query:    `SELECT table.column_name`,
            expected: "column_name", // Path: the column name is extracted from the last field
        },
        {
            name:     "Qualified column in function",
            query:    `SELECT lower(table.column_name)`,
            expected: "column_name",
        },
        {
            name:     "Qualified column in arithmetic",
            query:    `SELECT table.column_name * 100`,
            expected: "column_name",
        },
        {
            name:     "Nested qualified column (schema.table.column)",
            query:    `SELECT schema.table.column_name`,
            expected: "column_name", // Should extract the final column name
        },
        {
            name:     "Multiple qualified columns",
            query:    `SELECT table1.column1 + table2.column2`,
            expected: "", // Multiple columns: column1 and column2
        },
        {
            name:     "Qualified column with CAST",
            query:    `SELECT CAST(table.column_name AS String)`,
            expected: "column_name",
        },
        {
            name:     "Multiple select items - return blank",
            query:    `SELECT JSONExtractString(labels, 'service.name'), unix_milli / 1000, cpu_usage + mem_usage`,
            expected: "",
        },

        // Error cases
        {
            name:        "Invalid SQL syntax",
            query:       `SELECT FROM table`,
            expectError: true,
        },
        {
            name:        "Malformed query",
            query:       `SELECT * FROM`,
            expectError: true,
        },
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            result, err := extractCHOriginFieldFromQuery(tt.query)

            if tt.expectError {
                if err == nil {
                    t.Errorf("ExtractOriginField() expected error but got nil, result = %q", result)
                }
            } else {
                if err != nil {
                    t.Errorf("ExtractOriginField() unexpected error: %v", err)
                }
                if result != tt.expected {
                    t.Errorf("ExtractOriginField() = %q, want %q", result, tt.expected)
                }
            }
        })
    }
}
pkg/parser/queryfilterextractor/clickhouse_test.go (new file, 1279 lines)
File diff suppressed because it is too large
pkg/parser/queryfilterextractor/promql.go (new file, 115 lines)
@@ -0,0 +1,115 @@
package queryfilterextractor

import (
    "github.com/SigNoz/signoz/pkg/errors"
    "github.com/prometheus/prometheus/model/labels"
    "github.com/prometheus/prometheus/promql/parser"
)

// PromQLFilterExtractor extracts metric names and grouping keys from PromQL queries
type PromQLFilterExtractor struct{}

// NewPromQLFilterExtractor creates a new PromQL filter extractor
func NewPromQLFilterExtractor() *PromQLFilterExtractor {
    return &PromQLFilterExtractor{}
}

// Extract parses a PromQL query and extracts metric names and grouping keys
func (e *PromQLFilterExtractor) Extract(query string) (*FilterResult, error) {
    expr, err := parser.ParseExpr(query)
    if err != nil {
        return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "failed to parse promql query: %s", err.Error())
    }

    result := &FilterResult{
        MetricNames:    []string{},
        GroupByColumns: []ColumnInfo{},
    }

    // Use a visitor to traverse the AST
    visitor := &promQLVisitor{
        metricNames: make(map[string]bool),
        groupBy:     make(map[string]bool),
    }

    // Walk the AST
    if err := parser.Walk(visitor, expr, nil); err != nil {
        return result, errors.NewInternalf(errors.CodeInternal, "failed to walk promql query: %s", err.Error())
    }

    // Convert sets to slices
    for metric := range visitor.metricNames {
        result.MetricNames = append(result.MetricNames, metric)
    }
    for groupKey := range visitor.groupBy {
        result.GroupByColumns = append(result.GroupByColumns, ColumnInfo{Name: groupKey, OriginExpr: groupKey, OriginField: groupKey})
    }

    return result, nil
}

// promQLVisitor implements the parser.Visitor interface
type promQLVisitor struct {
    metricNames map[string]bool
    groupBy     map[string]bool
    // Track whether we've already captured grouping from an outermost aggregation
    hasOutermostGrouping bool
}

func (v *promQLVisitor) Visit(node parser.Node, path []parser.Node) (parser.Visitor, error) {
    switch n := node.(type) {
    case *parser.VectorSelector:
        v.visitVectorSelector(n)
    case *parser.AggregateExpr:
        v.visitAggregateExpr(n, path)
    }

    return v, nil
}

// visitVectorSelector is called whenever the visitor encounters a VectorSelector node.
// Here we extract the metric names from the vector selector.
func (v *promQLVisitor) visitVectorSelector(vs *parser.VectorSelector) {
    // Check whether the metric name is specified directly
    if vs.Name != "" {
        v.metricNames[vs.Name] = true
    }

    // Check for a __name__ label matcher
    for _, matcher := range vs.LabelMatchers {
        if matcher.Name == labels.MetricName {
            switch matcher.Type {
            case labels.MatchEqual:
                v.metricNames[matcher.Value] = true
                // Skip negative filters - they don't extract metric names
                // case labels.MatchNotEqual, labels.MatchRegexp, labels.MatchNotRegexp:
            }
        }
    }
}

// visitAggregateExpr is called whenever the visitor encounters an AggregateExpr node.
// Here we extract the grouping keys from the outermost aggregation.
func (v *promQLVisitor) visitAggregateExpr(ae *parser.AggregateExpr, path []parser.Node) {
    // Count how many AggregateExpr nodes are in the path (excluding the current node).
    // This tells us the nesting level.
    nestingLevel := 0
    for _, p := range path {
        if _, ok := p.(*parser.AggregateExpr); ok {
            nestingLevel++
        }
    }

    // Only capture grouping from the outermost aggregation (nesting level 0)
    if nestingLevel == 0 && !v.hasOutermostGrouping {
        // If Without is true, we skip grouping per spec
        if !ae.Without && len(ae.Grouping) > 0 {
            v.hasOutermostGrouping = true
            for _, label := range ae.Grouping {
                v.groupBy[label] = true
            }
        }
    }

    // Continue traversal to find metrics in the expression
}
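For reference, a minimal usage sketch of the PromQL extractor (not part of the diff; it assumes the package import path `github.com/SigNoz/signoz/pkg/parser/queryfilterextractor`). The expected values follow test P3 below:

package main

import (
    "fmt"

    "github.com/SigNoz/signoz/pkg/parser/queryfilterextractor"
)

func main() {
    extractor := queryfilterextractor.NewPromQLFilterExtractor()

    // Only the outermost by() clause is captured as grouping;
    // the selector inside rate() supplies the metric name.
    result, err := extractor.Extract(`sum by (pod, region) (rate(http_requests_total[5m]))`)
    if err != nil {
        panic(err)
    }

    fmt.Println(result.MetricNames) // [http_requests_total]
    for _, col := range result.GroupByColumns {
        fmt.Println(col.Name) // pod, region (order not guaranteed: built from a set)
    }
}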
205
pkg/parser/queryfilterextractor/promql_test.go
Normal file
205
pkg/parser/queryfilterextractor/promql_test.go
Normal file
@@ -0,0 +1,205 @@
package queryfilterextractor

import (
	"reflect"
	"testing"
)

func TestPromQLFilterExtractor_Extract(t *testing.T) {
	extractor := NewPromQLFilterExtractor()

	tests := []struct {
		name               string
		query              string
		wantMetrics        []string
		wantGroupByColumns []ColumnInfo
		wantError          bool
	}{
		{
			name:               "P1 - Simple vector selector",
			query:              `http_requests_total{job="api"}`,
			wantMetrics:        []string{"http_requests_total"},
			wantGroupByColumns: []ColumnInfo{},
		},
		{
			name:               "P2 - Function call",
			query:              `rate(cpu_usage_seconds_total[5m])`,
			wantMetrics:        []string{"cpu_usage_seconds_total"},
			wantGroupByColumns: []ColumnInfo{},
		},
		{
			name:               "P3 - Aggregation with by()",
			query:              `sum by (pod,region) (rate(http_requests_total[5m]))`,
			wantMetrics:        []string{"http_requests_total"},
			wantGroupByColumns: []ColumnInfo{{Name: "pod", OriginExpr: "pod", OriginField: "pod"}, {Name: "region", OriginExpr: "region", OriginField: "region"}},
		},
		{
			name:               "P4 - Aggregation with without()",
			query:              `sum without (instance) (rate(cpu_usage_total[1m]))`,
			wantMetrics:        []string{"cpu_usage_total"},
			wantGroupByColumns: []ColumnInfo{}, // without() yields no grouping keys per spec
		},
		{
			name:               "P5 - Invalid: metric name set twice",
			query:              `sum(rate(http_requests_total{__name__!="http_requests_error_total"}[5m]))`,
			wantMetrics:        []string{},
			wantGroupByColumns: []ColumnInfo{},
			wantError:          true,
		},
		{
			name:               "P6 - Regex negative label",
			query:              `sum(rate(http_requests_total{status!~"5.."}[5m]))`,
			wantMetrics:        []string{"http_requests_total"},
			wantGroupByColumns: []ColumnInfo{},
		},
		{
			name:               "P7 - Nested aggregations",
			query:              `sum by (region) (max by (pod, region) (cpu_usage_total{env="prod"}))`,
			wantMetrics:        []string{"cpu_usage_total"},
			wantGroupByColumns: []ColumnInfo{{Name: "region", OriginExpr: "region", OriginField: "region"}}, // Only the outermost grouping is kept
		},
		{
			name:               "P7a - Nested aggregation: inner grouping ignored",
			query:              `sum(max by (pod) (cpu_usage_total{env="prod"}))`,
			wantMetrics:        []string{"cpu_usage_total"},
			wantGroupByColumns: []ColumnInfo{}, // Inner grouping is ignored when the outer aggregation has no grouping (nestingLevel != 0 case)
		},
		{
			name:               "P8 - Arithmetic expression",
			query:              `(http_requests_total{job="api"} + http_errors_total{job="api"})`,
			wantMetrics:        []string{"http_requests_total", "http_errors_total"},
			wantGroupByColumns: []ColumnInfo{},
		},
		{
			name:               "P9 - Mix of positive metric & exclusion label",
			query:              `sum by (region)(rate(foo{job!="db"}[5m]))`,
			wantMetrics:        []string{"foo"},
			wantGroupByColumns: []ColumnInfo{{Name: "region", OriginExpr: "region", OriginField: "region"}},
		},
		{
			name:               "P10 - Function + aggregation",
			query:              `histogram_quantile(0.9, sum(rate(http_request_duration_seconds_bucket[5m])) by (le))`,
			wantMetrics:        []string{"http_request_duration_seconds_bucket"},
			wantGroupByColumns: []ColumnInfo{{Name: "le", OriginExpr: "le", OriginField: "le"}},
		},
		{
			name:               "P11 - Subquery",
			query:              `sum_over_time(cpu_usage_total[1h:5m])`,
			wantMetrics:        []string{"cpu_usage_total"},
			wantGroupByColumns: []ColumnInfo{},
		},
		{
			name:               "P12 - Nested aggregation inside subquery",
			query:              `max_over_time(sum(rate(cpu_usage_total[5m]))[1h:5m])`,
			wantMetrics:        []string{"cpu_usage_total"},
			wantGroupByColumns: []ColumnInfo{},
		},
		{
			name:               "P13 - Subquery with multiple metrics",
			query:              `avg_over_time((foo + bar)[10m:1m])`,
			wantMetrics:        []string{"foo", "bar"},
			wantGroupByColumns: []ColumnInfo{},
		},
		{
			name:               "P14 - Simple meta-metric",
			query:              `sum by (pod) (up)`,
			wantMetrics:        []string{"up"},
			wantGroupByColumns: []ColumnInfo{{Name: "pod", OriginExpr: "pod", OriginField: "pod"}},
		},
		{
			name:               "P15 - Binary operator unless",
			query:              `sum(rate(http_requests_total[5m])) unless avg(rate(http_errors_total[5m]))`,
			wantMetrics:        []string{"http_requests_total", "http_errors_total"},
			wantGroupByColumns: []ColumnInfo{},
		},
		{
			name:               "P16 - Vector matching",
			query:              `sum(rate(foo[5m])) / ignoring(instance) group_left(job) sum(rate(bar[5m]))`,
			wantMetrics:        []string{"foo", "bar"},
			wantGroupByColumns: []ColumnInfo{},
		},
		{
			name:               "P17 - Offset modifier with aggregation",
			query:              `sum by (env)(rate(cpu_usage_seconds_total{job="api"}[5m] offset 1h))`,
			wantMetrics:        []string{"cpu_usage_seconds_total"},
			wantGroupByColumns: []ColumnInfo{{Name: "env", OriginExpr: "env", OriginField: "env"}},
		},
		{
			name:               "P18 - Invalid syntax",
			query:              `sum by ((foo)(bar))(http_requests_total)`,
			wantMetrics:        []string{},
			wantGroupByColumns: []ColumnInfo{},
			wantError:          true,
		},
		{
			name:               "P19 - Literal expression",
			query:              `2 + 3`,
			wantMetrics:        []string{},
			wantGroupByColumns: []ColumnInfo{},
		},
		{
			name:               "P20 - Aggregation inside subquery with deriv",
			query:              `deriv(sum by (instance)(rate(node_network_receive_bytes_total[5m]))[30m:5m])`,
			wantMetrics:        []string{"node_network_receive_bytes_total"},
			wantGroupByColumns: []ColumnInfo{{Name: "instance", OriginExpr: "instance", OriginField: "instance"}}, // Grouping is kept even though the aggregation sits inside a subquery, not at the outermost level
		},
		{
			name:               "P21 - Aggregation inside subquery with avg_over_time",
			query:              `avg_over_time(sum by (job)(rate(http_requests_total[1m]))[30m:1m])`,
			wantMetrics:        []string{"http_requests_total"},
			wantGroupByColumns: []ColumnInfo{{Name: "job", OriginExpr: "job", OriginField: "job"}}, // Grouping is kept even though the aggregation sits inside a subquery
		},
		{
			name:               "P22 - Aggregation inside subquery with max_over_time",
			query:              `max_over_time(sum by (pod)(rate(container_restarts_total[5m]))[1h:5m])`,
			wantMetrics:        []string{"container_restarts_total"},
			wantGroupByColumns: []ColumnInfo{{Name: "pod", OriginExpr: "pod", OriginField: "pod"}}, // Grouping is kept even though the aggregation sits inside a subquery
		},
		{
			name:               "P23 - Aggregation inside subquery with deriv (no rate)",
			query:              `deriv(sum by (namespace)(container_memory_working_set_bytes)[1h:10m])`,
			wantMetrics:        []string{"container_memory_working_set_bytes"},
			wantGroupByColumns: []ColumnInfo{{Name: "namespace", OriginExpr: "namespace", OriginField: "namespace"}}, // Grouping is kept even though the aggregation sits inside a subquery
		},
		{
			name:               "P24 - Aggregation inside subquery with histogram_quantile",
			query:              `histogram_quantile(0.95, avg_over_time(sum by (le, service)(rate(http_request_duration_seconds_bucket[5m]))[1h:5m]))`,
			wantMetrics:        []string{"http_request_duration_seconds_bucket"},
			wantGroupByColumns: []ColumnInfo{{Name: "le", OriginExpr: "le", OriginField: "le"}, {Name: "service", OriginExpr: "service", OriginField: "service"}}, // Grouping is kept even though the aggregation sits inside a subquery
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result, err := extractor.Extract(tt.query)

			// Check error expectation
			if tt.wantError {
				if err == nil {
					t.Errorf("Extract() expected error but got none, query: %s", tt.query)
				}
				return
			}
			if err != nil {
				t.Errorf("Extract() unexpected error = %v, query: %s", err, tt.query)
				return
			}

			// Sort for comparison
			gotMetrics := sortStrings(result.MetricNames)
			wantMetrics := sortStrings(tt.wantMetrics)

			if !reflect.DeepEqual(gotMetrics, wantMetrics) {
				t.Errorf("Extract() MetricNames = %v, want %v", gotMetrics, wantMetrics)
			}

			// Compare GroupByColumns after normalizing, since extraction order may vary
			gotGroupByColumns := sortColumnInfo(result.GroupByColumns)
			wantGroupByColumns := sortColumnInfo(tt.wantGroupByColumns)

			if !reflect.DeepEqual(gotGroupByColumns, wantGroupByColumns) {
				t.Errorf("Extract() GroupByColumns = %v, want %v", gotGroupByColumns, wantGroupByColumns)
			}
		})
	}
}
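Note: the test above calls sortStrings and sortColumnInfo, which this hunk does not define; they live elsewhere in the commit. A minimal sketch of what such helpers could look like, assuming they sort copies so the inputs are not mutated — the names come from the test, everything else here is an assumption:

package queryfilterextractor

import "sort"

// sortStrings returns a sorted copy of in, leaving the input untouched.
func sortStrings(in []string) []string {
	out := append([]string(nil), in...)
	sort.Strings(out)
	return out
}

// sortColumnInfo returns a copy of in ordered by Name so that
// reflect.DeepEqual comparisons are insensitive to extraction order.
func sortColumnInfo(in []ColumnInfo) []ColumnInfo {
	out := append([]ColumnInfo(nil), in...)
	sort.Slice(out, func(i, j int) bool { return out[i].Name < out[j].Name })
	return out
}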
58
pkg/parser/queryfilterextractor/queryfilterextractor.go
Normal file
@@ -0,0 +1,58 @@
// Package queryfilterextractor provides utilities for extracting metric names
// and grouping keys.
//
// This is useful for metrics discovery and query analysis.
package queryfilterextractor

import "github.com/SigNoz/signoz/pkg/errors"

const (
	ExtractorCH     = "qfe_ch"
	ExtractorPromQL = "qfe_promql"
)

// ColumnInfo represents a column in the query
type ColumnInfo struct {
	Name        string
	Alias       string
	OriginExpr  string
	OriginField string
}

// GroupName returns the field name in the resulting data which is used for grouping.
//
// Examples:
//
//   - SELECT region as new_region FROM metrics WHERE metric_name='cpu' GROUP BY region
//     GroupName() returns "new_region"
//
//   - SELECT region FROM metrics WHERE metric_name='cpu' GROUP BY region
//     GroupName() returns "region"
func (c *ColumnInfo) GroupName() string {
	if c.Alias != "" {
		return c.Alias
	}
	return c.Name
}

type FilterResult struct {
	// MetricNames are the metrics that are being filtered on
	MetricNames []string
	// GroupByColumns are the columns that are being grouped by
	GroupByColumns []ColumnInfo
}

type FilterExtractor interface {
	Extract(query string) (*FilterResult, error)
}

func NewExtractor(extractorType string) (FilterExtractor, error) {
	switch extractorType {
	case ExtractorCH:
		return NewClickHouseFilterExtractor(), nil
	case ExtractorPromQL:
		return NewPromQLFilterExtractor(), nil
	default:
		return nil, errors.Newf(errors.TypeInvalidInput, errors.CodeInvalidInput, "invalid extractor type: %s", extractorType)
	}
}
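For orientation, a minimal usage sketch of the factory and result types above. The import path is inferred from the sibling github.com/SigNoz/signoz/pkg/errors import, and the query string is illustrative, not taken from this commit:

package main

import (
	"fmt"

	"github.com/SigNoz/signoz/pkg/parser/queryfilterextractor"
)

func main() {
	// ExtractorPromQL parses PromQL; ExtractorCH would parse ClickHouse SQL instead.
	ex, err := queryfilterextractor.NewExtractor(queryfilterextractor.ExtractorPromQL)
	if err != nil {
		panic(err)
	}

	res, err := ex.Extract(`sum by (pod) (rate(http_requests_total[5m]))`)
	if err != nil {
		panic(err)
	}

	fmt.Println(res.MetricNames) // [http_requests_total]
	for _, col := range res.GroupByColumns {
		fmt.Println(col.GroupName()) // pod
	}
}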