Skip to content

Latest commit

 

History

History
319 lines (258 loc) · 6.63 KB

File metadata and controls

319 lines (258 loc) · 6.63 KB

DISTINCT

The DISTINCT clause removes duplicate rows from query results, returning only unique combinations of the selected columns.

Overview

Use DISTINCT when you need unique values:

  • Remove duplicate rows
  • Find unique combinations of columns
  • Count unique values

DISTINCT ON is PostgreSQL-only and requires specific columns.

Basic Usage

Single Column

// Get unique categories
$categories = $db->find()
    ->from('products')
    ->select(['category'])
    ->distinct()
    ->orderBy('category')
    ->get();

Multiple Columns

// Get unique combinations of category and region
$combinations = $db->find()
    ->from('sales')
    ->select(['category', 'region'])
    ->distinct()
    ->orderBy('category')
    ->get();

With WHERE Clause

// Get unique active product categories
$activeCategories = $db->find()
    ->from('products')
    ->select(['category'])
    ->where('active', 1)
    ->distinct()
    ->get();

DISTINCT with Aggregations

Count Unique Values

// Count unique customers
$uniqueCustomers = $db->find()
    ->from('orders')
    ->select(['unique_customers' => Db::count('DISTINCT customer_id')])
    ->getOne();

echo "Unique customers: " . $uniqueCustomers['unique_customers'];

With GROUP BY

// Get unique products per category
$results = $db->find()
    ->from('products')
    ->select([
        'category',
        'unique_products' => Db::count('DISTINCT product_id'),
    ])
    ->groupBy('category')
    ->get();

DISTINCT ON (PostgreSQL Only)

Note: DISTINCT ON is only supported in PostgreSQL. For other databases, use DISTINCT or window functions.

try {
    // Get first product from each category (PostgreSQL only)
    $results = $db->find()
        ->from('products')
        ->select(['category', 'name', 'price'])
        ->distinctOn('category')
        ->orderBy('category')
        ->orderBy('price', 'DESC')
        ->get();
} catch (\RuntimeException $e) {
    // Will throw exception on MySQL and SQLite
    echo "DISTINCT ON not supported: " . $e->getMessage();
}

Multiple Columns (PostgreSQL)

// Get first product per category and region
$results = $db->find()
    ->from('products')
    ->select(['category', 'region', 'name'])
    ->distinctOn(['category', 'region'])
    ->orderBy('category')
    ->orderBy('region')
    ->get();

Performance Considerations

DISTINCT Performance

DISTINCT requires sorting/hashing to identify duplicates:

// ✅ Good - with index on category
$db->find()
    ->from('products')
    ->select(['category'])
    ->distinct()
    ->get();

// Consider adding index:
// CREATE INDEX idx_products_category ON products(category);

Avoid Unnecessary DISTINCT

// ❌ Unnecessary - id is unique
$db->find()
    ->from('users')
    ->select(['id', 'email'])
    ->distinct()
    ->get();

// ✅ Better - no DISTINCT needed
$db->find()
    ->from('users')
    ->select(['id', 'email'])
    ->get();

Common Use Cases

1. Dropdown Options

// Get unique countries for dropdown
$countries = $db->find()
    ->from('users')
    ->select(['country'])
    ->distinct()
    ->orderBy('country')
    ->get();

2. Tag Cloud

// Get unique tags
$tags = $db->find()
    ->from('post_tags')
    ->select(['tag'])
    ->distinct()
    ->orderBy('tag')
    ->get();

3. Available Filters

// Get available filter options
$filters = [
    'categories' => $db->find()
        ->from('products')
        ->where('active', 1)
        ->select(['category'])
        ->distinct()
        ->get(),
    'brands' => $db->find()
        ->from('products')
        ->where('active', 1)
        ->select(['brand'])
        ->distinct()
        ->get(),
];

4. Unique Customer Analysis

// Analyze unique customers by month
$results = $db->find()
    ->from('orders')
    ->select([
        'month' => Db::month('created_at'),
        'unique_customers' => Db::count('DISTINCT customer_id'),
        'total_orders' => Db::count('*'),
    ])
    ->groupBy(Db::month('created_at'))
    ->orderBy('month')
    ->get();

DISTINCT vs GROUP BY

Sometimes GROUP BY can replace DISTINCT:

// Using DISTINCT
$categories = $db->find()
    ->from('products')
    ->select(['category'])
    ->distinct()
    ->get();

// Equivalent with GROUP BY (can be faster with proper indexes)
$categories = $db->find()
    ->from('products')
    ->select(['category'])
    ->groupBy('category')
    ->get();

Use GROUP BY when you also need aggregations:

// Get categories with product count
$categories = $db->find()
    ->from('products')
    ->select([
        'category',
        'product_count' => Db::count('*'),
    ])
    ->groupBy('category')
    ->get();

Important Notes

Column Selection

  • DISTINCT applies to all selected columns
  • Unique combination of all columns in SELECT
// Returns unique (category, brand) combinations
$db->find()
    ->from('products')
    ->select(['category', 'brand'])
    ->distinct()
    ->get();

With ORDER BY

// DISTINCT works well with ORDER BY
$db->find()
    ->from('products')
    ->select(['category'])
    ->distinct()
    ->orderBy('category')
    ->get();

Database Support

  • MySQL: DISTINCT fully supported, no DISTINCT ON
  • PostgreSQL: Both DISTINCT and DISTINCT ON supported
  • SQLite: DISTINCT fully supported, no DISTINCT ON

Best Practices

  1. Use indexes on columns in DISTINCT queries:
CREATE INDEX idx_products_category ON products(category);
  1. Be specific with column selection:
// ✅ Good - only select needed columns
$db->find()->select(['category'])->distinct()->get();

// ❌ Bad - unnecessary columns slow down DISTINCT
$db->find()->select(['*'])->distinct()->get();
  1. Consider GROUP BY for aggregations:
// ✅ Good - use GROUP BY when aggregating
$db->find()
    ->select(['category', 'count' => Db::count('*')])
    ->groupBy('category')
    ->get();
  1. PostgreSQL DISTINCT ON requires proper ORDER BY:
// ✅ Good - DISTINCT ON columns match ORDER BY
$db->find()
    ->distinctOn('category')
    ->orderBy('category')  // Required!
    ->orderBy('price', 'DESC')
    ->get();

Examples

See examples/01-basic/05-ordering.php for complete working examples (Examples 11-12).

See Also