This guide walks you through querying Indian Supreme Court judgment metadata stored in Parquet format on Amazon S3 using AWS Athena.
CREATE DATABASE supreme_court_cases;CREATE EXTERNAL TABLE supreme_court_cases.judgments (
title STRING,
petitioner STRING,
respondent STRING,
description STRING,
judge STRING,
author_judge STRING,
citation STRING,
case_id STRING,
cnr STRING,
decision_date STRING,
disposal_nature STRING,
court STRING,
available_languages STRING,
raw_html STRING,
path STRING,
nc_display STRING,
scraped_at STRING
)
PARTITIONED BY (year STRING)
STORED AS PARQUET
LOCATION 's3://indian-supreme-court-judgments/metadata/parquet/'
TBLPROPERTIES (
'has_encrypted_data'='false',
'projection.enabled'='true',
'projection.year.type'='integer',
'projection.year.range'='1950,2025',
'storage.location.template'='s3://indian-supreme-court-judgments/metadata/parquet/year=${year}/'
)SELECT
title,
petitioner,
respondent,
judge,
decision_date,
year
FROM
supreme_court_cases.judgments
WHERE
year = '2025'
LIMIT 10;SELECT
title,
decision_date,
judge,
author_judge
FROM
supreme_court_cases.judgments
WHERE
year = '2025'
ORDER BY
decision_date DESC
LIMIT 10;SELECT
title,
petitioner,
respondent,
judge,
decision_date
FROM
supreme_court_cases.judgments
WHERE
judge LIKE '%CHANDRACHUD%'
AND year = '2025'
LIMIT 10;SELECT
title,
petitioner,
respondent,
citation,
decision_date
FROM
supreme_court_cases.judgments
WHERE
citation LIKE '%(2025)%'
LIMIT 10;SELECT
judge,
COUNT(*) AS total_cases
FROM
supreme_court_cases.judgments
WHERE
year = '2025'
GROUP BY
judge
ORDER BY
total_cases DESC
LIMIT 5;SELECT
author_judge,
COUNT(*) AS authored_judgments
FROM
supreme_court_cases.judgments
WHERE
year = '2025'
AND author_judge != ''
GROUP BY
author_judge
ORDER BY
authored_judgments DESC
LIMIT 10;SELECT
disposal_nature,
COUNT(*) AS case_count
FROM
supreme_court_cases.judgments
WHERE
year = '2025'
GROUP BY
disposal_nature
ORDER BY
case_count DESC;SELECT
title,
available_languages,
decision_date
FROM
supreme_court_cases.judgments
WHERE
year = '2025'
AND array_size(split(available_languages, ',')) > 1
ORDER BY
decision_date DESC
LIMIT 10;SELECT
title,
petitioner,
respondent,
judge,
decision_date
FROM
supreme_court_cases.judgments
WHERE
judge IN (
SELECT
judge
FROM
supreme_court_cases.judgments
WHERE
year = '2025'
GROUP BY
judge
HAVING
COUNT(*) > 10
)
AND year = '2025'
LIMIT 20;SELECT
title,
judge,
author_judge,
decision_date,
RANK() OVER (PARTITION BY author_judge ORDER BY decision_date DESC) AS recent_rank
FROM
supreme_court_cases.judgments
WHERE
year = '2025'
AND author_judge != ''
LIMIT 20;WITH case_parties AS (
SELECT
title,
petitioner,
respondent,
decision_date,
year
FROM
supreme_court_cases.judgments
)
SELECT
a.title AS case_1,
b.title AS case_2,
a.decision_date,
b.decision_date
FROM
case_parties a
JOIN
case_parties b
ON
a.petitioner = b.petitioner
AND a.title != b.title
WHERE
a.year = '2025'
AND b.year = '2025'
LIMIT 10;SELECT
year,
COUNT(*) AS judgment_count
FROM
supreme_court_cases.judgments
WHERE
year BETWEEN '2020' AND '2025'
GROUP BY
year
ORDER BY
year;| Column | Type | Description |
|---|---|---|
title |
string | Case title |
petitioner |
string | Petitioner name(s) |
respondent |
string | Respondent name(s) |
description |
string | Case description |
judge |
string | Judge(s) presiding |
author_judge |
string | Authoring judge |
citation |
string | Legal citation |
case_id |
string | Unique case identifier |
cnr |
string | Case Number Register |
decision_date |
string | Date of judgment |
disposal_nature |
string | Nature of disposal |
court |
string | Court name |
available_languages |
string | Available languages (CSV format) |
raw_html |
string | Raw HTML |
path |
string | File path to PDF |
nc_display |
string | Display name |
scraped_at |
string | Timestamp when scraped |
year |
string | Year (partition column) |
The Supreme Court judgments data is organized in the following structure on S3:
s3://indian-supreme-court-judgments/metadata/year=YYYY/metadata.parquet
Each year has its own parquet file, making it efficient to query data for specific time periods.