Functions to format a SQL file with multiple queries and SQL statements
sql_file = """
--- Views for some nice data mart ---
use database my_database;
use schema my_schema;
create or replace view first_view as -- my first view
select a.car_id,
b.car_name, sum(a.price) over (partition by b.car_name order by a.car_id) as sum_price, a.price,
from sales as a left join (select car_id, car_name, from cars) as b
on a.car_id = b.car_id
where car_id>1 and car_id<=100 order by b.car_name;
-- Table no. 1 --
create or replace table first_table as -- my first table
select car_id,
avg(price) as avg_price,
from first_view
group by car_id order by car_id;
--- End of file ---
""".strip()
Then we would like to format the SQL-queries in this file, while letting every other non-query-SQL statement untouched. For the example above we would like to have something like this:
expected_sql_file = """
--- Views for some nice data mart ---
use database my_database;
use schema my_schema;
CREATE OR REPLACE VIEW first_view AS -- my first view
SELECT a.car_id,
b.car_name,
sum(a.price) OVER (PARTITION BY b.car_name
ORDER BY a.car_id) as sum_price,
a.price
FROM sales as a
LEFT JOIN (SELECT car_id,
car_name
FROM cars) as b
ON a.car_id = b.car_id
WHERE car_id > 1
and car_id <= 100
ORDER BY b.car_name;
-- Table no. 1 --
CREATE OR REPLACE TABLE first_table AS -- my first table
SELECT car_id,
avg(price) as avg_price
FROM first_view
GROUP BY car_id
ORDER BY car_id;
--- End of file ---
""".lstrip()
Formatting philosophy of SQL files
- Every SQL-query is separated from above by two new lines
- Every SQL-query is formatted via
format_sql
Basic file formatting
assert_and_print(
format_sql_commands(sql_file),
expected_sql_file
)
Using the /*skip-formatter*/
marker to not format some query
assert_and_print(
format_sql_commands("""
use database my_database;
/*skip-formatter*/
create Or replace View my_view aS
select asdf, qwer
from table1;
create or replace table my_table As
Select asdf, qwer
From table2
group by asdf;
"""),
"""use database my_database;
/*skip-formatter*/
create Or replace View my_view aS
select asdf, qwer
from table1;
CREATE OR REPLACE TABLE my_table AS
SELECT asdf,
qwer
FROM table2
GROUP BY asdf;
""")
assert_and_print(
format_sql_commands("""
create or replace table my_table As
Select asdf, qwer
From table2
group by asdf;
"""),
"""
CREATE OR REPLACE TABLE my_table AS
SELECT asdf,
qwer
FROM table2
GROUP BY asdf;
""".lstrip())
assert_and_print(
format_sql_commands("""
create or replace table my_table As
Select asdf, qwer
From table2
group by asdf -- some comment
;
"""),
"""
CREATE OR REPLACE TABLE my_table AS
SELECT asdf,
qwer
FROM table2
GROUP BY asdf; -- some comment
""".lstrip())
assert_and_print(
format_sql_commands(
"""
create table my_table As
select asdf, Qwer, /* ; */
qwer2, -- ;
replace(';', '', qwer3) as Qwer4
from table1; /* Some comment */
create view my_view As
Select asdf
From my_table; /* Another comment */
"""
),
"""
CREATE TABLE my_table AS
SELECT asdf,
qwer, /* ; */
qwer2, -- ;
replace(';', '', qwer3) as qwer4
FROM table1; /* Some comment */
CREATE VIEW my_view AS
SELECT asdf
FROM my_table; /* Another comment */
""".lstrip()
)
assert_and_print(
format_sql_commands(
"""
create or replace transient table my_table As
select asdf, Qwer, /* ; */
qwer2, -- ;
replace(';', '', qwer3) as Qwer4
from table1;
create view my_view As
Select asdf
From my_table;
""".lstrip()
),
"""
CREATE OR REPLACE TRANSIENT TABLE my_table AS
SELECT asdf,
qwer, /* ; */
qwer2, -- ;
replace(';', '', qwer3) as qwer4
FROM table1;
CREATE VIEW my_view AS
SELECT asdf
FROM my_table;
""".lstrip()
)
If the validation fails, then the function returns a dictionary instead of the formatted queries with information about the error
Semicolon validation error
assert_and_print(
format_sql_commands(
"""
create or replace transient table my_table As
select asdf, Qwer, /* ; */
qwer2, -- ;
replace(';', '', qwer3) as Qwer4
from table1
create view my_view As
Select asdf
From my_table;
""".lstrip()
),
{"semicolon": {"error_code": 2, "lines": [(1, 7)]}}
)
Unbalanced parenthesis error
assert_and_print(
format_sql_commands(
"""
create or replace transient table my_table As
select asdf, Qwer, /* ; */
(qwer2, -- ; ()
( /* ) */
replace(';', '', qwer3) as Qwer4
from table1;
create view my_view As
(Select asdf
From my_table;
""".lstrip()
),
{"unbalanced_parenthesis": {"error_code": 3, "lines": [[3, 4], [9]]}}
)
Unbalanced parenthesis + semicolon error
assert_and_print(
format_sql_commands(
"""
create or replace transient table my_table As
select asdf, Qwer, /* ; */
(qwer2, -- ; ()
( /* ) */
replace(';', '', qwer3) as Qwer4
from table1
create view my_view As
(Select asdf
From my_table;
""".lstrip()
),
{
"semicolon": {"error_code": 2, "lines": [(1, 8)]},
"unbalanced_parenthesis": {"error_code": 3, "lines": [[3, 4, 9]]},
}
)
Unbalanced case when ... end
assert_and_print(
format_sql_commands(
"""
create or replace transient table my_table As
select asdf, Qwer, /* ; */
case when asdf = 1 then 1 as qwer,
replace(';', '', qwer3) as Qwer4
from table1;
create view my_view As
""".lstrip()
),
{"unbalanced_case": {"error_code": 4, "lines": [[3]]}}
)
with tempfile.NamedTemporaryFile(mode="r+") as file:
file.write(sql_file)
file.seek(0)
format_sql_file(file.name)
formatted_file = file.read()
assert_and_print(
formatted_file,
expected_sql_file
)
sql_forgotten_semicolon = """
create or replace transient table my_table As
select asdf, Qwer, /* ; */
qwer2, -- ;
replace(';', '', qwer3) as Qwer4
from table1
create view my_view As
Select asdf
From my_table;
""".lstrip()
with tempfile.NamedTemporaryFile(mode="r+") as file:
file.write(sql_forgotten_semicolon)
file.seek(0)
exit_code = format_sql_file(file.name)
formatted_file = file.read()
assert_and_print( # no formatting
formatted_file,
sql_forgotten_semicolon
)
assert exit_code == 2
sql_forgotten_semicolon = """
create or replace transient table my_table As
select asdf, Qwer, /* ; */
(qwer2, -- ;
replace(';', '', qwer3) as Qwer4
from table1;
create view my_view As
(Select asdf
From my_table;
""".lstrip()
with tempfile.NamedTemporaryFile(mode="r+") as file:
file.write(sql_forgotten_semicolon)
file.seek(0)
exit_code = format_sql_file(file.name)
formatted_file = file.read()
assert_and_print( # no formatting
formatted_file,
sql_forgotten_semicolon
)
assert exit_code == 2
sql_forgotten_semicolon = """
create or replace transient table my_table As
select asdf, Qwer, /* ; */
(qwer2, -- ;
replace(';', '', qwer3) as Qwer4
from table1
create view my_view As
(Select asdf
From my_table;
""".lstrip()
with tempfile.NamedTemporaryFile(mode="r+") as file:
file.write(sql_forgotten_semicolon)
file.seek(0)
exit_code = format_sql_file(file.name)
formatted_file = file.read()
assert_and_print( # no formatting
formatted_file,
sql_forgotten_semicolon
)
assert exit_code == 2
with tempfile.TemporaryDirectory() as tmp_dir:
with open(os.path.join(tmp_dir, "tmp"), "w") as f:
f.write(sql_file)
with open(os.path.join(tmp_dir, "tmp2"), "w") as f:
f.write(sql_file)
format_sql_files([os.path.join(tmp_dir, "tmp"), os.path.join(tmp_dir, "tmp2")])
with open(os.path.join(tmp_dir, "tmp"), "r") as f:
formatted_file = f.read()
assert_and_print(
formatted_file,
expected_sql_file
)
with open(os.path.join(tmp_dir, "tmp2"), "r") as f:
formatted_file = f.read()
assert_and_print(
formatted_file,
expected_sql_file
)