Functions to format a SQL file with multiple queries and SQL statements
   
    
    
    
    
   
sql_file = """
--- Views for some nice data mart ---
use database my_database;
use schema my_schema;
create or replace view first_view as -- my first view
select a.car_id,
       b.car_name, sum(a.price) over (partition by b.car_name order by a.car_id) as sum_price, a.price,
from sales as a left join (select car_id, car_name, from cars) as b 
on a.car_id = b.car_id
where car_id>1 and car_id<=100 order by b.car_name;
-- Table no. 1 --
create or replace table first_table as -- my first table
select car_id,
       avg(price) as avg_price,
from first_view
group by car_id order by car_id;
--- End of file ---
""".strip()
Then we would like to format the SQL-queries in this file, while letting every other non-query-SQL statement untouched. For the example above we would like to have something like this:
expected_sql_file = """
--- Views for some nice data mart ---
use database my_database;
use schema my_schema;
CREATE OR REPLACE VIEW first_view AS -- my first view
SELECT a.car_id,
       b.car_name,
       sum(a.price) OVER (PARTITION BY b.car_name
                          ORDER BY a.car_id) as sum_price,
       a.price
FROM   sales as a
    LEFT JOIN (SELECT car_id,
                      car_name
               FROM   cars) as b
        ON a.car_id = b.car_id
WHERE  car_id > 1
   and car_id <= 100
ORDER BY b.car_name;
-- Table no. 1 --
CREATE OR REPLACE TABLE first_table AS -- my first table
SELECT car_id,
       avg(price) as avg_price
FROM   first_view
GROUP BY car_id
ORDER BY car_id;
--- End of file ---
""".lstrip()
Formatting philosophy of SQL files
- Every SQL-query is separated from above by two new lines
 - Every SQL-query is formatted via 
format_sql 
Basic file formatting
assert_and_print(
    format_sql_commands(sql_file),
    expected_sql_file
)
Using the /*skip-formatter*/ marker to not format some query
assert_and_print(
    format_sql_commands("""
use database my_database;
/*skip-formatter*/
create Or replace View my_view aS
select asdf, qwer
from table1;
create or replace table my_table As
Select asdf, qwer
From table2
group by asdf;
"""),
    """use database my_database;
/*skip-formatter*/
create Or replace View my_view aS
select asdf, qwer
from table1;
CREATE OR REPLACE TABLE my_table AS
SELECT asdf,
       qwer
FROM   table2
GROUP BY asdf;
""")
assert_and_print(
    format_sql_commands("""
create or replace table my_table As
Select asdf, qwer
From table2
group by asdf;
"""),
    """
CREATE OR REPLACE TABLE my_table AS
SELECT asdf,
       qwer
FROM   table2
GROUP BY asdf;
""".lstrip())
assert_and_print(
    format_sql_commands("""
create or replace table my_table As
Select asdf, qwer
From table2
group by asdf -- some comment
;
"""),
    """
CREATE OR REPLACE TABLE my_table AS
SELECT asdf,
       qwer
FROM   table2
GROUP BY asdf; -- some comment
""".lstrip())
assert_and_print(
    format_sql_commands(
"""
create table my_table As
select asdf, Qwer, /* ; */
qwer2, -- ;
replace(';', '', qwer3) as Qwer4
from table1; /* Some comment */
create view my_view As
Select asdf
From my_table; /* Another comment */
"""
    ),
"""
CREATE TABLE my_table AS
SELECT asdf,
       qwer, /* ; */
       qwer2, -- ;
       replace(';', '', qwer3) as qwer4
FROM   table1; /* Some comment */
CREATE VIEW my_view AS
SELECT asdf
FROM   my_table; /* Another comment */
""".lstrip()
)
assert_and_print(
    format_sql_commands(
"""
create or replace transient table my_table As
select asdf, Qwer, /* ; */
qwer2, -- ;
replace(';', '', qwer3) as Qwer4
from table1;
create view my_view As
Select asdf
From my_table;
""".lstrip()
    ),
"""
CREATE OR REPLACE TRANSIENT TABLE my_table AS
SELECT asdf,
       qwer, /* ; */
       qwer2, -- ;
       replace(';', '', qwer3) as qwer4
FROM   table1;
CREATE VIEW my_view AS
SELECT asdf
FROM   my_table;
""".lstrip()
)
If the validation fails, then the function returns a dictionary instead of the formatted queries with information about the error
Semicolon validation error
assert_and_print(
    format_sql_commands(
"""
create or replace transient table my_table As
select asdf, Qwer, /* ; */
qwer2, -- ;
replace(';', '', qwer3) as Qwer4
from table1
create view my_view As
Select asdf
From my_table;
""".lstrip()
    ), 
    {"semicolon": {"error_code": 2, "lines": [(1, 7)]}}
)
Unbalanced parenthesis error
assert_and_print(
    format_sql_commands(
"""
create or replace transient table my_table As
select asdf, Qwer, /* ; */
(qwer2, -- ; ()
( /* ) */
replace(';', '', qwer3) as Qwer4
from table1;
create view my_view As
(Select asdf
From my_table;
""".lstrip()
    ), 
    {"unbalanced_parenthesis": {"error_code": 3, "lines": [[3, 4], [9]]}}
)
Unbalanced parenthesis + semicolon error
assert_and_print(
    format_sql_commands(
"""
create or replace transient table my_table As
select asdf, Qwer, /* ; */
(qwer2, -- ; ()
( /* ) */
replace(';', '', qwer3) as Qwer4
from table1
create view my_view As
(Select asdf
From my_table;
""".lstrip()
    ), 
    {
        "semicolon": {"error_code": 2, "lines": [(1, 8)]},        
        "unbalanced_parenthesis": {"error_code": 3, "lines": [[3, 4, 9]]},
    }
)
Unbalanced case when ... end
assert_and_print(
    format_sql_commands(
"""
create or replace transient table my_table As
select asdf, Qwer, /* ; */
case when asdf = 1 then 1 as qwer,
replace(';', '', qwer3) as Qwer4
from table1;
create view my_view As
""".lstrip()
    ), 
    {"unbalanced_case": {"error_code": 4, "lines": [[3]]}}
)
with tempfile.NamedTemporaryFile(mode="r+") as file:
    file.write(sql_file)
    file.seek(0)
    format_sql_file(file.name)
    formatted_file = file.read()
assert_and_print(
    formatted_file,
    expected_sql_file
)
sql_forgotten_semicolon = """
create or replace transient table my_table As
select asdf, Qwer, /* ; */
qwer2, -- ;
replace(';', '', qwer3) as Qwer4
from table1
create view my_view As
Select asdf
From my_table;
""".lstrip()
with tempfile.NamedTemporaryFile(mode="r+") as file:
    file.write(sql_forgotten_semicolon)
    file.seek(0)
    exit_code = format_sql_file(file.name)
    formatted_file = file.read()
assert_and_print( # no formatting
    formatted_file,
    sql_forgotten_semicolon
)
assert exit_code == 2
sql_forgotten_semicolon = """
create or replace transient table my_table As
select asdf, Qwer, /* ; */
(qwer2, -- ;
replace(';', '', qwer3) as Qwer4
from table1;
create view my_view As
(Select asdf
From my_table;
""".lstrip()
with tempfile.NamedTemporaryFile(mode="r+") as file:
    file.write(sql_forgotten_semicolon)
    file.seek(0)
    exit_code = format_sql_file(file.name)
    formatted_file = file.read()
assert_and_print( # no formatting
    formatted_file,
    sql_forgotten_semicolon
)
assert exit_code == 2
sql_forgotten_semicolon = """
create or replace transient table my_table As
select asdf, Qwer, /* ; */
(qwer2, -- ;
replace(';', '', qwer3) as Qwer4
from table1
create view my_view As
(Select asdf
From my_table;
""".lstrip()
with tempfile.NamedTemporaryFile(mode="r+") as file:
    file.write(sql_forgotten_semicolon)
    file.seek(0)
    exit_code = format_sql_file(file.name)
    formatted_file = file.read()
assert_and_print( # no formatting
    formatted_file,
    sql_forgotten_semicolon
)
assert exit_code == 2
with tempfile.TemporaryDirectory() as tmp_dir:
    with open(os.path.join(tmp_dir, "tmp"), "w") as f:
        f.write(sql_file)
    with open(os.path.join(tmp_dir, "tmp2"), "w") as f:
        f.write(sql_file)
    format_sql_files([os.path.join(tmp_dir, "tmp"), os.path.join(tmp_dir, "tmp2")])
    with open(os.path.join(tmp_dir, "tmp"), "r") as f:
        formatted_file = f.read()
    assert_and_print(
        formatted_file,
        expected_sql_file
    )
    with open(os.path.join(tmp_dir, "tmp2"), "r") as f:
        formatted_file = f.read()
    assert_and_print(
        formatted_file,
        expected_sql_file
    )