-
Notifications
You must be signed in to change notification settings - Fork 830
Description
Describe the bug
Hi, I noticed a weird column autocompletion behavior that seems caused by having 2 or more databases in a DuckDB connection.
This happens when you attach a DuckLake, as you have both the datalake DB with many schemas and tables, and the 1-schema metadata catalog DB (e.g. postgres/duckdb/sqlite) attached.
I've reproduced the issue twice by attaching the ducklake in SQL, and creating a connection in Python.
The latter seems to point that the issue is having 2+ DBs in one DuckDB connection, and not a DuckLake-specific problem.
I noticed that there is no column autocompletion in SQL cells unless schema.table are typed first in the column name. Without them, only SQL functions or schemas are suggested:
In comparison, DuckDB UI seems able to autocomplete column names:
Will you submit a PR?
- Yes
Environment
{
"marimo": "0.18.4",
"editable": false,
"location": "/usr/local/lib/python3.14/site-packages/marimo",
"OS": "Linux",
"OS Version": "6.17.9-arch1-1",
"Processor": "",
"Python Version": "3.14.2",
"Locale": "C",
"Binaries": {
"Browser": "--",
"Node": "--"
},
"Dependencies": {
"click": "8.3.1",
"docutils": "0.22.3",
"itsdangerous": "2.2.0",
"jedi": "0.19.2",
"markdown": "3.10",
"narwhals": "2.13.0",
"packaging": "25.0",
"psutil": "7.1.3",
"pygments": "2.19.2",
"pymdown-extensions": "10.19.1",
"pyyaml": "6.0.3",
"starlette": "0.50.0",
"tomlkit": "0.13.3",
"typing-extensions": "4.15.0",
"uvicorn": "0.38.0",
"websockets": "15.0.1"
},
"Optional Dependencies": {
"altair": "6.0.0",
"duckdb": "1.4.3",
"mcp": "1.24.0",
"nbformat": "5.10.4",
"openai": "2.11.0",
"pandas": "2.3.3",
"polars": "1.36.1",
"pyarrow": "22.0.0",
"pytest": "9.0.2",
"python-lsp-ruff": "2.3.0",
"python-lsp-server": "1.14.0",
"ruff": "0.14.9",
"sqlglot": "28.3.0",
"ty": "0.0.1a34",
"vegafusion": "2.0.3"
},
"Experimental Flags": {
"external_agents": true,
"performant_table_charts": true,
"chat_modes": true,
"rtc_v2": false
}
}
Code to reproduce
# /// script
# requires-python = ">=3.13"
# dependencies = [
# "duckdb==1.4.3",
# "polars[pyarrow]==1.36.1",
# "sqlglot==28.3.0",
# ]
# ///
import marimo
__generated_with = "0.18.4"
app = marimo.App(width="medium")
@app.cell(hide_code=True)
def _():
import marimo as mo
return (mo,)
@app.cell(hide_code=True)
def _(mo):
mo.md(r"""
# DuckDB column autocompletion test
""")
return
@app.cell
def _(mo):
mo.md(r"""
With 1 db, column autocompletion works
""")
return
@app.cell
def _(mo):
_df = mo.sql(
f"""
use memory;
CREATE OR REPLACE TABLE memory.trains_memory AS FROM "http://blobs.duckdb.org/train_services.parquet";
"""
)
return
@app.cell
def _(mo):
_df = mo.sql(
f"""
-- try here, autocompletion works
SELECT service_id FROM memory.trains_memory;
"""
)
return
@app.cell
def _(mo):
mo.md(r"""
Let's attach a DuckLake using the metadata.duckdb file as a catalog, and data_files/ to store parquet files.
""")
return
@app.cell
def _(mo):
_df = mo.sql(
f"""
ATTACH 'ducklake:metadata.duckdb' AS my_ducklake (DATA_PATH 'data_files/');;
USE my_ducklake;
create schema if not exists my_ducklake.demo;
CREATE OR REPLACE TABLE my_ducklake.demo.trains_ducklake AS FROM "http://blobs.duckdb.org/train_services.parquet";
"""
)
return
app._unparsable_cell(
r"""
With both memory and ducklake attached, column autocompletion disappears:
""",
name="_"
)
@app.cell
def _(mo):
_df = mo.sql(
f"""
-- try here, no autocompletion works
SELECT service_id as service_id_memory FROM memory.trains_memory;
SELECT service_id as service_id_ducklake FROM my_ducklake.demo.trains_ducklake;
-- this works, but it's odd
SELECT memory.main.trains_memory.service_id as service_id_memory_full_name FROM memory.trains_memory;
SELECT my_ducklake.demo.trains_ducklake.service_id as service_id_full_name FROM my_ducklake.demo.trains_ducklake;
"""
)
return
@app.cell
def _(mo):
mo.md(r"""
The issue is also reproducible if we attach the DuckLake as its own Marimo connection, instead of "DuckDB (In-Memory)".
""")
return
@app.cell
def _():
import duckdb
duckdb.execute("use memory; detach database if exists my_ducklake;")
DATABASE_URL = 'ducklake:metadata.duckdb'
datalake_standalone = duckdb.connect(DATABASE_URL, read_only=False)
return (datalake_standalone,)
@app.cell
def _(mo):
mo.md(r"""
Keep "DuckDB (In-Memory)" for the next cell, autocompletion works again
""")
return
@app.cell
def _(mo, trains_memory):
_df = mo.sql(
f"""
-- try here (In-Memory), autocompletion for memory works if ducklake is attached in another cell
SELECT service_id FROM trains_memory;
"""
)
return
@app.cell
def _(mo):
mo.md(r"""
In the next Marimo SQL cell, select "DuckDB (datalake_standalone)"
There is still no autocompletion for the DuckLake select statement.
""")
return
@app.cell
def _(datalake_standalone, mo):
_df = mo.sql(
f"""
-- try here (datalake_standalone), autocompletion for ducklake still doesn't work
SELECT service_id FROM demo.trains_ducklake;
-- this works
SELECT demo.trains_ducklake.service_id as service_id_ducklake_full_name FROM demo.trains_ducklake;
""",
engine=datalake_standalone
)
return
if __name__ == "__main__":
app.run()