Skip to content

Commit d1e804e

Browse files
authored
add not_in filter for tables and dataframes (#7439)
## 📝 Summary

<!-- Provide a concise summary of what this pull request is addressing. If this PR fixes any issues, list them here by number (e.g., Fixes #123). -->

https://github.com/user-attachments/assets/4ed2c47c-d8e2-4df4-9dd3-d15d28a2420e

## 🔍 Description of Changes

<!-- Detail the specific changes made in this pull request. Explain the problem addressed and how it was resolved. If applicable, provide before and after comparisons, screenshots, or any relevant details to help reviewers understand the changes easily. -->

## 📋 Checklist

- [x] I have read the [contributor guidelines](https://github.com/marimo-team/marimo/blob/main/CONTRIBUTING.md).
- [ ] For large changes, or changes that affect the public API: this change was discussed or approved through an issue, on [Discord](https://marimo.io/discord?ref=pr), or the community [discussions](https://github.com/marimo-team/marimo/discussions) (Please provide a link if applicable).
- [x] I have added tests for the changes made.
- [x] I have run the code and verified that it works as expected.
1 parent e974123 commit d1e804e

File tree

11 files changed

+178
-15
lines changed

11 files changed

+178
-15
lines changed

frontend/src/components/data-table/context-menu.tsx

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -95,11 +95,11 @@ export const CellContextMenu = <TData,>({
9595
const column = cell.column;
9696
const canFilter = column.getCanFilter() && column.columnDef.meta?.filterType;
9797

98-
const handleFilterCell = () => {
98+
const handleFilterCell = (operator: "in" | "not_in") => {
9999
column.setFilterValue(
100100
Filter.select({
101101
options: [cell.getValue()],
102-
operator: "in",
102+
operator,
103103
}),
104104
);
105105
};
@@ -119,10 +119,14 @@ export const CellContextMenu = <TData,>({
119119
{canFilter && (
120120
<>
121121
<ContextMenuSeparator />
122-
<ContextMenuItem onClick={handleFilterCell}>
122+
<ContextMenuItem onClick={() => handleFilterCell("in")}>
123123
<FilterIcon className="mo-dropdown-icon h-3 w-3" />
124124
Filter by this value
125125
</ContextMenuItem>
126+
<ContextMenuItem onClick={() => handleFilterCell("not_in")}>
127+
<FilterIcon className="mo-dropdown-icon h-3 w-3" />
128+
Remove rows with this value
129+
</ContextMenuItem>
126130
</>
127131
)}
128132
</ContextMenuContent>

frontend/src/components/data-table/filter-pills.tsx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,8 @@ function formatValue(value: ColumnFilterValue, timeFormatter: DateFormatter) {
9696
const stringifiedOptions = value.options.map((o) =>
9797
stringifyUnknownValue({ value: o }),
9898
);
99-
return `is in [${stringifiedOptions.join(", ")}]`;
99+
const operator = value.operator === "in" ? "is in" : "not in";
100+
return `${operator} [${stringifiedOptions.join(", ")}]`;
100101
}
101102
if (value.type === "text") {
102103
return `contains "${value.text}"`;

frontend/src/components/data-table/filters.ts

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import type { ConditionType } from "@/plugins/impl/data-frames/schema";
77
import type { ColumnId } from "@/plugins/impl/data-frames/types";
88
import type { OperatorType } from "@/plugins/impl/data-frames/utils/operators";
99
import { assertNever } from "@/utils/assertNever";
10+
import { Logger } from "@/utils/Logger";
1011

1112
declare module "@tanstack/react-table" {
1213
//allows us to define custom properties for our columns
@@ -192,12 +193,20 @@ export function filterToFilterCondition(
192193
}
193194

194195
return [];
195-
case "select":
196+
case "select": {
197+
let operator = filter.operator;
198+
if (filter.operator !== "in" && filter.operator !== "not_in") {
199+
Logger.warn("Invalid operator for select filter", {
200+
operator: filter.operator,
201+
});
202+
operator = "in"; // fall back to the "in" operator
203+
}
196204
return {
197205
column_id: columnId,
198-
operator: "in",
206+
operator,
199207
value: filter.options,
200208
};
209+
}
201210

202211
default:
203212
assertNever(filter);

frontend/src/plugins/impl/data-frames/utils/__tests__/operators.test.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,12 +100,14 @@ describe("isConditionValueValid", () => {
100100
);
101101
expect(isConditionValueValid("contains", "test")).toBe(true);
102102
expect(isConditionValueValid("in", ["test"])).toBe(true);
103+
expect(isConditionValueValid("not_in", ["test"])).toBe(true);
103104
});
104105

105106
it("should return false if the value is not valid according to the schema for the given operator", () => {
106107
expect(isConditionValueValid("==", "not a number")).toBe(false);
107108
expect(isConditionValueValid("contains", 123)).toBe(false);
108109
expect(isConditionValueValid("in", "not an array")).toBe(false);
110+
expect(isConditionValueValid("not_in", "not an array")).toBe(false);
109111
});
110112

111113
it("should return true if the operator does not require a value", () => {

frontend/src/plugins/impl/data-frames/utils/operators.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ export const STRING_OPERATORS = {
5858
starts_with: [Schema.string],
5959
ends_with: [Schema.string],
6060
in: [Schema.stringMultiColumnValues],
61+
not_in: [Schema.stringMultiColumnValues],
6162
is_null: [],
6263
is_not_null: [],
6364
};

marimo/_plugins/ui/_impl/dataframes/transforms/handlers.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,14 @@ def convert_value(v: Any, converter: Callable[[str], Any]) -> Any:
223223
condition_expr = column.is_in(value) | column.is_null()
224224
else:
225225
condition_expr = column.is_in(value or [])
226+
elif condition.operator == "not_in":
227+
# ~is_in returns null for null values, so we need to explicitly include/exclude nulls
228+
if value is not None and None in value:
229+
condition_expr = ~column.is_in(value) & ~column.is_null()
230+
else:
231+
condition_expr = (
232+
~column.is_in(value or []) | column.is_null()
233+
)
226234
else:
227235
assert_never(condition.operator)
228236

marimo/_plugins/ui/_impl/dataframes/transforms/print_code.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,9 @@ def generate_where_clause(df_name: str, where: Condition) -> str:
5656
return f"{df_name}[{_as_literal(column_id)}].str.startswith({_as_literal(value)})" # noqa: E501
5757
elif operator == "ends_with":
5858
return f"{df_name}[{_as_literal(column_id)}].str.endswith({_as_literal(value)})" # noqa: E501
59-
elif operator == "in":
60-
return f"{df_name}[{_as_literal(column_id)}].isin({_list_of_strings(value)})" # noqa: E501
59+
elif operator == "in" or operator == "not_in":
60+
result = f"{df_name}[{_as_literal(column_id)}].isin({_list_of_strings(value)})" # noqa: E501
61+
return result if operator == "in" else f"~{result}"
6162
elif operator == "!=":
6263
return (
6364
f"{df_name}[{_as_literal(column_id)}].ne({_as_literal(value)})"
@@ -237,8 +238,9 @@ def generate_where_clause_polars(where: Condition) -> str:
237238
return f"pl.col({_as_literal(column_id)}).str.starts_with({_as_literal(value)})" # noqa: E501
238239
elif operator == "ends_with":
239240
return f"pl.col({_as_literal(column_id)}).str.ends_with({_as_literal(value)})" # noqa: E501
240-
elif operator == "in":
241-
return f"pl.col({_as_literal(column_id)}).is_in({_list_of_strings(value)})" # noqa: E501
241+
elif operator == "in" or operator == "not_in":
242+
result = f"pl.col({_as_literal(column_id)}).is_in({_list_of_strings(value)})" # noqa: E501
243+
return result if operator == "in" else f"~{result}"
242244
elif operator in [">", ">=", "<", "<="]:
243245
return f"pl.col({_as_literal(column_id)}) {operator} {_as_literal(value)}" # noqa: E501
244246
elif operator == "is_null":
@@ -397,8 +399,9 @@ def generate_where_clause(df_name: str, where: Condition) -> str:
397399
return f"({df_name}[{_as_literal(column_id)}].startswith({_as_literal(value)}))" # noqa: E501
398400
elif operator == "ends_with":
399401
return f"({df_name}[{_as_literal(column_id)}].endswith({_as_literal(value)}))" # noqa: E501
400-
elif operator == "in":
401-
return f"({df_name}[{_as_literal(column_id)}].isin({_list_of_strings(value)}))" # noqa: E501
402+
elif operator == "in" or operator == "not_in":
403+
result = f"({df_name}[{_as_literal(column_id)}].isin({_list_of_strings(value)}))" # noqa: E501
404+
return result if operator == "in" else f"~{result}"
402405
elif operator in [">", ">=", "<", "<="]:
403406
return f"({df_name}[{_as_literal(column_id)}] {operator} {_as_literal(value)})" # noqa: E501
404407
elif operator == "is_null":

marimo/_plugins/ui/_impl/dataframes/transforms/types.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
"starts_with",
4141
"ends_with",
4242
"in",
43+
"not_in",
4344
]
4445
Aggregation = Literal[
4546
"count",
@@ -77,7 +78,7 @@ def __hash__(self) -> int:
7778
return hash((self.column_id, self.operator, self.value))
7879

7980
def __post_init__(self) -> None:
80-
if self.operator == "in":
81+
if self.operator == "in" or self.operator == "not_in":
8182
if isinstance(self.value, list):
8283
# Hack to convert to tuple for frozen dataclass
8384
# Only tuples can be hashed
@@ -86,7 +87,7 @@ def __post_init__(self) -> None:
8687
pass
8788
else:
8889
raise ValueError(
89-
"value must be a list or tuple for 'in' operator"
90+
"value must be a list or tuple for 'in' or 'not_in' operator"
9091
)
9192

9293

tests/_plugins/ui/_impl/dataframes/test_handlers.py

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -584,6 +584,111 @@ def test_filter_rows_in_operator_null_rows(
584584
result = apply(df, transform)
585585
assert_frame_equal(result, expected)
586586

587+
@staticmethod
588+
@pytest.mark.parametrize(
589+
("df", "expected"),
590+
list(
591+
zip(
592+
create_test_dataframes({"A": [1, 2, 3], "B": [4, 5, 6]}),
593+
create_test_dataframes({"A": [3], "B": [6]}),
594+
)
595+
),
596+
)
597+
def test_filter_rows_not_in_operator(
598+
df: DataFrameType, expected: DataFrameType
599+
) -> None:
600+
transform = FilterRowsTransform(
601+
type=TransformType.FILTER_ROWS,
602+
operation="keep_rows",
603+
where=[Condition(column_id="A", operator="not_in", value=[1, 2])],
604+
)
605+
result = apply(df, transform)
606+
assert_frame_equal(result, expected)
607+
608+
@staticmethod
609+
@pytest.mark.parametrize(
610+
("df", "expected"),
611+
list(
612+
zip(
613+
create_test_dataframes(
614+
{"A": ["foo", "bar", "baz"], "B": [1, 2, 3]}
615+
),
616+
create_test_dataframes({"A": ["baz"], "B": [3]}),
617+
)
618+
),
619+
)
620+
def test_filter_rows_not_in_operator_strings(
621+
df: DataFrameType, expected: DataFrameType
622+
) -> None:
623+
transform = FilterRowsTransform(
624+
type=TransformType.FILTER_ROWS,
625+
operation="keep_rows",
626+
where=[
627+
Condition(
628+
column_id="A", operator="not_in", value=["foo", "bar"]
629+
)
630+
],
631+
)
632+
result = apply(df, transform)
633+
assert_frame_equal(result, expected)
634+
635+
@staticmethod
636+
@pytest.mark.parametrize(
637+
("df", "expected"),
638+
[
639+
*zip(
640+
create_test_dataframes(
641+
{"A": [1, 2, 3, None], "B": [4, 5, 6, 7]},
642+
exclude=["ibis"],
643+
),
644+
create_test_dataframes({"A": [3], "B": [6]}, exclude=["ibis"]),
645+
),
646+
],
647+
)
648+
def test_filter_rows_not_in_operator_with_nulls(
649+
df: DataFrameType, expected: DataFrameType
650+
) -> None:
651+
# not_in with None in value should exclude rows where A is 1, 2, or null
652+
transform = FilterRowsTransform(
653+
type=TransformType.FILTER_ROWS,
654+
operation="keep_rows",
655+
where=[
656+
Condition(column_id="A", operator="not_in", value=[1, 2, None])
657+
],
658+
)
659+
result = apply(df, transform)
660+
assert_frame_equal(result, expected)
661+
662+
@staticmethod
663+
@pytest.mark.parametrize(
664+
("df", "expected"),
665+
[
666+
*zip(
667+
create_test_dataframes(
668+
{"A": [1, 2, 3, None], "B": [4, 5, 6, 7]},
669+
exclude=["ibis"],
670+
),
671+
create_test_dataframes(
672+
{"A": [3, None], "B": [6, 7]}, exclude=["ibis"]
673+
),
674+
),
675+
],
676+
)
677+
def test_filter_rows_not_in_operator_keep_nulls(
678+
df: DataFrameType, expected: DataFrameType
679+
) -> None:
680+
# not_in WITHOUT None in value should keep null rows (only exclude 1 and 2)
681+
transform = FilterRowsTransform(
682+
type=TransformType.FILTER_ROWS,
683+
operation="keep_rows",
684+
where=[Condition(column_id="A", operator="not_in", value=[1, 2])],
685+
)
686+
result = apply(df, transform)
687+
if nw.dependencies.is_pandas_dataframe(result):
688+
assert_frame_equal_with_nans(result, expected)
689+
else:
690+
assert_frame_equal(result, expected)
691+
587692
@staticmethod
588693
@pytest.mark.parametrize(
589694
("df", "expected"),

tests/_plugins/ui/_impl/dataframes/test_print_code.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ def create_transform_strategy(
131131
list_condition_strategy = st.builds(
132132
Condition,
133133
column_id=list_column_id,
134-
operator=st.just("in"),
134+
operator=st.sampled_from(["in", "not_in"]),
135135
value=st.lists(st.one_of(st.text()), min_size=1),
136136
)
137137

0 commit comments

Comments
 (0)