Skip to content

Commit 0280771

Browse files
authored
Merge pull request #20953 from hvitved/rust/data-flow-call-models
Rust: Model more data flow constructs as calls using MaD
2 parents ef991e5 + 57ce2ee commit 0280771

33 files changed

+1230
-571
lines changed

rust/ql/lib/codeql/rust/dataflow/internal/DataFlowImpl.qll

Lines changed: 90 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,6 @@ final class DataFlowCall extends TDataFlowCall {
9090
* Holds if `arg` is an argument of `call` at the position `pos`.
9191
*/
9292
predicate isArgumentForCall(Expr arg, Call call, RustDataFlow::ArgumentPosition pos) {
93-
// TODO: Handle index expressions as calls in data flow.
94-
not call instanceof IndexExpr and
9593
arg = pos.getArgument(call)
9694
}
9795

@@ -206,8 +204,11 @@ module LocalFlow {
206204
)
207205
or
208206
// An edge from a pattern/expression to its corresponding SSA definition.
209-
nodeFrom.(AstNodeNode).getAstNode() =
210-
nodeTo.(SsaNode).asDefinition().(Ssa::WriteDefinition).getWriteAccess()
207+
exists(AstNode n |
208+
n = nodeTo.(SsaNode).asDefinition().(Ssa::WriteDefinition).getWriteAccess() and
209+
n = nodeFrom.(AstNodeNode).getAstNode() and
210+
not n = any(CompoundAssignmentExpr cae).getLhs()
211+
)
211212
or
212213
nodeFrom.(SourceParameterNode).getParameter().(Param).getPat() = nodeTo.asPat()
213214
or
@@ -261,6 +262,30 @@ private module Aliases {
261262
class LambdaCallKindAlias = LambdaCallKind;
262263
}
263264

265+
/**
266+
* Index assignments like `a[i] = rhs` are treated as `*a.index_mut(i) = rhs`,
267+
* so they should in principle be handled by `referenceAssignment`.
268+
*
269+
* However, this would require support for [generalized reverse flow][1], which
270+
* is not yet implemented, so instead we simulate reverse flow where it would
271+
* have applied via the model for `<_ as core::ops::index::IndexMut>::index_mut`.
272+
*
273+
* The same is the case for compound assignments like `a[i] += rhs`, which are
274+
* treated as `(*a.index_mut(i)).add_assign(rhs)`.
275+
*
276+
* [1]: https://github.com/github/codeql/pull/18109
277+
*/
278+
predicate indexAssignment(
279+
AssignmentOperation assignment, IndexExpr index, Node rhs, PostUpdateNode base, Content c
280+
) {
281+
assignment.getLhs() = index and
282+
rhs.asExpr() = assignment.getRhs() and
283+
base.getPreUpdateNode().asExpr() = index.getBase() and
284+
c instanceof ElementContent and
285+
// simulate that the flow summary applies
286+
not index.getResolvedTarget().fromSource()
287+
}
288+
264289
module RustDataFlow implements InputSig<Location> {
265290
private import Aliases
266291
private import codeql.rust.dataflow.DataFlow
@@ -360,6 +385,7 @@ module RustDataFlow implements InputSig<Location> {
360385
node instanceof ClosureParameterNode or
361386
node instanceof DerefBorrowNode or
362387
node instanceof DerefOutNode or
388+
node instanceof IndexOutNode or
363389
node.asExpr() instanceof ParenExpr or
364390
nodeIsHidden(node.(PostUpdateNode).getPreUpdateNode())
365391
}
@@ -399,13 +425,26 @@ module RustDataFlow implements InputSig<Location> {
399425

400426
final class ReturnKind = ReturnKindAlias;
401427

428+
private Function getStaticTargetExt(Call c) {
429+
result = c.getStaticTarget()
430+
or
431+
// If the static target of an overloaded operation cannot be resolved, we fall
432+
// back to the trait method as the target. This ensures that the flow models
433+
// still apply.
434+
not exists(c.getStaticTarget()) and
435+
exists(TraitItemNode t, string methodName |
436+
c.(Operation).isOverloaded(t, methodName, _) and
437+
result = t.getAssocItem(methodName)
438+
)
439+
}
440+
402441
/** Gets a viable implementation of the target of the given `Call`. */
403442
DataFlowCallable viableCallable(DataFlowCall call) {
404443
exists(Call c | c = call.asCall() |
405444
result.asCfgScope() = c.getARuntimeTarget()
406445
or
407446
exists(SummarizedCallable sc, Function staticTarget |
408-
staticTarget = c.getStaticTarget() and
447+
staticTarget = getStaticTargetExt(c) and
409448
sc = result.asSummarizedCallable()
410449
|
411450
sc = staticTarget
@@ -552,12 +591,6 @@ module RustDataFlow implements InputSig<Location> {
552591
access = c.(FieldContent).getAnAccess()
553592
)
554593
or
555-
exists(IndexExpr arr |
556-
c instanceof ElementContent and
557-
node1.asExpr() = arr.getBase() and
558-
node2.asExpr() = arr
559-
)
560-
or
561594
exists(ForExpr for |
562595
c instanceof ElementContent and
563596
node1.asExpr() = for.getIterable() and
@@ -583,6 +616,12 @@ module RustDataFlow implements InputSig<Location> {
583616
node2.asExpr() = deref
584617
)
585618
or
619+
exists(IndexExpr index |
620+
c instanceof ReferenceContent and
621+
node1.(IndexOutNode).getIndexExpr() = index and
622+
node2.asExpr() = index
623+
)
624+
or
586625
// Read from function return
587626
exists(DataFlowCall call |
588627
lambdaCall(call, _, node1) and
@@ -602,8 +641,18 @@ module RustDataFlow implements InputSig<Location> {
602641
implicitDeref(node1, node2, c)
603642
or
604643
// A read step dual to the store step for implicit borrows.
605-
implicitBorrow(node2.(PostUpdateNode).getPreUpdateNode(),
606-
node1.(PostUpdateNode).getPreUpdateNode(), c)
644+
exists(Node n | implicitBorrow(n, node1.(PostUpdateNode).getPreUpdateNode(), c) |
645+
node2.(PostUpdateNode).getPreUpdateNode() = n
646+
or
647+
// For compound assignments into variables like `x += y`, we do not want flow into
648+
// `[post] x`, as that would create spurious flow when `x` is a parameter. Instead,
649+
// we add the step directly into the SSA definition for `x` after the update.
650+
exists(CompoundAssignmentExpr cae, Expr lhs |
651+
lhs = cae.getLhs() and
652+
lhs = node2.(SsaNode).asDefinition().(Ssa::WriteDefinition).getWriteAccess() and
653+
n = TExprNode(lhs)
654+
)
655+
)
607656
or
608657
VariableCapture::readStep(node1, c, node2)
609658
}
@@ -644,13 +693,27 @@ module RustDataFlow implements InputSig<Location> {
644693
}
645694

646695
pragma[nomagic]
647-
private predicate referenceAssignment(Node node1, Node node2, ReferenceContent c) {
648-
exists(AssignmentExpr assignment, PrefixExpr deref |
649-
assignment.getLhs() = deref and
650-
deref.getOperatorName() = "*" and
696+
private predicate referenceAssignment(
697+
Node node1, Node node2, Expr e, boolean clears, ReferenceContent c
698+
) {
699+
exists(AssignmentExpr assignment, Expr lhs |
700+
assignment.getLhs() = lhs and
651701
node1.asExpr() = assignment.getRhs() and
652-
node2.asExpr() = deref.getExpr() and
653702
exists(c)
703+
|
704+
lhs =
705+
any(DerefExpr de |
706+
de = node2.(DerefOutNode).getDerefExpr() and
707+
e = de.getExpr()
708+
) and
709+
clears = true
710+
or
711+
lhs =
712+
any(IndexExpr ie |
713+
ie = node2.(IndexOutNode).getIndexExpr() and
714+
e = ie.getBase() and
715+
clears = false
716+
)
654717
)
655718
}
656719

@@ -694,14 +757,14 @@ module RustDataFlow implements InputSig<Location> {
694757
or
695758
fieldAssignment(node1, node2.(PostUpdateNode).getPreUpdateNode(), c)
696759
or
697-
referenceAssignment(node1, node2.(PostUpdateNode).getPreUpdateNode(), c)
760+
referenceAssignment(node1, node2.(PostUpdateNode).getPreUpdateNode(), _, _, c)
698761
or
699-
exists(AssignmentExpr assignment, IndexExpr index |
700-
c instanceof ElementContent and
701-
assignment.getLhs() = index and
702-
node1.asExpr() = assignment.getRhs() and
703-
node2.(PostUpdateNode).getPreUpdateNode().asExpr() = index.getBase()
704-
)
762+
indexAssignment(any(AssignmentExpr ae), _, node1, node2, c)
763+
or
764+
// Compund assignment like `a[i] += rhs` are modeled as a store step from `rhs`
765+
// to `[post] a[i]`, followed by a taint step into `[post] a`.
766+
indexAssignment(any(CompoundAssignmentExpr cae),
767+
node2.(PostUpdateNode).getPreUpdateNode().asExpr(), node1, _, c)
705768
or
706769
referenceExprToExpr(node1, node2, c)
707770
or
@@ -738,7 +801,7 @@ module RustDataFlow implements InputSig<Location> {
738801
predicate clearsContent(Node n, ContentSet cs) {
739802
fieldAssignment(_, n, cs.(SingletonContentSet).getContent())
740803
or
741-
referenceAssignment(_, n, cs.(SingletonContentSet).getContent())
804+
referenceAssignment(_, _, n.asExpr(), true, cs.(SingletonContentSet).getContent())
742805
or
743806
FlowSummaryImpl::Private::Steps::summaryClearsContent(n.(FlowSummaryNode).getSummaryNode(), cs)
744807
or
@@ -982,9 +1045,7 @@ private module Cached {
9821045
newtype TDataFlowCall =
9831046
TCall(Call call) {
9841047
Stages::DataFlowStage::ref() and
985-
call.hasEnclosingCfgScope() and
986-
// TODO: Handle index expressions as calls in data flow.
987-
not call instanceof IndexExpr
1048+
call.hasEnclosingCfgScope()
9881049
} or
9891050
TSummaryCall(
9901051
FlowSummaryImpl::Public::SummarizedCallable c, FlowSummaryImpl::Private::SummaryNode receiver

rust/ql/lib/codeql/rust/dataflow/internal/Node.qll

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -350,7 +350,8 @@ final private class ExprOutNode extends ExprNode, OutNode {
350350
ExprOutNode() {
351351
exists(Call call |
352352
call = this.asExpr() and
353-
not call instanceof DerefExpr // Handled by `DerefOutNode`
353+
not call instanceof DerefExpr and // Handled by `DerefOutNode`
354+
not call instanceof IndexExpr // Handled by `IndexOutNode`
354355
)
355356
}
356357

@@ -387,6 +388,32 @@ class DerefOutNode extends OutNode, TDerefOutNode {
387388
override string toString() { result = de.toString() + " [pre-dereferenced]" }
388389
}
389390

391+
/**
392+
* A node that represents the value of a `x[y]` expression _before_ implicit
393+
* dereferencing:
394+
*
395+
* `x[y]` equivalent to `*x.index(y)`, and this node represents the
396+
* `x.index(y)` part.
397+
*/
398+
class IndexOutNode extends OutNode, TIndexOutNode {
399+
IndexExpr ie;
400+
401+
IndexOutNode() { this = TIndexOutNode(ie, false) }
402+
403+
IndexExpr getIndexExpr() { result = ie }
404+
405+
override CfgScope getCfgScope() { result = ie.getEnclosingCfgScope() }
406+
407+
override DataFlowCall getCall(ReturnKind kind) {
408+
result.asCall() = ie and
409+
kind = TNormalReturnKind()
410+
}
411+
412+
override Location getLocation() { result = ie.getLocation() }
413+
414+
override string toString() { result = ie.toString() + " [pre-dereferenced]" }
415+
}
416+
390417
final class SummaryOutNode extends FlowSummaryNode, OutNode {
391418
private DataFlowCall call;
392419
private ReturnKind kind_;
@@ -476,6 +503,18 @@ class DerefOutPostUpdateNode extends PostUpdateNode, TDerefOutNode {
476503
override Location getLocation() { result = de.getLocation() }
477504
}
478505

506+
class IndexOutPostUpdateNode extends PostUpdateNode, TIndexOutNode {
507+
IndexExpr ie;
508+
509+
IndexOutPostUpdateNode() { this = TIndexOutNode(ie, true) }
510+
511+
override IndexOutNode getPreUpdateNode() { result = TIndexOutNode(ie, false) }
512+
513+
override CfgScope getCfgScope() { result = ie.getEnclosingCfgScope() }
514+
515+
override Location getLocation() { result = ie.getLocation() }
516+
}
517+
479518
final class SummaryPostUpdateNode extends FlowSummaryNode, PostUpdateNode {
480519
private FlowSummaryNode pre;
481520

@@ -514,7 +553,10 @@ newtype TNode =
514553
TExprPostUpdateNode(Expr e) {
515554
e.hasEnclosingCfgScope() and
516555
(
517-
isArgumentForCall(e, _, _)
556+
isArgumentForCall(e, _, _) and
557+
// For compound assignments into variables like `x += y`, we do not want flow into
558+
// `[post] x`, as that would create spurious flow when `x` is a parameter.
559+
not (e = any(CompoundAssignmentExpr cae).getLhs() and e instanceof VariableAccess)
518560
or
519561
lambdaCallExpr(_, _, e)
520562
or
@@ -526,7 +568,6 @@ newtype TNode =
526568
or
527569
e =
528570
[
529-
any(IndexExpr i).getBase(), //
530571
any(FieldExpr access).getContainer(), //
531572
any(TryExpr try).getExpr(), //
532573
any(AwaitExpr a).getExpr(), //
@@ -542,6 +583,7 @@ newtype TNode =
542583
borrow = true
543584
} or
544585
TDerefOutNode(DerefExpr de, Boolean isPost) or
586+
TIndexOutNode(IndexExpr ie, Boolean isPost) or
545587
TSsaNode(SsaImpl::DataFlowIntegration::SsaNode node) or
546588
TFlowSummaryNode(FlowSummaryImpl::Private::SummaryNode sn) {
547589
forall(AstNode n | n = sn.getSinkElement() or n = sn.getSourceElement() |

rust/ql/lib/codeql/rust/dataflow/internal/TaintTrackingImpl.qll

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -20,18 +20,6 @@ module RustTaintTracking implements InputSig<Location, RustDataFlow> {
2020
Stages::DataFlowStage::ref() and
2121
model = "" and
2222
(
23-
exists(BinaryExpr binary |
24-
binary.getOperatorName() = ["+", "-", "*", "/", "%", "&", "|", "^", "<<", ">>"] and
25-
pred.asExpr() = [binary.getLhs(), binary.getRhs()] and
26-
succ.asExpr() = binary
27-
)
28-
or
29-
exists(PrefixExpr prefix |
30-
prefix.getOperatorName() = ["-", "!"] and
31-
pred.asExpr() = prefix.getExpr() and
32-
succ.asExpr() = prefix
33-
)
34-
or
3523
pred.asExpr() = succ.asExpr().(CastExpr).getExpr()
3624
or
3725
exists(IndexExpr index |
@@ -65,6 +53,9 @@ module RustTaintTracking implements InputSig<Location, RustDataFlow> {
6553
or
6654
succ.(Node::PostUpdateNode).getPreUpdateNode().asExpr() =
6755
getPostUpdateReverseStep(pred.(Node::PostUpdateNode).getPreUpdateNode().asExpr(), false)
56+
or
57+
indexAssignment(any(CompoundAssignmentExpr cae),
58+
pred.(Node::PostUpdateNode).getPreUpdateNode().asExpr(), _, succ, _)
6859
)
6960
or
7061
FlowSummaryImpl::Private::Steps::summaryLocalStep(pred.(Node::FlowSummaryNode).getSummaryNode(),

rust/ql/lib/codeql/rust/elements/internal/OperationImpl.qll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ module Impl {
131131
* Holds if this operation is overloaded to the method `methodName` of the
132132
* trait `trait`.
133133
*/
134+
pragma[nomagic]
134135
predicate isOverloaded(Trait trait, string methodName, int borrows) {
135136
isOverloaded(this.getOperatorName(), this.getNumberOfOperands(), trait.getCanonicalPath(),
136137
methodName, borrows)

rust/ql/lib/codeql/rust/elements/internal/VariableImpl.qll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -676,12 +676,12 @@ module Impl {
676676
predicate isCapture() { this.getEnclosingCfgScope() != v.getEnclosingCfgScope() }
677677
}
678678

679-
/** Holds if `e` occurs in the LHS of an assignment or compound assignment. */
680-
private predicate assignmentExprDescendant(AssignmentExpr ae, Expr e) {
681-
e = ae.getLhs()
679+
/** Holds if `e` occurs in the LHS of an assignment operation. */
680+
predicate assignmentOperationDescendant(AssignmentOperation ao, Expr e) {
681+
e = ao.getLhs()
682682
or
683683
exists(Expr mid |
684-
assignmentExprDescendant(ae, mid) and
684+
assignmentOperationDescendant(ao, mid) and
685685
getImmediateParentAdj(e) = mid and
686686
not mid instanceof DerefExpr and
687687
not mid instanceof FieldExpr and
@@ -696,7 +696,7 @@ module Impl {
696696
cached
697697
VariableWriteAccess() {
698698
Cached::ref() and
699-
assignmentExprDescendant(ae, this)
699+
assignmentOperationDescendant(ae, this)
700700
}
701701

702702
/** Gets the assignment expression that has this write access in the left-hand side. */

0 commit comments

Comments
 (0)