Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add options to filter out internal GC and dispatch frames #87

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 40 additions & 10 deletions src/Allocs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ The `kwargs` are the same as [`PProf.pprof`](@ref), except:
of every allocation. Note that this tends to make the Graph view harder to
read, because it's over-aggregated, so we recommend filtering out the `Type:`
nodes in the PProf web UI.
- `skip_gc_internal::Bool = true`: By default, we hide the tail-end of the
stacktrace because it's not very useful. *Every* frame ends with some `jl_gc_*_alloc`,
which calls `maybe_record_alloc_to_profile`, which is distracting in Profile viewer.
This is similar to `skip_jl_dispatch` in the default CPU pprof() function.
"""
function pprof(alloc_profile::Profile.Allocs.AllocResults = Profile.Allocs.fetch()
;
Expand All @@ -40,9 +44,12 @@ function pprof(alloc_profile::Profile.Allocs.AllocResults = Profile.Allocs.fetch
keep_frames::Union{Nothing, AbstractString} = nothing,
ui_relative_percentages::Bool = true,
full_signatures::Bool = true,
skip_jl_dispatch::Bool = false,
# Allocs-specific arguments:
skip_gc_internal::Bool = true,
frame_for_type::Bool = true,
)
PProf.log_greeting(skip_jl_dispatch)
period = UInt64(0x1)

@assert !isempty(basename(out)) "`out=` must specify a file path to write to. Got unexpected: '$out'"
Expand All @@ -63,6 +70,7 @@ function pprof(alloc_profile::Profile.Allocs.AllocResults = Profile.Allocs.fetch
# specializations to PProf Functions. We use a hash of the function for the key.
funcs_map = Dict{UInt64, UInt64}()
functions = Vector{Function}()
funcs_to_skip = Set{Any}()

# NOTE: It's a bug to use the actual StackFrame itself as a key in a dictionary, since
# different StackFrames can compare the same sometimes! 🙀 So we manually compute an ID
Expand All @@ -80,23 +88,26 @@ function pprof(alloc_profile::Profile.Allocs.AllocResults = Profile.Allocs.fetch
drop_frames = isnothing(drop_frames) ? 0 : enter!(drop_frames)
keep_frames = isnothing(keep_frames) ? 0 : enter!(keep_frames)

function maybe_add_location(frame::StackFrame)::UInt64
function maybe_add_location(frame::StackFrame)::Tuple{Bool,UInt64}
# Use a unique function id for the frame:
# See: https://github.com/JuliaPerf/PProf.jl/issues/69
function_key = method_instance_id(frame)
line_number = frame.line

function_key in funcs_to_skip && return false, 0

# For allocations profile, we uniquely identify each frame position by
# (function,line_number) pairs. That identifies a position in the code.
frame_key = (function_key, line_number)
return get!(locs_map, frame_key) do

success = Ref(true)
loc = get!(locs_map, frame_key) do
loc_id = UInt64(length(locations) + 1)

# Extract info from the location frame
(function_name, file_name) =
string(frame.func), string(frame.file)

# Use a unique function id for the frame:
function_key = method_instance_id(frame)

# Decode the IP into information about this stack frame
function_id = get!(funcs_map, function_key) do
func_id = UInt64(length(functions) + 1)
Expand All @@ -121,6 +132,14 @@ function pprof(alloc_profile::Profile.Allocs.AllocResults = Profile.Allocs.fetch
end
isempty(simple_name) && (simple_name = "[unknown function]")
isempty(full_name_with_args) && (full_name_with_args = "[unknown function]")

if !PProf.should_keep_frame(from_c, skip_jl_dispatch,
skip_gc_internal, frame, simple_name)
success[] = false
push!(funcs_to_skip, function_key)
return 0
end

# WEIRD TRICK: By entering a separate copy of the string (with a
# different string id) for the name and system_name, pprof will use
# the supplied `name` *verbatim*, without pruning off the arguments.
Expand All @@ -131,6 +150,7 @@ function pprof(alloc_profile::Profile.Allocs.AllocResults = Profile.Allocs.fetch
else
name = enter!(simple_name)
end

file = Base.find_source_file(file_name)
file = file !== nothing ? file : file_name
filename = enter!(file)
Expand All @@ -139,13 +159,19 @@ function pprof(alloc_profile::Profile.Allocs.AllocResults = Profile.Allocs.fetch
return func_id
end

if !success[]
return 0
end

push!(
locations,
Location(;id = loc_id, line=[Line(function_id, line_number)])
)

return loc_id
end

return success[], loc
end

type_name_cache = Dict{Any,String}()
Expand All @@ -158,17 +184,21 @@ function pprof(alloc_profile::Profile.Allocs.AllocResults = Profile.Allocs.fetch

function construct_location_for_type(typename)
# TODO: Lol something less hacky than this:
return maybe_add_location(StackFrame(get_type_name(typename), "nothing", 0))
_, loc = maybe_add_location(StackFrame(get_type_name(typename), "nothing", 0))
return loc
end

# convert the sample.stack to vector of location_ids
@showprogress "Analyzing $(length(alloc_profile.allocs)) allocation samples..." for sample in alloc_profile.allocs
# for each location in the sample.stack, if it's the first time seeing it,
# we also enter that location into the locations table
location_ids = UInt64[
maybe_add_location(frame)
for frame in sample.stacktrace if (!frame.from_c || from_c)
]
location_ids = sizehint!(UInt64[], length(sample.stacktrace))
for frame in sample.stacktrace
should_add, loc = maybe_add_location(frame)
if should_add
push!(location_ids, loc)
end
end

if frame_for_type
# Add location_id for the type:
Expand Down
103 changes: 92 additions & 11 deletions src/PProf.jl
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,16 @@ You can also use `PProf.refresh(file="...")` to open a new file in the server.
- `keep_frames`: frames with function_name fully matching regexp string will be kept, even if it matches drop_functions.
- `ui_relative_percentages`: Passes `-relative_percentages` to pprof. Causes nodes
ignored/hidden through the web UI to be ignored from totals when computing percentages.
- `skip_jl_dispatch::Bool = false`: If true, we hide the internal frames from
julia dynamic dispatch: `jl_apply_generic`, `jl_apply`, `jl_invoke`, etc. This helps a lot
in the Graph view, because otherwise they imply a false recursion in your program when
there are multiple dynamic dispatches in your stacktrace. However, the dispatches can be
very useful in the flamegraph view, since they often represent performance problems. So
consider switching this based on which pprof UI view you're using.
- `skip_gc_internal::Bool = false`: Provided for consistency with PProf.Alloc.pprof().
True by default, since this would only be present when running an Alloc profile at the
same time, and you may want to know the impact of that on your CPU profile. See
[`PProf.Alloc.pprof`](@ref).
"""
function pprof(data::Union{Nothing, Vector{UInt}} = nothing,
lidict::Union{Nothing, Dict} = nothing;
Expand All @@ -94,7 +104,10 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing,
drop_frames::Union{Nothing, AbstractString} = nothing,
keep_frames::Union{Nothing, AbstractString} = nothing,
ui_relative_percentages::Bool = true,
skip_jl_dispatch::Bool = false,
skip_gc_internal::Bool = false,
)
log_greeting(skip_jl_dispatch)
if data === nothing
data = if isdefined(Profile, :has_meta)
copy(Profile.fetch(include_meta = false))
Expand Down Expand Up @@ -127,10 +140,11 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing,
# Functions need a uid, we'll use the pointer for the method instance
seen_funcs = Set{UInt64}()
funcs = Dict{UInt64, Function}()
funcs_to_skip = Set{UInt64}()

seen_locs = Set{UInt64}()
locs = Dict{UInt64, Location}()
locs_from_c = Dict{UInt64, Bool}()
locs_to_skip = Set{UInt64}()
samples = Vector{Sample}()

sample_type = [
Expand Down Expand Up @@ -173,7 +187,7 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing,
# if we have already seen this IP avoid decoding it again
if ip in seen_locs
# Only keep C frames if from_c=true
if (from_c || !locs_from_c[ip])
if !(ip in locs_to_skip)
push!(location_id, ip)
end
continue
Expand All @@ -183,6 +197,7 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing,
# Decode the IP into information about this stack frame (or frames given inlining)
location = Location(;id = ip, address = ip)
location_from_c = true
should_keep_location = false
# Will have multiple frames if frames were inlined (the last frame is the "real
# function", the inlinee)
for frame in lookup[ip]
Expand All @@ -194,10 +209,13 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing,

# Use a unique function id for the frame:
func_id = method_instance_id(frame)
push!(location.line, Line(function_id = func_id, line = frame.line))

# Known function
func_id in seen_funcs && continue
if func_id in seen_funcs && !(func_id in funcs_to_skip)
should_keep_location = true
push!(location.line, Line(function_id = func_id, line = frame.line))
continue
end
push!(seen_funcs, func_id)

# Store the function in our functions dict
Expand All @@ -220,6 +238,15 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing,
end
isempty(simple_name) && (simple_name = "[unknown function]")
isempty(full_name_with_args) && (full_name_with_args = "[unknown function]")

# Only keep C functions if from_c=true, and it's not a skipped internal frame.
if !should_keep_frame(from_c, skip_jl_dispatch,
skip_gc_internal, frame, simple_name)
push!(funcs_to_skip, func_id)
continue
end
should_keep_location = true

# WEIRD TRICK: By entering a separate copy of the string (with a
# different string id) for the name and system_name, pprof will use
# the supplied `name` *verbatim*, without pruning off the arguments.
Expand All @@ -232,16 +259,17 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing,
end
file = Base.find_source_file(file)
filename = enter!(file)
# Only keep C functions if from_c=true
if (from_c || !frame.from_c)
funcs[func_id] = Function(func_id, name, system_name, filename, start_line)
end

# Save the new function to the Location
funcs[func_id] = Function(func_id, name, system_name, filename, start_line)
push!(location.line, Line(function_id = func_id, line = frame.line))
end
locs_from_c[ip] = location_from_c
# Only keep C frames if from_c=true
if (from_c || !location_from_c)
# If any of the frames were kept, save the location
if should_keep_location
locs[ip] = location
push!(location_id, ip)
else
push!(locs_to_skip, ip)
end
end

Expand Down Expand Up @@ -272,6 +300,22 @@ function pprof(data::Union{Nothing, Vector{UInt}} = nothing,
out
end

log_once = false
function log_greeting(skip_jl_dispatch)
if !(log_once::Bool)
global log_once = true
if !skip_jl_dispatch
@info """👋 Welcome to PProf!

Consider trying `skip_jl_dispatch = true` as well, for a cleaner Graph view.
The current options are best for the Flamegraph view.

Now outputting your profile...
"""
end
end
end

function _escape_name_for_pprof(name)
# HACK: Apparently proto doesn't escape func names with `"` in them ... >.<
# TODO: Remove this hack after https://github.com/google/pprof/pull/564
Expand All @@ -291,6 +335,41 @@ function method_instance_id(frame)
end
end

function should_keep_frame(from_c, no_jl, no_gc, frame, name)
if ((from_c || !frame.from_c)
&& !(no_jl && is_julia_dispatch_internal(name))
&& !(no_gc && is_gc_internal(name)))
return true
end
return false
end
function is_julia_dispatch_internal(name)
return name in (
"jl_apply",
"_jl_apply",
"jl_invoke",
"_jl_invoke",
"jl_apply_generic",
"_jl_apply_generic",
"ijl_apply_generic",
)
end
function is_gc_internal(name)
return name in (
"jl_gc_alloc",
"jl_gc_alloc_",
"jl_gc_pool_alloc",
"jl_gc_big_alloc",
"jl_gc_alloc_typed",
"ijl_gc_pool_alloc",
"ijl_gc_big_alloc",
"ijl_gc_alloc_typed",
"ijl_gc_pool_alloc_instrumented",
"ijl_gc_big_alloc_instrumented",
"maybe_record_alloc_to_profile",
)
end

"""
refresh(; webhost = "localhost", webport = 57599, file = "profile.pb.gz",
ui_relative_percentages = true)
Expand Down Expand Up @@ -322,6 +401,8 @@ function refresh(; webhost::AbstractString = "localhost",
proc[] = pprof_jll.pprof() do pprof_path
open(pipeline(`$pprof_path -http=$webhost:$webport $relative_percentages_flag $file`))
end

sleep(1) # Wait for the server to start. This is usually long enough.
end

"""
Expand Down
34 changes: 32 additions & 2 deletions test/Allocs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ const out = tempname()
Profile.Allocs.clear()
Profile.Allocs.@profile sample_rate=1.0 begin
# Profile compilation
@eval foo(x,y) = x * y + y / x
@eval foo(2, 3)
@eval foo_alloc(x,y) = x * y + x / y
@eval foo_alloc(1,3)
end

# Write the profile
Expand Down Expand Up @@ -63,4 +63,34 @@ end
@test float_loc !== nothing
end

function load_prof_proto(file)
@show file
open(io->decode(ProtoDecoder(io), PProf.perftools.profiles.Profile), file, "r")
end

@testset "skip_jl_dispatch" begin
Profile.Allocs.clear();
Profile.Allocs.@profile sample_rate=1 Base.inferencebarrier(foo)(1)

@assert length(Profile.Allocs.fetch().allocs) > 0

args = (; out=tempname(), web=false)
matches(r, proto) = any(s->occursin(r, s), proto.string_table)
@test matches("jl_apply_generic", load_prof_proto(PProf.Allocs.pprof(;args..., skip_jl_dispatch=false)))
@test !matches("jl_apply_generic", load_prof_proto(PProf.Allocs.pprof(;args..., skip_jl_dispatch=true)))
end
@testset "skip_gc_internal" begin
Profile.Allocs.clear();
Profile.Allocs.@profile sample_rate=1 Base.inferencebarrier(foo)(1)

@assert length(Profile.Allocs.fetch().allocs) > 0

args = (; out=tempname(), web=false)
matches(r, proto) = any(s->occursin(r, s), proto.string_table)
@test matches(r"jl_gc_.*alloc", load_prof_proto(PProf.Allocs.pprof(;args..., skip_gc_internal=false)))
@test !matches(r"jl_gc_.*alloc", load_prof_proto(PProf.Allocs.pprof(;args..., skip_gc_internal=true)))
end



end # module PProfAllocsTest
13 changes: 11 additions & 2 deletions test/PProf.jl
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ end

@profile for i in 1:10000
# Profile compilation
@eval foo(x,y) = x * y + x / y
@eval foo($i,3)
@eval $(Symbol("foo$i"))(x,y) = x * y + x / y
@eval $(Symbol("foo$i"))($i,3)
end

# Cache profile output to test that it isn't changed
Expand Down Expand Up @@ -98,6 +98,15 @@ end
@test load_prof_proto(pprof(out=tempname(), web=false, keep_frames = "foo")).keep_frames != 0
end

@testset "skip_jl_dispatch" begin
Profile.clear()
@profile Base.inferencebarrier(foo)(1000000, 5, [])
args = (; out=tempname(), web=false)
matches(r, proto) = any(s->occursin(r, s), proto.string_table)
@test matches(r"jl_apply_generic", load_prof_proto(pprof(;args..., skip_jl_dispatch=false)))
@test !matches(r"jl_apply_generic", load_prof_proto(pprof(;args..., skip_jl_dispatch=true)))
end

@testset "@pprof macro" begin

@pprof foo(10000, 5, [])
Expand Down