Skip to content

Commit

Permalink
Implement PAM for MEPs (#1724)
Browse files Browse the repository at this point in the history
PAM (Pluggable Authentication Modules) is an opt-in configuration item for
`ManagerEndpointConfig`.  As documented in this PR, it is enabled via the `pam`
configuration item, and defaults to false/not enabled if not specified:

```yaml
multi_user: true
pam:
  enable: true
```

I was unable to find a suitable Python PAM implementation for our needs, so
ended up creating a PAM wrapper.  In particular, all of the PAM implementations
I found seemed to only implement the `pam_authenticate()` method, but we need
the `pam_acct_mgmt()` and `pam_*_session()` functions.  Until I'm educated
otherwise then, our internal library appears to be more fully featured than
other Python PAM implementations -- we may pull it out and offer it as an
independent project at some point.

[sc-36027]
  • Loading branch information
khk-globus committed Nov 15, 2024
1 parent 168adc4 commit be1d0cd
Show file tree
Hide file tree
Showing 14 changed files with 1,002 additions and 45 deletions.
27 changes: 27 additions & 0 deletions changelog.d/20241115_095433_kevin_implement_pam_for_meps.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
New Functionality
^^^^^^^^^^^^^^^^^

- Implement optional PAM capabilities for ensuring user accounts meet
site-specific criteria before starting user endpoints. Within the multi user
endpoint, PAM defaults to off, but is enabled via the ``pam`` field:

.. code-block:: yaml
   :caption: ``config.yaml`` -- Example MEP configuration opting-in to PAM

   multi_user: true
   pam:
     enable: true

As authentication is implemented via Globus Auth and identity mapping, the
Globus Compute Endpoint does not implement the authentication or password
management phases of PAM. It implements account
(|pam_acct_mgmt(3)|_) and session (|pam_open_session(3)|_) management.

For more information, consult :ref:`the PAM section <pam>` of the
documentation.

.. |pam_acct_mgmt(3)| replace:: ``pam_acct_mgmt(3)``
.. _pam_acct_mgmt(3): https://www.man7.org/linux/man-pages/man3/pam_acct_mgmt.3.html
.. |pam_open_session(3)| replace:: ``pam_open_session(3)``
.. _pam_open_session(3): https://www.man7.org/linux/man-pages/man3/pam_open_session.3.html

Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import textwrap

from click import ClickException
from globus_compute_endpoint.endpoint.config import UserEndpointConfig
from globus_compute_endpoint.endpoint.config.config import UserEndpointConfig
from globus_compute_endpoint.endpoint.config.utils import get_config
from globus_compute_endpoint.endpoint.endpoint import Endpoint
from globus_sdk import GlobusApp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@
ManagerEndpointConfigModel,
UserEndpointConfigModel,
)
from .pam import PamConfiguration # noqa: F401
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
)

from ..utils import is_privileged
from .pam import PamConfiguration

MINIMUM_HEARTBEAT: float = 5.0
log = logging.getLogger(__name__)
Expand Down Expand Up @@ -327,6 +328,10 @@ class ManagerEndpointConfig(BaseConfig):
configuration item is required, and a ``ValueError`` will be raised if the path
does not exist.
:param pam: Whether to enable authorization of user-endpoints via PAM routines, and
optionally specify the PAM service name. See |PamConfiguration|. If not
specified, PAM authorization defaults to disabled.
:param mu_child_ep_grace_period_s: The web-services send a start-user-endpoint to
the endpoint manager ahead of tasks for the target user endpoint. If the
user-endpoint is already running, these requests are ignored. To account for
Expand All @@ -347,6 +352,7 @@ class ManagerEndpointConfig(BaseConfig):
.. |BaseConfig| replace:: :class:`BaseConfig <globus_compute_endpoint.endpoint.config.config.BaseConfig>`
.. |ManagerEndpointConfig| replace:: :class:`ManagerEndpointConfig <globus_compute_endpoint.endpoint.config.config.ManagerEndpointConfig>`
.. |UserEndpointConfig| replace:: :class:`UserEndpointConfig <globus_compute_endpoint.endpoint.config.config.UserEndpointConfig>`
.. |PamConfiguration| replace:: :class:`PamConfiguration <globus_compute_endpoint.endpoint.config.pam.PamConfiguration>`
.. |setuid(2)| replace:: ``setuid(2)``
.. _setuid(2): https://www.man7.org/linux/man-pages/man2/setuid.2.html
Expand All @@ -357,6 +363,7 @@ def __init__(
*,
public: bool = False,
identity_mapping_config_path: os.PathLike | str | None = None,
pam: PamConfiguration | None = None,
force_mu_allow_same_user: bool = False,
mu_child_ep_grace_period_s: float = 30.0,
**kwargs,
Expand All @@ -372,6 +379,8 @@ def __init__(
_tmp = identity_mapping_config_path # work with both mypy and flake8
self.identity_mapping_config_path = _tmp # type: ignore[assignment]

self.pam = pam or PamConfiguration(enable=False)

@property
def identity_mapping_config_path(self) -> pathlib.Path | None:
return self._identity_mapping_config_path
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
validator,
)
from globus_compute_endpoint import engines, strategies
from globus_compute_endpoint.endpoint.config.pam import PamConfiguration
from parsl import addresses as parsl_addresses
from parsl import channels as parsl_channels
from parsl import launchers as parsl_launchers
Expand Down Expand Up @@ -185,6 +186,7 @@ class ManagerEndpointConfigModel(BaseConfigModel):
identity_mapping_config_path: t.Optional[FilePath]
force_mu_allow_same_user: t.Optional[bool]
mu_child_ep_grace_period_s: t.Optional[float]
pam: t.Optional[PamConfiguration]

class Config:
extra = "forbid"
24 changes: 24 additions & 0 deletions compute_endpoint/globus_compute_endpoint/endpoint/config/pam.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from dataclasses import asdict, dataclass

import yaml


@dataclass
class PamConfiguration:
    """
    PAM settings for a multi-user endpoint (MEP).

    :param enable: Whether to initiate a PAM session for each UEP start request.
    :param service_name: The PAM service name with which to initialize the PAM
        session.  If a particular MEP has different requirements, define those
        PAM requirements in ``/etc/pam.d/``, and specify the service name with
        this field.
    """

    # Defaults to on: constructing this object without arguments enables PAM.
    enable: bool = True

    # Service name used unless the configuration names a different one.
    service_name: str = "globus-compute-endpoint"


def _to_yaml(dumper: yaml.SafeDumper, data: PamConfiguration):
    """Represent a ``PamConfiguration`` as a plain YAML mapping of its fields."""
    field_map = asdict(data)
    return dumper.represent_mapping("tag:yaml.org,2002:map", field_map)


# Register the representer so the safe dumper can emit PamConfiguration objects.
yaml.SafeDumper.add_representer(PamConfiguration, _to_yaml)
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,19 @@
import typing as t
import uuid
from concurrent.futures import Future
from contextlib import contextmanager
from datetime import datetime
from http import HTTPStatus

import globus_compute_sdk as GC
from cachetools import TTLCache
from globus_compute_endpoint.endpoint.identity_mapper import PosixIdentityMapper

try:
import pyprctl
except AttributeError as e:
raise ImportError("pyprctl is not supported on this system") from e

import globus_compute_sdk as GC
import setproctitle
import yaml
from cachetools import TTLCache
from globus_compute_common.messagepack import pack
from globus_compute_common.messagepack.message_types import EPStatusReport
from globus_compute_common.pydantic_v1 import BaseModel
Expand All @@ -42,6 +42,7 @@
serialize_config,
)
from globus_compute_endpoint.endpoint.endpoint import Endpoint
from globus_compute_endpoint.endpoint.identity_mapper import PosixIdentityMapper
from globus_compute_endpoint.endpoint.rabbit_mq import (
CommandQueueSubscriber,
ResultPublisher,
Expand All @@ -52,6 +53,7 @@
send_endpoint_startup_failure_to_amqp,
update_url_port,
)
from globus_compute_endpoint.pam import PamHandle
from globus_sdk import GlobusAPIError, NetworkError

if t.TYPE_CHECKING:
Expand Down Expand Up @@ -761,6 +763,50 @@ def send_failure_notice(
finally:
sys.exit()

@contextmanager
def do_host_auth(self, username):
    """
    Context manager that brackets the caller's block with host-level checks.

    If ``self._config.pam.enable`` is false, the wrapped block runs first and
    then the process's capabilities are dropped and no-new-privs is set.
    Otherwise a ``PamHandle`` is created for the configured service name and
    ``username``; the account, credential, and session-open steps run before
    the wrapped block, and the credential, session-close, and
    credential-delete steps run after it.

    :param username: forwarded to ``PamHandle`` as its ``username`` argument
    :raises PermissionError: in the PAM branch, if any step -- or the wrapped
        block itself, as its exception is re-raised at the ``yield`` -- raises
        an ``Exception``.  The original error is logged, not chained.
    """
    if not self._config.pam.enable:
        # NOTE(review): there is no try/finally here, so if the wrapped block
        # raises, the privilege-dropping statements below are not executed --
        # confirm callers treat that exception as fatal for the process.
        yield
        # If the administrator has *not* enabled PAM, then assume the
        # intention is for a paranoid safe process and drop all
        # privileges now ...
        pyprctl.CapState().set_current()

        # ... and stating that even if exec'ing might return some
        # privileges, "no."  In particular after this, SETUID executables
        # invoked from this process root will not get privileges
        pyprctl.set_no_new_privs()

        return

    sname = self._config.pam.service_name
    log.debug("PAM: Creating handle (%s, %s)", sname, username)
    try:
        with PamHandle(sname, username=username) as pamh:
            log.debug("PAM: Invoking account stage")
            pamh.pam_acct_mgmt()
            log.debug("PAM: Creating credentials")
            pamh.credentials_establish()
            log.debug("PAM: Opening session")
            pamh.pam_open_session()

            # NOTE(review): if the wrapped block raises, control jumps to the
            # ``except`` below and the close-session / delete-credentials
            # calls are skipped; presumably PamHandle's exit handles any
            # remaining teardown -- verify.
            yield

            # wiped by initgroups, so reinitialize
            log.debug("PAM: Recreating credentials")
            pamh.credentials_establish()
            log.debug("PAM: Closing session")
            pamh.pam_close_session()
            log.debug("PAM: Removing credentials")
            pamh.credentials_delete()

            log.debug("PAM: Closing handle")
    except Exception as e:
        log.error(str(e))  # Share (very likely) pamlib error with admin ...

        # ... but be opaque with user.
        raise PermissionError("see your system administrator") from None

def cmd_start_endpoint(
self,
user_record: pwd.struct_passwd,
Expand Down Expand Up @@ -898,25 +944,25 @@ def cmd_start_endpoint(
# who run the multi-user setup as a non-privileged user, there is
# no need to change the user: they're already executing _as that
# uid_!
log.debug("Initializing groups for %s, %s", uname, gid)
os.initgroups(uname, gid) # raises (good!) on error
exit_code += 1

# But actually becoming the correct UID is _not_ fungible. If we
# can't -- for whatever reason -- that's a problem. So do NOT
# ignore the potential error.
log.debug("Setting process group for %s to %s", pid, gid)
os.setresgid(gid, gid, gid) # raises (good!) on error
exit_code += 1
log.debug("Setting process uid for %s to %s (%s)", pid, uid, uname)
os.setresuid(uid, uid, uid) # raises (good!) on error
exit_code += 1
with self.do_host_auth(uname):
log.debug("Setting process group for %s to %s", pid, gid)
os.setresgid(gid, gid, gid) # raises (good!) on error
exit_code += 1

log.debug("Initializing groups for %s, %s", uname, gid)
os.initgroups(uname, gid) # raises (good!) on error
exit_code += 1

log.debug("Setting process uid for %s to %s (%s)", pid, uid, uname)
os.setresuid(uid, uid, uid) # raises (good!) on error
exit_code += 1

try:
# Be paranoid by testing that we *can't* get back to orig_uid
os.setuid(orig_uid)
except PermissionError:
pass # good; the kernel has our backs now
pass # good; the kernel has our back now
else:
log.critical(
"Unexpectedly regained original privileges! (Should not have"
Expand All @@ -926,17 +972,11 @@ def cmd_start_endpoint(
# This message is potentially (likely) sent back to the SDK; no
# sense in sharing the specifics (i.e., `msg`) beyond the
# administrator.
raise PermissionError("PermissionError: failed to start endpoint")
raise PermissionError("failed to start endpoint")
del orig_uid, orig_gid

exit_code += 1

# If we had any capabilities, we drop them now.
pyprctl.CapState().set_current()

# Even if exec'ing might return some privileges, "no."
pyprctl.set_no_new_privs()

# some Q&D verification for admin debugging purposes
if not shutil.which(proc_args[0], path=env["PATH"]):
log.warning(
Expand Down
Loading

0 comments on commit be1d0cd

Please sign in to comment.