Source code for gemseo.core.parallel_execution

# Copyright 2021 IRT Saint Exupéry, https://www.irt-saintexupery.com
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License version 3 as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
# Contributors:
#    INITIAL AUTHORS - API and implementation and/or documentation
#        :author: Charlie Vanaret, Francois Gallard, Gilberto Ruiz
#    OTHER AUTHORS   - MACROSCOPIC CHANGES
"""Parallel execution of disciplines and functions using multiprocessing."""
from __future__ import annotations

import logging
import multiprocessing as mp
import os
import queue
import sys
import threading as th
import time
import traceback
from collections.abc import Iterable
from typing import Any
from typing import Callable
from typing import Mapping
from typing import Sequence
from typing import Union

from numpy import ndarray

IS_WIN = os.name == "nt"
ParallelExecutionWorkerType = Union[Sequence[Union[object, Callable]], object, Callable]

SUBPROCESS_NAME = "subprocess"

LOGGER = logging.getLogger(__name__)


def worker(
    par_exe: ParallelExecution | DiscParallelExecution | DiscParallelLinearization,
    queue_in: queue.Queue,
    queue_out: queue.Queue,
) -> None:
    """Execute a function while there are args left in the queue_in.

    Args:
        par_exe: The parallel execution object that contains
            the function to be executed.
        queue_in: The inputs to be evaluated.
        queue_out: The queue object where the outputs of the function
            will be saved.
    """
    for args in iter(queue_in.get, None):
        try:
            sys.stdout.flush()
            task_index, function_output = par_exe._run_task_by_index(args)
        except Exception as err:
            traceback.print_exc()
            queue_out.put((args, err))
            queue_in.task_done()
            continue
        queue_out.put((task_index, function_output))
        queue_in.task_done()
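The loop above relies on the standard sentinel idiom: ``iter(queue_in.get, None)`` calls ``queue_in.get()`` repeatedly until a ``None`` sentinel is received, which is how ``ParallelExecution.execute`` shuts its workers down. A minimal standalone sketch of the same idiom, independent of GEMSEO (the ``square_worker`` function and the queue contents are illustrative only):

import queue
import threading


def square_worker(queue_in: queue.Queue, queue_out: queue.Queue) -> None:
    # Keep consuming tasks until the None sentinel is received.
    for index in iter(queue_in.get, None):
        queue_out.put((index, index**2))
        queue_in.task_done()


queue_in, queue_out = queue.Queue(), queue.Queue()
thread = threading.Thread(
    target=square_worker, args=(queue_in, queue_out), daemon=True
)
thread.start()
for index in range(3):
    queue_in.put(index)
queue_in.put(None)  # The sentinel that ends the iter() loop in the worker.
thread.join()
while not queue_out.empty():
    print(queue_out.get())  # (index, index**2) pairs, possibly out of order.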
class ParallelExecution:
    """Perform a parallel execution of tasks on input values.

    Input values must be a list of independent pointers.
    """

    N_CPUS = mp.cpu_count()

    workers: ParallelExecutionWorkerType
    """The objects that perform the tasks."""

    n_processes: int
    """The maximum simultaneous number of threads or processes."""

    use_threading: bool
    """Whether to use threads instead of processes to parallelize the execution."""

    wait_time_between_fork: float
    """The time to wait between two forks of the process/thread."""

    input_values: Sequence[ndarray] | ndarray | None
    """The input values to be passed to the workers."""

    def __init__(
        self,
        workers: ParallelExecutionWorkerType,
        n_processes: int = N_CPUS,
        use_threading: bool = False,
        wait_time_between_fork: float = 0.0,
        exceptions_to_re_raise: tuple[type[Exception], ...] | None = None,
    ) -> None:
        """
        Args:
            workers: The objects that perform the tasks.
                Either pass one worker, and it will be forked in multiprocessing.
                Or, when using multithreading or different workers,
                pass one worker per input data.
            n_processes: The maximum simultaneous number of threads,
                if ``use_threading`` is True, or processes otherwise,
                used to parallelize the execution.
            use_threading: Whether to use threads instead of processes
                to parallelize the execution.
                Multiprocessing will copy (serialize) all the disciplines,
                while threading will share all the memory.
                This is important to note if you want to execute the same
                discipline multiple times; in that case, use multiprocessing.
            wait_time_between_fork: The time to wait
                between two forks of the process/thread.
            exceptions_to_re_raise: The exceptions that should be raised again
                when caught inside a worker.
                If None, all exceptions coming from workers are caught
                and the execution is allowed to continue.

        Raises:
            ValueError: If there are duplicated workers in ``workers``
                when using multithreading.
        """
        self.workers = workers
        self.n_processes = n_processes
        self.use_threading = use_threading
        if exceptions_to_re_raise is None:
            self.__exceptions_to_re_raise = ()
        else:
            self.__exceptions_to_re_raise = exceptions_to_re_raise

        if use_threading:
            ids = {id(worker) for worker in workers}
            if len(ids) != len(workers):
                raise ValueError(
                    "When using multithreading, "
                    "all workers shall be different objects."
                )
        self.wait_time_between_fork = wait_time_between_fork
        self.input_values = None

    def _run_task_by_index(self, task_index: int) -> tuple[int, Any]:
        """Run a task from an index of discipline and the input local data.

        The purpose is to be used by multiprocessing queues as a task.

        Args:
            task_index: The index of the task among ``self.workers``.

        Returns:
            The task index and the output of its computation.
        """
        input_loc = self.input_values[task_index]
        if ParallelExecution._is_worker(self.workers):
            worker = self.workers
        elif len(self.workers) > 1:
            worker = self.workers[task_index]
        else:
            worker = self.workers[0]

        # Return the worker index to order the outputs properly.
        output = self._run_task(worker, input_loc)
        return task_index, output
    def execute(
        self,
        input_values: Sequence[ndarray] | ndarray,
        exec_callback: Callable[[int, Any], Any] | None = None,
        task_submitted_callback: Callable | None = None,
    ) -> list[Any]:
        """Execute all the processes.

        Args:
            input_values: The input values.
            exec_callback: A callback function called with the pair
                (index, outputs) as arguments when an item is retrieved
                from the processing.
                Index is the associated index in ``input_values``
                of the input used to compute the outputs.
                If None, no function is called.
            task_submitted_callback: A callback function called
                when all the tasks are submitted, but not done yet.
                If None, no function is called.

        Returns:
            The computed outputs.

        Raises:
            TypeError: If ``exec_callback`` or ``task_submitted_callback``
                is not callable.

        Warnings:
            This class relies on multiprocessing features;
            it is therefore necessary to protect its execution
            with an ``if __name__ == '__main__':`` statement
            when working on Windows.
        """
        n_tasks = len(input_values)
        self.input_values = input_values

        if exec_callback is not None and not callable(exec_callback):
            raise TypeError("exec_callback function must be callable.")

        if task_submitted_callback is not None and not callable(
            task_submitted_callback
        ):
            raise TypeError("task_submitted_callback function must be callable.")

        tasks = list(range(n_tasks))[::-1]

        # Create the queues for the workers.
        if self.use_threading:
            queue_in = queue.Queue()
            queue_out = queue.Queue()
        else:
            manager = mp.Manager()
            queue_in = manager.Queue()
            queue_out = manager.Queue()
            tasks = manager.list(tasks)

        processes = []
        if self.use_threading:
            for _ in range(self.n_processes):
                thread = th.Thread(
                    target=worker,
                    args=(self, queue_in, queue_out),
                    name=SUBPROCESS_NAME,
                )
                thread.daemon = True
                thread.start()
                processes.append(thread)
        else:
            for _ in range(self.n_processes):
                proc = mp.Process(
                    target=worker,
                    args=(self, queue_in, queue_out),
                    name=SUBPROCESS_NAME,
                )
                proc.daemon = True
                proc.start()
                processes.append(proc)

        if mp.current_process().name != SUBPROCESS_NAME or self.use_threading:
            # Fill the input queue.
            while tasks:
                task_index = tasks[-1]
                del tasks[-1]
                # Delay the next executions after the first one.
                if self.wait_time_between_fork > 0 and task_index > 0:
                    time.sleep(self.wait_time_between_fork)
                queue_in.put(task_index)

            if task_submitted_callback is not None:
                task_submitted_callback()

        # Sort the outputs in the same order as the inputs.
        ordered_outputs = [None] * n_tasks
        got_n_outs = 0
        # Retrieve the outputs on the fly to call the callbacks, which
        # typically iterate a progress bar and store the data
        # in a database or a cache.
        stop = False
        while got_n_outs != n_tasks and not stop:
            index, output = queue_out.get()
            if isinstance(output, Exception):
                LOGGER.error("Failed to execute task indexed %s", str(index))
                LOGGER.error(output)
                # Stop the execution only for the exceptions to re-raise;
                # otherwise, keep getting the outputs from the queue.
                if isinstance(output, self.__exceptions_to_re_raise):
                    stop = True
            else:
                ordered_outputs[index] = output
                # Call the callback function.
                if exec_callback is not None:
                    exec_callback(index, output)
            got_n_outs += 1

        # Tell the threads and processes to terminate.
        for _ in processes:
            queue_in.put(None)

        # Join the processes and threads.
        for proc in processes:
            proc.join()

        # Re-raise the exception caught in a worker if required.
        if isinstance(output, self.__exceptions_to_re_raise):
            raise output

        # Update the self.workers objects.
        self._update_local_objects(ordered_outputs)

        # Filter the outputs, if needed.
        return self._filter_ordered_outputs(ordered_outputs)
    @staticmethod
    def _filter_ordered_outputs(ordered_outputs):
        """Filter the ordered outputs.

        Eventually return a subset in the execute method.
        To be overloaded by subclasses.

        Args:
            ordered_outputs: The outputs,
                map of ``_run_task`` over ``inputs_list``.

        Returns:
            The filtered outputs.
        """
        return ordered_outputs

    def _update_local_objects(self, ordered_outputs):
        """Update the local objects from the parallel results.

        The ``ordered_outputs`` contains the stacked outputs
        of the function ``_run_task``.
        To be overloaded by subclasses.

        Args:
            ordered_outputs: The outputs,
                map of ``_run_task`` over ``inputs_list``.
        """

    @staticmethod
    def _run_task(
        worker: ParallelExecutionWorkerType,
        input_loc: Any,
    ) -> Any:
        """Effectively perform the computation.

        To be overloaded by subclasses.

        Args:
            worker: The worker pointer.
            input_loc: The input of the worker.

        Returns:
            The computation of the task.

        Raises:
            TypeError: If the provided worker has the wrong type.
        """
        if not ParallelExecution._is_worker(worker):
            raise TypeError(f"Cannot handle worker: {worker}.")

        if hasattr(worker, "execute"):
            return worker.execute(input_loc)

        return worker(input_loc)

    @staticmethod
    def _is_worker(
        worker: ParallelExecutionWorkerType,
    ) -> bool:
        """Test if the worker is acceptable.

        A worker is acceptable if it is callable or has an ``execute`` method.

        Args:
            worker: The worker to test.

        Returns:
            Whether the worker is acceptable.
        """
        return hasattr(worker, "execute") or callable(worker)
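A minimal usage sketch of ``ParallelExecution`` with a plain callable worker; the ``double`` function and the input arrays are illustrative, not part of the module. Since a single worker is forked with multiprocessing here, the script must be protected by ``if __name__ == '__main__':``, as the ``execute`` docstring warns:

from numpy import array
from numpy import ndarray

from gemseo.core.parallel_execution import ParallelExecution


def double(x: ndarray) -> ndarray:
    return 2 * x


if __name__ == "__main__":
    # A single callable worker, forked over all the input values.
    parallel = ParallelExecution(double, n_processes=2)
    outputs = parallel.execute(
        [array([1.0]), array([2.0]), array([3.0])],
        exec_callback=lambda index, output: print(f"task {index}: {output}"),
    )
    # outputs[i] is double(input_values[i]), in the order of the inputs.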
class DiscParallelExecution(ParallelExecution):
    """Execute disciplines in parallel."""

    def _update_local_objects(self, ordered_outputs: Mapping[int, Any]) -> None:
        """Update the local objects from the parallel results.

        The ``ordered_outputs`` contains the stacked outputs
        of the function ``_run_task``.

        Args:
            ordered_outputs: The outputs,
                map of ``_run_task`` over ``inputs_list``.
        """
        if not isinstance(self.workers, Iterable) or len(self.workers) != len(
            self.input_values
        ):
            if IS_WIN and not self.use_threading:
                self.workers.n_calls += len(self.input_values)
            return

        for disc, output in zip(self.workers, ordered_outputs):
            # Update the discipline local data.
            disc.local_data = output
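A hedged sketch of how ``DiscParallelExecution`` updates its workers, using a hypothetical ``ToyDiscipline`` that duck-types the only interface used here: an ``execute`` method and a ``local_data`` attribute. With ``use_threading=True``, the workers share memory, so each instance is updated in place:

from numpy import array
from numpy import ndarray

from gemseo.core.parallel_execution import DiscParallelExecution


class ToyDiscipline:
    """A hypothetical stand-in for a discipline: execute() and local_data."""

    def __init__(self) -> None:
        self.local_data = {}
        self.n_calls = 0

    def execute(self, input_data: ndarray) -> dict:
        self.n_calls += 1
        return {"y": 2 * input_data}


disciplines = [ToyDiscipline() for _ in range(3)]
parallel = DiscParallelExecution(disciplines, n_processes=2, use_threading=True)
parallel.execute([array([1.0]), array([2.0]), array([3.0])])
# Each worker's local_data has been set to the output of its own run.
print([disc.local_data for disc in disciplines])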
class DiscParallelLinearization(ParallelExecution):
    """Linearize disciplines in parallel."""

    def _update_local_objects(self, ordered_outputs: Mapping[int, Any]) -> None:
        """Update the local objects from the parallel results.

        The ``ordered_outputs`` contains the stacked outputs
        of the function ``_run_task``.

        Args:
            ordered_outputs: The outputs,
                map of ``_run_task`` over ``inputs_list``.
        """
        if not isinstance(self.workers, Iterable) or len(self.workers) != len(
            self.input_values
        ):
            if IS_WIN and not self.use_threading:
                # Only increase the numbers of calls
                # if the Jacobian was computed.
                if ordered_outputs[0][0]:
                    self.workers.n_calls += len(self.input_values)
                    self.workers.n_calls_linearize += len(self.input_values)
            return

        for disc, output in zip(self.workers, ordered_outputs):
            # Update the discipline Jacobian.
            disc.jac = output[1]
            # Update the discipline local data in case of execution.
            disc.local_data = output[0]

    @staticmethod
    def _run_task(worker, input_loc):
        """Effectively perform the computation.

        Args:
            worker: The worker pointer.
            input_loc: The input of the worker.

        Returns:
            The local data of the worker and its Jacobian.
        """
        jac = worker.linearize(input_loc)
        return worker.local_data, jac

    @staticmethod
    def _filter_ordered_outputs(ordered_outputs):
        """Filter the ordered outputs.

        Args:
            ordered_outputs: The outputs,
                map of ``_run_task`` over ``inputs_list``.

        Returns:
            The Jacobians.
        """
        # Only keep the Jacobians as outputs; dismiss local_data.
        return [out[1] for out in ordered_outputs]
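Similarly, a hedged sketch for ``DiscParallelLinearization``, with a hypothetical ``ToyLinearizableDiscipline`` providing the ``linearize`` method and ``local_data`` attribute that ``_run_task`` relies on; as ``_filter_ordered_outputs`` shows, ``execute`` returns only the Jacobians, while ``local_data`` and ``jac`` are updated on the disciplines themselves:

from numpy import array
from numpy import ndarray

from gemseo.core.parallel_execution import DiscParallelLinearization


class ToyLinearizableDiscipline:
    """A hypothetical stand-in: linearize() plus local_data and jac."""

    def __init__(self) -> None:
        self.local_data = {}
        self.jac = None

    def linearize(self, input_data: ndarray) -> dict:
        # Mimic an execution followed by the derivative of y = 2 * x.
        self.local_data = {"y": 2 * input_data}
        return {"y": {"x": array([[2.0]])}}


disciplines = [ToyLinearizableDiscipline() for _ in range(2)]
parallel = DiscParallelLinearization(disciplines, use_threading=True)
jacobians = parallel.execute([array([1.0]), array([2.0])])
# execute() returns only the Jacobians; local_data and jac were also
# updated in place on each discipline by _update_local_objects.
print(jacobians)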