Source code for gemseo.core.parallel_execution

# -*- coding: utf-8 -*-
# Copyright 2021 IRT Saint Exupéry, https://www.irt-saintexupery.com
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License version 3 as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

# Contributors:
#    INITIAL AUTHORS - API and implementation and/or documentation
#        :author: Charlie Vanaret, Francois Gallard
#    OTHER AUTHORS   - MACROSCOPIC CHANGES
"""Parallel execution of disciplines and functions using multiprocessing."""
from __future__ import division, unicode_literals

import logging
import multiprocessing as mp
import os
import queue
import sys
import threading as th
import time
import traceback
from collections.abc import Iterable
from typing import Any, Callable, List, Mapping, Optional, Sequence, Tuple, Union

from numpy import ndarray

IS_WIN = os.name == "nt"
ParallelExecutionWorkerType = Union[Sequence[Union[object, Callable]], object, Callable]

SUBPROCESS_NAME = "subprocess"

LOGGER = logging.getLogger(__name__)


def worker(
    par_exe,  # type: ParallelExecution
    queue_in,  # type: queue.Queue
    queue_out,  # type: queue.Queue
):  # type: (...) -> None
    """Execute a function while there are args left in the queue_in.

    Args:
        par_exe: The parallel execution object that contains
            the function to be executed.
        queue_in: The inputs to be evaluated.
        queue_out: The queue object where the outputs of the function
            will be saved.
    """
    for args in iter(queue_in.get, None):
        try:
            sys.stdout.flush()
            task_index, function_output = par_exe._run_task_by_index(args)
        except Exception as err:
            traceback.print_exc()
            queue_out.put((args, err))
            queue_in.task_done()
            continue
        queue_out.put((task_index, function_output))
        queue_in.task_done()
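
# --- Illustration (not part of the original module) ----------------------
# A minimal sketch of the sentinel idiom used by ``worker`` above:
# ``iter(queue.get, None)`` keeps pulling items until ``get`` returns the
# ``None`` sentinel, which is how ``ParallelExecution.execute`` shuts the
# workers down. The helper name ``_sentinel_demo`` is hypothetical.
def _sentinel_demo():  # type: () -> None
    demo_queue = queue.Queue()
    for item in (0, 1, 2, None):  # ``None`` is the stop sentinel
        demo_queue.put(item)
    # ``worker`` would process each item in its loop body instead.
    assert list(iter(demo_queue.get, None)) == [0, 1, 2]
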
class ParallelExecution(object):
    """Perform a parallel execution of tasks on input values.

    The input values must be a list of independent pointers.
    """

    N_CPUS = mp.cpu_count()

    def __init__(
        self,
        worker_list,  # type: ParallelExecutionWorkerType
        n_processes=N_CPUS,  # type: int
        use_threading=False,  # type: bool
        wait_time_between_fork=0.0,  # type: float
    ):  # type: (...) -> None
        """
        Args:
            worker_list: The objects that perform the tasks.
                Either pass a single worker, which will be forked in
                multiprocessing, or, when using multithreading or
                distinct workers, pass one worker per input datum.
            n_processes: The maximum number of processes on which to run.
            use_threading: If True, use threads instead of processes
                to parallelize the execution.
                Multiprocessing will copy (serialize) all the disciplines,
                while multithreading will share all the memory.
                This is important to note if you want to execute the same
                discipline multiple times; in that case, use
                multiprocessing.
            wait_time_between_fork: The time to wait between two forks of
                the process or thread.

        Raises:
            ValueError: If there are duplicated workers in ``worker_list``
                when using multithreading.
        """
        self.worker_list = worker_list
        self.n_processes = n_processes
        self.use_threading = use_threading
        if use_threading:
            ids = {id(worker) for worker in worker_list}
            if len(ids) != len(worker_list):
                raise ValueError(
                    "When using multithreading, all workers "
                    "shall be different objects."
                )
        self.wait_time_between_fork = wait_time_between_fork
        self.input_data_list = None

    def _run_task_by_index(
        self, task_index  # type: int
    ):  # type: (...) -> Tuple[int, Any]
        """Run a task from an index of discipline and the input local data.

        The purpose is to be used by multiprocessing queues as a task.

        Args:
            task_index: The index of the task among ``self.worker_list``.

        Returns:
            The task index and the output of its computation.
        """
        input_loc = self.input_data_list[task_index]
        if ParallelExecution._is_worker(self.worker_list):
            worker = self.worker_list
        elif len(self.worker_list) > 1:
            worker = self.worker_list[task_index]
        else:
            worker = self.worker_list[0]

        # Return the task index to order the outputs properly.
        output = self._run_task(worker, input_loc)
        return task_index, output

    def execute(
        self,
        input_data_list,  # type: Union[Sequence[ndarray], ndarray]
        exec_callback=None,  # type: Optional[Callable[[int, Any], Any]]
        task_submitted_callback=None,  # type: Optional[Callable]
    ):  # type: (...) -> List[Any]
        """Execute all the processes.

        Args:
            input_data_list: The input values.
            exec_callback: A callback function called with the pair
                (index, outputs) as arguments when an item is retrieved
                from the processing. Index is the index in
                input_data_list of the input used to compute the outputs.
                If None, no function is called.
            task_submitted_callback: A callback function called when all
                the tasks are submitted, but not done yet.
                If None, no function is called.

        Returns:
            The computed outputs.

        Raises:
            TypeError: If ``exec_callback`` or ``task_submitted_callback``
                is not callable.
        """
        n_tasks = len(input_data_list)
        self.input_data_list = input_data_list

        if exec_callback is not None and not callable(exec_callback):
            raise TypeError("exec_callback function must be callable.")

        if task_submitted_callback is not None and not callable(
            task_submitted_callback
        ):
            raise TypeError(
                "task_submitted_callback function must be callable."
            )

        tasks_list = list(range(n_tasks))[::-1]

        # Queues for the workers.
        if self.use_threading:
            queue_in = queue.Queue()
            queue_out = queue.Queue()
        else:
            manager = mp.Manager()
            queue_in = manager.Queue()
            queue_out = manager.Queue()
            tasks_list = manager.list(tasks_list)

        processes = []
        if self.use_threading:
            for _ in range(self.n_processes):
                thread = th.Thread(
                    target=worker,
                    args=(self, queue_in, queue_out),
                    name=SUBPROCESS_NAME,
                )
                thread.daemon = True
                thread.start()
                processes.append(thread)
        else:
            for _ in range(self.n_processes):
                proc = mp.Process(
                    target=worker,
                    args=(self, queue_in, queue_out),
                    name=SUBPROCESS_NAME,
                )
                proc.daemon = True
                proc.start()
                processes.append(proc)

        if mp.current_process().name != SUBPROCESS_NAME or self.use_threading:
            # Fill the input queue.
            while tasks_list:
                task_index = tasks_list[-1]
                del tasks_list[-1]
                # Delay the next executions after the first one.
                if self.wait_time_between_fork > 0 and task_index > 0:
                    time.sleep(self.wait_time_between_fork)
                queue_in.put(task_index)

            if task_submitted_callback is not None:
                task_submitted_callback()

        # Sort the outputs in the same order as the inputs.
        ordered_outputs = [None] * n_tasks
        got_n_outs = 0
        # Retrieve the outputs on the fly to call the callbacks, which
        # typically iterate a progress bar and store the data in a
        # database or cache.
        while got_n_outs != n_tasks:
            index, output = queue_out.get()
            if isinstance(output, Exception):
                LOGGER.error("Failed to execute task indexed %s", str(index))
                LOGGER.error(output)
            else:
                ordered_outputs[index] = output
                # Call the callback function.
                if exec_callback is not None:
                    exec_callback(index, output)
            got_n_outs += 1

        # Tell the threads or processes to terminate.
        for _ in processes:
            queue_in.put(None)

        # Join the processes or threads.
        for proc in processes:
            proc.join()

        # Update the objects in self.worker_list.
        self._update_local_objects(ordered_outputs)

        # Possibly filter the outputs.
        return self._filter_ordered_outputs(ordered_outputs)

    @staticmethod
    def _filter_ordered_outputs(
        ordered_outputs  # type: Sequence[Any]
    ):  # type: (...) -> Sequence[Any]
        """Filter the ordered outputs.

        Possibly return a subset of the outputs in the execute method.
        To be overloaded by subclasses.

        Args:
            ordered_outputs: The outputs, map of _run_task
                over input_data_list.

        Returns:
            The filtered outputs.
        """
        return ordered_outputs

    def _update_local_objects(
        self, ordered_outputs  # type: Sequence[Any]
    ):  # type: (...) -> None
        """Update the local objects from the parallel results.

        The argument ordered_outputs contains the stacked outputs of the
        function _run_task().
        To be overloaded by subclasses.

        Args:
            ordered_outputs: The outputs, map of _run_task
                over input_data_list.
        """

    @staticmethod
    def _run_task(
        worker,  # type: ParallelExecutionWorkerType
        input_loc,  # type: Any
    ):  # type: (...) -> Any
        """Effectively perform the computation.

        To be overloaded by subclasses.

        Args:
            worker: The worker pointer.
            input_loc: The input of the worker.

        Returns:
            The computation of the task.

        Raises:
            TypeError: If the provided worker has the wrong type.
        """
        if not ParallelExecution._is_worker(worker):
            raise TypeError("Cannot handle worker: {}.".format(worker))

        if hasattr(worker, "execute"):
            return worker.execute(input_loc)

        return worker(input_loc)

    @staticmethod
    def _is_worker(
        worker,  # type: ParallelExecutionWorkerType
    ):  # type: (...) -> bool
        """Test if the worker is acceptable.

        A worker has to be callable or have an "execute" method.

        Args:
            worker: The worker to test.

        Returns:
            Whether the worker is acceptable.
        """
        return hasattr(worker, "execute") or callable(worker)
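
# --- Usage sketch (illustrative, not part of the original module) --------
# Shows how ``ParallelExecution`` can run a plain callable over several
# inputs. Threading is used here because a lambda cannot be pickled for
# multiprocessing; the helper name ``_parallel_execution_example`` is
# hypothetical.
def _parallel_execution_example():  # type: () -> None
    parallel = ParallelExecution(
        [lambda x: 2 * x], n_processes=2, use_threading=True
    )
    # The outputs are returned in the same order as the inputs.
    assert parallel.execute([1, 2, 3, 4]) == [2, 4, 6, 8]
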
class DiscParallelExecution(ParallelExecution):
    """Execute disciplines in parallel."""

    def _update_local_objects(
        self, ordered_outputs  # type: Mapping[int, Any]
    ):  # type: (...) -> None
        """Update the local objects from the parallel results.

        The argument ordered_outputs contains the stacked outputs of the
        function _run_task().

        Args:
            ordered_outputs: The outputs, map of _run_task
                over input_data_list.
        """
        if not isinstance(self.worker_list, Iterable) or len(
            self.worker_list
        ) != len(self.input_data_list):
            if IS_WIN and not self.use_threading:
                self.worker_list.n_calls += len(self.input_data_list)
            return

        for disc, output in zip(self.worker_list, ordered_outputs):
            # Update the discipline local data.
            disc.local_data = output
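
# --- Usage sketch (illustrative, not part of the original module) --------
# A sketch assuming two gemseo disciplines built with
# ``gemseo.api.create_discipline``; with ``use_threading=True`` the
# discipline objects are shared, so their ``local_data`` are updated in
# place. The helper name ``_disc_parallel_execution_example`` is
# hypothetical.
def _disc_parallel_execution_example():  # type: () -> None
    from gemseo.api import create_discipline

    disciplines = create_discipline(["SobieskiMission", "SobieskiStructure"])
    parallel = DiscParallelExecution(
        disciplines, n_processes=2, use_threading=True
    )
    # One input dict per discipline; empty dicts fall back to the
    # discipline default inputs.
    parallel.execute([{}, {}])
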
class DiscParallelLinearization(ParallelExecution):
    """Linearize disciplines in parallel."""

    def _update_local_objects(
        self, ordered_outputs  # type: Mapping[int, Any]
    ):  # type: (...) -> None
        """Update the local objects from the parallel results.

        The argument ordered_outputs contains the stacked outputs of the
        function _run_task().

        Args:
            ordered_outputs: The outputs, map of _run_task
                over input_data_list.
        """
        if not isinstance(self.worker_list, Iterable) or len(
            self.worker_list
        ) != len(self.input_data_list):
            if IS_WIN and not self.use_threading:
                # Only increase the number of calls if the Jacobian was
                # computed.
                if ordered_outputs[0][0]:
                    self.worker_list.n_calls += len(self.input_data_list)
                    self.worker_list.n_calls_linearize += len(
                        self.input_data_list
                    )
            return

        for disc, output in zip(self.worker_list, ordered_outputs):
            # Update the discipline Jacobian.
            disc.jac = output[1]
            # Update the discipline local data in case of execution.
            disc.local_data = output[0]

    @staticmethod
    def _run_task(
        worker,  # type: ParallelExecutionWorkerType
        input_loc,  # type: Any
    ):  # type: (...) -> Tuple[Any, Any]
        """Effectively perform the computation.

        Args:
            worker: The worker pointer.
            input_loc: The input of the worker.

        Returns:
            The local data and the Jacobian of the worker.
        """
        jac = worker.linearize(input_loc)
        return worker.local_data, jac

    @staticmethod
    def _filter_ordered_outputs(
        ordered_outputs  # type: Sequence[Tuple[Any, Any]]
    ):  # type: (...) -> List[Any]
        """Filter the ordered outputs.

        Only keep the Jacobians as outputs and dismiss the local data.

        Args:
            ordered_outputs: The outputs, map of _run_task
                over input_data_list.

        Returns:
            The Jacobians only.
        """
        return [out[1] for out in ordered_outputs]
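
# --- Usage sketch (illustrative, not part of the original module) --------
# A sketch assuming a gemseo discipline whose inputs and outputs are all
# declared as differentiated; ``execute`` then returns the Jacobians only,
# since ``_filter_ordered_outputs`` dismisses the local data. The helper
# name ``_disc_parallel_linearization_example`` is hypothetical.
def _disc_parallel_linearization_example():  # type: () -> None
    from gemseo.api import create_discipline

    discipline = create_discipline("SobieskiMission")
    # Assumption: differentiate with respect to all inputs and outputs.
    discipline.add_differentiated_inputs()
    discipline.add_differentiated_outputs()
    parallel = DiscParallelLinearization([discipline], use_threading=True)
    jacobians = parallel.execute([{}])  # one Jacobian per input point
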