/home/arjun/llvm-project/llvm/include/llvm/Support/Threading.h
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- llvm/Support/Threading.h - Control multithreading mode --*- C++ -*-===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file declares helper functions for running LLVM in a multi-threaded |
10 | | // environment. |
11 | | // |
12 | | //===----------------------------------------------------------------------===// |
13 | | |
14 | | #ifndef LLVM_SUPPORT_THREADING_H |
15 | | #define LLVM_SUPPORT_THREADING_H |
16 | | |
17 | | #include "llvm/ADT/BitVector.h" |
18 | | #include "llvm/ADT/FunctionExtras.h" |
19 | | #include "llvm/ADT/SmallVector.h" |
20 | | #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX |
21 | | #include "llvm/Support/Compiler.h" |
22 | | #include <ciso646> // So we can check the C++ standard lib macros. |
23 | | #include <functional> |
24 | | |
25 | | #if defined(_MSC_VER) |
26 | | // MSVC's call_once implementation worked since VS 2015, which is the minimum |
27 | | // supported version as of this writing. |
28 | | #define LLVM_THREADING_USE_STD_CALL_ONCE 1 |
29 | | #elif defined(LLVM_ON_UNIX) && \ |
30 | | (defined(_LIBCPP_VERSION) || \ |
31 | | !(defined(__NetBSD__) || defined(__OpenBSD__) || \ |
32 | | (defined(__ppc__) || defined(__PPC__)))) |
33 | | // std::call_once from libc++ is used on all Unix platforms. Other |
34 | | // implementations like libstdc++ are known to have problems on NetBSD, |
35 | | // OpenBSD and PowerPC. |
36 | | #define LLVM_THREADING_USE_STD_CALL_ONCE 1 |
37 | | #elif defined(LLVM_ON_UNIX) && \ |
38 | | ((defined(__ppc__) || defined(__PPC__)) && defined(__LITTLE_ENDIAN__)) |
39 | | #define LLVM_THREADING_USE_STD_CALL_ONCE 1 |
40 | | #else |
41 | | #define LLVM_THREADING_USE_STD_CALL_ONCE 0 |
42 | | #endif |
43 | | |
44 | | #if LLVM_THREADING_USE_STD_CALL_ONCE |
45 | | #include <mutex> |
46 | | #else |
47 | | #include "llvm/Support/Atomic.h" |
48 | | #endif |
49 | | |
50 | | namespace llvm { |
51 | | class Twine; |
52 | | |
53 | | /// Returns true if LLVM is compiled with support for multi-threading, and |
54 | | /// false otherwise. |
55 | | bool llvm_is_multithreaded(); |
56 | | |
57 | | /// Execute the given \p UserFn on a separate thread, passing it the provided \p |
58 | | /// UserData and waits for thread completion. |
59 | | /// |
60 | | /// This function does not guarantee that the code will actually be executed |
61 | | /// on a separate thread or honoring the requested stack size, but tries to do |
62 | | /// so where system support is available. |
63 | | /// |
64 | | /// \param UserFn - The callback to execute. |
65 | | /// \param UserData - An argument to pass to the callback function. |
66 | | /// \param StackSizeInBytes - A requested size (in bytes) for the thread stack |
67 | | /// (or None for default) |
68 | | void llvm_execute_on_thread( |
69 | | void (*UserFn)(void *), void *UserData, |
70 | | llvm::Optional<unsigned> StackSizeInBytes = llvm::None); |
71 | | |
72 | | /// Schedule the given \p Func for execution on a separate thread, then return |
73 | | /// to the caller immediately. Roughly equivalent to |
74 | | /// `std::thread(Func).detach()`, except it allows requesting a specific stack |
75 | | /// size, if supported for the platform. |
76 | | /// |
77 | | /// This function would report a fatal error if it can't execute the code |
78 | | /// on a separate thread. |
79 | | /// |
80 | | /// \param Func - The callback to execute. |
81 | | /// \param StackSizeInBytes - A requested size (in bytes) for the thread stack |
82 | | /// (or None for default) |
83 | | void llvm_execute_on_thread_async( |
84 | | llvm::unique_function<void()> Func, |
85 | | llvm::Optional<unsigned> StackSizeInBytes = llvm::None); |
86 | | |
87 | | #if LLVM_THREADING_USE_STD_CALL_ONCE |
88 | | |
89 | | typedef std::once_flag once_flag; |
90 | | |
91 | | #else |
92 | | |
93 | | enum InitStatus { Uninitialized = 0, Wait = 1, Done = 2 }; |
94 | | |
95 | | /// The llvm::once_flag structure |
96 | | /// |
97 | | /// This type is modeled after std::once_flag to use with llvm::call_once. |
98 | | /// This structure must be used as an opaque object. It is a struct to force |
99 | | /// autoinitialization and behave like std::once_flag. |
100 | | struct once_flag { |
101 | | volatile sys::cas_flag status = Uninitialized; |
102 | | }; |
103 | | |
104 | | #endif |
105 | | |
106 | | /// Execute the function specified as a parameter once. |
107 | | /// |
108 | | /// Typical usage: |
109 | | /// \code |
110 | | /// void foo() {...}; |
111 | | /// ... |
112 | | /// static once_flag flag; |
113 | | /// call_once(flag, foo); |
114 | | /// \endcode |
115 | | /// |
116 | | /// \param flag Flag used for tracking whether or not this has run. |
117 | | /// \param F Function to call once. |
118 | | template <typename Function, typename... Args> |
119 | 8 | void call_once(once_flag &flag, Function &&F, Args &&... ArgList) { |
120 | 8 | #if LLVM_THREADING_USE_STD_CALL_ONCE |
121 | 8 | std::call_once(flag, std::forward<Function>(F), |
122 | 8 | std::forward<Args>(ArgList)...); |
123 | | #else |
124 | | // For other platforms we use a generic (if brittle) version based on our |
125 | | // atomics. |
126 | | sys::cas_flag old_val = sys::CompareAndSwap(&flag.status, Wait, Uninitialized); |
127 | | if (old_val == Uninitialized) { |
128 | | std::forward<Function>(F)(std::forward<Args>(ArgList)...); |
129 | | sys::MemoryFence(); |
130 | | TsanIgnoreWritesBegin(); |
131 | | TsanHappensBefore(&flag.status); |
132 | | flag.status = Done; |
133 | | TsanIgnoreWritesEnd(); |
134 | | } else { |
135 | | // Wait until any thread doing the call has finished. |
136 | | sys::cas_flag tmp = flag.status; |
137 | | sys::MemoryFence(); |
138 | | while (tmp != Done) { |
139 | | tmp = flag.status; |
140 | | sys::MemoryFence(); |
141 | | } |
142 | | } |
143 | | TsanHappensAfter(&flag.status); |
144 | | #endif |
145 | | } |
146 | | |
147 | | /// This tells how a thread pool will be used |
148 | | class ThreadPoolStrategy { |
149 | | public: |
150 | | // The default value (0) means all available threads should be used, |
151 | | // taking the affinity mask into account. If set, this value only represents |
152 | | // a suggested high bound, the runtime might choose a lower value (not |
153 | | // higher). |
154 | | unsigned ThreadsRequested = 0; |
155 | | |
156 | | // If SMT is active, use hyper threads. If false, there will be only one |
157 | | // std::thread per core. |
158 | | bool UseHyperThreads = true; |
159 | | |
160 | | // If set, will constrain 'ThreadsRequested' to the number of hardware |
161 | | // threads, or hardware cores. |
162 | | bool Limit = false; |
163 | | |
164 | | /// Retrieves the max available threads for the current strategy. This |
165 | | /// accounts for affinity masks and takes advantage of all CPU sockets. |
166 | | unsigned compute_thread_count() const; |
167 | | |
168 | | /// Assign the current thread to an ideal hardware CPU or NUMA node. In a |
169 | | /// multi-socket system, this ensures threads are assigned to all CPU |
170 | | /// sockets. \p ThreadPoolNum represents a number bounded by [0, |
171 | | /// compute_thread_count()). |
172 | | void apply_thread_strategy(unsigned ThreadPoolNum) const; |
173 | | |
174 | | /// Finds the CPU socket where a thread should go. Returns 'None' if the |
175 | | /// thread shall remain on the actual CPU socket. |
176 | | Optional<unsigned> compute_cpu_socket(unsigned ThreadPoolNum) const; |
177 | | }; |
178 | | |
179 | | /// Build a strategy from a number of threads as a string provided in \p Num. |
180 | | /// When Num is above the max number of threads specified by the \p Default |
181 | | /// strategy, we attempt to equally allocate the threads on all CPU sockets. |
182 | | /// "0" or an empty string will return the \p Default strategy. |
183 | | /// "all" for using all hardware threads. |
184 | | Optional<ThreadPoolStrategy> |
185 | | get_threadpool_strategy(StringRef Num, ThreadPoolStrategy Default = {}); |
186 | | |
187 | | /// Returns a thread strategy for tasks requiring significant memory or other |
188 | | /// resources. To be used for workloads where hardware_concurrency() proves to |
189 | | /// be less efficient. Avoid this strategy if doing lots of I/O. Currently |
190 | | /// based on physical cores, if available for the host system, otherwise falls |
191 | | /// back to hardware_concurrency(). Returns 1 when LLVM is configured with |
192 | | /// LLVM_ENABLE_THREADS = OFF. |
193 | | inline ThreadPoolStrategy |
194 | 0 | heavyweight_hardware_concurrency(unsigned ThreadCount = 0) { |
195 | 0 | ThreadPoolStrategy S; |
196 | 0 | S.UseHyperThreads = false; |
197 | 0 | S.ThreadsRequested = ThreadCount; |
198 | 0 | return S; |
199 | 0 | } |
200 | | |
201 | | /// Like heavyweight_hardware_concurrency() above, but builds a strategy |
202 | | /// based on the rules described for get_threadpool_strategy(). |
203 | | /// If \p Num is invalid, returns a default strategy where one thread per |
204 | | /// hardware core is used. |
205 | 0 | inline ThreadPoolStrategy heavyweight_hardware_concurrency(StringRef Num) { |
206 | 0 | Optional<ThreadPoolStrategy> S = |
207 | 0 | get_threadpool_strategy(Num, heavyweight_hardware_concurrency()); |
208 | 0 | if (S) |
209 | 0 | return *S; |
210 | 0 | return heavyweight_hardware_concurrency(); |
211 | 0 | } |
212 | | |
213 |  | /// Returns a default thread strategy where all available hardware resources
214 | | /// are to be used, except for those initially excluded by an affinity mask. |
215 | | /// This function takes affinity into consideration. Returns 1 when LLVM is |
216 | | /// configured with LLVM_ENABLE_THREADS=OFF. |
217 | 0 | inline ThreadPoolStrategy hardware_concurrency(unsigned ThreadCount = 0) { |
218 | 0 | ThreadPoolStrategy S; |
219 | 0 | S.ThreadsRequested = ThreadCount; |
220 | 0 | return S; |
221 | 0 | } |
222 | | |
223 | | /// Return the current thread id, as used in various OS system calls. |
224 | | /// Note that not all platforms guarantee that the value returned will be |
225 | | /// unique across the entire system, so portable code should not assume |
226 | | /// this. |
227 | | uint64_t get_threadid(); |
228 | | |
229 | | /// Get the maximum length of a thread name on this platform. |
230 | | /// A value of 0 means there is no limit. |
231 | | uint32_t get_max_thread_name_length(); |
232 | | |
233 | | /// Set the name of the current thread. Setting a thread's name can |
234 | | /// be helpful for enabling useful diagnostics under a debugger or when |
235 | | /// logging. The level of support for setting a thread's name varies |
236 | | /// wildly across operating systems, and we only make a best effort to |
237 | | /// perform the operation on supported platforms. No indication of success |
238 | | /// or failure is returned. |
239 | | void set_thread_name(const Twine &Name); |
240 | | |
241 | | /// Get the name of the current thread. The level of support for |
242 | | /// getting a thread's name varies wildly across operating systems, and it |
243 | | /// is not even guaranteed that if you can successfully set a thread's name |
244 | | /// that you can later get it back. This function is intended for diagnostic |
245 | | /// purposes, and as with setting a thread's name no indication of whether |
246 | | /// the operation succeeded or failed is returned. |
247 | | void get_thread_name(SmallVectorImpl<char> &Name); |
248 | | |
249 | | /// Returns a mask that represents on which hardware thread, core, CPU, NUMA |
250 | | /// group, the calling thread can be executed. On Windows, threads cannot |
251 | | /// cross CPU sockets boundaries. |
252 | | llvm::BitVector get_thread_affinity_mask(); |
253 | | |
254 | | /// Returns how many physical CPUs or NUMA groups the system has. |
255 | | unsigned get_cpus(); |
256 | | |
257 | | enum class ThreadPriority { |
258 | | Background = 0, |
259 | | Default = 1, |
260 | | }; |
261 |  | /// If priority is Background tries to lower the current thread's priority such
262 | | /// that it does not affect foreground tasks significantly. Can be used for |
263 | | /// long-running, latency-insensitive tasks to make sure cpu is not hogged by |
264 | | /// this task. |
265 |  | /// If the priority is default tries to restore the current thread's priority to
266 | | /// default scheduling priority. |
267 | | enum class SetThreadPriorityResult { FAILURE, SUCCESS }; |
268 | | SetThreadPriorityResult set_thread_priority(ThreadPriority Priority); |
269 | | } |
270 | | |
271 | | #endif |