Skip to content

Commit 516b12a

Browse files
committed
Optimise mbed_ticker_api.c
The generic code in mbed_ticker_api.c uses run-time polymorphism to handle different tickers, and has generic run-time calculations for different ticker widths and frequencies, with a single special-case for 1MHz. Extend the run-time special casing to handle any conversion cases where either the multiply or divide can be done as a shift. This is a speed optimisation for certain platforms. Add a new option `target.custom-tickers`. If turned off, it promises that only USTICKER and LPTICKER devices will be used. This then permits elimination and/or simplification of runtime calculations, saving size and speed. If USTICKER and LPTICKER have the same width or the same frequency, or if only one of them exists, then the corresponding operations can be hard-coded. This is a significant ROM space saving, and a minor speed and RAM saving. We get to optimise all the calculations, but the run-time polymorphism is retained even if there is only one ticker, as it doesn't significantly affect code size versus direct calls, and the existence of lp_ticker_wrapper and various us_ticker optimisations requires it, even if only LPTICKER is available.
1 parent 1bc0d26 commit 516b12a

File tree

3 files changed

+231
-49
lines changed

3 files changed

+231
-49
lines changed

hal/mbed_ticker_api.c

Lines changed: 174 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,88 @@
2121
#include "platform/mbed_assert.h"
2222
#include "platform/mbed_error.h"
2323

24+
#if !MBED_CONF_TARGET_CUSTOM_TICKERS
25+
#include "us_ticker_api.h"
26+
#include "lp_ticker_api.h"
27+
#endif
28+
29+
// It's almost always worth avoiding division, but only worth avoiding
30+
// multiplication on some cores.
31+
#if defined(__CORTEX_M0) || defined(__CORTEX_M0PLUS) || defined(__CORTEX_M23)
32+
#define SLOW_MULTIPLY 1
33+
#else
34+
#define SLOW_MULTIPLY 0
35+
#endif
36+
2437
static void schedule_interrupt(const ticker_data_t *const ticker);
2538
static void update_present_time(const ticker_data_t *const ticker);
2639

40+
/* Macros that either look up the info from mbed_ticker_queue_t, or give a constant.
41+
* Some constants are defined during the definition of initialize, to keep the
42+
* compile-time and run-time calculations alongside each other.
43+
*/
44+
#ifdef MBED_TICKER_CONSTANT_PERIOD_NUM
45+
#define TICKER_PERIOD_NUM(queue) MBED_TICKER_CONSTANT_PERIOD_NUM
46+
// don't bother doing this - rely on the compiler being able to convert "/ 2^k" to ">> k".
47+
#define TICKER_PERIOD_NUM_SHIFTS(queue) (-1)
48+
#else
49+
#define TICKER_PERIOD_NUM(queue) ((queue)->period_num)
50+
#define TICKER_PERIOD_NUM_SHIFTS(queue) ((queue)->period_num_shifts)
51+
#endif
52+
53+
#ifdef MBED_TICKER_CONSTANT_PERIOD_DEN
54+
#define TICKER_PERIOD_DEN(queue) MBED_TICKER_CONSTANT_PERIOD_DEN
55+
#define TICKER_PERIOD_DEN_SHIFTS(queue) (-1)
56+
#else
57+
#define TICKER_PERIOD_DEN(queue) ((queue)->period_den)
58+
#define TICKER_PERIOD_DEN_SHIFTS(queue) ((queue)->period_den_shifts)
59+
#endif
60+
61+
// But the above can generate compiler warnings from `if (-1 >= 0) { x >>= -1; }`
62+
#if defined ( __CC_ARM )
63+
#pragma diag_suppress 62 // Shift count is negative
64+
#elif defined ( __GNUC__ )
65+
#pragma GCC diagnostic ignored "-Wshift-count-negative"
66+
#elif defined (__ICCARM__)
67+
#pragma diag_suppress=Pe062 // Shift count is negative
68+
#endif
69+
70+
#ifdef MBED_TICKER_CONSTANT_MASK
71+
#define TICKER_BITMASK(queue) MBED_TICKER_CONSTANT_MASK
72+
#define TICKER_MAX_DELTA(queue) CONSTANT_MAX_DELTA
73+
#else
74+
#define TICKER_BITMASK(queue) ((queue)->bitmask)
75+
#define TICKER_MAX_DELTA(queue) ((queue)->max_delta)
76+
#endif
77+
78+
#if defined MBED_TICKER_CONSTANT_PERIOD && defined MBED_TICKER_CONSTANT_MASK
79+
#define TICKER_MAX_DELTA_US(queue) CONSTANT_MAX_DELTA_US
80+
#else
81+
#define TICKER_MAX_DELTA_US(queue) ((queue)->max_delta_us)
82+
#endif
83+
84+
#ifndef MBED_TICKER_CONSTANT_PERIOD
85+
/**
 * Compute the greatest common divisor of two unsigned 32-bit values
 * using the Euclidean algorithm.
 *
 * @param a First value
 * @param b Second value; may be zero, in which case a is returned
 * @return Greatest common divisor of a and b (gcd(0, 0) yields 0)
 */
static inline uint32_t gcd(uint32_t a, uint32_t b)
{
    // Test b before taking the remainder: a % 0 is undefined behaviour.
    while (b != 0) {
        uint32_t r = a % b;
        a = b;
        b = r;
    }
    return a;
}
94+
95+
/**
 * Return the base-2 logarithm of n when n is an exact power of two.
 *
 * @param n Value to test
 * @return k in the range 0..31 such that n == 2^k, or -1 if n is not a
 *         power of two (including n == 0)
 */
static int exact_log2(uint32_t n)
{
    // The loop must include i == 0 so that n == 1 yields 0: the conversion
    // code compares the stored shift count against 0 to detect a unit
    // numerator/denominator and skip the divide entirely.
    for (int i = 31; i >= 0; --i) {
        if (((uint32_t)1 << i) == n) {
            return i;
        }
    }
    return -1;
}
104+
#endif
105+
27106
/*
28107
* Initialize a ticker instance.
29108
*/
@@ -40,9 +119,29 @@ static void initialize(const ticker_data_t *ticker)
40119

41120
ticker->interface->init();
42121

122+
#if !(defined NDEBUG && defined MBED_TICKER_CONSTANT_PERIOD && defined MBED_TICKER_CONSTANT_MASK)
43123
const ticker_info_t *info = ticker->interface->get_info();
124+
#endif
125+
126+
#if !MBED_CONF_TARGET_CUSTOM_TICKERS
127+
/* They must be passing us one of the well-known tickers. Check info
128+
* rather than the data, to cope with the lp_ticker_wrapper. It doesn't count
129+
* as a "custom ticker" for the purpose of this optimization.
130+
*/
131+
#if DEVICE_USTICKER && DEVICE_LPTICKER
132+
MBED_ASSERT(info == get_us_ticker_data()->interface->get_info() || info == get_lp_ticker_data()->interface->get_info());
133+
#elif DEVICE_USTICKER
134+
MBED_ASSERT(info == get_us_ticker_data()->interface->get_info());
135+
#elif DEVICE_LPTICKER
136+
MBED_ASSERT(info == get_lp_ticker_data()->interface->get_info());
137+
#else
138+
MBED_ASSERT(false);
139+
#endif
140+
#endif
141+
142+
#ifndef MBED_TICKER_CONSTANT_PERIOD
44143
uint32_t frequency = info->frequency;
45-
if (info->frequency == 0) {
144+
if (frequency == 0) {
46145
#if MBED_TRAP_ERRORS_ENABLED
47146
MBED_ERROR(
48147
MBED_MAKE_ERROR(
@@ -56,16 +155,24 @@ static void initialize(const ticker_data_t *ticker)
56155
#endif // MBED_TRAP_ERRORS_ENABLED
57156
}
58157

59-
uint8_t frequency_shifts = 0;
60-
for (uint8_t i = 31; i > 0; --i) {
61-
if ((1U << i) == frequency) {
62-
frequency_shifts = i;
63-
break;
64-
}
65-
}
158+
const uint32_t period_gcd = gcd(frequency, 1000000);
159+
#ifdef MBED_TICKER_CONSTANT_PERIOD_NUM
160+
MBED_ASSERT(MBED_TICKER_CONSTANT_PERIOD_NUM == 1000000 / period_gcd);
161+
#else
162+
ticker->queue->period_num = 1000000 / period_gcd;
163+
ticker->queue->period_num_shifts = exact_log2(ticker->queue->period_num);
164+
#endif
165+
#ifdef MBED_TICKER_CONSTANT_PERIOD_DEN
166+
MBED_ASSERT(MBED_TICKER_CONSTANT_PERIOD_DEN == frequency / period_gcd);
167+
#else
168+
ticker->queue->period_den = frequency / period_gcd;
169+
ticker->queue->period_den_shifts = exact_log2(ticker->queue->period_den);
170+
#endif
171+
#endif // MBED_TICKER_CONSTANT_PERIOD
66172

173+
#ifndef MBED_TICKER_CONSTANT_MASK
67174
uint32_t bits = info->bits;
68-
if ((info->bits > 32) || (info->bits < 4)) {
175+
if ((bits > 32) || (bits < 4)) {
69176
#if MBED_TRAP_ERRORS_ENABLED
70177
MBED_ERROR(
71178
MBED_MAKE_ERROR(
@@ -78,19 +185,24 @@ static void initialize(const ticker_data_t *ticker)
78185
bits = 32;
79186
#endif // MBED_TRAP_ERRORS_ENABLED
80187
}
81-
uint32_t max_delta = 0x7 << (bits - 4); // 7/16th
82-
uint64_t max_delta_us =
83-
((uint64_t)max_delta * 1000000 + frequency - 1) / frequency;
188+
ticker->queue->bitmask = bits == 32 ? 0xFFFFFFFF : (1U << bits) - 1;
189+
ticker->queue->max_delta = 7 << (bits - 4); // 7/16th
190+
#else // MBED_TICKER_CONSTANT_MASK
191+
#define CONSTANT_MAX_DELTA (7 * ((MBED_TICKER_CONSTANT_MASK >> 4) + 1)) // 7/16th
192+
#endif // MBED_TICKER_CONSTANT_MASK
193+
194+
#if !(defined MBED_TICKER_CONSTANT_PERIOD && defined MBED_TICKER_CONSTANT_MASK)
195+
ticker->queue->max_delta_us =
196+
((uint64_t)TICKER_MAX_DELTA(ticker->queue) * TICKER_PERIOD_NUM(ticker->queue) + TICKER_PERIOD_DEN(ticker->queue) - 1) / TICKER_PERIOD_DEN(ticker->queue);
197+
#else
198+
#define CONSTANT_MAX_DELTA_US \
199+
(((uint64_t) CONSTANT_MAX_DELTA * MBED_TICKER_CONSTANT_PERIOD_NUM + MBED_TICKER_CONSTANT_PERIOD_DEN - 1) / MBED_TICKER_CONSTANT_PERIOD_DEN)
200+
#endif
84201

85202
ticker->queue->event_handler = NULL;
86203
ticker->queue->head = NULL;
87204
ticker->queue->tick_last_read = ticker->interface->read();
88205
ticker->queue->tick_remainder = 0;
89-
ticker->queue->frequency = frequency;
90-
ticker->queue->frequency_shifts = frequency_shifts;
91-
ticker->queue->bitmask = ((uint64_t)1 << bits) - 1;
92-
ticker->queue->max_delta = max_delta;
93-
ticker->queue->max_delta_us = max_delta_us;
94206
ticker->queue->present_time = 0;
95207
ticker->queue->dispatching = false;
96208
ticker->queue->suspended = false;
@@ -154,27 +266,31 @@ static void update_present_time(const ticker_data_t *const ticker)
154266
return;
155267
}
156268

157-
uint64_t elapsed_ticks = (ticker_time - queue->tick_last_read) & queue->bitmask;
269+
uint32_t elapsed_ticks = (ticker_time - queue->tick_last_read) & TICKER_BITMASK(queue);
158270
queue->tick_last_read = ticker_time;
159271

272+
// Convert elapsed_ticks to elapsed_us as (elapsed_ticks * period_num / period_den)
273+
// adding in any remainder from the last division
274+
uint64_t scaled_ticks;
275+
if (SLOW_MULTIPLY && TICKER_PERIOD_NUM_SHIFTS(queue) >= 0) {
276+
scaled_ticks = (uint64_t) elapsed_ticks << TICKER_PERIOD_NUM_SHIFTS(queue);
277+
} else {
278+
scaled_ticks = (uint64_t) elapsed_ticks * TICKER_PERIOD_NUM(queue);
279+
}
160280
uint64_t elapsed_us;
161-
if (1000000 == queue->frequency) {
162-
// Optimized for 1MHz
163-
164-
elapsed_us = elapsed_ticks;
281+
if (TICKER_PERIOD_DEN_SHIFTS(queue) == 0) {
282+
// Optimized for cases that don't need division
283+
elapsed_us = scaled_ticks;
165284
} else {
166-
uint64_t us_x_ticks = elapsed_ticks * 1000000;
167-
if (0 != queue->frequency_shifts) {
168-
// Optimized for frequencies divisible by 2
169-
elapsed_us = us_x_ticks >> queue->frequency_shifts;
170-
queue->tick_remainder += us_x_ticks - (elapsed_us << queue->frequency_shifts);
285+
scaled_ticks += queue->tick_remainder;
286+
if (TICKER_PERIOD_DEN_SHIFTS(queue) >= 0) {
287+
// Speed-optimised for shifts
288+
elapsed_us = scaled_ticks >> TICKER_PERIOD_DEN_SHIFTS(queue);
289+
queue->tick_remainder = scaled_ticks - (elapsed_us << TICKER_PERIOD_DEN_SHIFTS(queue));
171290
} else {
172-
elapsed_us = us_x_ticks / queue->frequency;
173-
queue->tick_remainder += us_x_ticks - elapsed_us * queue->frequency;
174-
}
175-
if (queue->tick_remainder >= queue->frequency) {
176-
elapsed_us += 1;
177-
queue->tick_remainder -= queue->frequency;
291+
// General case division
292+
elapsed_us = scaled_ticks / TICKER_PERIOD_DEN(queue);
293+
queue->tick_remainder = scaled_ticks - elapsed_us * TICKER_PERIOD_DEN(queue);
178294
}
179295
}
180296

@@ -190,25 +306,37 @@ static timestamp_t compute_tick_round_up(const ticker_data_t *const ticker, us_t
190306
ticker_event_queue_t *queue = ticker->queue;
191307
us_timestamp_t delta_us = timestamp - queue->present_time;
192308

193-
timestamp_t delta = ticker->queue->max_delta;
194-
if (delta_us <= ticker->queue->max_delta_us) {
309+
timestamp_t delta = TICKER_MAX_DELTA(ticker->queue);
310+
if (delta_us <= TICKER_MAX_DELTA_US(ticker->queue)) {
195311
// Checking max_delta_us ensures the operation will not overflow
196312

197-
if (1000000 == queue->frequency) {
198-
// Optimized for 1MHz
199-
delta = delta_us;
200-
} else if (0 != queue->frequency_shifts) {
201-
// Optimized frequencies divisible by 2
202-
delta = ((delta_us << ticker->queue->frequency_shifts) + 1000000 - 1) / 1000000;
313+
// Convert delta_us to delta (ticks) as (delta_us * period_den / period_num)
314+
// taking care to round up if num != 1
315+
uint64_t scaled_delta;
316+
if (SLOW_MULTIPLY && TICKER_PERIOD_DEN_SHIFTS(queue) >= 0) {
317+
// Optimized for denominators that are a power of 2
318+
scaled_delta = delta_us << TICKER_PERIOD_DEN_SHIFTS(queue);
203319
} else {
204320
// General case
205-
delta = (delta_us * queue->frequency + 1000000 - 1) / 1000000;
321+
scaled_delta = delta_us * TICKER_PERIOD_DEN(queue);
322+
}
323+
if (TICKER_PERIOD_NUM_SHIFTS(queue) == 0) {
324+
delta = scaled_delta;
325+
} else {
326+
scaled_delta += TICKER_PERIOD_NUM(queue) - 1;
327+
if (TICKER_PERIOD_NUM_SHIFTS(queue) >= 0) {
328+
// Optimized for numerators that are a power of 2
329+
delta = scaled_delta >> TICKER_PERIOD_NUM_SHIFTS(queue);
330+
} else {
331+
// General case
332+
delta = scaled_delta / TICKER_PERIOD_NUM(queue);
333+
}
206334
}
207-
if (delta > ticker->queue->max_delta) {
208-
delta = ticker->queue->max_delta;
335+
if (delta > TICKER_MAX_DELTA(queue)) {
336+
delta = TICKER_MAX_DELTA(queue);
209337
}
210338
}
211-
return (queue->tick_last_read + delta) & queue->bitmask;
339+
return (queue->tick_last_read + delta) & TICKER_BITMASK(queue);
212340
}
213341

214342
//NOTE: Must be called from critical section!
@@ -308,7 +436,7 @@ static void schedule_interrupt(const ticker_data_t *const ticker)
308436
}
309437
} else {
310438
uint32_t match_tick =
311-
(queue->tick_last_read + queue->max_delta) & queue->bitmask;
439+
(queue->tick_last_read + TICKER_MAX_DELTA(queue)) & TICKER_BITMASK(queue);
312440
ticker->interface->set_interrupt(match_tick);
313441
}
314442
}

hal/ticker_api.h

Lines changed: 53 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,22 +70,72 @@ typedef struct {
7070
bool runs_in_deep_sleep; /**< Whether ticker operates in deep sleep */
7171
} ticker_interface_t;
7272

73+
/* Optimizations to avoid run-time computation if custom ticker support is disabled and
74+
* there is exactly one of USTICKER or LPTICKER available, or if they have the same
75+
* parameter value(s).
76+
*/
77+
#define MBED_TICKER_JUST_US (!MBED_CONF_TARGET_CUSTOM_TICKERS && DEVICE_USTICKER && !DEVICE_LPTICKER)
78+
#define MBED_TICKER_JUST_LP (!MBED_CONF_TARGET_CUSTOM_TICKERS && DEVICE_LPTICKER && !DEVICE_USTICKER)
79+
#define MBED_TICKER_EXACTLY_ONE (MBED_TICKER_JUST_US || MBED_TICKER_JUST_LP)
80+
81+
#if (MBED_TICKER_JUST_US && defined US_TICKER_PERIOD_NUM) || \
82+
(!MBED_CONF_TARGET_CUSTOM_TICKERS && defined US_TICKER_PERIOD_NUM && defined LP_TICKER_PERIOD_NUM && \
83+
US_TICKER_PERIOD_NUM == LP_TICKER_PERIOD_NUM)
84+
#define MBED_TICKER_CONSTANT_PERIOD_NUM US_TICKER_PERIOD_NUM
85+
#elif MBED_TICKER_JUST_LP && defined LP_TICKER_PERIOD_NUM
86+
#define MBED_TICKER_CONSTANT_PERIOD_NUM LP_TICKER_PERIOD_NUM
87+
#endif
88+
89+
#if (MBED_TICKER_JUST_US && defined US_TICKER_PERIOD_DEN) || \
90+
(!MBED_CONF_TARGET_CUSTOM_TICKERS && defined US_TICKER_PERIOD_DEN && defined LP_TICKER_PERIOD_DEN && \
91+
US_TICKER_PERIOD_DEN == LP_TICKER_PERIOD_DEN)
92+
#define MBED_TICKER_CONSTANT_PERIOD_DEN US_TICKER_PERIOD_DEN
93+
#elif MBED_TICKER_JUST_LP && defined LP_TICKER_PERIOD_DEN
94+
#define MBED_TICKER_CONSTANT_PERIOD_DEN LP_TICKER_PERIOD_DEN
95+
#endif
96+
97+
#if defined MBED_TICKER_CONSTANT_PERIOD_NUM && defined MBED_TICKER_CONSTANT_PERIOD_DEN
98+
#define MBED_TICKER_CONSTANT_PERIOD
99+
#endif
100+
101+
#if (MBED_TICKER_JUST_US && defined US_TICKER_MASK) || \
102+
(!MBED_CONF_TARGET_CUSTOM_TICKERS && defined US_TICKER_MASK && defined LP_TICKER_MASK && \
103+
US_TICKER_MASK == LP_TICKER_MASK)
104+
#define MBED_TICKER_CONSTANT_MASK US_TICKER_MASK
105+
#elif MBED_TICKER_JUST_LP && defined LP_TICKER_MASK
106+
#define MBED_TICKER_CONSTANT_MASK LP_TICKER_MASK
107+
#endif
108+
73109
/** Ticker's event queue structure
74110
*/
75111
typedef struct {
76112
ticker_event_handler event_handler; /**< Event handler */
77113
ticker_event_t *head; /**< A pointer to head */
78-
uint32_t frequency; /**< Frequency of the timer in Hz */
114+
#ifndef MBED_TICKER_CONSTANT_PERIOD_NUM
115+
uint32_t period_num; /**< Ratio of period to 1us, numerator */
116+
#endif
117+
#ifndef MBED_TICKER_CONSTANT_PERIOD_DEN
118+
uint32_t period_den; /**< Ratio of period to 1us, denominator */
119+
#endif
120+
#ifndef MBED_TICKER_CONSTANT_MASK
79121
uint32_t bitmask; /**< Mask to be applied to time values read */
80122
uint32_t max_delta; /**< Largest delta in ticks that can be used when scheduling */
123+
#endif
124+
#if !(defined MBED_TICKER_CONSTANT_PERIOD && defined MBED_TICKER_CONSTANT_MASK)
81125
uint64_t max_delta_us; /**< Largest delta in us that can be used when scheduling */
126+
#endif
82127
uint32_t tick_last_read; /**< Last tick read */
83-
uint64_t tick_remainder; /**< Ticks that have not been added to base_time */
128+
uint32_t tick_remainder; /**< Ticks that have not been added to base_time */
84129
us_timestamp_t present_time; /**< Store the timestamp used for present time */
85130
bool initialized; /**< Indicate if the instance is initialized */
86131
bool dispatching; /**< The function ticker_irq_handler is dispatching */
87132
bool suspended; /**< Indicate if the instance is suspended */
88-
uint8_t frequency_shifts; /**< If frequency is a value of 2^n, this is n, otherwise 0 */
133+
#ifndef MBED_TICKER_CONSTANT_PERIOD_NUM
134+
int8_t period_num_shifts; /**< If numerator is a value of 2^n, this is n, otherwise -1 */
135+
#endif
136+
#ifndef MBED_TICKER_CONSTANT_PERIOD_DEN
137+
int8_t period_den_shifts; /**< If denominator is a value of 2^n, this is n, otherwise -1 */
138+
#endif
89139
} ticker_event_queue_t;
90140

91141
/** Ticker's data structure

targets/targets.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,10 @@
7777
"help": "Initialize the microsecond ticker at boot rather than on first use, and leave it initialized. This speeds up wait_us in particular.",
7878
"value": false
7979
},
80+
"custom-tickers": {
81+
"help": "Support custom tickers in addition to USTICKER and LPTICKER. Turning this off can permit some space and speed optimisations, if characteristics of USTICKER and LPTICKER are known at compile time.",
82+
"value": true
83+
},
8084
"xip-enable": {
8185
"help": "Enable Execute In Place (XIP) on this target. Value is only significant if the board has executable external storage such as QSPIF. If this is enabled, customize the linker file to choose what text segments are placed on external storage",
8286
"value": false

0 commit comments

Comments
 (0)