AOMedia AV1 Codec
nonrd_opt.h
1 /*
2  * Copyright (c) 2022, Alliance for Open Media. All rights reserved.
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #ifndef AOM_AV1_ENCODER_NONRD_OPT_H_
13 #define AOM_AV1_ENCODER_NONRD_OPT_H_
14 
15 #include "av1/encoder/context_tree.h"
16 #include "av1/encoder/rdopt_utils.h"
17 #include "av1/encoder/rdopt.h"
18 
// Sizes of the mode lists defined in this header: inter_mode_list and
// intra_mode_list each hold four entries. RTC_MODES is the larger of the two
// counts and is used as the column count of mode_idx.
#define RTC_INTER_MODES (4)
#define RTC_INTRA_MODES (4)
#define RTC_MODES (AOMMAX(RTC_INTER_MODES, RTC_INTRA_MODES))
// Scales an RD cost by 7/8: multiply by 7, then shift right by 3. The
// multiplication is parenthesized explicitly so the expansion does not rely
// on the reader knowing that '*' binds tighter than '>>'.
#define CALC_BIASED_RDCOST(rdcost) ((7 * (rdcost)) >> 3)
// Entry counts of comp_ref_mode_set / comp_ref_mode_set_full.
#define NUM_COMP_INTER_MODES_RT (6)
#define NUM_COMP_INTER_MODES_RT_FULL (10)
// Entry counts of ref_mode_set / ref_mode_set_full.
#define NUM_INTER_MODES 12
#define NUM_INTER_MODES_FULL 28
// Non-zero when the transform search type is not ONLY_4X4 and the block size
// is larger than BLOCK_32X32. The logical AND already yields 0 or 1, so no
// "? true : false" is needed.
#define CAP_TX_SIZE_FOR_BSIZE_GT32(tx_mode_search_type, bsize) \
  (((tx_mode_search_type) != ONLY_4X4) && ((bsize) > BLOCK_32X32))
// NOTE(review): presumably the transform size applied when the macro above
// evaluates to non-zero -- confirm at the call sites.
#define TX_SIZE_FOR_BSIZE_GT32 (TX_16X16)
#define FILTER_SEARCH_SIZE 2
#if !CONFIG_REALTIME_ONLY
#define MOTION_MODE_SEARCH_SIZE 2
#endif
34 
// Declaration only: this header does not define the variable.
// NOTE(review): presumably a counter of inter-mode picks kept for
// debugging/statistics -- confirm at the definition site.
35 extern int g_pick_inter_mode_cnt;
// Descriptor for one entry of a prediction-buffer pool.
// get_pred_buffer() claims the first entry whose in_use is 0 and
// free_pred_buffer() releases an entry by clearing in_use.
37 typedef struct {
 // NOTE(review): presumably the first sample of the buffer -- confirm.
38  uint8_t *data;
 // NOTE(review): presumably the row stride of `data` -- confirm the units.
39  int stride;
 // Non-zero while the entry is claimed; 0 when it is free.
40  int in_use;
41 } PRED_BUFFER;
42 
// Record of the best mode candidate: prediction buffer, mode, transform
// size/type, reference frames, skip flags, interpolation filter, motion-mode
// data, palette info and SSE.
// NOTE(review): field meanings below are inferred from the names; confirm
// against the code that fills the structure.
43 typedef struct {
 // Points into a PRED_BUFFER pool entry (see get_pred_buffer()).
44  PRED_BUFFER *best_pred;
45  PREDICTION_MODE best_mode;
46  TX_SIZE best_tx_size;
47  TX_TYPE tx_type;
 // Reference frame pair of the best candidate.
48  MV_REFERENCE_FRAME best_ref_frame;
49  MV_REFERENCE_FRAME best_second_ref_frame;
50  uint8_t best_mode_skip_txfm;
51  uint8_t best_mode_initial_skip_flag;
52  int_interpfilters best_pred_filter;
 // Motion-mode related state (motion mode, warp parameters, sample count).
53  MOTION_MODE best_motion_mode;
54  WarpedMotionParams wm_params;
55  int num_proj_ref;
56  PALETTE_MODE_INFO pmi;
57  int64_t best_sse;
58 } BEST_PICKMODE;
59 
// One single-reference candidate: a reference frame paired with a prediction
// mode. Element type of ref_mode_set and ref_mode_set_full.
60 typedef struct {
61  MV_REFERENCE_FRAME ref_frame;
62  PREDICTION_MODE pred_mode;
63 } REF_MODE;
64 
// One compound candidate: a pair of reference frames plus a prediction mode.
// Element type of comp_ref_mode_set and comp_ref_mode_set_full.
65 typedef struct {
66  MV_REFERENCE_FRAME ref_frame[2];
67  PREDICTION_MODE pred_mode;
68 } COMP_REF_MODE;
69 
// Argument bundle initialised by init_estimate_block_intra_args().
// NOTE(review): presumably handed to av1_estimate_block_intra() through its
// `void *arg` parameter -- confirm at the call sites.
70 struct estimate_block_intra_args {
71  AV1_COMP *cpi;
72  MACROBLOCK *x;
 // Initialised to DC_PRED.
73  PREDICTION_MODE mode;
 // Initialised to 1.
74  int skippable;
 // Initialised to a null pointer; the caller must set it before use.
75  RD_STATS *rdc;
 // Initialised to UINT_MAX.
76  unsigned int best_sad;
 // Both pruning flags are initialised to false.
77  bool prune_mode_based_on_sad;
78  bool prune_palette_sad;
79 };
85 typedef struct {
87  BEST_PICKMODE best_pickmode;
89  RD_STATS this_rdc;
91  RD_STATS best_rdc;
93  int64_t uv_dist[RTC_INTER_MODES][REF_FRAMES];
95  struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
97  unsigned int vars[RTC_INTER_MODES][REF_FRAMES];
99  unsigned int ref_costs_single[REF_FRAMES];
101  int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES];
103  int_mv frame_mv_best[MB_MODE_COUNT][REF_FRAMES];
105  int single_inter_mode_costs[RTC_INTER_MODES][REF_FRAMES];
107  int use_ref_frame_mask[REF_FRAMES];
109  uint8_t mode_checked[MB_MODE_COUNT][REF_FRAMES];
111  bool use_scaled_ref_frame[REF_FRAMES];
113 
// Per-block-size lookup tables with BLOCK_SIZES entries each.
// NOTE(review): the values look like log2 of the block width/height measured
// in 4-sample units, in BLOCK_SIZE enum order -- confirm against the enum.
114 static const uint8_t b_width_log2_lookup[BLOCK_SIZES] = { 0, 0, 1, 1, 1, 2,
115  2, 2, 3, 3, 3, 4,
116  4, 4, 5, 5 };
117 static const uint8_t b_height_log2_lookup[BLOCK_SIZES] = { 0, 1, 0, 1, 2, 1,
118  2, 3, 2, 3, 4, 3,
119  4, 5, 4, 5 };
120 
// The four intra prediction modes listed for this path, in table order.
121 static const PREDICTION_MODE intra_mode_list[] = { DC_PRED, V_PRED, H_PRED,
122  SMOOTH_PRED };
123 
// The four single-reference inter prediction modes, in table order.
124 static const PREDICTION_MODE inter_mode_list[] = { NEARESTMV, NEARMV, GLOBALMV,
125  NEWMV };
126 
// Maps (row, mode slot) to a THR_MODES value. The first row holds the intra
// entries in the same order as intra_mode_list; every other row holds the
// NEAREST/NEAR/GLOBAL/NEW entries of one reference, in the same order as
// inter_mode_list.
// NOTE(review): rows are presumably indexed by MV_REFERENCE_FRAME with
// INTRA_FRAME == 0 -- confirm against the enum.
127 static const THR_MODES mode_idx[REF_FRAMES][RTC_MODES] = {
128  { THR_DC, THR_V_PRED, THR_H_PRED, THR_SMOOTH },
129  { THR_NEARESTMV, THR_NEARMV, THR_GLOBALMV, THR_NEWMV },
130  { THR_NEARESTL2, THR_NEARL2, THR_GLOBALL2, THR_NEWL2 },
131  { THR_NEARESTL3, THR_NEARL3, THR_GLOBALL3, THR_NEWL3 },
132  { THR_NEARESTG, THR_NEARG, THR_GLOBALG, THR_NEWG },
133  { THR_NEARESTB, THR_NEARB, THR_GLOBALB, THR_NEWB },
134  { THR_NEARESTA2, THR_NEARA2, THR_GLOBALA2, THR_NEWA2 },
135  { THR_NEARESTA, THR_NEARA, THR_GLOBALA, THR_NEWA },
136 };
137 
// Twelve (reference, mode) candidates: NEARESTMV, NEARMV, GLOBALMV and NEWMV
// for LAST_FRAME, then GOLDEN_FRAME, then ALTREF_FRAME.
138 // GLOBALMV in the set below is in fact ZEROMV as we don't do global ME in RT
139 // mode
140 static const REF_MODE ref_mode_set[NUM_INTER_MODES] = {
141  { LAST_FRAME, NEARESTMV }, { LAST_FRAME, NEARMV },
142  { LAST_FRAME, GLOBALMV }, { LAST_FRAME, NEWMV },
143  { GOLDEN_FRAME, NEARESTMV }, { GOLDEN_FRAME, NEARMV },
144  { GOLDEN_FRAME, GLOBALMV }, { GOLDEN_FRAME, NEWMV },
145  { ALTREF_FRAME, NEARESTMV }, { ALTREF_FRAME, NEARMV },
146  { ALTREF_FRAME, GLOBALMV }, { ALTREF_FRAME, NEWMV },
147 };
148 
// Extended candidate list with 28 entries. The first twelve entries are the
// same as ref_mode_set (LAST, GOLDEN, ALTREF); they are followed by the same
// four modes for LAST2, LAST3, BWDREF and ALTREF2.
149 static const REF_MODE ref_mode_set_full[NUM_INTER_MODES_FULL] = {
150  { LAST_FRAME, NEARESTMV }, { LAST_FRAME, NEARMV },
151  { LAST_FRAME, GLOBALMV }, { LAST_FRAME, NEWMV },
152  { GOLDEN_FRAME, NEARESTMV }, { GOLDEN_FRAME, NEARMV },
153  { GOLDEN_FRAME, GLOBALMV }, { GOLDEN_FRAME, NEWMV },
154  { ALTREF_FRAME, NEARESTMV }, { ALTREF_FRAME, NEARMV },
155  { ALTREF_FRAME, GLOBALMV }, { ALTREF_FRAME, NEWMV },
156  { LAST2_FRAME, NEARESTMV }, { LAST2_FRAME, NEARMV },
157  { LAST2_FRAME, GLOBALMV }, { LAST2_FRAME, NEWMV },
158  { LAST3_FRAME, NEARESTMV }, { LAST3_FRAME, NEARMV },
159  { LAST3_FRAME, GLOBALMV }, { LAST3_FRAME, NEWMV },
160  { BWDREF_FRAME, NEARESTMV }, { BWDREF_FRAME, NEARMV },
161  { BWDREF_FRAME, GLOBALMV }, { BWDREF_FRAME, NEWMV },
162  { ALTREF2_FRAME, NEARESTMV }, { ALTREF2_FRAME, NEARMV },
163  { ALTREF2_FRAME, GLOBALMV }, { ALTREF2_FRAME, NEWMV },
164 };
165 
// Six compound candidates: LAST_FRAME paired with GOLDEN, LAST2 and ALTREF,
// each listed with GLOBAL_GLOBALMV first and NEAREST_NEARESTMV second.
166 static const COMP_REF_MODE comp_ref_mode_set[NUM_COMP_INTER_MODES_RT] = {
167  { { LAST_FRAME, GOLDEN_FRAME }, GLOBAL_GLOBALMV },
168  { { LAST_FRAME, GOLDEN_FRAME }, NEAREST_NEARESTMV },
169  { { LAST_FRAME, LAST2_FRAME }, GLOBAL_GLOBALMV },
170  { { LAST_FRAME, LAST2_FRAME }, NEAREST_NEARESTMV },
171  { { LAST_FRAME, ALTREF_FRAME }, GLOBAL_GLOBALMV },
172  { { LAST_FRAME, ALTREF_FRAME }, NEAREST_NEARESTMV },
173 };
174 
// Extended compound candidate list with ten entries. The first six entries
// are the same as comp_ref_mode_set; they are followed by LAST_FRAME paired
// with BWDREF and ALTREF2, again with GLOBAL_GLOBALMV then NEAREST_NEARESTMV.
175 static const COMP_REF_MODE
176  comp_ref_mode_set_full[NUM_COMP_INTER_MODES_RT_FULL] = {
177  { { LAST_FRAME, GOLDEN_FRAME }, GLOBAL_GLOBALMV },
178  { { LAST_FRAME, GOLDEN_FRAME }, NEAREST_NEARESTMV },
179  { { LAST_FRAME, LAST2_FRAME }, GLOBAL_GLOBALMV },
180  { { LAST_FRAME, LAST2_FRAME }, NEAREST_NEARESTMV },
181  { { LAST_FRAME, ALTREF_FRAME }, GLOBAL_GLOBALMV },
182  { { LAST_FRAME, ALTREF_FRAME }, NEAREST_NEARESTMV },
183  { { LAST_FRAME, BWDREF_FRAME }, GLOBAL_GLOBALMV },
184  { { LAST_FRAME, BWDREF_FRAME }, NEAREST_NEARESTMV },
185  { { LAST_FRAME, ALTREF2_FRAME }, GLOBAL_GLOBALMV },
186  { { LAST_FRAME, ALTREF2_FRAME }, NEAREST_NEARESTMV },
187  };
188 
// All nine ordered pairs that can be formed from EIGHTTAP_REGULAR,
// EIGHTTAP_SMOOTH and MULTITAP_SHARP, written with designated initializers
// on the as_filters member.
// NOTE(review): which pair element is the horizontal and which the vertical
// filter depends on the as_filters layout -- confirm in its definition.
189 static const int_interpfilters filters_ref_set[9] = {
190  [0].as_filters = { EIGHTTAP_REGULAR, EIGHTTAP_REGULAR },
191  [1].as_filters = { EIGHTTAP_SMOOTH, EIGHTTAP_SMOOTH },
192  [2].as_filters = { EIGHTTAP_REGULAR, EIGHTTAP_SMOOTH },
193  [3].as_filters = { EIGHTTAP_SMOOTH, EIGHTTAP_REGULAR },
194  [4].as_filters = { MULTITAP_SHARP, MULTITAP_SHARP },
195  [5].as_filters = { EIGHTTAP_REGULAR, MULTITAP_SHARP },
196  [6].as_filters = { MULTITAP_SHARP, EIGHTTAP_REGULAR },
197  [7].as_filters = { EIGHTTAP_SMOOTH, MULTITAP_SHARP },
198  [8].as_filters = { MULTITAP_SHARP, EIGHTTAP_SMOOTH }
199 };
200 
// Bit masks over inter prediction modes: each mode contributes the bit
// (1 << mode). The INTER_ALL line below is commented out, so that name is
// not defined by this enum.
201 enum {
202  // INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV),
203  INTER_NEAREST = (1 << NEARESTMV),
204  INTER_NEAREST_NEW = (1 << NEARESTMV) | (1 << NEWMV),
205  INTER_NEAREST_NEAR = (1 << NEARESTMV) | (1 << NEARMV),
206  INTER_NEAR_NEW = (1 << NEARMV) | (1 << NEWMV),
207 };
208 
// 64-entry scan table for 8x8 blocks; each entry is a coefficient position.
// Its companion inverse table is av1_default_iscan_8x8_transpose.
209 // The original scan order (default_scan_8x8) is modified according to the extra
210 // transpose in hadamard c implementation, i.e., aom_hadamard_lp_8x8_c and
211 // aom_hadamard_8x8_c.
212 DECLARE_ALIGNED(16, static const int16_t, default_scan_8x8_transpose[64]) = {
213  0, 8, 1, 2, 9, 16, 24, 17, 10, 3, 4, 11, 18, 25, 32, 40,
214  33, 26, 19, 12, 5, 6, 13, 20, 27, 34, 41, 48, 56, 49, 42, 35,
215  28, 21, 14, 7, 15, 22, 29, 36, 43, 50, 57, 58, 51, 44, 37, 30,
216  23, 31, 38, 45, 52, 59, 60, 53, 46, 39, 47, 54, 61, 62, 55, 63
217 };
218 
// 64-entry inverse-scan table for 8x8 blocks.
// NOTE(review): expected to satisfy iscan[scan[i]] == i for the paired
// default_scan_8x8_transpose table; only the first few entries were checked.
219 // The original scan order (av1_default_iscan_8x8) is modified to match
220 // hadamard AVX2 implementation, i.e., aom_hadamard_lp_8x8_avx2 and
221 // aom_hadamard_8x8_avx2. Since hadamard AVX2 implementation will modify the
222 // order of coefficients, such that the normal scan order is no longer
223 // guaranteed to scan low coefficients first, therefore we modify the scan order
224 // accordingly.
225 // Note that this one has to be used together with default_scan_8x8_transpose.
226 DECLARE_ALIGNED(16, static const int16_t,
227  av1_default_iscan_8x8_transpose[64]) = {
228  0, 2, 3, 9, 10, 20, 21, 35, 1, 4, 8, 11, 19, 22, 34, 36,
229  5, 7, 12, 18, 23, 33, 37, 48, 6, 13, 17, 24, 32, 38, 47, 49,
230  14, 16, 25, 31, 39, 46, 50, 57, 15, 26, 30, 40, 45, 51, 56, 58,
231  27, 29, 41, 44, 52, 55, 59, 62, 28, 42, 43, 53, 54, 60, 61, 63
232 };
233 
// 256-entry scan table for 16x16 blocks, low-precision (lp) hadamard variant.
// Its companion inverse table is av1_default_iscan_lp_16x16_transpose.
234 // The original scan order (default_scan_16x16) is modified according to the
235 // extra transpose in hadamard c implementation in lp case, i.e.,
236 // aom_hadamard_lp_16x16_c.
237 DECLARE_ALIGNED(16, static const int16_t,
238  default_scan_lp_16x16_transpose[256]) = {
239  0, 8, 2, 4, 10, 16, 24, 18, 12, 6, 64, 14, 20, 26, 32,
240  40, 34, 28, 22, 72, 66, 68, 74, 80, 30, 36, 42, 48, 56, 50,
241  44, 38, 88, 82, 76, 70, 128, 78, 84, 90, 96, 46, 52, 58, 1,
242  9, 3, 60, 54, 104, 98, 92, 86, 136, 130, 132, 138, 144, 94, 100,
243  106, 112, 62, 5, 11, 17, 25, 19, 13, 7, 120, 114, 108, 102, 152,
244  146, 140, 134, 192, 142, 148, 154, 160, 110, 116, 122, 65, 15, 21, 27,
245  33, 41, 35, 29, 23, 73, 67, 124, 118, 168, 162, 156, 150, 200, 194,
246  196, 202, 208, 158, 164, 170, 176, 126, 69, 75, 81, 31, 37, 43, 49,
247  57, 51, 45, 39, 89, 83, 77, 71, 184, 178, 172, 166, 216, 210, 204,
248  198, 206, 212, 218, 224, 174, 180, 186, 129, 79, 85, 91, 97, 47, 53,
249  59, 61, 55, 105, 99, 93, 87, 137, 131, 188, 182, 232, 226, 220, 214,
250  222, 228, 234, 240, 190, 133, 139, 145, 95, 101, 107, 113, 63, 121, 115,
251  109, 103, 153, 147, 141, 135, 248, 242, 236, 230, 238, 244, 250, 193, 143,
252  149, 155, 161, 111, 117, 123, 125, 119, 169, 163, 157, 151, 201, 195, 252,
253  246, 254, 197, 203, 209, 159, 165, 171, 177, 127, 185, 179, 173, 167, 217,
254  211, 205, 199, 207, 213, 219, 225, 175, 181, 187, 189, 183, 233, 227, 221,
255  215, 223, 229, 235, 241, 191, 249, 243, 237, 231, 239, 245, 251, 253, 247,
256  255
257 };
258 
// 256-entry scan table for 16x16 blocks, full-precision (fp) hadamard
// variant. Only compiled when CONFIG_AV1_HIGHBITDEPTH is non-zero. Its
// companion inverse table is av1_default_iscan_fp_16x16_transpose.
259 #if CONFIG_AV1_HIGHBITDEPTH
260 // The original scan order (default_scan_16x16) is modified according to the
261 // extra shift in hadamard c implementation in fp case, i.e.,
262 // aom_hadamard_16x16_c. Note that 16x16 lp and fp hadamard generate different
263 // outputs, so we handle them separately.
264 DECLARE_ALIGNED(16, static const int16_t,
265  default_scan_fp_16x16_transpose[256]) = {
266  0, 4, 2, 8, 6, 16, 20, 18, 12, 10, 64, 14, 24, 22, 32,
267  36, 34, 28, 26, 68, 66, 72, 70, 80, 30, 40, 38, 48, 52, 50,
268  44, 42, 84, 82, 76, 74, 128, 78, 88, 86, 96, 46, 56, 54, 1,
269  5, 3, 60, 58, 100, 98, 92, 90, 132, 130, 136, 134, 144, 94, 104,
270  102, 112, 62, 9, 7, 17, 21, 19, 13, 11, 116, 114, 108, 106, 148,
271  146, 140, 138, 192, 142, 152, 150, 160, 110, 120, 118, 65, 15, 25, 23,
272  33, 37, 35, 29, 27, 69, 67, 124, 122, 164, 162, 156, 154, 196, 194,
273  200, 198, 208, 158, 168, 166, 176, 126, 73, 71, 81, 31, 41, 39, 49,
274  53, 51, 45, 43, 85, 83, 77, 75, 180, 178, 172, 170, 212, 210, 204,
275  202, 206, 216, 214, 224, 174, 184, 182, 129, 79, 89, 87, 97, 47, 57,
276  55, 61, 59, 101, 99, 93, 91, 133, 131, 188, 186, 228, 226, 220, 218,
277  222, 232, 230, 240, 190, 137, 135, 145, 95, 105, 103, 113, 63, 117, 115,
278  109, 107, 149, 147, 141, 139, 244, 242, 236, 234, 238, 248, 246, 193, 143,
279  153, 151, 161, 111, 121, 119, 125, 123, 165, 163, 157, 155, 197, 195, 252,
280  250, 254, 201, 199, 209, 159, 169, 167, 177, 127, 181, 179, 173, 171, 213,
281  211, 205, 203, 207, 217, 215, 225, 175, 185, 183, 189, 187, 229, 227, 221,
282  219, 223, 233, 231, 241, 191, 245, 243, 237, 235, 239, 249, 247, 253, 251,
283  255
284 };
285 #endif
286 
// 256-entry inverse-scan table for 16x16 blocks, low-precision (lp) variant.
// NOTE(review): expected to satisfy iscan[scan[i]] == i for the paired
// default_scan_lp_16x16_transpose table; only the first entries were checked.
287 // The original scan order (av1_default_iscan_16x16) is modified to match
288 // hadamard AVX2 implementation, i.e., aom_hadamard_lp_16x16_avx2.
289 // Since hadamard AVX2 implementation will modify the order of coefficients,
290 // such that the normal scan order is no longer guaranteed to scan low
291 // coefficients first, therefore we modify the scan order accordingly. Note that
292 // this one has to be used together with default_scan_lp_16x16_transpose.
293 DECLARE_ALIGNED(16, static const int16_t,
294  av1_default_iscan_lp_16x16_transpose[256]) = {
295  0, 44, 2, 46, 3, 63, 9, 69, 1, 45, 4, 64, 8, 68, 11,
296  87, 5, 65, 7, 67, 12, 88, 18, 94, 6, 66, 13, 89, 17, 93,
297  24, 116, 14, 90, 16, 92, 25, 117, 31, 123, 15, 91, 26, 118, 30,
298  122, 41, 148, 27, 119, 29, 121, 42, 149, 48, 152, 28, 120, 43, 150,
299  47, 151, 62, 177, 10, 86, 20, 96, 21, 113, 35, 127, 19, 95, 22,
300  114, 34, 126, 37, 144, 23, 115, 33, 125, 38, 145, 52, 156, 32, 124,
301  39, 146, 51, 155, 58, 173, 40, 147, 50, 154, 59, 174, 73, 181, 49,
302  153, 60, 175, 72, 180, 83, 198, 61, 176, 71, 179, 84, 199, 98, 202,
303  70, 178, 85, 200, 97, 201, 112, 219, 36, 143, 54, 158, 55, 170, 77,
304  185, 53, 157, 56, 171, 76, 184, 79, 194, 57, 172, 75, 183, 80, 195,
305  102, 206, 74, 182, 81, 196, 101, 205, 108, 215, 82, 197, 100, 204, 109,
306  216, 131, 223, 99, 203, 110, 217, 130, 222, 140, 232, 111, 218, 129, 221,
307  141, 233, 160, 236, 128, 220, 142, 234, 159, 235, 169, 245, 78, 193, 104,
308  208, 105, 212, 135, 227, 103, 207, 106, 213, 134, 226, 136, 228, 107, 214,
309  133, 225, 137, 229, 164, 240, 132, 224, 138, 230, 163, 239, 165, 241, 139,
310  231, 162, 238, 166, 242, 189, 249, 161, 237, 167, 243, 188, 248, 190, 250,
311  168, 244, 187, 247, 191, 251, 210, 254, 186, 246, 192, 252, 209, 253, 211,
312  255
313 };
314 
// 256-entry inverse-scan table for 16x16 blocks, full-precision (fp) variant.
// Only compiled when CONFIG_AV1_HIGHBITDEPTH is non-zero.
// NOTE(review): expected to satisfy iscan[scan[i]] == i for the paired
// default_scan_fp_16x16_transpose table -- not verified entry by entry.
315 #if CONFIG_AV1_HIGHBITDEPTH
316 // The original scan order (av1_default_iscan_16x16) is modified to match
317 // hadamard AVX2 implementation, i.e., aom_hadamard_16x16_avx2.
318 // Since hadamard AVX2 implementation will modify the order of coefficients,
319 // such that the normal scan order is no longer guaranteed to scan low
320 // coefficients first, therefore we modify the scan order accordingly. Note that
321 // this one has to be used together with default_scan_fp_16x16_transpose.
322 DECLARE_ALIGNED(16, static const int16_t,
323  av1_default_iscan_fp_16x16_transpose[256]) = {
324  0, 44, 2, 46, 1, 45, 4, 64, 3, 63, 9, 69, 8, 68, 11,
325  87, 5, 65, 7, 67, 6, 66, 13, 89, 12, 88, 18, 94, 17, 93,
326  24, 116, 14, 90, 16, 92, 15, 91, 26, 118, 25, 117, 31, 123, 30,
327  122, 41, 148, 27, 119, 29, 121, 28, 120, 43, 150, 42, 149, 48, 152,
328  47, 151, 62, 177, 10, 86, 20, 96, 19, 95, 22, 114, 21, 113, 35,
329  127, 34, 126, 37, 144, 23, 115, 33, 125, 32, 124, 39, 146, 38, 145,
330  52, 156, 51, 155, 58, 173, 40, 147, 50, 154, 49, 153, 60, 175, 59,
331  174, 73, 181, 72, 180, 83, 198, 61, 176, 71, 179, 70, 178, 85, 200,
332  84, 199, 98, 202, 97, 201, 112, 219, 36, 143, 54, 158, 53, 157, 56,
333  171, 55, 170, 77, 185, 76, 184, 79, 194, 57, 172, 75, 183, 74, 182,
334  81, 196, 80, 195, 102, 206, 101, 205, 108, 215, 82, 197, 100, 204, 99,
335  203, 110, 217, 109, 216, 131, 223, 130, 222, 140, 232, 111, 218, 129, 221,
336  128, 220, 142, 234, 141, 233, 160, 236, 159, 235, 169, 245, 78, 193, 104,
337  208, 103, 207, 106, 213, 105, 212, 135, 227, 134, 226, 136, 228, 107, 214,
338  133, 225, 132, 224, 138, 230, 137, 229, 164, 240, 163, 239, 165, 241, 139,
339  231, 162, 238, 161, 237, 167, 243, 166, 242, 189, 249, 188, 248, 190, 250,
340  168, 244, 187, 247, 186, 246, 192, 252, 191, 251, 210, 254, 209, 253, 211,
341  255
342 };
343 #endif
344 
345 // For entropy coding, IDTX shares the scan orders of the other 2D-transforms,
346 // but the fastest way to calculate the IDTX transform (i.e. no transposes)
347 // results in coefficients that are a transposition of the entropy coding
348 // versions. These tables are used as substitute for the scan order for the
349 // faster version of IDTX.
350 
// 16-entry scan table for the fast 4x4 IDTX path.
351 // Must be used together with av1_fast_idtx_iscan_4x4
352 DECLARE_ALIGNED(16, static const int16_t,
353  av1_fast_idtx_scan_4x4[16]) = { 0, 1, 4, 8, 5, 2, 3, 6,
354  9, 12, 13, 10, 7, 11, 14, 15 };
355 
// Inverse of the table above: av1_fast_idtx_iscan_4x4[av1_fast_idtx_scan_4x4[i]]
// equals i for every i in 0..15.
356 // Must be used together with av1_fast_idtx_scan_4x4
357 DECLARE_ALIGNED(16, static const int16_t,
358  av1_fast_idtx_iscan_4x4[16]) = { 0, 1, 5, 6, 2, 4, 7, 12,
359  3, 8, 11, 13, 9, 10, 14, 15 };
360 
// Bundles the 4x4 scan table and its inverse, in that order.
361 static const SCAN_ORDER av1_fast_idtx_scan_order_4x4 = {
362  av1_fast_idtx_scan_4x4, av1_fast_idtx_iscan_4x4
363 };
364 
// 64-entry scan table for the fast 8x8 IDTX path.
365 // Must be used together with av1_fast_idtx_iscan_8x8
366 DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_scan_8x8[64]) = {
367  0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5,
368  12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28,
369  35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
370  58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63
371 };
372 
// NOTE(review): expected to be the inverse permutation of
// av1_fast_idtx_scan_8x8 -- not verified entry by entry.
373 // Must be used together with av1_fast_idtx_scan_8x8
374 DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_iscan_8x8[64]) = {
375  0, 1, 5, 6, 14, 15, 27, 28, 2, 4, 7, 13, 16, 26, 29, 42,
376  3, 8, 12, 17, 25, 30, 41, 43, 9, 11, 18, 24, 31, 40, 44, 53,
377  10, 19, 23, 32, 39, 45, 52, 54, 20, 22, 33, 38, 46, 51, 55, 60,
378  21, 34, 37, 47, 50, 56, 59, 61, 35, 36, 48, 49, 57, 58, 62, 63
379 };
380 
// Bundles the 8x8 scan table and its inverse, in that order.
381 static const SCAN_ORDER av1_fast_idtx_scan_order_8x8 = {
382  av1_fast_idtx_scan_8x8, av1_fast_idtx_iscan_8x8
383 };
384 
// 256-entry scan table for the fast 16x16 IDTX path. Unlike the 4x4 and 8x8
// tables, no SCAN_ORDER bundle is defined for it in this header.
385 // Must be used together with av1_fast_idtx_iscan_16x16
386 DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_scan_16x16[256]) = {
387  0, 1, 16, 32, 17, 2, 3, 18, 33, 48, 64, 49, 34, 19, 4,
388  5, 20, 35, 50, 65, 80, 96, 81, 66, 51, 36, 21, 6, 7, 22,
389  37, 52, 67, 82, 97, 112, 128, 113, 98, 83, 68, 53, 38, 23, 8,
390  9, 24, 39, 54, 69, 84, 99, 114, 129, 144, 160, 145, 130, 115, 100,
391  85, 70, 55, 40, 25, 10, 11, 26, 41, 56, 71, 86, 101, 116, 131,
392  146, 161, 176, 192, 177, 162, 147, 132, 117, 102, 87, 72, 57, 42, 27,
393  12, 13, 28, 43, 58, 73, 88, 103, 118, 133, 148, 163, 178, 193, 208,
394  224, 209, 194, 179, 164, 149, 134, 119, 104, 89, 74, 59, 44, 29, 14,
395  15, 30, 45, 60, 75, 90, 105, 120, 135, 150, 165, 180, 195, 210, 225,
396  240, 241, 226, 211, 196, 181, 166, 151, 136, 121, 106, 91, 76, 61, 46,
397  31, 47, 62, 77, 92, 107, 122, 137, 152, 167, 182, 197, 212, 227, 242,
398  243, 228, 213, 198, 183, 168, 153, 138, 123, 108, 93, 78, 63, 79, 94,
399  109, 124, 139, 154, 169, 184, 199, 214, 229, 244, 245, 230, 215, 200, 185,
400  170, 155, 140, 125, 110, 95, 111, 126, 141, 156, 171, 186, 201, 216, 231,
401  246, 247, 232, 217, 202, 187, 172, 157, 142, 127, 143, 158, 173, 188, 203,
402  218, 233, 248, 249, 234, 219, 204, 189, 174, 159, 175, 190, 205, 220, 235,
403  250, 251, 236, 221, 206, 191, 207, 222, 237, 252, 253, 238, 223, 239, 254,
404  255
405 };
406 
// 256-entry inverse-scan table for the fast 16x16 IDTX path.
// NOTE(review): expected to be the inverse permutation of
// av1_fast_idtx_scan_16x16 -- not verified entry by entry.
407 // Must be used together with av1_fast_idtx_scan_16x16
408 DECLARE_ALIGNED(16, static const int16_t, av1_fast_idtx_iscan_16x16[256]) = {
409  0, 1, 5, 6, 14, 15, 27, 28, 44, 45, 65, 66, 90, 91, 119,
410  120, 2, 4, 7, 13, 16, 26, 29, 43, 46, 64, 67, 89, 92, 118,
411  121, 150, 3, 8, 12, 17, 25, 30, 42, 47, 63, 68, 88, 93, 117,
412  122, 149, 151, 9, 11, 18, 24, 31, 41, 48, 62, 69, 87, 94, 116,
413  123, 148, 152, 177, 10, 19, 23, 32, 40, 49, 61, 70, 86, 95, 115,
414  124, 147, 153, 176, 178, 20, 22, 33, 39, 50, 60, 71, 85, 96, 114,
415  125, 146, 154, 175, 179, 200, 21, 34, 38, 51, 59, 72, 84, 97, 113,
416  126, 145, 155, 174, 180, 199, 201, 35, 37, 52, 58, 73, 83, 98, 112,
417  127, 144, 156, 173, 181, 198, 202, 219, 36, 53, 57, 74, 82, 99, 111,
418  128, 143, 157, 172, 182, 197, 203, 218, 220, 54, 56, 75, 81, 100, 110,
419  129, 142, 158, 171, 183, 196, 204, 217, 221, 234, 55, 76, 80, 101, 109,
420  130, 141, 159, 170, 184, 195, 205, 216, 222, 233, 235, 77, 79, 102, 108,
421  131, 140, 160, 169, 185, 194, 206, 215, 223, 232, 236, 245, 78, 103, 107,
422  132, 139, 161, 168, 186, 193, 207, 214, 224, 231, 237, 244, 246, 104, 106,
423  133, 138, 162, 167, 187, 192, 208, 213, 225, 230, 238, 243, 247, 252, 105,
424  134, 137, 163, 166, 188, 191, 209, 212, 226, 229, 239, 242, 248, 251, 253,
425  135, 136, 164, 165, 189, 190, 210, 211, 227, 228, 240, 241, 249, 250, 254,
426  255
427 };
428 
429 // Indicates the blocks for which RD model should be based on special logic
430 static inline int get_model_rd_flag(const AV1_COMP *cpi, const MACROBLOCKD *xd,
431  BLOCK_SIZE bsize) {
432  const AV1_COMMON *const cm = &cpi->common;
433  const int large_block = bsize >= BLOCK_32X32;
434  // Only enable for low bitdepth to mitigate issue: b/303023614.
435  return cpi->oxcf.rc_cfg.mode == AOM_CBR && large_block &&
436  !cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id) &&
437  cm->quant_params.base_qindex && !cpi->oxcf.use_highbitdepth;
438 }
464 static inline void find_predictors(
465  AV1_COMP *cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
466  int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES],
467  struct buf_2d yv12_mb[8][MAX_MB_PLANE], BLOCK_SIZE bsize,
468  int force_skip_low_temp_var, int skip_pred_mv, bool *use_scaled_ref_frame) {
469  AV1_COMMON *const cm = &cpi->common;
470  MACROBLOCKD *const xd = &x->e_mbd;
471  MB_MODE_INFO *const mbmi = xd->mi[0];
472  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
473  const YV12_BUFFER_CONFIG *ref = get_ref_frame_yv12_buf(cm, ref_frame);
474  const bool ref_is_scaled =
475  ref->y_crop_height != cm->height || ref->y_crop_width != cm->width;
476  const YV12_BUFFER_CONFIG *scaled_ref =
477  av1_get_scaled_ref_frame(cpi, ref_frame);
478  const YV12_BUFFER_CONFIG *yv12 =
479  ref_is_scaled && scaled_ref ? scaled_ref : ref;
480  const int num_planes = av1_num_planes(cm);
481  x->pred_mv_sad[ref_frame] = INT_MAX;
482  x->pred_mv0_sad[ref_frame] = INT_MAX;
483  x->pred_mv1_sad[ref_frame] = INT_MAX;
484  frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
485  // TODO(kyslov) this needs various further optimizations. to be continued..
486  assert(yv12 != NULL);
487  if (yv12 != NULL) {
488  struct scale_factors *const sf =
489  scaled_ref ? NULL : get_ref_scale_factors(cm, ref_frame);
490  av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
491  av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
492  xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
493  mbmi_ext->mode_context);
494  // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
495  // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
496  av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
497  av1_find_best_ref_mvs_from_stack(
498  cm->features.allow_high_precision_mv, mbmi_ext, ref_frame,
499  &frame_mv[NEARESTMV][ref_frame], &frame_mv[NEARMV][ref_frame], 0);
500  frame_mv[GLOBALMV][ref_frame] = mbmi_ext->global_mvs[ref_frame];
501  // Early exit for non-LAST frame if force_skip_low_temp_var is set.
502  if (!is_one_pass_rt_lag_params(cpi) && !ref_is_scaled &&
503  bsize >= BLOCK_8X8 && !skip_pred_mv &&
504  !(force_skip_low_temp_var && ref_frame != LAST_FRAME)) {
505  av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, ref_frame,
506  bsize);
507  }
508  }
510  av1_count_overlappable_neighbors(cm, xd);
511  }
512  mbmi->num_proj_ref = 1;
513  *use_scaled_ref_frame = ref_is_scaled && scaled_ref;
514 }
515 
516 static inline void init_mbmi_nonrd(MB_MODE_INFO *mbmi,
517  PREDICTION_MODE pred_mode,
518  MV_REFERENCE_FRAME ref_frame0,
519  MV_REFERENCE_FRAME ref_frame1,
520  const AV1_COMMON *cm) {
521  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
522  mbmi->ref_mv_idx = 0;
523  mbmi->mode = pred_mode;
524  mbmi->uv_mode = UV_DC_PRED;
525  mbmi->ref_frame[0] = ref_frame0;
526  mbmi->ref_frame[1] = ref_frame1;
527  pmi->palette_size[PLANE_TYPE_Y] = 0;
528  pmi->palette_size[PLANE_TYPE_UV] = 0;
529  mbmi->filter_intra_mode_info.use_filter_intra = 0;
530  mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
531  mbmi->motion_mode = SIMPLE_TRANSLATION;
532  mbmi->num_proj_ref = 1;
533  mbmi->interintra_mode = 0;
534  set_default_interp_filters(mbmi, cm->features.interp_filter);
535 }
536 
537 static inline void init_estimate_block_intra_args(
538  struct estimate_block_intra_args *args, AV1_COMP *cpi, MACROBLOCK *x) {
539  args->cpi = cpi;
540  args->x = x;
541  args->mode = DC_PRED;
542  args->skippable = 1;
543  args->rdc = 0;
544  args->best_sad = UINT_MAX;
545  args->prune_mode_based_on_sad = false;
546  args->prune_palette_sad = false;
547 }
548 
549 static inline int get_pred_buffer(PRED_BUFFER *p, int len) {
550  for (int buf_idx = 0; buf_idx < len; buf_idx++) {
551  if (!p[buf_idx].in_use) {
552  p[buf_idx].in_use = 1;
553  return buf_idx;
554  }
555  }
556  return -1;
557 }
558 
559 static inline bool prune_palette_testing_inter(AV1_COMP *cpi,
560  unsigned int source_variance) {
561  return (
562  cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN &&
563  cpi->oxcf.speed >= 11 && cpi->rc.high_source_sad &&
564  ((cpi->sf.rt_sf.prune_palette_search_nonrd > 2) ||
565  (cpi->sf.rt_sf.rc_compute_spatial_var_sc_kf &&
566  cpi->rc.frame_spatial_variance < 1200 &&
567  cpi->rc.perc_spatial_flat_blocks < 5 &&
568  cpi->rc.percent_blocks_with_motion > 98 && source_variance < 4000)));
569 }
570 
571 static inline void free_pred_buffer(PRED_BUFFER *p) {
572  if (p != NULL) p->in_use = 0;
573 }
574 
575 #if CONFIG_INTERNAL_STATS
576 static inline void store_coding_context_nonrd(MACROBLOCK *x,
577  PICK_MODE_CONTEXT *ctx,
578  int mode_index) {
579 #else
580 static inline void store_coding_context_nonrd(MACROBLOCK *x,
581  PICK_MODE_CONTEXT *ctx) {
582 #endif // CONFIG_INTERNAL_STATS
583  MACROBLOCKD *const xd = &x->e_mbd;
584  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
585 
586  // Take a snapshot of the coding context so it can be
587  // restored if we decide to encode this way
588  ctx->rd_stats.skip_txfm = txfm_info->skip_txfm;
589 
590  ctx->skippable = txfm_info->skip_txfm;
591 #if CONFIG_INTERNAL_STATS
592  ctx->best_mode_index = mode_index;
593 #endif // CONFIG_INTERNAL_STATS
594  ctx->mic = *xd->mi[0];
595  ctx->skippable = txfm_info->skip_txfm;
596  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
597  av1_ref_frame_type(xd->mi[0]->ref_frame));
598 }
599 
// Prototypes of non-inline functions; their definitions are not in this
// header.

// NOTE(review): presumably estimates the luma RD stats of the block for the
// given transform size, writing this_rdc and *skippable -- confirm in the
// implementation.
600 void av1_block_yrd(MACROBLOCK *x, RD_STATS *this_rdc, int *skippable,
601  BLOCK_SIZE bsize, TX_SIZE tx_size);
602 
// NOTE(review): presumably the IDTX counterpart of av1_block_yrd(), taking
// the prediction through pred_buf/pred_stride -- confirm.
603 void av1_block_yrd_idtx(MACROBLOCK *x, const uint8_t *const pred_buf,
604  int pred_stride, RD_STATS *this_rdc, int *skippable,
605  BLOCK_SIZE bsize, TX_SIZE tx_size);
606 
// NOTE(review): presumably models the RD cost over planes start_plane to
// stop_plane; the meaning of the int64_t return value and whether the range
// is inclusive must be confirmed in the implementation.
607 int64_t av1_model_rd_for_sb_uv(AV1_COMP *cpi, BLOCK_SIZE plane_bsize,
608  MACROBLOCK *x, MACROBLOCKD *xd,
609  RD_STATS *this_rdc, int start_plane,
610  int stop_plane);
611 
// NOTE(review): the signature looks like a per-transform-block callback;
// arg presumably points to a struct estimate_block_intra_args -- confirm.
612 void av1_estimate_block_intra(int plane, int block, int row, int col,
613  BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
614  void *arg);
615 
// NOTE(review): presumably evaluates the intra modes for the block and
// updates best_rdc / best_pickmode when one wins; tmp_buffers is a
// PRED_BUFFER pool (see get_pred_buffer()) -- confirm the details.
616 void av1_estimate_intra_mode(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
617  int best_early_term, unsigned int ref_cost_intra,
618  int reuse_prediction, struct buf_2d *orig_dst,
619  PRED_BUFFER *tmp_buffers,
620  PRED_BUFFER **this_mode_pred, RD_STATS *best_rdc,
621  BEST_PICKMODE *best_pickmode,
622  unsigned int *best_sad_norm);
623 
624 #endif // AOM_AV1_ENCODER_NONRD_OPT_H_
RATE_CONTROL rc
Definition: encoder.h:3104
Extended mode info derived from mbmi.
Definition: block.h:225
AV1_COMMON common
Definition: encoder.h:2940
MV_REFERENCE_FRAME ref_frame[2]
The reference frames for the MV.
Definition: blockd.h:246
RateControlCfg rc_cfg
Definition: encoder.h:956
uint8_t ref_mv_idx
Which ref_mv to use.
Definition: blockd.h:314
FILTER_INTRA_MODE_INFO filter_intra_mode_info
The type of filter intra mode used (if applicable).
Definition: blockd.h:274
int pred_mv1_sad[REF_FRAMES]
The sad of the 2nd mv ref (near).
Definition: block.h:1131
FeatureFlags features
Definition: av1_common_int.h:921
PREDICTION_MODE mode
The prediction mode used.
Definition: blockd.h:232
Definition: aom_encoder.h:187
BEST_PICKMODE best_pickmode
Structure to hold best inter mode data.
Definition: nonrd_opt.h:87
int pred_mv0_sad[REF_FRAMES]
The sad of the 1st mv ref (nearest).
Definition: block.h:1129
AV1EncoderConfig oxcf
Definition: encoder.h:2945
uint8_t segment_id
The segment id.
Definition: blockd.h:310
CommonQuantParams quant_params
Definition: av1_common_int.h:938
enum aom_rc_mode mode
Definition: encoder.h:598
int_mv mv[2]
The motion vectors used by the current inter mode.
Definition: blockd.h:244
bool switchable_motion_mode
Definition: av1_common_int.h:415
uint8_t ref_mv_count[MODE_CTX_REF_FRAMES]
Number of ref mvs in the drl.
Definition: block.h:232
CANDIDATE_MV ref_mv_stack[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE]
Definition: blockd.h:776
PALETTE_MODE_INFO palette_mode_info
Stores the size and colors of palette mode.
Definition: blockd.h:280
SPEED_FEATURES sf
Definition: encoder.h:3124
int width
Definition: av1_common_int.h:791
int pred_mv_sad[REF_FRAMES]
Sum absolute distortion of the predicted mv for each ref frame.
Definition: block.h:1121
bool allow_high_precision_mv
Definition: av1_common_int.h:377
MOTION_MODE motion_mode
The motion mode used by the inter prediction.
Definition: blockd.h:250
RD_STATS this_rdc
Structure to RD cost of current mode.
Definition: nonrd_opt.h:89
YV12 frame buffer data structure.
Definition: yv12config.h:46
UV_PREDICTION_MODE uv_mode
The UV mode when intra is used.
Definition: blockd.h:234
int_mv global_mvs[REF_FRAMES]
Global mvs.
Definition: block.h:234
static void find_predictors(AV1_COMP *cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame, int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES], struct buf_2d yv12_mb[8][3], BLOCK_SIZE bsize, int force_skip_low_temp_var, int skip_pred_mv, bool *use_scaled_ref_frame)
Finds predicted motion vectors for a block.
Definition: nonrd_opt.h:464
MB_MODE_INFO ** mi
Definition: blockd.h:617
RD_STATS best_rdc
Pointer to the RD Cost for the best mode found so far.
Definition: nonrd_opt.h:91
Variables related to current coding block.
Definition: blockd.h:570
TxfmSearchInfo txfm_search_info
Results of the txfm searches that have been done.
Definition: block.h:1327
Top level encoder structure.
Definition: encoder.h:2897
REAL_TIME_SPEED_FEATURES rt_sf
Definition: speed_features.h:2156
Top level common structure used by both encoder and decoder.
Definition: av1_common_int.h:766
uint8_t num_proj_ref
Number of samples used by warp causal.
Definition: blockd.h:252
InterpFilter interp_filter
Definition: av1_common_int.h:417
Stores the prediction/txfm mode of the current coding block.
Definition: blockd.h:222
Structure to store parameters and statistics used in non-rd inter mode evaluation.
Definition: nonrd_opt.h:85
int16_t mode_context[MODE_CTX_REF_FRAMES]
Context used to encode the current mode.
Definition: block.h:236
uint8_t skip_txfm
Whether to skip transform and quantization on a partition block level.
Definition: block.h:536
int base_qindex
Definition: av1_common_int.h:623
INTERINTRA_MODE interintra_mode
The type of intra mode used by inter-intra.
Definition: blockd.h:259
uint16_t weight[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE]
Definition: blockd.h:781
Encoder's parameters related to the current coding block.
Definition: block.h:889
int height
Definition: av1_common_int.h:792
Stores various encoding/search decisions related to txfm search.
Definition: block.h:534
MB_MODE_INFO_EXT mbmi_ext
Derived coding information.
Definition: block.h:914
MACROBLOCKD e_mbd
Decoder's view of current coding block.
Definition: block.h:907