summaryrefslogtreecommitdiff
path: root/third_party/libopenjpeg20/0003-dwt-decode.patch
blob: 631219a8b351195529a90608eeee04fd7d28bfcb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
diff --git a/third_party/libopenjpeg20/dwt.c b/third_party/libopenjpeg20/dwt.c
index 83c148002..1455ee84a 100644
--- a/third_party/libopenjpeg20/dwt.c
+++ b/third_party/libopenjpeg20/dwt.c
@@ -62,8 +62,6 @@
 
 /** @defgroup DWT DWT - Implementation of a discrete wavelet transform */
 /*@{*/
-#define OPJ_WS(i) v->mem[(i)*2]
-#define OPJ_WD(i) v->mem[(1+(i)*2)]
 
 #ifdef __AVX2__
 /** Number of int32 values in a AVX2 register */
@@ -81,6 +79,7 @@
 
 typedef struct dwt_local {
     OPJ_INT32* mem;
+    OPJ_SIZE_T mem_count;
     OPJ_INT32 dn;   /* number of elements in high pass band */
     OPJ_INT32 sn;   /* number of elements in low pass band */
     OPJ_INT32 cas;  /* 0 = start on even coord, 1 = start on odd coord */
@@ -132,13 +131,14 @@ static void opj_dwt_deinterleave_v(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn,
 /**
 Forward 5-3 wavelet transform in 1-D
 */
-static void opj_dwt_encode_1(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn,
-                             OPJ_INT32 cas);
+static void opj_dwt_encode_1(OPJ_INT32 *a, OPJ_SIZE_T a_count, OPJ_INT32 dn,
+    OPJ_INT32 sn, OPJ_INT32 cas);
 /**
 Forward 9-7 wavelet transform in 1-D
 */
-static void opj_dwt_encode_1_real(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn,
-                                  OPJ_INT32 cas);
+static void opj_dwt_encode_1_real(OPJ_INT32 *a, OPJ_SIZE_T a_count,
+    OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas);
+
 
 /**
 Explicit calculation of the Quantization Stepsizes
@@ -149,14 +149,14 @@ static void opj_dwt_encode_stepsize(OPJ_INT32 stepsize, OPJ_INT32 numbps,
 Inverse wavelet transform in 2-D.
 */
 static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp,
-                                    opj_tcd_tilecomp_t* tilec, OPJ_UINT32 i);
+                                    const opj_tcd_tilecomp_t* tilec, OPJ_UINT32 i);
 
 static OPJ_BOOL opj_dwt_decode_partial_tile(
     opj_tcd_tilecomp_t* tilec,
     OPJ_UINT32 numres);
 
-static OPJ_BOOL opj_dwt_encode_procedure(opj_tcd_tilecomp_t * tilec,
-        void (*p_function)(OPJ_INT32 *, OPJ_INT32, OPJ_INT32, OPJ_INT32));
+static OPJ_BOOL opj_dwt_encode_procedure(const opj_tcd_tilecomp_t * tilec,
+        void(*p_function)(OPJ_INT32 *, OPJ_SIZE_T, OPJ_INT32, OPJ_INT32, OPJ_INT32));
 
 static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_t* OPJ_RESTRICT r,
         OPJ_UINT32 i);
@@ -205,13 +205,20 @@ static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w,
 
 /*@}*/
 
-#define OPJ_S(i) a[(i)*2]
-#define OPJ_D(i) a[(1+(i)*2)]
-#define OPJ_S_(i) ((i)<0?OPJ_S(0):((i)>=sn?OPJ_S(sn-1):OPJ_S(i)))
-#define OPJ_D_(i) ((i)<0?OPJ_D(0):((i)>=dn?OPJ_D(dn-1):OPJ_D(i)))
+#define IDX_S(i) (i)*2
+#define IDX_D(i) 1 + (i)* 2
+#define UNDERFLOW_SN(i) ((i) >= sn&&sn>0)
+#define UNDERFLOW_DN(i) ((i) >= dn&&dn>0)
+#define OVERFLOW_S(i) (IDX_S(i) >= a_count)
+#define OVERFLOW_D(i) (IDX_D(i) >= a_count)
+
+#define OPJ_S(i) a[IDX_S(i)]
+#define OPJ_D(i) a[IDX_D(i)]
+#define OPJ_S_(i) ((i)<0 ? OPJ_S(0) : (UNDERFLOW_SN(i) ? OPJ_S(sn - 1) : OVERFLOW_S(i) ? OPJ_S(i - 1) : OPJ_S(i)))
+#define OPJ_D_(i) ((i)<0 ? OPJ_D(0) : (UNDERFLOW_DN(i) ? OPJ_D(dn - 1) : OVERFLOW_D(i) ? OPJ_D(i - 1) : OPJ_D(i)))
 /* new */
-#define OPJ_SS_(i) ((i)<0?OPJ_S(0):((i)>=dn?OPJ_S(dn-1):OPJ_S(i)))
-#define OPJ_DD_(i) ((i)<0?OPJ_D(0):((i)>=sn?OPJ_D(sn-1):OPJ_D(i)))
+#define OPJ_SS_(i) ((i)<0 ? OPJ_S(0) : (UNDERFLOW_DN(i) ? OPJ_S(dn - 1) : OVERFLOW_S(i) ? OPJ_S(i - 1) : OPJ_S(i)))
+#define OPJ_DD_(i) ((i)<0 ? OPJ_D(0) : (UNDERFLOW_SN(i) ? OPJ_D(sn - 1) : OVERFLOW_D(i) ? OPJ_D(i - 1) : OPJ_D(i)))
 
 /* <summary>                                                              */
 /* This table contains the norms of the 5-3 wavelets for different bands. */
@@ -344,8 +351,8 @@ static void opj_dwt_interleave_v(const opj_dwt_t* v, OPJ_INT32 *a, OPJ_INT32 x)
 /* <summary>                            */
 /* Forward 5-3 wavelet transform in 1-D. */
 /* </summary>                           */
-static void opj_dwt_encode_1(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn,
-                             OPJ_INT32 cas)
+static void opj_dwt_encode_1(OPJ_INT32 *a, OPJ_SIZE_T a_count, OPJ_INT32 dn,
+                             OPJ_INT32 sn, OPJ_INT32 cas)
 {
     OPJ_INT32 i;
 
@@ -376,8 +383,8 @@ static void opj_dwt_encode_1(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn,
 /* <summary>                            */
 /* Inverse 5-3 wavelet transform in 1-D. */
 /* </summary>                           */
-static void opj_dwt_decode_1_(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn,
-                              OPJ_INT32 cas)
+static void opj_dwt_decode_1_(OPJ_INT32 *a, OPJ_SIZE_T a_count, OPJ_INT32 dn,
+                              OPJ_INT32 sn, OPJ_INT32 cas)
 {
     OPJ_INT32 i;
 
@@ -406,7 +413,7 @@ static void opj_dwt_decode_1_(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn,
 
 static void opj_dwt_decode_1(const opj_dwt_t *v)
 {
-    opj_dwt_decode_1_(v->mem, v->dn, v->sn, v->cas);
+    opj_dwt_decode_1_(v->mem, v->mem_count, v->dn, v->sn, v->cas);
 }
 
 #endif /* STANDARD_SLOW_VERSION */
@@ -1037,8 +1044,8 @@ static void opj_idwt53_v(const opj_dwt_t *dwt,
 /* <summary>                             */
 /* Forward 9-7 wavelet transform in 1-D. */
 /* </summary>                            */
-static void opj_dwt_encode_1_real(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn,
-                                  OPJ_INT32 cas)
+static void opj_dwt_encode_1_real(OPJ_INT32 *a, OPJ_SIZE_T a_count,
+                                  OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas)
 {
     OPJ_INT32 i;
     if (!cas) {
@@ -1106,8 +1113,8 @@ static void opj_dwt_encode_stepsize(OPJ_INT32 stepsize, OPJ_INT32 numbps,
 /* <summary>                            */
 /* Forward 5-3 wavelet transform in 2-D. */
 /* </summary>                           */
-static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_tcd_tilecomp_t * tilec,
-        void (*p_function)(OPJ_INT32 *, OPJ_INT32, OPJ_INT32, OPJ_INT32))
+static INLINE OPJ_BOOL opj_dwt_encode_procedure(const opj_tcd_tilecomp_t * tilec,
+        void(*p_function)(OPJ_INT32 *, OPJ_SIZE_T, OPJ_INT32, OPJ_INT32, OPJ_INT32))
 {
     OPJ_INT32 i, j, k;
     OPJ_INT32 *a = 00;
@@ -1117,6 +1124,7 @@ static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_tcd_tilecomp_t * tilec,
 
     OPJ_INT32 rw;           /* width of the resolution level computed   */
     OPJ_INT32 rh;           /* height of the resolution level computed  */
+    OPJ_SIZE_T l_data_count;
     OPJ_SIZE_T l_data_size;
 
     opj_tcd_resolution_t * l_cur_res = 0;
@@ -1129,13 +1137,13 @@ static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_tcd_tilecomp_t * tilec,
     l_cur_res = tilec->resolutions + l;
     l_last_res = l_cur_res - 1;
 
-    l_data_size = opj_dwt_max_resolution(tilec->resolutions, tilec->numresolutions);
+    l_data_count = opj_dwt_max_resolution(tilec->resolutions, tilec->numresolutions);
     /* overflow check */
-    if (l_data_size > (SIZE_MAX / sizeof(OPJ_INT32))) {
+    if (l_data_count > (SIZE_MAX / sizeof(OPJ_INT32))) {
         /* FIXME event manager error callback */
         return OPJ_FALSE;
     }
-    l_data_size *= sizeof(OPJ_INT32);
+    l_data_size = l_data_count * sizeof(OPJ_INT32);
     bj = (OPJ_INT32*)opj_malloc(l_data_size);
     /* l_data_size is equal to 0 when numresolutions == 1 but bj is not used */
     /* in that case, so do not error out */
@@ -1167,7 +1175,7 @@ static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_tcd_tilecomp_t * tilec,
                 bj[k] = aj[k * w];
             }
 
-            (*p_function)(bj, dn, sn, cas_col);
+            (*p_function) (bj, l_data_count, dn, sn, cas_col);
 
             opj_dwt_deinterleave_v(bj, aj, dn, sn, w, cas_col);
         }
@@ -1180,7 +1188,7 @@ static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_tcd_tilecomp_t * tilec,
             for (k = 0; k < rw; k++) {
                 bj[k] = aj[k];
             }
-            (*p_function)(bj, dn, sn, cas_row);
+            (*p_function) (bj, l_data_count, dn, sn, cas_row);
             opj_dwt_deinterleave_h(bj, aj, dn, sn, cas_row);
         }
 
@@ -1379,7 +1387,7 @@ static void opj_dwt_decode_v_func(void* user_data, opj_tls_t* tls)
 /* Inverse wavelet transform in 2-D.    */
 /* </summary>                           */
 static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp,
-                                    opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres)
+        const opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres)
 {
     opj_dwt_t h;
     opj_dwt_t v;
@@ -1401,22 +1409,23 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp,
         return OPJ_TRUE;
     }
     num_threads = opj_thread_pool_get_thread_count(tp);
-    h_mem_size = opj_dwt_max_resolution(tr, numres);
+    h.mem_count = opj_dwt_max_resolution(tr, numres);
     /* overflow check */
-    if (h_mem_size > (SIZE_MAX / PARALLEL_COLS_53 / sizeof(OPJ_INT32))) {
+    if (h.mem_count > (SIZE_MAX / PARALLEL_COLS_53 / sizeof(OPJ_INT32))) {
         /* FIXME event manager error callback */
         return OPJ_FALSE;
     }
     /* We need PARALLEL_COLS_53 times the height of the array, */
     /* since for the vertical pass */
     /* we process PARALLEL_COLS_53 columns at a time */
-    h_mem_size *= PARALLEL_COLS_53 * sizeof(OPJ_INT32);
+    h_mem_size = h.mem_count * PARALLEL_COLS_53 * sizeof(OPJ_INT32);
     h.mem = (OPJ_INT32*)opj_aligned_32_malloc(h_mem_size);
     if (! h.mem) {
         /* FIXME event manager error callback */
         return OPJ_FALSE;
     }
 
+    v.mem_count = h.mem_count;
     v.mem = h.mem;
 
     while (--numres) {
@@ -1594,7 +1603,8 @@ static void opj_dwt_interleave_partial_v(OPJ_INT32 *dest,
     OPJ_UNUSED(ret);
 }
 
-static void opj_dwt_decode_partial_1(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn,
+static void opj_dwt_decode_partial_1(OPJ_INT32 *a, OPJ_SIZE_T a_count,
+                                     OPJ_INT32 dn, OPJ_INT32 sn,
                                      OPJ_INT32 cas,
                                      OPJ_INT32 win_l_x0,
                                      OPJ_INT32 win_l_x1,
@@ -1974,16 +1984,16 @@ static OPJ_BOOL opj_dwt_decode_partial_tile(
         opj_sparse_array_int32_free(sa);
         return OPJ_TRUE;
     }
-    h_mem_size = opj_dwt_max_resolution(tr, numres);
+    h.mem_count = opj_dwt_max_resolution(tr, numres);
     /* overflow check */
     /* in vertical pass, we process 4 columns at a time */
-    if (h_mem_size > (SIZE_MAX / (4 * sizeof(OPJ_INT32)))) {
+    if (h.mem_count > (SIZE_MAX / (4 * sizeof(OPJ_INT32)))) {
         /* FIXME event manager error callback */
         opj_sparse_array_int32_free(sa);
         return OPJ_FALSE;
     }
 
-    h_mem_size *= 4 * sizeof(OPJ_INT32);
+    h_mem_size = h.mem_count * 4 * sizeof(OPJ_INT32);
     h.mem = (OPJ_INT32*)opj_aligned_32_malloc(h_mem_size);
     if (! h.mem) {
         /* FIXME event manager error callback */
@@ -1991,6 +2001,7 @@ static OPJ_BOOL opj_dwt_decode_partial_tile(
         return OPJ_FALSE;
     }
 
+    v.mem_count = h.mem_count;
     v.mem = h.mem;
 
     for (resno = 1; resno < numres; resno ++) {
@@ -2101,7 +2112,7 @@ static OPJ_BOOL opj_dwt_decode_partial_tile(
                                              win_ll_x1,
                                              win_hl_x0,
                                              win_hl_x1);
-                opj_dwt_decode_partial_1(h.mem, h.dn, h.sn, h.cas,
+                opj_dwt_decode_partial_1(h.mem, h.mem_count, h.dn, h.sn, h.cas,
                                          (OPJ_INT32)win_ll_x0,
                                          (OPJ_INT32)win_ll_x1,
                                          (OPJ_INT32)win_hl_x0,