comparison semiconginev2/audio/stb_vorbis.c @ 1225:27cd1c21290e compiletime-tests

did: refactor resources
author sam <sam@basx.dev>
date Wed, 17 Jul 2024 22:20:59 +0700
parents semiconginev2/old/resources/stb_vorbis.c@56781cc0fc7c
children
comparison
equal deleted inserted replaced
1224:a3fa15c25026 1225:27cd1c21290e
1 // Ogg Vorbis audio decoder - v1.22 - public domain
2 // http://nothings.org/stb_vorbis/
3 //
4 // Original version written by Sean Barrett in 2007.
5 //
6 // Originally sponsored by RAD Game Tools. Seeking implementation
7 // sponsored by Phillip Bennefall, Marc Andersen, Aaron Baker,
8 // Elias Software, Aras Pranckevicius, and Sean Barrett.
9 //
10 // LICENSE
11 //
12 // See end of file for license information.
13 //
14 // Limitations:
15 //
16 // - floor 0 not supported (used in old ogg vorbis files pre-2004)
17 // - lossless sample-truncation at beginning ignored
18 // - cannot concatenate multiple vorbis streams
19 // - sample positions are 32-bit, limiting seekable 192Khz
20 // files to around 6 hours (Ogg supports 64-bit)
21 //
22 // Feature contributors:
23 // Dougall Johnson (sample-exact seeking)
24 //
25 // Bugfix/warning contributors:
26 // Terje Mathisen Niklas Frykholm Andy Hill
27 // Casey Muratori John Bolton Gargaj
28 // Laurent Gomila Marc LeBlanc Ronny Chevalier
29 // Bernhard Wodo Evan Balster github:alxprd
30 // Tom Beaumont Ingo Leitgeb Nicolas Guillemot
31 // Phillip Bennefall Rohit Thiago Goulart
32 // github:manxorist Saga Musix github:infatum
33 // Timur Gagiev Maxwell Koo Peter Waller
34 // github:audinowho Dougall Johnson David Reid
35 // github:Clownacy Pedro J. Estebanez Remi Verschelde
36 // AnthoFoxo github:morlat Gabriel Ravier
37 //
38 // Partial history:
39 // 1.22 - 2021-07-11 - various small fixes
40 // 1.21 - 2021-07-02 - fix bug for files with no comments
41 // 1.20 - 2020-07-11 - several small fixes
42 // 1.19 - 2020-02-05 - warnings
43 // 1.18 - 2020-02-02 - fix seek bugs; parse header comments; misc warnings etc.
44 // 1.17 - 2019-07-08 - fix CVE-2019-13217..CVE-2019-13223 (by ForAllSecure)
45 // 1.16 - 2019-03-04 - fix warnings
46 // 1.15 - 2019-02-07 - explicit failure if Ogg Skeleton data is found
47 // 1.14 - 2018-02-11 - delete bogus dealloca usage
48 // 1.13 - 2018-01-29 - fix truncation of last frame (hopefully)
49 // 1.12 - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
50 // 1.11 - 2017-07-23 - fix MinGW compilation
51 // 1.10 - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
52 // 1.09 - 2016-04-04 - back out 'truncation of last frame' fix from previous version
53 // 1.08 - 2016-04-02 - warnings; setup memory leaks; truncation of last frame
54 // 1.07 - 2015-01-16 - fixes for crashes on invalid files; warning fixes; const
55 // 1.06 - 2015-08-31 - full, correct support for seeking API (Dougall Johnson)
56 // some crash fixes when out of memory or with corrupt files
57 // fix some inappropriately signed shifts
58 // 1.05 - 2015-04-19 - don't define __forceinline if it's redundant
59 // 1.04 - 2014-08-27 - fix missing const-correct case in API
60 // 1.03 - 2014-08-07 - warning fixes
61 // 1.02 - 2014-07-09 - declare qsort comparison as explicitly _cdecl in Windows
62 // 1.01 - 2014-06-18 - fix stb_vorbis_get_samples_float (interleaved was correct)
63 // 1.0 - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in >2-channel;
64 // (API change) report sample rate for decode-full-file funcs
65 //
66 // See end of file for full version history.
67
68
69 //////////////////////////////////////////////////////////////////////////////
70 //
71 // HEADER BEGINS HERE
72 //
73
74 #ifndef STB_VORBIS_INCLUDE_STB_VORBIS_H
75 #define STB_VORBIS_INCLUDE_STB_VORBIS_H
76
77 #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
78 #define STB_VORBIS_NO_STDIO 1
79 #endif
80
81 #ifndef STB_VORBIS_NO_STDIO
82 #include <stdio.h>
83 #endif
84
85 #ifdef __cplusplus
86 extern "C" {
87 #endif
88
89 /////////// THREAD SAFETY
90
91 // Individual stb_vorbis* handles are not thread-safe; you cannot decode from
92 // them from multiple threads at the same time. However, you can have multiple
93 // stb_vorbis* handles and decode from them independently in multiple thrads.
94
95
96 /////////// MEMORY ALLOCATION
97
98 // normally stb_vorbis uses malloc() to allocate memory at startup,
99 // and alloca() to allocate temporary memory during a frame on the
100 // stack. (Memory consumption will depend on the amount of setup
101 // data in the file and how you set the compile flags for speed
102 // vs. size. In my test files the maximal-size usage is ~150KB.)
103 //
104 // You can modify the wrapper functions in the source (setup_malloc,
105 // setup_temp_malloc, temp_malloc) to change this behavior, or you
106 // can use a simpler allocation model: you pass in a buffer from
107 // which stb_vorbis will allocate _all_ its memory (including the
108 // temp memory). "open" may fail with a VORBIS_outofmem if you
109 // do not pass in enough data; there is no way to determine how
110 // much you do need except to succeed (at which point you can
111 // query get_info to find the exact amount required. yes I know
112 // this is lame).
113 //
114 // If you pass in a non-NULL buffer of the type below, allocation
115 // will occur from it as described above. Otherwise just pass NULL
116 // to use malloc()/alloca()
117
118 typedef struct
119 {
120 char *alloc_buffer;
121 int alloc_buffer_length_in_bytes;
122 } stb_vorbis_alloc;
123
124
125 /////////// FUNCTIONS USEABLE WITH ALL INPUT MODES
126
127 typedef struct stb_vorbis stb_vorbis;
128
129 typedef struct
130 {
131 unsigned int sample_rate;
132 int channels;
133
134 unsigned int setup_memory_required;
135 unsigned int setup_temp_memory_required;
136 unsigned int temp_memory_required;
137
138 int max_frame_size;
139 } stb_vorbis_info;
140
141 typedef struct
142 {
143 char *vendor;
144
145 int comment_list_length;
146 char **comment_list;
147 } stb_vorbis_comment;
148
149 // get general information about the file
150 extern stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f);
151
152 // get ogg comments
153 extern stb_vorbis_comment stb_vorbis_get_comment(stb_vorbis *f);
154
155 // get the last error detected (clears it, too)
156 extern int stb_vorbis_get_error(stb_vorbis *f);
157
158 // close an ogg vorbis file and free all memory in use
159 extern void stb_vorbis_close(stb_vorbis *f);
160
161 // this function returns the offset (in samples) from the beginning of the
162 // file that will be returned by the next decode, if it is known, or -1
163 // otherwise. after a flush_pushdata() call, this may take a while before
164 // it becomes valid again.
165 // NOT WORKING YET after a seek with PULLDATA API
166 extern int stb_vorbis_get_sample_offset(stb_vorbis *f);
167
168 // returns the current seek point within the file, or offset from the beginning
169 // of the memory buffer. In pushdata mode it returns 0.
170 extern unsigned int stb_vorbis_get_file_offset(stb_vorbis *f);
171
172 /////////// PUSHDATA API
173
174 #ifndef STB_VORBIS_NO_PUSHDATA_API
175
176 // this API allows you to get blocks of data from any source and hand
177 // them to stb_vorbis. you have to buffer them; stb_vorbis will tell
178 // you how much it used, and you have to give it the rest next time;
179 // and stb_vorbis may not have enough data to work with and you will
180 // need to give it the same data again PLUS more. Note that the Vorbis
181 // specification does not bound the size of an individual frame.
182
183 extern stb_vorbis *stb_vorbis_open_pushdata(
184 const unsigned char * datablock, int datablock_length_in_bytes,
185 int *datablock_memory_consumed_in_bytes,
186 int *error,
187 const stb_vorbis_alloc *alloc_buffer);
188 // create a vorbis decoder by passing in the initial data block containing
189 // the ogg&vorbis headers (you don't need to do parse them, just provide
190 // the first N bytes of the file--you're told if it's not enough, see below)
191 // on success, returns an stb_vorbis *, does not set error, returns the amount of
192 // data parsed/consumed on this call in *datablock_memory_consumed_in_bytes;
193 // on failure, returns NULL on error and sets *error, does not change *datablock_memory_consumed
194 // if returns NULL and *error is VORBIS_need_more_data, then the input block was
195 // incomplete and you need to pass in a larger block from the start of the file
196
197 extern int stb_vorbis_decode_frame_pushdata(
198 stb_vorbis *f,
199 const unsigned char *datablock, int datablock_length_in_bytes,
200 int *channels, // place to write number of float * buffers
201 float ***output, // place to write float ** array of float * buffers
202 int *samples // place to write number of output samples
203 );
204 // decode a frame of audio sample data if possible from the passed-in data block
205 //
206 // return value: number of bytes we used from datablock
207 //
208 // possible cases:
209 // 0 bytes used, 0 samples output (need more data)
210 // N bytes used, 0 samples output (resynching the stream, keep going)
211 // N bytes used, M samples output (one frame of data)
212 // note that after opening a file, you will ALWAYS get one N-bytes,0-sample
213 // frame, because Vorbis always "discards" the first frame.
214 //
215 // Note that on resynch, stb_vorbis will rarely consume all of the buffer,
216 // instead only datablock_length_in_bytes-3 or less. This is because it wants
217 // to avoid missing parts of a page header if they cross a datablock boundary,
218 // without writing state-machiney code to record a partial detection.
219 //
220 // The number of channels returned are stored in *channels (which can be
221 // NULL--it is always the same as the number of channels reported by
222 // get_info). *output will contain an array of float* buffers, one per
223 // channel. In other words, (*output)[0][0] contains the first sample from
224 // the first channel, and (*output)[1][0] contains the first sample from
225 // the second channel.
226 //
227 // *output points into stb_vorbis's internal output buffer storage; these
228 // buffers are owned by stb_vorbis and application code should not free
229 // them or modify their contents. They are transient and will be overwritten
230 // once you ask for more data to get decoded, so be sure to grab any data
231 // you need before then.
232
233 extern void stb_vorbis_flush_pushdata(stb_vorbis *f);
234 // inform stb_vorbis that your next datablock will not be contiguous with
235 // previous ones (e.g. you've seeked in the data); future attempts to decode
236 // frames will cause stb_vorbis to resynchronize (as noted above), and
237 // once it sees a valid Ogg page (typically 4-8KB, as large as 64KB), it
238 // will begin decoding the _next_ frame.
239 //
240 // if you want to seek using pushdata, you need to seek in your file, then
241 // call stb_vorbis_flush_pushdata(), then start calling decoding, then once
242 // decoding is returning you data, call stb_vorbis_get_sample_offset, and
243 // if you don't like the result, seek your file again and repeat.
244 #endif
245
246
247 ////////// PULLING INPUT API
248
249 #ifndef STB_VORBIS_NO_PULLDATA_API
250 // This API assumes stb_vorbis is allowed to pull data from a source--
251 // either a block of memory containing the _entire_ vorbis stream, or a
252 // FILE * that you or it create, or possibly some other reading mechanism
253 // if you go modify the source to replace the FILE * case with some kind
254 // of callback to your code. (But if you don't support seeking, you may
255 // just want to go ahead and use pushdata.)
256
257 #if !defined(STB_VORBIS_NO_STDIO) && !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
258 extern int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output);
259 #endif
260 #if !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
261 extern int stb_vorbis_decode_memory(const unsigned char *mem, int len, int *channels, int *sample_rate, short **output);
262 #endif
263 // decode an entire file and output the data interleaved into a malloc()ed
264 // buffer stored in *output. The return value is the number of samples
265 // decoded, or -1 if the file could not be opened or was not an ogg vorbis file.
266 // When you're done with it, just free() the pointer returned in *output.
267
268 extern stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len,
269 int *error, const stb_vorbis_alloc *alloc_buffer);
270 // create an ogg vorbis decoder from an ogg vorbis stream in memory (note
271 // this must be the entire stream!). on failure, returns NULL and sets *error
272
273 #ifndef STB_VORBIS_NO_STDIO
274 extern stb_vorbis * stb_vorbis_open_filename(const char *filename,
275 int *error, const stb_vorbis_alloc *alloc_buffer);
276 // create an ogg vorbis decoder from a filename via fopen(). on failure,
277 // returns NULL and sets *error (possibly to VORBIS_file_open_failure).
278
279 extern stb_vorbis * stb_vorbis_open_file(FILE *f, int close_handle_on_close,
280 int *error, const stb_vorbis_alloc *alloc_buffer);
281 // create an ogg vorbis decoder from an open FILE *, looking for a stream at
282 // the _current_ seek point (ftell). on failure, returns NULL and sets *error.
283 // note that stb_vorbis must "own" this stream; if you seek it in between
284 // calls to stb_vorbis, it will become confused. Moreover, if you attempt to
285 // perform stb_vorbis_seek_*() operations on this file, it will assume it
286 // owns the _entire_ rest of the file after the start point. Use the next
287 // function, stb_vorbis_open_file_section(), to limit it.
288
289 extern stb_vorbis * stb_vorbis_open_file_section(FILE *f, int close_handle_on_close,
290 int *error, const stb_vorbis_alloc *alloc_buffer, unsigned int len);
291 // create an ogg vorbis decoder from an open FILE *, looking for a stream at
292 // the _current_ seek point (ftell); the stream will be of length 'len' bytes.
293 // on failure, returns NULL and sets *error. note that stb_vorbis must "own"
294 // this stream; if you seek it in between calls to stb_vorbis, it will become
295 // confused.
296 #endif
297
298 extern int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number);
299 extern int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number);
300 // these functions seek in the Vorbis file to (approximately) 'sample_number'.
301 // after calling seek_frame(), the next call to get_frame_*() will include
302 // the specified sample. after calling stb_vorbis_seek(), the next call to
303 // stb_vorbis_get_samples_* will start with the specified sample. If you
304 // do not need to seek to EXACTLY the target sample when using get_samples_*,
305 // you can also use seek_frame().
306
307 extern int stb_vorbis_seek_start(stb_vorbis *f);
308 // this function is equivalent to stb_vorbis_seek(f,0)
309
310 extern unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f);
311 extern float stb_vorbis_stream_length_in_seconds(stb_vorbis *f);
312 // these functions return the total length of the vorbis stream
313
314 extern int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output);
315 // decode the next frame and return the number of samples. the number of
316 // channels returned are stored in *channels (which can be NULL--it is always
317 // the same as the number of channels reported by get_info). *output will
318 // contain an array of float* buffers, one per channel. These outputs will
319 // be overwritten on the next call to stb_vorbis_get_frame_*.
320 //
321 // You generally should not intermix calls to stb_vorbis_get_frame_*()
322 // and stb_vorbis_get_samples_*(), since the latter calls the former.
323
324 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
325 extern int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts);
326 extern int stb_vorbis_get_frame_short (stb_vorbis *f, int num_c, short **buffer, int num_samples);
327 #endif
328 // decode the next frame and return the number of *samples* per channel.
329 // Note that for interleaved data, you pass in the number of shorts (the
330 // size of your array), but the return value is the number of samples per
331 // channel, not the total number of samples.
332 //
333 // The data is coerced to the number of channels you request according to the
334 // channel coercion rules (see below). You must pass in the size of your
335 // buffer(s) so that stb_vorbis will not overwrite the end of the buffer.
336 // The maximum buffer size needed can be gotten from get_info(); however,
337 // the Vorbis I specification implies an absolute maximum of 4096 samples
338 // per channel.
339
340 // Channel coercion rules:
341 // Let M be the number of channels requested, and N the number of channels present,
342 // and Cn be the nth channel; let stereo L be the sum of all L and center channels,
343 // and stereo R be the sum of all R and center channels (channel assignment from the
344 // vorbis spec).
345 // M N output
346 // 1 k sum(Ck) for all k
347 // 2 * stereo L, stereo R
348 // k l k > l, the first l channels, then 0s
349 // k l k <= l, the first k channels
350 // Note that this is not _good_ surround etc. mixing at all! It's just so
351 // you get something useful.
352
353 extern int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats);
354 extern int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples);
355 // gets num_samples samples, not necessarily on a frame boundary--this requires
356 // buffering so you have to supply the buffers. DOES NOT APPLY THE COERCION RULES.
357 // Returns the number of samples stored per channel; it may be less than requested
358 // at the end of the file. If there are no more samples in the file, returns 0.
359
360 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
361 extern int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts);
362 extern int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int num_samples);
363 #endif
364 // gets num_samples samples, not necessarily on a frame boundary--this requires
365 // buffering so you have to supply the buffers. Applies the coercion rules above
366 // to produce 'channels' channels. Returns the number of samples stored per channel;
367 // it may be less than requested at the end of the file. If there are no more
368 // samples in the file, returns 0.
369
370 #endif
371
372 //////// ERROR CODES
373
374 enum STBVorbisError
375 {
376 VORBIS__no_error,
377
378 VORBIS_need_more_data=1, // not a real error
379
380 VORBIS_invalid_api_mixing, // can't mix API modes
381 VORBIS_outofmem, // not enough memory
382 VORBIS_feature_not_supported, // uses floor 0
383 VORBIS_too_many_channels, // STB_VORBIS_MAX_CHANNELS is too small
384 VORBIS_file_open_failure, // fopen() failed
385 VORBIS_seek_without_length, // can't seek in unknown-length file
386
387 VORBIS_unexpected_eof=10, // file is truncated?
388 VORBIS_seek_invalid, // seek past EOF
389
390 // decoding errors (corrupt/invalid stream) -- you probably
391 // don't care about the exact details of these
392
393 // vorbis errors:
394 VORBIS_invalid_setup=20,
395 VORBIS_invalid_stream,
396
397 // ogg errors:
398 VORBIS_missing_capture_pattern=30,
399 VORBIS_invalid_stream_structure_version,
400 VORBIS_continued_packet_flag_invalid,
401 VORBIS_incorrect_stream_serial_number,
402 VORBIS_invalid_first_page,
403 VORBIS_bad_packet_type,
404 VORBIS_cant_find_last_page,
405 VORBIS_seek_failed,
406 VORBIS_ogg_skeleton_not_supported
407 };
408
409
410 #ifdef __cplusplus
411 }
412 #endif
413
414 #endif // STB_VORBIS_INCLUDE_STB_VORBIS_H
415 //
416 // HEADER ENDS HERE
417 //
418 //////////////////////////////////////////////////////////////////////////////
419
420 #ifndef STB_VORBIS_HEADER_ONLY
421
422 // global configuration settings (e.g. set these in the project/makefile),
423 // or just set them in this file at the top (although ideally the first few
424 // should be visible when the header file is compiled too, although it's not
425 // crucial)
426
427 // STB_VORBIS_NO_PUSHDATA_API
428 // does not compile the code for the various stb_vorbis_*_pushdata()
429 // functions
430 // #define STB_VORBIS_NO_PUSHDATA_API
431
432 // STB_VORBIS_NO_PULLDATA_API
433 // does not compile the code for the non-pushdata APIs
434 // #define STB_VORBIS_NO_PULLDATA_API
435
436 // STB_VORBIS_NO_STDIO
437 // does not compile the code for the APIs that use FILE *s internally
438 // or externally (implied by STB_VORBIS_NO_PULLDATA_API)
439 // #define STB_VORBIS_NO_STDIO
440
441 // STB_VORBIS_NO_INTEGER_CONVERSION
442 // does not compile the code for converting audio sample data from
443 // float to integer (implied by STB_VORBIS_NO_PULLDATA_API)
444 // #define STB_VORBIS_NO_INTEGER_CONVERSION
445
446 // STB_VORBIS_NO_FAST_SCALED_FLOAT
447 // does not use a fast float-to-int trick to accelerate float-to-int on
448 // most platforms which requires endianness be defined correctly.
449 //#define STB_VORBIS_NO_FAST_SCALED_FLOAT
450
451
452 // STB_VORBIS_MAX_CHANNELS [number]
453 // globally define this to the maximum number of channels you need.
454 // The spec does not put a restriction on channels except that
455 // the count is stored in a byte, so 255 is the hard limit.
456 // Reducing this saves about 16 bytes per value, so using 16 saves
457 // (255-16)*16 or around 4KB. Plus anything other memory usage
458 // I forgot to account for. Can probably go as low as 8 (7.1 audio),
459 // 6 (5.1 audio), or 2 (stereo only).
460 #ifndef STB_VORBIS_MAX_CHANNELS
461 #define STB_VORBIS_MAX_CHANNELS 16 // enough for anyone?
462 #endif
463
464 // STB_VORBIS_PUSHDATA_CRC_COUNT [number]
465 // after a flush_pushdata(), stb_vorbis begins scanning for the
466 // next valid page, without backtracking. when it finds something
467 // that looks like a page, it streams through it and verifies its
468 // CRC32. Should that validation fail, it keeps scanning. But it's
469 // possible that _while_ streaming through to check the CRC32 of
470 // one candidate page, it sees another candidate page. This #define
471 // determines how many "overlapping" candidate pages it can search
472 // at once. Note that "real" pages are typically ~4KB to ~8KB, whereas
473 // garbage pages could be as big as 64KB, but probably average ~16KB.
474 // So don't hose ourselves by scanning an apparent 64KB page and
475 // missing a ton of real ones in the interim; so minimum of 2
476 #ifndef STB_VORBIS_PUSHDATA_CRC_COUNT
477 #define STB_VORBIS_PUSHDATA_CRC_COUNT 4
478 #endif
479
480 // STB_VORBIS_FAST_HUFFMAN_LENGTH [number]
481 // sets the log size of the huffman-acceleration table. Maximum
482 // supported value is 24. with larger numbers, more decodings are O(1),
483 // but the table size is larger so worse cache missing, so you'll have
484 // to probe (and try multiple ogg vorbis files) to find the sweet spot.
485 #ifndef STB_VORBIS_FAST_HUFFMAN_LENGTH
486 #define STB_VORBIS_FAST_HUFFMAN_LENGTH 10
487 #endif
488
489 // STB_VORBIS_FAST_BINARY_LENGTH [number]
490 // sets the log size of the binary-search acceleration table. this
491 // is used in similar fashion to the fast-huffman size to set initial
492 // parameters for the binary search
493
494 // STB_VORBIS_FAST_HUFFMAN_INT
495 // The fast huffman tables are much more efficient if they can be
496 // stored as 16-bit results instead of 32-bit results. This restricts
497 // the codebooks to having only 65535 possible outcomes, though.
498 // (At least, accelerated by the huffman table.)
499 #ifndef STB_VORBIS_FAST_HUFFMAN_INT
500 #define STB_VORBIS_FAST_HUFFMAN_SHORT
501 #endif
502
503 // STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
504 // If the 'fast huffman' search doesn't succeed, then stb_vorbis falls
505 // back on binary searching for the correct one. This requires storing
506 // extra tables with the huffman codes in sorted order. Defining this
507 // symbol trades off space for speed by forcing a linear search in the
508 // non-fast case, except for "sparse" codebooks.
509 // #define STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
510
511 // STB_VORBIS_DIVIDES_IN_RESIDUE
512 // stb_vorbis precomputes the result of the scalar residue decoding
513 // that would otherwise require a divide per chunk. you can trade off
514 // space for time by defining this symbol.
515 // #define STB_VORBIS_DIVIDES_IN_RESIDUE
516
517 // STB_VORBIS_DIVIDES_IN_CODEBOOK
518 // vorbis VQ codebooks can be encoded two ways: with every case explicitly
519 // stored, or with all elements being chosen from a small range of values,
520 // and all values possible in all elements. By default, stb_vorbis expands
521 // this latter kind out to look like the former kind for ease of decoding,
522 // because otherwise an integer divide-per-vector-element is required to
523 // unpack the index. If you define STB_VORBIS_DIVIDES_IN_CODEBOOK, you can
524 // trade off storage for speed.
525 //#define STB_VORBIS_DIVIDES_IN_CODEBOOK
526
527 #ifdef STB_VORBIS_CODEBOOK_SHORTS
528 #error "STB_VORBIS_CODEBOOK_SHORTS is no longer supported as it produced incorrect results for some input formats"
529 #endif
530
531 // STB_VORBIS_DIVIDE_TABLE
532 // this replaces small integer divides in the floor decode loop with
533 // table lookups. made less than 1% difference, so disabled by default.
534
535 // STB_VORBIS_NO_INLINE_DECODE
536 // disables the inlining of the scalar codebook fast-huffman decode.
537 // might save a little codespace; useful for debugging
538 // #define STB_VORBIS_NO_INLINE_DECODE
539
540 // STB_VORBIS_NO_DEFER_FLOOR
541 // Normally we only decode the floor without synthesizing the actual
542 // full curve. We can instead synthesize the curve immediately. This
543 // requires more memory and is very likely slower, so I don't think
544 // you'd ever want to do it except for debugging.
545 // #define STB_VORBIS_NO_DEFER_FLOOR
546
547
548
549
550 //////////////////////////////////////////////////////////////////////////////
551
552 #ifdef STB_VORBIS_NO_PULLDATA_API
553 #define STB_VORBIS_NO_INTEGER_CONVERSION
554 #define STB_VORBIS_NO_STDIO
555 #endif
556
557 #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
558 #define STB_VORBIS_NO_STDIO 1
559 #endif
560
561 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
562 #ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
563
564 // only need endianness for fast-float-to-int, which we don't
565 // use for pushdata
566
567 #ifndef STB_VORBIS_BIG_ENDIAN
568 #define STB_VORBIS_ENDIAN 0
569 #else
570 #define STB_VORBIS_ENDIAN 1
571 #endif
572
573 #endif
574 #endif
575
576
577 #ifndef STB_VORBIS_NO_STDIO
578 #include <stdio.h>
579 #endif
580
581 #ifndef STB_VORBIS_NO_CRT
582 #include <stdlib.h>
583 #include <string.h>
584 #include <assert.h>
585 #include <math.h>
586
587 // find definition of alloca if it's not in stdlib.h:
588 #if defined(_MSC_VER) || defined(__MINGW32__)
589 #include <malloc.h>
590 #endif
591 #if defined(__linux__) || defined(__linux) || defined(__sun__) || defined(__EMSCRIPTEN__) || defined(__NEWLIB__)
592 #include <alloca.h>
593 #endif
594 #else // STB_VORBIS_NO_CRT
595 #define NULL 0
596 #define malloc(s) 0
597 #define free(s) ((void) 0)
598 #define realloc(s) 0
599 #endif // STB_VORBIS_NO_CRT
600
601 #include <limits.h>
602
603 #ifdef __MINGW32__
604 // eff you mingw:
605 // "fixed":
606 // http://sourceforge.net/p/mingw-w64/mailman/message/32882927/
607 // "no that broke the build, reverted, who cares about C":
608 // http://sourceforge.net/p/mingw-w64/mailman/message/32890381/
609 #ifdef __forceinline
610 #undef __forceinline
611 #endif
612 #define __forceinline
613 #ifndef alloca
614 #define alloca __builtin_alloca
615 #endif
616 #elif !defined(_MSC_VER)
617 #if __GNUC__
618 #define __forceinline inline
619 #else
620 #define __forceinline
621 #endif
622 #endif
623
624 #if STB_VORBIS_MAX_CHANNELS > 256
625 #error "Value of STB_VORBIS_MAX_CHANNELS outside of allowed range"
626 #endif
627
628 #if STB_VORBIS_FAST_HUFFMAN_LENGTH > 24
629 #error "Value of STB_VORBIS_FAST_HUFFMAN_LENGTH outside of allowed range"
630 #endif
631
632
633 #if 0
634 #include <crtdbg.h>
635 #define CHECK(f) _CrtIsValidHeapPointer(f->channel_buffers[1])
636 #else
637 #define CHECK(f) ((void) 0)
638 #endif
639
640 #define MAX_BLOCKSIZE_LOG 13 // from specification
641 #define MAX_BLOCKSIZE (1 << MAX_BLOCKSIZE_LOG)
642
643
644 typedef unsigned char uint8;
645 typedef signed char int8;
646 typedef unsigned short uint16;
647 typedef signed short int16;
648 typedef unsigned int uint32;
649 typedef signed int int32;
650
651 #ifndef TRUE
652 #define TRUE 1
653 #define FALSE 0
654 #endif
655
656 typedef float codetype;
657
658 #ifdef _MSC_VER
659 #define STBV_NOTUSED(v) (void)(v)
660 #else
661 #define STBV_NOTUSED(v) (void)sizeof(v)
662 #endif
663
664 // @NOTE
665 //
666 // Some arrays below are tagged "//varies", which means it's actually
667 // a variable-sized piece of data, but rather than malloc I assume it's
668 // small enough it's better to just allocate it all together with the
669 // main thing
670 //
671 // Most of the variables are specified with the smallest size I could pack
672 // them into. It might give better performance to make them all full-sized
673 // integers. It should be safe to freely rearrange the structures or change
674 // the sizes larger--nothing relies on silently truncating etc., nor the
675 // order of variables.
676
677 #define FAST_HUFFMAN_TABLE_SIZE (1 << STB_VORBIS_FAST_HUFFMAN_LENGTH)
678 #define FAST_HUFFMAN_TABLE_MASK (FAST_HUFFMAN_TABLE_SIZE - 1)
679
680 typedef struct
681 {
682 int dimensions, entries;
683 uint8 *codeword_lengths;
684 float minimum_value;
685 float delta_value;
686 uint8 value_bits;
687 uint8 lookup_type;
688 uint8 sequence_p;
689 uint8 sparse;
690 uint32 lookup_values;
691 codetype *multiplicands;
692 uint32 *codewords;
693 #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
694 int16 fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
695 #else
696 int32 fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
697 #endif
698 uint32 *sorted_codewords;
699 int *sorted_values;
700 int sorted_entries;
701 } Codebook;
702
703 typedef struct
704 {
705 uint8 order;
706 uint16 rate;
707 uint16 bark_map_size;
708 uint8 amplitude_bits;
709 uint8 amplitude_offset;
710 uint8 number_of_books;
711 uint8 book_list[16]; // varies
712 } Floor0;
713
714 typedef struct
715 {
716 uint8 partitions;
717 uint8 partition_class_list[32]; // varies
718 uint8 class_dimensions[16]; // varies
719 uint8 class_subclasses[16]; // varies
720 uint8 class_masterbooks[16]; // varies
721 int16 subclass_books[16][8]; // varies
722 uint16 Xlist[31*8+2]; // varies
723 uint8 sorted_order[31*8+2];
724 uint8 neighbors[31*8+2][2];
725 uint8 floor1_multiplier;
726 uint8 rangebits;
727 int values;
728 } Floor1;
729
730 typedef union
731 {
732 Floor0 floor0;
733 Floor1 floor1;
734 } Floor;
735
736 typedef struct
737 {
738 uint32 begin, end;
739 uint32 part_size;
740 uint8 classifications;
741 uint8 classbook;
742 uint8 **classdata;
743 int16 (*residue_books)[8];
744 } Residue;
745
746 typedef struct
747 {
748 uint8 magnitude;
749 uint8 angle;
750 uint8 mux;
751 } MappingChannel;
752
753 typedef struct
754 {
755 uint16 coupling_steps;
756 MappingChannel *chan;
757 uint8 submaps;
758 uint8 submap_floor[15]; // varies
759 uint8 submap_residue[15]; // varies
760 } Mapping;
761
762 typedef struct
763 {
764 uint8 blockflag;
765 uint8 mapping;
766 uint16 windowtype;
767 uint16 transformtype;
768 } Mode;
769
770 typedef struct
771 {
772 uint32 goal_crc; // expected crc if match
773 int bytes_left; // bytes left in packet
774 uint32 crc_so_far; // running crc
775 int bytes_done; // bytes processed in _current_ chunk
776 uint32 sample_loc; // granule pos encoded in page
777 } CRCscan;
778
779 typedef struct
780 {
781 uint32 page_start, page_end;
782 uint32 last_decoded_sample;
783 } ProbedPage;
784
785 struct stb_vorbis
786 {
787 // user-accessible info
788 unsigned int sample_rate;
789 int channels;
790
791 unsigned int setup_memory_required;
792 unsigned int temp_memory_required;
793 unsigned int setup_temp_memory_required;
794
795 char *vendor;
796 int comment_list_length;
797 char **comment_list;
798
799 // input config
800 #ifndef STB_VORBIS_NO_STDIO
801 FILE *f;
802 uint32 f_start;
803 int close_on_free;
804 #endif
805
806 uint8 *stream;
807 uint8 *stream_start;
808 uint8 *stream_end;
809
810 uint32 stream_len;
811
812 uint8 push_mode;
813
814 // the page to seek to when seeking to start, may be zero
815 uint32 first_audio_page_offset;
816
817 // p_first is the page on which the first audio packet ends
818 // (but not necessarily the page on which it starts)
819 ProbedPage p_first, p_last;
820
821 // memory management
822 stb_vorbis_alloc alloc;
823 int setup_offset;
824 int temp_offset;
825
826 // run-time results
827 int eof;
828 enum STBVorbisError error;
829
830 // user-useful data
831
832 // header info
833 int blocksize[2];
834 int blocksize_0, blocksize_1;
835 int codebook_count;
836 Codebook *codebooks;
837 int floor_count;
838 uint16 floor_types[64]; // varies
839 Floor *floor_config;
840 int residue_count;
841 uint16 residue_types[64]; // varies
842 Residue *residue_config;
843 int mapping_count;
844 Mapping *mapping;
845 int mode_count;
846 Mode mode_config[64]; // varies
847
848 uint32 total_samples;
849
850 // decode buffer
851 float *channel_buffers[STB_VORBIS_MAX_CHANNELS];
852 float *outputs [STB_VORBIS_MAX_CHANNELS];
853
854 float *previous_window[STB_VORBIS_MAX_CHANNELS];
855 int previous_length;
856
857 #ifndef STB_VORBIS_NO_DEFER_FLOOR
858 int16 *finalY[STB_VORBIS_MAX_CHANNELS];
859 #else
860 float *floor_buffers[STB_VORBIS_MAX_CHANNELS];
861 #endif
862
863 uint32 current_loc; // sample location of next frame to decode
864 int current_loc_valid;
865
866 // per-blocksize precomputed data
867
868 // twiddle factors
869 float *A[2],*B[2],*C[2];
870 float *window[2];
871 uint16 *bit_reverse[2];
872
873 // current page/packet/segment streaming info
874 uint32 serial; // stream serial number for verification
875 int last_page;
876 int segment_count;
877 uint8 segments[255];
878 uint8 page_flag;
879 uint8 bytes_in_seg;
880 uint8 first_decode;
881 int next_seg;
882 int last_seg; // flag that we're on the last segment
883 int last_seg_which; // what was the segment number of the last seg?
884 uint32 acc;
885 int valid_bits;
886 int packet_bytes;
887 int end_seg_with_known_loc;
888 uint32 known_loc_for_packet;
889 int discard_samples_deferred;
890 uint32 samples_output;
891
892 // push mode scanning
893 int page_crc_tests; // only in push_mode: number of tests active; -1 if not searching
894 #ifndef STB_VORBIS_NO_PUSHDATA_API
895 CRCscan scan[STB_VORBIS_PUSHDATA_CRC_COUNT];
896 #endif
897
898 // sample-access
899 int channel_buffer_start;
900 int channel_buffer_end;
901 };
902
903 #if defined(STB_VORBIS_NO_PUSHDATA_API)
904 #define IS_PUSH_MODE(f) FALSE
905 #elif defined(STB_VORBIS_NO_PULLDATA_API)
906 #define IS_PUSH_MODE(f) TRUE
907 #else
908 #define IS_PUSH_MODE(f) ((f)->push_mode)
909 #endif
910
911 typedef struct stb_vorbis vorb;
912
913 static int error(vorb *f, enum STBVorbisError e)
914 {
915 f->error = e;
916 if (!f->eof && e != VORBIS_need_more_data) {
917 f->error=e; // breakpoint for debugging
918 }
919 return 0;
920 }
921
922
923 // these functions are used for allocating temporary memory
924 // while decoding. if you can afford the stack space, use
925 // alloca(); otherwise, provide a temp buffer and it will
926 // allocate out of those.
927
928 #define array_size_required(count,size) (count*(sizeof(void *)+(size)))
929
930 #define temp_alloc(f,size) (f->alloc.alloc_buffer ? setup_temp_malloc(f,size) : alloca(size))
931 #define temp_free(f,p) (void)0
932 #define temp_alloc_save(f) ((f)->temp_offset)
933 #define temp_alloc_restore(f,p) ((f)->temp_offset = (p))
934
935 #define temp_block_array(f,count,size) make_block_array(temp_alloc(f,array_size_required(count,size)), count, size)
936
937 // given a sufficiently large block of memory, make an array of pointers to subblocks of it
938 static void *make_block_array(void *mem, int count, int size)
939 {
940 int i;
941 void ** p = (void **) mem;
942 char *q = (char *) (p + count);
943 for (i=0; i < count; ++i) {
944 p[i] = q;
945 q += size;
946 }
947 return p;
948 }
949
950 static void *setup_malloc(vorb *f, int sz)
951 {
952 sz = (sz+7) & ~7; // round up to nearest 8 for alignment of future allocs.
953 f->setup_memory_required += sz;
954 if (f->alloc.alloc_buffer) {
955 void *p = (char *) f->alloc.alloc_buffer + f->setup_offset;
956 if (f->setup_offset + sz > f->temp_offset) return NULL;
957 f->setup_offset += sz;
958 return p;
959 }
960 return sz ? malloc(sz) : NULL;
961 }
962
963 static void setup_free(vorb *f, void *p)
964 {
965 if (f->alloc.alloc_buffer) return; // do nothing; setup mem is a stack
966 free(p);
967 }
968
969 static void *setup_temp_malloc(vorb *f, int sz)
970 {
971 sz = (sz+7) & ~7; // round up to nearest 8 for alignment of future allocs.
972 if (f->alloc.alloc_buffer) {
973 if (f->temp_offset - sz < f->setup_offset) return NULL;
974 f->temp_offset -= sz;
975 return (char *) f->alloc.alloc_buffer + f->temp_offset;
976 }
977 return malloc(sz);
978 }
979
980 static void setup_temp_free(vorb *f, void *p, int sz)
981 {
982 if (f->alloc.alloc_buffer) {
983 f->temp_offset += (sz+7)&~7;
984 return;
985 }
986 free(p);
987 }
988
989 #define CRC32_POLY 0x04c11db7 // from spec
990
991 static uint32 crc_table[256];
992 static void crc32_init(void)
993 {
994 int i,j;
995 uint32 s;
996 for(i=0; i < 256; i++) {
997 for (s=(uint32) i << 24, j=0; j < 8; ++j)
998 s = (s << 1) ^ (s >= (1U<<31) ? CRC32_POLY : 0);
999 crc_table[i] = s;
1000 }
1001 }
1002
1003 static __forceinline uint32 crc32_update(uint32 crc, uint8 byte)
1004 {
1005 return (crc << 8) ^ crc_table[byte ^ (crc >> 24)];
1006 }
1007
1008
1009 // used in setup, and for huffman that doesn't go fast path
1010 static unsigned int bit_reverse(unsigned int n)
1011 {
1012 n = ((n & 0xAAAAAAAA) >> 1) | ((n & 0x55555555) << 1);
1013 n = ((n & 0xCCCCCCCC) >> 2) | ((n & 0x33333333) << 2);
1014 n = ((n & 0xF0F0F0F0) >> 4) | ((n & 0x0F0F0F0F) << 4);
1015 n = ((n & 0xFF00FF00) >> 8) | ((n & 0x00FF00FF) << 8);
1016 return (n >> 16) | (n << 16);
1017 }
1018
1019 static float square(float x)
1020 {
1021 return x*x;
1022 }
1023
1024 // this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3
1025 // as required by the specification. fast(?) implementation from stb.h
1026 // @OPTIMIZE: called multiple times per-packet with "constants"; move to setup
1027 static int ilog(int32 n)
1028 {
1029 static signed char log2_4[16] = { 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 };
1030
1031 if (n < 0) return 0; // signed n returns 0
1032
1033 // 2 compares if n < 16, 3 compares otherwise (4 if signed or n > 1<<29)
1034 if (n < (1 << 14))
1035 if (n < (1 << 4)) return 0 + log2_4[n ];
1036 else if (n < (1 << 9)) return 5 + log2_4[n >> 5];
1037 else return 10 + log2_4[n >> 10];
1038 else if (n < (1 << 24))
1039 if (n < (1 << 19)) return 15 + log2_4[n >> 15];
1040 else return 20 + log2_4[n >> 20];
1041 else if (n < (1 << 29)) return 25 + log2_4[n >> 25];
1042 else return 30 + log2_4[n >> 30];
1043 }
1044
1045 #ifndef M_PI
1046 #define M_PI 3.14159265358979323846264f // from CRC
1047 #endif
1048
1049 // code length assigned to a value with no huffman encoding
1050 #define NO_CODE 255
1051
1052 /////////////////////// LEAF SETUP FUNCTIONS //////////////////////////
1053 //
1054 // these functions are only called at setup, and only a few times
1055 // per file
1056
1057 static float float32_unpack(uint32 x)
1058 {
1059 // from the specification
1060 uint32 mantissa = x & 0x1fffff;
1061 uint32 sign = x & 0x80000000;
1062 uint32 exp = (x & 0x7fe00000) >> 21;
1063 double res = sign ? -(double)mantissa : (double)mantissa;
1064 return (float) ldexp((float)res, (int)exp-788);
1065 }
1066
1067
1068 // zlib & jpeg huffman tables assume that the output symbols
1069 // can either be arbitrarily arranged, or have monotonically
1070 // increasing frequencies--they rely on the lengths being sorted;
1071 // this makes for a very simple generation algorithm.
1072 // vorbis allows a huffman table with non-sorted lengths. This
1073 // requires a more sophisticated construction, since symbols in
1074 // order do not map to huffman codes "in order".
1075 static void add_entry(Codebook *c, uint32 huff_code, int symbol, int count, int len, uint32 *values)
1076 {
1077 if (!c->sparse) {
1078 c->codewords [symbol] = huff_code;
1079 } else {
1080 c->codewords [count] = huff_code;
1081 c->codeword_lengths[count] = len;
1082 values [count] = symbol;
1083 }
1084 }
1085
1086 static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values)
1087 {
1088 int i,k,m=0;
1089 uint32 available[32];
1090
1091 memset(available, 0, sizeof(available));
1092 // find the first entry
1093 for (k=0; k < n; ++k) if (len[k] < NO_CODE) break;
1094 if (k == n) { assert(c->sorted_entries == 0); return TRUE; }
1095 assert(len[k] < 32); // no error return required, code reading lens checks this
1096 // add to the list
1097 add_entry(c, 0, k, m++, len[k], values);
1098 // add all available leaves
1099 for (i=1; i <= len[k]; ++i)
1100 available[i] = 1U << (32-i);
1101 // note that the above code treats the first case specially,
1102 // but it's really the same as the following code, so they
1103 // could probably be combined (except the initial code is 0,
1104 // and I use 0 in available[] to mean 'empty')
1105 for (i=k+1; i < n; ++i) {
1106 uint32 res;
1107 int z = len[i], y;
1108 if (z == NO_CODE) continue;
1109 assert(z < 32); // no error return required, code reading lens checks this
1110 // find lowest available leaf (should always be earliest,
1111 // which is what the specification calls for)
1112 // note that this property, and the fact we can never have
1113 // more than one free leaf at a given level, isn't totally
1114 // trivial to prove, but it seems true and the assert never
1115 // fires, so!
1116 while (z > 0 && !available[z]) --z;
1117 if (z == 0) { return FALSE; }
1118 res = available[z];
1119 available[z] = 0;
1120 add_entry(c, bit_reverse(res), i, m++, len[i], values);
1121 // propagate availability up the tree
1122 if (z != len[i]) {
1123 for (y=len[i]; y > z; --y) {
1124 assert(available[y] == 0);
1125 available[y] = res + (1 << (32-y));
1126 }
1127 }
1128 }
1129 return TRUE;
1130 }
1131
1132 // accelerated huffman table allows fast O(1) match of all symbols
1133 // of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH
1134 static void compute_accelerated_huffman(Codebook *c)
1135 {
1136 int i, len;
1137 for (i=0; i < FAST_HUFFMAN_TABLE_SIZE; ++i)
1138 c->fast_huffman[i] = -1;
1139
1140 len = c->sparse ? c->sorted_entries : c->entries;
1141 #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
1142 if (len > 32767) len = 32767; // largest possible value we can encode!
1143 #endif
1144 for (i=0; i < len; ++i) {
1145 if (c->codeword_lengths[i] <= STB_VORBIS_FAST_HUFFMAN_LENGTH) {
1146 uint32 z = c->sparse ? bit_reverse(c->sorted_codewords[i]) : c->codewords[i];
1147 // set table entries for all bit combinations in the higher bits
1148 while (z < FAST_HUFFMAN_TABLE_SIZE) {
1149 c->fast_huffman[z] = i;
1150 z += 1 << c->codeword_lengths[i];
1151 }
1152 }
1153 }
1154 }
1155
1156 #ifdef _MSC_VER
1157 #define STBV_CDECL __cdecl
1158 #else
1159 #define STBV_CDECL
1160 #endif
1161
1162 static int STBV_CDECL uint32_compare(const void *p, const void *q)
1163 {
1164 uint32 x = * (uint32 *) p;
1165 uint32 y = * (uint32 *) q;
1166 return x < y ? -1 : x > y;
1167 }
1168
1169 static int include_in_sort(Codebook *c, uint8 len)
1170 {
1171 if (c->sparse) { assert(len != NO_CODE); return TRUE; }
1172 if (len == NO_CODE) return FALSE;
1173 if (len > STB_VORBIS_FAST_HUFFMAN_LENGTH) return TRUE;
1174 return FALSE;
1175 }
1176
1177 // if the fast table above doesn't work, we want to binary
1178 // search them... need to reverse the bits
1179 static void compute_sorted_huffman(Codebook *c, uint8 *lengths, uint32 *values)
1180 {
1181 int i, len;
1182 // build a list of all the entries
1183 // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN.
1184 // this is kind of a frivolous optimization--I don't see any performance improvement,
1185 // but it's like 4 extra lines of code, so.
1186 if (!c->sparse) {
1187 int k = 0;
1188 for (i=0; i < c->entries; ++i)
1189 if (include_in_sort(c, lengths[i]))
1190 c->sorted_codewords[k++] = bit_reverse(c->codewords[i]);
1191 assert(k == c->sorted_entries);
1192 } else {
1193 for (i=0; i < c->sorted_entries; ++i)
1194 c->sorted_codewords[i] = bit_reverse(c->codewords[i]);
1195 }
1196
1197 qsort(c->sorted_codewords, c->sorted_entries, sizeof(c->sorted_codewords[0]), uint32_compare);
1198 c->sorted_codewords[c->sorted_entries] = 0xffffffff;
1199
1200 len = c->sparse ? c->sorted_entries : c->entries;
1201 // now we need to indicate how they correspond; we could either
1202 // #1: sort a different data structure that says who they correspond to
1203 // #2: for each sorted entry, search the original list to find who corresponds
1204 // #3: for each original entry, find the sorted entry
1205 // #1 requires extra storage, #2 is slow, #3 can use binary search!
1206 for (i=0; i < len; ++i) {
1207 int huff_len = c->sparse ? lengths[values[i]] : lengths[i];
1208 if (include_in_sort(c,huff_len)) {
1209 uint32 code = bit_reverse(c->codewords[i]);
1210 int x=0, n=c->sorted_entries;
1211 while (n > 1) {
1212 // invariant: sc[x] <= code < sc[x+n]
1213 int m = x + (n >> 1);
1214 if (c->sorted_codewords[m] <= code) {
1215 x = m;
1216 n -= (n>>1);
1217 } else {
1218 n >>= 1;
1219 }
1220 }
1221 assert(c->sorted_codewords[x] == code);
1222 if (c->sparse) {
1223 c->sorted_values[x] = values[i];
1224 c->codeword_lengths[x] = huff_len;
1225 } else {
1226 c->sorted_values[x] = i;
1227 }
1228 }
1229 }
1230 }
1231
1232 // only run while parsing the header (3 times)
1233 static int vorbis_validate(uint8 *data)
1234 {
1235 static uint8 vorbis[6] = { 'v', 'o', 'r', 'b', 'i', 's' };
1236 return memcmp(data, vorbis, 6) == 0;
1237 }
1238
1239 // called from setup only, once per code book
1240 // (formula implied by specification)
1241 static int lookup1_values(int entries, int dim)
1242 {
1243 int r = (int) floor(exp((float) log((float) entries) / dim));
1244 if ((int) floor(pow((float) r+1, dim)) <= entries) // (int) cast for MinGW warning;
1245 ++r; // floor() to avoid _ftol() when non-CRT
1246 if (pow((float) r+1, dim) <= entries)
1247 return -1;
1248 if ((int) floor(pow((float) r, dim)) > entries)
1249 return -1;
1250 return r;
1251 }
1252
1253 // called twice per file
1254 static void compute_twiddle_factors(int n, float *A, float *B, float *C)
1255 {
1256 int n4 = n >> 2, n8 = n >> 3;
1257 int k,k2;
1258
1259 for (k=k2=0; k < n4; ++k,k2+=2) {
1260 A[k2 ] = (float) cos(4*k*M_PI/n);
1261 A[k2+1] = (float) -sin(4*k*M_PI/n);
1262 B[k2 ] = (float) cos((k2+1)*M_PI/n/2) * 0.5f;
1263 B[k2+1] = (float) sin((k2+1)*M_PI/n/2) * 0.5f;
1264 }
1265 for (k=k2=0; k < n8; ++k,k2+=2) {
1266 C[k2 ] = (float) cos(2*(k2+1)*M_PI/n);
1267 C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
1268 }
1269 }
1270
1271 static void compute_window(int n, float *window)
1272 {
1273 int n2 = n >> 1, i;
1274 for (i=0; i < n2; ++i)
1275 window[i] = (float) sin(0.5 * M_PI * square((float) sin((i - 0 + 0.5) / n2 * 0.5 * M_PI)));
1276 }
1277
1278 static void compute_bitreverse(int n, uint16 *rev)
1279 {
1280 int ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
1281 int i, n8 = n >> 3;
1282 for (i=0; i < n8; ++i)
1283 rev[i] = (bit_reverse(i) >> (32-ld+3)) << 2;
1284 }
1285
1286 static int init_blocksize(vorb *f, int b, int n)
1287 {
1288 int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3;
1289 f->A[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1290 f->B[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1291 f->C[b] = (float *) setup_malloc(f, sizeof(float) * n4);
1292 if (!f->A[b] || !f->B[b] || !f->C[b]) return error(f, VORBIS_outofmem);
1293 compute_twiddle_factors(n, f->A[b], f->B[b], f->C[b]);
1294 f->window[b] = (float *) setup_malloc(f, sizeof(float) * n2);
1295 if (!f->window[b]) return error(f, VORBIS_outofmem);
1296 compute_window(n, f->window[b]);
1297 f->bit_reverse[b] = (uint16 *) setup_malloc(f, sizeof(uint16) * n8);
1298 if (!f->bit_reverse[b]) return error(f, VORBIS_outofmem);
1299 compute_bitreverse(n, f->bit_reverse[b]);
1300 return TRUE;
1301 }
1302
1303 static void neighbors(uint16 *x, int n, int *plow, int *phigh)
1304 {
1305 int low = -1;
1306 int high = 65536;
1307 int i;
1308 for (i=0; i < n; ++i) {
1309 if (x[i] > low && x[i] < x[n]) { *plow = i; low = x[i]; }
1310 if (x[i] < high && x[i] > x[n]) { *phigh = i; high = x[i]; }
1311 }
1312 }
1313
1314 // this has been repurposed so y is now the original index instead of y
1315 typedef struct
1316 {
1317 uint16 x,id;
1318 } stbv__floor_ordering;
1319
1320 static int STBV_CDECL point_compare(const void *p, const void *q)
1321 {
1322 stbv__floor_ordering *a = (stbv__floor_ordering *) p;
1323 stbv__floor_ordering *b = (stbv__floor_ordering *) q;
1324 return a->x < b->x ? -1 : a->x > b->x;
1325 }
1326
1327 //
1328 /////////////////////// END LEAF SETUP FUNCTIONS //////////////////////////
1329
1330
1331 #if defined(STB_VORBIS_NO_STDIO)
1332 #define USE_MEMORY(z) TRUE
1333 #else
1334 #define USE_MEMORY(z) ((z)->stream)
1335 #endif
1336
1337 static uint8 get8(vorb *z)
1338 {
1339 if (USE_MEMORY(z)) {
1340 if (z->stream >= z->stream_end) { z->eof = TRUE; return 0; }
1341 return *z->stream++;
1342 }
1343
1344 #ifndef STB_VORBIS_NO_STDIO
1345 {
1346 int c = fgetc(z->f);
1347 if (c == EOF) { z->eof = TRUE; return 0; }
1348 return c;
1349 }
1350 #endif
1351 }
1352
1353 static uint32 get32(vorb *f)
1354 {
1355 uint32 x;
1356 x = get8(f);
1357 x += get8(f) << 8;
1358 x += get8(f) << 16;
1359 x += (uint32) get8(f) << 24;
1360 return x;
1361 }
1362
1363 static int getn(vorb *z, uint8 *data, int n)
1364 {
1365 if (USE_MEMORY(z)) {
1366 if (z->stream+n > z->stream_end) { z->eof = 1; return 0; }
1367 memcpy(data, z->stream, n);
1368 z->stream += n;
1369 return 1;
1370 }
1371
1372 #ifndef STB_VORBIS_NO_STDIO
1373 if (fread(data, n, 1, z->f) == 1)
1374 return 1;
1375 else {
1376 z->eof = 1;
1377 return 0;
1378 }
1379 #endif
1380 }
1381
1382 static void skip(vorb *z, int n)
1383 {
1384 if (USE_MEMORY(z)) {
1385 z->stream += n;
1386 if (z->stream >= z->stream_end) z->eof = 1;
1387 return;
1388 }
1389 #ifndef STB_VORBIS_NO_STDIO
1390 {
1391 long x = ftell(z->f);
1392 fseek(z->f, x+n, SEEK_SET);
1393 }
1394 #endif
1395 }
1396
1397 static int set_file_offset(stb_vorbis *f, unsigned int loc)
1398 {
1399 #ifndef STB_VORBIS_NO_PUSHDATA_API
1400 if (f->push_mode) return 0;
1401 #endif
1402 f->eof = 0;
1403 if (USE_MEMORY(f)) {
1404 if (f->stream_start + loc >= f->stream_end || f->stream_start + loc < f->stream_start) {
1405 f->stream = f->stream_end;
1406 f->eof = 1;
1407 return 0;
1408 } else {
1409 f->stream = f->stream_start + loc;
1410 return 1;
1411 }
1412 }
1413 #ifndef STB_VORBIS_NO_STDIO
1414 if (loc + f->f_start < loc || loc >= 0x80000000) {
1415 loc = 0x7fffffff;
1416 f->eof = 1;
1417 } else {
1418 loc += f->f_start;
1419 }
1420 if (!fseek(f->f, loc, SEEK_SET))
1421 return 1;
1422 f->eof = 1;
1423 fseek(f->f, f->f_start, SEEK_END);
1424 return 0;
1425 #endif
1426 }
1427
1428
1429 static uint8 ogg_page_header[4] = { 0x4f, 0x67, 0x67, 0x53 };
1430
1431 static int capture_pattern(vorb *f)
1432 {
1433 if (0x4f != get8(f)) return FALSE;
1434 if (0x67 != get8(f)) return FALSE;
1435 if (0x67 != get8(f)) return FALSE;
1436 if (0x53 != get8(f)) return FALSE;
1437 return TRUE;
1438 }
1439
1440 #define PAGEFLAG_continued_packet 1
1441 #define PAGEFLAG_first_page 2
1442 #define PAGEFLAG_last_page 4
1443
1444 static int start_page_no_capturepattern(vorb *f)
1445 {
1446 uint32 loc0,loc1,n;
1447 if (f->first_decode && !IS_PUSH_MODE(f)) {
1448 f->p_first.page_start = stb_vorbis_get_file_offset(f) - 4;
1449 }
1450 // stream structure version
1451 if (0 != get8(f)) return error(f, VORBIS_invalid_stream_structure_version);
1452 // header flag
1453 f->page_flag = get8(f);
1454 // absolute granule position
1455 loc0 = get32(f);
1456 loc1 = get32(f);
1457 // @TODO: validate loc0,loc1 as valid positions?
1458 // stream serial number -- vorbis doesn't interleave, so discard
1459 get32(f);
1460 //if (f->serial != get32(f)) return error(f, VORBIS_incorrect_stream_serial_number);
1461 // page sequence number
1462 n = get32(f);
1463 f->last_page = n;
1464 // CRC32
1465 get32(f);
1466 // page_segments
1467 f->segment_count = get8(f);
1468 if (!getn(f, f->segments, f->segment_count))
1469 return error(f, VORBIS_unexpected_eof);
1470 // assume we _don't_ know any the sample position of any segments
1471 f->end_seg_with_known_loc = -2;
1472 if (loc0 != ~0U || loc1 != ~0U) {
1473 int i;
1474 // determine which packet is the last one that will complete
1475 for (i=f->segment_count-1; i >= 0; --i)
1476 if (f->segments[i] < 255)
1477 break;
1478 // 'i' is now the index of the _last_ segment of a packet that ends
1479 if (i >= 0) {
1480 f->end_seg_with_known_loc = i;
1481 f->known_loc_for_packet = loc0;
1482 }
1483 }
1484 if (f->first_decode) {
1485 int i,len;
1486 len = 0;
1487 for (i=0; i < f->segment_count; ++i)
1488 len += f->segments[i];
1489 len += 27 + f->segment_count;
1490 f->p_first.page_end = f->p_first.page_start + len;
1491 f->p_first.last_decoded_sample = loc0;
1492 }
1493 f->next_seg = 0;
1494 return TRUE;
1495 }
1496
1497 static int start_page(vorb *f)
1498 {
1499 if (!capture_pattern(f)) return error(f, VORBIS_missing_capture_pattern);
1500 return start_page_no_capturepattern(f);
1501 }
1502
1503 static int start_packet(vorb *f)
1504 {
1505 while (f->next_seg == -1) {
1506 if (!start_page(f)) return FALSE;
1507 if (f->page_flag & PAGEFLAG_continued_packet)
1508 return error(f, VORBIS_continued_packet_flag_invalid);
1509 }
1510 f->last_seg = FALSE;
1511 f->valid_bits = 0;
1512 f->packet_bytes = 0;
1513 f->bytes_in_seg = 0;
1514 // f->next_seg is now valid
1515 return TRUE;
1516 }
1517
1518 static int maybe_start_packet(vorb *f)
1519 {
1520 if (f->next_seg == -1) {
1521 int x = get8(f);
1522 if (f->eof) return FALSE; // EOF at page boundary is not an error!
1523 if (0x4f != x ) return error(f, VORBIS_missing_capture_pattern);
1524 if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1525 if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1526 if (0x53 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1527 if (!start_page_no_capturepattern(f)) return FALSE;
1528 if (f->page_flag & PAGEFLAG_continued_packet) {
1529 // set up enough state that we can read this packet if we want,
1530 // e.g. during recovery
1531 f->last_seg = FALSE;
1532 f->bytes_in_seg = 0;
1533 return error(f, VORBIS_continued_packet_flag_invalid);
1534 }
1535 }
1536 return start_packet(f);
1537 }
1538
1539 static int next_segment(vorb *f)
1540 {
1541 int len;
1542 if (f->last_seg) return 0;
1543 if (f->next_seg == -1) {
1544 f->last_seg_which = f->segment_count-1; // in case start_page fails
1545 if (!start_page(f)) { f->last_seg = 1; return 0; }
1546 if (!(f->page_flag & PAGEFLAG_continued_packet)) return error(f, VORBIS_continued_packet_flag_invalid);
1547 }
1548 len = f->segments[f->next_seg++];
1549 if (len < 255) {
1550 f->last_seg = TRUE;
1551 f->last_seg_which = f->next_seg-1;
1552 }
1553 if (f->next_seg >= f->segment_count)
1554 f->next_seg = -1;
1555 assert(f->bytes_in_seg == 0);
1556 f->bytes_in_seg = len;
1557 return len;
1558 }
1559
1560 #define EOP (-1)
1561 #define INVALID_BITS (-1)
1562
1563 static int get8_packet_raw(vorb *f)
1564 {
1565 if (!f->bytes_in_seg) { // CLANG!
1566 if (f->last_seg) return EOP;
1567 else if (!next_segment(f)) return EOP;
1568 }
1569 assert(f->bytes_in_seg > 0);
1570 --f->bytes_in_seg;
1571 ++f->packet_bytes;
1572 return get8(f);
1573 }
1574
1575 static int get8_packet(vorb *f)
1576 {
1577 int x = get8_packet_raw(f);
1578 f->valid_bits = 0;
1579 return x;
1580 }
1581
1582 static int get32_packet(vorb *f)
1583 {
1584 uint32 x;
1585 x = get8_packet(f);
1586 x += get8_packet(f) << 8;
1587 x += get8_packet(f) << 16;
1588 x += (uint32) get8_packet(f) << 24;
1589 return x;
1590 }
1591
1592 static void flush_packet(vorb *f)
1593 {
1594 while (get8_packet_raw(f) != EOP);
1595 }
1596
1597 // @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important
1598 // as the huffman decoder?
1599 static uint32 get_bits(vorb *f, int n)
1600 {
1601 uint32 z;
1602
1603 if (f->valid_bits < 0) return 0;
1604 if (f->valid_bits < n) {
1605 if (n > 24) {
1606 // the accumulator technique below would not work correctly in this case
1607 z = get_bits(f, 24);
1608 z += get_bits(f, n-24) << 24;
1609 return z;
1610 }
1611 if (f->valid_bits == 0) f->acc = 0;
1612 while (f->valid_bits < n) {
1613 int z = get8_packet_raw(f);
1614 if (z == EOP) {
1615 f->valid_bits = INVALID_BITS;
1616 return 0;
1617 }
1618 f->acc += z << f->valid_bits;
1619 f->valid_bits += 8;
1620 }
1621 }
1622
1623 assert(f->valid_bits >= n);
1624 z = f->acc & ((1 << n)-1);
1625 f->acc >>= n;
1626 f->valid_bits -= n;
1627 return z;
1628 }
1629
1630 // @OPTIMIZE: primary accumulator for huffman
1631 // expand the buffer to as many bits as possible without reading off end of packet
1632 // it might be nice to allow f->valid_bits and f->acc to be stored in registers,
1633 // e.g. cache them locally and decode locally
1634 static __forceinline void prep_huffman(vorb *f)
1635 {
1636 if (f->valid_bits <= 24) {
1637 if (f->valid_bits == 0) f->acc = 0;
1638 do {
1639 int z;
1640 if (f->last_seg && !f->bytes_in_seg) return;
1641 z = get8_packet_raw(f);
1642 if (z == EOP) return;
1643 f->acc += (unsigned) z << f->valid_bits;
1644 f->valid_bits += 8;
1645 } while (f->valid_bits <= 24);
1646 }
1647 }
1648
1649 enum
1650 {
1651 VORBIS_packet_id = 1,
1652 VORBIS_packet_comment = 3,
1653 VORBIS_packet_setup = 5
1654 };
1655
1656 static int codebook_decode_scalar_raw(vorb *f, Codebook *c)
1657 {
1658 int i;
1659 prep_huffman(f);
1660
1661 if (c->codewords == NULL && c->sorted_codewords == NULL)
1662 return -1;
1663
1664 // cases to use binary search: sorted_codewords && !c->codewords
1665 // sorted_codewords && c->entries > 8
1666 if (c->entries > 8 ? c->sorted_codewords!=NULL : !c->codewords) {
1667 // binary search
1668 uint32 code = bit_reverse(f->acc);
1669 int x=0, n=c->sorted_entries, len;
1670
1671 while (n > 1) {
1672 // invariant: sc[x] <= code < sc[x+n]
1673 int m = x + (n >> 1);
1674 if (c->sorted_codewords[m] <= code) {
1675 x = m;
1676 n -= (n>>1);
1677 } else {
1678 n >>= 1;
1679 }
1680 }
1681 // x is now the sorted index
1682 if (!c->sparse) x = c->sorted_values[x];
1683 // x is now sorted index if sparse, or symbol otherwise
1684 len = c->codeword_lengths[x];
1685 if (f->valid_bits >= len) {
1686 f->acc >>= len;
1687 f->valid_bits -= len;
1688 return x;
1689 }
1690
1691 f->valid_bits = 0;
1692 return -1;
1693 }
1694
1695 // if small, linear search
1696 assert(!c->sparse);
1697 for (i=0; i < c->entries; ++i) {
1698 if (c->codeword_lengths[i] == NO_CODE) continue;
1699 if (c->codewords[i] == (f->acc & ((1 << c->codeword_lengths[i])-1))) {
1700 if (f->valid_bits >= c->codeword_lengths[i]) {
1701 f->acc >>= c->codeword_lengths[i];
1702 f->valid_bits -= c->codeword_lengths[i];
1703 return i;
1704 }
1705 f->valid_bits = 0;
1706 return -1;
1707 }
1708 }
1709
1710 error(f, VORBIS_invalid_stream);
1711 f->valid_bits = 0;
1712 return -1;
1713 }
1714
1715 #ifndef STB_VORBIS_NO_INLINE_DECODE
1716
1717 #define DECODE_RAW(var, f,c) \
1718 if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH) \
1719 prep_huffman(f); \
1720 var = f->acc & FAST_HUFFMAN_TABLE_MASK; \
1721 var = c->fast_huffman[var]; \
1722 if (var >= 0) { \
1723 int n = c->codeword_lengths[var]; \
1724 f->acc >>= n; \
1725 f->valid_bits -= n; \
1726 if (f->valid_bits < 0) { f->valid_bits = 0; var = -1; } \
1727 } else { \
1728 var = codebook_decode_scalar_raw(f,c); \
1729 }
1730
1731 #else
1732
1733 static int codebook_decode_scalar(vorb *f, Codebook *c)
1734 {
1735 int i;
1736 if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)
1737 prep_huffman(f);
1738 // fast huffman table lookup
1739 i = f->acc & FAST_HUFFMAN_TABLE_MASK;
1740 i = c->fast_huffman[i];
1741 if (i >= 0) {
1742 f->acc >>= c->codeword_lengths[i];
1743 f->valid_bits -= c->codeword_lengths[i];
1744 if (f->valid_bits < 0) { f->valid_bits = 0; return -1; }
1745 return i;
1746 }
1747 return codebook_decode_scalar_raw(f,c);
1748 }
1749
1750 #define DECODE_RAW(var,f,c) var = codebook_decode_scalar(f,c);
1751
1752 #endif
1753
1754 #define DECODE(var,f,c) \
1755 DECODE_RAW(var,f,c) \
1756 if (c->sparse) var = c->sorted_values[var];
1757
1758 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
1759 #define DECODE_VQ(var,f,c) DECODE_RAW(var,f,c)
1760 #else
1761 #define DECODE_VQ(var,f,c) DECODE(var,f,c)
1762 #endif
1763
1764
1765
1766
1767
1768
1769 // CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case
1770 // where we avoid one addition
1771 #define CODEBOOK_ELEMENT(c,off) (c->multiplicands[off])
1772 #define CODEBOOK_ELEMENT_FAST(c,off) (c->multiplicands[off])
1773 #define CODEBOOK_ELEMENT_BASE(c) (0)
1774
1775 static int codebook_decode_start(vorb *f, Codebook *c)
1776 {
1777 int z = -1;
1778
1779 // type 0 is only legal in a scalar context
1780 if (c->lookup_type == 0)
1781 error(f, VORBIS_invalid_stream);
1782 else {
1783 DECODE_VQ(z,f,c);
1784 if (c->sparse) assert(z < c->sorted_entries);
1785 if (z < 0) { // check for EOP
1786 if (!f->bytes_in_seg)
1787 if (f->last_seg)
1788 return z;
1789 error(f, VORBIS_invalid_stream);
1790 }
1791 }
1792 return z;
1793 }
1794
1795 static int codebook_decode(vorb *f, Codebook *c, float *output, int len)
1796 {
1797 int i,z = codebook_decode_start(f,c);
1798 if (z < 0) return FALSE;
1799 if (len > c->dimensions) len = c->dimensions;
1800
1801 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1802 if (c->lookup_type == 1) {
1803 float last = CODEBOOK_ELEMENT_BASE(c);
1804 int div = 1;
1805 for (i=0; i < len; ++i) {
1806 int off = (z / div) % c->lookup_values;
1807 float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1808 output[i] += val;
1809 if (c->sequence_p) last = val + c->minimum_value;
1810 div *= c->lookup_values;
1811 }
1812 return TRUE;
1813 }
1814 #endif
1815
1816 z *= c->dimensions;
1817 if (c->sequence_p) {
1818 float last = CODEBOOK_ELEMENT_BASE(c);
1819 for (i=0; i < len; ++i) {
1820 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1821 output[i] += val;
1822 last = val + c->minimum_value;
1823 }
1824 } else {
1825 float last = CODEBOOK_ELEMENT_BASE(c);
1826 for (i=0; i < len; ++i) {
1827 output[i] += CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1828 }
1829 }
1830
1831 return TRUE;
1832 }
1833
1834 static int codebook_decode_step(vorb *f, Codebook *c, float *output, int len, int step)
1835 {
1836 int i,z = codebook_decode_start(f,c);
1837 float last = CODEBOOK_ELEMENT_BASE(c);
1838 if (z < 0) return FALSE;
1839 if (len > c->dimensions) len = c->dimensions;
1840
1841 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1842 if (c->lookup_type == 1) {
1843 int div = 1;
1844 for (i=0; i < len; ++i) {
1845 int off = (z / div) % c->lookup_values;
1846 float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1847 output[i*step] += val;
1848 if (c->sequence_p) last = val;
1849 div *= c->lookup_values;
1850 }
1851 return TRUE;
1852 }
1853 #endif
1854
1855 z *= c->dimensions;
1856 for (i=0; i < len; ++i) {
1857 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1858 output[i*step] += val;
1859 if (c->sequence_p) last = val;
1860 }
1861
1862 return TRUE;
1863 }
1864
1865 static int codebook_decode_deinterleave_repeat(vorb *f, Codebook *c, float **outputs, int ch, int *c_inter_p, int *p_inter_p, int len, int total_decode)
1866 {
1867 int c_inter = *c_inter_p;
1868 int p_inter = *p_inter_p;
1869 int i,z, effective = c->dimensions;
1870
1871 // type 0 is only legal in a scalar context
1872 if (c->lookup_type == 0) return error(f, VORBIS_invalid_stream);
1873
1874 while (total_decode > 0) {
1875 float last = CODEBOOK_ELEMENT_BASE(c);
1876 DECODE_VQ(z,f,c);
1877 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
1878 assert(!c->sparse || z < c->sorted_entries);
1879 #endif
1880 if (z < 0) {
1881 if (!f->bytes_in_seg)
1882 if (f->last_seg) return FALSE;
1883 return error(f, VORBIS_invalid_stream);
1884 }
1885
1886 // if this will take us off the end of the buffers, stop short!
1887 // we check by computing the length of the virtual interleaved
1888 // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
1889 // and the length we'll be using (effective)
1890 if (c_inter + p_inter*ch + effective > len * ch) {
1891 effective = len*ch - (p_inter*ch - c_inter);
1892 }
1893
1894 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1895 if (c->lookup_type == 1) {
1896 int div = 1;
1897 for (i=0; i < effective; ++i) {
1898 int off = (z / div) % c->lookup_values;
1899 float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1900 if (outputs[c_inter])
1901 outputs[c_inter][p_inter] += val;
1902 if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1903 if (c->sequence_p) last = val;
1904 div *= c->lookup_values;
1905 }
1906 } else
1907 #endif
1908 {
1909 z *= c->dimensions;
1910 if (c->sequence_p) {
1911 for (i=0; i < effective; ++i) {
1912 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1913 if (outputs[c_inter])
1914 outputs[c_inter][p_inter] += val;
1915 if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1916 last = val;
1917 }
1918 } else {
1919 for (i=0; i < effective; ++i) {
1920 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1921 if (outputs[c_inter])
1922 outputs[c_inter][p_inter] += val;
1923 if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1924 }
1925 }
1926 }
1927
1928 total_decode -= effective;
1929 }
1930 *c_inter_p = c_inter;
1931 *p_inter_p = p_inter;
1932 return TRUE;
1933 }
1934
1935 static int predict_point(int x, int x0, int x1, int y0, int y1)
1936 {
1937 int dy = y1 - y0;
1938 int adx = x1 - x0;
1939 // @OPTIMIZE: force int division to round in the right direction... is this necessary on x86?
1940 int err = abs(dy) * (x - x0);
1941 int off = err / adx;
1942 return dy < 0 ? y0 - off : y0 + off;
1943 }
1944
1945 // the following table is block-copied from the specification
1946 static float inverse_db_table[256] =
1947 {
1948 1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f,
1949 1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f,
1950 1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f,
1951 2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f,
1952 2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f,
1953 3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f,
1954 4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f,
1955 6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f,
1956 7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f,
1957 1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f,
1958 1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f,
1959 1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f,
1960 2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f,
1961 2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f,
1962 3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f,
1963 4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f,
1964 5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f,
1965 7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f,
1966 9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f,
1967 1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f,
1968 1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f,
1969 2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f,
1970 2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f,
1971 3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f,
1972 4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f,
1973 5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f,
1974 7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f,
1975 9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f,
1976 0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f,
1977 0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f,
1978 0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f,
1979 0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f,
1980 0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f,
1981 0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f,
1982 0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f,
1983 0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f,
1984 0.00092223983f, 0.00098217216f, 0.0010459992f, 0.0011139742f,
1985 0.0011863665f, 0.0012634633f, 0.0013455702f, 0.0014330129f,
1986 0.0015261382f, 0.0016253153f, 0.0017309374f, 0.0018434235f,
1987 0.0019632195f, 0.0020908006f, 0.0022266726f, 0.0023713743f,
1988 0.0025254795f, 0.0026895994f, 0.0028643847f, 0.0030505286f,
1989 0.0032487691f, 0.0034598925f, 0.0036847358f, 0.0039241906f,
1990 0.0041792066f, 0.0044507950f, 0.0047400328f, 0.0050480668f,
1991 0.0053761186f, 0.0057254891f, 0.0060975636f, 0.0064938176f,
1992 0.0069158225f, 0.0073652516f, 0.0078438871f, 0.0083536271f,
1993 0.0088964928f, 0.009474637f, 0.010090352f, 0.010746080f,
1994 0.011444421f, 0.012188144f, 0.012980198f, 0.013823725f,
1995 0.014722068f, 0.015678791f, 0.016697687f, 0.017782797f,
1996 0.018938423f, 0.020169149f, 0.021479854f, 0.022875735f,
1997 0.024362330f, 0.025945531f, 0.027631618f, 0.029427276f,
1998 0.031339626f, 0.033376252f, 0.035545228f, 0.037855157f,
1999 0.040315199f, 0.042935108f, 0.045725273f, 0.048696758f,
2000 0.051861348f, 0.055231591f, 0.058820850f, 0.062643361f,
2001 0.066714279f, 0.071049749f, 0.075666962f, 0.080584227f,
2002 0.085821044f, 0.091398179f, 0.097337747f, 0.10366330f,
2003 0.11039993f, 0.11757434f, 0.12521498f, 0.13335215f,
2004 0.14201813f, 0.15124727f, 0.16107617f, 0.17154380f,
2005 0.18269168f, 0.19456402f, 0.20720788f, 0.22067342f,
2006 0.23501402f, 0.25028656f, 0.26655159f, 0.28387361f,
2007 0.30232132f, 0.32196786f, 0.34289114f, 0.36517414f,
2008 0.38890521f, 0.41417847f, 0.44109412f, 0.46975890f,
2009 0.50028648f, 0.53279791f, 0.56742212f, 0.60429640f,
2010 0.64356699f, 0.68538959f, 0.72993007f, 0.77736504f,
2011 0.82788260f, 0.88168307f, 0.9389798f, 1.0f
2012 };
2013
2014
2015 // @OPTIMIZE: if you want to replace this bresenham line-drawing routine,
2016 // note that you must produce bit-identical output to decode correctly;
2017 // this specific sequence of operations is specified in the spec (it's
2018 // drawing integer-quantized frequency-space lines that the encoder
2019 // expects to be exactly the same)
2020 // ... also, isn't the whole point of Bresenham's algorithm to NOT
2021 // have to divide in the setup? sigh.
2022 #ifndef STB_VORBIS_NO_DEFER_FLOOR
2023 #define LINE_OP(a,b) a *= b
2024 #else
2025 #define LINE_OP(a,b) a = b
2026 #endif
2027
2028 #ifdef STB_VORBIS_DIVIDE_TABLE
2029 #define DIVTAB_NUMER 32
2030 #define DIVTAB_DENOM 64
2031 int8 integer_divide_table[DIVTAB_NUMER][DIVTAB_DENOM]; // 2KB
2032 #endif
2033
2034 static __forceinline void draw_line(float *output, int x0, int y0, int x1, int y1, int n)
2035 {
2036 int dy = y1 - y0;
2037 int adx = x1 - x0;
2038 int ady = abs(dy);
2039 int base;
2040 int x=x0,y=y0;
2041 int err = 0;
2042 int sy;
2043
2044 #ifdef STB_VORBIS_DIVIDE_TABLE
2045 if (adx < DIVTAB_DENOM && ady < DIVTAB_NUMER) {
2046 if (dy < 0) {
2047 base = -integer_divide_table[ady][adx];
2048 sy = base-1;
2049 } else {
2050 base = integer_divide_table[ady][adx];
2051 sy = base+1;
2052 }
2053 } else {
2054 base = dy / adx;
2055 if (dy < 0)
2056 sy = base - 1;
2057 else
2058 sy = base+1;
2059 }
2060 #else
2061 base = dy / adx;
2062 if (dy < 0)
2063 sy = base - 1;
2064 else
2065 sy = base+1;
2066 #endif
2067 ady -= abs(base) * adx;
2068 if (x1 > n) x1 = n;
2069 if (x < x1) {
2070 LINE_OP(output[x], inverse_db_table[y&255]);
2071 for (++x; x < x1; ++x) {
2072 err += ady;
2073 if (err >= adx) {
2074 err -= adx;
2075 y += sy;
2076 } else
2077 y += base;
2078 LINE_OP(output[x], inverse_db_table[y&255]);
2079 }
2080 }
2081 }
2082
2083 static int residue_decode(vorb *f, Codebook *book, float *target, int offset, int n, int rtype)
2084 {
2085 int k;
2086 if (rtype == 0) {
2087 int step = n / book->dimensions;
2088 for (k=0; k < step; ++k)
2089 if (!codebook_decode_step(f, book, target+offset+k, n-offset-k, step))
2090 return FALSE;
2091 } else {
2092 for (k=0; k < n; ) {
2093 if (!codebook_decode(f, book, target+offset, n-k))
2094 return FALSE;
2095 k += book->dimensions;
2096 offset += book->dimensions;
2097 }
2098 }
2099 return TRUE;
2100 }
2101
2102 // n is 1/2 of the blocksize --
2103 // specification: "Correct per-vector decode length is [n]/2"
2104 static void decode_residue(vorb *f, float *residue_buffers[], int ch, int n, int rn, uint8 *do_not_decode)
2105 {
2106 int i,j,pass;
2107 Residue *r = f->residue_config + rn;
2108 int rtype = f->residue_types[rn];
2109 int c = r->classbook;
2110 int classwords = f->codebooks[c].dimensions;
2111 unsigned int actual_size = rtype == 2 ? n*2 : n;
2112 unsigned int limit_r_begin = (r->begin < actual_size ? r->begin : actual_size);
2113 unsigned int limit_r_end = (r->end < actual_size ? r->end : actual_size);
2114 int n_read = limit_r_end - limit_r_begin;
2115 int part_read = n_read / r->part_size;
2116 int temp_alloc_point = temp_alloc_save(f);
2117 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2118 uint8 ***part_classdata = (uint8 ***) temp_block_array(f,f->channels, part_read * sizeof(**part_classdata));
2119 #else
2120 int **classifications = (int **) temp_block_array(f,f->channels, part_read * sizeof(**classifications));
2121 #endif
2122
2123 CHECK(f);
2124
2125 for (i=0; i < ch; ++i)
2126 if (!do_not_decode[i])
2127 memset(residue_buffers[i], 0, sizeof(float) * n);
2128
2129 if (rtype == 2 && ch != 1) {
2130 for (j=0; j < ch; ++j)
2131 if (!do_not_decode[j])
2132 break;
2133 if (j == ch)
2134 goto done;
2135
2136 for (pass=0; pass < 8; ++pass) {
2137 int pcount = 0, class_set = 0;
2138 if (ch == 2) {
2139 while (pcount < part_read) {
2140 int z = r->begin + pcount*r->part_size;
2141 int c_inter = (z & 1), p_inter = z>>1;
2142 if (pass == 0) {
2143 Codebook *c = f->codebooks+r->classbook;
2144 int q;
2145 DECODE(q,f,c);
2146 if (q == EOP) goto done;
2147 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2148 part_classdata[0][class_set] = r->classdata[q];
2149 #else
2150 for (i=classwords-1; i >= 0; --i) {
2151 classifications[0][i+pcount] = q % r->classifications;
2152 q /= r->classifications;
2153 }
2154 #endif
2155 }
2156 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2157 int z = r->begin + pcount*r->part_size;
2158 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2159 int c = part_classdata[0][class_set][i];
2160 #else
2161 int c = classifications[0][pcount];
2162 #endif
2163 int b = r->residue_books[c][pass];
2164 if (b >= 0) {
2165 Codebook *book = f->codebooks + b;
2166 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
2167 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2168 goto done;
2169 #else
2170 // saves 1%
2171 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2172 goto done;
2173 #endif
2174 } else {
2175 z += r->part_size;
2176 c_inter = z & 1;
2177 p_inter = z >> 1;
2178 }
2179 }
2180 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2181 ++class_set;
2182 #endif
2183 }
2184 } else if (ch > 2) {
2185 while (pcount < part_read) {
2186 int z = r->begin + pcount*r->part_size;
2187 int c_inter = z % ch, p_inter = z/ch;
2188 if (pass == 0) {
2189 Codebook *c = f->codebooks+r->classbook;
2190 int q;
2191 DECODE(q,f,c);
2192 if (q == EOP) goto done;
2193 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2194 part_classdata[0][class_set] = r->classdata[q];
2195 #else
2196 for (i=classwords-1; i >= 0; --i) {
2197 classifications[0][i+pcount] = q % r->classifications;
2198 q /= r->classifications;
2199 }
2200 #endif
2201 }
2202 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2203 int z = r->begin + pcount*r->part_size;
2204 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2205 int c = part_classdata[0][class_set][i];
2206 #else
2207 int c = classifications[0][pcount];
2208 #endif
2209 int b = r->residue_books[c][pass];
2210 if (b >= 0) {
2211 Codebook *book = f->codebooks + b;
2212 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
2213 goto done;
2214 } else {
2215 z += r->part_size;
2216 c_inter = z % ch;
2217 p_inter = z / ch;
2218 }
2219 }
2220 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2221 ++class_set;
2222 #endif
2223 }
2224 }
2225 }
2226 goto done;
2227 }
2228 CHECK(f);
2229
2230 for (pass=0; pass < 8; ++pass) {
2231 int pcount = 0, class_set=0;
2232 while (pcount < part_read) {
2233 if (pass == 0) {
2234 for (j=0; j < ch; ++j) {
2235 if (!do_not_decode[j]) {
2236 Codebook *c = f->codebooks+r->classbook;
2237 int temp;
2238 DECODE(temp,f,c);
2239 if (temp == EOP) goto done;
2240 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2241 part_classdata[j][class_set] = r->classdata[temp];
2242 #else
2243 for (i=classwords-1; i >= 0; --i) {
2244 classifications[j][i+pcount] = temp % r->classifications;
2245 temp /= r->classifications;
2246 }
2247 #endif
2248 }
2249 }
2250 }
2251 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2252 for (j=0; j < ch; ++j) {
2253 if (!do_not_decode[j]) {
2254 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2255 int c = part_classdata[j][class_set][i];
2256 #else
2257 int c = classifications[j][pcount];
2258 #endif
2259 int b = r->residue_books[c][pass];
2260 if (b >= 0) {
2261 float *target = residue_buffers[j];
2262 int offset = r->begin + pcount * r->part_size;
2263 int n = r->part_size;
2264 Codebook *book = f->codebooks + b;
2265 if (!residue_decode(f, book, target, offset, n, rtype))
2266 goto done;
2267 }
2268 }
2269 }
2270 }
2271 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2272 ++class_set;
2273 #endif
2274 }
2275 }
2276 done:
2277 CHECK(f);
2278 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2279 temp_free(f,part_classdata);
2280 #else
2281 temp_free(f,classifications);
2282 #endif
2283 temp_alloc_restore(f,temp_alloc_point);
2284 }
2285
2286
2287 #if 0
2288 // slow way for debugging
2289 void inverse_mdct_slow(float *buffer, int n)
2290 {
2291 int i,j;
2292 int n2 = n >> 1;
2293 float *x = (float *) malloc(sizeof(*x) * n2);
2294 memcpy(x, buffer, sizeof(*x) * n2);
2295 for (i=0; i < n; ++i) {
2296 float acc = 0;
2297 for (j=0; j < n2; ++j)
2298 // formula from paper:
2299 //acc += n/4.0f * x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
2300 // formula from wikipedia
2301 //acc += 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
2302 // these are equivalent, except the formula from the paper inverts the multiplier!
2303 // however, what actually works is NO MULTIPLIER!?!
2304 //acc += 64 * 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
2305 acc += x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
2306 buffer[i] = acc;
2307 }
2308 free(x);
2309 }
2310 #elif 0
2311 // same as above, but just barely able to run in real time on modern machines
2312 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
2313 {
2314 float mcos[16384];
2315 int i,j;
2316 int n2 = n >> 1, nmask = (n << 2) -1;
2317 float *x = (float *) malloc(sizeof(*x) * n2);
2318 memcpy(x, buffer, sizeof(*x) * n2);
2319 for (i=0; i < 4*n; ++i)
2320 mcos[i] = (float) cos(M_PI / 2 * i / n);
2321
2322 for (i=0; i < n; ++i) {
2323 float acc = 0;
2324 for (j=0; j < n2; ++j)
2325 acc += x[j] * mcos[(2 * i + 1 + n2)*(2*j+1) & nmask];
2326 buffer[i] = acc;
2327 }
2328 free(x);
2329 }
2330 #elif 0
2331 // transform to use a slow dct-iv; this is STILL basically trivial,
2332 // but only requires half as many ops
2333 void dct_iv_slow(float *buffer, int n)
2334 {
2335 float mcos[16384];
2336 float x[2048];
2337 int i,j;
2338 int n2 = n >> 1, nmask = (n << 3) - 1;
2339 memcpy(x, buffer, sizeof(*x) * n);
2340 for (i=0; i < 8*n; ++i)
2341 mcos[i] = (float) cos(M_PI / 4 * i / n);
2342 for (i=0; i < n; ++i) {
2343 float acc = 0;
2344 for (j=0; j < n; ++j)
2345 acc += x[j] * mcos[((2 * i + 1)*(2*j+1)) & nmask];
2346 buffer[i] = acc;
2347 }
2348 }
2349
2350 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
2351 {
2352 int i, n4 = n >> 2, n2 = n >> 1, n3_4 = n - n4;
2353 float temp[4096];
2354
2355 memcpy(temp, buffer, n2 * sizeof(float));
2356 dct_iv_slow(temp, n2); // returns -c'-d, a-b'
2357
2358 for (i=0; i < n4 ; ++i) buffer[i] = temp[i+n4]; // a-b'
2359 for ( ; i < n3_4; ++i) buffer[i] = -temp[n3_4 - i - 1]; // b-a', c+d'
2360 for ( ; i < n ; ++i) buffer[i] = -temp[i - n3_4]; // c'+d
2361 }
2362 #endif
2363
2364 #ifndef LIBVORBIS_MDCT
2365 #define LIBVORBIS_MDCT 0
2366 #endif
2367
2368 #if LIBVORBIS_MDCT
2369 // directly call the vorbis MDCT using an interface documented
2370 // by Jeff Roberts... useful for performance comparison
2371 typedef struct
2372 {
2373 int n;
2374 int log2n;
2375
2376 float *trig;
2377 int *bitrev;
2378
2379 float scale;
2380 } mdct_lookup;
2381
2382 extern void mdct_init(mdct_lookup *lookup, int n);
2383 extern void mdct_clear(mdct_lookup *l);
2384 extern void mdct_backward(mdct_lookup *init, float *in, float *out);
2385
2386 mdct_lookup M1,M2;
2387
2388 void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
2389 {
2390 mdct_lookup *M;
2391 if (M1.n == n) M = &M1;
2392 else if (M2.n == n) M = &M2;
2393 else if (M1.n == 0) { mdct_init(&M1, n); M = &M1; }
2394 else {
2395 if (M2.n) __asm int 3;
2396 mdct_init(&M2, n);
2397 M = &M2;
2398 }
2399
2400 mdct_backward(M, buffer, buffer);
2401 }
2402 #endif
2403
2404
2405 // the following were split out into separate functions while optimizing;
2406 // they could be pushed back up but eh. __forceinline showed no change;
2407 // they're probably already being inlined.
2408 static void imdct_step3_iter0_loop(int n, float *e, int i_off, int k_off, float *A)
2409 {
2410 float *ee0 = e + i_off;
2411 float *ee2 = ee0 + k_off;
2412 int i;
2413
2414 assert((n & 3) == 0);
2415 for (i=(n>>2); i > 0; --i) {
2416 float k00_20, k01_21;
2417 k00_20 = ee0[ 0] - ee2[ 0];
2418 k01_21 = ee0[-1] - ee2[-1];
2419 ee0[ 0] += ee2[ 0];//ee0[ 0] = ee0[ 0] + ee2[ 0];
2420 ee0[-1] += ee2[-1];//ee0[-1] = ee0[-1] + ee2[-1];
2421 ee2[ 0] = k00_20 * A[0] - k01_21 * A[1];
2422 ee2[-1] = k01_21 * A[0] + k00_20 * A[1];
2423 A += 8;
2424
2425 k00_20 = ee0[-2] - ee2[-2];
2426 k01_21 = ee0[-3] - ee2[-3];
2427 ee0[-2] += ee2[-2];//ee0[-2] = ee0[-2] + ee2[-2];
2428 ee0[-3] += ee2[-3];//ee0[-3] = ee0[-3] + ee2[-3];
2429 ee2[-2] = k00_20 * A[0] - k01_21 * A[1];
2430 ee2[-3] = k01_21 * A[0] + k00_20 * A[1];
2431 A += 8;
2432
2433 k00_20 = ee0[-4] - ee2[-4];
2434 k01_21 = ee0[-5] - ee2[-5];
2435 ee0[-4] += ee2[-4];//ee0[-4] = ee0[-4] + ee2[-4];
2436 ee0[-5] += ee2[-5];//ee0[-5] = ee0[-5] + ee2[-5];
2437 ee2[-4] = k00_20 * A[0] - k01_21 * A[1];
2438 ee2[-5] = k01_21 * A[0] + k00_20 * A[1];
2439 A += 8;
2440
2441 k00_20 = ee0[-6] - ee2[-6];
2442 k01_21 = ee0[-7] - ee2[-7];
2443 ee0[-6] += ee2[-6];//ee0[-6] = ee0[-6] + ee2[-6];
2444 ee0[-7] += ee2[-7];//ee0[-7] = ee0[-7] + ee2[-7];
2445 ee2[-6] = k00_20 * A[0] - k01_21 * A[1];
2446 ee2[-7] = k01_21 * A[0] + k00_20 * A[1];
2447 A += 8;
2448 ee0 -= 8;
2449 ee2 -= 8;
2450 }
2451 }
2452
2453 static void imdct_step3_inner_r_loop(int lim, float *e, int d0, int k_off, float *A, int k1)
2454 {
2455 int i;
2456 float k00_20, k01_21;
2457
2458 float *e0 = e + d0;
2459 float *e2 = e0 + k_off;
2460
2461 for (i=lim >> 2; i > 0; --i) {
2462 k00_20 = e0[-0] - e2[-0];
2463 k01_21 = e0[-1] - e2[-1];
2464 e0[-0] += e2[-0];//e0[-0] = e0[-0] + e2[-0];
2465 e0[-1] += e2[-1];//e0[-1] = e0[-1] + e2[-1];
2466 e2[-0] = (k00_20)*A[0] - (k01_21) * A[1];
2467 e2[-1] = (k01_21)*A[0] + (k00_20) * A[1];
2468
2469 A += k1;
2470
2471 k00_20 = e0[-2] - e2[-2];
2472 k01_21 = e0[-3] - e2[-3];
2473 e0[-2] += e2[-2];//e0[-2] = e0[-2] + e2[-2];
2474 e0[-3] += e2[-3];//e0[-3] = e0[-3] + e2[-3];
2475 e2[-2] = (k00_20)*A[0] - (k01_21) * A[1];
2476 e2[-3] = (k01_21)*A[0] + (k00_20) * A[1];
2477
2478 A += k1;
2479
2480 k00_20 = e0[-4] - e2[-4];
2481 k01_21 = e0[-5] - e2[-5];
2482 e0[-4] += e2[-4];//e0[-4] = e0[-4] + e2[-4];
2483 e0[-5] += e2[-5];//e0[-5] = e0[-5] + e2[-5];
2484 e2[-4] = (k00_20)*A[0] - (k01_21) * A[1];
2485 e2[-5] = (k01_21)*A[0] + (k00_20) * A[1];
2486
2487 A += k1;
2488
2489 k00_20 = e0[-6] - e2[-6];
2490 k01_21 = e0[-7] - e2[-7];
2491 e0[-6] += e2[-6];//e0[-6] = e0[-6] + e2[-6];
2492 e0[-7] += e2[-7];//e0[-7] = e0[-7] + e2[-7];
2493 e2[-6] = (k00_20)*A[0] - (k01_21) * A[1];
2494 e2[-7] = (k01_21)*A[0] + (k00_20) * A[1];
2495
2496 e0 -= 8;
2497 e2 -= 8;
2498
2499 A += k1;
2500 }
2501 }
2502
2503 static void imdct_step3_inner_s_loop(int n, float *e, int i_off, int k_off, float *A, int a_off, int k0)
2504 {
2505 int i;
2506 float A0 = A[0];
2507 float A1 = A[0+1];
2508 float A2 = A[0+a_off];
2509 float A3 = A[0+a_off+1];
2510 float A4 = A[0+a_off*2+0];
2511 float A5 = A[0+a_off*2+1];
2512 float A6 = A[0+a_off*3+0];
2513 float A7 = A[0+a_off*3+1];
2514
2515 float k00,k11;
2516
2517 float *ee0 = e +i_off;
2518 float *ee2 = ee0+k_off;
2519
2520 for (i=n; i > 0; --i) {
2521 k00 = ee0[ 0] - ee2[ 0];
2522 k11 = ee0[-1] - ee2[-1];
2523 ee0[ 0] = ee0[ 0] + ee2[ 0];
2524 ee0[-1] = ee0[-1] + ee2[-1];
2525 ee2[ 0] = (k00) * A0 - (k11) * A1;
2526 ee2[-1] = (k11) * A0 + (k00) * A1;
2527
2528 k00 = ee0[-2] - ee2[-2];
2529 k11 = ee0[-3] - ee2[-3];
2530 ee0[-2] = ee0[-2] + ee2[-2];
2531 ee0[-3] = ee0[-3] + ee2[-3];
2532 ee2[-2] = (k00) * A2 - (k11) * A3;
2533 ee2[-3] = (k11) * A2 + (k00) * A3;
2534
2535 k00 = ee0[-4] - ee2[-4];
2536 k11 = ee0[-5] - ee2[-5];
2537 ee0[-4] = ee0[-4] + ee2[-4];
2538 ee0[-5] = ee0[-5] + ee2[-5];
2539 ee2[-4] = (k00) * A4 - (k11) * A5;
2540 ee2[-5] = (k11) * A4 + (k00) * A5;
2541
2542 k00 = ee0[-6] - ee2[-6];
2543 k11 = ee0[-7] - ee2[-7];
2544 ee0[-6] = ee0[-6] + ee2[-6];
2545 ee0[-7] = ee0[-7] + ee2[-7];
2546 ee2[-6] = (k00) * A6 - (k11) * A7;
2547 ee2[-7] = (k11) * A6 + (k00) * A7;
2548
2549 ee0 -= k0;
2550 ee2 -= k0;
2551 }
2552 }
2553
2554 static __forceinline void iter_54(float *z)
2555 {
2556 float k00,k11,k22,k33;
2557 float y0,y1,y2,y3;
2558
2559 k00 = z[ 0] - z[-4];
2560 y0 = z[ 0] + z[-4];
2561 y2 = z[-2] + z[-6];
2562 k22 = z[-2] - z[-6];
2563
2564 z[-0] = y0 + y2; // z0 + z4 + z2 + z6
2565 z[-2] = y0 - y2; // z0 + z4 - z2 - z6
2566
2567 // done with y0,y2
2568
2569 k33 = z[-3] - z[-7];
2570
2571 z[-4] = k00 + k33; // z0 - z4 + z3 - z7
2572 z[-6] = k00 - k33; // z0 - z4 - z3 + z7
2573
2574 // done with k33
2575
2576 k11 = z[-1] - z[-5];
2577 y1 = z[-1] + z[-5];
2578 y3 = z[-3] + z[-7];
2579
2580 z[-1] = y1 + y3; // z1 + z5 + z3 + z7
2581 z[-3] = y1 - y3; // z1 + z5 - z3 - z7
2582 z[-5] = k11 - k22; // z1 - z5 + z2 - z6
2583 z[-7] = k11 + k22; // z1 - z5 - z2 + z6
2584 }
2585
2586 static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n)
2587 {
2588 int a_off = base_n >> 3;
2589 float A2 = A[0+a_off];
2590 float *z = e + i_off;
2591 float *base = z - 16 * n;
2592
2593 while (z > base) {
2594 float k00,k11;
2595 float l00,l11;
2596
2597 k00 = z[-0] - z[ -8];
2598 k11 = z[-1] - z[ -9];
2599 l00 = z[-2] - z[-10];
2600 l11 = z[-3] - z[-11];
2601 z[ -0] = z[-0] + z[ -8];
2602 z[ -1] = z[-1] + z[ -9];
2603 z[ -2] = z[-2] + z[-10];
2604 z[ -3] = z[-3] + z[-11];
2605 z[ -8] = k00;
2606 z[ -9] = k11;
2607 z[-10] = (l00+l11) * A2;
2608 z[-11] = (l11-l00) * A2;
2609
2610 k00 = z[ -4] - z[-12];
2611 k11 = z[ -5] - z[-13];
2612 l00 = z[ -6] - z[-14];
2613 l11 = z[ -7] - z[-15];
2614 z[ -4] = z[ -4] + z[-12];
2615 z[ -5] = z[ -5] + z[-13];
2616 z[ -6] = z[ -6] + z[-14];
2617 z[ -7] = z[ -7] + z[-15];
2618 z[-12] = k11;
2619 z[-13] = -k00;
2620 z[-14] = (l11-l00) * A2;
2621 z[-15] = (l00+l11) * -A2;
2622
2623 iter_54(z);
2624 iter_54(z-8);
2625 z -= 16;
2626 }
2627 }
2628
2629 static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
2630 {
2631 int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
2632 int ld;
2633 // @OPTIMIZE: reduce register pressure by using fewer variables?
2634 int save_point = temp_alloc_save(f);
2635 float *buf2 = (float *) temp_alloc(f, n2 * sizeof(*buf2));
2636 float *u=NULL,*v=NULL;
2637 // twiddle factors
2638 float *A = f->A[blocktype];
2639
2640 // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
2641 // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_old' after this function.
2642
2643 // kernel from paper
2644
2645
2646 // merged:
2647 // copy and reflect spectral data
2648 // step 0
2649
2650 // note that it turns out that the items added together during
2651 // this step are, in fact, being added to themselves (as reflected
2652 // by step 0). inexplicable inefficiency! this became obvious
2653 // once I combined the passes.
2654
2655 // so there's a missing 'times 2' here (for adding X to itself).
2656 // this propagates through linearly to the end, where the numbers
2657 // are 1/2 too small, and need to be compensated for.
2658
2659 {
2660 float *d,*e, *AA, *e_stop;
2661 d = &buf2[n2-2];
2662 AA = A;
2663 e = &buffer[0];
2664 e_stop = &buffer[n2];
2665 while (e != e_stop) {
2666 d[1] = (e[0] * AA[0] - e[2]*AA[1]);
2667 d[0] = (e[0] * AA[1] + e[2]*AA[0]);
2668 d -= 2;
2669 AA += 2;
2670 e += 4;
2671 }
2672
2673 e = &buffer[n2-3];
2674 while (d >= buf2) {
2675 d[1] = (-e[2] * AA[0] - -e[0]*AA[1]);
2676 d[0] = (-e[2] * AA[1] + -e[0]*AA[0]);
2677 d -= 2;
2678 AA += 2;
2679 e -= 4;
2680 }
2681 }
2682
2683 // now we use symbolic names for these, so that we can
2684 // possibly swap their meaning as we change which operations
2685 // are in place
2686
2687 u = buffer;
2688 v = buf2;
2689
2690 // step 2 (paper output is w, now u)
2691 // this could be in place, but the data ends up in the wrong
2692 // place... _somebody_'s got to swap it, so this is nominated
2693 {
2694 float *AA = &A[n2-8];
2695 float *d0,*d1, *e0, *e1;
2696
2697 e0 = &v[n4];
2698 e1 = &v[0];
2699
2700 d0 = &u[n4];
2701 d1 = &u[0];
2702
2703 while (AA >= A) {
2704 float v40_20, v41_21;
2705
2706 v41_21 = e0[1] - e1[1];
2707 v40_20 = e0[0] - e1[0];
2708 d0[1] = e0[1] + e1[1];
2709 d0[0] = e0[0] + e1[0];
2710 d1[1] = v41_21*AA[4] - v40_20*AA[5];
2711 d1[0] = v40_20*AA[4] + v41_21*AA[5];
2712
2713 v41_21 = e0[3] - e1[3];
2714 v40_20 = e0[2] - e1[2];
2715 d0[3] = e0[3] + e1[3];
2716 d0[2] = e0[2] + e1[2];
2717 d1[3] = v41_21*AA[0] - v40_20*AA[1];
2718 d1[2] = v40_20*AA[0] + v41_21*AA[1];
2719
2720 AA -= 8;
2721
2722 d0 += 4;
2723 d1 += 4;
2724 e0 += 4;
2725 e1 += 4;
2726 }
2727 }
2728
2729 // step 3
2730 ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
2731
2732 // optimized step 3:
2733
2734 // the original step3 loop can be nested r inside s or s inside r;
2735 // it's written originally as s inside r, but this is dumb when r
2736 // iterates many times, and s few. So I have two copies of it and
2737 // switch between them halfway.
2738
2739 // this is iteration 0 of step 3
2740 imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*0, -(n >> 3), A);
2741 imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*1, -(n >> 3), A);
2742
2743 // this is iteration 1 of step 3
2744 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*0, -(n >> 4), A, 16);
2745 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*1, -(n >> 4), A, 16);
2746 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*2, -(n >> 4), A, 16);
2747 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*3, -(n >> 4), A, 16);
2748
2749 l=2;
2750 for (; l < (ld-3)>>1; ++l) {
2751 int k0 = n >> (l+2), k0_2 = k0>>1;
2752 int lim = 1 << (l+1);
2753 int i;
2754 for (i=0; i < lim; ++i)
2755 imdct_step3_inner_r_loop(n >> (l+4), u, n2-1 - k0*i, -k0_2, A, 1 << (l+3));
2756 }
2757
2758 for (; l < ld-6; ++l) {
2759 int k0 = n >> (l+2), k1 = 1 << (l+3), k0_2 = k0>>1;
2760 int rlim = n >> (l+6), r;
2761 int lim = 1 << (l+1);
2762 int i_off;
2763 float *A0 = A;
2764 i_off = n2-1;
2765 for (r=rlim; r > 0; --r) {
2766 imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0);
2767 A0 += k1*4;
2768 i_off -= 8;
2769 }
2770 }
2771
2772 // iterations with count:
2773 // ld-6,-5,-4 all interleaved together
2774 // the big win comes from getting rid of needless flops
2775 // due to the constants on pass 5 & 4 being all 1 and 0;
2776 // combining them to be simultaneous to improve cache made little difference
2777 imdct_step3_inner_s_loop_ld654(n >> 5, u, n2-1, A, n);
2778
2779 // output is u
2780
2781 // step 4, 5, and 6
2782 // cannot be in-place because of step 5
2783 {
2784 uint16 *bitrev = f->bit_reverse[blocktype];
2785 // weirdly, I'd have thought reading sequentially and writing
2786 // erratically would have been better than vice-versa, but in
2787 // fact that's not what my testing showed. (That is, with
2788 // j = bitreverse(i), do you read i and write j, or read j and write i.)
2789
2790 float *d0 = &v[n4-4];
2791 float *d1 = &v[n2-4];
2792 while (d0 >= v) {
2793 int k4;
2794
2795 k4 = bitrev[0];
2796 d1[3] = u[k4+0];
2797 d1[2] = u[k4+1];
2798 d0[3] = u[k4+2];
2799 d0[2] = u[k4+3];
2800
2801 k4 = bitrev[1];
2802 d1[1] = u[k4+0];
2803 d1[0] = u[k4+1];
2804 d0[1] = u[k4+2];
2805 d0[0] = u[k4+3];
2806
2807 d0 -= 4;
2808 d1 -= 4;
2809 bitrev += 2;
2810 }
2811 }
2812 // (paper output is u, now v)
2813
2814
2815 // data must be in buf2
2816 assert(v == buf2);
2817
2818 // step 7 (paper output is v, now v)
2819 // this is now in place
2820 {
2821 float *C = f->C[blocktype];
2822 float *d, *e;
2823
2824 d = v;
2825 e = v + n2 - 4;
2826
2827 while (d < e) {
2828 float a02,a11,b0,b1,b2,b3;
2829
2830 a02 = d[0] - e[2];
2831 a11 = d[1] + e[3];
2832
2833 b0 = C[1]*a02 + C[0]*a11;
2834 b1 = C[1]*a11 - C[0]*a02;
2835
2836 b2 = d[0] + e[ 2];
2837 b3 = d[1] - e[ 3];
2838
2839 d[0] = b2 + b0;
2840 d[1] = b3 + b1;
2841 e[2] = b2 - b0;
2842 e[3] = b1 - b3;
2843
2844 a02 = d[2] - e[0];
2845 a11 = d[3] + e[1];
2846
2847 b0 = C[3]*a02 + C[2]*a11;
2848 b1 = C[3]*a11 - C[2]*a02;
2849
2850 b2 = d[2] + e[ 0];
2851 b3 = d[3] - e[ 1];
2852
2853 d[2] = b2 + b0;
2854 d[3] = b3 + b1;
2855 e[0] = b2 - b0;
2856 e[1] = b1 - b3;
2857
2858 C += 4;
2859 d += 4;
2860 e -= 4;
2861 }
2862 }
2863
2864 // data must be in buf2
2865
2866
2867 // step 8+decode (paper output is X, now buffer)
2868 // this generates pairs of data a la 8 and pushes them directly through
2869 // the decode kernel (pushing rather than pulling) to avoid having
2870 // to make another pass later
2871
2872 // this cannot POSSIBLY be in place, so we refer to the buffers directly
2873
2874 {
2875 float *d0,*d1,*d2,*d3;
2876
2877 float *B = f->B[blocktype] + n2 - 8;
2878 float *e = buf2 + n2 - 8;
2879 d0 = &buffer[0];
2880 d1 = &buffer[n2-4];
2881 d2 = &buffer[n2];
2882 d3 = &buffer[n-4];
2883 while (e >= v) {
2884 float p0,p1,p2,p3;
2885
2886 p3 = e[6]*B[7] - e[7]*B[6];
2887 p2 = -e[6]*B[6] - e[7]*B[7];
2888
2889 d0[0] = p3;
2890 d1[3] = - p3;
2891 d2[0] = p2;
2892 d3[3] = p2;
2893
2894 p1 = e[4]*B[5] - e[5]*B[4];
2895 p0 = -e[4]*B[4] - e[5]*B[5];
2896
2897 d0[1] = p1;
2898 d1[2] = - p1;
2899 d2[1] = p0;
2900 d3[2] = p0;
2901
2902 p3 = e[2]*B[3] - e[3]*B[2];
2903 p2 = -e[2]*B[2] - e[3]*B[3];
2904
2905 d0[2] = p3;
2906 d1[1] = - p3;
2907 d2[2] = p2;
2908 d3[1] = p2;
2909
2910 p1 = e[0]*B[1] - e[1]*B[0];
2911 p0 = -e[0]*B[0] - e[1]*B[1];
2912
2913 d0[3] = p1;
2914 d1[0] = - p1;
2915 d2[3] = p0;
2916 d3[0] = p0;
2917
2918 B -= 8;
2919 e -= 8;
2920 d0 += 4;
2921 d2 += 4;
2922 d1 -= 4;
2923 d3 -= 4;
2924 }
2925 }
2926
2927 temp_free(f,buf2);
2928 temp_alloc_restore(f,save_point);
2929 }
2930
2931 #if 0
2932 // this is the original version of the above code, if you want to optimize it from scratch
2933 void inverse_mdct_naive(float *buffer, int n)
2934 {
2935 float s;
2936 float A[1 << 12], B[1 << 12], C[1 << 11];
2937 int i,k,k2,k4, n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
2938 int n3_4 = n - n4, ld;
2939 // how can they claim this only uses N words?!
2940 // oh, because they're only used sparsely, whoops
2941 float u[1 << 13], X[1 << 13], v[1 << 13], w[1 << 13];
2942 // set up twiddle factors
2943
2944 for (k=k2=0; k < n4; ++k,k2+=2) {
2945 A[k2 ] = (float) cos(4*k*M_PI/n);
2946 A[k2+1] = (float) -sin(4*k*M_PI/n);
2947 B[k2 ] = (float) cos((k2+1)*M_PI/n/2);
2948 B[k2+1] = (float) sin((k2+1)*M_PI/n/2);
2949 }
2950 for (k=k2=0; k < n8; ++k,k2+=2) {
2951 C[k2 ] = (float) cos(2*(k2+1)*M_PI/n);
2952 C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
2953 }
2954
2955 // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
2956 // Note there are bugs in that pseudocode, presumably due to them attempting
2957 // to rename the arrays nicely rather than representing the way their actual
2958 // implementation bounces buffers back and forth. As a result, even in the
2959 // "some formulars corrected" version, a direct implementation fails. These
2960 // are noted below as "paper bug".
2961
2962 // copy and reflect spectral data
2963 for (k=0; k < n2; ++k) u[k] = buffer[k];
2964 for ( ; k < n ; ++k) u[k] = -buffer[n - k - 1];
2965 // kernel from paper
2966 // step 1
2967 for (k=k2=k4=0; k < n4; k+=1, k2+=2, k4+=4) {
2968 v[n-k4-1] = (u[k4] - u[n-k4-1]) * A[k2] - (u[k4+2] - u[n-k4-3])*A[k2+1];
2969 v[n-k4-3] = (u[k4] - u[n-k4-1]) * A[k2+1] + (u[k4+2] - u[n-k4-3])*A[k2];
2970 }
2971 // step 2
2972 for (k=k4=0; k < n8; k+=1, k4+=4) {
2973 w[n2+3+k4] = v[n2+3+k4] + v[k4+3];
2974 w[n2+1+k4] = v[n2+1+k4] + v[k4+1];
2975 w[k4+3] = (v[n2+3+k4] - v[k4+3])*A[n2-4-k4] - (v[n2+1+k4]-v[k4+1])*A[n2-3-k4];
2976 w[k4+1] = (v[n2+1+k4] - v[k4+1])*A[n2-4-k4] + (v[n2+3+k4]-v[k4+3])*A[n2-3-k4];
2977 }
2978 // step 3
2979 ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
2980 for (l=0; l < ld-3; ++l) {
2981 int k0 = n >> (l+2), k1 = 1 << (l+3);
2982 int rlim = n >> (l+4), r4, r;
2983 int s2lim = 1 << (l+2), s2;
2984 for (r=r4=0; r < rlim; r4+=4,++r) {
2985 for (s2=0; s2 < s2lim; s2+=2) {
2986 u[n-1-k0*s2-r4] = w[n-1-k0*s2-r4] + w[n-1-k0*(s2+1)-r4];
2987 u[n-3-k0*s2-r4] = w[n-3-k0*s2-r4] + w[n-3-k0*(s2+1)-r4];
2988 u[n-1-k0*(s2+1)-r4] = (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1]
2989 - (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1+1];
2990 u[n-3-k0*(s2+1)-r4] = (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1]
2991 + (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1+1];
2992 }
2993 }
2994 if (l+1 < ld-3) {
2995 // paper bug: ping-ponging of u&w here is omitted
2996 memcpy(w, u, sizeof(u));
2997 }
2998 }
2999
3000 // step 4
3001 for (i=0; i < n8; ++i) {
3002 int j = bit_reverse(i) >> (32-ld+3);
3003 assert(j < n8);
3004 if (i == j) {
3005 // paper bug: original code probably swapped in place; if copying,
3006 // need to directly copy in this case
3007 int i8 = i << 3;
3008 v[i8+1] = u[i8+1];
3009 v[i8+3] = u[i8+3];
3010 v[i8+5] = u[i8+5];
3011 v[i8+7] = u[i8+7];
3012 } else if (i < j) {
3013 int i8 = i << 3, j8 = j << 3;
3014 v[j8+1] = u[i8+1], v[i8+1] = u[j8 + 1];
3015 v[j8+3] = u[i8+3], v[i8+3] = u[j8 + 3];
3016 v[j8+5] = u[i8+5], v[i8+5] = u[j8 + 5];
3017 v[j8+7] = u[i8+7], v[i8+7] = u[j8 + 7];
3018 }
3019 }
3020 // step 5
3021 for (k=0; k < n2; ++k) {
3022 w[k] = v[k*2+1];
3023 }
3024 // step 6
3025 for (k=k2=k4=0; k < n8; ++k, k2 += 2, k4 += 4) {
3026 u[n-1-k2] = w[k4];
3027 u[n-2-k2] = w[k4+1];
3028 u[n3_4 - 1 - k2] = w[k4+2];
3029 u[n3_4 - 2 - k2] = w[k4+3];
3030 }
3031 // step 7
3032 for (k=k2=0; k < n8; ++k, k2 += 2) {
3033 v[n2 + k2 ] = ( u[n2 + k2] + u[n-2-k2] + C[k2+1]*(u[n2+k2]-u[n-2-k2]) + C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
3034 v[n-2 - k2] = ( u[n2 + k2] + u[n-2-k2] - C[k2+1]*(u[n2+k2]-u[n-2-k2]) - C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
3035 v[n2+1+ k2] = ( u[n2+1+k2] - u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
3036 v[n-1 - k2] = (-u[n2+1+k2] + u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
3037 }
3038 // step 8
3039 for (k=k2=0; k < n4; ++k,k2 += 2) {
3040 X[k] = v[k2+n2]*B[k2 ] + v[k2+1+n2]*B[k2+1];
3041 X[n2-1-k] = v[k2+n2]*B[k2+1] - v[k2+1+n2]*B[k2 ];
3042 }
3043
3044 // decode kernel to output
3045 // determined the following value experimentally
3046 // (by first figuring out what made inverse_mdct_slow work); then matching that here
3047 // (probably vorbis encoder premultiplies by n or n/2, to save it on the decoder?)
3048 s = 0.5; // theoretically would be n4
3049
3050 // [[[ note! the s value of 0.5 is compensated for by the B[] in the current code,
3051 // so it needs to use the "old" B values to behave correctly, or else
3052 // set s to 1.0 ]]]
3053 for (i=0; i < n4 ; ++i) buffer[i] = s * X[i+n4];
3054 for ( ; i < n3_4; ++i) buffer[i] = -s * X[n3_4 - i - 1];
3055 for ( ; i < n ; ++i) buffer[i] = -s * X[i - n3_4];
3056 }
3057 #endif
3058
3059 static float *get_window(vorb *f, int len)
3060 {
3061 len <<= 1;
3062 if (len == f->blocksize_0) return f->window[0];
3063 if (len == f->blocksize_1) return f->window[1];
3064 return NULL;
3065 }
3066
3067 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3068 typedef int16 YTYPE;
3069 #else
3070 typedef int YTYPE;
3071 #endif
3072 static int do_floor(vorb *f, Mapping *map, int i, int n, float *target, YTYPE *finalY, uint8 *step2_flag)
3073 {
3074 int n2 = n >> 1;
3075 int s = map->chan[i].mux, floor;
3076 floor = map->submap_floor[s];
3077 if (f->floor_types[floor] == 0) {
3078 return error(f, VORBIS_invalid_stream);
3079 } else {
3080 Floor1 *g = &f->floor_config[floor].floor1;
3081 int j,q;
3082 int lx = 0, ly = finalY[0] * g->floor1_multiplier;
3083 for (q=1; q < g->values; ++q) {
3084 j = g->sorted_order[q];
3085 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3086 STBV_NOTUSED(step2_flag);
3087 if (finalY[j] >= 0)
3088 #else
3089 if (step2_flag[j])
3090 #endif
3091 {
3092 int hy = finalY[j] * g->floor1_multiplier;
3093 int hx = g->Xlist[j];
3094 if (lx != hx)
3095 draw_line(target, lx,ly, hx,hy, n2);
3096 CHECK(f);
3097 lx = hx, ly = hy;
3098 }
3099 }
3100 if (lx < n2) {
3101 // optimization of: draw_line(target, lx,ly, n,ly, n2);
3102 for (j=lx; j < n2; ++j)
3103 LINE_OP(target[j], inverse_db_table[ly]);
3104 CHECK(f);
3105 }
3106 }
3107 return TRUE;
3108 }
3109
3110 // The meaning of "left" and "right"
3111 //
3112 // For a given frame:
3113 // we compute samples from 0..n
3114 // window_center is n/2
3115 // we'll window and mix the samples from left_start to left_end with data from the previous frame
3116 // all of the samples from left_end to right_start can be output without mixing; however,
3117 // this interval is 0-length except when transitioning between short and long frames
3118 // all of the samples from right_start to right_end need to be mixed with the next frame,
3119 // which we don't have, so those get saved in a buffer
3120 // frame N's right_end-right_start, the number of samples to mix with the next frame,
3121 // has to be the same as frame N+1's left_end-left_start (which they are by
3122 // construction)
3123
3124 static int vorbis_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
3125 {
3126 Mode *m;
3127 int i, n, prev, next, window_center;
3128 f->channel_buffer_start = f->channel_buffer_end = 0;
3129
3130 retry:
3131 if (f->eof) return FALSE;
3132 if (!maybe_start_packet(f))
3133 return FALSE;
3134 // check packet type
3135 if (get_bits(f,1) != 0) {
3136 if (IS_PUSH_MODE(f))
3137 return error(f,VORBIS_bad_packet_type);
3138 while (EOP != get8_packet(f));
3139 goto retry;
3140 }
3141
3142 if (f->alloc.alloc_buffer)
3143 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3144
3145 i = get_bits(f, ilog(f->mode_count-1));
3146 if (i == EOP) return FALSE;
3147 if (i >= f->mode_count) return FALSE;
3148 *mode = i;
3149 m = f->mode_config + i;
3150 if (m->blockflag) {
3151 n = f->blocksize_1;
3152 prev = get_bits(f,1);
3153 next = get_bits(f,1);
3154 } else {
3155 prev = next = 0;
3156 n = f->blocksize_0;
3157 }
3158
3159 // WINDOWING
3160
3161 window_center = n >> 1;
3162 if (m->blockflag && !prev) {
3163 *p_left_start = (n - f->blocksize_0) >> 2;
3164 *p_left_end = (n + f->blocksize_0) >> 2;
3165 } else {
3166 *p_left_start = 0;
3167 *p_left_end = window_center;
3168 }
3169 if (m->blockflag && !next) {
3170 *p_right_start = (n*3 - f->blocksize_0) >> 2;
3171 *p_right_end = (n*3 + f->blocksize_0) >> 2;
3172 } else {
3173 *p_right_start = window_center;
3174 *p_right_end = n;
3175 }
3176
3177 return TRUE;
3178 }
3179
3180 static int vorbis_decode_packet_rest(vorb *f, int *len, Mode *m, int left_start, int left_end, int right_start, int right_end, int *p_left)
3181 {
3182 Mapping *map;
3183 int i,j,k,n,n2;
3184 int zero_channel[256];
3185 int really_zero_channel[256];
3186
3187 // WINDOWING
3188
3189 STBV_NOTUSED(left_end);
3190 n = f->blocksize[m->blockflag];
3191 map = &f->mapping[m->mapping];
3192
3193 // FLOORS
3194 n2 = n >> 1;
3195
3196 CHECK(f);
3197
3198 for (i=0; i < f->channels; ++i) {
3199 int s = map->chan[i].mux, floor;
3200 zero_channel[i] = FALSE;
3201 floor = map->submap_floor[s];
3202 if (f->floor_types[floor] == 0) {
3203 return error(f, VORBIS_invalid_stream);
3204 } else {
3205 Floor1 *g = &f->floor_config[floor].floor1;
3206 if (get_bits(f, 1)) {
3207 short *finalY;
3208 uint8 step2_flag[256];
3209 static int range_list[4] = { 256, 128, 86, 64 };
3210 int range = range_list[g->floor1_multiplier-1];
3211 int offset = 2;
3212 finalY = f->finalY[i];
3213 finalY[0] = get_bits(f, ilog(range)-1);
3214 finalY[1] = get_bits(f, ilog(range)-1);
3215 for (j=0; j < g->partitions; ++j) {
3216 int pclass = g->partition_class_list[j];
3217 int cdim = g->class_dimensions[pclass];
3218 int cbits = g->class_subclasses[pclass];
3219 int csub = (1 << cbits)-1;
3220 int cval = 0;
3221 if (cbits) {
3222 Codebook *c = f->codebooks + g->class_masterbooks[pclass];
3223 DECODE(cval,f,c);
3224 }
3225 for (k=0; k < cdim; ++k) {
3226 int book = g->subclass_books[pclass][cval & csub];
3227 cval = cval >> cbits;
3228 if (book >= 0) {
3229 int temp;
3230 Codebook *c = f->codebooks + book;
3231 DECODE(temp,f,c);
3232 finalY[offset++] = temp;
3233 } else
3234 finalY[offset++] = 0;
3235 }
3236 }
3237 if (f->valid_bits == INVALID_BITS) goto error; // behavior according to spec
3238 step2_flag[0] = step2_flag[1] = 1;
3239 for (j=2; j < g->values; ++j) {
3240 int low, high, pred, highroom, lowroom, room, val;
3241 low = g->neighbors[j][0];
3242 high = g->neighbors[j][1];
3243 //neighbors(g->Xlist, j, &low, &high);
3244 pred = predict_point(g->Xlist[j], g->Xlist[low], g->Xlist[high], finalY[low], finalY[high]);
3245 val = finalY[j];
3246 highroom = range - pred;
3247 lowroom = pred;
3248 if (highroom < lowroom)
3249 room = highroom * 2;
3250 else
3251 room = lowroom * 2;
3252 if (val) {
3253 step2_flag[low] = step2_flag[high] = 1;
3254 step2_flag[j] = 1;
3255 if (val >= room)
3256 if (highroom > lowroom)
3257 finalY[j] = val - lowroom + pred;
3258 else
3259 finalY[j] = pred - val + highroom - 1;
3260 else
3261 if (val & 1)
3262 finalY[j] = pred - ((val+1)>>1);
3263 else
3264 finalY[j] = pred + (val>>1);
3265 } else {
3266 step2_flag[j] = 0;
3267 finalY[j] = pred;
3268 }
3269 }
3270
3271 #ifdef STB_VORBIS_NO_DEFER_FLOOR
3272 do_floor(f, map, i, n, f->floor_buffers[i], finalY, step2_flag);
3273 #else
3274 // defer final floor computation until _after_ residue
3275 for (j=0; j < g->values; ++j) {
3276 if (!step2_flag[j])
3277 finalY[j] = -1;
3278 }
3279 #endif
3280 } else {
3281 error:
3282 zero_channel[i] = TRUE;
3283 }
3284 // So we just defer everything else to later
3285
3286 // at this point we've decoded the floor into buffer
3287 }
3288 }
3289 CHECK(f);
3290 // at this point we've decoded all floors
3291
3292 if (f->alloc.alloc_buffer)
3293 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3294
3295 // re-enable coupled channels if necessary
3296 memcpy(really_zero_channel, zero_channel, sizeof(really_zero_channel[0]) * f->channels);
3297 for (i=0; i < map->coupling_steps; ++i)
3298 if (!zero_channel[map->chan[i].magnitude] || !zero_channel[map->chan[i].angle]) {
3299 zero_channel[map->chan[i].magnitude] = zero_channel[map->chan[i].angle] = FALSE;
3300 }
3301
3302 CHECK(f);
3303 // RESIDUE DECODE
3304 for (i=0; i < map->submaps; ++i) {
3305 float *residue_buffers[STB_VORBIS_MAX_CHANNELS];
3306 int r;
3307 uint8 do_not_decode[256];
3308 int ch = 0;
3309 for (j=0; j < f->channels; ++j) {
3310 if (map->chan[j].mux == i) {
3311 if (zero_channel[j]) {
3312 do_not_decode[ch] = TRUE;
3313 residue_buffers[ch] = NULL;
3314 } else {
3315 do_not_decode[ch] = FALSE;
3316 residue_buffers[ch] = f->channel_buffers[j];
3317 }
3318 ++ch;
3319 }
3320 }
3321 r = map->submap_residue[i];
3322 decode_residue(f, residue_buffers, ch, n2, r, do_not_decode);
3323 }
3324
3325 if (f->alloc.alloc_buffer)
3326 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3327 CHECK(f);
3328
3329 // INVERSE COUPLING
3330 for (i = map->coupling_steps-1; i >= 0; --i) {
3331 int n2 = n >> 1;
3332 float *m = f->channel_buffers[map->chan[i].magnitude];
3333 float *a = f->channel_buffers[map->chan[i].angle ];
3334 for (j=0; j < n2; ++j) {
3335 float a2,m2;
3336 if (m[j] > 0)
3337 if (a[j] > 0)
3338 m2 = m[j], a2 = m[j] - a[j];
3339 else
3340 a2 = m[j], m2 = m[j] + a[j];
3341 else
3342 if (a[j] > 0)
3343 m2 = m[j], a2 = m[j] + a[j];
3344 else
3345 a2 = m[j], m2 = m[j] - a[j];
3346 m[j] = m2;
3347 a[j] = a2;
3348 }
3349 }
3350 CHECK(f);
3351
3352 // finish decoding the floors
3353 #ifndef STB_VORBIS_NO_DEFER_FLOOR
3354 for (i=0; i < f->channels; ++i) {
3355 if (really_zero_channel[i]) {
3356 memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
3357 } else {
3358 do_floor(f, map, i, n, f->channel_buffers[i], f->finalY[i], NULL);
3359 }
3360 }
3361 #else
3362 for (i=0; i < f->channels; ++i) {
3363 if (really_zero_channel[i]) {
3364 memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
3365 } else {
3366 for (j=0; j < n2; ++j)
3367 f->channel_buffers[i][j] *= f->floor_buffers[i][j];
3368 }
3369 }
3370 #endif
3371
3372 // INVERSE MDCT
3373 CHECK(f);
3374 for (i=0; i < f->channels; ++i)
3375 inverse_mdct(f->channel_buffers[i], n, f, m->blockflag);
3376 CHECK(f);
3377
3378 // this shouldn't be necessary, unless we exited on an error
3379 // and want to flush to get to the next packet
3380 flush_packet(f);
3381
3382 if (f->first_decode) {
3383 // assume we start so first non-discarded sample is sample 0
3384 // this isn't to spec, but spec would require us to read ahead
3385 // and decode the size of all current frames--could be done,
3386 // but presumably it's not a commonly used feature
3387 f->current_loc = 0u - n2; // start of first frame is positioned for discard (NB this is an intentional unsigned overflow/wrap-around)
3388 // we might have to discard samples "from" the next frame too,
3389 // if we're lapping a large block then a small at the start?
3390 f->discard_samples_deferred = n - right_end;
3391 f->current_loc_valid = TRUE;
3392 f->first_decode = FALSE;
3393 } else if (f->discard_samples_deferred) {
3394 if (f->discard_samples_deferred >= right_start - left_start) {
3395 f->discard_samples_deferred -= (right_start - left_start);
3396 left_start = right_start;
3397 *p_left = left_start;
3398 } else {
3399 left_start += f->discard_samples_deferred;
3400 *p_left = left_start;
3401 f->discard_samples_deferred = 0;
3402 }
3403 } else if (f->previous_length == 0 && f->current_loc_valid) {
3404 // we're recovering from a seek... that means we're going to discard
3405 // the samples from this packet even though we know our position from
3406 // the last page header, so we need to update the position based on
3407 // the discarded samples here
3408 // but wait, the code below is going to add this in itself even
3409 // on a discard, so we don't need to do it here...
3410 }
3411
3412 // check if we have ogg information about the sample # for this packet
3413 if (f->last_seg_which == f->end_seg_with_known_loc) {
3414 // if we have a valid current loc, and this is final:
3415 if (f->current_loc_valid && (f->page_flag & PAGEFLAG_last_page)) {
3416 uint32 current_end = f->known_loc_for_packet;
3417 // then let's infer the size of the (probably) short final frame
3418 if (current_end < f->current_loc + (right_end-left_start)) {
3419 if (current_end < f->current_loc) {
3420 // negative truncation, that's impossible!
3421 *len = 0;
3422 } else {
3423 *len = current_end - f->current_loc;
3424 }
3425 *len += left_start; // this doesn't seem right, but has no ill effect on my test files
3426 if (*len > right_end) *len = right_end; // this should never happen
3427 f->current_loc += *len;
3428 return TRUE;
3429 }
3430 }
3431 // otherwise, just set our sample loc
3432 // guess that the ogg granule pos refers to the _middle_ of the
3433 // last frame?
3434 // set f->current_loc to the position of left_start
3435 f->current_loc = f->known_loc_for_packet - (n2-left_start);
3436 f->current_loc_valid = TRUE;
3437 }
3438 if (f->current_loc_valid)
3439 f->current_loc += (right_start - left_start);
3440
3441 if (f->alloc.alloc_buffer)
3442 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3443 *len = right_end; // ignore samples after the window goes to 0
3444 CHECK(f);
3445
3446 return TRUE;
3447 }
3448
3449 static int vorbis_decode_packet(vorb *f, int *len, int *p_left, int *p_right)
3450 {
3451 int mode, left_end, right_end;
3452 if (!vorbis_decode_initial(f, p_left, &left_end, p_right, &right_end, &mode)) return 0;
3453 return vorbis_decode_packet_rest(f, len, f->mode_config + mode, *p_left, left_end, *p_right, right_end, p_left);
3454 }
3455
3456 static int vorbis_finish_frame(stb_vorbis *f, int len, int left, int right)
3457 {
3458 int prev,i,j;
3459 // we use right&left (the start of the right- and left-window sin()-regions)
3460 // to determine how much to return, rather than inferring from the rules
3461 // (same result, clearer code); 'left' indicates where our sin() window
3462 // starts, therefore where the previous window's right edge starts, and
3463 // therefore where to start mixing from the previous buffer. 'right'
3464 // indicates where our sin() ending-window starts, therefore that's where
3465 // we start saving, and where our returned-data ends.
3466
3467 // mixin from previous window
3468 if (f->previous_length) {
3469 int i,j, n = f->previous_length;
3470 float *w = get_window(f, n);
3471 if (w == NULL) return 0;
3472 for (i=0; i < f->channels; ++i) {
3473 for (j=0; j < n; ++j)
3474 f->channel_buffers[i][left+j] =
3475 f->channel_buffers[i][left+j]*w[ j] +
3476 f->previous_window[i][ j]*w[n-1-j];
3477 }
3478 }
3479
3480 prev = f->previous_length;
3481
3482 // last half of this data becomes previous window
3483 f->previous_length = len - right;
3484
3485 // @OPTIMIZE: could avoid this copy by double-buffering the
3486 // output (flipping previous_window with channel_buffers), but
3487 // then previous_window would have to be 2x as large, and
3488 // channel_buffers couldn't be temp mem (although they're NOT
3489 // currently temp mem, they could be (unless we want to level
3490 // performance by spreading out the computation))
3491 for (i=0; i < f->channels; ++i)
3492 for (j=0; right+j < len; ++j)
3493 f->previous_window[i][j] = f->channel_buffers[i][right+j];
3494
3495 if (!prev)
3496 // there was no previous packet, so this data isn't valid...
3497 // this isn't entirely true, only the would-have-overlapped data
3498 // isn't valid, but this seems to be what the spec requires
3499 return 0;
3500
3501 // truncate a short frame
3502 if (len < right) right = len;
3503
3504 f->samples_output += right-left;
3505
3506 return right - left;
3507 }
3508
3509 static int vorbis_pump_first_frame(stb_vorbis *f)
3510 {
3511 int len, right, left, res;
3512 res = vorbis_decode_packet(f, &len, &left, &right);
3513 if (res)
3514 vorbis_finish_frame(f, len, left, right);
3515 return res;
3516 }
3517
3518 #ifndef STB_VORBIS_NO_PUSHDATA_API
3519 static int is_whole_packet_present(stb_vorbis *f)
3520 {
3521 // make sure that we have the packet available before continuing...
3522 // this requires a full ogg parse, but we know we can fetch from f->stream
3523
3524 // instead of coding this out explicitly, we could save the current read state,
3525 // read the next packet with get8() until end-of-packet, check f->eof, then
3526 // reset the state? but that would be slower, esp. since we'd have over 256 bytes
3527 // of state to restore (primarily the page segment table)
3528
3529 int s = f->next_seg, first = TRUE;
3530 uint8 *p = f->stream;
3531
3532 if (s != -1) { // if we're not starting the packet with a 'continue on next page' flag
3533 for (; s < f->segment_count; ++s) {
3534 p += f->segments[s];
3535 if (f->segments[s] < 255) // stop at first short segment
3536 break;
3537 }
3538 // either this continues, or it ends it...
3539 if (s == f->segment_count)
3540 s = -1; // set 'crosses page' flag
3541 if (p > f->stream_end) return error(f, VORBIS_need_more_data);
3542 first = FALSE;
3543 }
3544 for (; s == -1;) {
3545 uint8 *q;
3546 int n;
3547
3548 // check that we have the page header ready
3549 if (p + 26 >= f->stream_end) return error(f, VORBIS_need_more_data);
3550 // validate the page
3551 if (memcmp(p, ogg_page_header, 4)) return error(f, VORBIS_invalid_stream);
3552 if (p[4] != 0) return error(f, VORBIS_invalid_stream);
3553 if (first) { // the first segment must NOT have 'continued_packet', later ones MUST
3554 if (f->previous_length)
3555 if ((p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream);
3556 // if no previous length, we're resynching, so we can come in on a continued-packet,
3557 // which we'll just drop
3558 } else {
3559 if (!(p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream);
3560 }
3561 n = p[26]; // segment counts
3562 q = p+27; // q points to segment table
3563 p = q + n; // advance past header
3564 // make sure we've read the segment table
3565 if (p > f->stream_end) return error(f, VORBIS_need_more_data);
3566 for (s=0; s < n; ++s) {
3567 p += q[s];
3568 if (q[s] < 255)
3569 break;
3570 }
3571 if (s == n)
3572 s = -1; // set 'crosses page' flag
3573 if (p > f->stream_end) return error(f, VORBIS_need_more_data);
3574 first = FALSE;
3575 }
3576 return TRUE;
3577 }
3578 #endif // !STB_VORBIS_NO_PUSHDATA_API
3579
3580 static int start_decoder(vorb *f)
3581 {
3582 uint8 header[6], x,y;
3583 int len,i,j,k, max_submaps = 0;
3584 int longest_floorlist=0;
3585
3586 // first page, first packet
3587 f->first_decode = TRUE;
3588
3589 if (!start_page(f)) return FALSE;
3590 // validate page flag
3591 if (!(f->page_flag & PAGEFLAG_first_page)) return error(f, VORBIS_invalid_first_page);
3592 if (f->page_flag & PAGEFLAG_last_page) return error(f, VORBIS_invalid_first_page);
3593 if (f->page_flag & PAGEFLAG_continued_packet) return error(f, VORBIS_invalid_first_page);
3594 // check for expected packet length
3595 if (f->segment_count != 1) return error(f, VORBIS_invalid_first_page);
3596 if (f->segments[0] != 30) {
3597 // check for the Ogg skeleton fishead identifying header to refine our error
3598 if (f->segments[0] == 64 &&
3599 getn(f, header, 6) &&
3600 header[0] == 'f' &&
3601 header[1] == 'i' &&
3602 header[2] == 's' &&
3603 header[3] == 'h' &&
3604 header[4] == 'e' &&
3605 header[5] == 'a' &&
3606 get8(f) == 'd' &&
3607 get8(f) == '\0') return error(f, VORBIS_ogg_skeleton_not_supported);
3608 else
3609 return error(f, VORBIS_invalid_first_page);
3610 }
3611
3612 // read packet
3613 // check packet header
3614 if (get8(f) != VORBIS_packet_id) return error(f, VORBIS_invalid_first_page);
3615 if (!getn(f, header, 6)) return error(f, VORBIS_unexpected_eof);
3616 if (!vorbis_validate(header)) return error(f, VORBIS_invalid_first_page);
3617 // vorbis_version
3618 if (get32(f) != 0) return error(f, VORBIS_invalid_first_page);
3619 f->channels = get8(f); if (!f->channels) return error(f, VORBIS_invalid_first_page);
3620 if (f->channels > STB_VORBIS_MAX_CHANNELS) return error(f, VORBIS_too_many_channels);
3621 f->sample_rate = get32(f); if (!f->sample_rate) return error(f, VORBIS_invalid_first_page);
3622 get32(f); // bitrate_maximum
3623 get32(f); // bitrate_nominal
3624 get32(f); // bitrate_minimum
3625 x = get8(f);
3626 {
3627 int log0,log1;
3628 log0 = x & 15;
3629 log1 = x >> 4;
3630 f->blocksize_0 = 1 << log0;
3631 f->blocksize_1 = 1 << log1;
3632 if (log0 < 6 || log0 > 13) return error(f, VORBIS_invalid_setup);
3633 if (log1 < 6 || log1 > 13) return error(f, VORBIS_invalid_setup);
3634 if (log0 > log1) return error(f, VORBIS_invalid_setup);
3635 }
3636
3637 // framing_flag
3638 x = get8(f);
3639 if (!(x & 1)) return error(f, VORBIS_invalid_first_page);
3640
3641 // second packet!
3642 if (!start_page(f)) return FALSE;
3643
3644 if (!start_packet(f)) return FALSE;
3645
3646 if (!next_segment(f)) return FALSE;
3647
3648 if (get8_packet(f) != VORBIS_packet_comment) return error(f, VORBIS_invalid_setup);
3649 for (i=0; i < 6; ++i) header[i] = get8_packet(f);
3650 if (!vorbis_validate(header)) return error(f, VORBIS_invalid_setup);
3651 //file vendor
3652 len = get32_packet(f);
3653 f->vendor = (char*)setup_malloc(f, sizeof(char) * (len+1));
3654 if (f->vendor == NULL) return error(f, VORBIS_outofmem);
3655 for(i=0; i < len; ++i) {
3656 f->vendor[i] = get8_packet(f);
3657 }
3658 f->vendor[len] = (char)'\0';
3659 //user comments
3660 f->comment_list_length = get32_packet(f);
3661 f->comment_list = NULL;
3662 if (f->comment_list_length > 0)
3663 {
3664 f->comment_list = (char**) setup_malloc(f, sizeof(char*) * (f->comment_list_length));
3665 if (f->comment_list == NULL) return error(f, VORBIS_outofmem);
3666 }
3667
3668 for(i=0; i < f->comment_list_length; ++i) {
3669 len = get32_packet(f);
3670 f->comment_list[i] = (char*)setup_malloc(f, sizeof(char) * (len+1));
3671 if (f->comment_list[i] == NULL) return error(f, VORBIS_outofmem);
3672
3673 for(j=0; j < len; ++j) {
3674 f->comment_list[i][j] = get8_packet(f);
3675 }
3676 f->comment_list[i][len] = (char)'\0';
3677 }
3678
3679 // framing_flag
3680 x = get8_packet(f);
3681 if (!(x & 1)) return error(f, VORBIS_invalid_setup);
3682
3683
3684 skip(f, f->bytes_in_seg);
3685 f->bytes_in_seg = 0;
3686
3687 do {
3688 len = next_segment(f);
3689 skip(f, len);
3690 f->bytes_in_seg = 0;
3691 } while (len);
3692
3693 // third packet!
3694 if (!start_packet(f)) return FALSE;
3695
3696 #ifndef STB_VORBIS_NO_PUSHDATA_API
3697 if (IS_PUSH_MODE(f)) {
3698 if (!is_whole_packet_present(f)) {
3699 // convert error in ogg header to write type
3700 if (f->error == VORBIS_invalid_stream)
3701 f->error = VORBIS_invalid_setup;
3702 return FALSE;
3703 }
3704 }
3705 #endif
3706
3707 crc32_init(); // always init it, to avoid multithread race conditions
3708
3709 if (get8_packet(f) != VORBIS_packet_setup) return error(f, VORBIS_invalid_setup);
3710 for (i=0; i < 6; ++i) header[i] = get8_packet(f);
3711 if (!vorbis_validate(header)) return error(f, VORBIS_invalid_setup);
3712
3713 // codebooks
3714
3715 f->codebook_count = get_bits(f,8) + 1;
3716 f->codebooks = (Codebook *) setup_malloc(f, sizeof(*f->codebooks) * f->codebook_count);
3717 if (f->codebooks == NULL) return error(f, VORBIS_outofmem);
3718 memset(f->codebooks, 0, sizeof(*f->codebooks) * f->codebook_count);
3719 for (i=0; i < f->codebook_count; ++i) {
3720 uint32 *values;
3721 int ordered, sorted_count;
3722 int total=0;
3723 uint8 *lengths;
3724 Codebook *c = f->codebooks+i;
3725 CHECK(f);
3726 x = get_bits(f, 8); if (x != 0x42) return error(f, VORBIS_invalid_setup);
3727 x = get_bits(f, 8); if (x != 0x43) return error(f, VORBIS_invalid_setup);
3728 x = get_bits(f, 8); if (x != 0x56) return error(f, VORBIS_invalid_setup);
3729 x = get_bits(f, 8);
3730 c->dimensions = (get_bits(f, 8)<<8) + x;
3731 x = get_bits(f, 8);
3732 y = get_bits(f, 8);
3733 c->entries = (get_bits(f, 8)<<16) + (y<<8) + x;
3734 ordered = get_bits(f,1);
3735 c->sparse = ordered ? 0 : get_bits(f,1);
3736
3737 if (c->dimensions == 0 && c->entries != 0) return error(f, VORBIS_invalid_setup);
3738
3739 if (c->sparse)
3740 lengths = (uint8 *) setup_temp_malloc(f, c->entries);
3741 else
3742 lengths = c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
3743
3744 if (!lengths) return error(f, VORBIS_outofmem);
3745
3746 if (ordered) {
3747 int current_entry = 0;
3748 int current_length = get_bits(f,5) + 1;
3749 while (current_entry < c->entries) {
3750 int limit = c->entries - current_entry;
3751 int n = get_bits(f, ilog(limit));
3752 if (current_length >= 32) return error(f, VORBIS_invalid_setup);
3753 if (current_entry + n > (int) c->entries) { return error(f, VORBIS_invalid_setup); }
3754 memset(lengths + current_entry, current_length, n);
3755 current_entry += n;
3756 ++current_length;
3757 }
3758 } else {
3759 for (j=0; j < c->entries; ++j) {
3760 int present = c->sparse ? get_bits(f,1) : 1;
3761 if (present) {
3762 lengths[j] = get_bits(f, 5) + 1;
3763 ++total;
3764 if (lengths[j] == 32)
3765 return error(f, VORBIS_invalid_setup);
3766 } else {
3767 lengths[j] = NO_CODE;
3768 }
3769 }
3770 }
3771
3772 if (c->sparse && total >= c->entries >> 2) {
3773 // convert sparse items to non-sparse!
3774 if (c->entries > (int) f->setup_temp_memory_required)
3775 f->setup_temp_memory_required = c->entries;
3776
3777 c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
3778 if (c->codeword_lengths == NULL) return error(f, VORBIS_outofmem);
3779 memcpy(c->codeword_lengths, lengths, c->entries);
3780 setup_temp_free(f, lengths, c->entries); // note this is only safe if there have been no intervening temp mallocs!
3781 lengths = c->codeword_lengths;
3782 c->sparse = 0;
3783 }
3784
3785 // compute the size of the sorted tables
3786 if (c->sparse) {
3787 sorted_count = total;
3788 } else {
3789 sorted_count = 0;
3790 #ifndef STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
3791 for (j=0; j < c->entries; ++j)
3792 if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE)
3793 ++sorted_count;
3794 #endif
3795 }
3796
3797 c->sorted_entries = sorted_count;
3798 values = NULL;
3799
3800 CHECK(f);
3801 if (!c->sparse) {
3802 c->codewords = (uint32 *) setup_malloc(f, sizeof(c->codewords[0]) * c->entries);
3803 if (!c->codewords) return error(f, VORBIS_outofmem);
3804 } else {
3805 unsigned int size;
3806 if (c->sorted_entries) {
3807 c->codeword_lengths = (uint8 *) setup_malloc(f, c->sorted_entries);
3808 if (!c->codeword_lengths) return error(f, VORBIS_outofmem);
3809 c->codewords = (uint32 *) setup_temp_malloc(f, sizeof(*c->codewords) * c->sorted_entries);
3810 if (!c->codewords) return error(f, VORBIS_outofmem);
3811 values = (uint32 *) setup_temp_malloc(f, sizeof(*values) * c->sorted_entries);
3812 if (!values) return error(f, VORBIS_outofmem);
3813 }
3814 size = c->entries + (sizeof(*c->codewords) + sizeof(*values)) * c->sorted_entries;
3815 if (size > f->setup_temp_memory_required)
3816 f->setup_temp_memory_required = size;
3817 }
3818
3819 if (!compute_codewords(c, lengths, c->entries, values)) {
3820 if (c->sparse) setup_temp_free(f, values, 0);
3821 return error(f, VORBIS_invalid_setup);
3822 }
3823
3824 if (c->sorted_entries) {
3825 // allocate an extra slot for sentinels
3826 c->sorted_codewords = (uint32 *) setup_malloc(f, sizeof(*c->sorted_codewords) * (c->sorted_entries+1));
3827 if (c->sorted_codewords == NULL) return error(f, VORBIS_outofmem);
3828 // allocate an extra slot at the front so that c->sorted_values[-1] is defined
3829 // so that we can catch that case without an extra if
3830 c->sorted_values = ( int *) setup_malloc(f, sizeof(*c->sorted_values ) * (c->sorted_entries+1));
3831 if (c->sorted_values == NULL) return error(f, VORBIS_outofmem);
3832 ++c->sorted_values;
3833 c->sorted_values[-1] = -1;
3834 compute_sorted_huffman(c, lengths, values);
3835 }
3836
3837 if (c->sparse) {
3838 setup_temp_free(f, values, sizeof(*values)*c->sorted_entries);
3839 setup_temp_free(f, c->codewords, sizeof(*c->codewords)*c->sorted_entries);
3840 setup_temp_free(f, lengths, c->entries);
3841 c->codewords = NULL;
3842 }
3843
3844 compute_accelerated_huffman(c);
3845
3846 CHECK(f);
3847 c->lookup_type = get_bits(f, 4);
3848 if (c->lookup_type > 2) return error(f, VORBIS_invalid_setup);
3849 if (c->lookup_type > 0) {
3850 uint16 *mults;
3851 c->minimum_value = float32_unpack(get_bits(f, 32));
3852 c->delta_value = float32_unpack(get_bits(f, 32));
3853 c->value_bits = get_bits(f, 4)+1;
3854 c->sequence_p = get_bits(f,1);
3855 if (c->lookup_type == 1) {
3856 int values = lookup1_values(c->entries, c->dimensions);
3857 if (values < 0) return error(f, VORBIS_invalid_setup);
3858 c->lookup_values = (uint32) values;
3859 } else {
3860 c->lookup_values = c->entries * c->dimensions;
3861 }
3862 if (c->lookup_values == 0) return error(f, VORBIS_invalid_setup);
3863 mults = (uint16 *) setup_temp_malloc(f, sizeof(mults[0]) * c->lookup_values);
3864 if (mults == NULL) return error(f, VORBIS_outofmem);
3865 for (j=0; j < (int) c->lookup_values; ++j) {
3866 int q = get_bits(f, c->value_bits);
3867 if (q == EOP) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_invalid_setup); }
3868 mults[j] = q;
3869 }
3870
3871 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
3872 if (c->lookup_type == 1) {
3873 int len, sparse = c->sparse;
3874 float last=0;
3875 // pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop
3876 if (sparse) {
3877 if (c->sorted_entries == 0) goto skip;
3878 c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->sorted_entries * c->dimensions);
3879 } else
3880 c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->entries * c->dimensions);
3881 if (c->multiplicands == NULL) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
3882 len = sparse ? c->sorted_entries : c->entries;
3883 for (j=0; j < len; ++j) {
3884 unsigned int z = sparse ? c->sorted_values[j] : j;
3885 unsigned int div=1;
3886 for (k=0; k < c->dimensions; ++k) {
3887 int off = (z / div) % c->lookup_values;
3888 float val = mults[off]*c->delta_value + c->minimum_value + last;
3889 c->multiplicands[j*c->dimensions + k] = val;
3890 if (c->sequence_p)
3891 last = val;
3892 if (k+1 < c->dimensions) {
3893 if (div > UINT_MAX / (unsigned int) c->lookup_values) {
3894 setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values);
3895 return error(f, VORBIS_invalid_setup);
3896 }
3897 div *= c->lookup_values;
3898 }
3899 }
3900 }
3901 c->lookup_type = 2;
3902 }
3903 else
3904 #endif
3905 {
3906 float last=0;
3907 CHECK(f);
3908 c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->lookup_values);
3909 if (c->multiplicands == NULL) { setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
3910 for (j=0; j < (int) c->lookup_values; ++j) {
3911 float val = mults[j] * c->delta_value + c->minimum_value + last;
3912 c->multiplicands[j] = val;
3913 if (c->sequence_p)
3914 last = val;
3915 }
3916 }
3917 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
3918 skip:;
3919 #endif
3920 setup_temp_free(f, mults, sizeof(mults[0])*c->lookup_values);
3921
3922 CHECK(f);
3923 }
3924 CHECK(f);
3925 }
3926
3927 // time domain transfers (notused)
3928
3929 x = get_bits(f, 6) + 1;
3930 for (i=0; i < x; ++i) {
3931 uint32 z = get_bits(f, 16);
3932 if (z != 0) return error(f, VORBIS_invalid_setup);
3933 }
3934
3935 // Floors
3936 f->floor_count = get_bits(f, 6)+1;
3937 f->floor_config = (Floor *) setup_malloc(f, f->floor_count * sizeof(*f->floor_config));
3938 if (f->floor_config == NULL) return error(f, VORBIS_outofmem);
3939 for (i=0; i < f->floor_count; ++i) {
3940 f->floor_types[i] = get_bits(f, 16);
3941 if (f->floor_types[i] > 1) return error(f, VORBIS_invalid_setup);
3942 if (f->floor_types[i] == 0) {
3943 Floor0 *g = &f->floor_config[i].floor0;
3944 g->order = get_bits(f,8);
3945 g->rate = get_bits(f,16);
3946 g->bark_map_size = get_bits(f,16);
3947 g->amplitude_bits = get_bits(f,6);
3948 g->amplitude_offset = get_bits(f,8);
3949 g->number_of_books = get_bits(f,4) + 1;
3950 for (j=0; j < g->number_of_books; ++j)
3951 g->book_list[j] = get_bits(f,8);
3952 return error(f, VORBIS_feature_not_supported);
3953 } else {
3954 stbv__floor_ordering p[31*8+2];
3955 Floor1 *g = &f->floor_config[i].floor1;
3956 int max_class = -1;
3957 g->partitions = get_bits(f, 5);
3958 for (j=0; j < g->partitions; ++j) {
3959 g->partition_class_list[j] = get_bits(f, 4);
3960 if (g->partition_class_list[j] > max_class)
3961 max_class = g->partition_class_list[j];
3962 }
3963 for (j=0; j <= max_class; ++j) {
3964 g->class_dimensions[j] = get_bits(f, 3)+1;
3965 g->class_subclasses[j] = get_bits(f, 2);
3966 if (g->class_subclasses[j]) {
3967 g->class_masterbooks[j] = get_bits(f, 8);
3968 if (g->class_masterbooks[j] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3969 }
3970 for (k=0; k < 1 << g->class_subclasses[j]; ++k) {
3971 g->subclass_books[j][k] = (int16)get_bits(f,8)-1;
3972 if (g->subclass_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3973 }
3974 }
3975 g->floor1_multiplier = get_bits(f,2)+1;
3976 g->rangebits = get_bits(f,4);
3977 g->Xlist[0] = 0;
3978 g->Xlist[1] = 1 << g->rangebits;
3979 g->values = 2;
3980 for (j=0; j < g->partitions; ++j) {
3981 int c = g->partition_class_list[j];
3982 for (k=0; k < g->class_dimensions[c]; ++k) {
3983 g->Xlist[g->values] = get_bits(f, g->rangebits);
3984 ++g->values;
3985 }
3986 }
3987 // precompute the sorting
3988 for (j=0; j < g->values; ++j) {
3989 p[j].x = g->Xlist[j];
3990 p[j].id = j;
3991 }
3992 qsort(p, g->values, sizeof(p[0]), point_compare);
3993 for (j=0; j < g->values-1; ++j)
3994 if (p[j].x == p[j+1].x)
3995 return error(f, VORBIS_invalid_setup);
3996 for (j=0; j < g->values; ++j)
3997 g->sorted_order[j] = (uint8) p[j].id;
3998 // precompute the neighbors
3999 for (j=2; j < g->values; ++j) {
4000 int low = 0,hi = 0;
4001 neighbors(g->Xlist, j, &low,&hi);
4002 g->neighbors[j][0] = low;
4003 g->neighbors[j][1] = hi;
4004 }
4005
4006 if (g->values > longest_floorlist)
4007 longest_floorlist = g->values;
4008 }
4009 }
4010
4011 // Residue
4012 f->residue_count = get_bits(f, 6)+1;
4013 f->residue_config = (Residue *) setup_malloc(f, f->residue_count * sizeof(f->residue_config[0]));
4014 if (f->residue_config == NULL) return error(f, VORBIS_outofmem);
4015 memset(f->residue_config, 0, f->residue_count * sizeof(f->residue_config[0]));
4016 for (i=0; i < f->residue_count; ++i) {
4017 uint8 residue_cascade[64];
4018 Residue *r = f->residue_config+i;
4019 f->residue_types[i] = get_bits(f, 16);
4020 if (f->residue_types[i] > 2) return error(f, VORBIS_invalid_setup);
4021 r->begin = get_bits(f, 24);
4022 r->end = get_bits(f, 24);
4023 if (r->end < r->begin) return error(f, VORBIS_invalid_setup);
4024 r->part_size = get_bits(f,24)+1;
4025 r->classifications = get_bits(f,6)+1;
4026 r->classbook = get_bits(f,8);
4027 if (r->classbook >= f->codebook_count) return error(f, VORBIS_invalid_setup);
4028 for (j=0; j < r->classifications; ++j) {
4029 uint8 high_bits=0;
4030 uint8 low_bits=get_bits(f,3);
4031 if (get_bits(f,1))
4032 high_bits = get_bits(f,5);
4033 residue_cascade[j] = high_bits*8 + low_bits;
4034 }
4035 r->residue_books = (short (*)[8]) setup_malloc(f, sizeof(r->residue_books[0]) * r->classifications);
4036 if (r->residue_books == NULL) return error(f, VORBIS_outofmem);
4037 for (j=0; j < r->classifications; ++j) {
4038 for (k=0; k < 8; ++k) {
4039 if (residue_cascade[j] & (1 << k)) {
4040 r->residue_books[j][k] = get_bits(f, 8);
4041 if (r->residue_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
4042 } else {
4043 r->residue_books[j][k] = -1;
4044 }
4045 }
4046 }
4047 // precompute the classifications[] array to avoid inner-loop mod/divide
4048 // call it 'classdata' since we already have r->classifications
4049 r->classdata = (uint8 **) setup_malloc(f, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
4050 if (!r->classdata) return error(f, VORBIS_outofmem);
4051 memset(r->classdata, 0, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
4052 for (j=0; j < f->codebooks[r->classbook].entries; ++j) {
4053 int classwords = f->codebooks[r->classbook].dimensions;
4054 int temp = j;
4055 r->classdata[j] = (uint8 *) setup_malloc(f, sizeof(r->classdata[j][0]) * classwords);
4056 if (r->classdata[j] == NULL) return error(f, VORBIS_outofmem);
4057 for (k=classwords-1; k >= 0; --k) {
4058 r->classdata[j][k] = temp % r->classifications;
4059 temp /= r->classifications;
4060 }
4061 }
4062 }
4063
4064 f->mapping_count = get_bits(f,6)+1;
4065 f->mapping = (Mapping *) setup_malloc(f, f->mapping_count * sizeof(*f->mapping));
4066 if (f->mapping == NULL) return error(f, VORBIS_outofmem);
4067 memset(f->mapping, 0, f->mapping_count * sizeof(*f->mapping));
4068 for (i=0; i < f->mapping_count; ++i) {
4069 Mapping *m = f->mapping + i;
4070 int mapping_type = get_bits(f,16);
4071 if (mapping_type != 0) return error(f, VORBIS_invalid_setup);
4072 m->chan = (MappingChannel *) setup_malloc(f, f->channels * sizeof(*m->chan));
4073 if (m->chan == NULL) return error(f, VORBIS_outofmem);
4074 if (get_bits(f,1))
4075 m->submaps = get_bits(f,4)+1;
4076 else
4077 m->submaps = 1;
4078 if (m->submaps > max_submaps)
4079 max_submaps = m->submaps;
4080 if (get_bits(f,1)) {
4081 m->coupling_steps = get_bits(f,8)+1;
4082 if (m->coupling_steps > f->channels) return error(f, VORBIS_invalid_setup);
4083 for (k=0; k < m->coupling_steps; ++k) {
4084 m->chan[k].magnitude = get_bits(f, ilog(f->channels-1));
4085 m->chan[k].angle = get_bits(f, ilog(f->channels-1));
4086 if (m->chan[k].magnitude >= f->channels) return error(f, VORBIS_invalid_setup);
4087 if (m->chan[k].angle >= f->channels) return error(f, VORBIS_invalid_setup);
4088 if (m->chan[k].magnitude == m->chan[k].angle) return error(f, VORBIS_invalid_setup);
4089 }
4090 } else
4091 m->coupling_steps = 0;
4092
4093 // reserved field
4094 if (get_bits(f,2)) return error(f, VORBIS_invalid_setup);
4095 if (m->submaps > 1) {
4096 for (j=0; j < f->channels; ++j) {
4097 m->chan[j].mux = get_bits(f, 4);
4098 if (m->chan[j].mux >= m->submaps) return error(f, VORBIS_invalid_setup);
4099 }
4100 } else
4101 // @SPECIFICATION: this case is missing from the spec
4102 for (j=0; j < f->channels; ++j)
4103 m->chan[j].mux = 0;
4104
4105 for (j=0; j < m->submaps; ++j) {
4106 get_bits(f,8); // discard
4107 m->submap_floor[j] = get_bits(f,8);
4108 m->submap_residue[j] = get_bits(f,8);
4109 if (m->submap_floor[j] >= f->floor_count) return error(f, VORBIS_invalid_setup);
4110 if (m->submap_residue[j] >= f->residue_count) return error(f, VORBIS_invalid_setup);
4111 }
4112 }
4113
4114 // Modes
4115 f->mode_count = get_bits(f, 6)+1;
4116 for (i=0; i < f->mode_count; ++i) {
4117 Mode *m = f->mode_config+i;
4118 m->blockflag = get_bits(f,1);
4119 m->windowtype = get_bits(f,16);
4120 m->transformtype = get_bits(f,16);
4121 m->mapping = get_bits(f,8);
4122 if (m->windowtype != 0) return error(f, VORBIS_invalid_setup);
4123 if (m->transformtype != 0) return error(f, VORBIS_invalid_setup);
4124 if (m->mapping >= f->mapping_count) return error(f, VORBIS_invalid_setup);
4125 }
4126
4127 flush_packet(f);
4128
4129 f->previous_length = 0;
4130
4131 for (i=0; i < f->channels; ++i) {
4132 f->channel_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1);
4133 f->previous_window[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
4134 f->finalY[i] = (int16 *) setup_malloc(f, sizeof(int16) * longest_floorlist);
4135 if (f->channel_buffers[i] == NULL || f->previous_window[i] == NULL || f->finalY[i] == NULL) return error(f, VORBIS_outofmem);
4136 memset(f->channel_buffers[i], 0, sizeof(float) * f->blocksize_1);
4137 #ifdef STB_VORBIS_NO_DEFER_FLOOR
4138 f->floor_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
4139 if (f->floor_buffers[i] == NULL) return error(f, VORBIS_outofmem);
4140 #endif
4141 }
4142
4143 if (!init_blocksize(f, 0, f->blocksize_0)) return FALSE;
4144 if (!init_blocksize(f, 1, f->blocksize_1)) return FALSE;
4145 f->blocksize[0] = f->blocksize_0;
4146 f->blocksize[1] = f->blocksize_1;
4147
4148 #ifdef STB_VORBIS_DIVIDE_TABLE
4149 if (integer_divide_table[1][1]==0)
4150 for (i=0; i < DIVTAB_NUMER; ++i)
4151 for (j=1; j < DIVTAB_DENOM; ++j)
4152 integer_divide_table[i][j] = i / j;
4153 #endif
4154
4155 // compute how much temporary memory is needed
4156
4157 // 1.
4158 {
4159 uint32 imdct_mem = (f->blocksize_1 * sizeof(float) >> 1);
4160 uint32 classify_mem;
4161 int i,max_part_read=0;
4162 for (i=0; i < f->residue_count; ++i) {
4163 Residue *r = f->residue_config + i;
4164 unsigned int actual_size = f->blocksize_1 / 2;
4165 unsigned int limit_r_begin = r->begin < actual_size ? r->begin : actual_size;
4166 unsigned int limit_r_end = r->end < actual_size ? r->end : actual_size;
4167 int n_read = limit_r_end - limit_r_begin;
4168 int part_read = n_read / r->part_size;
4169 if (part_read > max_part_read)
4170 max_part_read = part_read;
4171 }
4172 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
4173 classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(uint8 *));
4174 #else
4175 classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(int *));
4176 #endif
4177
4178 // maximum reasonable partition size is f->blocksize_1
4179
4180 f->temp_memory_required = classify_mem;
4181 if (imdct_mem > f->temp_memory_required)
4182 f->temp_memory_required = imdct_mem;
4183 }
4184
4185
4186 if (f->alloc.alloc_buffer) {
4187 assert(f->temp_offset == f->alloc.alloc_buffer_length_in_bytes);
4188 // check if there's enough temp memory so we don't error later
4189 if (f->setup_offset + sizeof(*f) + f->temp_memory_required > (unsigned) f->temp_offset)
4190 return error(f, VORBIS_outofmem);
4191 }
4192
4193 // @TODO: stb_vorbis_seek_start expects first_audio_page_offset to point to a page
4194 // without PAGEFLAG_continued_packet, so this either points to the first page, or
4195 // the page after the end of the headers. It might be cleaner to point to a page
4196 // in the middle of the headers, when that's the page where the first audio packet
4197 // starts, but we'd have to also correctly skip the end of any continued packet in
4198 // stb_vorbis_seek_start.
4199 if (f->next_seg == -1) {
4200 f->first_audio_page_offset = stb_vorbis_get_file_offset(f);
4201 } else {
4202 f->first_audio_page_offset = 0;
4203 }
4204
4205 return TRUE;
4206 }
4207
4208 static void vorbis_deinit(stb_vorbis *p)
4209 {
4210 int i,j;
4211
4212 setup_free(p, p->vendor);
4213 for (i=0; i < p->comment_list_length; ++i) {
4214 setup_free(p, p->comment_list[i]);
4215 }
4216 setup_free(p, p->comment_list);
4217
4218 if (p->residue_config) {
4219 for (i=0; i < p->residue_count; ++i) {
4220 Residue *r = p->residue_config+i;
4221 if (r->classdata) {
4222 for (j=0; j < p->codebooks[r->classbook].entries; ++j)
4223 setup_free(p, r->classdata[j]);
4224 setup_free(p, r->classdata);
4225 }
4226 setup_free(p, r->residue_books);
4227 }
4228 }
4229
4230 if (p->codebooks) {
4231 CHECK(p);
4232 for (i=0; i < p->codebook_count; ++i) {
4233 Codebook *c = p->codebooks + i;
4234 setup_free(p, c->codeword_lengths);
4235 setup_free(p, c->multiplicands);
4236 setup_free(p, c->codewords);
4237 setup_free(p, c->sorted_codewords);
4238 // c->sorted_values[-1] is the first entry in the array
4239 setup_free(p, c->sorted_values ? c->sorted_values-1 : NULL);
4240 }
4241 setup_free(p, p->codebooks);
4242 }
4243 setup_free(p, p->floor_config);
4244 setup_free(p, p->residue_config);
4245 if (p->mapping) {
4246 for (i=0; i < p->mapping_count; ++i)
4247 setup_free(p, p->mapping[i].chan);
4248 setup_free(p, p->mapping);
4249 }
4250 CHECK(p);
4251 for (i=0; i < p->channels && i < STB_VORBIS_MAX_CHANNELS; ++i) {
4252 setup_free(p, p->channel_buffers[i]);
4253 setup_free(p, p->previous_window[i]);
4254 #ifdef STB_VORBIS_NO_DEFER_FLOOR
4255 setup_free(p, p->floor_buffers[i]);
4256 #endif
4257 setup_free(p, p->finalY[i]);
4258 }
4259 for (i=0; i < 2; ++i) {
4260 setup_free(p, p->A[i]);
4261 setup_free(p, p->B[i]);
4262 setup_free(p, p->C[i]);
4263 setup_free(p, p->window[i]);
4264 setup_free(p, p->bit_reverse[i]);
4265 }
4266 #ifndef STB_VORBIS_NO_STDIO
4267 if (p->close_on_free) fclose(p->f);
4268 #endif
4269 }
4270
4271 void stb_vorbis_close(stb_vorbis *p)
4272 {
4273 if (p == NULL) return;
4274 vorbis_deinit(p);
4275 setup_free(p,p);
4276 }
4277
4278 static void vorbis_init(stb_vorbis *p, const stb_vorbis_alloc *z)
4279 {
4280 memset(p, 0, sizeof(*p)); // NULL out all malloc'd pointers to start
4281 if (z) {
4282 p->alloc = *z;
4283 p->alloc.alloc_buffer_length_in_bytes &= ~7;
4284 p->temp_offset = p->alloc.alloc_buffer_length_in_bytes;
4285 }
4286 p->eof = 0;
4287 p->error = VORBIS__no_error;
4288 p->stream = NULL;
4289 p->codebooks = NULL;
4290 p->page_crc_tests = -1;
4291 #ifndef STB_VORBIS_NO_STDIO
4292 p->close_on_free = FALSE;
4293 p->f = NULL;
4294 #endif
4295 }
4296
4297 int stb_vorbis_get_sample_offset(stb_vorbis *f)
4298 {
4299 if (f->current_loc_valid)
4300 return f->current_loc;
4301 else
4302 return -1;
4303 }
4304
4305 stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f)
4306 {
4307 stb_vorbis_info d;
4308 d.channels = f->channels;
4309 d.sample_rate = f->sample_rate;
4310 d.setup_memory_required = f->setup_memory_required;
4311 d.setup_temp_memory_required = f->setup_temp_memory_required;
4312 d.temp_memory_required = f->temp_memory_required;
4313 d.max_frame_size = f->blocksize_1 >> 1;
4314 return d;
4315 }
4316
4317 stb_vorbis_comment stb_vorbis_get_comment(stb_vorbis *f)
4318 {
4319 stb_vorbis_comment d;
4320 d.vendor = f->vendor;
4321 d.comment_list_length = f->comment_list_length;
4322 d.comment_list = f->comment_list;
4323 return d;
4324 }
4325
4326 int stb_vorbis_get_error(stb_vorbis *f)
4327 {
4328 int e = f->error;
4329 f->error = VORBIS__no_error;
4330 return e;
4331 }
4332
4333 static stb_vorbis * vorbis_alloc(stb_vorbis *f)
4334 {
4335 stb_vorbis *p = (stb_vorbis *) setup_malloc(f, sizeof(*p));
4336 return p;
4337 }
4338
4339 #ifndef STB_VORBIS_NO_PUSHDATA_API
4340
4341 void stb_vorbis_flush_pushdata(stb_vorbis *f)
4342 {
4343 f->previous_length = 0;
4344 f->page_crc_tests = 0;
4345 f->discard_samples_deferred = 0;
4346 f->current_loc_valid = FALSE;
4347 f->first_decode = FALSE;
4348 f->samples_output = 0;
4349 f->channel_buffer_start = 0;
4350 f->channel_buffer_end = 0;
4351 }
4352
4353 static int vorbis_search_for_page_pushdata(vorb *f, uint8 *data, int data_len)
4354 {
4355 int i,n;
4356 for (i=0; i < f->page_crc_tests; ++i)
4357 f->scan[i].bytes_done = 0;
4358
4359 // if we have room for more scans, search for them first, because
4360 // they may cause us to stop early if their header is incomplete
4361 if (f->page_crc_tests < STB_VORBIS_PUSHDATA_CRC_COUNT) {
4362 if (data_len < 4) return 0;
4363 data_len -= 3; // need to look for 4-byte sequence, so don't miss
4364 // one that straddles a boundary
4365 for (i=0; i < data_len; ++i) {
4366 if (data[i] == 0x4f) {
4367 if (0==memcmp(data+i, ogg_page_header, 4)) {
4368 int j,len;
4369 uint32 crc;
4370 // make sure we have the whole page header
4371 if (i+26 >= data_len || i+27+data[i+26] >= data_len) {
4372 // only read up to this page start, so hopefully we'll
4373 // have the whole page header start next time
4374 data_len = i;
4375 break;
4376 }
4377 // ok, we have it all; compute the length of the page
4378 len = 27 + data[i+26];
4379 for (j=0; j < data[i+26]; ++j)
4380 len += data[i+27+j];
4381 // scan everything up to the embedded crc (which we must 0)
4382 crc = 0;
4383 for (j=0; j < 22; ++j)
4384 crc = crc32_update(crc, data[i+j]);
4385 // now process 4 0-bytes
4386 for ( ; j < 26; ++j)
4387 crc = crc32_update(crc, 0);
4388 // len is the total number of bytes we need to scan
4389 n = f->page_crc_tests++;
4390 f->scan[n].bytes_left = len-j;
4391 f->scan[n].crc_so_far = crc;
4392 f->scan[n].goal_crc = data[i+22] + (data[i+23] << 8) + (data[i+24]<<16) + (data[i+25]<<24);
4393 // if the last frame on a page is continued to the next, then
4394 // we can't recover the sample_loc immediately
4395 if (data[i+27+data[i+26]-1] == 255)
4396 f->scan[n].sample_loc = ~0;
4397 else
4398 f->scan[n].sample_loc = data[i+6] + (data[i+7] << 8) + (data[i+ 8]<<16) + (data[i+ 9]<<24);
4399 f->scan[n].bytes_done = i+j;
4400 if (f->page_crc_tests == STB_VORBIS_PUSHDATA_CRC_COUNT)
4401 break;
4402 // keep going if we still have room for more
4403 }
4404 }
4405 }
4406 }
4407
4408 for (i=0; i < f->page_crc_tests;) {
4409 uint32 crc;
4410 int j;
4411 int n = f->scan[i].bytes_done;
4412 int m = f->scan[i].bytes_left;
4413 if (m > data_len - n) m = data_len - n;
4414 // m is the bytes to scan in the current chunk
4415 crc = f->scan[i].crc_so_far;
4416 for (j=0; j < m; ++j)
4417 crc = crc32_update(crc, data[n+j]);
4418 f->scan[i].bytes_left -= m;
4419 f->scan[i].crc_so_far = crc;
4420 if (f->scan[i].bytes_left == 0) {
4421 // does it match?
4422 if (f->scan[i].crc_so_far == f->scan[i].goal_crc) {
4423 // Houston, we have page
4424 data_len = n+m; // consumption amount is wherever that scan ended
4425 f->page_crc_tests = -1; // drop out of page scan mode
4426 f->previous_length = 0; // decode-but-don't-output one frame
4427 f->next_seg = -1; // start a new page
4428 f->current_loc = f->scan[i].sample_loc; // set the current sample location
4429 // to the amount we'd have decoded had we decoded this page
4430 f->current_loc_valid = f->current_loc != ~0U;
4431 return data_len;
4432 }
4433 // delete entry
4434 f->scan[i] = f->scan[--f->page_crc_tests];
4435 } else {
4436 ++i;
4437 }
4438 }
4439
4440 return data_len;
4441 }
4442
4443 // return value: number of bytes we used
4444 int stb_vorbis_decode_frame_pushdata(
4445 stb_vorbis *f, // the file we're decoding
4446 const uint8 *data, int data_len, // the memory available for decoding
4447 int *channels, // place to write number of float * buffers
4448 float ***output, // place to write float ** array of float * buffers
4449 int *samples // place to write number of output samples
4450 )
4451 {
4452 int i;
4453 int len,right,left;
4454
4455 if (!IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4456
4457 if (f->page_crc_tests >= 0) {
4458 *samples = 0;
4459 return vorbis_search_for_page_pushdata(f, (uint8 *) data, data_len);
4460 }
4461
4462 f->stream = (uint8 *) data;
4463 f->stream_end = (uint8 *) data + data_len;
4464 f->error = VORBIS__no_error;
4465
4466 // check that we have the entire packet in memory
4467 if (!is_whole_packet_present(f)) {
4468 *samples = 0;
4469 return 0;
4470 }
4471
4472 if (!vorbis_decode_packet(f, &len, &left, &right)) {
4473 // save the actual error we encountered
4474 enum STBVorbisError error = f->error;
4475 if (error == VORBIS_bad_packet_type) {
4476 // flush and resynch
4477 f->error = VORBIS__no_error;
4478 while (get8_packet(f) != EOP)
4479 if (f->eof) break;
4480 *samples = 0;
4481 return (int) (f->stream - data);
4482 }
4483 if (error == VORBIS_continued_packet_flag_invalid) {
4484 if (f->previous_length == 0) {
4485 // we may be resynching, in which case it's ok to hit one
4486 // of these; just discard the packet
4487 f->error = VORBIS__no_error;
4488 while (get8_packet(f) != EOP)
4489 if (f->eof) break;
4490 *samples = 0;
4491 return (int) (f->stream - data);
4492 }
4493 }
4494 // if we get an error while parsing, what to do?
4495 // well, it DEFINITELY won't work to continue from where we are!
4496 stb_vorbis_flush_pushdata(f);
4497 // restore the error that actually made us bail
4498 f->error = error;
4499 *samples = 0;
4500 return 1;
4501 }
4502
4503 // success!
4504 len = vorbis_finish_frame(f, len, left, right);
4505 for (i=0; i < f->channels; ++i)
4506 f->outputs[i] = f->channel_buffers[i] + left;
4507
4508 if (channels) *channels = f->channels;
4509 *samples = len;
4510 *output = f->outputs;
4511 return (int) (f->stream - data);
4512 }
4513
4514 stb_vorbis *stb_vorbis_open_pushdata(
4515 const unsigned char *data, int data_len, // the memory available for decoding
4516 int *data_used, // only defined if result is not NULL
4517 int *error, const stb_vorbis_alloc *alloc)
4518 {
4519 stb_vorbis *f, p;
4520 vorbis_init(&p, alloc);
4521 p.stream = (uint8 *) data;
4522 p.stream_end = (uint8 *) data + data_len;
4523 p.push_mode = TRUE;
4524 if (!start_decoder(&p)) {
4525 if (p.eof)
4526 *error = VORBIS_need_more_data;
4527 else
4528 *error = p.error;
4529 vorbis_deinit(&p);
4530 return NULL;
4531 }
4532 f = vorbis_alloc(&p);
4533 if (f) {
4534 *f = p;
4535 *data_used = (int) (f->stream - data);
4536 *error = 0;
4537 return f;
4538 } else {
4539 vorbis_deinit(&p);
4540 return NULL;
4541 }
4542 }
4543 #endif // STB_VORBIS_NO_PUSHDATA_API
4544
4545 unsigned int stb_vorbis_get_file_offset(stb_vorbis *f)
4546 {
4547 #ifndef STB_VORBIS_NO_PUSHDATA_API
4548 if (f->push_mode) return 0;
4549 #endif
4550 if (USE_MEMORY(f)) return (unsigned int) (f->stream - f->stream_start);
4551 #ifndef STB_VORBIS_NO_STDIO
4552 return (unsigned int) (ftell(f->f) - f->f_start);
4553 #endif
4554 }
4555
4556 #ifndef STB_VORBIS_NO_PULLDATA_API
4557 //
4558 // DATA-PULLING API
4559 //
4560
4561 static uint32 vorbis_find_page(stb_vorbis *f, uint32 *end, uint32 *last)
4562 {
4563 for(;;) {
4564 int n;
4565 if (f->eof) return 0;
4566 n = get8(f);
4567 if (n == 0x4f) { // page header candidate
4568 unsigned int retry_loc = stb_vorbis_get_file_offset(f);
4569 int i;
4570 // check if we're off the end of a file_section stream
4571 if (retry_loc - 25 > f->stream_len)
4572 return 0;
4573 // check the rest of the header
4574 for (i=1; i < 4; ++i)
4575 if (get8(f) != ogg_page_header[i])
4576 break;
4577 if (f->eof) return 0;
4578 if (i == 4) {
4579 uint8 header[27];
4580 uint32 i, crc, goal, len;
4581 for (i=0; i < 4; ++i)
4582 header[i] = ogg_page_header[i];
4583 for (; i < 27; ++i)
4584 header[i] = get8(f);
4585 if (f->eof) return 0;
4586 if (header[4] != 0) goto invalid;
4587 goal = header[22] + (header[23] << 8) + (header[24]<<16) + ((uint32)header[25]<<24);
4588 for (i=22; i < 26; ++i)
4589 header[i] = 0;
4590 crc = 0;
4591 for (i=0; i < 27; ++i)
4592 crc = crc32_update(crc, header[i]);
4593 len = 0;
4594 for (i=0; i < header[26]; ++i) {
4595 int s = get8(f);
4596 crc = crc32_update(crc, s);
4597 len += s;
4598 }
4599 if (len && f->eof) return 0;
4600 for (i=0; i < len; ++i)
4601 crc = crc32_update(crc, get8(f));
4602 // finished parsing probable page
4603 if (crc == goal) {
4604 // we could now check that it's either got the last
4605 // page flag set, OR it's followed by the capture
4606 // pattern, but I guess TECHNICALLY you could have
4607 // a file with garbage between each ogg page and recover
4608 // from it automatically? So even though that paranoia
4609 // might decrease the chance of an invalid decode by
4610 // another 2^32, not worth it since it would hose those
4611 // invalid-but-useful files?
4612 if (end)
4613 *end = stb_vorbis_get_file_offset(f);
4614 if (last) {
4615 if (header[5] & 0x04)
4616 *last = 1;
4617 else
4618 *last = 0;
4619 }
4620 set_file_offset(f, retry_loc-1);
4621 return 1;
4622 }
4623 }
4624 invalid:
4625 // not a valid page, so rewind and look for next one
4626 set_file_offset(f, retry_loc);
4627 }
4628 }
4629 }
4630
4631
4632 #define SAMPLE_unknown 0xffffffff
4633
4634 // seeking is implemented with a binary search, which narrows down the range to
4635 // 64K, before using a linear search (because finding the synchronization
4636 // pattern can be expensive, and the chance we'd find the end page again is
4637 // relatively high for small ranges)
4638 //
4639 // two initial interpolation-style probes are used at the start of the search
4640 // to try to bound either side of the binary search sensibly, while still
4641 // working in O(log n) time if they fail.
4642
4643 static int get_seek_page_info(stb_vorbis *f, ProbedPage *z)
4644 {
4645 uint8 header[27], lacing[255];
4646 int i,len;
4647
4648 // record where the page starts
4649 z->page_start = stb_vorbis_get_file_offset(f);
4650
4651 // parse the header
4652 getn(f, header, 27);
4653 if (header[0] != 'O' || header[1] != 'g' || header[2] != 'g' || header[3] != 'S')
4654 return 0;
4655 getn(f, lacing, header[26]);
4656
4657 // determine the length of the payload
4658 len = 0;
4659 for (i=0; i < header[26]; ++i)
4660 len += lacing[i];
4661
4662 // this implies where the page ends
4663 z->page_end = z->page_start + 27 + header[26] + len;
4664
4665 // read the last-decoded sample out of the data
4666 z->last_decoded_sample = header[6] + (header[7] << 8) + (header[8] << 16) + (header[9] << 24);
4667
4668 // restore file state to where we were
4669 set_file_offset(f, z->page_start);
4670 return 1;
4671 }
4672
4673 // rarely used function to seek back to the preceding page while finding the
4674 // start of a packet
4675 static int go_to_page_before(stb_vorbis *f, unsigned int limit_offset)
4676 {
4677 unsigned int previous_safe, end;
4678
4679 // now we want to seek back 64K from the limit
4680 if (limit_offset >= 65536 && limit_offset-65536 >= f->first_audio_page_offset)
4681 previous_safe = limit_offset - 65536;
4682 else
4683 previous_safe = f->first_audio_page_offset;
4684
4685 set_file_offset(f, previous_safe);
4686
4687 while (vorbis_find_page(f, &end, NULL)) {
4688 if (end >= limit_offset && stb_vorbis_get_file_offset(f) < limit_offset)
4689 return 1;
4690 set_file_offset(f, end);
4691 }
4692
4693 return 0;
4694 }
4695
4696 // implements the search logic for finding a page and starting decoding. if
4697 // the function succeeds, current_loc_valid will be true and current_loc will
4698 // be less than or equal to the provided sample number (the closer the
4699 // better).
4700 static int seek_to_sample_coarse(stb_vorbis *f, uint32 sample_number)
4701 {
4702 ProbedPage left, right, mid;
4703 int i, start_seg_with_known_loc, end_pos, page_start;
4704 uint32 delta, stream_length, padding, last_sample_limit;
4705 double offset = 0.0, bytes_per_sample = 0.0;
4706 int probe = 0;
4707
4708 // find the last page and validate the target sample
4709 stream_length = stb_vorbis_stream_length_in_samples(f);
4710 if (stream_length == 0) return error(f, VORBIS_seek_without_length);
4711 if (sample_number > stream_length) return error(f, VORBIS_seek_invalid);
4712
4713 // this is the maximum difference between the window-center (which is the
4714 // actual granule position value), and the right-start (which the spec
4715 // indicates should be the granule position (give or take one)).
4716 padding = ((f->blocksize_1 - f->blocksize_0) >> 2);
4717 if (sample_number < padding)
4718 last_sample_limit = 0;
4719 else
4720 last_sample_limit = sample_number - padding;
4721
4722 left = f->p_first;
4723 while (left.last_decoded_sample == ~0U) {
4724 // (untested) the first page does not have a 'last_decoded_sample'
4725 set_file_offset(f, left.page_end);
4726 if (!get_seek_page_info(f, &left)) goto error;
4727 }
4728
4729 right = f->p_last;
4730 assert(right.last_decoded_sample != ~0U);
4731
4732 // starting from the start is handled differently
4733 if (last_sample_limit <= left.last_decoded_sample) {
4734 if (stb_vorbis_seek_start(f)) {
4735 if (f->current_loc > sample_number)
4736 return error(f, VORBIS_seek_failed);
4737 return 1;
4738 }
4739 return 0;
4740 }
4741
4742 while (left.page_end != right.page_start) {
4743 assert(left.page_end < right.page_start);
4744 // search range in bytes
4745 delta = right.page_start - left.page_end;
4746 if (delta <= 65536) {
4747 // there's only 64K left to search - handle it linearly
4748 set_file_offset(f, left.page_end);
4749 } else {
4750 if (probe < 2) {
4751 if (probe == 0) {
4752 // first probe (interpolate)
4753 double data_bytes = right.page_end - left.page_start;
4754 bytes_per_sample = data_bytes / right.last_decoded_sample;
4755 offset = left.page_start + bytes_per_sample * (last_sample_limit - left.last_decoded_sample);
4756 } else {
4757 // second probe (try to bound the other side)
4758 double error = ((double) last_sample_limit - mid.last_decoded_sample) * bytes_per_sample;
4759 if (error >= 0 && error < 8000) error = 8000;
4760 if (error < 0 && error > -8000) error = -8000;
4761 offset += error * 2;
4762 }
4763
4764 // ensure the offset is valid
4765 if (offset < left.page_end)
4766 offset = left.page_end;
4767 if (offset > right.page_start - 65536)
4768 offset = right.page_start - 65536;
4769
4770 set_file_offset(f, (unsigned int) offset);
4771 } else {
4772 // binary search for large ranges (offset by 32K to ensure
4773 // we don't hit the right page)
4774 set_file_offset(f, left.page_end + (delta / 2) - 32768);
4775 }
4776
4777 if (!vorbis_find_page(f, NULL, NULL)) goto error;
4778 }
4779
4780 for (;;) {
4781 if (!get_seek_page_info(f, &mid)) goto error;
4782 if (mid.last_decoded_sample != ~0U) break;
4783 // (untested) no frames end on this page
4784 set_file_offset(f, mid.page_end);
4785 assert(mid.page_start < right.page_start);
4786 }
4787
4788 // if we've just found the last page again then we're in a tricky file,
4789 // and we're close enough (if it wasn't an interpolation probe).
4790 if (mid.page_start == right.page_start) {
4791 if (probe >= 2 || delta <= 65536)
4792 break;
4793 } else {
4794 if (last_sample_limit < mid.last_decoded_sample)
4795 right = mid;
4796 else
4797 left = mid;
4798 }
4799
4800 ++probe;
4801 }
4802
4803 // seek back to start of the last packet
4804 page_start = left.page_start;
4805 set_file_offset(f, page_start);
4806 if (!start_page(f)) return error(f, VORBIS_seek_failed);
4807 end_pos = f->end_seg_with_known_loc;
4808 assert(end_pos >= 0);
4809
4810 for (;;) {
4811 for (i = end_pos; i > 0; --i)
4812 if (f->segments[i-1] != 255)
4813 break;
4814
4815 start_seg_with_known_loc = i;
4816
4817 if (start_seg_with_known_loc > 0 || !(f->page_flag & PAGEFLAG_continued_packet))
4818 break;
4819
4820 // (untested) the final packet begins on an earlier page
4821 if (!go_to_page_before(f, page_start))
4822 goto error;
4823
4824 page_start = stb_vorbis_get_file_offset(f);
4825 if (!start_page(f)) goto error;
4826 end_pos = f->segment_count - 1;
4827 }
4828
4829 // prepare to start decoding
4830 f->current_loc_valid = FALSE;
4831 f->last_seg = FALSE;
4832 f->valid_bits = 0;
4833 f->packet_bytes = 0;
4834 f->bytes_in_seg = 0;
4835 f->previous_length = 0;
4836 f->next_seg = start_seg_with_known_loc;
4837
4838 for (i = 0; i < start_seg_with_known_loc; i++)
4839 skip(f, f->segments[i]);
4840
4841 // start decoding (optimizable - this frame is generally discarded)
4842 if (!vorbis_pump_first_frame(f))
4843 return 0;
4844 if (f->current_loc > sample_number)
4845 return error(f, VORBIS_seek_failed);
4846 return 1;
4847
4848 error:
4849 // try to restore the file to a valid state
4850 stb_vorbis_seek_start(f);
4851 return error(f, VORBIS_seek_failed);
4852 }
4853
4854 // the same as vorbis_decode_initial, but without advancing
4855 static int peek_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
4856 {
4857 int bits_read, bytes_read;
4858
4859 if (!vorbis_decode_initial(f, p_left_start, p_left_end, p_right_start, p_right_end, mode))
4860 return 0;
4861
4862 // either 1 or 2 bytes were read, figure out which so we can rewind
4863 bits_read = 1 + ilog(f->mode_count-1);
4864 if (f->mode_config[*mode].blockflag)
4865 bits_read += 2;
4866 bytes_read = (bits_read + 7) / 8;
4867
4868 f->bytes_in_seg += bytes_read;
4869 f->packet_bytes -= bytes_read;
4870 skip(f, -bytes_read);
4871 if (f->next_seg == -1)
4872 f->next_seg = f->segment_count - 1;
4873 else
4874 f->next_seg--;
4875 f->valid_bits = 0;
4876
4877 return 1;
4878 }
4879
4880 int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number)
4881 {
4882 uint32 max_frame_samples;
4883
4884 if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4885
4886 // fast page-level search
4887 if (!seek_to_sample_coarse(f, sample_number))
4888 return 0;
4889
4890 assert(f->current_loc_valid);
4891 assert(f->current_loc <= sample_number);
4892
4893 // linear search for the relevant packet
4894 max_frame_samples = (f->blocksize_1*3 - f->blocksize_0) >> 2;
4895 while (f->current_loc < sample_number) {
4896 int left_start, left_end, right_start, right_end, mode, frame_samples;
4897 if (!peek_decode_initial(f, &left_start, &left_end, &right_start, &right_end, &mode))
4898 return error(f, VORBIS_seek_failed);
4899 // calculate the number of samples returned by the next frame
4900 frame_samples = right_start - left_start;
4901 if (f->current_loc + frame_samples > sample_number) {
4902 return 1; // the next frame will contain the sample
4903 } else if (f->current_loc + frame_samples + max_frame_samples > sample_number) {
4904 // there's a chance the frame after this could contain the sample
4905 vorbis_pump_first_frame(f);
4906 } else {
4907 // this frame is too early to be relevant
4908 f->current_loc += frame_samples;
4909 f->previous_length = 0;
4910 maybe_start_packet(f);
4911 flush_packet(f);
4912 }
4913 }
4914 // the next frame should start with the sample
4915 if (f->current_loc != sample_number) return error(f, VORBIS_seek_failed);
4916 return 1;
4917 }
4918
4919 int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number)
4920 {
4921 if (!stb_vorbis_seek_frame(f, sample_number))
4922 return 0;
4923
4924 if (sample_number != f->current_loc) {
4925 int n;
4926 uint32 frame_start = f->current_loc;
4927 stb_vorbis_get_frame_float(f, &n, NULL);
4928 assert(sample_number > frame_start);
4929 assert(f->channel_buffer_start + (int) (sample_number-frame_start) <= f->channel_buffer_end);
4930 f->channel_buffer_start += (sample_number - frame_start);
4931 }
4932
4933 return 1;
4934 }
4935
4936 int stb_vorbis_seek_start(stb_vorbis *f)
4937 {
4938 if (IS_PUSH_MODE(f)) { return error(f, VORBIS_invalid_api_mixing); }
4939 set_file_offset(f, f->first_audio_page_offset);
4940 f->previous_length = 0;
4941 f->first_decode = TRUE;
4942 f->next_seg = -1;
4943 return vorbis_pump_first_frame(f);
4944 }
4945
4946 unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f)
4947 {
4948 unsigned int restore_offset, previous_safe;
4949 unsigned int end, last_page_loc;
4950
4951 if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4952 if (!f->total_samples) {
4953 unsigned int last;
4954 uint32 lo,hi;
4955 char header[6];
4956
4957 // first, store the current decode position so we can restore it
4958 restore_offset = stb_vorbis_get_file_offset(f);
4959
4960 // now we want to seek back 64K from the end (the last page must
4961 // be at most a little less than 64K, but let's allow a little slop)
4962 if (f->stream_len >= 65536 && f->stream_len-65536 >= f->first_audio_page_offset)
4963 previous_safe = f->stream_len - 65536;
4964 else
4965 previous_safe = f->first_audio_page_offset;
4966
4967 set_file_offset(f, previous_safe);
4968 // previous_safe is now our candidate 'earliest known place that seeking
4969 // to will lead to the final page'
4970
4971 if (!vorbis_find_page(f, &end, &last)) {
4972 // if we can't find a page, we're hosed!
4973 f->error = VORBIS_cant_find_last_page;
4974 f->total_samples = 0xffffffff;
4975 goto done;
4976 }
4977
4978 // check if there are more pages
4979 last_page_loc = stb_vorbis_get_file_offset(f);
4980
4981 // stop when the last_page flag is set, not when we reach eof;
4982 // this allows us to stop short of a 'file_section' end without
4983 // explicitly checking the length of the section
4984 while (!last) {
4985 set_file_offset(f, end);
4986 if (!vorbis_find_page(f, &end, &last)) {
4987 // the last page we found didn't have the 'last page' flag
4988 // set. whoops!
4989 break;
4990 }
4991 //previous_safe = last_page_loc+1; // NOTE: not used after this point, but note for debugging
4992 last_page_loc = stb_vorbis_get_file_offset(f);
4993 }
4994
4995 set_file_offset(f, last_page_loc);
4996
4997 // parse the header
4998 getn(f, (unsigned char *)header, 6);
4999 // extract the absolute granule position
5000 lo = get32(f);
5001 hi = get32(f);
5002 if (lo == 0xffffffff && hi == 0xffffffff) {
5003 f->error = VORBIS_cant_find_last_page;
5004 f->total_samples = SAMPLE_unknown;
5005 goto done;
5006 }
5007 if (hi)
5008 lo = 0xfffffffe; // saturate
5009 f->total_samples = lo;
5010
5011 f->p_last.page_start = last_page_loc;
5012 f->p_last.page_end = end;
5013 f->p_last.last_decoded_sample = lo;
5014
5015 done:
5016 set_file_offset(f, restore_offset);
5017 }
5018 return f->total_samples == SAMPLE_unknown ? 0 : f->total_samples;
5019 }
5020
5021 float stb_vorbis_stream_length_in_seconds(stb_vorbis *f)
5022 {
5023 return stb_vorbis_stream_length_in_samples(f) / (float) f->sample_rate;
5024 }
5025
5026
5027
5028 int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output)
5029 {
5030 int len, right,left,i;
5031 if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
5032
5033 if (!vorbis_decode_packet(f, &len, &left, &right)) {
5034 f->channel_buffer_start = f->channel_buffer_end = 0;
5035 return 0;
5036 }
5037
5038 len = vorbis_finish_frame(f, len, left, right);
5039 for (i=0; i < f->channels; ++i)
5040 f->outputs[i] = f->channel_buffers[i] + left;
5041
5042 f->channel_buffer_start = left;
5043 f->channel_buffer_end = left+len;
5044
5045 if (channels) *channels = f->channels;
5046 if (output) *output = f->outputs;
5047 return len;
5048 }
5049
5050 #ifndef STB_VORBIS_NO_STDIO
5051
5052 stb_vorbis * stb_vorbis_open_file_section(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc, unsigned int length)
5053 {
5054 stb_vorbis *f, p;
5055 vorbis_init(&p, alloc);
5056 p.f = file;
5057 p.f_start = (uint32) ftell(file);
5058 p.stream_len = length;
5059 p.close_on_free = close_on_free;
5060 if (start_decoder(&p)) {
5061 f = vorbis_alloc(&p);
5062 if (f) {
5063 *f = p;
5064 vorbis_pump_first_frame(f);
5065 return f;
5066 }
5067 }
5068 if (error) *error = p.error;
5069 vorbis_deinit(&p);
5070 return NULL;
5071 }
5072
5073 stb_vorbis * stb_vorbis_open_file(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc)
5074 {
5075 unsigned int len, start;
5076 start = (unsigned int) ftell(file);
5077 fseek(file, 0, SEEK_END);
5078 len = (unsigned int) (ftell(file) - start);
5079 fseek(file, start, SEEK_SET);
5080 return stb_vorbis_open_file_section(file, close_on_free, error, alloc, len);
5081 }
5082
5083 stb_vorbis * stb_vorbis_open_filename(const char *filename, int *error, const stb_vorbis_alloc *alloc)
5084 {
5085 FILE *f;
5086 #if defined(_WIN32) && defined(__STDC_WANT_SECURE_LIB__)
5087 if (0 != fopen_s(&f, filename, "rb"))
5088 f = NULL;
5089 #else
5090 f = fopen(filename, "rb");
5091 #endif
5092 if (f)
5093 return stb_vorbis_open_file(f, TRUE, error, alloc);
5094 if (error) *error = VORBIS_file_open_failure;
5095 return NULL;
5096 }
5097 #endif // STB_VORBIS_NO_STDIO
5098
5099 stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len, int *error, const stb_vorbis_alloc *alloc)
5100 {
5101 stb_vorbis *f, p;
5102 if (!data) {
5103 if (error) *error = VORBIS_unexpected_eof;
5104 return NULL;
5105 }
5106 vorbis_init(&p, alloc);
5107 p.stream = (uint8 *) data;
5108 p.stream_end = (uint8 *) data + len;
5109 p.stream_start = (uint8 *) p.stream;
5110 p.stream_len = len;
5111 p.push_mode = FALSE;
5112 if (start_decoder(&p)) {
5113 f = vorbis_alloc(&p);
5114 if (f) {
5115 *f = p;
5116 vorbis_pump_first_frame(f);
5117 if (error) *error = VORBIS__no_error;
5118 return f;
5119 }
5120 }
5121 if (error) *error = p.error;
5122 vorbis_deinit(&p);
5123 return NULL;
5124 }
5125
5126 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
5127 #define PLAYBACK_MONO 1
5128 #define PLAYBACK_LEFT 2
5129 #define PLAYBACK_RIGHT 4
5130
5131 #define L (PLAYBACK_LEFT | PLAYBACK_MONO)
5132 #define C (PLAYBACK_LEFT | PLAYBACK_RIGHT | PLAYBACK_MONO)
5133 #define R (PLAYBACK_RIGHT | PLAYBACK_MONO)
5134
5135 static int8 channel_position[7][6] =
5136 {
5137 { 0 },
5138 { C },
5139 { L, R },
5140 { L, C, R },
5141 { L, R, L, R },
5142 { L, C, R, L, R },
5143 { L, C, R, L, R, C },
5144 };
5145
5146
5147 #ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
5148 typedef union {
5149 float f;
5150 int i;
5151 } float_conv;
5152 typedef char stb_vorbis_float_size_test[sizeof(float)==4 && sizeof(int) == 4];
5153 #define FASTDEF(x) float_conv x
5154 // add (1<<23) to convert to int, then divide by 2^SHIFT, then add 0.5/2^SHIFT to round
5155 #define MAGIC(SHIFT) (1.5f * (1 << (23-SHIFT)) + 0.5f/(1 << SHIFT))
5156 #define ADDEND(SHIFT) (((150-SHIFT) << 23) + (1 << 22))
5157 #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) (temp.f = (x) + MAGIC(s), temp.i - ADDEND(s))
5158 #define check_endianness()
5159 #else
5160 #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) ((int) ((x) * (1 << (s))))
5161 #define check_endianness()
5162 #define FASTDEF(x)
5163 #endif
5164
5165 static void copy_samples(short *dest, float *src, int len)
5166 {
5167 int i;
5168 check_endianness();
5169 for (i=0; i < len; ++i) {
5170 FASTDEF(temp);
5171 int v = FAST_SCALED_FLOAT_TO_INT(temp, src[i],15);
5172 if ((unsigned int) (v + 32768) > 65535)
5173 v = v < 0 ? -32768 : 32767;
5174 dest[i] = v;
5175 }
5176 }
5177
5178 static void compute_samples(int mask, short *output, int num_c, float **data, int d_offset, int len)
5179 {
5180 #define STB_BUFFER_SIZE 32
5181 float buffer[STB_BUFFER_SIZE];
5182 int i,j,o,n = STB_BUFFER_SIZE;
5183 check_endianness();
5184 for (o = 0; o < len; o += STB_BUFFER_SIZE) {
5185 memset(buffer, 0, sizeof(buffer));
5186 if (o + n > len) n = len - o;
5187 for (j=0; j < num_c; ++j) {
5188 if (channel_position[num_c][j] & mask) {
5189 for (i=0; i < n; ++i)
5190 buffer[i] += data[j][d_offset+o+i];
5191 }
5192 }
5193 for (i=0; i < n; ++i) {
5194 FASTDEF(temp);
5195 int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
5196 if ((unsigned int) (v + 32768) > 65535)
5197 v = v < 0 ? -32768 : 32767;
5198 output[o+i] = v;
5199 }
5200 }
5201 #undef STB_BUFFER_SIZE
5202 }
5203
5204 static void compute_stereo_samples(short *output, int num_c, float **data, int d_offset, int len)
5205 {
5206 #define STB_BUFFER_SIZE 32
5207 float buffer[STB_BUFFER_SIZE];
5208 int i,j,o,n = STB_BUFFER_SIZE >> 1;
5209 // o is the offset in the source data
5210 check_endianness();
5211 for (o = 0; o < len; o += STB_BUFFER_SIZE >> 1) {
5212 // o2 is the offset in the output data
5213 int o2 = o << 1;
5214 memset(buffer, 0, sizeof(buffer));
5215 if (o + n > len) n = len - o;
5216 for (j=0; j < num_c; ++j) {
5217 int m = channel_position[num_c][j] & (PLAYBACK_LEFT | PLAYBACK_RIGHT);
5218 if (m == (PLAYBACK_LEFT | PLAYBACK_RIGHT)) {
5219 for (i=0; i < n; ++i) {
5220 buffer[i*2+0] += data[j][d_offset+o+i];
5221 buffer[i*2+1] += data[j][d_offset+o+i];
5222 }
5223 } else if (m == PLAYBACK_LEFT) {
5224 for (i=0; i < n; ++i) {
5225 buffer[i*2+0] += data[j][d_offset+o+i];
5226 }
5227 } else if (m == PLAYBACK_RIGHT) {
5228 for (i=0; i < n; ++i) {
5229 buffer[i*2+1] += data[j][d_offset+o+i];
5230 }
5231 }
5232 }
5233 for (i=0; i < (n<<1); ++i) {
5234 FASTDEF(temp);
5235 int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
5236 if ((unsigned int) (v + 32768) > 65535)
5237 v = v < 0 ? -32768 : 32767;
5238 output[o2+i] = v;
5239 }
5240 }
5241 #undef STB_BUFFER_SIZE
5242 }
5243
5244 static void convert_samples_short(int buf_c, short **buffer, int b_offset, int data_c, float **data, int d_offset, int samples)
5245 {
5246 int i;
5247 if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
5248 static int channel_selector[3][2] = { {0}, {PLAYBACK_MONO}, {PLAYBACK_LEFT, PLAYBACK_RIGHT} };
5249 for (i=0; i < buf_c; ++i)
5250 compute_samples(channel_selector[buf_c][i], buffer[i]+b_offset, data_c, data, d_offset, samples);
5251 } else {
5252 int limit = buf_c < data_c ? buf_c : data_c;
5253 for (i=0; i < limit; ++i)
5254 copy_samples(buffer[i]+b_offset, data[i]+d_offset, samples);
5255 for ( ; i < buf_c; ++i)
5256 memset(buffer[i]+b_offset, 0, sizeof(short) * samples);
5257 }
5258 }
5259
5260 int stb_vorbis_get_frame_short(stb_vorbis *f, int num_c, short **buffer, int num_samples)
5261 {
5262 float **output = NULL;
5263 int len = stb_vorbis_get_frame_float(f, NULL, &output);
5264 if (len > num_samples) len = num_samples;
5265 if (len)
5266 convert_samples_short(num_c, buffer, 0, f->channels, output, 0, len);
5267 return len;
5268 }
5269
5270 static void convert_channels_short_interleaved(int buf_c, short *buffer, int data_c, float **data, int d_offset, int len)
5271 {
5272 int i;
5273 check_endianness();
5274 if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
5275 assert(buf_c == 2);
5276 for (i=0; i < buf_c; ++i)
5277 compute_stereo_samples(buffer, data_c, data, d_offset, len);
5278 } else {
5279 int limit = buf_c < data_c ? buf_c : data_c;
5280 int j;
5281 for (j=0; j < len; ++j) {
5282 for (i=0; i < limit; ++i) {
5283 FASTDEF(temp);
5284 float f = data[i][d_offset+j];
5285 int v = FAST_SCALED_FLOAT_TO_INT(temp, f,15);//data[i][d_offset+j],15);
5286 if ((unsigned int) (v + 32768) > 65535)
5287 v = v < 0 ? -32768 : 32767;
5288 *buffer++ = v;
5289 }
5290 for ( ; i < buf_c; ++i)
5291 *buffer++ = 0;
5292 }
5293 }
5294 }
5295
5296 int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts)
5297 {
5298 float **output;
5299 int len;
5300 if (num_c == 1) return stb_vorbis_get_frame_short(f,num_c,&buffer, num_shorts);
5301 len = stb_vorbis_get_frame_float(f, NULL, &output);
5302 if (len) {
5303 if (len*num_c > num_shorts) len = num_shorts / num_c;
5304 convert_channels_short_interleaved(num_c, buffer, f->channels, output, 0, len);
5305 }
5306 return len;
5307 }
5308
5309 int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts)
5310 {
5311 float **outputs;
5312 int len = num_shorts / channels;
5313 int n=0;
5314 while (n < len) {
5315 int k = f->channel_buffer_end - f->channel_buffer_start;
5316 if (n+k >= len) k = len - n;
5317 if (k)
5318 convert_channels_short_interleaved(channels, buffer, f->channels, f->channel_buffers, f->channel_buffer_start, k);
5319 buffer += k*channels;
5320 n += k;
5321 f->channel_buffer_start += k;
5322 if (n == len) break;
5323 if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
5324 }
5325 return n;
5326 }
5327
5328 int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int len)
5329 {
5330 float **outputs;
5331 int n=0;
5332 while (n < len) {
5333 int k = f->channel_buffer_end - f->channel_buffer_start;
5334 if (n+k >= len) k = len - n;
5335 if (k)
5336 convert_samples_short(channels, buffer, n, f->channels, f->channel_buffers, f->channel_buffer_start, k);
5337 n += k;
5338 f->channel_buffer_start += k;
5339 if (n == len) break;
5340 if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
5341 }
5342 return n;
5343 }
5344
5345 #ifndef STB_VORBIS_NO_STDIO
5346 int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output)
5347 {
5348 int data_len, offset, total, limit, error;
5349 short *data;
5350 stb_vorbis *v = stb_vorbis_open_filename(filename, &error, NULL);
5351 if (v == NULL) return -1;
5352 limit = v->channels * 4096;
5353 *channels = v->channels;
5354 if (sample_rate)
5355 *sample_rate = v->sample_rate;
5356 offset = data_len = 0;
5357 total = limit;
5358 data = (short *) malloc(total * sizeof(*data));
5359 if (data == NULL) {
5360 stb_vorbis_close(v);
5361 return -2;
5362 }
5363 for (;;) {
5364 int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
5365 if (n == 0) break;
5366 data_len += n;
5367 offset += n * v->channels;
5368 if (offset + limit > total) {
5369 short *data2;
5370 total *= 2;
5371 data2 = (short *) realloc(data, total * sizeof(*data));
5372 if (data2 == NULL) {
5373 free(data);
5374 stb_vorbis_close(v);
5375 return -2;
5376 }
5377 data = data2;
5378 }
5379 }
5380 *output = data;
5381 stb_vorbis_close(v);
5382 return data_len;
5383 }
5384 #endif // NO_STDIO
5385
5386 int stb_vorbis_decode_memory(const uint8 *mem, int len, int *channels, int *sample_rate, short **output)
5387 {
5388 int data_len, offset, total, limit, error;
5389 short *data;
5390 stb_vorbis *v = stb_vorbis_open_memory(mem, len, &error, NULL);
5391 if (v == NULL) return -1;
5392 limit = v->channels * 4096;
5393 *channels = v->channels;
5394 if (sample_rate)
5395 *sample_rate = v->sample_rate;
5396 offset = data_len = 0;
5397 total = limit;
5398 data = (short *) malloc(total * sizeof(*data));
5399 if (data == NULL) {
5400 stb_vorbis_close(v);
5401 return -2;
5402 }
5403 for (;;) {
5404 int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
5405 if (n == 0) break;
5406 data_len += n;
5407 offset += n * v->channels;
5408 if (offset + limit > total) {
5409 short *data2;
5410 total *= 2;
5411 data2 = (short *) realloc(data, total * sizeof(*data));
5412 if (data2 == NULL) {
5413 free(data);
5414 stb_vorbis_close(v);
5415 return -2;
5416 }
5417 data = data2;
5418 }
5419 }
5420 *output = data;
5421 stb_vorbis_close(v);
5422 return data_len;
5423 }
5424 #endif // STB_VORBIS_NO_INTEGER_CONVERSION
5425
5426 int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats)
5427 {
5428 float **outputs;
5429 int len = num_floats / channels;
5430 int n=0;
5431 int z = f->channels;
5432 if (z > channels) z = channels;
5433 while (n < len) {
5434 int i,j;
5435 int k = f->channel_buffer_end - f->channel_buffer_start;
5436 if (n+k >= len) k = len - n;
5437 for (j=0; j < k; ++j) {
5438 for (i=0; i < z; ++i)
5439 *buffer++ = f->channel_buffers[i][f->channel_buffer_start+j];
5440 for ( ; i < channels; ++i)
5441 *buffer++ = 0;
5442 }
5443 n += k;
5444 f->channel_buffer_start += k;
5445 if (n == len)
5446 break;
5447 if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
5448 break;
5449 }
5450 return n;
5451 }
5452
5453 int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples)
5454 {
5455 float **outputs;
5456 int n=0;
5457 int z = f->channels;
5458 if (z > channels) z = channels;
5459 while (n < num_samples) {
5460 int i;
5461 int k = f->channel_buffer_end - f->channel_buffer_start;
5462 if (n+k >= num_samples) k = num_samples - n;
5463 if (k) {
5464 for (i=0; i < z; ++i)
5465 memcpy(buffer[i]+n, f->channel_buffers[i]+f->channel_buffer_start, sizeof(float)*k);
5466 for ( ; i < channels; ++i)
5467 memset(buffer[i]+n, 0, sizeof(float) * k);
5468 }
5469 n += k;
5470 f->channel_buffer_start += k;
5471 if (n == num_samples)
5472 break;
5473 if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
5474 break;
5475 }
5476 return n;
5477 }
5478 #endif // STB_VORBIS_NO_PULLDATA_API
5479
5480 /* Version history
5481 1.17 - 2019-07-08 - fix CVE-2019-13217, -13218, -13219, -13220, -13221, -13222, -13223
5482 found with Mayhem by ForAllSecure
5483 1.16 - 2019-03-04 - fix warnings
5484 1.15 - 2019-02-07 - explicit failure if Ogg Skeleton data is found
5485 1.14 - 2018-02-11 - delete bogus dealloca usage
5486 1.13 - 2018-01-29 - fix truncation of last frame (hopefully)
5487 1.12 - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
5488 1.11 - 2017-07-23 - fix MinGW compilation
5489 1.10 - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
5490 1.09 - 2016-04-04 - back out 'avoid discarding last frame' fix from previous version
5491 1.08 - 2016-04-02 - fixed multiple warnings; fix setup memory leaks;
5492 avoid discarding last frame of audio data
5493 1.07 - 2015-01-16 - fixed some warnings, fix mingw, const-correct API
5494 some more crash fixes when out of memory or with corrupt files
5495 1.06 - 2015-08-31 - full, correct support for seeking API (Dougall Johnson)
5496 some crash fixes when out of memory or with corrupt files
5497 1.05 - 2015-04-19 - don't define __forceinline if it's redundant
5498 1.04 - 2014-08-27 - fix missing const-correct case in API
5499 1.03 - 2014-08-07 - Warning fixes
5500 1.02 - 2014-07-09 - Declare qsort compare function _cdecl on windows
5501 1.01 - 2014-06-18 - fix stb_vorbis_get_samples_float
5502 1.0 - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in multichannel
5503 (API change) report sample rate for decode-full-file funcs
5504 0.99996 - bracket #include <malloc.h> for macintosh compilation by Laurent Gomila
5505 0.99995 - use union instead of pointer-cast for fast-float-to-int to avoid alias-optimization problem
5506 0.99994 - change fast-float-to-int to work in single-precision FPU mode, remove endian-dependence
5507 0.99993 - remove assert that fired on legal files with empty tables
5508 0.99992 - rewind-to-start
5509 0.99991 - bugfix to stb_vorbis_get_samples_short by Bernhard Wodo
5510 0.9999 - (should have been 0.99990) fix no-CRT support, compiling as C++
5511 0.9998 - add a full-decode function with a memory source
5512 0.9997 - fix a bug in the read-from-FILE case in 0.9996 addition
5513 0.9996 - query length of vorbis stream in samples/seconds
5514 0.9995 - bugfix to another optimization that only happened in certain files
5515 0.9994 - bugfix to one of the optimizations that caused significant (but inaudible?) errors
5516 0.9993 - performance improvements; runs in 99% to 104% of time of reference implementation
5517 0.9992 - performance improvement of IMDCT; now performs close to reference implementation
5518 0.9991 - performance improvement of IMDCT
5519 0.999 - (should have been 0.9990) performance improvement of IMDCT
5520 0.998 - no-CRT support from Casey Muratori
5521 0.997 - bugfixes for bugs found by Terje Mathisen
5522 0.996 - bugfix: fast-huffman decode initialized incorrectly for sparse codebooks; fixing gives 10% speedup - found by Terje Mathisen
5523 0.995 - bugfix: fix to 'effective' overrun detection - found by Terje Mathisen
5524 0.994 - bugfix: garbage decode on final VQ symbol of a non-multiple - found by Terje Mathisen
5525 0.993 - bugfix: pushdata API required 1 extra byte for empty page (failed to consume final page if empty) - found by Terje Mathisen
5526 0.992 - fixes for MinGW warning
5527 0.991 - turn fast-float-conversion on by default
5528 0.990 - fix push-mode seek recovery if you seek into the headers
5529 0.98b - fix to bad release of 0.98
5530 0.98 - fix push-mode seek recovery; robustify float-to-int and support non-fast mode
5531 0.97 - builds under c++ (typecasting, don't use 'class' keyword)
5532 0.96 - somehow MY 0.95 was right, but the web one was wrong, so here's my 0.95 rereleased as 0.96, fixes a typo in the clamping code
5533 0.95 - clamping code for 16-bit functions
5534 0.94 - not publically released
5535 0.93 - fixed all-zero-floor case (was decoding garbage)
5536 0.92 - fixed a memory leak
5537 0.91 - conditional compiles to omit parts of the API and the infrastructure to support them: STB_VORBIS_NO_PULLDATA_API, STB_VORBIS_NO_PUSHDATA_API, STB_VORBIS_NO_STDIO, STB_VORBIS_NO_INTEGER_CONVERSION
5538 0.90 - first public release
5539 */
5540
5541 #endif // STB_VORBIS_HEADER_ONLY
5542
5543
5544 /*
5545 ------------------------------------------------------------------------------
5546 This software is available under 2 licenses -- choose whichever you prefer.
5547 ------------------------------------------------------------------------------
5548 ALTERNATIVE A - MIT License
5549 Copyright (c) 2017 Sean Barrett
5550 Permission is hereby granted, free of charge, to any person obtaining a copy of
5551 this software and associated documentation files (the "Software"), to deal in
5552 the Software without restriction, including without limitation the rights to
5553 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
5554 of the Software, and to permit persons to whom the Software is furnished to do
5555 so, subject to the following conditions:
5556 The above copyright notice and this permission notice shall be included in all
5557 copies or substantial portions of the Software.
5558 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5559 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5560 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5561 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
5562 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
5563 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
5564 SOFTWARE.
5565 ------------------------------------------------------------------------------
5566 ALTERNATIVE B - Public Domain (www.unlicense.org)
5567 This is free and unencumbered software released into the public domain.
5568 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
5569 software, either in source code form or as a compiled binary, for any purpose,
5570 commercial or non-commercial, and by any means.
5571 In jurisdictions that recognize copyright laws, the author or authors of this
5572 software dedicate any and all copyright interest in the software to the public
5573 domain. We make this dedication for the benefit of the public at large and to
5574 the detriment of our heirs and successors. We intend this dedication to be an
5575 overt act of relinquishment in perpetuity of all present and future rights to
5576 this software under copyright law.
5577 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5578 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5579 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5580 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
5581 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
5582 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
5583 ------------------------------------------------------------------------------
5584 */