1 /* inffast.c -- fast decoding
\r
2 * Copyright (C) 1995-2004 Mark Adler
\r
3 * For conditions of distribution and use, see copyright notice in zlib.h
\r
7 #include "inftrees.h"
\r
13 /* Allow machine dependent optimization for post-increment or pre-increment.
\r
14 Based on testing to date,
\r
15 Pre-increment preferred for:
\r
16 - PowerPC G3 (Adler)
\r
17 - MIPS R5000 (Randers-Pehrson)
\r
18 Post-increment preferred for:
\r
20 No measurable difference:
\r
21 - Pentium III (Anderson)
\r
26 # define PUP(a) *(a)++
\r
29 # define PUP(a) *++(a)
\r
33 Decode literal, length, and distance codes and write out the resulting
\r
34 literal and match bytes until either not enough input or output is
\r
35 available, an end-of-block is encountered, or a data error is encountered.
\r
36 When large enough input and output buffers are supplied to inflate(), for
\r
37 example, a 16K input buffer and a 64K output buffer, more than 95% of the
\r
38 inflate execution time is spent in this routine.
\r
44 strm->avail_out >= 258
\r
45 start >= strm->avail_out
\r
48 On return, state->mode is one of:
\r
50 LEN -- ran out of enough output space or enough available input
\r
51 TYPE -- reached end of block code, inflate() to interpret next block
\r
52 BAD -- error in block data
\r
56 - The maximum input bits used by a length/distance pair is 15 bits for the
\r
57 length code, 5 bits for the length extra, 15 bits for the distance code,
\r
58 and 13 bits for the distance extra. This totals 48 bits, or six bytes.
\r
59 Therefore if strm->avail_in >= 6, then there is enough input to avoid
\r
60 checking for available input while decoding.
\r
62 - The maximum bytes that a single length/distance pair can output is 258
\r
63 bytes, which is the maximum length that can be coded. inflate_fast()
\r
64 requires strm->avail_out >= 258 for each loop to avoid checking for
\r
67 void inflate_fast(strm, start)
\r
69 unsigned start; /* inflate()'s starting value for strm->avail_out */
\r
71 struct inflate_state FAR *state;
\r
72 unsigned char FAR *in; /* local strm->next_in */
\r
73 unsigned char FAR *last; /* while in < last, enough input available */
\r
74 unsigned char FAR *out; /* local strm->next_out */
\r
75 unsigned char FAR *beg; /* inflate()'s initial strm->next_out */
\r
76 unsigned char FAR *end; /* while out < end, enough space available */
\r
77 #ifdef INFLATE_STRICT
\r
78 unsigned dmax; /* maximum distance from zlib header */
\r
80 unsigned wsize; /* window size or zero if not using window */
\r
81 unsigned whave; /* valid bytes in the window */
\r
82 unsigned write; /* window write index */
\r
83 unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */
\r
84 unsigned long hold; /* local strm->hold */
\r
85 unsigned bits; /* local strm->bits */
\r
86 code const FAR *lcode; /* local strm->lencode */
\r
87 code const FAR *dcode; /* local strm->distcode */
\r
88 unsigned lmask; /* mask for first level of length codes */
\r
89 unsigned dmask; /* mask for first level of distance codes */
\r
90 code this; /* retrieved table entry */
\r
91 unsigned op; /* code bits, operation, extra bits, or */
\r
92 /* window position, window bytes to copy */
\r
93 unsigned len; /* match length, unused bytes */
\r
94 unsigned dist; /* match distance */
\r
95 unsigned char FAR *from; /* where to copy match from */
\r
97 /* copy state to local variables */
\r
98 state = (struct inflate_state FAR *)strm->state;
\r
99 in = strm->next_in - OFF;
\r
100 last = in + (strm->avail_in - 5);
\r
101 out = strm->next_out - OFF;
\r
102 beg = out - (start - strm->avail_out);
\r
103 end = out + (strm->avail_out - 257);
\r
104 #ifdef INFLATE_STRICT
\r
105 dmax = state->dmax;
\r
107 wsize = state->wsize;
\r
108 whave = state->whave;
\r
109 write = state->write;
\r
110 window = state->window;
\r
111 hold = state->hold;
\r
112 bits = state->bits;
\r
113 lcode = state->lencode;
\r
114 dcode = state->distcode;
\r
115 lmask = (1U << state->lenbits) - 1;
\r
116 dmask = (1U << state->distbits) - 1;
\r
118 /* decode literals and length/distances until end-of-block or not enough
\r
119 input data or output space */
\r
122 hold += (unsigned long)(PUP(in)) << bits;
\r
124 hold += (unsigned long)(PUP(in)) << bits;
\r
127 this = lcode[hold & lmask];
\r
129 op = (unsigned)(this.bits);
\r
132 op = (unsigned)(this.op);
\r
133 if (op == 0) { /* literal */
\r
134 Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ?
\r
135 "inflate: literal '%c'\n" :
\r
136 "inflate: literal 0x%02x\n", this.val));
\r
137 PUP(out) = (unsigned char)(this.val);
\r
139 else if (op & 16) { /* length base */
\r
140 len = (unsigned)(this.val);
\r
141 op &= 15; /* number of extra bits */
\r
144 hold += (unsigned long)(PUP(in)) << bits;
\r
147 len += (unsigned)hold & ((1U << op) - 1);
\r
151 Tracevv((stderr, "inflate: length %u\n", len));
\r
153 hold += (unsigned long)(PUP(in)) << bits;
\r
155 hold += (unsigned long)(PUP(in)) << bits;
\r
158 this = dcode[hold & dmask];
\r
160 op = (unsigned)(this.bits);
\r
163 op = (unsigned)(this.op);
\r
164 if (op & 16) { /* distance base */
\r
165 dist = (unsigned)(this.val);
\r
166 op &= 15; /* number of extra bits */
\r
168 hold += (unsigned long)(PUP(in)) << bits;
\r
171 hold += (unsigned long)(PUP(in)) << bits;
\r
175 dist += (unsigned)hold & ((1U << op) - 1);
\r
176 #ifdef INFLATE_STRICT
\r
178 strm->msg = (char *)"invalid distance too far back";
\r
185 Tracevv((stderr, "inflate: distance %u\n", dist));
\r
186 op = (unsigned)(out - beg); /* max distance in output */
\r
187 if (dist > op) { /* see if copy from window */
\r
188 op = dist - op; /* distance back in window */
\r
190 strm->msg = (char *)"invalid distance too far back";
\r
194 from = window - OFF;
\r
195 if (write == 0) { /* very common case */
\r
196 from += wsize - op;
\r
197 if (op < len) { /* some from window */
\r
200 PUP(out) = PUP(from);
\r
202 from = out - dist; /* rest from output */
\r
205 else if (write < op) { /* wrap around window */
\r
206 from += wsize + write - op;
\r
208 if (op < len) { /* some from end of window */
\r
211 PUP(out) = PUP(from);
\r
213 from = window - OFF;
\r
214 if (write < len) { /* some from start of window */
\r
218 PUP(out) = PUP(from);
\r
220 from = out - dist; /* rest from output */
\r
224 else { /* contiguous in window */
\r
225 from += write - op;
\r
226 if (op < len) { /* some from window */
\r
229 PUP(out) = PUP(from);
\r
231 from = out - dist; /* rest from output */
\r
235 PUP(out) = PUP(from);
\r
236 PUP(out) = PUP(from);
\r
237 PUP(out) = PUP(from);
\r
241 PUP(out) = PUP(from);
\r
243 PUP(out) = PUP(from);
\r
247 from = out - dist; /* copy direct from output */
\r
248 do { /* minimum length is three */
\r
249 PUP(out) = PUP(from);
\r
250 PUP(out) = PUP(from);
\r
251 PUP(out) = PUP(from);
\r
255 PUP(out) = PUP(from);
\r
257 PUP(out) = PUP(from);
\r
261 else if ((op & 64) == 0) { /* 2nd level distance code */
\r
262 this = dcode[this.val + (hold & ((1U << op) - 1))];
\r
266 strm->msg = (char *)"invalid distance code";
\r
271 else if ((op & 64) == 0) { /* 2nd level length code */
\r
272 this = lcode[this.val + (hold & ((1U << op) - 1))];
\r
275 else if (op & 32) { /* end-of-block */
\r
276 Tracevv((stderr, "inflate: end of block\n"));
\r
277 state->mode = TYPE;
\r
281 strm->msg = (char *)"invalid literal/length code";
\r
285 } while (in < last && out < end);
\r
287 /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
\r
291 hold &= (1U << bits) - 1;
\r
293 /* update state and return */
\r
294 strm->next_in = in + OFF;
\r
295 strm->next_out = out + OFF;
\r
296 strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
\r
297 strm->avail_out = (unsigned)(out < end ?
\r
298 257 + (end - out) : 257 - (out - end));
\r
299 state->hold = hold;
\r
300 state->bits = bits;
\r
305 inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
\r
306 - Using bit fields for code structure
\r
307 - Different op definition to avoid & for extra bits (do & for table bits)
\r
308 - Three separate decoding do-loops for direct, window, and write == 0
\r
309 - Special case for distance > 1 copies to do overlapped load and store copy
\r
310 - Explicit branch predictions (based on measured branch probabilities)
\r
311 - Deferring match copy and interspersed it with decoding subsequent codes
\r
312 - Swapping literal/length else
\r
313 - Swapping window/direct else
\r
314 - Larger unrolled copy loops (three is about right)
\r
315 - Moving len -= 3 statement into middle of loop
\r
318 #endif /* !ASMINF */
\r