1 module wren.utils;
2
// Builds the D source text for a growable buffer of [type] elements, meant to
// be pasted into this module with `mixin(DECLARE_BUFFER(...))`.
//
// [name] becomes part of every generated identifier (`<name>Buffer`,
// `wren<name>BufferInit`, ...), and [type] is the element type stored in it.
//
// There is no pre-processor in D, so the declarations are produced as a string
// at compile time. A mixin template would be the more natural tool, but it
// would still need string mixins to declare functions with generated names.
//
// This function cannot be marked @nogc because std.format *does* use the GC;
// it is only ever meant to be evaluated at compile time.
package string DECLARE_BUFFER(string name, string type) {
    import std.format : format;

    // Refuse to run at runtime: the result is only useful as mixin input.
    if (!__ctfe) assert(0, "This function should never be run outside of CTFE.");

    // In the template below, %1$s expands to [name] and %2$s to [type].
    //
    // Generated declarations:
    //  - <name>Buffer:            pointer to the elements plus count/capacity.
    //  - wren<name>BufferInit:    resets a buffer to the empty state.
    //  - wren<name>BufferClear:   hands the storage back through
    //                             wrenReallocate(vm, data, 0, 0), then re-inits.
    //  - wren<name>BufferFill:    appends [count] copies of [data], growing the
    //                             capacity to a power of two when needed.
    //  - wren<name>BufferWrite:   appends a single element.
    enum string bufferTemplate = q"{
struct %1$sBuffer {
    %2$s* data;
    int count;
    int capacity;
};

void wren%1$sBufferInit(%1$sBuffer* buffer) @nogc {
    buffer.data = null;
    buffer.capacity = 0;
    buffer.count = 0;
}

void wren%1$sBufferClear(VM)(VM* vm, %1$sBuffer* buffer) @nogc {
    import wren.vm : wrenReallocate;
    wrenReallocate(vm, buffer.data, 0, 0);
    wren%1$sBufferInit(buffer);
}

void wren%1$sBufferFill(VM)(VM* vm, %1$sBuffer* buffer, %2$s data, int count) @nogc {
    import wren.vm : wrenReallocate;
    if (buffer.capacity < buffer.count + count) {
        int capacity = wrenPowerOf2Ceil(buffer.count + count);
        buffer.data = cast(%2$s*)wrenReallocate(vm, buffer.data,
            buffer.capacity * (%2$s).sizeof, capacity * (%2$s).sizeof);
        buffer.capacity = capacity;
    }

    for (int i = 0; i < count; i++) {
        buffer.data[buffer.count++] = data;
    }
}

void wren%1$sBufferWrite(VM)(VM* vm, %1$sBuffer* buffer, %2$s data) @nogc {
    wren%1$sBufferFill(vm, buffer, data, 1);
}
}";

    return format!bufferTemplate(name, type);
}
51
// Instantiate the buffer types used by this module: ByteBuffer (ubyte elements)
// and IntBuffer (int elements), together with their wren*Buffer* helpers.
mixin(DECLARE_BUFFER("Byte", "ubyte") ~ DECLARE_BUFFER("Int", "int"));
54
// Computes how many bytes the UTF-8 encoding of code point [value] occupies.
//
// Yields 1-4 for values up to 0x10ffff and 0 when [value] is too large to
// encode. Negative values are rejected by an assertion.
int wrenUtf8EncodeNumBytes(int value) @nogc
{
    assert(value >= 0, "Cannot encode a negative value.");

    // Walk the size classes from the largest threshold down.
    if (value > 0x10ffff) return 0;
    if (value > 0xffff) return 4;
    if (value > 0x7ff) return 3;
    if (value > 0x7f) return 2;
    return 1;
}
68
69
// Writes the UTF-8 encoding of code point [value] into [bytes].
//
// The caller must make sure [bytes] has room for the whole sequence (at most
// four bytes). The return value is the number of bytes written. Values above
// 0x10ffff are not valid Unicode and hit the final assertion.
int wrenUtf8Encode(int value, ubyte* bytes) @nogc
{
    // Packs the low six bits of [bits] into a continuation byte (10xxxxxx).
    static ubyte continuation(int bits) @nogc
    {
        return cast(ubyte)(0x80 | (bits & 0x3f));
    }

    if (value <= 0x7f)
    {
        // One byte, plain ASCII: 0xxxxxxx.
        bytes[0] = cast(ubyte)(value & 0x7f);
        return 1;
    }

    if (value <= 0x7ff)
    {
        // Two bytes: 110xxxxx 10xxxxxx.
        bytes[0] = cast(ubyte)(0xc0 | ((value >> 6) & 0x1f));
        bytes[1] = continuation(value);
        return 2;
    }

    if (value <= 0xffff)
    {
        // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx.
        bytes[0] = cast(ubyte)(0xe0 | ((value >> 12) & 0x0f));
        bytes[1] = continuation(value >> 6);
        bytes[2] = continuation(value);
        return 3;
    }

    if (value <= 0x10ffff)
    {
        // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
        bytes[0] = cast(ubyte)(0xf0 | ((value >> 18) & 0x07));
        bytes[1] = continuation(value >> 12);
        bytes[2] = continuation(value >> 6);
        bytes[3] = continuation(value);
        return 4;
    }

    // Invalid Unicode value. See: http://tools.ietf.org/html/rfc3629
    assert(0, "Unreachable");
}
116
// Decodes the UTF-8 sequence starting at [bytes], reading at most [length]
// bytes, and returns the decoded code point.
//
// Returns -1 if [length] is zero, if the lead byte is not a valid sequence
// start, if the sequence is truncated by [length], or if a trailing byte is
// not a continuation byte (10xxxxxx). No check is made for overlong encodings
// or for decoded values outside the Unicode range.
int wrenUtf8Decode(ubyte* bytes, uint length) @nogc
{
    // An empty buffer has no lead byte to inspect. Without this guard the
    // lead byte would be read out of bounds, and the unsigned `length - 1`
    // below would wrap around and disable the truncation check.
    if (length == 0) return -1;

    // Single byte (i.e. fits in ASCII).
    if (*bytes <= 0x7f) return *bytes;

    int value;
    uint remainingBytes;
    if ((*bytes & 0xe0) == 0xc0)
    {
        // Two byte sequence: 110xxxxx 10xxxxxx.
        value = *bytes & 0x1f;
        remainingBytes = 1;
    }
    else if ((*bytes & 0xf0) == 0xe0)
    {
        // Three byte sequence: 1110xxxx 10xxxxxx 10xxxxxx.
        value = *bytes & 0x0f;
        remainingBytes = 2;
    }
    else if ((*bytes & 0xf8) == 0xf0)
    {
        // Four byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
        value = *bytes & 0x07;
        remainingBytes = 3;
    }
    else
    {
        // Invalid UTF-8 sequence.
        return -1;
    }

    // Don't read past the end of the buffer on truncated UTF-8.
    // `length` is at least 1 here, so the subtraction cannot wrap.
    if (remainingBytes > length - 1) return -1;

    while (remainingBytes > 0)
    {
        bytes++;
        remainingBytes--;

        // Remaining bytes must be of form 10xxxxxx.
        if ((*bytes & 0xc0) != 0x80) return -1;

        // Append the six payload bits of this continuation byte.
        value = value << 6 | (*bytes & 0x3f);
    }

    return value;
}
168
// Reports the length in bytes of the UTF-8 sequence that begins with [byte_].
//
// A continuation byte (10xxxxxx) is the middle of a sequence rather than its
// start, so it yields 0. Lead bytes 110xxxxx, 1110xxxx and 11110xxx yield 2, 3
// and 4. Every other byte yields 1.
int wrenUtf8DecodeNumBytes(ubyte byte_) @nogc
{
    // The top five bits are enough to tell every class of byte apart.
    switch (byte_ >> 3)
    {
        // 10xxx: continuation byte, not the start of a sequence.
        case 0b10000: .. case 0b10111:
            return 0;

        // 110xx: lead byte of a two byte sequence.
        case 0b11000: .. case 0b11011:
            return 2;

        // 1110x: lead byte of a three byte sequence.
        case 0b11100, 0b11101:
            return 3;

        // 11110: lead byte of a four byte sequence.
        case 0b11110:
            return 4;

        // Everything else counts as a single byte.
        default:
            return 1;
    }
}
186
// From: http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2Float
// Rounds [n] up to the nearest power of two; a value that is already a power
// of two is returned unchanged.
int wrenPowerOf2Ceil(int n) @nogc
{
    int bits = n - 1;

    // Smear the highest set bit into every lower position by OR-ing in
    // right-shifted copies at doubling distances (1, 2, 4, 8, 16).
    for (int shift = 1; shift <= 16; shift *= 2)
    {
        bits |= bits >> shift;
    }

    // All bits below the top one are now set, so adding one carries into the
    // next power of two.
    return bits + 1;
}
201
// Checks that [value] is a usable index into a sequence of [count] elements.
//
// Non-negative values must lie within `[0, count)`. Negative values count
// backwards from the end, so -1 refers to the last element. Returns the
// resolved non-negative index, or `uint.max` when the index is out of range.
uint wrenValidateIndex(uint count, long value) @nogc
{
    // Map a negative index onto its position measured from the end.
    immutable long resolved = value < 0 ? count + value : value;

    // Anything still outside [0, count) is invalid.
    immutable bool outOfRange = resolved < 0 || resolved >= count;
    return outOfRange ? uint.max : cast(uint)resolved;
}