1 module wren.utils;
2 
// Generates the D source for a growable buffer of one element type; the
// returned text is meant to be pasted into a module with `mixin(...)`.
//
// There is no pre-processor here. Realistically this should be a mixin
// template, but a mixin template would still need string mixins to declare
// functions whose names depend on [name].
// We also can't technically mark this @nogc, since std.format *does* use the
// GC, but this should *only* ever be run at compile-time.
//
// Parameters:
//   name - substituted for `%1$s`: prefix of the generated struct
//          (`<name>Buffer`) and of the functions (`wren<name>Buffer...`).
//   type - substituted for `%2$s`: the element type stored in the buffer.
//
// The generated code declares:
//   struct <name>Buffer        - `data` pointer plus `count` and `capacity`.
//   wren<name>BufferInit       - resets data/capacity/count to null/0/0.
//   wren<name>BufferClear      - calls wrenReallocate(vm, data, 0, 0)
//                                (presumably releasing the storage -- confirm
//                                against wren.vm) and then re-initializes.
//   wren<name>BufferFill       - appends `count` copies of `data`, first
//                                growing the capacity to
//                                wrenPowerOf2Ceil(buffer.count + count) when
//                                the current capacity is too small.
//   wren<name>BufferWrite      - appends a single element via Fill.
//
// Returns the generated source text.
package string DECLARE_BUFFER(string name, string type) {
    // Guard against accidental runtime use: outside of CTFE this halts.
    if (!__ctfe) assert(0, "This function should never be run outside of CTFE.");

    import std.format : format;

    // The template below is a delimited string literal, so everything up to
    // the closing `}"` is literal text; %1$s / %2$s are positional format
    // arguments for [name] and [type] respectively.
    return format!q"{
    struct %1$sBuffer {
        %2$s* data;
        int count;
        int capacity;
    };

    void wren%1$sBufferInit(%1$sBuffer* buffer) @nogc {
        buffer.data = null;
        buffer.capacity = 0;
        buffer.count = 0;
    }

    void wren%1$sBufferClear(VM)(VM* vm, %1$sBuffer* buffer) @nogc {
        import wren.vm : wrenReallocate;
        wrenReallocate(vm, buffer.data, 0, 0);
        wren%1$sBufferInit(buffer);
    }

    void wren%1$sBufferFill(VM)(VM* vm, %1$sBuffer* buffer, %2$s data, int count) @nogc {
        import wren.vm : wrenReallocate;
        if (buffer.capacity < buffer.count + count) {
            int capacity = wrenPowerOf2Ceil(buffer.count + count);
            buffer.data = cast(%2$s*)wrenReallocate(vm, buffer.data, 
                buffer.capacity * (%2$s).sizeof, capacity * (%2$s).sizeof);
            buffer.capacity = capacity;
        }

        for (int i = 0; i < count; i++) {
            buffer.data[buffer.count++] = data;
        }
    }

    void wren%1$sBufferWrite(VM)(VM* vm, %1$sBuffer* buffer, %2$s data) @nogc {
        wren%1$sBufferFill(vm, buffer, data, 1);
    }
    }"(name, type);
}
51 
// Instantiate the buffer types used by this module: ByteBuffer (ubyte
// elements) and IntBuffer (int elements), each with its wren<Name>BufferInit /
// Clear / Fill / Write functions.
mixin(DECLARE_BUFFER("Byte", "ubyte"));
mixin(DECLARE_BUFFER("Int", "int"));
54 
// Computes how many bytes the UTF-8 encoding of the code point [value]
// occupies (1 to 4).
//
// Yields 0 when [value] lies above U+10FFFF and therefore cannot be encoded.
// [value] must not be negative.
int wrenUtf8EncodeNumBytes(int value) @nogc
{
  assert(value >= 0, "Cannot encode a negative value.");

  // Walk the thresholds from the largest range down to the smallest.
  if (value > 0x10ffff) return 0;
  if (value > 0xffff) return 4;
  if (value > 0x7ff) return 3;
  return value > 0x7f ? 2 : 1;
}
68 
69 
// Writes the UTF-8 encoding of the code point [value] into [bytes]. The
// caller must supply a buffer with room for the whole sequence (at most four
// bytes).
//
// Returns how many bytes were written. Values above U+10FFFF are not
// encodable and trip the final assert.
int wrenUtf8Encode(int value, ubyte* bytes) @nogc
{
  // One byte: plain ASCII, 0xxxxxxx.
  if (value <= 0x7f)
  {
    bytes[0] = cast(ubyte)(value & 0x7f);
    return 1;
  }

  // Two bytes: 110xxxxx 10xxxxxx.
  if (value <= 0x7ff)
  {
    bytes[0] = cast(ubyte)(0xc0 | ((value >> 6) & 0x1f));
    bytes[1] = cast(ubyte)(0x80 | (value & 0x3f));
    return 2;
  }

  // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx.
  if (value <= 0xffff)
  {
    bytes[0] = cast(ubyte)(0xe0 | ((value >> 12) & 0x0f));
    bytes[1] = cast(ubyte)(0x80 | ((value >> 6) & 0x3f));
    bytes[2] = cast(ubyte)(0x80 | (value & 0x3f));
    return 3;
  }

  // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
  if (value <= 0x10ffff)
  {
    bytes[0] = cast(ubyte)(0xf0 | ((value >> 18) & 0x07));
    bytes[1] = cast(ubyte)(0x80 | ((value >> 12) & 0x3f));
    bytes[2] = cast(ubyte)(0x80 | ((value >> 6) & 0x3f));
    bytes[3] = cast(ubyte)(0x80 | (value & 0x3f));
    return 4;
  }

  // Not a valid Unicode value. See: http://tools.ietf.org/html/rfc3629
  assert(0, "Unreachable");
}
116 
// Decodes the UTF-8 sequence starting at [bytes], reading at most [length]
// bytes, and returns the decoded code point.
//
// Returns -1 if the bytes are not a valid UTF-8 sequence: the buffer is empty,
// the lead byte is not a valid sequence start, the sequence is truncated by
// [length], or a continuation byte is not of the form 10xxxxxx.
//
// Only the byte structure is validated; the decoded value itself is not
// range-checked.
int wrenUtf8Decode(ubyte* bytes, uint length) @nogc
{
  // An empty buffer holds no sequence. Checking this first also keeps us from
  // dereferencing [bytes] when there is nothing to read.
  if (length == 0) return -1;

  // Single byte (i.e. fits in ASCII).
  if (*bytes <= 0x7f) return *bytes;

  int value;
  uint remainingBytes;
  if ((*bytes & 0xe0) == 0xc0)
  {
    // Two byte sequence: 110xxxxx 10xxxxxx.
    value = *bytes & 0x1f;
    remainingBytes = 1;
  }
  else if ((*bytes & 0xf0) == 0xe0)
  {
    // Three byte sequence: 1110xxxx 10xxxxxx 10xxxxxx.
    value = *bytes & 0x0f;
    remainingBytes = 2;
  }
  else if ((*bytes & 0xf8) == 0xf0)
  {
    // Four byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
    value = *bytes & 0x07;
    remainingBytes = 3;
  }
  else
  {
    // Invalid UTF-8 sequence.
    return -1;
  }

  // Don't read past the end of the buffer on truncated UTF-8. Written as
  // `>= length` rather than `> length - 1` so the unsigned subtraction can
  // never wrap around.
  if (remainingBytes >= length) return -1;

  while (remainingBytes > 0)
  {
    bytes++;
    remainingBytes--;

    // Remaining bytes must be of form 10xxxxxx.
    if ((*bytes & 0xc0) != 0x80) return -1;

    // Append the six payload bits of this continuation byte.
    value = value << 6 | (*bytes & 0x3f);
  }

  return value;
}
168 
// Given [byte_], the first byte of a UTF-8 sequence, reports how many bytes
// the whole sequence spans.
//
// Yields 0 when [byte_] is a continuation byte (10xxxxxx), i.e. it sits in the
// middle of a sequence rather than at its start. Any byte that matches none of
// the multi-byte lead patterns counts as a one-byte sequence.
int wrenUtf8DecodeNumBytes(ubyte byte_) @nogc
{
  // The high bits partition the byte range into consecutive bands, so plain
  // comparisons in ascending order are enough.
  if (byte_ < 0x80) return 1; // 0xxxxxxx: ASCII.
  if (byte_ < 0xc0) return 0; // 10xxxxxx: continuation byte.
  if (byte_ < 0xe0) return 2; // 110xxxxx.
  if (byte_ < 0xf0) return 3; // 1110xxxx.
  if (byte_ < 0xf8) return 4; // 11110xxx.

  // 11111xxx: not a recognised lead pattern; treated as a single byte.
  return 1;
}
186 
// Rounds [n] up to the nearest power of two; a value that already is a power
// of two is returned unchanged.
//
// Technique from:
// http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
//
// Inputs of zero or less yield 0, and inputs above 2^30 overflow `int`.
int wrenPowerOf2Ceil(int n) @nogc
{
  // Step down by one so that exact powers of two map onto themselves.
  n--;

  // Copy the highest set bit into every lower position (shifts of 1, 2, 4, 8
  // and 16 cover all 32 bits).
  for (int shift = 1; shift < 32; shift <<= 1)
  {
    n |= n >> shift;
  }

  // All bits below the top one are now set; adding one carries into the next
  // power of two.
  return n + 1;
}
201 
// Resolves [value] to an index into a sequence of [count] elements.
//
// Non-negative values must lie within `[0, count)`. Negative values count
// backwards from the end, so -1 names the last element. Returns the resolved
// index, or `uint.max` when [value] falls outside the sequence.
uint wrenValidateIndex(uint count, long value) @nogc
{
  const long limit = count;

  // Map a negative index onto its position counted from the end.
  const long index = value < 0 ? limit + value : value;

  // Anything still outside [0, count) is invalid.
  if (index < 0 || index >= limit) return uint.max;

  return cast(uint)index;
}