1 module css.selector;
2 
3 
4 import std.algorithm;
5 import std.array;
6 import std.ascii;
7 import std.string;
8 
9 
10 private bool isSpace(Char)(Char ch) {
11 	return (ch == 32) || ((ch >= 9) && (ch <= 13));
12 }
13 
14 
15 private bool equalsCI(CharA, CharB)(const(CharA)[] a, const(CharB)[] b) {
16 	if (a.length == b.length) {
17 		for (uint i = 0; i < a.length; ++i) {
18 			if (std.ascii.toLower(a[i]) != std.ascii.toLower(b[i]))
19 				return false;
20 		}
21 		return true;
22 	}
23 	return false;
24 }
25 
26 
27 private struct Rule {
28 	enum Flags : size_t {
29 		HasTag          = 1 << 0,
30 		HasAttr         = 1 << 1,
31 		HasPseudo       = 1 << 2,
32 		CaseSensitive   = 1 << 3,
33 		HasAny          = 1 << 4,
34 	}
35 
36 	enum MatchType : ubyte {
37 		None = 0,
38 		Set,
39 		Exact,
40 		ContainWord,
41 		Contain,
42 		Begin,
43 		BeginHyphen,
44 		End,
45 	}
46 
47 	enum Relation : ubyte {
48 		None = 0,
49 		Descendant,
50 		Child,
51 		DirectAdjacent,
52 		IndirectAdjacent,
53 	}
54 
55 	bool matches(ElementType)(ElementType element) const {
56 		if (flags_ == 0)
57 			return false;
58 
59 		if (flags_ & Flags.HasTag) {
60 			if (!tag_.equalsCI(element.tag))
61 				return false;
62 		}
63 
64 		if (flags_ & Flags.HasAttr) {
65 			auto cs = (flags_ & Flags.CaseSensitive) != 0;
66 			final switch (match_) with (MatchType) {
67 			case None:
68 				break;
69 			case Set:
70 				if (element.attr(attr_) == null)
71 					return false;
72 				break;
73 			case Exact:
74 				if (value_.empty) return false;
75 				auto pattr = element.attr(attr_);
76 				if (!pattr || (cs ? (value_ != *pattr) : !value_.equalsCI(*pattr)))
77 					return false;
78 				break;
79 			case Contain:
80 				if (value_.empty) return false;
81 				auto pattr = element.attr(attr_);
82 				if (!pattr || (((*pattr).indexOf(value_, cs ? CaseSensitive.yes : CaseSensitive.no)) == -1))
83 					return false;
84 				break;
85 			case ContainWord:
86 				if (value_.empty) return false;
87 				auto pattr = element.attr(attr_);
88 				if (!pattr)
89 					return false;
90 
91 				size_t start = 0;
92 				while (true) {
93 					auto index = (*pattr).indexOf(value_, start, cs ? CaseSensitive.yes : CaseSensitive.no);
94 					if (index == -1)
95 						return false;
96 					if (index && !isSpace((*pattr)[index - 1]))
97 						return false;
98 					if ((index + value_.length == pattr.length) || isSpace((*pattr)[index + value_.length]))
99 						break;
100 					start = index + 1;
101 				}
102 				break;
103 			case Begin:
104 				if (value_.empty) return false;
105 				auto pattr = element.attr(attr_);
106 				if (!pattr || (((*pattr).indexOf(value_, cs ? CaseSensitive.yes : CaseSensitive.no)) != 0))
107 					return false;
108 				break;
109 			case End:
110 				if (value_.empty) return false;
111 				auto pattr = element.attr(attr_);
112 				if (!pattr || (((*pattr).lastIndexOf(value_, cs ? CaseSensitive.yes : CaseSensitive.no)) != (pattr.length - value_.length)))
113 					return false;
114 				break;
115 			case BeginHyphen:
116 				if (value_.empty) return false;
117 				auto pattr = element.attr(attr_);
118 				if (!pattr || (((*pattr).indexOf(value_, cs ? CaseSensitive.yes : CaseSensitive.no)) != 0) || ((pattr.length > value_.length) && ((*pattr)[value_.length] != '-')))
119 					return false;
120 				break;
121 		   }
122 		}
123 
124 		if (flags_ & Flags.HasPseudo) {
125 			if (!element.pseudo(pseudo_, pseudoArg_))
126 				return false;
127 		}
128 
129 		return true;
130 	}
131 
132 	@property Relation relation() const {
133 		return relation_;
134 	}
135 
136 package:
137 	size_t flags_;
138 	MatchType match_;
139 	Relation relation_;
140 	const(char)[] tag_;
141 	const(char)[] attr_;
142 	const(char)[] value_;
143 	const(char)[] pseudo_;
144 	const(char)[] pseudoArg_;
145 }
146 
147 
148 struct Selector {
149 	static Selector parse(const(char)[] value) {
150 		enum ParserStates {
151 			Identifier = 0,
152 			PostIdentifier,
153 			Tag,
154 			Class,
155 			ID,
156 			AttrName,
157 			AttrOp,
158 			PreAttrValue,
159 			AttrValueDQ,
160 			AttrValueSQ,
161 			AttrValueNQ,
162 			PostAttrValue,
163 			Pseudo,
164 			PseudoArgs,
165 			Relation,
166 		}
167 
168 		size_t ids;
169 		size_t tags;
170 		size_t classes;
171 
172 		value = value.strip;
173 		auto source = uninitializedArray!(char[])(value.length + 1);
174 		source[0..value.length] = value;
175 		source[$-1] = ' '; // add a padding space to ease parsing
176 
177 		auto selector = Selector(source);
178 		Rule[] rules;
179 		rules.reserve(2);
180 		++rules.length;
181 
182 		auto rule = &rules.back;
183 
184 		auto ptr = source.ptr;
185 		auto end = source.ptr + source.length;
186 		auto start = ptr;
187 
188 		ParserStates state = ParserStates.Identifier;
189 
190 		while (ptr != end) {
191 			final switch (state) with (ParserStates) {
192 			case Identifier:
193 				if (*ptr == '#') {
194 					state = ID;
195 					start = ptr + 1;
196 				} else if (*ptr == '.') {
197 					state = Class;
198 					start = ptr + 1;
199 				} else if (*ptr == '[') {
200 					state = AttrName;
201 					start = ptr + 1;
202 				} else if (*ptr == ':') {
203 					rule.flags_ |= Rule.Flags.HasAny;
204 					state = PostIdentifier;
205 					continue;
206 				} else if (isAlpha(*ptr)) {
207 					state = Tag;
208 					start = ptr;
209 					continue;
210 				} else if (*ptr == '*') {
211 					rule.flags_ |= Rule.Flags.HasAny;
212 					state = PostIdentifier;
213 				}
214 				break;
215 
216 			case PostIdentifier:
217 				switch (*ptr) {
218 				case '#':
219 					state = ID;
220 					start = ptr + 1;
221 					break;
222 				case '.':
223 					state = Class;
224 					start = ptr + 1;
225 					break;
226 				case '[':
227 					state = AttrName;
228 					start = ptr + 1;
229 					break;
230 				case ':':
231 					state = Pseudo;
232 					if ((ptr + 1 != end) && (*(ptr + 1) == ':'))
233 						++ptr;
234 					start = ptr + 1;
235 					break;
236 				default:
237 					state = Relation;
238 					continue;
239 				}
240 				break;
241 
242 			case Tag:
243 				while ((ptr != end) && isAlpha(*ptr))
244 					++ptr;
245 				if (ptr == end)
246 					continue;
247 
248 				rule.flags_ |= Rule.Flags.HasTag;
249 				rule.tag_ = start[0..ptr-start];
250 				++tags;
251 
252 				state = PostIdentifier;
253 				continue;
254 
255 			case Class:
256 				while ((ptr != end) && (isAlphaNum(*ptr) || (*ptr == '-') || (*ptr == '_')))
257 					++ptr;
258 				if (ptr == end)
259 					continue;
260 
261 				rule.flags_ |= Rule.Flags.HasAttr;
262 				rule.match_ = Rule.MatchType.ContainWord;
263 				rule.attr_ = "class";
264 				rule.value_ = start[0..ptr-start];
265 				++classes;
266 
267 				state = PostIdentifier;
268 				break;
269 
270 			case ID:
271 				while ((ptr != end) && (isAlphaNum(*ptr) || (*ptr == '-') || (*ptr == '_')))
272 					++ptr;
273 				if (ptr == end)
274 					continue;
275 
276 				rule.flags_ |= Rule.Flags.HasAttr;
277 				rule.match_ = Rule.MatchType.Exact;
278 				rule.attr_ = "id";
279 				rule.value_ = start[0..ptr-start];
280 				++ids;
281 
282 				state = PostIdentifier;
283 				break;
284 
285 			case AttrName:
286 				while ((ptr != end) && (isAlphaNum(*ptr) || (*ptr == '-') || (*ptr == '_')))
287 					++ptr;
288 				if (ptr == end)
289 					continue;
290 
291 				rule.flags_ |= Rule.Flags.HasAttr;
292 				rule.flags_ |= Rule.Flags.CaseSensitive;
293 				rule.attr_ = start[0..ptr-start];
294 				++classes;
295 
296 				state = AttrOp;
297 				continue;
298 
299 			case AttrOp:
300 				while ((ptr != end) && (isSpace(*ptr)))
301 					++ptr;
302 				if (ptr == end)
303 					continue;
304 
305 				switch (*ptr) {
306 				case ']':
307 					rule.match_ = Rule.MatchType.Set;
308 					state = PostIdentifier;
309 					break;
310 				case '=':
311 					rule.match_ = Rule.MatchType.Exact;
312 					state = PreAttrValue;
313 					break;
314 				default:
315 					if ((ptr + 1 != end) && (*(ptr + 1) == '=')) {
316 						switch (*ptr) {
317 						case '~':
318 							rule.match_ = Rule.MatchType.ContainWord;
319 							break;
320 						case '^':
321 							rule.match_ = Rule.MatchType.Begin;
322 							break;
323 						case '$':
324 							rule.match_ = Rule.MatchType.End;
325 							break;
326 						case '*':
327 							rule.match_ = Rule.MatchType.Contain;
328 							break;
329 						case '|':
330 							rule.match_ = Rule.MatchType.BeginHyphen;
331 							break;
332 						default:
333 							rule.flags_ = 0; // error
334 							ptr = end - 1;
335 							break;
336 						}
337 
338 						state = PreAttrValue;
339 						++ptr;
340 					}
341 					break;
342 				}
343 				break;
344 
345 			case PreAttrValue:
346 				while ((ptr != end) && isSpace(*ptr))
347 					++ptr;
348 				if (ptr == end)
349 					continue;
350 
351 				if (*ptr == '\"') {
352 					state = AttrValueDQ;
353 					start = ptr + 1;
354 				} else if (*ptr == '\'') {
355 					state = AttrValueSQ;
356 					start = ptr + 1;
357 				} else {
358 					state = AttrValueNQ;
359 					start = ptr;
360 				}
361 				break;
362 
363 			case AttrValueDQ:
364 				while ((ptr != end) && (*ptr != '\"'))
365 					++ptr;
366 				if (ptr == end)
367 					continue;
368 
369 				rule.value_ = start[0..ptr-start];
370 				state = PostAttrValue;
371 				break;
372 
373 			case AttrValueSQ:
374 				while ((ptr != end) && (*ptr != '\''))
375 					++ptr;
376 				if (ptr == end)
377 					continue;
378 
379 				rule.value_ = start[0..ptr-start];
380 				state = PostAttrValue;
381 				break;
382 
383 			case AttrValueNQ:
384 				while ((ptr != end) && !isSpace(*ptr) && (*ptr != ']'))
385 					++ptr;
386 				if (ptr == end)
387 					continue;
388 
389 				rule.value_ = start[0..ptr-start];
390 				state = PostAttrValue;
391 				continue;
392 
393 			case PostAttrValue:
394 				while ((ptr != end) && (*ptr != ']') && (*ptr != 'i'))
395 					++ptr;
396 				if (ptr == end)
397 					continue;
398 
399 				if (*ptr == ']') {
400 					state = PostIdentifier;
401 				} else if (*ptr == 'i') {
402 					rule.flags_ &= ~(Rule.Flags.CaseSensitive);
403 				}
404 				break;
405 
406 			case Pseudo:
407 				while ((ptr != end) && (isAlpha(*ptr) || (*ptr == '-')))
408 					++ptr;
409 				if (ptr == end)
410 					continue;
411 
412 				rule.pseudo_ = start[0..ptr-start];
413 				rule.flags_ |= Rule.Flags.HasPseudo;
414 				if (*ptr != '(') {
415 					state = PostIdentifier;
416 					continue;
417 				} else {
418 					state = PseudoArgs;
419 					start = ptr + 1;
420 				}
421 				break;
422 
423 			case PseudoArgs:
424 				while ((ptr != end) && (*ptr != ')'))
425 					++ptr;
426 				if (ptr == end)
427 					continue;
428 
429 				rule.pseudoArg_ = start[0..ptr-start];
430 				state = PostIdentifier;
431 				break;
432 
433 			case Relation:
434 				while ((ptr != end) && isSpace(*ptr))
435 					++ptr;
436 				if (ptr == end)
437 					continue;
438 
439 				++rules.length;
440 				rule = &rules.back;
441 
442 				state = Identifier;
443 				switch (*ptr) {
444 				case '>':
445 					rule.relation_ = Rule.Relation.Child;
446 					break;
447 				case '+':
448 					rule.relation_ = Rule.Relation.DirectAdjacent;
449 					break;
450 				case '~':
451 					rule.relation_ = Rule.Relation.IndirectAdjacent;
452 					break;
453 				default:
454 					rule.relation_ = Rule.Relation.Descendant;
455 					continue;
456 				}
457 				break;
458 			}
459 
460 			++ptr;
461 		}
462 
463 		rules.reverse();
464 		selector.rules_ = rules;
465 		selector.specificity_ = (ids << 14) | (classes << 7) | (tags & 127);
466 
467 		return selector;
468 	}
469 
470 	bool matches(ElementType)(ElementType element) {
471 		if (rules_.empty)
472 			return false;
473 
474 		Rule.Relation relation = Rule.Relation.None;
475 		foreach(ref rule; rules_) {
476 			final switch (relation) with (Rule.Relation) {
477 			case None:
478 				if (!rule.matches(element))
479 					return false;
480 				break;
481 			case Descendant:
482 				auto ancestors = element.ancestors();
483 				while (true) {
484 					if (ancestors.empty())
485 						return false;
486 					auto ancestor = ancestors.front;
487 					if (rule.matches(ancestor)) {
488 						element = ancestor;
489 						break;
490 					}
491 					ancestors.popFront;
492 				}
493 				break;
494 			case Child:
495 				auto ancestors = element.ancestors;
496 				if (ancestors.empty)
497 					return false;
498 				auto ancestor = ancestors.front;
499 				if (!rule.matches(ancestor))
500 					return false;
501 				element = ancestor;
502 				break;
503 			case DirectAdjacent:
504 				auto adjacents = element.adjacents;
505 				if (adjacents.empty)
506 					return false;
507 				auto adjacent = adjacents.front;
508 				if (!rule.matches(adjacent))
509 					return false;
510 				element = adjacent;
511 				break;
512 			case IndirectAdjacent:
513 				auto adjacents = element.adjacents;
514 				while (true) {
515 					if (adjacents.empty)
516 						return false;
517 					auto adjacent = adjacents.front;
518 					if (rule.matches(adjacent)) {
519 						element = adjacent;
520 						break;
521 					}
522 					adjacents.popFront;
523 				}
524 				break;
525 			}
526 
527 			relation = rule.relation;
528 		}
529 
530 		return true;
531 	}
532 
533 	@property size_t specificity() const {
534 		return specificity_;
535 	}
536 
537 private:
538 	const(char)[] source_;
539 	Rule[] rules_;
540 	size_t specificity_;
541 }
542 
543 
544 private struct ElementRange {
545 	@property bool empty() const {
546 		return index_ >= elements_.length;
547 	}
548 
549 	@property Element front() {
550 		return elements_[index_];
551 	}
552 
553 	@property void popFront() {
554 		++index_;
555 	}
556 
557 	private Element[] elements_;
558 	private size_t index_;
559 }
560 
561 private struct Element {
562 	const(char)[] tag() const {
563 		return tag_;
564 	}
565 
566 	const(char[])* attr(const(char)[] name) const {
567 		return name in attrs_;
568 	}
569 
570 	bool pseudo(const(char)[] name, const(char)[] arg) const {
571 		return (name == "active");
572 	}
573 
574 	auto adjacents() {
575 		return ElementRange(adjacents_);
576 	}
577 
578 	auto ancestors() {
579 		return ElementRange(ancestors_);
580 	}
581 
582 	const(char)[] tag_;
583 	const(char)[][const(char)[]] attrs_;
584 	Element[] ancestors_;
585 	Element[] adjacents_;
586 }
587 
588 
589 unittest {
590 	bool testSelector(const(char)[] selector, Element e) {
591 		return Selector.parse(selector).matches(e);
592 	}
593 
594 	//writeln(`<div id="odiv" class="container"><div id="idiv"><p id="bar"><span id="foo" class="meh moo"></span><span id="error" class="alert">/span></p></div></div>`);
595 
596 	auto span = Element("span");
597 	span.attrs_["id"] = "foo";
598 	span.attrs_["class"] = "meh moo bleh";
599 
600 	auto error = Element("span");
601 	error.attrs_["id"] = "error";
602 	error.attrs_["class"] = "alert";
603 
604 	auto aerror = Element("span");
605 	aerror.attrs_["id"] = "aerror";
606 	aerror.attrs_["class"] = "alert";
607 
608 	auto p = Element("p");
609 	p.attrs_["id"] = "bar";
610 
611 	auto idiv = Element("div");
612 	idiv.attrs_["id"] = "idiv";
613 
614 	auto odiv = Element("div");
615 	odiv.attrs_["id"] = "odiv";
616 	odiv.attrs_["class"] = "container";
617 
618 	span.ancestors_ ~= [ p, idiv, odiv ]; // ancestors in closest-first order
619 	error.ancestors_ ~= [ p, idiv, odiv ];
620 	error.adjacents_ ~= span; // previous sibblings in closest-first order
621 
622 	aerror.ancestors_ ~= [ p, idiv, odiv ];
623 	aerror.adjacents_ ~= [ error, span ]; // previous sibblings in closest-first order
624 
625 	p.ancestors_ ~= [ idiv, odiv ];
626 	idiv.ancestors_ ~= [ odiv ];
627 
628 	assert(testSelector("#bar", p));
629 	assert(!testSelector("#bar", span));
630 	assert(testSelector(".meh", span));
631 	assert(testSelector(".moo", span));
632 	assert(testSelector(".bleh", span));
633 	assert(testSelector("span.bleh", span));
634 	assert(testSelector(".alert", error));
635 	assert(testSelector("span.alert", error));
636 	assert(!testSelector("div.alert", error));
637 	assert(testSelector("div p", p));
638 	assert(testSelector("div > p", p));
639 	assert(testSelector("div span", span));
640 	assert(testSelector("div span", error));
641 	assert(!testSelector("div span + span", span));
642 	assert(!testSelector("div span#foo + span", aerror));
643 	assert(testSelector("div span#foo ~ span", aerror));
644 	assert(testSelector("div span.bleh ~ span", error));
645 	assert(!testSelector("div span#error", span));
646 	assert(testSelector("div span#error", error));
647 	assert(testSelector(`div[id="idiv"]`, idiv));
648 	assert(testSelector(`div[id='idiv']`, idiv));
649 	assert(testSelector(`div[id=idiv]`, idiv));
650 	assert(!testSelector(`div[id=IDIV]`, idiv));
651 	assert(!testSelector(`div[id=IDIV] i`, idiv));
652 	assert(testSelector(`div[id ^= idiv ]`, idiv));
653 	assert(testSelector(`div[id ~= idiv i]`, idiv));
654 	assert(testSelector(`div[id *= div]`, idiv));
655 	assert(testSelector(`div[id |= IDIV i]:active`, idiv));
656 	assert(!testSelector(`div[id |= IDIV i]:focus`, idiv));
657 }