1 module css.selector; 2 3 4 import std.algorithm; 5 import std.array; 6 import std.ascii; 7 import std.string; 8 9 10 private bool isSpace(Char)(Char ch) { 11 return (ch == 32) || ((ch >= 9) && (ch <= 13)); 12 } 13 14 15 private bool equalsCI(CharA, CharB)(const(CharA)[] a, const(CharB)[] b) { 16 if (a.length == b.length) { 17 for (uint i = 0; i < a.length; ++i) { 18 if (std.ascii.toLower(a[i]) != std.ascii.toLower(b[i])) 19 return false; 20 } 21 return true; 22 } 23 return false; 24 } 25 26 27 private struct Rule { 28 enum Flags : size_t { 29 HasTag = 1 << 0, 30 HasAttr = 1 << 1, 31 HasPseudo = 1 << 2, 32 CaseSensitive = 1 << 3, 33 HasAny = 1 << 4, 34 } 35 36 enum MatchType : ubyte { 37 None = 0, 38 Set, 39 Exact, 40 ContainWord, 41 Contain, 42 Begin, 43 BeginHyphen, 44 End, 45 } 46 47 enum Relation : ubyte { 48 None = 0, 49 Descendant, 50 Child, 51 DirectAdjacent, 52 IndirectAdjacent, 53 } 54 55 bool matches(ElementType)(ElementType element) const { 56 if (flags_ == 0) 57 return false; 58 59 if (flags_ & Flags.HasTag) { 60 if (!tag_.equalsCI(element.tag)) 61 return false; 62 } 63 64 if (flags_ & Flags.HasAttr) { 65 auto cs = (flags_ & Flags.CaseSensitive) != 0; 66 final switch (match_) with (MatchType) { 67 case None: 68 break; 69 case Set: 70 if (element.attr(attr_) == null) 71 return false; 72 break; 73 case Exact: 74 if (value_.empty) return false; 75 auto pattr = element.attr(attr_); 76 if (!pattr || (cs ? (value_ != *pattr) : !value_.equalsCI(*pattr))) 77 return false; 78 break; 79 case Contain: 80 if (value_.empty) return false; 81 auto pattr = element.attr(attr_); 82 if (!pattr || (((*pattr).indexOf(value_, cs ? CaseSensitive.yes : CaseSensitive.no)) == -1)) 83 return false; 84 break; 85 case ContainWord: 86 if (value_.empty) return false; 87 auto pattr = element.attr(attr_); 88 if (!pattr) 89 return false; 90 91 size_t start = 0; 92 while (true) { 93 auto index = (*pattr).indexOf(value_, start, cs ? CaseSensitive.yes : CaseSensitive.no); 94 if (index == -1) 95 return false; 96 if (index && !isSpace((*pattr)[index - 1])) 97 return false; 98 if ((index + value_.length == pattr.length) || isSpace((*pattr)[index + value_.length])) 99 break; 100 start = index + 1; 101 } 102 break; 103 case Begin: 104 if (value_.empty) return false; 105 auto pattr = element.attr(attr_); 106 if (!pattr || (((*pattr).indexOf(value_, cs ? CaseSensitive.yes : CaseSensitive.no)) != 0)) 107 return false; 108 break; 109 case End: 110 if (value_.empty) return false; 111 auto pattr = element.attr(attr_); 112 if (!pattr || (((*pattr).lastIndexOf(value_, cs ? CaseSensitive.yes : CaseSensitive.no)) != (pattr.length - value_.length))) 113 return false; 114 break; 115 case BeginHyphen: 116 if (value_.empty) return false; 117 auto pattr = element.attr(attr_); 118 if (!pattr || (((*pattr).indexOf(value_, cs ? CaseSensitive.yes : CaseSensitive.no)) != 0) || ((pattr.length > value_.length) && ((*pattr)[value_.length] != '-'))) 119 return false; 120 break; 121 } 122 } 123 124 if (flags_ & Flags.HasPseudo) { 125 if (!element.pseudo(pseudo_, pseudoArg_)) 126 return false; 127 } 128 129 return true; 130 } 131 132 @property Relation relation() const { 133 return relation_; 134 } 135 136 package: 137 size_t flags_; 138 MatchType match_; 139 Relation relation_; 140 const(char)[] tag_; 141 const(char)[] attr_; 142 const(char)[] value_; 143 const(char)[] pseudo_; 144 const(char)[] pseudoArg_; 145 } 146 147 148 struct Selector { 149 static Selector parse(const(char)[] value) { 150 enum ParserStates { 151 Identifier = 0, 152 PostIdentifier, 153 Tag, 154 Class, 155 ID, 156 AttrName, 157 AttrOp, 158 PreAttrValue, 159 AttrValueDQ, 160 AttrValueSQ, 161 AttrValueNQ, 162 PostAttrValue, 163 Pseudo, 164 PseudoArgs, 165 Relation, 166 } 167 168 size_t ids; 169 size_t tags; 170 size_t classes; 171 172 value = value.strip; 173 auto source = uninitializedArray!(char[])(value.length + 1); 174 source[0..value.length] = value; 175 source[$-1] = ' '; // add a padding space to ease parsing 176 177 auto selector = Selector(source); 178 Rule[] rules; 179 rules.reserve(2); 180 ++rules.length; 181 182 auto rule = &rules.back; 183 184 auto ptr = source.ptr; 185 auto end = source.ptr + source.length; 186 auto start = ptr; 187 188 ParserStates state = ParserStates.Identifier; 189 190 while (ptr != end) { 191 final switch (state) with (ParserStates) { 192 case Identifier: 193 if (*ptr == '#') { 194 state = ID; 195 start = ptr + 1; 196 } else if (*ptr == '.') { 197 state = Class; 198 start = ptr + 1; 199 } else if (*ptr == '[') { 200 state = AttrName; 201 start = ptr + 1; 202 } else if (*ptr == ':') { 203 rule.flags_ |= Rule.Flags.HasAny; 204 state = PostIdentifier; 205 continue; 206 } else if (isAlpha(*ptr)) { 207 state = Tag; 208 start = ptr; 209 continue; 210 } else if (*ptr == '*') { 211 rule.flags_ |= Rule.Flags.HasAny; 212 state = PostIdentifier; 213 } 214 break; 215 216 case PostIdentifier: 217 switch (*ptr) { 218 case '#': 219 state = ID; 220 start = ptr + 1; 221 break; 222 case '.': 223 state = Class; 224 start = ptr + 1; 225 break; 226 case '[': 227 state = AttrName; 228 start = ptr + 1; 229 break; 230 case ':': 231 state = Pseudo; 232 if ((ptr + 1 != end) && (*(ptr + 1) == ':')) 233 ++ptr; 234 start = ptr + 1; 235 break; 236 default: 237 state = Relation; 238 continue; 239 } 240 break; 241 242 case Tag: 243 while ((ptr != end) && isAlpha(*ptr)) 244 ++ptr; 245 if (ptr == end) 246 continue; 247 248 rule.flags_ |= Rule.Flags.HasTag; 249 rule.tag_ = start[0..ptr-start]; 250 ++tags; 251 252 state = PostIdentifier; 253 continue; 254 255 case Class: 256 while ((ptr != end) && (isAlphaNum(*ptr) || (*ptr == '-') || (*ptr == '_'))) 257 ++ptr; 258 if (ptr == end) 259 continue; 260 261 rule.flags_ |= Rule.Flags.HasAttr; 262 rule.match_ = Rule.MatchType.ContainWord; 263 rule.attr_ = "class"; 264 rule.value_ = start[0..ptr-start]; 265 ++classes; 266 267 state = PostIdentifier; 268 break; 269 270 case ID: 271 while ((ptr != end) && (isAlphaNum(*ptr) || (*ptr == '-') || (*ptr == '_'))) 272 ++ptr; 273 if (ptr == end) 274 continue; 275 276 rule.flags_ |= Rule.Flags.HasAttr; 277 rule.match_ = Rule.MatchType.Exact; 278 rule.attr_ = "id"; 279 rule.value_ = start[0..ptr-start]; 280 ++ids; 281 282 state = PostIdentifier; 283 break; 284 285 case AttrName: 286 while ((ptr != end) && (isAlphaNum(*ptr) || (*ptr == '-') || (*ptr == '_'))) 287 ++ptr; 288 if (ptr == end) 289 continue; 290 291 rule.flags_ |= Rule.Flags.HasAttr; 292 rule.flags_ |= Rule.Flags.CaseSensitive; 293 rule.attr_ = start[0..ptr-start]; 294 ++classes; 295 296 state = AttrOp; 297 continue; 298 299 case AttrOp: 300 while ((ptr != end) && (isSpace(*ptr))) 301 ++ptr; 302 if (ptr == end) 303 continue; 304 305 switch (*ptr) { 306 case ']': 307 rule.match_ = Rule.MatchType.Set; 308 state = PostIdentifier; 309 break; 310 case '=': 311 rule.match_ = Rule.MatchType.Exact; 312 state = PreAttrValue; 313 break; 314 default: 315 if ((ptr + 1 != end) && (*(ptr + 1) == '=')) { 316 switch (*ptr) { 317 case '~': 318 rule.match_ = Rule.MatchType.ContainWord; 319 break; 320 case '^': 321 rule.match_ = Rule.MatchType.Begin; 322 break; 323 case '$': 324 rule.match_ = Rule.MatchType.End; 325 break; 326 case '*': 327 rule.match_ = Rule.MatchType.Contain; 328 break; 329 case '|': 330 rule.match_ = Rule.MatchType.BeginHyphen; 331 break; 332 default: 333 rule.flags_ = 0; // error 334 ptr = end - 1; 335 break; 336 } 337 338 state = PreAttrValue; 339 ++ptr; 340 } 341 break; 342 } 343 break; 344 345 case PreAttrValue: 346 while ((ptr != end) && isSpace(*ptr)) 347 ++ptr; 348 if (ptr == end) 349 continue; 350 351 if (*ptr == '\"') { 352 state = AttrValueDQ; 353 start = ptr + 1; 354 } else if (*ptr == '\'') { 355 state = AttrValueSQ; 356 start = ptr + 1; 357 } else { 358 state = AttrValueNQ; 359 start = ptr; 360 } 361 break; 362 363 case AttrValueDQ: 364 while ((ptr != end) && (*ptr != '\"')) 365 ++ptr; 366 if (ptr == end) 367 continue; 368 369 rule.value_ = start[0..ptr-start]; 370 state = PostAttrValue; 371 break; 372 373 case AttrValueSQ: 374 while ((ptr != end) && (*ptr != '\'')) 375 ++ptr; 376 if (ptr == end) 377 continue; 378 379 rule.value_ = start[0..ptr-start]; 380 state = PostAttrValue; 381 break; 382 383 case AttrValueNQ: 384 while ((ptr != end) && !isSpace(*ptr) && (*ptr != ']')) 385 ++ptr; 386 if (ptr == end) 387 continue; 388 389 rule.value_ = start[0..ptr-start]; 390 state = PostAttrValue; 391 continue; 392 393 case PostAttrValue: 394 while ((ptr != end) && (*ptr != ']') && (*ptr != 'i')) 395 ++ptr; 396 if (ptr == end) 397 continue; 398 399 if (*ptr == ']') { 400 state = PostIdentifier; 401 } else if (*ptr == 'i') { 402 rule.flags_ &= ~(Rule.Flags.CaseSensitive); 403 } 404 break; 405 406 case Pseudo: 407 while ((ptr != end) && (isAlpha(*ptr) || (*ptr == '-'))) 408 ++ptr; 409 if (ptr == end) 410 continue; 411 412 rule.pseudo_ = start[0..ptr-start]; 413 rule.flags_ |= Rule.Flags.HasPseudo; 414 if (*ptr != '(') { 415 state = PostIdentifier; 416 continue; 417 } else { 418 state = PseudoArgs; 419 start = ptr + 1; 420 } 421 break; 422 423 case PseudoArgs: 424 while ((ptr != end) && (*ptr != ')')) 425 ++ptr; 426 if (ptr == end) 427 continue; 428 429 rule.pseudoArg_ = start[0..ptr-start]; 430 state = PostIdentifier; 431 break; 432 433 case Relation: 434 while ((ptr != end) && isSpace(*ptr)) 435 ++ptr; 436 if (ptr == end) 437 continue; 438 439 ++rules.length; 440 rule = &rules.back; 441 442 state = Identifier; 443 switch (*ptr) { 444 case '>': 445 rule.relation_ = Rule.Relation.Child; 446 break; 447 case '+': 448 rule.relation_ = Rule.Relation.DirectAdjacent; 449 break; 450 case '~': 451 rule.relation_ = Rule.Relation.IndirectAdjacent; 452 break; 453 default: 454 rule.relation_ = Rule.Relation.Descendant; 455 continue; 456 } 457 break; 458 } 459 460 ++ptr; 461 } 462 463 rules.reverse(); 464 selector.rules_ = rules; 465 selector.specificity_ = (ids << 14) | (classes << 7) | (tags & 127); 466 467 return selector; 468 } 469 470 bool matches(ElementType)(ElementType element) { 471 if (rules_.empty) 472 return false; 473 474 Rule.Relation relation = Rule.Relation.None; 475 foreach(ref rule; rules_) { 476 final switch (relation) with (Rule.Relation) { 477 case None: 478 if (!rule.matches(element)) 479 return false; 480 break; 481 case Descendant: 482 auto ancestors = element.ancestors(); 483 while (true) { 484 if (ancestors.empty()) 485 return false; 486 auto ancestor = ancestors.front; 487 if (rule.matches(ancestor)) { 488 element = ancestor; 489 break; 490 } 491 ancestors.popFront; 492 } 493 break; 494 case Child: 495 auto ancestors = element.ancestors; 496 if (ancestors.empty) 497 return false; 498 auto ancestor = ancestors.front; 499 if (!rule.matches(ancestor)) 500 return false; 501 element = ancestor; 502 break; 503 case DirectAdjacent: 504 auto adjacents = element.adjacents; 505 if (adjacents.empty) 506 return false; 507 auto adjacent = adjacents.front; 508 if (!rule.matches(adjacent)) 509 return false; 510 element = adjacent; 511 break; 512 case IndirectAdjacent: 513 auto adjacents = element.adjacents; 514 while (true) { 515 if (adjacents.empty) 516 return false; 517 auto adjacent = adjacents.front; 518 if (rule.matches(adjacent)) { 519 element = adjacent; 520 break; 521 } 522 adjacents.popFront; 523 } 524 break; 525 } 526 527 relation = rule.relation; 528 } 529 530 return true; 531 } 532 533 @property size_t specificity() const { 534 return specificity_; 535 } 536 537 private: 538 const(char)[] source_; 539 Rule[] rules_; 540 size_t specificity_; 541 } 542 543 544 private struct ElementRange { 545 @property bool empty() const { 546 return index_ >= elements_.length; 547 } 548 549 @property Element front() { 550 return elements_[index_]; 551 } 552 553 @property void popFront() { 554 ++index_; 555 } 556 557 private Element[] elements_; 558 private size_t index_; 559 } 560 561 private struct Element { 562 const(char)[] tag() const { 563 return tag_; 564 } 565 566 const(char[])* attr(const(char)[] name) const { 567 return name in attrs_; 568 } 569 570 bool pseudo(const(char)[] name, const(char)[] arg) const { 571 return (name == "active"); 572 } 573 574 auto adjacents() { 575 return ElementRange(adjacents_); 576 } 577 578 auto ancestors() { 579 return ElementRange(ancestors_); 580 } 581 582 const(char)[] tag_; 583 const(char)[][const(char)[]] attrs_; 584 Element[] ancestors_; 585 Element[] adjacents_; 586 } 587 588 589 unittest { 590 bool testSelector(const(char)[] selector, Element e) { 591 return Selector.parse(selector).matches(e); 592 } 593 594 //writeln(`<div id="odiv" class="container"><div id="idiv"><p id="bar"><span id="foo" class="meh moo"></span><span id="error" class="alert">/span></p></div></div>`); 595 596 auto span = Element("span"); 597 span.attrs_["id"] = "foo"; 598 span.attrs_["class"] = "meh moo bleh"; 599 600 auto error = Element("span"); 601 error.attrs_["id"] = "error"; 602 error.attrs_["class"] = "alert"; 603 604 auto aerror = Element("span"); 605 aerror.attrs_["id"] = "aerror"; 606 aerror.attrs_["class"] = "alert"; 607 608 auto p = Element("p"); 609 p.attrs_["id"] = "bar"; 610 611 auto idiv = Element("div"); 612 idiv.attrs_["id"] = "idiv"; 613 614 auto odiv = Element("div"); 615 odiv.attrs_["id"] = "odiv"; 616 odiv.attrs_["class"] = "container"; 617 618 span.ancestors_ ~= [ p, idiv, odiv ]; // ancestors in closest-first order 619 error.ancestors_ ~= [ p, idiv, odiv ]; 620 error.adjacents_ ~= span; // previous sibblings in closest-first order 621 622 aerror.ancestors_ ~= [ p, idiv, odiv ]; 623 aerror.adjacents_ ~= [ error, span ]; // previous sibblings in closest-first order 624 625 p.ancestors_ ~= [ idiv, odiv ]; 626 idiv.ancestors_ ~= [ odiv ]; 627 628 assert(testSelector("#bar", p)); 629 assert(!testSelector("#bar", span)); 630 assert(testSelector(".meh", span)); 631 assert(testSelector(".moo", span)); 632 assert(testSelector(".bleh", span)); 633 assert(testSelector("span.bleh", span)); 634 assert(testSelector(".alert", error)); 635 assert(testSelector("span.alert", error)); 636 assert(!testSelector("div.alert", error)); 637 assert(testSelector("div p", p)); 638 assert(testSelector("div > p", p)); 639 assert(testSelector("div span", span)); 640 assert(testSelector("div span", error)); 641 assert(!testSelector("div span + span", span)); 642 assert(!testSelector("div span#foo + span", aerror)); 643 assert(testSelector("div span#foo ~ span", aerror)); 644 assert(testSelector("div span.bleh ~ span", error)); 645 assert(!testSelector("div span#error", span)); 646 assert(testSelector("div span#error", error)); 647 assert(testSelector(`div[id="idiv"]`, idiv)); 648 assert(testSelector(`div[id='idiv']`, idiv)); 649 assert(testSelector(`div[id=idiv]`, idiv)); 650 assert(!testSelector(`div[id=IDIV]`, idiv)); 651 assert(!testSelector(`div[id=IDIV] i`, idiv)); 652 assert(testSelector(`div[id ^= idiv ]`, idiv)); 653 assert(testSelector(`div[id ~= idiv i]`, idiv)); 654 assert(testSelector(`div[id *= div]`, idiv)); 655 assert(testSelector(`div[id |= IDIV i]:active`, idiv)); 656 assert(!testSelector(`div[id |= IDIV i]:focus`, idiv)); 657 }