Домой Edit me on GitHub

2019-06-19

Каналы передачи данных | Сетевое программирование | Базы данных | Основы Веб-программирования

Red lang

Red []
{
grammar HTML
    document          <-   (doctype / text / tag)*
    tag               <-   open_tag (text / tag)* close_tag
    open_tag          <-   "<" [0-9a-zA-Z \"'=-]+ ">"
    close_tag         <-   "</" [0-9a-zA-Z]+ ">"
    doctype           <-   "<!DOCTYPE " [0-9a-zA-Z]+ ">"
    text              <-   [^<]+
}

; Character classes referenced by the parse rules.
ws: charset [#"^/" #" " #"^-"]                 ; newline, space, tab
digits: charset [#"0" - #"9"]
chars: charset [#"a" - #"z" #"A" - #"Z"]       ; ASCII letters, both cases
alphanum: union chars digits
; Alphanumerics plus whitespace and the four characters " ' = -
alphanum-with-specials: union union ws alphanum charset {-='"}

; Names of the currently open tags, innermost last.
tags-stack: copy []

; Records a newly opened tag: pushes `name` onto tags-stack and prints
; the stack as it stands after the push.
handle-open-tag: func [name] [
  ; APPEND returns the head of the series, so the whole stack is printed.
  print append tags-stack name
]
; Records a closed tag: removes the innermost entry from tags-stack and
; prints the stack as it stands after the pop.
;
; name - the tag name taken from the closing tag.
;
; The closing name is compared with the innermost open tag. On a mismatch
; (or when nothing is open) a diagnostic line is printed; the pop still
; happens so the stack evolves exactly as it did before this check existed.
handle-close-tag: func [name] [
  ; LAST yields none on an empty block, which never equals a tag name.
  ; String comparison with <> ignores case, so </BODY> closes <body>.
  if name <> last tags-stack [
    print ["mismatched closing tag:" name]
  ]
  take/last tags-stack
  print tags-stack
]

; PARSE rules recognising a small subset of HTML: strictly paired tags,
; double-quoted attribute values, an optional DOCTYPE, and free text.

; A document is any mix of elements, DOCTYPE declarations and text.
document: [any [ahead "<" [tag | doctype] | text]]

; An element: opening tag, then text or nested elements, then a closing tag.
tag: [whitespace open-tag any [ahead not "<" text | tag] close-tag]

; The handler runs only after the closing ">" has matched, so an opening tag
; that fails part-way (bad attribute, missing ">") leaves tags-stack untouched.
; Whitespace is allowed before ">".
open-tag: ["<" copy name tag-name any tag-parameter whitespace ">" (handle-open-tag name)]

tag-name: [some alphanum]

; name or name="value"; THRU accepts an empty value such as alt="".
tag-parameter: [whitespace some alphanum opt ["=" {"} thru {"}]]

; As with open-tag, the handler runs only once the whole tag has matched.
close-tag: ["</" copy name tag-name whitespace ">" (handle-close-tag name)]

doctype: ["<!DOCTYPE " some alphanum ">"]

; Everything up to the next "<" (may match nothing).
text: [any [not "<" skip]]

whitespace: [any ws]

; Sample input for the grammar. Several alt attributes carry tag-like text
; ("<title>", "</title>", "<u>", "</u>") inside quoted values, next to one
; real <u>...</u> element, so the run shows which of them reach the handlers.
html: {
<html>
<body>
<img src="picture1.jpg" alt="<title>"></img>тут точно не тайтл<img src="picture2.jpg" alt="</title>"></img>
<img src="picture1.jpg" alt="<u>"></img>тут точно не подчеркнуто<img src="picture2.jpg" alt="</u>"></img>
<u>а тут подчеркнуто</u>
</body>
</html>
}

; Run the document rule over the sample and print the value PARSE returns.
probe parse html document
Previous: aiohttp Next: Qt