mirror of
https://github.com/golang/go.git
synced 2026-01-30 15:42:04 +03:00
Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6174b5e21e | ||
|
|
4081aa1277 |
@@ -1,5 +0,0 @@
|
||||
# Copyright 2012 The Go Authors. All rights reserved.
|
||||
# Use of this source code is governed by a BSD-style
|
||||
# license that can be found in the LICENSE file.
|
||||
|
||||
include ../../Make.dist
|
||||
@@ -1,34 +0,0 @@
|
||||
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
/*
|
||||
|
||||
Cov is a rudimentary code coverage tool.
|
||||
|
||||
Usage:
|
||||
go tool cov [-lsv] [-g substring] [-m minlines] [6.out args]
|
||||
|
||||
Given a command to run, it runs the command while tracking which
|
||||
sections of code have been executed. When the command finishes,
|
||||
cov prints the line numbers of sections of code in the binary that
|
||||
were not executed. With no arguments it assumes the command "6.out".
|
||||
|
||||
|
||||
The options are:
|
||||
|
||||
-l
|
||||
print full path names instead of paths relative to the current directory
|
||||
-s
|
||||
show the source code that didn't execute, in addition to the line numbers.
|
||||
-v
|
||||
print debugging information during the run.
|
||||
-g substring
|
||||
restrict the coverage analysis to functions or files whose names contain substring
|
||||
-m minlines
|
||||
only report uncovered sections of code larger than minlines lines
|
||||
|
||||
The program is the same for all architectures: 386, amd64, and arm.
|
||||
|
||||
*/
|
||||
package documentation
|
||||
@@ -1,480 +0,0 @@
|
||||
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
/*
|
||||
* code coverage
|
||||
*/
|
||||
|
||||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include <bio.h>
|
||||
#include "tree.h"
|
||||
|
||||
#include <ureg_amd64.h>
|
||||
#include <mach.h>
|
||||
typedef struct Ureg Ureg;
|
||||
|
||||
/*
 * print the command-line synopsis on standard error
 * and exit with status "usage".
 */
void
usage(void)
{
	fprint(2, "usage: cov [-lsv] [-g substring] [-m minlines] [6.out args...]\n");
	fprint(2, "-g specifies pattern of interesting functions or files\n");
	exits("usage");
}
|
||||
|
||||
typedef struct Range Range;
|
||||
struct Range
|
||||
{
|
||||
uvlong pc;
|
||||
uvlong epc;
|
||||
};
|
||||
|
||||
int chatty;
|
||||
int fd;
|
||||
int longnames;
|
||||
int pid;
|
||||
int doshowsrc;
|
||||
Map *mem;
|
||||
Map *text;
|
||||
Fhdr fhdr;
|
||||
char *substring;
|
||||
char cwd[1000];
|
||||
int ncwd;
|
||||
int minlines = -1000;
|
||||
|
||||
Tree breakpoints; // code ranges not run
|
||||
|
||||
/*
|
||||
* comparison for Range structures
|
||||
* they are "equal" if they overlap, so
|
||||
* that a search for [pc, pc+1) finds the
|
||||
* Range containing pc.
|
||||
*/
|
||||
int
|
||||
rangecmp(void *va, void *vb)
|
||||
{
|
||||
Range *a = va, *b = vb;
|
||||
if(a->epc <= b->pc)
|
||||
return 1;
|
||||
if(b->epc <= a->pc)
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* remember that we ran the section of code [pc, epc).
|
||||
*/
|
||||
void
|
||||
ran(uvlong pc, uvlong epc)
|
||||
{
|
||||
Range key;
|
||||
Range *r;
|
||||
uvlong oldepc;
|
||||
|
||||
if(chatty)
|
||||
print("run %#llux-%#llux\n", pc, epc);
|
||||
|
||||
key.pc = pc;
|
||||
key.epc = pc+1;
|
||||
r = treeget(&breakpoints, &key);
|
||||
if(r == nil)
|
||||
sysfatal("unchecked breakpoint at %#llux+%d", pc, (int)(epc-pc));
|
||||
|
||||
// Might be that the tail of the sequence
|
||||
// was run already, so r->epc is before the end.
|
||||
// Adjust len.
|
||||
if(epc > r->epc)
|
||||
epc = r->epc;
|
||||
|
||||
if(r->pc == pc) {
|
||||
r->pc = epc;
|
||||
} else {
|
||||
// Chop r to before pc;
|
||||
// add new entry for after if needed.
|
||||
// Changing r->epc does not affect r's position in the tree.
|
||||
oldepc = r->epc;
|
||||
r->epc = pc;
|
||||
if(epc < oldepc) {
|
||||
Range *n;
|
||||
n = malloc(sizeof *n);
|
||||
n->pc = epc;
|
||||
n->epc = oldepc;
|
||||
treeput(&breakpoints, n, n);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
showsrc(char *file, int line1, int line2)
|
||||
{
|
||||
Biobuf *b;
|
||||
char *p;
|
||||
int n, stop;
|
||||
|
||||
if((b = Bopen(file, OREAD)) == nil) {
|
||||
print("\topen %s: %r\n", file);
|
||||
return;
|
||||
}
|
||||
|
||||
for(n=1; n<line1 && (p = Brdstr(b, '\n', 1)) != nil; n++)
|
||||
free(p);
|
||||
|
||||
// print up to five lines (this one and 4 more).
|
||||
// if there are more than five lines, print 4 and "..."
|
||||
stop = n+4;
|
||||
if(stop > line2)
|
||||
stop = line2;
|
||||
if(stop < line2)
|
||||
stop--;
|
||||
for(; n<=stop && (p = Brdstr(b, '\n', 1)) != nil; n++) {
|
||||
print(" %d %s\n", n, p);
|
||||
free(p);
|
||||
}
|
||||
if(n < line2)
|
||||
print(" ...\n");
|
||||
Bterm(b);
|
||||
}
|
||||
|
||||
/*
|
||||
* if s is in the current directory or below,
|
||||
* return the relative path.
|
||||
*/
|
||||
char*
|
||||
shortname(char *s)
|
||||
{
|
||||
if(!longnames && strlen(s) > ncwd && memcmp(s, cwd, ncwd) == 0 && s[ncwd] == '/')
|
||||
return s+ncwd+1;
|
||||
return s;
|
||||
}
|
||||
|
||||
/*
|
||||
* we've decided that [pc, epc) did not run.
|
||||
* do something about it.
|
||||
*/
|
||||
void
|
||||
missing(uvlong pc, uvlong epc)
|
||||
{
|
||||
char file[1000];
|
||||
int line1, line2;
|
||||
char buf[100];
|
||||
Symbol s;
|
||||
char *p;
|
||||
uvlong uv;
|
||||
|
||||
if(!findsym(pc, CTEXT, &s) || !fileline(file, sizeof file, pc)) {
|
||||
notfound:
|
||||
print("%#llux-%#llux\n", pc, epc);
|
||||
return;
|
||||
}
|
||||
p = strrchr(file, ':');
|
||||
*p++ = 0;
|
||||
line1 = atoi(p);
|
||||
for(uv=pc; uv<epc; ) {
|
||||
if(!fileline(file, sizeof file, epc-2))
|
||||
goto notfound;
|
||||
uv += machdata->instsize(text, uv);
|
||||
}
|
||||
p = strrchr(file, ':');
|
||||
*p++ = 0;
|
||||
line2 = atoi(p);
|
||||
|
||||
if(line2+1-line2 < minlines)
|
||||
return;
|
||||
|
||||
if(pc == s.value) {
|
||||
// never entered function
|
||||
print("%s:%d %s never called (%#llux-%#llux)\n", shortname(file), line1, s.name, pc, epc);
|
||||
return;
|
||||
}
|
||||
if(pc <= s.value+13) {
|
||||
// probably stub for stack growth.
|
||||
// check whether last instruction is call to morestack.
|
||||
// the -5 below is the length of
|
||||
// CALL sys.morestack.
|
||||
buf[0] = 0;
|
||||
machdata->das(text, epc-5, 0, buf, sizeof buf);
|
||||
if(strstr(buf, "morestack"))
|
||||
return;
|
||||
}
|
||||
|
||||
if(epc - pc == 5) {
|
||||
// check for CALL sys.panicindex
|
||||
buf[0] = 0;
|
||||
machdata->das(text, pc, 0, buf, sizeof buf);
|
||||
if(strstr(buf, "panicindex"))
|
||||
return;
|
||||
}
|
||||
|
||||
if(epc - pc == 2 || epc -pc == 3) {
|
||||
// check for XORL inside shift.
|
||||
// (on x86 have to implement large left or unsigned right shift with explicit zeroing).
|
||||
// f+90 0x00002c9f CMPL CX,$20
|
||||
// f+93 0x00002ca2 JCS f+97(SB)
|
||||
// f+95 0x00002ca4 XORL AX,AX <<<
|
||||
// f+97 0x00002ca6 SHLL CL,AX
|
||||
// f+99 0x00002ca8 MOVL $1,CX
|
||||
//
|
||||
// f+c8 0x00002cd7 CMPL CX,$40
|
||||
// f+cb 0x00002cda JCS f+d0(SB)
|
||||
// f+cd 0x00002cdc XORQ AX,AX <<<
|
||||
// f+d0 0x00002cdf SHLQ CL,AX
|
||||
// f+d3 0x00002ce2 MOVQ $1,CX
|
||||
buf[0] = 0;
|
||||
machdata->das(text, pc, 0, buf, sizeof buf);
|
||||
if(strncmp(buf, "XOR", 3) == 0) {
|
||||
machdata->das(text, epc, 0, buf, sizeof buf);
|
||||
if(strncmp(buf, "SHL", 3) == 0 || strncmp(buf, "SHR", 3) == 0)
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if(epc - pc == 3) {
|
||||
// check for SAR inside shift.
|
||||
// (on x86 have to implement large signed right shift as >>31).
|
||||
// f+36 0x00016216 CMPL CX,$20
|
||||
// f+39 0x00016219 JCS f+3e(SB)
|
||||
// f+3b 0x0001621b SARL $1f,AX <<<
|
||||
// f+3e 0x0001621e SARL CL,AX
|
||||
// f+40 0x00016220 XORL CX,CX
|
||||
// f+42 0x00016222 CMPL CX,AX
|
||||
buf[0] = 0;
|
||||
machdata->das(text, pc, 0, buf, sizeof buf);
|
||||
if(strncmp(buf, "SAR", 3) == 0) {
|
||||
machdata->das(text, epc, 0, buf, sizeof buf);
|
||||
if(strncmp(buf, "SAR", 3) == 0)
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// show first instruction to make clear where we were.
|
||||
machdata->das(text, pc, 0, buf, sizeof buf);
|
||||
|
||||
if(line1 != line2)
|
||||
print("%s:%d,%d %#llux-%#llux %s\n",
|
||||
shortname(file), line1, line2, pc, epc, buf);
|
||||
else
|
||||
print("%s:%d %#llux-%#llux %s\n",
|
||||
shortname(file), line1, pc, epc, buf);
|
||||
if(doshowsrc)
|
||||
showsrc(file, line1, line2);
|
||||
}
|
||||
|
||||
/*
|
||||
* walk the tree, calling missing for each non-empty
|
||||
* section of missing code.
|
||||
*/
|
||||
void
|
||||
walktree(TreeNode *t)
|
||||
{
|
||||
Range *n;
|
||||
|
||||
if(t == nil)
|
||||
return;
|
||||
walktree(t->left);
|
||||
n = t->key;
|
||||
if(n->pc < n->epc)
|
||||
missing(n->pc, n->epc);
|
||||
walktree(t->right);
|
||||
}
|
||||
|
||||
/*
|
||||
* set a breakpoint all over [pc, epc)
|
||||
* and remember that we did.
|
||||
*/
|
||||
void
|
||||
breakpoint(uvlong pc, uvlong epc)
|
||||
{
|
||||
Range *r;
|
||||
|
||||
r = malloc(sizeof *r);
|
||||
r->pc = pc;
|
||||
r->epc = epc;
|
||||
treeput(&breakpoints, r, r);
|
||||
|
||||
for(; pc < epc; pc+=machdata->bpsize)
|
||||
put1(mem, pc, machdata->bpinst, machdata->bpsize);
|
||||
}
|
||||
|
||||
/*
|
||||
* install breakpoints over all text symbols
|
||||
* that match the pattern.
|
||||
*/
|
||||
void
|
||||
cover(void)
|
||||
{
|
||||
Symbol s;
|
||||
char *lastfn;
|
||||
uvlong lastpc;
|
||||
int i;
|
||||
char buf[200];
|
||||
|
||||
lastfn = nil;
|
||||
lastpc = 0;
|
||||
for(i=0; textsym(&s, i); i++) {
|
||||
switch(s.type) {
|
||||
case 'T':
|
||||
case 't':
|
||||
if(lastpc != 0) {
|
||||
breakpoint(lastpc, s.value);
|
||||
lastpc = 0;
|
||||
}
|
||||
// Ignore second entry for a given name;
|
||||
// that's the debugging blob.
|
||||
if(lastfn && strcmp(s.name, lastfn) == 0)
|
||||
break;
|
||||
lastfn = s.name;
|
||||
buf[0] = 0;
|
||||
fileline(buf, sizeof buf, s.value);
|
||||
if(substring == nil || strstr(buf, substring) || strstr(s.name, substring))
|
||||
lastpc = s.value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uvlong
|
||||
rgetzero(Map *map, char *reg)
|
||||
{
|
||||
USED(map);
|
||||
USED(reg);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* remove the breakpoints at pc and successive instructions,
|
||||
* up to and including the first jump or other control flow transfer.
|
||||
*/
|
||||
void
|
||||
uncover(uvlong pc)
|
||||
{
|
||||
uchar buf[1000];
|
||||
int n, n1, n2;
|
||||
uvlong foll[2];
|
||||
|
||||
// Double-check that we stopped at a breakpoint.
|
||||
if(get1(mem, pc, buf, machdata->bpsize) < 0)
|
||||
sysfatal("read mem inst at %#llux: %r", pc);
|
||||
if(memcmp(buf, machdata->bpinst, machdata->bpsize) != 0)
|
||||
sysfatal("stopped at %#llux; not at breakpoint %d", pc, machdata->bpsize);
|
||||
|
||||
// Figure out how many bytes of straight-line code
|
||||
// there are in the text starting at pc.
|
||||
n = 0;
|
||||
while(n < sizeof buf) {
|
||||
n1 = machdata->instsize(text, pc+n);
|
||||
if(n+n1 > sizeof buf)
|
||||
break;
|
||||
n2 = machdata->foll(text, pc+n, rgetzero, foll);
|
||||
n += n1;
|
||||
if(n2 != 1 || foll[0] != pc+n)
|
||||
break;
|
||||
}
|
||||
|
||||
// Record that this section of code ran.
|
||||
ran(pc, pc+n);
|
||||
|
||||
// Put original instructions back.
|
||||
if(get1(text, pc, buf, n) < 0)
|
||||
sysfatal("get1: %r");
|
||||
if(put1(mem, pc, buf, n) < 0)
|
||||
sysfatal("put1: %r");
|
||||
}
|
||||
|
||||
/*
 * fork a child that issues ctlproc "hang" on itself and then
 * execs argv[0] with argv.  the parent issues ctlproc
 * "attached" and "waitstop" on the child and returns its pid.
 * any failure along the way is fatal.
 */
int
startprocess(char **argv)
{
	int child;

	child = fork();
	if(child < 0)
		sysfatal("fork: %r");

	if(child == 0) {
		// in the child
		if(ctlproc(getpid(), "hang") < 0)
			sysfatal("ctlproc hang: %r");
		exec(argv[0], argv);
		sysfatal("exec %s: %r", argv[0]);
	}

	// in the parent
	if(ctlproc(child, "attached") < 0 || ctlproc(child, "waitstop") < 0)
		sysfatal("attach %d %s: %r", child, argv[0]);
	return child;
}
|
||||
|
||||
int
|
||||
go(void)
|
||||
{
|
||||
uvlong pc;
|
||||
char buf[100];
|
||||
int n;
|
||||
|
||||
for(n = 0;; n++) {
|
||||
ctlproc(pid, "startstop");
|
||||
if(get8(mem, offsetof(Ureg, ip), &pc) < 0) {
|
||||
rerrstr(buf, sizeof buf);
|
||||
if(strstr(buf, "exited") || strstr(buf, "No such process"))
|
||||
return n;
|
||||
sysfatal("cannot read pc: %r");
|
||||
}
|
||||
pc--;
|
||||
if(put8(mem, offsetof(Ureg, ip), pc) < 0)
|
||||
sysfatal("cannot write pc: %r");
|
||||
uncover(pc);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
int n;
|
||||
|
||||
ARGBEGIN{
|
||||
case 'g':
|
||||
substring = EARGF(usage());
|
||||
break;
|
||||
case 'l':
|
||||
longnames++;
|
||||
break;
|
||||
case 'n':
|
||||
minlines = atoi(EARGF(usage()));
|
||||
break;
|
||||
case 's':
|
||||
doshowsrc = 1;
|
||||
break;
|
||||
case 'v':
|
||||
chatty++;
|
||||
break;
|
||||
default:
|
||||
usage();
|
||||
}ARGEND
|
||||
|
||||
getwd(cwd, sizeof cwd);
|
||||
ncwd = strlen(cwd);
|
||||
|
||||
if(argc == 0) {
|
||||
*--argv = "6.out";
|
||||
}
|
||||
fd = open(argv[0], OREAD);
|
||||
if(fd < 0)
|
||||
sysfatal("open %s: %r", argv[0]);
|
||||
if(crackhdr(fd, &fhdr) <= 0)
|
||||
sysfatal("crackhdr: %r");
|
||||
machbytype(fhdr.type);
|
||||
if(syminit(fd, &fhdr) <= 0)
|
||||
sysfatal("syminit: %r");
|
||||
text = loadmap(nil, fd, &fhdr);
|
||||
if(text == nil)
|
||||
sysfatal("loadmap: %r");
|
||||
pid = startprocess(argv);
|
||||
mem = attachproc(pid, &fhdr);
|
||||
if(mem == nil)
|
||||
sysfatal("attachproc: %r");
|
||||
breakpoints.cmp = rangecmp;
|
||||
cover();
|
||||
n = go();
|
||||
walktree(breakpoints.root);
|
||||
if(chatty)
|
||||
print("%d breakpoints\n", n);
|
||||
detachproc(mem);
|
||||
exits(0);
|
||||
}
|
||||
|
||||
@@ -1,243 +0,0 @@
|
||||
// Renamed from Map to Tree to avoid conflict with libmach.
|
||||
|
||||
/*
|
||||
Copyright (c) 2003-2007 Russ Cox, Tom Bergan, Austin Clements,
|
||||
Massachusetts Institute of Technology
|
||||
Portions Copyright (c) 2009 The Go Authors. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
// Mutable map structure, but still based on
|
||||
// Okasaki, Red Black Trees in a Functional Setting, JFP 1999,
|
||||
// which is a lot easier than the traditional red-black
|
||||
// and plenty fast enough for me. (Also I could copy
|
||||
// and edit fmap.c.)
|
||||
|
||||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include "tree.h"
|
||||
|
||||
// Node colors stored in TreeNode.color.
enum
{
	Red = 0,
	Black = 1
};
|
||||
|
||||
|
||||
// Red-black trees are binary trees with these properties:
|
||||
// 1. No red node has a red parent.
|
||||
// 2. Every path from the root to a leaf contains the
|
||||
// same number of black nodes.
|
||||
|
||||
// rwTreeNode (re)writes a tree node: every field of p is set
// from the arguments and p is returned.  If p is nil a new
// node is allocated first; allocation failure is fatal.
static TreeNode*
rwTreeNode(TreeNode *p, int color, TreeNode *left, void *key, void *value, TreeNode *right)
{
	if(p == nil){
		p = malloc(sizeof *p);
		if(p == nil)
			sysfatal("malloc: %r");
	}
	p->color = color;
	p->left = left;
	p->key = key;
	p->value = value;
	p->right = right;
	return p;
}
|
||||
|
||||
// balance restores the "no red node has a red parent" property
// at m0 after an insertion below it.  If m0 is black and has a
// red child that itself has a red child, the three nodes are
// rewritten in place (no allocation) into a red node with two
// black children; otherwise m0 is returned with its fields
// unchanged.
static TreeNode*
balance(TreeNode *m0)
{
	void *xk, *xv, *yk, *yv, *zk, *zv;
	TreeNode *a, *b, *c, *d;
	TreeNode *m1, *m2;
	int color;
	TreeNode *left, *right;
	void *key, *value;

	color = m0->color;
	left = m0->left;
	key = m0->key;
	value = m0->value;
	right = m0->right;

	// Okasaki notation: (T is mkTreeNode, B is Black, R is Red, x, y, z are key-value.)
	//
	// balance B (T R (T R a x b) y c) z d
	// balance B (T R a x (T R b y c)) z d
	// balance B a x (T R (T R b y c) z d)
	// balance B a x (T R b y (T R c z d))
	//
	//     = T R (T B a x b) y (T B c z d)

	if(color != Black)
		return rwTreeNode(m0, color, left, key, value, right);

	if(left && left->color == Red){
		if(left->left && left->left->color == Red){
			a = left->left->left;
			xk = left->left->key;
			xv = left->left->value;
			b = left->left->right;
			yk = left->key;
			yv = left->value;
			c = left->right;
			zk = key;
			zv = value;
			d = right;
			m1 = left;
			m2 = left->left;
			goto hard;
		}
		if(left->right && left->right->color == Red){
			a = left->left;
			xk = left->key;
			xv = left->value;
			b = left->right->left;
			yk = left->right->key;
			yv = left->right->value;
			c = left->right->right;
			zk = key;
			zv = value;
			d = right;
			m1 = left;
			m2 = left->right;
			goto hard;
		}
	}

	// The right-hand cases are tested even when the left child is
	// red: a red left child without a red child of its own says
	// nothing about the right subtree, and skipping this test
	// would leave a red-red pair on the right unrepaired.
	if(right && right->color == Red){
		if(right->left && right->left->color == Red){
			a = left;
			xk = key;
			xv = value;
			b = right->left->left;
			yk = right->left->key;
			yv = right->left->value;
			c = right->left->right;
			zk = right->key;
			zv = right->value;
			d = right->right;
			m1 = right;
			m2 = right->left;
			goto hard;
		}
		if(right->right && right->right->color == Red){
			a = left;
			xk = key;
			xv = value;
			b = right->left;
			yk = right->key;
			yv = right->value;
			c = right->right->left;
			zk = right->right->key;
			zv = right->right->value;
			d = right->right->right;
			m1 = right;
			m2 = right->right;
			goto hard;
		}
	}
	return rwTreeNode(m0, color, left, key, value, right);

hard:
	// All inputs were copied into locals above, so reusing m0, m1
	// and m2 as the three result nodes is safe in any evaluation order.
	return rwTreeNode(m0, Red, rwTreeNode(m1, Black, a, xk, xv, b),
		yk, yv, rwTreeNode(m2, Black, c, zk, zv, d));
}
|
||||
|
||||
// ins0 inserts k/v into the subtree p, ordering keys by pointer
// value, and returns the new subtree root.  When k is already
// present its value is replaced, or, if rw is non-nil, rw is
// filled in to take the existing node's place.  A new key is
// stored in rw when rw is non-nil, otherwise in a fresh node.
static TreeNode*
ins0(TreeNode *p, void *k, void *v, TreeNode *rw)
{
	if(p == nil)
		return rwTreeNode(rw, Red, nil, k, v, nil);

	if(p->key != k){
		if(p->key < k)
			p->left = ins0(p->left, k, v, rw);
		else
			p->right = ins0(p->right, k, v, rw);
		return balance(p);
	}

	// key already present
	if(rw == nil){
		p->value = v;
		return p;
	}
	return rwTreeNode(rw, p->color, p->left, k, v, p->right);
}
|
||||
|
||||
static TreeNode*
|
||||
ins1(Tree *m, TreeNode *p, void *k, void *v, TreeNode *rw)
|
||||
{
|
||||
int i;
|
||||
|
||||
if(p == nil)
|
||||
return rwTreeNode(rw, Red, nil, k, v, nil);
|
||||
i = m->cmp(p->key, k);
|
||||
if(i == 0){
|
||||
if(rw)
|
||||
return rwTreeNode(rw, p->color, p->left, k, v, p->right);
|
||||
p->value = v;
|
||||
return p;
|
||||
}
|
||||
if(i < 0)
|
||||
p->left = ins1(m, p->left, k, v, rw);
|
||||
else
|
||||
p->right = ins1(m, p->right, k, v, rw);
|
||||
return balance(p);
|
||||
}
|
||||
|
||||
void
|
||||
treeputelem(Tree *m, void *key, void *val, TreeNode *rw)
|
||||
{
|
||||
if(m->cmp)
|
||||
m->root = ins1(m, m->root, key, val, rw);
|
||||
else
|
||||
m->root = ins0(m->root, key, val, rw);
|
||||
}
|
||||
|
||||
void
|
||||
treeput(Tree *m, void *key, void *val)
|
||||
{
|
||||
treeputelem(m, key, val, nil);
|
||||
}
|
||||
|
||||
void*
|
||||
treeget(Tree *m, void *key)
|
||||
{
|
||||
int i;
|
||||
TreeNode *p;
|
||||
|
||||
p = m->root;
|
||||
if(m->cmp){
|
||||
for(;;){
|
||||
if(p == nil)
|
||||
return nil;
|
||||
i = m->cmp(p->key, key);
|
||||
if(i < 0)
|
||||
p = p->left;
|
||||
else if(i > 0)
|
||||
p = p->right;
|
||||
else
|
||||
return p->value;
|
||||
}
|
||||
}else{
|
||||
for(;;){
|
||||
if(p == nil)
|
||||
return nil;
|
||||
if(p->key == key)
|
||||
return p->value;
|
||||
if(p->key < key)
|
||||
p = p->left;
|
||||
else
|
||||
p = p->right;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,47 +0,0 @@
|
||||
// Renamed from Map to Tree to avoid conflict with libmach.
|
||||
|
||||
/*
|
||||
Copyright (c) 2003-2007 Russ Cox, Tom Bergan, Austin Clements,
|
||||
Massachusetts Institute of Technology
|
||||
Portions Copyright (c) 2009 The Go Authors. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
typedef struct Tree Tree;
typedef struct TreeNode TreeNode;

// A Tree is a red-black tree mapping keys to values.
struct Tree
{
	int (*cmp)(void*, void*);	// key ordering; if nil, keys are compared as pointers
	TreeNode *root;			// nil when the tree is empty
};

// A TreeNode is one key/value entry of a Tree.
struct TreeNode
{
	int color;		// Red or Black
	TreeNode *left;
	void *key;
	void *value;
	TreeNode *right;
};

void	*treeget(Tree*, void*);
void	treeput(Tree*, void*, void*);
void	treeputelem(Tree*, void*, void*, TreeNode*);
|
||||
@@ -1,5 +0,0 @@
|
||||
# Copyright 2012 The Go Authors. All rights reserved.
|
||||
# Use of this source code is governed by a BSD-style
|
||||
# license that can be found in the LICENSE file.
|
||||
|
||||
include ../../Make.dist
|
||||
@@ -1,47 +0,0 @@
|
||||
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
/*
|
||||
|
||||
Prof is a rudimentary real-time profiler.
|
||||
|
||||
Given a command to run or the process id (pid) of a command already
|
||||
running, it samples the program's state at regular intervals and reports
|
||||
on its behavior. With no options, it prints a histogram of the locations
|
||||
in the code that were sampled during execution.
|
||||
|
||||
Since it is a real-time profiler, unlike a traditional profiler it samples
|
||||
the program's state even when it is not running, such as when it is
|
||||
asleep or waiting for I/O. Each thread contributes equally to the
|
||||
statistics.
|
||||
|
||||
Usage:
|
||||
go tool prof -p pid [-t total_secs] [-d delta_msec] [6.out args ...]
|
||||
|
||||
The output modes (default -h) are:
|
||||
|
||||
-P file.prof:
|
||||
Write the profile information to file.prof, in the format used by pprof.
|
||||
At the moment, this only works on Linux amd64 binaries and requires that the
|
||||
binary be written using 6l -e to produce ELF debug info.
|
||||
See http://code.google.com/p/google-perftools for details.
|
||||
-h: histograms
|
||||
How many times a sample occurred at each location.
|
||||
-f: dynamic functions
|
||||
At each sample period, print the name of the executing function.
|
||||
-l: dynamic file and line numbers
|
||||
At each sample period, print the file and line number of the executing instruction.
|
||||
-r: dynamic registers
|
||||
At each sample period, print the register contents.
|
||||
-s: dynamic function stack traces
|
||||
At each sample period, print the symbolic stack trace.
|
||||
|
||||
Flag -t sets the maximum real time to sample, in seconds, and -d
|
||||
sets the sampling interval in milliseconds. The default is to sample
|
||||
every 100ms until the program completes.
|
||||
|
||||
It is installed as go tool prof and is architecture-independent.
|
||||
|
||||
*/
|
||||
package documentation
|
||||
@@ -1,899 +0,0 @@
|
||||
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
#include <u.h>
|
||||
#include <time.h>
|
||||
#include <libc.h>
|
||||
#include <bio.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#define Ureg Ureg_amd64
|
||||
#include <ureg_amd64.h>
|
||||
#undef Ureg
|
||||
#define Ureg Ureg_x86
|
||||
#include <ureg_x86.h>
|
||||
#undef Ureg
|
||||
#include <mach.h>
|
||||
|
||||
char* file = "6.out";
|
||||
static Fhdr fhdr;
|
||||
int have_syms;
|
||||
int fd;
|
||||
struct Ureg_amd64 ureg_amd64;
|
||||
struct Ureg_x86 ureg_x86;
|
||||
int total_sec = 0;
|
||||
int delta_msec = 100;
|
||||
int nsample;
|
||||
int nsamplethread;
|
||||
|
||||
// pprof data, stored as sequences of N followed by N PC values.
|
||||
// See http://code.google.com/p/google-perftools .
|
||||
uvlong *ppdata; // traces
|
||||
Biobuf* pproffd; // file descriptor to write trace info
|
||||
long ppstart; // start position of current trace
|
||||
long nppdata; // length of data
|
||||
long ppalloc; // size of allocated data
|
||||
char ppmapdata[10*1024]; // the map information for the output file
|
||||
|
||||
// output formats
|
||||
int pprof; // print pprof output to named file
|
||||
int functions; // print functions
|
||||
int histograms; // print histograms
|
||||
int linenums; // print file and line numbers rather than function names
|
||||
int registers; // print registers
|
||||
int stacks; // print stack traces
|
||||
|
||||
int pid; // main process pid
|
||||
|
||||
int nthread; // number of threads
|
||||
int thread[32]; // thread pids
|
||||
Map *map[32]; // thread maps
|
||||
|
||||
// Usage prints the command synopsis and the list of output
// formats on standard error, then exits with status 2.
void
Usage(void)
{
	fprint(2, "Usage: prof -p pid [-t total_secs] [-d delta_msec]\n");
	fprint(2, " prof [-t total_secs] [-d delta_msec] 6.out args ...\n");
	fprint(2, "\tformats (default -h):\n");
	fprint(2, "\t\t-P file.prof: write [c]pprof output to file.prof\n");
	fprint(2, "\t\t-h: histograms\n");
	fprint(2, "\t\t-f: dynamic functions\n");
	fprint(2, "\t\t-l: dynamic file and line numbers\n");
	fprint(2, "\t\t-r: dynamic registers\n");
	fprint(2, "\t\t-s: dynamic function stack traces\n");
	fprint(2, "\t\t-hs: include stack info in histograms\n");
	exit(2);
}
|
||||
|
||||
typedef struct PC PC;
|
||||
struct PC {
|
||||
uvlong pc;
|
||||
uvlong callerpc;
|
||||
unsigned int count;
|
||||
PC* next;
|
||||
};
|
||||
|
||||
enum {
|
||||
Ncounters = 256
|
||||
};
|
||||
|
||||
PC *counters[Ncounters];
|
||||
|
||||
// Set up by setarch() to make most of the code architecture-independent.
|
||||
typedef struct Arch Arch;
|
||||
struct Arch {
|
||||
char* name;
|
||||
void (*regprint)(void);
|
||||
int (*getregs)(Map*);
|
||||
int (*getPC)(Map*);
|
||||
int (*getSP)(Map*);
|
||||
uvlong (*uregPC)(void);
|
||||
uvlong (*uregSP)(void);
|
||||
void (*ppword)(uvlong w);
|
||||
};
|
||||
|
||||
void
|
||||
amd64_regprint(void)
|
||||
{
|
||||
fprint(2, "ax\t0x%llux\n", ureg_amd64.ax);
|
||||
fprint(2, "bx\t0x%llux\n", ureg_amd64.bx);
|
||||
fprint(2, "cx\t0x%llux\n", ureg_amd64.cx);
|
||||
fprint(2, "dx\t0x%llux\n", ureg_amd64.dx);
|
||||
fprint(2, "si\t0x%llux\n", ureg_amd64.si);
|
||||
fprint(2, "di\t0x%llux\n", ureg_amd64.di);
|
||||
fprint(2, "bp\t0x%llux\n", ureg_amd64.bp);
|
||||
fprint(2, "r8\t0x%llux\n", ureg_amd64.r8);
|
||||
fprint(2, "r9\t0x%llux\n", ureg_amd64.r9);
|
||||
fprint(2, "r10\t0x%llux\n", ureg_amd64.r10);
|
||||
fprint(2, "r11\t0x%llux\n", ureg_amd64.r11);
|
||||
fprint(2, "r12\t0x%llux\n", ureg_amd64.r12);
|
||||
fprint(2, "r13\t0x%llux\n", ureg_amd64.r13);
|
||||
fprint(2, "r14\t0x%llux\n", ureg_amd64.r14);
|
||||
fprint(2, "r15\t0x%llux\n", ureg_amd64.r15);
|
||||
fprint(2, "ds\t0x%llux\n", ureg_amd64.ds);
|
||||
fprint(2, "es\t0x%llux\n", ureg_amd64.es);
|
||||
fprint(2, "fs\t0x%llux\n", ureg_amd64.fs);
|
||||
fprint(2, "gs\t0x%llux\n", ureg_amd64.gs);
|
||||
fprint(2, "type\t0x%llux\n", ureg_amd64.type);
|
||||
fprint(2, "error\t0x%llux\n", ureg_amd64.error);
|
||||
fprint(2, "pc\t0x%llux\n", ureg_amd64.ip);
|
||||
fprint(2, "cs\t0x%llux\n", ureg_amd64.cs);
|
||||
fprint(2, "flags\t0x%llux\n", ureg_amd64.flags);
|
||||
fprint(2, "sp\t0x%llux\n", ureg_amd64.sp);
|
||||
fprint(2, "ss\t0x%llux\n", ureg_amd64.ss);
|
||||
}
|
||||
|
||||
int
|
||||
amd64_getregs(Map *map)
|
||||
{
|
||||
int i;
|
||||
union {
|
||||
uvlong regs[1];
|
||||
struct Ureg_amd64 ureg;
|
||||
} u;
|
||||
|
||||
for(i = 0; i < sizeof ureg_amd64; i+=8) {
|
||||
if(get8(map, (uvlong)i, &u.regs[i/8]) < 0)
|
||||
return -1;
|
||||
}
|
||||
ureg_amd64 = u.ureg;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
amd64_getPC(Map *map)
|
||||
{
|
||||
uvlong x;
|
||||
int r;
|
||||
|
||||
r = get8(map, offsetof(struct Ureg_amd64, ip), &x);
|
||||
ureg_amd64.ip = x;
|
||||
return r;
|
||||
}
|
||||
|
||||
int
|
||||
amd64_getSP(Map *map)
|
||||
{
|
||||
uvlong x;
|
||||
int r;
|
||||
|
||||
r = get8(map, offsetof(struct Ureg_amd64, sp), &x);
|
||||
ureg_amd64.sp = x;
|
||||
return r;
|
||||
}
|
||||
|
||||
uvlong
|
||||
amd64_uregPC(void)
|
||||
{
|
||||
return ureg_amd64.ip;
|
||||
}
|
||||
|
||||
uvlong
|
||||
amd64_uregSP(void) {
|
||||
return ureg_amd64.sp;
|
||||
}
|
||||
|
||||
void
|
||||
amd64_ppword(uvlong w)
|
||||
{
|
||||
uchar buf[8];
|
||||
|
||||
buf[0] = w;
|
||||
buf[1] = w >> 8;
|
||||
buf[2] = w >> 16;
|
||||
buf[3] = w >> 24;
|
||||
buf[4] = w >> 32;
|
||||
buf[5] = w >> 40;
|
||||
buf[6] = w >> 48;
|
||||
buf[7] = w >> 56;
|
||||
Bwrite(pproffd, buf, 8);
|
||||
}
|
||||
|
||||
void
|
||||
x86_regprint(void)
|
||||
{
|
||||
fprint(2, "ax\t0x%ux\n", ureg_x86.ax);
|
||||
fprint(2, "bx\t0x%ux\n", ureg_x86.bx);
|
||||
fprint(2, "cx\t0x%ux\n", ureg_x86.cx);
|
||||
fprint(2, "dx\t0x%ux\n", ureg_x86.dx);
|
||||
fprint(2, "si\t0x%ux\n", ureg_x86.si);
|
||||
fprint(2, "di\t0x%ux\n", ureg_x86.di);
|
||||
fprint(2, "bp\t0x%ux\n", ureg_x86.bp);
|
||||
fprint(2, "ds\t0x%ux\n", ureg_x86.ds);
|
||||
fprint(2, "es\t0x%ux\n", ureg_x86.es);
|
||||
fprint(2, "fs\t0x%ux\n", ureg_x86.fs);
|
||||
fprint(2, "gs\t0x%ux\n", ureg_x86.gs);
|
||||
fprint(2, "cs\t0x%ux\n", ureg_x86.cs);
|
||||
fprint(2, "flags\t0x%ux\n", ureg_x86.flags);
|
||||
fprint(2, "pc\t0x%ux\n", ureg_x86.pc);
|
||||
fprint(2, "sp\t0x%ux\n", ureg_x86.sp);
|
||||
fprint(2, "ss\t0x%ux\n", ureg_x86.ss);
|
||||
}
|
||||
|
||||
int
|
||||
x86_getregs(Map *map)
|
||||
{
|
||||
int i;
|
||||
|
||||
for(i = 0; i < sizeof ureg_x86; i+=4) {
|
||||
if(get4(map, (uvlong)i, &((uint32*)&ureg_x86)[i/4]) < 0)
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
x86_getPC(Map* map)
|
||||
{
|
||||
return get4(map, offsetof(struct Ureg_x86, pc), &ureg_x86.pc);
|
||||
}
|
||||
|
||||
int
|
||||
x86_getSP(Map* map)
|
||||
{
|
||||
return get4(map, offsetof(struct Ureg_x86, sp), &ureg_x86.sp);
|
||||
}
|
||||
|
||||
uvlong
|
||||
x86_uregPC(void)
|
||||
{
|
||||
return (uvlong)ureg_x86.pc;
|
||||
}
|
||||
|
||||
uvlong
|
||||
x86_uregSP(void)
|
||||
{
|
||||
return (uvlong)ureg_x86.sp;
|
||||
}
|
||||
|
||||
void
|
||||
x86_ppword(uvlong w)
|
||||
{
|
||||
uchar buf[4];
|
||||
|
||||
buf[0] = w;
|
||||
buf[1] = w >> 8;
|
||||
buf[2] = w >> 16;
|
||||
buf[3] = w >> 24;
|
||||
Bwrite(pproffd, buf, 4);
|
||||
}
|
||||
|
||||
Arch archtab[] = {
|
||||
{
|
||||
"amd64",
|
||||
amd64_regprint,
|
||||
amd64_getregs,
|
||||
amd64_getPC,
|
||||
amd64_getSP,
|
||||
amd64_uregPC,
|
||||
amd64_uregSP,
|
||||
amd64_ppword,
|
||||
},
|
||||
{
|
||||
"386",
|
||||
x86_regprint,
|
||||
x86_getregs,
|
||||
x86_getPC,
|
||||
x86_getSP,
|
||||
x86_uregPC,
|
||||
x86_uregSP,
|
||||
x86_ppword,
|
||||
},
|
||||
{
|
||||
nil
|
||||
}
|
||||
};
|
||||
|
||||
Arch *arch;
|
||||
|
||||
int
|
||||
setarch(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
if(mach != nil) {
|
||||
for(i = 0; archtab[i].name != nil; i++) {
|
||||
if (strcmp(mach->name, archtab[i].name) == 0) {
|
||||
arch = &archtab[i];
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
int
|
||||
getthreads(void)
|
||||
{
|
||||
int i, j, curn, found;
|
||||
Map *curmap[nelem(map)];
|
||||
int curthread[nelem(map)];
|
||||
static int complained = 0;
|
||||
|
||||
curn = procthreadpids(pid, curthread, nelem(curthread));
|
||||
if(curn <= 0)
|
||||
return curn;
|
||||
|
||||
if(curn > nelem(map)) {
|
||||
if(complained == 0) {
|
||||
fprint(2, "prof: too many threads; limiting to %d\n", nthread, nelem(map));
|
||||
complained = 1;
|
||||
}
|
||||
curn = nelem(map);
|
||||
}
|
||||
if(curn == nthread && memcmp(thread, curthread, curn*sizeof(*thread)) == 0)
|
||||
return curn; // no changes
|
||||
|
||||
// Number of threads has changed (might be the init case).
|
||||
// A bit expensive but rare enough not to bother being clever.
|
||||
for(i = 0; i < curn; i++) {
|
||||
found = 0;
|
||||
for(j = 0; j < nthread; j++) {
|
||||
if(curthread[i] == thread[j]) {
|
||||
found = 1;
|
||||
curmap[i] = map[j];
|
||||
map[j] = nil;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(found)
|
||||
continue;
|
||||
|
||||
// map new thread
|
||||
curmap[i] = attachproc(curthread[i], &fhdr);
|
||||
if(curmap[i] == nil) {
|
||||
fprint(2, "prof: can't attach to %d: %r\n", curthread[i]);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
for(j = 0; j < nthread; j++)
|
||||
if(map[j] != nil)
|
||||
detachproc(map[j]);
|
||||
|
||||
nthread = curn;
|
||||
memmove(thread, curthread, nthread*sizeof thread[0]);
|
||||
memmove(map, curmap, sizeof map);
|
||||
return nthread;
|
||||
}
|
||||
|
||||
int
|
||||
sample(Map *map)
|
||||
{
|
||||
static int n;
|
||||
|
||||
n++;
|
||||
if(registers) {
|
||||
if(arch->getregs(map) < 0)
|
||||
goto bad;
|
||||
} else {
|
||||
// we need only two registers
|
||||
if(arch->getPC(map) < 0)
|
||||
goto bad;
|
||||
if(arch->getSP(map) < 0)
|
||||
goto bad;
|
||||
}
|
||||
return 1;
|
||||
bad:
|
||||
if(n == 1)
|
||||
fprint(2, "prof: can't read registers: %r\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
addtohistogram(uvlong pc, uvlong callerpc, uvlong sp)
|
||||
{
|
||||
int h;
|
||||
PC *x;
|
||||
|
||||
USED(sp);
|
||||
|
||||
h = (pc + callerpc*101) % Ncounters;
|
||||
for(x = counters[h]; x != NULL; x = x->next) {
|
||||
if(x->pc == pc && x->callerpc == callerpc) {
|
||||
x->count++;
|
||||
return;
|
||||
}
|
||||
}
|
||||
x = malloc(sizeof(PC));
|
||||
x->pc = pc;
|
||||
x->callerpc = callerpc;
|
||||
x->count = 1;
|
||||
x->next = counters[h];
|
||||
counters[h] = x;
|
||||
}
|
||||
|
||||
void
|
||||
addppword(uvlong pc)
|
||||
{
|
||||
if(pc == 0) {
|
||||
return;
|
||||
}
|
||||
if(nppdata == ppalloc) {
|
||||
ppalloc = (1000+nppdata)*2;
|
||||
ppdata = realloc(ppdata, ppalloc * sizeof ppdata[0]);
|
||||
if(ppdata == nil) {
|
||||
fprint(2, "prof: realloc failed: %r\n");
|
||||
exit(2);
|
||||
}
|
||||
}
|
||||
ppdata[nppdata++] = pc;
|
||||
}
|
||||
|
||||
void
|
||||
startpptrace()
|
||||
{
|
||||
ppstart = nppdata;
|
||||
addppword(~0);
|
||||
}
|
||||
|
||||
void
|
||||
endpptrace()
|
||||
{
|
||||
ppdata[ppstart] = nppdata-ppstart-1;
|
||||
}
|
||||
|
||||
uvlong nextpc;
|
||||
|
||||
void
|
||||
xptrace(Map *map, uvlong pc, uvlong sp, Symbol *sym)
|
||||
{
|
||||
USED(map);
|
||||
|
||||
char buf[1024];
|
||||
if(sym == nil){
|
||||
fprint(2, "syms\n");
|
||||
return;
|
||||
}
|
||||
if(histograms)
|
||||
addtohistogram(nextpc, pc, sp);
|
||||
if(!histograms || stacks > 1 || pprof) {
|
||||
if(nextpc == 0)
|
||||
nextpc = sym->value;
|
||||
if(stacks){
|
||||
fprint(2, "%s(", sym->name);
|
||||
fprint(2, ")");
|
||||
if(nextpc != sym->value)
|
||||
fprint(2, "+%#llux ", nextpc - sym->value);
|
||||
if(have_syms && linenums && fileline(buf, sizeof buf, pc)) {
|
||||
fprint(2, " %s", buf);
|
||||
}
|
||||
fprint(2, "\n");
|
||||
}
|
||||
if (pprof) {
|
||||
addppword(nextpc);
|
||||
}
|
||||
}
|
||||
nextpc = pc;
|
||||
}
|
||||
|
||||
void
|
||||
stacktracepcsp(Map *map, uvlong pc, uvlong sp)
|
||||
{
|
||||
nextpc = pc;
|
||||
if(pprof){
|
||||
startpptrace();
|
||||
}
|
||||
if(machdata->ctrace==nil)
|
||||
fprint(2, "no machdata->ctrace\n");
|
||||
else if(machdata->ctrace(map, pc, sp, 0, xptrace) <= 0)
|
||||
fprint(2, "no stack frame: pc=%#p sp=%#p\n", pc, sp);
|
||||
else {
|
||||
addtohistogram(nextpc, 0, sp);
|
||||
if(stacks)
|
||||
fprint(2, "\n");
|
||||
}
|
||||
if(pprof){
|
||||
endpptrace();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
printpc(Map *map, uvlong pc, uvlong sp)
|
||||
{
|
||||
char buf[1024];
|
||||
if(registers)
|
||||
arch->regprint();
|
||||
if(have_syms > 0 && linenums && fileline(buf, sizeof buf, pc))
|
||||
fprint(2, "%s\n", buf);
|
||||
if(have_syms > 0 && functions) {
|
||||
symoff(buf, sizeof(buf), pc, CANY);
|
||||
fprint(2, "%s\n", buf);
|
||||
}
|
||||
if(stacks || pprof){
|
||||
stacktracepcsp(map, pc, sp);
|
||||
}
|
||||
else if(histograms){
|
||||
addtohistogram(pc, 0, sp);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ppmaps(void)
|
||||
{
|
||||
int fd, n;
|
||||
char tmp[100];
|
||||
Seg *seg;
|
||||
|
||||
// If it's Linux, the info is in /proc/$pid/maps
|
||||
snprint(tmp, sizeof tmp, "/proc/%d/maps", pid);
|
||||
fd = open(tmp, 0);
|
||||
if(fd >= 0) {
|
||||
n = read(fd, ppmapdata, sizeof ppmapdata - 1);
|
||||
close(fd);
|
||||
if(n < 0) {
|
||||
fprint(2, "prof: can't read %s: %r\n", tmp);
|
||||
exit(2);
|
||||
}
|
||||
ppmapdata[n] = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
// It's probably a mac. Synthesize an entry for the text file.
|
||||
// The register segment may come first but it has a zero offset, so grab the first non-zero offset segment.
|
||||
for(n = 0; n < 3; n++){
|
||||
seg = &map[0]->seg[n];
|
||||
if(seg->b == 0) {
|
||||
continue;
|
||||
}
|
||||
snprint(ppmapdata, sizeof ppmapdata,
|
||||
"%.16x-%.16x r-xp %d 00:00 34968549 %s\n",
|
||||
seg->b, seg->e, seg->f, "/home/r/6.out"
|
||||
);
|
||||
return;
|
||||
}
|
||||
fprint(2, "prof: no text segment in maps for %s\n", file);
|
||||
exit(2);
|
||||
}
|
||||
|
||||
void
|
||||
samples(void)
|
||||
{
|
||||
int i, pid, msec;
|
||||
struct timespec req;
|
||||
int getmaps;
|
||||
|
||||
req.tv_sec = delta_msec/1000;
|
||||
req.tv_nsec = 1000000*(delta_msec % 1000);
|
||||
getmaps = 0;
|
||||
if(pprof)
|
||||
getmaps= 1;
|
||||
for(msec = 0; total_sec <= 0 || msec < 1000*total_sec; msec += delta_msec) {
|
||||
nsample++;
|
||||
nsamplethread += nthread;
|
||||
for(i = 0; i < nthread; i++) {
|
||||
pid = thread[i];
|
||||
if(ctlproc(pid, "stop") < 0)
|
||||
return;
|
||||
if(!sample(map[i])) {
|
||||
ctlproc(pid, "start");
|
||||
return;
|
||||
}
|
||||
printpc(map[i], arch->uregPC(), arch->uregSP());
|
||||
ctlproc(pid, "start");
|
||||
}
|
||||
nanosleep(&req, NULL);
|
||||
getthreads();
|
||||
if(nthread == 0)
|
||||
break;
|
||||
if(getmaps) {
|
||||
getmaps = 0;
|
||||
ppmaps();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct Func Func;
|
||||
struct Func
|
||||
{
|
||||
Func *next;
|
||||
Symbol s;
|
||||
uint onstack;
|
||||
uint leaf;
|
||||
};
|
||||
|
||||
Func *func[257];
|
||||
int nfunc;
|
||||
|
||||
Func*
|
||||
findfunc(uvlong pc)
|
||||
{
|
||||
Func *f;
|
||||
uint h;
|
||||
Symbol s;
|
||||
|
||||
if(pc == 0)
|
||||
return nil;
|
||||
|
||||
if(!findsym(pc, CTEXT, &s))
|
||||
return nil;
|
||||
|
||||
h = s.value % nelem(func);
|
||||
for(f = func[h]; f != NULL; f = f->next)
|
||||
if(f->s.value == s.value)
|
||||
return f;
|
||||
|
||||
f = malloc(sizeof *f);
|
||||
memset(f, 0, sizeof *f);
|
||||
f->s = s;
|
||||
f->next = func[h];
|
||||
func[h] = f;
|
||||
nfunc++;
|
||||
return f;
|
||||
}
|
||||
|
||||
int
|
||||
compareleaf(const void *va, const void *vb)
|
||||
{
|
||||
Func *a, *b;
|
||||
|
||||
a = *(Func**)va;
|
||||
b = *(Func**)vb;
|
||||
if(a->leaf != b->leaf)
|
||||
return b->leaf - a->leaf;
|
||||
if(a->onstack != b->onstack)
|
||||
return b->onstack - a->onstack;
|
||||
return strcmp(a->s.name, b->s.name);
|
||||
}
|
||||
|
||||
void
|
||||
dumphistogram()
|
||||
{
|
||||
int i, h, n;
|
||||
PC *x;
|
||||
Func *f, **ff;
|
||||
|
||||
if(!histograms)
|
||||
return;
|
||||
|
||||
// assign counts to functions.
|
||||
for(h = 0; h < Ncounters; h++) {
|
||||
for(x = counters[h]; x != NULL; x = x->next) {
|
||||
f = findfunc(x->pc);
|
||||
if(f) {
|
||||
f->onstack += x->count;
|
||||
f->leaf += x->count;
|
||||
}
|
||||
f = findfunc(x->callerpc);
|
||||
if(f)
|
||||
f->leaf -= x->count;
|
||||
}
|
||||
}
|
||||
|
||||
// build array
|
||||
ff = malloc(nfunc*sizeof ff[0]);
|
||||
n = 0;
|
||||
for(h = 0; h < nelem(func); h++)
|
||||
for(f = func[h]; f != NULL; f = f->next)
|
||||
ff[n++] = f;
|
||||
|
||||
// sort by leaf counts
|
||||
qsort(ff, nfunc, sizeof ff[0], compareleaf);
|
||||
|
||||
// print.
|
||||
fprint(2, "%d samples (avg %.1g threads)\n", nsample, (double)nsamplethread/nsample);
|
||||
for(i = 0; i < nfunc; i++) {
|
||||
f = ff[i];
|
||||
fprint(2, "%6.2f%%\t", 100.0*(double)f->leaf/nsample);
|
||||
if(stacks)
|
||||
fprint(2, "%6.2f%%\t", 100.0*(double)f->onstack/nsample);
|
||||
fprint(2, "%s\n", f->s.name);
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct Trace Trace;
|
||||
struct Trace {
|
||||
int count;
|
||||
int npc;
|
||||
uvlong *pc;
|
||||
Trace *next;
|
||||
};
|
||||
|
||||
void
|
||||
dumppprof()
|
||||
{
|
||||
uvlong i, n, *p, *e;
|
||||
int ntrace;
|
||||
Trace *trace, *tp, *up, *prev;
|
||||
|
||||
if(!pprof)
|
||||
return;
|
||||
e = ppdata + nppdata;
|
||||
// Create list of traces. First, count the traces
|
||||
ntrace = 0;
|
||||
for(p = ppdata; p < e;) {
|
||||
n = *p++;
|
||||
p += n;
|
||||
if(n == 0)
|
||||
continue;
|
||||
ntrace++;
|
||||
}
|
||||
if(ntrace <= 0)
|
||||
return;
|
||||
// Allocate and link the traces together.
|
||||
trace = malloc(ntrace * sizeof(Trace));
|
||||
tp = trace;
|
||||
for(p = ppdata; p < e;) {
|
||||
n = *p++;
|
||||
if(n == 0)
|
||||
continue;
|
||||
tp->count = 1;
|
||||
tp->npc = n;
|
||||
tp->pc = p;
|
||||
tp->next = tp+1;
|
||||
tp++;
|
||||
p += n;
|
||||
}
|
||||
trace[ntrace-1].next = nil;
|
||||
// Eliminate duplicates. Lousy algorithm, although not as bad as it looks because
|
||||
// the list collapses fast.
|
||||
for(tp = trace; tp != nil; tp = tp->next) {
|
||||
prev = tp;
|
||||
for(up = tp->next; up != nil; up = up->next) {
|
||||
if(up->npc == tp->npc && memcmp(up->pc, tp->pc, up->npc*sizeof up->pc[0]) == 0) {
|
||||
tp->count++;
|
||||
prev->next = up->next;
|
||||
} else {
|
||||
prev = up;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Write file.
|
||||
// See http://code.google.com/p/google-perftools/source/browse/trunk/doc/cpuprofile-fileformat.html
|
||||
// 1) Header
|
||||
arch->ppword(0); // must be zero
|
||||
arch->ppword(3); // 3 words follow in header
|
||||
arch->ppword(0); // must be zero
|
||||
arch->ppword(delta_msec * 1000); // sampling period in microseconds
|
||||
arch->ppword(0); // must be zero (padding)
|
||||
// 2) One record for each trace.
|
||||
for(tp = trace; tp != nil; tp = tp->next) {
|
||||
arch->ppword(tp->count);
|
||||
arch->ppword(tp->npc);
|
||||
for(i = 0; i < tp->npc; i++) {
|
||||
arch->ppword(tp->pc[i]);
|
||||
}
|
||||
}
|
||||
// 3) Binary trailer
|
||||
arch->ppword(0); // must be zero
|
||||
arch->ppword(1); // must be one
|
||||
arch->ppword(0); // must be zero
|
||||
// 4) Mapped objects.
|
||||
Bwrite(pproffd, ppmapdata, strlen(ppmapdata));
|
||||
// 5) That's it.
|
||||
Bterm(pproffd);
|
||||
}
|
||||
|
||||
// startprocess forks, and in the child issues the "hang" control
// request on itself before exec'ing argv[0] with argv.  The parent
// issues "attached" and "waitstop" on the child and returns its pid.
// The parent exits with status 1 if fork or either control request
// fails.
int
startprocess(char **argv)
{
	int pid;

	pid = fork();
	if(pid == 0) {
		// Child.
		pid = getpid();
		if(ctlproc(pid, "hang") < 0){
			fprint(2, "prof: child process could not hang\n");
			exits(0);
		}
		execv(argv[0], argv);
		// Only reached when execv failed.
		fprint(2, "prof: could not exec %s: %r\n", argv[0]);
		exits(0);
	}

	// Parent.
	if(pid == -1) {
		fprint(2, "prof: could not fork\n");
		exit(1);
	}
	if(ctlproc(pid, "attached") < 0 || ctlproc(pid, "waitstop") < 0) {
		fprint(2, "prof: could not attach to child process: %r\n");
		exit(1);
	}
	return pid;
}
|
||||
|
||||
void
|
||||
detach(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for(i = 0; i < nthread; i++)
|
||||
detachproc(map[i]);
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
int i;
|
||||
char *ppfile;
|
||||
|
||||
ARGBEGIN{
|
||||
case 'P':
|
||||
pprof =1;
|
||||
ppfile = EARGF(Usage());
|
||||
pproffd = Bopen(ppfile, OWRITE);
|
||||
if(pproffd == nil) {
|
||||
fprint(2, "prof: cannot open %s: %r\n", ppfile);
|
||||
exit(2);
|
||||
}
|
||||
break;
|
||||
case 'd':
|
||||
delta_msec = atoi(EARGF(Usage()));
|
||||
break;
|
||||
case 't':
|
||||
total_sec = atoi(EARGF(Usage()));
|
||||
break;
|
||||
case 'p':
|
||||
pid = atoi(EARGF(Usage()));
|
||||
break;
|
||||
case 'f':
|
||||
functions = 1;
|
||||
break;
|
||||
case 'h':
|
||||
histograms = 1;
|
||||
break;
|
||||
case 'l':
|
||||
linenums = 1;
|
||||
break;
|
||||
case 'r':
|
||||
registers = 1;
|
||||
break;
|
||||
case 's':
|
||||
stacks++;
|
||||
break;
|
||||
default:
|
||||
Usage();
|
||||
}ARGEND
|
||||
if(pid <= 0 && argc == 0)
|
||||
Usage();
|
||||
if(functions+linenums+registers+stacks+pprof == 0)
|
||||
histograms = 1;
|
||||
if(!machbyname("amd64")) {
|
||||
fprint(2, "prof: no amd64 support\n", pid);
|
||||
exit(1);
|
||||
}
|
||||
if(argc > 0)
|
||||
file = argv[0];
|
||||
else if(pid) {
|
||||
file = proctextfile(pid);
|
||||
if (file == NULL) {
|
||||
fprint(2, "prof: can't find file for pid %d: %r\n", pid);
|
||||
fprint(2, "prof: on Darwin, need to provide file name explicitly\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
fd = open(file, 0);
|
||||
if(fd < 0) {
|
||||
fprint(2, "prof: can't open %s: %r\n", file);
|
||||
exit(1);
|
||||
}
|
||||
if(crackhdr(fd, &fhdr)) {
|
||||
have_syms = syminit(fd, &fhdr);
|
||||
if(!have_syms) {
|
||||
fprint(2, "prof: no symbols for %s: %r\n", file);
|
||||
}
|
||||
} else {
|
||||
fprint(2, "prof: crack header for %s: %r\n", file);
|
||||
exit(1);
|
||||
}
|
||||
if(pid <= 0)
|
||||
pid = startprocess(argv);
|
||||
attachproc(pid, &fhdr); // initializes thread list
|
||||
if(setarch() < 0) {
|
||||
detach();
|
||||
fprint(2, "prof: can't identify binary architecture for pid %d\n", pid);
|
||||
exit(1);
|
||||
}
|
||||
if(getthreads() <= 0) {
|
||||
detach();
|
||||
fprint(2, "prof: can't find threads for pid %d\n", pid);
|
||||
exit(1);
|
||||
}
|
||||
for(i = 0; i < nthread; i++)
|
||||
ctlproc(thread[i], "start");
|
||||
samples();
|
||||
detach();
|
||||
dumphistogram();
|
||||
dumppprof();
|
||||
exit(0);
|
||||
}
|
||||
@@ -1,3 +0,0 @@
|
||||
This directory tree contains experimental packages and
|
||||
unfinished code that is subject to even more change than the
|
||||
rest of the Go tree.
|
||||
@@ -1,269 +0,0 @@
|
||||
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package ebnf is a library for EBNF grammars. The input is text ([]byte)
|
||||
// satisfying the following grammar (represented itself in EBNF):
|
||||
//
|
||||
// Production = name "=" [ Expression ] "." .
|
||||
// Expression = Alternative { "|" Alternative } .
|
||||
// Alternative = Term { Term } .
|
||||
// Term = name | token [ "…" token ] | Group | Option | Repetition .
|
||||
// Group = "(" Expression ")" .
|
||||
// Option = "[" Expression "]" .
|
||||
// Repetition = "{" Expression "}" .
|
||||
//
|
||||
// A name is a Go identifier, a token is a Go string, and comments
|
||||
// and white space follow the same rules as for the Go language.
|
||||
// Production names starting with an uppercase Unicode letter denote
|
||||
// non-terminal productions (i.e., productions which allow white-space
|
||||
// and comments between tokens); all other production names denote
|
||||
// lexical productions.
|
||||
//
|
||||
package ebnf
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"text/scanner"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Error handling
|
||||
|
||||
// errorList is a list of errors that is itself usable as an error.
type errorList []error

// Err returns the list as an error, or nil when the list is empty.
func (list errorList) Err() error {
	if len(list) > 0 {
		return list
	}
	return nil
}

// Error returns "no errors" for an empty list, the sole error's text
// for a list of one, and otherwise the first error's text followed by
// the number of remaining errors.
func (list errorList) Error() string {
	if len(list) == 0 {
		return "no errors"
	}
	if len(list) == 1 {
		return list[0].Error()
	}
	return fmt.Sprintf("%s (and %d more errors)", list[0], len(list)-1)
}
|
||||
|
||||
// newError returns an error whose text is the position, a colon and a
// space, and then msg.
func newError(pos scanner.Position, msg string) error {
	text := fmt.Sprintf("%s: %s", pos, msg)
	return errors.New(text)
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Internal representation
|
||||
|
||||
// An Expression node represents a production expression.
type Expression interface {
	// Pos is the position of the first character of the syntactic construct
	Pos() scanner.Position
}

// An Alternative node represents a non-empty list of alternative expressions.
type Alternative []Expression // x | y | z

// A Sequence node represents a non-empty list of sequential expressions.
type Sequence []Expression // x y z

// A Name node represents a production name.
type Name struct {
	StringPos scanner.Position
	String    string
}

// A Token node represents a literal.
type Token struct {
	StringPos scanner.Position
	String    string
}

// A Range node represents a range of characters.
type Range struct {
	Begin, End *Token // begin ... end
}

// A Group node represents a grouped expression.
type Group struct {
	Lparen scanner.Position
	Body   Expression // (body)
}

// An Option node represents an optional expression.
type Option struct {
	Lbrack scanner.Position
	Body   Expression // [body]
}

// A Repetition node represents a repeated expression.
type Repetition struct {
	Lbrace scanner.Position
	Body   Expression // {body}
}

// A Production node represents an EBNF production.
type Production struct {
	Name *Name
	Expr Expression
}

// A Bad node stands for pieces of source code that lead to a parse error.
type Bad struct {
	TokPos scanner.Position
	Error  string // parser error message
}

// A Grammar is a set of EBNF productions. The map
// is indexed by production name.
type Grammar map[string]*Production
|
||||
|
||||
func (x Alternative) Pos() scanner.Position { return x[0].Pos() } // the parser always generates non-empty Alternative
|
||||
func (x Sequence) Pos() scanner.Position { return x[0].Pos() } // the parser always generates non-empty Sequences
|
||||
func (x *Name) Pos() scanner.Position { return x.StringPos }
|
||||
func (x *Token) Pos() scanner.Position { return x.StringPos }
|
||||
func (x *Range) Pos() scanner.Position { return x.Begin.Pos() }
|
||||
func (x *Group) Pos() scanner.Position { return x.Lparen }
|
||||
func (x *Option) Pos() scanner.Position { return x.Lbrack }
|
||||
func (x *Repetition) Pos() scanner.Position { return x.Lbrace }
|
||||
func (x *Production) Pos() scanner.Position { return x.Name.Pos() }
|
||||
func (x *Bad) Pos() scanner.Position { return x.TokPos }
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Grammar verification
|
||||
|
||||
// isLexical reports whether name denotes a lexical production, that
// is, whether its first rune is not an uppercase letter.
func isLexical(name string) bool {
	first, _ := utf8.DecodeRuneInString(name)
	upper := unicode.IsUpper(first)
	return !upper
}
|
||||
|
||||
type verifier struct {
|
||||
errors errorList
|
||||
worklist []*Production
|
||||
reached Grammar // set of productions reached from (and including) the root production
|
||||
grammar Grammar
|
||||
}
|
||||
|
||||
func (v *verifier) error(pos scanner.Position, msg string) {
|
||||
v.errors = append(v.errors, newError(pos, msg))
|
||||
}
|
||||
|
||||
func (v *verifier) push(prod *Production) {
|
||||
name := prod.Name.String
|
||||
if _, found := v.reached[name]; !found {
|
||||
v.worklist = append(v.worklist, prod)
|
||||
v.reached[name] = prod
|
||||
}
|
||||
}
|
||||
|
||||
func (v *verifier) verifyChar(x *Token) rune {
|
||||
s := x.String
|
||||
if utf8.RuneCountInString(s) != 1 {
|
||||
v.error(x.Pos(), "single char expected, found "+s)
|
||||
return 0
|
||||
}
|
||||
ch, _ := utf8.DecodeRuneInString(s)
|
||||
return ch
|
||||
}
|
||||
|
||||
func (v *verifier) verifyExpr(expr Expression, lexical bool) {
|
||||
switch x := expr.(type) {
|
||||
case nil:
|
||||
// empty expression
|
||||
case Alternative:
|
||||
for _, e := range x {
|
||||
v.verifyExpr(e, lexical)
|
||||
}
|
||||
case Sequence:
|
||||
for _, e := range x {
|
||||
v.verifyExpr(e, lexical)
|
||||
}
|
||||
case *Name:
|
||||
// a production with this name must exist;
|
||||
// add it to the worklist if not yet processed
|
||||
if prod, found := v.grammar[x.String]; found {
|
||||
v.push(prod)
|
||||
} else {
|
||||
v.error(x.Pos(), "missing production "+x.String)
|
||||
}
|
||||
// within a lexical production references
|
||||
// to non-lexical productions are invalid
|
||||
if lexical && !isLexical(x.String) {
|
||||
v.error(x.Pos(), "reference to non-lexical production "+x.String)
|
||||
}
|
||||
case *Token:
|
||||
// nothing to do for now
|
||||
case *Range:
|
||||
i := v.verifyChar(x.Begin)
|
||||
j := v.verifyChar(x.End)
|
||||
if i >= j {
|
||||
v.error(x.Pos(), "decreasing character range")
|
||||
}
|
||||
case *Group:
|
||||
v.verifyExpr(x.Body, lexical)
|
||||
case *Option:
|
||||
v.verifyExpr(x.Body, lexical)
|
||||
case *Repetition:
|
||||
v.verifyExpr(x.Body, lexical)
|
||||
case *Bad:
|
||||
v.error(x.Pos(), x.Error)
|
||||
default:
|
||||
panic(fmt.Sprintf("internal error: unexpected type %T", expr))
|
||||
}
|
||||
}
|
||||
|
||||
func (v *verifier) verify(grammar Grammar, start string) {
|
||||
// find root production
|
||||
root, found := grammar[start]
|
||||
if !found {
|
||||
var noPos scanner.Position
|
||||
v.error(noPos, "no start production "+start)
|
||||
return
|
||||
}
|
||||
|
||||
// initialize verifier
|
||||
v.worklist = v.worklist[0:0]
|
||||
v.reached = make(Grammar)
|
||||
v.grammar = grammar
|
||||
|
||||
// work through the worklist
|
||||
v.push(root)
|
||||
for {
|
||||
n := len(v.worklist) - 1
|
||||
if n < 0 {
|
||||
break
|
||||
}
|
||||
prod := v.worklist[n]
|
||||
v.worklist = v.worklist[0:n]
|
||||
v.verifyExpr(prod.Expr, isLexical(prod.Name.String))
|
||||
}
|
||||
|
||||
// check if all productions were reached
|
||||
if len(v.reached) < len(v.grammar) {
|
||||
for name, prod := range v.grammar {
|
||||
if _, found := v.reached[name]; !found {
|
||||
v.error(prod.Pos(), name+" is unreachable")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Verify checks that:
|
||||
// - all productions used are defined
|
||||
// - all productions defined are used when beginning at start
|
||||
// - lexical productions refer only to other lexical productions
|
||||
//
|
||||
// Position information is interpreted relative to the file set fset.
|
||||
//
|
||||
func Verify(grammar Grammar, start string) error {
|
||||
var v verifier
|
||||
v.verify(grammar, start)
|
||||
return v.errors.Err()
|
||||
}
|
||||
@@ -1,71 +0,0 @@
|
||||
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package ebnf
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"testing"
|
||||
)
|
||||
|
||||
var goodGrammars = []string{
|
||||
`Program = .`,
|
||||
|
||||
`Program = foo .
|
||||
foo = "foo" .`,
|
||||
|
||||
`Program = "a" | "b" "c" .`,
|
||||
|
||||
`Program = "a" … "z" .`,
|
||||
|
||||
`Program = Song .
|
||||
Song = { Note } .
|
||||
Note = Do | (Re | Mi | Fa | So | La) | Ti .
|
||||
Do = "c" .
|
||||
Re = "d" .
|
||||
Mi = "e" .
|
||||
Fa = "f" .
|
||||
So = "g" .
|
||||
La = "a" .
|
||||
Ti = ti .
|
||||
ti = "b" .`,
|
||||
}
|
||||
|
||||
var badGrammars = []string{
|
||||
`Program = | .`,
|
||||
`Program = | b .`,
|
||||
`Program = a … b .`,
|
||||
`Program = "a" … .`,
|
||||
`Program = … "b" .`,
|
||||
`Program = () .`,
|
||||
`Program = [] .`,
|
||||
`Program = {} .`,
|
||||
}
|
||||
|
||||
func checkGood(t *testing.T, src string) {
|
||||
grammar, err := Parse("", bytes.NewBuffer([]byte(src)))
|
||||
if err != nil {
|
||||
t.Errorf("Parse(%s) failed: %v", src, err)
|
||||
return
|
||||
}
|
||||
if err = Verify(grammar, "Program"); err != nil {
|
||||
t.Errorf("Verify(%s) failed: %v", src, err)
|
||||
}
|
||||
}
|
||||
|
||||
func checkBad(t *testing.T, src string) {
|
||||
_, err := Parse("", bytes.NewBuffer([]byte(src)))
|
||||
if err == nil {
|
||||
t.Errorf("Parse(%s) should have failed", src)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGrammars(t *testing.T) {
|
||||
for _, src := range goodGrammars {
|
||||
checkGood(t, src)
|
||||
}
|
||||
for _, src := range badGrammars {
|
||||
checkBad(t, src)
|
||||
}
|
||||
}
|
||||
@@ -1,190 +0,0 @@
|
||||
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package ebnf
|
||||
|
||||
import (
|
||||
"io"
|
||||
"strconv"
|
||||
"text/scanner"
|
||||
)
|
||||
|
||||
type parser struct {
|
||||
errors errorList
|
||||
scanner scanner.Scanner
|
||||
pos scanner.Position // token position
|
||||
tok rune // one token look-ahead
|
||||
lit string // token literal
|
||||
}
|
||||
|
||||
func (p *parser) next() {
|
||||
p.tok = p.scanner.Scan()
|
||||
p.pos = p.scanner.Position
|
||||
p.lit = p.scanner.TokenText()
|
||||
}
|
||||
|
||||
func (p *parser) error(pos scanner.Position, msg string) {
|
||||
p.errors = append(p.errors, newError(pos, msg))
|
||||
}
|
||||
|
||||
func (p *parser) errorExpected(pos scanner.Position, msg string) {
|
||||
msg = `expected "` + msg + `"`
|
||||
if pos.Offset == p.pos.Offset {
|
||||
// the error happened at the current position;
|
||||
// make the error message more specific
|
||||
msg += ", found " + scanner.TokenString(p.tok)
|
||||
if p.tok < 0 {
|
||||
msg += " " + p.lit
|
||||
}
|
||||
}
|
||||
p.error(pos, msg)
|
||||
}
|
||||
|
||||
func (p *parser) expect(tok rune) scanner.Position {
|
||||
pos := p.pos
|
||||
if p.tok != tok {
|
||||
p.errorExpected(pos, scanner.TokenString(tok))
|
||||
}
|
||||
p.next() // make progress in any case
|
||||
return pos
|
||||
}
|
||||
|
||||
func (p *parser) parseIdentifier() *Name {
|
||||
pos := p.pos
|
||||
name := p.lit
|
||||
p.expect(scanner.Ident)
|
||||
return &Name{pos, name}
|
||||
}
|
||||
|
||||
func (p *parser) parseToken() *Token {
|
||||
pos := p.pos
|
||||
value := ""
|
||||
if p.tok == scanner.String {
|
||||
value, _ = strconv.Unquote(p.lit)
|
||||
// Unquote may fail with an error, but only if the scanner found
|
||||
// an illegal string in the first place. In this case the error
|
||||
// has already been reported.
|
||||
p.next()
|
||||
} else {
|
||||
p.expect(scanner.String)
|
||||
}
|
||||
return &Token{pos, value}
|
||||
}
|
||||
|
||||
// ParseTerm returns nil if no term was found.
|
||||
func (p *parser) parseTerm() (x Expression) {
|
||||
pos := p.pos
|
||||
|
||||
switch p.tok {
|
||||
case scanner.Ident:
|
||||
x = p.parseIdentifier()
|
||||
|
||||
case scanner.String:
|
||||
tok := p.parseToken()
|
||||
x = tok
|
||||
const ellipsis = '…' // U+2026, the horizontal ellipsis character
|
||||
if p.tok == ellipsis {
|
||||
p.next()
|
||||
x = &Range{tok, p.parseToken()}
|
||||
}
|
||||
|
||||
case '(':
|
||||
p.next()
|
||||
x = &Group{pos, p.parseExpression()}
|
||||
p.expect(')')
|
||||
|
||||
case '[':
|
||||
p.next()
|
||||
x = &Option{pos, p.parseExpression()}
|
||||
p.expect(']')
|
||||
|
||||
case '{':
|
||||
p.next()
|
||||
x = &Repetition{pos, p.parseExpression()}
|
||||
p.expect('}')
|
||||
}
|
||||
|
||||
return x
|
||||
}
|
||||
|
||||
func (p *parser) parseSequence() Expression {
|
||||
var list Sequence
|
||||
|
||||
for x := p.parseTerm(); x != nil; x = p.parseTerm() {
|
||||
list = append(list, x)
|
||||
}
|
||||
|
||||
// no need for a sequence if list.Len() < 2
|
||||
switch len(list) {
|
||||
case 0:
|
||||
p.errorExpected(p.pos, "term")
|
||||
return &Bad{p.pos, "term expected"}
|
||||
case 1:
|
||||
return list[0]
|
||||
}
|
||||
|
||||
return list
|
||||
}
|
||||
|
||||
func (p *parser) parseExpression() Expression {
|
||||
var list Alternative
|
||||
|
||||
for {
|
||||
list = append(list, p.parseSequence())
|
||||
if p.tok != '|' {
|
||||
break
|
||||
}
|
||||
p.next()
|
||||
}
|
||||
// len(list) > 0
|
||||
|
||||
// no need for an Alternative node if list.Len() < 2
|
||||
if len(list) == 1 {
|
||||
return list[0]
|
||||
}
|
||||
|
||||
return list
|
||||
}
|
||||
|
||||
func (p *parser) parseProduction() *Production {
|
||||
name := p.parseIdentifier()
|
||||
p.expect('=')
|
||||
var expr Expression
|
||||
if p.tok != '.' {
|
||||
expr = p.parseExpression()
|
||||
}
|
||||
p.expect('.')
|
||||
return &Production{name, expr}
|
||||
}
|
||||
|
||||
func (p *parser) parse(filename string, src io.Reader) Grammar {
|
||||
p.scanner.Init(src)
|
||||
p.scanner.Filename = filename
|
||||
p.next() // initializes pos, tok, lit
|
||||
|
||||
grammar := make(Grammar)
|
||||
for p.tok != scanner.EOF {
|
||||
prod := p.parseProduction()
|
||||
name := prod.Name.String
|
||||
if _, found := grammar[name]; !found {
|
||||
grammar[name] = prod
|
||||
} else {
|
||||
p.error(prod.Pos(), name+" declared already")
|
||||
}
|
||||
}
|
||||
|
||||
return grammar
|
||||
}
|
||||
|
||||
// Parse parses a set of EBNF productions from source src.
|
||||
// It returns a set of productions. Errors are reported
|
||||
// for incorrect syntax and if a production is declared
|
||||
// more than once; the filename is used only for error
|
||||
// positions.
|
||||
//
|
||||
func Parse(filename string, src io.Reader) (Grammar, error) {
|
||||
var p parser
|
||||
grammar := p.parse(filename, src)
|
||||
return grammar, p.errors.Err()
|
||||
}
|
||||
@@ -1,22 +0,0 @@
|
||||
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
/*
|
||||
|
||||
Ebnflint verifies that EBNF productions are consistent and grammatically correct.
|
||||
It reads them from an HTML document such as the Go specification.
|
||||
|
||||
Grammar productions are grouped in boxes demarcated by the HTML elements
|
||||
<pre class="ebnf">
|
||||
</pre>
|
||||
|
||||
|
||||
Usage:
|
||||
go tool ebnflint [--start production] [file]
|
||||
|
||||
The --start flag specifies the name of the start production for
|
||||
the grammar; it defaults to "Start".
|
||||
|
||||
*/
|
||||
package documentation
|
||||
@@ -1,122 +0,0 @@
|
||||
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"exp/ebnf"
|
||||
"flag"
|
||||
"fmt"
|
||||
"go/scanner"
|
||||
"go/token"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
)
|
||||
|
||||
// fset is a package-level token.FileSet.
// NOTE(review): no use of fset is visible in the rest of this file - confirm before relying on or removing it.
var fset = token.NewFileSet()
|
||||
// start holds the value of the -start flag: the name of the grammar's
// start production (default "Start"). main passes it to verify.
var start = flag.String("start", "Start", "name of start production")
|
||||
|
||||
// usage prints the command synopsis and the flag defaults to standard
// error and terminates the program with exit status 1.
func usage() {
	fmt.Fprint(os.Stderr, "usage: go tool ebnflint [flags] [filename]\n")
	flag.PrintDefaults()
	os.Exit(1)
}
|
||||
|
||||
// open and close are the HTML tags that delimit an EBNF section
// inside a .html document: the text between them is EBNF source.
var open = []byte(`<pre class="ebnf">`)
var close = []byte(`</pre>`)
|
||||
|
||||
// report prints err to standard error using scanner.PrintError and
// then terminates the program with a failure status.
func report(err error) {
	const exitFailure = 1
	scanner.PrintError(os.Stderr, err)
	os.Exit(exitFailure)
}
|
||||
|
||||
func extractEBNF(src []byte) []byte {
|
||||
var buf bytes.Buffer
|
||||
|
||||
for {
|
||||
// i = beginning of EBNF text
|
||||
i := bytes.Index(src, open)
|
||||
if i < 0 {
|
||||
break // no EBNF found - we are done
|
||||
}
|
||||
i += len(open)
|
||||
|
||||
// write as many newlines as found in the excluded text
|
||||
// to maintain correct line numbers in error messages
|
||||
for _, ch := range src[0:i] {
|
||||
if ch == '\n' {
|
||||
buf.WriteByte('\n')
|
||||
}
|
||||
}
|
||||
|
||||
// j = end of EBNF text (or end of source)
|
||||
j := bytes.Index(src[i:], close) // close marker
|
||||
if j < 0 {
|
||||
j = len(src) - i
|
||||
}
|
||||
j += i
|
||||
|
||||
// copy EBNF text
|
||||
buf.Write(src[i:j])
|
||||
|
||||
// advance
|
||||
src = src[j:]
|
||||
}
|
||||
|
||||
return buf.Bytes()
|
||||
}
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
var (
|
||||
name string
|
||||
r io.Reader
|
||||
)
|
||||
switch flag.NArg() {
|
||||
case 0:
|
||||
name, r = "<stdin>", os.Stdin
|
||||
case 1:
|
||||
name = flag.Arg(0)
|
||||
default:
|
||||
usage()
|
||||
}
|
||||
|
||||
if err := verify(name, *start, r); err != nil {
|
||||
report(err)
|
||||
}
|
||||
}
|
||||
|
||||
func verify(name, start string, r io.Reader) error {
|
||||
if r == nil {
|
||||
f, err := os.Open(name)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
r = f
|
||||
}
|
||||
|
||||
src, err := ioutil.ReadAll(r)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if filepath.Ext(name) == ".html" || bytes.Index(src, open) >= 0 {
|
||||
src = extractEBNF(src)
|
||||
}
|
||||
|
||||
grammar, err := ebnf.Parse(name, bytes.NewBuffer(src))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return ebnf.Verify(grammar, start)
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestSpec(t *testing.T) {
|
||||
if err := verify(runtime.GOROOT()+"/doc/go_spec.html", "SourceFile", nil); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
@@ -1,63 +0,0 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
/*
|
||||
The gotype command does syntactic and semantic analysis of Go files
|
||||
and packages similar to the analysis performed by the front-end of
|
||||
a Go compiler. Errors are reported if the analysis fails; otherwise
|
||||
gotype is quiet (unless -v is set).
|
||||
|
||||
Without a list of paths, gotype processes the standard input, which must
|
||||
be the source of a single package file.
|
||||
|
||||
Given a list of file names, each file must be a source file belonging to
|
||||
the same package unless the package name is explicitly specified with the
|
||||
-p flag.
|
||||
|
||||
Given a directory name, gotype collects all .go files in the directory
|
||||
and processes them as if they were provided as an explicit list of file
|
||||
names. Each directory is processed independently. Files starting with .
|
||||
or not ending in .go are ignored.
|
||||
|
||||
Usage:
|
||||
gotype [flags] [path ...]
|
||||
|
||||
The flags are:
|
||||
-e
|
||||
Print all (including spurious) errors.
|
||||
-p pkgName
|
||||
Process only those files in package pkgName.
|
||||
-r
|
||||
Recursively process subdirectories.
|
||||
-v
|
||||
Verbose mode.
|
||||
|
||||
Debugging flags:
|
||||
-comments
|
||||
Parse comments (ignored if -ast not set).
|
||||
-ast
|
||||
Print AST (disables concurrent parsing).
|
||||
-trace
|
||||
Print parse trace (disables concurrent parsing).
|
||||
|
||||
|
||||
Examples
|
||||
|
||||
To check the files file.go, old.saved, and .ignored:
|
||||
|
||||
gotype file.go old.saved .ignored
|
||||
|
||||
To check all .go files belonging to package main in the current directory
|
||||
and recursively in all subdirectories:
|
||||
|
||||
gotype -p main -r .
|
||||
|
||||
To verify the output of a pipe:
|
||||
|
||||
echo "package foo" | gotype
|
||||
|
||||
*/
|
||||
package documentation
|
||||
|
||||
// BUG(gri): At the moment, only single-file scope analysis is performed.
|
||||
@@ -1,197 +0,0 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"exp/types"
|
||||
"flag"
|
||||
"fmt"
|
||||
"go/ast"
|
||||
"go/parser"
|
||||
"go/scanner"
|
||||
"go/token"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Flags selecting the main operation modes.
var (
	pkgName   = flag.String("p", "", "process only those files in package pkgName")
	recursive = flag.Bool("r", false, "recursively process subdirectories")
	verbose   = flag.Bool("v", false, "verbose mode")
	allErrors = flag.Bool("e", false, "print all (including spurious) errors")
)

// Flags for debugging support.
var (
	parseComments = flag.Bool("comments", false, "parse comments (ignored if -ast not set)")
	printTrace    = flag.Bool("trace", false, "print parse trace")
	printAST      = flag.Bool("ast", false, "print AST")
)
|
||||
|
||||
// exitCode is the status main passes to os.Exit; report sets it to 2.
var exitCode = 0
|
||||
|
||||
// usage prints the command synopsis and the flag defaults to standard
// error and terminates the program with exit status 2.
func usage() {
	fmt.Fprint(os.Stderr, "usage: gotype [flags] [path ...]\n")
	flag.PrintDefaults()
	os.Exit(2)
}
|
||||
|
||||
func report(err error) {
|
||||
scanner.PrintError(os.Stderr, err)
|
||||
exitCode = 2
|
||||
}
|
||||
|
||||
// parse returns the AST for the Go source src.
|
||||
// The filename is for error reporting only.
|
||||
// The result is nil if there were errors or if
|
||||
// the file does not belong to the -p package.
|
||||
func parse(fset *token.FileSet, filename string, src []byte) *ast.File {
|
||||
if *verbose {
|
||||
fmt.Println(filename)
|
||||
}
|
||||
|
||||
// ignore files with different package name
|
||||
if *pkgName != "" {
|
||||
file, err := parser.ParseFile(fset, filename, src, parser.PackageClauseOnly)
|
||||
if err != nil {
|
||||
report(err)
|
||||
return nil
|
||||
}
|
||||
if file.Name.Name != *pkgName {
|
||||
if *verbose {
|
||||
fmt.Printf("\tignored (package %s)\n", file.Name.Name)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// parse entire file
|
||||
mode := parser.DeclarationErrors
|
||||
if *allErrors {
|
||||
mode |= parser.SpuriousErrors
|
||||
}
|
||||
if *parseComments && *printAST {
|
||||
mode |= parser.ParseComments
|
||||
}
|
||||
if *printTrace {
|
||||
mode |= parser.Trace
|
||||
}
|
||||
file, err := parser.ParseFile(fset, filename, src, mode)
|
||||
if err != nil {
|
||||
report(err)
|
||||
return nil
|
||||
}
|
||||
if *printAST {
|
||||
ast.Print(fset, file)
|
||||
}
|
||||
|
||||
return file
|
||||
}
|
||||
|
||||
func parseStdin(fset *token.FileSet) (files map[string]*ast.File) {
|
||||
files = make(map[string]*ast.File)
|
||||
src, err := ioutil.ReadAll(os.Stdin)
|
||||
if err != nil {
|
||||
report(err)
|
||||
return
|
||||
}
|
||||
const filename = "<standard input>"
|
||||
if file := parse(fset, filename, src); file != nil {
|
||||
files[filename] = file
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func parseFiles(fset *token.FileSet, filenames []string) (files map[string]*ast.File) {
|
||||
files = make(map[string]*ast.File)
|
||||
for _, filename := range filenames {
|
||||
src, err := ioutil.ReadFile(filename)
|
||||
if err != nil {
|
||||
report(err)
|
||||
continue
|
||||
}
|
||||
if file := parse(fset, filename, src); file != nil {
|
||||
if files[filename] != nil {
|
||||
report(errors.New(fmt.Sprintf("%q: duplicate file", filename)))
|
||||
continue
|
||||
}
|
||||
files[filename] = file
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// isGoFilename reports whether filename names a Go source file that
// should be processed: it must end in ".go" and must not start with ".".
func isGoFilename(filename string) bool {
	if strings.HasPrefix(filename, ".") {
		return false
	}
	return strings.HasSuffix(filename, ".go")
}
|
||||
|
||||
func processDirectory(dirname string) {
|
||||
f, err := os.Open(dirname)
|
||||
if err != nil {
|
||||
report(err)
|
||||
return
|
||||
}
|
||||
filenames, err := f.Readdirnames(-1)
|
||||
f.Close()
|
||||
if err != nil {
|
||||
report(err)
|
||||
// continue since filenames may not be empty
|
||||
}
|
||||
for i, filename := range filenames {
|
||||
filenames[i] = filepath.Join(dirname, filename)
|
||||
}
|
||||
processFiles(filenames, false)
|
||||
}
|
||||
|
||||
func processFiles(filenames []string, allFiles bool) {
|
||||
i := 0
|
||||
for _, filename := range filenames {
|
||||
switch info, err := os.Stat(filename); {
|
||||
case err != nil:
|
||||
report(err)
|
||||
case info.IsDir():
|
||||
if allFiles || *recursive {
|
||||
processDirectory(filename)
|
||||
}
|
||||
default:
|
||||
if allFiles || isGoFilename(info.Name()) {
|
||||
filenames[i] = filename
|
||||
i++
|
||||
}
|
||||
}
|
||||
}
|
||||
fset := token.NewFileSet()
|
||||
processPackage(fset, parseFiles(fset, filenames[0:i]))
|
||||
}
|
||||
|
||||
func processPackage(fset *token.FileSet, files map[string]*ast.File) {
|
||||
// make a package (resolve all identifiers)
|
||||
pkg, err := ast.NewPackage(fset, files, types.GcImport, types.Universe)
|
||||
if err != nil {
|
||||
report(err)
|
||||
return
|
||||
}
|
||||
_, err = types.Check(fset, pkg)
|
||||
if err != nil {
|
||||
report(err)
|
||||
}
|
||||
}
|
||||
|
||||
func main() {
|
||||
flag.Usage = usage
|
||||
flag.Parse()
|
||||
|
||||
if flag.NArg() == 0 {
|
||||
fset := token.NewFileSet()
|
||||
processPackage(fset, parseStdin(fset))
|
||||
} else {
|
||||
processFiles(flag.Args(), true)
|
||||
}
|
||||
|
||||
os.Exit(exitCode)
|
||||
}
|
||||
@@ -1,49 +0,0 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func runTest(t *testing.T, path, pkg string) {
|
||||
exitCode = 0
|
||||
*pkgName = pkg
|
||||
*recursive = false
|
||||
|
||||
if pkg == "" {
|
||||
processFiles([]string{path}, true)
|
||||
} else {
|
||||
processDirectory(path)
|
||||
}
|
||||
|
||||
if exitCode != 0 {
|
||||
t.Errorf("processing %s failed: exitCode = %d", path, exitCode)
|
||||
}
|
||||
}
|
||||
|
||||
// tests lists the inputs for Test. An entry with an empty pkg names a
// single file; any other entry names a directory together with the
// package whose files are to be checked. Files belonging to a
// different package are ignored by the -p filter, so pkg must match
// the package that actually lives in the directory or the entry
// checks nothing.
var tests = []struct {
	path string
	pkg  string
}{
	// individual files
	{"testdata/test1.go", ""},

	// directories
	{filepath.Join(runtime.GOROOT(), "src/pkg/go/ast"), "ast"},
	{filepath.Join(runtime.GOROOT(), "src/pkg/go/doc"), "doc"},
	{filepath.Join(runtime.GOROOT(), "src/pkg/go/token"), "token"},
	{filepath.Join(runtime.GOROOT(), "src/pkg/go/scanner"), "scanner"},
	{filepath.Join(runtime.GOROOT(), "src/pkg/go/parser"), "parser"},
	{filepath.Join(runtime.GOROOT(), "src/pkg/exp/types"), "types"},
}
|
||||
|
||||
func Test(t *testing.T) {
|
||||
for _, test := range tests {
|
||||
runTest(t, test.path, test.pkg)
|
||||
}
|
||||
}
|
||||
27
src/pkg/exp/gotype/testdata/test1.go
vendored
27
src/pkg/exp/gotype/testdata/test1.go
vendored
@@ -1,27 +0,0 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package p
|
||||
|
||||
// Test input: declares a local struct type T whose only field refers back to T.
func _() {
|
||||
// the scope of a local type declaration starts immediately after the type name
|
||||
type T struct{ _ *T }
|
||||
}
|
||||
|
||||
// Test input: two type switches on x that bind the guard variable t,
// the second one after an init statement that also declares a t.
func _(x interface{}) {
|
||||
// the variable defined by a TypeSwitchGuard is declared in each TypeCaseClause
|
||||
switch t := x.(type) {
|
||||
case int:
|
||||
_ = t
|
||||
case float32:
|
||||
_ = t
|
||||
default:
|
||||
_ = t
|
||||
}
|
||||
|
||||
// the variable defined by a TypeSwitchGuard must not conflict with other
|
||||
// variables declared in the initial simple statement
|
||||
switch t := 0; t := x.(type) {
|
||||
}
|
||||
}
|
||||
@@ -1,100 +0,0 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package html
|
||||
|
||||
// isSpecialElementMap is the set of HTML element names that, per
// section 12.2.3.2 of the HTML5 specification, "have varying levels
// of special parsing rules". Entries are grouped by initial letter.
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements
var isSpecialElementMap = map[string]bool{
	"address": true, "applet": true, "area": true, "article": true, "aside": true,

	"base": true, "basefont": true, "bgsound": true, "blockquote": true,
	"body": true, "br": true, "button": true,

	"caption": true, "center": true, "col": true, "colgroup": true, "command": true,

	"dd": true, "details": true, "dir": true, "div": true, "dl": true, "dt": true,

	"embed": true,

	"fieldset": true, "figcaption": true, "figure": true, "footer": true,
	"form": true, "frame": true, "frameset": true,

	"h1": true, "h2": true, "h3": true, "h4": true, "h5": true, "h6": true,
	"head": true, "header": true, "hgroup": true, "hr": true, "html": true,

	"iframe": true, "img": true, "input": true, "isindex": true,

	"li": true, "link": true, "listing": true,

	"marquee": true, "menu": true, "meta": true,

	"nav": true, "noembed": true, "noframes": true, "noscript": true,

	"object": true, "ol": true,

	"p": true, "param": true, "plaintext": true, "pre": true,

	"script": true, "section": true, "select": true, "style": true, "summary": true,

	"table": true, "tbody": true, "td": true, "textarea": true, "tfoot": true,
	"th": true, "thead": true, "title": true, "tr": true,

	"ul": true,

	"wbr": true,

	"xmp": true,
}
|
||||
|
||||
func isSpecialElement(element *Node) bool {
|
||||
switch element.Namespace {
|
||||
case "", "html":
|
||||
return isSpecialElementMap[element.Data]
|
||||
case "svg":
|
||||
return element.Data == "foreignObject"
|
||||
}
|
||||
return false
|
||||
}
|
||||
@@ -1,107 +0,0 @@
|
||||
// Copyright 2010 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
/*
|
||||
Package html implements an HTML5-compliant tokenizer and parser.
|
||||
INCOMPLETE.
|
||||
|
||||
Tokenization is done by creating a Tokenizer for an io.Reader r. It is the
|
||||
caller's responsibility to ensure that r provides UTF-8 encoded HTML.
|
||||
|
||||
z := html.NewTokenizer(r)
|
||||
|
||||
Given a Tokenizer z, the HTML is tokenized by repeatedly calling z.Next(),
|
||||
which parses the next token and returns its type, or an error:
|
||||
|
||||
for {
|
||||
tt := z.Next()
|
||||
if tt == html.ErrorToken {
|
||||
// ...
|
||||
return ...
|
||||
}
|
||||
// Process the current token.
|
||||
}
|
||||
|
||||
There are two APIs for retrieving the current token. The high-level API is to
|
||||
call Token; the low-level API is to call Text or TagName / TagAttr. Both APIs
|
||||
allow optionally calling Raw after Next but before Token, Text, TagName, or
|
||||
TagAttr. In EBNF notation, the valid call sequence per token is:
|
||||
|
||||
Next {Raw} [ Token | Text | TagName {TagAttr} ]
|
||||
|
||||
Token returns an independent data structure that completely describes a token.
|
||||
Entities (such as "&lt;") are unescaped, tag names and attribute keys are
|
||||
lower-cased, and attributes are collected into a []Attribute. For example:
|
||||
|
||||
for {
|
||||
if z.Next() == html.ErrorToken {
|
||||
// Returning io.EOF indicates success.
|
||||
return z.Err()
|
||||
}
|
||||
emitToken(z.Token())
|
||||
}
|
||||
|
||||
The low-level API performs fewer allocations and copies, but the contents of
|
||||
the []byte values returned by Text, TagName and TagAttr may change on the next
|
||||
call to Next. For example, to extract an HTML page's anchor text:
|
||||
|
||||
depth := 0
|
||||
for {
|
||||
tt := z.Next()
|
||||
switch tt {
|
||||
case ErrorToken:
|
||||
return z.Err()
|
||||
case TextToken:
|
||||
if depth > 0 {
|
||||
// emitBytes should copy the []byte it receives,
|
||||
// if it doesn't process it immediately.
|
||||
emitBytes(z.Text())
|
||||
}
|
||||
case StartTagToken, EndTagToken:
|
||||
tn, _ := z.TagName()
|
||||
if len(tn) == 1 && tn[0] == 'a' {
|
||||
if tt == StartTagToken {
|
||||
depth++
|
||||
} else {
|
||||
depth--
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Parsing is done by calling Parse with an io.Reader, which returns the root of
|
||||
the parse tree (the document element) as a *Node. It is the caller's
|
||||
responsibility to ensure that the Reader provides UTF-8 encoded HTML. For
|
||||
example, to process each anchor node in depth-first order:
|
||||
|
||||
doc, err := html.Parse(r)
|
||||
if err != nil {
|
||||
// ...
|
||||
}
|
||||
var f func(*html.Node)
|
||||
f = func(n *html.Node) {
|
||||
if n.Type == html.ElementNode && n.Data == "a" {
|
||||
// Do something with n...
|
||||
}
|
||||
for _, c := range n.Child {
|
||||
f(c)
|
||||
}
|
||||
}
|
||||
f(doc)
|
||||
|
||||
The relevant specifications include:
|
||||
http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html and
|
||||
http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html
|
||||
*/
|
||||
package html
|
||||
|
||||
// The tokenization algorithm implemented by this package is not a line-by-line
|
||||
// transliteration of the relatively verbose state-machine in the WHATWG
|
||||
// specification. A more direct approach is used instead, where the program
|
||||
// counter implies the state, such as whether it is tokenizing a tag or a text
|
||||
// node. Specification compliance is verified by checking expected and actual
|
||||
// outputs over a test suite rather than aiming for algorithmic fidelity.
|
||||
|
||||
// TODO(nigeltao): Does a DOM API belong in this package or a separate one?
|
||||
// TODO(nigeltao): How does parsing interact with a JavaScript engine?
|
||||
@@ -1,156 +0,0 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package html
|
||||
|
||||
import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// parseDoctype parses the data from a DoctypeToken into a name,
|
||||
// public identifier, and system identifier. It returns a Node whose Type
|
||||
// is DoctypeNode, whose Data is the name, and which has attributes
|
||||
// named "system" and "public" for the two identifiers if they were present.
|
||||
// quirks is whether the document should be parsed in "quirks mode".
|
||||
func parseDoctype(s string) (n *Node, quirks bool) {
|
||||
n = &Node{Type: DoctypeNode}
|
||||
|
||||
// Find the name.
|
||||
space := strings.IndexAny(s, whitespace)
|
||||
if space == -1 {
|
||||
space = len(s)
|
||||
}
|
||||
n.Data = s[:space]
|
||||
// The comparison to "html" is case-sensitive.
|
||||
if n.Data != "html" {
|
||||
quirks = true
|
||||
}
|
||||
n.Data = strings.ToLower(n.Data)
|
||||
s = strings.TrimLeft(s[space:], whitespace)
|
||||
|
||||
if len(s) < 6 {
|
||||
// It can't start with "PUBLIC" or "SYSTEM".
|
||||
// Ignore the rest of the string.
|
||||
return n, quirks || s != ""
|
||||
}
|
||||
|
||||
key := strings.ToLower(s[:6])
|
||||
s = s[6:]
|
||||
for key == "public" || key == "system" {
|
||||
s = strings.TrimLeft(s, whitespace)
|
||||
if s == "" {
|
||||
break
|
||||
}
|
||||
quote := s[0]
|
||||
if quote != '"' && quote != '\'' {
|
||||
break
|
||||
}
|
||||
s = s[1:]
|
||||
q := strings.IndexRune(s, rune(quote))
|
||||
var id string
|
||||
if q == -1 {
|
||||
id = s
|
||||
s = ""
|
||||
} else {
|
||||
id = s[:q]
|
||||
s = s[q+1:]
|
||||
}
|
||||
n.Attr = append(n.Attr, Attribute{Key: key, Val: id})
|
||||
if key == "public" {
|
||||
key = "system"
|
||||
} else {
|
||||
key = ""
|
||||
}
|
||||
}
|
||||
|
||||
if key != "" || s != "" {
|
||||
quirks = true
|
||||
} else if len(n.Attr) > 0 {
|
||||
if n.Attr[0].Key == "public" {
|
||||
public := strings.ToLower(n.Attr[0].Val)
|
||||
switch public {
|
||||
case "-//w3o//dtd w3 html strict 3.0//en//", "-/w3d/dtd html 4.0 transitional/en", "html":
|
||||
quirks = true
|
||||
default:
|
||||
for _, q := range quirkyIDs {
|
||||
if strings.HasPrefix(public, q) {
|
||||
quirks = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
// The following two public IDs only cause quirks mode if there is no system ID.
|
||||
if len(n.Attr) == 1 && (strings.HasPrefix(public, "-//w3c//dtd html 4.01 frameset//") ||
|
||||
strings.HasPrefix(public, "-//w3c//dtd html 4.01 transitional//")) {
|
||||
quirks = true
|
||||
}
|
||||
}
|
||||
if lastAttr := n.Attr[len(n.Attr)-1]; lastAttr.Key == "system" &&
|
||||
strings.ToLower(lastAttr.Val) == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd" {
|
||||
quirks = true
|
||||
}
|
||||
}
|
||||
|
||||
return n, quirks
|
||||
}
|
||||
|
||||
// quirkyIDs lists prefixes of public doctype identifiers that put a
// document into quirks mode. parseDoctype compares them against the
// lower-cased public identifier, so every entry must be lower case.
var quirkyIDs = []string{
	// Silmaril, Advasoft, AS.
	"+//silmaril//dtd html pro v0r11 19970101//",
	"-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
	"-//as//dtd html 3.0 aswedit + extensions//",

	// IETF.
	"-//ietf//dtd html 2.0 level 1//",
	"-//ietf//dtd html 2.0 level 2//",
	"-//ietf//dtd html 2.0 strict level 1//",
	"-//ietf//dtd html 2.0 strict level 2//",
	"-//ietf//dtd html 2.0 strict//",
	"-//ietf//dtd html 2.0//",
	"-//ietf//dtd html 2.1e//",
	"-//ietf//dtd html 3.0//",
	"-//ietf//dtd html 3.2 final//",
	"-//ietf//dtd html 3.2//",
	"-//ietf//dtd html 3//",
	"-//ietf//dtd html level 0//",
	"-//ietf//dtd html level 1//",
	"-//ietf//dtd html level 2//",
	"-//ietf//dtd html level 3//",
	"-//ietf//dtd html strict level 0//",
	"-//ietf//dtd html strict level 1//",
	"-//ietf//dtd html strict level 2//",
	"-//ietf//dtd html strict level 3//",
	"-//ietf//dtd html strict//",
	"-//ietf//dtd html//",

	// Metrius, Microsoft, Netscape.
	"-//metrius//dtd metrius presentational//",
	"-//microsoft//dtd internet explorer 2.0 html strict//",
	"-//microsoft//dtd internet explorer 2.0 html//",
	"-//microsoft//dtd internet explorer 2.0 tables//",
	"-//microsoft//dtd internet explorer 3.0 html strict//",
	"-//microsoft//dtd internet explorer 3.0 html//",
	"-//microsoft//dtd internet explorer 3.0 tables//",
	"-//netscape comm. corp.//dtd html//",
	"-//netscape comm. corp.//dtd strict html//",

	// O'Reilly, SoftQuad, Spyglass, SQ, Sun.
	"-//o'reilly and associates//dtd html 2.0//",
	"-//o'reilly and associates//dtd html extended 1.0//",
	"-//o'reilly and associates//dtd html extended relaxed 1.0//",
	"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
	"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
	"-//spyglass//dtd html 2.0 extended//",
	"-//sq//dtd html 2.0 hotmetal + extensions//",
	"-//sun microsystems corp.//dtd hotjava html//",
	"-//sun microsystems corp.//dtd hotjava strict html//",

	// W3C, W3O, WebTechs.
	"-//w3c//dtd html 3 1995-03-24//",
	"-//w3c//dtd html 3.2 draft//",
	"-//w3c//dtd html 3.2 final//",
	"-//w3c//dtd html 3.2//",
	"-//w3c//dtd html 3.2s draft//",
	"-//w3c//dtd html 4.0 frameset//",
	"-//w3c//dtd html 4.0 transitional//",
	"-//w3c//dtd html experimental 19960712//",
	"-//w3c//dtd html experimental 970421//",
	"-//w3c//dtd w3 html//",
	"-//w3o//dtd w3 html 3.0//",
	"-//webtechs//dtd mozilla html 2.0//",
	"-//webtechs//dtd mozilla html//",
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,29 +0,0 @@
|
||||
// Copyright 2010 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package html
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
func TestEntityLength(t *testing.T) {
|
||||
// We verify that the length of UTF-8 encoding of each value is <= 1 + len(key).
|
||||
// The +1 comes from the leading "&". This property implies that the length of
|
||||
// unescaped text is <= the length of escaped text.
|
||||
for k, v := range entity {
|
||||
if 1+len(k) < utf8.RuneLen(v) {
|
||||
t.Error("escaped entity &" + k + " is shorter than its UTF-8 encoding " + string(v))
|
||||
}
|
||||
if len(k) > longestEntityWithoutSemicolon && k[len(k)-1] != ';' {
|
||||
t.Errorf("entity name %s is %d characters, but longestEntityWithoutSemicolon=%d", k, len(k), longestEntityWithoutSemicolon)
|
||||
}
|
||||
}
|
||||
for k, v := range entity2 {
|
||||
if 1+len(k) < utf8.RuneLen(v[0])+utf8.RuneLen(v[1]) {
|
||||
t.Error("escaped entity &" + k + " is shorter than its UTF-8 encoding " + string(v[0]) + string(v[1]))
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,253 +0,0 @@
|
||||
// Copyright 2010 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package html
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// replacementTable maps the numeric character references 0x80-0x9F,
// which old documents wrote assuming the Windows-1252 encoding, to the
// runes they are replaced with. The entry for code point c is at index
// c-0x80.
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#consume-a-character-reference
//
// 0x00->'\uFFFD' is handled programmatically.
// 0x0D->'\u000D' is a no-op.
var replacementTable = [...]rune{
	// 0x80 - 0x87
	'\u20AC', '\u0081', '\u201A', '\u0192', '\u201E', '\u2026', '\u2020', '\u2021',
	// 0x88 - 0x8F
	'\u02C6', '\u2030', '\u0160', '\u2039', '\u0152', '\u008D', '\u017D', '\u008F',
	// 0x90 - 0x97
	'\u0090', '\u2018', '\u2019', '\u201C', '\u201D', '\u2022', '\u2013', '\u2014',
	// 0x98 - 0x9F
	'\u02DC', '\u2122', '\u0161', '\u203A', '\u0153', '\u009D', '\u017E', '\u0178',
}
|
||||
|
||||
// unescapeEntity reads an entity like "<" from b[src:] and writes the
|
||||
// corresponding "<" to b[dst:], returning the incremented dst and src cursors.
|
||||
// Precondition: b[src] == '&' && dst <= src.
|
||||
// attribute should be true if parsing an attribute value.
|
||||
func unescapeEntity(b []byte, dst, src int, attribute bool) (dst1, src1 int) {
|
||||
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#consume-a-character-reference
|
||||
|
||||
// i starts at 1 because we already know that s[0] == '&'.
|
||||
i, s := 1, b[src:]
|
||||
|
||||
if len(s) <= 1 {
|
||||
b[dst] = b[src]
|
||||
return dst + 1, src + 1
|
||||
}
|
||||
|
||||
if s[i] == '#' {
|
||||
if len(s) <= 3 { // We need to have at least "&#.".
|
||||
b[dst] = b[src]
|
||||
return dst + 1, src + 1
|
||||
}
|
||||
i++
|
||||
c := s[i]
|
||||
hex := false
|
||||
if c == 'x' || c == 'X' {
|
||||
hex = true
|
||||
i++
|
||||
}
|
||||
|
||||
x := '\x00'
|
||||
for i < len(s) {
|
||||
c = s[i]
|
||||
i++
|
||||
if hex {
|
||||
if '0' <= c && c <= '9' {
|
||||
x = 16*x + rune(c) - '0'
|
||||
continue
|
||||
} else if 'a' <= c && c <= 'f' {
|
||||
x = 16*x + rune(c) - 'a' + 10
|
||||
continue
|
||||
} else if 'A' <= c && c <= 'F' {
|
||||
x = 16*x + rune(c) - 'A' + 10
|
||||
continue
|
||||
}
|
||||
} else if '0' <= c && c <= '9' {
|
||||
x = 10*x + rune(c) - '0'
|
||||
continue
|
||||
}
|
||||
if c != ';' {
|
||||
i--
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
if i <= 3 { // No characters matched.
|
||||
b[dst] = b[src]
|
||||
return dst + 1, src + 1
|
||||
}
|
||||
|
||||
if 0x80 <= x && x <= 0x9F {
|
||||
// Replace characters from Windows-1252 with UTF-8 equivalents.
|
||||
x = replacementTable[x-0x80]
|
||||
} else if x == 0 || (0xD800 <= x && x <= 0xDFFF) || x > 0x10FFFF {
|
||||
// Replace invalid characters with the replacement character.
|
||||
x = '\uFFFD'
|
||||
}
|
||||
|
||||
return dst + utf8.EncodeRune(b[dst:], x), src + i
|
||||
}
|
||||
|
||||
// Consume the maximum number of characters possible, with the
|
||||
// consumed characters matching one of the named references.
|
||||
|
||||
for i < len(s) {
|
||||
c := s[i]
|
||||
i++
|
||||
// Lower-cased characters are more common in entities, so we check for them first.
|
||||
if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
|
||||
continue
|
||||
}
|
||||
if c != ';' {
|
||||
i--
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
entityName := string(s[1:i])
|
||||
if entityName == "" {
|
||||
// No-op.
|
||||
} else if attribute && entityName[len(entityName)-1] != ';' && len(s) > i && s[i] == '=' {
|
||||
// No-op.
|
||||
} else if x := entity[entityName]; x != 0 {
|
||||
return dst + utf8.EncodeRune(b[dst:], x), src + i
|
||||
} else if x := entity2[entityName]; x[0] != 0 {
|
||||
dst1 := dst + utf8.EncodeRune(b[dst:], x[0])
|
||||
return dst1 + utf8.EncodeRune(b[dst1:], x[1]), src + i
|
||||
} else if !attribute {
|
||||
maxLen := len(entityName) - 1
|
||||
if maxLen > longestEntityWithoutSemicolon {
|
||||
maxLen = longestEntityWithoutSemicolon
|
||||
}
|
||||
for j := maxLen; j > 1; j-- {
|
||||
if x := entity[entityName[:j]]; x != 0 {
|
||||
return dst + utf8.EncodeRune(b[dst:], x), src + j + 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
dst1, src1 = dst+i, src+i
|
||||
copy(b[dst:dst1], b[src:src1])
|
||||
return dst1, src1
|
||||
}
|
||||
|
||||
// unescape unescapes b's entities in-place, so that "a<b" becomes "a<b".
|
||||
func unescape(b []byte) []byte {
|
||||
for i, c := range b {
|
||||
if c == '&' {
|
||||
dst, src := unescapeEntity(b, i, i, false)
|
||||
for src < len(b) {
|
||||
c := b[src]
|
||||
if c == '&' {
|
||||
dst, src = unescapeEntity(b, dst, src, false)
|
||||
} else {
|
||||
b[dst] = c
|
||||
dst, src = dst+1, src+1
|
||||
}
|
||||
}
|
||||
return b[0:dst]
|
||||
}
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
// lower converts the ASCII letters A-Z in b to lower case in place, so that
// "aBc" becomes "abc". All other bytes are left alone. It returns b.
func lower(b []byte) []byte {
	const caseDelta = 'a' - 'A'
	for i := range b {
		if ch := b[i]; ch >= 'A' && ch <= 'Z' {
			b[i] = ch + caseDelta
		}
	}
	return b
}
|
||||
|
||||
const escapedChars = `&'<>"`
|
||||
|
||||
func escape(w writer, s string) error {
|
||||
i := strings.IndexAny(s, escapedChars)
|
||||
for i != -1 {
|
||||
if _, err := w.WriteString(s[:i]); err != nil {
|
||||
return err
|
||||
}
|
||||
var esc string
|
||||
switch s[i] {
|
||||
case '&':
|
||||
esc = "&"
|
||||
case '\'':
|
||||
esc = "'"
|
||||
case '<':
|
||||
esc = "<"
|
||||
case '>':
|
||||
esc = ">"
|
||||
case '"':
|
||||
esc = """
|
||||
default:
|
||||
panic("unrecognized escape character")
|
||||
}
|
||||
s = s[i+1:]
|
||||
if _, err := w.WriteString(esc); err != nil {
|
||||
return err
|
||||
}
|
||||
i = strings.IndexAny(s, escapedChars)
|
||||
}
|
||||
_, err := w.WriteString(s)
|
||||
return err
|
||||
}
|
||||
|
||||
// EscapeString escapes special characters like "<" to become "<". It
|
||||
// escapes only five such characters: amp, apos, lt, gt and quot.
|
||||
// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
|
||||
// always true.
|
||||
func EscapeString(s string) string {
|
||||
if strings.IndexAny(s, escapedChars) == -1 {
|
||||
return s
|
||||
}
|
||||
var buf bytes.Buffer
|
||||
escape(&buf, s)
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
// UnescapeString unescapes entities like "<" to become "<". It unescapes a
|
||||
// larger range of entities than EscapeString escapes. For example, "á"
|
||||
// unescapes to "á", as does "á" and "&xE1;".
|
||||
// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
|
||||
// always true.
|
||||
func UnescapeString(s string) string {
|
||||
for _, c := range s {
|
||||
if c == '&' {
|
||||
return string(unescape([]byte(s)))
|
||||
}
|
||||
}
|
||||
return s
|
||||
}
|
||||
@@ -1,132 +0,0 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package html
|
||||
|
||||
import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
func adjustForeignAttributes(aa []Attribute) {
|
||||
for i, a := range aa {
|
||||
if a.Key == "" || a.Key[0] != 'x' {
|
||||
continue
|
||||
}
|
||||
switch a.Key {
|
||||
case "xlink:actuate", "xlink:arcrole", "xlink:href", "xlink:role", "xlink:show",
|
||||
"xlink:title", "xlink:type", "xml:base", "xml:lang", "xml:space", "xmlns:xlink":
|
||||
j := strings.Index(a.Key, ":")
|
||||
aa[i].Namespace = a.Key[:j]
|
||||
aa[i].Key = a.Key[j+1:]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func htmlIntegrationPoint(n *Node) bool {
|
||||
if n.Type != ElementNode {
|
||||
return false
|
||||
}
|
||||
switch n.Namespace {
|
||||
case "math":
|
||||
// TODO: annotation-xml elements whose start tags have "text/html" or
|
||||
// "application/xhtml+xml" encodings.
|
||||
case "svg":
|
||||
switch n.Data {
|
||||
case "desc", "foreignObject", "title":
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Section 12.2.5.5.
//
// breakout is a set of HTML tag names, keyed by lower-case name.
var breakout = map[string]bool{
	"b": true, "big": true, "blockquote": true, "body": true, "br": true,
	"center": true, "code": true,
	"dd": true, "div": true, "dl": true, "dt": true,
	"em": true, "embed": true,
	"font": true,
	"h1":   true, "h2": true, "h3": true, "h4": true, "h5": true, "h6": true,
	"head": true, "hr": true,
	"i": true, "img": true,
	"li": true, "listing": true,
	"menu": true, "meta": true,
	"nobr": true,
	"ol":   true,
	"p":    true, "pre": true,
	"ruby": true,
	"s":    true, "small": true, "span": true, "strong": true, "strike": true,
	"sub": true, "sup": true,
	"table": true, "tt": true,
	"u": true, "ul": true,
	"var": true,
}
|
||||
|
||||
// Section 12.2.5.5.
//
// svgTagNameAdjustments maps an all-lower-case SVG tag name to its
// mixed-case spelling.
var svgTagNameAdjustments = map[string]string{
	// Glyph and animation elements.
	"altglyph":         "altGlyph",
	"altglyphdef":      "altGlyphDef",
	"altglyphitem":     "altGlyphItem",
	"animatecolor":     "animateColor",
	"animatemotion":    "animateMotion",
	"animatetransform": "animateTransform",
	"clippath":         "clipPath",

	// Filter primitives.
	"feblend":             "feBlend",
	"fecolormatrix":       "feColorMatrix",
	"fecomponenttransfer": "feComponentTransfer",
	"fecomposite":         "feComposite",
	"feconvolvematrix":    "feConvolveMatrix",
	"fediffuselighting":   "feDiffuseLighting",
	"fedisplacementmap":   "feDisplacementMap",
	"fedistantlight":      "feDistantLight",
	"feflood":             "feFlood",
	"fefunca":             "feFuncA",
	"fefuncb":             "feFuncB",
	"fefuncg":             "feFuncG",
	"fefuncr":             "feFuncR",
	"fegaussianblur":      "feGaussianBlur",
	"feimage":             "feImage",
	"femerge":             "feMerge",
	"femergenode":         "feMergeNode",
	"femorphology":        "feMorphology",
	"feoffset":            "feOffset",
	"fepointlight":        "fePointLight",
	"fespecularlighting":  "feSpecularLighting",
	"fespotlight":         "feSpotLight",
	"fetile":              "feTile",
	"feturbulence":        "feTurbulence",

	// Everything else.
	"foreignobject":  "foreignObject",
	"glyphref":       "glyphRef",
	"lineargradient": "linearGradient",
	"radialgradient": "radialGradient",
	"textpath":       "textPath",
}
|
||||
|
||||
// TODO: add look-up tables for MathML and SVG attribute adjustments.
|
||||
@@ -1,154 +0,0 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package html
|
||||
|
||||
// A NodeType is the type of a Node.
type NodeType int

// The node types. ErrorNode is the zero value of NodeType.
const (
	ErrorNode NodeType = iota
	TextNode
	DocumentNode
	ElementNode
	CommentNode
	DoctypeNode
	// scopeMarkerNode is unexported; it is the type of the scopeMarker
	// sentinel.
	scopeMarkerNode
)
|
||||
|
||||
// Section 12.2.3.3 says "scope markers are inserted when entering applet
|
||||
// elements, buttons, object elements, marquees, table cells, and table
|
||||
// captions, and are used to prevent formatting from 'leaking'".
|
||||
var scopeMarker = Node{Type: scopeMarkerNode}
|
||||
|
||||
// A Node consists of a NodeType and some Data (tag name for element nodes,
|
||||
// content for text) and are part of a tree of Nodes. Element nodes may also
|
||||
// have a Namespace and contain a slice of Attributes. Data is unescaped, so
|
||||
// that it looks like "a<b" rather than "a<b".
|
||||
//
|
||||
// An empty Namespace implies a "http://www.w3.org/1999/xhtml" namespace.
|
||||
// Similarly, "math" is short for "http://www.w3.org/1998/Math/MathML", and
|
||||
// "svg" is short for "http://www.w3.org/2000/svg".
|
||||
type Node struct {
|
||||
Parent *Node
|
||||
Child []*Node
|
||||
Type NodeType
|
||||
Data string
|
||||
Namespace string
|
||||
Attr []Attribute
|
||||
}
|
||||
|
||||
// Add adds a node as a child of n.
|
||||
// It will panic if the child's parent is not nil.
|
||||
func (n *Node) Add(child *Node) {
|
||||
if child.Parent != nil {
|
||||
panic("html: Node.Add called for a child Node that already has a parent")
|
||||
}
|
||||
child.Parent = n
|
||||
n.Child = append(n.Child, child)
|
||||
}
|
||||
|
||||
// Remove removes a node as a child of n.
|
||||
// It will panic if the child's parent is not n.
|
||||
func (n *Node) Remove(child *Node) {
|
||||
if child.Parent == n {
|
||||
child.Parent = nil
|
||||
for i, m := range n.Child {
|
||||
if m == child {
|
||||
copy(n.Child[i:], n.Child[i+1:])
|
||||
j := len(n.Child) - 1
|
||||
n.Child[j] = nil
|
||||
n.Child = n.Child[:j]
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
panic("html: Node.Remove called for a non-child Node")
|
||||
}
|
||||
|
||||
// reparentChildren reparents all of src's child nodes to dst.
|
||||
func reparentChildren(dst, src *Node) {
|
||||
for _, n := range src.Child {
|
||||
if n.Parent != src {
|
||||
panic("html: nodes have an inconsistent parent/child relationship")
|
||||
}
|
||||
n.Parent = dst
|
||||
}
|
||||
dst.Child = append(dst.Child, src.Child...)
|
||||
src.Child = nil
|
||||
}
|
||||
|
||||
// clone returns a new node with the same type, data and attributes.
|
||||
// The clone has no parent and no children.
|
||||
func (n *Node) clone() *Node {
|
||||
m := &Node{
|
||||
Type: n.Type,
|
||||
Data: n.Data,
|
||||
Attr: make([]Attribute, len(n.Attr)),
|
||||
}
|
||||
copy(m.Attr, n.Attr)
|
||||
return m
|
||||
}
|
||||
|
||||
// nodeStack is a stack of nodes.
|
||||
type nodeStack []*Node
|
||||
|
||||
// pop pops the stack. It will panic if s is empty.
|
||||
func (s *nodeStack) pop() *Node {
|
||||
i := len(*s)
|
||||
n := (*s)[i-1]
|
||||
*s = (*s)[:i-1]
|
||||
return n
|
||||
}
|
||||
|
||||
// top returns the most recently pushed node, or nil if s is empty.
|
||||
func (s *nodeStack) top() *Node {
|
||||
if i := len(*s); i > 0 {
|
||||
return (*s)[i-1]
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// index returns the index of the top-most occurrence of n in the stack, or -1
|
||||
// if n is not present.
|
||||
func (s *nodeStack) index(n *Node) int {
|
||||
for i := len(*s) - 1; i >= 0; i-- {
|
||||
if (*s)[i] == n {
|
||||
return i
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
// insert inserts a node at the given index.
|
||||
func (s *nodeStack) insert(i int, n *Node) {
|
||||
(*s) = append(*s, nil)
|
||||
copy((*s)[i+1:], (*s)[i:])
|
||||
(*s)[i] = n
|
||||
}
|
||||
|
||||
// remove removes a node from the stack. It is a no-op if n is not present.
|
||||
func (s *nodeStack) remove(n *Node) {
|
||||
i := s.index(n)
|
||||
if i == -1 {
|
||||
return
|
||||
}
|
||||
copy((*s)[i:], (*s)[i+1:])
|
||||
j := len(*s) - 1
|
||||
(*s)[j] = nil
|
||||
*s = (*s)[:j]
|
||||
}
|
||||
|
||||
// TODO(nigeltao): forTag no longer used. Should it be deleted?
|
||||
|
||||
// forTag returns the top-most element node with the given tag.
|
||||
func (s *nodeStack) forTag(tag string) *Node {
|
||||
for i := len(*s) - 1; i >= 0; i-- {
|
||||
n := (*s)[i]
|
||||
if n.Type == ElementNode && n.Data == tag {
|
||||
return n
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,276 +0,0 @@
|
||||
// Copyright 2010 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package html
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// readParseTest reads a single test case from r.
//
// A test case is a "#data" section (the HTML input, returned as text with
// trailing newlines trimmed), an "#errors" section (skipped), an optional
// "#document-fragment" section whose single line is returned as context, and
// a "#document" section (returned verbatim as want) that ends at a blank line
// or at end of input.
func readParseTest(r *bufio.Reader) (text, want, context string, err error) {
	line, err := r.ReadSlice('\n')
	if err != nil {
		return "", "", "", err
	}
	if string(line) != "#data\n" {
		return "", "", "", fmt.Errorf(`got %q want "#data\n"`, line)
	}

	// Collect the HTML: every line up to the next '#' header.
	var input []byte
	for {
		if line, err = r.ReadSlice('\n'); err != nil {
			return "", "", "", err
		}
		if line[0] == '#' {
			break
		}
		input = append(input, line...)
	}
	text = strings.TrimRight(string(input), "\n")

	// Skip the error list.
	if string(line) != "#errors\n" {
		return "", "", "", fmt.Errorf(`got %q want "#errors\n"`, line)
	}
	for {
		if line, err = r.ReadSlice('\n'); err != nil {
			return "", "", "", err
		}
		if line[0] == '#' {
			break
		}
	}

	// An optional fragment section carries the context element's name on
	// the line after its header.
	if string(line) == "#document-fragment\n" {
		if line, err = r.ReadSlice('\n'); err != nil {
			return "", "", "", err
		}
		context = strings.TrimSpace(string(line))
		if line, err = r.ReadSlice('\n'); err != nil {
			return "", "", "", err
		}
	}

	// Read the dump of what the parse tree should be.
	if string(line) != "#document\n" {
		return "", "", "", fmt.Errorf(`got %q want "#document\n"`, line)
	}
	var tree []byte
	for {
		line, err = r.ReadSlice('\n')
		// EOF is tolerated here: the last test case may end the file.
		if err != nil && err != io.EOF {
			return "", "", "", err
		}
		if len(line) == 0 || string(line) == "\n" {
			break
		}
		tree = append(tree, line...)
	}
	return text, string(tree), context, nil
}
|
||||
|
||||
// dumpIndent writes the line prefix used in tree dumps: "| " followed by two
// spaces for each nesting level. Write errors are ignored.
func dumpIndent(w io.Writer, level int) {
	io.WriteString(w, "| ")
	for i := 0; i < level; i++ {
		io.WriteString(w, "  ")
	}
}
|
||||
|
||||
func dumpLevel(w io.Writer, n *Node, level int) error {
|
||||
dumpIndent(w, level)
|
||||
switch n.Type {
|
||||
case ErrorNode:
|
||||
return errors.New("unexpected ErrorNode")
|
||||
case DocumentNode:
|
||||
return errors.New("unexpected DocumentNode")
|
||||
case ElementNode:
|
||||
if n.Namespace != "" {
|
||||
fmt.Fprintf(w, "<%s %s>", n.Namespace, n.Data)
|
||||
} else {
|
||||
fmt.Fprintf(w, "<%s>", n.Data)
|
||||
}
|
||||
attr := n.Attr
|
||||
if len(attr) == 2 && attr[0].Namespace == "xml" && attr[1].Namespace == "xlink" {
|
||||
// Some of the test cases in tests10.dat change the order of adjusted
|
||||
// foreign attributes, but that behavior is not in the spec, and could
|
||||
// simply be an implementation detail of html5lib's python map ordering.
|
||||
attr[0], attr[1] = attr[1], attr[0]
|
||||
}
|
||||
for _, a := range attr {
|
||||
io.WriteString(w, "\n")
|
||||
dumpIndent(w, level+1)
|
||||
if a.Namespace != "" {
|
||||
fmt.Fprintf(w, `%s %s="%s"`, a.Namespace, a.Key, a.Val)
|
||||
} else {
|
||||
fmt.Fprintf(w, `%s="%s"`, a.Key, a.Val)
|
||||
}
|
||||
}
|
||||
case TextNode:
|
||||
fmt.Fprintf(w, `"%s"`, n.Data)
|
||||
case CommentNode:
|
||||
fmt.Fprintf(w, "<!-- %s -->", n.Data)
|
||||
case DoctypeNode:
|
||||
fmt.Fprintf(w, "<!DOCTYPE %s", n.Data)
|
||||
if n.Attr != nil {
|
||||
var p, s string
|
||||
for _, a := range n.Attr {
|
||||
switch a.Key {
|
||||
case "public":
|
||||
p = a.Val
|
||||
case "system":
|
||||
s = a.Val
|
||||
}
|
||||
}
|
||||
if p != "" || s != "" {
|
||||
fmt.Fprintf(w, ` "%s"`, p)
|
||||
fmt.Fprintf(w, ` "%s"`, s)
|
||||
}
|
||||
}
|
||||
io.WriteString(w, ">")
|
||||
case scopeMarkerNode:
|
||||
return errors.New("unexpected scopeMarkerNode")
|
||||
default:
|
||||
return errors.New("unknown node type")
|
||||
}
|
||||
io.WriteString(w, "\n")
|
||||
for _, c := range n.Child {
|
||||
if err := dumpLevel(w, c, level+1); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func dump(n *Node) (string, error) {
|
||||
if n == nil || len(n.Child) == 0 {
|
||||
return "", nil
|
||||
}
|
||||
var b bytes.Buffer
|
||||
for _, child := range n.Child {
|
||||
if err := dumpLevel(&b, child, 0); err != nil {
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
return b.String(), nil
|
||||
}
|
||||
|
||||
func TestParser(t *testing.T) {
|
||||
testFiles := []struct {
|
||||
filename string
|
||||
// n is the number of test cases to run from that file.
|
||||
// -1 means all test cases.
|
||||
n int
|
||||
}{
|
||||
// TODO(nigeltao): Process all the test cases from all the .dat files.
|
||||
{"adoption01.dat", -1},
|
||||
{"doctype01.dat", -1},
|
||||
{"tests1.dat", -1},
|
||||
{"tests2.dat", -1},
|
||||
{"tests3.dat", -1},
|
||||
{"tests4.dat", -1},
|
||||
{"tests5.dat", -1},
|
||||
{"tests6.dat", -1},
|
||||
{"tests10.dat", 35},
|
||||
}
|
||||
for _, tf := range testFiles {
|
||||
f, err := os.Open("testdata/webkit/" + tf.filename)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer f.Close()
|
||||
r := bufio.NewReader(f)
|
||||
for i := 0; i != tf.n; i++ {
|
||||
text, want, context, err := readParseTest(r)
|
||||
if err == io.EOF && tf.n == -1 {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
var doc *Node
|
||||
if context == "" {
|
||||
doc, err = Parse(strings.NewReader(text))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
} else {
|
||||
contextNode := &Node{
|
||||
Type: ElementNode,
|
||||
Data: context,
|
||||
}
|
||||
nodes, err := ParseFragment(strings.NewReader(text), contextNode)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
doc = &Node{
|
||||
Type: DocumentNode,
|
||||
}
|
||||
for _, n := range nodes {
|
||||
doc.Add(n)
|
||||
}
|
||||
}
|
||||
|
||||
got, err := dump(doc)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
// Compare the parsed tree to the #document section.
|
||||
if got != want {
|
||||
t.Errorf("%s test #%d %q, got vs want:\n----\n%s----\n%s----", tf.filename, i, text, got, want)
|
||||
continue
|
||||
}
|
||||
if renderTestBlacklist[text] || context != "" {
|
||||
continue
|
||||
}
|
||||
// Check that rendering and re-parsing results in an identical tree.
|
||||
pr, pw := io.Pipe()
|
||||
go func() {
|
||||
pw.CloseWithError(Render(pw, doc))
|
||||
}()
|
||||
doc1, err := Parse(pr)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
got1, err := dump(doc1)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if got != got1 {
|
||||
t.Errorf("%s test #%d %q, got vs got1:\n----\n%s----\n%s----", tf.filename, i, text, got, got1)
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Some test inputs result in parse trees that are not 'well-formed' despite
// following the HTML5 recovery algorithms. Rendering and re-parsing such a
// tree will not result in an exact clone of that tree. We blacklist such
// inputs from the render test.
var renderTestBlacklist = map[string]bool{
	// The second <a> will be reparented to the first <table>'s parent. This
	// results in an <a> whose parent is an <a>, which is not 'well-formed'.
	`<a><table><td><a><table></table><a></tr><a></table><b>X</b>C<a>Y`: true,

	// More cases of <a> being reparented:
	`<a href="blah">aba<table><a href="foo">br<tr><td></td></tr>x</table>aoe`: true,
	`<a><table><a></table><p><a><div><a>`:                                     true,
	`<a><table><td><a><table></table><a></tr><a></table><a>`:                  true,

	// A <plaintext> element is reparented, putting it before a table.
	// A <plaintext> element can't have anything after it in HTML.
	`<table><plaintext><td>`: true,
}
|
||||
@@ -1,277 +0,0 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package html
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// writer is the output interface the renderer writes to: an io.Writer that
// can also write single bytes and strings directly. Render passes a
// *bufio.Writer where a writer is required, and EscapeString passes a
// *bytes.Buffer.
type writer interface {
	io.Writer
	WriteByte(byte) error
	WriteString(string) (int, error)
}
|
||||
|
||||
// Render renders the parse tree n to the given writer.
|
||||
//
|
||||
// Rendering is done on a 'best effort' basis: calling Parse on the output of
|
||||
// Render will always result in something similar to the original tree, but it
|
||||
// is not necessarily an exact clone unless the original tree was 'well-formed'.
|
||||
// 'Well-formed' is not easily specified; the HTML5 specification is
|
||||
// complicated.
|
||||
//
|
||||
// Calling Parse on arbitrary input typically results in a 'well-formed' parse
|
||||
// tree. However, it is possible for Parse to yield a 'badly-formed' parse tree.
|
||||
// For example, in a 'well-formed' parse tree, no <a> element is a child of
|
||||
// another <a> element: parsing "<a><a>" results in two sibling elements.
|
||||
// Similarly, in a 'well-formed' parse tree, no <a> element is a child of a
|
||||
// <table> element: parsing "<p><table><a>" results in a <p> with two sibling
|
||||
// children; the <a> is reparented to the <table>'s parent. However, calling
|
||||
// Parse on "<a><table><a>" does not return an error, but the result has an <a>
|
||||
// element with an <a> child, and is therefore not 'well-formed'.
|
||||
//
|
||||
// Programmatically constructed trees are typically also 'well-formed', but it
|
||||
// is possible to construct a tree that looks innocuous but, when rendered and
|
||||
// re-parsed, results in a different tree. A simple example is that a solitary
|
||||
// text node would become a tree containing <html>, <head> and <body> elements.
|
||||
// Another example is that the programmatic equivalent of "a<head>b</head>c"
|
||||
// becomes "<html><head><head/><body>abc</body></html>".
|
||||
func Render(w io.Writer, n *Node) error {
|
||||
if x, ok := w.(writer); ok {
|
||||
return render(x, n)
|
||||
}
|
||||
buf := bufio.NewWriter(w)
|
||||
if err := render(buf, n); err != nil {
|
||||
return err
|
||||
}
|
||||
return buf.Flush()
|
||||
}
|
||||
|
||||
// plaintextAbort is returned from render1 when a <plaintext> element
|
||||
// has been rendered. No more end tags should be rendered after that.
|
||||
var plaintextAbort = errors.New("html: internal error (plaintext abort)")
|
||||
|
||||
func render(w writer, n *Node) error {
|
||||
err := render1(w, n)
|
||||
if err == plaintextAbort {
|
||||
err = nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func render1(w writer, n *Node) error {
|
||||
// Render non-element nodes; these are the easy cases.
|
||||
switch n.Type {
|
||||
case ErrorNode:
|
||||
return errors.New("html: cannot render an ErrorNode node")
|
||||
case TextNode:
|
||||
return escape(w, n.Data)
|
||||
case DocumentNode:
|
||||
for _, c := range n.Child {
|
||||
if err := render1(w, c); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
case ElementNode:
|
||||
// No-op.
|
||||
case CommentNode:
|
||||
if _, err := w.WriteString("<!--"); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := w.WriteString(n.Data); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := w.WriteString("-->"); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
case DoctypeNode:
|
||||
if _, err := w.WriteString("<!DOCTYPE "); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := w.WriteString(n.Data); err != nil {
|
||||
return err
|
||||
}
|
||||
if n.Attr != nil {
|
||||
var p, s string
|
||||
for _, a := range n.Attr {
|
||||
switch a.Key {
|
||||
case "public":
|
||||
p = a.Val
|
||||
case "system":
|
||||
s = a.Val
|
||||
}
|
||||
}
|
||||
if p != "" {
|
||||
if _, err := w.WriteString(" PUBLIC "); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := writeQuoted(w, p); err != nil {
|
||||
return err
|
||||
}
|
||||
if s != "" {
|
||||
if err := w.WriteByte(' '); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := writeQuoted(w, s); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
} else if s != "" {
|
||||
if _, err := w.WriteString(" SYSTEM "); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := writeQuoted(w, s); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return w.WriteByte('>')
|
||||
default:
|
||||
return errors.New("html: unknown node type")
|
||||
}
|
||||
|
||||
// Render the <xxx> opening tag.
|
||||
if err := w.WriteByte('<'); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := w.WriteString(n.Data); err != nil {
|
||||
return err
|
||||
}
|
||||
for _, a := range n.Attr {
|
||||
if err := w.WriteByte(' '); err != nil {
|
||||
return err
|
||||
}
|
||||
if a.Namespace != "" {
|
||||
if _, err := w.WriteString(a.Namespace); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := w.WriteByte(':'); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if _, err := w.WriteString(a.Key); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := w.WriteString(`="`); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := escape(w, a.Val); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := w.WriteByte('"'); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if voidElements[n.Data] {
|
||||
if len(n.Child) != 0 {
|
||||
return fmt.Errorf("html: void element <%s> has child nodes", n.Data)
|
||||
}
|
||||
_, err := w.WriteString("/>")
|
||||
return err
|
||||
}
|
||||
if err := w.WriteByte('>'); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Add initial newline where there is danger of a newline beging ignored.
|
||||
if len(n.Child) > 0 && n.Child[0].Type == TextNode && strings.HasPrefix(n.Child[0].Data, "\n") {
|
||||
switch n.Data {
|
||||
case "pre", "listing", "textarea":
|
||||
if err := w.WriteByte('\n'); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Render any child nodes.
|
||||
switch n.Data {
|
||||
case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
|
||||
for _, c := range n.Child {
|
||||
if c.Type != TextNode {
|
||||
return fmt.Errorf("html: raw text element <%s> has non-text child node", n.Data)
|
||||
}
|
||||
if _, err := w.WriteString(c.Data); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if n.Data == "plaintext" {
|
||||
// Don't render anything else. <plaintext> must be the
|
||||
// last element in the file, with no closing tag.
|
||||
return plaintextAbort
|
||||
}
|
||||
case "textarea", "title":
|
||||
for _, c := range n.Child {
|
||||
if c.Type != TextNode {
|
||||
return fmt.Errorf("html: RCDATA element <%s> has non-text child node", n.Data)
|
||||
}
|
||||
if err := render1(w, c); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
default:
|
||||
for _, c := range n.Child {
|
||||
if err := render1(w, c); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Render the </xxx> closing tag.
|
||||
if _, err := w.WriteString("</"); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := w.WriteString(n.Data); err != nil {
|
||||
return err
|
||||
}
|
||||
return w.WriteByte('>')
|
||||
}
|
||||
|
||||
// writeQuoted writes s to w surrounded by quotes. Normally it will use double
|
||||
// quotes, but if s contains a double quote, it will use single quotes.
|
||||
// It is used for writing the identifiers in a doctype declaration.
|
||||
// In valid HTML, they can't contain both types of quotes.
|
||||
func writeQuoted(w writer, s string) error {
|
||||
var q byte = '"'
|
||||
if strings.Contains(s, `"`) {
|
||||
q = '\''
|
||||
}
|
||||
if err := w.WriteByte(q); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := w.WriteString(s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := w.WriteByte(q); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Section 12.1.2, "Elements", gives this list of void elements. Void elements
// are those that can't have any contents.
//
// render1 closes these with "/>" and reports an error if one has children.
var voidElements = map[string]bool{
	"area": true, "base": true, "br": true, "col": true,
	"command": true, "embed": true, "hr": true, "img": true,
	"input": true, "keygen": true, "link": true, "meta": true,
	"param": true, "source": true, "track": true, "wbr": true,
}
|
||||
@@ -1,111 +0,0 @@
|
||||
// Copyright 2010 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package html
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestRenderer(t *testing.T) {
|
||||
n := &Node{
|
||||
Type: ElementNode,
|
||||
Data: "html",
|
||||
Child: []*Node{
|
||||
{
|
||||
Type: ElementNode,
|
||||
Data: "head",
|
||||
},
|
||||
{
|
||||
Type: ElementNode,
|
||||
Data: "body",
|
||||
Child: []*Node{
|
||||
{
|
||||
Type: TextNode,
|
||||
Data: "0<1",
|
||||
},
|
||||
{
|
||||
Type: ElementNode,
|
||||
Data: "p",
|
||||
Attr: []Attribute{
|
||||
{
|
||||
Key: "id",
|
||||
Val: "A",
|
||||
},
|
||||
{
|
||||
Key: "foo",
|
||||
Val: `abc"def`,
|
||||
},
|
||||
},
|
||||
Child: []*Node{
|
||||
{
|
||||
Type: TextNode,
|
||||
Data: "2",
|
||||
},
|
||||
{
|
||||
Type: ElementNode,
|
||||
Data: "b",
|
||||
Attr: []Attribute{
|
||||
{
|
||||
Key: "empty",
|
||||
Val: "",
|
||||
},
|
||||
},
|
||||
Child: []*Node{
|
||||
{
|
||||
Type: TextNode,
|
||||
Data: "3",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: ElementNode,
|
||||
Data: "i",
|
||||
Attr: []Attribute{
|
||||
{
|
||||
Key: "backslash",
|
||||
Val: `\`,
|
||||
},
|
||||
},
|
||||
Child: []*Node{
|
||||
{
|
||||
Type: TextNode,
|
||||
Data: "&4",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: TextNode,
|
||||
Data: "5",
|
||||
},
|
||||
{
|
||||
Type: ElementNode,
|
||||
Data: "blockquote",
|
||||
},
|
||||
{
|
||||
Type: ElementNode,
|
||||
Data: "br",
|
||||
},
|
||||
{
|
||||
Type: TextNode,
|
||||
Data: "6",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
want := `<html><head></head><body>0<1<p id="A" foo="abc"def">` +
|
||||
`2<b empty="">3</b><i backslash="\">&4</i></p>` +
|
||||
`5<blockquote></blockquote><br/>6</body></html>`
|
||||
b := new(bytes.Buffer)
|
||||
if err := Render(b, n); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if got := b.String(); got != want {
|
||||
t.Errorf("got vs want:\n%s\n%s\n", got, want)
|
||||
}
|
||||
}
|
||||
28
src/pkg/exp/html/testdata/webkit/README
vendored
28
src/pkg/exp/html/testdata/webkit/README
vendored
@@ -1,28 +0,0 @@
|
||||
The *.dat files in this directory are copied from The WebKit Open Source
|
||||
Project, specifically $WEBKITROOT/LayoutTests/html5lib/resources.
|
||||
WebKit is licensed under a BSD style license.
|
||||
http://webkit.org/coding/bsd-license.html says:
|
||||
|
||||
Copyright (C) 2009 Apple Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS "AS IS" AND ANY
|
||||
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
194
src/pkg/exp/html/testdata/webkit/adoption01.dat
vendored
194
src/pkg/exp/html/testdata/webkit/adoption01.dat
vendored
@@ -1,194 +0,0 @@
|
||||
#data
|
||||
<a><p></a></p>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <a>
|
||||
| <p>
|
||||
| <a>
|
||||
|
||||
#data
|
||||
<a>1<p>2</a>3</p>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <a>
|
||||
| "1"
|
||||
| <p>
|
||||
| <a>
|
||||
| "2"
|
||||
| "3"
|
||||
|
||||
#data
|
||||
<a>1<button>2</a>3</button>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <a>
|
||||
| "1"
|
||||
| <button>
|
||||
| <a>
|
||||
| "2"
|
||||
| "3"
|
||||
|
||||
#data
|
||||
<a>1<b>2</a>3</b>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <a>
|
||||
| "1"
|
||||
| <b>
|
||||
| "2"
|
||||
| <b>
|
||||
| "3"
|
||||
|
||||
#data
|
||||
<a>1<div>2<div>3</a>4</div>5</div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <a>
|
||||
| "1"
|
||||
| <div>
|
||||
| <a>
|
||||
| "2"
|
||||
| <div>
|
||||
| <a>
|
||||
| "3"
|
||||
| "4"
|
||||
| "5"
|
||||
|
||||
#data
|
||||
<table><a>1<p>2</a>3</p>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <a>
|
||||
| "1"
|
||||
| <p>
|
||||
| <a>
|
||||
| "2"
|
||||
| "3"
|
||||
| <table>
|
||||
|
||||
#data
|
||||
<b><b><a><p></a>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <b>
|
||||
| <b>
|
||||
| <a>
|
||||
| <p>
|
||||
| <a>
|
||||
|
||||
#data
|
||||
<b><a><b><p></a>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <b>
|
||||
| <a>
|
||||
| <b>
|
||||
| <b>
|
||||
| <p>
|
||||
| <a>
|
||||
|
||||
#data
|
||||
<a><b><b><p></a>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <a>
|
||||
| <b>
|
||||
| <b>
|
||||
| <b>
|
||||
| <b>
|
||||
| <p>
|
||||
| <a>
|
||||
|
||||
#data
|
||||
<p>1<s id="A">2<b id="B">3</p>4</s>5</b>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| "1"
|
||||
| <s>
|
||||
| id="A"
|
||||
| "2"
|
||||
| <b>
|
||||
| id="B"
|
||||
| "3"
|
||||
| <s>
|
||||
| id="A"
|
||||
| <b>
|
||||
| id="B"
|
||||
| "4"
|
||||
| <b>
|
||||
| id="B"
|
||||
| "5"
|
||||
|
||||
#data
|
||||
<table><a>1<td>2</td>3</table>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <a>
|
||||
| "1"
|
||||
| <a>
|
||||
| "3"
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| "2"
|
||||
|
||||
#data
|
||||
<table>A<td>B</td>C</table>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "AC"
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| "B"
|
||||
|
||||
#data
|
||||
<a><svg><tr><input></a>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <a>
|
||||
| <svg svg>
|
||||
| <svg tr>
|
||||
| <svg input>
|
||||
31
src/pkg/exp/html/testdata/webkit/adoption02.dat
vendored
31
src/pkg/exp/html/testdata/webkit/adoption02.dat
vendored
@@ -1,31 +0,0 @@
|
||||
#data
|
||||
<b>1<i>2<p>3</b>4
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <b>
|
||||
| "1"
|
||||
| <i>
|
||||
| "2"
|
||||
| <i>
|
||||
| <p>
|
||||
| <b>
|
||||
| "3"
|
||||
| "4"
|
||||
|
||||
#data
|
||||
<a><div><style></style><address><a>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <a>
|
||||
| <div>
|
||||
| <a>
|
||||
| <style>
|
||||
| <address>
|
||||
| <a>
|
||||
| <a>
|
||||
135
src/pkg/exp/html/testdata/webkit/comments01.dat
vendored
135
src/pkg/exp/html/testdata/webkit/comments01.dat
vendored
@@ -1,135 +0,0 @@
|
||||
#data
|
||||
FOO<!-- BAR -->BAZ
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <!-- BAR -->
|
||||
| "BAZ"
|
||||
|
||||
#data
|
||||
FOO<!-- BAR --!>BAZ
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <!-- BAR -->
|
||||
| "BAZ"
|
||||
|
||||
#data
|
||||
FOO<!-- BAR -- >BAZ
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <!-- BAR -- >BAZ -->
|
||||
|
||||
#data
|
||||
FOO<!-- BAR -- <QUX> -- MUX -->BAZ
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <!-- BAR -- <QUX> -- MUX -->
|
||||
| "BAZ"
|
||||
|
||||
#data
|
||||
FOO<!-- BAR -- <QUX> -- MUX --!>BAZ
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <!-- BAR -- <QUX> -- MUX -->
|
||||
| "BAZ"
|
||||
|
||||
#data
|
||||
FOO<!-- BAR -- <QUX> -- MUX -- >BAZ
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <!-- BAR -- <QUX> -- MUX -- >BAZ -->
|
||||
|
||||
#data
|
||||
FOO<!---->BAZ
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <!-- -->
|
||||
| "BAZ"
|
||||
|
||||
#data
|
||||
FOO<!--->BAZ
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <!-- -->
|
||||
| "BAZ"
|
||||
|
||||
#data
|
||||
FOO<!-->BAZ
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <!-- -->
|
||||
| "BAZ"
|
||||
|
||||
#data
|
||||
<?xml version="1.0">Hi
|
||||
#errors
|
||||
#document
|
||||
| <!-- ?xml version="1.0" -->
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hi"
|
||||
|
||||
#data
|
||||
<?xml version="1.0">
|
||||
#errors
|
||||
#document
|
||||
| <!-- ?xml version="1.0" -->
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<?xml version
|
||||
#errors
|
||||
#document
|
||||
| <!-- ?xml version -->
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
FOO<!----->BAZ
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <!-- - -->
|
||||
| "BAZ"
|
||||
370
src/pkg/exp/html/testdata/webkit/doctype01.dat
vendored
370
src/pkg/exp/html/testdata/webkit/doctype01.dat
vendored
@@ -1,370 +0,0 @@
|
||||
#data
|
||||
<!DOCTYPE html>Hello
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!dOctYpE HtMl>Hello
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPEhtml>Hello
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE>Hello
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE >
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE >Hello
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE >
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato>Hello
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE potato>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato >Hello
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE potato>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato taco>Hello
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE potato>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato taco "ddd>Hello
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE potato>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato sYstEM>Hello
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE potato>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato sYstEM >Hello
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE potato>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato sYstEM ggg>Hello
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE potato>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato SYSTEM taco >Hello
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE potato>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato SYSTEM 'taco"'>Hello
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE potato "" "taco"">
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato SYSTEM "taco">Hello
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE potato "" "taco">
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato SYSTEM "tai'co">Hello
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE potato "" "tai'co">
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato SYSTEMtaco "ddd">Hello
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE potato>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato grass SYSTEM taco>Hello
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE potato>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato pUbLIc>Hello
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE potato>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato pUbLIc >Hello
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE potato>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato pUbLIcgoof>Hello
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE potato>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato PUBLIC goof>Hello
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE potato>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato PUBLIC "go'of">Hello
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE potato "go'of" "">
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato PUBLIC 'go'of'>Hello
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE potato "go" "">
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato PUBLIC 'go:hh of' >Hello
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE potato "go:hh of" "">
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE potato PUBLIC "W3C-//dfdf" SYSTEM ggg>Hello
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE potato "W3C-//dfdf" "">
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
|
||||
"http://www.w3.org/TR/html4/strict.dtd">Hello
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE ...>Hello
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE ...>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Frameset//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE root-element [SYSTEM OR PUBLIC FPI] "uri" [
|
||||
<!-- internal declarations -->
|
||||
]>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE root-element>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "]>"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html PUBLIC
|
||||
"-//WAPFORUM//DTD XHTML Mobile 1.0//EN"
|
||||
"http://www.wapforum.org/DTD/xhtml-mobile10.dtd">
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html "-//WAPFORUM//DTD XHTML Mobile 1.0//EN" "http://www.wapforum.org/DTD/xhtml-mobile10.dtd">
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML SYSTEM "http://www.w3.org/DTD/HTML4-strict.dtd"><body><b>Mine!</b></body>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html "" "http://www.w3.org/DTD/HTML4-strict.dtd">
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <b>
|
||||
| "Mine!"
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN""http://www.w3.org/TR/html4/strict.dtd">
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML PUBLIC"-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML PUBLIC'-//W3C//DTD HTML 4.01//EN''http://www.w3.org/TR/html4/strict.dtd'>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
603
src/pkg/exp/html/testdata/webkit/entities01.dat
vendored
603
src/pkg/exp/html/testdata/webkit/entities01.dat
vendored
@@ -1,603 +0,0 @@
|
||||
#data
|
||||
FOO>BAR
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO>BAR"
|
||||
|
||||
#data
|
||||
FOO>BAR
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO>BAR"
|
||||
|
||||
#data
|
||||
FOO> BAR
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO> BAR"
|
||||
|
||||
#data
|
||||
FOO>;;BAR
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO>;;BAR"
|
||||
|
||||
#data
|
||||
I'm ¬it; I tell you
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "I'm ¬it; I tell you"
|
||||
|
||||
#data
|
||||
I'm ∉ I tell you
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "I'm ∉ I tell you"
|
||||
|
||||
#data
|
||||
FOO& BAR
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO& BAR"
|
||||
|
||||
#data
|
||||
FOO&<BAR>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO&"
|
||||
| <bar>
|
||||
|
||||
#data
|
||||
FOO&&&>BAR
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO&&&>BAR"
|
||||
|
||||
#data
|
||||
FOO)BAR
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO)BAR"
|
||||
|
||||
#data
|
||||
FOOABAR
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOABAR"
|
||||
|
||||
#data
|
||||
FOOABAR
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOABAR"
|
||||
|
||||
#data
|
||||
FOO&#BAR
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO&#BAR"
|
||||
|
||||
#data
|
||||
FOO&#ZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO&#ZOO"
|
||||
|
||||
#data
|
||||
FOOºR
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOºR"
|
||||
|
||||
#data
|
||||
FOO&#xZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO&#xZOO"
|
||||
|
||||
#data
|
||||
FOO&#XZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO&#XZOO"
|
||||
|
||||
#data
|
||||
FOO)BAR
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO)BAR"
|
||||
|
||||
#data
|
||||
FOO䆺R
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO䆺R"
|
||||
|
||||
#data
|
||||
FOOAZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOAZOO"
|
||||
|
||||
#data
|
||||
FOO�ZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO<4F>ZOO"
|
||||
|
||||
#data
|
||||
FOOxZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOxZOO"
|
||||
|
||||
#data
|
||||
FOOyZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOyZOO"
|
||||
|
||||
#data
|
||||
FOO€ZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO€ZOO"
|
||||
|
||||
#data
|
||||
FOOZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOZOO"
|
||||
|
||||
#data
|
||||
FOO‚ZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO‚ZOO"
|
||||
|
||||
#data
|
||||
FOOƒZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOƒZOO"
|
||||
|
||||
#data
|
||||
FOO„ZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO„ZOO"
|
||||
|
||||
#data
|
||||
FOO…ZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO…ZOO"
|
||||
|
||||
#data
|
||||
FOO†ZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO†ZOO"
|
||||
|
||||
#data
|
||||
FOO‡ZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO‡ZOO"
|
||||
|
||||
#data
|
||||
FOOˆZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOˆZOO"
|
||||
|
||||
#data
|
||||
FOO‰ZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO‰ZOO"
|
||||
|
||||
#data
|
||||
FOOŠZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOŠZOO"
|
||||
|
||||
#data
|
||||
FOO‹ZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO‹ZOO"
|
||||
|
||||
#data
|
||||
FOOŒZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOŒZOO"
|
||||
|
||||
#data
|
||||
FOOZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOZOO"
|
||||
|
||||
#data
|
||||
FOOŽZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOŽZOO"
|
||||
|
||||
#data
|
||||
FOOZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOZOO"
|
||||
|
||||
#data
|
||||
FOOZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOZOO"
|
||||
|
||||
#data
|
||||
FOO‘ZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO‘ZOO"
|
||||
|
||||
#data
|
||||
FOO’ZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO’ZOO"
|
||||
|
||||
#data
|
||||
FOO“ZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO“ZOO"
|
||||
|
||||
#data
|
||||
FOO”ZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO”ZOO"
|
||||
|
||||
#data
|
||||
FOO•ZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO•ZOO"
|
||||
|
||||
#data
|
||||
FOO–ZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO–ZOO"
|
||||
|
||||
#data
|
||||
FOO—ZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO—ZOO"
|
||||
|
||||
#data
|
||||
FOO˜ZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO˜ZOO"
|
||||
|
||||
#data
|
||||
FOO™ZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO™ZOO"
|
||||
|
||||
#data
|
||||
FOOšZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOšZOO"
|
||||
|
||||
#data
|
||||
FOO›ZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO›ZOO"
|
||||
|
||||
#data
|
||||
FOOœZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOœZOO"
|
||||
|
||||
#data
|
||||
FOOZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOZOO"
|
||||
|
||||
#data
|
||||
FOOžZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOžZOO"
|
||||
|
||||
#data
|
||||
FOOŸZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOŸZOO"
|
||||
|
||||
#data
|
||||
FOO ZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO ZOO"
|
||||
|
||||
#data
|
||||
FOO퟿ZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOZOO"
|
||||
|
||||
#data
|
||||
FOO�ZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO<4F>ZOO"
|
||||
|
||||
#data
|
||||
FOO�ZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO<4F>ZOO"
|
||||
|
||||
#data
|
||||
FOO�ZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO<4F>ZOO"
|
||||
|
||||
#data
|
||||
FOO�ZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO<4F>ZOO"
|
||||
|
||||
#data
|
||||
FOOZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOZOO"
|
||||
|
||||
#data
|
||||
FOOZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOZOO"
|
||||
|
||||
#data
|
||||
FOO􈟔ZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOZOO"
|
||||
|
||||
#data
|
||||
FOOZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOOZOO"
|
||||
|
||||
#data
|
||||
FOO�ZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO<4F>ZOO"
|
||||
|
||||
#data
|
||||
FOO�ZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO<4F>ZOO"
|
||||
249
src/pkg/exp/html/testdata/webkit/entities02.dat
vendored
249
src/pkg/exp/html/testdata/webkit/entities02.dat
vendored
@@ -1,249 +0,0 @@
|
||||
#data
|
||||
<div bar="ZZ>YY"></div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| bar="ZZ>YY"
|
||||
|
||||
#data
|
||||
<div bar="ZZ&"></div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| bar="ZZ&"
|
||||
|
||||
#data
|
||||
<div bar='ZZ&'></div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| bar="ZZ&"
|
||||
|
||||
#data
|
||||
<div bar=ZZ&></div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| bar="ZZ&"
|
||||
|
||||
#data
|
||||
<div bar="ZZ>=YY"></div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| bar="ZZ>=YY"
|
||||
|
||||
#data
|
||||
<div bar="ZZ>0YY"></div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| bar="ZZ>0YY"
|
||||
|
||||
#data
|
||||
<div bar="ZZ>9YY"></div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| bar="ZZ>9YY"
|
||||
|
||||
#data
|
||||
<div bar="ZZ>aYY"></div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| bar="ZZ>aYY"
|
||||
|
||||
#data
|
||||
<div bar="ZZ>ZYY"></div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| bar="ZZ>ZYY"
|
||||
|
||||
#data
|
||||
<div bar="ZZ> YY"></div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| bar="ZZ> YY"
|
||||
|
||||
#data
|
||||
<div bar="ZZ>"></div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| bar="ZZ>"
|
||||
|
||||
#data
|
||||
<div bar='ZZ>'></div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| bar="ZZ>"
|
||||
|
||||
#data
|
||||
<div bar=ZZ>></div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| bar="ZZ>"
|
||||
|
||||
#data
|
||||
<div bar="ZZ£_id=23"></div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| bar="ZZ£_id=23"
|
||||
|
||||
#data
|
||||
<div bar="ZZ&prod_id=23"></div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| bar="ZZ&prod_id=23"
|
||||
|
||||
#data
|
||||
<div bar="ZZ£_id=23"></div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| bar="ZZ£_id=23"
|
||||
|
||||
#data
|
||||
<div bar="ZZ∏_id=23"></div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| bar="ZZ∏_id=23"
|
||||
|
||||
#data
|
||||
<div bar="ZZ£=23"></div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| bar="ZZ£=23"
|
||||
|
||||
#data
|
||||
<div bar="ZZ&prod=23"></div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| bar="ZZ&prod=23"
|
||||
|
||||
#data
|
||||
<div>ZZ£_id=23</div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| "ZZ£_id=23"
|
||||
|
||||
#data
|
||||
<div>ZZ&prod_id=23</div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| "ZZ&prod_id=23"
|
||||
|
||||
#data
|
||||
<div>ZZ£_id=23</div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| "ZZ£_id=23"
|
||||
|
||||
#data
|
||||
<div>ZZ∏_id=23</div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| "ZZ∏_id=23"
|
||||
|
||||
#data
|
||||
<div>ZZ£=23</div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| "ZZ£=23"
|
||||
|
||||
#data
|
||||
<div>ZZ&prod=23</div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| "ZZ&prod=23"
|
||||
246
src/pkg/exp/html/testdata/webkit/html5test-com.dat
vendored
246
src/pkg/exp/html/testdata/webkit/html5test-com.dat
vendored
@@ -1,246 +0,0 @@
|
||||
#data
|
||||
<div<div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div<div>
|
||||
|
||||
#data
|
||||
<div foo<bar=''>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| foo<bar=""
|
||||
|
||||
#data
|
||||
<div foo=`bar`>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| foo="`bar`"
|
||||
|
||||
#data
|
||||
<div \"foo=''>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| \"foo=""
|
||||
|
||||
#data
|
||||
<a href='\nbar'></a>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <a>
|
||||
| href="\nbar"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
⟨⟩
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "⟨⟩"
|
||||
|
||||
#data
|
||||
'
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "'"
|
||||
|
||||
#data
|
||||
ⅈ
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "ⅈ"
|
||||
|
||||
#data
|
||||
𝕂
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "𝕂"
|
||||
|
||||
#data
|
||||
∉
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "∉"
|
||||
|
||||
#data
|
||||
<?import namespace="foo" implementation="#bar">
|
||||
#errors
|
||||
#document
|
||||
| <!-- ?import namespace="foo" implementation="#bar" -->
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!--foo--bar-->
|
||||
#errors
|
||||
#document
|
||||
| <!-- foo--bar -->
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<![CDATA[x]]>
|
||||
#errors
|
||||
#document
|
||||
| <!-- [CDATA[x]] -->
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<textarea><!--</textarea>--></textarea>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <textarea>
|
||||
| "<!--"
|
||||
| "-->"
|
||||
|
||||
#data
|
||||
<textarea><!--</textarea>-->
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <textarea>
|
||||
| "<!--"
|
||||
| "-->"
|
||||
|
||||
#data
|
||||
<style><!--</style>--></style>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <style>
|
||||
| "<!--"
|
||||
| <body>
|
||||
| "-->"
|
||||
|
||||
#data
|
||||
<style><!--</style>-->
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <style>
|
||||
| "<!--"
|
||||
| <body>
|
||||
| "-->"
|
||||
|
||||
#data
|
||||
<ul><li>A </li> <li>B</li></ul>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <ul>
|
||||
| <li>
|
||||
| "A "
|
||||
| " "
|
||||
| <li>
|
||||
| "B"
|
||||
|
||||
#data
|
||||
<table><form><input type=hidden><input></form><div></div></table>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <input>
|
||||
| <div>
|
||||
| <table>
|
||||
| <form>
|
||||
| <input>
|
||||
| type="hidden"
|
||||
|
||||
#data
|
||||
<i>A<b>B<p></i>C</b>D
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <i>
|
||||
| "A"
|
||||
| <b>
|
||||
| "B"
|
||||
| <b>
|
||||
| <p>
|
||||
| <b>
|
||||
| <i>
|
||||
| "C"
|
||||
| "D"
|
||||
|
||||
#data
|
||||
<div></div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
|
||||
#data
|
||||
<svg></svg>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
|
||||
#data
|
||||
<math></math>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
43
src/pkg/exp/html/testdata/webkit/inbody01.dat
vendored
43
src/pkg/exp/html/testdata/webkit/inbody01.dat
vendored
@@ -1,43 +0,0 @@
|
||||
#data
|
||||
<button>1</foo>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <button>
|
||||
| "1"
|
||||
|
||||
#data
|
||||
<foo>1<p>2</foo>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <foo>
|
||||
| "1"
|
||||
| <p>
|
||||
| "2"
|
||||
|
||||
#data
|
||||
<dd>1</foo>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <dd>
|
||||
| "1"
|
||||
|
||||
#data
|
||||
<foo>1<dd>2</foo>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <foo>
|
||||
| "1"
|
||||
| <dd>
|
||||
| "2"
|
||||
40
src/pkg/exp/html/testdata/webkit/isindex.dat
vendored
40
src/pkg/exp/html/testdata/webkit/isindex.dat
vendored
@@ -1,40 +0,0 @@
|
||||
#data
|
||||
<isindex>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <form>
|
||||
| <hr>
|
||||
| <label>
|
||||
| "This is a searchable index. Enter search keywords: "
|
||||
| <input>
|
||||
| name="isindex"
|
||||
| <hr>
|
||||
|
||||
#data
|
||||
<isindex name="A" action="B" prompt="C" foo="D">
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <form>
|
||||
| action="B"
|
||||
| <hr>
|
||||
| <label>
|
||||
| "C"
|
||||
| <input>
|
||||
| foo="D"
|
||||
| name="isindex"
|
||||
| <hr>
|
||||
|
||||
#data
|
||||
<form><isindex>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <form>
|
||||
Binary file not shown.
@@ -1,28 +0,0 @@
|
||||
#data
|
||||
<input type="hidden"><frameset>
|
||||
#errors
|
||||
21: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
|
||||
31: “frameset” start tag seen.
|
||||
31: End of file seen and there were open elements.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <frameset>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><table><caption><svg>foo</table>bar
|
||||
#errors
|
||||
47: End tag “table” did not match the name of the current open element (“svg”).
|
||||
47: “table” closed but “caption” was still open.
|
||||
47: End tag “table” seen, but there were open elements.
|
||||
36: Unclosed element “svg”.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <caption>
|
||||
| <svg svg>
|
||||
| "foo"
|
||||
| "bar"
|
||||
@@ -1,8 +0,0 @@
|
||||
#data
|
||||
FOO
ZOO
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO
|
||||
308
src/pkg/exp/html/testdata/webkit/scriptdata01.dat
vendored
308
src/pkg/exp/html/testdata/webkit/scriptdata01.dat
vendored
@@ -1,308 +0,0 @@
|
||||
#data
|
||||
FOO<script>'Hello'</script>BAR
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <script>
|
||||
| "'Hello'"
|
||||
| "BAR"
|
||||
|
||||
#data
|
||||
FOO<script></script>BAR
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <script>
|
||||
| "BAR"
|
||||
|
||||
#data
|
||||
FOO<script></script >BAR
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <script>
|
||||
| "BAR"
|
||||
|
||||
#data
|
||||
FOO<script></script/>BAR
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <script>
|
||||
| "BAR"
|
||||
|
||||
#data
|
||||
FOO<script></script/ >BAR
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <script>
|
||||
| "BAR"
|
||||
|
||||
#data
|
||||
FOO<script type="text/plain"></scriptx>BAR
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <script>
|
||||
| type="text/plain"
|
||||
| "</scriptx>BAR"
|
||||
|
||||
#data
|
||||
FOO<script></script foo=">" dd>BAR
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <script>
|
||||
| "BAR"
|
||||
|
||||
#data
|
||||
FOO<script>'<'</script>BAR
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <script>
|
||||
| "'<'"
|
||||
| "BAR"
|
||||
|
||||
#data
|
||||
FOO<script>'<!'</script>BAR
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <script>
|
||||
| "'<!'"
|
||||
| "BAR"
|
||||
|
||||
#data
|
||||
FOO<script>'<!-'</script>BAR
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <script>
|
||||
| "'<!-'"
|
||||
| "BAR"
|
||||
|
||||
#data
|
||||
FOO<script>'<!--'</script>BAR
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <script>
|
||||
| "'<!--'"
|
||||
| "BAR"
|
||||
|
||||
#data
|
||||
FOO<script>'<!---'</script>BAR
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <script>
|
||||
| "'<!---'"
|
||||
| "BAR"
|
||||
|
||||
#data
|
||||
FOO<script>'<!-->'</script>BAR
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <script>
|
||||
| "'<!-->'"
|
||||
| "BAR"
|
||||
|
||||
#data
|
||||
FOO<script>'<!-->'</script>BAR
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <script>
|
||||
| "'<!-->'"
|
||||
| "BAR"
|
||||
|
||||
#data
|
||||
FOO<script>'<!-- potato'</script>BAR
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <script>
|
||||
| "'<!-- potato'"
|
||||
| "BAR"
|
||||
|
||||
#data
|
||||
FOO<script>'<!-- <sCrIpt'</script>BAR
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <script>
|
||||
| "'<!-- <sCrIpt'"
|
||||
| "BAR"
|
||||
|
||||
#data
|
||||
FOO<script type="text/plain">'<!-- <sCrIpt>'</script>BAR
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <script>
|
||||
| type="text/plain"
|
||||
| "'<!-- <sCrIpt>'</script>BAR"
|
||||
|
||||
#data
|
||||
FOO<script type="text/plain">'<!-- <sCrIpt> -'</script>BAR
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <script>
|
||||
| type="text/plain"
|
||||
| "'<!-- <sCrIpt> -'</script>BAR"
|
||||
|
||||
#data
|
||||
FOO<script type="text/plain">'<!-- <sCrIpt> --'</script>BAR
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <script>
|
||||
| type="text/plain"
|
||||
| "'<!-- <sCrIpt> --'</script>BAR"
|
||||
|
||||
#data
|
||||
FOO<script>'<!-- <sCrIpt> -->'</script>BAR
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <script>
|
||||
| "'<!-- <sCrIpt> -->'"
|
||||
| "BAR"
|
||||
|
||||
#data
|
||||
FOO<script type="text/plain">'<!-- <sCrIpt> --!>'</script>BAR
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <script>
|
||||
| type="text/plain"
|
||||
| "'<!-- <sCrIpt> --!>'</script>BAR"
|
||||
|
||||
#data
|
||||
FOO<script type="text/plain">'<!-- <sCrIpt> -- >'</script>BAR
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <script>
|
||||
| type="text/plain"
|
||||
| "'<!-- <sCrIpt> -- >'</script>BAR"
|
||||
|
||||
#data
|
||||
FOO<script type="text/plain">'<!-- <sCrIpt '</script>BAR
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <script>
|
||||
| type="text/plain"
|
||||
| "'<!-- <sCrIpt '</script>BAR"
|
||||
|
||||
#data
|
||||
FOO<script type="text/plain">'<!-- <sCrIpt/'</script>BAR
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <script>
|
||||
| type="text/plain"
|
||||
| "'<!-- <sCrIpt/'</script>BAR"
|
||||
|
||||
#data
|
||||
FOO<script type="text/plain">'<!-- <sCrIpt\'</script>BAR
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <script>
|
||||
| type="text/plain"
|
||||
| "'<!-- <sCrIpt\'"
|
||||
| "BAR"
|
||||
|
||||
#data
|
||||
FOO<script type="text/plain">'<!-- <sCrIpt/'</script>BAR</script>QUX
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "FOO"
|
||||
| <script>
|
||||
| type="text/plain"
|
||||
| "'<!-- <sCrIpt/'</script>BAR"
|
||||
| "QUX"
|
||||
@@ -1,15 +0,0 @@
|
||||
#data
|
||||
<p><b id="A"><script>document.getElementById("A").id = "B"</script></p>TEXT</b>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <b>
|
||||
| id="B"
|
||||
| <script>
|
||||
| "document.getElementById("A").id = "B""
|
||||
| <b>
|
||||
| id="A"
|
||||
| "TEXT"
|
||||
@@ -1,28 +0,0 @@
|
||||
#data
|
||||
1<script>document.write("2")</script>3
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "1"
|
||||
| <script>
|
||||
| "document.write("2")"
|
||||
| "23"
|
||||
|
||||
#data
|
||||
1<script>document.write("<script>document.write('2')</scr"+ "ipt><script>document.write('3')</scr" + "ipt>")</script>4
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "1"
|
||||
| <script>
|
||||
| "document.write("<script>document.write('2')</scr"+ "ipt><script>document.write('3')</scr" + "ipt>")"
|
||||
| <script>
|
||||
| "document.write('2')"
|
||||
| "2"
|
||||
| <script>
|
||||
| "document.write('3')"
|
||||
| "34"
|
||||
197
src/pkg/exp/html/testdata/webkit/tables01.dat
vendored
197
src/pkg/exp/html/testdata/webkit/tables01.dat
vendored
@@ -1,197 +0,0 @@
|
||||
#data
|
||||
<table><th>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <th>
|
||||
|
||||
#data
|
||||
<table><td>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
|
||||
#data
|
||||
<table><col foo='bar'>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <colgroup>
|
||||
| <col>
|
||||
| foo="bar"
|
||||
|
||||
#data
|
||||
<table><colgroup></html>foo
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "foo"
|
||||
| <table>
|
||||
| <colgroup>
|
||||
|
||||
#data
|
||||
<table></table><p>foo
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <p>
|
||||
| "foo"
|
||||
|
||||
#data
|
||||
<table></body></caption></col></colgroup></html></tbody></td></tfoot></th></thead></tr><td>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
|
||||
#data
|
||||
<table><select><option>3</select></table>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <select>
|
||||
| <option>
|
||||
| "3"
|
||||
| <table>
|
||||
|
||||
#data
|
||||
<table><select><table></table></select></table>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <select>
|
||||
| <table>
|
||||
| <table>
|
||||
|
||||
#data
|
||||
<table><select></table>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <select>
|
||||
| <table>
|
||||
|
||||
#data
|
||||
<table><select><option>A<tr><td>B</td></tr></table>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <select>
|
||||
| <option>
|
||||
| "A"
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| "B"
|
||||
|
||||
#data
|
||||
<table><td></body></caption></col></colgroup></html>foo
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| "foo"
|
||||
|
||||
#data
|
||||
<table><td>A</table>B
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| "A"
|
||||
| "B"
|
||||
|
||||
#data
|
||||
<table><tr><caption>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <caption>
|
||||
|
||||
#data
|
||||
<table><tr></body></caption></col></colgroup></html></td></th><td>foo
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| "foo"
|
||||
|
||||
#data
|
||||
<table><td><tr>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| <tr>
|
||||
|
||||
#data
|
||||
<table><td><button><td>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| <button>
|
||||
| <td>
|
||||
1952
src/pkg/exp/html/testdata/webkit/tests1.dat
vendored
1952
src/pkg/exp/html/testdata/webkit/tests1.dat
vendored
File diff suppressed because it is too large
Load Diff
799
src/pkg/exp/html/testdata/webkit/tests10.dat
vendored
799
src/pkg/exp/html/testdata/webkit/tests10.dat
vendored
@@ -1,799 +0,0 @@
|
||||
#data
|
||||
<!DOCTYPE html><svg></svg>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><svg></svg><![CDATA[a]]>
|
||||
#errors
|
||||
29: Bogus comment
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| <!-- [CDATA[a]] -->
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><svg></svg>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><select><svg></svg></select>
|
||||
#errors
|
||||
35: Stray “svg” start tag.
|
||||
42: Stray end tag “svg”
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <select>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><select><option><svg></svg></option></select>
|
||||
#errors
|
||||
43: Stray “svg” start tag.
|
||||
50: Stray end tag “svg”
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <select>
|
||||
| <option>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><table><svg></svg></table>
|
||||
#errors
|
||||
34: Start tag “svg” seen in “table”.
|
||||
41: Stray end tag “svg”.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| <table>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><table><svg><g>foo</g></svg></table>
|
||||
#errors
|
||||
34: Start tag “svg” seen in “table”.
|
||||
46: Stray end tag “g”.
|
||||
53: Stray end tag “svg”.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| <svg g>
|
||||
| "foo"
|
||||
| <table>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><table><svg><g>foo</g><g>bar</g></svg></table>
|
||||
#errors
|
||||
34: Start tag “svg” seen in “table”.
|
||||
46: Stray end tag “g”.
|
||||
58: Stray end tag “g”.
|
||||
65: Stray end tag “svg”.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| <svg g>
|
||||
| "foo"
|
||||
| <svg g>
|
||||
| "bar"
|
||||
| <table>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><table><tbody><svg><g>foo</g><g>bar</g></svg></tbody></table>
|
||||
#errors
|
||||
41: Start tag “svg” seen in “table”.
|
||||
53: Stray end tag “g”.
|
||||
65: Stray end tag “g”.
|
||||
72: Stray end tag “svg”.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| <svg g>
|
||||
| "foo"
|
||||
| <svg g>
|
||||
| "bar"
|
||||
| <table>
|
||||
| <tbody>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><table><tbody><tr><svg><g>foo</g><g>bar</g></svg></tr></tbody></table>
|
||||
#errors
|
||||
45: Start tag “svg” seen in “table”.
|
||||
57: Stray end tag “g”.
|
||||
69: Stray end tag “g”.
|
||||
76: Stray end tag “svg”.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| <svg g>
|
||||
| "foo"
|
||||
| <svg g>
|
||||
| "bar"
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><table><tbody><tr><td><svg><g>foo</g><g>bar</g></svg></td></tr></tbody></table>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| <svg svg>
|
||||
| <svg g>
|
||||
| "foo"
|
||||
| <svg g>
|
||||
| "bar"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><table><tbody><tr><td><svg><g>foo</g><g>bar</g></svg><p>baz</td></tr></tbody></table>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| <svg svg>
|
||||
| <svg g>
|
||||
| "foo"
|
||||
| <svg g>
|
||||
| "bar"
|
||||
| <p>
|
||||
| "baz"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g></svg><p>baz</caption></table>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <caption>
|
||||
| <svg svg>
|
||||
| <svg g>
|
||||
| "foo"
|
||||
| <svg g>
|
||||
| "bar"
|
||||
| <p>
|
||||
| "baz"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
|
||||
#errors
|
||||
70: HTML start tag “p” in a foreign namespace context.
|
||||
81: “table” closed but “caption” was still open.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <caption>
|
||||
| <svg svg>
|
||||
| <svg g>
|
||||
| "foo"
|
||||
| <svg g>
|
||||
| "bar"
|
||||
| <p>
|
||||
| "baz"
|
||||
| <p>
|
||||
| "quux"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g>baz</table><p>quux
|
||||
#errors
|
||||
78: “table” closed but “caption” was still open.
|
||||
78: Unclosed elements on stack.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <caption>
|
||||
| <svg svg>
|
||||
| <svg g>
|
||||
| "foo"
|
||||
| <svg g>
|
||||
| "bar"
|
||||
| "baz"
|
||||
| <p>
|
||||
| "quux"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><table><colgroup><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
|
||||
#errors
|
||||
44: Start tag “svg” seen in “table”.
|
||||
56: Stray end tag “g”.
|
||||
68: Stray end tag “g”.
|
||||
71: HTML start tag “p” in a foreign namespace context.
|
||||
71: Start tag “p” seen in “table”.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| <svg g>
|
||||
| "foo"
|
||||
| <svg g>
|
||||
| "bar"
|
||||
| <p>
|
||||
| "baz"
|
||||
| <table>
|
||||
| <colgroup>
|
||||
| <p>
|
||||
| "quux"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><table><tr><td><select><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
|
||||
#errors
|
||||
50: Stray “svg” start tag.
|
||||
54: Stray “g” start tag.
|
||||
62: Stray end tag “g”
|
||||
66: Stray “g” start tag.
|
||||
74: Stray end tag “g”
|
||||
77: Stray “p” start tag.
|
||||
88: “table” end tag with “select” open.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| <select>
|
||||
| "foobarbaz"
|
||||
| <p>
|
||||
| "quux"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><table><select><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
|
||||
#errors
|
||||
36: Start tag “select” seen in “table”.
|
||||
42: Stray “svg” start tag.
|
||||
46: Stray “g” start tag.
|
||||
54: Stray end tag “g”
|
||||
58: Stray “g” start tag.
|
||||
66: Stray end tag “g”
|
||||
69: Stray “p” start tag.
|
||||
80: “table” end tag with “select” open.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <select>
|
||||
| "foobarbaz"
|
||||
| <table>
|
||||
| <p>
|
||||
| "quux"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body></body></html><svg><g>foo</g><g>bar</g><p>baz
|
||||
#errors
|
||||
41: Stray “svg” start tag.
|
||||
68: HTML start tag “p” in a foreign namespace context.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| <svg g>
|
||||
| "foo"
|
||||
| <svg g>
|
||||
| "bar"
|
||||
| <p>
|
||||
| "baz"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body></body><svg><g>foo</g><g>bar</g><p>baz
|
||||
#errors
|
||||
34: Stray “svg” start tag.
|
||||
61: HTML start tag “p” in a foreign namespace context.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| <svg g>
|
||||
| "foo"
|
||||
| <svg g>
|
||||
| "bar"
|
||||
| <p>
|
||||
| "baz"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><frameset><svg><g></g><g></g><p><span>
|
||||
#errors
|
||||
31: Stray “svg” start tag.
|
||||
35: Stray “g” start tag.
|
||||
40: Stray end tag “g”
|
||||
44: Stray “g” start tag.
|
||||
49: Stray end tag “g”
|
||||
52: Stray “p” start tag.
|
||||
58: Stray “span” start tag.
|
||||
58: End of file seen and there were open elements.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <frameset>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><frameset></frameset><svg><g></g><g></g><p><span>
|
||||
#errors
|
||||
42: Stray “svg” start tag.
|
||||
46: Stray “g” start tag.
|
||||
51: Stray end tag “g”
|
||||
55: Stray “g” start tag.
|
||||
60: Stray end tag “g”
|
||||
63: Stray “p” start tag.
|
||||
69: Stray “span” start tag.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <frameset>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body xlink:href=foo><svg xlink:href=foo></svg>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| xlink:href="foo"
|
||||
| <svg svg>
|
||||
| xlink href="foo"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo></g></svg>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| xlink:href="foo"
|
||||
| xml:lang="en"
|
||||
| <svg svg>
|
||||
| <svg g>
|
||||
| xlink href="foo"
|
||||
| xml lang="en"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo /></svg>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| xlink:href="foo"
|
||||
| xml:lang="en"
|
||||
| <svg svg>
|
||||
| <svg g>
|
||||
| xlink href="foo"
|
||||
| xml lang="en"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo />bar</svg>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| xlink:href="foo"
|
||||
| xml:lang="en"
|
||||
| <svg svg>
|
||||
| <svg g>
|
||||
| xlink href="foo"
|
||||
| xml lang="en"
|
||||
| "bar"
|
||||
|
||||
#data
|
||||
<svg></path>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
|
||||
#data
|
||||
<div><svg></div>a
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| <svg svg>
|
||||
| "a"
|
||||
|
||||
#data
|
||||
<div><svg><path></div>a
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| <svg svg>
|
||||
| <svg path>
|
||||
| "a"
|
||||
|
||||
#data
|
||||
<div><svg><path></svg><path>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| <svg svg>
|
||||
| <svg path>
|
||||
| <path>
|
||||
|
||||
#data
|
||||
<div><svg><path><foreignObject><math></div>a
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| <svg svg>
|
||||
| <svg path>
|
||||
| <svg foreignObject>
|
||||
| <math math>
|
||||
| "a"
|
||||
|
||||
#data
|
||||
<div><svg><path><foreignObject><p></div>a
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| <svg svg>
|
||||
| <svg path>
|
||||
| <svg foreignObject>
|
||||
| <p>
|
||||
| "a"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><svg><desc><div><svg><ul>a
|
||||
#errors
|
||||
40: HTML start tag “ul” in a foreign namespace context.
|
||||
41: End of file in a foreign namespace context.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| <svg desc>
|
||||
| <div>
|
||||
| <svg svg>
|
||||
| <ul>
|
||||
| "a"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><svg><desc><svg><ul>a
|
||||
#errors
|
||||
35: HTML start tag “ul” in a foreign namespace context.
|
||||
36: End of file in a foreign namespace context.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| <svg desc>
|
||||
| <svg svg>
|
||||
| <ul>
|
||||
| "a"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><p><svg><desc><p>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <svg svg>
|
||||
| <svg desc>
|
||||
| <p>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><p><svg><title><p>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <svg svg>
|
||||
| <svg title>
|
||||
| <p>
|
||||
|
||||
#data
|
||||
<div><svg><path><foreignObject><p></foreignObject><p>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| <svg svg>
|
||||
| <svg path>
|
||||
| <svg foreignObject>
|
||||
| <p>
|
||||
| <p>
|
||||
|
||||
#data
|
||||
<math><mi><div><object><div><span></span></div></object></div></mi><mi>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
| <math mi>
|
||||
| <div>
|
||||
| <object>
|
||||
| <div>
|
||||
| <span>
|
||||
| <math mi>
|
||||
|
||||
#data
|
||||
<math><mi><svg><foreignObject><div><div></div></div></foreignObject></svg></mi><mi>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
| <math mi>
|
||||
| <svg svg>
|
||||
| <svg foreignObject>
|
||||
| <div>
|
||||
| <div>
|
||||
| <math mi>
|
||||
|
||||
#data
|
||||
<svg><script></script><path>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| <svg script>
|
||||
| <svg path>
|
||||
|
||||
#data
|
||||
<table><svg></svg><tr>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
|
||||
#data
|
||||
<math><mi><mglyph>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
| <math mi>
|
||||
| <math mglyph>
|
||||
|
||||
#data
|
||||
<math><mi><malignmark>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
| <math mi>
|
||||
| <math malignmark>
|
||||
|
||||
#data
|
||||
<math><mo><mglyph>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
| <math mo>
|
||||
| <math mglyph>
|
||||
|
||||
#data
|
||||
<math><mo><malignmark>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
| <math mo>
|
||||
| <math malignmark>
|
||||
|
||||
#data
|
||||
<math><mn><mglyph>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
| <math mn>
|
||||
| <math mglyph>
|
||||
|
||||
#data
|
||||
<math><mn><malignmark>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
| <math mn>
|
||||
| <math malignmark>
|
||||
|
||||
#data
|
||||
<math><ms><mglyph>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
| <math ms>
|
||||
| <math mglyph>
|
||||
|
||||
#data
|
||||
<math><ms><malignmark>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
| <math ms>
|
||||
| <math malignmark>
|
||||
|
||||
#data
|
||||
<math><mtext><mglyph>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
| <math mtext>
|
||||
| <math mglyph>
|
||||
|
||||
#data
|
||||
<math><mtext><malignmark>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
| <math mtext>
|
||||
| <math malignmark>
|
||||
|
||||
#data
|
||||
<math><annotation-xml><svg></svg></annotation-xml><mi>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
| <math annotation-xml>
|
||||
| <svg svg>
|
||||
| <math mi>
|
||||
|
||||
#data
|
||||
<math><annotation-xml><svg><foreignObject><div><math><mi></mi></math><span></span></div></foreignObject><path></path></svg></annotation-xml><mi>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
| <math annotation-xml>
|
||||
| <svg svg>
|
||||
| <svg foreignObject>
|
||||
| <div>
|
||||
| <math math>
|
||||
| <math mi>
|
||||
| <span>
|
||||
| <svg path>
|
||||
| <math mi>
|
||||
|
||||
#data
|
||||
<math><annotation-xml><svg><foreignObject><math><mi><svg></svg></mi><mo></mo></math><span></span></foreignObject><path></path></svg></annotation-xml><mi>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
| <math annotation-xml>
|
||||
| <svg svg>
|
||||
| <svg foreignObject>
|
||||
| <math math>
|
||||
| <math mi>
|
||||
| <svg svg>
|
||||
| <math mo>
|
||||
| <span>
|
||||
| <svg path>
|
||||
| <math mi>
|
||||
482
src/pkg/exp/html/testdata/webkit/tests11.dat
vendored
482
src/pkg/exp/html/testdata/webkit/tests11.dat
vendored
@@ -1,482 +0,0 @@
|
||||
#data
|
||||
<!DOCTYPE html><body><svg attributeName='' attributeType='' baseFrequency='' baseProfile='' calcMode='' clipPathUnits='' contentScriptType='' contentStyleType='' diffuseConstant='' edgeMode='' externalResourcesRequired='' filterRes='' filterUnits='' glyphRef='' gradientTransform='' gradientUnits='' kernelMatrix='' kernelUnitLength='' keyPoints='' keySplines='' keyTimes='' lengthAdjust='' limitingConeAngle='' markerHeight='' markerUnits='' markerWidth='' maskContentUnits='' maskUnits='' numOctaves='' pathLength='' patternContentUnits='' patternTransform='' patternUnits='' pointsAtX='' pointsAtY='' pointsAtZ='' preserveAlpha='' preserveAspectRatio='' primitiveUnits='' refX='' refY='' repeatCount='' repeatDur='' requiredExtensions='' requiredFeatures='' specularConstant='' specularExponent='' spreadMethod='' startOffset='' stdDeviation='' stitchTiles='' surfaceScale='' systemLanguage='' tableValues='' targetX='' targetY='' textLength='' viewBox='' viewTarget='' xChannelSelector='' yChannelSelector='' zoomAndPan=''></svg>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| attributeName=""
|
||||
| attributeType=""
|
||||
| baseFrequency=""
|
||||
| baseProfile=""
|
||||
| calcMode=""
|
||||
| clipPathUnits=""
|
||||
| contentScriptType=""
|
||||
| contentStyleType=""
|
||||
| diffuseConstant=""
|
||||
| edgeMode=""
|
||||
| externalResourcesRequired=""
|
||||
| filterRes=""
|
||||
| filterUnits=""
|
||||
| glyphRef=""
|
||||
| gradientTransform=""
|
||||
| gradientUnits=""
|
||||
| kernelMatrix=""
|
||||
| kernelUnitLength=""
|
||||
| keyPoints=""
|
||||
| keySplines=""
|
||||
| keyTimes=""
|
||||
| lengthAdjust=""
|
||||
| limitingConeAngle=""
|
||||
| markerHeight=""
|
||||
| markerUnits=""
|
||||
| markerWidth=""
|
||||
| maskContentUnits=""
|
||||
| maskUnits=""
|
||||
| numOctaves=""
|
||||
| pathLength=""
|
||||
| patternContentUnits=""
|
||||
| patternTransform=""
|
||||
| patternUnits=""
|
||||
| pointsAtX=""
|
||||
| pointsAtY=""
|
||||
| pointsAtZ=""
|
||||
| preserveAlpha=""
|
||||
| preserveAspectRatio=""
|
||||
| primitiveUnits=""
|
||||
| refX=""
|
||||
| refY=""
|
||||
| repeatCount=""
|
||||
| repeatDur=""
|
||||
| requiredExtensions=""
|
||||
| requiredFeatures=""
|
||||
| specularConstant=""
|
||||
| specularExponent=""
|
||||
| spreadMethod=""
|
||||
| startOffset=""
|
||||
| stdDeviation=""
|
||||
| stitchTiles=""
|
||||
| surfaceScale=""
|
||||
| systemLanguage=""
|
||||
| tableValues=""
|
||||
| targetX=""
|
||||
| targetY=""
|
||||
| textLength=""
|
||||
| viewBox=""
|
||||
| viewTarget=""
|
||||
| xChannelSelector=""
|
||||
| yChannelSelector=""
|
||||
| zoomAndPan=""
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><BODY><SVG ATTRIBUTENAME='' ATTRIBUTETYPE='' BASEFREQUENCY='' BASEPROFILE='' CALCMODE='' CLIPPATHUNITS='' CONTENTSCRIPTTYPE='' CONTENTSTYLETYPE='' DIFFUSECONSTANT='' EDGEMODE='' EXTERNALRESOURCESREQUIRED='' FILTERRES='' FILTERUNITS='' GLYPHREF='' GRADIENTTRANSFORM='' GRADIENTUNITS='' KERNELMATRIX='' KERNELUNITLENGTH='' KEYPOINTS='' KEYSPLINES='' KEYTIMES='' LENGTHADJUST='' LIMITINGCONEANGLE='' MARKERHEIGHT='' MARKERUNITS='' MARKERWIDTH='' MASKCONTENTUNITS='' MASKUNITS='' NUMOCTAVES='' PATHLENGTH='' PATTERNCONTENTUNITS='' PATTERNTRANSFORM='' PATTERNUNITS='' POINTSATX='' POINTSATY='' POINTSATZ='' PRESERVEALPHA='' PRESERVEASPECTRATIO='' PRIMITIVEUNITS='' REFX='' REFY='' REPEATCOUNT='' REPEATDUR='' REQUIREDEXTENSIONS='' REQUIREDFEATURES='' SPECULARCONSTANT='' SPECULAREXPONENT='' SPREADMETHOD='' STARTOFFSET='' STDDEVIATION='' STITCHTILES='' SURFACESCALE='' SYSTEMLANGUAGE='' TABLEVALUES='' TARGETX='' TARGETY='' TEXTLENGTH='' VIEWBOX='' VIEWTARGET='' XCHANNELSELECTOR='' YCHANNELSELECTOR='' ZOOMANDPAN=''></SVG>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| attributeName=""
|
||||
| attributeType=""
|
||||
| baseFrequency=""
|
||||
| baseProfile=""
|
||||
| calcMode=""
|
||||
| clipPathUnits=""
|
||||
| contentScriptType=""
|
||||
| contentStyleType=""
|
||||
| diffuseConstant=""
|
||||
| edgeMode=""
|
||||
| externalResourcesRequired=""
|
||||
| filterRes=""
|
||||
| filterUnits=""
|
||||
| glyphRef=""
|
||||
| gradientTransform=""
|
||||
| gradientUnits=""
|
||||
| kernelMatrix=""
|
||||
| kernelUnitLength=""
|
||||
| keyPoints=""
|
||||
| keySplines=""
|
||||
| keyTimes=""
|
||||
| lengthAdjust=""
|
||||
| limitingConeAngle=""
|
||||
| markerHeight=""
|
||||
| markerUnits=""
|
||||
| markerWidth=""
|
||||
| maskContentUnits=""
|
||||
| maskUnits=""
|
||||
| numOctaves=""
|
||||
| pathLength=""
|
||||
| patternContentUnits=""
|
||||
| patternTransform=""
|
||||
| patternUnits=""
|
||||
| pointsAtX=""
|
||||
| pointsAtY=""
|
||||
| pointsAtZ=""
|
||||
| preserveAlpha=""
|
||||
| preserveAspectRatio=""
|
||||
| primitiveUnits=""
|
||||
| refX=""
|
||||
| refY=""
|
||||
| repeatCount=""
|
||||
| repeatDur=""
|
||||
| requiredExtensions=""
|
||||
| requiredFeatures=""
|
||||
| specularConstant=""
|
||||
| specularExponent=""
|
||||
| spreadMethod=""
|
||||
| startOffset=""
|
||||
| stdDeviation=""
|
||||
| stitchTiles=""
|
||||
| surfaceScale=""
|
||||
| systemLanguage=""
|
||||
| tableValues=""
|
||||
| targetX=""
|
||||
| targetY=""
|
||||
| textLength=""
|
||||
| viewBox=""
|
||||
| viewTarget=""
|
||||
| xChannelSelector=""
|
||||
| yChannelSelector=""
|
||||
| zoomAndPan=""
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><svg attributename='' attributetype='' basefrequency='' baseprofile='' calcmode='' clippathunits='' contentscripttype='' contentstyletype='' diffuseconstant='' edgemode='' externalresourcesrequired='' filterres='' filterunits='' glyphref='' gradienttransform='' gradientunits='' kernelmatrix='' kernelunitlength='' keypoints='' keysplines='' keytimes='' lengthadjust='' limitingconeangle='' markerheight='' markerunits='' markerwidth='' maskcontentunits='' maskunits='' numoctaves='' pathlength='' patterncontentunits='' patterntransform='' patternunits='' pointsatx='' pointsaty='' pointsatz='' preservealpha='' preserveaspectratio='' primitiveunits='' refx='' refy='' repeatcount='' repeatdur='' requiredextensions='' requiredfeatures='' specularconstant='' specularexponent='' spreadmethod='' startoffset='' stddeviation='' stitchtiles='' surfacescale='' systemlanguage='' tablevalues='' targetx='' targety='' textlength='' viewbox='' viewtarget='' xchannelselector='' ychannelselector='' zoomandpan=''></svg>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| attributeName=""
|
||||
| attributeType=""
|
||||
| baseFrequency=""
|
||||
| baseProfile=""
|
||||
| calcMode=""
|
||||
| clipPathUnits=""
|
||||
| contentScriptType=""
|
||||
| contentStyleType=""
|
||||
| diffuseConstant=""
|
||||
| edgeMode=""
|
||||
| externalResourcesRequired=""
|
||||
| filterRes=""
|
||||
| filterUnits=""
|
||||
| glyphRef=""
|
||||
| gradientTransform=""
|
||||
| gradientUnits=""
|
||||
| kernelMatrix=""
|
||||
| kernelUnitLength=""
|
||||
| keyPoints=""
|
||||
| keySplines=""
|
||||
| keyTimes=""
|
||||
| lengthAdjust=""
|
||||
| limitingConeAngle=""
|
||||
| markerHeight=""
|
||||
| markerUnits=""
|
||||
| markerWidth=""
|
||||
| maskContentUnits=""
|
||||
| maskUnits=""
|
||||
| numOctaves=""
|
||||
| pathLength=""
|
||||
| patternContentUnits=""
|
||||
| patternTransform=""
|
||||
| patternUnits=""
|
||||
| pointsAtX=""
|
||||
| pointsAtY=""
|
||||
| pointsAtZ=""
|
||||
| preserveAlpha=""
|
||||
| preserveAspectRatio=""
|
||||
| primitiveUnits=""
|
||||
| refX=""
|
||||
| refY=""
|
||||
| repeatCount=""
|
||||
| repeatDur=""
|
||||
| requiredExtensions=""
|
||||
| requiredFeatures=""
|
||||
| specularConstant=""
|
||||
| specularExponent=""
|
||||
| spreadMethod=""
|
||||
| startOffset=""
|
||||
| stdDeviation=""
|
||||
| stitchTiles=""
|
||||
| surfaceScale=""
|
||||
| systemLanguage=""
|
||||
| tableValues=""
|
||||
| targetX=""
|
||||
| targetY=""
|
||||
| textLength=""
|
||||
| viewBox=""
|
||||
| viewTarget=""
|
||||
| xChannelSelector=""
|
||||
| yChannelSelector=""
|
||||
| zoomAndPan=""
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><math attributeName='' attributeType='' baseFrequency='' baseProfile='' calcMode='' clipPathUnits='' contentScriptType='' contentStyleType='' diffuseConstant='' edgeMode='' externalResourcesRequired='' filterRes='' filterUnits='' glyphRef='' gradientTransform='' gradientUnits='' kernelMatrix='' kernelUnitLength='' keyPoints='' keySplines='' keyTimes='' lengthAdjust='' limitingConeAngle='' markerHeight='' markerUnits='' markerWidth='' maskContentUnits='' maskUnits='' numOctaves='' pathLength='' patternContentUnits='' patternTransform='' patternUnits='' pointsAtX='' pointsAtY='' pointsAtZ='' preserveAlpha='' preserveAspectRatio='' primitiveUnits='' refX='' refY='' repeatCount='' repeatDur='' requiredExtensions='' requiredFeatures='' specularConstant='' specularExponent='' spreadMethod='' startOffset='' stdDeviation='' stitchTiles='' surfaceScale='' systemLanguage='' tableValues='' targetX='' targetY='' textLength='' viewBox='' viewTarget='' xChannelSelector='' yChannelSelector='' zoomAndPan=''></math>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
| attributename=""
|
||||
| attributetype=""
|
||||
| basefrequency=""
|
||||
| baseprofile=""
|
||||
| calcmode=""
|
||||
| clippathunits=""
|
||||
| contentscripttype=""
|
||||
| contentstyletype=""
|
||||
| diffuseconstant=""
|
||||
| edgemode=""
|
||||
| externalresourcesrequired=""
|
||||
| filterres=""
|
||||
| filterunits=""
|
||||
| glyphref=""
|
||||
| gradienttransform=""
|
||||
| gradientunits=""
|
||||
| kernelmatrix=""
|
||||
| kernelunitlength=""
|
||||
| keypoints=""
|
||||
| keysplines=""
|
||||
| keytimes=""
|
||||
| lengthadjust=""
|
||||
| limitingconeangle=""
|
||||
| markerheight=""
|
||||
| markerunits=""
|
||||
| markerwidth=""
|
||||
| maskcontentunits=""
|
||||
| maskunits=""
|
||||
| numoctaves=""
|
||||
| pathlength=""
|
||||
| patterncontentunits=""
|
||||
| patterntransform=""
|
||||
| patternunits=""
|
||||
| pointsatx=""
|
||||
| pointsaty=""
|
||||
| pointsatz=""
|
||||
| preservealpha=""
|
||||
| preserveaspectratio=""
|
||||
| primitiveunits=""
|
||||
| refx=""
|
||||
| refy=""
|
||||
| repeatcount=""
|
||||
| repeatdur=""
|
||||
| requiredextensions=""
|
||||
| requiredfeatures=""
|
||||
| specularconstant=""
|
||||
| specularexponent=""
|
||||
| spreadmethod=""
|
||||
| startoffset=""
|
||||
| stddeviation=""
|
||||
| stitchtiles=""
|
||||
| surfacescale=""
|
||||
| systemlanguage=""
|
||||
| tablevalues=""
|
||||
| targetx=""
|
||||
| targety=""
|
||||
| textlength=""
|
||||
| viewbox=""
|
||||
| viewtarget=""
|
||||
| xchannelselector=""
|
||||
| ychannelselector=""
|
||||
| zoomandpan=""
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><svg><altGlyph /><altGlyphDef /><altGlyphItem /><animateColor /><animateMotion /><animateTransform /><clipPath /><feBlend /><feColorMatrix /><feComponentTransfer /><feComposite /><feConvolveMatrix /><feDiffuseLighting /><feDisplacementMap /><feDistantLight /><feFlood /><feFuncA /><feFuncB /><feFuncG /><feFuncR /><feGaussianBlur /><feImage /><feMerge /><feMergeNode /><feMorphology /><feOffset /><fePointLight /><feSpecularLighting /><feSpotLight /><feTile /><feTurbulence /><foreignObject /><glyphRef /><linearGradient /><radialGradient /><textPath /></svg>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| <svg altGlyph>
|
||||
| <svg altGlyphDef>
|
||||
| <svg altGlyphItem>
|
||||
| <svg animateColor>
|
||||
| <svg animateMotion>
|
||||
| <svg animateTransform>
|
||||
| <svg clipPath>
|
||||
| <svg feBlend>
|
||||
| <svg feColorMatrix>
|
||||
| <svg feComponentTransfer>
|
||||
| <svg feComposite>
|
||||
| <svg feConvolveMatrix>
|
||||
| <svg feDiffuseLighting>
|
||||
| <svg feDisplacementMap>
|
||||
| <svg feDistantLight>
|
||||
| <svg feFlood>
|
||||
| <svg feFuncA>
|
||||
| <svg feFuncB>
|
||||
| <svg feFuncG>
|
||||
| <svg feFuncR>
|
||||
| <svg feGaussianBlur>
|
||||
| <svg feImage>
|
||||
| <svg feMerge>
|
||||
| <svg feMergeNode>
|
||||
| <svg feMorphology>
|
||||
| <svg feOffset>
|
||||
| <svg fePointLight>
|
||||
| <svg feSpecularLighting>
|
||||
| <svg feSpotLight>
|
||||
| <svg feTile>
|
||||
| <svg feTurbulence>
|
||||
| <svg foreignObject>
|
||||
| <svg glyphRef>
|
||||
| <svg linearGradient>
|
||||
| <svg radialGradient>
|
||||
| <svg textPath>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><svg><altglyph /><altglyphdef /><altglyphitem /><animatecolor /><animatemotion /><animatetransform /><clippath /><feblend /><fecolormatrix /><fecomponenttransfer /><fecomposite /><feconvolvematrix /><fediffuselighting /><fedisplacementmap /><fedistantlight /><feflood /><fefunca /><fefuncb /><fefuncg /><fefuncr /><fegaussianblur /><feimage /><femerge /><femergenode /><femorphology /><feoffset /><fepointlight /><fespecularlighting /><fespotlight /><fetile /><feturbulence /><foreignobject /><glyphref /><lineargradient /><radialgradient /><textpath /></svg>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| <svg altGlyph>
|
||||
| <svg altGlyphDef>
|
||||
| <svg altGlyphItem>
|
||||
| <svg animateColor>
|
||||
| <svg animateMotion>
|
||||
| <svg animateTransform>
|
||||
| <svg clipPath>
|
||||
| <svg feBlend>
|
||||
| <svg feColorMatrix>
|
||||
| <svg feComponentTransfer>
|
||||
| <svg feComposite>
|
||||
| <svg feConvolveMatrix>
|
||||
| <svg feDiffuseLighting>
|
||||
| <svg feDisplacementMap>
|
||||
| <svg feDistantLight>
|
||||
| <svg feFlood>
|
||||
| <svg feFuncA>
|
||||
| <svg feFuncB>
|
||||
| <svg feFuncG>
|
||||
| <svg feFuncR>
|
||||
| <svg feGaussianBlur>
|
||||
| <svg feImage>
|
||||
| <svg feMerge>
|
||||
| <svg feMergeNode>
|
||||
| <svg feMorphology>
|
||||
| <svg feOffset>
|
||||
| <svg fePointLight>
|
||||
| <svg feSpecularLighting>
|
||||
| <svg feSpotLight>
|
||||
| <svg feTile>
|
||||
| <svg feTurbulence>
|
||||
| <svg foreignObject>
|
||||
| <svg glyphRef>
|
||||
| <svg linearGradient>
|
||||
| <svg radialGradient>
|
||||
| <svg textPath>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><BODY><SVG><ALTGLYPH /><ALTGLYPHDEF /><ALTGLYPHITEM /><ANIMATECOLOR /><ANIMATEMOTION /><ANIMATETRANSFORM /><CLIPPATH /><FEBLEND /><FECOLORMATRIX /><FECOMPONENTTRANSFER /><FECOMPOSITE /><FECONVOLVEMATRIX /><FEDIFFUSELIGHTING /><FEDISPLACEMENTMAP /><FEDISTANTLIGHT /><FEFLOOD /><FEFUNCA /><FEFUNCB /><FEFUNCG /><FEFUNCR /><FEGAUSSIANBLUR /><FEIMAGE /><FEMERGE /><FEMERGENODE /><FEMORPHOLOGY /><FEOFFSET /><FEPOINTLIGHT /><FESPECULARLIGHTING /><FESPOTLIGHT /><FETILE /><FETURBULENCE /><FOREIGNOBJECT /><GLYPHREF /><LINEARGRADIENT /><RADIALGRADIENT /><TEXTPATH /></SVG>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| <svg altGlyph>
|
||||
| <svg altGlyphDef>
|
||||
| <svg altGlyphItem>
|
||||
| <svg animateColor>
|
||||
| <svg animateMotion>
|
||||
| <svg animateTransform>
|
||||
| <svg clipPath>
|
||||
| <svg feBlend>
|
||||
| <svg feColorMatrix>
|
||||
| <svg feComponentTransfer>
|
||||
| <svg feComposite>
|
||||
| <svg feConvolveMatrix>
|
||||
| <svg feDiffuseLighting>
|
||||
| <svg feDisplacementMap>
|
||||
| <svg feDistantLight>
|
||||
| <svg feFlood>
|
||||
| <svg feFuncA>
|
||||
| <svg feFuncB>
|
||||
| <svg feFuncG>
|
||||
| <svg feFuncR>
|
||||
| <svg feGaussianBlur>
|
||||
| <svg feImage>
|
||||
| <svg feMerge>
|
||||
| <svg feMergeNode>
|
||||
| <svg feMorphology>
|
||||
| <svg feOffset>
|
||||
| <svg fePointLight>
|
||||
| <svg feSpecularLighting>
|
||||
| <svg feSpotLight>
|
||||
| <svg feTile>
|
||||
| <svg feTurbulence>
|
||||
| <svg foreignObject>
|
||||
| <svg glyphRef>
|
||||
| <svg linearGradient>
|
||||
| <svg radialGradient>
|
||||
| <svg textPath>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><math><altGlyph /><altGlyphDef /><altGlyphItem /><animateColor /><animateMotion /><animateTransform /><clipPath /><feBlend /><feColorMatrix /><feComponentTransfer /><feComposite /><feConvolveMatrix /><feDiffuseLighting /><feDisplacementMap /><feDistantLight /><feFlood /><feFuncA /><feFuncB /><feFuncG /><feFuncR /><feGaussianBlur /><feImage /><feMerge /><feMergeNode /><feMorphology /><feOffset /><fePointLight /><feSpecularLighting /><feSpotLight /><feTile /><feTurbulence /><foreignObject /><glyphRef /><linearGradient /><radialGradient /><textPath /></math>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
| <math altglyph>
|
||||
| <math altglyphdef>
|
||||
| <math altglyphitem>
|
||||
| <math animatecolor>
|
||||
| <math animatemotion>
|
||||
| <math animatetransform>
|
||||
| <math clippath>
|
||||
| <math feblend>
|
||||
| <math fecolormatrix>
|
||||
| <math fecomponenttransfer>
|
||||
| <math fecomposite>
|
||||
| <math feconvolvematrix>
|
||||
| <math fediffuselighting>
|
||||
| <math fedisplacementmap>
|
||||
| <math fedistantlight>
|
||||
| <math feflood>
|
||||
| <math fefunca>
|
||||
| <math fefuncb>
|
||||
| <math fefuncg>
|
||||
| <math fefuncr>
|
||||
| <math fegaussianblur>
|
||||
| <math feimage>
|
||||
| <math femerge>
|
||||
| <math femergenode>
|
||||
| <math femorphology>
|
||||
| <math feoffset>
|
||||
| <math fepointlight>
|
||||
| <math fespecularlighting>
|
||||
| <math fespotlight>
|
||||
| <math fetile>
|
||||
| <math feturbulence>
|
||||
| <math foreignobject>
|
||||
| <math glyphref>
|
||||
| <math lineargradient>
|
||||
| <math radialgradient>
|
||||
| <math textpath>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><svg><solidColor /></svg>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| <svg solidcolor>
|
||||
62
src/pkg/exp/html/testdata/webkit/tests12.dat
vendored
62
src/pkg/exp/html/testdata/webkit/tests12.dat
vendored
@@ -1,62 +0,0 @@
|
||||
#data
|
||||
<!DOCTYPE html><body><p>foo<math><mtext><i>baz</i></mtext><annotation-xml><svg><desc><b>eggs</b></desc><g><foreignObject><P>spam<TABLE><tr><td><img></td></table></foreignObject></g><g>quux</g></svg></annotation-xml></math>bar
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| "foo"
|
||||
| <math math>
|
||||
| <math mtext>
|
||||
| <i>
|
||||
| "baz"
|
||||
| <math annotation-xml>
|
||||
| <svg svg>
|
||||
| <svg desc>
|
||||
| <b>
|
||||
| "eggs"
|
||||
| <svg g>
|
||||
| <svg foreignObject>
|
||||
| <p>
|
||||
| "spam"
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| <img>
|
||||
| <svg g>
|
||||
| "quux"
|
||||
| "bar"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body>foo<math><mtext><i>baz</i></mtext><annotation-xml><svg><desc><b>eggs</b></desc><g><foreignObject><P>spam<TABLE><tr><td><img></td></table></foreignObject></g><g>quux</g></svg></annotation-xml></math>bar
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "foo"
|
||||
| <math math>
|
||||
| <math mtext>
|
||||
| <i>
|
||||
| "baz"
|
||||
| <math annotation-xml>
|
||||
| <svg svg>
|
||||
| <svg desc>
|
||||
| <b>
|
||||
| "eggs"
|
||||
| <svg g>
|
||||
| <svg foreignObject>
|
||||
| <p>
|
||||
| "spam"
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| <img>
|
||||
| <svg g>
|
||||
| "quux"
|
||||
| "bar"
|
||||
74
src/pkg/exp/html/testdata/webkit/tests14.dat
vendored
74
src/pkg/exp/html/testdata/webkit/tests14.dat
vendored
@@ -1,74 +0,0 @@
|
||||
#data
|
||||
<!DOCTYPE html><html><body><xyz:abc></xyz:abc>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <xyz:abc>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><html><body><xyz:abc></xyz:abc><span></span>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <xyz:abc>
|
||||
| <span>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><html><html abc:def=gh><xyz:abc></xyz:abc>
|
||||
#errors
|
||||
15: Unexpected start tag html
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| abc:def="gh"
|
||||
| <head>
|
||||
| <body>
|
||||
| <xyz:abc>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><html xml:lang=bar><html xml:lang=foo>
|
||||
#errors
|
||||
15: Unexpected start tag html
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| xml:lang="bar"
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><html 123=456>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| 123="456"
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><html 123=456><html 789=012>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| 123="456"
|
||||
| 789="012"
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><html><body 789=012>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| 789="012"
|
||||
208
src/pkg/exp/html/testdata/webkit/tests15.dat
vendored
208
src/pkg/exp/html/testdata/webkit/tests15.dat
vendored
@@ -1,208 +0,0 @@
|
||||
#data
|
||||
<!DOCTYPE html><p><b><i><u></p> <p>X
|
||||
#errors
|
||||
Line: 1 Col: 31 Unexpected end tag (p). Ignored.
|
||||
Line: 1 Col: 36 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <b>
|
||||
| <i>
|
||||
| <u>
|
||||
| <b>
|
||||
| <i>
|
||||
| <u>
|
||||
| " "
|
||||
| <p>
|
||||
| "X"
|
||||
|
||||
#data
|
||||
<p><b><i><u></p>
|
||||
<p>X
|
||||
#errors
|
||||
Line: 1 Col: 3 Unexpected start tag (p). Expected DOCTYPE.
|
||||
Line: 1 Col: 16 Unexpected end tag (p). Ignored.
|
||||
Line: 2 Col: 4 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <b>
|
||||
| <i>
|
||||
| <u>
|
||||
| <b>
|
||||
| <i>
|
||||
| <u>
|
||||
| "
|
||||
"
|
||||
| <p>
|
||||
| "X"
|
||||
|
||||
#data
|
||||
<!doctype html></html> <head>
|
||||
#errors
|
||||
Line: 1 Col: 22 Unexpected end tag (html) after the (implied) root element.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| " "
|
||||
|
||||
#data
|
||||
<!doctype html></body><meta>
|
||||
#errors
|
||||
Line: 1 Col: 22 Unexpected end tag (body) after the (implied) root element.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <meta>
|
||||
|
||||
#data
|
||||
<html></html><!-- foo -->
|
||||
#errors
|
||||
Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
|
||||
Line: 1 Col: 13 Unexpected end tag (html) after the (implied) root element.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <!-- foo -->
|
||||
|
||||
#data
|
||||
<!doctype html></body><title>X</title>
|
||||
#errors
|
||||
Line: 1 Col: 22 Unexpected end tag (body) after the (implied) root element.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <title>
|
||||
| "X"
|
||||
|
||||
#data
|
||||
<!doctype html><table> X<meta></table>
|
||||
#errors
|
||||
Line: 1 Col: 24 Unexpected non-space characters in table context caused voodoo mode.
|
||||
Line: 1 Col: 30 Unexpected start tag (meta) in table context caused voodoo mode.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| " X"
|
||||
| <meta>
|
||||
| <table>
|
||||
|
||||
#data
|
||||
<!doctype html><table> x</table>
|
||||
#errors
|
||||
Line: 1 Col: 24 Unexpected non-space characters in table context caused voodoo mode.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| " x"
|
||||
| <table>
|
||||
|
||||
#data
|
||||
<!doctype html><table> x </table>
|
||||
#errors
|
||||
Line: 1 Col: 25 Unexpected non-space characters in table context caused voodoo mode.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| " x "
|
||||
| <table>
|
||||
|
||||
#data
|
||||
<!doctype html><table><tr> x</table>
|
||||
#errors
|
||||
Line: 1 Col: 28 Unexpected non-space characters in table context caused voodoo mode.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| " x"
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
|
||||
#data
|
||||
<!doctype html><table>X<style> <tr>x </style> </table>
|
||||
#errors
|
||||
Line: 1 Col: 23 Unexpected non-space characters in table context caused voodoo mode.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "X"
|
||||
| <table>
|
||||
| <style>
|
||||
| " <tr>x "
|
||||
| " "
|
||||
|
||||
#data
|
||||
<!doctype html><div><table><a>foo</a> <tr><td>bar</td> </tr></table></div>
|
||||
#errors
|
||||
Line: 1 Col: 30 Unexpected start tag (a) in table context caused voodoo mode.
|
||||
Line: 1 Col: 37 Unexpected end tag (a) in table context caused voodoo mode.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| <a>
|
||||
| "foo"
|
||||
| <table>
|
||||
| " "
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| "bar"
|
||||
| " "
|
||||
|
||||
#data
|
||||
<frame></frame></frame><frameset><frame><frameset><frame></frameset><noframes></frameset><noframes>
|
||||
#errors
|
||||
6: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
|
||||
13: Stray start tag “frame”.
|
||||
21: Stray end tag “frame”.
|
||||
29: Stray end tag “frame”.
|
||||
39: “frameset” start tag after “body” already open.
|
||||
105: End of file seen inside an [R]CDATA element.
|
||||
105: End of file seen and there were open elements.
|
||||
XXX: These errors are wrong, please fix me!
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <frameset>
|
||||
| <frame>
|
||||
| <frameset>
|
||||
| <frame>
|
||||
| <noframes>
|
||||
| "</frameset><noframes>"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><object></html>
|
||||
#errors
|
||||
1: Expected closing tag. Unexpected end of file
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <object>
|
||||
2277
src/pkg/exp/html/testdata/webkit/tests16.dat
vendored
2277
src/pkg/exp/html/testdata/webkit/tests16.dat
vendored
File diff suppressed because it is too large
Load Diff
153
src/pkg/exp/html/testdata/webkit/tests17.dat
vendored
153
src/pkg/exp/html/testdata/webkit/tests17.dat
vendored
@@ -1,153 +0,0 @@
|
||||
#data
|
||||
<!doctype html><table><tbody><select><tr>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <select>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
|
||||
#data
|
||||
<!doctype html><table><tr><select><td>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <select>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
|
||||
#data
|
||||
<!doctype html><table><tr><td><select><td>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| <select>
|
||||
| <td>
|
||||
|
||||
#data
|
||||
<!doctype html><table><tr><th><select><td>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <th>
|
||||
| <select>
|
||||
| <td>
|
||||
|
||||
#data
|
||||
<!doctype html><table><caption><select><tr>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <caption>
|
||||
| <select>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
|
||||
#data
|
||||
<!doctype html><select><tr>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <select>
|
||||
|
||||
#data
|
||||
<!doctype html><select><td>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <select>
|
||||
|
||||
#data
|
||||
<!doctype html><select><th>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <select>
|
||||
|
||||
#data
|
||||
<!doctype html><select><tbody>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <select>
|
||||
|
||||
#data
|
||||
<!doctype html><select><thead>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <select>
|
||||
|
||||
#data
|
||||
<!doctype html><select><tfoot>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <select>
|
||||
|
||||
#data
|
||||
<!doctype html><select><caption>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <select>
|
||||
|
||||
#data
|
||||
<!doctype html><table><tr></table>a
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| "a"
|
||||
269
src/pkg/exp/html/testdata/webkit/tests18.dat
vendored
269
src/pkg/exp/html/testdata/webkit/tests18.dat
vendored
@@ -1,269 +0,0 @@
|
||||
#data
|
||||
<!doctype html><plaintext></plaintext>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <plaintext>
|
||||
| "</plaintext>"
|
||||
|
||||
#data
|
||||
<!doctype html><table><plaintext></plaintext>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <plaintext>
|
||||
| "</plaintext>"
|
||||
| <table>
|
||||
|
||||
#data
|
||||
<!doctype html><table><tbody><plaintext></plaintext>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <plaintext>
|
||||
| "</plaintext>"
|
||||
| <table>
|
||||
| <tbody>
|
||||
|
||||
#data
|
||||
<!doctype html><table><tbody><tr><plaintext></plaintext>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <plaintext>
|
||||
| "</plaintext>"
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
|
||||
#data
|
||||
<!doctype html><table><tbody><tr><plaintext></plaintext>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <plaintext>
|
||||
| "</plaintext>"
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
|
||||
#data
|
||||
<!doctype html><table><td><plaintext></plaintext>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| <plaintext>
|
||||
| "</plaintext>"
|
||||
|
||||
#data
|
||||
<!doctype html><table><caption><plaintext></plaintext>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <caption>
|
||||
| <plaintext>
|
||||
| "</plaintext>"
|
||||
|
||||
#data
|
||||
<!doctype html><table><tr><style></script></style>abc
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "abc"
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <style>
|
||||
| "</script>"
|
||||
|
||||
#data
|
||||
<!doctype html><table><tr><script></style></script>abc
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "abc"
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <script>
|
||||
| "</style>"
|
||||
|
||||
#data
|
||||
<!doctype html><table><caption><style></script></style>abc
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <caption>
|
||||
| <style>
|
||||
| "</script>"
|
||||
| "abc"
|
||||
|
||||
#data
|
||||
<!doctype html><table><td><style></script></style>abc
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| <style>
|
||||
| "</script>"
|
||||
| "abc"
|
||||
|
||||
#data
|
||||
<!doctype html><select><script></style></script>abc
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <select>
|
||||
| <script>
|
||||
| "</style>"
|
||||
| "abc"
|
||||
|
||||
#data
|
||||
<!doctype html><table><select><script></style></script>abc
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <select>
|
||||
| <script>
|
||||
| "</style>"
|
||||
| "abc"
|
||||
| <table>
|
||||
|
||||
#data
|
||||
<!doctype html><table><tr><select><script></style></script>abc
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <select>
|
||||
| <script>
|
||||
| "</style>"
|
||||
| "abc"
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
|
||||
#data
|
||||
<!doctype html><frameset></frameset><noframes>abc
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <frameset>
|
||||
| <noframes>
|
||||
| "abc"
|
||||
|
||||
#data
|
||||
<!doctype html><frameset></frameset><noframes>abc</noframes><!--abc-->
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <frameset>
|
||||
| <noframes>
|
||||
| "abc"
|
||||
| <!-- abc -->
|
||||
|
||||
#data
|
||||
<!doctype html><frameset></frameset></html><noframes>abc
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <frameset>
|
||||
| <noframes>
|
||||
| "abc"
|
||||
|
||||
#data
|
||||
<!doctype html><frameset></frameset></html><noframes>abc</noframes><!--abc-->
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <frameset>
|
||||
| <noframes>
|
||||
| "abc"
|
||||
| <!-- abc -->
|
||||
|
||||
#data
|
||||
<!doctype html><table><tr></tbody><tfoot>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <tfoot>
|
||||
|
||||
#data
|
||||
<!doctype html><table><td><svg></svg>abc<td>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| <svg svg>
|
||||
| "abc"
|
||||
| <td>
|
||||
1220
src/pkg/exp/html/testdata/webkit/tests19.dat
vendored
1220
src/pkg/exp/html/testdata/webkit/tests19.dat
vendored
File diff suppressed because it is too large
Load Diff
763
src/pkg/exp/html/testdata/webkit/tests2.dat
vendored
763
src/pkg/exp/html/testdata/webkit/tests2.dat
vendored
@@ -1,763 +0,0 @@
|
||||
#data
|
||||
<!DOCTYPE html>Test
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Test"
|
||||
|
||||
#data
|
||||
<textarea>test</div>test
|
||||
#errors
|
||||
Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE.
|
||||
Line: 1 Col: 24 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <textarea>
|
||||
| "test</div>test"
|
||||
|
||||
#data
|
||||
<table><td>
|
||||
#errors
|
||||
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
|
||||
Line: 1 Col: 11 Unexpected table cell start tag (td) in the table body phase.
|
||||
Line: 1 Col: 11 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
|
||||
#data
|
||||
<table><td>test</tbody></table>
|
||||
#errors
|
||||
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
|
||||
Line: 1 Col: 11 Unexpected table cell start tag (td) in the table body phase.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| "test"
|
||||
|
||||
#data
|
||||
<frame>test
|
||||
#errors
|
||||
Line: 1 Col: 7 Unexpected start tag (frame). Expected DOCTYPE.
|
||||
Line: 1 Col: 7 Unexpected start tag frame. Ignored.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "test"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><frameset>test
|
||||
#errors
|
||||
Line: 1 Col: 29 Unepxected characters in the frameset phase. Characters ignored.
|
||||
Line: 1 Col: 29 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <frameset>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><frameset><!DOCTYPE html>
|
||||
#errors
|
||||
Line: 1 Col: 40 Unexpected DOCTYPE. Ignored.
|
||||
Line: 1 Col: 40 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <frameset>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><font><p><b>test</font>
|
||||
#errors
|
||||
Line: 1 Col: 38 End tag (font) violates step 1, paragraph 3 of the adoption agency algorithm.
|
||||
Line: 1 Col: 38 End tag (font) violates step 1, paragraph 3 of the adoption agency algorithm.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <font>
|
||||
| <p>
|
||||
| <font>
|
||||
| <b>
|
||||
| "test"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><dt><div><dd>
|
||||
#errors
|
||||
Line: 1 Col: 28 Missing end tag (div, dt).
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <dt>
|
||||
| <div>
|
||||
| <dd>
|
||||
|
||||
#data
|
||||
<script></x
|
||||
#errors
|
||||
Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
|
||||
Line: 1 Col: 11 Unexpected end of file. Expected end tag (script).
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <script>
|
||||
| "</x"
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<table><plaintext><td>
|
||||
#errors
|
||||
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
|
||||
Line: 1 Col: 18 Unexpected start tag (plaintext) in table context caused voodoo mode.
|
||||
Line: 1 Col: 22 Unexpected end of file. Expected table content.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <plaintext>
|
||||
| "<td>"
|
||||
| <table>
|
||||
|
||||
#data
|
||||
<plaintext></plaintext>
|
||||
#errors
|
||||
Line: 1 Col: 11 Unexpected start tag (plaintext). Expected DOCTYPE.
|
||||
Line: 1 Col: 23 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <plaintext>
|
||||
| "</plaintext>"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><table><tr>TEST
|
||||
#errors
|
||||
Line: 1 Col: 30 Unexpected non-space characters in table context caused voodoo mode.
|
||||
Line: 1 Col: 30 Unexpected end of file. Expected table content.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "TEST"
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body t1=1><body t2=2><body t3=3 t4=4>
|
||||
#errors
|
||||
Line: 1 Col: 37 Unexpected start tag (body).
|
||||
Line: 1 Col: 53 Unexpected start tag (body).
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| t1="1"
|
||||
| t2="2"
|
||||
| t3="3"
|
||||
| t4="4"
|
||||
|
||||
#data
|
||||
</b test
|
||||
#errors
|
||||
Line: 1 Col: 8 Unexpected end of file in attribute name.
|
||||
Line: 1 Col: 8 End tag contains unexpected attributes.
|
||||
Line: 1 Col: 8 Unexpected end tag (b). Expected DOCTYPE.
|
||||
Line: 1 Col: 8 Unexpected end tag (b) after the (implied) root element.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html></b test<b &=&>X
|
||||
#errors
|
||||
Line: 1 Col: 32 Named entity didn't end with ';'.
|
||||
Line: 1 Col: 33 End tag contains unexpected attributes.
|
||||
Line: 1 Col: 33 Unexpected end tag (b) after the (implied) root element.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "X"
|
||||
|
||||
#data
|
||||
<!doctypehtml><scrIPt type=text/x-foobar;baz>X</SCRipt
|
||||
#errors
|
||||
Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
|
||||
Line: 1 Col: 54 Unexpected end of file in the tag name.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <script>
|
||||
| type="text/x-foobar;baz"
|
||||
| "X</SCRipt"
|
||||
| <body>
|
||||
|
||||
#data
|
||||
&
|
||||
#errors
|
||||
Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "&"
|
||||
|
||||
#data
|
||||
&#
|
||||
#errors
|
||||
Line: 1 Col: 1 Numeric entity expected. Got end of file instead.
|
||||
Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "&#"
|
||||
|
||||
#data
|
||||
&#X
|
||||
#errors
|
||||
Line: 1 Col: 3 Numeric entity expected but none found.
|
||||
Line: 1 Col: 3 Unexpected non-space characters. Expected DOCTYPE.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "&#X"
|
||||
|
||||
#data
|
||||
&#x
|
||||
#errors
|
||||
Line: 1 Col: 3 Numeric entity expected but none found.
|
||||
Line: 1 Col: 3 Unexpected non-space characters. Expected DOCTYPE.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "&#x"
|
||||
|
||||
#data
|
||||
-
|
||||
#errors
|
||||
Line: 1 Col: 4 Numeric entity didn't end with ';'.
|
||||
Line: 1 Col: 4 Unexpected non-space characters. Expected DOCTYPE.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "-"
|
||||
|
||||
#data
|
||||
&x-test
|
||||
#errors
|
||||
Line: 1 Col: 1 Named entity expected. Got none.
|
||||
Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "&x-test"
|
||||
|
||||
#data
|
||||
<!doctypehtml><p><li>
|
||||
#errors
|
||||
Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <li>
|
||||
|
||||
#data
|
||||
<!doctypehtml><p><dt>
|
||||
#errors
|
||||
Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <dt>
|
||||
|
||||
#data
|
||||
<!doctypehtml><p><dd>
|
||||
#errors
|
||||
Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <dd>
|
||||
|
||||
#data
|
||||
<!doctypehtml><p><form>
|
||||
#errors
|
||||
Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
|
||||
Line: 1 Col: 23 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <form>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><p></P>X
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| "X"
|
||||
|
||||
#data
|
||||
&
|
||||
#errors
|
||||
Line: 1 Col: 4 Named entity didn't end with ';'.
|
||||
Line: 1 Col: 4 Unexpected non-space characters. Expected DOCTYPE.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "&"
|
||||
|
||||
#data
|
||||
&AMp;
|
||||
#errors
|
||||
Line: 1 Col: 1 Named entity expected. Got none.
|
||||
Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "&AMp;"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><html><head></head><body><thisISasillyTESTelementNameToMakeSureCrazyTagNamesArePARSEDcorrectLY>
|
||||
#errors
|
||||
Line: 1 Col: 110 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <thisisasillytestelementnametomakesurecrazytagnamesareparsedcorrectly>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html>X</body>X
|
||||
#errors
|
||||
Line: 1 Col: 24 Unexpected non-space characters in the after body phase.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "XX"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><!-- X
|
||||
#errors
|
||||
Line: 1 Col: 21 Unexpected end of file in comment.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <!-- X -->
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><table><caption>test TEST</caption><td>test
|
||||
#errors
|
||||
Line: 1 Col: 54 Unexpected table cell start tag (td) in the table body phase.
|
||||
Line: 1 Col: 58 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <caption>
|
||||
| "test TEST"
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| "test"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><select><option><optgroup>
|
||||
#errors
|
||||
Line: 1 Col: 41 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <select>
|
||||
| <option>
|
||||
| <optgroup>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><select><optgroup><option></optgroup><option><select><option>
|
||||
#errors
|
||||
Line: 1 Col: 68 Unexpected select start tag in the select phase treated as select end tag.
|
||||
Line: 1 Col: 76 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <select>
|
||||
| <optgroup>
|
||||
| <option>
|
||||
| <option>
|
||||
| <option>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><select><optgroup><option><optgroup>
|
||||
#errors
|
||||
Line: 1 Col: 51 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <select>
|
||||
| <optgroup>
|
||||
| <option>
|
||||
| <optgroup>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><datalist><option>foo</datalist>bar
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <datalist>
|
||||
| <option>
|
||||
| "foo"
|
||||
| "bar"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><font><input><input></font>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <font>
|
||||
| <input>
|
||||
| <input>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><!-- XXX - XXX -->
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <!-- XXX - XXX -->
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><!-- XXX - XXX
|
||||
#errors
|
||||
Line: 1 Col: 29 Unexpected end of file in comment (-)
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <!-- XXX - XXX -->
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><!-- XXX - XXX - XXX -->
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <!-- XXX - XXX - XXX -->
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<isindex test=x name=x>
|
||||
#errors
|
||||
Line: 1 Col: 23 Unexpected start tag (isindex). Expected DOCTYPE.
|
||||
Line: 1 Col: 23 Unexpected start tag isindex. Don't use it!
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <form>
|
||||
| <hr>
|
||||
| <label>
|
||||
| "This is a searchable index. Enter search keywords: "
|
||||
| <input>
|
||||
| name="isindex"
|
||||
| test="x"
|
||||
| <hr>
|
||||
|
||||
#data
|
||||
test
|
||||
test
|
||||
#errors
|
||||
Line: 2 Col: 4 Unexpected non-space characters. Expected DOCTYPE.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "test
|
||||
test"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><title>test</body></title>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <title>
|
||||
| "test</body>"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><title>X</title><meta name=z><link rel=foo><style>
|
||||
x { content:"</style" } </style>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <title>
|
||||
| "X"
|
||||
| <meta>
|
||||
| name="z"
|
||||
| <link>
|
||||
| rel="foo"
|
||||
| <style>
|
||||
| "
|
||||
x { content:"</style" } "
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><select><optgroup></optgroup></select>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <select>
|
||||
| <optgroup>
|
||||
|
||||
#data
|
||||
|
||||
|
||||
#errors
|
||||
Line: 2 Col: 1 Unexpected End of file. Expected DOCTYPE.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html> <html>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><script>
|
||||
</script> <title>x</title> </head>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <script>
|
||||
| "
|
||||
"
|
||||
| " "
|
||||
| <title>
|
||||
| "x"
|
||||
| " "
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><html><body><html id=x>
|
||||
#errors
|
||||
Line: 1 Col: 38 html needs to be the first start tag.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| id="x"
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html>X</body><html id="x">
|
||||
#errors
|
||||
Line: 1 Col: 36 Unexpected start tag token (html) in the after body phase.
|
||||
Line: 1 Col: 36 html needs to be the first start tag.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| id="x"
|
||||
| <head>
|
||||
| <body>
|
||||
| "X"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><head><html id=x>
|
||||
#errors
|
||||
Line: 1 Col: 32 html needs to be the first start tag.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| id="x"
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html>X</html>X
|
||||
#errors
|
||||
Line: 1 Col: 24 Unexpected non-space characters in the after body phase.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "XX"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html>X</html>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "X "
|
||||
|
||||
#data
|
||||
<!DOCTYPE html>X</html><p>X
|
||||
#errors
|
||||
Line: 1 Col: 26 Unexpected start tag (p).
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "X"
|
||||
| <p>
|
||||
| "X"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html>X<p/x/y/z>
|
||||
#errors
|
||||
Line: 1 Col: 19 Expected a > after the /.
|
||||
Line: 1 Col: 21 Solidus (/) incorrectly placed in tag.
|
||||
Line: 1 Col: 23 Solidus (/) incorrectly placed in tag.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "X"
|
||||
| <p>
|
||||
| x=""
|
||||
| y=""
|
||||
| z=""
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><!--x--
|
||||
#errors
|
||||
Line: 1 Col: 22 Unexpected end of file in comment (--).
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <!-- x -->
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><table><tr><td></p></table>
|
||||
#errors
|
||||
Line: 1 Col: 34 Unexpected end tag (p). Ignored.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| <p>
|
||||
|
||||
#data
|
||||
<!DOCTYPE <!DOCTYPE HTML>><!--<!--x-->-->
|
||||
#errors
|
||||
Line: 1 Col: 20 Expected space or '>'. Got ''
|
||||
Line: 1 Col: 25 Erroneous DOCTYPE.
|
||||
Line: 1 Col: 35 Unexpected character in comment found.
|
||||
#document
|
||||
| <!DOCTYPE <!doctype>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| ">"
|
||||
| <!-- <!--x -->
|
||||
| "-->"
|
||||
|
||||
#data
|
||||
<!doctype html><div><form></form><div></div></div>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| <form>
|
||||
| <div>
|
||||
455
src/pkg/exp/html/testdata/webkit/tests20.dat
vendored
455
src/pkg/exp/html/testdata/webkit/tests20.dat
vendored
@@ -1,455 +0,0 @@
|
||||
#data
|
||||
<!doctype html><p><button><button>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <button>
|
||||
| <button>
|
||||
|
||||
#data
|
||||
<!doctype html><p><button><address>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <button>
|
||||
| <address>
|
||||
|
||||
#data
|
||||
<!doctype html><p><button><blockquote>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <button>
|
||||
| <blockquote>
|
||||
|
||||
#data
|
||||
<!doctype html><p><button><menu>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <button>
|
||||
| <menu>
|
||||
|
||||
#data
|
||||
<!doctype html><p><button><p>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <button>
|
||||
| <p>
|
||||
|
||||
#data
|
||||
<!doctype html><p><button><ul>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <button>
|
||||
| <ul>
|
||||
|
||||
#data
|
||||
<!doctype html><p><button><h1>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <button>
|
||||
| <h1>
|
||||
|
||||
#data
|
||||
<!doctype html><p><button><h6>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <button>
|
||||
| <h6>
|
||||
|
||||
#data
|
||||
<!doctype html><p><button><listing>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <button>
|
||||
| <listing>
|
||||
|
||||
#data
|
||||
<!doctype html><p><button><pre>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <button>
|
||||
| <pre>
|
||||
|
||||
#data
|
||||
<!doctype html><p><button><form>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <button>
|
||||
| <form>
|
||||
|
||||
#data
|
||||
<!doctype html><p><button><li>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <button>
|
||||
| <li>
|
||||
|
||||
#data
|
||||
<!doctype html><p><button><dd>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <button>
|
||||
| <dd>
|
||||
|
||||
#data
|
||||
<!doctype html><p><button><dt>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <button>
|
||||
| <dt>
|
||||
|
||||
#data
|
||||
<!doctype html><p><button><plaintext>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <button>
|
||||
| <plaintext>
|
||||
|
||||
#data
|
||||
<!doctype html><p><button><table>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <button>
|
||||
| <table>
|
||||
|
||||
#data
|
||||
<!doctype html><p><button><hr>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <button>
|
||||
| <hr>
|
||||
|
||||
#data
|
||||
<!doctype html><p><button><xmp>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <button>
|
||||
| <xmp>
|
||||
|
||||
#data
|
||||
<!doctype html><p><button></p>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <button>
|
||||
| <p>
|
||||
|
||||
#data
|
||||
<!doctype html><address><button></address>a
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <address>
|
||||
| <button>
|
||||
| "a"
|
||||
|
||||
#data
|
||||
<!doctype html><address><button></address>a
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <address>
|
||||
| <button>
|
||||
| "a"
|
||||
|
||||
#data
|
||||
<p><table></p>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <p>
|
||||
| <table>
|
||||
|
||||
#data
|
||||
<!doctype html><svg>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
|
||||
#data
|
||||
<!doctype html><p><figcaption>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <figcaption>
|
||||
|
||||
#data
|
||||
<!doctype html><p><summary>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <summary>
|
||||
|
||||
#data
|
||||
<!doctype html><form><table><form>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <form>
|
||||
| <table>
|
||||
|
||||
#data
|
||||
<!doctype html><table><form><form>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <form>
|
||||
|
||||
#data
|
||||
<!doctype html><table><form></table><form>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <form>
|
||||
|
||||
#data
|
||||
<!doctype html><svg><foreignObject><p>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| <svg foreignObject>
|
||||
| <p>
|
||||
|
||||
#data
|
||||
<!doctype html><svg><title>abc
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| <svg title>
|
||||
| "abc"
|
||||
|
||||
#data
|
||||
<option><span><option>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <option>
|
||||
| <span>
|
||||
| <option>
|
||||
|
||||
#data
|
||||
<option><option>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <option>
|
||||
| <option>
|
||||
|
||||
#data
|
||||
<math><annotation-xml><div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
| <math annotation-xml>
|
||||
| <div>
|
||||
|
||||
#data
|
||||
<math><annotation-xml encoding="application/svg+xml"><div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
| <math annotation-xml>
|
||||
| encoding="application/svg+xml"
|
||||
| <div>
|
||||
|
||||
#data
|
||||
<math><annotation-xml encoding="application/xhtml+xml"><div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
| <math annotation-xml>
|
||||
| encoding="application/xhtml+xml"
|
||||
| <div>
|
||||
|
||||
#data
|
||||
<math><annotation-xml encoding="aPPlication/xhtmL+xMl"><div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
| <math annotation-xml>
|
||||
| encoding="aPPlication/xhtmL+xMl"
|
||||
| <div>
|
||||
|
||||
#data
|
||||
<math><annotation-xml encoding="text/html"><div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
| <math annotation-xml>
|
||||
| encoding="text/html"
|
||||
| <div>
|
||||
|
||||
#data
|
||||
<math><annotation-xml encoding="Text/htmL"><div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
| <math annotation-xml>
|
||||
| encoding="Text/htmL"
|
||||
| <div>
|
||||
|
||||
#data
|
||||
<math><annotation-xml encoding=" text/html "><div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
| <math annotation-xml>
|
||||
| encoding=" text/html "
|
||||
| <div>
|
||||
221
src/pkg/exp/html/testdata/webkit/tests21.dat
vendored
221
src/pkg/exp/html/testdata/webkit/tests21.dat
vendored
@@ -1,221 +0,0 @@
|
||||
#data
|
||||
<svg><![CDATA[foo]]>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| "foo"
|
||||
|
||||
#data
|
||||
<math><![CDATA[foo]]>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
| "foo"
|
||||
|
||||
#data
|
||||
<div><![CDATA[foo]]>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| <!-- [CDATA[foo]] -->
|
||||
|
||||
#data
|
||||
<svg><![CDATA[foo
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| "foo"
|
||||
|
||||
#data
|
||||
<svg><![CDATA[foo
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| "foo"
|
||||
|
||||
#data
|
||||
<svg><![CDATA[
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
|
||||
#data
|
||||
<svg><![CDATA[]]>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
|
||||
#data
|
||||
<svg><![CDATA[]] >]]>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| "]] >"
|
||||
|
||||
#data
|
||||
<svg><![CDATA[]] >]]>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| "]] >"
|
||||
|
||||
#data
|
||||
<svg><![CDATA[]]
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| "]]"
|
||||
|
||||
#data
|
||||
<svg><![CDATA[]
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| "]"
|
||||
|
||||
#data
|
||||
<svg><![CDATA[]>a
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| "]>a"
|
||||
|
||||
#data
|
||||
<svg><foreignObject><div><![CDATA[foo]]>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| <svg foreignObject>
|
||||
| <div>
|
||||
| <!-- [CDATA[foo]] -->
|
||||
|
||||
#data
|
||||
<svg><![CDATA[<svg>]]>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| "<svg>"
|
||||
|
||||
#data
|
||||
<svg><![CDATA[</svg>a]]>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| "</svg>a"
|
||||
|
||||
#data
|
||||
<svg><![CDATA[<svg>a
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| "<svg>a"
|
||||
|
||||
#data
|
||||
<svg><![CDATA[</svg>a
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| "</svg>a"
|
||||
|
||||
#data
|
||||
<svg><![CDATA[<svg>]]><path>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| "<svg>"
|
||||
| <svg path>
|
||||
|
||||
#data
|
||||
<svg><![CDATA[<svg>]]></path>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| "<svg>"
|
||||
|
||||
#data
|
||||
<svg><![CDATA[<svg>]]><!--path-->
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| "<svg>"
|
||||
| <!-- path -->
|
||||
|
||||
#data
|
||||
<svg><![CDATA[<svg>]]>path
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| "<svg>path"
|
||||
|
||||
#data
|
||||
<svg><![CDATA[<!--svg-->]]>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| "<!--svg-->"
|
||||
157
src/pkg/exp/html/testdata/webkit/tests22.dat
vendored
157
src/pkg/exp/html/testdata/webkit/tests22.dat
vendored
@@ -1,157 +0,0 @@
|
||||
#data
|
||||
<a><b><big><em><strong><div>X</a>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <a>
|
||||
| <b>
|
||||
| <big>
|
||||
| <em>
|
||||
| <strong>
|
||||
| <big>
|
||||
| <em>
|
||||
| <strong>
|
||||
| <div>
|
||||
| <a>
|
||||
| "X"
|
||||
|
||||
#data
|
||||
<a><b><div id=1><div id=2><div id=3><div id=4><div id=5><div id=6><div id=7><div id=8>A</a>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <a>
|
||||
| <b>
|
||||
| <b>
|
||||
| <div>
|
||||
| id="1"
|
||||
| <a>
|
||||
| <div>
|
||||
| id="2"
|
||||
| <a>
|
||||
| <div>
|
||||
| id="3"
|
||||
| <a>
|
||||
| <div>
|
||||
| id="4"
|
||||
| <a>
|
||||
| <div>
|
||||
| id="5"
|
||||
| <a>
|
||||
| <div>
|
||||
| id="6"
|
||||
| <a>
|
||||
| <div>
|
||||
| id="7"
|
||||
| <a>
|
||||
| <div>
|
||||
| id="8"
|
||||
| <a>
|
||||
| "A"
|
||||
|
||||
#data
|
||||
<a><b><div id=1><div id=2><div id=3><div id=4><div id=5><div id=6><div id=7><div id=8><div id=9>A</a>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <a>
|
||||
| <b>
|
||||
| <b>
|
||||
| <div>
|
||||
| id="1"
|
||||
| <a>
|
||||
| <div>
|
||||
| id="2"
|
||||
| <a>
|
||||
| <div>
|
||||
| id="3"
|
||||
| <a>
|
||||
| <div>
|
||||
| id="4"
|
||||
| <a>
|
||||
| <div>
|
||||
| id="5"
|
||||
| <a>
|
||||
| <div>
|
||||
| id="6"
|
||||
| <a>
|
||||
| <div>
|
||||
| id="7"
|
||||
| <a>
|
||||
| <div>
|
||||
| id="8"
|
||||
| <a>
|
||||
| <div>
|
||||
| id="9"
|
||||
| "A"
|
||||
|
||||
#data
|
||||
<a><b><div id=1><div id=2><div id=3><div id=4><div id=5><div id=6><div id=7><div id=8><div id=9><div id=10>A</a>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <a>
|
||||
| <b>
|
||||
| <b>
|
||||
| <div>
|
||||
| id="1"
|
||||
| <a>
|
||||
| <div>
|
||||
| id="2"
|
||||
| <a>
|
||||
| <div>
|
||||
| id="3"
|
||||
| <a>
|
||||
| <div>
|
||||
| id="4"
|
||||
| <a>
|
||||
| <div>
|
||||
| id="5"
|
||||
| <a>
|
||||
| <div>
|
||||
| id="6"
|
||||
| <a>
|
||||
| <div>
|
||||
| id="7"
|
||||
| <a>
|
||||
| <div>
|
||||
| id="8"
|
||||
| <a>
|
||||
| <div>
|
||||
| id="9"
|
||||
| <div>
|
||||
| id="10"
|
||||
| "A"
|
||||
|
||||
#data
|
||||
<cite><b><cite><i><cite><i><cite><i><div>X</b>TEST
|
||||
#errors
|
||||
Line: 1 Col: 6 Unexpected start tag (cite). Expected DOCTYPE.
|
||||
Line: 1 Col: 46 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
|
||||
Line: 1 Col: 50 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <cite>
|
||||
| <b>
|
||||
| <cite>
|
||||
| <i>
|
||||
| <cite>
|
||||
| <i>
|
||||
| <cite>
|
||||
| <i>
|
||||
| <i>
|
||||
| <i>
|
||||
| <div>
|
||||
| <b>
|
||||
| "X"
|
||||
| "TEST"
|
||||
155
src/pkg/exp/html/testdata/webkit/tests23.dat
vendored
155
src/pkg/exp/html/testdata/webkit/tests23.dat
vendored
@@ -1,155 +0,0 @@
|
||||
#data
|
||||
<p><font size=4><font color=red><font size=4><font size=4><font size=4><font size=4><font size=4><font color=red><p>X
|
||||
#errors
|
||||
3: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
|
||||
116: Unclosed elements.
|
||||
117: End of file seen and there were open elements.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <font>
|
||||
| size="4"
|
||||
| <font>
|
||||
| color="red"
|
||||
| <font>
|
||||
| size="4"
|
||||
| <font>
|
||||
| size="4"
|
||||
| <font>
|
||||
| size="4"
|
||||
| <font>
|
||||
| size="4"
|
||||
| <font>
|
||||
| size="4"
|
||||
| <font>
|
||||
| color="red"
|
||||
| <p>
|
||||
| <font>
|
||||
| color="red"
|
||||
| <font>
|
||||
| size="4"
|
||||
| <font>
|
||||
| size="4"
|
||||
| <font>
|
||||
| size="4"
|
||||
| <font>
|
||||
| color="red"
|
||||
| "X"
|
||||
|
||||
#data
|
||||
<p><font size=4><font size=4><font size=4><font size=4><p>X
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <font>
|
||||
| size="4"
|
||||
| <font>
|
||||
| size="4"
|
||||
| <font>
|
||||
| size="4"
|
||||
| <font>
|
||||
| size="4"
|
||||
| <p>
|
||||
| <font>
|
||||
| size="4"
|
||||
| <font>
|
||||
| size="4"
|
||||
| <font>
|
||||
| size="4"
|
||||
| "X"
|
||||
|
||||
#data
|
||||
<p><font size=4><font size=4><font size=4><font size="5"><font size=4><p>X
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <font>
|
||||
| size="4"
|
||||
| <font>
|
||||
| size="4"
|
||||
| <font>
|
||||
| size="4"
|
||||
| <font>
|
||||
| size="5"
|
||||
| <font>
|
||||
| size="4"
|
||||
| <p>
|
||||
| <font>
|
||||
| size="4"
|
||||
| <font>
|
||||
| size="4"
|
||||
| <font>
|
||||
| size="5"
|
||||
| <font>
|
||||
| size="4"
|
||||
| "X"
|
||||
|
||||
#data
|
||||
<p><font size=4 id=a><font size=4 id=b><font size=4><font size=4><p>X
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <font>
|
||||
| id="a"
|
||||
| size="4"
|
||||
| <font>
|
||||
| id="b"
|
||||
| size="4"
|
||||
| <font>
|
||||
| size="4"
|
||||
| <font>
|
||||
| size="4"
|
||||
| <p>
|
||||
| <font>
|
||||
| id="a"
|
||||
| size="4"
|
||||
| <font>
|
||||
| id="b"
|
||||
| size="4"
|
||||
| <font>
|
||||
| size="4"
|
||||
| <font>
|
||||
| size="4"
|
||||
| "X"
|
||||
|
||||
#data
|
||||
<p><b id=a><b id=a><b id=a><b><object><b id=a><b id=a>X</object><p>Y
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <b>
|
||||
| id="a"
|
||||
| <b>
|
||||
| id="a"
|
||||
| <b>
|
||||
| id="a"
|
||||
| <b>
|
||||
| <object>
|
||||
| <b>
|
||||
| id="a"
|
||||
| <b>
|
||||
| id="a"
|
||||
| "X"
|
||||
| <p>
|
||||
| <b>
|
||||
| id="a"
|
||||
| <b>
|
||||
| id="a"
|
||||
| <b>
|
||||
| id="a"
|
||||
| <b>
|
||||
| "Y"
|
||||
79
src/pkg/exp/html/testdata/webkit/tests24.dat
vendored
79
src/pkg/exp/html/testdata/webkit/tests24.dat
vendored
@@ -1,79 +0,0 @@
|
||||
#data
|
||||
<!DOCTYPE html>≂̸
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "≂̸"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html>≂̸A
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "≂̸A"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html>  
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| " "
|
||||
|
||||
#data
|
||||
<!DOCTYPE html>  A
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| " A"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html>⊂⃒
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "⊂⃒"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html>⊂⃒A
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "⊂⃒A"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html>𝔾
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "𝔾"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html>𝔾A
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "𝔾A"
|
||||
219
src/pkg/exp/html/testdata/webkit/tests25.dat
vendored
219
src/pkg/exp/html/testdata/webkit/tests25.dat
vendored
@@ -1,219 +0,0 @@
|
||||
#data
|
||||
<!DOCTYPE html><body><foo>A
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <foo>
|
||||
| "A"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><area>A
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <area>
|
||||
| "A"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><base>A
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <base>
|
||||
| "A"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><basefont>A
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <basefont>
|
||||
| "A"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><bgsound>A
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <bgsound>
|
||||
| "A"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><br>A
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <br>
|
||||
| "A"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><col>A
|
||||
#errors
|
||||
26: Stray start tag “col”.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "A"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><command>A
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <command>
|
||||
| "A"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><embed>A
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <embed>
|
||||
| "A"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><frame>A
|
||||
#errors
|
||||
26: Stray start tag “frame”.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "A"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><hr>A
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <hr>
|
||||
| "A"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><img>A
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <img>
|
||||
| "A"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><input>A
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <input>
|
||||
| "A"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><keygen>A
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <keygen>
|
||||
| "A"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><link>A
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <link>
|
||||
| "A"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><meta>A
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <meta>
|
||||
| "A"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><param>A
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <param>
|
||||
| "A"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><source>A
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <source>
|
||||
| "A"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><track>A
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <track>
|
||||
| "A"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><wbr>A
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <wbr>
|
||||
| "A"
|
||||
195
src/pkg/exp/html/testdata/webkit/tests26.dat
vendored
195
src/pkg/exp/html/testdata/webkit/tests26.dat
vendored
@@ -1,195 +0,0 @@
|
||||
#data
|
||||
<!DOCTYPE html><body><a href='#1'><nobr>1<nobr></a><br><a href='#2'><nobr>2<nobr></a><br><a href='#3'><nobr>3<nobr></a>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <a>
|
||||
| href="#1"
|
||||
| <nobr>
|
||||
| "1"
|
||||
| <nobr>
|
||||
| <nobr>
|
||||
| <br>
|
||||
| <a>
|
||||
| href="#2"
|
||||
| <a>
|
||||
| href="#2"
|
||||
| <nobr>
|
||||
| "2"
|
||||
| <nobr>
|
||||
| <nobr>
|
||||
| <br>
|
||||
| <a>
|
||||
| href="#3"
|
||||
| <a>
|
||||
| href="#3"
|
||||
| <nobr>
|
||||
| "3"
|
||||
| <nobr>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><b><nobr>1<nobr></b><i><nobr>2<nobr></i>3
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <b>
|
||||
| <nobr>
|
||||
| "1"
|
||||
| <nobr>
|
||||
| <nobr>
|
||||
| <i>
|
||||
| <i>
|
||||
| <nobr>
|
||||
| "2"
|
||||
| <nobr>
|
||||
| <nobr>
|
||||
| "3"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><b><nobr>1<table><nobr></b><i><nobr>2<nobr></i>3
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <b>
|
||||
| <nobr>
|
||||
| "1"
|
||||
| <nobr>
|
||||
| <i>
|
||||
| <i>
|
||||
| <nobr>
|
||||
| "2"
|
||||
| <nobr>
|
||||
| <nobr>
|
||||
| "3"
|
||||
| <table>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><b><nobr>1<table><tr><td><nobr></b><i><nobr>2<nobr></i>3
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <b>
|
||||
| <nobr>
|
||||
| "1"
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| <nobr>
|
||||
| <i>
|
||||
| <i>
|
||||
| <nobr>
|
||||
| "2"
|
||||
| <nobr>
|
||||
| <nobr>
|
||||
| "3"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><b><nobr>1<div><nobr></b><i><nobr>2<nobr></i>3
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <b>
|
||||
| <nobr>
|
||||
| "1"
|
||||
| <div>
|
||||
| <b>
|
||||
| <nobr>
|
||||
| <nobr>
|
||||
| <nobr>
|
||||
| <i>
|
||||
| <i>
|
||||
| <nobr>
|
||||
| "2"
|
||||
| <nobr>
|
||||
| <nobr>
|
||||
| "3"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><b><nobr>1<nobr></b><div><i><nobr>2<nobr></i>3
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <b>
|
||||
| <nobr>
|
||||
| "1"
|
||||
| <nobr>
|
||||
| <div>
|
||||
| <nobr>
|
||||
| <i>
|
||||
| <i>
|
||||
| <nobr>
|
||||
| "2"
|
||||
| <nobr>
|
||||
| <nobr>
|
||||
| "3"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><b><nobr>1<nobr><ins></b><i><nobr>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <b>
|
||||
| <nobr>
|
||||
| "1"
|
||||
| <nobr>
|
||||
| <ins>
|
||||
| <nobr>
|
||||
| <i>
|
||||
| <i>
|
||||
| <nobr>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><b><nobr>1<ins><nobr></b><i>2
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <b>
|
||||
| <nobr>
|
||||
| "1"
|
||||
| <ins>
|
||||
| <nobr>
|
||||
| <nobr>
|
||||
| <i>
|
||||
| "2"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><b>1<nobr></b><i><nobr>2</i>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <b>
|
||||
| "1"
|
||||
| <nobr>
|
||||
| <nobr>
|
||||
| <i>
|
||||
| <i>
|
||||
| <nobr>
|
||||
| "2"
|
||||
305
src/pkg/exp/html/testdata/webkit/tests3.dat
vendored
305
src/pkg/exp/html/testdata/webkit/tests3.dat
vendored
@@ -1,305 +0,0 @@
|
||||
#data
|
||||
<head></head><style></style>
|
||||
#errors
|
||||
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
|
||||
Line: 1 Col: 20 Unexpected start tag (style) that can be in head. Moved.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <style>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<head></head><script></script>
|
||||
#errors
|
||||
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
|
||||
Line: 1 Col: 21 Unexpected start tag (script) that can be in head. Moved.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <script>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<head></head><!-- --><style></style><!-- --><script></script>
|
||||
#errors
|
||||
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
|
||||
Line: 1 Col: 28 Unexpected start tag (style) that can be in head. Moved.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <style>
|
||||
| <script>
|
||||
| <!-- -->
|
||||
| <!-- -->
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<head></head><!-- -->x<style></style><!-- --><script></script>
|
||||
#errors
|
||||
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <!-- -->
|
||||
| <body>
|
||||
| "x"
|
||||
| <style>
|
||||
| <!-- -->
|
||||
| <script>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><html><head></head><body><pre>
|
||||
</pre></body></html>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <pre>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><html><head></head><body><pre>
|
||||
foo</pre></body></html>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <pre>
|
||||
| "foo"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><html><head></head><body><pre>
|
||||
|
||||
foo</pre></body></html>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <pre>
|
||||
| "
|
||||
foo"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><html><head></head><body><pre>
|
||||
foo
|
||||
</pre></body></html>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <pre>
|
||||
| "foo
|
||||
"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><html><head></head><body><pre>x</pre><span>
|
||||
</span></body></html>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <pre>
|
||||
| "x"
|
||||
| <span>
|
||||
| "
|
||||
"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><html><head></head><body><pre>x
|
||||
y</pre></body></html>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <pre>
|
||||
| "x
|
||||
y"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><html><head></head><body><pre>x<div>
|
||||
y</pre></body></html>
|
||||
#errors
|
||||
Line: 2 Col: 7 End tag (pre) seen too early. Expected other end tag.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <pre>
|
||||
| "x"
|
||||
| <div>
|
||||
| "
|
||||
y"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><pre>

A</pre>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <pre>
|
||||
| "
|
||||
A"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><HTML><META><HEAD></HEAD></HTML>
|
||||
#errors
|
||||
Line: 1 Col: 33 Unexpected start tag head in existing head. Ignored.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <meta>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><HTML><HEAD><head></HEAD></HTML>
|
||||
#errors
|
||||
Line: 1 Col: 33 Unexpected start tag head in existing head. Ignored.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<textarea>foo<span>bar</span><i>baz
|
||||
#errors
|
||||
Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE.
|
||||
Line: 1 Col: 35 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <textarea>
|
||||
| "foo<span>bar</span><i>baz"
|
||||
|
||||
#data
|
||||
<title>foo<span>bar</em><i>baz
|
||||
#errors
|
||||
Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
|
||||
Line: 1 Col: 30 Unexpected end of file. Expected end tag (title).
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <title>
|
||||
| "foo<span>bar</em><i>baz"
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><textarea>
|
||||
</textarea>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <textarea>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><textarea>
|
||||
foo</textarea>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <textarea>
|
||||
| "foo"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><textarea>
|
||||
|
||||
foo</textarea>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <textarea>
|
||||
| "
|
||||
foo"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><html><head></head><body><ul><li><div><p><li></ul></body></html>
|
||||
#errors
|
||||
Line: 1 Col: 60 Missing end tag (div, li).
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <ul>
|
||||
| <li>
|
||||
| <div>
|
||||
| <p>
|
||||
| <li>
|
||||
|
||||
#data
|
||||
<!doctype html><nobr><nobr><nobr>
|
||||
#errors
|
||||
Line: 1 Col: 27 Unexpected start tag (nobr) implies end tag (nobr).
|
||||
Line: 1 Col: 33 Unexpected start tag (nobr) implies end tag (nobr).
|
||||
Line: 1 Col: 33 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <nobr>
|
||||
| <nobr>
|
||||
| <nobr>
|
||||
|
||||
#data
|
||||
<!doctype html><nobr><nobr></nobr><nobr>
|
||||
#errors
|
||||
Line: 1 Col: 27 Unexpected start tag (nobr) implies end tag (nobr).
|
||||
Line: 1 Col: 40 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <nobr>
|
||||
| <nobr>
|
||||
| <nobr>
|
||||
|
||||
#data
|
||||
<!doctype html><html><body><p><table></table></body></html>
|
||||
#errors
|
||||
Not known
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <table>
|
||||
|
||||
#data
|
||||
<p><table></table>
|
||||
#errors
|
||||
Not known
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <table>
|
||||
59
src/pkg/exp/html/testdata/webkit/tests4.dat
vendored
59
src/pkg/exp/html/testdata/webkit/tests4.dat
vendored
@@ -1,59 +0,0 @@
|
||||
#data
|
||||
direct div content
|
||||
#errors
|
||||
#document-fragment
|
||||
div
|
||||
#document
|
||||
| "direct div content"
|
||||
|
||||
#data
|
||||
direct textarea content
|
||||
#errors
|
||||
#document-fragment
|
||||
textarea
|
||||
#document
|
||||
| "direct textarea content"
|
||||
|
||||
#data
|
||||
textarea content with <em>pseudo</em> <foo>markup
|
||||
#errors
|
||||
#document-fragment
|
||||
textarea
|
||||
#document
|
||||
| "textarea content with <em>pseudo</em> <foo>markup"
|
||||
|
||||
#data
|
||||
this is CDATA inside a <style> element
|
||||
#errors
|
||||
#document-fragment
|
||||
style
|
||||
#document
|
||||
| "this is CDATA inside a <style> element"
|
||||
|
||||
#data
|
||||
</plaintext>
|
||||
#errors
|
||||
#document-fragment
|
||||
plaintext
|
||||
#document
|
||||
| "</plaintext>"
|
||||
|
||||
#data
|
||||
setting html's innerHTML
|
||||
#errors
|
||||
Line: 1 Col: 24 Unexpected EOF in inner html mode.
|
||||
#document-fragment
|
||||
html
|
||||
#document
|
||||
| <head>
|
||||
| <body>
|
||||
| "setting html's innerHTML"
|
||||
|
||||
#data
|
||||
<title>setting head's innerHTML</title>
|
||||
#errors
|
||||
#document-fragment
|
||||
head
|
||||
#document
|
||||
| <title>
|
||||
| "setting head's innerHTML"
|
||||
191
src/pkg/exp/html/testdata/webkit/tests5.dat
vendored
191
src/pkg/exp/html/testdata/webkit/tests5.dat
vendored
@@ -1,191 +0,0 @@
|
||||
#data
|
||||
<style> <!-- </style>x
|
||||
#errors
|
||||
Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
|
||||
Line: 1 Col: 22 Unexpected end of file. Expected end tag (style).
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <style>
|
||||
| " <!-- "
|
||||
| <body>
|
||||
| "x"
|
||||
|
||||
#data
|
||||
<style> <!-- </style> --> </style>x
|
||||
#errors
|
||||
Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <style>
|
||||
| " <!-- "
|
||||
| " "
|
||||
| <body>
|
||||
| "--> x"
|
||||
|
||||
#data
|
||||
<style> <!--> </style>x
|
||||
#errors
|
||||
Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <style>
|
||||
| " <!--> "
|
||||
| <body>
|
||||
| "x"
|
||||
|
||||
#data
|
||||
<style> <!---> </style>x
|
||||
#errors
|
||||
Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <style>
|
||||
| " <!---> "
|
||||
| <body>
|
||||
| "x"
|
||||
|
||||
#data
|
||||
<iframe> <!---> </iframe>x
|
||||
#errors
|
||||
Line: 1 Col: 8 Unexpected start tag (iframe). Expected DOCTYPE.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <iframe>
|
||||
| " <!---> "
|
||||
| "x"
|
||||
|
||||
#data
|
||||
<iframe> <!--- </iframe>->x</iframe> --> </iframe>x
|
||||
#errors
|
||||
Line: 1 Col: 8 Unexpected start tag (iframe). Expected DOCTYPE.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <iframe>
|
||||
| " <!--- "
|
||||
| "->x --> x"
|
||||
|
||||
#data
|
||||
<script> <!-- </script> --> </script>x
|
||||
#errors
|
||||
Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <script>
|
||||
| " <!-- "
|
||||
| " "
|
||||
| <body>
|
||||
| "--> x"
|
||||
|
||||
#data
|
||||
<title> <!-- </title> --> </title>x
|
||||
#errors
|
||||
Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <title>
|
||||
| " <!-- "
|
||||
| " "
|
||||
| <body>
|
||||
| "--> x"
|
||||
|
||||
#data
|
||||
<textarea> <!--- </textarea>->x</textarea> --> </textarea>x
|
||||
#errors
|
||||
Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <textarea>
|
||||
| " <!--- "
|
||||
| "->x --> x"
|
||||
|
||||
#data
|
||||
<style> <!</-- </style>x
|
||||
#errors
|
||||
Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <style>
|
||||
| " <!</-- "
|
||||
| <body>
|
||||
| "x"
|
||||
|
||||
#data
|
||||
<p><xmp></xmp>
|
||||
#errors
|
||||
XXX: Unknown
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <xmp>
|
||||
|
||||
#data
|
||||
<xmp> <!-- > --> </xmp>
|
||||
#errors
|
||||
Line: 1 Col: 5 Unexpected start tag (xmp). Expected DOCTYPE.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <xmp>
|
||||
| " <!-- > --> "
|
||||
|
||||
#data
|
||||
<title>&</title>
|
||||
#errors
|
||||
Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <title>
|
||||
| "&"
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<title><!--&--></title>
|
||||
#errors
|
||||
Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <title>
|
||||
| "<!--&-->"
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<title><!--</title>
|
||||
#errors
|
||||
Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
|
||||
Line: 1 Col: 19 Unexpected end of file. Expected end tag (title).
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <title>
|
||||
| "<!--"
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<noscript><!--</noscript>--></noscript>
|
||||
#errors
|
||||
Line: 1 Col: 10 Unexpected start tag (noscript). Expected DOCTYPE.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <noscript>
|
||||
| "<!--"
|
||||
| <body>
|
||||
| "-->"
|
||||
663
src/pkg/exp/html/testdata/webkit/tests6.dat
vendored
663
src/pkg/exp/html/testdata/webkit/tests6.dat
vendored
@@ -1,663 +0,0 @@
|
||||
#data
|
||||
<!doctype html></head> <head>
|
||||
#errors
|
||||
Line: 1 Col: 29 Unexpected start tag head. Ignored.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| " "
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!doctype html><form><div></form><div>
|
||||
#errors
|
||||
33: End tag "form" seen but there were unclosed elements.
|
||||
38: End of file seen and there were open elements.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <form>
|
||||
| <div>
|
||||
| <div>
|
||||
|
||||
#data
|
||||
<!doctype html><title>&</title>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <title>
|
||||
| "&"
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!doctype html><title><!--&--></title>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <title>
|
||||
| "<!--&-->"
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!doctype>
|
||||
#errors
|
||||
Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
|
||||
Line: 1 Col: 10 Unexpected > character. Expected DOCTYPE name.
|
||||
Line: 1 Col: 10 Erroneous DOCTYPE.
|
||||
#document
|
||||
| <!DOCTYPE >
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!---x
|
||||
#errors
|
||||
Line: 1 Col: 6 Unexpected end of file in comment.
|
||||
Line: 1 Col: 6 Unexpected End of file. Expected DOCTYPE.
|
||||
#document
|
||||
| <!-- -x -->
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<body>
|
||||
<div>
|
||||
#errors
|
||||
Line: 1 Col: 6 Unexpected start tag (body).
|
||||
Line: 2 Col: 5 Expected closing tag. Unexpected end of file.
|
||||
#document-fragment
|
||||
div
|
||||
#document
|
||||
| "
|
||||
"
|
||||
| <div>
|
||||
|
||||
#data
|
||||
<frameset></frameset>
|
||||
foo
|
||||
#errors
|
||||
Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
|
||||
Line: 2 Col: 3 Unexpected non-space characters in the after frameset phase. Ignored.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <frameset>
|
||||
| "
|
||||
"
|
||||
|
||||
#data
|
||||
<frameset></frameset>
|
||||
<noframes>
|
||||
#errors
|
||||
Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
|
||||
Line: 2 Col: 10 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <frameset>
|
||||
| "
|
||||
"
|
||||
| <noframes>
|
||||
|
||||
#data
|
||||
<frameset></frameset>
|
||||
<div>
|
||||
#errors
|
||||
Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
|
||||
Line: 2 Col: 5 Unexpected start tag (div) in the after frameset phase. Ignored.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <frameset>
|
||||
| "
|
||||
"
|
||||
|
||||
#data
|
||||
<frameset></frameset>
|
||||
</html>
|
||||
#errors
|
||||
Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <frameset>
|
||||
| "
|
||||
"
|
||||
|
||||
#data
|
||||
<frameset></frameset>
|
||||
</div>
|
||||
#errors
|
||||
Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
|
||||
Line: 2 Col: 6 Unexpected end tag (div) in the after frameset phase. Ignored.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <frameset>
|
||||
| "
|
||||
"
|
||||
|
||||
#data
|
||||
<form><form>
|
||||
#errors
|
||||
Line: 1 Col: 6 Unexpected start tag (form). Expected DOCTYPE.
|
||||
Line: 1 Col: 12 Unexpected start tag (form).
|
||||
Line: 1 Col: 12 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <form>
|
||||
|
||||
#data
|
||||
<button><button>
|
||||
#errors
|
||||
Line: 1 Col: 8 Unexpected start tag (button). Expected DOCTYPE.
|
||||
Line: 1 Col: 16 Unexpected start tag (button) implies end tag (button).
|
||||
Line: 1 Col: 16 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <button>
|
||||
| <button>
|
||||
|
||||
#data
|
||||
<table><tr><td></th>
|
||||
#errors
|
||||
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
|
||||
Line: 1 Col: 20 Unexpected end tag (th). Ignored.
|
||||
Line: 1 Col: 20 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
|
||||
#data
|
||||
<table><caption><td>
|
||||
#errors
|
||||
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
|
||||
Line: 1 Col: 20 Unexpected end tag (td). Ignored.
|
||||
Line: 1 Col: 20 Unexpected table cell start tag (td) in the table body phase.
|
||||
Line: 1 Col: 20 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <caption>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
|
||||
#data
|
||||
<table><caption><div>
|
||||
#errors
|
||||
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
|
||||
Line: 1 Col: 21 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <caption>
|
||||
| <div>
|
||||
|
||||
#data
|
||||
</caption><div>
|
||||
#errors
|
||||
Line: 1 Col: 10 Unexpected end tag (caption). Ignored.
|
||||
Line: 1 Col: 15 Expected closing tag. Unexpected end of file.
|
||||
#document-fragment
|
||||
caption
|
||||
#document
|
||||
| <div>
|
||||
|
||||
#data
|
||||
<table><caption><div></caption>
|
||||
#errors
|
||||
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
|
||||
Line: 1 Col: 31 Unexpected end tag (caption). Missing end tag (div).
|
||||
Line: 1 Col: 31 Unexpected end of file. Expected table content.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <caption>
|
||||
| <div>
|
||||
|
||||
#data
|
||||
<table><caption></table>
|
||||
#errors
|
||||
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
|
||||
Line: 1 Col: 24 Unexpected end table tag in caption. Generates implied end caption.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <caption>
|
||||
|
||||
#data
|
||||
</table><div>
|
||||
#errors
|
||||
Line: 1 Col: 8 Unexpected end table tag in caption. Generates implied end caption.
|
||||
Line: 1 Col: 8 Unexpected end tag (caption). Ignored.
|
||||
Line: 1 Col: 13 Expected closing tag. Unexpected end of file.
|
||||
#document-fragment
|
||||
caption
|
||||
#document
|
||||
| <div>
|
||||
|
||||
#data
|
||||
<table><caption></body></col></colgroup></html></tbody></td></tfoot></th></thead></tr>
|
||||
#errors
|
||||
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
|
||||
Line: 1 Col: 23 Unexpected end tag (body). Ignored.
|
||||
Line: 1 Col: 29 Unexpected end tag (col). Ignored.
|
||||
Line: 1 Col: 40 Unexpected end tag (colgroup). Ignored.
|
||||
Line: 1 Col: 47 Unexpected end tag (html). Ignored.
|
||||
Line: 1 Col: 55 Unexpected end tag (tbody). Ignored.
|
||||
Line: 1 Col: 60 Unexpected end tag (td). Ignored.
|
||||
Line: 1 Col: 68 Unexpected end tag (tfoot). Ignored.
|
||||
Line: 1 Col: 73 Unexpected end tag (th). Ignored.
|
||||
Line: 1 Col: 81 Unexpected end tag (thead). Ignored.
|
||||
Line: 1 Col: 86 Unexpected end tag (tr). Ignored.
|
||||
Line: 1 Col: 86 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <caption>
|
||||
|
||||
#data
|
||||
<table><caption><div></div>
|
||||
#errors
|
||||
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
|
||||
Line: 1 Col: 27 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <caption>
|
||||
| <div>
|
||||
|
||||
#data
|
||||
<table><tr><td></body></caption></col></colgroup></html>
|
||||
#errors
|
||||
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
|
||||
Line: 1 Col: 22 Unexpected end tag (body). Ignored.
|
||||
Line: 1 Col: 32 Unexpected end tag (caption). Ignored.
|
||||
Line: 1 Col: 38 Unexpected end tag (col). Ignored.
|
||||
Line: 1 Col: 49 Unexpected end tag (colgroup). Ignored.
|
||||
Line: 1 Col: 56 Unexpected end tag (html). Ignored.
|
||||
Line: 1 Col: 56 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
|
||||
#data
|
||||
</table></tbody></tfoot></thead></tr><div>
|
||||
#errors
|
||||
Line: 1 Col: 8 Unexpected end tag (table). Ignored.
|
||||
Line: 1 Col: 16 Unexpected end tag (tbody). Ignored.
|
||||
Line: 1 Col: 24 Unexpected end tag (tfoot). Ignored.
|
||||
Line: 1 Col: 32 Unexpected end tag (thead). Ignored.
|
||||
Line: 1 Col: 37 Unexpected end tag (tr). Ignored.
|
||||
Line: 1 Col: 42 Expected closing tag. Unexpected end of file.
|
||||
#document-fragment
|
||||
td
|
||||
#document
|
||||
| <div>
|
||||
|
||||
#data
|
||||
<table><colgroup>foo
|
||||
#errors
|
||||
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
|
||||
Line: 1 Col: 20 Unexpected non-space characters in table context caused voodoo mode.
|
||||
Line: 1 Col: 20 Unexpected end of file. Expected table content.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "foo"
|
||||
| <table>
|
||||
| <colgroup>
|
||||
|
||||
#data
|
||||
foo<col>
|
||||
#errors
|
||||
Line: 1 Col: 3 Unexpected end tag (colgroup). Ignored.
|
||||
#document-fragment
|
||||
colgroup
|
||||
#document
|
||||
| <col>
|
||||
|
||||
#data
|
||||
<table><colgroup></col>
|
||||
#errors
|
||||
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
|
||||
Line: 1 Col: 23 This element (col) has no end tag.
|
||||
Line: 1 Col: 23 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <colgroup>
|
||||
|
||||
#data
|
||||
<frameset><div>
|
||||
#errors
|
||||
Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
|
||||
Line: 1 Col: 15 Unexpected start tag token (div) in the frameset phase. Ignored.
|
||||
Line: 1 Col: 15 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <frameset>
|
||||
|
||||
#data
|
||||
</frameset><frame>
|
||||
#errors
|
||||
Line: 1 Col: 11 Unexpected end tag token (frameset) in the frameset phase (innerHTML).
|
||||
#document-fragment
|
||||
frameset
|
||||
#document
|
||||
| <frame>
|
||||
|
||||
#data
|
||||
<frameset></div>
|
||||
#errors
|
||||
Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
|
||||
Line: 1 Col: 16 Unexpected end tag token (div) in the frameset phase. Ignored.
|
||||
Line: 1 Col: 16 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <frameset>
|
||||
|
||||
#data
|
||||
</body><div>
|
||||
#errors
|
||||
Line: 1 Col: 7 Unexpected end tag (body). Ignored.
|
||||
Line: 1 Col: 12 Expected closing tag. Unexpected end of file.
|
||||
#document-fragment
|
||||
body
|
||||
#document
|
||||
| <div>
|
||||
|
||||
#data
|
||||
<table><tr><div>
|
||||
#errors
|
||||
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
|
||||
Line: 1 Col: 16 Unexpected start tag (div) in table context caused voodoo mode.
|
||||
Line: 1 Col: 16 Unexpected end of file. Expected table content.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
|
||||
#data
|
||||
</tr><td>
|
||||
#errors
|
||||
Line: 1 Col: 5 Unexpected end tag (tr). Ignored.
|
||||
#document-fragment
|
||||
tr
|
||||
#document
|
||||
| <td>
|
||||
|
||||
#data
|
||||
</tbody></tfoot></thead><td>
|
||||
#errors
|
||||
Line: 1 Col: 8 Unexpected end tag (tbody). Ignored.
|
||||
Line: 1 Col: 16 Unexpected end tag (tfoot). Ignored.
|
||||
Line: 1 Col: 24 Unexpected end tag (thead). Ignored.
|
||||
#document-fragment
|
||||
tr
|
||||
#document
|
||||
| <td>
|
||||
|
||||
#data
|
||||
<table><tr><div><td>
|
||||
#errors
|
||||
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
|
||||
Line: 1 Col: 16 Unexpected start tag (div) in table context caused voodoo mode.
|
||||
Line: 1 Col: 20 Unexpected implied end tag (div) in the table row phase.
|
||||
Line: 1 Col: 20 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
|
||||
#data
|
||||
<caption><col><colgroup><tbody><tfoot><thead><tr>
|
||||
#errors
|
||||
Line: 1 Col: 9 Unexpected start tag (caption).
|
||||
Line: 1 Col: 14 Unexpected start tag (col).
|
||||
Line: 1 Col: 24 Unexpected start tag (colgroup).
|
||||
Line: 1 Col: 31 Unexpected start tag (tbody).
|
||||
Line: 1 Col: 38 Unexpected start tag (tfoot).
|
||||
Line: 1 Col: 45 Unexpected start tag (thead).
|
||||
Line: 1 Col: 49 Unexpected end of file. Expected table content.
|
||||
#document-fragment
|
||||
tbody
|
||||
#document
|
||||
| <tr>
|
||||
|
||||
#data
|
||||
<table><tbody></thead>
|
||||
#errors
|
||||
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
|
||||
Line: 1 Col: 22 Unexpected end tag (thead) in the table body phase. Ignored.
|
||||
Line: 1 Col: 22 Unexpected end of file. Expected table content.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
|
||||
#data
|
||||
</table><tr>
|
||||
#errors
|
||||
Line: 1 Col: 8 Unexpected end tag (table). Ignored.
|
||||
Line: 1 Col: 12 Unexpected end of file. Expected table content.
|
||||
#document-fragment
|
||||
tbody
|
||||
#document
|
||||
| <tr>
|
||||
|
||||
#data
|
||||
<table><tbody></body></caption></col></colgroup></html></td></th></tr>
|
||||
#errors
|
||||
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
|
||||
Line: 1 Col: 21 Unexpected end tag (body) in the table body phase. Ignored.
|
||||
Line: 1 Col: 31 Unexpected end tag (caption) in the table body phase. Ignored.
|
||||
Line: 1 Col: 37 Unexpected end tag (col) in the table body phase. Ignored.
|
||||
Line: 1 Col: 48 Unexpected end tag (colgroup) in the table body phase. Ignored.
|
||||
Line: 1 Col: 55 Unexpected end tag (html) in the table body phase. Ignored.
|
||||
Line: 1 Col: 60 Unexpected end tag (td) in the table body phase. Ignored.
|
||||
Line: 1 Col: 65 Unexpected end tag (th) in the table body phase. Ignored.
|
||||
Line: 1 Col: 70 Unexpected end tag (tr) in the table body phase. Ignored.
|
||||
Line: 1 Col: 70 Unexpected end of file. Expected table content.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
|
||||
#data
|
||||
<table><tbody></div>
|
||||
#errors
|
||||
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
|
||||
Line: 1 Col: 20 Unexpected end tag (div) in table context caused voodoo mode.
|
||||
Line: 1 Col: 20 End tag (div) seen too early. Expected other end tag.
|
||||
Line: 1 Col: 20 Unexpected end of file. Expected table content.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
|
||||
#data
|
||||
<table><table>
|
||||
#errors
|
||||
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
|
||||
Line: 1 Col: 14 Unexpected start tag (table) implies end tag (table).
|
||||
Line: 1 Col: 14 Unexpected end of file. Expected table content.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <table>
|
||||
|
||||
#data
|
||||
<table></body></caption></col></colgroup></html></tbody></td></tfoot></th></thead></tr>
|
||||
#errors
|
||||
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
|
||||
Line: 1 Col: 14 Unexpected end tag (body). Ignored.
|
||||
Line: 1 Col: 24 Unexpected end tag (caption). Ignored.
|
||||
Line: 1 Col: 30 Unexpected end tag (col). Ignored.
|
||||
Line: 1 Col: 41 Unexpected end tag (colgroup). Ignored.
|
||||
Line: 1 Col: 48 Unexpected end tag (html). Ignored.
|
||||
Line: 1 Col: 56 Unexpected end tag (tbody). Ignored.
|
||||
Line: 1 Col: 61 Unexpected end tag (td). Ignored.
|
||||
Line: 1 Col: 69 Unexpected end tag (tfoot). Ignored.
|
||||
Line: 1 Col: 74 Unexpected end tag (th). Ignored.
|
||||
Line: 1 Col: 82 Unexpected end tag (thead). Ignored.
|
||||
Line: 1 Col: 87 Unexpected end tag (tr). Ignored.
|
||||
Line: 1 Col: 87 Unexpected end of file. Expected table content.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
|
||||
#data
|
||||
</table><tr>
|
||||
#errors
|
||||
Line: 1 Col: 8 Unexpected end tag (table). Ignored.
|
||||
Line: 1 Col: 12 Unexpected end of file. Expected table content.
|
||||
#document-fragment
|
||||
table
|
||||
#document
|
||||
| <tbody>
|
||||
| <tr>
|
||||
|
||||
#data
|
||||
<body></body></html>
|
||||
#errors
|
||||
Line: 1 Col: 20 Unexpected html end tag in inner html mode.
|
||||
Line: 1 Col: 20 Unexpected EOF in inner html mode.
|
||||
#document-fragment
|
||||
html
|
||||
#document
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<html><frameset></frameset></html>
|
||||
#errors
|
||||
Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <frameset>
|
||||
| " "
|
||||
|
||||
#data
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html></html>
|
||||
#errors
|
||||
Line: 1 Col: 50 Erroneous DOCTYPE.
|
||||
Line: 1 Col: 63 Unexpected end tag (html) after the (implied) root element.
|
||||
#document
|
||||
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "">
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<param><frameset></frameset>
|
||||
#errors
|
||||
Line: 1 Col: 7 Unexpected start tag (param). Expected DOCTYPE.
|
||||
Line: 1 Col: 17 Unexpected start tag (frameset).
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <frameset>
|
||||
|
||||
#data
|
||||
<source><frameset></frameset>
|
||||
#errors
|
||||
Line: 1 Col: 7 Unexpected start tag (source). Expected DOCTYPE.
|
||||
Line: 1 Col: 17 Unexpected start tag (frameset).
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <frameset>
|
||||
|
||||
#data
|
||||
<track><frameset></frameset>
|
||||
#errors
|
||||
Line: 1 Col: 7 Unexpected start tag (track). Expected DOCTYPE.
|
||||
Line: 1 Col: 17 Unexpected start tag (frameset).
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <frameset>
|
||||
|
||||
#data
|
||||
</html><frameset></frameset>
|
||||
#errors
|
||||
7: End tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
|
||||
17: Stray “frameset” start tag.
|
||||
17: “frameset” start tag seen.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <frameset>
|
||||
|
||||
#data
|
||||
</body><frameset></frameset>
|
||||
#errors
|
||||
7: End tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
|
||||
17: Stray “frameset” start tag.
|
||||
17: “frameset” start tag seen.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <frameset>
|
||||
390
src/pkg/exp/html/testdata/webkit/tests7.dat
vendored
390
src/pkg/exp/html/testdata/webkit/tests7.dat
vendored
@@ -1,390 +0,0 @@
|
||||
#data
|
||||
<!doctype html><body><title>X</title>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <title>
|
||||
| "X"
|
||||
|
||||
#data
|
||||
<!doctype html><table><title>X</title></table>
|
||||
#errors
|
||||
Line: 1 Col: 29 Unexpected start tag (title) in table context caused voodoo mode.
|
||||
Line: 1 Col: 38 Unexpected end tag (title) in table context caused voodoo mode.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <title>
|
||||
| "X"
|
||||
| <table>
|
||||
|
||||
#data
|
||||
<!doctype html><head></head><title>X</title>
|
||||
#errors
|
||||
Line: 1 Col: 35 Unexpected start tag (title) that can be in head. Moved.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <title>
|
||||
| "X"
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!doctype html></head><title>X</title>
|
||||
#errors
|
||||
Line: 1 Col: 29 Unexpected start tag (title) that can be in head. Moved.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <title>
|
||||
| "X"
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!doctype html><table><meta></table>
|
||||
#errors
|
||||
Line: 1 Col: 28 Unexpected start tag (meta) in table context caused voodoo mode.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <meta>
|
||||
| <table>
|
||||
|
||||
#data
|
||||
<!doctype html><table>X<tr><td><table> <meta></table></table>
|
||||
#errors
|
||||
Line: 1 Col: 23 Unexpected non-space characters in table context caused voodoo mode.
|
||||
Line: 1 Col: 45 Unexpected start tag (meta) in table context caused voodoo mode.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "X"
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| <meta>
|
||||
| <table>
|
||||
| " "
|
||||
|
||||
#data
|
||||
<!doctype html><html> <head>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!doctype html> <head>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!doctype html><table><style> <tr>x </style> </table>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <style>
|
||||
| " <tr>x "
|
||||
| " "
|
||||
|
||||
#data
|
||||
<!doctype html><table><TBODY><script> <tr>x </script> </table>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <script>
|
||||
| " <tr>x "
|
||||
| " "
|
||||
|
||||
#data
|
||||
<!doctype html><p><applet><p>X</p></applet>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <applet>
|
||||
| <p>
|
||||
| "X"
|
||||
|
||||
#data
|
||||
<!doctype html><listing>
|
||||
X</listing>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <listing>
|
||||
| "X"
|
||||
|
||||
#data
|
||||
<!doctype html><select><input>X
|
||||
#errors
|
||||
Line: 1 Col: 30 Unexpected input start tag in the select phase.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <select>
|
||||
| <input>
|
||||
| "X"
|
||||
|
||||
#data
|
||||
<!doctype html><select><select>X
|
||||
#errors
|
||||
Line: 1 Col: 31 Unexpected select start tag in the select phase treated as select end tag.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <select>
|
||||
| "X"
|
||||
|
||||
#data
|
||||
<!doctype html><table><input type=hidDEN></table>
|
||||
#errors
|
||||
Line: 1 Col: 41 Unexpected input with type hidden in table context.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <input>
|
||||
| type="hidDEN"
|
||||
|
||||
#data
|
||||
<!doctype html><table>X<input type=hidDEN></table>
|
||||
#errors
|
||||
Line: 1 Col: 23 Unexpected non-space characters in table context caused voodoo mode.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "X"
|
||||
| <table>
|
||||
| <input>
|
||||
| type="hidDEN"
|
||||
|
||||
#data
|
||||
<!doctype html><table> <input type=hidDEN></table>
|
||||
#errors
|
||||
Line: 1 Col: 43 Unexpected input with type hidden in table context.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| " "
|
||||
| <input>
|
||||
| type="hidDEN"
|
||||
|
||||
#data
|
||||
<!doctype html><table> <input type='hidDEN'></table>
|
||||
#errors
|
||||
Line: 1 Col: 45 Unexpected input with type hidden in table context.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| " "
|
||||
| <input>
|
||||
| type="hidDEN"
|
||||
|
||||
#data
|
||||
<!doctype html><table><input type=" hidden"><input type=hidDEN></table>
|
||||
#errors
|
||||
Line: 1 Col: 44 Unexpected start tag (input) in table context caused voodoo mode.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <input>
|
||||
| type=" hidden"
|
||||
| <table>
|
||||
| <input>
|
||||
| type="hidDEN"
|
||||
|
||||
#data
|
||||
<!doctype html><table><select>X<tr>
|
||||
#errors
|
||||
Line: 1 Col: 30 Unexpected start tag (select) in table context caused voodoo mode.
|
||||
Line: 1 Col: 35 Unexpected table element start tag (trs) in the select in table phase.
|
||||
Line: 1 Col: 35 Unexpected end of file. Expected table content.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <select>
|
||||
| "X"
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
|
||||
#data
|
||||
<!doctype html><select>X</select>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <select>
|
||||
| "X"
|
||||
|
||||
#data
|
||||
<!DOCTYPE hTmL><html></html>
|
||||
#errors
|
||||
Line: 1 Col: 28 Unexpected end tag (html) after the (implied) root element.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<!DOCTYPE HTML><html></html>
|
||||
#errors
|
||||
Line: 1 Col: 28 Unexpected end tag (html) after the (implied) root element.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<body>X</body></body>
|
||||
#errors
|
||||
Line: 1 Col: 21 Unexpected end tag token (body) in the after body phase.
|
||||
Line: 1 Col: 21 Unexpected EOF in inner html mode.
|
||||
#document-fragment
|
||||
html
|
||||
#document
|
||||
| <head>
|
||||
| <body>
|
||||
| "X"
|
||||
|
||||
#data
|
||||
<div><p>a</x> b
|
||||
#errors
|
||||
Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
|
||||
Line: 1 Col: 13 Unexpected end tag (x). Ignored.
|
||||
Line: 1 Col: 15 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| <p>
|
||||
| "a b"
|
||||
|
||||
#data
|
||||
<table><tr><td><code></code> </table>
|
||||
#errors
|
||||
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| <code>
|
||||
| " "
|
||||
|
||||
#data
|
||||
<table><b><tr><td>aaa</td></tr>bbb</table>ccc
|
||||
#errors
|
||||
XXX: Fix me
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <b>
|
||||
| <b>
|
||||
| "bbb"
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| "aaa"
|
||||
| <b>
|
||||
| "ccc"
|
||||
|
||||
#data
|
||||
A<table><tr> B</tr> B</table>
|
||||
#errors
|
||||
XXX: Fix me
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "A B B"
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
|
||||
#data
|
||||
A<table><tr> B</tr> </em>C</table>
|
||||
#errors
|
||||
XXX: Fix me
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "A BC"
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| " "
|
||||
|
||||
#data
|
||||
<select><keygen>
|
||||
#errors
|
||||
Not known
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <select>
|
||||
| <keygen>
|
||||
148
src/pkg/exp/html/testdata/webkit/tests8.dat
vendored
148
src/pkg/exp/html/testdata/webkit/tests8.dat
vendored
@@ -1,148 +0,0 @@
|
||||
#data
|
||||
<div>
|
||||
<div></div>
|
||||
</span>x
|
||||
#errors
|
||||
Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
|
||||
Line: 3 Col: 7 Unexpected end tag (span). Ignored.
|
||||
Line: 3 Col: 8 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| "
|
||||
"
|
||||
| <div>
|
||||
| "
|
||||
x"
|
||||
|
||||
#data
|
||||
<div>x<div></div>
|
||||
</span>x
|
||||
#errors
|
||||
Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
|
||||
Line: 2 Col: 7 Unexpected end tag (span). Ignored.
|
||||
Line: 2 Col: 8 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| "x"
|
||||
| <div>
|
||||
| "
|
||||
x"
|
||||
|
||||
#data
|
||||
<div>x<div></div>x</span>x
|
||||
#errors
|
||||
Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
|
||||
Line: 1 Col: 25 Unexpected end tag (span). Ignored.
|
||||
Line: 1 Col: 26 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| "x"
|
||||
| <div>
|
||||
| "xx"
|
||||
|
||||
#data
|
||||
<div>x<div></div>y</span>z
|
||||
#errors
|
||||
Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
|
||||
Line: 1 Col: 25 Unexpected end tag (span). Ignored.
|
||||
Line: 1 Col: 26 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| "x"
|
||||
| <div>
|
||||
| "yz"
|
||||
|
||||
#data
|
||||
<table><div>x<div></div>x</span>x
|
||||
#errors
|
||||
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
|
||||
Line: 1 Col: 12 Unexpected start tag (div) in table context caused voodoo mode.
|
||||
Line: 1 Col: 18 Unexpected start tag (div) in table context caused voodoo mode.
|
||||
Line: 1 Col: 24 Unexpected end tag (div) in table context caused voodoo mode.
|
||||
Line: 1 Col: 32 Unexpected end tag (span) in table context caused voodoo mode.
|
||||
Line: 1 Col: 32 Unexpected end tag (span). Ignored.
|
||||
Line: 1 Col: 33 Unexpected end of file. Expected table content.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| "x"
|
||||
| <div>
|
||||
| "xx"
|
||||
| <table>
|
||||
|
||||
#data
|
||||
x<table>x
|
||||
#errors
|
||||
Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
|
||||
Line: 1 Col: 9 Unexpected non-space characters in table context caused voodoo mode.
|
||||
Line: 1 Col: 9 Unexpected end of file. Expected table content.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "xx"
|
||||
| <table>
|
||||
|
||||
#data
|
||||
x<table><table>x
|
||||
#errors
|
||||
Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
|
||||
Line: 1 Col: 15 Unexpected start tag (table) implies end tag (table).
|
||||
Line: 1 Col: 16 Unexpected non-space characters in table context caused voodoo mode.
|
||||
Line: 1 Col: 16 Unexpected end of file. Expected table content.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "x"
|
||||
| <table>
|
||||
| "x"
|
||||
| <table>
|
||||
|
||||
#data
|
||||
<b>a<div></div><div></b>y
|
||||
#errors
|
||||
Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
|
||||
Line: 1 Col: 24 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
|
||||
Line: 1 Col: 25 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <b>
|
||||
| "a"
|
||||
| <div>
|
||||
| <div>
|
||||
| <b>
|
||||
| "y"
|
||||
|
||||
#data
|
||||
<a><div><p></a>
|
||||
#errors
|
||||
Line: 1 Col: 3 Unexpected start tag (a). Expected DOCTYPE.
|
||||
Line: 1 Col: 15 End tag (a) violates step 1, paragraph 3 of the adoption agency algorithm.
|
||||
Line: 1 Col: 15 End tag (a) violates step 1, paragraph 3 of the adoption agency algorithm.
|
||||
Line: 1 Col: 15 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <a>
|
||||
| <div>
|
||||
| <a>
|
||||
| <p>
|
||||
| <a>
|
||||
457
src/pkg/exp/html/testdata/webkit/tests9.dat
vendored
457
src/pkg/exp/html/testdata/webkit/tests9.dat
vendored
@@ -1,457 +0,0 @@
|
||||
#data
|
||||
<!DOCTYPE html><math></math>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><math></math>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><math><mi>
|
||||
#errors
|
||||
25: End of file in a foreign namespace context.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
| <math mi>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><math><annotation-xml><svg><u>
|
||||
#errors
|
||||
45: HTML start tag “u” in a foreign namespace context.
|
||||
45: End of file seen and there were open elements.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
| <math annotation-xml>
|
||||
| <svg svg>
|
||||
| <u>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><select><math></math></select>
|
||||
#errors
|
||||
Line: 1 Col: 35 Unexpected start tag token (math) in the select phase. Ignored.
|
||||
Line: 1 Col: 42 Unexpected end tag (math) in the select phase. Ignored.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <select>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><select><option><math></math></option></select>
|
||||
#errors
|
||||
Line: 1 Col: 43 Unexpected start tag token (math) in the select phase. Ignored.
|
||||
Line: 1 Col: 50 Unexpected end tag (math) in the select phase. Ignored.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <select>
|
||||
| <option>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><table><math></math></table>
|
||||
#errors
|
||||
Line: 1 Col: 34 Unexpected start tag (math) in table context caused voodoo mode.
|
||||
Line: 1 Col: 41 Unexpected end tag (math) in table context caused voodoo mode.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
| <table>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><table><math><mi>foo</mi></math></table>
|
||||
#errors
|
||||
Line: 1 Col: 34 Unexpected start tag (math) in table context caused voodoo mode.
|
||||
Line: 1 Col: 46 Unexpected end tag (mi) in table context caused voodoo mode.
|
||||
Line: 1 Col: 53 Unexpected end tag (math) in table context caused voodoo mode.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
| <math mi>
|
||||
| "foo"
|
||||
| <table>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><table><math><mi>foo</mi><mi>bar</mi></math></table>
|
||||
#errors
|
||||
Line: 1 Col: 34 Unexpected start tag (math) in table context caused voodoo mode.
|
||||
Line: 1 Col: 46 Unexpected end tag (mi) in table context caused voodoo mode.
|
||||
Line: 1 Col: 58 Unexpected end tag (mi) in table context caused voodoo mode.
|
||||
Line: 1 Col: 65 Unexpected end tag (math) in table context caused voodoo mode.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
| <math mi>
|
||||
| "foo"
|
||||
| <math mi>
|
||||
| "bar"
|
||||
| <table>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><table><tbody><math><mi>foo</mi><mi>bar</mi></math></tbody></table>
|
||||
#errors
|
||||
Line: 1 Col: 41 Unexpected start tag (math) in table context caused voodoo mode.
|
||||
Line: 1 Col: 53 Unexpected end tag (mi) in table context caused voodoo mode.
|
||||
Line: 1 Col: 65 Unexpected end tag (mi) in table context caused voodoo mode.
|
||||
Line: 1 Col: 72 Unexpected end tag (math) in table context caused voodoo mode.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
| <math mi>
|
||||
| "foo"
|
||||
| <math mi>
|
||||
| "bar"
|
||||
| <table>
|
||||
| <tbody>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><table><tbody><tr><math><mi>foo</mi><mi>bar</mi></math></tr></tbody></table>
|
||||
#errors
|
||||
Line: 1 Col: 45 Unexpected start tag (math) in table context caused voodoo mode.
|
||||
Line: 1 Col: 57 Unexpected end tag (mi) in table context caused voodoo mode.
|
||||
Line: 1 Col: 69 Unexpected end tag (mi) in table context caused voodoo mode.
|
||||
Line: 1 Col: 76 Unexpected end tag (math) in table context caused voodoo mode.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
| <math mi>
|
||||
| "foo"
|
||||
| <math mi>
|
||||
| "bar"
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><table><tbody><tr><td><math><mi>foo</mi><mi>bar</mi></math></td></tr></tbody></table>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| <math math>
|
||||
| <math mi>
|
||||
| "foo"
|
||||
| <math mi>
|
||||
| "bar"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><table><tbody><tr><td><math><mi>foo</mi><mi>bar</mi></math><p>baz</td></tr></tbody></table>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| <math math>
|
||||
| <math mi>
|
||||
| "foo"
|
||||
| <math mi>
|
||||
| "bar"
|
||||
| <p>
|
||||
| "baz"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><table><caption><math><mi>foo</mi><mi>bar</mi></math><p>baz</caption></table>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <caption>
|
||||
| <math math>
|
||||
| <math mi>
|
||||
| "foo"
|
||||
| <math mi>
|
||||
| "bar"
|
||||
| <p>
|
||||
| "baz"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><table><caption><math><mi>foo</mi><mi>bar</mi><p>baz</table><p>quux
|
||||
#errors
|
||||
Line: 1 Col: 70 HTML start tag "p" in a foreign namespace context.
|
||||
Line: 1 Col: 81 Unexpected end table tag in caption. Generates implied end caption.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <caption>
|
||||
| <math math>
|
||||
| <math mi>
|
||||
| "foo"
|
||||
| <math mi>
|
||||
| "bar"
|
||||
| <p>
|
||||
| "baz"
|
||||
| <p>
|
||||
| "quux"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><table><caption><math><mi>foo</mi><mi>bar</mi>baz</table><p>quux
|
||||
#errors
|
||||
Line: 1 Col: 78 Unexpected end table tag in caption. Generates implied end caption.
|
||||
Line: 1 Col: 78 Unexpected end tag (caption). Missing end tag (math).
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <caption>
|
||||
| <math math>
|
||||
| <math mi>
|
||||
| "foo"
|
||||
| <math mi>
|
||||
| "bar"
|
||||
| "baz"
|
||||
| <p>
|
||||
| "quux"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><table><colgroup><math><mi>foo</mi><mi>bar</mi><p>baz</table><p>quux
|
||||
#errors
|
||||
Line: 1 Col: 44 Unexpected start tag (math) in table context caused voodoo mode.
|
||||
Line: 1 Col: 56 Unexpected end tag (mi) in table context caused voodoo mode.
|
||||
Line: 1 Col: 68 Unexpected end tag (mi) in table context caused voodoo mode.
|
||||
Line: 1 Col: 71 HTML start tag "p" in a foreign namespace context.
|
||||
Line: 1 Col: 71 Unexpected start tag (p) in table context caused voodoo mode.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
| <math mi>
|
||||
| "foo"
|
||||
| <math mi>
|
||||
| "bar"
|
||||
| <p>
|
||||
| "baz"
|
||||
| <table>
|
||||
| <colgroup>
|
||||
| <p>
|
||||
| "quux"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><table><tr><td><select><math><mi>foo</mi><mi>bar</mi><p>baz</table><p>quux
|
||||
#errors
|
||||
Line: 1 Col: 50 Unexpected start tag token (math) in the select phase. Ignored.
|
||||
Line: 1 Col: 54 Unexpected start tag token (mi) in the select phase. Ignored.
|
||||
Line: 1 Col: 62 Unexpected end tag (mi) in the select phase. Ignored.
|
||||
Line: 1 Col: 66 Unexpected start tag token (mi) in the select phase. Ignored.
|
||||
Line: 1 Col: 74 Unexpected end tag (mi) in the select phase. Ignored.
|
||||
Line: 1 Col: 77 Unexpected start tag token (p) in the select phase. Ignored.
|
||||
Line: 1 Col: 88 Unexpected table element end tag (tables) in the select in table phase.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| <select>
|
||||
| "foobarbaz"
|
||||
| <p>
|
||||
| "quux"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body><table><select><math><mi>foo</mi><mi>bar</mi><p>baz</table><p>quux
|
||||
#errors
|
||||
Line: 1 Col: 36 Unexpected start tag (select) in table context caused voodoo mode.
|
||||
Line: 1 Col: 42 Unexpected start tag token (math) in the select phase. Ignored.
|
||||
Line: 1 Col: 46 Unexpected start tag token (mi) in the select phase. Ignored.
|
||||
Line: 1 Col: 54 Unexpected end tag (mi) in the select phase. Ignored.
|
||||
Line: 1 Col: 58 Unexpected start tag token (mi) in the select phase. Ignored.
|
||||
Line: 1 Col: 66 Unexpected end tag (mi) in the select phase. Ignored.
|
||||
Line: 1 Col: 69 Unexpected start tag token (p) in the select phase. Ignored.
|
||||
Line: 1 Col: 80 Unexpected table element end tag (tables) in the select in table phase.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <select>
|
||||
| "foobarbaz"
|
||||
| <table>
|
||||
| <p>
|
||||
| "quux"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body></body></html><math><mi>foo</mi><mi>bar</mi><p>baz
|
||||
#errors
|
||||
Line: 1 Col: 41 Unexpected start tag (math).
|
||||
Line: 1 Col: 68 HTML start tag "p" in a foreign namespace context.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
| <math mi>
|
||||
| "foo"
|
||||
| <math mi>
|
||||
| "bar"
|
||||
| <p>
|
||||
| "baz"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body></body><math><mi>foo</mi><mi>bar</mi><p>baz
|
||||
#errors
|
||||
Line: 1 Col: 34 Unexpected start tag token (math) in the after body phase.
|
||||
Line: 1 Col: 61 HTML start tag "p" in a foreign namespace context.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
| <math mi>
|
||||
| "foo"
|
||||
| <math mi>
|
||||
| "bar"
|
||||
| <p>
|
||||
| "baz"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><frameset><math><mi></mi><mi></mi><p><span>
|
||||
#errors
|
||||
Line: 1 Col: 31 Unexpected start tag token (math) in the frameset phase. Ignored.
|
||||
Line: 1 Col: 35 Unexpected start tag token (mi) in the frameset phase. Ignored.
|
||||
Line: 1 Col: 40 Unexpected end tag token (mi) in the frameset phase. Ignored.
|
||||
Line: 1 Col: 44 Unexpected start tag token (mi) in the frameset phase. Ignored.
|
||||
Line: 1 Col: 49 Unexpected end tag token (mi) in the frameset phase. Ignored.
|
||||
Line: 1 Col: 52 Unexpected start tag token (p) in the frameset phase. Ignored.
|
||||
Line: 1 Col: 58 Unexpected start tag token (span) in the frameset phase. Ignored.
|
||||
Line: 1 Col: 58 Expected closing tag. Unexpected end of file.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <frameset>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><frameset></frameset><math><mi></mi><mi></mi><p><span>
|
||||
#errors
|
||||
Line: 1 Col: 42 Unexpected start tag (math) in the after frameset phase. Ignored.
|
||||
Line: 1 Col: 46 Unexpected start tag (mi) in the after frameset phase. Ignored.
|
||||
Line: 1 Col: 51 Unexpected end tag (mi) in the after frameset phase. Ignored.
|
||||
Line: 1 Col: 55 Unexpected start tag (mi) in the after frameset phase. Ignored.
|
||||
Line: 1 Col: 60 Unexpected end tag (mi) in the after frameset phase. Ignored.
|
||||
Line: 1 Col: 63 Unexpected start tag (p) in the after frameset phase. Ignored.
|
||||
Line: 1 Col: 69 Unexpected start tag (span) in the after frameset phase. Ignored.
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <frameset>
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body xlink:href=foo><math xlink:href=foo></math>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| xlink:href="foo"
|
||||
| <math math>
|
||||
| xlink href="foo"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body xlink:href=foo xml:lang=en><math><mi xml:lang=en xlink:href=foo></mi></math>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| xlink:href="foo"
|
||||
| xml:lang="en"
|
||||
| <math math>
|
||||
| <math mi>
|
||||
| xlink href="foo"
|
||||
| xml lang="en"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body xlink:href=foo xml:lang=en><math><mi xml:lang=en xlink:href=foo /></math>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| xlink:href="foo"
|
||||
| xml:lang="en"
|
||||
| <math math>
|
||||
| <math mi>
|
||||
| xlink href="foo"
|
||||
| xml lang="en"
|
||||
|
||||
#data
|
||||
<!DOCTYPE html><body xlink:href=foo xml:lang=en><math><mi xml:lang=en xlink:href=foo />bar</math>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| xlink:href="foo"
|
||||
| xml:lang="en"
|
||||
| <math math>
|
||||
| <math mi>
|
||||
| xlink href="foo"
|
||||
| xml lang="en"
|
||||
| "bar"
|
||||
@@ -1,733 +0,0 @@
|
||||
#data
|
||||
<body><span>
|
||||
#errors
|
||||
#document-fragment
|
||||
body
|
||||
#document
|
||||
| <span>
|
||||
|
||||
#data
|
||||
<span><body>
|
||||
#errors
|
||||
#document-fragment
|
||||
body
|
||||
#document
|
||||
| <span>
|
||||
|
||||
#data
|
||||
<span><body>
|
||||
#errors
|
||||
#document-fragment
|
||||
div
|
||||
#document
|
||||
| <span>
|
||||
|
||||
#data
|
||||
<body><span>
|
||||
#errors
|
||||
#document-fragment
|
||||
html
|
||||
#document
|
||||
| <head>
|
||||
| <body>
|
||||
| <span>
|
||||
|
||||
#data
|
||||
<frameset><span>
|
||||
#errors
|
||||
#document-fragment
|
||||
body
|
||||
#document
|
||||
| <span>
|
||||
|
||||
#data
|
||||
<span><frameset>
|
||||
#errors
|
||||
#document-fragment
|
||||
body
|
||||
#document
|
||||
| <span>
|
||||
|
||||
#data
|
||||
<span><frameset>
|
||||
#errors
|
||||
#document-fragment
|
||||
div
|
||||
#document
|
||||
| <span>
|
||||
|
||||
#data
|
||||
<frameset><span>
|
||||
#errors
|
||||
#document-fragment
|
||||
html
|
||||
#document
|
||||
| <head>
|
||||
| <frameset>
|
||||
|
||||
#data
|
||||
<table><tr>
|
||||
#errors
|
||||
#document-fragment
|
||||
table
|
||||
#document
|
||||
| <tbody>
|
||||
| <tr>
|
||||
|
||||
#data
|
||||
</table><tr>
|
||||
#errors
|
||||
#document-fragment
|
||||
table
|
||||
#document
|
||||
| <tbody>
|
||||
| <tr>
|
||||
|
||||
#data
|
||||
<a>
|
||||
#errors
|
||||
#document-fragment
|
||||
table
|
||||
#document
|
||||
| <a>
|
||||
|
||||
#data
|
||||
<a>
|
||||
#errors
|
||||
#document-fragment
|
||||
table
|
||||
#document
|
||||
| <a>
|
||||
|
||||
#data
|
||||
<a><caption>a
|
||||
#errors
|
||||
#document-fragment
|
||||
table
|
||||
#document
|
||||
| <a>
|
||||
| <caption>
|
||||
| "a"
|
||||
|
||||
#data
|
||||
<a><colgroup><col>
|
||||
#errors
|
||||
#document-fragment
|
||||
table
|
||||
#document
|
||||
| <a>
|
||||
| <colgroup>
|
||||
| <col>
|
||||
|
||||
#data
|
||||
<a><tbody><tr>
|
||||
#errors
|
||||
#document-fragment
|
||||
table
|
||||
#document
|
||||
| <a>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
|
||||
#data
|
||||
<a><tfoot><tr>
|
||||
#errors
|
||||
#document-fragment
|
||||
table
|
||||
#document
|
||||
| <a>
|
||||
| <tfoot>
|
||||
| <tr>
|
||||
|
||||
#data
|
||||
<a><thead><tr>
|
||||
#errors
|
||||
#document-fragment
|
||||
table
|
||||
#document
|
||||
| <a>
|
||||
| <thead>
|
||||
| <tr>
|
||||
|
||||
#data
|
||||
<a><tr>
|
||||
#errors
|
||||
#document-fragment
|
||||
table
|
||||
#document
|
||||
| <a>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
|
||||
#data
|
||||
<a><th>
|
||||
#errors
|
||||
#document-fragment
|
||||
table
|
||||
#document
|
||||
| <a>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <th>
|
||||
|
||||
#data
|
||||
<a><td>
|
||||
#errors
|
||||
#document-fragment
|
||||
table
|
||||
#document
|
||||
| <a>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
|
||||
#data
|
||||
<table></table><tbody>
|
||||
#errors
|
||||
#document-fragment
|
||||
caption
|
||||
#document
|
||||
| <table>
|
||||
|
||||
#data
|
||||
</table><span>
|
||||
#errors
|
||||
#document-fragment
|
||||
caption
|
||||
#document
|
||||
| <span>
|
||||
|
||||
#data
|
||||
<span></table>
|
||||
#errors
|
||||
#document-fragment
|
||||
caption
|
||||
#document
|
||||
| <span>
|
||||
|
||||
#data
|
||||
</caption><span>
|
||||
#errors
|
||||
#document-fragment
|
||||
caption
|
||||
#document
|
||||
| <span>
|
||||
|
||||
#data
|
||||
<span></caption><span>
|
||||
#errors
|
||||
#document-fragment
|
||||
caption
|
||||
#document
|
||||
| <span>
|
||||
| <span>
|
||||
|
||||
#data
|
||||
<span><caption><span>
|
||||
#errors
|
||||
#document-fragment
|
||||
caption
|
||||
#document
|
||||
| <span>
|
||||
| <span>
|
||||
|
||||
#data
|
||||
<span><col><span>
|
||||
#errors
|
||||
#document-fragment
|
||||
caption
|
||||
#document
|
||||
| <span>
|
||||
| <span>
|
||||
|
||||
#data
|
||||
<span><colgroup><span>
|
||||
#errors
|
||||
#document-fragment
|
||||
caption
|
||||
#document
|
||||
| <span>
|
||||
| <span>
|
||||
|
||||
#data
|
||||
<span><html><span>
|
||||
#errors
|
||||
#document-fragment
|
||||
caption
|
||||
#document
|
||||
| <span>
|
||||
| <span>
|
||||
|
||||
#data
|
||||
<span><tbody><span>
|
||||
#errors
|
||||
#document-fragment
|
||||
caption
|
||||
#document
|
||||
| <span>
|
||||
| <span>
|
||||
|
||||
#data
|
||||
<span><td><span>
|
||||
#errors
|
||||
#document-fragment
|
||||
caption
|
||||
#document
|
||||
| <span>
|
||||
| <span>
|
||||
|
||||
#data
|
||||
<span><tfoot><span>
|
||||
#errors
|
||||
#document-fragment
|
||||
caption
|
||||
#document
|
||||
| <span>
|
||||
| <span>
|
||||
|
||||
#data
|
||||
<span><thead><span>
|
||||
#errors
|
||||
#document-fragment
|
||||
caption
|
||||
#document
|
||||
| <span>
|
||||
| <span>
|
||||
|
||||
#data
|
||||
<span><th><span>
|
||||
#errors
|
||||
#document-fragment
|
||||
caption
|
||||
#document
|
||||
| <span>
|
||||
| <span>
|
||||
|
||||
#data
|
||||
<span><tr><span>
|
||||
#errors
|
||||
#document-fragment
|
||||
caption
|
||||
#document
|
||||
| <span>
|
||||
| <span>
|
||||
|
||||
#data
|
||||
<span></table><span>
|
||||
#errors
|
||||
#document-fragment
|
||||
caption
|
||||
#document
|
||||
| <span>
|
||||
| <span>
|
||||
|
||||
#data
|
||||
</colgroup><col>
|
||||
#errors
|
||||
#document-fragment
|
||||
colgroup
|
||||
#document
|
||||
| <col>
|
||||
|
||||
#data
|
||||
<a><col>
|
||||
#errors
|
||||
#document-fragment
|
||||
colgroup
|
||||
#document
|
||||
| <col>
|
||||
|
||||
#data
|
||||
<caption><a>
|
||||
#errors
|
||||
#document-fragment
|
||||
tbody
|
||||
#document
|
||||
| <a>
|
||||
|
||||
#data
|
||||
<col><a>
|
||||
#errors
|
||||
#document-fragment
|
||||
tbody
|
||||
#document
|
||||
| <a>
|
||||
|
||||
#data
|
||||
<colgroup><a>
|
||||
#errors
|
||||
#document-fragment
|
||||
tbody
|
||||
#document
|
||||
| <a>
|
||||
|
||||
#data
|
||||
<tbody><a>
|
||||
#errors
|
||||
#document-fragment
|
||||
tbody
|
||||
#document
|
||||
| <a>
|
||||
|
||||
#data
|
||||
<tfoot><a>
|
||||
#errors
|
||||
#document-fragment
|
||||
tbody
|
||||
#document
|
||||
| <a>
|
||||
|
||||
#data
|
||||
<thead><a>
|
||||
#errors
|
||||
#document-fragment
|
||||
tbody
|
||||
#document
|
||||
| <a>
|
||||
|
||||
#data
|
||||
</table><a>
|
||||
#errors
|
||||
#document-fragment
|
||||
tbody
|
||||
#document
|
||||
| <a>
|
||||
|
||||
#data
|
||||
<a><tr>
|
||||
#errors
|
||||
#document-fragment
|
||||
tbody
|
||||
#document
|
||||
| <a>
|
||||
| <tr>
|
||||
|
||||
#data
|
||||
<a><td>
|
||||
#errors
|
||||
#document-fragment
|
||||
tbody
|
||||
#document
|
||||
| <a>
|
||||
| <tr>
|
||||
| <td>
|
||||
|
||||
#data
|
||||
<a><td>
|
||||
#errors
|
||||
#document-fragment
|
||||
tbody
|
||||
#document
|
||||
| <a>
|
||||
| <tr>
|
||||
| <td>
|
||||
|
||||
#data
|
||||
<a><td>
|
||||
#errors
|
||||
#document-fragment
|
||||
tbody
|
||||
#document
|
||||
| <a>
|
||||
| <tr>
|
||||
| <td>
|
||||
|
||||
#data
|
||||
<td><table><tbody><a><tr>
|
||||
#errors
|
||||
#document-fragment
|
||||
tbody
|
||||
#document
|
||||
| <tr>
|
||||
| <td>
|
||||
| <a>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
|
||||
#data
|
||||
</tr><td>
|
||||
#errors
|
||||
#document-fragment
|
||||
tr
|
||||
#document
|
||||
| <td>
|
||||
|
||||
#data
|
||||
<td><table><a><tr></tr><tr>
|
||||
#errors
|
||||
#document-fragment
|
||||
tr
|
||||
#document
|
||||
| <td>
|
||||
| <a>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <tr>
|
||||
|
||||
#data
|
||||
<caption><td>
|
||||
#errors
|
||||
#document-fragment
|
||||
tr
|
||||
#document
|
||||
| <td>
|
||||
|
||||
#data
|
||||
<col><td>
|
||||
#errors
|
||||
#document-fragment
|
||||
tr
|
||||
#document
|
||||
| <td>
|
||||
|
||||
#data
|
||||
<colgroup><td>
|
||||
#errors
|
||||
#document-fragment
|
||||
tr
|
||||
#document
|
||||
| <td>
|
||||
|
||||
#data
|
||||
<tbody><td>
|
||||
#errors
|
||||
#document-fragment
|
||||
tr
|
||||
#document
|
||||
| <td>
|
||||
|
||||
#data
|
||||
<tfoot><td>
|
||||
#errors
|
||||
#document-fragment
|
||||
tr
|
||||
#document
|
||||
| <td>
|
||||
|
||||
#data
|
||||
<thead><td>
|
||||
#errors
|
||||
#document-fragment
|
||||
tr
|
||||
#document
|
||||
| <td>
|
||||
|
||||
#data
|
||||
<tr><td>
|
||||
#errors
|
||||
#document-fragment
|
||||
tr
|
||||
#document
|
||||
| <td>
|
||||
|
||||
#data
|
||||
</table><td>
|
||||
#errors
|
||||
#document-fragment
|
||||
tr
|
||||
#document
|
||||
| <td>
|
||||
|
||||
#data
|
||||
<td><table></table><td>
|
||||
#errors
|
||||
#document-fragment
|
||||
tr
|
||||
#document
|
||||
| <td>
|
||||
| <table>
|
||||
| <td>
|
||||
|
||||
#data
|
||||
<td><table></table><td>
|
||||
#errors
|
||||
#document-fragment
|
||||
tr
|
||||
#document
|
||||
| <td>
|
||||
| <table>
|
||||
| <td>
|
||||
|
||||
#data
|
||||
<caption><a>
|
||||
#errors
|
||||
#document-fragment
|
||||
td
|
||||
#document
|
||||
| <a>
|
||||
|
||||
#data
|
||||
<col><a>
|
||||
#errors
|
||||
#document-fragment
|
||||
td
|
||||
#document
|
||||
| <a>
|
||||
|
||||
#data
|
||||
<colgroup><a>
|
||||
#errors
|
||||
#document-fragment
|
||||
td
|
||||
#document
|
||||
| <a>
|
||||
|
||||
#data
|
||||
<tbody><a>
|
||||
#errors
|
||||
#document-fragment
|
||||
td
|
||||
#document
|
||||
| <a>
|
||||
|
||||
#data
|
||||
<tfoot><a>
|
||||
#errors
|
||||
#document-fragment
|
||||
td
|
||||
#document
|
||||
| <a>
|
||||
|
||||
#data
|
||||
<th><a>
|
||||
#errors
|
||||
#document-fragment
|
||||
td
|
||||
#document
|
||||
| <a>
|
||||
|
||||
#data
|
||||
<thead><a>
|
||||
#errors
|
||||
#document-fragment
|
||||
td
|
||||
#document
|
||||
| <a>
|
||||
|
||||
#data
|
||||
<tr><a>
|
||||
#errors
|
||||
#document-fragment
|
||||
td
|
||||
#document
|
||||
| <a>
|
||||
|
||||
#data
|
||||
</table><a>
|
||||
#errors
|
||||
#document-fragment
|
||||
td
|
||||
#document
|
||||
| <a>
|
||||
|
||||
#data
|
||||
</tbody><a>
|
||||
#errors
|
||||
#document-fragment
|
||||
td
|
||||
#document
|
||||
| <a>
|
||||
|
||||
#data
|
||||
</td><a>
|
||||
#errors
|
||||
#document-fragment
|
||||
td
|
||||
#document
|
||||
| <a>
|
||||
|
||||
#data
|
||||
</tfoot><a>
|
||||
#errors
|
||||
#document-fragment
|
||||
td
|
||||
#document
|
||||
| <a>
|
||||
|
||||
#data
|
||||
</thead><a>
|
||||
#errors
|
||||
#document-fragment
|
||||
td
|
||||
#document
|
||||
| <a>
|
||||
|
||||
#data
|
||||
</th><a>
|
||||
#errors
|
||||
#document-fragment
|
||||
td
|
||||
#document
|
||||
| <a>
|
||||
|
||||
#data
|
||||
</tr><a>
|
||||
#errors
|
||||
#document-fragment
|
||||
td
|
||||
#document
|
||||
| <a>
|
||||
|
||||
#data
|
||||
<table><td><td>
|
||||
#errors
|
||||
#document-fragment
|
||||
td
|
||||
#document
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| <td>
|
||||
|
||||
#data
|
||||
</select><option>
|
||||
#errors
|
||||
#document-fragment
|
||||
select
|
||||
#document
|
||||
| <option>
|
||||
|
||||
#data
|
||||
<input><option>
|
||||
#errors
|
||||
#document-fragment
|
||||
select
|
||||
#document
|
||||
| <option>
|
||||
|
||||
#data
|
||||
<keygen><option>
|
||||
#errors
|
||||
#document-fragment
|
||||
select
|
||||
#document
|
||||
| <option>
|
||||
|
||||
#data
|
||||
<textarea><option>
|
||||
#errors
|
||||
#document-fragment
|
||||
select
|
||||
#document
|
||||
| <option>
|
||||
|
||||
#data
|
||||
</html><!--abc-->
|
||||
#errors
|
||||
#document-fragment
|
||||
html
|
||||
#document
|
||||
| <head>
|
||||
| <body>
|
||||
| <!-- abc -->
|
||||
|
||||
#data
|
||||
</frameset><frame>
|
||||
#errors
|
||||
#document-fragment
|
||||
frameset
|
||||
#document
|
||||
| <frame>
|
||||
261
src/pkg/exp/html/testdata/webkit/tricky01.dat
vendored
261
src/pkg/exp/html/testdata/webkit/tricky01.dat
vendored
@@ -1,261 +0,0 @@
|
||||
#data
|
||||
<b><p>Bold </b> Not bold</p>
|
||||
Also not bold.
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <b>
|
||||
| <p>
|
||||
| <b>
|
||||
| "Bold "
|
||||
| " Not bold"
|
||||
| "
|
||||
Also not bold."
|
||||
|
||||
#data
|
||||
<html>
|
||||
<font color=red><i>Italic and Red<p>Italic and Red </font> Just italic.</p> Italic only.</i> Plain
|
||||
<p>I should not be red. <font color=red>Red. <i>Italic and red.</p>
|
||||
<p>Italic and red. </i> Red.</font> I should not be red.</p>
|
||||
<b>Bold <i>Bold and italic</b> Only Italic </i> Plain
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <font>
|
||||
| color="red"
|
||||
| <i>
|
||||
| "Italic and Red"
|
||||
| <i>
|
||||
| <p>
|
||||
| <font>
|
||||
| color="red"
|
||||
| "Italic and Red "
|
||||
| " Just italic."
|
||||
| " Italic only."
|
||||
| " Plain
|
||||
"
|
||||
| <p>
|
||||
| "I should not be red. "
|
||||
| <font>
|
||||
| color="red"
|
||||
| "Red. "
|
||||
| <i>
|
||||
| "Italic and red."
|
||||
| <font>
|
||||
| color="red"
|
||||
| <i>
|
||||
| "
|
||||
"
|
||||
| <p>
|
||||
| <font>
|
||||
| color="red"
|
||||
| <i>
|
||||
| "Italic and red. "
|
||||
| " Red."
|
||||
| " I should not be red."
|
||||
| "
|
||||
"
|
||||
| <b>
|
||||
| "Bold "
|
||||
| <i>
|
||||
| "Bold and italic"
|
||||
| <i>
|
||||
| " Only Italic "
|
||||
| " Plain"
|
||||
|
||||
#data
|
||||
<html><body>
|
||||
<p><font size="7">First paragraph.</p>
|
||||
<p>Second paragraph.</p></font>
|
||||
<b><p><i>Bold and Italic</b> Italic</p>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "
|
||||
"
|
||||
| <p>
|
||||
| <font>
|
||||
| size="7"
|
||||
| "First paragraph."
|
||||
| <font>
|
||||
| size="7"
|
||||
| "
|
||||
"
|
||||
| <p>
|
||||
| "Second paragraph."
|
||||
| "
|
||||
"
|
||||
| <b>
|
||||
| <p>
|
||||
| <b>
|
||||
| <i>
|
||||
| "Bold and Italic"
|
||||
| <i>
|
||||
| " Italic"
|
||||
|
||||
#data
|
||||
<html>
|
||||
<dl>
|
||||
<dt><b>Boo
|
||||
<dd>Goo?
|
||||
</dl>
|
||||
</html>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <dl>
|
||||
| "
|
||||
"
|
||||
| <dt>
|
||||
| <b>
|
||||
| "Boo
|
||||
"
|
||||
| <dd>
|
||||
| <b>
|
||||
| "Goo?
|
||||
"
|
||||
| <b>
|
||||
| "
|
||||
"
|
||||
|
||||
#data
|
||||
<html><body>
|
||||
<label><a><div>Hello<div>World</div></a></label>
|
||||
</body></html>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "
|
||||
"
|
||||
| <label>
|
||||
| <a>
|
||||
| <div>
|
||||
| <a>
|
||||
| "Hello"
|
||||
| <div>
|
||||
| "World"
|
||||
| "
|
||||
"
|
||||
|
||||
#data
|
||||
<table><center> <font>a</center> <img> <tr><td> </td> </tr> </table>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <center>
|
||||
| " "
|
||||
| <font>
|
||||
| "a"
|
||||
| <font>
|
||||
| <img>
|
||||
| " "
|
||||
| <table>
|
||||
| " "
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| " "
|
||||
| " "
|
||||
| " "
|
||||
|
||||
#data
|
||||
<table><tr><p><a><p>You should see this text.
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| <a>
|
||||
| <p>
|
||||
| <a>
|
||||
| "You should see this text."
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
|
||||
#data
|
||||
<TABLE>
|
||||
<TR>
|
||||
<CENTER><CENTER><TD></TD></TR><TR>
|
||||
<FONT>
|
||||
<TABLE><tr></tr></TABLE>
|
||||
</P>
|
||||
<a></font><font></a>
|
||||
This page contains an insanely badly-nested tag sequence.
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <center>
|
||||
| <center>
|
||||
| <font>
|
||||
| "
|
||||
"
|
||||
| <table>
|
||||
| "
|
||||
"
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| "
|
||||
"
|
||||
| <td>
|
||||
| <tr>
|
||||
| "
|
||||
"
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <font>
|
||||
| "
|
||||
"
|
||||
| <p>
|
||||
| "
|
||||
"
|
||||
| <a>
|
||||
| <a>
|
||||
| <font>
|
||||
| <font>
|
||||
| "
|
||||
This page contains an insanely badly-nested tag sequence."
|
||||
|
||||
#data
|
||||
<html>
|
||||
<body>
|
||||
<b><nobr><div>This text is in a div inside a nobr</nobr>More text that should not be in the nobr, i.e., the
|
||||
nobr should have closed the div inside it implicitly. </b><pre>A pre tag outside everything else.</pre>
|
||||
</body>
|
||||
</html>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "
|
||||
"
|
||||
| <b>
|
||||
| <nobr>
|
||||
| <div>
|
||||
| <b>
|
||||
| <nobr>
|
||||
| "This text is in a div inside a nobr"
|
||||
| "More text that should not be in the nobr, i.e., the
|
||||
nobr should have closed the div inside it implicitly. "
|
||||
| <pre>
|
||||
| "A pre tag outside everything else."
|
||||
| "
|
||||
|
||||
"
|
||||
609
src/pkg/exp/html/testdata/webkit/webkit01.dat
vendored
609
src/pkg/exp/html/testdata/webkit/webkit01.dat
vendored
@@ -1,609 +0,0 @@
|
||||
#data
|
||||
Test
|
||||
#errors
|
||||
Line: 1 Col: 4 Unexpected non-space characters. Expected DOCTYPE.
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "Test"
|
||||
|
||||
#data
|
||||
<div></div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
|
||||
#data
|
||||
<div>Test</div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| "Test"
|
||||
|
||||
#data
|
||||
<di
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<div>Hello</div>
|
||||
<script>
|
||||
console.log("PASS");
|
||||
</script>
|
||||
<div>Bye</div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| "Hello"
|
||||
| "
|
||||
"
|
||||
| <script>
|
||||
| "
|
||||
console.log("PASS");
|
||||
"
|
||||
| "
|
||||
"
|
||||
| <div>
|
||||
| "Bye"
|
||||
|
||||
#data
|
||||
<div foo="bar">Hello</div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| foo="bar"
|
||||
| "Hello"
|
||||
|
||||
#data
|
||||
<div>Hello</div>
|
||||
<script>
|
||||
console.log("FOO<span>BAR</span>BAZ");
|
||||
</script>
|
||||
<div>Bye</div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| "Hello"
|
||||
| "
|
||||
"
|
||||
| <script>
|
||||
| "
|
||||
console.log("FOO<span>BAR</span>BAZ");
|
||||
"
|
||||
| "
|
||||
"
|
||||
| <div>
|
||||
| "Bye"
|
||||
|
||||
#data
|
||||
<foo bar="baz"></foo><potato quack="duck"></potato>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <foo>
|
||||
| bar="baz"
|
||||
| <potato>
|
||||
| quack="duck"
|
||||
|
||||
#data
|
||||
<foo bar="baz"><potato quack="duck"></potato></foo>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <foo>
|
||||
| bar="baz"
|
||||
| <potato>
|
||||
| quack="duck"
|
||||
|
||||
#data
|
||||
<foo></foo bar="baz"><potato></potato quack="duck">
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <foo>
|
||||
| <potato>
|
||||
|
||||
#data
|
||||
</ tttt>
|
||||
#errors
|
||||
#document
|
||||
| <!-- tttt -->
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<div FOO ><img><img></div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| foo=""
|
||||
| <img>
|
||||
| <img>
|
||||
|
||||
#data
|
||||
<p>Test</p<p>Test2</p>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| "TestTest2"
|
||||
|
||||
#data
|
||||
<rdar://problem/6869687>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <rdar:>
|
||||
| 6869687=""
|
||||
| problem=""
|
||||
|
||||
#data
|
||||
<A>test< /A>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <a>
|
||||
| "test< /A>"
|
||||
|
||||
#data
|
||||
<
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "<"
|
||||
|
||||
#data
|
||||
<body foo='bar'><body foo='baz' yo='mama'>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| foo="bar"
|
||||
| yo="mama"
|
||||
|
||||
#data
|
||||
<body></br foo="bar"></body>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <br>
|
||||
|
||||
#data
|
||||
<bdy><br foo="bar"></body>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <bdy>
|
||||
| <br>
|
||||
| foo="bar"
|
||||
|
||||
#data
|
||||
<body></body></br foo="bar">
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <br>
|
||||
|
||||
#data
|
||||
<bdy></body><br foo="bar">
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <bdy>
|
||||
| <br>
|
||||
| foo="bar"
|
||||
|
||||
#data
|
||||
<html><body></body></html><!-- Hi there -->
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <!-- Hi there -->
|
||||
|
||||
#data
|
||||
<html><body></body></html>x<!-- Hi there -->
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "x"
|
||||
| <!-- Hi there -->
|
||||
|
||||
#data
|
||||
<html><body></body></html>x<!-- Hi there --></html><!-- Again -->
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "x"
|
||||
| <!-- Hi there -->
|
||||
| <!-- Again -->
|
||||
|
||||
#data
|
||||
<html><body></body></html>x<!-- Hi there --></body></html><!-- Again -->
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "x"
|
||||
| <!-- Hi there -->
|
||||
| <!-- Again -->
|
||||
|
||||
#data
|
||||
<html><body><ruby><div><rp>xx</rp></div></ruby></body></html>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <ruby>
|
||||
| <div>
|
||||
| <rp>
|
||||
| "xx"
|
||||
|
||||
#data
|
||||
<html><body><ruby><div><rt>xx</rt></div></ruby></body></html>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <ruby>
|
||||
| <div>
|
||||
| <rt>
|
||||
| "xx"
|
||||
|
||||
#data
|
||||
<html><frameset><!--1--><noframes>A</noframes><!--2--></frameset><!--3--><noframes>B</noframes><!--4--></html><!--5--><noframes>C</noframes><!--6-->
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <frameset>
|
||||
| <!-- 1 -->
|
||||
| <noframes>
|
||||
| "A"
|
||||
| <!-- 2 -->
|
||||
| <!-- 3 -->
|
||||
| <noframes>
|
||||
| "B"
|
||||
| <!-- 4 -->
|
||||
| <noframes>
|
||||
| "C"
|
||||
| <!-- 5 -->
|
||||
| <!-- 6 -->
|
||||
|
||||
#data
|
||||
<select><option>A<select><option>B<select><option>C<select><option>D<select><option>E<select><option>F<select><option>G<select>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <select>
|
||||
| <option>
|
||||
| "A"
|
||||
| <option>
|
||||
| "B"
|
||||
| <select>
|
||||
| <option>
|
||||
| "C"
|
||||
| <option>
|
||||
| "D"
|
||||
| <select>
|
||||
| <option>
|
||||
| "E"
|
||||
| <option>
|
||||
| "F"
|
||||
| <select>
|
||||
| <option>
|
||||
| "G"
|
||||
|
||||
#data
|
||||
<dd><dd><dt><dt><dd><li><li>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <dd>
|
||||
| <dd>
|
||||
| <dt>
|
||||
| <dt>
|
||||
| <dd>
|
||||
| <li>
|
||||
| <li>
|
||||
|
||||
#data
|
||||
<div><b></div><div><nobr>a<nobr>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| <b>
|
||||
| <div>
|
||||
| <b>
|
||||
| <nobr>
|
||||
| "a"
|
||||
| <nobr>
|
||||
|
||||
#data
|
||||
<head></head>
|
||||
<body></body>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| "
|
||||
"
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<head></head> <style></style>ddd
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <style>
|
||||
| " "
|
||||
| <body>
|
||||
| "ddd"
|
||||
|
||||
#data
|
||||
<kbd><table></kbd><col><select><tr>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <kbd>
|
||||
| <select>
|
||||
| <table>
|
||||
| <colgroup>
|
||||
| <col>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
|
||||
#data
|
||||
<kbd><table></kbd><col><select><tr></table><div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <kbd>
|
||||
| <select>
|
||||
| <table>
|
||||
| <colgroup>
|
||||
| <col>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <div>
|
||||
|
||||
#data
|
||||
<a><li><style></style><title></title></a>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <a>
|
||||
| <li>
|
||||
| <a>
|
||||
| <style>
|
||||
| <title>
|
||||
|
||||
#data
|
||||
<font></p><p><meta><title></title></font>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <font>
|
||||
| <p>
|
||||
| <p>
|
||||
| <font>
|
||||
| <meta>
|
||||
| <title>
|
||||
|
||||
#data
|
||||
<a><center><title></title><a>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <a>
|
||||
| <center>
|
||||
| <a>
|
||||
| <title>
|
||||
| <a>
|
||||
|
||||
#data
|
||||
<svg><title><div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| <svg title>
|
||||
| <div>
|
||||
|
||||
#data
|
||||
<svg><title><rect><div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| <svg title>
|
||||
| <rect>
|
||||
| <div>
|
||||
|
||||
#data
|
||||
<svg><title><svg><div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| <svg title>
|
||||
| <svg svg>
|
||||
| <div>
|
||||
|
||||
#data
|
||||
<img <="" FAIL>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <img>
|
||||
| <=""
|
||||
| fail=""
|
||||
|
||||
#data
|
||||
<ul><li><div id='foo'/>A</li><li>B<div>C</div></li></ul>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <ul>
|
||||
| <li>
|
||||
| <div>
|
||||
| id="foo"
|
||||
| "A"
|
||||
| <li>
|
||||
| "B"
|
||||
| <div>
|
||||
| "C"
|
||||
|
||||
#data
|
||||
<svg><em><desc></em>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| <em>
|
||||
| <desc>
|
||||
|
||||
#data
|
||||
<table><tr><td><svg><desc><td></desc><circle>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| <svg svg>
|
||||
| <svg desc>
|
||||
| <svg circle>
|
||||
|
||||
#data
|
||||
<svg><tfoot></mi><td>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <svg svg>
|
||||
| <svg tfoot>
|
||||
| <svg td>
|
||||
|
||||
#data
|
||||
<math><mrow><mrow><mn>1</mn></mrow><mi>a</mi></mrow></math>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <math math>
|
||||
| <math mrow>
|
||||
| <math mrow>
|
||||
| <math mn>
|
||||
| "1"
|
||||
| <math mi>
|
||||
| "a"
|
||||
|
||||
#data
|
||||
<!doctype html><input type="hidden"><frameset>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <frameset>
|
||||
|
||||
#data
|
||||
<!doctype html><input type="button"><frameset>
|
||||
#errors
|
||||
#document
|
||||
| <!DOCTYPE html>
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <input>
|
||||
| type="button"
|
||||
104
src/pkg/exp/html/testdata/webkit/webkit02.dat
vendored
104
src/pkg/exp/html/testdata/webkit/webkit02.dat
vendored
@@ -1,104 +0,0 @@
|
||||
#data
|
||||
<foo bar=qux/>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <foo>
|
||||
| bar="qux/"
|
||||
|
||||
#data
|
||||
<p id="status"><noscript><strong>A</strong></noscript><span>B</span></p>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <p>
|
||||
| id="status"
|
||||
| <noscript>
|
||||
| "<strong>A</strong>"
|
||||
| <span>
|
||||
| "B"
|
||||
|
||||
#data
|
||||
<div><sarcasm><div></div></sarcasm></div>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <div>
|
||||
| <sarcasm>
|
||||
| <div>
|
||||
|
||||
#data
|
||||
<html><body><img src="" border="0" alt="><div>A</div></body></html>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
|
||||
#data
|
||||
<table><td></tbody>A
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| "A"
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
|
||||
#data
|
||||
<table><td></thead>A
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| "A"
|
||||
|
||||
#data
|
||||
<table><td></tfoot>A
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <tbody>
|
||||
| <tr>
|
||||
| <td>
|
||||
| "A"
|
||||
|
||||
#data
|
||||
<table><thead><td></tbody>A
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <table>
|
||||
| <thead>
|
||||
| <tr>
|
||||
| <td>
|
||||
| "A"
|
||||
|
||||
#data
|
||||
<legend>test</legend>
|
||||
#errors
|
||||
#document
|
||||
| <html>
|
||||
| <head>
|
||||
| <body>
|
||||
| <legend>
|
||||
| "test"
|
||||
@@ -1,779 +0,0 @@
|
||||
// Copyright 2010 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package html
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// A TokenType identifies what kind of Token the tokenizer produced.
type TokenType int

const (
	// ErrorToken means that an error occurred during tokenization.
	ErrorToken TokenType = iota
	// TextToken means a text node.
	TextToken
	// A StartTagToken looks like <a>.
	StartTagToken
	// An EndTagToken looks like </a>.
	EndTagToken
	// A SelfClosingTagToken tag looks like <br/>.
	SelfClosingTagToken
	// A CommentToken looks like <!--x-->.
	CommentToken
	// A DoctypeToken looks like <!DOCTYPE x>
	DoctypeToken
)

// String returns the name of the TokenType, or "Invalid(n)" for a value n
// that is not one of the declared constants.
func (t TokenType) String() string {
	// Indexed by TokenType so the table stays in step with the constants.
	names := [...]string{
		ErrorToken:          "Error",
		TextToken:           "Text",
		StartTagToken:       "StartTag",
		EndTagToken:         "EndTag",
		SelfClosingTagToken: "SelfClosingTag",
		CommentToken:        "Comment",
		DoctypeToken:        "Doctype",
	}
	if t < 0 || int(t) >= len(names) {
		return "Invalid(" + strconv.Itoa(int(t)) + ")"
	}
	return names[t]
}
|
||||
|
||||
// An Attribute is an attribute namespace-key-value triple. Namespace is
// non-empty for foreign attributes like xlink, Key is alphabetic (and hence
// does not contain escapable characters like '&', '<' or '>'), and Val is
// unescaped (it looks like "a<b" rather than "a&lt;b").
//
// Namespace is only used by the parser, not the tokenizer.
type Attribute struct {
	Namespace string
	Key       string
	Val       string
}
|
||||
|
||||
// A Token consists of a TokenType and some Data (tag name for start and end
|
||||
// tags, content for text, comments and doctypes). A tag Token may also contain
|
||||
// a slice of Attributes. Data is unescaped for all Tokens (it looks like "a<b"
|
||||
// rather than "a<b").
|
||||
type Token struct {
|
||||
Type TokenType
|
||||
Data string
|
||||
Attr []Attribute
|
||||
}
|
||||
|
||||
// tagString returns a string representation of a tag Token's Data and Attr.
|
||||
func (t Token) tagString() string {
|
||||
if len(t.Attr) == 0 {
|
||||
return t.Data
|
||||
}
|
||||
buf := bytes.NewBufferString(t.Data)
|
||||
for _, a := range t.Attr {
|
||||
buf.WriteByte(' ')
|
||||
buf.WriteString(a.Key)
|
||||
buf.WriteString(`="`)
|
||||
escape(buf, a.Val)
|
||||
buf.WriteByte('"')
|
||||
}
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
// String returns a string representation of the Token.
|
||||
func (t Token) String() string {
|
||||
switch t.Type {
|
||||
case ErrorToken:
|
||||
return ""
|
||||
case TextToken:
|
||||
return EscapeString(t.Data)
|
||||
case StartTagToken:
|
||||
return "<" + t.tagString() + ">"
|
||||
case EndTagToken:
|
||||
return "</" + t.tagString() + ">"
|
||||
case SelfClosingTagToken:
|
||||
return "<" + t.tagString() + "/>"
|
||||
case CommentToken:
|
||||
return "<!--" + t.Data + "-->"
|
||||
case DoctypeToken:
|
||||
return "<!DOCTYPE " + t.Data + ">"
|
||||
}
|
||||
return "Invalid(" + strconv.Itoa(int(t.Type)) + ")"
|
||||
}
|
||||
|
||||
// span is a half-open range [start, end) of byte offsets into a Tokenizer's
// buffer.
type span struct {
	start int
	end   int
}
|
||||
|
||||
// A Tokenizer returns a stream of HTML Tokens.
|
||||
type Tokenizer struct {
|
||||
// r is the source of the HTML text.
|
||||
r io.Reader
|
||||
// tt is the TokenType of the current token.
|
||||
tt TokenType
|
||||
// err is the first error encountered during tokenization. It is possible
|
||||
// for tt != Error && err != nil to hold: this means that Next returned a
|
||||
// valid token but the subsequent Next call will return an error token.
|
||||
// For example, if the HTML text input was just "plain", then the first
|
||||
// Next call would set z.err to io.EOF but return a TextToken, and all
|
||||
// subsequent Next calls would return an ErrorToken.
|
||||
// err is never reset. Once it becomes non-nil, it stays non-nil.
|
||||
err error
|
||||
// buf[raw.start:raw.end] holds the raw bytes of the current token.
|
||||
// buf[raw.end:] is buffered input that will yield future tokens.
|
||||
raw span
|
||||
buf []byte
|
||||
// buf[data.start:data.end] holds the raw bytes of the current token's data:
|
||||
// a text token's text, a tag token's tag name, etc.
|
||||
data span
|
||||
// pendingAttr is the attribute key and value currently being tokenized.
|
||||
// When complete, pendingAttr is pushed onto attr. nAttrReturned is
|
||||
// incremented on each call to TagAttr.
|
||||
pendingAttr [2]span
|
||||
attr [][2]span
|
||||
nAttrReturned int
|
||||
// rawTag is the "script" in "</script>" that closes the next token. If
|
||||
// non-empty, the subsequent call to Next will return a raw or RCDATA text
|
||||
// token: one that treats "<p>" as text instead of an element.
|
||||
// rawTag's contents are lower-cased.
|
||||
rawTag string
|
||||
// textIsRaw is whether the current text token's data is not escaped.
|
||||
textIsRaw bool
|
||||
}
|
||||
|
||||
// Err returns the error associated with the most recent ErrorToken token.
|
||||
// This is typically io.EOF, meaning the end of tokenization.
|
||||
func (z *Tokenizer) Err() error {
|
||||
if z.tt != ErrorToken {
|
||||
return nil
|
||||
}
|
||||
return z.err
|
||||
}
|
||||
|
||||
// readByte returns the next byte from the input stream, doing a buffered read
|
||||
// from z.r into z.buf if necessary. z.buf[z.raw.start:z.raw.end] remains a contiguous byte
|
||||
// slice that holds all the bytes read so far for the current token.
|
||||
// It sets z.err if the underlying reader returns an error.
|
||||
// Pre-condition: z.err == nil.
|
||||
func (z *Tokenizer) readByte() byte {
|
||||
if z.raw.end >= len(z.buf) {
|
||||
// Our buffer is exhausted and we have to read from z.r.
|
||||
// We copy z.buf[z.raw.start:z.raw.end] to the beginning of z.buf. If the length
|
||||
// z.raw.end - z.raw.start is more than half the capacity of z.buf, then we
|
||||
// allocate a new buffer before the copy.
|
||||
c := cap(z.buf)
|
||||
d := z.raw.end - z.raw.start
|
||||
var buf1 []byte
|
||||
if 2*d > c {
|
||||
buf1 = make([]byte, d, 2*c)
|
||||
} else {
|
||||
buf1 = z.buf[:d]
|
||||
}
|
||||
copy(buf1, z.buf[z.raw.start:z.raw.end])
|
||||
if x := z.raw.start; x != 0 {
|
||||
// Adjust the data/attr spans to refer to the same contents after the copy.
|
||||
z.data.start -= x
|
||||
z.data.end -= x
|
||||
z.pendingAttr[0].start -= x
|
||||
z.pendingAttr[0].end -= x
|
||||
z.pendingAttr[1].start -= x
|
||||
z.pendingAttr[1].end -= x
|
||||
for i := range z.attr {
|
||||
z.attr[i][0].start -= x
|
||||
z.attr[i][0].end -= x
|
||||
z.attr[i][1].start -= x
|
||||
z.attr[i][1].end -= x
|
||||
}
|
||||
}
|
||||
z.raw.start, z.raw.end, z.buf = 0, d, buf1[:d]
|
||||
// Now that we have copied the live bytes to the start of the buffer,
|
||||
// we read from z.r into the remainder.
|
||||
n, err := z.r.Read(buf1[d:cap(buf1)])
|
||||
if err != nil {
|
||||
z.err = err
|
||||
return 0
|
||||
}
|
||||
z.buf = buf1[:d+n]
|
||||
}
|
||||
x := z.buf[z.raw.end]
|
||||
z.raw.end++
|
||||
return x
|
||||
}
|
||||
|
||||
// skipWhiteSpace skips past any white space.
|
||||
func (z *Tokenizer) skipWhiteSpace() {
|
||||
if z.err != nil {
|
||||
return
|
||||
}
|
||||
for {
|
||||
c := z.readByte()
|
||||
if z.err != nil {
|
||||
return
|
||||
}
|
||||
switch c {
|
||||
case ' ', '\n', '\r', '\t', '\f':
|
||||
// No-op.
|
||||
default:
|
||||
z.raw.end--
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// readRawOrRCDATA reads until the next "</foo>", where "foo" is z.rawTag and
|
||||
// is typically something like "script" or "textarea".
|
||||
func (z *Tokenizer) readRawOrRCDATA() {
|
||||
loop:
|
||||
for {
|
||||
c := z.readByte()
|
||||
if z.err != nil {
|
||||
break loop
|
||||
}
|
||||
if c != '<' {
|
||||
continue loop
|
||||
}
|
||||
c = z.readByte()
|
||||
if z.err != nil {
|
||||
break loop
|
||||
}
|
||||
if c != '/' {
|
||||
continue loop
|
||||
}
|
||||
for i := 0; i < len(z.rawTag); i++ {
|
||||
c = z.readByte()
|
||||
if z.err != nil {
|
||||
break loop
|
||||
}
|
||||
if c != z.rawTag[i] && c != z.rawTag[i]-('a'-'A') {
|
||||
continue loop
|
||||
}
|
||||
}
|
||||
c = z.readByte()
|
||||
if z.err != nil {
|
||||
break loop
|
||||
}
|
||||
switch c {
|
||||
case ' ', '\n', '\r', '\t', '\f', '/', '>':
|
||||
// The 3 is 2 for the leading "</" plus 1 for the trailing character c.
|
||||
z.raw.end -= 3 + len(z.rawTag)
|
||||
break loop
|
||||
case '<':
|
||||
// Step back one, to catch "</foo</foo>".
|
||||
z.raw.end--
|
||||
}
|
||||
}
|
||||
z.data.end = z.raw.end
|
||||
// A textarea's or title's RCDATA can contain escaped entities.
|
||||
z.textIsRaw = z.rawTag != "textarea" && z.rawTag != "title"
|
||||
z.rawTag = ""
|
||||
}
|
||||
|
||||
// readComment reads the next comment token starting with "<!--". The opening
|
||||
// "<!--" has already been consumed.
|
||||
func (z *Tokenizer) readComment() {
|
||||
z.data.start = z.raw.end
|
||||
defer func() {
|
||||
if z.data.end < z.data.start {
|
||||
// It's a comment with no data, like <!-->.
|
||||
z.data.end = z.data.start
|
||||
}
|
||||
}()
|
||||
for dashCount := 2; ; {
|
||||
c := z.readByte()
|
||||
if z.err != nil {
|
||||
// Ignore up to two dashes at EOF.
|
||||
if dashCount > 2 {
|
||||
dashCount = 2
|
||||
}
|
||||
z.data.end = z.raw.end - dashCount
|
||||
return
|
||||
}
|
||||
switch c {
|
||||
case '-':
|
||||
dashCount++
|
||||
continue
|
||||
case '>':
|
||||
if dashCount >= 2 {
|
||||
z.data.end = z.raw.end - len("-->")
|
||||
return
|
||||
}
|
||||
case '!':
|
||||
if dashCount >= 2 {
|
||||
c = z.readByte()
|
||||
if z.err != nil {
|
||||
z.data.end = z.raw.end
|
||||
return
|
||||
}
|
||||
if c == '>' {
|
||||
z.data.end = z.raw.end - len("--!>")
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
dashCount = 0
|
||||
}
|
||||
}
|
||||
|
||||
// readUntilCloseAngle reads until the next ">".
|
||||
func (z *Tokenizer) readUntilCloseAngle() {
|
||||
z.data.start = z.raw.end
|
||||
for {
|
||||
c := z.readByte()
|
||||
if z.err != nil {
|
||||
z.data.end = z.raw.end
|
||||
return
|
||||
}
|
||||
if c == '>' {
|
||||
z.data.end = z.raw.end - len(">")
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// readMarkupDeclaration reads the next token starting with "<!". It might be
|
||||
// a "<!--comment-->", a "<!DOCTYPE foo>", or "<!a bogus comment". The opening
|
||||
// "<!" has already been consumed.
|
||||
func (z *Tokenizer) readMarkupDeclaration() TokenType {
|
||||
z.data.start = z.raw.end
|
||||
var c [2]byte
|
||||
for i := 0; i < 2; i++ {
|
||||
c[i] = z.readByte()
|
||||
if z.err != nil {
|
||||
z.data.end = z.raw.end
|
||||
return CommentToken
|
||||
}
|
||||
}
|
||||
if c[0] == '-' && c[1] == '-' {
|
||||
z.readComment()
|
||||
return CommentToken
|
||||
}
|
||||
z.raw.end -= 2
|
||||
const s = "DOCTYPE"
|
||||
for i := 0; i < len(s); i++ {
|
||||
c := z.readByte()
|
||||
if z.err != nil {
|
||||
z.data.end = z.raw.end
|
||||
return CommentToken
|
||||
}
|
||||
if c != s[i] && c != s[i]+('a'-'A') {
|
||||
// Back up to read the fragment of "DOCTYPE" again.
|
||||
z.raw.end = z.data.start
|
||||
z.readUntilCloseAngle()
|
||||
return CommentToken
|
||||
}
|
||||
}
|
||||
if z.skipWhiteSpace(); z.err != nil {
|
||||
z.data.start = z.raw.end
|
||||
z.data.end = z.raw.end
|
||||
return DoctypeToken
|
||||
}
|
||||
z.readUntilCloseAngle()
|
||||
return DoctypeToken
|
||||
}
|
||||
|
||||
// startTagIn returns whether the start tag in z.buf[z.data.start:z.data.end]
|
||||
// case-insensitively matches any element of ss.
|
||||
func (z *Tokenizer) startTagIn(ss ...string) bool {
|
||||
loop:
|
||||
for _, s := range ss {
|
||||
if z.data.end-z.data.start != len(s) {
|
||||
continue loop
|
||||
}
|
||||
for i := 0; i < len(s); i++ {
|
||||
c := z.buf[z.data.start+i]
|
||||
if 'A' <= c && c <= 'Z' {
|
||||
c += 'a' - 'A'
|
||||
}
|
||||
if c != s[i] {
|
||||
continue loop
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// readStartTag reads the next start tag token. The opening "<a" has already
|
||||
// been consumed, where 'a' means anything in [A-Za-z].
|
||||
func (z *Tokenizer) readStartTag() TokenType {
|
||||
z.attr = z.attr[:0]
|
||||
z.nAttrReturned = 0
|
||||
// Read the tag name and attribute key/value pairs.
|
||||
z.readTagName()
|
||||
if z.skipWhiteSpace(); z.err != nil {
|
||||
return ErrorToken
|
||||
}
|
||||
for {
|
||||
c := z.readByte()
|
||||
if z.err != nil || c == '>' {
|
||||
break
|
||||
}
|
||||
z.raw.end--
|
||||
z.readTagAttrKey()
|
||||
z.readTagAttrVal()
|
||||
// Save pendingAttr if it has a non-empty key.
|
||||
if z.pendingAttr[0].start != z.pendingAttr[0].end {
|
||||
z.attr = append(z.attr, z.pendingAttr)
|
||||
}
|
||||
if z.skipWhiteSpace(); z.err != nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
// Several tags flag the tokenizer's next token as raw.
|
||||
c, raw := z.buf[z.data.start], false
|
||||
if 'A' <= c && c <= 'Z' {
|
||||
c += 'a' - 'A'
|
||||
}
|
||||
switch c {
|
||||
case 'i':
|
||||
raw = z.startTagIn("iframe")
|
||||
case 'n':
|
||||
raw = z.startTagIn("noembed", "noframes", "noscript")
|
||||
case 'p':
|
||||
raw = z.startTagIn("plaintext")
|
||||
case 's':
|
||||
raw = z.startTagIn("script", "style")
|
||||
case 't':
|
||||
raw = z.startTagIn("textarea", "title")
|
||||
case 'x':
|
||||
raw = z.startTagIn("xmp")
|
||||
}
|
||||
if raw {
|
||||
z.rawTag = strings.ToLower(string(z.buf[z.data.start:z.data.end]))
|
||||
}
|
||||
// Look for a self-closing token like "<br/>".
|
||||
if z.err == nil && z.buf[z.raw.end-2] == '/' {
|
||||
return SelfClosingTagToken
|
||||
}
|
||||
return StartTagToken
|
||||
}
|
||||
|
||||
// readEndTag reads the next end tag token. The opening "</a" has already
|
||||
// been consumed, where 'a' means anything in [A-Za-z].
|
||||
func (z *Tokenizer) readEndTag() {
|
||||
z.attr = z.attr[:0]
|
||||
z.nAttrReturned = 0
|
||||
z.readTagName()
|
||||
for {
|
||||
c := z.readByte()
|
||||
if z.err != nil || c == '>' {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// readTagName sets z.data to the "div" in "<div k=v>". The reader (z.raw.end)
|
||||
// is positioned such that the first byte of the tag name (the "d" in "<div")
|
||||
// has already been consumed.
|
||||
func (z *Tokenizer) readTagName() {
|
||||
z.data.start = z.raw.end - 1
|
||||
for {
|
||||
c := z.readByte()
|
||||
if z.err != nil {
|
||||
z.data.end = z.raw.end
|
||||
return
|
||||
}
|
||||
switch c {
|
||||
case ' ', '\n', '\r', '\t', '\f':
|
||||
z.data.end = z.raw.end - 1
|
||||
return
|
||||
case '/', '>':
|
||||
z.raw.end--
|
||||
z.data.end = z.raw.end
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// readTagAttrKey sets z.pendingAttr[0] to the "k" in "<div k=v>".
|
||||
// Precondition: z.err == nil.
|
||||
func (z *Tokenizer) readTagAttrKey() {
|
||||
z.pendingAttr[0].start = z.raw.end
|
||||
for {
|
||||
c := z.readByte()
|
||||
if z.err != nil {
|
||||
z.pendingAttr[0].end = z.raw.end
|
||||
return
|
||||
}
|
||||
switch c {
|
||||
case ' ', '\n', '\r', '\t', '\f', '/':
|
||||
z.pendingAttr[0].end = z.raw.end - 1
|
||||
return
|
||||
case '=', '>':
|
||||
z.raw.end--
|
||||
z.pendingAttr[0].end = z.raw.end
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// readTagAttrVal sets z.pendingAttr[1] to the "v" in "<div k=v>".
|
||||
func (z *Tokenizer) readTagAttrVal() {
|
||||
z.pendingAttr[1].start = z.raw.end
|
||||
z.pendingAttr[1].end = z.raw.end
|
||||
if z.skipWhiteSpace(); z.err != nil {
|
||||
return
|
||||
}
|
||||
c := z.readByte()
|
||||
if z.err != nil {
|
||||
return
|
||||
}
|
||||
if c != '=' {
|
||||
z.raw.end--
|
||||
return
|
||||
}
|
||||
if z.skipWhiteSpace(); z.err != nil {
|
||||
return
|
||||
}
|
||||
quote := z.readByte()
|
||||
if z.err != nil {
|
||||
return
|
||||
}
|
||||
switch quote {
|
||||
case '>':
|
||||
z.raw.end--
|
||||
return
|
||||
|
||||
case '\'', '"':
|
||||
z.pendingAttr[1].start = z.raw.end
|
||||
for {
|
||||
c := z.readByte()
|
||||
if z.err != nil {
|
||||
z.pendingAttr[1].end = z.raw.end
|
||||
return
|
||||
}
|
||||
if c == quote {
|
||||
z.pendingAttr[1].end = z.raw.end - 1
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
default:
|
||||
z.pendingAttr[1].start = z.raw.end - 1
|
||||
for {
|
||||
c := z.readByte()
|
||||
if z.err != nil {
|
||||
z.pendingAttr[1].end = z.raw.end
|
||||
return
|
||||
}
|
||||
switch c {
|
||||
case ' ', '\n', '\r', '\t', '\f':
|
||||
z.pendingAttr[1].end = z.raw.end - 1
|
||||
return
|
||||
case '>':
|
||||
z.raw.end--
|
||||
z.pendingAttr[1].end = z.raw.end
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Next scans the next token and returns its type.
|
||||
func (z *Tokenizer) Next() TokenType {
|
||||
if z.err != nil {
|
||||
z.tt = ErrorToken
|
||||
return z.tt
|
||||
}
|
||||
z.raw.start = z.raw.end
|
||||
z.data.start = z.raw.end
|
||||
z.data.end = z.raw.end
|
||||
if z.rawTag != "" {
|
||||
if z.rawTag == "plaintext" {
|
||||
// Read everything up to EOF.
|
||||
for z.err == nil {
|
||||
z.readByte()
|
||||
}
|
||||
z.textIsRaw = true
|
||||
} else {
|
||||
z.readRawOrRCDATA()
|
||||
}
|
||||
if z.data.end > z.data.start {
|
||||
z.tt = TextToken
|
||||
return z.tt
|
||||
}
|
||||
}
|
||||
z.textIsRaw = false
|
||||
|
||||
loop:
|
||||
for {
|
||||
c := z.readByte()
|
||||
if z.err != nil {
|
||||
break loop
|
||||
}
|
||||
if c != '<' {
|
||||
continue loop
|
||||
}
|
||||
|
||||
// Check if the '<' we have just read is part of a tag, comment
|
||||
// or doctype. If not, it's part of the accumulated text token.
|
||||
c = z.readByte()
|
||||
if z.err != nil {
|
||||
break loop
|
||||
}
|
||||
var tokenType TokenType
|
||||
switch {
|
||||
case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z':
|
||||
tokenType = StartTagToken
|
||||
case c == '/':
|
||||
tokenType = EndTagToken
|
||||
case c == '!' || c == '?':
|
||||
// We use CommentToken to mean any of "<!--actual comments-->",
|
||||
// "<!DOCTYPE declarations>" and "<?xml processing instructions?>".
|
||||
tokenType = CommentToken
|
||||
default:
|
||||
continue
|
||||
}
|
||||
|
||||
// We have a non-text token, but we might have accumulated some text
|
||||
// before that. If so, we return the text first, and return the non-
|
||||
// text token on the subsequent call to Next.
|
||||
if x := z.raw.end - len("<a"); z.raw.start < x {
|
||||
z.raw.end = x
|
||||
z.data.end = x
|
||||
z.tt = TextToken
|
||||
return z.tt
|
||||
}
|
||||
switch tokenType {
|
||||
case StartTagToken:
|
||||
z.tt = z.readStartTag()
|
||||
return z.tt
|
||||
case EndTagToken:
|
||||
c = z.readByte()
|
||||
if z.err != nil {
|
||||
break loop
|
||||
}
|
||||
if c == '>' {
|
||||
// "</>" does not generate a token at all.
|
||||
// Reset the tokenizer state and start again.
|
||||
z.raw.start = z.raw.end
|
||||
z.data.start = z.raw.end
|
||||
z.data.end = z.raw.end
|
||||
continue loop
|
||||
}
|
||||
if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' {
|
||||
z.readEndTag()
|
||||
z.tt = EndTagToken
|
||||
return z.tt
|
||||
}
|
||||
z.raw.end--
|
||||
z.readUntilCloseAngle()
|
||||
z.tt = CommentToken
|
||||
return z.tt
|
||||
case CommentToken:
|
||||
if c == '!' {
|
||||
z.tt = z.readMarkupDeclaration()
|
||||
return z.tt
|
||||
}
|
||||
z.raw.end--
|
||||
z.readUntilCloseAngle()
|
||||
z.tt = CommentToken
|
||||
return z.tt
|
||||
}
|
||||
}
|
||||
if z.raw.start < z.raw.end {
|
||||
z.data.end = z.raw.end
|
||||
z.tt = TextToken
|
||||
return z.tt
|
||||
}
|
||||
z.tt = ErrorToken
|
||||
return z.tt
|
||||
}
|
||||
|
||||
// Raw returns the unmodified text of the current token. Calling Next, Token,
|
||||
// Text, TagName or TagAttr may change the contents of the returned slice.
|
||||
func (z *Tokenizer) Raw() []byte {
|
||||
return z.buf[z.raw.start:z.raw.end]
|
||||
}
|
||||
|
||||
// Text returns the unescaped text of a text, comment or doctype token. The
|
||||
// contents of the returned slice may change on the next call to Next.
|
||||
func (z *Tokenizer) Text() []byte {
|
||||
switch z.tt {
|
||||
case TextToken, CommentToken, DoctypeToken:
|
||||
s := z.buf[z.data.start:z.data.end]
|
||||
z.data.start = z.raw.end
|
||||
z.data.end = z.raw.end
|
||||
if !z.textIsRaw {
|
||||
s = unescape(s)
|
||||
}
|
||||
return s
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// TagName returns the lower-cased name of a tag token (the `img` out of
|
||||
// `<IMG SRC="foo">`) and whether the tag has attributes.
|
||||
// The contents of the returned slice may change on the next call to Next.
|
||||
func (z *Tokenizer) TagName() (name []byte, hasAttr bool) {
|
||||
if z.data.start < z.data.end {
|
||||
switch z.tt {
|
||||
case StartTagToken, EndTagToken, SelfClosingTagToken:
|
||||
s := z.buf[z.data.start:z.data.end]
|
||||
z.data.start = z.raw.end
|
||||
z.data.end = z.raw.end
|
||||
return lower(s), z.nAttrReturned < len(z.attr)
|
||||
}
|
||||
}
|
||||
return nil, false
|
||||
}
|
||||
|
||||
// TagAttr returns the lower-cased key and unescaped value of the next unparsed
|
||||
// attribute for the current tag token and whether there are more attributes.
|
||||
// The contents of the returned slices may change on the next call to Next.
|
||||
func (z *Tokenizer) TagAttr() (key, val []byte, moreAttr bool) {
|
||||
if z.nAttrReturned < len(z.attr) {
|
||||
switch z.tt {
|
||||
case StartTagToken, SelfClosingTagToken:
|
||||
x := z.attr[z.nAttrReturned]
|
||||
z.nAttrReturned++
|
||||
key = z.buf[x[0].start:x[0].end]
|
||||
val = z.buf[x[1].start:x[1].end]
|
||||
return lower(key), unescape(val), z.nAttrReturned < len(z.attr)
|
||||
}
|
||||
}
|
||||
return nil, nil, false
|
||||
}
|
||||
|
||||
// Token returns the next Token. The result's Data and Attr values remain valid
|
||||
// after subsequent Next calls.
|
||||
func (z *Tokenizer) Token() Token {
|
||||
t := Token{Type: z.tt}
|
||||
switch z.tt {
|
||||
case TextToken, CommentToken, DoctypeToken:
|
||||
t.Data = string(z.Text())
|
||||
case StartTagToken, SelfClosingTagToken:
|
||||
var attr []Attribute
|
||||
name, moreAttr := z.TagName()
|
||||
for moreAttr {
|
||||
var key, val []byte
|
||||
key, val, moreAttr = z.TagAttr()
|
||||
attr = append(attr, Attribute{"", string(key), string(val)})
|
||||
}
|
||||
t.Data = string(name)
|
||||
t.Attr = attr
|
||||
case EndTagToken:
|
||||
name, _ := z.TagName()
|
||||
t.Data = string(name)
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
// NewTokenizer returns a new HTML Tokenizer for the given Reader.
|
||||
// The input is assumed to be UTF-8 encoded.
|
||||
func NewTokenizer(r io.Reader) *Tokenizer {
|
||||
return &Tokenizer{
|
||||
r: r,
|
||||
buf: make([]byte, 0, 4096),
|
||||
}
|
||||
}
|
||||
@@ -1,590 +0,0 @@
|
||||
// Copyright 2010 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package html
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// tokenTest is one tokenizer test case:
//
//	desc   - a short description of the test case
//	html   - the HTML to parse
//	golden - the string representations of the expected tokens, joined by '$'
type tokenTest struct {
	desc, html, golden string
}
|
||||
|
||||
var tokenTests = []tokenTest{
|
||||
{
|
||||
"empty",
|
||||
"",
|
||||
"",
|
||||
},
|
||||
// A single text node. The tokenizer should not break text nodes on whitespace,
|
||||
// nor should it normalize whitespace within a text node.
|
||||
{
|
||||
"text",
|
||||
"foo bar",
|
||||
"foo bar",
|
||||
},
|
||||
// An entity.
|
||||
{
|
||||
"entity",
|
||||
"one < two",
|
||||
"one < two",
|
||||
},
|
||||
// A start, self-closing and end tag. The tokenizer does not care if the start
|
||||
// and end tokens don't match; that is the job of the parser.
|
||||
{
|
||||
"tags",
|
||||
"<a>b<c/>d</e>",
|
||||
"<a>$b$<c/>$d$</e>",
|
||||
},
|
||||
// Angle brackets that aren't a tag.
|
||||
{
|
||||
"not a tag #0",
|
||||
"<",
|
||||
"<",
|
||||
},
|
||||
{
|
||||
"not a tag #1",
|
||||
"</",
|
||||
"</",
|
||||
},
|
||||
{
|
||||
"not a tag #2",
|
||||
"</>",
|
||||
"",
|
||||
},
|
||||
{
|
||||
"not a tag #3",
|
||||
"a</>b",
|
||||
"a$b",
|
||||
},
|
||||
{
|
||||
"not a tag #4",
|
||||
"</ >",
|
||||
"<!-- -->",
|
||||
},
|
||||
{
|
||||
"not a tag #5",
|
||||
"</.",
|
||||
"<!--.-->",
|
||||
},
|
||||
{
|
||||
"not a tag #6",
|
||||
"</.>",
|
||||
"<!--.-->",
|
||||
},
|
||||
{
|
||||
"not a tag #7",
|
||||
"a < b",
|
||||
"a < b",
|
||||
},
|
||||
{
|
||||
"not a tag #8",
|
||||
"<.>",
|
||||
"<.>",
|
||||
},
|
||||
{
|
||||
"not a tag #9",
|
||||
"a<<<b>>>c",
|
||||
"a<<$<b>$>>c",
|
||||
},
|
||||
{
|
||||
"not a tag #10",
|
||||
"if x<0 and y < 0 then x*y>0",
|
||||
"if x<0 and y < 0 then x*y>0",
|
||||
},
|
||||
// EOF in a tag name.
|
||||
{
|
||||
"tag name eof #0",
|
||||
"<a",
|
||||
"",
|
||||
},
|
||||
{
|
||||
"tag name eof #1",
|
||||
"<a ",
|
||||
"",
|
||||
},
|
||||
{
|
||||
"tag name eof #2",
|
||||
"a<b",
|
||||
"a",
|
||||
},
|
||||
{
|
||||
"tag name eof #3",
|
||||
"<a><b",
|
||||
"<a>",
|
||||
},
|
||||
{
|
||||
"tag name eof #4",
|
||||
`<a x`,
|
||||
`<a x="">`,
|
||||
},
|
||||
// Some malformed tags that are missing a '>'.
|
||||
{
|
||||
"malformed tag #0",
|
||||
`<p</p>`,
|
||||
`<p< p="">`,
|
||||
},
|
||||
{
|
||||
"malformed tag #1",
|
||||
`<p </p>`,
|
||||
`<p <="" p="">`,
|
||||
},
|
||||
{
|
||||
"malformed tag #2",
|
||||
`<p id`,
|
||||
`<p id="">`,
|
||||
},
|
||||
{
|
||||
"malformed tag #3",
|
||||
`<p id=`,
|
||||
`<p id="">`,
|
||||
},
|
||||
{
|
||||
"malformed tag #4",
|
||||
`<p id=>`,
|
||||
`<p id="">`,
|
||||
},
|
||||
{
|
||||
"malformed tag #5",
|
||||
`<p id=0`,
|
||||
`<p id="0">`,
|
||||
},
|
||||
{
|
||||
"malformed tag #6",
|
||||
`<p id=0</p>`,
|
||||
`<p id="0</p">`,
|
||||
},
|
||||
{
|
||||
"malformed tag #7",
|
||||
`<p id="0</p>`,
|
||||
`<p id="0</p>">`,
|
||||
},
|
||||
{
|
||||
"malformed tag #8",
|
||||
`<p id="0"</p>`,
|
||||
`<p id="0" <="" p="">`,
|
||||
},
|
||||
// Raw text and RCDATA.
|
||||
{
|
||||
"basic raw text",
|
||||
"<script><a></b></script>",
|
||||
"<script>$<a></b>$</script>",
|
||||
},
|
||||
{
|
||||
"unfinished script end tag",
|
||||
"<SCRIPT>a</SCR",
|
||||
"<script>$a</SCR",
|
||||
},
|
||||
{
|
||||
"broken script end tag",
|
||||
"<SCRIPT>a</SCR ipt>",
|
||||
"<script>$a</SCR ipt>",
|
||||
},
|
||||
{
|
||||
"EOF in script end tag",
|
||||
"<SCRIPT>a</SCRipt",
|
||||
"<script>$a</SCRipt",
|
||||
},
|
||||
{
|
||||
"scriptx end tag",
|
||||
"<SCRIPT>a</SCRiptx",
|
||||
"<script>$a</SCRiptx",
|
||||
},
|
||||
{
|
||||
"' ' completes script end tag",
|
||||
"<SCRIPT>a</SCRipt ",
|
||||
"<script>$a$</script>",
|
||||
},
|
||||
{
|
||||
"'>' completes script end tag",
|
||||
"<SCRIPT>a</SCRipt>",
|
||||
"<script>$a$</script>",
|
||||
},
|
||||
{
|
||||
"self-closing script end tag",
|
||||
"<SCRIPT>a</SCRipt/>",
|
||||
"<script>$a$</script>",
|
||||
},
|
||||
{
|
||||
"nested script tag",
|
||||
"<SCRIPT>a</SCRipt<script>",
|
||||
"<script>$a</SCRipt<script>",
|
||||
},
|
||||
{
|
||||
"script end tag after unfinished",
|
||||
"<SCRIPT>a</SCRipt</script>",
|
||||
"<script>$a</SCRipt$</script>",
|
||||
},
|
||||
{
|
||||
"script/style mismatched tags",
|
||||
"<script>a</style>",
|
||||
"<script>$a</style>",
|
||||
},
|
||||
{
|
||||
"style element with entity",
|
||||
"<style>'",
|
||||
"<style>$&apos;",
|
||||
},
|
||||
{
|
||||
"textarea with tag",
|
||||
"<textarea><div></textarea>",
|
||||
"<textarea>$<div>$</textarea>",
|
||||
},
|
||||
{
|
||||
"title with tag and entity",
|
||||
"<title><b>K&R C</b></title>",
|
||||
"<title>$<b>K&R C</b>$</title>",
|
||||
},
|
||||
// DOCTYPE tests.
|
||||
{
|
||||
"Proper DOCTYPE",
|
||||
"<!DOCTYPE html>",
|
||||
"<!DOCTYPE html>",
|
||||
},
|
||||
{
|
||||
"DOCTYPE with no space",
|
||||
"<!doctypehtml>",
|
||||
"<!DOCTYPE html>",
|
||||
},
|
||||
{
|
||||
"DOCTYPE with two spaces",
|
||||
"<!doctype html>",
|
||||
"<!DOCTYPE html>",
|
||||
},
|
||||
{
|
||||
"looks like DOCTYPE but isn't",
|
||||
"<!DOCUMENT html>",
|
||||
"<!--DOCUMENT html-->",
|
||||
},
|
||||
{
|
||||
"DOCTYPE at EOF",
|
||||
"<!DOCtype",
|
||||
"<!DOCTYPE >",
|
||||
},
|
||||
// XML processing instructions.
|
||||
{
|
||||
"XML processing instruction",
|
||||
"<?xml?>",
|
||||
"<!--?xml?-->",
|
||||
},
|
||||
// Comments.
|
||||
{
|
||||
"comment0",
|
||||
"abc<b><!-- skipme --></b>def",
|
||||
"abc$<b>$<!-- skipme -->$</b>$def",
|
||||
},
|
||||
{
|
||||
"comment1",
|
||||
"a<!-->z",
|
||||
"a$<!---->$z",
|
||||
},
|
||||
{
|
||||
"comment2",
|
||||
"a<!--->z",
|
||||
"a$<!---->$z",
|
||||
},
|
||||
{
|
||||
"comment3",
|
||||
"a<!--x>-->z",
|
||||
"a$<!--x>-->$z",
|
||||
},
|
||||
{
|
||||
"comment4",
|
||||
"a<!--x->-->z",
|
||||
"a$<!--x->-->$z",
|
||||
},
|
||||
{
|
||||
"comment5",
|
||||
"a<!>z",
|
||||
"a$<!---->$z",
|
||||
},
|
||||
{
|
||||
"comment6",
|
||||
"a<!->z",
|
||||
"a$<!----->$z",
|
||||
},
|
||||
{
|
||||
"comment7",
|
||||
"a<!---<>z",
|
||||
"a$<!---<>z-->",
|
||||
},
|
||||
{
|
||||
"comment8",
|
||||
"a<!--z",
|
||||
"a$<!--z-->",
|
||||
},
|
||||
{
|
||||
"comment9",
|
||||
"a<!--z-",
|
||||
"a$<!--z-->",
|
||||
},
|
||||
{
|
||||
"comment10",
|
||||
"a<!--z--",
|
||||
"a$<!--z-->",
|
||||
},
|
||||
{
|
||||
"comment11",
|
||||
"a<!--z---",
|
||||
"a$<!--z--->",
|
||||
},
|
||||
{
|
||||
"comment12",
|
||||
"a<!--z----",
|
||||
"a$<!--z---->",
|
||||
},
|
||||
{
|
||||
"comment13",
|
||||
"a<!--x--!>z",
|
||||
"a$<!--x-->$z",
|
||||
},
|
||||
// An attribute with a backslash.
|
||||
{
|
||||
"backslash",
|
||||
`<p id="a\"b">`,
|
||||
`<p id="a\" b"="">`,
|
||||
},
|
||||
// Entities, tag name and attribute key lower-casing, and whitespace
|
||||
// normalization within a tag.
|
||||
{
|
||||
"tricky",
|
||||
"<p \t\n iD=\"a"B\" foo=\"bar\"><EM>te<&;xt</em></p>",
|
||||
`<p id="a"B" foo="bar">$<em>$te<&;xt$</em>$</p>`,
|
||||
},
|
||||
// A nonexistent entity. Tokenizing and converting back to a string should
|
||||
// escape the "&" to become "&".
|
||||
{
|
||||
"noSuchEntity",
|
||||
`<a b="c&noSuchEntity;d"><&alsoDoesntExist;&`,
|
||||
`<a b="c&noSuchEntity;d">$<&alsoDoesntExist;&`,
|
||||
},
|
||||
/*
|
||||
// TODO: re-enable this test when it works. This input/output matches html5lib's behavior.
|
||||
{
|
||||
"entity without semicolon",
|
||||
`¬it;∉<a b="q=z&=5¬ice=hello¬=world">`,
|
||||
`¬it;∉$<a b="q=z&amp=5&notice=hello¬=world">`,
|
||||
},
|
||||
*/
|
||||
{
|
||||
"entity with digits",
|
||||
"½",
|
||||
"½",
|
||||
},
|
||||
// Attribute tests:
|
||||
// http://dev.w3.org/html5/spec/Overview.html#attributes-0
|
||||
{
|
||||
"Empty attribute",
|
||||
`<input disabled FOO>`,
|
||||
`<input disabled="" foo="">`,
|
||||
},
|
||||
{
|
||||
"Empty attribute, whitespace",
|
||||
`<input disabled FOO >`,
|
||||
`<input disabled="" foo="">`,
|
||||
},
|
||||
{
|
||||
"Unquoted attribute value",
|
||||
`<input value=yes FOO=BAR>`,
|
||||
`<input value="yes" foo="BAR">`,
|
||||
},
|
||||
{
|
||||
"Unquoted attribute value, spaces",
|
||||
`<input value = yes FOO = BAR>`,
|
||||
`<input value="yes" foo="BAR">`,
|
||||
},
|
||||
{
|
||||
"Unquoted attribute value, trailing space",
|
||||
`<input value=yes FOO=BAR >`,
|
||||
`<input value="yes" foo="BAR">`,
|
||||
},
|
||||
{
|
||||
"Single-quoted attribute value",
|
||||
`<input value='yes' FOO='BAR'>`,
|
||||
`<input value="yes" foo="BAR">`,
|
||||
},
|
||||
{
|
||||
"Single-quoted attribute value, trailing space",
|
||||
`<input value='yes' FOO='BAR' >`,
|
||||
`<input value="yes" foo="BAR">`,
|
||||
},
|
||||
{
|
||||
"Double-quoted attribute value",
|
||||
`<input value="I'm an attribute" FOO="BAR">`,
|
||||
`<input value="I'm an attribute" foo="BAR">`,
|
||||
},
|
||||
{
|
||||
"Attribute name characters",
|
||||
`<meta http-equiv="content-type">`,
|
||||
`<meta http-equiv="content-type">`,
|
||||
},
|
||||
{
|
||||
"Mixed attributes",
|
||||
`a<P V="0 1" w='2' X=3 y>z`,
|
||||
`a$<p v="0 1" w="2" x="3" y="">$z`,
|
||||
},
|
||||
{
|
||||
"Attributes with a solitary single quote",
|
||||
`<p id=can't><p id=won't>`,
|
||||
`<p id="can't">$<p id="won't">`,
|
||||
},
|
||||
}
|
||||
|
||||
func TestTokenizer(t *testing.T) {
|
||||
loop:
|
||||
for _, tt := range tokenTests {
|
||||
z := NewTokenizer(strings.NewReader(tt.html))
|
||||
if tt.golden != "" {
|
||||
for i, s := range strings.Split(tt.golden, "$") {
|
||||
if z.Next() == ErrorToken {
|
||||
t.Errorf("%s token %d: want %q got error %v", tt.desc, i, s, z.Err())
|
||||
continue loop
|
||||
}
|
||||
actual := z.Token().String()
|
||||
if s != actual {
|
||||
t.Errorf("%s token %d: want %q got %q", tt.desc, i, s, actual)
|
||||
continue loop
|
||||
}
|
||||
}
|
||||
}
|
||||
z.Next()
|
||||
if z.Err() != io.EOF {
|
||||
t.Errorf("%s: want EOF got %q", tt.desc, z.Err())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// unescapeTest is one table-driven case for TestUnescape. The entries of
// unescapeTests are positional literals, so the field order (desc, html,
// unescaped) must not change.
type unescapeTest struct {
	desc      string // short description of the test case
	html      string // the HTML text to unescape
	unescaped string // the expected unescaped text
}
|
||||
|
||||
var unescapeTests = []unescapeTest{
|
||||
// Handle no entities.
|
||||
{
|
||||
"copy",
|
||||
"A\ttext\nstring",
|
||||
"A\ttext\nstring",
|
||||
},
|
||||
// Handle simple named entities.
|
||||
{
|
||||
"simple",
|
||||
"& > <",
|
||||
"& > <",
|
||||
},
|
||||
// Handle hitting the end of the string.
|
||||
{
|
||||
"stringEnd",
|
||||
"& &",
|
||||
"& &",
|
||||
},
|
||||
// Handle entities with two codepoints.
|
||||
{
|
||||
"multiCodepoint",
|
||||
"text ⋛︀ blah",
|
||||
"text \u22db\ufe00 blah",
|
||||
},
|
||||
// Handle decimal numeric entities.
|
||||
{
|
||||
"decimalEntity",
|
||||
"Delta = Δ ",
|
||||
"Delta = Δ ",
|
||||
},
|
||||
// Handle hexadecimal numeric entities.
|
||||
{
|
||||
"hexadecimalEntity",
|
||||
"Lambda = λ = λ ",
|
||||
"Lambda = λ = λ ",
|
||||
},
|
||||
// Handle numeric early termination.
|
||||
{
|
||||
"numericEnds",
|
||||
"&# &#x €43 © = ©f = ©",
|
||||
"&# &#x €43 © = ©f = ©",
|
||||
},
|
||||
// Handle numeric ISO-8859-1 entity replacements.
|
||||
{
|
||||
"numericReplacements",
|
||||
"Footnote‡",
|
||||
"Footnote‡",
|
||||
},
|
||||
}
|
||||
|
||||
func TestUnescape(t *testing.T) {
|
||||
for _, tt := range unescapeTests {
|
||||
unescaped := UnescapeString(tt.html)
|
||||
if unescaped != tt.unescaped {
|
||||
t.Errorf("TestUnescape %s: want %q, got %q", tt.desc, tt.unescaped, unescaped)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestUnescapeEscape(t *testing.T) {
|
||||
ss := []string{
|
||||
``,
|
||||
`abc def`,
|
||||
`a & b`,
|
||||
`a&b`,
|
||||
`a & b`,
|
||||
`"`,
|
||||
`"`,
|
||||
`"<&>"`,
|
||||
`"<&>"`,
|
||||
`3&5==1 && 0<1, "0<1", a+acute=á`,
|
||||
}
|
||||
for _, s := range ss {
|
||||
if s != UnescapeString(EscapeString(s)) {
|
||||
t.Errorf("s != UnescapeString(EscapeString(s)), s=%q", s)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestBufAPI(t *testing.T) {
|
||||
s := "0<a>1</a>2<b>3<a>4<a>5</a>6</b>7</a>8<a/>9"
|
||||
z := NewTokenizer(bytes.NewBufferString(s))
|
||||
var result bytes.Buffer
|
||||
depth := 0
|
||||
loop:
|
||||
for {
|
||||
tt := z.Next()
|
||||
switch tt {
|
||||
case ErrorToken:
|
||||
if z.Err() != io.EOF {
|
||||
t.Error(z.Err())
|
||||
}
|
||||
break loop
|
||||
case TextToken:
|
||||
if depth > 0 {
|
||||
result.Write(z.Text())
|
||||
}
|
||||
case StartTagToken, EndTagToken:
|
||||
tn, _ := z.TagName()
|
||||
if len(tn) == 1 && tn[0] == 'a' {
|
||||
if tt == StartTagToken {
|
||||
depth++
|
||||
} else {
|
||||
depth--
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
u := "14567"
|
||||
v := string(result.Bytes())
|
||||
if u != v {
|
||||
t.Errorf("TestBufAPI: want %q got %q", u, v)
|
||||
}
|
||||
}
|
||||
@@ -1,289 +0,0 @@
|
||||
// Copyright 2010 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
/*
|
||||
Package inotify implements a wrapper for the Linux inotify system.
|
||||
|
||||
Example:
|
||||
watcher, err := inotify.NewWatcher()
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
err = watcher.Watch("/tmp")
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
for {
|
||||
select {
|
||||
case ev := <-watcher.Event:
|
||||
log.Println("event:", ev)
|
||||
case err := <-watcher.Error:
|
||||
log.Println("error:", err)
|
||||
}
|
||||
}
|
||||
|
||||
*/
|
||||
package inotify
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// Event is a single inotify notification as delivered on Watcher.Event.
type Event struct {
	// Mask is the bit mask of events.
	Mask uint32
	// Cookie is a unique value associating related events (for rename(2)).
	Cookie uint32
	// Name is the file name (optional).
	Name string
}
|
||||
|
||||
// watch is the bookkeeping record kept for one watched path.
type watch struct {
	// wd is the watch descriptor, as returned by the inotify_add_watch() syscall.
	wd uint32
	// flags are the inotify flags of this watch (see inotify(7) for valid flags).
	flags uint32
}
|
||||
|
||||
type Watcher struct {
|
||||
fd int // File descriptor (as returned by the inotify_init() syscall)
|
||||
watches map[string]*watch // Map of inotify watches (key: path)
|
||||
paths map[int]string // Map of watched paths (key: watch descriptor)
|
||||
Error chan error // Errors are sent on this channel
|
||||
Event chan *Event // Events are returned on this channel
|
||||
done chan bool // Channel for sending a "quit message" to the reader goroutine
|
||||
isClosed bool // Set to true when Close() is first called
|
||||
}
|
||||
|
||||
// NewWatcher creates and returns a new inotify instance using inotify_init(2)
|
||||
func NewWatcher() (*Watcher, error) {
|
||||
fd, errno := syscall.InotifyInit()
|
||||
if fd == -1 {
|
||||
return nil, os.NewSyscallError("inotify_init", errno)
|
||||
}
|
||||
w := &Watcher{
|
||||
fd: fd,
|
||||
watches: make(map[string]*watch),
|
||||
paths: make(map[int]string),
|
||||
Event: make(chan *Event),
|
||||
Error: make(chan error),
|
||||
done: make(chan bool, 1),
|
||||
}
|
||||
|
||||
go w.readEvents()
|
||||
return w, nil
|
||||
}
|
||||
|
||||
// Close closes an inotify watcher instance
|
||||
// It sends a message to the reader goroutine to quit and removes all watches
|
||||
// associated with the inotify instance
|
||||
func (w *Watcher) Close() error {
|
||||
if w.isClosed {
|
||||
return nil
|
||||
}
|
||||
w.isClosed = true
|
||||
|
||||
// Send "quit" message to the reader goroutine
|
||||
w.done <- true
|
||||
for path := range w.watches {
|
||||
w.RemoveWatch(path)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// AddWatch adds path to the watched file set.
|
||||
// The flags are interpreted as described in inotify_add_watch(2).
|
||||
func (w *Watcher) AddWatch(path string, flags uint32) error {
|
||||
if w.isClosed {
|
||||
return errors.New("inotify instance already closed")
|
||||
}
|
||||
|
||||
watchEntry, found := w.watches[path]
|
||||
if found {
|
||||
watchEntry.flags |= flags
|
||||
flags |= syscall.IN_MASK_ADD
|
||||
}
|
||||
wd, err := syscall.InotifyAddWatch(w.fd, path, flags)
|
||||
if err != nil {
|
||||
return &os.PathError{
|
||||
Op: "inotify_add_watch",
|
||||
Path: path,
|
||||
Err: err,
|
||||
}
|
||||
}
|
||||
|
||||
if !found {
|
||||
w.watches[path] = &watch{wd: uint32(wd), flags: flags}
|
||||
w.paths[wd] = path
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Watch adds path to the watched file set, watching all events.
|
||||
func (w *Watcher) Watch(path string) error {
|
||||
return w.AddWatch(path, IN_ALL_EVENTS)
|
||||
}
|
||||
|
||||
// RemoveWatch removes path from the watched file set.
|
||||
func (w *Watcher) RemoveWatch(path string) error {
|
||||
watch, ok := w.watches[path]
|
||||
if !ok {
|
||||
return errors.New(fmt.Sprintf("can't remove non-existent inotify watch for: %s", path))
|
||||
}
|
||||
success, errno := syscall.InotifyRmWatch(w.fd, watch.wd)
|
||||
if success == -1 {
|
||||
return os.NewSyscallError("inotify_rm_watch", errno)
|
||||
}
|
||||
delete(w.watches, path)
|
||||
return nil
|
||||
}
|
||||
|
||||
// readEvents reads from the inotify file descriptor, converts the
|
||||
// received events into Event objects and sends them via the Event channel
|
||||
func (w *Watcher) readEvents() {
|
||||
var buf [syscall.SizeofInotifyEvent * 4096]byte
|
||||
|
||||
for {
|
||||
n, err := syscall.Read(w.fd, buf[0:])
|
||||
// See if there is a message on the "done" channel
|
||||
var done bool
|
||||
select {
|
||||
case done = <-w.done:
|
||||
default:
|
||||
}
|
||||
|
||||
// If EOF or a "done" message is received
|
||||
if n == 0 || done {
|
||||
err := syscall.Close(w.fd)
|
||||
if err != nil {
|
||||
w.Error <- os.NewSyscallError("close", err)
|
||||
}
|
||||
close(w.Event)
|
||||
close(w.Error)
|
||||
return
|
||||
}
|
||||
if n < 0 {
|
||||
w.Error <- os.NewSyscallError("read", err)
|
||||
continue
|
||||
}
|
||||
if n < syscall.SizeofInotifyEvent {
|
||||
w.Error <- errors.New("inotify: short read in readEvents()")
|
||||
continue
|
||||
}
|
||||
|
||||
var offset uint32 = 0
|
||||
// We don't know how many events we just read into the buffer
|
||||
// While the offset points to at least one whole event...
|
||||
for offset <= uint32(n-syscall.SizeofInotifyEvent) {
|
||||
// Point "raw" to the event in the buffer
|
||||
raw := (*syscall.InotifyEvent)(unsafe.Pointer(&buf[offset]))
|
||||
event := new(Event)
|
||||
event.Mask = uint32(raw.Mask)
|
||||
event.Cookie = uint32(raw.Cookie)
|
||||
nameLen := uint32(raw.Len)
|
||||
// If the event happened to the watched directory or the watched file, the kernel
|
||||
// doesn't append the filename to the event, but we would like to always fill the
|
||||
// the "Name" field with a valid filename. We retrieve the path of the watch from
|
||||
// the "paths" map.
|
||||
event.Name = w.paths[int(raw.Wd)]
|
||||
if nameLen > 0 {
|
||||
// Point "bytes" at the first byte of the filename
|
||||
bytes := (*[syscall.PathMax]byte)(unsafe.Pointer(&buf[offset+syscall.SizeofInotifyEvent]))
|
||||
// The filename is padded with NUL bytes. TrimRight() gets rid of those.
|
||||
event.Name += "/" + strings.TrimRight(string(bytes[0:nameLen]), "\000")
|
||||
}
|
||||
// Send the event on the events channel
|
||||
w.Event <- event
|
||||
|
||||
// Move to the next event in the buffer
|
||||
offset += syscall.SizeofInotifyEvent + nameLen
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// String formats the event e in the form
|
||||
// "filename: 0xEventMask = IN_ACCESS|IN_ATTRIB_|..."
|
||||
func (e *Event) String() string {
|
||||
var events string = ""
|
||||
|
||||
m := e.Mask
|
||||
for _, b := range eventBits {
|
||||
if m&b.Value != 0 {
|
||||
m &^= b.Value
|
||||
events += "|" + b.Name
|
||||
}
|
||||
}
|
||||
|
||||
if m != 0 {
|
||||
events += fmt.Sprintf("|%#x", m)
|
||||
}
|
||||
if len(events) > 0 {
|
||||
events = " == " + events[1:]
|
||||
}
|
||||
|
||||
return fmt.Sprintf("%q: %#x%s", e.Name, e.Mask, events)
|
||||
}
|
||||
|
||||
// Exported inotify flag and event constants. Each mirrors the syscall
// constant of the same name, typed as uint32.
const (
	// The options for inotify_init() are deliberately not exported:
	// IN_CLOEXEC uint32 = syscall.IN_CLOEXEC
	// IN_NONBLOCK uint32 = syscall.IN_NONBLOCK

	// Options accepted by AddWatch.
	IN_DONT_FOLLOW uint32 = syscall.IN_DONT_FOLLOW
	IN_ONESHOT     uint32 = syscall.IN_ONESHOT
	IN_ONLYDIR     uint32 = syscall.IN_ONLYDIR

	// IN_MASK_ADD is not exported either: AddWatch sets it automatically
	// when a watch for the given path already exists.
	// IN_MASK_ADD uint32 = syscall.IN_MASK_ADD

	// Event bits.
	IN_ACCESS        uint32 = syscall.IN_ACCESS
	IN_ALL_EVENTS    uint32 = syscall.IN_ALL_EVENTS
	IN_ATTRIB        uint32 = syscall.IN_ATTRIB
	IN_CLOSE         uint32 = syscall.IN_CLOSE
	IN_CLOSE_NOWRITE uint32 = syscall.IN_CLOSE_NOWRITE
	IN_CLOSE_WRITE   uint32 = syscall.IN_CLOSE_WRITE
	IN_CREATE        uint32 = syscall.IN_CREATE
	IN_DELETE        uint32 = syscall.IN_DELETE
	IN_DELETE_SELF   uint32 = syscall.IN_DELETE_SELF
	IN_MODIFY        uint32 = syscall.IN_MODIFY
	IN_MOVE          uint32 = syscall.IN_MOVE
	IN_MOVED_FROM    uint32 = syscall.IN_MOVED_FROM
	IN_MOVED_TO      uint32 = syscall.IN_MOVED_TO
	IN_MOVE_SELF     uint32 = syscall.IN_MOVE_SELF
	IN_OPEN          uint32 = syscall.IN_OPEN

	// Special event bits.
	IN_ISDIR      uint32 = syscall.IN_ISDIR
	IN_IGNORED    uint32 = syscall.IN_IGNORED
	IN_Q_OVERFLOW uint32 = syscall.IN_Q_OVERFLOW
	IN_UNMOUNT    uint32 = syscall.IN_UNMOUNT
)
|
||||
|
||||
var eventBits = []struct {
|
||||
Value uint32
|
||||
Name string
|
||||
}{
|
||||
{IN_ACCESS, "IN_ACCESS"},
|
||||
{IN_ATTRIB, "IN_ATTRIB"},
|
||||
{IN_CLOSE, "IN_CLOSE"},
|
||||
{IN_CLOSE_NOWRITE, "IN_CLOSE_NOWRITE"},
|
||||
{IN_CLOSE_WRITE, "IN_CLOSE_WRITE"},
|
||||
{IN_CREATE, "IN_CREATE"},
|
||||
{IN_DELETE, "IN_DELETE"},
|
||||
{IN_DELETE_SELF, "IN_DELETE_SELF"},
|
||||
{IN_MODIFY, "IN_MODIFY"},
|
||||
{IN_MOVE, "IN_MOVE"},
|
||||
{IN_MOVED_FROM, "IN_MOVED_FROM"},
|
||||
{IN_MOVED_TO, "IN_MOVED_TO"},
|
||||
{IN_MOVE_SELF, "IN_MOVE_SELF"},
|
||||
{IN_OPEN, "IN_OPEN"},
|
||||
{IN_ISDIR, "IN_ISDIR"},
|
||||
{IN_IGNORED, "IN_IGNORED"},
|
||||
{IN_Q_OVERFLOW, "IN_Q_OVERFLOW"},
|
||||
{IN_UNMOUNT, "IN_UNMOUNT"},
|
||||
}
|
||||
@@ -1,106 +0,0 @@
|
||||
// Copyright 2010 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build linux
|
||||
|
||||
package inotify
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestInotifyEvents(t *testing.T) {
|
||||
// Create an inotify watcher instance and initialize it
|
||||
watcher, err := NewWatcher()
|
||||
if err != nil {
|
||||
t.Fatalf("NewWatcher failed: %s", err)
|
||||
}
|
||||
|
||||
dir, err := ioutil.TempDir("", "inotify")
|
||||
if err != nil {
|
||||
t.Fatalf("TempDir failed: %s", err)
|
||||
}
|
||||
defer os.RemoveAll(dir)
|
||||
|
||||
// Add a watch for "_test"
|
||||
err = watcher.Watch(dir)
|
||||
if err != nil {
|
||||
t.Fatalf("Watch failed: %s", err)
|
||||
}
|
||||
|
||||
// Receive errors on the error channel on a separate goroutine
|
||||
go func() {
|
||||
for err := range watcher.Error {
|
||||
t.Fatalf("error received: %s", err)
|
||||
}
|
||||
}()
|
||||
|
||||
testFile := dir + "/TestInotifyEvents.testfile"
|
||||
|
||||
// Receive events on the event channel on a separate goroutine
|
||||
eventstream := watcher.Event
|
||||
var eventsReceived = 0
|
||||
done := make(chan bool)
|
||||
go func() {
|
||||
for event := range eventstream {
|
||||
// Only count relevant events
|
||||
if event.Name == testFile {
|
||||
eventsReceived++
|
||||
t.Logf("event received: %s", event)
|
||||
} else {
|
||||
t.Logf("unexpected event received: %s", event)
|
||||
}
|
||||
}
|
||||
done <- true
|
||||
}()
|
||||
|
||||
// Create a file
|
||||
// This should add at least one event to the inotify event queue
|
||||
_, err = os.OpenFile(testFile, os.O_WRONLY|os.O_CREATE, 0666)
|
||||
if err != nil {
|
||||
t.Fatalf("creating test file: %s", err)
|
||||
}
|
||||
|
||||
// We expect this event to be received almost immediately, but let's wait 1 s to be sure
|
||||
time.Sleep(1 * time.Second)
|
||||
if eventsReceived == 0 {
|
||||
t.Fatal("inotify event hasn't been received after 1 second")
|
||||
}
|
||||
|
||||
// Try closing the inotify instance
|
||||
t.Log("calling Close()")
|
||||
watcher.Close()
|
||||
t.Log("waiting for the event channel to become closed...")
|
||||
select {
|
||||
case <-done:
|
||||
t.Log("event channel closed")
|
||||
case <-time.After(1 * time.Second):
|
||||
t.Fatal("event stream was not closed after 1 second")
|
||||
}
|
||||
}
|
||||
|
||||
func TestInotifyClose(t *testing.T) {
|
||||
watcher, _ := NewWatcher()
|
||||
watcher.Close()
|
||||
|
||||
done := make(chan bool)
|
||||
go func() {
|
||||
watcher.Close()
|
||||
done <- true
|
||||
}()
|
||||
|
||||
select {
|
||||
case <-done:
|
||||
case <-time.After(50 * time.Millisecond):
|
||||
t.Fatal("double Close() test failed: second Close() call didn't return")
|
||||
}
|
||||
|
||||
err := watcher.Watch(os.TempDir())
|
||||
if err == nil {
|
||||
t.Fatal("expected error on Watch() after Close(), got nil")
|
||||
}
|
||||
}
|
||||
@@ -1,30 +0,0 @@
|
||||
# Copyright 2011 The Go Authors. All rights reserved.
|
||||
# Use of this source code is governed by a BSD-style
|
||||
# license that can be found in the LICENSE file.
|
||||
|
||||
maketables: maketables.go triegen.go
|
||||
go build $^
|
||||
|
||||
maketesttables: maketesttables.go triegen.go
|
||||
go build $^
|
||||
|
||||
normregtest: normregtest.go
|
||||
go build $^
|
||||
|
||||
tables: maketables
|
||||
./maketables > tables.go
|
||||
gofmt -w tables.go
|
||||
|
||||
trietesttables: maketesttables
|
||||
./maketesttables > triedata_test.go
|
||||
gofmt -w triedata_test.go
|
||||
|
||||
# Downloads from www.unicode.org, so not part
|
||||
# of standard test scripts.
|
||||
test: testtables regtest
|
||||
|
||||
testtables: maketables
|
||||
./maketables -test -tables=
|
||||
|
||||
regtest: normregtest
|
||||
./normregtest
|
||||
@@ -1,386 +0,0 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package norm
|
||||
|
||||
import "unicode/utf8"
|
||||
|
||||
// Size limits for the reorder buffer.
const (
	maxCombiningChars = 30
	// One extra slot holds the starter and one more holds the CGJ.
	maxBufferSize    = maxCombiningChars + 2
	maxBackRunes     = maxCombiningChars - 1
	maxNFCExpansion  = 3  // NFC(0x1D160)
	maxNFKCExpansion = 18 // NFKC(0xFDFA)

	// Every rune slot is given utf8.UTFMax bytes: 4 * 32 = 128.
	maxByteBufferSize = utf8.UTFMax * maxBufferSize
)
|
||||
|
||||
// reorderBuffer is used to normalize a single segment. Characters inserted with
|
||||
// insert are decomposed and reordered based on CCC. The compose method can
|
||||
// be used to recombine characters. Note that the byte buffer does not hold
|
||||
// the UTF-8 characters in order. Only the rune array is maintained in sorted
|
||||
// order. flush writes the resulting segment to a byte array.
|
||||
type reorderBuffer struct {
|
||||
rune [maxBufferSize]runeInfo // Per character info.
|
||||
byte [maxByteBufferSize]byte // UTF-8 buffer. Referenced by runeInfo.pos.
|
||||
nrune int // Number of runeInfos.
|
||||
nbyte uint8 // Number or bytes.
|
||||
f formInfo
|
||||
|
||||
src input
|
||||
nsrc int
|
||||
srcBytes inputBytes
|
||||
srcString inputString
|
||||
tmpBytes inputBytes
|
||||
}
|
||||
|
||||
func (rb *reorderBuffer) init(f Form, src []byte) {
|
||||
rb.f = *formTable[f]
|
||||
rb.srcBytes = inputBytes(src)
|
||||
rb.src = &rb.srcBytes
|
||||
rb.nsrc = len(src)
|
||||
}
|
||||
|
||||
func (rb *reorderBuffer) initString(f Form, src string) {
|
||||
rb.f = *formTable[f]
|
||||
rb.srcString = inputString(src)
|
||||
rb.src = &rb.srcString
|
||||
rb.nsrc = len(src)
|
||||
}
|
||||
|
||||
// reset discards all characters from the buffer.
|
||||
func (rb *reorderBuffer) reset() {
|
||||
rb.nrune = 0
|
||||
rb.nbyte = 0
|
||||
}
|
||||
|
||||
// flush appends the normalized segment to out and resets rb.
|
||||
func (rb *reorderBuffer) flush(out []byte) []byte {
|
||||
for i := 0; i < rb.nrune; i++ {
|
||||
start := rb.rune[i].pos
|
||||
end := start + rb.rune[i].size
|
||||
out = append(out, rb.byte[start:end]...)
|
||||
}
|
||||
rb.reset()
|
||||
return out
|
||||
}
|
||||
|
||||
// flushCopy copies the normalized segment to buf and resets rb.
|
||||
// It returns the number of bytes written to buf.
|
||||
func (rb *reorderBuffer) flushCopy(buf []byte) int {
|
||||
p := 0
|
||||
for i := 0; i < rb.nrune; i++ {
|
||||
runep := rb.rune[i]
|
||||
p += copy(buf[p:], rb.byte[runep.pos:runep.pos+runep.size])
|
||||
}
|
||||
rb.reset()
|
||||
return p
|
||||
}
|
||||
|
||||
// insertOrdered inserts a rune in the buffer, ordered by Canonical Combining Class.
|
||||
// It returns false if the buffer is not large enough to hold the rune.
|
||||
// It is used internally by insert and insertString only.
|
||||
func (rb *reorderBuffer) insertOrdered(info runeInfo) bool {
|
||||
n := rb.nrune
|
||||
if n >= maxCombiningChars+1 {
|
||||
return false
|
||||
}
|
||||
b := rb.rune[:]
|
||||
cc := info.ccc
|
||||
if cc > 0 {
|
||||
// Find insertion position + move elements to make room.
|
||||
for ; n > 0; n-- {
|
||||
if b[n-1].ccc <= cc {
|
||||
break
|
||||
}
|
||||
b[n] = b[n-1]
|
||||
}
|
||||
}
|
||||
rb.nrune += 1
|
||||
pos := uint8(rb.nbyte)
|
||||
rb.nbyte += utf8.UTFMax
|
||||
info.pos = pos
|
||||
b[n] = info
|
||||
return true
|
||||
}
|
||||
|
||||
// insert inserts the given rune in the buffer ordered by CCC.
|
||||
// It returns true if the buffer was large enough to hold the decomposed rune.
|
||||
func (rb *reorderBuffer) insert(src input, i int, info runeInfo) bool {
|
||||
if rune := src.hangul(i); rune != 0 {
|
||||
return rb.decomposeHangul(rune)
|
||||
}
|
||||
if info.hasDecomposition() {
|
||||
return rb.insertDecomposed(info.decomposition())
|
||||
}
|
||||
return rb.insertSingle(src, i, info)
|
||||
}
|
||||
|
||||
// insertDecomposed inserts an entry in to the reorderBuffer for each rune
|
||||
// in dcomp. dcomp must be a sequence of decomposed UTF-8-encoded runes.
|
||||
func (rb *reorderBuffer) insertDecomposed(dcomp []byte) bool {
|
||||
saveNrune, saveNbyte := rb.nrune, rb.nbyte
|
||||
rb.tmpBytes = inputBytes(dcomp)
|
||||
for i := 0; i < len(dcomp); {
|
||||
info := rb.f.info(&rb.tmpBytes, i)
|
||||
pos := rb.nbyte
|
||||
if !rb.insertOrdered(info) {
|
||||
rb.nrune, rb.nbyte = saveNrune, saveNbyte
|
||||
return false
|
||||
}
|
||||
i += copy(rb.byte[pos:], dcomp[i:i+int(info.size)])
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// insertSingle inserts an entry in the reorderBuffer for the rune at
|
||||
// position i. info is the runeInfo for the rune at position i.
|
||||
func (rb *reorderBuffer) insertSingle(src input, i int, info runeInfo) bool {
|
||||
// insertOrder changes nbyte
|
||||
pos := rb.nbyte
|
||||
if !rb.insertOrdered(info) {
|
||||
return false
|
||||
}
|
||||
src.copySlice(rb.byte[pos:], i, i+int(info.size))
|
||||
return true
|
||||
}
|
||||
|
||||
// appendRune inserts a rune at the end of the buffer. It is used for Hangul.
|
||||
func (rb *reorderBuffer) appendRune(r rune) {
|
||||
bn := rb.nbyte
|
||||
sz := utf8.EncodeRune(rb.byte[bn:], rune(r))
|
||||
rb.nbyte += utf8.UTFMax
|
||||
rb.rune[rb.nrune] = runeInfo{pos: bn, size: uint8(sz)}
|
||||
rb.nrune++
|
||||
}
|
||||
|
||||
// assignRune sets a rune at position pos. It is used for Hangul and recomposition.
|
||||
func (rb *reorderBuffer) assignRune(pos int, r rune) {
|
||||
bn := rb.rune[pos].pos
|
||||
sz := utf8.EncodeRune(rb.byte[bn:], rune(r))
|
||||
rb.rune[pos] = runeInfo{pos: bn, size: uint8(sz)}
|
||||
}
|
||||
|
||||
// runeAt returns the rune at position n. It is used for Hangul and recomposition.
|
||||
func (rb *reorderBuffer) runeAt(n int) rune {
|
||||
inf := rb.rune[n]
|
||||
r, _ := utf8.DecodeRune(rb.byte[inf.pos : inf.pos+inf.size])
|
||||
return r
|
||||
}
|
||||
|
||||
// bytesAt returns the UTF-8 encoding of the rune at position n.
|
||||
// It is used for Hangul and recomposition.
|
||||
func (rb *reorderBuffer) bytesAt(n int) []byte {
|
||||
inf := rb.rune[n]
|
||||
return rb.byte[inf.pos : int(inf.pos)+int(inf.size)]
|
||||
}
|
||||
|
||||
// For Hangul we combine algorithmically, instead of using tables.
//
// The xxx0/xxx1/xxx2 constants are the individual bytes of the UTF-8
// encoding of the corresponding code point, which lets callers classify
// encoded input without decoding it.
const (
	hangulBase  = 0xAC00 // UTF-8(hangulBase) -> EA B0 80
	hangulBase0 = 0xEA
	hangulBase1 = 0xB0
	hangulBase2 = 0x80

	hangulEnd  = hangulBase + jamoLVTCount // UTF-8(0xD7A4) -> ED 9E A4
	hangulEnd0 = 0xED
	hangulEnd1 = 0x9E
	hangulEnd2 = 0xA4

	jamoLBase  = 0x1100 // UTF-8(jamoLBase) -> E1 84 80
	jamoLBase0 = 0xE1
	jamoLBase1 = 0x84
	jamoLEnd   = 0x1113
	jamoVBase  = 0x1161
	jamoVEnd   = 0x1176
	jamoTBase  = 0x11A7
	jamoTEnd   = 0x11C3

	jamoTCount   = 28
	jamoVCount   = 21
	jamoVTCount  = 21 * 28
	jamoLVTCount = 19 * 21 * 28
)
|
||||
|
||||
// hangulUTF8Size is the length in bytes of the UTF-8 encoding of a Hangul
// syllable.
const hangulUTF8Size = 3
|
||||
|
||||
func isHangul(b []byte) bool {
|
||||
if len(b) < hangulUTF8Size {
|
||||
return false
|
||||
}
|
||||
b0 := b[0]
|
||||
if b0 < hangulBase0 {
|
||||
return false
|
||||
}
|
||||
b1 := b[1]
|
||||
switch {
|
||||
case b0 == hangulBase0:
|
||||
return b1 >= hangulBase1
|
||||
case b0 < hangulEnd0:
|
||||
return true
|
||||
case b0 > hangulEnd0:
|
||||
return false
|
||||
case b1 < hangulEnd1:
|
||||
return true
|
||||
}
|
||||
return b1 == hangulEnd1 && b[2] < hangulEnd2
|
||||
}
|
||||
|
||||
func isHangulString(b string) bool {
|
||||
if len(b) < hangulUTF8Size {
|
||||
return false
|
||||
}
|
||||
b0 := b[0]
|
||||
if b0 < hangulBase0 {
|
||||
return false
|
||||
}
|
||||
b1 := b[1]
|
||||
switch {
|
||||
case b0 == hangulBase0:
|
||||
return b1 >= hangulBase1
|
||||
case b0 < hangulEnd0:
|
||||
return true
|
||||
case b0 > hangulEnd0:
|
||||
return false
|
||||
case b1 < hangulEnd1:
|
||||
return true
|
||||
}
|
||||
return b1 == hangulEnd1 && b[2] < hangulEnd2
|
||||
}
|
||||
|
||||
// Caller must ensure len(b) >= 2.
|
||||
func isJamoVT(b []byte) bool {
|
||||
// True if (rune & 0xff00) == jamoLBase
|
||||
return b[0] == jamoLBase0 && (b[1]&0xFC) == jamoLBase1
|
||||
}
|
||||
|
||||
func isHangulWithoutJamoT(b []byte) bool {
|
||||
c, _ := utf8.DecodeRune(b)
|
||||
c -= hangulBase
|
||||
return c < jamoLVTCount && c%jamoTCount == 0
|
||||
}
|
||||
|
||||
// decomposeHangul writes the decomposed Hangul to buf and returns the number
|
||||
// of bytes written. len(buf) should be at least 9.
|
||||
func decomposeHangul(buf []byte, r rune) int {
|
||||
const JamoUTF8Len = 3
|
||||
r -= hangulBase
|
||||
x := r % jamoTCount
|
||||
r /= jamoTCount
|
||||
utf8.EncodeRune(buf, jamoLBase+r/jamoVCount)
|
||||
utf8.EncodeRune(buf[JamoUTF8Len:], jamoVBase+r%jamoVCount)
|
||||
if x != 0 {
|
||||
utf8.EncodeRune(buf[2*JamoUTF8Len:], jamoTBase+x)
|
||||
return 3 * JamoUTF8Len
|
||||
}
|
||||
return 2 * JamoUTF8Len
|
||||
}
|
||||
|
||||
// decomposeHangul algorithmically decomposes a Hangul rune into
|
||||
// its Jamo components.
|
||||
// See http://unicode.org/reports/tr15/#Hangul for details on decomposing Hangul.
|
||||
func (rb *reorderBuffer) decomposeHangul(r rune) bool {
|
||||
b := rb.rune[:]
|
||||
n := rb.nrune
|
||||
if n+3 > len(b) {
|
||||
return false
|
||||
}
|
||||
r -= hangulBase
|
||||
x := r % jamoTCount
|
||||
r /= jamoTCount
|
||||
rb.appendRune(jamoLBase + r/jamoVCount)
|
||||
rb.appendRune(jamoVBase + r%jamoVCount)
|
||||
if x != 0 {
|
||||
rb.appendRune(jamoTBase + x)
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// combineHangul algorithmically combines Jamo character components into Hangul.
|
||||
// See http://unicode.org/reports/tr15/#Hangul for details on combining Hangul.
|
||||
func (rb *reorderBuffer) combineHangul(s, i, k int) {
|
||||
b := rb.rune[:]
|
||||
bn := rb.nrune
|
||||
for ; i < bn; i++ {
|
||||
cccB := b[k-1].ccc
|
||||
cccC := b[i].ccc
|
||||
if cccB == 0 {
|
||||
s = k - 1
|
||||
}
|
||||
if s != k-1 && cccB >= cccC {
|
||||
// b[i] is blocked by greater-equal cccX below it
|
||||
b[k] = b[i]
|
||||
k++
|
||||
} else {
|
||||
l := rb.runeAt(s) // also used to compare to hangulBase
|
||||
v := rb.runeAt(i) // also used to compare to jamoT
|
||||
switch {
|
||||
case jamoLBase <= l && l < jamoLEnd &&
|
||||
jamoVBase <= v && v < jamoVEnd:
|
||||
// 11xx plus 116x to LV
|
||||
rb.assignRune(s, hangulBase+
|
||||
(l-jamoLBase)*jamoVTCount+(v-jamoVBase)*jamoTCount)
|
||||
case hangulBase <= l && l < hangulEnd &&
|
||||
jamoTBase < v && v < jamoTEnd &&
|
||||
((l-hangulBase)%jamoTCount) == 0:
|
||||
// ACxx plus 11Ax to LVT
|
||||
rb.assignRune(s, l+v-jamoTBase)
|
||||
default:
|
||||
b[k] = b[i]
|
||||
k++
|
||||
}
|
||||
}
|
||||
}
|
||||
rb.nrune = k
|
||||
}
|
||||
|
||||
// compose recombines the runes in the buffer.
|
||||
// It should only be used to recompose a single segment, as it will not
|
||||
// handle alternations between Hangul and non-Hangul characters correctly.
|
||||
func (rb *reorderBuffer) compose() {
|
||||
// UAX #15, section X5 , including Corrigendum #5
|
||||
// "In any character sequence beginning with starter S, a character C is
|
||||
// blocked from S if and only if there is some character B between S
|
||||
// and C, and either B is a starter or it has the same or higher
|
||||
// combining class as C."
|
||||
bn := rb.nrune
|
||||
if bn == 0 {
|
||||
return
|
||||
}
|
||||
k := 1
|
||||
b := rb.rune[:]
|
||||
for s, i := 0, 1; i < bn; i++ {
|
||||
if isJamoVT(rb.bytesAt(i)) {
|
||||
// Redo from start in Hangul mode. Necessary to support
|
||||
// U+320E..U+321E in NFKC mode.
|
||||
rb.combineHangul(s, i, k)
|
||||
return
|
||||
}
|
||||
ii := b[i]
|
||||
// We can only use combineForward as a filter if we later
|
||||
// get the info for the combined character. This is more
|
||||
// expensive than using the filter. Using combinesBackward()
|
||||
// is safe.
|
||||
if ii.combinesBackward() {
|
||||
cccB := b[k-1].ccc
|
||||
cccC := ii.ccc
|
||||
blocked := false // b[i] blocked by starter or greater or equal CCC?
|
||||
if cccB == 0 {
|
||||
s = k - 1
|
||||
} else {
|
||||
blocked = s != k-1 && cccB >= cccC
|
||||
}
|
||||
if !blocked {
|
||||
combined := combine(rb.runeAt(s), rb.runeAt(i))
|
||||
if combined != 0 {
|
||||
rb.assignRune(s, combined)
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
b[k] = b[i]
|
||||
k++
|
||||
}
|
||||
rb.nrune = k
|
||||
}
|
||||
@@ -1,143 +0,0 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package norm
|
||||
|
||||
import "testing"
|
||||
|
||||
// TestCase is used for most tests. in holds the runes fed to the
// reorderBuffer and out the runes expected in the buffer afterwards.
type TestCase struct {
	in, out []rune
}
|
||||
|
||||
// insertFunc is the signature of test helpers that insert a single rune
// into a reorderBuffer and return a bool result.
type insertFunc func(*reorderBuffer, rune) bool
|
||||
|
||||
func insert(rb *reorderBuffer, r rune) bool {
|
||||
src := inputString(string(r))
|
||||
return rb.insert(src, 0, rb.f.info(src, 0))
|
||||
}
|
||||
|
||||
func runTests(t *testing.T, name string, fm Form, f insertFunc, tests []TestCase) {
|
||||
rb := reorderBuffer{}
|
||||
rb.init(fm, nil)
|
||||
for i, test := range tests {
|
||||
rb.reset()
|
||||
for j, rune := range test.in {
|
||||
b := []byte(string(rune))
|
||||
src := inputBytes(b)
|
||||
if !rb.insert(src, 0, rb.f.info(src, 0)) {
|
||||
t.Errorf("%s:%d: insert failed for rune %d", name, i, j)
|
||||
}
|
||||
}
|
||||
if rb.f.composing {
|
||||
rb.compose()
|
||||
}
|
||||
if rb.nrune != len(test.out) {
|
||||
t.Errorf("%s:%d: length = %d; want %d", name, i, rb.nrune, len(test.out))
|
||||
continue
|
||||
}
|
||||
for j, want := range test.out {
|
||||
found := rune(rb.runeAt(j))
|
||||
if found != want {
|
||||
t.Errorf("%s:%d: runeAt(%d) = %U; want %U", name, i, j, found, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// flushFunc is the signature of test helpers that flush a reorderBuffer and
// return the bytes produced.
type flushFunc func(*reorderBuffer) []byte
|
||||
|
||||
func testFlush(t *testing.T, name string, fn flushFunc) {
|
||||
rb := reorderBuffer{}
|
||||
rb.init(NFC, nil)
|
||||
out := fn(&rb)
|
||||
if len(out) != 0 {
|
||||
t.Errorf("%s: wrote bytes on flush of empty buffer. (len(out) = %d)", name, len(out))
|
||||
}
|
||||
|
||||
for _, r := range []rune("world!") {
|
||||
insert(&rb, r)
|
||||
}
|
||||
|
||||
out = []byte("Hello ")
|
||||
out = rb.flush(out)
|
||||
want := "Hello world!"
|
||||
if string(out) != want {
|
||||
t.Errorf(`%s: output after flush was "%s"; want "%s"`, name, string(out), want)
|
||||
}
|
||||
if rb.nrune != 0 {
|
||||
t.Errorf("%s: non-null size of info buffer (rb.nrune == %d)", name, rb.nrune)
|
||||
}
|
||||
if rb.nbyte != 0 {
|
||||
t.Errorf("%s: non-null size of byte buffer (rb.nbyte == %d)", name, rb.nbyte)
|
||||
}
|
||||
}
|
||||
|
||||
func flushF(rb *reorderBuffer) []byte {
|
||||
out := make([]byte, 0)
|
||||
return rb.flush(out)
|
||||
}
|
||||
|
||||
func flushCopyF(rb *reorderBuffer) []byte {
|
||||
out := make([]byte, MaxSegmentSize)
|
||||
n := rb.flushCopy(out)
|
||||
return out[:n]
|
||||
}
|
||||
|
||||
func TestFlush(t *testing.T) {
|
||||
testFlush(t, "flush", flushF)
|
||||
testFlush(t, "flushCopy", flushCopyF)
|
||||
}
|
||||
|
||||
var insertTests = []TestCase{
|
||||
{[]rune{'a'}, []rune{'a'}},
|
||||
{[]rune{0x300}, []rune{0x300}},
|
||||
{[]rune{0x300, 0x316}, []rune{0x316, 0x300}}, // CCC(0x300)==230; CCC(0x316)==220
|
||||
{[]rune{0x316, 0x300}, []rune{0x316, 0x300}},
|
||||
{[]rune{0x41, 0x316, 0x300}, []rune{0x41, 0x316, 0x300}},
|
||||
{[]rune{0x41, 0x300, 0x316}, []rune{0x41, 0x316, 0x300}},
|
||||
{[]rune{0x300, 0x316, 0x41}, []rune{0x316, 0x300, 0x41}},
|
||||
{[]rune{0x41, 0x300, 0x40, 0x316}, []rune{0x41, 0x300, 0x40, 0x316}},
|
||||
}
|
||||
|
||||
func TestInsert(t *testing.T) {
|
||||
runTests(t, "TestInsert", NFD, insert, insertTests)
|
||||
}
|
||||
|
||||
var decompositionNFDTest = []TestCase{
|
||||
{[]rune{0xC0}, []rune{0x41, 0x300}},
|
||||
{[]rune{0xAC00}, []rune{0x1100, 0x1161}},
|
||||
{[]rune{0x01C4}, []rune{0x01C4}},
|
||||
{[]rune{0x320E}, []rune{0x320E}},
|
||||
{[]rune("음ẻ과"), []rune{0x110B, 0x1173, 0x11B7, 0x65, 0x309, 0x1100, 0x116A}},
|
||||
}
|
||||
|
||||
var decompositionNFKDTest = []TestCase{
|
||||
{[]rune{0xC0}, []rune{0x41, 0x300}},
|
||||
{[]rune{0xAC00}, []rune{0x1100, 0x1161}},
|
||||
{[]rune{0x01C4}, []rune{0x44, 0x5A, 0x030C}},
|
||||
{[]rune{0x320E}, []rune{0x28, 0x1100, 0x1161, 0x29}},
|
||||
}
|
||||
|
||||
func TestDecomposition(t *testing.T) {
|
||||
runTests(t, "TestDecompositionNFD", NFD, insert, decompositionNFDTest)
|
||||
runTests(t, "TestDecompositionNFKD", NFKD, insert, decompositionNFKDTest)
|
||||
}
|
||||
|
||||
var compositionTest = []TestCase{
|
||||
{[]rune{0x41, 0x300}, []rune{0xC0}},
|
||||
{[]rune{0x41, 0x316}, []rune{0x41, 0x316}},
|
||||
{[]rune{0x41, 0x300, 0x35D}, []rune{0xC0, 0x35D}},
|
||||
{[]rune{0x41, 0x316, 0x300}, []rune{0xC0, 0x316}},
|
||||
// blocking starter
|
||||
{[]rune{0x41, 0x316, 0x40, 0x300}, []rune{0x41, 0x316, 0x40, 0x300}},
|
||||
{[]rune{0x1100, 0x1161}, []rune{0xAC00}},
|
||||
// parenthesized Hangul, alternate between ASCII and Hangul.
|
||||
{[]rune{0x28, 0x1100, 0x1161, 0x29}, []rune{0x28, 0xAC00, 0x29}},
|
||||
}
|
||||
|
||||
func TestComposition(t *testing.T) {
|
||||
runTests(t, "TestComposition", NFC, insert, compositionTest)
|
||||
}
|
||||
@@ -1,174 +0,0 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package norm
|
||||
|
||||
// This file contains Form-specific logic and wrappers for data in tables.go.
|
||||
|
||||
// Rune info is stored in a separate trie per composing form. A composing form
|
||||
// and its corresponding decomposing form share the same trie. Each trie maps
|
||||
// a rune to a uint16. The values take two forms. For v >= 0x8000:
|
||||
// bits
|
||||
// 0..7: ccc
|
||||
// 8..11: qcInfo (see below). isYesD is always true (no decomposition).
|
||||
// 15: 1
|
||||
// For v < 0x8000, the respective rune has a decomposition and v is an index
|
||||
// into a byte array of UTF-8 decomposition sequences and additional info and
|
||||
// has the form:
|
||||
// <header> <decomp_byte>* [<tccc> [<lccc>]]
|
||||
// The header contains the number of bytes in the decomposition (excluding this
|
||||
// length byte). The two most significant bits of this length byte correspond
|
||||
// to bit 2 and 3 of qcInfo (see below). The byte sequence itself starts at v+1.
|
||||
// The byte sequence is followed by a trailing and leading CCC if the values
|
||||
// for these are not zero. The value of v determines which ccc are appended
|
||||
// to the sequences. For v < firstCCC, there are none, for v >= firstCCC,
|
||||
// the sequence is followed by a trailing ccc, and for v >= firstLeadingCCC
|
||||
// there is an additional leading ccc.
|
||||
|
||||
// Masks used when unpacking trie values and decomposition header bytes.
const (
	// qcInfoMask clears all but the relevant bits in a qcInfo.
	qcInfoMask = 0xF
	// headerLenMask extracts the length value from the header byte.
	headerLenMask = 0x3F
	// headerFlagsMask extracts the qcInfo bits from the header byte.
	headerFlagsMask = 0xC0
)
|
||||
|
||||
// runeInfo is a representation for the data stored in charinfoTrie.
|
||||
type runeInfo struct {
|
||||
pos uint8 // start position in reorderBuffer; used in composition.go
|
||||
size uint8 // length of UTF-8 encoding of this rune
|
||||
ccc uint8 // leading canonical combining class (ccc if not decomposition)
|
||||
tccc uint8 // trailing canonical combining class (ccc if not decomposition)
|
||||
flags qcInfo // quick check flags
|
||||
index uint16
|
||||
}
|
||||
|
||||
// functions dispatchable per form
|
||||
type lookupFunc func(b input, i int) runeInfo
|
||||
|
||||
// formInfo holds Form-specific functions and tables.
|
||||
type formInfo struct {
|
||||
form Form
|
||||
composing, compatibility bool // form type
|
||||
info lookupFunc
|
||||
}
|
||||
|
||||
var formTable []*formInfo
|
||||
|
||||
func init() {
|
||||
formTable = make([]*formInfo, 4)
|
||||
|
||||
for i := range formTable {
|
||||
f := &formInfo{}
|
||||
formTable[i] = f
|
||||
f.form = Form(i)
|
||||
if Form(i) == NFKD || Form(i) == NFKC {
|
||||
f.compatibility = true
|
||||
f.info = lookupInfoNFKC
|
||||
} else {
|
||||
f.info = lookupInfoNFC
|
||||
}
|
||||
if Form(i) == NFC || Form(i) == NFKC {
|
||||
f.composing = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// We do not distinguish between boundaries for NFC, NFD, etc. to avoid
|
||||
// unexpected behavior for the user. For example, in NFD, there is a boundary
|
||||
// after 'a'. However, a might combine with modifiers, so from the application's
|
||||
// perspective it is not a good boundary. We will therefore always use the
|
||||
// boundaries for the combining variants.
|
||||
func (i runeInfo) boundaryBefore() bool {
|
||||
if i.ccc == 0 && !i.combinesBackward() {
|
||||
return true
|
||||
}
|
||||
// We assume that the CCC of the first character in a decomposition
|
||||
// is always non-zero if different from info.ccc and that we can return
|
||||
// false at this point. This is verified by maketables.
|
||||
return false
|
||||
}
|
||||
|
||||
func (i runeInfo) boundaryAfter() bool {
|
||||
return i.isInert()
|
||||
}
|
||||
|
||||
// qcInfo packs quick check data in 4 bits:
//   0:    NFD_QC Yes (0) or No (1). No also means there is a decomposition.
//   1..2: NFC_QC Yes(00), No (10), or Maybe (11)
//   3:    Combines forward  (0 == false, 1 == true)
//
// When all 4 bits are zero, the character is inert, meaning it is never
// influenced by normalization.
type qcInfo uint8
|
||||
|
||||
func (i runeInfo) isYesC() bool { return i.flags&0x4 == 0 }
|
||||
func (i runeInfo) isYesD() bool { return i.flags&0x1 == 0 }
|
||||
|
||||
func (i runeInfo) combinesForward() bool { return i.flags&0x8 != 0 }
|
||||
func (i runeInfo) combinesBackward() bool { return i.flags&0x2 != 0 } // == isMaybe
|
||||
func (i runeInfo) hasDecomposition() bool { return i.flags&0x1 != 0 } // == isNoD
|
||||
|
||||
func (r runeInfo) isInert() bool {
|
||||
return r.flags&0xf == 0 && r.ccc == 0
|
||||
}
|
||||
|
||||
func (r runeInfo) decomposition() []byte {
|
||||
if r.index == 0 {
|
||||
return nil
|
||||
}
|
||||
p := r.index
|
||||
n := decomps[p] & 0x3F
|
||||
p++
|
||||
return decomps[p : p+uint16(n)]
|
||||
}
|
||||
|
||||
// Recomposition
|
||||
// We use 32-bit keys instead of 64-bit for the two codepoint keys.
|
||||
// This clips off the bits of three entries, but we know this will not
|
||||
// result in a collision. In the unlikely event that changes to
|
||||
// UnicodeData.txt introduce collisions, the compiler will catch it.
|
||||
// Note that the recomposition map for NFC and NFKC are identical.
|
||||
|
||||
// combine returns the combined rune or 0 if it doesn't exist.
|
||||
func combine(a, b rune) rune {
|
||||
key := uint32(uint16(a))<<16 + uint32(uint16(b))
|
||||
return recompMap[key]
|
||||
}
|
||||
|
||||
func lookupInfoNFC(b input, i int) runeInfo {
|
||||
v, sz := b.charinfoNFC(i)
|
||||
return compInfo(v, sz)
|
||||
}
|
||||
|
||||
func lookupInfoNFKC(b input, i int) runeInfo {
|
||||
v, sz := b.charinfoNFKC(i)
|
||||
return compInfo(v, sz)
|
||||
}
|
||||
|
||||
// compInfo converts the information contained in v and sz
|
||||
// to a runeInfo. See the comment at the top of the file
|
||||
// for more information on the format.
|
||||
func compInfo(v uint16, sz int) runeInfo {
|
||||
if v == 0 {
|
||||
return runeInfo{size: uint8(sz)}
|
||||
} else if v >= 0x8000 {
|
||||
return runeInfo{
|
||||
size: uint8(sz),
|
||||
ccc: uint8(v),
|
||||
tccc: uint8(v),
|
||||
flags: qcInfo(v>>8) & qcInfoMask,
|
||||
}
|
||||
}
|
||||
// has decomposition
|
||||
h := decomps[v]
|
||||
f := (qcInfo(h&headerFlagsMask) >> 4) | 0x1
|
||||
ri := runeInfo{size: uint8(sz), flags: f, index: v}
|
||||
if v >= firstCCC {
|
||||
v += uint16(h&headerLenMask) + 1
|
||||
ri.tccc = decomps[v]
|
||||
if v >= firstLeadingCCC {
|
||||
ri.ccc = decomps[v+1]
|
||||
}
|
||||
}
|
||||
return ri
|
||||
}
|
||||
@@ -1,96 +0,0 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package norm
|
||||
|
||||
import "unicode/utf8"
|
||||
|
||||
// input abstracts over the string and byte-slice sources that can be
// normalized; inputString and inputBytes implement it.
type input interface {
	// skipASCII returns the first position in [p, max) holding a
	// non-ASCII byte, or max if there is none.
	skipASCII(p, max int) int
	// skipNonStarter returns the first position at or after p that holds
	// a byte for which utf8.RuneStart is true, or the input length.
	skipNonStarter(p int) int
	// appendSlice appends the bytes in [s, e) to buf.
	appendSlice(buf []byte, s, e int) []byte
	// copySlice copies the bytes in [s, e) to buf.
	copySlice(buf []byte, s, e int)
	// charinfoNFC and charinfoNFKC look up the trie value and the size
	// of the rune at p in the respective trie.
	charinfoNFC(p int) (uint16, int)
	charinfoNFKC(p int) (uint16, int)
	// hangul returns the Hangul rune at p, or 0 if there is none.
	hangul(p int) rune
}
|
||||
|
||||
type inputString string
|
||||
|
||||
func (s inputString) skipASCII(p, max int) int {
|
||||
for ; p < max && s[p] < utf8.RuneSelf; p++ {
|
||||
}
|
||||
return p
|
||||
}
|
||||
|
||||
func (s inputString) skipNonStarter(p int) int {
|
||||
for ; p < len(s) && !utf8.RuneStart(s[p]); p++ {
|
||||
}
|
||||
return p
|
||||
}
|
||||
|
||||
func (s inputString) appendSlice(buf []byte, b, e int) []byte {
|
||||
for i := b; i < e; i++ {
|
||||
buf = append(buf, s[i])
|
||||
}
|
||||
return buf
|
||||
}
|
||||
|
||||
func (s inputString) copySlice(buf []byte, b, e int) {
|
||||
copy(buf, s[b:e])
|
||||
}
|
||||
|
||||
func (s inputString) charinfoNFC(p int) (uint16, int) {
|
||||
return nfcTrie.lookupString(string(s[p:]))
|
||||
}
|
||||
|
||||
func (s inputString) charinfoNFKC(p int) (uint16, int) {
|
||||
return nfkcTrie.lookupString(string(s[p:]))
|
||||
}
|
||||
|
||||
func (s inputString) hangul(p int) rune {
|
||||
if !isHangulString(string(s[p:])) {
|
||||
return 0
|
||||
}
|
||||
rune, _ := utf8.DecodeRuneInString(string(s[p:]))
|
||||
return rune
|
||||
}
|
||||
|
||||
type inputBytes []byte
|
||||
|
||||
func (s inputBytes) skipASCII(p, max int) int {
|
||||
for ; p < max && s[p] < utf8.RuneSelf; p++ {
|
||||
}
|
||||
return p
|
||||
}
|
||||
|
||||
func (s inputBytes) skipNonStarter(p int) int {
|
||||
for ; p < len(s) && !utf8.RuneStart(s[p]); p++ {
|
||||
}
|
||||
return p
|
||||
}
|
||||
|
||||
func (s inputBytes) appendSlice(buf []byte, b, e int) []byte {
|
||||
return append(buf, s[b:e]...)
|
||||
}
|
||||
|
||||
func (s inputBytes) copySlice(buf []byte, b, e int) {
|
||||
copy(buf, s[b:e])
|
||||
}
|
||||
|
||||
func (s inputBytes) charinfoNFC(p int) (uint16, int) {
|
||||
return nfcTrie.lookup(s[p:])
|
||||
}
|
||||
|
||||
func (s inputBytes) charinfoNFKC(p int) (uint16, int) {
|
||||
return nfkcTrie.lookup(s[p:])
|
||||
}
|
||||
|
||||
func (s inputBytes) hangul(p int) rune {
|
||||
if !isHangul(s[p:]) {
|
||||
return 0
|
||||
}
|
||||
rune, _ := utf8.DecodeRune(s[p:])
|
||||
return rune
|
||||
}
|
||||
@@ -1,286 +0,0 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package norm
|
||||
|
||||
// MaxSegmentSize is the minimum length a buffer passed to Iter.Next should
// have. It equals the size of the reorder buffer's byte storage.
const MaxSegmentSize = maxByteBufferSize
|
||||
|
||||
// An Iter iterates over a string or byte slice, while normalizing it
|
||||
// to a given Form.
|
||||
type Iter struct {
|
||||
rb reorderBuffer
|
||||
info runeInfo // first character saved from previous iteration
|
||||
next iterFunc // implementation of next depends on form
|
||||
|
||||
p int // current position in input source
|
||||
outStart int // start of current segment in output buffer
|
||||
inStart int // start of current segment in input source
|
||||
maxp int // position in output buffer after which not to start a new segment
|
||||
maxseg int // for tracking an excess of combining characters
|
||||
|
||||
tccc uint8
|
||||
done bool
|
||||
}
|
||||
|
||||
// iterFunc is the signature of the form-specific implementations of
// Iter.Next: it writes to out and returns the number of bytes written.
type iterFunc func(i *Iter, out []byte) int
|
||||
|
||||
// SetInput initializes i to iterate over src after normalizing it to Form f.
|
||||
func (i *Iter) SetInput(f Form, src []byte) {
|
||||
i.rb.init(f, src)
|
||||
if i.rb.f.composing {
|
||||
i.next = nextComposed
|
||||
} else {
|
||||
i.next = nextDecomposed
|
||||
}
|
||||
i.p = 0
|
||||
if i.done = len(src) == 0; !i.done {
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
}
|
||||
}
|
||||
|
||||
// SetInputString initializes i to iterate over src after normalizing it to Form f.
|
||||
func (i *Iter) SetInputString(f Form, src string) {
|
||||
i.rb.initString(f, src)
|
||||
if i.rb.f.composing {
|
||||
i.next = nextComposed
|
||||
} else {
|
||||
i.next = nextDecomposed
|
||||
}
|
||||
i.p = 0
|
||||
if i.done = len(src) == 0; !i.done {
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
}
|
||||
}
|
||||
|
||||
// Pos returns the byte position at which the next call to Next will commence processing.
|
||||
func (i *Iter) Pos() int {
|
||||
return i.p
|
||||
}
|
||||
|
||||
// Done returns true if there is no more input to process.
|
||||
func (i *Iter) Done() bool {
|
||||
return i.done
|
||||
}
|
||||
|
||||
// Next writes f(i.input[i.Pos():n]...) to buffer buf, where n is the
|
||||
// largest boundary of i.input such that the result fits in buf.
|
||||
// It returns the number of bytes written to buf.
|
||||
// len(buf) should be at least MaxSegmentSize.
|
||||
// Done must be false before calling Next.
|
||||
func (i *Iter) Next(buf []byte) int {
|
||||
return i.next(i, buf)
|
||||
}
|
||||
|
||||
func (i *Iter) initNext(outn, inStart int) {
|
||||
i.outStart = 0
|
||||
i.inStart = inStart
|
||||
i.maxp = outn - MaxSegmentSize
|
||||
i.maxseg = MaxSegmentSize
|
||||
}
|
||||
|
||||
// setStart resets the start of the new segment to the given position.
|
||||
// It returns true if there is not enough room for the new segment.
|
||||
func (i *Iter) setStart(outp, inp int) bool {
|
||||
if outp > i.maxp {
|
||||
return true
|
||||
}
|
||||
i.outStart = outp
|
||||
i.inStart = inp
|
||||
i.maxseg = outp + MaxSegmentSize
|
||||
return false
|
||||
}
|
||||
|
||||
// min returns the smaller of a and b.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}
|
||||
|
||||
// nextDecomposed is the implementation of Next for forms NFD and NFKD.
|
||||
func nextDecomposed(i *Iter, out []byte) int {
|
||||
var outp int
|
||||
i.initNext(len(out), i.p)
|
||||
doFast:
|
||||
inCopyStart, outCopyStart := i.p, outp // invariant xCopyStart <= i.xStart
|
||||
for {
|
||||
if sz := int(i.info.size); sz <= 1 {
|
||||
// ASCII or illegal byte. Either way, advance by 1.
|
||||
i.p++
|
||||
outp++
|
||||
max := min(i.rb.nsrc, len(out)-outp+i.p)
|
||||
if np := i.rb.src.skipASCII(i.p, max); np > i.p {
|
||||
outp += np - i.p
|
||||
i.p = np
|
||||
if i.p >= i.rb.nsrc {
|
||||
break
|
||||
}
|
||||
// ASCII may combine with consecutive runes.
|
||||
if i.setStart(outp-1, i.p-1) {
|
||||
i.p--
|
||||
outp--
|
||||
i.info.size = 1
|
||||
break
|
||||
}
|
||||
}
|
||||
} else if d := i.info.decomposition(); d != nil {
|
||||
i.rb.src.copySlice(out[outCopyStart:], inCopyStart, i.p)
|
||||
p := outp + len(d)
|
||||
if p > i.maxseg && i.setStart(outp, i.p) {
|
||||
return outp
|
||||
}
|
||||
copy(out[outp:], d)
|
||||
outp = p
|
||||
i.p += sz
|
||||
inCopyStart, outCopyStart = i.p, outp
|
||||
} else if r := i.rb.src.hangul(i.p); r != 0 {
|
||||
i.rb.src.copySlice(out[outCopyStart:], inCopyStart, i.p)
|
||||
for {
|
||||
outp += decomposeHangul(out[outp:], r)
|
||||
i.p += hangulUTF8Size
|
||||
if r = i.rb.src.hangul(i.p); r == 0 {
|
||||
break
|
||||
}
|
||||
if i.setStart(outp, i.p) {
|
||||
return outp
|
||||
}
|
||||
}
|
||||
inCopyStart, outCopyStart = i.p, outp
|
||||
} else {
|
||||
p := outp + sz
|
||||
if p > i.maxseg && i.setStart(outp, i.p) {
|
||||
break
|
||||
}
|
||||
outp = p
|
||||
i.p += sz
|
||||
}
|
||||
if i.p >= i.rb.nsrc {
|
||||
break
|
||||
}
|
||||
prevCC := i.info.tccc
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
if cc := i.info.ccc; cc == 0 {
|
||||
if i.setStart(outp, i.p) {
|
||||
break
|
||||
}
|
||||
} else if cc < prevCC {
|
||||
goto doNorm
|
||||
}
|
||||
}
|
||||
if inCopyStart != i.p {
|
||||
i.rb.src.copySlice(out[outCopyStart:], inCopyStart, i.p)
|
||||
}
|
||||
i.done = i.p >= i.rb.nsrc
|
||||
return outp
|
||||
doNorm:
|
||||
// Insert what we have decomposed so far in the reorderBuffer.
|
||||
// As we will only reorder, there will always be enough room.
|
||||
i.rb.src.copySlice(out[outCopyStart:], inCopyStart, i.p)
|
||||
if !i.rb.insertDecomposed(out[i.outStart:outp]) {
|
||||
// Start over to prevent decompositions from crossing segment boundaries.
|
||||
// This is a rare occurance.
|
||||
i.p = i.inStart
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
}
|
||||
outp = i.outStart
|
||||
for {
|
||||
if !i.rb.insert(i.rb.src, i.p, i.info) {
|
||||
break
|
||||
}
|
||||
if i.p += int(i.info.size); i.p >= i.rb.nsrc {
|
||||
outp += i.rb.flushCopy(out[outp:])
|
||||
i.done = true
|
||||
return outp
|
||||
}
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
if i.info.ccc == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
// new segment or too many combining characters: exit normalization
|
||||
if outp += i.rb.flushCopy(out[outp:]); i.setStart(outp, i.p) {
|
||||
return outp
|
||||
}
|
||||
goto doFast
|
||||
}
|
||||
|
||||
// nextComposed is the implementation of Next for forms NFC and NFKC.
|
||||
func nextComposed(i *Iter, out []byte) int {
|
||||
var outp int
|
||||
i.initNext(len(out), i.p)
|
||||
doFast:
|
||||
inCopyStart, outCopyStart := i.p, outp // invariant xCopyStart <= i.xStart
|
||||
var prevCC uint8
|
||||
for {
|
||||
if !i.info.isYesC() {
|
||||
goto doNorm
|
||||
}
|
||||
if cc := i.info.ccc; cc == 0 {
|
||||
if i.setStart(outp, i.p) {
|
||||
break
|
||||
}
|
||||
} else if cc < prevCC {
|
||||
goto doNorm
|
||||
}
|
||||
prevCC = i.info.tccc
|
||||
sz := int(i.info.size)
|
||||
if sz == 0 {
|
||||
sz = 1 // illegal rune: copy byte-by-byte
|
||||
}
|
||||
p := outp + sz
|
||||
if p > i.maxseg && i.setStart(outp, i.p) {
|
||||
break
|
||||
}
|
||||
outp = p
|
||||
i.p += sz
|
||||
max := min(i.rb.nsrc, len(out)-outp+i.p)
|
||||
if np := i.rb.src.skipASCII(i.p, max); np > i.p {
|
||||
outp += np - i.p
|
||||
i.p = np
|
||||
if i.p >= i.rb.nsrc {
|
||||
break
|
||||
}
|
||||
// ASCII may combine with consecutive runes.
|
||||
if i.setStart(outp-1, i.p-1) {
|
||||
i.p--
|
||||
outp--
|
||||
i.info = runeInfo{size: 1}
|
||||
break
|
||||
}
|
||||
}
|
||||
if i.p >= i.rb.nsrc {
|
||||
break
|
||||
}
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
}
|
||||
if inCopyStart != i.p {
|
||||
i.rb.src.copySlice(out[outCopyStart:], inCopyStart, i.p)
|
||||
}
|
||||
i.done = i.p >= i.rb.nsrc
|
||||
return outp
|
||||
doNorm:
|
||||
i.rb.src.copySlice(out[outCopyStart:], inCopyStart, i.inStart)
|
||||
outp, i.p = i.outStart, i.inStart
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
for {
|
||||
if !i.rb.insert(i.rb.src, i.p, i.info) {
|
||||
break
|
||||
}
|
||||
if i.p += int(i.info.size); i.p >= i.rb.nsrc {
|
||||
i.rb.compose()
|
||||
outp += i.rb.flushCopy(out[outp:])
|
||||
i.done = true
|
||||
return outp
|
||||
}
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
if i.info.boundaryBefore() {
|
||||
break
|
||||
}
|
||||
}
|
||||
i.rb.compose()
|
||||
if outp += i.rb.flushCopy(out[outp:]); i.setStart(outp, i.p) {
|
||||
return outp
|
||||
}
|
||||
goto doFast
|
||||
}
|
||||
@@ -1,186 +0,0 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package norm
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
var iterBufSizes = []int{
|
||||
MaxSegmentSize,
|
||||
1.5 * MaxSegmentSize,
|
||||
2 * MaxSegmentSize,
|
||||
3 * MaxSegmentSize,
|
||||
100 * MaxSegmentSize,
|
||||
}
|
||||
|
||||
func doIterNorm(f Form, buf []byte, s string) []byte {
|
||||
acc := []byte{}
|
||||
i := Iter{}
|
||||
i.SetInputString(f, s)
|
||||
for !i.Done() {
|
||||
n := i.Next(buf)
|
||||
acc = append(acc, buf[:n]...)
|
||||
}
|
||||
return acc
|
||||
}
|
||||
|
||||
func runIterTests(t *testing.T, name string, f Form, tests []AppendTest, norm bool) {
|
||||
for i, test := range tests {
|
||||
in := test.left + test.right
|
||||
gold := test.out
|
||||
if norm {
|
||||
gold = string(f.AppendString(nil, test.out))
|
||||
}
|
||||
for _, sz := range iterBufSizes {
|
||||
buf := make([]byte, sz)
|
||||
out := string(doIterNorm(f, buf, in))
|
||||
if len(out) != len(gold) {
|
||||
const msg = "%s:%d:%d: length is %d; want %d"
|
||||
t.Errorf(msg, name, i, sz, len(out), len(gold))
|
||||
}
|
||||
if out != gold {
|
||||
// Find first rune that differs and show context.
|
||||
ir := []rune(out)
|
||||
ig := []rune(gold)
|
||||
for j := 0; j < len(ir) && j < len(ig); j++ {
|
||||
if ir[j] == ig[j] {
|
||||
continue
|
||||
}
|
||||
if j -= 3; j < 0 {
|
||||
j = 0
|
||||
}
|
||||
for e := j + 7; j < e && j < len(ir) && j < len(ig); j++ {
|
||||
const msg = "%s:%d:%d: runeAt(%d) = %U; want %U"
|
||||
t.Errorf(msg, name, i, sz, j, ir[j], ig[j])
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// rep returns a string consisting of n copies of the rune r.
func rep(r rune, n int) string {
	unit := string(r)
	return strings.Repeat(unit, n)
}
|
||||
|
||||
var iterTests = []AppendTest{
|
||||
{"", ascii, ascii},
|
||||
{"", txt_all, txt_all},
|
||||
{"", "a" + rep(0x0300, MaxSegmentSize/2), "a" + rep(0x0300, MaxSegmentSize/2)},
|
||||
}
|
||||
|
||||
var iterTestsD = []AppendTest{
|
||||
{ // segment overflow on unchanged character
|
||||
"",
|
||||
"a" + rep(0x0300, MaxSegmentSize/2) + "\u0316",
|
||||
"a" + rep(0x0300, MaxSegmentSize/2-1) + "\u0316\u0300",
|
||||
},
|
||||
{ // segment overflow on unchanged character + start value
|
||||
"",
|
||||
"a" + rep(0x0300, MaxSegmentSize/2+maxCombiningChars+4) + "\u0316",
|
||||
"a" + rep(0x0300, MaxSegmentSize/2+maxCombiningChars) + "\u0316" + rep(0x300, 4),
|
||||
},
|
||||
{ // segment overflow on decomposition
|
||||
"",
|
||||
"a" + rep(0x0300, MaxSegmentSize/2-1) + "\u0340",
|
||||
"a" + rep(0x0300, MaxSegmentSize/2),
|
||||
},
|
||||
{ // segment overflow on decomposition + start value
|
||||
"",
|
||||
"a" + rep(0x0300, MaxSegmentSize/2-1) + "\u0340" + rep(0x300, maxCombiningChars+4) + "\u0320",
|
||||
"a" + rep(0x0300, MaxSegmentSize/2-1) + rep(0x300, maxCombiningChars+1) + "\u0320" + rep(0x300, 4),
|
||||
},
|
||||
{ // start value after ASCII overflow
|
||||
"",
|
||||
rep('a', MaxSegmentSize) + rep(0x300, maxCombiningChars+2) + "\u0320",
|
||||
rep('a', MaxSegmentSize) + rep(0x300, maxCombiningChars) + "\u0320\u0300\u0300",
|
||||
},
|
||||
{ // start value after Hangul overflow
|
||||
"",
|
||||
rep(0xAC00, MaxSegmentSize/6) + rep(0x300, maxCombiningChars+2) + "\u0320",
|
||||
strings.Repeat("\u1100\u1161", MaxSegmentSize/6) + rep(0x300, maxCombiningChars-1) + "\u0320" + rep(0x300, 3),
|
||||
},
|
||||
{ // start value after cc=0
|
||||
"",
|
||||
"您您" + rep(0x300, maxCombiningChars+4) + "\u0320",
|
||||
"您您" + rep(0x300, maxCombiningChars) + "\u0320" + rep(0x300, 4),
|
||||
},
|
||||
{ // start value after normalization
|
||||
"",
|
||||
"\u0300\u0320a" + rep(0x300, maxCombiningChars+4) + "\u0320",
|
||||
"\u0320\u0300a" + rep(0x300, maxCombiningChars) + "\u0320" + rep(0x300, 4),
|
||||
},
|
||||
}
|
||||
|
||||
var iterTestsC = []AppendTest{
|
||||
{ // ordering of non-composing combining characters
|
||||
"",
|
||||
"\u0305\u0316",
|
||||
"\u0316\u0305",
|
||||
},
|
||||
{ // segment overflow
|
||||
"",
|
||||
"a" + rep(0x0305, MaxSegmentSize/2+4) + "\u0316",
|
||||
"a" + rep(0x0305, MaxSegmentSize/2-1) + "\u0316" + rep(0x305, 5),
|
||||
},
|
||||
}
|
||||
|
||||
func TestIterNextD(t *testing.T) {
|
||||
runIterTests(t, "IterNextD1", NFKD, appendTests, true)
|
||||
runIterTests(t, "IterNextD2", NFKD, iterTests, true)
|
||||
runIterTests(t, "IterNextD3", NFKD, iterTestsD, false)
|
||||
}
|
||||
|
||||
func TestIterNextC(t *testing.T) {
|
||||
runIterTests(t, "IterNextC1", NFKC, appendTests, true)
|
||||
runIterTests(t, "IterNextC2", NFKC, iterTests, true)
|
||||
runIterTests(t, "IterNextC3", NFKC, iterTestsC, false)
|
||||
}
|
||||
|
||||
type SegmentTest struct {
|
||||
in string
|
||||
out []string
|
||||
}
|
||||
|
||||
var segmentTests = []SegmentTest{
|
||||
{rep('a', MaxSegmentSize), []string{rep('a', MaxSegmentSize), ""}},
|
||||
{rep('a', MaxSegmentSize+2), []string{rep('a', MaxSegmentSize-1), "aaa", ""}},
|
||||
{rep('a', MaxSegmentSize) + "\u0300aa", []string{rep('a', MaxSegmentSize-1), "a\u0300", "aa", ""}},
|
||||
}
|
||||
|
||||
// Note that, by design, segmentation is equal for composing and decomposing forms.
|
||||
func TestIterSegmentation(t *testing.T) {
|
||||
segmentTest(t, "SegmentTestD", NFD, segmentTests)
|
||||
segmentTest(t, "SegmentTestC", NFC, segmentTests)
|
||||
}
|
||||
|
||||
func segmentTest(t *testing.T, name string, f Form, tests []SegmentTest) {
|
||||
iter := Iter{}
|
||||
for i, tt := range segmentTests {
|
||||
buf := make([]byte, MaxSegmentSize)
|
||||
iter.SetInputString(f, tt.in)
|
||||
for j, seg := range tt.out {
|
||||
if seg == "" {
|
||||
if !iter.Done() {
|
||||
n := iter.Next(buf)
|
||||
res := string(buf[:n])
|
||||
t.Errorf(`%s:%d:%d: expected Done()==true, found segment "%s"`, name, i, j, res)
|
||||
}
|
||||
continue
|
||||
}
|
||||
if iter.Done() {
|
||||
t.Errorf("%s:%d:%d: Done()==true, want false", name, i, j)
|
||||
}
|
||||
n := iter.Next(buf)
|
||||
seg = f.String(seg)
|
||||
if res := string(buf[:n]); res != seg {
|
||||
t.Errorf(`%s:%d:%d" segment was "%s" (%d); want "%s" (%d)`, name, i, j, res, len(res), seg, len(seg))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,902 +0,0 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
// Normalization table generator.
|
||||
// Data read from the web.
|
||||
// See forminfo.go for a description of the trie values associated with each rune.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
loadUnicodeData()
|
||||
loadCompositionExclusions()
|
||||
completeCharFields(FCanonical)
|
||||
completeCharFields(FCompatibility)
|
||||
verifyComputed()
|
||||
printChars()
|
||||
makeTables()
|
||||
testDerived()
|
||||
}
|
||||
|
||||
// Command-line flags.
var (
	// url is the base location the data files are fetched from.
	url = flag.String("url",
		"http://www.unicode.org/Public/6.0.0/ucd/",
		"URL of Unicode database directory")

	// tablelist selects which tables makeTables emits.
	tablelist = flag.String("tables",
		"all",
		"comma-separated list of which tables to generate; "+
			"can be 'decomp', 'recomp', 'info' and 'all'")

	// test enables the comparison against DerivedNormalizationProps.txt.
	test = flag.Bool("test",
		false,
		"test existing tables; can be used to compare web data with package data")

	// verbose makes printChars dump every parsed character.
	verbose = flag.Bool("verbose",
		false,
		"write data to stdout as it is parsed")

	// localFiles makes openReader read from the current directory.
	localFiles = flag.Bool("local",
		false,
		"data files have been copied to the current directory; for debugging only")
)

// logger writes diagnostics to standard error, tagged with file and line.
var logger = log.New(os.Stderr, "", log.Lshortfile)
|
||||
|
||||
// UnicodeData.txt has form:
//	0037;DIGIT SEVEN;Nd;0;EN;;7;7;7;N;;;;;
//	007A;LATIN SMALL LETTER Z;Ll;0;L;;;;;N;;;005A;;005A
// See http://unicode.org/reports/tr44/ for full explanation
//
// The constants below are the indexes of the semicolon-separated fields
// on each line; NumField is the number of fields a line must have.
const (
	FCodePoint = iota
	FName
	FGeneralCategory
	FCanonicalCombiningClass
	FBidiClass
	FDecompMapping
	FDecimalValue
	FDigitValue
	FNumericValue
	FBidiMirrored
	FUnicode1Name
	FISOComment
	FSimpleUppercaseMapping
	FSimpleLowercaseMapping
	FSimpleTitlecaseMapping
	NumField
)

// MaxChar is the largest code point; anything above this shouldn't exist.
const MaxChar = 0x10FFFF
|
||||
|
||||
// QCResult is a Quick Check property value. Quick Check properties let us
// quickly determine whether a rune may occur in a normal form: for a given
// normal form a rune may be guaranteed to occur verbatim (QC=Yes), may or
// may not combine with another rune (QC=Maybe), or may not occur (QC=No).
type QCResult int

const (
	QCUnknown QCResult = iota
	QCYes
	QCNo
	QCMaybe
)

// String returns "Yes", "No" or "Maybe" for the corresponding value and
// "***UNKNOWN***" for anything else, including QCUnknown.
func (r QCResult) String() string {
	names := map[QCResult]string{
		QCYes:   "Yes",
		QCNo:    "No",
		QCMaybe: "Maybe",
	}
	if s, ok := names[r]; ok {
		return s
	}
	return "***UNKNOWN***"
}
|
||||
|
||||
// Form types. They index Char.forms.
const (
	FCanonical         = iota // NFC or NFD
	FCompatibility            // NFKC or NFKD
	FNumberOfFormTypes        // number of form types
)

// Modes. They index FormInfo.quickCheck and FormInfo.verified.
const (
	MComposed      = iota // NFC or NFKC
	MDecomposed           // NFD or NFKD
	MNumberOfModes        // number of modes
)
|
||||
|
||||
// This contains only the properties we're interested in.
|
||||
type Char struct {
|
||||
name string
|
||||
codePoint rune // if zero, this index is not a valid code point.
|
||||
ccc uint8 // canonical combining class
|
||||
excludeInComp bool // from CompositionExclusions.txt
|
||||
compatDecomp bool // it has a compatibility expansion
|
||||
|
||||
forms [FNumberOfFormTypes]FormInfo // For FCanonical and FCompatibility
|
||||
|
||||
state State
|
||||
}
|
||||
|
||||
var chars = make([]Char, MaxChar+1)
|
||||
|
||||
func (c Char) String() string {
|
||||
buf := new(bytes.Buffer)
|
||||
|
||||
fmt.Fprintf(buf, "%U [%s]:\n", c.codePoint, c.name)
|
||||
fmt.Fprintf(buf, " ccc: %v\n", c.ccc)
|
||||
fmt.Fprintf(buf, " excludeInComp: %v\n", c.excludeInComp)
|
||||
fmt.Fprintf(buf, " compatDecomp: %v\n", c.compatDecomp)
|
||||
fmt.Fprintf(buf, " state: %v\n", c.state)
|
||||
fmt.Fprintf(buf, " NFC:\n")
|
||||
fmt.Fprint(buf, c.forms[FCanonical])
|
||||
fmt.Fprintf(buf, " NFKC:\n")
|
||||
fmt.Fprint(buf, c.forms[FCompatibility])
|
||||
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
// State records how a chars entry relates to the lines of UnicodeData.txt.
// In UnicodeData.txt, some ranges are marked like this:
//	3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
//	4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
// parseCharacter keeps a state variable indicating the weirdness.
type State int

const (
	SNormal State = iota // known to be zero for the type
	SFirst
	SLast
	SMissing
)

// lastChar is the code point of the most recent line handled by
// parseCharacter; it starts at zero.
var lastChar rune
|
||||
|
||||
func (c Char) isValid() bool {
|
||||
return c.codePoint != 0 && c.state != SMissing
|
||||
}
|
||||
|
||||
type FormInfo struct {
|
||||
quickCheck [MNumberOfModes]QCResult // index: MComposed or MDecomposed
|
||||
verified [MNumberOfModes]bool // index: MComposed or MDecomposed
|
||||
|
||||
combinesForward bool // May combine with rune on the right
|
||||
combinesBackward bool // May combine with rune on the left
|
||||
isOneWay bool // Never appears in result
|
||||
inDecomp bool // Some decompositions result in this char.
|
||||
decomp Decomposition
|
||||
expandedDecomp Decomposition
|
||||
}
|
||||
|
||||
func (f FormInfo) String() string {
|
||||
buf := bytes.NewBuffer(make([]byte, 0))
|
||||
|
||||
fmt.Fprintf(buf, " quickCheck[C]: %v\n", f.quickCheck[MComposed])
|
||||
fmt.Fprintf(buf, " quickCheck[D]: %v\n", f.quickCheck[MDecomposed])
|
||||
fmt.Fprintf(buf, " cmbForward: %v\n", f.combinesForward)
|
||||
fmt.Fprintf(buf, " cmbBackward: %v\n", f.combinesBackward)
|
||||
fmt.Fprintf(buf, " isOneWay: %v\n", f.isOneWay)
|
||||
fmt.Fprintf(buf, " inDecomp: %v\n", f.inDecomp)
|
||||
fmt.Fprintf(buf, " decomposition: %X\n", f.decomp)
|
||||
fmt.Fprintf(buf, " expandedDecomp: %X\n", f.expandedDecomp)
|
||||
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
type Decomposition []rune
|
||||
|
||||
func openReader(file string) (input io.ReadCloser) {
|
||||
if *localFiles {
|
||||
f, err := os.Open(file)
|
||||
if err != nil {
|
||||
logger.Fatal(err)
|
||||
}
|
||||
input = f
|
||||
} else {
|
||||
path := *url + file
|
||||
resp, err := http.Get(path)
|
||||
if err != nil {
|
||||
logger.Fatal(err)
|
||||
}
|
||||
if resp.StatusCode != 200 {
|
||||
logger.Fatal("bad GET status for "+file, resp.Status)
|
||||
}
|
||||
input = resp.Body
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// parseDecomposition parses s as a space-separated list of hexadecimal code
// points. If skipfirst is set, the first token (a compatibility tag such as
// "<compat>") is dropped before parsing. On a parse failure it returns the
// runes parsed so far together with the error.
func parseDecomposition(s string, skipfirst bool) (a []rune, e error) {
	tokens := strings.Split(s, " ")
	if skipfirst && len(tokens) > 0 {
		tokens = tokens[1:]
	}
	for i := 0; i < len(tokens); i++ {
		v, err := strconv.ParseUint(tokens[i], 16, 64)
		if err != nil {
			return a, err
		}
		a = append(a, rune(v))
	}
	return a, nil
}
|
||||
|
||||
func parseCharacter(line string) {
|
||||
field := strings.Split(line, ";")
|
||||
if len(field) != NumField {
|
||||
logger.Fatalf("%5s: %d fields (expected %d)\n", line, len(field), NumField)
|
||||
}
|
||||
x, err := strconv.ParseUint(field[FCodePoint], 16, 64)
|
||||
point := int(x)
|
||||
if err != nil {
|
||||
logger.Fatalf("%.5s...: %s", line, err)
|
||||
}
|
||||
if point == 0 {
|
||||
return // not interesting and we use 0 as unset
|
||||
}
|
||||
if point > MaxChar {
|
||||
logger.Fatalf("%5s: Rune %X > MaxChar (%X)", line, point, MaxChar)
|
||||
return
|
||||
}
|
||||
state := SNormal
|
||||
switch {
|
||||
case strings.Index(field[FName], ", First>") > 0:
|
||||
state = SFirst
|
||||
case strings.Index(field[FName], ", Last>") > 0:
|
||||
state = SLast
|
||||
}
|
||||
firstChar := lastChar + 1
|
||||
lastChar = rune(point)
|
||||
if state != SLast {
|
||||
firstChar = lastChar
|
||||
}
|
||||
x, err = strconv.ParseUint(field[FCanonicalCombiningClass], 10, 64)
|
||||
if err != nil {
|
||||
logger.Fatalf("%U: bad ccc field: %s", int(x), err)
|
||||
}
|
||||
ccc := uint8(x)
|
||||
decmap := field[FDecompMapping]
|
||||
exp, e := parseDecomposition(decmap, false)
|
||||
isCompat := false
|
||||
if e != nil {
|
||||
if len(decmap) > 0 {
|
||||
exp, e = parseDecomposition(decmap, true)
|
||||
if e != nil {
|
||||
logger.Fatalf(`%U: bad decomp |%v|: "%s"`, int(x), decmap, e)
|
||||
}
|
||||
isCompat = true
|
||||
}
|
||||
}
|
||||
for i := firstChar; i <= lastChar; i++ {
|
||||
char := &chars[i]
|
||||
char.name = field[FName]
|
||||
char.codePoint = i
|
||||
char.forms[FCompatibility].decomp = exp
|
||||
if !isCompat {
|
||||
char.forms[FCanonical].decomp = exp
|
||||
} else {
|
||||
char.compatDecomp = true
|
||||
}
|
||||
if len(decmap) > 0 {
|
||||
char.forms[FCompatibility].decomp = exp
|
||||
}
|
||||
char.ccc = ccc
|
||||
char.state = SMissing
|
||||
if i == lastChar {
|
||||
char.state = state
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func loadUnicodeData() {
|
||||
f := openReader("UnicodeData.txt")
|
||||
defer f.Close()
|
||||
input := bufio.NewReader(f)
|
||||
for {
|
||||
line, err := input.ReadString('\n')
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
logger.Fatal(err)
|
||||
}
|
||||
parseCharacter(line[0 : len(line)-1])
|
||||
}
|
||||
}
|
||||
|
||||
var singlePointRe = regexp.MustCompile(`^([0-9A-F]+) *$`)
|
||||
|
||||
// CompositionExclusions.txt has form:
|
||||
// 0958 # ...
|
||||
// See http://unicode.org/reports/tr44/ for full explanation
|
||||
func parseExclusion(line string) int {
|
||||
comment := strings.Index(line, "#")
|
||||
if comment >= 0 {
|
||||
line = line[0:comment]
|
||||
}
|
||||
if len(line) == 0 {
|
||||
return 0
|
||||
}
|
||||
matches := singlePointRe.FindStringSubmatch(line)
|
||||
if len(matches) != 2 {
|
||||
logger.Fatalf("%s: %d matches (expected 1)\n", line, len(matches))
|
||||
}
|
||||
point, err := strconv.ParseUint(matches[1], 16, 64)
|
||||
if err != nil {
|
||||
logger.Fatalf("%.5s...: %s", line, err)
|
||||
}
|
||||
return int(point)
|
||||
}
|
||||
|
||||
func loadCompositionExclusions() {
|
||||
f := openReader("CompositionExclusions.txt")
|
||||
defer f.Close()
|
||||
input := bufio.NewReader(f)
|
||||
for {
|
||||
line, err := input.ReadString('\n')
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
logger.Fatal(err)
|
||||
}
|
||||
point := parseExclusion(line[0 : len(line)-1])
|
||||
if point == 0 {
|
||||
continue
|
||||
}
|
||||
c := &chars[point]
|
||||
if c.excludeInComp {
|
||||
logger.Fatalf("%U: Duplicate entry in exclusions.", c.codePoint)
|
||||
}
|
||||
c.excludeInComp = true
|
||||
}
|
||||
}
|
||||
|
||||
// hasCompatDecomp returns true if any of the recursive
|
||||
// decompositions contains a compatibility expansion.
|
||||
// In this case, the character may not occur in NFK*.
|
||||
func hasCompatDecomp(r rune) bool {
|
||||
c := &chars[r]
|
||||
if c.compatDecomp {
|
||||
return true
|
||||
}
|
||||
for _, d := range c.forms[FCompatibility].decomp {
|
||||
if hasCompatDecomp(d) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Hangul related constants. Each End value is one past the last rune of
// its range.
const (
	HangulBase = 0xAC00
	HangulEnd  = 0xD7A4 // hangulBase + Jamo combinations (19 * 21 * 28)

	JamoLBase = 0x1100
	JamoLEnd  = 0x1113
	JamoVBase = 0x1161
	JamoVEnd  = 0x1176
	JamoTBase = 0x11A8
	JamoTEnd  = 0x11C3
)

// isHangul reports whether r lies in [HangulBase, HangulEnd).
func isHangul(r rune) bool {
	return r >= HangulBase && r < HangulEnd
}
|
||||
|
||||
func ccc(r rune) uint8 {
|
||||
return chars[r].ccc
|
||||
}
|
||||
|
||||
// Insert a rune in a buffer, ordered by Canonical Combining Class.
|
||||
func insertOrdered(b Decomposition, r rune) Decomposition {
|
||||
n := len(b)
|
||||
b = append(b, 0)
|
||||
cc := ccc(r)
|
||||
if cc > 0 {
|
||||
// Use bubble sort.
|
||||
for ; n > 0; n-- {
|
||||
if ccc(b[n-1]) <= cc {
|
||||
break
|
||||
}
|
||||
b[n] = b[n-1]
|
||||
}
|
||||
}
|
||||
b[n] = r
|
||||
return b
|
||||
}
|
||||
|
||||
// Recursively decompose.
|
||||
func decomposeRecursive(form int, r rune, d Decomposition) Decomposition {
|
||||
if isHangul(r) {
|
||||
return d
|
||||
}
|
||||
dcomp := chars[r].forms[form].decomp
|
||||
if len(dcomp) == 0 {
|
||||
return insertOrdered(d, r)
|
||||
}
|
||||
for _, c := range dcomp {
|
||||
d = decomposeRecursive(form, c, d)
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
func completeCharFields(form int) {
|
||||
// Phase 0: pre-expand decomposition.
|
||||
for i := range chars {
|
||||
f := &chars[i].forms[form]
|
||||
if len(f.decomp) == 0 {
|
||||
continue
|
||||
}
|
||||
exp := make(Decomposition, 0)
|
||||
for _, c := range f.decomp {
|
||||
exp = decomposeRecursive(form, c, exp)
|
||||
}
|
||||
f.expandedDecomp = exp
|
||||
}
|
||||
|
||||
// Phase 1: composition exclusion, mark decomposition.
|
||||
for i := range chars {
|
||||
c := &chars[i]
|
||||
f := &c.forms[form]
|
||||
|
||||
// Marks script-specific exclusions and version restricted.
|
||||
f.isOneWay = c.excludeInComp
|
||||
|
||||
// Singletons
|
||||
f.isOneWay = f.isOneWay || len(f.decomp) == 1
|
||||
|
||||
// Non-starter decompositions
|
||||
if len(f.decomp) > 1 {
|
||||
chk := c.ccc != 0 || chars[f.decomp[0]].ccc != 0
|
||||
f.isOneWay = f.isOneWay || chk
|
||||
}
|
||||
|
||||
// Runes that decompose into more than two runes.
|
||||
f.isOneWay = f.isOneWay || len(f.decomp) > 2
|
||||
|
||||
if form == FCompatibility {
|
||||
f.isOneWay = f.isOneWay || hasCompatDecomp(c.codePoint)
|
||||
}
|
||||
|
||||
for _, r := range f.decomp {
|
||||
chars[r].forms[form].inDecomp = true
|
||||
}
|
||||
}
|
||||
|
||||
// Phase 2: forward and backward combining.
|
||||
for i := range chars {
|
||||
c := &chars[i]
|
||||
f := &c.forms[form]
|
||||
|
||||
if !f.isOneWay && len(f.decomp) == 2 {
|
||||
f0 := &chars[f.decomp[0]].forms[form]
|
||||
f1 := &chars[f.decomp[1]].forms[form]
|
||||
if !f0.isOneWay {
|
||||
f0.combinesForward = true
|
||||
}
|
||||
if !f1.isOneWay {
|
||||
f1.combinesBackward = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Phase 3: quick check values.
|
||||
for i := range chars {
|
||||
c := &chars[i]
|
||||
f := &c.forms[form]
|
||||
|
||||
switch {
|
||||
case len(f.decomp) > 0:
|
||||
f.quickCheck[MDecomposed] = QCNo
|
||||
case isHangul(rune(i)):
|
||||
f.quickCheck[MDecomposed] = QCNo
|
||||
default:
|
||||
f.quickCheck[MDecomposed] = QCYes
|
||||
}
|
||||
switch {
|
||||
case f.isOneWay:
|
||||
f.quickCheck[MComposed] = QCNo
|
||||
case (i & 0xffff00) == JamoLBase:
|
||||
f.quickCheck[MComposed] = QCYes
|
||||
if JamoLBase <= i && i < JamoLEnd {
|
||||
f.combinesForward = true
|
||||
}
|
||||
if JamoVBase <= i && i < JamoVEnd {
|
||||
f.quickCheck[MComposed] = QCMaybe
|
||||
f.combinesBackward = true
|
||||
f.combinesForward = true
|
||||
}
|
||||
if JamoTBase <= i && i < JamoTEnd {
|
||||
f.quickCheck[MComposed] = QCMaybe
|
||||
f.combinesBackward = true
|
||||
}
|
||||
case !f.combinesBackward:
|
||||
f.quickCheck[MComposed] = QCYes
|
||||
default:
|
||||
f.quickCheck[MComposed] = QCMaybe
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// printBytes writes b to standard output as a Go byte array declaration
// named name, preceded by a size comment. Bytes are printed eight per line,
// with a range comment before every group of 64.
func printBytes(b []byte, name string) {
	fmt.Printf("// %s: %d bytes\n", name, len(b))
	fmt.Printf("var %s = [...]byte {", name)
	for i, c := range b {
		if i%64 == 0 {
			fmt.Printf("\n// Bytes %x - %x\n", i, i+63)
		} else if i%8 == 0 {
			fmt.Printf("\n")
		}
		fmt.Printf("0x%.2X, ", c)
	}
	fmt.Print("\n}\n\n")
}
|
||||
|
||||
// See forminfo.go for format.
|
||||
func makeEntry(f *FormInfo) uint16 {
|
||||
e := uint16(0)
|
||||
if f.combinesForward {
|
||||
e |= 0x8
|
||||
}
|
||||
if f.quickCheck[MDecomposed] == QCNo {
|
||||
e |= 0x1
|
||||
}
|
||||
switch f.quickCheck[MComposed] {
|
||||
case QCYes:
|
||||
case QCNo:
|
||||
e |= 0x4
|
||||
case QCMaybe:
|
||||
e |= 0x6
|
||||
default:
|
||||
log.Fatalf("Illegal quickcheck value %v.", f.quickCheck[MComposed])
|
||||
}
|
||||
return e
|
||||
}
|
||||
|
||||
// decompSet keeps track of unique decompositions, grouped by whether
// the decomposition is followed by a trailing and/or leading CCC.
type decompSet [4]map[string]bool

// makeDecompSet returns a decompSet in which every group is an empty,
// ready-to-use map.
func makeDecompSet() decompSet {
	var set decompSet
	for i := 0; i < len(set); i++ {
		set[i] = map[string]bool{}
	}
	return set
}

// insert records s in the group selected by key.
func (m *decompSet) insert(key int, s string) {
	group := m[key]
	group[s] = true
}
|
||||
|
||||
func printCharInfoTables() int {
|
||||
mkstr := func(r rune, f *FormInfo) (int, string) {
|
||||
d := f.expandedDecomp
|
||||
s := string([]rune(d))
|
||||
if max := 1 << 6; len(s) >= max {
|
||||
const msg = "%U: too many bytes in decomposition: %d >= %d"
|
||||
logger.Fatalf(msg, r, len(s), max)
|
||||
}
|
||||
head := uint8(len(s))
|
||||
if f.quickCheck[MComposed] != QCYes {
|
||||
head |= 0x40
|
||||
}
|
||||
if f.combinesForward {
|
||||
head |= 0x80
|
||||
}
|
||||
s = string([]byte{head}) + s
|
||||
|
||||
lccc := ccc(d[0])
|
||||
tccc := ccc(d[len(d)-1])
|
||||
if tccc < lccc && lccc != 0 {
|
||||
const msg = "%U: lccc (%d) must be <= tcc (%d)"
|
||||
logger.Fatalf(msg, r, lccc, tccc)
|
||||
}
|
||||
index := 0
|
||||
if tccc > 0 || lccc > 0 {
|
||||
s += string([]byte{tccc})
|
||||
index = 1
|
||||
if lccc > 0 {
|
||||
s += string([]byte{lccc})
|
||||
index |= 2
|
||||
}
|
||||
}
|
||||
return index, s
|
||||
}
|
||||
|
||||
decompSet := makeDecompSet()
|
||||
|
||||
// Store the uniqued decompositions in a byte buffer,
|
||||
// preceded by their byte length.
|
||||
for _, c := range chars {
|
||||
for _, f := range c.forms {
|
||||
if len(f.expandedDecomp) == 0 {
|
||||
continue
|
||||
}
|
||||
if f.combinesBackward {
|
||||
logger.Fatalf("%U: combinesBackward and decompose", c.codePoint)
|
||||
}
|
||||
index, s := mkstr(c.codePoint, &f)
|
||||
decompSet.insert(index, s)
|
||||
}
|
||||
}
|
||||
|
||||
decompositions := bytes.NewBuffer(make([]byte, 0, 10000))
|
||||
size := 0
|
||||
positionMap := make(map[string]uint16)
|
||||
decompositions.WriteString("\000")
|
||||
cname := []string{"firstCCC", "firstLeadingCCC", "", "lastDecomp"}
|
||||
fmt.Println("const (")
|
||||
for i, m := range decompSet {
|
||||
sa := []string{}
|
||||
for s := range m {
|
||||
sa = append(sa, s)
|
||||
}
|
||||
sort.Strings(sa)
|
||||
for _, s := range sa {
|
||||
p := decompositions.Len()
|
||||
decompositions.WriteString(s)
|
||||
positionMap[s] = uint16(p)
|
||||
}
|
||||
if cname[i] != "" {
|
||||
fmt.Printf("%s = 0x%X\n", cname[i], decompositions.Len())
|
||||
}
|
||||
}
|
||||
fmt.Println("maxDecomp = 0x8000")
|
||||
fmt.Println(")")
|
||||
b := decompositions.Bytes()
|
||||
printBytes(b, "decomps")
|
||||
size += len(b)
|
||||
|
||||
varnames := []string{"nfc", "nfkc"}
|
||||
for i := 0; i < FNumberOfFormTypes; i++ {
|
||||
trie := newNode()
|
||||
for r, c := range chars {
|
||||
f := c.forms[i]
|
||||
d := f.expandedDecomp
|
||||
if len(d) != 0 {
|
||||
_, key := mkstr(c.codePoint, &f)
|
||||
trie.insert(rune(r), positionMap[key])
|
||||
if c.ccc != ccc(d[0]) {
|
||||
// We assume the lead ccc of a decomposition !=0 in this case.
|
||||
if ccc(d[0]) == 0 {
|
||||
logger.Fatalf("Expected leading CCC to be non-zero; ccc is %d", c.ccc)
|
||||
}
|
||||
}
|
||||
} else if v := makeEntry(&f)<<8 | uint16(c.ccc); v != 0 {
|
||||
trie.insert(c.codePoint, 0x8000|v)
|
||||
}
|
||||
}
|
||||
size += trie.printTables(varnames[i])
|
||||
}
|
||||
return size
|
||||
}
|
||||
|
||||
// contains reports whether s is an element of sa.
func contains(sa []string, s string) bool {
	for i := 0; i < len(sa); i++ {
		if sa[i] == s {
			return true
		}
	}
	return false
}
|
||||
|
||||
// Extract the version number from the URL.
|
||||
func version() string {
|
||||
// From http://www.unicode.org/standard/versions/#Version_Numbering:
|
||||
// for the later Unicode versions, data files are located in
|
||||
// versioned directories.
|
||||
fields := strings.Split(*url, "/")
|
||||
for _, f := range fields {
|
||||
if match, _ := regexp.MatchString(`[0-9]\.[0-9]\.[0-9]`, f); match {
|
||||
return f
|
||||
}
|
||||
}
|
||||
logger.Fatal("unknown version")
|
||||
return "Unknown"
|
||||
}
|
||||
|
||||
// fileHeader is the format string for the top of the generated file; its
// two verbs receive the -tables and -url flag values.
const fileHeader = "// Generated by running\n" +
	"// maketables --tables=%s --url=%s\n" +
	"// DO NOT EDIT\n" +
	"\n" +
	"package norm\n" +
	"\n"
|
||||
|
||||
func makeTables() {
|
||||
size := 0
|
||||
if *tablelist == "" {
|
||||
return
|
||||
}
|
||||
list := strings.Split(*tablelist, ",")
|
||||
if *tablelist == "all" {
|
||||
list = []string{"recomp", "info"}
|
||||
}
|
||||
fmt.Printf(fileHeader, *tablelist, *url)
|
||||
|
||||
fmt.Println("// Version is the Unicode edition from which the tables are derived.")
|
||||
fmt.Printf("const Version = %q\n\n", version())
|
||||
|
||||
if contains(list, "info") {
|
||||
size += printCharInfoTables()
|
||||
}
|
||||
|
||||
if contains(list, "recomp") {
|
||||
// Note that we use 32 bit keys, instead of 64 bit.
|
||||
// This clips the bits of three entries, but we know
|
||||
// this won't cause a collision. The compiler will catch
|
||||
// any changes made to UnicodeData.txt that introduces
|
||||
// a collision.
|
||||
// Note that the recomposition map for NFC and NFKC
|
||||
// are identical.
|
||||
|
||||
// Recomposition map
|
||||
nrentries := 0
|
||||
for _, c := range chars {
|
||||
f := c.forms[FCanonical]
|
||||
if !f.isOneWay && len(f.decomp) > 0 {
|
||||
nrentries++
|
||||
}
|
||||
}
|
||||
sz := nrentries * 8
|
||||
size += sz
|
||||
fmt.Printf("// recompMap: %d bytes (entries only)\n", sz)
|
||||
fmt.Println("var recompMap = map[uint32]rune{")
|
||||
for i, c := range chars {
|
||||
f := c.forms[FCanonical]
|
||||
d := f.decomp
|
||||
if !f.isOneWay && len(d) > 0 {
|
||||
key := uint32(uint16(d[0]))<<16 + uint32(uint16(d[1]))
|
||||
fmt.Printf("0x%.8X: 0x%.4X,\n", key, i)
|
||||
}
|
||||
}
|
||||
fmt.Printf("}\n\n")
|
||||
}
|
||||
|
||||
fmt.Printf("// Total size of tables: %dKB (%d bytes)\n", (size+512)/1024, size)
|
||||
}
|
||||
|
||||
func printChars() {
|
||||
if *verbose {
|
||||
for _, c := range chars {
|
||||
if !c.isValid() || c.state == SMissing {
|
||||
continue
|
||||
}
|
||||
fmt.Println(c)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// verifyComputed does various consistency tests.
|
||||
func verifyComputed() {
|
||||
for i, c := range chars {
|
||||
for _, f := range c.forms {
|
||||
isNo := (f.quickCheck[MDecomposed] == QCNo)
|
||||
if (len(f.decomp) > 0) != isNo && !isHangul(rune(i)) {
|
||||
log.Fatalf("%U: NF*D must be no if rune decomposes", i)
|
||||
}
|
||||
|
||||
isMaybe := f.quickCheck[MComposed] == QCMaybe
|
||||
if f.combinesBackward != isMaybe {
|
||||
log.Fatalf("%U: NF*C must be maybe if combinesBackward", i)
|
||||
}
|
||||
}
|
||||
nfc := c.forms[FCanonical]
|
||||
nfkc := c.forms[FCompatibility]
|
||||
if nfc.combinesBackward != nfkc.combinesBackward {
|
||||
logger.Fatalf("%U: Cannot combine combinesBackward\n", c.codePoint)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var qcRe = regexp.MustCompile(`([0-9A-F\.]+) *; (NF.*_QC); ([YNM]) #.*`)
|
||||
|
||||
// Use values in DerivedNormalizationProps.txt to compare against the
|
||||
// values we computed.
|
||||
// DerivedNormalizationProps.txt has form:
|
||||
// 00C0..00C5 ; NFD_QC; N # ...
|
||||
// 0374 ; NFD_QC; N # ...
|
||||
// See http://unicode.org/reports/tr44/ for full explanation
|
||||
func testDerived() {
|
||||
if !*test {
|
||||
return
|
||||
}
|
||||
f := openReader("DerivedNormalizationProps.txt")
|
||||
defer f.Close()
|
||||
input := bufio.NewReader(f)
|
||||
for {
|
||||
line, err := input.ReadString('\n')
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
logger.Fatal(err)
|
||||
}
|
||||
qc := qcRe.FindStringSubmatch(line)
|
||||
if qc == nil {
|
||||
continue
|
||||
}
|
||||
rng := strings.Split(qc[1], "..")
|
||||
i, err := strconv.ParseUint(rng[0], 16, 64)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
j := i
|
||||
if len(rng) > 1 {
|
||||
j, err = strconv.ParseUint(rng[1], 16, 64)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
}
|
||||
var ftype, mode int
|
||||
qt := strings.TrimSpace(qc[2])
|
||||
switch qt {
|
||||
case "NFC_QC":
|
||||
ftype, mode = FCanonical, MComposed
|
||||
case "NFD_QC":
|
||||
ftype, mode = FCanonical, MDecomposed
|
||||
case "NFKC_QC":
|
||||
ftype, mode = FCompatibility, MComposed
|
||||
case "NFKD_QC":
|
||||
ftype, mode = FCompatibility, MDecomposed
|
||||
default:
|
||||
log.Fatalf(`Unexpected quick check type "%s"`, qt)
|
||||
}
|
||||
var qr QCResult
|
||||
switch qc[3] {
|
||||
case "Y":
|
||||
qr = QCYes
|
||||
case "N":
|
||||
qr = QCNo
|
||||
case "M":
|
||||
qr = QCMaybe
|
||||
default:
|
||||
log.Fatalf(`Unexpected quick check value "%s"`, qc[3])
|
||||
}
|
||||
var lastFailed bool
|
||||
// Verify current
|
||||
for ; i <= j; i++ {
|
||||
c := &chars[int(i)]
|
||||
c.forms[ftype].verified[mode] = true
|
||||
curqr := c.forms[ftype].quickCheck[mode]
|
||||
if curqr != qr {
|
||||
if !lastFailed {
|
||||
logger.Printf("%s: %.4X..%.4X -- %s\n",
|
||||
qt, int(i), int(j), line[0:50])
|
||||
}
|
||||
logger.Printf("%U: FAILED %s (was %v need %v)\n",
|
||||
int(i), qt, curqr, qr)
|
||||
lastFailed = true
|
||||
}
|
||||
}
|
||||
}
|
||||
// Any unspecified value must be QCYes. Verify this.
|
||||
for i, c := range chars {
|
||||
for j, fd := range c.forms {
|
||||
for k, qr := range fd.quickCheck {
|
||||
if !fd.verified[k] && qr != QCYes {
|
||||
m := "%U: FAIL F:%d M:%d (was %v need Yes) %s\n"
|
||||
logger.Printf(m, i, j, k, qr, c.name)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,45 +0,0 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
// Generate test data for trie code.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
func main() {
|
||||
printTestTables()
|
||||
}
|
||||
|
||||
// testRunes lists the runes inserted into the test trie. We take the
// smallest, largest and an arbitrary value for each of the UTF-8 sequence
// lengths, plus several runes that share one sparse block.
var testRunes = []rune{
	// 1-byte sequences
	0x01, 0x0C, 0x7F,
	// 2-byte sequences
	0x80, 0x100, 0x7FF,
	// 3-byte sequences
	0x800, 0x999, 0xFFFF,
	// 4-byte sequences
	0x10000, 0x10101, 0x10FFFF,
	// five entries in one sparse block
	0x200, 0x201, 0x202, 0x210, 0x215,
}
|
||||
|
||||
// fileHeader is the text printed at the top of the generated test file.
const fileHeader = "// Generated by running\n" +
	"// maketesttables\n" +
	"// DO NOT EDIT\n" +
	"\n" +
	"package norm\n" +
	"\n"
|
||||
|
||||
func printTestTables() {
|
||||
fmt.Print(fileHeader)
|
||||
fmt.Printf("var testRunes = %#v\n\n", testRunes)
|
||||
t := newNode()
|
||||
for i, r := range testRunes {
|
||||
t.insert(r, uint16(i))
|
||||
}
|
||||
t.printTables("testdata")
|
||||
}
|
||||
@@ -1,14 +0,0 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package norm_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestPlaceHolder is intentionally empty. It only exists so that the
// Makefile can stay canonical while the package itself is still to be
// written.
func TestPlaceHolder(t *testing.T) {}
|
||||
@@ -1,478 +0,0 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package norm contains types and functions for normalizing Unicode strings.
|
||||
package norm
|
||||
|
||||
import "unicode/utf8"
|
||||
|
||||
// A Form denotes a canonical representation of Unicode code points.
// The Unicode-defined normalization and equivalence forms are:
//
//	NFC   Unicode Normalization Form C
//	NFD   Unicode Normalization Form D
//	NFKC  Unicode Normalization Form KC
//	NFKD  Unicode Normalization Form KD
//
// For a Form f, this documentation uses the notation f(x) to mean
// the bytes or string x converted to the given form.
// A position n in x is called a boundary if conversion to the form can
// proceed independently on both sides:
//
//	f(x) == append(f(x[0:n]), f(x[n:])...)
//
// References: http://unicode.org/reports/tr15/ and
// http://unicode.org/notes/tn5/.
type Form int

// The supported normalization forms, numbered from zero in this order.
const (
	NFC Form = iota
	NFD
	NFKC
	NFKD
)
|
||||
|
||||
// Bytes returns f(b). May return b if f(b) = b.
|
||||
func (f Form) Bytes(b []byte) []byte {
|
||||
rb := reorderBuffer{}
|
||||
rb.init(f, b)
|
||||
n := quickSpan(&rb, 0)
|
||||
if n == len(b) {
|
||||
return b
|
||||
}
|
||||
out := make([]byte, n, len(b))
|
||||
copy(out, b[0:n])
|
||||
return doAppend(&rb, out, n)
|
||||
}
|
||||
|
||||
// String returns f(s).
|
||||
func (f Form) String(s string) string {
|
||||
rb := reorderBuffer{}
|
||||
rb.initString(f, s)
|
||||
n := quickSpan(&rb, 0)
|
||||
if n == len(s) {
|
||||
return s
|
||||
}
|
||||
out := make([]byte, n, len(s))
|
||||
copy(out, s[0:n])
|
||||
return string(doAppend(&rb, out, n))
|
||||
}
|
||||
|
||||
// IsNormal returns true if b == f(b).
|
||||
func (f Form) IsNormal(b []byte) bool {
|
||||
rb := reorderBuffer{}
|
||||
rb.init(f, b)
|
||||
bp := quickSpan(&rb, 0)
|
||||
if bp == len(b) {
|
||||
return true
|
||||
}
|
||||
for bp < len(b) {
|
||||
decomposeSegment(&rb, bp)
|
||||
if rb.f.composing {
|
||||
rb.compose()
|
||||
}
|
||||
for i := 0; i < rb.nrune; i++ {
|
||||
info := rb.rune[i]
|
||||
if bp+int(info.size) > len(b) {
|
||||
return false
|
||||
}
|
||||
p := info.pos
|
||||
pe := p + info.size
|
||||
for ; p < pe; p++ {
|
||||
if b[bp] != rb.byte[p] {
|
||||
return false
|
||||
}
|
||||
bp++
|
||||
}
|
||||
}
|
||||
rb.reset()
|
||||
bp = quickSpan(&rb, bp)
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// IsNormalString returns true if s == f(s).
|
||||
func (f Form) IsNormalString(s string) bool {
|
||||
rb := reorderBuffer{}
|
||||
rb.initString(f, s)
|
||||
bp := quickSpan(&rb, 0)
|
||||
if bp == len(s) {
|
||||
return true
|
||||
}
|
||||
for bp < len(s) {
|
||||
decomposeSegment(&rb, bp)
|
||||
if rb.f.composing {
|
||||
rb.compose()
|
||||
}
|
||||
for i := 0; i < rb.nrune; i++ {
|
||||
info := rb.rune[i]
|
||||
if bp+int(info.size) > len(s) {
|
||||
return false
|
||||
}
|
||||
p := info.pos
|
||||
pe := p + info.size
|
||||
for ; p < pe; p++ {
|
||||
if s[bp] != rb.byte[p] {
|
||||
return false
|
||||
}
|
||||
bp++
|
||||
}
|
||||
}
|
||||
rb.reset()
|
||||
bp = quickSpan(&rb, bp)
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// patchTail fixes a case where a rune may be incorrectly normalized
|
||||
// if it is followed by illegal continuation bytes. It returns the
|
||||
// patched buffer and whether there were trailing continuation bytes.
|
||||
func patchTail(rb *reorderBuffer, buf []byte) ([]byte, bool) {
|
||||
info, p := lastRuneStart(&rb.f, buf)
|
||||
if p == -1 || info.size == 0 {
|
||||
return buf, false
|
||||
}
|
||||
end := p + int(info.size)
|
||||
extra := len(buf) - end
|
||||
if extra > 0 {
|
||||
// Potentially allocating memory. However, this only
|
||||
// happens with ill-formed UTF-8.
|
||||
x := make([]byte, 0)
|
||||
x = append(x, buf[len(buf)-extra:]...)
|
||||
buf = decomposeToLastBoundary(rb, buf[:end])
|
||||
if rb.f.composing {
|
||||
rb.compose()
|
||||
}
|
||||
buf = rb.flush(buf)
|
||||
return append(buf, x...), true
|
||||
}
|
||||
return buf, false
|
||||
}
|
||||
|
||||
func appendQuick(rb *reorderBuffer, dst []byte, i int) ([]byte, int) {
|
||||
if rb.nsrc == i {
|
||||
return dst, i
|
||||
}
|
||||
end := quickSpan(rb, i)
|
||||
return rb.src.appendSlice(dst, i, end), end
|
||||
}
|
||||
|
||||
// Append returns f(append(out, b...)).
|
||||
// The buffer out must be nil, empty, or equal to f(out).
|
||||
func (f Form) Append(out []byte, src ...byte) []byte {
|
||||
if len(src) == 0 {
|
||||
return out
|
||||
}
|
||||
rb := reorderBuffer{}
|
||||
rb.init(f, src)
|
||||
return doAppend(&rb, out, 0)
|
||||
}
|
||||
|
||||
func doAppend(rb *reorderBuffer, out []byte, p int) []byte {
|
||||
src, n := rb.src, rb.nsrc
|
||||
doMerge := len(out) > 0
|
||||
if q := src.skipNonStarter(p); q > p {
|
||||
// Move leading non-starters to destination.
|
||||
out = src.appendSlice(out, p, q)
|
||||
buf, endsInError := patchTail(rb, out)
|
||||
if endsInError {
|
||||
out = buf
|
||||
doMerge = false // no need to merge, ends with illegal UTF-8
|
||||
} else {
|
||||
out = decomposeToLastBoundary(rb, buf) // force decomposition
|
||||
}
|
||||
p = q
|
||||
}
|
||||
fd := &rb.f
|
||||
if doMerge {
|
||||
var info runeInfo
|
||||
if p < n {
|
||||
info = fd.info(src, p)
|
||||
if p == 0 && !info.boundaryBefore() {
|
||||
out = decomposeToLastBoundary(rb, out)
|
||||
}
|
||||
}
|
||||
if info.size == 0 || info.boundaryBefore() {
|
||||
if fd.composing {
|
||||
rb.compose()
|
||||
}
|
||||
out = rb.flush(out)
|
||||
if info.size == 0 {
|
||||
// Append incomplete UTF-8 encoding.
|
||||
return src.appendSlice(out, p, n)
|
||||
}
|
||||
}
|
||||
}
|
||||
if rb.nrune == 0 {
|
||||
out, p = appendQuick(rb, out, p)
|
||||
}
|
||||
for p < n {
|
||||
p = decomposeSegment(rb, p)
|
||||
if fd.composing {
|
||||
rb.compose()
|
||||
}
|
||||
out = rb.flush(out)
|
||||
out, p = appendQuick(rb, out, p)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// AppendString returns f(append(out, []byte(s))).
|
||||
// The buffer out must be nil, empty, or equal to f(out).
|
||||
func (f Form) AppendString(out []byte, src string) []byte {
|
||||
if len(src) == 0 {
|
||||
return out
|
||||
}
|
||||
rb := reorderBuffer{}
|
||||
rb.initString(f, src)
|
||||
return doAppend(&rb, out, 0)
|
||||
}
|
||||
|
||||
// QuickSpan returns a boundary n such that b[0:n] == f(b[0:n]).
|
||||
// It is not guaranteed to return the largest such n.
|
||||
func (f Form) QuickSpan(b []byte) int {
|
||||
rb := reorderBuffer{}
|
||||
rb.init(f, b)
|
||||
n := quickSpan(&rb, 0)
|
||||
return n
|
||||
}
|
||||
|
||||
func quickSpan(rb *reorderBuffer, i int) int {
|
||||
var lastCC uint8
|
||||
var nc int
|
||||
lastSegStart := i
|
||||
src, n := rb.src, rb.nsrc
|
||||
for i < n {
|
||||
if j := src.skipASCII(i, n); i != j {
|
||||
i = j
|
||||
lastSegStart = i - 1
|
||||
lastCC = 0
|
||||
nc = 0
|
||||
continue
|
||||
}
|
||||
info := rb.f.info(src, i)
|
||||
if info.size == 0 {
|
||||
// include incomplete runes
|
||||
return n
|
||||
}
|
||||
cc := info.ccc
|
||||
if rb.f.composing {
|
||||
if !info.isYesC() {
|
||||
break
|
||||
}
|
||||
} else {
|
||||
if !info.isYesD() {
|
||||
break
|
||||
}
|
||||
}
|
||||
if cc == 0 {
|
||||
lastSegStart = i
|
||||
nc = 0
|
||||
} else {
|
||||
if nc >= maxCombiningChars {
|
||||
lastSegStart = i
|
||||
lastCC = cc
|
||||
nc = 1
|
||||
} else {
|
||||
if lastCC > cc {
|
||||
return lastSegStart
|
||||
}
|
||||
nc++
|
||||
}
|
||||
}
|
||||
lastCC = cc
|
||||
i += int(info.size)
|
||||
}
|
||||
if i == n {
|
||||
return n
|
||||
}
|
||||
if rb.f.composing {
|
||||
return lastSegStart
|
||||
}
|
||||
return i
|
||||
}
|
||||
|
||||
// QuickSpanString returns a boundary n such that b[0:n] == f(s[0:n]).
|
||||
// It is not guaranteed to return the largest such n.
|
||||
func (f Form) QuickSpanString(s string) int {
|
||||
rb := reorderBuffer{}
|
||||
rb.initString(f, s)
|
||||
return quickSpan(&rb, 0)
|
||||
}
|
||||
|
||||
// FirstBoundary returns the position i of the first boundary in b
|
||||
// or -1 if b contains no boundary.
|
||||
func (f Form) FirstBoundary(b []byte) int {
|
||||
rb := reorderBuffer{}
|
||||
rb.init(f, b)
|
||||
return firstBoundary(&rb)
|
||||
}
|
||||
|
||||
func firstBoundary(rb *reorderBuffer) int {
|
||||
src, nsrc := rb.src, rb.nsrc
|
||||
i := src.skipNonStarter(0)
|
||||
if i >= nsrc {
|
||||
return -1
|
||||
}
|
||||
fd := &rb.f
|
||||
info := fd.info(src, i)
|
||||
for n := 0; info.size != 0 && !info.boundaryBefore(); {
|
||||
i += int(info.size)
|
||||
if n++; n >= maxCombiningChars {
|
||||
return i
|
||||
}
|
||||
if i >= nsrc {
|
||||
if !info.boundaryAfter() {
|
||||
return -1
|
||||
}
|
||||
return nsrc
|
||||
}
|
||||
info = fd.info(src, i)
|
||||
}
|
||||
if info.size == 0 {
|
||||
return -1
|
||||
}
|
||||
return i
|
||||
}
|
||||
|
||||
// FirstBoundaryInString returns the position i of the first boundary in s
|
||||
// or -1 if s contains no boundary.
|
||||
func (f Form) FirstBoundaryInString(s string) int {
|
||||
rb := reorderBuffer{}
|
||||
rb.initString(f, s)
|
||||
return firstBoundary(&rb)
|
||||
}
|
||||
|
||||
// LastBoundary returns the position i of the last boundary in b
|
||||
// or -1 if b contains no boundary.
|
||||
func (f Form) LastBoundary(b []byte) int {
|
||||
return lastBoundary(formTable[f], b)
|
||||
}
|
||||
|
||||
func lastBoundary(fd *formInfo, b []byte) int {
|
||||
i := len(b)
|
||||
info, p := lastRuneStart(fd, b)
|
||||
if p == -1 {
|
||||
return -1
|
||||
}
|
||||
if info.size == 0 { // ends with incomplete rune
|
||||
if p == 0 { // starts with incomplete rune
|
||||
return -1
|
||||
}
|
||||
i = p
|
||||
info, p = lastRuneStart(fd, b[:i])
|
||||
if p == -1 { // incomplete UTF-8 encoding or non-starter bytes without a starter
|
||||
return i
|
||||
}
|
||||
}
|
||||
if p+int(info.size) != i { // trailing non-starter bytes: illegal UTF-8
|
||||
return i
|
||||
}
|
||||
if info.boundaryAfter() {
|
||||
return i
|
||||
}
|
||||
i = p
|
||||
for n := 0; i >= 0 && !info.boundaryBefore(); {
|
||||
info, p = lastRuneStart(fd, b[:i])
|
||||
if n++; n >= maxCombiningChars {
|
||||
return len(b)
|
||||
}
|
||||
if p+int(info.size) != i {
|
||||
if p == -1 { // no boundary found
|
||||
return -1
|
||||
}
|
||||
return i // boundary after an illegal UTF-8 encoding
|
||||
}
|
||||
i = p
|
||||
}
|
||||
return i
|
||||
}
|
||||
|
||||
// decomposeSegment scans the first segment in src into rb.
|
||||
// It returns the number of bytes consumed from src.
|
||||
// TODO(mpvl): consider inserting U+034f (Combining Grapheme Joiner)
|
||||
// when we detect a sequence of 30+ non-starter chars.
|
||||
func decomposeSegment(rb *reorderBuffer, sp int) int {
|
||||
// Force one character to be consumed.
|
||||
info := rb.f.info(rb.src, sp)
|
||||
if info.size == 0 {
|
||||
return 0
|
||||
}
|
||||
for rb.insert(rb.src, sp, info) {
|
||||
sp += int(info.size)
|
||||
if sp >= rb.nsrc {
|
||||
break
|
||||
}
|
||||
info = rb.f.info(rb.src, sp)
|
||||
bound := info.boundaryBefore()
|
||||
if bound || info.size == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
return sp
|
||||
}
|
||||
|
||||
// lastRuneStart returns the runeInfo and position of the last
|
||||
// rune in buf or the zero runeInfo and -1 if no rune was found.
|
||||
func lastRuneStart(fd *formInfo, buf []byte) (runeInfo, int) {
|
||||
p := len(buf) - 1
|
||||
for ; p >= 0 && !utf8.RuneStart(buf[p]); p-- {
|
||||
}
|
||||
if p < 0 {
|
||||
return runeInfo{}, -1
|
||||
}
|
||||
return fd.info(inputBytes(buf), p), p
|
||||
}
|
||||
|
||||
// decomposeToLastBoundary finds an open segment at the end of the buffer
|
||||
// and scans it into rb. Returns the buffer minus the last segment.
|
||||
func decomposeToLastBoundary(rb *reorderBuffer, buf []byte) []byte {
|
||||
fd := &rb.f
|
||||
info, i := lastRuneStart(fd, buf)
|
||||
if int(info.size) != len(buf)-i {
|
||||
// illegal trailing continuation bytes
|
||||
return buf
|
||||
}
|
||||
if info.boundaryAfter() {
|
||||
return buf
|
||||
}
|
||||
var add [maxBackRunes]runeInfo // stores runeInfo in reverse order
|
||||
add[0] = info
|
||||
padd := 1
|
||||
n := 1
|
||||
p := len(buf) - int(info.size)
|
||||
for ; p >= 0 && !info.boundaryBefore(); p -= int(info.size) {
|
||||
info, i = lastRuneStart(fd, buf[:p])
|
||||
if int(info.size) != p-i {
|
||||
break
|
||||
}
|
||||
// Check that decomposition doesn't result in overflow.
|
||||
if info.hasDecomposition() {
|
||||
if isHangul(buf) {
|
||||
i += int(info.size)
|
||||
n++
|
||||
} else {
|
||||
dcomp := info.decomposition()
|
||||
for i := 0; i < len(dcomp); {
|
||||
inf := rb.f.info(inputBytes(dcomp), i)
|
||||
i += int(inf.size)
|
||||
n++
|
||||
}
|
||||
}
|
||||
} else {
|
||||
n++
|
||||
}
|
||||
if n > maxBackRunes {
|
||||
break
|
||||
}
|
||||
add[padd] = info
|
||||
padd++
|
||||
}
|
||||
pp := p
|
||||
for padd--; padd >= 0; padd-- {
|
||||
info = add[padd]
|
||||
rb.insert(inputBytes(buf), pp, info)
|
||||
pp += int(info.size)
|
||||
}
|
||||
return buf[:p]
|
||||
}
|
||||
@@ -1,724 +0,0 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package norm
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// PositionTest is one case for runPosTests: an input string, the position
// the function under test is expected to return, and the expected contents
// of the reorder buffer afterwards.
type PositionTest struct {
	input  string
	pos    int
	buffer string // expected contents of reorderBuffer, if applicable
}
|
||||
|
||||
// positionFunc is the signature of the functions exercised by runPosTests:
// given a reorder buffer and an input string, it returns a position.
type positionFunc func(rb *reorderBuffer, s string) int
|
||||
|
||||
func runPosTests(t *testing.T, name string, f Form, fn positionFunc, tests []PositionTest) {
|
||||
rb := reorderBuffer{}
|
||||
rb.init(f, nil)
|
||||
for i, test := range tests {
|
||||
rb.reset()
|
||||
rb.src = inputString(test.input)
|
||||
rb.nsrc = len(test.input)
|
||||
pos := fn(&rb, test.input)
|
||||
if pos != test.pos {
|
||||
t.Errorf("%s:%d: position is %d; want %d", name, i, pos, test.pos)
|
||||
}
|
||||
runes := []rune(test.buffer)
|
||||
if rb.nrune != len(runes) {
|
||||
t.Errorf("%s:%d: reorder buffer lenght is %d; want %d", name, i, rb.nrune, len(runes))
|
||||
continue
|
||||
}
|
||||
for j, want := range runes {
|
||||
found := rune(rb.runeAt(j))
|
||||
if found != want {
|
||||
t.Errorf("%s:%d: rune at %d is %U; want %U", name, i, j, found, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var decomposeSegmentTests = []PositionTest{
|
||||
// illegal runes
|
||||
{"\xC0", 0, ""},
|
||||
{"\u00E0\x80", 2, "\u0061\u0300"},
|
||||
// starter
|
||||
{"a", 1, "a"},
|
||||
{"ab", 1, "a"},
|
||||
// starter + composing
|
||||
{"a\u0300", 3, "a\u0300"},
|
||||
{"a\u0300b", 3, "a\u0300"},
|
||||
// with decomposition
|
||||
{"\u00C0", 2, "A\u0300"},
|
||||
{"\u00C0b", 2, "A\u0300"},
|
||||
// long
|
||||
{strings.Repeat("\u0300", 31), 62, strings.Repeat("\u0300", 31)},
|
||||
// ends with incomplete UTF-8 encoding
|
||||
{"\xCC", 0, ""},
|
||||
{"\u0300\xCC", 2, "\u0300"},
|
||||
}
|
||||
|
||||
func decomposeSegmentF(rb *reorderBuffer, s string) int {
|
||||
rb.src = inputString(s)
|
||||
rb.nsrc = len(s)
|
||||
return decomposeSegment(rb, 0)
|
||||
}
|
||||
|
||||
func TestDecomposeSegment(t *testing.T) {
|
||||
runPosTests(t, "TestDecomposeSegment", NFC, decomposeSegmentF, decomposeSegmentTests)
|
||||
}
|
||||
|
||||
var firstBoundaryTests = []PositionTest{
|
||||
// no boundary
|
||||
{"", -1, ""},
|
||||
{"\u0300", -1, ""},
|
||||
{"\x80\x80", -1, ""},
|
||||
// illegal runes
|
||||
{"\xff", 0, ""},
|
||||
{"\u0300\xff", 2, ""},
|
||||
{"\u0300\xc0\x80\x80", 2, ""},
|
||||
// boundaries
|
||||
{"a", 0, ""},
|
||||
{"\u0300a", 2, ""},
|
||||
// Hangul
|
||||
{"\u1103\u1161", 0, ""},
|
||||
{"\u110B\u1173\u11B7", 0, ""},
|
||||
{"\u1161\u110B\u1173\u11B7", 3, ""},
|
||||
{"\u1173\u11B7\u1103\u1161", 6, ""},
|
||||
// too many combining characters.
|
||||
{strings.Repeat("\u0300", maxCombiningChars-1), -1, ""},
|
||||
{strings.Repeat("\u0300", maxCombiningChars), 60, ""},
|
||||
{strings.Repeat("\u0300", maxCombiningChars+1), 60, ""},
|
||||
}
|
||||
|
||||
func firstBoundaryF(rb *reorderBuffer, s string) int {
|
||||
return rb.f.form.FirstBoundary([]byte(s))
|
||||
}
|
||||
|
||||
func firstBoundaryStringF(rb *reorderBuffer, s string) int {
|
||||
return rb.f.form.FirstBoundaryInString(s)
|
||||
}
|
||||
|
||||
func TestFirstBoundary(t *testing.T) {
|
||||
runPosTests(t, "TestFirstBoundary", NFC, firstBoundaryF, firstBoundaryTests)
|
||||
runPosTests(t, "TestFirstBoundaryInString", NFC, firstBoundaryStringF, firstBoundaryTests)
|
||||
}
|
||||
|
||||
var decomposeToLastTests = []PositionTest{
|
||||
// ends with inert character
|
||||
{"Hello!", 6, ""},
|
||||
{"\u0632", 2, ""},
|
||||
{"a\u0301\u0635", 5, ""},
|
||||
// ends with non-inert starter
|
||||
{"a", 0, "a"},
|
||||
{"a\u0301a", 3, "a"},
|
||||
{"a\u0301\u03B9", 3, "\u03B9"},
|
||||
{"a\u0327", 0, "a\u0327"},
|
||||
// illegal runes
|
||||
{"\xFF", 1, ""},
|
||||
{"aa\xFF", 3, ""},
|
||||
{"\xC0\x80\x80", 3, ""},
|
||||
{"\xCC\x80\x80", 3, ""},
|
||||
// ends with incomplete UTF-8 encoding
|
||||
{"a\xCC", 2, ""},
|
||||
// ends with combining characters
|
||||
{"\u0300\u0301", 0, "\u0300\u0301"},
|
||||
{"a\u0300\u0301", 0, "a\u0300\u0301"},
|
||||
{"a\u0301\u0308", 0, "a\u0301\u0308"},
|
||||
{"a\u0308\u0301", 0, "a\u0308\u0301"},
|
||||
{"aaaa\u0300\u0301", 3, "a\u0300\u0301"},
|
||||
{"\u0300a\u0300\u0301", 2, "a\u0300\u0301"},
|
||||
{"\u00C0", 0, "A\u0300"},
|
||||
{"a\u00C0", 1, "A\u0300"},
|
||||
// decomposing
|
||||
{"a\u0300\uFDC0", 3, "\u0645\u062C\u064A"},
|
||||
{"\uFDC0" + strings.Repeat("\u0300", 26), 0, "\u0645\u062C\u064A" + strings.Repeat("\u0300", 26)},
|
||||
// Hangul
|
||||
{"a\u1103", 1, "\u1103"},
|
||||
{"a\u110B", 1, "\u110B"},
|
||||
{"a\u110B\u1173", 1, "\u110B\u1173"},
|
||||
// See comment in composition.go:compBoundaryAfter.
|
||||
{"a\u110B\u1173\u11B7", 1, "\u110B\u1173\u11B7"},
|
||||
{"a\uC73C", 1, "\u110B\u1173"},
|
||||
{"다음", 3, "\u110B\u1173\u11B7"},
|
||||
{"다", 0, "\u1103\u1161"},
|
||||
{"\u1103\u1161\u110B\u1173\u11B7", 6, "\u110B\u1173\u11B7"},
|
||||
{"\u110B\u1173\u11B7\u1103\u1161", 9, "\u1103\u1161"},
|
||||
{"다음음", 6, "\u110B\u1173\u11B7"},
|
||||
{"음다다", 6, "\u1103\u1161"},
|
||||
// buffer overflow
|
||||
{"a" + strings.Repeat("\u0300", 30), 3, strings.Repeat("\u0300", 29)},
|
||||
{"\uFDFA" + strings.Repeat("\u0300", 14), 3, strings.Repeat("\u0300", 14)},
|
||||
// weird UTF-8
|
||||
{"a\u0300\u11B7", 0, "a\u0300\u11B7"},
|
||||
}
|
||||
|
||||
func decomposeToLast(rb *reorderBuffer, s string) int {
|
||||
buf := decomposeToLastBoundary(rb, []byte(s))
|
||||
return len(buf)
|
||||
}
|
||||
|
||||
func TestDecomposeToLastBoundary(t *testing.T) {
|
||||
runPosTests(t, "TestDecomposeToLastBoundary", NFKC, decomposeToLast, decomposeToLastTests)
|
||||
}
|
||||
|
||||
var lastBoundaryTests = []PositionTest{
|
||||
// ends with inert character
|
||||
{"Hello!", 6, ""},
|
||||
{"\u0632", 2, ""},
|
||||
// ends with non-inert starter
|
||||
{"a", 0, ""},
|
||||
// illegal runes
|
||||
{"\xff", 1, ""},
|
||||
{"aa\xff", 3, ""},
|
||||
{"a\xff\u0300", 1, ""},
|
||||
{"\xc0\x80\x80", 3, ""},
|
||||
{"\xc0\x80\x80\u0300", 3, ""},
|
||||
// ends with incomplete UTF-8 encoding
|
||||
{"\xCC", -1, ""},
|
||||
{"\xE0\x80", -1, ""},
|
||||
{"\xF0\x80\x80", -1, ""},
|
||||
{"a\xCC", 0, ""},
|
||||
{"\x80\xCC", 1, ""},
|
||||
{"\xCC\xCC", 1, ""},
|
||||
// ends with combining characters
|
||||
{"a\u0300\u0301", 0, ""},
|
||||
{"aaaa\u0300\u0301", 3, ""},
|
||||
{"\u0300a\u0300\u0301", 2, ""},
|
||||
{"\u00C0", 0, ""},
|
||||
{"a\u00C0", 1, ""},
|
||||
// decomposition may recombine
|
||||
{"\u0226", 0, ""},
|
||||
// no boundary
|
||||
{"", -1, ""},
|
||||
{"\u0300\u0301", -1, ""},
|
||||
{"\u0300", -1, ""},
|
||||
{"\x80\x80", -1, ""},
|
||||
{"\x80\x80\u0301", -1, ""},
|
||||
// Hangul
|
||||
{"다음", 3, ""},
|
||||
{"다", 0, ""},
|
||||
{"\u1103\u1161\u110B\u1173\u11B7", 6, ""},
|
||||
{"\u110B\u1173\u11B7\u1103\u1161", 9, ""},
|
||||
// too many combining characters.
|
||||
{strings.Repeat("\u0300", maxCombiningChars-1), -1, ""},
|
||||
{strings.Repeat("\u0300", maxCombiningChars), 60, ""},
|
||||
{strings.Repeat("\u0300", maxCombiningChars+1), 62, ""},
|
||||
}
|
||||
|
||||
func lastBoundaryF(rb *reorderBuffer, s string) int {
|
||||
return rb.f.form.LastBoundary([]byte(s))
|
||||
}
|
||||
|
||||
func TestLastBoundary(t *testing.T) {
|
||||
runPosTests(t, "TestLastBoundary", NFC, lastBoundaryF, lastBoundaryTests)
|
||||
}
|
||||
|
||||
var quickSpanTests = []PositionTest{
|
||||
{"", 0, ""},
|
||||
// starters
|
||||
{"a", 1, ""},
|
||||
{"abc", 3, ""},
|
||||
{"\u043Eb", 3, ""},
|
||||
// incomplete last rune.
|
||||
{"\xCC", 1, ""},
|
||||
{"a\xCC", 2, ""},
|
||||
// incorrectly ordered combining characters
|
||||
{"\u0300\u0316", 0, ""},
|
||||
{"\u0300\u0316cd", 0, ""},
|
||||
// have a maximum number of combining characters.
|
||||
{strings.Repeat("\u035D", 30) + "\u035B", 62, ""},
|
||||
{"a" + strings.Repeat("\u035D", 30) + "\u035B", 63, ""},
|
||||
{"Ɵ" + strings.Repeat("\u035D", 30) + "\u035B", 64, ""},
|
||||
{"aa" + strings.Repeat("\u035D", 30) + "\u035B", 64, ""},
|
||||
}
|
||||
|
||||
var quickSpanNFDTests = []PositionTest{
|
||||
// needs decomposing
|
||||
{"\u00C0", 0, ""},
|
||||
{"abc\u00C0", 3, ""},
|
||||
// correctly ordered combining characters
|
||||
{"\u0300", 2, ""},
|
||||
{"ab\u0300", 4, ""},
|
||||
{"ab\u0300cd", 6, ""},
|
||||
{"\u0300cd", 4, ""},
|
||||
{"\u0316\u0300", 4, ""},
|
||||
{"ab\u0316\u0300", 6, ""},
|
||||
{"ab\u0316\u0300cd", 8, ""},
|
||||
{"ab\u0316\u0300\u00C0", 6, ""},
|
||||
{"\u0316\u0300cd", 6, ""},
|
||||
{"\u043E\u0308b", 5, ""},
|
||||
// incorrectly ordered combining characters
|
||||
{"ab\u0300\u0316", 1, ""}, // TODO: we could skip 'b' as well.
|
||||
{"ab\u0300\u0316cd", 1, ""},
|
||||
// Hangul
|
||||
{"같은", 0, ""},
|
||||
}
|
||||
|
||||
var quickSpanNFCTests = []PositionTest{
|
||||
// okay composed
|
||||
{"\u00C0", 2, ""},
|
||||
{"abc\u00C0", 5, ""},
|
||||
// correctly ordered combining characters
|
||||
{"ab\u0300", 1, ""},
|
||||
{"ab\u0300cd", 1, ""},
|
||||
{"ab\u0316\u0300", 1, ""},
|
||||
{"ab\u0316\u0300cd", 1, ""},
|
||||
{"\u00C0\u035D", 4, ""},
|
||||
// we do not special case leading combining characters
|
||||
{"\u0300cd", 0, ""},
|
||||
{"\u0300", 0, ""},
|
||||
{"\u0316\u0300", 0, ""},
|
||||
{"\u0316\u0300cd", 0, ""},
|
||||
// incorrectly ordered combining characters
|
||||
{"ab\u0300\u0316", 1, ""},
|
||||
{"ab\u0300\u0316cd", 1, ""},
|
||||
// Hangul
|
||||
{"같은", 6, ""},
|
||||
}
|
||||
|
||||
func doQuickSpan(rb *reorderBuffer, s string) int {
|
||||
return rb.f.form.QuickSpan([]byte(s))
|
||||
}
|
||||
|
||||
func doQuickSpanString(rb *reorderBuffer, s string) int {
|
||||
return rb.f.form.QuickSpanString(s)
|
||||
}
|
||||
|
||||
func TestQuickSpan(t *testing.T) {
|
||||
runPosTests(t, "TestQuickSpanNFD1", NFD, doQuickSpan, quickSpanTests)
|
||||
runPosTests(t, "TestQuickSpanNFD2", NFD, doQuickSpan, quickSpanNFDTests)
|
||||
runPosTests(t, "TestQuickSpanNFC1", NFC, doQuickSpan, quickSpanTests)
|
||||
runPosTests(t, "TestQuickSpanNFC2", NFC, doQuickSpan, quickSpanNFCTests)
|
||||
|
||||
runPosTests(t, "TestQuickSpanStringNFD1", NFD, doQuickSpanString, quickSpanTests)
|
||||
runPosTests(t, "TestQuickSpanStringNFD2", NFD, doQuickSpanString, quickSpanNFDTests)
|
||||
runPosTests(t, "TestQuickSpanStringNFC1", NFC, doQuickSpanString, quickSpanTests)
|
||||
runPosTests(t, "TestQuickSpanStringNFC2", NFC, doQuickSpanString, quickSpanNFCTests)
|
||||
}
|
||||
|
||||
var isNormalTests = []PositionTest{
|
||||
{"", 1, ""},
|
||||
// illegal runes
|
||||
{"\xff", 1, ""},
|
||||
// starters
|
||||
{"a", 1, ""},
|
||||
{"abc", 1, ""},
|
||||
{"\u043Eb", 1, ""},
|
||||
// incorrectly ordered combining characters
|
||||
{"\u0300\u0316", 0, ""},
|
||||
{"ab\u0300\u0316", 0, ""},
|
||||
{"ab\u0300\u0316cd", 0, ""},
|
||||
{"\u0300\u0316cd", 0, ""},
|
||||
}
|
||||
var isNormalNFDTests = []PositionTest{
|
||||
// needs decomposing
|
||||
{"\u00C0", 0, ""},
|
||||
{"abc\u00C0", 0, ""},
|
||||
// correctly ordered combining characters
|
||||
{"\u0300", 1, ""},
|
||||
{"ab\u0300", 1, ""},
|
||||
{"ab\u0300cd", 1, ""},
|
||||
{"\u0300cd", 1, ""},
|
||||
{"\u0316\u0300", 1, ""},
|
||||
{"ab\u0316\u0300", 1, ""},
|
||||
{"ab\u0316\u0300cd", 1, ""},
|
||||
{"\u0316\u0300cd", 1, ""},
|
||||
{"\u043E\u0308b", 1, ""},
|
||||
// Hangul
|
||||
{"같은", 0, ""},
|
||||
}
|
||||
var isNormalNFCTests = []PositionTest{
|
||||
// okay composed
|
||||
{"\u00C0", 1, ""},
|
||||
{"abc\u00C0", 1, ""},
|
||||
// need reordering
|
||||
{"a\u0300", 0, ""},
|
||||
{"a\u0300cd", 0, ""},
|
||||
{"a\u0316\u0300", 0, ""},
|
||||
{"a\u0316\u0300cd", 0, ""},
|
||||
// correctly ordered combining characters
|
||||
{"ab\u0300", 1, ""},
|
||||
{"ab\u0300cd", 1, ""},
|
||||
{"ab\u0316\u0300", 1, ""},
|
||||
{"ab\u0316\u0300cd", 1, ""},
|
||||
{"\u00C0\u035D", 1, ""},
|
||||
{"\u0300", 1, ""},
|
||||
{"\u0316\u0300cd", 1, ""},
|
||||
// Hangul
|
||||
{"같은", 1, ""},
|
||||
}
|
||||
|
||||
func isNormalF(rb *reorderBuffer, s string) int {
|
||||
if rb.f.form.IsNormal([]byte(s)) {
|
||||
return 1
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func TestIsNormal(t *testing.T) {
|
||||
runPosTests(t, "TestIsNormalNFD1", NFD, isNormalF, isNormalTests)
|
||||
runPosTests(t, "TestIsNormalNFD2", NFD, isNormalF, isNormalNFDTests)
|
||||
runPosTests(t, "TestIsNormalNFC1", NFC, isNormalF, isNormalTests)
|
||||
runPosTests(t, "TestIsNormalNFC2", NFC, isNormalF, isNormalNFCTests)
|
||||
}
|
||||
|
||||
// AppendTest is one case for runAppendTests: left is the initial contents
// of the output buffer, right is the input handed to the function under
// test, and out is the expected result.
type AppendTest struct {
	left  string
	right string
	out   string
}
|
||||
|
||||
// appendFunc is the signature of the functions exercised by runAppendTests:
// it combines the existing output out with the input s under form f and
// returns the resulting bytes.
type appendFunc func(f Form, out []byte, s string) []byte
|
||||
|
||||
func runAppendTests(t *testing.T, name string, f Form, fn appendFunc, tests []AppendTest) {
|
||||
for i, test := range tests {
|
||||
out := []byte(test.left)
|
||||
out = fn(f, out, test.right)
|
||||
outs := string(out)
|
||||
if len(outs) != len(test.out) {
|
||||
t.Errorf("%s:%d: length is %d; want %d", name, i, len(outs), len(test.out))
|
||||
}
|
||||
if outs != test.out {
|
||||
// Find first rune that differs and show context.
|
||||
ir := []rune(outs)
|
||||
ig := []rune(test.out)
|
||||
for j := 0; j < len(ir) && j < len(ig); j++ {
|
||||
if ir[j] == ig[j] {
|
||||
continue
|
||||
}
|
||||
if j -= 3; j < 0 {
|
||||
j = 0
|
||||
}
|
||||
for e := j + 7; j < e && j < len(ir) && j < len(ig); j++ {
|
||||
t.Errorf("%s:%d: runeAt(%d) = %U; want %U", name, i, j, ir[j], ig[j])
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var appendTests = []AppendTest{
|
||||
// empty buffers
|
||||
{"", "", ""},
|
||||
{"a", "", "a"},
|
||||
{"", "a", "a"},
|
||||
{"", "\u0041\u0307\u0304", "\u01E0"},
|
||||
// segment split across buffers
|
||||
{"", "a\u0300b", "\u00E0b"},
|
||||
{"a", "\u0300b", "\u00E0b"},
|
||||
{"a", "\u0300\u0316", "\u00E0\u0316"},
|
||||
{"a", "\u0316\u0300", "\u00E0\u0316"},
|
||||
{"a", "\u0300a\u0300", "\u00E0\u00E0"},
|
||||
{"a", "\u0300a\u0300a\u0300", "\u00E0\u00E0\u00E0"},
|
||||
{"a", "\u0300aaa\u0300aaa\u0300", "\u00E0aa\u00E0aa\u00E0"},
|
||||
{"a\u0300", "\u0327", "\u00E0\u0327"},
|
||||
{"a\u0327", "\u0300", "\u00E0\u0327"},
|
||||
{"a\u0316", "\u0300", "\u00E0\u0316"},
|
||||
{"\u0041\u0307", "\u0304", "\u01E0"},
|
||||
// Hangul
|
||||
{"", "\u110B\u1173", "\uC73C"},
|
||||
{"", "\u1103\u1161", "\uB2E4"},
|
||||
{"", "\u110B\u1173\u11B7", "\uC74C"},
|
||||
{"", "\u320E", "\x28\uAC00\x29"},
|
||||
{"", "\x28\u1100\u1161\x29", "\x28\uAC00\x29"},
|
||||
{"\u1103", "\u1161", "\uB2E4"},
|
||||
{"\u110B", "\u1173\u11B7", "\uC74C"},
|
||||
{"\u110B\u1173", "\u11B7", "\uC74C"},
|
||||
{"\uC73C", "\u11B7", "\uC74C"},
|
||||
// UTF-8 encoding split across buffers
|
||||
{"a\xCC", "\x80", "\u00E0"},
|
||||
{"a\xCC", "\x80b", "\u00E0b"},
|
||||
{"a\xCC", "\x80a\u0300", "\u00E0\u00E0"},
|
||||
{"a\xCC", "\x80\x80", "\u00E0\x80"},
|
||||
{"a\xCC", "\x80\xCC", "\u00E0\xCC"},
|
||||
{"a\u0316\xCC", "\x80a\u0316\u0300", "\u00E0\u0316\u00E0\u0316"},
|
||||
// ending in incomplete UTF-8 encoding
|
||||
{"", "\xCC", "\xCC"},
|
||||
{"a", "\xCC", "a\xCC"},
|
||||
{"a", "b\xCC", "ab\xCC"},
|
||||
{"\u0226", "\xCC", "\u0226\xCC"},
|
||||
// illegal runes
|
||||
{"", "\x80", "\x80"},
|
||||
{"", "\x80\x80\x80", "\x80\x80\x80"},
|
||||
{"", "\xCC\x80\x80\x80", "\xCC\x80\x80\x80"},
|
||||
{"", "a\x80", "a\x80"},
|
||||
{"", "a\x80\x80\x80", "a\x80\x80\x80"},
|
||||
{"", "a\x80\x80\x80\x80\x80\x80", "a\x80\x80\x80\x80\x80\x80"},
|
||||
{"a", "\x80\x80\x80", "a\x80\x80\x80"},
|
||||
// overflow
|
||||
{"", strings.Repeat("\x80", 33), strings.Repeat("\x80", 33)},
|
||||
{strings.Repeat("\x80", 33), "", strings.Repeat("\x80", 33)},
|
||||
{strings.Repeat("\x80", 33), strings.Repeat("\x80", 33), strings.Repeat("\x80", 66)},
|
||||
// overflow of combining characters
|
||||
{strings.Repeat("\u0300", 33), "", strings.Repeat("\u0300", 33)},
|
||||
// weird UTF-8
|
||||
{"\u00E0\xE1", "\x86", "\u00E0\xE1\x86"},
|
||||
{"a\u0300\u11B7", "\u0300", "\u00E0\u11B7\u0300"},
|
||||
{"a\u0300\u11B7\u0300", "\u0300", "\u00E0\u11B7\u0300\u0300"},
|
||||
{"\u0300", "\xF8\x80\x80\x80\x80\u0300", "\u0300\xF8\x80\x80\x80\x80\u0300"},
|
||||
{"\u0300", "\xFC\x80\x80\x80\x80\x80\u0300", "\u0300\xFC\x80\x80\x80\x80\x80\u0300"},
|
||||
{"\xF8\x80\x80\x80\x80\u0300", "\u0300", "\xF8\x80\x80\x80\x80\u0300\u0300"},
|
||||
{"\xFC\x80\x80\x80\x80\x80\u0300", "\u0300", "\xFC\x80\x80\x80\x80\x80\u0300\u0300"},
|
||||
{"\xF8\x80\x80\x80", "\x80\u0300\u0300", "\xF8\x80\x80\x80\x80\u0300\u0300"},
|
||||
}
|
||||
|
||||
func appendF(f Form, out []byte, s string) []byte {
|
||||
return f.Append(out, []byte(s)...)
|
||||
}
|
||||
|
||||
func appendStringF(f Form, out []byte, s string) []byte {
|
||||
return f.AppendString(out, s)
|
||||
}
|
||||
|
||||
func bytesF(f Form, out []byte, s string) []byte {
|
||||
buf := []byte{}
|
||||
buf = append(buf, out...)
|
||||
buf = append(buf, s...)
|
||||
return f.Bytes(buf)
|
||||
}
|
||||
|
||||
func stringF(f Form, out []byte, s string) []byte {
|
||||
outs := string(out) + s
|
||||
return []byte(f.String(outs))
|
||||
}
|
||||
|
||||
func TestAppend(t *testing.T) {
|
||||
runAppendTests(t, "TestAppend", NFKC, appendF, appendTests)
|
||||
runAppendTests(t, "TestAppendString", NFKC, appendStringF, appendTests)
|
||||
runAppendTests(t, "TestBytes", NFKC, bytesF, appendTests)
|
||||
runAppendTests(t, "TestString", NFKC, stringF, appendTests)
|
||||
}
|
||||
|
||||
func appendBench(f Form, in []byte) func() {
|
||||
buf := make([]byte, 0, 4*len(in))
|
||||
return func() {
|
||||
f.Append(buf, in...)
|
||||
}
|
||||
}
|
||||
|
||||
func iterBench(f Form, in []byte) func() {
|
||||
buf := make([]byte, 4*len(in))
|
||||
iter := Iter{}
|
||||
return func() {
|
||||
iter.SetInput(f, in)
|
||||
for !iter.Done() {
|
||||
iter.Next(buf)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func appendBenchmarks(bm []func(), f Form, in []byte) []func() {
|
||||
//bm = append(bm, appendBench(f, in))
|
||||
bm = append(bm, iterBench(f, in))
|
||||
return bm
|
||||
}
|
||||
|
||||
func doFormBenchmark(b *testing.B, inf, f Form, s string) {
|
||||
b.StopTimer()
|
||||
in := inf.Bytes([]byte(s))
|
||||
bm := appendBenchmarks(nil, f, in)
|
||||
b.SetBytes(int64(len(in) * len(bm)))
|
||||
b.StartTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
for _, fn := range bm {
|
||||
fn()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var ascii = strings.Repeat("There is nothing to change here! ", 500)
|
||||
|
||||
func BenchmarkNormalizeAsciiNFC(b *testing.B) {
|
||||
doFormBenchmark(b, NFC, NFC, ascii)
|
||||
}
|
||||
func BenchmarkNormalizeAsciiNFD(b *testing.B) {
|
||||
doFormBenchmark(b, NFC, NFD, ascii)
|
||||
}
|
||||
func BenchmarkNormalizeAsciiNFKC(b *testing.B) {
|
||||
doFormBenchmark(b, NFC, NFKC, ascii)
|
||||
}
|
||||
func BenchmarkNormalizeAsciiNFKD(b *testing.B) {
|
||||
doFormBenchmark(b, NFC, NFKD, ascii)
|
||||
}
|
||||
|
||||
func BenchmarkNormalizeNFC2NFC(b *testing.B) {
|
||||
doFormBenchmark(b, NFC, NFC, txt_all)
|
||||
}
|
||||
func BenchmarkNormalizeNFC2NFD(b *testing.B) {
|
||||
doFormBenchmark(b, NFC, NFD, txt_all)
|
||||
}
|
||||
func BenchmarkNormalizeNFD2NFC(b *testing.B) {
|
||||
doFormBenchmark(b, NFD, NFC, txt_all)
|
||||
}
|
||||
func BenchmarkNormalizeNFD2NFD(b *testing.B) {
|
||||
doFormBenchmark(b, NFD, NFD, txt_all)
|
||||
}
|
||||
|
||||
// Hangul is often special-cased, so we test it separately.
|
||||
func BenchmarkNormalizeHangulNFC2NFC(b *testing.B) {
|
||||
doFormBenchmark(b, NFC, NFC, txt_kr)
|
||||
}
|
||||
func BenchmarkNormalizeHangulNFC2NFD(b *testing.B) {
|
||||
doFormBenchmark(b, NFC, NFD, txt_kr)
|
||||
}
|
||||
func BenchmarkNormalizeHangulNFD2NFC(b *testing.B) {
|
||||
doFormBenchmark(b, NFD, NFC, txt_kr)
|
||||
}
|
||||
func BenchmarkNormalizeHangulNFD2NFD(b *testing.B) {
|
||||
doFormBenchmark(b, NFD, NFD, txt_kr)
|
||||
}
|
||||
|
||||
var forms = []Form{NFC, NFD, NFKC, NFKD}
|
||||
|
||||
func doTextBenchmark(b *testing.B, s string) {
|
||||
b.StopTimer()
|
||||
in := []byte(s)
|
||||
bm := []func(){}
|
||||
for _, f := range forms {
|
||||
bm = appendBenchmarks(bm, f, in)
|
||||
}
|
||||
b.SetBytes(int64(len(s) * len(bm)))
|
||||
b.StartTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
for _, f := range bm {
|
||||
f()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkCanonicalOrdering(b *testing.B) {
|
||||
doTextBenchmark(b, txt_canon)
|
||||
}
|
||||
func BenchmarkExtendedLatin(b *testing.B) {
|
||||
doTextBenchmark(b, txt_vn)
|
||||
}
|
||||
func BenchmarkMiscTwoByteUtf8(b *testing.B) {
|
||||
doTextBenchmark(b, twoByteUtf8)
|
||||
}
|
||||
func BenchmarkMiscThreeByteUtf8(b *testing.B) {
|
||||
doTextBenchmark(b, threeByteUtf8)
|
||||
}
|
||||
func BenchmarkHangul(b *testing.B) {
|
||||
doTextBenchmark(b, txt_kr)
|
||||
}
|
||||
func BenchmarkJapanese(b *testing.B) {
|
||||
doTextBenchmark(b, txt_jp)
|
||||
}
|
||||
func BenchmarkChinese(b *testing.B) {
|
||||
doTextBenchmark(b, txt_cn)
|
||||
}
|
||||
func BenchmarkOverflow(b *testing.B) {
|
||||
doTextBenchmark(b, overflow)
|
||||
}
|
||||
|
||||
var overflow = string(bytes.Repeat([]byte("\u035D"), 4096)) + "\u035B"
|
||||
|
||||
// Tests sampled from the Canonical ordering tests (Part 2) of
|
||||
// http://unicode.org/Public/UNIDATA/NormalizationTest.txt
|
||||
const txt_canon = `\u0061\u0315\u0300\u05AE\u0300\u0062 \u0061\u0300\u0315\u0300\u05AE\u0062
|
||||
\u0061\u0302\u0315\u0300\u05AE\u0062 \u0061\u0307\u0315\u0300\u05AE\u0062
|
||||
\u0061\u0315\u0300\u05AE\u030A\u0062 \u0061\u059A\u0316\u302A\u031C\u0062
|
||||
\u0061\u032E\u059A\u0316\u302A\u0062 \u0061\u0338\u093C\u0334\u0062
|
||||
\u0061\u059A\u0316\u302A\u0339 \u0061\u0341\u0315\u0300\u05AE\u0062
|
||||
\u0061\u0348\u059A\u0316\u302A\u0062 \u0061\u0361\u0345\u035D\u035C\u0062
|
||||
\u0061\u0366\u0315\u0300\u05AE\u0062 \u0061\u0315\u0300\u05AE\u0486\u0062
|
||||
\u0061\u05A4\u059A\u0316\u302A\u0062 \u0061\u0315\u0300\u05AE\u0613\u0062
|
||||
\u0061\u0315\u0300\u05AE\u0615\u0062 \u0061\u0617\u0315\u0300\u05AE\u0062
|
||||
\u0061\u0619\u0618\u064D\u064E\u0062 \u0061\u0315\u0300\u05AE\u0654\u0062
|
||||
\u0061\u0315\u0300\u05AE\u06DC\u0062 \u0061\u0733\u0315\u0300\u05AE\u0062
|
||||
\u0061\u0744\u059A\u0316\u302A\u0062 \u0061\u0315\u0300\u05AE\u0745\u0062
|
||||
\u0061\u09CD\u05B0\u094D\u3099\u0062 \u0061\u0E38\u0E48\u0E38\u0C56\u0062
|
||||
\u0061\u0EB8\u0E48\u0E38\u0E49\u0062 \u0061\u0F72\u0F71\u0EC8\u0F71\u0062
|
||||
\u0061\u1039\u05B0\u094D\u3099\u0062 \u0061\u05B0\u094D\u3099\u1A60\u0062
|
||||
\u0061\u3099\u093C\u0334\u1BE6\u0062 \u0061\u3099\u093C\u0334\u1C37\u0062
|
||||
\u0061\u1CD9\u059A\u0316\u302A\u0062 \u0061\u2DED\u0315\u0300\u05AE\u0062
|
||||
\u0061\u2DEF\u0315\u0300\u05AE\u0062 \u0061\u302D\u302E\u059A\u0316\u0062`
|
||||
|
||||
// Taken from http://creativecommons.org/licenses/by-sa/3.0/vn/
|
||||
const txt_vn = `Với các điều kiện sau: Ghi nhận công của tác giả.
|
||||
Nếu bạn sử dụng, chuyển đổi, hoặc xây dựng dự án từ
|
||||
nội dung được chia sẻ này, bạn phải áp dụng giấy phép này hoặc
|
||||
một giấy phép khác có các điều khoản tương tự như giấy phép này
|
||||
cho dự án của bạn. Hiểu rằng: Miễn — Bất kỳ các điều kiện nào
|
||||
trên đây cũng có thể được miễn bỏ nếu bạn được sự cho phép của
|
||||
người sở hữu bản quyền. Phạm vi công chúng — Khi tác phẩm hoặc
|
||||
bất kỳ chương nào của tác phẩm đã trong vùng dành cho công
|
||||
chúng theo quy định của pháp luật thì tình trạng của nó không
|
||||
bị ảnh hưởng bởi giấy phép trong bất kỳ trường hợp nào.`
|
||||
|
||||
// Taken from http://creativecommons.org/licenses/by-sa/1.0/deed.ru
|
||||
const txt_ru = `При обязательном соблюдении следующих условий:
|
||||
Attribution — Вы должны атрибутировать произведение (указывать
|
||||
автора и источник) в порядке, предусмотренном автором или
|
||||
лицензиаром (но только так, чтобы никоим образом не подразумевалось,
|
||||
что они поддерживают вас или использование вами данного произведения).
|
||||
Υπό τις ακόλουθες προϋποθέσεις:`
|
||||
|
||||
// Taken from http://creativecommons.org/licenses/by-sa/3.0/gr/
|
||||
const txt_gr = `Αναφορά Δημιουργού — Θα πρέπει να κάνετε την αναφορά στο έργο με τον
|
||||
τρόπο που έχει οριστεί από το δημιουργό ή το χορηγούντο την άδεια
|
||||
(χωρίς όμως να εννοείται με οποιονδήποτε τρόπο ότι εγκρίνουν εσάς ή
|
||||
τη χρήση του έργου από εσάς). Παρόμοια Διανομή — Εάν αλλοιώσετε,
|
||||
τροποποιήσετε ή δημιουργήσετε περαιτέρω βασισμένοι στο έργο θα
|
||||
μπορείτε να διανέμετε το έργο που θα προκύψει μόνο με την ίδια ή
|
||||
παρόμοια άδεια.`
|
||||
|
||||
// Taken from http://creativecommons.org/licenses/by-sa/3.0/deed.ar
|
||||
const txt_ar = `بموجب الشروط التالية نسب المصنف — يجب عليك أن
|
||||
تنسب العمل بالطريقة التي تحددها المؤلف أو المرخص (ولكن ليس بأي حال من
|
||||
الأحوال أن توحي وتقترح بتحول أو استخدامك للعمل).
|
||||
المشاركة على قدم المساواة — إذا كنت يعدل ، والتغيير ، أو الاستفادة
|
||||
من هذا العمل ، قد ينتج عن توزيع العمل إلا في ظل تشابه او تطابق فى واحد
|
||||
لهذا الترخيص.`
|
||||
|
||||
// Taken from http://creativecommons.org/licenses/by-sa/1.0/il/
|
||||
const txt_il = `בכפוף לתנאים הבאים: ייחוס — עליך לייחס את היצירה (לתת קרדיט) באופן
|
||||
המצויין על-ידי היוצר או מעניק הרישיון (אך לא בשום אופן המרמז על כך
|
||||
שהם תומכים בך או בשימוש שלך ביצירה). שיתוף זהה — אם תחליט/י לשנות,
|
||||
לעבד או ליצור יצירה נגזרת בהסתמך על יצירה זו, תוכל/י להפיץ את יצירתך
|
||||
החדשה רק תחת אותו הרישיון או רישיון דומה לרישיון זה.`
|
||||
|
||||
const twoByteUtf8 = txt_ru + txt_gr + txt_ar + txt_il
|
||||
|
||||
// Taken from http://creativecommons.org/licenses/by-sa/2.0/kr/
|
||||
const txt_kr = `다음과 같은 조건을 따라야 합니다: 저작자표시
|
||||
(Attribution) — 저작자나 이용허락자가 정한 방법으로 저작물의
|
||||
원저작자를 표시하여야 합니다(그러나 원저작자가 이용자나 이용자의
|
||||
이용을 보증하거나 추천한다는 의미로 표시해서는 안됩니다).
|
||||
동일조건변경허락 — 이 저작물을 이용하여 만든 이차적 저작물에는 본
|
||||
라이선스와 동일한 라이선스를 적용해야 합니다.`
|
||||
|
||||
// Taken from http://creativecommons.org/licenses/by-sa/3.0/th/
|
||||
const txt_th = `ภายใต้เงื่อนไข ดังต่อไปนี้ : แสดงที่มา — คุณต้องแสดงที่
|
||||
มาของงานดังกล่าว ตามรูปแบบที่ผู้สร้างสรรค์หรือผู้อนุญาตกำหนด (แต่
|
||||
ไม่ใช่ในลักษณะที่ว่า พวกเขาสนับสนุนคุณหรือสนับสนุนการที่
|
||||
คุณนำงานไปใช้) อนุญาตแบบเดียวกัน — หากคุณดัดแปลง เปลี่ยนรูป หรื
|
||||
อต่อเติมงานนี้ คุณต้องใช้สัญญาอนุญาตแบบเดียวกันหรือแบบที่เหมื
|
||||
อนกับสัญญาอนุญาตที่ใช้กับงานนี้เท่านั้น`
|
||||
|
||||
const threeByteUtf8 = txt_th
|
||||
|
||||
// Taken from http://creativecommons.org/licenses/by-sa/2.0/jp/
|
||||
const txt_jp = `あなたの従うべき条件は以下の通りです。
|
||||
表示 — あなたは原著作者のクレジットを表示しなければなりません。
|
||||
継承 — もしあなたがこの作品を改変、変形または加工した場合、
|
||||
あなたはその結果生じた作品をこの作品と同一の許諾条件の下でのみ
|
||||
頒布することができます。`
|
||||
|
||||
// http://creativecommons.org/licenses/by-sa/2.5/cn/
|
||||
const txt_cn = `您可以自由: 复制、发行、展览、表演、放映、
|
||||
广播或通过信息网络传播本作品 创作演绎作品
|
||||
对本作品进行商业性使用 惟须遵守下列条件:
|
||||
署名 — 您必须按照作者或者许可人指定的方式对作品进行署名。
|
||||
相同方式共享 — 如果您改变、转换本作品或者以本作品为基础进行创作,
|
||||
您只能采用与本协议相同的许可协议发布基于本作品的演绎作品。`
|
||||
|
||||
const txt_cjk = txt_cn + txt_jp + txt_kr
|
||||
const txt_all = txt_vn + twoByteUtf8 + threeByteUtf8 + txt_cjk
|
||||
@@ -1,309 +0,0 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"exp/norm"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"path"
|
||||
"regexp"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
loadTestData()
|
||||
CharacterByCharacterTests()
|
||||
StandardTests()
|
||||
PerformanceTest()
|
||||
if errorCount == 0 {
|
||||
fmt.Println("PASS")
|
||||
}
|
||||
}
|
||||
|
||||
// file is the name of the conformance data file, both as the last element of
// the default download URL and as the local file name used with -local.
const file = "NormalizationTest.txt"

var (
	// url is the location the test data is fetched from.
	url = flag.String("url",
		"http://www.unicode.org/Public/6.0.0/ucd/"+file,
		"URL of Unicode database directory")

	// localFiles makes loadTestData read the data file from the current
	// directory instead of the URL above.
	localFiles = flag.Bool("local",
		false,
		"data files have been copied to the current directory; for debugging only")

	// logger writes to standard error, prefixing each message with the
	// short file name and line number of the call site.
	logger = log.New(os.Stderr, "", log.Lshortfile)
)
|
||||
|
||||
// This regression test runs the test set in NormalizationTest.txt
|
||||
// (taken from http://www.unicode.org/Public/6.0.0/ucd/).
|
||||
//
|
||||
// NormalizationTest.txt has form:
|
||||
// @Part0 # Specific cases
|
||||
// #
|
||||
// 1E0A;1E0A;0044 0307;1E0A;0044 0307; # (Ḋ; Ḋ; D◌̇; Ḋ; D◌̇; ) LATIN CAPITAL LETTER D WITH DOT ABOVE
|
||||
// 1E0C;1E0C;0044 0323;1E0C;0044 0323; # (Ḍ; Ḍ; D◌̣; Ḍ; D◌̣; ) LATIN CAPITAL LETTER D WITH DOT BELOW
|
||||
//
|
||||
// Each test has 5 columns (c1, c2, c3, c4, c5), where
|
||||
// (c1, c2, c3, c4, c5) == (c1, NFC(c1), NFD(c1), NFKC(c1), NFKD(c1))
|
||||
//
|
||||
// CONFORMANCE:
|
||||
// 1. The following invariants must be true for all conformant implementations
|
||||
//
|
||||
// NFC
|
||||
// c2 == NFC(c1) == NFC(c2) == NFC(c3)
|
||||
// c4 == NFC(c4) == NFC(c5)
|
||||
//
|
||||
// NFD
|
||||
// c3 == NFD(c1) == NFD(c2) == NFD(c3)
|
||||
// c5 == NFD(c4) == NFD(c5)
|
||||
//
|
||||
// NFKC
|
||||
// c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
|
||||
//
|
||||
// NFKD
|
||||
// c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
|
||||
//
|
||||
// 2. For every code point X assigned in this version of Unicode that is not
|
||||
// specifically listed in Part 1, the following invariants must be true
|
||||
// for all conformant implementations:
|
||||
//
|
||||
// X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X)
|
||||
//
|
||||
|
||||
// Column indices of a test line in NormalizationTest.txt. They index
// Test.cols; cMaxColumns is the number of columns.
const (
	cRaw        = iota // c1: the source string
	cNFC               // c2: NFC(c1)
	cNFD               // c3: NFD(c1)
	cNFKC              // c4: NFKC(c1)
	cNFKD              // c5: NFKD(c1)
	cMaxColumns        // column count
)
|
||||
|
||||
// Holds data from NormalizationTest.txt
|
||||
var part []Part
|
||||
|
||||
type Part struct {
|
||||
name string
|
||||
number int
|
||||
tests []Test
|
||||
}
|
||||
|
||||
type Test struct {
|
||||
name string
|
||||
partnr int
|
||||
number int
|
||||
r rune // used for character by character test
|
||||
cols [cMaxColumns]string // Each has 5 entries, see below.
|
||||
}
|
||||
|
||||
func (t Test) Name() string {
|
||||
if t.number < 0 {
|
||||
return part[t.partnr].name
|
||||
}
|
||||
return fmt.Sprintf("%s:%d", part[t.partnr].name, t.number)
|
||||
}
|
||||
|
||||
var (
	// partRe matches a section header such as "@Part0 # Specific cases\n";
	// group 1 is the single-digit part number, group 2 the description.
	partRe = regexp.MustCompile(`@Part(\d) # (.*)\n$`)

	// testRe matches a test line: five ';'-terminated columns of hex code
	// points (groups 1-5) followed by " # " and a comment (group 6).
	testRe = regexp.MustCompile(`^` + strings.Repeat(`([\dA-F ]+);`, 5) + ` # (.*)\n?$`)

	// counter numbers the test lines in the order they are parsed.
	counter int
)
|
||||
|
||||
// Load the data form NormalizationTest.txt
|
||||
func loadTestData() {
|
||||
if *localFiles {
|
||||
pwd, _ := os.Getwd()
|
||||
*url = "file://" + path.Join(pwd, file)
|
||||
}
|
||||
t := &http.Transport{}
|
||||
t.RegisterProtocol("file", http.NewFileTransport(http.Dir("/")))
|
||||
c := &http.Client{Transport: t}
|
||||
resp, err := c.Get(*url)
|
||||
if err != nil {
|
||||
logger.Fatal(err)
|
||||
}
|
||||
if resp.StatusCode != 200 {
|
||||
logger.Fatal("bad GET status for "+file, resp.Status)
|
||||
}
|
||||
f := resp.Body
|
||||
defer f.Close()
|
||||
input := bufio.NewReader(f)
|
||||
for {
|
||||
line, err := input.ReadString('\n')
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
logger.Fatal(err)
|
||||
}
|
||||
if len(line) == 0 || line[0] == '#' {
|
||||
continue
|
||||
}
|
||||
m := partRe.FindStringSubmatch(line)
|
||||
if m != nil {
|
||||
if len(m) < 3 {
|
||||
logger.Fatal("Failed to parse Part: ", line)
|
||||
}
|
||||
i, err := strconv.Atoi(m[1])
|
||||
if err != nil {
|
||||
logger.Fatal(err)
|
||||
}
|
||||
name := m[2]
|
||||
part = append(part, Part{name: name[:len(name)-1], number: i})
|
||||
continue
|
||||
}
|
||||
m = testRe.FindStringSubmatch(line)
|
||||
if m == nil || len(m) < 7 {
|
||||
logger.Fatalf(`Failed to parse: "%s" result: %#v`, line, m)
|
||||
}
|
||||
test := Test{name: m[6], partnr: len(part) - 1, number: counter}
|
||||
counter++
|
||||
for j := 1; j < len(m)-1; j++ {
|
||||
for _, split := range strings.Split(m[j], " ") {
|
||||
r, err := strconv.ParseUint(split, 16, 64)
|
||||
if err != nil {
|
||||
logger.Fatal(err)
|
||||
}
|
||||
if test.r == 0 {
|
||||
// save for CharacterByCharacterTests
|
||||
test.r = rune(r)
|
||||
}
|
||||
var buf [utf8.UTFMax]byte
|
||||
sz := utf8.EncodeRune(buf[:], rune(r))
|
||||
test.cols[j-1] += string(buf[:sz])
|
||||
}
|
||||
}
|
||||
part := &part[len(part)-1]
|
||||
part.tests = append(part.tests, test)
|
||||
}
|
||||
}
|
||||
|
||||
var (
	// fstr maps a norm.Form value, used as an index, to its printable name.
	fstr = []string{"NFC", "NFD", "NFKC", "NFKD"}

	// errorCount is the number of mismatches recorded so far.
	errorCount int
)
|
||||
|
||||
func cmpResult(t *Test, name string, f norm.Form, gold, test, result string) {
|
||||
if gold != result {
|
||||
errorCount++
|
||||
if errorCount > 20 {
|
||||
return
|
||||
}
|
||||
st, sr, sg := []rune(test), []rune(result), []rune(gold)
|
||||
logger.Printf("%s:%s: %s(%X)=%X; want:%X: %s",
|
||||
t.Name(), name, fstr[f], st, sr, sg, t.name)
|
||||
}
|
||||
}
|
||||
|
||||
func cmpIsNormal(t *Test, name string, f norm.Form, test string, result, want bool) {
|
||||
if result != want {
|
||||
errorCount++
|
||||
if errorCount > 20 {
|
||||
return
|
||||
}
|
||||
logger.Printf("%s:%s: %s(%X)=%v; want: %v", t.Name(), name, fstr[f], []rune(test), result, want)
|
||||
}
|
||||
}
|
||||
|
||||
func doTest(t *Test, f norm.Form, gold, test string) {
|
||||
result := f.Bytes([]byte(test))
|
||||
cmpResult(t, "Bytes", f, gold, test, string(result))
|
||||
sresult := f.String(test)
|
||||
cmpResult(t, "String", f, gold, test, sresult)
|
||||
buf := make([]byte, norm.MaxSegmentSize)
|
||||
acc := []byte{}
|
||||
i := norm.Iter{}
|
||||
i.SetInputString(f, test)
|
||||
for !i.Done() {
|
||||
n := i.Next(buf)
|
||||
acc = append(acc, buf[:n]...)
|
||||
}
|
||||
cmpResult(t, "Iter.Next", f, gold, test, string(acc))
|
||||
for i := range test {
|
||||
out := f.Append(f.Bytes([]byte(test[:i])), []byte(test[i:])...)
|
||||
cmpResult(t, fmt.Sprintf(":Append:%d", i), f, gold, test, string(out))
|
||||
}
|
||||
cmpIsNormal(t, "IsNormal", f, test, f.IsNormal([]byte(test)), test == gold)
|
||||
}
|
||||
|
||||
func doConformanceTests(t *Test, partn int) {
|
||||
for i := 0; i <= 2; i++ {
|
||||
doTest(t, norm.NFC, t.cols[1], t.cols[i])
|
||||
doTest(t, norm.NFD, t.cols[2], t.cols[i])
|
||||
doTest(t, norm.NFKC, t.cols[3], t.cols[i])
|
||||
doTest(t, norm.NFKD, t.cols[4], t.cols[i])
|
||||
}
|
||||
for i := 3; i <= 4; i++ {
|
||||
doTest(t, norm.NFC, t.cols[3], t.cols[i])
|
||||
doTest(t, norm.NFD, t.cols[4], t.cols[i])
|
||||
doTest(t, norm.NFKC, t.cols[3], t.cols[i])
|
||||
doTest(t, norm.NFKD, t.cols[4], t.cols[i])
|
||||
}
|
||||
}
|
||||
|
||||
func CharacterByCharacterTests() {
|
||||
tests := part[1].tests
|
||||
var last rune = 0
|
||||
for i := 0; i <= len(tests); i++ { // last one is special case
|
||||
var r rune
|
||||
if i == len(tests) {
|
||||
r = 0x2FA1E // Don't have to go to 0x10FFFF
|
||||
} else {
|
||||
r = tests[i].r
|
||||
}
|
||||
for last++; last < r; last++ {
|
||||
// Check all characters that were not explicitly listed in the test.
|
||||
t := &Test{partnr: 1, number: -1}
|
||||
char := string(last)
|
||||
doTest(t, norm.NFC, char, char)
|
||||
doTest(t, norm.NFD, char, char)
|
||||
doTest(t, norm.NFKC, char, char)
|
||||
doTest(t, norm.NFKD, char, char)
|
||||
}
|
||||
if i < len(tests) {
|
||||
doConformanceTests(&tests[i], 1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func StandardTests() {
|
||||
for _, j := range []int{0, 2, 3} {
|
||||
for _, test := range part[j].tests {
|
||||
doConformanceTests(&test, j)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// PerformanceTest verifies that normalization is O(n). If any of the
|
||||
// code does not properly check for maxCombiningChars, normalization
|
||||
// may exhibit O(n**2) behavior.
|
||||
func PerformanceTest() {
|
||||
runtime.GOMAXPROCS(2)
|
||||
success := make(chan bool, 1)
|
||||
go func() {
|
||||
buf := bytes.Repeat([]byte("\u035D"), 1024*1024)
|
||||
buf = append(buf, "\u035B"...)
|
||||
norm.NFC.Append(nil, buf...)
|
||||
success <- true
|
||||
}()
|
||||
timeout := time.After(1 * time.Second)
|
||||
select {
|
||||
case <-success:
|
||||
// test completed before the timeout
|
||||
case <-timeout:
|
||||
errorCount++
|
||||
logger.Printf(`unexpectedly long time to complete PerformanceTest`)
|
||||
}
|
||||
}
|
||||
@@ -1,126 +0,0 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package norm
|
||||
|
||||
import "io"
|
||||
|
||||
type normWriter struct {
|
||||
rb reorderBuffer
|
||||
w io.Writer
|
||||
buf []byte
|
||||
}
|
||||
|
||||
// Write implements the standard write interface. If the last characters are
|
||||
// not at a normalization boundary, the bytes will be buffered for the next
|
||||
// write. The remaining bytes will be written on close.
|
||||
func (w *normWriter) Write(data []byte) (n int, err error) {
|
||||
// Process data in pieces to keep w.buf size bounded.
|
||||
const chunk = 4000
|
||||
|
||||
for len(data) > 0 {
|
||||
// Normalize into w.buf.
|
||||
m := len(data)
|
||||
if m > chunk {
|
||||
m = chunk
|
||||
}
|
||||
w.rb.src = inputBytes(data[:m])
|
||||
w.rb.nsrc = m
|
||||
w.buf = doAppend(&w.rb, w.buf, 0)
|
||||
data = data[m:]
|
||||
n += m
|
||||
|
||||
// Write out complete prefix, save remainder.
|
||||
// Note that lastBoundary looks back at most 30 runes.
|
||||
i := lastBoundary(&w.rb.f, w.buf)
|
||||
if i == -1 {
|
||||
i = 0
|
||||
}
|
||||
if i > 0 {
|
||||
if _, err = w.w.Write(w.buf[:i]); err != nil {
|
||||
break
|
||||
}
|
||||
bn := copy(w.buf, w.buf[i:])
|
||||
w.buf = w.buf[:bn]
|
||||
}
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
|
||||
// Close forces data that remains in the buffer to be written.
|
||||
func (w *normWriter) Close() error {
|
||||
if len(w.buf) > 0 {
|
||||
_, err := w.w.Write(w.buf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Writer returns a new writer that implements Write(b)
|
||||
// by writing f(b) to w. The returned writer may use an
|
||||
// an internal buffer to maintain state across Write calls.
|
||||
// Calling its Close method writes any buffered data to w.
|
||||
func (f Form) Writer(w io.Writer) io.WriteCloser {
|
||||
wr := &normWriter{rb: reorderBuffer{}, w: w}
|
||||
wr.rb.init(f, nil)
|
||||
return wr
|
||||
}
|
||||
|
||||
type normReader struct {
|
||||
rb reorderBuffer
|
||||
r io.Reader
|
||||
inbuf []byte
|
||||
outbuf []byte
|
||||
bufStart int
|
||||
lastBoundary int
|
||||
err error
|
||||
}
|
||||
|
||||
// Read implements the standard read interface.
|
||||
func (r *normReader) Read(p []byte) (int, error) {
|
||||
for {
|
||||
if r.lastBoundary-r.bufStart > 0 {
|
||||
n := copy(p, r.outbuf[r.bufStart:r.lastBoundary])
|
||||
r.bufStart += n
|
||||
if r.lastBoundary-r.bufStart > 0 {
|
||||
return n, nil
|
||||
}
|
||||
return n, r.err
|
||||
}
|
||||
if r.err != nil {
|
||||
return 0, r.err
|
||||
}
|
||||
outn := copy(r.outbuf, r.outbuf[r.lastBoundary:])
|
||||
r.outbuf = r.outbuf[0:outn]
|
||||
r.bufStart = 0
|
||||
|
||||
n, err := r.r.Read(r.inbuf)
|
||||
r.rb.src = inputBytes(r.inbuf[0:n])
|
||||
r.rb.nsrc, r.err = n, err
|
||||
if n > 0 {
|
||||
r.outbuf = doAppend(&r.rb, r.outbuf, 0)
|
||||
}
|
||||
if err == io.EOF {
|
||||
r.lastBoundary = len(r.outbuf)
|
||||
} else {
|
||||
r.lastBoundary = lastBoundary(&r.rb.f, r.outbuf)
|
||||
if r.lastBoundary == -1 {
|
||||
r.lastBoundary = 0
|
||||
}
|
||||
}
|
||||
}
|
||||
panic("should not reach here")
|
||||
}
|
||||
|
||||
// Reader returns a new reader that implements Read
|
||||
// by reading data from r and returning f(data).
|
||||
func (f Form) Reader(r io.Reader) io.Reader {
|
||||
const chunk = 4000
|
||||
buf := make([]byte, chunk)
|
||||
rr := &normReader{rb: reorderBuffer{}, r: r, inbuf: buf}
|
||||
rr.rb.init(f, buf)
|
||||
return rr
|
||||
}
|
||||
@@ -1,68 +0,0 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package norm
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
var ioTests = []AppendTest{
|
||||
{"", strings.Repeat("a\u0316\u0300", 6), strings.Repeat("\u00E0\u0316", 6)},
|
||||
{"", strings.Repeat("a\u0300\u0316", 4000), strings.Repeat("\u00E0\u0316", 4000)},
|
||||
{"", strings.Repeat("\x80\x80", 4000), strings.Repeat("\x80\x80", 4000)},
|
||||
{"", "\u0041\u0307\u0304", "\u01E0"},
|
||||
}
|
||||
|
||||
var bufSizes = []int{1, 2, 3, 4, 5, 6, 7, 8, 100, 101, 102, 103, 4000, 4001, 4002, 4003}
|
||||
|
||||
func readFunc(size int) appendFunc {
|
||||
return func(f Form, out []byte, s string) []byte {
|
||||
out = append(out, s...)
|
||||
r := f.Reader(bytes.NewBuffer(out))
|
||||
buf := make([]byte, size)
|
||||
result := []byte{}
|
||||
for n, err := 0, error(nil); err == nil; {
|
||||
n, err = r.Read(buf)
|
||||
result = append(result, buf[:n]...)
|
||||
}
|
||||
return result
|
||||
}
|
||||
}
|
||||
|
||||
func TestReader(t *testing.T) {
|
||||
for _, s := range bufSizes {
|
||||
name := fmt.Sprintf("TestReader%da", s)
|
||||
runAppendTests(t, name, NFKC, readFunc(s), appendTests)
|
||||
name = fmt.Sprintf("TestReader%db", s)
|
||||
runAppendTests(t, name, NFKC, readFunc(s), ioTests)
|
||||
}
|
||||
}
|
||||
|
||||
func writeFunc(size int) appendFunc {
|
||||
return func(f Form, out []byte, s string) []byte {
|
||||
in := append(out, s...)
|
||||
result := new(bytes.Buffer)
|
||||
w := f.Writer(result)
|
||||
buf := make([]byte, size)
|
||||
for n := 0; len(in) > 0; in = in[n:] {
|
||||
n = copy(buf, in)
|
||||
_, _ = w.Write(buf[:n])
|
||||
}
|
||||
w.Close()
|
||||
return result.Bytes()
|
||||
}
|
||||
}
|
||||
|
||||
func TestWriter(t *testing.T) {
|
||||
for _, s := range bufSizes {
|
||||
name := fmt.Sprintf("TestWriter%da", s)
|
||||
runAppendTests(t, name, NFKC, writeFunc(s), appendTests)
|
||||
name = fmt.Sprintf("TestWriter%db", s)
|
||||
runAppendTests(t, name, NFKC, writeFunc(s), ioTests)
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,237 +0,0 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package norm
|
||||
|
||||
// valueRange is one entry of a sparse block. The first entry of a block is a
// header that reuses the same fields: value holds the stride and lo holds the
// number of ranges that follow.
type valueRange struct {
	value uint16 // header: value:stride
	lo    byte   // header: lo:n
	hi    byte
}

// trie is the lookup structure used to find the value for a UTF-8 encoding.
type trie struct {
	// index maps a lead byte, or a (block, continuation byte) pair, to the
	// number of the next block.
	index []uint8
	// values holds the dense blocks, 64 entries each.
	values []uint16
	// sparse holds the sparse blocks: a header followed by its ranges.
	sparse []valueRange
	// sparseOffset maps (block number - cutoff) to the block's header in sparse.
	sparseOffset []uint16
	// cutoff is the first sparse block number: indices >= cutoff are sparse.
	cutoff uint8
}
|
||||
|
||||
// lookupValue determines the type of block n and looks up the value for b.
|
||||
// For n < t.cutoff, the block is a simple lookup table. Otherwise, the block
|
||||
// is a list of ranges with an accompanying value. Given a matching range r,
|
||||
// the value for b is by r.value + (b - r.lo) * stride.
|
||||
func (t *trie) lookupValue(n uint8, b byte) uint16 {
|
||||
if n < t.cutoff {
|
||||
return t.values[uint16(n)<<6+uint16(b&maskx)]
|
||||
}
|
||||
offset := t.sparseOffset[n-t.cutoff]
|
||||
header := t.sparse[offset]
|
||||
lo := offset + 1
|
||||
hi := lo + uint16(header.lo)
|
||||
for lo < hi {
|
||||
m := lo + (hi-lo)/2
|
||||
r := t.sparse[m]
|
||||
if r.lo <= b && b <= r.hi {
|
||||
return r.value + uint16(b-r.lo)*header.value
|
||||
}
|
||||
if b < r.lo {
|
||||
hi = m
|
||||
} else {
|
||||
lo = m + 1
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// Byte patterns used to classify UTF-8 bytes. The lookup functions compare
// the first byte against these thresholds in ascending order; a byte in
// [tx, t2) is treated as a continuation byte.
const (
	t1 = 0x00 // 0000 0000
	tx = 0x80 // 1000 0000
	t2 = 0xC0 // 1100 0000
	t3 = 0xE0 // 1110 0000
	t4 = 0xF0 // 1111 0000
	t5 = 0xF8 // 1111 1000
	t6 = 0xFC // 1111 1100
	te = 0xFE // 1111 1110
)

// Bit masks; maskx selects the low six bits of a continuation byte.
const (
	maskx = 0x3F // 0011 1111
	mask2 = 0x1F // 0001 1111
	mask3 = 0x0F // 0000 1111
	mask4 = 0x07 // 0000 0111
)
|
||||
|
||||
// lookup returns the trie value for the first UTF-8 encoding in s and
|
||||
// the width in bytes of this encoding. The size will be 0 if s does not
|
||||
// hold enough bytes to complete the encoding. len(s) must be greater than 0.
|
||||
func (t *trie) lookup(s []byte) (v uint16, sz int) {
|
||||
c0 := s[0]
|
||||
switch {
|
||||
case c0 < tx:
|
||||
return t.values[c0], 1
|
||||
case c0 < t2:
|
||||
return 0, 1
|
||||
case c0 < t3:
|
||||
if len(s) < 2 {
|
||||
return 0, 0
|
||||
}
|
||||
i := t.index[c0]
|
||||
c1 := s[1]
|
||||
if c1 < tx || t2 <= c1 {
|
||||
return 0, 1
|
||||
}
|
||||
return t.lookupValue(i, c1), 2
|
||||
case c0 < t4:
|
||||
if len(s) < 3 {
|
||||
return 0, 0
|
||||
}
|
||||
i := t.index[c0]
|
||||
c1 := s[1]
|
||||
if c1 < tx || t2 <= c1 {
|
||||
return 0, 1
|
||||
}
|
||||
o := uint16(i)<<6 + uint16(c1)&maskx
|
||||
i = t.index[o]
|
||||
c2 := s[2]
|
||||
if c2 < tx || t2 <= c2 {
|
||||
return 0, 2
|
||||
}
|
||||
return t.lookupValue(i, c2), 3
|
||||
case c0 < t5:
|
||||
if len(s) < 4 {
|
||||
return 0, 0
|
||||
}
|
||||
i := t.index[c0]
|
||||
c1 := s[1]
|
||||
if c1 < tx || t2 <= c1 {
|
||||
return 0, 1
|
||||
}
|
||||
o := uint16(i)<<6 + uint16(c1)&maskx
|
||||
i = t.index[o]
|
||||
c2 := s[2]
|
||||
if c2 < tx || t2 <= c2 {
|
||||
return 0, 2
|
||||
}
|
||||
o = uint16(i)<<6 + uint16(c2)&maskx
|
||||
i = t.index[o]
|
||||
c3 := s[3]
|
||||
if c3 < tx || t2 <= c3 {
|
||||
return 0, 3
|
||||
}
|
||||
return t.lookupValue(i, c3), 4
|
||||
}
|
||||
// Illegal rune
|
||||
return 0, 1
|
||||
}
|
||||
|
||||
// lookupString returns the trie value for the first UTF-8 encoding in s and
|
||||
// the width in bytes of this encoding. The size will be 0 if s does not
|
||||
// hold enough bytes to complete the encoding. len(s) must be greater than 0.
|
||||
func (t *trie) lookupString(s string) (v uint16, sz int) {
|
||||
c0 := s[0]
|
||||
switch {
|
||||
case c0 < tx:
|
||||
return t.values[c0], 1
|
||||
case c0 < t2:
|
||||
return 0, 1
|
||||
case c0 < t3:
|
||||
if len(s) < 2 {
|
||||
return 0, 0
|
||||
}
|
||||
i := t.index[c0]
|
||||
c1 := s[1]
|
||||
if c1 < tx || t2 <= c1 {
|
||||
return 0, 1
|
||||
}
|
||||
return t.lookupValue(i, c1), 2
|
||||
case c0 < t4:
|
||||
if len(s) < 3 {
|
||||
return 0, 0
|
||||
}
|
||||
i := t.index[c0]
|
||||
c1 := s[1]
|
||||
if c1 < tx || t2 <= c1 {
|
||||
return 0, 1
|
||||
}
|
||||
o := uint16(i)<<6 + uint16(c1)&maskx
|
||||
i = t.index[o]
|
||||
c2 := s[2]
|
||||
if c2 < tx || t2 <= c2 {
|
||||
return 0, 2
|
||||
}
|
||||
return t.lookupValue(i, c2), 3
|
||||
case c0 < t5:
|
||||
if len(s) < 4 {
|
||||
return 0, 0
|
||||
}
|
||||
i := t.index[c0]
|
||||
c1 := s[1]
|
||||
if c1 < tx || t2 <= c1 {
|
||||
return 0, 1
|
||||
}
|
||||
o := uint16(i)<<6 + uint16(c1)&maskx
|
||||
i = t.index[o]
|
||||
c2 := s[2]
|
||||
if c2 < tx || t2 <= c2 {
|
||||
return 0, 2
|
||||
}
|
||||
o = uint16(i)<<6 + uint16(c2)&maskx
|
||||
i = t.index[o]
|
||||
c3 := s[3]
|
||||
if c3 < tx || t2 <= c3 {
|
||||
return 0, 3
|
||||
}
|
||||
return t.lookupValue(i, c3), 4
|
||||
}
|
||||
// Illegal rune
|
||||
return 0, 1
|
||||
}
|
||||
|
||||
// lookupUnsafe returns the trie value for the first UTF-8 encoding in s.
|
||||
// s must hold a full encoding.
|
||||
func (t *trie) lookupUnsafe(s []byte) uint16 {
|
||||
c0 := s[0]
|
||||
if c0 < tx {
|
||||
return t.values[c0]
|
||||
}
|
||||
if c0 < t2 {
|
||||
return 0
|
||||
}
|
||||
i := t.index[c0]
|
||||
if c0 < t3 {
|
||||
return t.lookupValue(i, s[1])
|
||||
}
|
||||
i = t.index[uint16(i)<<6+uint16(s[1])&maskx]
|
||||
if c0 < t4 {
|
||||
return t.lookupValue(i, s[2])
|
||||
}
|
||||
i = t.index[uint16(i)<<6+uint16(s[2])&maskx]
|
||||
if c0 < t5 {
|
||||
return t.lookupValue(i, s[3])
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s.
|
||||
// s must hold a full encoding.
|
||||
func (t *trie) lookupStringUnsafe(s string) uint16 {
|
||||
c0 := s[0]
|
||||
if c0 < tx {
|
||||
return t.values[c0]
|
||||
}
|
||||
if c0 < t2 {
|
||||
return 0
|
||||
}
|
||||
i := t.index[c0]
|
||||
if c0 < t3 {
|
||||
return t.lookupValue(i, s[1])
|
||||
}
|
||||
i = t.index[uint16(i)<<6+uint16(s[1])&maskx]
|
||||
if c0 < t4 {
|
||||
return t.lookupValue(i, s[2])
|
||||
}
|
||||
i = t.index[uint16(i)<<6+uint16(s[2])&maskx]
|
||||
if c0 < t5 {
|
||||
return t.lookupValue(i, s[3])
|
||||
}
|
||||
return 0
|
||||
}
|
||||
@@ -1,148 +0,0 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package norm
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// Test data is located in triedata_test.go; generated by maketesttables.
|
||||
var testdata = testdataTrie
|
||||
|
||||
type rangeTest struct {
|
||||
block uint8
|
||||
lookup byte
|
||||
result uint16
|
||||
table []valueRange
|
||||
offsets []uint16
|
||||
}
|
||||
|
||||
var range1Off = []uint16{0, 2}
|
||||
var range1 = []valueRange{
|
||||
{0, 1, 0},
|
||||
{1, 0x80, 0x80},
|
||||
{0, 2, 0},
|
||||
{1, 0x80, 0x80},
|
||||
{9, 0xff, 0xff},
|
||||
}
|
||||
|
||||
var rangeTests = []rangeTest{
|
||||
{10, 0x80, 1, range1, range1Off},
|
||||
{10, 0x00, 0, range1, range1Off},
|
||||
{11, 0x80, 1, range1, range1Off},
|
||||
{11, 0xff, 9, range1, range1Off},
|
||||
{11, 0x00, 0, range1, range1Off},
|
||||
}
|
||||
|
||||
func TestLookupSparse(t *testing.T) {
|
||||
for i, test := range rangeTests {
|
||||
n := trie{sparse: test.table, sparseOffset: test.offsets, cutoff: 10}
|
||||
v := n.lookupValue(test.block, test.lookup)
|
||||
if v != test.result {
|
||||
t.Errorf("LookupSparse:%d: found %X; want %X", i, v, test.result)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Test cases for illegal runes.
|
||||
type trietest struct {
|
||||
size int
|
||||
bytes []byte
|
||||
}
|
||||
|
||||
var tests = []trietest{
|
||||
// illegal runes
|
||||
{1, []byte{0x80}},
|
||||
{1, []byte{0xFF}},
|
||||
{1, []byte{t2, tx - 1}},
|
||||
{1, []byte{t2, t2}},
|
||||
{2, []byte{t3, tx, tx - 1}},
|
||||
{2, []byte{t3, tx, t2}},
|
||||
{1, []byte{t3, tx - 1, tx}},
|
||||
{3, []byte{t4, tx, tx, tx - 1}},
|
||||
{3, []byte{t4, tx, tx, t2}},
|
||||
{1, []byte{t4, t2, tx, tx - 1}},
|
||||
{2, []byte{t4, tx, t2, tx - 1}},
|
||||
|
||||
// short runes
|
||||
{0, []byte{t2}},
|
||||
{0, []byte{t3, tx}},
|
||||
{0, []byte{t4, tx, tx}},
|
||||
|
||||
// we only support UTF-8 up to utf8.UTFMax bytes (4 bytes)
|
||||
{1, []byte{t5, tx, tx, tx, tx}},
|
||||
{1, []byte{t6, tx, tx, tx, tx, tx}},
|
||||
}
|
||||
|
||||
// mkUTF8 encodes r as UTF-8 and returns the encoded bytes along with their
// length. The returned slice is backed by a fresh utf8.UTFMax-byte buffer.
func mkUTF8(r rune) ([]byte, int) {
	buf := make([]byte, utf8.UTFMax)
	n := utf8.EncodeRune(buf, r)
	return buf[:n], n
}
|
||||
|
||||
func TestLookup(t *testing.T) {
|
||||
for i, tt := range testRunes {
|
||||
b, szg := mkUTF8(tt)
|
||||
v, szt := testdata.lookup(b)
|
||||
if int(v) != i {
|
||||
t.Errorf("lookup(%U): found value %#x, expected %#x", tt, v, i)
|
||||
}
|
||||
if szt != szg {
|
||||
t.Errorf("lookup(%U): found size %d, expected %d", tt, szt, szg)
|
||||
}
|
||||
}
|
||||
for i, tt := range tests {
|
||||
v, sz := testdata.lookup(tt.bytes)
|
||||
if int(v) != 0 {
|
||||
t.Errorf("lookup of illegal rune, case %d: found value %#x, expected 0", i, v)
|
||||
}
|
||||
if sz != tt.size {
|
||||
t.Errorf("lookup of illegal rune, case %d: found size %d, expected %d", i, sz, tt.size)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestLookupUnsafe(t *testing.T) {
|
||||
for i, tt := range testRunes {
|
||||
b, _ := mkUTF8(tt)
|
||||
v := testdata.lookupUnsafe(b)
|
||||
if int(v) != i {
|
||||
t.Errorf("lookupUnsafe(%U): found value %#x, expected %#x", i, v, i)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestLookupString(t *testing.T) {
|
||||
for i, tt := range testRunes {
|
||||
b, szg := mkUTF8(tt)
|
||||
v, szt := testdata.lookupString(string(b))
|
||||
if int(v) != i {
|
||||
t.Errorf("lookup(%U): found value %#x, expected %#x", i, v, i)
|
||||
}
|
||||
if szt != szg {
|
||||
t.Errorf("lookup(%U): found size %d, expected %d", i, szt, szg)
|
||||
}
|
||||
}
|
||||
for i, tt := range tests {
|
||||
v, sz := testdata.lookupString(string(tt.bytes))
|
||||
if int(v) != 0 {
|
||||
t.Errorf("lookup of illegal rune, case %d: found value %#x, expected 0", i, v)
|
||||
}
|
||||
if sz != tt.size {
|
||||
t.Errorf("lookup of illegal rune, case %d: found size %d, expected %d", i, sz, tt.size)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestLookupStringUnsafe(t *testing.T) {
|
||||
for i, tt := range testRunes {
|
||||
b, _ := mkUTF8(tt)
|
||||
v := testdata.lookupStringUnsafe(string(b))
|
||||
if int(v) != i {
|
||||
t.Errorf("lookupUnsafe(%U): found value %#x, expected %#x", i, v, i)
|
||||
}
|
||||
}
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user