Compare commits


2 Commits
tls ... go1

Author          SHA1        Message                                Date
Andrew Gerrand  6174b5e21e  go1                                    2012-03-28 23:41:59 +11:00
Andrew Gerrand  4081aa1277  remove non-go1 commands and packages   2012-03-28 23:41:38 +11:00
137 changed files with 1 addition and 48601 deletions

VERSION (new file, 1 line added)

@@ -0,0 +1 @@
go1


@@ -1,5 +0,0 @@
# Copyright 2012 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
include ../../Make.dist


@@ -1,34 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/*
Cov is a rudimentary code coverage tool.
Usage:
go tool cov [-lsv] [-g substring] [-m minlines] [6.out args]
Given a command to run, it runs the command while tracking which
sections of code have been executed. When the command finishes,
cov prints the line numbers of sections of code in the binary that
were not executed. With no arguments it assumes the command "6.out".
The options are:
-l
print full path names instead of paths relative to the current directory
-s
show the source code that didn't execute, in addition to the line numbers.
-v
print debugging information during the run.
-g substring
restrict the coverage analysis to functions or files whose names contain substring
-m minlines
only report uncovered sections of code larger than minlines lines
The program is the same for all architectures: 386, amd64, and arm.
*/
package documentation


@@ -1,480 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/*
* code coverage
*/
#include <u.h>
#include <libc.h>
#include <bio.h>
#include "tree.h"
#include <ureg_amd64.h>
#include <mach.h>
typedef struct Ureg Ureg;
void
usage(void)
{
fprint(2, "usage: cov [-lsv] [-g substring] [-m minlines] [6.out args...]\n");
fprint(2, "-g specifies pattern of interesting functions or files\n");
exits("usage");
}
typedef struct Range Range;
struct Range
{
uvlong pc;
uvlong epc;
};
int chatty;
int fd;
int longnames;
int pid;
int doshowsrc;
Map *mem;
Map *text;
Fhdr fhdr;
char *substring;
char cwd[1000];
int ncwd;
int minlines = -1000;
Tree breakpoints; // code ranges not run
/*
* comparison for Range structures
* they are "equal" if they overlap, so
* that a search for [pc, pc+1) finds the
* Range containing pc.
*/
int
rangecmp(void *va, void *vb)
{
Range *a = va, *b = vb;
if(a->epc <= b->pc)
return 1;
if(b->epc <= a->pc)
return -1;
return 0;
}
/*
* remember that we ran the section of code [pc, epc).
*/
void
ran(uvlong pc, uvlong epc)
{
Range key;
Range *r;
uvlong oldepc;
if(chatty)
print("run %#llux-%#llux\n", pc, epc);
key.pc = pc;
key.epc = pc+1;
r = treeget(&breakpoints, &key);
if(r == nil)
sysfatal("unchecked breakpoint at %#llux+%d", pc, (int)(epc-pc));
// Might be that the tail of the sequence
// was run already, so r->epc is before the end.
// Adjust len.
if(epc > r->epc)
epc = r->epc;
if(r->pc == pc) {
r->pc = epc;
} else {
// Chop r to before pc;
// add new entry for after if needed.
// Changing r->epc does not affect r's position in the tree.
oldepc = r->epc;
r->epc = pc;
if(epc < oldepc) {
Range *n;
n = malloc(sizeof *n);
n->pc = epc;
n->epc = oldepc;
treeput(&breakpoints, n, n);
}
}
}
void
showsrc(char *file, int line1, int line2)
{
Biobuf *b;
char *p;
int n, stop;
if((b = Bopen(file, OREAD)) == nil) {
print("\topen %s: %r\n", file);
return;
}
for(n=1; n<line1 && (p = Brdstr(b, '\n', 1)) != nil; n++)
free(p);
// print up to five lines (this one and 4 more).
// if there are more than five lines, print 4 and "..."
stop = n+4;
if(stop > line2)
stop = line2;
if(stop < line2)
stop--;
for(; n<=stop && (p = Brdstr(b, '\n', 1)) != nil; n++) {
print(" %d %s\n", n, p);
free(p);
}
if(n < line2)
print(" ...\n");
Bterm(b);
}
/*
* if s is in the current directory or below,
* return the relative path.
*/
char*
shortname(char *s)
{
if(!longnames && strlen(s) > ncwd && memcmp(s, cwd, ncwd) == 0 && s[ncwd] == '/')
return s+ncwd+1;
return s;
}
/*
* we've decided that [pc, epc) did not run.
* do something about it.
*/
void
missing(uvlong pc, uvlong epc)
{
char file[1000];
int line1, line2;
char buf[100];
Symbol s;
char *p;
uvlong uv;
if(!findsym(pc, CTEXT, &s) || !fileline(file, sizeof file, pc)) {
notfound:
print("%#llux-%#llux\n", pc, epc);
return;
}
p = strrchr(file, ':');
*p++ = 0;
line1 = atoi(p);
for(uv=pc; uv<epc; ) {
if(!fileline(file, sizeof file, epc-2))
goto notfound;
uv += machdata->instsize(text, uv);
}
p = strrchr(file, ':');
*p++ = 0;
line2 = atoi(p);
if(line2+1-line2 < minlines)
return;
if(pc == s.value) {
// never entered function
print("%s:%d %s never called (%#llux-%#llux)\n", shortname(file), line1, s.name, pc, epc);
return;
}
if(pc <= s.value+13) {
// probably stub for stack growth.
// check whether last instruction is call to morestack.
// the -5 below is the length of
// CALL sys.morestack.
buf[0] = 0;
machdata->das(text, epc-5, 0, buf, sizeof buf);
if(strstr(buf, "morestack"))
return;
}
if(epc - pc == 5) {
// check for CALL sys.panicindex
buf[0] = 0;
machdata->das(text, pc, 0, buf, sizeof buf);
if(strstr(buf, "panicindex"))
return;
}
if(epc - pc == 2 || epc -pc == 3) {
// check for XORL inside shift.
// (on x86 have to implement large left or unsigned right shift with explicit zeroing).
// f+90 0x00002c9f CMPL CX,$20
// f+93 0x00002ca2 JCS f+97(SB)
// f+95 0x00002ca4 XORL AX,AX <<<
// f+97 0x00002ca6 SHLL CL,AX
// f+99 0x00002ca8 MOVL $1,CX
//
// f+c8 0x00002cd7 CMPL CX,$40
// f+cb 0x00002cda JCS f+d0(SB)
// f+cd 0x00002cdc XORQ AX,AX <<<
// f+d0 0x00002cdf SHLQ CL,AX
// f+d3 0x00002ce2 MOVQ $1,CX
buf[0] = 0;
machdata->das(text, pc, 0, buf, sizeof buf);
if(strncmp(buf, "XOR", 3) == 0) {
machdata->das(text, epc, 0, buf, sizeof buf);
if(strncmp(buf, "SHL", 3) == 0 || strncmp(buf, "SHR", 3) == 0)
return;
}
}
if(epc - pc == 3) {
// check for SAR inside shift.
// (on x86 have to implement large signed right shift as >>31).
// f+36 0x00016216 CMPL CX,$20
// f+39 0x00016219 JCS f+3e(SB)
// f+3b 0x0001621b SARL $1f,AX <<<
// f+3e 0x0001621e SARL CL,AX
// f+40 0x00016220 XORL CX,CX
// f+42 0x00016222 CMPL CX,AX
buf[0] = 0;
machdata->das(text, pc, 0, buf, sizeof buf);
if(strncmp(buf, "SAR", 3) == 0) {
machdata->das(text, epc, 0, buf, sizeof buf);
if(strncmp(buf, "SAR", 3) == 0)
return;
}
}
// show first instruction to make clear where we were.
machdata->das(text, pc, 0, buf, sizeof buf);
if(line1 != line2)
print("%s:%d,%d %#llux-%#llux %s\n",
shortname(file), line1, line2, pc, epc, buf);
else
print("%s:%d %#llux-%#llux %s\n",
shortname(file), line1, pc, epc, buf);
if(doshowsrc)
showsrc(file, line1, line2);
}
/*
* walk the tree, calling missing for each non-empty
* section of missing code.
*/
void
walktree(TreeNode *t)
{
Range *n;
if(t == nil)
return;
walktree(t->left);
n = t->key;
if(n->pc < n->epc)
missing(n->pc, n->epc);
walktree(t->right);
}
/*
* set a breakpoint all over [pc, epc)
* and remember that we did.
*/
void
breakpoint(uvlong pc, uvlong epc)
{
Range *r;
r = malloc(sizeof *r);
r->pc = pc;
r->epc = epc;
treeput(&breakpoints, r, r);
for(; pc < epc; pc+=machdata->bpsize)
put1(mem, pc, machdata->bpinst, machdata->bpsize);
}
/*
* install breakpoints over all text symbols
* that match the pattern.
*/
void
cover(void)
{
Symbol s;
char *lastfn;
uvlong lastpc;
int i;
char buf[200];
lastfn = nil;
lastpc = 0;
for(i=0; textsym(&s, i); i++) {
switch(s.type) {
case 'T':
case 't':
if(lastpc != 0) {
breakpoint(lastpc, s.value);
lastpc = 0;
}
// Ignore second entry for a given name;
// that's the debugging blob.
if(lastfn && strcmp(s.name, lastfn) == 0)
break;
lastfn = s.name;
buf[0] = 0;
fileline(buf, sizeof buf, s.value);
if(substring == nil || strstr(buf, substring) || strstr(s.name, substring))
lastpc = s.value;
}
}
}
uvlong
rgetzero(Map *map, char *reg)
{
USED(map);
USED(reg);
return 0;
}
/*
* remove the breakpoints at pc and successive instructions,
* up to and including the first jump or other control flow transfer.
*/
void
uncover(uvlong pc)
{
uchar buf[1000];
int n, n1, n2;
uvlong foll[2];
// Double-check that we stopped at a breakpoint.
if(get1(mem, pc, buf, machdata->bpsize) < 0)
sysfatal("read mem inst at %#llux: %r", pc);
if(memcmp(buf, machdata->bpinst, machdata->bpsize) != 0)
sysfatal("stopped at %#llux; not at breakpoint %d", pc, machdata->bpsize);
// Figure out how many bytes of straight-line code
// there are in the text starting at pc.
n = 0;
while(n < sizeof buf) {
n1 = machdata->instsize(text, pc+n);
if(n+n1 > sizeof buf)
break;
n2 = machdata->foll(text, pc+n, rgetzero, foll);
n += n1;
if(n2 != 1 || foll[0] != pc+n)
break;
}
// Record that this section of code ran.
ran(pc, pc+n);
// Put original instructions back.
if(get1(text, pc, buf, n) < 0)
sysfatal("get1: %r");
if(put1(mem, pc, buf, n) < 0)
sysfatal("put1: %r");
}
int
startprocess(char **argv)
{
int pid;
if((pid = fork()) < 0)
sysfatal("fork: %r");
if(pid == 0) {
pid = getpid();
if(ctlproc(pid, "hang") < 0)
sysfatal("ctlproc hang: %r");
exec(argv[0], argv);
sysfatal("exec %s: %r", argv[0]);
}
if(ctlproc(pid, "attached") < 0 || ctlproc(pid, "waitstop") < 0)
sysfatal("attach %d %s: %r", pid, argv[0]);
return pid;
}
int
go(void)
{
uvlong pc;
char buf[100];
int n;
for(n = 0;; n++) {
ctlproc(pid, "startstop");
if(get8(mem, offsetof(Ureg, ip), &pc) < 0) {
rerrstr(buf, sizeof buf);
if(strstr(buf, "exited") || strstr(buf, "No such process"))
return n;
sysfatal("cannot read pc: %r");
}
pc--;
if(put8(mem, offsetof(Ureg, ip), pc) < 0)
sysfatal("cannot write pc: %r");
uncover(pc);
}
}
void
main(int argc, char **argv)
{
int n;
ARGBEGIN{
case 'g':
substring = EARGF(usage());
break;
case 'l':
longnames++;
break;
case 'n':
minlines = atoi(EARGF(usage()));
break;
case 's':
doshowsrc = 1;
break;
case 'v':
chatty++;
break;
default:
usage();
}ARGEND
getwd(cwd, sizeof cwd);
ncwd = strlen(cwd);
if(argc == 0) {
*--argv = "6.out";
}
fd = open(argv[0], OREAD);
if(fd < 0)
sysfatal("open %s: %r", argv[0]);
if(crackhdr(fd, &fhdr) <= 0)
sysfatal("crackhdr: %r");
machbytype(fhdr.type);
if(syminit(fd, &fhdr) <= 0)
sysfatal("syminit: %r");
text = loadmap(nil, fd, &fhdr);
if(text == nil)
sysfatal("loadmap: %r");
pid = startprocess(argv);
mem = attachproc(pid, &fhdr);
if(mem == nil)
sysfatal("attachproc: %r");
breakpoints.cmp = rangecmp;
cover();
n = go();
walktree(breakpoints.root);
if(chatty)
print("%d breakpoints\n", n);
detachproc(mem);
exits(0);
}
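
For readers skimming the deleted tool: the heart of cov is the rangecmp/ran pair above, which keeps the not-yet-executed code as a set of half-open [pc, epc) ranges, treats overlapping ranges as equal so that a lookup of [pc, pc+1) finds the range containing pc, and then trims or splits that range once the code runs. The following Go sketch is not part of the commit; the Range and Coverage names and the sorted-slice representation are illustrative stand-ins for the C red-black tree, where treeput and treeget with the overlap comparator do the same job and a lookup miss is fatal rather than ignored.

package covsketch

import "sort"

// Range is a half-open [pc, epc) span of machine code that has not yet executed.
type Range struct{ pc, epc uint64 }

// Coverage holds the missing ranges, sorted by pc and non-overlapping.
type Coverage struct{ missing []Range }

// ran records that [pc, epc) executed, mirroring cov's ran():
// it finds the range containing pc and trims or splits it.
func (c *Coverage) ran(pc, epc uint64) {
	i := sort.Search(len(c.missing), func(i int) bool { return c.missing[i].epc > pc })
	if i == len(c.missing) || c.missing[i].pc > pc {
		return // no recorded range contains pc (the C tool treats this as fatal)
	}
	r := c.missing[i]
	if epc > r.epc {
		epc = r.epc // the tail may already have run; clamp, as the C code does
	}
	switch {
	case r.pc == pc && epc == r.epc: // the whole range ran: drop it
		c.missing = append(c.missing[:i], c.missing[i+1:]...)
	case r.pc == pc: // chop off the front
		c.missing[i].pc = epc
	case epc == r.epc: // chop off the back
		c.missing[i].epc = pc
	default: // executed span is in the middle: split into two pieces
		c.missing[i].epc = pc
		rest := append([]Range{{epc, r.epc}}, c.missing[i+1:]...)
		c.missing = append(c.missing[:i+1], rest...)
	}
}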


@@ -1,243 +0,0 @@
// Renamed from Map to Tree to avoid conflict with libmach.
/*
Copyright (c) 2003-2007 Russ Cox, Tom Bergan, Austin Clements,
Massachusetts Institute of Technology
Portions Copyright (c) 2009 The Go Authors. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
// Mutable map structure, but still based on
// Okasaki, Red Black Trees in a Functional Setting, JFP 1999,
// which is a lot easier than the traditional red-black
// and plenty fast enough for me. (Also I could copy
// and edit fmap.c.)
#include <u.h>
#include <libc.h>
#include "tree.h"
enum
{
Red = 0,
Black = 1
};
// Red-black trees are binary trees with this property:
// 1. No red node has a red parent.
// 2. Every path from the root to a leaf contains the
// same number of black nodes.
static TreeNode*
rwTreeNode(TreeNode *p, int color, TreeNode *left, void *key, void *value, TreeNode *right)
{
if(p == nil)
p = malloc(sizeof *p);
p->color = color;
p->left = left;
p->key = key;
p->value = value;
p->right = right;
return p;
}
static TreeNode*
balance(TreeNode *m0)
{
void *xk, *xv, *yk, *yv, *zk, *zv;
TreeNode *a, *b, *c, *d;
TreeNode *m1, *m2;
int color;
TreeNode *left, *right;
void *key, *value;
color = m0->color;
left = m0->left;
key = m0->key;
value = m0->value;
right = m0->right;
// Okasaki notation: (T is mkTreeNode, B is Black, R is Red, x, y, z are key-value.
//
// balance B (T R (T R a x b) y c) z d
// balance B (T R a x (T R b y c)) z d
// balance B a x (T R (T R b y c) z d)
// balance B a x (T R b y (T R c z d))
//
// = T R (T B a x b) y (T B c z d)
if(color == Black){
if(left && left->color == Red){
if(left->left && left->left->color == Red){
a = left->left->left;
xk = left->left->key;
xv = left->left->value;
b = left->left->right;
yk = left->key;
yv = left->value;
c = left->right;
zk = key;
zv = value;
d = right;
m1 = left;
m2 = left->left;
goto hard;
}else if(left->right && left->right->color == Red){
a = left->left;
xk = left->key;
xv = left->value;
b = left->right->left;
yk = left->right->key;
yv = left->right->value;
c = left->right->right;
zk = key;
zv = value;
d = right;
m1 = left;
m2 = left->right;
goto hard;
}
}else if(right && right->color == Red){
if(right->left && right->left->color == Red){
a = left;
xk = key;
xv = value;
b = right->left->left;
yk = right->left->key;
yv = right->left->value;
c = right->left->right;
zk = right->key;
zv = right->value;
d = right->right;
m1 = right;
m2 = right->left;
goto hard;
}else if(right->right && right->right->color == Red){
a = left;
xk = key;
xv = value;
b = right->left;
yk = right->key;
yv = right->value;
c = right->right->left;
zk = right->right->key;
zv = right->right->value;
d = right->right->right;
m1 = right;
m2 = right->right;
goto hard;
}
}
}
return rwTreeNode(m0, color, left, key, value, right);
hard:
return rwTreeNode(m0, Red, rwTreeNode(m1, Black, a, xk, xv, b),
yk, yv, rwTreeNode(m2, Black, c, zk, zv, d));
}
static TreeNode*
ins0(TreeNode *p, void *k, void *v, TreeNode *rw)
{
if(p == nil)
return rwTreeNode(rw, Red, nil, k, v, nil);
if(p->key == k){
if(rw)
return rwTreeNode(rw, p->color, p->left, k, v, p->right);
p->value = v;
return p;
}
if(p->key < k)
p->left = ins0(p->left, k, v, rw);
else
p->right = ins0(p->right, k, v, rw);
return balance(p);
}
static TreeNode*
ins1(Tree *m, TreeNode *p, void *k, void *v, TreeNode *rw)
{
int i;
if(p == nil)
return rwTreeNode(rw, Red, nil, k, v, nil);
i = m->cmp(p->key, k);
if(i == 0){
if(rw)
return rwTreeNode(rw, p->color, p->left, k, v, p->right);
p->value = v;
return p;
}
if(i < 0)
p->left = ins1(m, p->left, k, v, rw);
else
p->right = ins1(m, p->right, k, v, rw);
return balance(p);
}
void
treeputelem(Tree *m, void *key, void *val, TreeNode *rw)
{
if(m->cmp)
m->root = ins1(m, m->root, key, val, rw);
else
m->root = ins0(m->root, key, val, rw);
}
void
treeput(Tree *m, void *key, void *val)
{
treeputelem(m, key, val, nil);
}
void*
treeget(Tree *m, void *key)
{
int i;
TreeNode *p;
p = m->root;
if(m->cmp){
for(;;){
if(p == nil)
return nil;
i = m->cmp(p->key, key);
if(i < 0)
p = p->left;
else if(i > 0)
p = p->right;
else
return p->value;
}
}else{
for(;;){
if(p == nil)
return nil;
if(p->key == key)
return p->value;
if(p->key < key)
p = p->left;
else
p = p->right;
}
}
}
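
The Okasaki rewrite quoted in the comment inside balance() above collapses all four red-red violations into one shape: a red node with two black children. Purely as an illustrative aid (the type and function names below are mine, and this version allocates fresh nodes in the functional style of the paper, where the C code reuses nodes via rwTreeNode), the same rule in Go:

package rbsketch

type color int

const (
	red color = iota
	black
)

type node struct {
	color       color
	left, right *node
	key, value  interface{}
}

func t(c color, l *node, k, v interface{}, r *node) *node {
	return &node{color: c, left: l, right: r, key: k, value: v}
}

func isRed(n *node) bool { return n != nil && n.color == red }

// balance: any black node whose red child has a red child becomes
// T R (T B a x b) y (T B c z d), exactly the four cases handled above.
func balance(n *node) *node {
	if n.color != black {
		return n
	}
	l, r := n.left, n.right
	switch {
	case isRed(l) && isRed(l.left): // B (R (R a x b) y c) z d
		return t(red, t(black, l.left.left, l.left.key, l.left.value, l.left.right),
			l.key, l.value, t(black, l.right, n.key, n.value, r))
	case isRed(l) && isRed(l.right): // B (R a x (R b y c)) z d
		return t(red, t(black, l.left, l.key, l.value, l.right.left),
			l.right.key, l.right.value, t(black, l.right.right, n.key, n.value, r))
	case isRed(r) && isRed(r.left): // B a x (R (R b y c) z d)
		return t(red, t(black, l, n.key, n.value, r.left.left),
			r.left.key, r.left.value, t(black, r.left.right, r.key, r.value, r.right))
	case isRed(r) && isRed(r.right): // B a x (R b y (R c z d))
		return t(red, t(black, l, n.key, n.value, r.left),
			r.key, r.value, t(black, r.right.left, r.right.key, r.right.value, r.right.right))
	}
	return n
}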


@@ -1,47 +0,0 @@
// Renamed from Map to Tree to avoid conflict with libmach.
/*
Copyright (c) 2003-2007 Russ Cox, Tom Bergan, Austin Clements,
Massachusetts Institute of Technology
Portions Copyright (c) 2009 The Go Authors. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
typedef struct Tree Tree;
typedef struct TreeNode TreeNode;
struct Tree
{
int (*cmp)(void*, void*);
TreeNode *root;
};
struct TreeNode
{
int color;
TreeNode *left;
void *key;
void *value;
TreeNode *right;
};
void *treeget(Tree*, void*);
void treeput(Tree*, void*, void*);
void treeputelem(Tree*, void*, void*, TreeNode*);


@@ -1,5 +0,0 @@
# Copyright 2012 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
include ../../Make.dist


@@ -1,47 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/*
Prof is a rudimentary real-time profiler.
Given a command to run or the process id (pid) of a command already
running, it samples the program's state at regular intervals and reports
on its behavior. With no options, it prints a histogram of the locations
in the code that were sampled during execution.
Since it is a real-time profiler, unlike a traditional profiler it samples
the program's state even when it is not running, such as when it is
asleep or waiting for I/O. Each thread contributes equally to the
statistics.
Usage:
go tool prof -p pid [-t total_secs] [-d delta_msec] [6.out args ...]
The output modes (default -h) are:
-P file.prof:
Write the profile information to file.prof, in the format used by pprof.
At the moment, this only works on Linux amd64 binaries and requires that the
binary be written using 6l -e to produce ELF debug info.
See http://code.google.com/p/google-perftools for details.
-h: histograms
How many times a sample occurred at each location.
-f: dynamic functions
At each sample period, print the name of the executing function.
-l: dynamic file and line numbers
At each sample period, print the file and line number of the executing instruction.
-r: dynamic registers
At each sample period, print the register contents.
-s: dynamic function stack traces
At each sample period, print the symbolic stack trace.
Flag -t sets the maximum real time to sample, in seconds, and -d
sets the sampling interval in milliseconds. The default is to sample
every 100ms until the program completes.
It is installed as go tool prof and is architecture-independent.
*/
package documentation


@@ -1,899 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include <u.h>
#include <time.h>
#include <libc.h>
#include <bio.h>
#include <ctype.h>
#define Ureg Ureg_amd64
#include <ureg_amd64.h>
#undef Ureg
#define Ureg Ureg_x86
#include <ureg_x86.h>
#undef Ureg
#include <mach.h>
char* file = "6.out";
static Fhdr fhdr;
int have_syms;
int fd;
struct Ureg_amd64 ureg_amd64;
struct Ureg_x86 ureg_x86;
int total_sec = 0;
int delta_msec = 100;
int nsample;
int nsamplethread;
// pprof data, stored as sequences of N followed by N PC values.
// See http://code.google.com/p/google-perftools .
uvlong *ppdata; // traces
Biobuf* pproffd; // file descriptor to write trace info
long ppstart; // start position of current trace
long nppdata; // length of data
long ppalloc; // size of allocated data
char ppmapdata[10*1024]; // the map information for the output file
// output formats
int pprof; // print pprof output to named file
int functions; // print functions
int histograms; // print histograms
int linenums; // print file and line numbers rather than function names
int registers; // print registers
int stacks; // print stack traces
int pid; // main process pid
int nthread; // number of threads
int thread[32]; // thread pids
Map *map[32]; // thread maps
void
Usage(void)
{
fprint(2, "Usage: prof -p pid [-t total_secs] [-d delta_msec]\n");
fprint(2, " prof [-t total_secs] [-d delta_msec] 6.out args ...\n");
fprint(2, "\tformats (default -h):\n");
fprint(2, "\t\t-P file.prof: write [c]pprof output to file.prof\n");
fprint(2, "\t\t-h: histograms\n");
fprint(2, "\t\t-f: dynamic functions\n");
fprint(2, "\t\t-l: dynamic file and line numbers\n");
fprint(2, "\t\t-r: dynamic registers\n");
fprint(2, "\t\t-s: dynamic function stack traces\n");
fprint(2, "\t\t-hs: include stack info in histograms\n");
exit(2);
}
typedef struct PC PC;
struct PC {
uvlong pc;
uvlong callerpc;
unsigned int count;
PC* next;
};
enum {
Ncounters = 256
};
PC *counters[Ncounters];
// Set up by setarch() to make most of the code architecture-independent.
typedef struct Arch Arch;
struct Arch {
char* name;
void (*regprint)(void);
int (*getregs)(Map*);
int (*getPC)(Map*);
int (*getSP)(Map*);
uvlong (*uregPC)(void);
uvlong (*uregSP)(void);
void (*ppword)(uvlong w);
};
void
amd64_regprint(void)
{
fprint(2, "ax\t0x%llux\n", ureg_amd64.ax);
fprint(2, "bx\t0x%llux\n", ureg_amd64.bx);
fprint(2, "cx\t0x%llux\n", ureg_amd64.cx);
fprint(2, "dx\t0x%llux\n", ureg_amd64.dx);
fprint(2, "si\t0x%llux\n", ureg_amd64.si);
fprint(2, "di\t0x%llux\n", ureg_amd64.di);
fprint(2, "bp\t0x%llux\n", ureg_amd64.bp);
fprint(2, "r8\t0x%llux\n", ureg_amd64.r8);
fprint(2, "r9\t0x%llux\n", ureg_amd64.r9);
fprint(2, "r10\t0x%llux\n", ureg_amd64.r10);
fprint(2, "r11\t0x%llux\n", ureg_amd64.r11);
fprint(2, "r12\t0x%llux\n", ureg_amd64.r12);
fprint(2, "r13\t0x%llux\n", ureg_amd64.r13);
fprint(2, "r14\t0x%llux\n", ureg_amd64.r14);
fprint(2, "r15\t0x%llux\n", ureg_amd64.r15);
fprint(2, "ds\t0x%llux\n", ureg_amd64.ds);
fprint(2, "es\t0x%llux\n", ureg_amd64.es);
fprint(2, "fs\t0x%llux\n", ureg_amd64.fs);
fprint(2, "gs\t0x%llux\n", ureg_amd64.gs);
fprint(2, "type\t0x%llux\n", ureg_amd64.type);
fprint(2, "error\t0x%llux\n", ureg_amd64.error);
fprint(2, "pc\t0x%llux\n", ureg_amd64.ip);
fprint(2, "cs\t0x%llux\n", ureg_amd64.cs);
fprint(2, "flags\t0x%llux\n", ureg_amd64.flags);
fprint(2, "sp\t0x%llux\n", ureg_amd64.sp);
fprint(2, "ss\t0x%llux\n", ureg_amd64.ss);
}
int
amd64_getregs(Map *map)
{
int i;
union {
uvlong regs[1];
struct Ureg_amd64 ureg;
} u;
for(i = 0; i < sizeof ureg_amd64; i+=8) {
if(get8(map, (uvlong)i, &u.regs[i/8]) < 0)
return -1;
}
ureg_amd64 = u.ureg;
return 0;
}
int
amd64_getPC(Map *map)
{
uvlong x;
int r;
r = get8(map, offsetof(struct Ureg_amd64, ip), &x);
ureg_amd64.ip = x;
return r;
}
int
amd64_getSP(Map *map)
{
uvlong x;
int r;
r = get8(map, offsetof(struct Ureg_amd64, sp), &x);
ureg_amd64.sp = x;
return r;
}
uvlong
amd64_uregPC(void)
{
return ureg_amd64.ip;
}
uvlong
amd64_uregSP(void) {
return ureg_amd64.sp;
}
void
amd64_ppword(uvlong w)
{
uchar buf[8];
buf[0] = w;
buf[1] = w >> 8;
buf[2] = w >> 16;
buf[3] = w >> 24;
buf[4] = w >> 32;
buf[5] = w >> 40;
buf[6] = w >> 48;
buf[7] = w >> 56;
Bwrite(pproffd, buf, 8);
}
void
x86_regprint(void)
{
fprint(2, "ax\t0x%ux\n", ureg_x86.ax);
fprint(2, "bx\t0x%ux\n", ureg_x86.bx);
fprint(2, "cx\t0x%ux\n", ureg_x86.cx);
fprint(2, "dx\t0x%ux\n", ureg_x86.dx);
fprint(2, "si\t0x%ux\n", ureg_x86.si);
fprint(2, "di\t0x%ux\n", ureg_x86.di);
fprint(2, "bp\t0x%ux\n", ureg_x86.bp);
fprint(2, "ds\t0x%ux\n", ureg_x86.ds);
fprint(2, "es\t0x%ux\n", ureg_x86.es);
fprint(2, "fs\t0x%ux\n", ureg_x86.fs);
fprint(2, "gs\t0x%ux\n", ureg_x86.gs);
fprint(2, "cs\t0x%ux\n", ureg_x86.cs);
fprint(2, "flags\t0x%ux\n", ureg_x86.flags);
fprint(2, "pc\t0x%ux\n", ureg_x86.pc);
fprint(2, "sp\t0x%ux\n", ureg_x86.sp);
fprint(2, "ss\t0x%ux\n", ureg_x86.ss);
}
int
x86_getregs(Map *map)
{
int i;
for(i = 0; i < sizeof ureg_x86; i+=4) {
if(get4(map, (uvlong)i, &((uint32*)&ureg_x86)[i/4]) < 0)
return -1;
}
return 0;
}
int
x86_getPC(Map* map)
{
return get4(map, offsetof(struct Ureg_x86, pc), &ureg_x86.pc);
}
int
x86_getSP(Map* map)
{
return get4(map, offsetof(struct Ureg_x86, sp), &ureg_x86.sp);
}
uvlong
x86_uregPC(void)
{
return (uvlong)ureg_x86.pc;
}
uvlong
x86_uregSP(void)
{
return (uvlong)ureg_x86.sp;
}
void
x86_ppword(uvlong w)
{
uchar buf[4];
buf[0] = w;
buf[1] = w >> 8;
buf[2] = w >> 16;
buf[3] = w >> 24;
Bwrite(pproffd, buf, 4);
}
Arch archtab[] = {
{
"amd64",
amd64_regprint,
amd64_getregs,
amd64_getPC,
amd64_getSP,
amd64_uregPC,
amd64_uregSP,
amd64_ppword,
},
{
"386",
x86_regprint,
x86_getregs,
x86_getPC,
x86_getSP,
x86_uregPC,
x86_uregSP,
x86_ppword,
},
{
nil
}
};
Arch *arch;
int
setarch(void)
{
int i;
if(mach != nil) {
for(i = 0; archtab[i].name != nil; i++) {
if (strcmp(mach->name, archtab[i].name) == 0) {
arch = &archtab[i];
return 0;
}
}
}
return -1;
}
int
getthreads(void)
{
int i, j, curn, found;
Map *curmap[nelem(map)];
int curthread[nelem(map)];
static int complained = 0;
curn = procthreadpids(pid, curthread, nelem(curthread));
if(curn <= 0)
return curn;
if(curn > nelem(map)) {
if(complained == 0) {
fprint(2, "prof: too many threads; limiting to %d\n", nthread, nelem(map));
complained = 1;
}
curn = nelem(map);
}
if(curn == nthread && memcmp(thread, curthread, curn*sizeof(*thread)) == 0)
return curn; // no changes
// Number of threads has changed (might be the init case).
// A bit expensive but rare enough not to bother being clever.
for(i = 0; i < curn; i++) {
found = 0;
for(j = 0; j < nthread; j++) {
if(curthread[i] == thread[j]) {
found = 1;
curmap[i] = map[j];
map[j] = nil;
break;
}
}
if(found)
continue;
// map new thread
curmap[i] = attachproc(curthread[i], &fhdr);
if(curmap[i] == nil) {
fprint(2, "prof: can't attach to %d: %r\n", curthread[i]);
return -1;
}
}
for(j = 0; j < nthread; j++)
if(map[j] != nil)
detachproc(map[j]);
nthread = curn;
memmove(thread, curthread, nthread*sizeof thread[0]);
memmove(map, curmap, sizeof map);
return nthread;
}
int
sample(Map *map)
{
static int n;
n++;
if(registers) {
if(arch->getregs(map) < 0)
goto bad;
} else {
// we need only two registers
if(arch->getPC(map) < 0)
goto bad;
if(arch->getSP(map) < 0)
goto bad;
}
return 1;
bad:
if(n == 1)
fprint(2, "prof: can't read registers: %r\n");
return 0;
}
void
addtohistogram(uvlong pc, uvlong callerpc, uvlong sp)
{
int h;
PC *x;
USED(sp);
h = (pc + callerpc*101) % Ncounters;
for(x = counters[h]; x != NULL; x = x->next) {
if(x->pc == pc && x->callerpc == callerpc) {
x->count++;
return;
}
}
x = malloc(sizeof(PC));
x->pc = pc;
x->callerpc = callerpc;
x->count = 1;
x->next = counters[h];
counters[h] = x;
}
void
addppword(uvlong pc)
{
if(pc == 0) {
return;
}
if(nppdata == ppalloc) {
ppalloc = (1000+nppdata)*2;
ppdata = realloc(ppdata, ppalloc * sizeof ppdata[0]);
if(ppdata == nil) {
fprint(2, "prof: realloc failed: %r\n");
exit(2);
}
}
ppdata[nppdata++] = pc;
}
void
startpptrace()
{
ppstart = nppdata;
addppword(~0);
}
void
endpptrace()
{
ppdata[ppstart] = nppdata-ppstart-1;
}
uvlong nextpc;
void
xptrace(Map *map, uvlong pc, uvlong sp, Symbol *sym)
{
USED(map);
char buf[1024];
if(sym == nil){
fprint(2, "syms\n");
return;
}
if(histograms)
addtohistogram(nextpc, pc, sp);
if(!histograms || stacks > 1 || pprof) {
if(nextpc == 0)
nextpc = sym->value;
if(stacks){
fprint(2, "%s(", sym->name);
fprint(2, ")");
if(nextpc != sym->value)
fprint(2, "+%#llux ", nextpc - sym->value);
if(have_syms && linenums && fileline(buf, sizeof buf, pc)) {
fprint(2, " %s", buf);
}
fprint(2, "\n");
}
if (pprof) {
addppword(nextpc);
}
}
nextpc = pc;
}
void
stacktracepcsp(Map *map, uvlong pc, uvlong sp)
{
nextpc = pc;
if(pprof){
startpptrace();
}
if(machdata->ctrace==nil)
fprint(2, "no machdata->ctrace\n");
else if(machdata->ctrace(map, pc, sp, 0, xptrace) <= 0)
fprint(2, "no stack frame: pc=%#p sp=%#p\n", pc, sp);
else {
addtohistogram(nextpc, 0, sp);
if(stacks)
fprint(2, "\n");
}
if(pprof){
endpptrace();
}
}
void
printpc(Map *map, uvlong pc, uvlong sp)
{
char buf[1024];
if(registers)
arch->regprint();
if(have_syms > 0 && linenums && fileline(buf, sizeof buf, pc))
fprint(2, "%s\n", buf);
if(have_syms > 0 && functions) {
symoff(buf, sizeof(buf), pc, CANY);
fprint(2, "%s\n", buf);
}
if(stacks || pprof){
stacktracepcsp(map, pc, sp);
}
else if(histograms){
addtohistogram(pc, 0, sp);
}
}
void
ppmaps(void)
{
int fd, n;
char tmp[100];
Seg *seg;
// If it's Linux, the info is in /proc/$pid/maps
snprint(tmp, sizeof tmp, "/proc/%d/maps", pid);
fd = open(tmp, 0);
if(fd >= 0) {
n = read(fd, ppmapdata, sizeof ppmapdata - 1);
close(fd);
if(n < 0) {
fprint(2, "prof: can't read %s: %r\n", tmp);
exit(2);
}
ppmapdata[n] = 0;
return;
}
// It's probably a mac. Synthesize an entry for the text file.
// The register segment may come first but it has a zero offset, so grab the first non-zero offset segment.
for(n = 0; n < 3; n++){
seg = &map[0]->seg[n];
if(seg->b == 0) {
continue;
}
snprint(ppmapdata, sizeof ppmapdata,
"%.16x-%.16x r-xp %d 00:00 34968549 %s\n",
seg->b, seg->e, seg->f, "/home/r/6.out"
);
return;
}
fprint(2, "prof: no text segment in maps for %s\n", file);
exit(2);
}
void
samples(void)
{
int i, pid, msec;
struct timespec req;
int getmaps;
req.tv_sec = delta_msec/1000;
req.tv_nsec = 1000000*(delta_msec % 1000);
getmaps = 0;
if(pprof)
getmaps= 1;
for(msec = 0; total_sec <= 0 || msec < 1000*total_sec; msec += delta_msec) {
nsample++;
nsamplethread += nthread;
for(i = 0; i < nthread; i++) {
pid = thread[i];
if(ctlproc(pid, "stop") < 0)
return;
if(!sample(map[i])) {
ctlproc(pid, "start");
return;
}
printpc(map[i], arch->uregPC(), arch->uregSP());
ctlproc(pid, "start");
}
nanosleep(&req, NULL);
getthreads();
if(nthread == 0)
break;
if(getmaps) {
getmaps = 0;
ppmaps();
}
}
}
typedef struct Func Func;
struct Func
{
Func *next;
Symbol s;
uint onstack;
uint leaf;
};
Func *func[257];
int nfunc;
Func*
findfunc(uvlong pc)
{
Func *f;
uint h;
Symbol s;
if(pc == 0)
return nil;
if(!findsym(pc, CTEXT, &s))
return nil;
h = s.value % nelem(func);
for(f = func[h]; f != NULL; f = f->next)
if(f->s.value == s.value)
return f;
f = malloc(sizeof *f);
memset(f, 0, sizeof *f);
f->s = s;
f->next = func[h];
func[h] = f;
nfunc++;
return f;
}
int
compareleaf(const void *va, const void *vb)
{
Func *a, *b;
a = *(Func**)va;
b = *(Func**)vb;
if(a->leaf != b->leaf)
return b->leaf - a->leaf;
if(a->onstack != b->onstack)
return b->onstack - a->onstack;
return strcmp(a->s.name, b->s.name);
}
void
dumphistogram()
{
int i, h, n;
PC *x;
Func *f, **ff;
if(!histograms)
return;
// assign counts to functions.
for(h = 0; h < Ncounters; h++) {
for(x = counters[h]; x != NULL; x = x->next) {
f = findfunc(x->pc);
if(f) {
f->onstack += x->count;
f->leaf += x->count;
}
f = findfunc(x->callerpc);
if(f)
f->leaf -= x->count;
}
}
// build array
ff = malloc(nfunc*sizeof ff[0]);
n = 0;
for(h = 0; h < nelem(func); h++)
for(f = func[h]; f != NULL; f = f->next)
ff[n++] = f;
// sort by leaf counts
qsort(ff, nfunc, sizeof ff[0], compareleaf);
// print.
fprint(2, "%d samples (avg %.1g threads)\n", nsample, (double)nsamplethread/nsample);
for(i = 0; i < nfunc; i++) {
f = ff[i];
fprint(2, "%6.2f%%\t", 100.0*(double)f->leaf/nsample);
if(stacks)
fprint(2, "%6.2f%%\t", 100.0*(double)f->onstack/nsample);
fprint(2, "%s\n", f->s.name);
}
}
typedef struct Trace Trace;
struct Trace {
int count;
int npc;
uvlong *pc;
Trace *next;
};
void
dumppprof()
{
uvlong i, n, *p, *e;
int ntrace;
Trace *trace, *tp, *up, *prev;
if(!pprof)
return;
e = ppdata + nppdata;
// Create list of traces. First, count the traces
ntrace = 0;
for(p = ppdata; p < e;) {
n = *p++;
p += n;
if(n == 0)
continue;
ntrace++;
}
if(ntrace <= 0)
return;
// Allocate and link the traces together.
trace = malloc(ntrace * sizeof(Trace));
tp = trace;
for(p = ppdata; p < e;) {
n = *p++;
if(n == 0)
continue;
tp->count = 1;
tp->npc = n;
tp->pc = p;
tp->next = tp+1;
tp++;
p += n;
}
trace[ntrace-1].next = nil;
// Eliminate duplicates. Lousy algorithm, although not as bad as it looks because
// the list collapses fast.
for(tp = trace; tp != nil; tp = tp->next) {
prev = tp;
for(up = tp->next; up != nil; up = up->next) {
if(up->npc == tp->npc && memcmp(up->pc, tp->pc, up->npc*sizeof up->pc[0]) == 0) {
tp->count++;
prev->next = up->next;
} else {
prev = up;
}
}
}
// Write file.
// See http://code.google.com/p/google-perftools/source/browse/trunk/doc/cpuprofile-fileformat.html
// 1) Header
arch->ppword(0); // must be zero
arch->ppword(3); // 3 words follow in header
arch->ppword(0); // must be zero
arch->ppword(delta_msec * 1000); // sampling period in microseconds
arch->ppword(0); // must be zero (padding)
// 2) One record for each trace.
for(tp = trace; tp != nil; tp = tp->next) {
arch->ppword(tp->count);
arch->ppword(tp->npc);
for(i = 0; i < tp->npc; i++) {
arch->ppword(tp->pc[i]);
}
}
// 3) Binary trailer
arch->ppword(0); // must be zero
arch->ppword(1); // must be one
arch->ppword(0); // must be zero
// 4) Mapped objects.
Bwrite(pproffd, ppmapdata, strlen(ppmapdata));
// 5) That's it.
Bterm(pproffd);
}
int
startprocess(char **argv)
{
int pid;
if((pid = fork()) == 0) {
pid = getpid();
if(ctlproc(pid, "hang") < 0){
fprint(2, "prof: child process could not hang\n");
exits(0);
}
execv(argv[0], argv);
fprint(2, "prof: could not exec %s: %r\n", argv[0]);
exits(0);
}
if(pid == -1) {
fprint(2, "prof: could not fork\n");
exit(1);
}
if(ctlproc(pid, "attached") < 0 || ctlproc(pid, "waitstop") < 0) {
fprint(2, "prof: could not attach to child process: %r\n");
exit(1);
}
return pid;
}
void
detach(void)
{
int i;
for(i = 0; i < nthread; i++)
detachproc(map[i]);
}
int
main(int argc, char *argv[])
{
int i;
char *ppfile;
ARGBEGIN{
case 'P':
pprof =1;
ppfile = EARGF(Usage());
pproffd = Bopen(ppfile, OWRITE);
if(pproffd == nil) {
fprint(2, "prof: cannot open %s: %r\n", ppfile);
exit(2);
}
break;
case 'd':
delta_msec = atoi(EARGF(Usage()));
break;
case 't':
total_sec = atoi(EARGF(Usage()));
break;
case 'p':
pid = atoi(EARGF(Usage()));
break;
case 'f':
functions = 1;
break;
case 'h':
histograms = 1;
break;
case 'l':
linenums = 1;
break;
case 'r':
registers = 1;
break;
case 's':
stacks++;
break;
default:
Usage();
}ARGEND
if(pid <= 0 && argc == 0)
Usage();
if(functions+linenums+registers+stacks+pprof == 0)
histograms = 1;
if(!machbyname("amd64")) {
fprint(2, "prof: no amd64 support\n", pid);
exit(1);
}
if(argc > 0)
file = argv[0];
else if(pid) {
file = proctextfile(pid);
if (file == NULL) {
fprint(2, "prof: can't find file for pid %d: %r\n", pid);
fprint(2, "prof: on Darwin, need to provide file name explicitly\n");
exit(1);
}
}
fd = open(file, 0);
if(fd < 0) {
fprint(2, "prof: can't open %s: %r\n", file);
exit(1);
}
if(crackhdr(fd, &fhdr)) {
have_syms = syminit(fd, &fhdr);
if(!have_syms) {
fprint(2, "prof: no symbols for %s: %r\n", file);
}
} else {
fprint(2, "prof: crack header for %s: %r\n", file);
exit(1);
}
if(pid <= 0)
pid = startprocess(argv);
attachproc(pid, &fhdr); // initializes thread list
if(setarch() < 0) {
detach();
fprint(2, "prof: can't identify binary architecture for pid %d\n", pid);
exit(1);
}
if(getthreads() <= 0) {
detach();
fprint(2, "prof: can't find threads for pid %d\n", pid);
exit(1);
}
for(i = 0; i < nthread; i++)
ctlproc(thread[i], "start");
samples();
detach();
dumphistogram();
dumppprof();
exit(0);
}
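
The dumppprof function above writes the legacy pprof CPU-profile layout it links to: a five-word header, one record per distinct stack trace, a three-word binary trailer, and finally the text of the process's memory mappings. A hedged Go sketch of the same layout follows; the names are mine, and it assumes amd64's 8-byte little-endian words (the amd64_ppword case above), not the 4-byte x86 variant.

package ppsketch

import (
	"encoding/binary"
	"io"
)

// trace is one distinct sampled call stack and how often it was seen.
type trace struct {
	count uint64
	pcs   []uint64 // program counters, leaf first
}

// writeCPUProfile emits the same word stream as dumppprof above.
func writeCPUProfile(w io.Writer, periodUsec uint64, traces []trace, maps string) error {
	word := func(v uint64) error { return binary.Write(w, binary.LittleEndian, v) }
	// 1) Header: 0, 3 (header words to follow), 0, sampling period in microseconds, 0.
	for _, v := range []uint64{0, 3, 0, periodUsec, 0} {
		if err := word(v); err != nil {
			return err
		}
	}
	// 2) One record per trace: count, number of PCs, then the PCs themselves.
	for _, t := range traces {
		if err := word(t.count); err != nil {
			return err
		}
		if err := word(uint64(len(t.pcs))); err != nil {
			return err
		}
		for _, pc := range t.pcs {
			if err := word(pc); err != nil {
				return err
			}
		}
	}
	// 3) Binary trailer: 0, 1, 0.
	for _, v := range []uint64{0, 1, 0} {
		if err := word(v); err != nil {
			return err
		}
	}
	// 4) Mapped objects, as /proc/<pid>/maps-style text.
	_, err := io.WriteString(w, maps)
	return err
}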


@@ -1,3 +0,0 @@
This directory tree contains experimental packages and
unfinished code that is subject to even more change than the
rest of the Go tree.


@@ -1,269 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package ebnf is a library for EBNF grammars. The input is text ([]byte)
// satisfying the following grammar (represented itself in EBNF):
//
// Production = name "=" [ Expression ] "." .
// Expression = Alternative { "|" Alternative } .
// Alternative = Term { Term } .
// Term = name | token [ "…" token ] | Group | Option | Repetition .
// Group = "(" Expression ")" .
// Option = "[" Expression "]" .
// Repetition = "{" Expression "}" .
//
// A name is a Go identifier, a token is a Go string, and comments
// and white space follow the same rules as for the Go language.
// Production names starting with an uppercase Unicode letter denote
// non-terminal productions (i.e., productions which allow white-space
// and comments between tokens); all other production names denote
// lexical productions.
//
package ebnf
import (
"errors"
"fmt"
"text/scanner"
"unicode"
"unicode/utf8"
)
// ----------------------------------------------------------------------------
// Error handling
type errorList []error
func (list errorList) Err() error {
if len(list) == 0 {
return nil
}
return list
}
func (list errorList) Error() string {
switch len(list) {
case 0:
return "no errors"
case 1:
return list[0].Error()
}
return fmt.Sprintf("%s (and %d more errors)", list[0], len(list)-1)
}
func newError(pos scanner.Position, msg string) error {
return errors.New(fmt.Sprintf("%s: %s", pos, msg))
}
// ----------------------------------------------------------------------------
// Internal representation
type (
// An Expression node represents a production expression.
Expression interface {
// Pos is the position of the first character of the syntactic construct
Pos() scanner.Position
}
// An Alternative node represents a non-empty list of alternative expressions.
Alternative []Expression // x | y | z
// A Sequence node represents a non-empty list of sequential expressions.
Sequence []Expression // x y z
// A Name node represents a production name.
Name struct {
StringPos scanner.Position
String string
}
// A Token node represents a literal.
Token struct {
StringPos scanner.Position
String string
}
// A List node represents a range of characters.
Range struct {
Begin, End *Token // begin ... end
}
// A Group node represents a grouped expression.
Group struct {
Lparen scanner.Position
Body Expression // (body)
}
// An Option node represents an optional expression.
Option struct {
Lbrack scanner.Position
Body Expression // [body]
}
// A Repetition node represents a repeated expression.
Repetition struct {
Lbrace scanner.Position
Body Expression // {body}
}
// A Production node represents an EBNF production.
Production struct {
Name *Name
Expr Expression
}
// A Bad node stands for pieces of source code that lead to a parse error.
Bad struct {
TokPos scanner.Position
Error string // parser error message
}
// A Grammar is a set of EBNF productions. The map
// is indexed by production name.
//
Grammar map[string]*Production
)
func (x Alternative) Pos() scanner.Position { return x[0].Pos() } // the parser always generates non-empty Alternative
func (x Sequence) Pos() scanner.Position { return x[0].Pos() } // the parser always generates non-empty Sequences
func (x *Name) Pos() scanner.Position { return x.StringPos }
func (x *Token) Pos() scanner.Position { return x.StringPos }
func (x *Range) Pos() scanner.Position { return x.Begin.Pos() }
func (x *Group) Pos() scanner.Position { return x.Lparen }
func (x *Option) Pos() scanner.Position { return x.Lbrack }
func (x *Repetition) Pos() scanner.Position { return x.Lbrace }
func (x *Production) Pos() scanner.Position { return x.Name.Pos() }
func (x *Bad) Pos() scanner.Position { return x.TokPos }
// ----------------------------------------------------------------------------
// Grammar verification
func isLexical(name string) bool {
ch, _ := utf8.DecodeRuneInString(name)
return !unicode.IsUpper(ch)
}
type verifier struct {
errors errorList
worklist []*Production
reached Grammar // set of productions reached from (and including) the root production
grammar Grammar
}
func (v *verifier) error(pos scanner.Position, msg string) {
v.errors = append(v.errors, newError(pos, msg))
}
func (v *verifier) push(prod *Production) {
name := prod.Name.String
if _, found := v.reached[name]; !found {
v.worklist = append(v.worklist, prod)
v.reached[name] = prod
}
}
func (v *verifier) verifyChar(x *Token) rune {
s := x.String
if utf8.RuneCountInString(s) != 1 {
v.error(x.Pos(), "single char expected, found "+s)
return 0
}
ch, _ := utf8.DecodeRuneInString(s)
return ch
}
func (v *verifier) verifyExpr(expr Expression, lexical bool) {
switch x := expr.(type) {
case nil:
// empty expression
case Alternative:
for _, e := range x {
v.verifyExpr(e, lexical)
}
case Sequence:
for _, e := range x {
v.verifyExpr(e, lexical)
}
case *Name:
// a production with this name must exist;
// add it to the worklist if not yet processed
if prod, found := v.grammar[x.String]; found {
v.push(prod)
} else {
v.error(x.Pos(), "missing production "+x.String)
}
// within a lexical production references
// to non-lexical productions are invalid
if lexical && !isLexical(x.String) {
v.error(x.Pos(), "reference to non-lexical production "+x.String)
}
case *Token:
// nothing to do for now
case *Range:
i := v.verifyChar(x.Begin)
j := v.verifyChar(x.End)
if i >= j {
v.error(x.Pos(), "decreasing character range")
}
case *Group:
v.verifyExpr(x.Body, lexical)
case *Option:
v.verifyExpr(x.Body, lexical)
case *Repetition:
v.verifyExpr(x.Body, lexical)
case *Bad:
v.error(x.Pos(), x.Error)
default:
panic(fmt.Sprintf("internal error: unexpected type %T", expr))
}
}
func (v *verifier) verify(grammar Grammar, start string) {
// find root production
root, found := grammar[start]
if !found {
var noPos scanner.Position
v.error(noPos, "no start production "+start)
return
}
// initialize verifier
v.worklist = v.worklist[0:0]
v.reached = make(Grammar)
v.grammar = grammar
// work through the worklist
v.push(root)
for {
n := len(v.worklist) - 1
if n < 0 {
break
}
prod := v.worklist[n]
v.worklist = v.worklist[0:n]
v.verifyExpr(prod.Expr, isLexical(prod.Name.String))
}
// check if all productions were reached
if len(v.reached) < len(v.grammar) {
for name, prod := range v.grammar {
if _, found := v.reached[name]; !found {
v.error(prod.Pos(), name+" is unreachable")
}
}
}
}
// Verify checks that:
// - all productions used are defined
// - all productions defined are used when beginning at start
// - lexical productions refer only to other lexical productions
//
// Position information is interpreted relative to the file set fset.
//
func Verify(grammar Grammar, start string) error {
var v verifier
v.verify(grammar, start)
return v.errors.Err()
}
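
Since the file above defines the package's two entry points, Parse and Verify, a short usage sketch may help. It is not from the commit: the grammar text and file name are made up, and the import path is the exp/ebnf path used by ebnflint elsewhere in this tree at the time.

package main

import (
	"fmt"
	"log"
	"strings"

	"exp/ebnf" // import path as used by ebnflint in this tree
)

func main() {
	const src = `
Program = { Statement } .
Statement = ident "=" ident "." .
ident = "a" … "z" .
`
	grammar, err := ebnf.Parse("example.ebnf", strings.NewReader(src))
	if err != nil {
		log.Fatal(err)
	}
	if err := ebnf.Verify(grammar, "Program"); err != nil {
		log.Fatal(err)
	}
	fmt.Println("grammar OK:", len(grammar), "productions")
}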


@@ -1,71 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ebnf
import (
"bytes"
"testing"
)
var goodGrammars = []string{
`Program = .`,
`Program = foo .
foo = "foo" .`,
`Program = "a" | "b" "c" .`,
`Program = "a" … "z" .`,
`Program = Song .
Song = { Note } .
Note = Do | (Re | Mi | Fa | So | La) | Ti .
Do = "c" .
Re = "d" .
Mi = "e" .
Fa = "f" .
So = "g" .
La = "a" .
Ti = ti .
ti = "b" .`,
}
var badGrammars = []string{
`Program = | .`,
`Program = | b .`,
`Program = a … b .`,
`Program = "a" … .`,
`Program = … "b" .`,
`Program = () .`,
`Program = [] .`,
`Program = {} .`,
}
func checkGood(t *testing.T, src string) {
grammar, err := Parse("", bytes.NewBuffer([]byte(src)))
if err != nil {
t.Errorf("Parse(%s) failed: %v", src, err)
return
}
if err = Verify(grammar, "Program"); err != nil {
t.Errorf("Verify(%s) failed: %v", src, err)
}
}
func checkBad(t *testing.T, src string) {
_, err := Parse("", bytes.NewBuffer([]byte(src)))
if err == nil {
t.Errorf("Parse(%s) should have failed", src)
}
}
func TestGrammars(t *testing.T) {
for _, src := range goodGrammars {
checkGood(t, src)
}
for _, src := range badGrammars {
checkBad(t, src)
}
}


@@ -1,190 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ebnf
import (
"io"
"strconv"
"text/scanner"
)
type parser struct {
errors errorList
scanner scanner.Scanner
pos scanner.Position // token position
tok rune // one token look-ahead
lit string // token literal
}
func (p *parser) next() {
p.tok = p.scanner.Scan()
p.pos = p.scanner.Position
p.lit = p.scanner.TokenText()
}
func (p *parser) error(pos scanner.Position, msg string) {
p.errors = append(p.errors, newError(pos, msg))
}
func (p *parser) errorExpected(pos scanner.Position, msg string) {
msg = `expected "` + msg + `"`
if pos.Offset == p.pos.Offset {
// the error happened at the current position;
// make the error message more specific
msg += ", found " + scanner.TokenString(p.tok)
if p.tok < 0 {
msg += " " + p.lit
}
}
p.error(pos, msg)
}
func (p *parser) expect(tok rune) scanner.Position {
pos := p.pos
if p.tok != tok {
p.errorExpected(pos, scanner.TokenString(tok))
}
p.next() // make progress in any case
return pos
}
func (p *parser) parseIdentifier() *Name {
pos := p.pos
name := p.lit
p.expect(scanner.Ident)
return &Name{pos, name}
}
func (p *parser) parseToken() *Token {
pos := p.pos
value := ""
if p.tok == scanner.String {
value, _ = strconv.Unquote(p.lit)
// Unquote may fail with an error, but only if the scanner found
// an illegal string in the first place. In this case the error
// has already been reported.
p.next()
} else {
p.expect(scanner.String)
}
return &Token{pos, value}
}
// ParseTerm returns nil if no term was found.
func (p *parser) parseTerm() (x Expression) {
pos := p.pos
switch p.tok {
case scanner.Ident:
x = p.parseIdentifier()
case scanner.String:
tok := p.parseToken()
x = tok
const ellipsis = '…' // U+2026, the horizontal ellipsis character
if p.tok == ellipsis {
p.next()
x = &Range{tok, p.parseToken()}
}
case '(':
p.next()
x = &Group{pos, p.parseExpression()}
p.expect(')')
case '[':
p.next()
x = &Option{pos, p.parseExpression()}
p.expect(']')
case '{':
p.next()
x = &Repetition{pos, p.parseExpression()}
p.expect('}')
}
return x
}
func (p *parser) parseSequence() Expression {
var list Sequence
for x := p.parseTerm(); x != nil; x = p.parseTerm() {
list = append(list, x)
}
// no need for a sequence if list.Len() < 2
switch len(list) {
case 0:
p.errorExpected(p.pos, "term")
return &Bad{p.pos, "term expected"}
case 1:
return list[0]
}
return list
}
func (p *parser) parseExpression() Expression {
var list Alternative
for {
list = append(list, p.parseSequence())
if p.tok != '|' {
break
}
p.next()
}
// len(list) > 0
// no need for an Alternative node if list.Len() < 2
if len(list) == 1 {
return list[0]
}
return list
}
func (p *parser) parseProduction() *Production {
name := p.parseIdentifier()
p.expect('=')
var expr Expression
if p.tok != '.' {
expr = p.parseExpression()
}
p.expect('.')
return &Production{name, expr}
}
func (p *parser) parse(filename string, src io.Reader) Grammar {
p.scanner.Init(src)
p.scanner.Filename = filename
p.next() // initializes pos, tok, lit
grammar := make(Grammar)
for p.tok != scanner.EOF {
prod := p.parseProduction()
name := prod.Name.String
if _, found := grammar[name]; !found {
grammar[name] = prod
} else {
p.error(prod.Pos(), name+" declared already")
}
}
return grammar
}
// Parse parses a set of EBNF productions from source src.
// It returns a set of productions. Errors are reported
// for incorrect syntax and if a production is declared
// more than once; the filename is used only for error
// positions.
//
func Parse(filename string, src io.Reader) (Grammar, error) {
var p parser
grammar := p.parse(filename, src)
return grammar, p.errors.Err()
}


@@ -1,22 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/*
Ebnflint verifies that EBNF productions are consistent and grammatically correct.
It reads them from an HTML document such as the Go specification.
Grammar productions are grouped in boxes demarcated by the HTML elements
<pre class="ebnf">
</pre>
Usage:
go tool ebnflint [--start production] [file]
The --start flag specifies the name of the start production for
the grammar; it defaults to "Start".
*/
package documentation


@@ -1,122 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
import (
"bytes"
"exp/ebnf"
"flag"
"fmt"
"go/scanner"
"go/token"
"io"
"io/ioutil"
"os"
"path/filepath"
)
var fset = token.NewFileSet()
var start = flag.String("start", "Start", "name of start production")
func usage() {
fmt.Fprintf(os.Stderr, "usage: go tool ebnflint [flags] [filename]\n")
flag.PrintDefaults()
os.Exit(1)
}
// Markers around EBNF sections in .html files
var (
open = []byte(`<pre class="ebnf">`)
close = []byte(`</pre>`)
)
func report(err error) {
scanner.PrintError(os.Stderr, err)
os.Exit(1)
}
func extractEBNF(src []byte) []byte {
var buf bytes.Buffer
for {
// i = beginning of EBNF text
i := bytes.Index(src, open)
if i < 0 {
break // no EBNF found - we are done
}
i += len(open)
// write as many newlines as found in the excluded text
// to maintain correct line numbers in error messages
for _, ch := range src[0:i] {
if ch == '\n' {
buf.WriteByte('\n')
}
}
// j = end of EBNF text (or end of source)
j := bytes.Index(src[i:], close) // close marker
if j < 0 {
j = len(src) - i
}
j += i
// copy EBNF text
buf.Write(src[i:j])
// advance
src = src[j:]
}
return buf.Bytes()
}
func main() {
flag.Parse()
var (
name string
r io.Reader
)
switch flag.NArg() {
case 0:
name, r = "<stdin>", os.Stdin
case 1:
name = flag.Arg(0)
default:
usage()
}
if err := verify(name, *start, r); err != nil {
report(err)
}
}
func verify(name, start string, r io.Reader) error {
if r == nil {
f, err := os.Open(name)
if err != nil {
return err
}
defer f.Close()
r = f
}
src, err := ioutil.ReadAll(r)
if err != nil {
return err
}
if filepath.Ext(name) == ".html" || bytes.Index(src, open) >= 0 {
src = extractEBNF(src)
}
grammar, err := ebnf.Parse(name, bytes.NewBuffer(src))
if err != nil {
return err
}
return ebnf.Verify(grammar, start)
}


@@ -1,16 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
import (
"runtime"
"testing"
)
func TestSpec(t *testing.T) {
if err := verify(runtime.GOROOT()+"/doc/go_spec.html", "SourceFile", nil); err != nil {
t.Fatal(err)
}
}


@@ -1,63 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/*
The gotype command does syntactic and semantic analysis of Go files
and packages similar to the analysis performed by the front-end of
a Go compiler. Errors are reported if the analysis fails; otherwise
gotype is quiet (unless -v is set).
Without a list of paths, gotype processes the standard input, which must
be the source of a single package file.
Given a list of file names, each file must be a source file belonging to
the same package unless the package name is explicitly specified with the
-p flag.
Given a directory name, gotype collects all .go files in the directory
and processes them as if they were provided as an explicit list of file
names. Each directory is processed independently. Files starting with .
or not ending in .go are ignored.
Usage:
gotype [flags] [path ...]
The flags are:
-e
Print all (including spurious) errors.
-p pkgName
Process only those files in package pkgName.
-r
Recursively process subdirectories.
-v
Verbose mode.
Debugging flags:
-comments
Parse comments (ignored if -ast not set).
-ast
Print AST (disables concurrent parsing).
-trace
Print parse trace (disables concurrent parsing).
Examples
To check the files file.go, old.saved, and .ignored:
gotype file.go old.saved .ignored
To check all .go files belonging to package main in the current directory
and recursively in all subdirectories:
gotype -p main -r .
To verify the output of a pipe:
echo "package foo" | gotype
*/
package documentation
// BUG(gri): At the moment, only single-file scope analysis is performed.


@@ -1,197 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
import (
"errors"
"exp/types"
"flag"
"fmt"
"go/ast"
"go/parser"
"go/scanner"
"go/token"
"io/ioutil"
"os"
"path/filepath"
"strings"
)
var (
// main operation modes
pkgName = flag.String("p", "", "process only those files in package pkgName")
recursive = flag.Bool("r", false, "recursively process subdirectories")
verbose = flag.Bool("v", false, "verbose mode")
allErrors = flag.Bool("e", false, "print all (including spurious) errors")
// debugging support
parseComments = flag.Bool("comments", false, "parse comments (ignored if -ast not set)")
printTrace = flag.Bool("trace", false, "print parse trace")
printAST = flag.Bool("ast", false, "print AST")
)
var exitCode = 0
func usage() {
fmt.Fprintf(os.Stderr, "usage: gotype [flags] [path ...]\n")
flag.PrintDefaults()
os.Exit(2)
}
func report(err error) {
scanner.PrintError(os.Stderr, err)
exitCode = 2
}
// parse returns the AST for the Go source src.
// The filename is for error reporting only.
// The result is nil if there were errors or if
// the file does not belong to the -p package.
func parse(fset *token.FileSet, filename string, src []byte) *ast.File {
if *verbose {
fmt.Println(filename)
}
// ignore files with different package name
if *pkgName != "" {
file, err := parser.ParseFile(fset, filename, src, parser.PackageClauseOnly)
if err != nil {
report(err)
return nil
}
if file.Name.Name != *pkgName {
if *verbose {
fmt.Printf("\tignored (package %s)\n", file.Name.Name)
}
return nil
}
}
// parse entire file
mode := parser.DeclarationErrors
if *allErrors {
mode |= parser.SpuriousErrors
}
if *parseComments && *printAST {
mode |= parser.ParseComments
}
if *printTrace {
mode |= parser.Trace
}
file, err := parser.ParseFile(fset, filename, src, mode)
if err != nil {
report(err)
return nil
}
if *printAST {
ast.Print(fset, file)
}
return file
}
func parseStdin(fset *token.FileSet) (files map[string]*ast.File) {
files = make(map[string]*ast.File)
src, err := ioutil.ReadAll(os.Stdin)
if err != nil {
report(err)
return
}
const filename = "<standard input>"
if file := parse(fset, filename, src); file != nil {
files[filename] = file
}
return
}
func parseFiles(fset *token.FileSet, filenames []string) (files map[string]*ast.File) {
files = make(map[string]*ast.File)
for _, filename := range filenames {
src, err := ioutil.ReadFile(filename)
if err != nil {
report(err)
continue
}
if file := parse(fset, filename, src); file != nil {
if files[filename] != nil {
report(errors.New(fmt.Sprintf("%q: duplicate file", filename)))
continue
}
files[filename] = file
}
}
return
}
func isGoFilename(filename string) bool {
// ignore non-Go files
return !strings.HasPrefix(filename, ".") && strings.HasSuffix(filename, ".go")
}
func processDirectory(dirname string) {
f, err := os.Open(dirname)
if err != nil {
report(err)
return
}
filenames, err := f.Readdirnames(-1)
f.Close()
if err != nil {
report(err)
// continue since filenames may not be empty
}
for i, filename := range filenames {
filenames[i] = filepath.Join(dirname, filename)
}
processFiles(filenames, false)
}
func processFiles(filenames []string, allFiles bool) {
i := 0
for _, filename := range filenames {
switch info, err := os.Stat(filename); {
case err != nil:
report(err)
case info.IsDir():
if allFiles || *recursive {
processDirectory(filename)
}
default:
if allFiles || isGoFilename(info.Name()) {
filenames[i] = filename
i++
}
}
}
fset := token.NewFileSet()
processPackage(fset, parseFiles(fset, filenames[0:i]))
}
func processPackage(fset *token.FileSet, files map[string]*ast.File) {
// make a package (resolve all identifiers)
pkg, err := ast.NewPackage(fset, files, types.GcImport, types.Universe)
if err != nil {
report(err)
return
}
_, err = types.Check(fset, pkg)
if err != nil {
report(err)
}
}
func main() {
flag.Usage = usage
flag.Parse()
if flag.NArg() == 0 {
fset := token.NewFileSet()
processPackage(fset, parseStdin(fset))
} else {
processFiles(flag.Args(), true)
}
os.Exit(exitCode)
}


@@ -1,49 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
import (
"path/filepath"
"runtime"
"testing"
)
func runTest(t *testing.T, path, pkg string) {
exitCode = 0
*pkgName = pkg
*recursive = false
if pkg == "" {
processFiles([]string{path}, true)
} else {
processDirectory(path)
}
if exitCode != 0 {
t.Errorf("processing %s failed: exitCode = %d", path, exitCode)
}
}
var tests = []struct {
path string
pkg string
}{
// individual files
{"testdata/test1.go", ""},
// directories
{filepath.Join(runtime.GOROOT(), "src/pkg/go/ast"), "ast"},
{filepath.Join(runtime.GOROOT(), "src/pkg/go/doc"), "doc"},
{filepath.Join(runtime.GOROOT(), "src/pkg/go/token"), "scanner"},
{filepath.Join(runtime.GOROOT(), "src/pkg/go/scanner"), "scanner"},
{filepath.Join(runtime.GOROOT(), "src/pkg/go/parser"), "parser"},
{filepath.Join(runtime.GOROOT(), "src/pkg/exp/types"), "types"},
}
func Test(t *testing.T) {
for _, test := range tests {
runTest(t, test.path, test.pkg)
}
}

View File

@@ -1,27 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package p
func _() {
// the scope of a local type declaration starts immediately after the type name
type T struct{ _ *T }
}
func _(x interface{}) {
// the variable defined by a TypeSwitchGuard is declared in each TypeCaseClause
switch t := x.(type) {
case int:
_ = t
case float32:
_ = t
default:
_ = t
}
// the variable defined by a TypeSwitchGuard must not conflict with other
// variables declared in the initial simple statement
switch t := 0; t := x.(type) {
}
}

View File

@@ -1,100 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
// Section 12.2.3.2 of the HTML5 specification says "The following elements
// have varying levels of special parsing rules".
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements
var isSpecialElementMap = map[string]bool{
"address": true,
"applet": true,
"area": true,
"article": true,
"aside": true,
"base": true,
"basefont": true,
"bgsound": true,
"blockquote": true,
"body": true,
"br": true,
"button": true,
"caption": true,
"center": true,
"col": true,
"colgroup": true,
"command": true,
"dd": true,
"details": true,
"dir": true,
"div": true,
"dl": true,
"dt": true,
"embed": true,
"fieldset": true,
"figcaption": true,
"figure": true,
"footer": true,
"form": true,
"frame": true,
"frameset": true,
"h1": true,
"h2": true,
"h3": true,
"h4": true,
"h5": true,
"h6": true,
"head": true,
"header": true,
"hgroup": true,
"hr": true,
"html": true,
"iframe": true,
"img": true,
"input": true,
"isindex": true,
"li": true,
"link": true,
"listing": true,
"marquee": true,
"menu": true,
"meta": true,
"nav": true,
"noembed": true,
"noframes": true,
"noscript": true,
"object": true,
"ol": true,
"p": true,
"param": true,
"plaintext": true,
"pre": true,
"script": true,
"section": true,
"select": true,
"style": true,
"summary": true,
"table": true,
"tbody": true,
"td": true,
"textarea": true,
"tfoot": true,
"th": true,
"thead": true,
"title": true,
"tr": true,
"ul": true,
"wbr": true,
"xmp": true,
}
func isSpecialElement(element *Node) bool {
switch element.Namespace {
case "", "html":
return isSpecialElementMap[element.Data]
case "svg":
return element.Data == "foreignObject"
}
return false
}

View File

@@ -1,107 +0,0 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/*
Package html implements an HTML5-compliant tokenizer and parser.
INCOMPLETE.
Tokenization is done by creating a Tokenizer for an io.Reader r. It is the
caller's responsibility to ensure that r provides UTF-8 encoded HTML.
z := html.NewTokenizer(r)
Given a Tokenizer z, the HTML is tokenized by repeatedly calling z.Next(),
which parses the next token and returns its type, or an error:
for {
tt := z.Next()
if tt == html.ErrorToken {
// ...
return ...
}
// Process the current token.
}
There are two APIs for retrieving the current token. The high-level API is to
call Token; the low-level API is to call Text or TagName / TagAttr. Both APIs
allow optionally calling Raw after Next but before Token, Text, TagName, or
TagAttr. In EBNF notation, the valid call sequence per token is:
Next {Raw} [ Token | Text | TagName {TagAttr} ]
Token returns an independent data structure that completely describes a token.
Entities (such as "&lt;") are unescaped, tag names and attribute keys are
lower-cased, and attributes are collected into a []Attribute. For example:
for {
if z.Next() == html.ErrorToken {
// Returning io.EOF indicates success.
return z.Err()
}
emitToken(z.Token())
}
The low-level API performs fewer allocations and copies, but the contents of
the []byte values returned by Text, TagName and TagAttr may change on the next
call to Next. For example, to extract an HTML page's anchor text:
depth := 0
for {
tt := z.Next()
switch tt {
case ErrorToken:
return z.Err()
case TextToken:
if depth > 0 {
// emitBytes should copy the []byte it receives,
// if it doesn't process it immediately.
emitBytes(z.Text())
}
case StartTagToken, EndTagToken:
tn, _ := z.TagName()
if len(tn) == 1 && tn[0] == 'a' {
if tt == StartTagToken {
depth++
} else {
depth--
}
}
}
}
Parsing is done by calling Parse with an io.Reader, which returns the root of
the parse tree (the document element) as a *Node. It is the caller's
responsibility to ensure that the Reader provides UTF-8 encoded HTML. For
example, to process each anchor node in depth-first order:
doc, err := html.Parse(r)
if err != nil {
// ...
}
var f func(*html.Node)
f = func(n *html.Node) {
if n.Type == html.ElementNode && n.Data == "a" {
// Do something with n...
}
for _, c := range n.Child {
f(c)
}
}
f(doc)
The relevant specifications include:
http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html and
http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html
*/
package html
// The tokenization algorithm implemented by this package is not a line-by-line
// transliteration of the relatively verbose state-machine in the WHATWG
// specification. A more direct approach is used instead, where the program
// counter implies the state, such as whether it is tokenizing a tag or a text
// node. Specification compliance is verified by checking expected and actual
// outputs over a test suite rather than aiming for algorithmic fidelity.
// TODO(nigeltao): Does a DOM API belong in this package or a separate one?
// TODO(nigeltao): How does parsing interact with a JavaScript engine?
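
Pulling the fragments of the package comment together, a self-contained sketch of the low-level anchor-text loop. The import path is an assumption (the package lived in an experimental location and is being removed in this change), and the input string is made up.

package main

import (
	"fmt"
	"strings"

	"html" // assumed import path for the package documented above
)

func main() {
	r := strings.NewReader(`<p>see <a href="/doc">the docs</a> for more</p>`)
	z := html.NewTokenizer(r)
	depth := 0
	for {
		switch tt := z.Next(); tt {
		case html.ErrorToken:
			// z.Err() is io.EOF on a clean end of input.
			fmt.Println("done:", z.Err())
			return
		case html.TextToken:
			if depth > 0 {
				// Copy or use the bytes before the next call to Next.
				fmt.Printf("anchor text: %q\n", z.Text())
			}
		case html.StartTagToken, html.EndTagToken:
			tn, _ := z.TagName()
			if len(tn) == 1 && tn[0] == 'a' {
				if tt == html.StartTagToken {
					depth++
				} else {
					depth--
				}
			}
		}
	}
}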

View File

@@ -1,156 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"strings"
)
// parseDoctype parses the data from a DoctypeToken into a name,
// public identifier, and system identifier. It returns a Node whose Type
// is DoctypeNode, whose Data is the name, and which has attributes
// named "system" and "public" for the two identifiers if they were present.
// quirks is whether the document should be parsed in "quirks mode".
func parseDoctype(s string) (n *Node, quirks bool) {
n = &Node{Type: DoctypeNode}
// Find the name.
space := strings.IndexAny(s, whitespace)
if space == -1 {
space = len(s)
}
n.Data = s[:space]
// The comparison to "html" is case-sensitive.
if n.Data != "html" {
quirks = true
}
n.Data = strings.ToLower(n.Data)
s = strings.TrimLeft(s[space:], whitespace)
if len(s) < 6 {
// It can't start with "PUBLIC" or "SYSTEM".
// Ignore the rest of the string.
return n, quirks || s != ""
}
key := strings.ToLower(s[:6])
s = s[6:]
for key == "public" || key == "system" {
s = strings.TrimLeft(s, whitespace)
if s == "" {
break
}
quote := s[0]
if quote != '"' && quote != '\'' {
break
}
s = s[1:]
q := strings.IndexRune(s, rune(quote))
var id string
if q == -1 {
id = s
s = ""
} else {
id = s[:q]
s = s[q+1:]
}
n.Attr = append(n.Attr, Attribute{Key: key, Val: id})
if key == "public" {
key = "system"
} else {
key = ""
}
}
if key != "" || s != "" {
quirks = true
} else if len(n.Attr) > 0 {
if n.Attr[0].Key == "public" {
public := strings.ToLower(n.Attr[0].Val)
switch public {
case "-//w3o//dtd w3 html strict 3.0//en//", "-/w3d/dtd html 4.0 transitional/en", "html":
quirks = true
default:
for _, q := range quirkyIDs {
if strings.HasPrefix(public, q) {
quirks = true
break
}
}
}
// The following two public IDs only cause quirks mode if there is no system ID.
if len(n.Attr) == 1 && (strings.HasPrefix(public, "-//w3c//dtd html 4.01 frameset//") ||
strings.HasPrefix(public, "-//w3c//dtd html 4.01 transitional//")) {
quirks = true
}
}
if lastAttr := n.Attr[len(n.Attr)-1]; lastAttr.Key == "system" &&
strings.ToLower(lastAttr.Val) == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd" {
quirks = true
}
}
return n, quirks
}
// quirkyIDs is a list of public doctype identifiers that cause a document
// to be interpreted in quirks mode. The identifiers should be in lower case.
var quirkyIDs = []string{
"+//silmaril//dtd html pro v0r11 19970101//",
"-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
"-//as//dtd html 3.0 aswedit + extensions//",
"-//ietf//dtd html 2.0 level 1//",
"-//ietf//dtd html 2.0 level 2//",
"-//ietf//dtd html 2.0 strict level 1//",
"-//ietf//dtd html 2.0 strict level 2//",
"-//ietf//dtd html 2.0 strict//",
"-//ietf//dtd html 2.0//",
"-//ietf//dtd html 2.1e//",
"-//ietf//dtd html 3.0//",
"-//ietf//dtd html 3.2 final//",
"-//ietf//dtd html 3.2//",
"-//ietf//dtd html 3//",
"-//ietf//dtd html level 0//",
"-//ietf//dtd html level 1//",
"-//ietf//dtd html level 2//",
"-//ietf//dtd html level 3//",
"-//ietf//dtd html strict level 0//",
"-//ietf//dtd html strict level 1//",
"-//ietf//dtd html strict level 2//",
"-//ietf//dtd html strict level 3//",
"-//ietf//dtd html strict//",
"-//ietf//dtd html//",
"-//metrius//dtd metrius presentational//",
"-//microsoft//dtd internet explorer 2.0 html strict//",
"-//microsoft//dtd internet explorer 2.0 html//",
"-//microsoft//dtd internet explorer 2.0 tables//",
"-//microsoft//dtd internet explorer 3.0 html strict//",
"-//microsoft//dtd internet explorer 3.0 html//",
"-//microsoft//dtd internet explorer 3.0 tables//",
"-//netscape comm. corp.//dtd html//",
"-//netscape comm. corp.//dtd strict html//",
"-//o'reilly and associates//dtd html 2.0//",
"-//o'reilly and associates//dtd html extended 1.0//",
"-//o'reilly and associates//dtd html extended relaxed 1.0//",
"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
"-//spyglass//dtd html 2.0 extended//",
"-//sq//dtd html 2.0 hotmetal + extensions//",
"-//sun microsystems corp.//dtd hotjava html//",
"-//sun microsystems corp.//dtd hotjava strict html//",
"-//w3c//dtd html 3 1995-03-24//",
"-//w3c//dtd html 3.2 draft//",
"-//w3c//dtd html 3.2 final//",
"-//w3c//dtd html 3.2//",
"-//w3c//dtd html 3.2s draft//",
"-//w3c//dtd html 4.0 frameset//",
"-//w3c//dtd html 4.0 transitional//",
"-//w3c//dtd html experimental 19960712//",
"-//w3c//dtd html experimental 970421//",
"-//w3c//dtd w3 html//",
"-//w3o//dtd w3 html 3.0//",
"-//webtechs//dtd mozilla html 2.0//",
"-//webtechs//dtd mozilla html//",
}
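
As a quick illustration of the function above: parseDoctype is unexported, so the following sketch would have to sit in the same package (for example as an extra test). The input string and assertions are assumptions based on the code, not one of the original tests.

package html

import "testing"

func TestParseDoctypeSketch(t *testing.T) {
	n, quirks := parseDoctype(`html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"`)
	if n.Type != DoctypeNode || n.Data != "html" {
		t.Fatalf("got type %d, name %q; want a DoctypeNode named \"html\"", n.Type, n.Data)
	}
	// A 4.01 Transitional doctype with a system identifier is not on any
	// of the quirks lists above.
	if quirks {
		t.Error("quirks = true, want false")
	}
	for _, a := range n.Attr { // expect "public" and "system" attributes
		t.Logf("%s = %q", a.Key, a.Val)
	}
}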

File diff suppressed because it is too large Load Diff

View File

@@ -1,29 +0,0 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"testing"
"unicode/utf8"
)
func TestEntityLength(t *testing.T) {
// We verify that the length of UTF-8 encoding of each value is <= 1 + len(key).
// The +1 comes from the leading "&". This property implies that the length of
// unescaped text is <= the length of escaped text.
for k, v := range entity {
if 1+len(k) < utf8.RuneLen(v) {
t.Error("escaped entity &" + k + " is shorter than its UTF-8 encoding " + string(v))
}
if len(k) > longestEntityWithoutSemicolon && k[len(k)-1] != ';' {
t.Errorf("entity name %s is %d characters, but longestEntityWithoutSemicolon=%d", k, len(k), longestEntityWithoutSemicolon)
}
}
for k, v := range entity2 {
if 1+len(k) < utf8.RuneLen(v[0])+utf8.RuneLen(v[1]) {
t.Error("escaped entity &" + k + " is shorter than its UTF-8 encoding " + string(v[0]) + string(v[1]))
}
}
}

View File

@@ -1,253 +0,0 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"bytes"
"strings"
"unicode/utf8"
)
// These replacements permit compatibility with old numeric entities that
// assumed Windows-1252 encoding.
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#consume-a-character-reference
var replacementTable = [...]rune{
'\u20AC', // First entry is what 0x80 should be replaced with.
'\u0081',
'\u201A',
'\u0192',
'\u201E',
'\u2026',
'\u2020',
'\u2021',
'\u02C6',
'\u2030',
'\u0160',
'\u2039',
'\u0152',
'\u008D',
'\u017D',
'\u008F',
'\u0090',
'\u2018',
'\u2019',
'\u201C',
'\u201D',
'\u2022',
'\u2013',
'\u2014',
'\u02DC',
'\u2122',
'\u0161',
'\u203A',
'\u0153',
'\u009D',
'\u017E',
'\u0178', // Last entry is 0x9F.
// 0x00->'\uFFFD' is handled programmatically.
// 0x0D->'\u000D' is a no-op.
}
// unescapeEntity reads an entity like "&lt;" from b[src:] and writes the
// corresponding "<" to b[dst:], returning the incremented dst and src cursors.
// Precondition: b[src] == '&' && dst <= src.
// attribute should be true if parsing an attribute value.
func unescapeEntity(b []byte, dst, src int, attribute bool) (dst1, src1 int) {
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#consume-a-character-reference
// i starts at 1 because we already know that s[0] == '&'.
i, s := 1, b[src:]
if len(s) <= 1 {
b[dst] = b[src]
return dst + 1, src + 1
}
if s[i] == '#' {
if len(s) <= 3 { // We need to have at least "&#.".
b[dst] = b[src]
return dst + 1, src + 1
}
i++
c := s[i]
hex := false
if c == 'x' || c == 'X' {
hex = true
i++
}
x := '\x00'
for i < len(s) {
c = s[i]
i++
if hex {
if '0' <= c && c <= '9' {
x = 16*x + rune(c) - '0'
continue
} else if 'a' <= c && c <= 'f' {
x = 16*x + rune(c) - 'a' + 10
continue
} else if 'A' <= c && c <= 'F' {
x = 16*x + rune(c) - 'A' + 10
continue
}
} else if '0' <= c && c <= '9' {
x = 10*x + rune(c) - '0'
continue
}
if c != ';' {
i--
}
break
}
if i <= 3 { // No characters matched.
b[dst] = b[src]
return dst + 1, src + 1
}
if 0x80 <= x && x <= 0x9F {
// Replace characters from Windows-1252 with UTF-8 equivalents.
x = replacementTable[x-0x80]
} else if x == 0 || (0xD800 <= x && x <= 0xDFFF) || x > 0x10FFFF {
// Replace invalid characters with the replacement character.
x = '\uFFFD'
}
return dst + utf8.EncodeRune(b[dst:], x), src + i
}
// Consume the maximum number of characters possible, with the
// consumed characters matching one of the named references.
for i < len(s) {
c := s[i]
i++
// Lower-cased characters are more common in entities, so we check for them first.
if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
continue
}
if c != ';' {
i--
}
break
}
entityName := string(s[1:i])
if entityName == "" {
// No-op.
} else if attribute && entityName[len(entityName)-1] != ';' && len(s) > i && s[i] == '=' {
// No-op.
} else if x := entity[entityName]; x != 0 {
return dst + utf8.EncodeRune(b[dst:], x), src + i
} else if x := entity2[entityName]; x[0] != 0 {
dst1 := dst + utf8.EncodeRune(b[dst:], x[0])
return dst1 + utf8.EncodeRune(b[dst1:], x[1]), src + i
} else if !attribute {
maxLen := len(entityName) - 1
if maxLen > longestEntityWithoutSemicolon {
maxLen = longestEntityWithoutSemicolon
}
for j := maxLen; j > 1; j-- {
if x := entity[entityName[:j]]; x != 0 {
return dst + utf8.EncodeRune(b[dst:], x), src + j + 1
}
}
}
dst1, src1 = dst+i, src+i
copy(b[dst:dst1], b[src:src1])
return dst1, src1
}
// unescape unescapes b's entities in-place, so that "a&lt;b" becomes "a<b".
func unescape(b []byte) []byte {
for i, c := range b {
if c == '&' {
dst, src := unescapeEntity(b, i, i, false)
for src < len(b) {
c := b[src]
if c == '&' {
dst, src = unescapeEntity(b, dst, src, false)
} else {
b[dst] = c
dst, src = dst+1, src+1
}
}
return b[0:dst]
}
}
return b
}
// lower lower-cases the A-Z bytes in b in-place, so that "aBc" becomes "abc".
func lower(b []byte) []byte {
for i, c := range b {
if 'A' <= c && c <= 'Z' {
b[i] = c + 'a' - 'A'
}
}
return b
}
const escapedChars = `&'<>"`
func escape(w writer, s string) error {
i := strings.IndexAny(s, escapedChars)
for i != -1 {
if _, err := w.WriteString(s[:i]); err != nil {
return err
}
var esc string
switch s[i] {
case '&':
esc = "&amp;"
case '\'':
esc = "&apos;"
case '<':
esc = "&lt;"
case '>':
esc = "&gt;"
case '"':
esc = "&quot;"
default:
panic("unrecognized escape character")
}
s = s[i+1:]
if _, err := w.WriteString(esc); err != nil {
return err
}
i = strings.IndexAny(s, escapedChars)
}
_, err := w.WriteString(s)
return err
}
// EscapeString escapes special characters like "<" to become "&lt;". It
// escapes only five such characters: amp, apos, lt, gt and quot.
// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
// always true.
func EscapeString(s string) string {
if strings.IndexAny(s, escapedChars) == -1 {
return s
}
var buf bytes.Buffer
escape(&buf, s)
return buf.String()
}
// UnescapeString unescapes entities like "&lt;" to become "<". It unescapes a
// larger range of entities than EscapeString escapes. For example, "&aacute;"
// unescapes to "á", as does "&#225;" and "&#xE1;".
// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
// always true.
func UnescapeString(s string) string {
for _, c := range s {
if c == '&' {
return string(unescape([]byte(s)))
}
}
return s
}
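
A small usage sketch for the two exported helpers above; the import path is again an assumption.

package main

import (
	"fmt"

	"html" // assumed import path for the package shown above
)

func main() {
	// EscapeString rewrites only the five characters &, ', <, > and ".
	fmt.Println(html.EscapeString(`a<b & "c"`)) // a&lt;b &amp; &quot;c&quot;

	// UnescapeString understands a much larger set of entities,
	// both named and numeric.
	fmt.Println(html.UnescapeString("&aacute; &#225; a&lt;b")) // á á a<b
}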

View File

@@ -1,132 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"strings"
)
func adjustForeignAttributes(aa []Attribute) {
for i, a := range aa {
if a.Key == "" || a.Key[0] != 'x' {
continue
}
switch a.Key {
case "xlink:actuate", "xlink:arcrole", "xlink:href", "xlink:role", "xlink:show",
"xlink:title", "xlink:type", "xml:base", "xml:lang", "xml:space", "xmlns:xlink":
j := strings.Index(a.Key, ":")
aa[i].Namespace = a.Key[:j]
aa[i].Key = a.Key[j+1:]
}
}
}
func htmlIntegrationPoint(n *Node) bool {
if n.Type != ElementNode {
return false
}
switch n.Namespace {
case "math":
// TODO: annotation-xml elements whose start tags have "text/html" or
// "application/xhtml+xml" encodings.
case "svg":
switch n.Data {
case "desc", "foreignObject", "title":
return true
}
}
return false
}
// Section 12.2.5.5.
var breakout = map[string]bool{
"b": true,
"big": true,
"blockquote": true,
"body": true,
"br": true,
"center": true,
"code": true,
"dd": true,
"div": true,
"dl": true,
"dt": true,
"em": true,
"embed": true,
"font": true,
"h1": true,
"h2": true,
"h3": true,
"h4": true,
"h5": true,
"h6": true,
"head": true,
"hr": true,
"i": true,
"img": true,
"li": true,
"listing": true,
"menu": true,
"meta": true,
"nobr": true,
"ol": true,
"p": true,
"pre": true,
"ruby": true,
"s": true,
"small": true,
"span": true,
"strong": true,
"strike": true,
"sub": true,
"sup": true,
"table": true,
"tt": true,
"u": true,
"ul": true,
"var": true,
}
// Section 12.2.5.5.
var svgTagNameAdjustments = map[string]string{
"altglyph": "altGlyph",
"altglyphdef": "altGlyphDef",
"altglyphitem": "altGlyphItem",
"animatecolor": "animateColor",
"animatemotion": "animateMotion",
"animatetransform": "animateTransform",
"clippath": "clipPath",
"feblend": "feBlend",
"fecolormatrix": "feColorMatrix",
"fecomponenttransfer": "feComponentTransfer",
"fecomposite": "feComposite",
"feconvolvematrix": "feConvolveMatrix",
"fediffuselighting": "feDiffuseLighting",
"fedisplacementmap": "feDisplacementMap",
"fedistantlight": "feDistantLight",
"feflood": "feFlood",
"fefunca": "feFuncA",
"fefuncb": "feFuncB",
"fefuncg": "feFuncG",
"fefuncr": "feFuncR",
"fegaussianblur": "feGaussianBlur",
"feimage": "feImage",
"femerge": "feMerge",
"femergenode": "feMergeNode",
"femorphology": "feMorphology",
"feoffset": "feOffset",
"fepointlight": "fePointLight",
"fespecularlighting": "feSpecularLighting",
"fespotlight": "feSpotLight",
"fetile": "feTile",
"feturbulence": "feTurbulence",
"foreignobject": "foreignObject",
"glyphref": "glyphRef",
"lineargradient": "linearGradient",
"radialgradient": "radialGradient",
"textpath": "textPath",
}
// TODO: add look-up tables for MathML and SVG attribute adjustments.

View File

@@ -1,154 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
// A NodeType is the type of a Node.
type NodeType int
const (
ErrorNode NodeType = iota
TextNode
DocumentNode
ElementNode
CommentNode
DoctypeNode
scopeMarkerNode
)
// Section 12.2.3.3 says "scope markers are inserted when entering applet
// elements, buttons, object elements, marquees, table cells, and table
// captions, and are used to prevent formatting from 'leaking'".
var scopeMarker = Node{Type: scopeMarkerNode}
// A Node consists of a NodeType and some Data (tag name for element nodes,
// content for text) and are part of a tree of Nodes. Element nodes may also
// have a Namespace and contain a slice of Attributes. Data is unescaped, so
// that it looks like "a<b" rather than "a&lt;b".
//
// An empty Namespace implies a "http://www.w3.org/1999/xhtml" namespace.
// Similarly, "math" is short for "http://www.w3.org/1998/Math/MathML", and
// "svg" is short for "http://www.w3.org/2000/svg".
type Node struct {
Parent *Node
Child []*Node
Type NodeType
Data string
Namespace string
Attr []Attribute
}
// Add adds a node as a child of n.
// It will panic if the child's parent is not nil.
func (n *Node) Add(child *Node) {
if child.Parent != nil {
panic("html: Node.Add called for a child Node that already has a parent")
}
child.Parent = n
n.Child = append(n.Child, child)
}
// Remove removes a node as a child of n.
// It will panic if the child's parent is not n.
func (n *Node) Remove(child *Node) {
if child.Parent == n {
child.Parent = nil
for i, m := range n.Child {
if m == child {
copy(n.Child[i:], n.Child[i+1:])
j := len(n.Child) - 1
n.Child[j] = nil
n.Child = n.Child[:j]
return
}
}
}
panic("html: Node.Remove called for a non-child Node")
}
// reparentChildren reparents all of src's child nodes to dst.
func reparentChildren(dst, src *Node) {
for _, n := range src.Child {
if n.Parent != src {
panic("html: nodes have an inconsistent parent/child relationship")
}
n.Parent = dst
}
dst.Child = append(dst.Child, src.Child...)
src.Child = nil
}
// clone returns a new node with the same type, data and attributes.
// The clone has no parent and no children.
func (n *Node) clone() *Node {
m := &Node{
Type: n.Type,
Data: n.Data,
Attr: make([]Attribute, len(n.Attr)),
}
copy(m.Attr, n.Attr)
return m
}
// nodeStack is a stack of nodes.
type nodeStack []*Node
// pop pops the stack. It will panic if s is empty.
func (s *nodeStack) pop() *Node {
i := len(*s)
n := (*s)[i-1]
*s = (*s)[:i-1]
return n
}
// top returns the most recently pushed node, or nil if s is empty.
func (s *nodeStack) top() *Node {
if i := len(*s); i > 0 {
return (*s)[i-1]
}
return nil
}
// index returns the index of the top-most occurrence of n in the stack, or -1
// if n is not present.
func (s *nodeStack) index(n *Node) int {
for i := len(*s) - 1; i >= 0; i-- {
if (*s)[i] == n {
return i
}
}
return -1
}
// insert inserts a node at the given index.
func (s *nodeStack) insert(i int, n *Node) {
(*s) = append(*s, nil)
copy((*s)[i+1:], (*s)[i:])
(*s)[i] = n
}
// remove removes a node from the stack. It is a no-op if n is not present.
func (s *nodeStack) remove(n *Node) {
i := s.index(n)
if i == -1 {
return
}
copy((*s)[i:], (*s)[i+1:])
j := len(*s) - 1
(*s)[j] = nil
*s = (*s)[:j]
}
// TODO(nigeltao): forTag no longer used. Should it be deleted?
// forTag returns the top-most element node with the given tag.
func (s *nodeStack) forTag(tag string) *Node {
for i := len(*s) - 1; i >= 0; i-- {
n := (*s)[i]
if n.Type == ElementNode && n.Data == tag {
return n
}
}
return nil
}
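
A brief sketch of building a tree by hand with the exported Node API above; the import path is an assumption.

package main

import (
	"fmt"

	"html" // assumed import path for the package shown above
)

func main() {
	body := &html.Node{Type: html.ElementNode, Data: "body"}
	p := &html.Node{Type: html.ElementNode, Data: "p"}
	text := &html.Node{Type: html.TextNode, Data: "a<b"} // Data is stored unescaped

	p.Add(text) // panics if text already had a parent
	body.Add(p)

	fmt.Println(len(body.Child)) // 1
	body.Remove(p)               // p.Parent is nil again
	fmt.Println(len(body.Child)) // 0
}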

File diff suppressed because it is too large Load Diff

View File

@@ -1,276 +0,0 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"bufio"
"bytes"
"errors"
"fmt"
"io"
"os"
"strings"
"testing"
)
// readParseTest reads a single test case from r.
func readParseTest(r *bufio.Reader) (text, want, context string, err error) {
line, err := r.ReadSlice('\n')
if err != nil {
return "", "", "", err
}
var b []byte
// Read the HTML.
if string(line) != "#data\n" {
return "", "", "", fmt.Errorf(`got %q want "#data\n"`, line)
}
for {
line, err = r.ReadSlice('\n')
if err != nil {
return "", "", "", err
}
if line[0] == '#' {
break
}
b = append(b, line...)
}
text = strings.TrimRight(string(b), "\n")
b = b[:0]
// Skip the error list.
if string(line) != "#errors\n" {
return "", "", "", fmt.Errorf(`got %q want "#errors\n"`, line)
}
for {
line, err = r.ReadSlice('\n')
if err != nil {
return "", "", "", err
}
if line[0] == '#' {
break
}
}
if string(line) == "#document-fragment\n" {
line, err = r.ReadSlice('\n')
if err != nil {
return "", "", "", err
}
context = strings.TrimSpace(string(line))
line, err = r.ReadSlice('\n')
if err != nil {
return "", "", "", err
}
}
// Read the dump of what the parse tree should be.
if string(line) != "#document\n" {
return "", "", "", fmt.Errorf(`got %q want "#document\n"`, line)
}
for {
line, err = r.ReadSlice('\n')
if err != nil && err != io.EOF {
return "", "", "", err
}
if len(line) == 0 || len(line) == 1 && line[0] == '\n' {
break
}
b = append(b, line...)
}
return text, string(b), context, nil
}
func dumpIndent(w io.Writer, level int) {
io.WriteString(w, "| ")
for i := 0; i < level; i++ {
io.WriteString(w, " ")
}
}
func dumpLevel(w io.Writer, n *Node, level int) error {
dumpIndent(w, level)
switch n.Type {
case ErrorNode:
return errors.New("unexpected ErrorNode")
case DocumentNode:
return errors.New("unexpected DocumentNode")
case ElementNode:
if n.Namespace != "" {
fmt.Fprintf(w, "<%s %s>", n.Namespace, n.Data)
} else {
fmt.Fprintf(w, "<%s>", n.Data)
}
attr := n.Attr
if len(attr) == 2 && attr[0].Namespace == "xml" && attr[1].Namespace == "xlink" {
// Some of the test cases in tests10.dat change the order of adjusted
// foreign attributes, but that behavior is not in the spec, and could
// simply be an implementation detail of html5lib's python map ordering.
attr[0], attr[1] = attr[1], attr[0]
}
for _, a := range attr {
io.WriteString(w, "\n")
dumpIndent(w, level+1)
if a.Namespace != "" {
fmt.Fprintf(w, `%s %s="%s"`, a.Namespace, a.Key, a.Val)
} else {
fmt.Fprintf(w, `%s="%s"`, a.Key, a.Val)
}
}
case TextNode:
fmt.Fprintf(w, `"%s"`, n.Data)
case CommentNode:
fmt.Fprintf(w, "<!-- %s -->", n.Data)
case DoctypeNode:
fmt.Fprintf(w, "<!DOCTYPE %s", n.Data)
if n.Attr != nil {
var p, s string
for _, a := range n.Attr {
switch a.Key {
case "public":
p = a.Val
case "system":
s = a.Val
}
}
if p != "" || s != "" {
fmt.Fprintf(w, ` "%s"`, p)
fmt.Fprintf(w, ` "%s"`, s)
}
}
io.WriteString(w, ">")
case scopeMarkerNode:
return errors.New("unexpected scopeMarkerNode")
default:
return errors.New("unknown node type")
}
io.WriteString(w, "\n")
for _, c := range n.Child {
if err := dumpLevel(w, c, level+1); err != nil {
return err
}
}
return nil
}
func dump(n *Node) (string, error) {
if n == nil || len(n.Child) == 0 {
return "", nil
}
var b bytes.Buffer
for _, child := range n.Child {
if err := dumpLevel(&b, child, 0); err != nil {
return "", err
}
}
return b.String(), nil
}
func TestParser(t *testing.T) {
testFiles := []struct {
filename string
// n is the number of test cases to run from that file.
// -1 means all test cases.
n int
}{
// TODO(nigeltao): Process all the test cases from all the .dat files.
{"adoption01.dat", -1},
{"doctype01.dat", -1},
{"tests1.dat", -1},
{"tests2.dat", -1},
{"tests3.dat", -1},
{"tests4.dat", -1},
{"tests5.dat", -1},
{"tests6.dat", -1},
{"tests10.dat", 35},
}
for _, tf := range testFiles {
f, err := os.Open("testdata/webkit/" + tf.filename)
if err != nil {
t.Fatal(err)
}
defer f.Close()
r := bufio.NewReader(f)
for i := 0; i != tf.n; i++ {
text, want, context, err := readParseTest(r)
if err == io.EOF && tf.n == -1 {
break
}
if err != nil {
t.Fatal(err)
}
var doc *Node
if context == "" {
doc, err = Parse(strings.NewReader(text))
if err != nil {
t.Fatal(err)
}
} else {
contextNode := &Node{
Type: ElementNode,
Data: context,
}
nodes, err := ParseFragment(strings.NewReader(text), contextNode)
if err != nil {
t.Fatal(err)
}
doc = &Node{
Type: DocumentNode,
}
for _, n := range nodes {
doc.Add(n)
}
}
got, err := dump(doc)
if err != nil {
t.Fatal(err)
}
// Compare the parsed tree to the #document section.
if got != want {
t.Errorf("%s test #%d %q, got vs want:\n----\n%s----\n%s----", tf.filename, i, text, got, want)
continue
}
if renderTestBlacklist[text] || context != "" {
continue
}
// Check that rendering and re-parsing results in an identical tree.
pr, pw := io.Pipe()
go func() {
pw.CloseWithError(Render(pw, doc))
}()
doc1, err := Parse(pr)
if err != nil {
t.Fatal(err)
}
got1, err := dump(doc1)
if err != nil {
t.Fatal(err)
}
if got != got1 {
t.Errorf("%s test #%d %q, got vs got1:\n----\n%s----\n%s----", tf.filename, i, text, got, got1)
continue
}
}
}
}
// Some test inputs result in parse trees that are not 'well-formed' despite
// following the HTML5 recovery algorithms. Rendering and re-parsing such a
// tree will not result in an exact clone of that tree. We blacklist such
// inputs from the render test.
var renderTestBlacklist = map[string]bool{
// The second <a> will be reparented to the first <table>'s parent. This
// results in an <a> whose parent is an <a>, which is not 'well-formed'.
`<a><table><td><a><table></table><a></tr><a></table><b>X</b>C<a>Y`: true,
// More cases of <a> being reparented:
`<a href="blah">aba<table><a href="foo">br<tr><td></td></tr>x</table>aoe`: true,
`<a><table><a></table><p><a><div><a>`: true,
`<a><table><td><a><table></table><a></tr><a></table><a>`: true,
// A <plaintext> element is reparented, putting it before a table.
// A <plaintext> element can't have anything after it in HTML.
`<table><plaintext><td>`: true,
}

View File

@@ -1,277 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"bufio"
"errors"
"fmt"
"io"
"strings"
)
type writer interface {
io.Writer
WriteByte(byte) error
WriteString(string) (int, error)
}
// Render renders the parse tree n to the given writer.
//
// Rendering is done on a 'best effort' basis: calling Parse on the output of
// Render will always result in something similar to the original tree, but it
// is not necessarily an exact clone unless the original tree was 'well-formed'.
// 'Well-formed' is not easily specified; the HTML5 specification is
// complicated.
//
// Calling Parse on arbitrary input typically results in a 'well-formed' parse
// tree. However, it is possible for Parse to yield a 'badly-formed' parse tree.
// For example, in a 'well-formed' parse tree, no <a> element is a child of
// another <a> element: parsing "<a><a>" results in two sibling elements.
// Similarly, in a 'well-formed' parse tree, no <a> element is a child of a
// <table> element: parsing "<p><table><a>" results in a <p> with two sibling
// children; the <a> is reparented to the <table>'s parent. However, calling
// Parse on "<a><table><a>" does not return an error, but the result has an <a>
// element with an <a> child, and is therefore not 'well-formed'.
//
// Programmatically constructed trees are typically also 'well-formed', but it
// is possible to construct a tree that looks innocuous but, when rendered and
// re-parsed, results in a different tree. A simple example is that a solitary
// text node would become a tree containing <html>, <head> and <body> elements.
// Another example is that the programmatic equivalent of "a<head>b</head>c"
// becomes "<html><head><head/><body>abc</body></html>".
func Render(w io.Writer, n *Node) error {
if x, ok := w.(writer); ok {
return render(x, n)
}
buf := bufio.NewWriter(w)
if err := render(buf, n); err != nil {
return err
}
return buf.Flush()
}
// plaintextAbort is returned from render1 when a <plaintext> element
// has been rendered. No more end tags should be rendered after that.
var plaintextAbort = errors.New("html: internal error (plaintext abort)")
func render(w writer, n *Node) error {
err := render1(w, n)
if err == plaintextAbort {
err = nil
}
return err
}
func render1(w writer, n *Node) error {
// Render non-element nodes; these are the easy cases.
switch n.Type {
case ErrorNode:
return errors.New("html: cannot render an ErrorNode node")
case TextNode:
return escape(w, n.Data)
case DocumentNode:
for _, c := range n.Child {
if err := render1(w, c); err != nil {
return err
}
}
return nil
case ElementNode:
// No-op.
case CommentNode:
if _, err := w.WriteString("<!--"); err != nil {
return err
}
if _, err := w.WriteString(n.Data); err != nil {
return err
}
if _, err := w.WriteString("-->"); err != nil {
return err
}
return nil
case DoctypeNode:
if _, err := w.WriteString("<!DOCTYPE "); err != nil {
return err
}
if _, err := w.WriteString(n.Data); err != nil {
return err
}
if n.Attr != nil {
var p, s string
for _, a := range n.Attr {
switch a.Key {
case "public":
p = a.Val
case "system":
s = a.Val
}
}
if p != "" {
if _, err := w.WriteString(" PUBLIC "); err != nil {
return err
}
if err := writeQuoted(w, p); err != nil {
return err
}
if s != "" {
if err := w.WriteByte(' '); err != nil {
return err
}
if err := writeQuoted(w, s); err != nil {
return err
}
}
} else if s != "" {
if _, err := w.WriteString(" SYSTEM "); err != nil {
return err
}
if err := writeQuoted(w, s); err != nil {
return err
}
}
}
return w.WriteByte('>')
default:
return errors.New("html: unknown node type")
}
// Render the <xxx> opening tag.
if err := w.WriteByte('<'); err != nil {
return err
}
if _, err := w.WriteString(n.Data); err != nil {
return err
}
for _, a := range n.Attr {
if err := w.WriteByte(' '); err != nil {
return err
}
if a.Namespace != "" {
if _, err := w.WriteString(a.Namespace); err != nil {
return err
}
if err := w.WriteByte(':'); err != nil {
return err
}
}
if _, err := w.WriteString(a.Key); err != nil {
return err
}
if _, err := w.WriteString(`="`); err != nil {
return err
}
if err := escape(w, a.Val); err != nil {
return err
}
if err := w.WriteByte('"'); err != nil {
return err
}
}
if voidElements[n.Data] {
if len(n.Child) != 0 {
return fmt.Errorf("html: void element <%s> has child nodes", n.Data)
}
_, err := w.WriteString("/>")
return err
}
if err := w.WriteByte('>'); err != nil {
return err
}
// Add an initial newline where there is danger of a newline being ignored.
if len(n.Child) > 0 && n.Child[0].Type == TextNode && strings.HasPrefix(n.Child[0].Data, "\n") {
switch n.Data {
case "pre", "listing", "textarea":
if err := w.WriteByte('\n'); err != nil {
return err
}
}
}
// Render any child nodes.
switch n.Data {
case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
for _, c := range n.Child {
if c.Type != TextNode {
return fmt.Errorf("html: raw text element <%s> has non-text child node", n.Data)
}
if _, err := w.WriteString(c.Data); err != nil {
return err
}
}
if n.Data == "plaintext" {
// Don't render anything else. <plaintext> must be the
// last element in the file, with no closing tag.
return plaintextAbort
}
case "textarea", "title":
for _, c := range n.Child {
if c.Type != TextNode {
return fmt.Errorf("html: RCDATA element <%s> has non-text child node", n.Data)
}
if err := render1(w, c); err != nil {
return err
}
}
default:
for _, c := range n.Child {
if err := render1(w, c); err != nil {
return err
}
}
}
// Render the </xxx> closing tag.
if _, err := w.WriteString("</"); err != nil {
return err
}
if _, err := w.WriteString(n.Data); err != nil {
return err
}
return w.WriteByte('>')
}
// writeQuoted writes s to w surrounded by quotes. Normally it will use double
// quotes, but if s contains a double quote, it will use single quotes.
// It is used for writing the identifiers in a doctype declaration.
// In valid HTML, they can't contain both types of quotes.
func writeQuoted(w writer, s string) error {
var q byte = '"'
if strings.Contains(s, `"`) {
q = '\''
}
if err := w.WriteByte(q); err != nil {
return err
}
if _, err := w.WriteString(s); err != nil {
return err
}
if err := w.WriteByte(q); err != nil {
return err
}
return nil
}
// Section 12.1.2, "Elements", gives this list of void elements. Void elements
// are those that can't have any contents.
var voidElements = map[string]bool{
"area": true,
"base": true,
"br": true,
"col": true,
"command": true,
"embed": true,
"hr": true,
"img": true,
"input": true,
"keygen": true,
"link": true,
"meta": true,
"param": true,
"source": true,
"track": true,
"wbr": true,
}
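
A round-trip sketch following the Render documentation above: parse a small fragment, then render the resulting tree. The import path and input are assumptions.

package main

import (
	"bytes"
	"fmt"
	"log"
	"strings"

	"html" // assumed import path for the package shown above
)

func main() {
	doc, err := html.Parse(strings.NewReader("<p>hello<br>world"))
	if err != nil {
		log.Fatal(err)
	}
	var buf bytes.Buffer
	if err := html.Render(&buf, doc); err != nil {
		log.Fatal(err)
	}
	// The parser supplies the missing <html>, <head> and <body> elements,
	// so the rendered output is a complete document.
	fmt.Println(buf.String())
}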

View File

@@ -1,111 +0,0 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"bytes"
"testing"
)
func TestRenderer(t *testing.T) {
n := &Node{
Type: ElementNode,
Data: "html",
Child: []*Node{
{
Type: ElementNode,
Data: "head",
},
{
Type: ElementNode,
Data: "body",
Child: []*Node{
{
Type: TextNode,
Data: "0<1",
},
{
Type: ElementNode,
Data: "p",
Attr: []Attribute{
{
Key: "id",
Val: "A",
},
{
Key: "foo",
Val: `abc"def`,
},
},
Child: []*Node{
{
Type: TextNode,
Data: "2",
},
{
Type: ElementNode,
Data: "b",
Attr: []Attribute{
{
Key: "empty",
Val: "",
},
},
Child: []*Node{
{
Type: TextNode,
Data: "3",
},
},
},
{
Type: ElementNode,
Data: "i",
Attr: []Attribute{
{
Key: "backslash",
Val: `\`,
},
},
Child: []*Node{
{
Type: TextNode,
Data: "&4",
},
},
},
},
},
{
Type: TextNode,
Data: "5",
},
{
Type: ElementNode,
Data: "blockquote",
},
{
Type: ElementNode,
Data: "br",
},
{
Type: TextNode,
Data: "6",
},
},
},
},
}
want := `<html><head></head><body>0&lt;1<p id="A" foo="abc&quot;def">` +
`2<b empty="">3</b><i backslash="\">&amp;4</i></p>` +
`5<blockquote></blockquote><br/>6</body></html>`
b := new(bytes.Buffer)
if err := Render(b, n); err != nil {
t.Fatal(err)
}
if got := b.String(); got != want {
t.Errorf("got vs want:\n%s\n%s\n", got, want)
}
}

View File

@@ -1,28 +0,0 @@
The *.dat files in this directory are copied from The WebKit Open Source
Project, specifically $WEBKITROOT/LayoutTests/html5lib/resources.
WebKit is licensed under a BSD style license.
http://webkit.org/coding/bsd-license.html says:
Copyright (C) 2009 Apple Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS "AS IS" AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@@ -1,194 +0,0 @@
#data
<a><p></a></p>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <p>
| <a>
#data
<a>1<p>2</a>3</p>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <p>
| <a>
| "2"
| "3"
#data
<a>1<button>2</a>3</button>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <button>
| <a>
| "2"
| "3"
#data
<a>1<b>2</a>3</b>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <b>
| "2"
| <b>
| "3"
#data
<a>1<div>2<div>3</a>4</div>5</div>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <div>
| <a>
| "2"
| <div>
| <a>
| "3"
| "4"
| "5"
#data
<table><a>1<p>2</a>3</p>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <p>
| <a>
| "2"
| "3"
| <table>
#data
<b><b><a><p></a>
#errors
#document
| <html>
| <head>
| <body>
| <b>
| <b>
| <a>
| <p>
| <a>
#data
<b><a><b><p></a>
#errors
#document
| <html>
| <head>
| <body>
| <b>
| <a>
| <b>
| <b>
| <p>
| <a>
#data
<a><b><b><p></a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <b>
| <b>
| <b>
| <b>
| <p>
| <a>
#data
<p>1<s id="A">2<b id="B">3</p>4</s>5</b>
#errors
#document
| <html>
| <head>
| <body>
| <p>
| "1"
| <s>
| id="A"
| "2"
| <b>
| id="B"
| "3"
| <s>
| id="A"
| <b>
| id="B"
| "4"
| <b>
| id="B"
| "5"
#data
<table><a>1<td>2</td>3</table>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <a>
| "3"
| <table>
| <tbody>
| <tr>
| <td>
| "2"
#data
<table>A<td>B</td>C</table>
#errors
#document
| <html>
| <head>
| <body>
| "AC"
| <table>
| <tbody>
| <tr>
| <td>
| "B"
#data
<a><svg><tr><input></a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <svg svg>
| <svg tr>
| <svg input>

View File

@@ -1,31 +0,0 @@
#data
<b>1<i>2<p>3</b>4
#errors
#document
| <html>
| <head>
| <body>
| <b>
| "1"
| <i>
| "2"
| <i>
| <p>
| <b>
| "3"
| "4"
#data
<a><div><style></style><address><a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <div>
| <a>
| <style>
| <address>
| <a>
| <a>

View File

@@ -1,135 +0,0 @@
#data
FOO<!-- BAR -->BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -->
| "BAZ"
#data
FOO<!-- BAR --!>BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -->
| "BAZ"
#data
FOO<!-- BAR -- >BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -- >BAZ -->
#data
FOO<!-- BAR -- <QUX> -- MUX -->BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -- <QUX> -- MUX -->
| "BAZ"
#data
FOO<!-- BAR -- <QUX> -- MUX --!>BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -- <QUX> -- MUX -->
| "BAZ"
#data
FOO<!-- BAR -- <QUX> -- MUX -- >BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -- <QUX> -- MUX -- >BAZ -->
#data
FOO<!---->BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- -->
| "BAZ"
#data
FOO<!--->BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- -->
| "BAZ"
#data
FOO<!-->BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- -->
| "BAZ"
#data
<?xml version="1.0">Hi
#errors
#document
| <!-- ?xml version="1.0" -->
| <html>
| <head>
| <body>
| "Hi"
#data
<?xml version="1.0">
#errors
#document
| <!-- ?xml version="1.0" -->
| <html>
| <head>
| <body>
#data
<?xml version
#errors
#document
| <!-- ?xml version -->
| <html>
| <head>
| <body>
#data
FOO<!----->BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- - -->
| "BAZ"

View File

@@ -1,370 +0,0 @@
#data
<!DOCTYPE html>Hello
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "Hello"
#data
<!dOctYpE HtMl>Hello
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPEhtml>Hello
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE>Hello
#errors
#document
| <!DOCTYPE >
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE >Hello
#errors
#document
| <!DOCTYPE >
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato >Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato taco>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato taco "ddd>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato sYstEM>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato sYstEM >Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato sYstEM ggg>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato SYSTEM taco >Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato SYSTEM 'taco"'>Hello
#errors
#document
| <!DOCTYPE potato "" "taco"">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato SYSTEM "taco">Hello
#errors
#document
| <!DOCTYPE potato "" "taco">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato SYSTEM "tai'co">Hello
#errors
#document
| <!DOCTYPE potato "" "tai'co">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato SYSTEMtaco "ddd">Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato grass SYSTEM taco>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato pUbLIc>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato pUbLIc >Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato pUbLIcgoof>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato PUBLIC goof>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato PUBLIC "go'of">Hello
#errors
#document
| <!DOCTYPE potato "go'of" "">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato PUBLIC 'go'of'>Hello
#errors
#document
| <!DOCTYPE potato "go" "">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato PUBLIC 'go:hh of' >Hello
#errors
#document
| <!DOCTYPE potato "go:hh of" "">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato PUBLIC "W3C-//dfdf" SYSTEM ggg>Hello
#errors
#document
| <!DOCTYPE potato "W3C-//dfdf" "">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">Hello
#errors
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE ...>Hello
#errors
#document
| <!DOCTYPE ...>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
#errors
#document
| <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
#errors
#document
| <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Frameset//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE root-element [SYSTEM OR PUBLIC FPI] "uri" [
<!-- internal declarations -->
]>
#errors
#document
| <!DOCTYPE root-element>
| <html>
| <head>
| <body>
| "]>"
#data
<!DOCTYPE html PUBLIC
"-//WAPFORUM//DTD XHTML Mobile 1.0//EN"
"http://www.wapforum.org/DTD/xhtml-mobile10.dtd">
#errors
#document
| <!DOCTYPE html "-//WAPFORUM//DTD XHTML Mobile 1.0//EN" "http://www.wapforum.org/DTD/xhtml-mobile10.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML SYSTEM "http://www.w3.org/DTD/HTML4-strict.dtd"><body><b>Mine!</b></body>
#errors
#document
| <!DOCTYPE html "" "http://www.w3.org/DTD/HTML4-strict.dtd">
| <html>
| <head>
| <body>
| <b>
| "Mine!"
#data
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN""http://www.w3.org/TR/html4/strict.dtd">
#errors
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'>
#errors
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML PUBLIC"-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'>
#errors
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML PUBLIC'-//W3C//DTD HTML 4.01//EN''http://www.w3.org/TR/html4/strict.dtd'>
#errors
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
| <html>
| <head>
| <body>

View File

@@ -1,603 +0,0 @@
#data
FOO&gt;BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO>BAR"
#data
FOO&gtBAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO>BAR"
#data
FOO&gt BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO> BAR"
#data
FOO&gt;;;BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO>;;BAR"
#data
I'm &notit; I tell you
#errors
#document
| <html>
| <head>
| <body>
| "I'm ¬it; I tell you"
#data
I'm &notin; I tell you
#errors
#document
| <html>
| <head>
| <body>
| "I'm ∉ I tell you"
#data
FOO& BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO& BAR"
#data
FOO&<BAR>
#errors
#document
| <html>
| <head>
| <body>
| "FOO&"
| <bar>
#data
FOO&&&&gt;BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO&&&>BAR"
#data
FOO&#41;BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO)BAR"
#data
FOO&#x41;BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOOABAR"
#data
FOO&#X41;BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOOABAR"
#data
FOO&#BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO&#BAR"
#data
FOO&#ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO&#ZOO"
#data
FOO&#xBAR
#errors
#document
| <html>
| <head>
| <body>
| "FOOºR"
#data
FOO&#xZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO&#xZOO"
#data
FOO&#XZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO&#XZOO"
#data
FOO&#41BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO)BAR"
#data
FOO&#x41BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO䆺R"
#data
FOO&#x41ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOAZOO"
#data
FOO&#x0000;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO<4F>ZOO"
#data
FOO&#x0078;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOxZOO"
#data
FOO&#x0079;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOyZOO"
#data
FOO&#x0080;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO€ZOO"
#data
FOO&#x0081;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x0082;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x0083;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOƒZOO"
#data
FOO&#x0084;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO„ZOO"
#data
FOO&#x0085;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO…ZOO"
#data
FOO&#x0086;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO†ZOO"
#data
FOO&#x0087;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO‡ZOO"
#data
FOO&#x0088;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOˆZOO"
#data
FOO&#x0089;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO‰ZOO"
#data
FOO&#x008A;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOŠZOO"
#data
FOO&#x008B;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x008C;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOŒZOO"
#data
FOO&#x008D;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x008E;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOŽZOO"
#data
FOO&#x008F;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x0090;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x0091;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x0092;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x0093;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO“ZOO"
#data
FOO&#x0094;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO”ZOO"
#data
FOO&#x0095;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO•ZOO"
#data
FOO&#x0096;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x0097;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO—ZOO"
#data
FOO&#x0098;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO˜ZOO"
#data
FOO&#x0099;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO™ZOO"
#data
FOO&#x009A;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOšZOO"
#data
FOO&#x009B;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x009C;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOœZOO"
#data
FOO&#x009D;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x009E;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOžZOO"
#data
FOO&#x009F;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOŸZOO"
#data
FOO&#x00A0;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO ZOO"
#data
FOO&#xD7FF;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO퟿ZOO"
#data
FOO&#xD800;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO<4F>ZOO"
#data
FOO&#xD801;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO<4F>ZOO"
#data
FOO&#xDFFE;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO<4F>ZOO"
#data
FOO&#xDFFF;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO<4F>ZOO"
#data
FOO&#xE000;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x10FFFE;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO􏿾ZOO"
#data
FOO&#x1087D4;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO􈟔ZOO"
#data
FOO&#x10FFFF;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO􏿿ZOO"
#data
FOO&#x110000;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO<4F>ZOO"
#data
FOO&#xFFFFFF;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO<4F>ZOO"

View File

@@ -1,249 +0,0 @@
#data
<div bar="ZZ&gt;YY"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ>YY"
#data
<div bar="ZZ&"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&"
#data
<div bar='ZZ&'></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&"
#data
<div bar=ZZ&></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&"
#data
<div bar="ZZ&gt=YY"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&gt=YY"
#data
<div bar="ZZ&gt0YY"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&gt0YY"
#data
<div bar="ZZ&gt9YY"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&gt9YY"
#data
<div bar="ZZ&gtaYY"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&gtaYY"
#data
<div bar="ZZ&gtZYY"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&gtZYY"
#data
<div bar="ZZ&gt YY"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ> YY"
#data
<div bar="ZZ&gt"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ>"
#data
<div bar='ZZ&gt'></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ>"
#data
<div bar=ZZ&gt></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ>"
#data
<div bar="ZZ&pound_id=23"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ£_id=23"
#data
<div bar="ZZ&prod_id=23"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&prod_id=23"
#data
<div bar="ZZ&pound;_id=23"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ£_id=23"
#data
<div bar="ZZ&prod;_id=23"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ∏_id=23"
#data
<div bar="ZZ&pound=23"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&pound=23"
#data
<div bar="ZZ&prod=23"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&prod=23"
#data
<div>ZZ&pound_id=23</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ£_id=23"
#data
<div>ZZ&prod_id=23</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ&prod_id=23"
#data
<div>ZZ&pound;_id=23</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ£_id=23"
#data
<div>ZZ&prod;_id=23</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ∏_id=23"
#data
<div>ZZ&pound=23</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ£=23"
#data
<div>ZZ&prod=23</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ&prod=23"


@@ -1,246 +0,0 @@
#data
<div<div>
#errors
#document
| <html>
| <head>
| <body>
| <div<div>
#data
<div foo<bar=''>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| foo<bar=""
#data
<div foo=`bar`>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| foo="`bar`"
#data
<div \"foo=''>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| \"foo=""
#data
<a href='\nbar'></a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| href="\nbar"
#data
<!DOCTYPE html>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
#data
&lang;&rang;
#errors
#document
| <html>
| <head>
| <body>
| "⟨⟩"
#data
&apos;
#errors
#document
| <html>
| <head>
| <body>
| "'"
#data
&ImaginaryI;
#errors
#document
| <html>
| <head>
| <body>
| ""
#data
&Kopf;
#errors
#document
| <html>
| <head>
| <body>
| "𝕂"
#data
&notinva;
#errors
#document
| <html>
| <head>
| <body>
| "∉"
#data
<?import namespace="foo" implementation="#bar">
#errors
#document
| <!-- ?import namespace="foo" implementation="#bar" -->
| <html>
| <head>
| <body>
#data
<!--foo--bar-->
#errors
#document
| <!-- foo--bar -->
| <html>
| <head>
| <body>
#data
<![CDATA[x]]>
#errors
#document
| <!-- [CDATA[x]] -->
| <html>
| <head>
| <body>
#data
<textarea><!--</textarea>--></textarea>
#errors
#document
| <html>
| <head>
| <body>
| <textarea>
| "<!--"
| "-->"
#data
<textarea><!--</textarea>-->
#errors
#document
| <html>
| <head>
| <body>
| <textarea>
| "<!--"
| "-->"
#data
<style><!--</style>--></style>
#errors
#document
| <html>
| <head>
| <style>
| "<!--"
| <body>
| "-->"
#data
<style><!--</style>-->
#errors
#document
| <html>
| <head>
| <style>
| "<!--"
| <body>
| "-->"
#data
<ul><li>A </li> <li>B</li></ul>
#errors
#document
| <html>
| <head>
| <body>
| <ul>
| <li>
| "A "
| " "
| <li>
| "B"
#data
<table><form><input type=hidden><input></form><div></div></table>
#errors
#document
| <html>
| <head>
| <body>
| <input>
| <div>
| <table>
| <form>
| <input>
| type="hidden"
#data
<i>A<b>B<p></i>C</b>D
#errors
#document
| <html>
| <head>
| <body>
| <i>
| "A"
| <b>
| "B"
| <b>
| <p>
| <b>
| <i>
| "C"
| "D"
#data
<div></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
#data
<svg></svg>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
#data
<math></math>
#errors
#document
| <html>
| <head>
| <body>
| <math math>


@@ -1,43 +0,0 @@
#data
<button>1</foo>
#errors
#document
| <html>
| <head>
| <body>
| <button>
| "1"
#data
<foo>1<p>2</foo>
#errors
#document
| <html>
| <head>
| <body>
| <foo>
| "1"
| <p>
| "2"
#data
<dd>1</foo>
#errors
#document
| <html>
| <head>
| <body>
| <dd>
| "1"
#data
<foo>1<dd>2</foo>
#errors
#document
| <html>
| <head>
| <body>
| <foo>
| "1"
| <dd>
| "2"


@@ -1,40 +0,0 @@
#data
<isindex>
#errors
#document
| <html>
| <head>
| <body>
| <form>
| <hr>
| <label>
| "This is a searchable index. Enter search keywords: "
| <input>
| name="isindex"
| <hr>
#data
<isindex name="A" action="B" prompt="C" foo="D">
#errors
#document
| <html>
| <head>
| <body>
| <form>
| action="B"
| <hr>
| <label>
| "C"
| <input>
| foo="D"
| name="isindex"
| <hr>
#data
<form><isindex>
#errors
#document
| <html>
| <head>
| <body>
| <form>


@@ -1,28 +0,0 @@
#data
<input type="hidden"><frameset>
#errors
21: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
31: “frameset” start tag seen.
31: End of file seen and there were open elements.
#document
| <html>
| <head>
| <frameset>
#data
<!DOCTYPE html><table><caption><svg>foo</table>bar
#errors
47: End tag “table” did not match the name of the current open element (“svg”).
47: “table” closed but “caption” was still open.
47: End tag “table” seen, but there were open elements.
36: Unclosed element “svg”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <svg svg>
| "foo"
| "bar"


@@ -1,8 +0,0 @@
#data
FOO&#x000D;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO


@@ -1,308 +0,0 @@
#data
FOO<script>'Hello'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'Hello'"
| "BAR"
#data
FOO<script></script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "BAR"
#data
FOO<script></script >BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "BAR"
#data
FOO<script></script/>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "BAR"
#data
FOO<script></script/ >BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "BAR"
#data
FOO<script type="text/plain"></scriptx>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "</scriptx>BAR"
#data
FOO<script></script foo=">" dd>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "BAR"
#data
FOO<script>'<'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<'"
| "BAR"
#data
FOO<script>'<!'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!'"
| "BAR"
#data
FOO<script>'<!-'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-'"
| "BAR"
#data
FOO<script>'<!--'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!--'"
| "BAR"
#data
FOO<script>'<!---'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!---'"
| "BAR"
#data
FOO<script>'<!-->'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-->'"
| "BAR"
#data
FOO<script>'<!-->'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-->'"
| "BAR"
#data
FOO<script>'<!-- potato'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-- potato'"
| "BAR"
#data
FOO<script>'<!-- <sCrIpt'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-- <sCrIpt'"
| "BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt>'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt>'</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt> -'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt> -'</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt> --'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt> --'</script>BAR"
#data
FOO<script>'<!-- <sCrIpt> -->'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-- <sCrIpt> -->'"
| "BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt> --!>'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt> --!>'</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt> -- >'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt> -- >'</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt '</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt '</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt/'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt/'</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt\'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt\'"
| "BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt/'</script>BAR</script>QUX
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt/'</script>BAR"
| "QUX"


@@ -1,15 +0,0 @@
#data
<p><b id="A"><script>document.getElementById("A").id = "B"</script></p>TEXT</b>
#errors
#document
| <html>
| <head>
| <body>
| <p>
| <b>
| id="B"
| <script>
| "document.getElementById("A").id = "B""
| <b>
| id="A"
| "TEXT"


@@ -1,28 +0,0 @@
#data
1<script>document.write("2")</script>3
#errors
#document
| <html>
| <head>
| <body>
| "1"
| <script>
| "document.write("2")"
| "23"
#data
1<script>document.write("<script>document.write('2')</scr"+ "ipt><script>document.write('3')</scr" + "ipt>")</script>4
#errors
#document
| <html>
| <head>
| <body>
| "1"
| <script>
| "document.write("<script>document.write('2')</scr"+ "ipt><script>document.write('3')</scr" + "ipt>")"
| <script>
| "document.write('2')"
| "2"
| <script>
| "document.write('3')"
| "34"


@@ -1,197 +0,0 @@
#data
<table><th>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <th>
#data
<table><td>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
#data
<table><col foo='bar'>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <colgroup>
| <col>
| foo="bar"
#data
<table><colgroup></html>foo
#errors
#document
| <html>
| <head>
| <body>
| "foo"
| <table>
| <colgroup>
#data
<table></table><p>foo
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <p>
| "foo"
#data
<table></body></caption></col></colgroup></html></tbody></td></tfoot></th></thead></tr><td>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
#data
<table><select><option>3</select></table>
#errors
#document
| <html>
| <head>
| <body>
| <select>
| <option>
| "3"
| <table>
#data
<table><select><table></table></select></table>
#errors
#document
| <html>
| <head>
| <body>
| <select>
| <table>
| <table>
#data
<table><select></table>
#errors
#document
| <html>
| <head>
| <body>
| <select>
| <table>
#data
<table><select><option>A<tr><td>B</td></tr></table>
#errors
#document
| <html>
| <head>
| <body>
| <select>
| <option>
| "A"
| <table>
| <tbody>
| <tr>
| <td>
| "B"
#data
<table><td></body></caption></col></colgroup></html>foo
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| "foo"
#data
<table><td>A</table>B
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| "A"
| "B"
#data
<table><tr><caption>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <caption>
#data
<table><tr></body></caption></col></colgroup></html></td></th><td>foo
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| "foo"
#data
<table><td><tr>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <tr>
#data
<table><td><button><td>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <button>
| <td>

File diff suppressed because it is too large


@@ -1,799 +0,0 @@
#data
<!DOCTYPE html><svg></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
#data
<!DOCTYPE html><svg></svg><![CDATA[a]]>
#errors
29: Bogus comment
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <!-- [CDATA[a]] -->
#data
<!DOCTYPE html><body><svg></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
#data
<!DOCTYPE html><body><select><svg></svg></select>
#errors
35: Stray “svg” start tag.
42: Stray end tag “svg”
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
#data
<!DOCTYPE html><body><select><option><svg></svg></option></select>
#errors
43: Stray “svg” start tag.
50: Stray end tag “svg”
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <option>
#data
<!DOCTYPE html><body><table><svg></svg></table>
#errors
34: Start tag “svg” seen in “table”.
41: Stray end tag “svg”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <table>
#data
<!DOCTYPE html><body><table><svg><g>foo</g></svg></table>
#errors
34: Start tag “svg” seen in “table”.
46: Stray end tag “g”.
53: Stray end tag “svg”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <table>
#data
<!DOCTYPE html><body><table><svg><g>foo</g><g>bar</g></svg></table>
#errors
34: Start tag “svg” seen in “table”.
46: Stray end tag “g”.
58: Stray end tag “g”.
65: Stray end tag “svg”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <table>
#data
<!DOCTYPE html><body><table><tbody><svg><g>foo</g><g>bar</g></svg></tbody></table>
#errors
41: Start tag “svg” seen in “table”.
53: Stray end tag “g”.
65: Stray end tag “g”.
72: Stray end tag “svg”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <table>
| <tbody>
#data
<!DOCTYPE html><body><table><tbody><tr><svg><g>foo</g><g>bar</g></svg></tr></tbody></table>
#errors
45: Start tag “svg” seen in “table”.
57: Stray end tag “g”.
69: Stray end tag “g”.
76: Stray end tag “svg”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <table>
| <tbody>
| <tr>
#data
<!DOCTYPE html><body><table><tbody><tr><td><svg><g>foo</g><g>bar</g></svg></td></tr></tbody></table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
#data
<!DOCTYPE html><body><table><tbody><tr><td><svg><g>foo</g><g>bar</g></svg><p>baz</td></tr></tbody></table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g></svg><p>baz</caption></table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
#errors
70: HTML start tag “p” in a foreign namespace context.
81: “table” closed but “caption” was still open.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g>baz</table><p>quux
#errors
78: “table” closed but “caption” was still open.
78: Unclosed elements on stack.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| "baz"
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><colgroup><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
#errors
44: Start tag “svg” seen in “table”.
56: Stray end tag “g”.
68: Stray end tag “g”.
71: HTML start tag “p” in a foreign namespace context.
71: Start tag “p” seen in “table”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
| <table>
| <colgroup>
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><tr><td><select><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
#errors
50: Stray “svg” start tag.
54: Stray “g” start tag.
62: Stray end tag “g”
66: Stray “g” start tag.
74: Stray end tag “g”
77: Stray “p” start tag.
88: “table” end tag with “select” open.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <select>
| "foobarbaz"
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><select><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
#errors
36: Start tag “select” seen in “table”.
42: Stray “svg” start tag.
46: Stray “g” start tag.
54: Stray end tag “g”
58: Stray “g” start tag.
66: Stray end tag “g”
69: Stray “p” start tag.
80: “table” end tag with “select” open.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| "foobarbaz"
| <table>
| <p>
| "quux"
#data
<!DOCTYPE html><body></body></html><svg><g>foo</g><g>bar</g><p>baz
#errors
41: Stray “svg” start tag.
68: HTML start tag “p” in a foreign namespace context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><body></body><svg><g>foo</g><g>bar</g><p>baz
#errors
34: Stray “svg” start tag.
61: HTML start tag “p” in a foreign namespace context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><frameset><svg><g></g><g></g><p><span>
#errors
31: Stray “svg” start tag.
35: Stray “g” start tag.
40: Stray end tag “g”
44: Stray “g” start tag.
49: Stray end tag “g”
52: Stray “p” start tag.
58: Stray “span” start tag.
58: End of file seen and there were open elements.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
#data
<!DOCTYPE html><frameset></frameset><svg><g></g><g></g><p><span>
#errors
42: Stray “svg” start tag.
46: Stray “g” start tag.
51: Stray end tag “g”
55: Stray “g” start tag.
60: Stray end tag “g”
63: Stray “p” start tag.
69: Stray “span” start tag.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
#data
<!DOCTYPE html><body xlink:href=foo><svg xlink:href=foo></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| <svg svg>
| xlink href="foo"
#data
<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo></g></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| xml:lang="en"
| <svg svg>
| <svg g>
| xlink href="foo"
| xml lang="en"
#data
<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo /></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| xml:lang="en"
| <svg svg>
| <svg g>
| xlink href="foo"
| xml lang="en"
#data
<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo />bar</svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| xml:lang="en"
| <svg svg>
| <svg g>
| xlink href="foo"
| xml lang="en"
| "bar"
#data
<svg></path>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
#data
<div><svg></div>a
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| "a"
#data
<div><svg><path></div>a
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| <svg path>
| "a"
#data
<div><svg><path></svg><path>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| <svg path>
| <path>
#data
<div><svg><path><foreignObject><math></div>a
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| <svg path>
| <svg foreignObject>
| <math math>
| "a"
#data
<div><svg><path><foreignObject><p></div>a
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| <svg path>
| <svg foreignObject>
| <p>
| "a"
#data
<!DOCTYPE html><svg><desc><div><svg><ul>a
#errors
40: HTML start tag “ul” in a foreign namespace context.
41: End of file in a foreign namespace context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg desc>
| <div>
| <svg svg>
| <ul>
| "a"
#data
<!DOCTYPE html><svg><desc><svg><ul>a
#errors
35: HTML start tag “ul” in a foreign namespace context.
36: End of file in a foreign namespace context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg desc>
| <svg svg>
| <ul>
| "a"
#data
<!DOCTYPE html><p><svg><desc><p>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <svg svg>
| <svg desc>
| <p>
#data
<!DOCTYPE html><p><svg><title><p>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <svg svg>
| <svg title>
| <p>
#data
<div><svg><path><foreignObject><p></foreignObject><p>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| <svg path>
| <svg foreignObject>
| <p>
| <p>
#data
<math><mi><div><object><div><span></span></div></object></div></mi><mi>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| <div>
| <object>
| <div>
| <span>
| <math mi>
#data
<math><mi><svg><foreignObject><div><div></div></div></foreignObject></svg></mi><mi>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| <svg svg>
| <svg foreignObject>
| <div>
| <div>
| <math mi>
#data
<svg><script></script><path>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| <svg script>
| <svg path>
#data
<table><svg></svg><tr>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| <table>
| <tbody>
| <tr>
#data
<math><mi><mglyph>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| <math mglyph>
#data
<math><mi><malignmark>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| <math malignmark>
#data
<math><mo><mglyph>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mo>
| <math mglyph>
#data
<math><mo><malignmark>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mo>
| <math malignmark>
#data
<math><mn><mglyph>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mn>
| <math mglyph>
#data
<math><mn><malignmark>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mn>
| <math malignmark>
#data
<math><ms><mglyph>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math ms>
| <math mglyph>
#data
<math><ms><malignmark>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math ms>
| <math malignmark>
#data
<math><mtext><mglyph>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mtext>
| <math mglyph>
#data
<math><mtext><malignmark>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mtext>
| <math malignmark>
#data
<math><annotation-xml><svg></svg></annotation-xml><mi>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| <svg svg>
| <math mi>
#data
<math><annotation-xml><svg><foreignObject><div><math><mi></mi></math><span></span></div></foreignObject><path></path></svg></annotation-xml><mi>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| <svg svg>
| <svg foreignObject>
| <div>
| <math math>
| <math mi>
| <span>
| <svg path>
| <math mi>
#data
<math><annotation-xml><svg><foreignObject><math><mi><svg></svg></mi><mo></mo></math><span></span></foreignObject><path></path></svg></annotation-xml><mi>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| <svg svg>
| <svg foreignObject>
| <math math>
| <math mi>
| <svg svg>
| <math mo>
| <span>
| <svg path>
| <math mi>


@@ -1,482 +0,0 @@
#data
<!DOCTYPE html><body><svg attributeName='' attributeType='' baseFrequency='' baseProfile='' calcMode='' clipPathUnits='' contentScriptType='' contentStyleType='' diffuseConstant='' edgeMode='' externalResourcesRequired='' filterRes='' filterUnits='' glyphRef='' gradientTransform='' gradientUnits='' kernelMatrix='' kernelUnitLength='' keyPoints='' keySplines='' keyTimes='' lengthAdjust='' limitingConeAngle='' markerHeight='' markerUnits='' markerWidth='' maskContentUnits='' maskUnits='' numOctaves='' pathLength='' patternContentUnits='' patternTransform='' patternUnits='' pointsAtX='' pointsAtY='' pointsAtZ='' preserveAlpha='' preserveAspectRatio='' primitiveUnits='' refX='' refY='' repeatCount='' repeatDur='' requiredExtensions='' requiredFeatures='' specularConstant='' specularExponent='' spreadMethod='' startOffset='' stdDeviation='' stitchTiles='' surfaceScale='' systemLanguage='' tableValues='' targetX='' targetY='' textLength='' viewBox='' viewTarget='' xChannelSelector='' yChannelSelector='' zoomAndPan=''></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| attributeName=""
| attributeType=""
| baseFrequency=""
| baseProfile=""
| calcMode=""
| clipPathUnits=""
| contentScriptType=""
| contentStyleType=""
| diffuseConstant=""
| edgeMode=""
| externalResourcesRequired=""
| filterRes=""
| filterUnits=""
| glyphRef=""
| gradientTransform=""
| gradientUnits=""
| kernelMatrix=""
| kernelUnitLength=""
| keyPoints=""
| keySplines=""
| keyTimes=""
| lengthAdjust=""
| limitingConeAngle=""
| markerHeight=""
| markerUnits=""
| markerWidth=""
| maskContentUnits=""
| maskUnits=""
| numOctaves=""
| pathLength=""
| patternContentUnits=""
| patternTransform=""
| patternUnits=""
| pointsAtX=""
| pointsAtY=""
| pointsAtZ=""
| preserveAlpha=""
| preserveAspectRatio=""
| primitiveUnits=""
| refX=""
| refY=""
| repeatCount=""
| repeatDur=""
| requiredExtensions=""
| requiredFeatures=""
| specularConstant=""
| specularExponent=""
| spreadMethod=""
| startOffset=""
| stdDeviation=""
| stitchTiles=""
| surfaceScale=""
| systemLanguage=""
| tableValues=""
| targetX=""
| targetY=""
| textLength=""
| viewBox=""
| viewTarget=""
| xChannelSelector=""
| yChannelSelector=""
| zoomAndPan=""
#data
<!DOCTYPE html><BODY><SVG ATTRIBUTENAME='' ATTRIBUTETYPE='' BASEFREQUENCY='' BASEPROFILE='' CALCMODE='' CLIPPATHUNITS='' CONTENTSCRIPTTYPE='' CONTENTSTYLETYPE='' DIFFUSECONSTANT='' EDGEMODE='' EXTERNALRESOURCESREQUIRED='' FILTERRES='' FILTERUNITS='' GLYPHREF='' GRADIENTTRANSFORM='' GRADIENTUNITS='' KERNELMATRIX='' KERNELUNITLENGTH='' KEYPOINTS='' KEYSPLINES='' KEYTIMES='' LENGTHADJUST='' LIMITINGCONEANGLE='' MARKERHEIGHT='' MARKERUNITS='' MARKERWIDTH='' MASKCONTENTUNITS='' MASKUNITS='' NUMOCTAVES='' PATHLENGTH='' PATTERNCONTENTUNITS='' PATTERNTRANSFORM='' PATTERNUNITS='' POINTSATX='' POINTSATY='' POINTSATZ='' PRESERVEALPHA='' PRESERVEASPECTRATIO='' PRIMITIVEUNITS='' REFX='' REFY='' REPEATCOUNT='' REPEATDUR='' REQUIREDEXTENSIONS='' REQUIREDFEATURES='' SPECULARCONSTANT='' SPECULAREXPONENT='' SPREADMETHOD='' STARTOFFSET='' STDDEVIATION='' STITCHTILES='' SURFACESCALE='' SYSTEMLANGUAGE='' TABLEVALUES='' TARGETX='' TARGETY='' TEXTLENGTH='' VIEWBOX='' VIEWTARGET='' XCHANNELSELECTOR='' YCHANNELSELECTOR='' ZOOMANDPAN=''></SVG>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| attributeName=""
| attributeType=""
| baseFrequency=""
| baseProfile=""
| calcMode=""
| clipPathUnits=""
| contentScriptType=""
| contentStyleType=""
| diffuseConstant=""
| edgeMode=""
| externalResourcesRequired=""
| filterRes=""
| filterUnits=""
| glyphRef=""
| gradientTransform=""
| gradientUnits=""
| kernelMatrix=""
| kernelUnitLength=""
| keyPoints=""
| keySplines=""
| keyTimes=""
| lengthAdjust=""
| limitingConeAngle=""
| markerHeight=""
| markerUnits=""
| markerWidth=""
| maskContentUnits=""
| maskUnits=""
| numOctaves=""
| pathLength=""
| patternContentUnits=""
| patternTransform=""
| patternUnits=""
| pointsAtX=""
| pointsAtY=""
| pointsAtZ=""
| preserveAlpha=""
| preserveAspectRatio=""
| primitiveUnits=""
| refX=""
| refY=""
| repeatCount=""
| repeatDur=""
| requiredExtensions=""
| requiredFeatures=""
| specularConstant=""
| specularExponent=""
| spreadMethod=""
| startOffset=""
| stdDeviation=""
| stitchTiles=""
| surfaceScale=""
| systemLanguage=""
| tableValues=""
| targetX=""
| targetY=""
| textLength=""
| viewBox=""
| viewTarget=""
| xChannelSelector=""
| yChannelSelector=""
| zoomAndPan=""
#data
<!DOCTYPE html><body><svg attributename='' attributetype='' basefrequency='' baseprofile='' calcmode='' clippathunits='' contentscripttype='' contentstyletype='' diffuseconstant='' edgemode='' externalresourcesrequired='' filterres='' filterunits='' glyphref='' gradienttransform='' gradientunits='' kernelmatrix='' kernelunitlength='' keypoints='' keysplines='' keytimes='' lengthadjust='' limitingconeangle='' markerheight='' markerunits='' markerwidth='' maskcontentunits='' maskunits='' numoctaves='' pathlength='' patterncontentunits='' patterntransform='' patternunits='' pointsatx='' pointsaty='' pointsatz='' preservealpha='' preserveaspectratio='' primitiveunits='' refx='' refy='' repeatcount='' repeatdur='' requiredextensions='' requiredfeatures='' specularconstant='' specularexponent='' spreadmethod='' startoffset='' stddeviation='' stitchtiles='' surfacescale='' systemlanguage='' tablevalues='' targetx='' targety='' textlength='' viewbox='' viewtarget='' xchannelselector='' ychannelselector='' zoomandpan=''></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| attributeName=""
| attributeType=""
| baseFrequency=""
| baseProfile=""
| calcMode=""
| clipPathUnits=""
| contentScriptType=""
| contentStyleType=""
| diffuseConstant=""
| edgeMode=""
| externalResourcesRequired=""
| filterRes=""
| filterUnits=""
| glyphRef=""
| gradientTransform=""
| gradientUnits=""
| kernelMatrix=""
| kernelUnitLength=""
| keyPoints=""
| keySplines=""
| keyTimes=""
| lengthAdjust=""
| limitingConeAngle=""
| markerHeight=""
| markerUnits=""
| markerWidth=""
| maskContentUnits=""
| maskUnits=""
| numOctaves=""
| pathLength=""
| patternContentUnits=""
| patternTransform=""
| patternUnits=""
| pointsAtX=""
| pointsAtY=""
| pointsAtZ=""
| preserveAlpha=""
| preserveAspectRatio=""
| primitiveUnits=""
| refX=""
| refY=""
| repeatCount=""
| repeatDur=""
| requiredExtensions=""
| requiredFeatures=""
| specularConstant=""
| specularExponent=""
| spreadMethod=""
| startOffset=""
| stdDeviation=""
| stitchTiles=""
| surfaceScale=""
| systemLanguage=""
| tableValues=""
| targetX=""
| targetY=""
| textLength=""
| viewBox=""
| viewTarget=""
| xChannelSelector=""
| yChannelSelector=""
| zoomAndPan=""
#data
<!DOCTYPE html><body><math attributeName='' attributeType='' baseFrequency='' baseProfile='' calcMode='' clipPathUnits='' contentScriptType='' contentStyleType='' diffuseConstant='' edgeMode='' externalResourcesRequired='' filterRes='' filterUnits='' glyphRef='' gradientTransform='' gradientUnits='' kernelMatrix='' kernelUnitLength='' keyPoints='' keySplines='' keyTimes='' lengthAdjust='' limitingConeAngle='' markerHeight='' markerUnits='' markerWidth='' maskContentUnits='' maskUnits='' numOctaves='' pathLength='' patternContentUnits='' patternTransform='' patternUnits='' pointsAtX='' pointsAtY='' pointsAtZ='' preserveAlpha='' preserveAspectRatio='' primitiveUnits='' refX='' refY='' repeatCount='' repeatDur='' requiredExtensions='' requiredFeatures='' specularConstant='' specularExponent='' spreadMethod='' startOffset='' stdDeviation='' stitchTiles='' surfaceScale='' systemLanguage='' tableValues='' targetX='' targetY='' textLength='' viewBox='' viewTarget='' xChannelSelector='' yChannelSelector='' zoomAndPan=''></math>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| attributename=""
| attributetype=""
| basefrequency=""
| baseprofile=""
| calcmode=""
| clippathunits=""
| contentscripttype=""
| contentstyletype=""
| diffuseconstant=""
| edgemode=""
| externalresourcesrequired=""
| filterres=""
| filterunits=""
| glyphref=""
| gradienttransform=""
| gradientunits=""
| kernelmatrix=""
| kernelunitlength=""
| keypoints=""
| keysplines=""
| keytimes=""
| lengthadjust=""
| limitingconeangle=""
| markerheight=""
| markerunits=""
| markerwidth=""
| maskcontentunits=""
| maskunits=""
| numoctaves=""
| pathlength=""
| patterncontentunits=""
| patterntransform=""
| patternunits=""
| pointsatx=""
| pointsaty=""
| pointsatz=""
| preservealpha=""
| preserveaspectratio=""
| primitiveunits=""
| refx=""
| refy=""
| repeatcount=""
| repeatdur=""
| requiredextensions=""
| requiredfeatures=""
| specularconstant=""
| specularexponent=""
| spreadmethod=""
| startoffset=""
| stddeviation=""
| stitchtiles=""
| surfacescale=""
| systemlanguage=""
| tablevalues=""
| targetx=""
| targety=""
| textlength=""
| viewbox=""
| viewtarget=""
| xchannelselector=""
| ychannelselector=""
| zoomandpan=""
#data
<!DOCTYPE html><body><svg><altGlyph /><altGlyphDef /><altGlyphItem /><animateColor /><animateMotion /><animateTransform /><clipPath /><feBlend /><feColorMatrix /><feComponentTransfer /><feComposite /><feConvolveMatrix /><feDiffuseLighting /><feDisplacementMap /><feDistantLight /><feFlood /><feFuncA /><feFuncB /><feFuncG /><feFuncR /><feGaussianBlur /><feImage /><feMerge /><feMergeNode /><feMorphology /><feOffset /><fePointLight /><feSpecularLighting /><feSpotLight /><feTile /><feTurbulence /><foreignObject /><glyphRef /><linearGradient /><radialGradient /><textPath /></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg altGlyph>
| <svg altGlyphDef>
| <svg altGlyphItem>
| <svg animateColor>
| <svg animateMotion>
| <svg animateTransform>
| <svg clipPath>
| <svg feBlend>
| <svg feColorMatrix>
| <svg feComponentTransfer>
| <svg feComposite>
| <svg feConvolveMatrix>
| <svg feDiffuseLighting>
| <svg feDisplacementMap>
| <svg feDistantLight>
| <svg feFlood>
| <svg feFuncA>
| <svg feFuncB>
| <svg feFuncG>
| <svg feFuncR>
| <svg feGaussianBlur>
| <svg feImage>
| <svg feMerge>
| <svg feMergeNode>
| <svg feMorphology>
| <svg feOffset>
| <svg fePointLight>
| <svg feSpecularLighting>
| <svg feSpotLight>
| <svg feTile>
| <svg feTurbulence>
| <svg foreignObject>
| <svg glyphRef>
| <svg linearGradient>
| <svg radialGradient>
| <svg textPath>
#data
<!DOCTYPE html><body><svg><altglyph /><altglyphdef /><altglyphitem /><animatecolor /><animatemotion /><animatetransform /><clippath /><feblend /><fecolormatrix /><fecomponenttransfer /><fecomposite /><feconvolvematrix /><fediffuselighting /><fedisplacementmap /><fedistantlight /><feflood /><fefunca /><fefuncb /><fefuncg /><fefuncr /><fegaussianblur /><feimage /><femerge /><femergenode /><femorphology /><feoffset /><fepointlight /><fespecularlighting /><fespotlight /><fetile /><feturbulence /><foreignobject /><glyphref /><lineargradient /><radialgradient /><textpath /></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg altGlyph>
| <svg altGlyphDef>
| <svg altGlyphItem>
| <svg animateColor>
| <svg animateMotion>
| <svg animateTransform>
| <svg clipPath>
| <svg feBlend>
| <svg feColorMatrix>
| <svg feComponentTransfer>
| <svg feComposite>
| <svg feConvolveMatrix>
| <svg feDiffuseLighting>
| <svg feDisplacementMap>
| <svg feDistantLight>
| <svg feFlood>
| <svg feFuncA>
| <svg feFuncB>
| <svg feFuncG>
| <svg feFuncR>
| <svg feGaussianBlur>
| <svg feImage>
| <svg feMerge>
| <svg feMergeNode>
| <svg feMorphology>
| <svg feOffset>
| <svg fePointLight>
| <svg feSpecularLighting>
| <svg feSpotLight>
| <svg feTile>
| <svg feTurbulence>
| <svg foreignObject>
| <svg glyphRef>
| <svg linearGradient>
| <svg radialGradient>
| <svg textPath>
#data
<!DOCTYPE html><BODY><SVG><ALTGLYPH /><ALTGLYPHDEF /><ALTGLYPHITEM /><ANIMATECOLOR /><ANIMATEMOTION /><ANIMATETRANSFORM /><CLIPPATH /><FEBLEND /><FECOLORMATRIX /><FECOMPONENTTRANSFER /><FECOMPOSITE /><FECONVOLVEMATRIX /><FEDIFFUSELIGHTING /><FEDISPLACEMENTMAP /><FEDISTANTLIGHT /><FEFLOOD /><FEFUNCA /><FEFUNCB /><FEFUNCG /><FEFUNCR /><FEGAUSSIANBLUR /><FEIMAGE /><FEMERGE /><FEMERGENODE /><FEMORPHOLOGY /><FEOFFSET /><FEPOINTLIGHT /><FESPECULARLIGHTING /><FESPOTLIGHT /><FETILE /><FETURBULENCE /><FOREIGNOBJECT /><GLYPHREF /><LINEARGRADIENT /><RADIALGRADIENT /><TEXTPATH /></SVG>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg altGlyph>
| <svg altGlyphDef>
| <svg altGlyphItem>
| <svg animateColor>
| <svg animateMotion>
| <svg animateTransform>
| <svg clipPath>
| <svg feBlend>
| <svg feColorMatrix>
| <svg feComponentTransfer>
| <svg feComposite>
| <svg feConvolveMatrix>
| <svg feDiffuseLighting>
| <svg feDisplacementMap>
| <svg feDistantLight>
| <svg feFlood>
| <svg feFuncA>
| <svg feFuncB>
| <svg feFuncG>
| <svg feFuncR>
| <svg feGaussianBlur>
| <svg feImage>
| <svg feMerge>
| <svg feMergeNode>
| <svg feMorphology>
| <svg feOffset>
| <svg fePointLight>
| <svg feSpecularLighting>
| <svg feSpotLight>
| <svg feTile>
| <svg feTurbulence>
| <svg foreignObject>
| <svg glyphRef>
| <svg linearGradient>
| <svg radialGradient>
| <svg textPath>
#data
<!DOCTYPE html><body><math><altGlyph /><altGlyphDef /><altGlyphItem /><animateColor /><animateMotion /><animateTransform /><clipPath /><feBlend /><feColorMatrix /><feComponentTransfer /><feComposite /><feConvolveMatrix /><feDiffuseLighting /><feDisplacementMap /><feDistantLight /><feFlood /><feFuncA /><feFuncB /><feFuncG /><feFuncR /><feGaussianBlur /><feImage /><feMerge /><feMergeNode /><feMorphology /><feOffset /><fePointLight /><feSpecularLighting /><feSpotLight /><feTile /><feTurbulence /><foreignObject /><glyphRef /><linearGradient /><radialGradient /><textPath /></math>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <math altglyph>
| <math altglyphdef>
| <math altglyphitem>
| <math animatecolor>
| <math animatemotion>
| <math animatetransform>
| <math clippath>
| <math feblend>
| <math fecolormatrix>
| <math fecomponenttransfer>
| <math fecomposite>
| <math feconvolvematrix>
| <math fediffuselighting>
| <math fedisplacementmap>
| <math fedistantlight>
| <math feflood>
| <math fefunca>
| <math fefuncb>
| <math fefuncg>
| <math fefuncr>
| <math fegaussianblur>
| <math feimage>
| <math femerge>
| <math femergenode>
| <math femorphology>
| <math feoffset>
| <math fepointlight>
| <math fespecularlighting>
| <math fespotlight>
| <math fetile>
| <math feturbulence>
| <math foreignobject>
| <math glyphref>
| <math lineargradient>
| <math radialgradient>
| <math textpath>
#data
<!DOCTYPE html><body><svg><solidColor /></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg solidcolor>


@@ -1,62 +0,0 @@
#data
<!DOCTYPE html><body><p>foo<math><mtext><i>baz</i></mtext><annotation-xml><svg><desc><b>eggs</b></desc><g><foreignObject><P>spam<TABLE><tr><td><img></td></table></foreignObject></g><g>quux</g></svg></annotation-xml></math>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <math math>
| <math mtext>
| <i>
| "baz"
| <math annotation-xml>
| <svg svg>
| <svg desc>
| <b>
| "eggs"
| <svg g>
| <svg foreignObject>
| <p>
| "spam"
| <table>
| <tbody>
| <tr>
| <td>
| <img>
| <svg g>
| "quux"
| "bar"
#data
<!DOCTYPE html><body>foo<math><mtext><i>baz</i></mtext><annotation-xml><svg><desc><b>eggs</b></desc><g><foreignObject><P>spam<TABLE><tr><td><img></td></table></foreignObject></g><g>quux</g></svg></annotation-xml></math>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "foo"
| <math math>
| <math mtext>
| <i>
| "baz"
| <math annotation-xml>
| <svg svg>
| <svg desc>
| <b>
| "eggs"
| <svg g>
| <svg foreignObject>
| <p>
| "spam"
| <table>
| <tbody>
| <tr>
| <td>
| <img>
| <svg g>
| "quux"
| "bar"


@@ -1,74 +0,0 @@
#data
<!DOCTYPE html><html><body><xyz:abc></xyz:abc>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <xyz:abc>
#data
<!DOCTYPE html><html><body><xyz:abc></xyz:abc><span></span>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <xyz:abc>
| <span>
#data
<!DOCTYPE html><html><html abc:def=gh><xyz:abc></xyz:abc>
#errors
15: Unexpected start tag html
#document
| <!DOCTYPE html>
| <html>
| abc:def="gh"
| <head>
| <body>
| <xyz:abc>
#data
<!DOCTYPE html><html xml:lang=bar><html xml:lang=foo>
#errors
15: Unexpected start tag html
#document
| <!DOCTYPE html>
| <html>
| xml:lang="bar"
| <head>
| <body>
#data
<!DOCTYPE html><html 123=456>
#errors
#document
| <!DOCTYPE html>
| <html>
| 123="456"
| <head>
| <body>
#data
<!DOCTYPE html><html 123=456><html 789=012>
#errors
#document
| <!DOCTYPE html>
| <html>
| 123="456"
| 789="012"
| <head>
| <body>
#data
<!DOCTYPE html><html><body 789=012>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| 789="012"


@@ -1,208 +0,0 @@
#data
<!DOCTYPE html><p><b><i><u></p> <p>X
#errors
Line: 1 Col: 31 Unexpected end tag (p). Ignored.
Line: 1 Col: 36 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <b>
| <i>
| <u>
| <b>
| <i>
| <u>
| " "
| <p>
| "X"
#data
<p><b><i><u></p>
<p>X
#errors
Line: 1 Col: 3 Unexpected start tag (p). Expected DOCTYPE.
Line: 1 Col: 16 Unexpected end tag (p). Ignored.
Line: 2 Col: 4 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <p>
| <b>
| <i>
| <u>
| <b>
| <i>
| <u>
| "
"
| <p>
| "X"
#data
<!doctype html></html> <head>
#errors
Line: 1 Col: 22 Unexpected end tag (html) after the (implied) root element.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| " "
#data
<!doctype html></body><meta>
#errors
Line: 1 Col: 22 Unexpected end tag (body) after the (implied) root element.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <meta>
#data
<html></html><!-- foo -->
#errors
Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
Line: 1 Col: 13 Unexpected end tag (html) after the (implied) root element.
#document
| <html>
| <head>
| <body>
| <!-- foo -->
#data
<!doctype html></body><title>X</title>
#errors
Line: 1 Col: 22 Unexpected end tag (body) after the (implied) root element.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <title>
| "X"
#data
<!doctype html><table> X<meta></table>
#errors
Line: 1 Col: 24 Unexpected non-space characters in table context caused voodoo mode.
Line: 1 Col: 30 Unexpected start tag (meta) in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| " X"
| <meta>
| <table>
#data
<!doctype html><table> x</table>
#errors
Line: 1 Col: 24 Unexpected non-space characters in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| " x"
| <table>
#data
<!doctype html><table> x </table>
#errors
Line: 1 Col: 25 Unexpected non-space characters in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| " x "
| <table>
#data
<!doctype html><table><tr> x</table>
#errors
Line: 1 Col: 28 Unexpected non-space characters in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| " x"
| <table>
| <tbody>
| <tr>
#data
<!doctype html><table>X<style> <tr>x </style> </table>
#errors
Line: 1 Col: 23 Unexpected non-space characters in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "X"
| <table>
| <style>
| " <tr>x "
| " "
#data
<!doctype html><div><table><a>foo</a> <tr><td>bar</td> </tr></table></div>
#errors
Line: 1 Col: 30 Unexpected start tag (a) in table context caused voodoo mode.
Line: 1 Col: 37 Unexpected end tag (a) in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <div>
| <a>
| "foo"
| <table>
| " "
| <tbody>
| <tr>
| <td>
| "bar"
| " "
#data
<frame></frame></frame><frameset><frame><frameset><frame></frameset><noframes></frameset><noframes>
#errors
6: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
13: Stray start tag “frame”.
21: Stray end tag “frame”.
29: Stray end tag “frame”.
39: “frameset” start tag after “body” already open.
105: End of file seen inside an [R]CDATA element.
105: End of file seen and there were open elements.
XXX: These errors are wrong, please fix me!
#document
| <html>
| <head>
| <frameset>
| <frame>
| <frameset>
| <frame>
| <noframes>
| "</frameset><noframes>"
#data
<!DOCTYPE html><object></html>
#errors
1: Expected closing tag. Unexpected end of file
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <object>

File diff suppressed because it is too large


@@ -1,153 +0,0 @@
#data
<!doctype html><table><tbody><select><tr>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <table>
| <tbody>
| <tr>
#data
<!doctype html><table><tr><select><td>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <table>
| <tbody>
| <tr>
| <td>
#data
<!doctype html><table><tr><td><select><td>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <select>
| <td>
#data
<!doctype html><table><tr><th><select><td>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <th>
| <select>
| <td>
#data
<!doctype html><table><caption><select><tr>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <select>
| <tbody>
| <tr>
#data
<!doctype html><select><tr>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
#data
<!doctype html><select><td>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
#data
<!doctype html><select><th>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
#data
<!doctype html><select><tbody>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
#data
<!doctype html><select><thead>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
#data
<!doctype html><select><tfoot>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
#data
<!doctype html><select><caption>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
#data
<!doctype html><table><tr></table>a
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| "a"


@@ -1,269 +0,0 @@
#data
<!doctype html><plaintext></plaintext>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <plaintext>
| "</plaintext>"
#data
<!doctype html><table><plaintext></plaintext>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <plaintext>
| "</plaintext>"
| <table>
#data
<!doctype html><table><tbody><plaintext></plaintext>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <plaintext>
| "</plaintext>"
| <table>
| <tbody>
#data
<!doctype html><table><tbody><tr><plaintext></plaintext>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <plaintext>
| "</plaintext>"
| <table>
| <tbody>
| <tr>
#data
<!doctype html><table><tbody><tr><plaintext></plaintext>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <plaintext>
| "</plaintext>"
| <table>
| <tbody>
| <tr>
#data
<!doctype html><table><td><plaintext></plaintext>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <plaintext>
| "</plaintext>"
#data
<!doctype html><table><caption><plaintext></plaintext>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <plaintext>
| "</plaintext>"
#data
<!doctype html><table><tr><style></script></style>abc
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "abc"
| <table>
| <tbody>
| <tr>
| <style>
| "</script>"
#data
<!doctype html><table><tr><script></style></script>abc
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "abc"
| <table>
| <tbody>
| <tr>
| <script>
| "</style>"
#data
<!doctype html><table><caption><style></script></style>abc
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <style>
| "</script>"
| "abc"
#data
<!doctype html><table><td><style></script></style>abc
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <style>
| "</script>"
| "abc"
#data
<!doctype html><select><script></style></script>abc
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <script>
| "</style>"
| "abc"
#data
<!doctype html><table><select><script></style></script>abc
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <script>
| "</style>"
| "abc"
| <table>
#data
<!doctype html><table><tr><select><script></style></script>abc
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <script>
| "</style>"
| "abc"
| <table>
| <tbody>
| <tr>
#data
<!doctype html><frameset></frameset><noframes>abc
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
| <noframes>
| "abc"
#data
<!doctype html><frameset></frameset><noframes>abc</noframes><!--abc-->
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
| <noframes>
| "abc"
| <!-- abc -->
#data
<!doctype html><frameset></frameset></html><noframes>abc
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
| <noframes>
| "abc"
#data
<!doctype html><frameset></frameset></html><noframes>abc</noframes><!--abc-->
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
| <noframes>
| "abc"
| <!-- abc -->
#data
<!doctype html><table><tr></tbody><tfoot>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <tfoot>
#data
<!doctype html><table><td><svg></svg>abc<td>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <svg svg>
| "abc"
| <td>

File diff suppressed because it is too large


@@ -1,763 +0,0 @@
#data
<!DOCTYPE html>Test
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "Test"
#data
<textarea>test</div>test
#errors
Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE.
Line: 1 Col: 24 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <textarea>
| "test</div>test"
#data
<table><td>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 11 Unexpected table cell start tag (td) in the table body phase.
Line: 1 Col: 11 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
#data
<table><td>test</tbody></table>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 11 Unexpected table cell start tag (td) in the table body phase.
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| "test"
#data
<frame>test
#errors
Line: 1 Col: 7 Unexpected start tag (frame). Expected DOCTYPE.
Line: 1 Col: 7 Unexpected start tag frame. Ignored.
#document
| <html>
| <head>
| <body>
| "test"
#data
<!DOCTYPE html><frameset>test
#errors
Line: 1 Col: 29 Unepxected characters in the frameset phase. Characters ignored.
Line: 1 Col: 29 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
#data
<!DOCTYPE html><frameset><!DOCTYPE html>
#errors
Line: 1 Col: 40 Unexpected DOCTYPE. Ignored.
Line: 1 Col: 40 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
#data
<!DOCTYPE html><font><p><b>test</font>
#errors
Line: 1 Col: 38 End tag (font) violates step 1, paragraph 3 of the adoption agency algorithm.
Line: 1 Col: 38 End tag (font) violates step 1, paragraph 3 of the adoption agency algorithm.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <font>
| <p>
| <font>
| <b>
| "test"
#data
<!DOCTYPE html><dt><div><dd>
#errors
Line: 1 Col: 28 Missing end tag (div, dt).
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <dt>
| <div>
| <dd>
#data
<script></x
#errors
Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
Line: 1 Col: 11 Unexpected end of file. Expected end tag (script).
#document
| <html>
| <head>
| <script>
| "</x"
| <body>
#data
<table><plaintext><td>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 18 Unexpected start tag (plaintext) in table context caused voodoo mode.
Line: 1 Col: 22 Unexpected end of file. Expected table content.
#document
| <html>
| <head>
| <body>
| <plaintext>
| "<td>"
| <table>
#data
<plaintext></plaintext>
#errors
Line: 1 Col: 11 Unexpected start tag (plaintext). Expected DOCTYPE.
Line: 1 Col: 23 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <plaintext>
| "</plaintext>"
#data
<!DOCTYPE html><table><tr>TEST
#errors
Line: 1 Col: 30 Unexpected non-space characters in table context caused voodoo mode.
Line: 1 Col: 30 Unexpected end of file. Expected table content.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "TEST"
| <table>
| <tbody>
| <tr>
#data
<!DOCTYPE html><body t1=1><body t2=2><body t3=3 t4=4>
#errors
Line: 1 Col: 37 Unexpected start tag (body).
Line: 1 Col: 53 Unexpected start tag (body).
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| t1="1"
| t2="2"
| t3="3"
| t4="4"
#data
</b test
#errors
Line: 1 Col: 8 Unexpected end of file in attribute name.
Line: 1 Col: 8 End tag contains unexpected attributes.
Line: 1 Col: 8 Unexpected end tag (b). Expected DOCTYPE.
Line: 1 Col: 8 Unexpected end tag (b) after the (implied) root element.
#document
| <html>
| <head>
| <body>
#data
<!DOCTYPE html></b test<b &=&amp>X
#errors
Line: 1 Col: 32 Named entity didn't end with ';'.
Line: 1 Col: 33 End tag contains unexpected attributes.
Line: 1 Col: 33 Unexpected end tag (b) after the (implied) root element.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "X"
#data
<!doctypehtml><scrIPt type=text/x-foobar;baz>X</SCRipt
#errors
Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
Line: 1 Col: 54 Unexpected end of file in the tag name.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <script>
| type="text/x-foobar;baz"
| "X</SCRipt"
| <body>
#data
&
#errors
Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "&"
#data
&#
#errors
Line: 1 Col: 1 Numeric entity expected. Got end of file instead.
Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "&#"
#data
&#X
#errors
Line: 1 Col: 3 Numeric entity expected but none found.
Line: 1 Col: 3 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "&#X"
#data
&#x
#errors
Line: 1 Col: 3 Numeric entity expected but none found.
Line: 1 Col: 3 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "&#x"
#data
&#45
#errors
Line: 1 Col: 4 Numeric entity didn't end with ';'.
Line: 1 Col: 4 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "-"
#data
&x-test
#errors
Line: 1 Col: 1 Named entity expected. Got none.
Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "&x-test"
#data
<!doctypehtml><p><li>
#errors
Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <li>
#data
<!doctypehtml><p><dt>
#errors
Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <dt>
#data
<!doctypehtml><p><dd>
#errors
Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <dd>
#data
<!doctypehtml><p><form>
#errors
Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
Line: 1 Col: 23 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <form>
#data
<!DOCTYPE html><p></P>X
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "X"
#data
&AMP
#errors
Line: 1 Col: 4 Named entity didn't end with ';'.
Line: 1 Col: 4 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "&"
#data
&AMp;
#errors
Line: 1 Col: 1 Named entity expected. Got none.
Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "&AMp;"
#data
<!DOCTYPE html><html><head></head><body><thisISasillyTESTelementNameToMakeSureCrazyTagNamesArePARSEDcorrectLY>
#errors
Line: 1 Col: 110 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <thisisasillytestelementnametomakesurecrazytagnamesareparsedcorrectly>
#data
<!DOCTYPE html>X</body>X
#errors
Line: 1 Col: 24 Unexpected non-space characters in the after body phase.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "XX"
#data
<!DOCTYPE html><!-- X
#errors
Line: 1 Col: 21 Unexpected end of file in comment.
#document
| <!DOCTYPE html>
| <!-- X -->
| <html>
| <head>
| <body>
#data
<!DOCTYPE html><table><caption>test TEST</caption><td>test
#errors
Line: 1 Col: 54 Unexpected table cell start tag (td) in the table body phase.
Line: 1 Col: 58 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| "test TEST"
| <tbody>
| <tr>
| <td>
| "test"
#data
<!DOCTYPE html><select><option><optgroup>
#errors
Line: 1 Col: 41 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <option>
| <optgroup>
#data
<!DOCTYPE html><select><optgroup><option></optgroup><option><select><option>
#errors
Line: 1 Col: 68 Unexpected select start tag in the select phase treated as select end tag.
Line: 1 Col: 76 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <optgroup>
| <option>
| <option>
| <option>
#data
<!DOCTYPE html><select><optgroup><option><optgroup>
#errors
Line: 1 Col: 51 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <optgroup>
| <option>
| <optgroup>
#data
<!DOCTYPE html><datalist><option>foo</datalist>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <datalist>
| <option>
| "foo"
| "bar"
#data
<!DOCTYPE html><font><input><input></font>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <font>
| <input>
| <input>
#data
<!DOCTYPE html><!-- XXX - XXX -->
#errors
#document
| <!DOCTYPE html>
| <!-- XXX - XXX -->
| <html>
| <head>
| <body>
#data
<!DOCTYPE html><!-- XXX - XXX
#errors
Line: 1 Col: 29 Unexpected end of file in comment (-)
#document
| <!DOCTYPE html>
| <!-- XXX - XXX -->
| <html>
| <head>
| <body>
#data
<!DOCTYPE html><!-- XXX - XXX - XXX -->
#errors
#document
| <!DOCTYPE html>
| <!-- XXX - XXX - XXX -->
| <html>
| <head>
| <body>
#data
<isindex test=x name=x>
#errors
Line: 1 Col: 23 Unexpected start tag (isindex). Expected DOCTYPE.
Line: 1 Col: 23 Unexpected start tag isindex. Don't use it!
#document
| <html>
| <head>
| <body>
| <form>
| <hr>
| <label>
| "This is a searchable index. Enter search keywords: "
| <input>
| name="isindex"
| test="x"
| <hr>
#data
test
test
#errors
Line: 2 Col: 4 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "test
test"
#data
<!DOCTYPE html><body><title>test</body></title>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <title>
| "test</body>"
#data
<!DOCTYPE html><body><title>X</title><meta name=z><link rel=foo><style>
x { content:"</style" } </style>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <title>
| "X"
| <meta>
| name="z"
| <link>
| rel="foo"
| <style>
| "
x { content:"</style" } "
#data
<!DOCTYPE html><select><optgroup></optgroup></select>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <optgroup>
#data
#errors
Line: 2 Col: 1 Unexpected End of file. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
#data
<!DOCTYPE html> <html>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
#data
<!DOCTYPE html><script>
</script> <title>x</title> </head>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <script>
| "
"
| " "
| <title>
| "x"
| " "
| <body>
#data
<!DOCTYPE html><html><body><html id=x>
#errors
Line: 1 Col: 38 html needs to be the first start tag.
#document
| <!DOCTYPE html>
| <html>
| id="x"
| <head>
| <body>
#data
<!DOCTYPE html>X</body><html id="x">
#errors
Line: 1 Col: 36 Unexpected start tag token (html) in the after body phase.
Line: 1 Col: 36 html needs to be the first start tag.
#document
| <!DOCTYPE html>
| <html>
| id="x"
| <head>
| <body>
| "X"
#data
<!DOCTYPE html><head><html id=x>
#errors
Line: 1 Col: 32 html needs to be the first start tag.
#document
| <!DOCTYPE html>
| <html>
| id="x"
| <head>
| <body>
#data
<!DOCTYPE html>X</html>X
#errors
Line: 1 Col: 24 Unexpected non-space characters in the after body phase.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "XX"
#data
<!DOCTYPE html>X</html>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "X "
#data
<!DOCTYPE html>X</html><p>X
#errors
Line: 1 Col: 26 Unexpected start tag (p).
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "X"
| <p>
| "X"
#data
<!DOCTYPE html>X<p/x/y/z>
#errors
Line: 1 Col: 19 Expected a > after the /.
Line: 1 Col: 21 Solidus (/) incorrectly placed in tag.
Line: 1 Col: 23 Solidus (/) incorrectly placed in tag.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "X"
| <p>
| x=""
| y=""
| z=""
#data
<!DOCTYPE html><!--x--
#errors
Line: 1 Col: 22 Unexpected end of file in comment (--).
#document
| <!DOCTYPE html>
| <!-- x -->
| <html>
| <head>
| <body>
#data
<!DOCTYPE html><table><tr><td></p></table>
#errors
Line: 1 Col: 34 Unexpected end tag (p). Ignored.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <p>
#data
<!DOCTYPE <!DOCTYPE HTML>><!--<!--x-->-->
#errors
Line: 1 Col: 20 Expected space or '>'. Got ''
Line: 1 Col: 25 Erroneous DOCTYPE.
Line: 1 Col: 35 Unexpected character in comment found.
#document
| <!DOCTYPE <!doctype>
| <html>
| <head>
| <body>
| ">"
| <!-- <!--x -->
| "-->"
#data
<!doctype html><div><form></form><div></div></div>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <div>
| <form>
| <div>
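
The deleted files above and below are html5lib-format parser tests: each case is a #data block holding the input markup, a #errors block listing the parse errors the parser is expected to report, and a #document (or #document-fragment) block giving the expected DOM as a "| "-prefixed tree dump. As a rough sketch of that layout only, with the testCase type and field names being assumptions for illustration rather than the repository's actual test harness, such a file can be split into cases like this:

// readCases splits an html5lib-style .dat test file into its cases.
// The types and names here are illustrative assumptions, not the
// repository's test code.
package main

import (
	"bufio"
	"fmt"
	"os"
)

type testCase struct {
	data     string // raw input markup (#data)
	errors   string // expected parse errors (#errors)
	context  string // fragment context element, if any (#document-fragment)
	document string // expected tree dump (#document)
}

func readCases(path string) ([]testCase, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	var cases []testCase
	var cur *testCase
	var section *string
	sc := bufio.NewScanner(f)
	for sc.Scan() {
		line := sc.Text()
		switch line {
		case "#data":
			cases = append(cases, testCase{})
			cur = &cases[len(cases)-1]
			section = &cur.data
		case "#errors":
			section = &cur.errors
		case "#document-fragment":
			section = &cur.context
		case "#document":
			section = &cur.document
		default:
			if section != nil {
				*section += line + "\n"
			}
		}
	}
	return cases, sc.Err()
}

func main() {
	cases, err := readCases("tests1.dat")
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println(len(cases), "cases")
}
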

View File

@@ -1,455 +0,0 @@
#data
<!doctype html><p><button><button>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <button>
#data
<!doctype html><p><button><address>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <address>
#data
<!doctype html><p><button><blockquote>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <blockquote>
#data
<!doctype html><p><button><menu>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <menu>
#data
<!doctype html><p><button><p>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <p>
#data
<!doctype html><p><button><ul>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <ul>
#data
<!doctype html><p><button><h1>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <h1>
#data
<!doctype html><p><button><h6>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <h6>
#data
<!doctype html><p><button><listing>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <listing>
#data
<!doctype html><p><button><pre>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <pre>
#data
<!doctype html><p><button><form>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <form>
#data
<!doctype html><p><button><li>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <li>
#data
<!doctype html><p><button><dd>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <dd>
#data
<!doctype html><p><button><dt>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <dt>
#data
<!doctype html><p><button><plaintext>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <plaintext>
#data
<!doctype html><p><button><table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <table>
#data
<!doctype html><p><button><hr>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <hr>
#data
<!doctype html><p><button><xmp>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <xmp>
#data
<!doctype html><p><button></p>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <p>
#data
<!doctype html><address><button></address>a
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <address>
| <button>
| "a"
#data
<!doctype html><address><button></address>a
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <address>
| <button>
| "a"
#data
<p><table></p>
#errors
#document
| <html>
| <head>
| <body>
| <p>
| <p>
| <table>
#data
<!doctype html><svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
#data
<!doctype html><p><figcaption>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <figcaption>
#data
<!doctype html><p><summary>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <summary>
#data
<!doctype html><form><table><form>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <form>
| <table>
#data
<!doctype html><table><form><form>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <form>
#data
<!doctype html><table><form></table><form>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <form>
#data
<!doctype html><svg><foreignObject><p>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg foreignObject>
| <p>
#data
<!doctype html><svg><title>abc
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg title>
| "abc"
#data
<option><span><option>
#errors
#document
| <html>
| <head>
| <body>
| <option>
| <span>
| <option>
#data
<option><option>
#errors
#document
| <html>
| <head>
| <body>
| <option>
| <option>
#data
<math><annotation-xml><div>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| <div>
#data
<math><annotation-xml encoding="application/svg+xml"><div>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| encoding="application/svg+xml"
| <div>
#data
<math><annotation-xml encoding="application/xhtml+xml"><div>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| encoding="application/xhtml+xml"
| <div>
#data
<math><annotation-xml encoding="aPPlication/xhtmL+xMl"><div>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| encoding="aPPlication/xhtmL+xMl"
| <div>
#data
<math><annotation-xml encoding="text/html"><div>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| encoding="text/html"
| <div>
#data
<math><annotation-xml encoding="Text/htmL"><div>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| encoding="Text/htmL"
| <div>
#data
<math><annotation-xml encoding=" text/html "><div>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| encoding=" text/html "
| <div>

View File

@@ -1,221 +0,0 @@
#data
<svg><![CDATA[foo]]>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "foo"
#data
<math><![CDATA[foo]]>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| "foo"
#data
<div><![CDATA[foo]]>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <!-- [CDATA[foo]] -->
#data
<svg><![CDATA[foo
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "foo"
#data
<svg><![CDATA[foo
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "foo"
#data
<svg><![CDATA[
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
#data
<svg><![CDATA[]]>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
#data
<svg><![CDATA[]] >]]>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "]] >"
#data
<svg><![CDATA[]] >]]>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "]] >"
#data
<svg><![CDATA[]]
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "]]"
#data
<svg><![CDATA[]
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "]"
#data
<svg><![CDATA[]>a
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "]>a"
#data
<svg><foreignObject><div><![CDATA[foo]]>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| <svg foreignObject>
| <div>
| <!-- [CDATA[foo]] -->
#data
<svg><![CDATA[<svg>]]>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "<svg>"
#data
<svg><![CDATA[</svg>a]]>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "</svg>a"
#data
<svg><![CDATA[<svg>a
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "<svg>a"
#data
<svg><![CDATA[</svg>a
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "</svg>a"
#data
<svg><![CDATA[<svg>]]><path>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "<svg>"
| <svg path>
#data
<svg><![CDATA[<svg>]]></path>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "<svg>"
#data
<svg><![CDATA[<svg>]]><!--path-->
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "<svg>"
| <!-- path -->
#data
<svg><![CDATA[<svg>]]>path
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "<svg>path"
#data
<svg><![CDATA[<!--svg-->]]>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "<!--svg-->"

View File

@@ -1,157 +0,0 @@
#data
<a><b><big><em><strong><div>X</a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <b>
| <big>
| <em>
| <strong>
| <big>
| <em>
| <strong>
| <div>
| <a>
| "X"
#data
<a><b><div id=1><div id=2><div id=3><div id=4><div id=5><div id=6><div id=7><div id=8>A</a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <b>
| <b>
| <div>
| id="1"
| <a>
| <div>
| id="2"
| <a>
| <div>
| id="3"
| <a>
| <div>
| id="4"
| <a>
| <div>
| id="5"
| <a>
| <div>
| id="6"
| <a>
| <div>
| id="7"
| <a>
| <div>
| id="8"
| <a>
| "A"
#data
<a><b><div id=1><div id=2><div id=3><div id=4><div id=5><div id=6><div id=7><div id=8><div id=9>A</a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <b>
| <b>
| <div>
| id="1"
| <a>
| <div>
| id="2"
| <a>
| <div>
| id="3"
| <a>
| <div>
| id="4"
| <a>
| <div>
| id="5"
| <a>
| <div>
| id="6"
| <a>
| <div>
| id="7"
| <a>
| <div>
| id="8"
| <a>
| <div>
| id="9"
| "A"
#data
<a><b><div id=1><div id=2><div id=3><div id=4><div id=5><div id=6><div id=7><div id=8><div id=9><div id=10>A</a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <b>
| <b>
| <div>
| id="1"
| <a>
| <div>
| id="2"
| <a>
| <div>
| id="3"
| <a>
| <div>
| id="4"
| <a>
| <div>
| id="5"
| <a>
| <div>
| id="6"
| <a>
| <div>
| id="7"
| <a>
| <div>
| id="8"
| <a>
| <div>
| id="9"
| <div>
| id="10"
| "A"
#data
<cite><b><cite><i><cite><i><cite><i><div>X</b>TEST
#errors
Line: 1 Col: 6 Unexpected start tag (cite). Expected DOCTYPE.
Line: 1 Col: 46 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
Line: 1 Col: 50 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <cite>
| <b>
| <cite>
| <i>
| <cite>
| <i>
| <cite>
| <i>
| <i>
| <i>
| <div>
| <b>
| "X"
| "TEST"

View File

@@ -1,155 +0,0 @@
#data
<p><font size=4><font color=red><font size=4><font size=4><font size=4><font size=4><font size=4><font color=red><p>X
#errors
3: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
116: Unclosed elements.
117: End of file seen and there were open elements.
#document
| <html>
| <head>
| <body>
| <p>
| <font>
| size="4"
| <font>
| color="red"
| <font>
| size="4"
| <font>
| size="4"
| <font>
| size="4"
| <font>
| size="4"
| <font>
| size="4"
| <font>
| color="red"
| <p>
| <font>
| color="red"
| <font>
| size="4"
| <font>
| size="4"
| <font>
| size="4"
| <font>
| color="red"
| "X"
#data
<p><font size=4><font size=4><font size=4><font size=4><p>X
#errors
#document
| <html>
| <head>
| <body>
| <p>
| <font>
| size="4"
| <font>
| size="4"
| <font>
| size="4"
| <font>
| size="4"
| <p>
| <font>
| size="4"
| <font>
| size="4"
| <font>
| size="4"
| "X"
#data
<p><font size=4><font size=4><font size=4><font size="5"><font size=4><p>X
#errors
#document
| <html>
| <head>
| <body>
| <p>
| <font>
| size="4"
| <font>
| size="4"
| <font>
| size="4"
| <font>
| size="5"
| <font>
| size="4"
| <p>
| <font>
| size="4"
| <font>
| size="4"
| <font>
| size="5"
| <font>
| size="4"
| "X"
#data
<p><font size=4 id=a><font size=4 id=b><font size=4><font size=4><p>X
#errors
#document
| <html>
| <head>
| <body>
| <p>
| <font>
| id="a"
| size="4"
| <font>
| id="b"
| size="4"
| <font>
| size="4"
| <font>
| size="4"
| <p>
| <font>
| id="a"
| size="4"
| <font>
| id="b"
| size="4"
| <font>
| size="4"
| <font>
| size="4"
| "X"
#data
<p><b id=a><b id=a><b id=a><b><object><b id=a><b id=a>X</object><p>Y
#errors
#document
| <html>
| <head>
| <body>
| <p>
| <b>
| id="a"
| <b>
| id="a"
| <b>
| id="a"
| <b>
| <object>
| <b>
| id="a"
| <b>
| id="a"
| "X"
| <p>
| <b>
| id="a"
| <b>
| id="a"
| <b>
| id="a"
| <b>
| "Y"

View File

@@ -1,79 +0,0 @@
#data
<!DOCTYPE html>&NotEqualTilde;
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "≂̸"
#data
<!DOCTYPE html>&NotEqualTilde;A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "≂̸A"
#data
<!DOCTYPE html>&ThickSpace;
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| ""
#data
<!DOCTYPE html>&ThickSpace;A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "A"
#data
<!DOCTYPE html>&NotSubset;
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "⊂⃒"
#data
<!DOCTYPE html>&NotSubset;A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "⊂⃒A"
#data
<!DOCTYPE html>&Gopf;
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "𝔾"
#data
<!DOCTYPE html>&Gopf;A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "𝔾A"

View File

@@ -1,219 +0,0 @@
#data
<!DOCTYPE html><body><foo>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <foo>
| "A"
#data
<!DOCTYPE html><body><area>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <area>
| "A"
#data
<!DOCTYPE html><body><base>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <base>
| "A"
#data
<!DOCTYPE html><body><basefont>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <basefont>
| "A"
#data
<!DOCTYPE html><body><bgsound>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <bgsound>
| "A"
#data
<!DOCTYPE html><body><br>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <br>
| "A"
#data
<!DOCTYPE html><body><col>A
#errors
26: Stray start tag “col”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "A"
#data
<!DOCTYPE html><body><command>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <command>
| "A"
#data
<!DOCTYPE html><body><embed>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <embed>
| "A"
#data
<!DOCTYPE html><body><frame>A
#errors
26: Stray start tag “frame”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "A"
#data
<!DOCTYPE html><body><hr>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <hr>
| "A"
#data
<!DOCTYPE html><body><img>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <img>
| "A"
#data
<!DOCTYPE html><body><input>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <input>
| "A"
#data
<!DOCTYPE html><body><keygen>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <keygen>
| "A"
#data
<!DOCTYPE html><body><link>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <link>
| "A"
#data
<!DOCTYPE html><body><meta>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <meta>
| "A"
#data
<!DOCTYPE html><body><param>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <param>
| "A"
#data
<!DOCTYPE html><body><source>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <source>
| "A"
#data
<!DOCTYPE html><body><track>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <track>
| "A"
#data
<!DOCTYPE html><body><wbr>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <wbr>
| "A"

View File

@@ -1,195 +0,0 @@
#data
<!DOCTYPE html><body><a href='#1'><nobr>1<nobr></a><br><a href='#2'><nobr>2<nobr></a><br><a href='#3'><nobr>3<nobr></a>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <a>
| href="#1"
| <nobr>
| "1"
| <nobr>
| <nobr>
| <br>
| <a>
| href="#2"
| <a>
| href="#2"
| <nobr>
| "2"
| <nobr>
| <nobr>
| <br>
| <a>
| href="#3"
| <a>
| href="#3"
| <nobr>
| "3"
| <nobr>
#data
<!DOCTYPE html><body><b><nobr>1<nobr></b><i><nobr>2<nobr></i>3
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <b>
| <nobr>
| "1"
| <nobr>
| <nobr>
| <i>
| <i>
| <nobr>
| "2"
| <nobr>
| <nobr>
| "3"
#data
<!DOCTYPE html><body><b><nobr>1<table><nobr></b><i><nobr>2<nobr></i>3
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <b>
| <nobr>
| "1"
| <nobr>
| <i>
| <i>
| <nobr>
| "2"
| <nobr>
| <nobr>
| "3"
| <table>
#data
<!DOCTYPE html><body><b><nobr>1<table><tr><td><nobr></b><i><nobr>2<nobr></i>3
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <b>
| <nobr>
| "1"
| <table>
| <tbody>
| <tr>
| <td>
| <nobr>
| <i>
| <i>
| <nobr>
| "2"
| <nobr>
| <nobr>
| "3"
#data
<!DOCTYPE html><body><b><nobr>1<div><nobr></b><i><nobr>2<nobr></i>3
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <b>
| <nobr>
| "1"
| <div>
| <b>
| <nobr>
| <nobr>
| <nobr>
| <i>
| <i>
| <nobr>
| "2"
| <nobr>
| <nobr>
| "3"
#data
<!DOCTYPE html><body><b><nobr>1<nobr></b><div><i><nobr>2<nobr></i>3
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <b>
| <nobr>
| "1"
| <nobr>
| <div>
| <nobr>
| <i>
| <i>
| <nobr>
| "2"
| <nobr>
| <nobr>
| "3"
#data
<!DOCTYPE html><body><b><nobr>1<nobr><ins></b><i><nobr>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <b>
| <nobr>
| "1"
| <nobr>
| <ins>
| <nobr>
| <i>
| <i>
| <nobr>
#data
<!DOCTYPE html><body><b><nobr>1<ins><nobr></b><i>2
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <b>
| <nobr>
| "1"
| <ins>
| <nobr>
| <nobr>
| <i>
| "2"
#data
<!DOCTYPE html><body><b>1<nobr></b><i><nobr>2</i>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <b>
| "1"
| <nobr>
| <nobr>
| <i>
| <i>
| <nobr>
| "2"

View File

@@ -1,305 +0,0 @@
#data
<head></head><style></style>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
Line: 1 Col: 20 Unexpected start tag (style) that can be in head. Moved.
#document
| <html>
| <head>
| <style>
| <body>
#data
<head></head><script></script>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
Line: 1 Col: 21 Unexpected start tag (script) that can be in head. Moved.
#document
| <html>
| <head>
| <script>
| <body>
#data
<head></head><!-- --><style></style><!-- --><script></script>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
Line: 1 Col: 28 Unexpected start tag (style) that can be in head. Moved.
#document
| <html>
| <head>
| <style>
| <script>
| <!-- -->
| <!-- -->
| <body>
#data
<head></head><!-- -->x<style></style><!-- --><script></script>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
#document
| <html>
| <head>
| <!-- -->
| <body>
| "x"
| <style>
| <!-- -->
| <script>
#data
<!DOCTYPE html><html><head></head><body><pre>
</pre></body></html>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <pre>
#data
<!DOCTYPE html><html><head></head><body><pre>
foo</pre></body></html>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <pre>
| "foo"
#data
<!DOCTYPE html><html><head></head><body><pre>
foo</pre></body></html>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <pre>
| "
foo"
#data
<!DOCTYPE html><html><head></head><body><pre>
foo
</pre></body></html>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <pre>
| "foo
"
#data
<!DOCTYPE html><html><head></head><body><pre>x</pre><span>
</span></body></html>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <pre>
| "x"
| <span>
| "
"
#data
<!DOCTYPE html><html><head></head><body><pre>x
y</pre></body></html>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <pre>
| "x
y"
#data
<!DOCTYPE html><html><head></head><body><pre>x<div>
y</pre></body></html>
#errors
Line: 2 Col: 7 End tag (pre) seen too early. Expected other end tag.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <pre>
| "x"
| <div>
| "
y"
#data
<!DOCTYPE html><pre>&#x0a;&#x0a;A</pre>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <pre>
| "
A"
#data
<!DOCTYPE html><HTML><META><HEAD></HEAD></HTML>
#errors
Line: 1 Col: 33 Unexpected start tag head in existing head. Ignored.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <meta>
| <body>
#data
<!DOCTYPE html><HTML><HEAD><head></HEAD></HTML>
#errors
Line: 1 Col: 33 Unexpected start tag head in existing head. Ignored.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
#data
<textarea>foo<span>bar</span><i>baz
#errors
Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE.
Line: 1 Col: 35 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <textarea>
| "foo<span>bar</span><i>baz"
#data
<title>foo<span>bar</em><i>baz
#errors
Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
Line: 1 Col: 30 Unexpected end of file. Expected end tag (title).
#document
| <html>
| <head>
| <title>
| "foo<span>bar</em><i>baz"
| <body>
#data
<!DOCTYPE html><textarea>
</textarea>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <textarea>
#data
<!DOCTYPE html><textarea>
foo</textarea>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <textarea>
| "foo"
#data
<!DOCTYPE html><textarea>
foo</textarea>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <textarea>
| "
foo"
#data
<!DOCTYPE html><html><head></head><body><ul><li><div><p><li></ul></body></html>
#errors
Line: 1 Col: 60 Missing end tag (div, li).
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <ul>
| <li>
| <div>
| <p>
| <li>
#data
<!doctype html><nobr><nobr><nobr>
#errors
Line: 1 Col: 27 Unexpected start tag (nobr) implies end tag (nobr).
Line: 1 Col: 33 Unexpected start tag (nobr) implies end tag (nobr).
Line: 1 Col: 33 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <nobr>
| <nobr>
| <nobr>
#data
<!doctype html><nobr><nobr></nobr><nobr>
#errors
Line: 1 Col: 27 Unexpected start tag (nobr) implies end tag (nobr).
Line: 1 Col: 40 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <nobr>
| <nobr>
| <nobr>
#data
<!doctype html><html><body><p><table></table></body></html>
#errors
Not known
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <table>
#data
<p><table></table>
#errors
Not known
#document
| <html>
| <head>
| <body>
| <p>
| <table>

View File

@@ -1,59 +0,0 @@
#data
direct div content
#errors
#document-fragment
div
#document
| "direct div content"
#data
direct textarea content
#errors
#document-fragment
textarea
#document
| "direct textarea content"
#data
textarea content with <em>pseudo</em> <foo>markup
#errors
#document-fragment
textarea
#document
| "textarea content with <em>pseudo</em> <foo>markup"
#data
this is &#x0043;DATA inside a <style> element
#errors
#document-fragment
style
#document
| "this is &#x0043;DATA inside a <style> element"
#data
</plaintext>
#errors
#document-fragment
plaintext
#document
| "</plaintext>"
#data
setting html's innerHTML
#errors
Line: 1 Col: 24 Unexpected EOF in inner html mode.
#document-fragment
html
#document
| <head>
| <body>
| "setting html's innerHTML"
#data
<title>setting head's innerHTML</title>
#errors
#document-fragment
head
#document
| <title>
| "setting head's innerHTML"

View File

@@ -1,191 +0,0 @@
#data
<style> <!-- </style>x
#errors
Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
Line: 1 Col: 22 Unexpected end of file. Expected end tag (style).
#document
| <html>
| <head>
| <style>
| " <!-- "
| <body>
| "x"
#data
<style> <!-- </style> --> </style>x
#errors
Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
#document
| <html>
| <head>
| <style>
| " <!-- "
| " "
| <body>
| "--> x"
#data
<style> <!--> </style>x
#errors
Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
#document
| <html>
| <head>
| <style>
| " <!--> "
| <body>
| "x"
#data
<style> <!---> </style>x
#errors
Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
#document
| <html>
| <head>
| <style>
| " <!---> "
| <body>
| "x"
#data
<iframe> <!---> </iframe>x
#errors
Line: 1 Col: 8 Unexpected start tag (iframe). Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| <iframe>
| " <!---> "
| "x"
#data
<iframe> <!--- </iframe>->x</iframe> --> </iframe>x
#errors
Line: 1 Col: 8 Unexpected start tag (iframe). Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| <iframe>
| " <!--- "
| "->x --> x"
#data
<script> <!-- </script> --> </script>x
#errors
Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
#document
| <html>
| <head>
| <script>
| " <!-- "
| " "
| <body>
| "--> x"
#data
<title> <!-- </title> --> </title>x
#errors
Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
#document
| <html>
| <head>
| <title>
| " <!-- "
| " "
| <body>
| "--> x"
#data
<textarea> <!--- </textarea>->x</textarea> --> </textarea>x
#errors
Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| <textarea>
| " <!--- "
| "->x --> x"
#data
<style> <!</-- </style>x
#errors
Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
#document
| <html>
| <head>
| <style>
| " <!</-- "
| <body>
| "x"
#data
<p><xmp></xmp>
#errors
XXX: Unknown
#document
| <html>
| <head>
| <body>
| <p>
| <xmp>
#data
<xmp> <!-- > --> </xmp>
#errors
Line: 1 Col: 5 Unexpected start tag (xmp). Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| <xmp>
| " <!-- > --> "
#data
<title>&amp;</title>
#errors
Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
#document
| <html>
| <head>
| <title>
| "&"
| <body>
#data
<title><!--&amp;--></title>
#errors
Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
#document
| <html>
| <head>
| <title>
| "<!--&-->"
| <body>
#data
<title><!--</title>
#errors
Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
Line: 1 Col: 19 Unexpected end of file. Expected end tag (title).
#document
| <html>
| <head>
| <title>
| "<!--"
| <body>
#data
<noscript><!--</noscript>--></noscript>
#errors
Line: 1 Col: 10 Unexpected start tag (noscript). Expected DOCTYPE.
#document
| <html>
| <head>
| <noscript>
| "<!--"
| <body>
| "-->"

View File

@@ -1,663 +0,0 @@
#data
<!doctype html></head> <head>
#errors
Line: 1 Col: 29 Unexpected start tag head. Ignored.
#document
| <!DOCTYPE html>
| <html>
| <head>
| " "
| <body>
#data
<!doctype html><form><div></form><div>
#errors
33: End tag "form" seen but there were unclosed elements.
38: End of file seen and there were open elements.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <form>
| <div>
| <div>
#data
<!doctype html><title>&amp;</title>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <title>
| "&"
| <body>
#data
<!doctype html><title><!--&amp;--></title>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <title>
| "<!--&-->"
| <body>
#data
<!doctype>
#errors
Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
Line: 1 Col: 10 Unexpected > character. Expected DOCTYPE name.
Line: 1 Col: 10 Erroneous DOCTYPE.
#document
| <!DOCTYPE >
| <html>
| <head>
| <body>
#data
<!---x
#errors
Line: 1 Col: 6 Unexpected end of file in comment.
Line: 1 Col: 6 Unexpected End of file. Expected DOCTYPE.
#document
| <!-- -x -->
| <html>
| <head>
| <body>
#data
<body>
<div>
#errors
Line: 1 Col: 6 Unexpected start tag (body).
Line: 2 Col: 5 Expected closing tag. Unexpected end of file.
#document-fragment
div
#document
| "
"
| <div>
#data
<frameset></frameset>
foo
#errors
Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
Line: 2 Col: 3 Unexpected non-space characters in the after frameset phase. Ignored.
#document
| <html>
| <head>
| <frameset>
| "
"
#data
<frameset></frameset>
<noframes>
#errors
Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
Line: 2 Col: 10 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <frameset>
| "
"
| <noframes>
#data
<frameset></frameset>
<div>
#errors
Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
Line: 2 Col: 5 Unexpected start tag (div) in the after frameset phase. Ignored.
#document
| <html>
| <head>
| <frameset>
| "
"
#data
<frameset></frameset>
</html>
#errors
Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
#document
| <html>
| <head>
| <frameset>
| "
"
#data
<frameset></frameset>
</div>
#errors
Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
Line: 2 Col: 6 Unexpected end tag (div) in the after frameset phase. Ignored.
#document
| <html>
| <head>
| <frameset>
| "
"
#data
<form><form>
#errors
Line: 1 Col: 6 Unexpected start tag (form). Expected DOCTYPE.
Line: 1 Col: 12 Unexpected start tag (form).
Line: 1 Col: 12 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <form>
#data
<button><button>
#errors
Line: 1 Col: 8 Unexpected start tag (button). Expected DOCTYPE.
Line: 1 Col: 16 Unexpected start tag (button) implies end tag (button).
Line: 1 Col: 16 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <button>
| <button>
#data
<table><tr><td></th>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 20 Unexpected end tag (th). Ignored.
Line: 1 Col: 20 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
#data
<table><caption><td>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 20 Unexpected end tag (td). Ignored.
Line: 1 Col: 20 Unexpected table cell start tag (td) in the table body phase.
Line: 1 Col: 20 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <table>
| <caption>
| <tbody>
| <tr>
| <td>
#data
<table><caption><div>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 21 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <table>
| <caption>
| <div>
#data
</caption><div>
#errors
Line: 1 Col: 10 Unexpected end tag (caption). Ignored.
Line: 1 Col: 15 Expected closing tag. Unexpected end of file.
#document-fragment
caption
#document
| <div>
#data
<table><caption><div></caption>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 31 Unexpected end tag (caption). Missing end tag (div).
Line: 1 Col: 31 Unexpected end of file. Expected table content.
#document
| <html>
| <head>
| <body>
| <table>
| <caption>
| <div>
#data
<table><caption></table>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 24 Unexpected end table tag in caption. Generates implied end caption.
#document
| <html>
| <head>
| <body>
| <table>
| <caption>
#data
</table><div>
#errors
Line: 1 Col: 8 Unexpected end table tag in caption. Generates implied end caption.
Line: 1 Col: 8 Unexpected end tag (caption). Ignored.
Line: 1 Col: 13 Expected closing tag. Unexpected end of file.
#document-fragment
caption
#document
| <div>
#data
<table><caption></body></col></colgroup></html></tbody></td></tfoot></th></thead></tr>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 23 Unexpected end tag (body). Ignored.
Line: 1 Col: 29 Unexpected end tag (col). Ignored.
Line: 1 Col: 40 Unexpected end tag (colgroup). Ignored.
Line: 1 Col: 47 Unexpected end tag (html). Ignored.
Line: 1 Col: 55 Unexpected end tag (tbody). Ignored.
Line: 1 Col: 60 Unexpected end tag (td). Ignored.
Line: 1 Col: 68 Unexpected end tag (tfoot). Ignored.
Line: 1 Col: 73 Unexpected end tag (th). Ignored.
Line: 1 Col: 81 Unexpected end tag (thead). Ignored.
Line: 1 Col: 86 Unexpected end tag (tr). Ignored.
Line: 1 Col: 86 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <table>
| <caption>
#data
<table><caption><div></div>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 27 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <table>
| <caption>
| <div>
#data
<table><tr><td></body></caption></col></colgroup></html>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 22 Unexpected end tag (body). Ignored.
Line: 1 Col: 32 Unexpected end tag (caption). Ignored.
Line: 1 Col: 38 Unexpected end tag (col). Ignored.
Line: 1 Col: 49 Unexpected end tag (colgroup). Ignored.
Line: 1 Col: 56 Unexpected end tag (html). Ignored.
Line: 1 Col: 56 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
#data
</table></tbody></tfoot></thead></tr><div>
#errors
Line: 1 Col: 8 Unexpected end tag (table). Ignored.
Line: 1 Col: 16 Unexpected end tag (tbody). Ignored.
Line: 1 Col: 24 Unexpected end tag (tfoot). Ignored.
Line: 1 Col: 32 Unexpected end tag (thead). Ignored.
Line: 1 Col: 37 Unexpected end tag (tr). Ignored.
Line: 1 Col: 42 Expected closing tag. Unexpected end of file.
#document-fragment
td
#document
| <div>
#data
<table><colgroup>foo
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 20 Unexpected non-space characters in table context caused voodoo mode.
Line: 1 Col: 20 Unexpected end of file. Expected table content.
#document
| <html>
| <head>
| <body>
| "foo"
| <table>
| <colgroup>
#data
foo<col>
#errors
Line: 1 Col: 3 Unexpected end tag (colgroup). Ignored.
#document-fragment
colgroup
#document
| <col>
#data
<table><colgroup></col>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 23 This element (col) has no end tag.
Line: 1 Col: 23 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <table>
| <colgroup>
#data
<frameset><div>
#errors
Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
Line: 1 Col: 15 Unexpected start tag token (div) in the frameset phase. Ignored.
Line: 1 Col: 15 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <frameset>
#data
</frameset><frame>
#errors
Line: 1 Col: 11 Unexpected end tag token (frameset) in the frameset phase (innerHTML).
#document-fragment
frameset
#document
| <frame>
#data
<frameset></div>
#errors
Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
Line: 1 Col: 16 Unexpected end tag token (div) in the frameset phase. Ignored.
Line: 1 Col: 16 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <frameset>
#data
</body><div>
#errors
Line: 1 Col: 7 Unexpected end tag (body). Ignored.
Line: 1 Col: 12 Expected closing tag. Unexpected end of file.
#document-fragment
body
#document
| <div>
#data
<table><tr><div>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 16 Unexpected start tag (div) in table context caused voodoo mode.
Line: 1 Col: 16 Unexpected end of file. Expected table content.
#document
| <html>
| <head>
| <body>
| <div>
| <table>
| <tbody>
| <tr>
#data
</tr><td>
#errors
Line: 1 Col: 5 Unexpected end tag (tr). Ignored.
#document-fragment
tr
#document
| <td>
#data
</tbody></tfoot></thead><td>
#errors
Line: 1 Col: 8 Unexpected end tag (tbody). Ignored.
Line: 1 Col: 16 Unexpected end tag (tfoot). Ignored.
Line: 1 Col: 24 Unexpected end tag (thead). Ignored.
#document-fragment
tr
#document
| <td>
#data
<table><tr><div><td>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 16 Unexpected start tag (div) in table context caused voodoo mode.
Line: 1 Col: 20 Unexpected implied end tag (div) in the table row phase.
Line: 1 Col: 20 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <div>
| <table>
| <tbody>
| <tr>
| <td>
#data
<caption><col><colgroup><tbody><tfoot><thead><tr>
#errors
Line: 1 Col: 9 Unexpected start tag (caption).
Line: 1 Col: 14 Unexpected start tag (col).
Line: 1 Col: 24 Unexpected start tag (colgroup).
Line: 1 Col: 31 Unexpected start tag (tbody).
Line: 1 Col: 38 Unexpected start tag (tfoot).
Line: 1 Col: 45 Unexpected start tag (thead).
Line: 1 Col: 49 Unexpected end of file. Expected table content.
#document-fragment
tbody
#document
| <tr>
#data
<table><tbody></thead>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 22 Unexpected end tag (thead) in the table body phase. Ignored.
Line: 1 Col: 22 Unexpected end of file. Expected table content.
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
#data
</table><tr>
#errors
Line: 1 Col: 8 Unexpected end tag (table). Ignored.
Line: 1 Col: 12 Unexpected end of file. Expected table content.
#document-fragment
tbody
#document
| <tr>
#data
<table><tbody></body></caption></col></colgroup></html></td></th></tr>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 21 Unexpected end tag (body) in the table body phase. Ignored.
Line: 1 Col: 31 Unexpected end tag (caption) in the table body phase. Ignored.
Line: 1 Col: 37 Unexpected end tag (col) in the table body phase. Ignored.
Line: 1 Col: 48 Unexpected end tag (colgroup) in the table body phase. Ignored.
Line: 1 Col: 55 Unexpected end tag (html) in the table body phase. Ignored.
Line: 1 Col: 60 Unexpected end tag (td) in the table body phase. Ignored.
Line: 1 Col: 65 Unexpected end tag (th) in the table body phase. Ignored.
Line: 1 Col: 70 Unexpected end tag (tr) in the table body phase. Ignored.
Line: 1 Col: 70 Unexpected end of file. Expected table content.
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
#data
<table><tbody></div>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 20 Unexpected end tag (div) in table context caused voodoo mode.
Line: 1 Col: 20 End tag (div) seen too early. Expected other end tag.
Line: 1 Col: 20 Unexpected end of file. Expected table content.
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
#data
<table><table>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 14 Unexpected start tag (table) implies end tag (table).
Line: 1 Col: 14 Unexpected end of file. Expected table content.
#document
| <html>
| <head>
| <body>
| <table>
| <table>
#data
<table></body></caption></col></colgroup></html></tbody></td></tfoot></th></thead></tr>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 14 Unexpected end tag (body). Ignored.
Line: 1 Col: 24 Unexpected end tag (caption). Ignored.
Line: 1 Col: 30 Unexpected end tag (col). Ignored.
Line: 1 Col: 41 Unexpected end tag (colgroup). Ignored.
Line: 1 Col: 48 Unexpected end tag (html). Ignored.
Line: 1 Col: 56 Unexpected end tag (tbody). Ignored.
Line: 1 Col: 61 Unexpected end tag (td). Ignored.
Line: 1 Col: 69 Unexpected end tag (tfoot). Ignored.
Line: 1 Col: 74 Unexpected end tag (th). Ignored.
Line: 1 Col: 82 Unexpected end tag (thead). Ignored.
Line: 1 Col: 87 Unexpected end tag (tr). Ignored.
Line: 1 Col: 87 Unexpected end of file. Expected table content.
#document
| <html>
| <head>
| <body>
| <table>
#data
</table><tr>
#errors
Line: 1 Col: 8 Unexpected end tag (table). Ignored.
Line: 1 Col: 12 Unexpected end of file. Expected table content.
#document-fragment
table
#document
| <tbody>
| <tr>
#data
<body></body></html>
#errors
Line: 1 Col: 20 Unexpected html end tag in inner html mode.
Line: 1 Col: 20 Unexpected EOF in inner html mode.
#document-fragment
html
#document
| <head>
| <body>
#data
<html><frameset></frameset></html>
#errors
Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
#document
| <html>
| <head>
| <frameset>
| " "
#data
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html></html>
#errors
Line: 1 Col: 50 Erroneous DOCTYPE.
Line: 1 Col: 63 Unexpected end tag (html) after the (implied) root element.
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "">
| <html>
| <head>
| <body>
#data
<param><frameset></frameset>
#errors
Line: 1 Col: 7 Unexpected start tag (param). Expected DOCTYPE.
Line: 1 Col: 17 Unexpected start tag (frameset).
#document
| <html>
| <head>
| <frameset>
#data
<source><frameset></frameset>
#errors
Line: 1 Col: 7 Unexpected start tag (source). Expected DOCTYPE.
Line: 1 Col: 17 Unexpected start tag (frameset).
#document
| <html>
| <head>
| <frameset>
#data
<track><frameset></frameset>
#errors
Line: 1 Col: 7 Unexpected start tag (track). Expected DOCTYPE.
Line: 1 Col: 17 Unexpected start tag (frameset).
#document
| <html>
| <head>
| <frameset>
#data
</html><frameset></frameset>
#errors
7: End tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
17: Stray “frameset” start tag.
17: “frameset” start tag seen.
#document
| <html>
| <head>
| <frameset>
#data
</body><frameset></frameset>
#errors
7: End tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
17: Stray “frameset” start tag.
17: “frameset” start tag seen.
#document
| <html>
| <head>
| <frameset>

View File

@@ -1,390 +0,0 @@
#data
<!doctype html><body><title>X</title>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <title>
| "X"
#data
<!doctype html><table><title>X</title></table>
#errors
Line: 1 Col: 29 Unexpected start tag (title) in table context caused voodoo mode.
Line: 1 Col: 38 Unexpected end tag (title) in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <title>
| "X"
| <table>
#data
<!doctype html><head></head><title>X</title>
#errors
Line: 1 Col: 35 Unexpected start tag (title) that can be in head. Moved.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <title>
| "X"
| <body>
#data
<!doctype html></head><title>X</title>
#errors
Line: 1 Col: 29 Unexpected start tag (title) that can be in head. Moved.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <title>
| "X"
| <body>
#data
<!doctype html><table><meta></table>
#errors
Line: 1 Col: 28 Unexpected start tag (meta) in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <meta>
| <table>
#data
<!doctype html><table>X<tr><td><table> <meta></table></table>
#errors
Line: 1 Col: 23 Unexpected non-space characters in table context caused voodoo mode.
Line: 1 Col: 45 Unexpected start tag (meta) in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "X"
| <table>
| <tbody>
| <tr>
| <td>
| <meta>
| <table>
| " "
#data
<!doctype html><html> <head>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
#data
<!doctype html> <head>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
#data
<!doctype html><table><style> <tr>x </style> </table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <style>
| " <tr>x "
| " "
#data
<!doctype html><table><TBODY><script> <tr>x </script> </table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <script>
| " <tr>x "
| " "
#data
<!doctype html><p><applet><p>X</p></applet>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <applet>
| <p>
| "X"
#data
<!doctype html><listing>
X</listing>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <listing>
| "X"
#data
<!doctype html><select><input>X
#errors
Line: 1 Col: 30 Unexpected input start tag in the select phase.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <input>
| "X"
#data
<!doctype html><select><select>X
#errors
Line: 1 Col: 31 Unexpected select start tag in the select phase treated as select end tag.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| "X"
#data
<!doctype html><table><input type=hidDEN></table>
#errors
Line: 1 Col: 41 Unexpected input with type hidden in table context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <input>
| type="hidDEN"
#data
<!doctype html><table>X<input type=hidDEN></table>
#errors
Line: 1 Col: 23 Unexpected non-space characters in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "X"
| <table>
| <input>
| type="hidDEN"
#data
<!doctype html><table> <input type=hidDEN></table>
#errors
Line: 1 Col: 43 Unexpected input with type hidden in table context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| " "
| <input>
| type="hidDEN"
#data
<!doctype html><table> <input type='hidDEN'></table>
#errors
Line: 1 Col: 45 Unexpected input with type hidden in table context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| " "
| <input>
| type="hidDEN"
#data
<!doctype html><table><input type=" hidden"><input type=hidDEN></table>
#errors
Line: 1 Col: 44 Unexpected start tag (input) in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <input>
| type=" hidden"
| <table>
| <input>
| type="hidDEN"
#data
<!doctype html><table><select>X<tr>
#errors
Line: 1 Col: 30 Unexpected start tag (select) in table context caused voodoo mode.
Line: 1 Col: 35 Unexpected table element start tag (trs) in the select in table phase.
Line: 1 Col: 35 Unexpected end of file. Expected table content.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| "X"
| <table>
| <tbody>
| <tr>
#data
<!doctype html><select>X</select>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| "X"
#data
<!DOCTYPE hTmL><html></html>
#errors
Line: 1 Col: 28 Unexpected end tag (html) after the (implied) root element.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML><html></html>
#errors
Line: 1 Col: 28 Unexpected end tag (html) after the (implied) root element.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
#data
<body>X</body></body>
#errors
Line: 1 Col: 21 Unexpected end tag token (body) in the after body phase.
Line: 1 Col: 21 Unexpected EOF in inner html mode.
#document-fragment
html
#document
| <head>
| <body>
| "X"
#data
<div><p>a</x> b
#errors
Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
Line: 1 Col: 13 Unexpected end tag (x). Ignored.
Line: 1 Col: 15 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <div>
| <p>
| "a b"
#data
<table><tr><td><code></code> </table>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <code>
| " "
#data
<table><b><tr><td>aaa</td></tr>bbb</table>ccc
#errors
XXX: Fix me
#document
| <html>
| <head>
| <body>
| <b>
| <b>
| "bbb"
| <table>
| <tbody>
| <tr>
| <td>
| "aaa"
| <b>
| "ccc"
#data
A<table><tr> B</tr> B</table>
#errors
XXX: Fix me
#document
| <html>
| <head>
| <body>
| "A B B"
| <table>
| <tbody>
| <tr>
#data
A<table><tr> B</tr> </em>C</table>
#errors
XXX: Fix me
#document
| <html>
| <head>
| <body>
| "A BC"
| <table>
| <tbody>
| <tr>
| " "
#data
<select><keygen>
#errors
Not known
#document
| <html>
| <head>
| <body>
| <select>
| <keygen>

View File

@@ -1,148 +0,0 @@
#data
<div>
<div></div>
</span>x
#errors
Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
Line: 3 Col: 7 Unexpected end tag (span). Ignored.
Line: 3 Col: 8 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <div>
| "
"
| <div>
| "
x"
#data
<div>x<div></div>
</span>x
#errors
Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
Line: 2 Col: 7 Unexpected end tag (span). Ignored.
Line: 2 Col: 8 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <div>
| "x"
| <div>
| "
x"
#data
<div>x<div></div>x</span>x
#errors
Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
Line: 1 Col: 25 Unexpected end tag (span). Ignored.
Line: 1 Col: 26 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <div>
| "x"
| <div>
| "xx"
#data
<div>x<div></div>y</span>z
#errors
Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
Line: 1 Col: 25 Unexpected end tag (span). Ignored.
Line: 1 Col: 26 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <div>
| "x"
| <div>
| "yz"
#data
<table><div>x<div></div>x</span>x
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 12 Unexpected start tag (div) in table context caused voodoo mode.
Line: 1 Col: 18 Unexpected start tag (div) in table context caused voodoo mode.
Line: 1 Col: 24 Unexpected end tag (div) in table context caused voodoo mode.
Line: 1 Col: 32 Unexpected end tag (span) in table context caused voodoo mode.
Line: 1 Col: 32 Unexpected end tag (span). Ignored.
Line: 1 Col: 33 Unexpected end of file. Expected table content.
#document
| <html>
| <head>
| <body>
| <div>
| "x"
| <div>
| "xx"
| <table>
#data
x<table>x
#errors
Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
Line: 1 Col: 9 Unexpected non-space characters in table context caused voodoo mode.
Line: 1 Col: 9 Unexpected end of file. Expected table content.
#document
| <html>
| <head>
| <body>
| "xx"
| <table>
#data
x<table><table>x
#errors
Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
Line: 1 Col: 15 Unexpected start tag (table) implies end tag (table).
Line: 1 Col: 16 Unexpected non-space characters in table context caused voodoo mode.
Line: 1 Col: 16 Unexpected end of file. Expected table content.
#document
| <html>
| <head>
| <body>
| "x"
| <table>
| "x"
| <table>
#data
<b>a<div></div><div></b>y
#errors
Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
Line: 1 Col: 24 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
Line: 1 Col: 25 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <b>
| "a"
| <div>
| <div>
| <b>
| "y"
#data
<a><div><p></a>
#errors
Line: 1 Col: 3 Unexpected start tag (a). Expected DOCTYPE.
Line: 1 Col: 15 End tag (a) violates step 1, paragraph 3 of the adoption agency algorithm.
Line: 1 Col: 15 End tag (a) violates step 1, paragraph 3 of the adoption agency algorithm.
Line: 1 Col: 15 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <a>
| <div>
| <a>
| <p>
| <a>

View File

@@ -1,457 +0,0 @@
#data
<!DOCTYPE html><math></math>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
#data
<!DOCTYPE html><body><math></math>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
#data
<!DOCTYPE html><math><mi>
#errors
25: End of file in a foreign namespace context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <math mi>
#data
<!DOCTYPE html><math><annotation-xml><svg><u>
#errors
45: HTML start tag “u” in a foreign namespace context.
45: End of file seen and there were open elements.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| <svg svg>
| <u>
#data
<!DOCTYPE html><body><select><math></math></select>
#errors
Line: 1 Col: 35 Unexpected start tag token (math) in the select phase. Ignored.
Line: 1 Col: 42 Unexpected end tag (math) in the select phase. Ignored.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
#data
<!DOCTYPE html><body><select><option><math></math></option></select>
#errors
Line: 1 Col: 43 Unexpected start tag token (math) in the select phase. Ignored.
Line: 1 Col: 50 Unexpected end tag (math) in the select phase. Ignored.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <option>
#data
<!DOCTYPE html><body><table><math></math></table>
#errors
Line: 1 Col: 34 Unexpected start tag (math) in table context caused voodoo mode.
Line: 1 Col: 41 Unexpected end tag (math) in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <table>
#data
<!DOCTYPE html><body><table><math><mi>foo</mi></math></table>
#errors
Line: 1 Col: 34 Unexpected start tag (math) in table context caused voodoo mode.
Line: 1 Col: 46 Unexpected end tag (mi) in table context caused voodoo mode.
Line: 1 Col: 53 Unexpected end tag (math) in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| "foo"
| <table>
#data
<!DOCTYPE html><body><table><math><mi>foo</mi><mi>bar</mi></math></table>
#errors
Line: 1 Col: 34 Unexpected start tag (math) in table context caused voodoo mode.
Line: 1 Col: 46 Unexpected end tag (mi) in table context caused voodoo mode.
Line: 1 Col: 58 Unexpected end tag (mi) in table context caused voodoo mode.
Line: 1 Col: 65 Unexpected end tag (math) in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| "foo"
| <math mi>
| "bar"
| <table>
#data
<!DOCTYPE html><body><table><tbody><math><mi>foo</mi><mi>bar</mi></math></tbody></table>
#errors
Line: 1 Col: 41 Unexpected start tag (math) in table context caused voodoo mode.
Line: 1 Col: 53 Unexpected end tag (mi) in table context caused voodoo mode.
Line: 1 Col: 65 Unexpected end tag (mi) in table context caused voodoo mode.
Line: 1 Col: 72 Unexpected end tag (math) in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| "foo"
| <math mi>
| "bar"
| <table>
| <tbody>
#data
<!DOCTYPE html><body><table><tbody><tr><math><mi>foo</mi><mi>bar</mi></math></tr></tbody></table>
#errors
Line: 1 Col: 45 Unexpected start tag (math) in table context caused voodoo mode.
Line: 1 Col: 57 Unexpected end tag (mi) in table context caused voodoo mode.
Line: 1 Col: 69 Unexpected end tag (mi) in table context caused voodoo mode.
Line: 1 Col: 76 Unexpected end tag (math) in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| "foo"
| <math mi>
| "bar"
| <table>
| <tbody>
| <tr>
#data
<!DOCTYPE html><body><table><tbody><tr><td><math><mi>foo</mi><mi>bar</mi></math></td></tr></tbody></table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <math math>
| <math mi>
| "foo"
| <math mi>
| "bar"
#data
<!DOCTYPE html><body><table><tbody><tr><td><math><mi>foo</mi><mi>bar</mi></math><p>baz</td></tr></tbody></table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <math math>
| <math mi>
| "foo"
| <math mi>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><body><table><caption><math><mi>foo</mi><mi>bar</mi></math><p>baz</caption></table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <math math>
| <math mi>
| "foo"
| <math mi>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><body><table><caption><math><mi>foo</mi><mi>bar</mi><p>baz</table><p>quux
#errors
Line: 1 Col: 70 HTML start tag "p" in a foreign namespace context.
Line: 1 Col: 81 Unexpected end table tag in caption. Generates implied end caption.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <math math>
| <math mi>
| "foo"
| <math mi>
| "bar"
| <p>
| "baz"
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><caption><math><mi>foo</mi><mi>bar</mi>baz</table><p>quux
#errors
Line: 1 Col: 78 Unexpected end table tag in caption. Generates implied end caption.
Line: 1 Col: 78 Unexpected end tag (caption). Missing end tag (math).
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <math math>
| <math mi>
| "foo"
| <math mi>
| "bar"
| "baz"
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><colgroup><math><mi>foo</mi><mi>bar</mi><p>baz</table><p>quux
#errors
Line: 1 Col: 44 Unexpected start tag (math) in table context caused voodoo mode.
Line: 1 Col: 56 Unexpected end tag (mi) in table context caused voodoo mode.
Line: 1 Col: 68 Unexpected end tag (mi) in table context caused voodoo mode.
Line: 1 Col: 71 HTML start tag "p" in a foreign namespace context.
Line: 1 Col: 71 Unexpected start tag (p) in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| "foo"
| <math mi>
| "bar"
| <p>
| "baz"
| <table>
| <colgroup>
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><tr><td><select><math><mi>foo</mi><mi>bar</mi><p>baz</table><p>quux
#errors
Line: 1 Col: 50 Unexpected start tag token (math) in the select phase. Ignored.
Line: 1 Col: 54 Unexpected start tag token (mi) in the select phase. Ignored.
Line: 1 Col: 62 Unexpected end tag (mi) in the select phase. Ignored.
Line: 1 Col: 66 Unexpected start tag token (mi) in the select phase. Ignored.
Line: 1 Col: 74 Unexpected end tag (mi) in the select phase. Ignored.
Line: 1 Col: 77 Unexpected start tag token (p) in the select phase. Ignored.
Line: 1 Col: 88 Unexpected table element end tag (tables) in the select in table phase.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <select>
| "foobarbaz"
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><select><math><mi>foo</mi><mi>bar</mi><p>baz</table><p>quux
#errors
Line: 1 Col: 36 Unexpected start tag (select) in table context caused voodoo mode.
Line: 1 Col: 42 Unexpected start tag token (math) in the select phase. Ignored.
Line: 1 Col: 46 Unexpected start tag token (mi) in the select phase. Ignored.
Line: 1 Col: 54 Unexpected end tag (mi) in the select phase. Ignored.
Line: 1 Col: 58 Unexpected start tag token (mi) in the select phase. Ignored.
Line: 1 Col: 66 Unexpected end tag (mi) in the select phase. Ignored.
Line: 1 Col: 69 Unexpected start tag token (p) in the select phase. Ignored.
Line: 1 Col: 80 Unexpected table element end tag (tables) in the select in table phase.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| "foobarbaz"
| <table>
| <p>
| "quux"
#data
<!DOCTYPE html><body></body></html><math><mi>foo</mi><mi>bar</mi><p>baz
#errors
Line: 1 Col: 41 Unexpected start tag (math).
Line: 1 Col: 68 HTML start tag "p" in a foreign namespace context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| "foo"
| <math mi>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><body></body><math><mi>foo</mi><mi>bar</mi><p>baz
#errors
Line: 1 Col: 34 Unexpected start tag token (math) in the after body phase.
Line: 1 Col: 61 HTML start tag "p" in a foreign namespace context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| "foo"
| <math mi>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><frameset><math><mi></mi><mi></mi><p><span>
#errors
Line: 1 Col: 31 Unexpected start tag token (math) in the frameset phase. Ignored.
Line: 1 Col: 35 Unexpected start tag token (mi) in the frameset phase. Ignored.
Line: 1 Col: 40 Unexpected end tag token (mi) in the frameset phase. Ignored.
Line: 1 Col: 44 Unexpected start tag token (mi) in the frameset phase. Ignored.
Line: 1 Col: 49 Unexpected end tag token (mi) in the frameset phase. Ignored.
Line: 1 Col: 52 Unexpected start tag token (p) in the frameset phase. Ignored.
Line: 1 Col: 58 Unexpected start tag token (span) in the frameset phase. Ignored.
Line: 1 Col: 58 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
#data
<!DOCTYPE html><frameset></frameset><math><mi></mi><mi></mi><p><span>
#errors
Line: 1 Col: 42 Unexpected start tag (math) in the after frameset phase. Ignored.
Line: 1 Col: 46 Unexpected start tag (mi) in the after frameset phase. Ignored.
Line: 1 Col: 51 Unexpected end tag (mi) in the after frameset phase. Ignored.
Line: 1 Col: 55 Unexpected start tag (mi) in the after frameset phase. Ignored.
Line: 1 Col: 60 Unexpected end tag (mi) in the after frameset phase. Ignored.
Line: 1 Col: 63 Unexpected start tag (p) in the after frameset phase. Ignored.
Line: 1 Col: 69 Unexpected start tag (span) in the after frameset phase. Ignored.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
#data
<!DOCTYPE html><body xlink:href=foo><math xlink:href=foo></math>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| <math math>
| xlink href="foo"
#data
<!DOCTYPE html><body xlink:href=foo xml:lang=en><math><mi xml:lang=en xlink:href=foo></mi></math>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| xml:lang="en"
| <math math>
| <math mi>
| xlink href="foo"
| xml lang="en"
#data
<!DOCTYPE html><body xlink:href=foo xml:lang=en><math><mi xml:lang=en xlink:href=foo /></math>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| xml:lang="en"
| <math math>
| <math mi>
| xlink href="foo"
| xml lang="en"
#data
<!DOCTYPE html><body xlink:href=foo xml:lang=en><math><mi xml:lang=en xlink:href=foo />bar</math>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| xml:lang="en"
| <math math>
| <math mi>
| xlink href="foo"
| xml lang="en"
| "bar"

View File

@@ -1,733 +0,0 @@
#data
<body><span>
#errors
#document-fragment
body
#document
| <span>
#data
<span><body>
#errors
#document-fragment
body
#document
| <span>
#data
<span><body>
#errors
#document-fragment
div
#document
| <span>
#data
<body><span>
#errors
#document-fragment
html
#document
| <head>
| <body>
| <span>
#data
<frameset><span>
#errors
#document-fragment
body
#document
| <span>
#data
<span><frameset>
#errors
#document-fragment
body
#document
| <span>
#data
<span><frameset>
#errors
#document-fragment
div
#document
| <span>
#data
<frameset><span>
#errors
#document-fragment
html
#document
| <head>
| <frameset>
#data
<table><tr>
#errors
#document-fragment
table
#document
| <tbody>
| <tr>
#data
</table><tr>
#errors
#document-fragment
table
#document
| <tbody>
| <tr>
#data
<a>
#errors
#document-fragment
table
#document
| <a>
#data
<a>
#errors
#document-fragment
table
#document
| <a>
#data
<a><caption>a
#errors
#document-fragment
table
#document
| <a>
| <caption>
| "a"
#data
<a><colgroup><col>
#errors
#document-fragment
table
#document
| <a>
| <colgroup>
| <col>
#data
<a><tbody><tr>
#errors
#document-fragment
table
#document
| <a>
| <tbody>
| <tr>
#data
<a><tfoot><tr>
#errors
#document-fragment
table
#document
| <a>
| <tfoot>
| <tr>
#data
<a><thead><tr>
#errors
#document-fragment
table
#document
| <a>
| <thead>
| <tr>
#data
<a><tr>
#errors
#document-fragment
table
#document
| <a>
| <tbody>
| <tr>
#data
<a><th>
#errors
#document-fragment
table
#document
| <a>
| <tbody>
| <tr>
| <th>
#data
<a><td>
#errors
#document-fragment
table
#document
| <a>
| <tbody>
| <tr>
| <td>
#data
<table></table><tbody>
#errors
#document-fragment
caption
#document
| <table>
#data
</table><span>
#errors
#document-fragment
caption
#document
| <span>
#data
<span></table>
#errors
#document-fragment
caption
#document
| <span>
#data
</caption><span>
#errors
#document-fragment
caption
#document
| <span>
#data
<span></caption><span>
#errors
#document-fragment
caption
#document
| <span>
| <span>
#data
<span><caption><span>
#errors
#document-fragment
caption
#document
| <span>
| <span>
#data
<span><col><span>
#errors
#document-fragment
caption
#document
| <span>
| <span>
#data
<span><colgroup><span>
#errors
#document-fragment
caption
#document
| <span>
| <span>
#data
<span><html><span>
#errors
#document-fragment
caption
#document
| <span>
| <span>
#data
<span><tbody><span>
#errors
#document-fragment
caption
#document
| <span>
| <span>
#data
<span><td><span>
#errors
#document-fragment
caption
#document
| <span>
| <span>
#data
<span><tfoot><span>
#errors
#document-fragment
caption
#document
| <span>
| <span>
#data
<span><thead><span>
#errors
#document-fragment
caption
#document
| <span>
| <span>
#data
<span><th><span>
#errors
#document-fragment
caption
#document
| <span>
| <span>
#data
<span><tr><span>
#errors
#document-fragment
caption
#document
| <span>
| <span>
#data
<span></table><span>
#errors
#document-fragment
caption
#document
| <span>
| <span>
#data
</colgroup><col>
#errors
#document-fragment
colgroup
#document
| <col>
#data
<a><col>
#errors
#document-fragment
colgroup
#document
| <col>
#data
<caption><a>
#errors
#document-fragment
tbody
#document
| <a>
#data
<col><a>
#errors
#document-fragment
tbody
#document
| <a>
#data
<colgroup><a>
#errors
#document-fragment
tbody
#document
| <a>
#data
<tbody><a>
#errors
#document-fragment
tbody
#document
| <a>
#data
<tfoot><a>
#errors
#document-fragment
tbody
#document
| <a>
#data
<thead><a>
#errors
#document-fragment
tbody
#document
| <a>
#data
</table><a>
#errors
#document-fragment
tbody
#document
| <a>
#data
<a><tr>
#errors
#document-fragment
tbody
#document
| <a>
| <tr>
#data
<a><td>
#errors
#document-fragment
tbody
#document
| <a>
| <tr>
| <td>
#data
<a><td>
#errors
#document-fragment
tbody
#document
| <a>
| <tr>
| <td>
#data
<a><td>
#errors
#document-fragment
tbody
#document
| <a>
| <tr>
| <td>
#data
<td><table><tbody><a><tr>
#errors
#document-fragment
tbody
#document
| <tr>
| <td>
| <a>
| <table>
| <tbody>
| <tr>
#data
</tr><td>
#errors
#document-fragment
tr
#document
| <td>
#data
<td><table><a><tr></tr><tr>
#errors
#document-fragment
tr
#document
| <td>
| <a>
| <table>
| <tbody>
| <tr>
| <tr>
#data
<caption><td>
#errors
#document-fragment
tr
#document
| <td>
#data
<col><td>
#errors
#document-fragment
tr
#document
| <td>
#data
<colgroup><td>
#errors
#document-fragment
tr
#document
| <td>
#data
<tbody><td>
#errors
#document-fragment
tr
#document
| <td>
#data
<tfoot><td>
#errors
#document-fragment
tr
#document
| <td>
#data
<thead><td>
#errors
#document-fragment
tr
#document
| <td>
#data
<tr><td>
#errors
#document-fragment
tr
#document
| <td>
#data
</table><td>
#errors
#document-fragment
tr
#document
| <td>
#data
<td><table></table><td>
#errors
#document-fragment
tr
#document
| <td>
| <table>
| <td>
#data
<td><table></table><td>
#errors
#document-fragment
tr
#document
| <td>
| <table>
| <td>
#data
<caption><a>
#errors
#document-fragment
td
#document
| <a>
#data
<col><a>
#errors
#document-fragment
td
#document
| <a>
#data
<colgroup><a>
#errors
#document-fragment
td
#document
| <a>
#data
<tbody><a>
#errors
#document-fragment
td
#document
| <a>
#data
<tfoot><a>
#errors
#document-fragment
td
#document
| <a>
#data
<th><a>
#errors
#document-fragment
td
#document
| <a>
#data
<thead><a>
#errors
#document-fragment
td
#document
| <a>
#data
<tr><a>
#errors
#document-fragment
td
#document
| <a>
#data
</table><a>
#errors
#document-fragment
td
#document
| <a>
#data
</tbody><a>
#errors
#document-fragment
td
#document
| <a>
#data
</td><a>
#errors
#document-fragment
td
#document
| <a>
#data
</tfoot><a>
#errors
#document-fragment
td
#document
| <a>
#data
</thead><a>
#errors
#document-fragment
td
#document
| <a>
#data
</th><a>
#errors
#document-fragment
td
#document
| <a>
#data
</tr><a>
#errors
#document-fragment
td
#document
| <a>
#data
<table><td><td>
#errors
#document-fragment
td
#document
| <table>
| <tbody>
| <tr>
| <td>
| <td>
#data
</select><option>
#errors
#document-fragment
select
#document
| <option>
#data
<input><option>
#errors
#document-fragment
select
#document
| <option>
#data
<keygen><option>
#errors
#document-fragment
select
#document
| <option>
#data
<textarea><option>
#errors
#document-fragment
select
#document
| <option>
#data
</html><!--abc-->
#errors
#document-fragment
html
#document
| <head>
| <body>
| <!-- abc -->
#data
</frameset><frame>
#errors
#document-fragment
frameset
#document
| <frame>

View File

@@ -1,261 +0,0 @@
#data
<b><p>Bold </b> Not bold</p>
Also not bold.
#errors
#document
| <html>
| <head>
| <body>
| <b>
| <p>
| <b>
| "Bold "
| " Not bold"
| "
Also not bold."
#data
<html>
<font color=red><i>Italic and Red<p>Italic and Red </font> Just italic.</p> Italic only.</i> Plain
<p>I should not be red. <font color=red>Red. <i>Italic and red.</p>
<p>Italic and red. </i> Red.</font> I should not be red.</p>
<b>Bold <i>Bold and italic</b> Only Italic </i> Plain
#errors
#document
| <html>
| <head>
| <body>
| <font>
| color="red"
| <i>
| "Italic and Red"
| <i>
| <p>
| <font>
| color="red"
| "Italic and Red "
| " Just italic."
| " Italic only."
| " Plain
"
| <p>
| "I should not be red. "
| <font>
| color="red"
| "Red. "
| <i>
| "Italic and red."
| <font>
| color="red"
| <i>
| "
"
| <p>
| <font>
| color="red"
| <i>
| "Italic and red. "
| " Red."
| " I should not be red."
| "
"
| <b>
| "Bold "
| <i>
| "Bold and italic"
| <i>
| " Only Italic "
| " Plain"
#data
<html><body>
<p><font size="7">First paragraph.</p>
<p>Second paragraph.</p></font>
<b><p><i>Bold and Italic</b> Italic</p>
#errors
#document
| <html>
| <head>
| <body>
| "
"
| <p>
| <font>
| size="7"
| "First paragraph."
| <font>
| size="7"
| "
"
| <p>
| "Second paragraph."
| "
"
| <b>
| <p>
| <b>
| <i>
| "Bold and Italic"
| <i>
| " Italic"
#data
<html>
<dl>
<dt><b>Boo
<dd>Goo?
</dl>
</html>
#errors
#document
| <html>
| <head>
| <body>
| <dl>
| "
"
| <dt>
| <b>
| "Boo
"
| <dd>
| <b>
| "Goo?
"
| <b>
| "
"
#data
<html><body>
<label><a><div>Hello<div>World</div></a></label>
</body></html>
#errors
#document
| <html>
| <head>
| <body>
| "
"
| <label>
| <a>
| <div>
| <a>
| "Hello"
| <div>
| "World"
| "
"
#data
<table><center> <font>a</center> <img> <tr><td> </td> </tr> </table>
#errors
#document
| <html>
| <head>
| <body>
| <center>
| " "
| <font>
| "a"
| <font>
| <img>
| " "
| <table>
| " "
| <tbody>
| <tr>
| <td>
| " "
| " "
| " "
#data
<table><tr><p><a><p>You should see this text.
#errors
#document
| <html>
| <head>
| <body>
| <p>
| <a>
| <p>
| <a>
| "You should see this text."
| <table>
| <tbody>
| <tr>
#data
<TABLE>
<TR>
<CENTER><CENTER><TD></TD></TR><TR>
<FONT>
<TABLE><tr></tr></TABLE>
</P>
<a></font><font></a>
This page contains an insanely badly-nested tag sequence.
#errors
#document
| <html>
| <head>
| <body>
| <center>
| <center>
| <font>
| "
"
| <table>
| "
"
| <tbody>
| <tr>
| "
"
| <td>
| <tr>
| "
"
| <table>
| <tbody>
| <tr>
| <font>
| "
"
| <p>
| "
"
| <a>
| <a>
| <font>
| <font>
| "
This page contains an insanely badly-nested tag sequence."
#data
<html>
<body>
<b><nobr><div>This text is in a div inside a nobr</nobr>More text that should not be in the nobr, i.e., the
nobr should have closed the div inside it implicitly. </b><pre>A pre tag outside everything else.</pre>
</body>
</html>
#errors
#document
| <html>
| <head>
| <body>
| "
"
| <b>
| <nobr>
| <div>
| <b>
| <nobr>
| "This text is in a div inside a nobr"
| "More text that should not be in the nobr, i.e., the
nobr should have closed the div inside it implicitly. "
| <pre>
| "A pre tag outside everything else."
| "
"

View File

@@ -1,609 +0,0 @@
#data
Test
#errors
Line: 1 Col: 4 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "Test"
#data
<div></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
#data
<div>Test</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "Test"
#data
<di
#errors
#document
| <html>
| <head>
| <body>
#data
<div>Hello</div>
<script>
console.log("PASS");
</script>
<div>Bye</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "Hello"
| "
"
| <script>
| "
console.log("PASS");
"
| "
"
| <div>
| "Bye"
#data
<div foo="bar">Hello</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| foo="bar"
| "Hello"
#data
<div>Hello</div>
<script>
console.log("FOO<span>BAR</span>BAZ");
</script>
<div>Bye</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "Hello"
| "
"
| <script>
| "
console.log("FOO<span>BAR</span>BAZ");
"
| "
"
| <div>
| "Bye"
#data
<foo bar="baz"></foo><potato quack="duck"></potato>
#errors
#document
| <html>
| <head>
| <body>
| <foo>
| bar="baz"
| <potato>
| quack="duck"
#data
<foo bar="baz"><potato quack="duck"></potato></foo>
#errors
#document
| <html>
| <head>
| <body>
| <foo>
| bar="baz"
| <potato>
| quack="duck"
#data
<foo></foo bar="baz"><potato></potato quack="duck">
#errors
#document
| <html>
| <head>
| <body>
| <foo>
| <potato>
#data
</ tttt>
#errors
#document
| <!-- tttt -->
| <html>
| <head>
| <body>
#data
<div FOO ><img><img></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| foo=""
| <img>
| <img>
#data
<p>Test</p<p>Test2</p>
#errors
#document
| <html>
| <head>
| <body>
| <p>
| "TestTest2"
#data
<rdar://problem/6869687>
#errors
#document
| <html>
| <head>
| <body>
| <rdar:>
| 6869687=""
| problem=""
#data
<A>test< /A>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| "test< /A>"
#data
&lt;
#errors
#document
| <html>
| <head>
| <body>
| "<"
#data
<body foo='bar'><body foo='baz' yo='mama'>
#errors
#document
| <html>
| <head>
| <body>
| foo="bar"
| yo="mama"
#data
<body></br foo="bar"></body>
#errors
#document
| <html>
| <head>
| <body>
| <br>
#data
<bdy><br foo="bar"></body>
#errors
#document
| <html>
| <head>
| <body>
| <bdy>
| <br>
| foo="bar"
#data
<body></body></br foo="bar">
#errors
#document
| <html>
| <head>
| <body>
| <br>
#data
<bdy></body><br foo="bar">
#errors
#document
| <html>
| <head>
| <body>
| <bdy>
| <br>
| foo="bar"
#data
<html><body></body></html><!-- Hi there -->
#errors
#document
| <html>
| <head>
| <body>
| <!-- Hi there -->
#data
<html><body></body></html>x<!-- Hi there -->
#errors
#document
| <html>
| <head>
| <body>
| "x"
| <!-- Hi there -->
#data
<html><body></body></html>x<!-- Hi there --></html><!-- Again -->
#errors
#document
| <html>
| <head>
| <body>
| "x"
| <!-- Hi there -->
| <!-- Again -->
#data
<html><body></body></html>x<!-- Hi there --></body></html><!-- Again -->
#errors
#document
| <html>
| <head>
| <body>
| "x"
| <!-- Hi there -->
| <!-- Again -->
#data
<html><body><ruby><div><rp>xx</rp></div></ruby></body></html>
#errors
#document
| <html>
| <head>
| <body>
| <ruby>
| <div>
| <rp>
| "xx"
#data
<html><body><ruby><div><rt>xx</rt></div></ruby></body></html>
#errors
#document
| <html>
| <head>
| <body>
| <ruby>
| <div>
| <rt>
| "xx"
#data
<html><frameset><!--1--><noframes>A</noframes><!--2--></frameset><!--3--><noframes>B</noframes><!--4--></html><!--5--><noframes>C</noframes><!--6-->
#errors
#document
| <html>
| <head>
| <frameset>
| <!-- 1 -->
| <noframes>
| "A"
| <!-- 2 -->
| <!-- 3 -->
| <noframes>
| "B"
| <!-- 4 -->
| <noframes>
| "C"
| <!-- 5 -->
| <!-- 6 -->
#data
<select><option>A<select><option>B<select><option>C<select><option>D<select><option>E<select><option>F<select><option>G<select>
#errors
#document
| <html>
| <head>
| <body>
| <select>
| <option>
| "A"
| <option>
| "B"
| <select>
| <option>
| "C"
| <option>
| "D"
| <select>
| <option>
| "E"
| <option>
| "F"
| <select>
| <option>
| "G"
#data
<dd><dd><dt><dt><dd><li><li>
#errors
#document
| <html>
| <head>
| <body>
| <dd>
| <dd>
| <dt>
| <dt>
| <dd>
| <li>
| <li>
#data
<div><b></div><div><nobr>a<nobr>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <b>
| <div>
| <b>
| <nobr>
| "a"
| <nobr>
#data
<head></head>
<body></body>
#errors
#document
| <html>
| <head>
| "
"
| <body>
#data
<head></head> <style></style>ddd
#errors
#document
| <html>
| <head>
| <style>
| " "
| <body>
| "ddd"
#data
<kbd><table></kbd><col><select><tr>
#errors
#document
| <html>
| <head>
| <body>
| <kbd>
| <select>
| <table>
| <colgroup>
| <col>
| <tbody>
| <tr>
#data
<kbd><table></kbd><col><select><tr></table><div>
#errors
#document
| <html>
| <head>
| <body>
| <kbd>
| <select>
| <table>
| <colgroup>
| <col>
| <tbody>
| <tr>
| <div>
#data
<a><li><style></style><title></title></a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <li>
| <a>
| <style>
| <title>
#data
<font></p><p><meta><title></title></font>
#errors
#document
| <html>
| <head>
| <body>
| <font>
| <p>
| <p>
| <font>
| <meta>
| <title>
#data
<a><center><title></title><a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <center>
| <a>
| <title>
| <a>
#data
<svg><title><div>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| <svg title>
| <div>
#data
<svg><title><rect><div>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| <svg title>
| <rect>
| <div>
#data
<svg><title><svg><div>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| <svg title>
| <svg svg>
| <div>
#data
<img <="" FAIL>
#errors
#document
| <html>
| <head>
| <body>
| <img>
| <=""
| fail=""
#data
<ul><li><div id='foo'/>A</li><li>B<div>C</div></li></ul>
#errors
#document
| <html>
| <head>
| <body>
| <ul>
| <li>
| <div>
| id="foo"
| "A"
| <li>
| "B"
| <div>
| "C"
#data
<svg><em><desc></em>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| <em>
| <desc>
#data
<table><tr><td><svg><desc><td></desc><circle>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <svg svg>
| <svg desc>
| <svg circle>
#data
<svg><tfoot></mi><td>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| <svg tfoot>
| <svg td>
#data
<math><mrow><mrow><mn>1</mn></mrow><mi>a</mi></mrow></math>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mrow>
| <math mrow>
| <math mn>
| "1"
| <math mi>
| "a"
#data
<!doctype html><input type="hidden"><frameset>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
#data
<!doctype html><input type="button"><frameset>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <input>
| type="button"

View File

@@ -1,104 +0,0 @@
#data
<foo bar=qux/>
#errors
#document
| <html>
| <head>
| <body>
| <foo>
| bar="qux/"
#data
<p id="status"><noscript><strong>A</strong></noscript><span>B</span></p>
#errors
#document
| <html>
| <head>
| <body>
| <p>
| id="status"
| <noscript>
| "<strong>A</strong>"
| <span>
| "B"
#data
<div><sarcasm><div></div></sarcasm></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <sarcasm>
| <div>
#data
<html><body><img src="" border="0" alt="><div>A</div></body></html>
#errors
#document
| <html>
| <head>
| <body>
#data
<table><td></tbody>A
#errors
#document
| <html>
| <head>
| <body>
| "A"
| <table>
| <tbody>
| <tr>
| <td>
#data
<table><td></thead>A
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| "A"
#data
<table><td></tfoot>A
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| "A"
#data
<table><thead><td></tbody>A
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <thead>
| <tr>
| <td>
| "A"
#data
<legend>test</legend>
#errors
#document
| <html>
| <head>
| <body>
| <legend>
| "test"

View File

@@ -1,779 +0,0 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"bytes"
"io"
"strconv"
"strings"
)
// A TokenType is the type of a Token.
type TokenType int
const (
// ErrorToken means that an error occurred during tokenization.
ErrorToken TokenType = iota
// TextToken means a text node.
TextToken
// A StartTagToken looks like <a>.
StartTagToken
// An EndTagToken looks like </a>.
EndTagToken
// A SelfClosingTagToken tag looks like <br/>.
SelfClosingTagToken
// A CommentToken looks like <!--x-->.
CommentToken
// A DoctypeToken looks like <!DOCTYPE x>
DoctypeToken
)
// String returns a string representation of the TokenType.
func (t TokenType) String() string {
switch t {
case ErrorToken:
return "Error"
case TextToken:
return "Text"
case StartTagToken:
return "StartTag"
case EndTagToken:
return "EndTag"
case SelfClosingTagToken:
return "SelfClosingTag"
case CommentToken:
return "Comment"
case DoctypeToken:
return "Doctype"
}
return "Invalid(" + strconv.Itoa(int(t)) + ")"
}
// An Attribute is an attribute namespace-key-value triple. Namespace is
// non-empty for foreign attributes like xlink, Key is alphabetic (and hence
// does not contain escapable characters like '&', '<' or '>'), and Val is
// unescaped (it looks like "a<b" rather than "a&lt;b").
//
// Namespace is only used by the parser, not the tokenizer.
type Attribute struct {
Namespace, Key, Val string
}
// A Token consists of a TokenType and some Data (tag name for start and end
// tags, content for text, comments and doctypes). A tag Token may also contain
// a slice of Attributes. Data is unescaped for all Tokens (it looks like "a<b"
// rather than "a&lt;b").
type Token struct {
Type TokenType
Data string
Attr []Attribute
}
// tagString returns a string representation of a tag Token's Data and Attr.
func (t Token) tagString() string {
if len(t.Attr) == 0 {
return t.Data
}
buf := bytes.NewBufferString(t.Data)
for _, a := range t.Attr {
buf.WriteByte(' ')
buf.WriteString(a.Key)
buf.WriteString(`="`)
escape(buf, a.Val)
buf.WriteByte('"')
}
return buf.String()
}
// String returns a string representation of the Token.
func (t Token) String() string {
switch t.Type {
case ErrorToken:
return ""
case TextToken:
return EscapeString(t.Data)
case StartTagToken:
return "<" + t.tagString() + ">"
case EndTagToken:
return "</" + t.tagString() + ">"
case SelfClosingTagToken:
return "<" + t.tagString() + "/>"
case CommentToken:
return "<!--" + t.Data + "-->"
case DoctypeToken:
return "<!DOCTYPE " + t.Data + ">"
}
return "Invalid(" + strconv.Itoa(int(t.Type)) + ")"
}
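// Illustrative sketch, not in the original source: how String renders a
// start tag Token with one attribute (tagString escapes '&' in the value):
//
//	t := Token{Type: StartTagToken, Data: "a",
//		Attr: []Attribute{{Key: "href", Val: "x&y"}}}
//	fmt.Println(t.String()) // <a href="x&amp;y">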
// span is a range of bytes in a Tokenizer's buffer. The start is inclusive,
// the end is exclusive.
type span struct {
start, end int
}
// A Tokenizer returns a stream of HTML Tokens.
type Tokenizer struct {
// r is the source of the HTML text.
r io.Reader
// tt is the TokenType of the current token.
tt TokenType
// err is the first error encountered during tokenization. It is possible
// for tt != Error && err != nil to hold: this means that Next returned a
// valid token but the subsequent Next call will return an error token.
// For example, if the HTML text input was just "plain", then the first
// Next call would set z.err to io.EOF but return a TextToken, and all
// subsequent Next calls would return an ErrorToken.
// err is never reset. Once it becomes non-nil, it stays non-nil.
err error
// buf[raw.start:raw.end] holds the raw bytes of the current token.
// buf[raw.end:] is buffered input that will yield future tokens.
raw span
buf []byte
// buf[data.start:data.end] holds the raw bytes of the current token's data:
// a text token's text, a tag token's tag name, etc.
data span
// pendingAttr is the attribute key and value currently being tokenized.
// When complete, pendingAttr is pushed onto attr. nAttrReturned is
// incremented on each call to TagAttr.
pendingAttr [2]span
attr [][2]span
nAttrReturned int
// rawTag is the "script" in "</script>" that closes the next token. If
// non-empty, the subsequent call to Next will return a raw or RCDATA text
// token: one that treats "<p>" as text instead of an element.
// rawTag's contents are lower-cased.
rawTag string
// textIsRaw is whether the current text token's data is not escaped.
textIsRaw bool
}
// Err returns the error associated with the most recent ErrorToken token.
// This is typically io.EOF, meaning the end of tokenization.
func (z *Tokenizer) Err() error {
if z.tt != ErrorToken {
return nil
}
return z.err
}
// readByte returns the next byte from the input stream, doing a buffered read
// from z.r into z.buf if necessary. z.buf[z.raw.start:z.raw.end] remains a contiguous byte
// slice that holds all the bytes read so far for the current token.
// It sets z.err if the underlying reader returns an error.
// Pre-condition: z.err == nil.
func (z *Tokenizer) readByte() byte {
if z.raw.end >= len(z.buf) {
// Our buffer is exhausted and we have to read from z.r.
// We copy z.buf[z.raw.start:z.raw.end] to the beginning of z.buf. If the length
// z.raw.end - z.raw.start is more than half the capacity of z.buf, then we
// allocate a new buffer before the copy.
c := cap(z.buf)
d := z.raw.end - z.raw.start
var buf1 []byte
if 2*d > c {
buf1 = make([]byte, d, 2*c)
} else {
buf1 = z.buf[:d]
}
copy(buf1, z.buf[z.raw.start:z.raw.end])
if x := z.raw.start; x != 0 {
// Adjust the data/attr spans to refer to the same contents after the copy.
z.data.start -= x
z.data.end -= x
z.pendingAttr[0].start -= x
z.pendingAttr[0].end -= x
z.pendingAttr[1].start -= x
z.pendingAttr[1].end -= x
for i := range z.attr {
z.attr[i][0].start -= x
z.attr[i][0].end -= x
z.attr[i][1].start -= x
z.attr[i][1].end -= x
}
}
z.raw.start, z.raw.end, z.buf = 0, d, buf1[:d]
// Now that we have copied the live bytes to the start of the buffer,
// we read from z.r into the remainder.
n, err := z.r.Read(buf1[d:cap(buf1)])
if err != nil {
z.err = err
return 0
}
z.buf = buf1[:d+n]
}
x := z.buf[z.raw.end]
z.raw.end++
return x
}
// skipWhiteSpace skips past any white space.
func (z *Tokenizer) skipWhiteSpace() {
if z.err != nil {
return
}
for {
c := z.readByte()
if z.err != nil {
return
}
switch c {
case ' ', '\n', '\r', '\t', '\f':
// No-op.
default:
z.raw.end--
return
}
}
}
// readRawOrRCDATA reads until the next "</foo>", where "foo" is z.rawTag and
// is typically something like "script" or "textarea".
func (z *Tokenizer) readRawOrRCDATA() {
loop:
for {
c := z.readByte()
if z.err != nil {
break loop
}
if c != '<' {
continue loop
}
c = z.readByte()
if z.err != nil {
break loop
}
if c != '/' {
continue loop
}
for i := 0; i < len(z.rawTag); i++ {
c = z.readByte()
if z.err != nil {
break loop
}
if c != z.rawTag[i] && c != z.rawTag[i]-('a'-'A') {
continue loop
}
}
c = z.readByte()
if z.err != nil {
break loop
}
switch c {
case ' ', '\n', '\r', '\t', '\f', '/', '>':
// The 3 is 2 for the leading "</" plus 1 for the trailing character c.
z.raw.end -= 3 + len(z.rawTag)
break loop
case '<':
// Step back one, to catch "</foo</foo>".
z.raw.end--
}
}
z.data.end = z.raw.end
// A textarea's or title's RCDATA can contain escaped entities.
z.textIsRaw = z.rawTag != "textarea" && z.rawTag != "title"
z.rawTag = ""
}
// readComment reads the next comment token starting with "<!--". The opening
// "<!--" has already been consumed.
func (z *Tokenizer) readComment() {
z.data.start = z.raw.end
defer func() {
if z.data.end < z.data.start {
// It's a comment with no data, like <!-->.
z.data.end = z.data.start
}
}()
for dashCount := 2; ; {
c := z.readByte()
if z.err != nil {
// Ignore up to two dashes at EOF.
if dashCount > 2 {
dashCount = 2
}
z.data.end = z.raw.end - dashCount
return
}
switch c {
case '-':
dashCount++
continue
case '>':
if dashCount >= 2 {
z.data.end = z.raw.end - len("-->")
return
}
case '!':
if dashCount >= 2 {
c = z.readByte()
if z.err != nil {
z.data.end = z.raw.end
return
}
if c == '>' {
z.data.end = z.raw.end - len("--!>")
return
}
}
}
dashCount = 0
}
}
// readUntilCloseAngle reads until the next ">".
func (z *Tokenizer) readUntilCloseAngle() {
z.data.start = z.raw.end
for {
c := z.readByte()
if z.err != nil {
z.data.end = z.raw.end
return
}
if c == '>' {
z.data.end = z.raw.end - len(">")
return
}
}
}
// readMarkupDeclaration reads the next token starting with "<!". It might be
// a "<!--comment-->", a "<!DOCTYPE foo>", or "<!a bogus comment". The opening
// "<!" has already been consumed.
func (z *Tokenizer) readMarkupDeclaration() TokenType {
z.data.start = z.raw.end
var c [2]byte
for i := 0; i < 2; i++ {
c[i] = z.readByte()
if z.err != nil {
z.data.end = z.raw.end
return CommentToken
}
}
if c[0] == '-' && c[1] == '-' {
z.readComment()
return CommentToken
}
z.raw.end -= 2
const s = "DOCTYPE"
for i := 0; i < len(s); i++ {
c := z.readByte()
if z.err != nil {
z.data.end = z.raw.end
return CommentToken
}
if c != s[i] && c != s[i]+('a'-'A') {
// Back up to read the fragment of "DOCTYPE" again.
z.raw.end = z.data.start
z.readUntilCloseAngle()
return CommentToken
}
}
if z.skipWhiteSpace(); z.err != nil {
z.data.start = z.raw.end
z.data.end = z.raw.end
return DoctypeToken
}
z.readUntilCloseAngle()
return DoctypeToken
}
// startTagIn returns whether the start tag in z.buf[z.data.start:z.data.end]
// case-insensitively matches any element of ss.
func (z *Tokenizer) startTagIn(ss ...string) bool {
loop:
for _, s := range ss {
if z.data.end-z.data.start != len(s) {
continue loop
}
for i := 0; i < len(s); i++ {
c := z.buf[z.data.start+i]
if 'A' <= c && c <= 'Z' {
c += 'a' - 'A'
}
if c != s[i] {
continue loop
}
}
return true
}
return false
}
// readStartTag reads the next start tag token. The opening "<a" has already
// been consumed, where 'a' means anything in [A-Za-z].
func (z *Tokenizer) readStartTag() TokenType {
z.attr = z.attr[:0]
z.nAttrReturned = 0
// Read the tag name and attribute key/value pairs.
z.readTagName()
if z.skipWhiteSpace(); z.err != nil {
return ErrorToken
}
for {
c := z.readByte()
if z.err != nil || c == '>' {
break
}
z.raw.end--
z.readTagAttrKey()
z.readTagAttrVal()
// Save pendingAttr if it has a non-empty key.
if z.pendingAttr[0].start != z.pendingAttr[0].end {
z.attr = append(z.attr, z.pendingAttr)
}
if z.skipWhiteSpace(); z.err != nil {
break
}
}
// Several tags flag the tokenizer's next token as raw.
c, raw := z.buf[z.data.start], false
if 'A' <= c && c <= 'Z' {
c += 'a' - 'A'
}
switch c {
case 'i':
raw = z.startTagIn("iframe")
case 'n':
raw = z.startTagIn("noembed", "noframes", "noscript")
case 'p':
raw = z.startTagIn("plaintext")
case 's':
raw = z.startTagIn("script", "style")
case 't':
raw = z.startTagIn("textarea", "title")
case 'x':
raw = z.startTagIn("xmp")
}
if raw {
z.rawTag = strings.ToLower(string(z.buf[z.data.start:z.data.end]))
}
// Look for a self-closing token like "<br/>".
if z.err == nil && z.buf[z.raw.end-2] == '/' {
return SelfClosingTagToken
}
return StartTagToken
}
// readEndTag reads the next end tag token. The opening "</a" has already
// been consumed, where 'a' means anything in [A-Za-z].
func (z *Tokenizer) readEndTag() {
z.attr = z.attr[:0]
z.nAttrReturned = 0
z.readTagName()
for {
c := z.readByte()
if z.err != nil || c == '>' {
return
}
}
}
// readTagName sets z.data to the "div" in "<div k=v>". The reader (z.raw.end)
// is positioned such that the first byte of the tag name (the "d" in "<div")
// has already been consumed.
func (z *Tokenizer) readTagName() {
z.data.start = z.raw.end - 1
for {
c := z.readByte()
if z.err != nil {
z.data.end = z.raw.end
return
}
switch c {
case ' ', '\n', '\r', '\t', '\f':
z.data.end = z.raw.end - 1
return
case '/', '>':
z.raw.end--
z.data.end = z.raw.end
return
}
}
}
// readTagAttrKey sets z.pendingAttr[0] to the "k" in "<div k=v>".
// Precondition: z.err == nil.
func (z *Tokenizer) readTagAttrKey() {
z.pendingAttr[0].start = z.raw.end
for {
c := z.readByte()
if z.err != nil {
z.pendingAttr[0].end = z.raw.end
return
}
switch c {
case ' ', '\n', '\r', '\t', '\f', '/':
z.pendingAttr[0].end = z.raw.end - 1
return
case '=', '>':
z.raw.end--
z.pendingAttr[0].end = z.raw.end
return
}
}
}
// readTagAttrVal sets z.pendingAttr[1] to the "v" in "<div k=v>".
func (z *Tokenizer) readTagAttrVal() {
z.pendingAttr[1].start = z.raw.end
z.pendingAttr[1].end = z.raw.end
if z.skipWhiteSpace(); z.err != nil {
return
}
c := z.readByte()
if z.err != nil {
return
}
if c != '=' {
z.raw.end--
return
}
if z.skipWhiteSpace(); z.err != nil {
return
}
quote := z.readByte()
if z.err != nil {
return
}
switch quote {
case '>':
z.raw.end--
return
case '\'', '"':
z.pendingAttr[1].start = z.raw.end
for {
c := z.readByte()
if z.err != nil {
z.pendingAttr[1].end = z.raw.end
return
}
if c == quote {
z.pendingAttr[1].end = z.raw.end - 1
return
}
}
default:
z.pendingAttr[1].start = z.raw.end - 1
for {
c := z.readByte()
if z.err != nil {
z.pendingAttr[1].end = z.raw.end
return
}
switch c {
case ' ', '\n', '\r', '\t', '\f':
z.pendingAttr[1].end = z.raw.end - 1
return
case '>':
z.raw.end--
z.pendingAttr[1].end = z.raw.end
return
}
}
}
}
// Next scans the next token and returns its type.
func (z *Tokenizer) Next() TokenType {
if z.err != nil {
z.tt = ErrorToken
return z.tt
}
z.raw.start = z.raw.end
z.data.start = z.raw.end
z.data.end = z.raw.end
if z.rawTag != "" {
if z.rawTag == "plaintext" {
// Read everything up to EOF.
for z.err == nil {
z.readByte()
}
z.textIsRaw = true
} else {
z.readRawOrRCDATA()
}
if z.data.end > z.data.start {
z.tt = TextToken
return z.tt
}
}
z.textIsRaw = false
loop:
for {
c := z.readByte()
if z.err != nil {
break loop
}
if c != '<' {
continue loop
}
// Check if the '<' we have just read is part of a tag, comment
// or doctype. If not, it's part of the accumulated text token.
c = z.readByte()
if z.err != nil {
break loop
}
var tokenType TokenType
switch {
case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z':
tokenType = StartTagToken
case c == '/':
tokenType = EndTagToken
case c == '!' || c == '?':
// We use CommentToken to mean any of "<!--actual comments-->",
// "<!DOCTYPE declarations>" and "<?xml processing instructions?>".
tokenType = CommentToken
default:
continue
}
// We have a non-text token, but we might have accumulated some text
// before that. If so, we return the text first, and return the non-
// text token on the subsequent call to Next.
if x := z.raw.end - len("<a"); z.raw.start < x {
z.raw.end = x
z.data.end = x
z.tt = TextToken
return z.tt
}
switch tokenType {
case StartTagToken:
z.tt = z.readStartTag()
return z.tt
case EndTagToken:
c = z.readByte()
if z.err != nil {
break loop
}
if c == '>' {
// "</>" does not generate a token at all.
// Reset the tokenizer state and start again.
z.raw.start = z.raw.end
z.data.start = z.raw.end
z.data.end = z.raw.end
continue loop
}
if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' {
z.readEndTag()
z.tt = EndTagToken
return z.tt
}
z.raw.end--
z.readUntilCloseAngle()
z.tt = CommentToken
return z.tt
case CommentToken:
if c == '!' {
z.tt = z.readMarkupDeclaration()
return z.tt
}
z.raw.end--
z.readUntilCloseAngle()
z.tt = CommentToken
return z.tt
}
}
if z.raw.start < z.raw.end {
z.data.end = z.raw.end
z.tt = TextToken
return z.tt
}
z.tt = ErrorToken
return z.tt
}
// Raw returns the unmodified text of the current token. Calling Next, Token,
// Text, TagName or TagAttr may change the contents of the returned slice.
func (z *Tokenizer) Raw() []byte {
return z.buf[z.raw.start:z.raw.end]
}
// Text returns the unescaped text of a text, comment or doctype token. The
// contents of the returned slice may change on the next call to Next.
func (z *Tokenizer) Text() []byte {
switch z.tt {
case TextToken, CommentToken, DoctypeToken:
s := z.buf[z.data.start:z.data.end]
z.data.start = z.raw.end
z.data.end = z.raw.end
if !z.textIsRaw {
s = unescape(s)
}
return s
}
return nil
}
// TagName returns the lower-cased name of a tag token (the `img` out of
// `<IMG SRC="foo">`) and whether the tag has attributes.
// The contents of the returned slice may change on the next call to Next.
func (z *Tokenizer) TagName() (name []byte, hasAttr bool) {
if z.data.start < z.data.end {
switch z.tt {
case StartTagToken, EndTagToken, SelfClosingTagToken:
s := z.buf[z.data.start:z.data.end]
z.data.start = z.raw.end
z.data.end = z.raw.end
return lower(s), z.nAttrReturned < len(z.attr)
}
}
return nil, false
}
// TagAttr returns the lower-cased key and unescaped value of the next unparsed
// attribute for the current tag token and whether there are more attributes.
// The contents of the returned slices may change on the next call to Next.
func (z *Tokenizer) TagAttr() (key, val []byte, moreAttr bool) {
if z.nAttrReturned < len(z.attr) {
switch z.tt {
case StartTagToken, SelfClosingTagToken:
x := z.attr[z.nAttrReturned]
z.nAttrReturned++
key = z.buf[x[0].start:x[0].end]
val = z.buf[x[1].start:x[1].end]
return lower(key), unescape(val), z.nAttrReturned < len(z.attr)
}
}
return nil, nil, false
}
// Token returns the next Token. The result's Data and Attr values remain valid
// after subsequent Next calls.
func (z *Tokenizer) Token() Token {
t := Token{Type: z.tt}
switch z.tt {
case TextToken, CommentToken, DoctypeToken:
t.Data = string(z.Text())
case StartTagToken, SelfClosingTagToken:
var attr []Attribute
name, moreAttr := z.TagName()
for moreAttr {
var key, val []byte
key, val, moreAttr = z.TagAttr()
attr = append(attr, Attribute{"", string(key), string(val)})
}
t.Data = string(name)
t.Attr = attr
case EndTagToken:
name, _ := z.TagName()
t.Data = string(name)
}
return t
}
// NewTokenizer returns a new HTML Tokenizer for the given Reader.
// The input is assumed to be UTF-8 encoded.
func NewTokenizer(r io.Reader) *Tokenizer {
return &Tokenizer{
r: r,
buf: make([]byte, 0, 4096),
}
}
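
For orientation, here is a minimal usage sketch of the tokenizer API defined above (NewTokenizer, Next, Token, Err). It is not part of the original file, the input string is arbitrary, and it would additionally need "fmt" in the import list.

func ExampleTokenizer() {
	z := NewTokenizer(strings.NewReader(`<p class="x">Hello<br/></p>`))
	for {
		if z.Next() == ErrorToken {
			// io.EOF just means the input was fully consumed.
			if z.Err() != io.EOF {
				fmt.Println("tokenize error:", z.Err())
			}
			return
		}
		t := z.Token()
		fmt.Printf("%v: %s\n", t.Type, t.String())
	}
}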

View File

@@ -1,590 +0,0 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"bytes"
"io"
"strings"
"testing"
)
type tokenTest struct {
// A short description of the test case.
desc string
// The HTML to parse.
html string
// The string representations of the expected tokens, joined by '$'.
golden string
}
var tokenTests = []tokenTest{
{
"empty",
"",
"",
},
// A single text node. The tokenizer should not break text nodes on whitespace,
// nor should it normalize whitespace within a text node.
{
"text",
"foo bar",
"foo bar",
},
// An entity.
{
"entity",
"one &lt; two",
"one &lt; two",
},
// A start, self-closing and end tag. The tokenizer does not care if the start
// and end tokens don't match; that is the job of the parser.
{
"tags",
"<a>b<c/>d</e>",
"<a>$b$<c/>$d$</e>",
},
// Angle brackets that aren't a tag.
{
"not a tag #0",
"<",
"&lt;",
},
{
"not a tag #1",
"</",
"&lt;/",
},
{
"not a tag #2",
"</>",
"",
},
{
"not a tag #3",
"a</>b",
"a$b",
},
{
"not a tag #4",
"</ >",
"<!-- -->",
},
{
"not a tag #5",
"</.",
"<!--.-->",
},
{
"not a tag #6",
"</.>",
"<!--.-->",
},
{
"not a tag #7",
"a < b",
"a &lt; b",
},
{
"not a tag #8",
"<.>",
"&lt;.&gt;",
},
{
"not a tag #9",
"a<<<b>>>c",
"a&lt;&lt;$<b>$&gt;&gt;c",
},
{
"not a tag #10",
"if x<0 and y < 0 then x*y>0",
"if x&lt;0 and y &lt; 0 then x*y&gt;0",
},
// EOF in a tag name.
{
"tag name eof #0",
"<a",
"",
},
{
"tag name eof #1",
"<a ",
"",
},
{
"tag name eof #2",
"a<b",
"a",
},
{
"tag name eof #3",
"<a><b",
"<a>",
},
{
"tag name eof #4",
`<a x`,
`<a x="">`,
},
// Some malformed tags that are missing a '>'.
{
"malformed tag #0",
`<p</p>`,
`<p< p="">`,
},
{
"malformed tag #1",
`<p </p>`,
`<p <="" p="">`,
},
{
"malformed tag #2",
`<p id`,
`<p id="">`,
},
{
"malformed tag #3",
`<p id=`,
`<p id="">`,
},
{
"malformed tag #4",
`<p id=>`,
`<p id="">`,
},
{
"malformed tag #5",
`<p id=0`,
`<p id="0">`,
},
{
"malformed tag #6",
`<p id=0</p>`,
`<p id="0&lt;/p">`,
},
{
"malformed tag #7",
`<p id="0</p>`,
`<p id="0&lt;/p&gt;">`,
},
{
"malformed tag #8",
`<p id="0"</p>`,
`<p id="0" <="" p="">`,
},
// Raw text and RCDATA.
{
"basic raw text",
"<script><a></b></script>",
"<script>$&lt;a&gt;&lt;/b&gt;$</script>",
},
{
"unfinished script end tag",
"<SCRIPT>a</SCR",
"<script>$a&lt;/SCR",
},
{
"broken script end tag",
"<SCRIPT>a</SCR ipt>",
"<script>$a&lt;/SCR ipt&gt;",
},
{
"EOF in script end tag",
"<SCRIPT>a</SCRipt",
"<script>$a&lt;/SCRipt",
},
{
"scriptx end tag",
"<SCRIPT>a</SCRiptx",
"<script>$a&lt;/SCRiptx",
},
{
"' ' completes script end tag",
"<SCRIPT>a</SCRipt ",
"<script>$a$</script>",
},
{
"'>' completes script end tag",
"<SCRIPT>a</SCRipt>",
"<script>$a$</script>",
},
{
"self-closing script end tag",
"<SCRIPT>a</SCRipt/>",
"<script>$a$</script>",
},
{
"nested script tag",
"<SCRIPT>a</SCRipt<script>",
"<script>$a&lt;/SCRipt&lt;script&gt;",
},
{
"script end tag after unfinished",
"<SCRIPT>a</SCRipt</script>",
"<script>$a&lt;/SCRipt$</script>",
},
{
"script/style mismatched tags",
"<script>a</style>",
"<script>$a&lt;/style&gt;",
},
{
"style element with entity",
"<style>&apos;",
"<style>$&amp;apos;",
},
{
"textarea with tag",
"<textarea><div></textarea>",
"<textarea>$&lt;div&gt;$</textarea>",
},
{
"title with tag and entity",
"<title><b>K&amp;R C</b></title>",
"<title>$&lt;b&gt;K&amp;R C&lt;/b&gt;$</title>",
},
// DOCTYPE tests.
{
"Proper DOCTYPE",
"<!DOCTYPE html>",
"<!DOCTYPE html>",
},
{
"DOCTYPE with no space",
"<!doctypehtml>",
"<!DOCTYPE html>",
},
{
"DOCTYPE with two spaces",
"<!doctype html>",
"<!DOCTYPE html>",
},
{
"looks like DOCTYPE but isn't",
"<!DOCUMENT html>",
"<!--DOCUMENT html-->",
},
{
"DOCTYPE at EOF",
"<!DOCtype",
"<!DOCTYPE >",
},
// XML processing instructions.
{
"XML processing instruction",
"<?xml?>",
"<!--?xml?-->",
},
// Comments.
{
"comment0",
"abc<b><!-- skipme --></b>def",
"abc$<b>$<!-- skipme -->$</b>$def",
},
{
"comment1",
"a<!-->z",
"a$<!---->$z",
},
{
"comment2",
"a<!--->z",
"a$<!---->$z",
},
{
"comment3",
"a<!--x>-->z",
"a$<!--x>-->$z",
},
{
"comment4",
"a<!--x->-->z",
"a$<!--x->-->$z",
},
{
"comment5",
"a<!>z",
"a$<!---->$z",
},
{
"comment6",
"a<!->z",
"a$<!----->$z",
},
{
"comment7",
"a<!---<>z",
"a$<!---<>z-->",
},
{
"comment8",
"a<!--z",
"a$<!--z-->",
},
{
"comment9",
"a<!--z-",
"a$<!--z-->",
},
{
"comment10",
"a<!--z--",
"a$<!--z-->",
},
{
"comment11",
"a<!--z---",
"a$<!--z--->",
},
{
"comment12",
"a<!--z----",
"a$<!--z---->",
},
{
"comment13",
"a<!--x--!>z",
"a$<!--x-->$z",
},
// An attribute with a backslash.
{
"backslash",
`<p id="a\"b">`,
`<p id="a\" b"="">`,
},
// Entities, tag name and attribute key lower-casing, and whitespace
// normalization within a tag.
{
"tricky",
"<p \t\n iD=\"a&quot;B\" foo=\"bar\"><EM>te&lt;&amp;;xt</em></p>",
`<p id="a&quot;B" foo="bar">$<em>$te&lt;&amp;;xt$</em>$</p>`,
},
// A nonexistent entity. Tokenizing and converting back to a string should
// escape the "&" to become "&amp;".
{
"noSuchEntity",
`<a b="c&noSuchEntity;d">&lt;&alsoDoesntExist;&`,
`<a b="c&amp;noSuchEntity;d">$&lt;&amp;alsoDoesntExist;&amp;`,
},
/*
// TODO: re-enable this test when it works. This input/output matches html5lib's behavior.
{
"entity without semicolon",
`&notit;&notin;<a b="q=z&amp=5&notice=hello&not;=world">`,
`¬it;∉$<a b="q=z&amp;amp=5&amp;notice=hello¬=world">`,
},
*/
{
"entity with digits",
"&frac12;",
"½",
},
// Attribute tests:
// http://dev.w3.org/html5/spec/Overview.html#attributes-0
{
"Empty attribute",
`<input disabled FOO>`,
`<input disabled="" foo="">`,
},
{
"Empty attribute, whitespace",
`<input disabled FOO >`,
`<input disabled="" foo="">`,
},
{
"Unquoted attribute value",
`<input value=yes FOO=BAR>`,
`<input value="yes" foo="BAR">`,
},
{
"Unquoted attribute value, spaces",
`<input value = yes FOO = BAR>`,
`<input value="yes" foo="BAR">`,
},
{
"Unquoted attribute value, trailing space",
`<input value=yes FOO=BAR >`,
`<input value="yes" foo="BAR">`,
},
{
"Single-quoted attribute value",
`<input value='yes' FOO='BAR'>`,
`<input value="yes" foo="BAR">`,
},
{
"Single-quoted attribute value, trailing space",
`<input value='yes' FOO='BAR' >`,
`<input value="yes" foo="BAR">`,
},
{
"Double-quoted attribute value",
`<input value="I'm an attribute" FOO="BAR">`,
`<input value="I&apos;m an attribute" foo="BAR">`,
},
{
"Attribute name characters",
`<meta http-equiv="content-type">`,
`<meta http-equiv="content-type">`,
},
{
"Mixed attributes",
`a<P V="0 1" w='2' X=3 y>z`,
`a$<p v="0 1" w="2" x="3" y="">$z`,
},
{
"Attributes with a solitary single quote",
`<p id=can't><p id=won't>`,
`<p id="can&apos;t">$<p id="won&apos;t">`,
},
}
func TestTokenizer(t *testing.T) {
loop:
for _, tt := range tokenTests {
z := NewTokenizer(strings.NewReader(tt.html))
if tt.golden != "" {
for i, s := range strings.Split(tt.golden, "$") {
if z.Next() == ErrorToken {
t.Errorf("%s token %d: want %q got error %v", tt.desc, i, s, z.Err())
continue loop
}
actual := z.Token().String()
if s != actual {
t.Errorf("%s token %d: want %q got %q", tt.desc, i, s, actual)
continue loop
}
}
}
z.Next()
if z.Err() != io.EOF {
t.Errorf("%s: want EOF got %q", tt.desc, z.Err())
}
}
}
type unescapeTest struct {
// A short description of the test case.
desc string
// The HTML text.
html string
// The unescaped text.
unescaped string
}
var unescapeTests = []unescapeTest{
// Handle no entities.
{
"copy",
"A\ttext\nstring",
"A\ttext\nstring",
},
// Handle simple named entities.
{
"simple",
"&amp; &gt; &lt;",
"& > <",
},
// Handle hitting the end of the string.
{
"stringEnd",
"&amp &amp",
"& &",
},
// Handle entities with two codepoints.
{
"multiCodepoint",
"text &gesl; blah",
"text \u22db\ufe00 blah",
},
// Handle decimal numeric entities.
{
"decimalEntity",
"Delta = &#916; ",
"Delta = Δ ",
},
// Handle hexadecimal numeric entities.
{
"hexadecimalEntity",
"Lambda = &#x3bb; = &#X3Bb ",
"Lambda = λ = λ ",
},
// Handle numeric early termination.
{
"numericEnds",
"&# &#x &#128;43 &copy = &#169f = &#xa9",
"&# &#x €43 © = ©f = ©",
},
// Handle numeric ISO-8859-1 entity replacements.
{
"numericReplacements",
"Footnote&#x87;",
"Footnote‡",
},
}
func TestUnescape(t *testing.T) {
for _, tt := range unescapeTests {
unescaped := UnescapeString(tt.html)
if unescaped != tt.unescaped {
t.Errorf("TestUnescape %s: want %q, got %q", tt.desc, tt.unescaped, unescaped)
}
}
}
func TestUnescapeEscape(t *testing.T) {
ss := []string{
``,
`abc def`,
`a & b`,
`a&amp;b`,
`a &amp b`,
`&quot;`,
`"`,
`"<&>"`,
`&quot;&lt;&amp;&gt;&quot;`,
`3&5==1 && 0<1, "0&lt;1", a+acute=&aacute;`,
}
for _, s := range ss {
if s != UnescapeString(EscapeString(s)) {
t.Errorf("s != UnescapeString(EscapeString(s)), s=%q", s)
}
}
}
func TestBufAPI(t *testing.T) {
s := "0<a>1</a>2<b>3<a>4<a>5</a>6</b>7</a>8<a/>9"
z := NewTokenizer(bytes.NewBufferString(s))
var result bytes.Buffer
depth := 0
loop:
for {
tt := z.Next()
switch tt {
case ErrorToken:
if z.Err() != io.EOF {
t.Error(z.Err())
}
break loop
case TextToken:
if depth > 0 {
result.Write(z.Text())
}
case StartTagToken, EndTagToken:
tn, _ := z.TagName()
if len(tn) == 1 && tn[0] == 'a' {
if tt == StartTagToken {
depth++
} else {
depth--
}
}
}
}
u := "14567"
v := string(result.Bytes())
if u != v {
t.Errorf("TestBufAPI: want %q got %q", u, v)
}
}
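
As a small aside, the round trip exercised by TestUnescapeEscape above can be seen in isolation with a sketch like the following (not part of the original test file; it uses only the package's exported EscapeString and UnescapeString and would need "fmt" imported):

func ExampleEscapeRoundTrip() {
	s := `3&5==1 && 0<1, "0&lt;1"`
	e := EscapeString(s)                // e.g. "&" becomes "&amp;", "<" becomes "&lt;"
	fmt.Println(UnescapeString(e) == s) // true: unescaping undoes the escaping
}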

View File

@@ -1,289 +0,0 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/*
Package inotify implements a wrapper for the Linux inotify system.
Example:
watcher, err := inotify.NewWatcher()
if err != nil {
log.Fatal(err)
}
err = watcher.Watch("/tmp")
if err != nil {
log.Fatal(err)
}
for {
select {
case ev := <-watcher.Event:
log.Println("event:", ev)
case err := <-watcher.Error:
log.Println("error:", err)
}
}
*/
package inotify
import (
"errors"
"fmt"
"os"
"strings"
"syscall"
"unsafe"
)
type Event struct {
Mask uint32 // Mask of events
Cookie uint32 // Unique cookie associating related events (for rename(2))
Name string // File name (optional)
}
type watch struct {
wd uint32 // Watch descriptor (as returned by the inotify_add_watch() syscall)
flags uint32 // inotify flags of this watch (see inotify(7) for the list of valid flags)
}
type Watcher struct {
fd int // File descriptor (as returned by the inotify_init() syscall)
watches map[string]*watch // Map of inotify watches (key: path)
paths map[int]string // Map of watched paths (key: watch descriptor)
Error chan error // Errors are sent on this channel
Event chan *Event // Events are returned on this channel
done chan bool // Channel for sending a "quit message" to the reader goroutine
isClosed bool // Set to true when Close() is first called
}
// NewWatcher creates and returns a new inotify instance using inotify_init(2)
func NewWatcher() (*Watcher, error) {
fd, errno := syscall.InotifyInit()
if fd == -1 {
return nil, os.NewSyscallError("inotify_init", errno)
}
w := &Watcher{
fd: fd,
watches: make(map[string]*watch),
paths: make(map[int]string),
Event: make(chan *Event),
Error: make(chan error),
done: make(chan bool, 1),
}
go w.readEvents()
return w, nil
}
// Close closes an inotify watcher instance
// It sends a message to the reader goroutine to quit and removes all watches
// associated with the inotify instance
func (w *Watcher) Close() error {
if w.isClosed {
return nil
}
w.isClosed = true
// Send "quit" message to the reader goroutine
w.done <- true
for path := range w.watches {
w.RemoveWatch(path)
}
return nil
}
// AddWatch adds path to the watched file set.
// The flags are interpreted as described in inotify_add_watch(2).
func (w *Watcher) AddWatch(path string, flags uint32) error {
if w.isClosed {
return errors.New("inotify instance already closed")
}
watchEntry, found := w.watches[path]
if found {
watchEntry.flags |= flags
flags |= syscall.IN_MASK_ADD
}
wd, err := syscall.InotifyAddWatch(w.fd, path, flags)
if err != nil {
return &os.PathError{
Op: "inotify_add_watch",
Path: path,
Err: err,
}
}
if !found {
w.watches[path] = &watch{wd: uint32(wd), flags: flags}
w.paths[wd] = path
}
return nil
}
// Watch adds path to the watched file set, watching all events.
func (w *Watcher) Watch(path string) error {
return w.AddWatch(path, IN_ALL_EVENTS)
}
// RemoveWatch removes path from the watched file set.
func (w *Watcher) RemoveWatch(path string) error {
watch, ok := w.watches[path]
if !ok {
return errors.New(fmt.Sprintf("can't remove non-existent inotify watch for: %s", path))
}
success, errno := syscall.InotifyRmWatch(w.fd, watch.wd)
if success == -1 {
return os.NewSyscallError("inotify_rm_watch", errno)
}
delete(w.watches, path)
return nil
}
// readEvents reads from the inotify file descriptor, converts the
// received events into Event objects and sends them via the Event channel
func (w *Watcher) readEvents() {
var buf [syscall.SizeofInotifyEvent * 4096]byte
for {
n, err := syscall.Read(w.fd, buf[0:])
// See if there is a message on the "done" channel
var done bool
select {
case done = <-w.done:
default:
}
// If EOF or a "done" message is received
if n == 0 || done {
err := syscall.Close(w.fd)
if err != nil {
w.Error <- os.NewSyscallError("close", err)
}
close(w.Event)
close(w.Error)
return
}
if n < 0 {
w.Error <- os.NewSyscallError("read", err)
continue
}
if n < syscall.SizeofInotifyEvent {
w.Error <- errors.New("inotify: short read in readEvents()")
continue
}
var offset uint32 = 0
// We don't know how many events we just read into the buffer
// While the offset points to at least one whole event...
for offset <= uint32(n-syscall.SizeofInotifyEvent) {
// Point "raw" to the event in the buffer
raw := (*syscall.InotifyEvent)(unsafe.Pointer(&buf[offset]))
event := new(Event)
event.Mask = uint32(raw.Mask)
event.Cookie = uint32(raw.Cookie)
nameLen := uint32(raw.Len)
// If the event happened to the watched directory or the watched file, the kernel
// doesn't append the filename to the event, but we would like to always fill the
// the "Name" field with a valid filename. We retrieve the path of the watch from
// the "paths" map.
event.Name = w.paths[int(raw.Wd)]
if nameLen > 0 {
// Point "bytes" at the first byte of the filename
bytes := (*[syscall.PathMax]byte)(unsafe.Pointer(&buf[offset+syscall.SizeofInotifyEvent]))
// The filename is padded with NUL bytes. TrimRight() gets rid of those.
event.Name += "/" + strings.TrimRight(string(bytes[0:nameLen]), "\000")
}
// Send the event on the events channel
w.Event <- event
// Move to the next event in the buffer
offset += syscall.SizeofInotifyEvent + nameLen
}
}
}
// String formats the event e in the form
// "filename: 0xEventMask = IN_ACCESS|IN_ATTRIB_|..."
func (e *Event) String() string {
var events string = ""
m := e.Mask
for _, b := range eventBits {
if m&b.Value != 0 {
m &^= b.Value
events += "|" + b.Name
}
}
if m != 0 {
events += fmt.Sprintf("|%#x", m)
}
if len(events) > 0 {
events = " == " + events[1:]
}
return fmt.Sprintf("%q: %#x%s", e.Name, e.Mask, events)
}
const (
// Options for inotify_init() are not exported
// IN_CLOEXEC uint32 = syscall.IN_CLOEXEC
// IN_NONBLOCK uint32 = syscall.IN_NONBLOCK
// Options for AddWatch
IN_DONT_FOLLOW uint32 = syscall.IN_DONT_FOLLOW
IN_ONESHOT uint32 = syscall.IN_ONESHOT
IN_ONLYDIR uint32 = syscall.IN_ONLYDIR
// The "IN_MASK_ADD" option is not exported, as AddWatch
// adds it automatically, if there is already a watch for the given path
// IN_MASK_ADD uint32 = syscall.IN_MASK_ADD
// Events
IN_ACCESS uint32 = syscall.IN_ACCESS
IN_ALL_EVENTS uint32 = syscall.IN_ALL_EVENTS
IN_ATTRIB uint32 = syscall.IN_ATTRIB
IN_CLOSE uint32 = syscall.IN_CLOSE
IN_CLOSE_NOWRITE uint32 = syscall.IN_CLOSE_NOWRITE
IN_CLOSE_WRITE uint32 = syscall.IN_CLOSE_WRITE
IN_CREATE uint32 = syscall.IN_CREATE
IN_DELETE uint32 = syscall.IN_DELETE
IN_DELETE_SELF uint32 = syscall.IN_DELETE_SELF
IN_MODIFY uint32 = syscall.IN_MODIFY
IN_MOVE uint32 = syscall.IN_MOVE
IN_MOVED_FROM uint32 = syscall.IN_MOVED_FROM
IN_MOVED_TO uint32 = syscall.IN_MOVED_TO
IN_MOVE_SELF uint32 = syscall.IN_MOVE_SELF
IN_OPEN uint32 = syscall.IN_OPEN
// Special events
IN_ISDIR uint32 = syscall.IN_ISDIR
IN_IGNORED uint32 = syscall.IN_IGNORED
IN_Q_OVERFLOW uint32 = syscall.IN_Q_OVERFLOW
IN_UNMOUNT uint32 = syscall.IN_UNMOUNT
)
var eventBits = []struct {
Value uint32
Name string
}{
{IN_ACCESS, "IN_ACCESS"},
{IN_ATTRIB, "IN_ATTRIB"},
{IN_CLOSE, "IN_CLOSE"},
{IN_CLOSE_NOWRITE, "IN_CLOSE_NOWRITE"},
{IN_CLOSE_WRITE, "IN_CLOSE_WRITE"},
{IN_CREATE, "IN_CREATE"},
{IN_DELETE, "IN_DELETE"},
{IN_DELETE_SELF, "IN_DELETE_SELF"},
{IN_MODIFY, "IN_MODIFY"},
{IN_MOVE, "IN_MOVE"},
{IN_MOVED_FROM, "IN_MOVED_FROM"},
{IN_MOVED_TO, "IN_MOVED_TO"},
{IN_MOVE_SELF, "IN_MOVE_SELF"},
{IN_OPEN, "IN_OPEN"},
{IN_ISDIR, "IN_ISDIR"},
{IN_IGNORED, "IN_IGNORED"},
{IN_Q_OVERFLOW, "IN_Q_OVERFLOW"},
{IN_UNMOUNT, "IN_UNMOUNT"},
}
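
Below is a minimal usage sketch of the Watcher API removed above (NewWatcher, Watch, Close and the Event/Error channels). It is illustrative only; the "exp/inotify" import path is an assumption about where this package lived and may differ.

package main

import (
	"log"

	"exp/inotify" // assumed import path for the package above
)

func main() {
	watcher, err := inotify.NewWatcher()
	if err != nil {
		log.Fatal(err)
	}
	defer watcher.Close()
	if err := watcher.Watch("/tmp"); err != nil {
		log.Fatal(err)
	}
	// Events and errors arrive on two separate channels.
	for {
		select {
		case ev := <-watcher.Event:
			log.Println("event:", ev) // Event.String decodes the mask, e.g. IN_CREATE|IN_ISDIR
		case err := <-watcher.Error:
			log.Println("error:", err)
		}
	}
}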

View File

@@ -1,106 +0,0 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build linux
package inotify
import (
"io/ioutil"
"os"
"testing"
"time"
)
func TestInotifyEvents(t *testing.T) {
// Create an inotify watcher instance and initialize it
watcher, err := NewWatcher()
if err != nil {
t.Fatalf("NewWatcher failed: %s", err)
}
dir, err := ioutil.TempDir("", "inotify")
if err != nil {
t.Fatalf("TempDir failed: %s", err)
}
defer os.RemoveAll(dir)
// Add a watch for "_test"
err = watcher.Watch(dir)
if err != nil {
t.Fatalf("Watch failed: %s", err)
}
// Receive errors on the error channel on a separate goroutine
go func() {
for err := range watcher.Error {
t.Fatalf("error received: %s", err)
}
}()
testFile := dir + "/TestInotifyEvents.testfile"
// Receive events on the event channel on a separate goroutine
eventstream := watcher.Event
var eventsReceived = 0
done := make(chan bool)
go func() {
for event := range eventstream {
// Only count relevant events
if event.Name == testFile {
eventsReceived++
t.Logf("event received: %s", event)
} else {
t.Logf("unexpected event received: %s", event)
}
}
done <- true
}()
// Create a file
// This should add at least one event to the inotify event queue
_, err = os.OpenFile(testFile, os.O_WRONLY|os.O_CREATE, 0666)
if err != nil {
t.Fatalf("creating test file: %s", err)
}
// We expect this event to be received almost immediately, but let's wait 1 s to be sure
time.Sleep(1 * time.Second)
if eventsReceived == 0 {
t.Fatal("inotify event hasn't been received after 1 second")
}
// Try closing the inotify instance
t.Log("calling Close()")
watcher.Close()
t.Log("waiting for the event channel to become closed...")
select {
case <-done:
t.Log("event channel closed")
case <-time.After(1 * time.Second):
t.Fatal("event stream was not closed after 1 second")
}
}
func TestInotifyClose(t *testing.T) {
watcher, _ := NewWatcher()
watcher.Close()
done := make(chan bool)
go func() {
watcher.Close()
done <- true
}()
select {
case <-done:
case <-time.After(50 * time.Millisecond):
t.Fatal("double Close() test failed: second Close() call didn't return")
}
err := watcher.Watch(os.TempDir())
if err == nil {
t.Fatal("expected error on Watch() after Close(), got nil")
}
}

View File

@@ -1,30 +0,0 @@
# Copyright 2011 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
maketables: maketables.go triegen.go
go build $^
maketesttables: maketesttables.go triegen.go
go build $^
normregtest: normregtest.go
go build $^
tables: maketables
./maketables > tables.go
gofmt -w tables.go
trietesttables: maketesttables
./maketesttables > triedata_test.go
gofmt -w triedata_test.go
# Downloads from www.unicode.org, so not part
# of standard test scripts.
test: testtables regtest
testtables: maketables
./maketables -test -tables=
regtest: normregtest
./normregtest

View File

@@ -1,386 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import "unicode/utf8"
const (
maxCombiningChars = 30
maxBufferSize = maxCombiningChars + 2 // +1 to hold starter +1 to hold CGJ
maxBackRunes = maxCombiningChars - 1
maxNFCExpansion = 3 // NFC(0x1D160)
maxNFKCExpansion = 18 // NFKC(0xFDFA)
maxByteBufferSize = utf8.UTFMax * maxBufferSize // 128
)
// reorderBuffer is used to normalize a single segment. Characters inserted with
// insert are decomposed and reordered based on CCC. The compose method can
// be used to recombine characters. Note that the byte buffer does not hold
// the UTF-8 characters in order. Only the rune array is maintained in sorted
// order. flush writes the resulting segment to a byte array.
type reorderBuffer struct {
rune [maxBufferSize]runeInfo // Per character info.
byte [maxByteBufferSize]byte // UTF-8 buffer. Referenced by runeInfo.pos.
nrune int // Number of runeInfos.
nbyte uint8 // Number of bytes.
f formInfo
src input
nsrc int
srcBytes inputBytes
srcString inputString
tmpBytes inputBytes
}
func (rb *reorderBuffer) init(f Form, src []byte) {
rb.f = *formTable[f]
rb.srcBytes = inputBytes(src)
rb.src = &rb.srcBytes
rb.nsrc = len(src)
}
func (rb *reorderBuffer) initString(f Form, src string) {
rb.f = *formTable[f]
rb.srcString = inputString(src)
rb.src = &rb.srcString
rb.nsrc = len(src)
}
// reset discards all characters from the buffer.
func (rb *reorderBuffer) reset() {
rb.nrune = 0
rb.nbyte = 0
}
// flush appends the normalized segment to out and resets rb.
func (rb *reorderBuffer) flush(out []byte) []byte {
for i := 0; i < rb.nrune; i++ {
start := rb.rune[i].pos
end := start + rb.rune[i].size
out = append(out, rb.byte[start:end]...)
}
rb.reset()
return out
}
// flushCopy copies the normalized segment to buf and resets rb.
// It returns the number of bytes written to buf.
func (rb *reorderBuffer) flushCopy(buf []byte) int {
p := 0
for i := 0; i < rb.nrune; i++ {
runep := rb.rune[i]
p += copy(buf[p:], rb.byte[runep.pos:runep.pos+runep.size])
}
rb.reset()
return p
}
// insertOrdered inserts a rune in the buffer, ordered by Canonical Combining Class.
// It returns false if the buffer is not large enough to hold the rune.
// It is used internally by insert and insertString only.
func (rb *reorderBuffer) insertOrdered(info runeInfo) bool {
n := rb.nrune
if n >= maxCombiningChars+1 {
return false
}
b := rb.rune[:]
cc := info.ccc
if cc > 0 {
// Find insertion position + move elements to make room.
for ; n > 0; n-- {
if b[n-1].ccc <= cc {
break
}
b[n] = b[n-1]
}
}
rb.nrune += 1
pos := uint8(rb.nbyte)
rb.nbyte += utf8.UTFMax
info.pos = pos
b[n] = info
return true
}
// insert inserts the given rune in the buffer ordered by CCC.
// It returns true if the buffer was large enough to hold the decomposed rune.
func (rb *reorderBuffer) insert(src input, i int, info runeInfo) bool {
if rune := src.hangul(i); rune != 0 {
return rb.decomposeHangul(rune)
}
if info.hasDecomposition() {
return rb.insertDecomposed(info.decomposition())
}
return rb.insertSingle(src, i, info)
}
// insertDecomposed inserts an entry in to the reorderBuffer for each rune
// in dcomp. dcomp must be a sequence of decomposed UTF-8-encoded runes.
func (rb *reorderBuffer) insertDecomposed(dcomp []byte) bool {
saveNrune, saveNbyte := rb.nrune, rb.nbyte
rb.tmpBytes = inputBytes(dcomp)
for i := 0; i < len(dcomp); {
info := rb.f.info(&rb.tmpBytes, i)
pos := rb.nbyte
if !rb.insertOrdered(info) {
rb.nrune, rb.nbyte = saveNrune, saveNbyte
return false
}
i += copy(rb.byte[pos:], dcomp[i:i+int(info.size)])
}
return true
}
// insertSingle inserts an entry in the reorderBuffer for the rune at
// position i. info is the runeInfo for the rune at position i.
func (rb *reorderBuffer) insertSingle(src input, i int, info runeInfo) bool {
// insertOrdered changes nbyte
pos := rb.nbyte
if !rb.insertOrdered(info) {
return false
}
src.copySlice(rb.byte[pos:], i, i+int(info.size))
return true
}
// appendRune inserts a rune at the end of the buffer. It is used for Hangul.
func (rb *reorderBuffer) appendRune(r rune) {
bn := rb.nbyte
sz := utf8.EncodeRune(rb.byte[bn:], rune(r))
rb.nbyte += utf8.UTFMax
rb.rune[rb.nrune] = runeInfo{pos: bn, size: uint8(sz)}
rb.nrune++
}
// assignRune sets a rune at position pos. It is used for Hangul and recomposition.
func (rb *reorderBuffer) assignRune(pos int, r rune) {
bn := rb.rune[pos].pos
sz := utf8.EncodeRune(rb.byte[bn:], rune(r))
rb.rune[pos] = runeInfo{pos: bn, size: uint8(sz)}
}
// runeAt returns the rune at position n. It is used for Hangul and recomposition.
func (rb *reorderBuffer) runeAt(n int) rune {
inf := rb.rune[n]
r, _ := utf8.DecodeRune(rb.byte[inf.pos : inf.pos+inf.size])
return r
}
// bytesAt returns the UTF-8 encoding of the rune at position n.
// It is used for Hangul and recomposition.
func (rb *reorderBuffer) bytesAt(n int) []byte {
inf := rb.rune[n]
return rb.byte[inf.pos : int(inf.pos)+int(inf.size)]
}
// For Hangul we combine algorithmically, instead of using tables.
const (
hangulBase = 0xAC00 // UTF-8(hangulBase) -> EA B0 80
hangulBase0 = 0xEA
hangulBase1 = 0xB0
hangulBase2 = 0x80
hangulEnd = hangulBase + jamoLVTCount // UTF-8(0xD7A4) -> ED 9E A4
hangulEnd0 = 0xED
hangulEnd1 = 0x9E
hangulEnd2 = 0xA4
jamoLBase = 0x1100 // UTF-8(jamoLBase) -> E1 84 00
jamoLBase0 = 0xE1
jamoLBase1 = 0x84
jamoLEnd = 0x1113
jamoVBase = 0x1161
jamoVEnd = 0x1176
jamoTBase = 0x11A7
jamoTEnd = 0x11C3
jamoTCount = 28
jamoVCount = 21
jamoVTCount = 21 * 28
jamoLVTCount = 19 * 21 * 28
)
const hangulUTF8Size = 3
func isHangul(b []byte) bool {
if len(b) < hangulUTF8Size {
return false
}
b0 := b[0]
if b0 < hangulBase0 {
return false
}
b1 := b[1]
switch {
case b0 == hangulBase0:
return b1 >= hangulBase1
case b0 < hangulEnd0:
return true
case b0 > hangulEnd0:
return false
case b1 < hangulEnd1:
return true
}
return b1 == hangulEnd1 && b[2] < hangulEnd2
}
func isHangulString(b string) bool {
if len(b) < hangulUTF8Size {
return false
}
b0 := b[0]
if b0 < hangulBase0 {
return false
}
b1 := b[1]
switch {
case b0 == hangulBase0:
return b1 >= hangulBase1
case b0 < hangulEnd0:
return true
case b0 > hangulEnd0:
return false
case b1 < hangulEnd1:
return true
}
return b1 == hangulEnd1 && b[2] < hangulEnd2
}
// Caller must ensure len(b) >= 2.
func isJamoVT(b []byte) bool {
// True if (rune & 0xff00) == jamoLBase
return b[0] == jamoLBase0 && (b[1]&0xFC) == jamoLBase1
}
func isHangulWithoutJamoT(b []byte) bool {
c, _ := utf8.DecodeRune(b)
c -= hangulBase
return c < jamoLVTCount && c%jamoTCount == 0
}
// decomposeHangul writes the decomposed Hangul to buf and returns the number
// of bytes written. len(buf) should be at least 9.
func decomposeHangul(buf []byte, r rune) int {
const JamoUTF8Len = 3
r -= hangulBase
x := r % jamoTCount
r /= jamoTCount
utf8.EncodeRune(buf, jamoLBase+r/jamoVCount)
utf8.EncodeRune(buf[JamoUTF8Len:], jamoVBase+r%jamoVCount)
if x != 0 {
utf8.EncodeRune(buf[2*JamoUTF8Len:], jamoTBase+x)
return 3 * JamoUTF8Len
}
return 2 * JamoUTF8Len
}
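
A worked check of the arithmetic above (a standalone sketch, not part of the package; the constants are copied from the ones defined earlier in this file): for r = U+AC01 ("각"), r-hangulBase = 1, so the trailing-jamo index x = 1%28 = 1 and r/28 = 0, which gives L = 0x1100, V = 0x1161 and T = 0x11A7+1 = 0x11A8.

package main

import "fmt"

func main() {
	const (
		hangulBase = 0xAC00
		jamoLBase  = 0x1100
		jamoVBase  = 0x1161
		jamoTBase  = 0x11A7
		jamoVCount = 21
		jamoTCount = 28
	)
	r := rune(0xAC01) // 각
	r -= hangulBase
	x := r % jamoTCount
	r /= jamoTCount
	fmt.Printf("%U %U", jamoLBase+r/jamoVCount, jamoVBase+r%jamoVCount)
	if x != 0 {
		fmt.Printf(" %U", jamoTBase+x)
	}
	fmt.Println() // output: U+1100 U+1161 U+11A8
}
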
// decomposeHangul algorithmically decomposes a Hangul rune into
// its Jamo components.
// See http://unicode.org/reports/tr15/#Hangul for details on decomposing Hangul.
func (rb *reorderBuffer) decomposeHangul(r rune) bool {
b := rb.rune[:]
n := rb.nrune
if n+3 > len(b) {
return false
}
r -= hangulBase
x := r % jamoTCount
r /= jamoTCount
rb.appendRune(jamoLBase + r/jamoVCount)
rb.appendRune(jamoVBase + r%jamoVCount)
if x != 0 {
rb.appendRune(jamoTBase + x)
}
return true
}
// combineHangul algorithmically combines Jamo character components into Hangul.
// See http://unicode.org/reports/tr15/#Hangul for details on combining Hangul.
func (rb *reorderBuffer) combineHangul(s, i, k int) {
b := rb.rune[:]
bn := rb.nrune
for ; i < bn; i++ {
cccB := b[k-1].ccc
cccC := b[i].ccc
if cccB == 0 {
s = k - 1
}
if s != k-1 && cccB >= cccC {
// b[i] is blocked by greater-equal cccX below it
b[k] = b[i]
k++
} else {
l := rb.runeAt(s) // also used to compare to hangulBase
v := rb.runeAt(i) // also used to compare to jamoT
switch {
case jamoLBase <= l && l < jamoLEnd &&
jamoVBase <= v && v < jamoVEnd:
// 11xx plus 116x to LV
rb.assignRune(s, hangulBase+
(l-jamoLBase)*jamoVTCount+(v-jamoVBase)*jamoTCount)
case hangulBase <= l && l < hangulEnd &&
jamoTBase < v && v < jamoTEnd &&
((l-hangulBase)%jamoTCount) == 0:
// ACxx plus 11Ax to LVT
rb.assignRune(s, l+v-jamoTBase)
default:
b[k] = b[i]
k++
}
}
}
rb.nrune = k
}
// compose recombines the runes in the buffer.
// It should only be used to recompose a single segment, as it will not
// handle alternations between Hangul and non-Hangul characters correctly.
func (rb *reorderBuffer) compose() {
// UAX #15, section X5, including Corrigendum #5
// "In any character sequence beginning with starter S, a character C is
// blocked from S if and only if there is some character B between S
// and C, and either B is a starter or it has the same or higher
// combining class as C."
bn := rb.nrune
if bn == 0 {
return
}
k := 1
b := rb.rune[:]
for s, i := 0, 1; i < bn; i++ {
if isJamoVT(rb.bytesAt(i)) {
// Redo from start in Hangul mode. Necessary to support
// U+320E..U+321E in NFKC mode.
rb.combineHangul(s, i, k)
return
}
ii := b[i]
// We can only use combineForward as a filter if we later
// get the info for the combined character. This is more
// expensive than using the filter. Using combinesBackward()
// is safe.
if ii.combinesBackward() {
cccB := b[k-1].ccc
cccC := ii.ccc
blocked := false // b[i] blocked by starter or greater or equal CCC?
if cccB == 0 {
s = k - 1
} else {
blocked = s != k-1 && cccB >= cccC
}
if !blocked {
combined := combine(rb.runeAt(s), rb.runeAt(i))
if combined != 0 {
rb.assignRune(s, combined)
continue
}
}
}
b[k] = b[i]
k++
}
rb.nrune = k
}
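
A concrete instance of the blocking rule quoted in compose, mirroring one of the composition test cases later in this diff: in the sequence U+0041 U+0316 U+0300, the intervening U+0316 (ccc 220) does not block U+0300 (ccc 230) because its combining class is lower, so the 'A' still composes with the grave accent and NFC yields U+00C0 U+0316. A standalone sketch, assuming the package is importable as "exp/norm":

package main

import (
	"fmt"

	"exp/norm" // assumed import path for this package
)

func main() {
	in := "\u0041\u0316\u0300" // A, combining grave accent below (ccc 220), combining grave accent (ccc 230)
	fmt.Printf("%+q\n", norm.NFC.String(in)) // "\u00c0\u0316"
}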

View File

@@ -1,143 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import "testing"
// TestCase is used for most tests.
type TestCase struct {
in []rune
out []rune
}
type insertFunc func(rb *reorderBuffer, r rune) bool
func insert(rb *reorderBuffer, r rune) bool {
src := inputString(string(r))
return rb.insert(src, 0, rb.f.info(src, 0))
}
func runTests(t *testing.T, name string, fm Form, f insertFunc, tests []TestCase) {
rb := reorderBuffer{}
rb.init(fm, nil)
for i, test := range tests {
rb.reset()
for j, rune := range test.in {
b := []byte(string(rune))
src := inputBytes(b)
if !rb.insert(src, 0, rb.f.info(src, 0)) {
t.Errorf("%s:%d: insert failed for rune %d", name, i, j)
}
}
if rb.f.composing {
rb.compose()
}
if rb.nrune != len(test.out) {
t.Errorf("%s:%d: length = %d; want %d", name, i, rb.nrune, len(test.out))
continue
}
for j, want := range test.out {
found := rune(rb.runeAt(j))
if found != want {
t.Errorf("%s:%d: runeAt(%d) = %U; want %U", name, i, j, found, want)
}
}
}
}
type flushFunc func(rb *reorderBuffer) []byte
func testFlush(t *testing.T, name string, fn flushFunc) {
rb := reorderBuffer{}
rb.init(NFC, nil)
out := fn(&rb)
if len(out) != 0 {
t.Errorf("%s: wrote bytes on flush of empty buffer. (len(out) = %d)", name, len(out))
}
for _, r := range []rune("world!") {
insert(&rb, r)
}
out = []byte("Hello ")
out = rb.flush(out)
want := "Hello world!"
if string(out) != want {
t.Errorf(`%s: output after flush was "%s"; want "%s"`, name, string(out), want)
}
if rb.nrune != 0 {
t.Errorf("%s: non-null size of info buffer (rb.nrune == %d)", name, rb.nrune)
}
if rb.nbyte != 0 {
t.Errorf("%s: non-null size of byte buffer (rb.nbyte == %d)", name, rb.nbyte)
}
}
func flushF(rb *reorderBuffer) []byte {
out := make([]byte, 0)
return rb.flush(out)
}
func flushCopyF(rb *reorderBuffer) []byte {
out := make([]byte, MaxSegmentSize)
n := rb.flushCopy(out)
return out[:n]
}
func TestFlush(t *testing.T) {
testFlush(t, "flush", flushF)
testFlush(t, "flushCopy", flushCopyF)
}
var insertTests = []TestCase{
{[]rune{'a'}, []rune{'a'}},
{[]rune{0x300}, []rune{0x300}},
{[]rune{0x300, 0x316}, []rune{0x316, 0x300}}, // CCC(0x300)==230; CCC(0x316)==220
{[]rune{0x316, 0x300}, []rune{0x316, 0x300}},
{[]rune{0x41, 0x316, 0x300}, []rune{0x41, 0x316, 0x300}},
{[]rune{0x41, 0x300, 0x316}, []rune{0x41, 0x316, 0x300}},
{[]rune{0x300, 0x316, 0x41}, []rune{0x316, 0x300, 0x41}},
{[]rune{0x41, 0x300, 0x40, 0x316}, []rune{0x41, 0x300, 0x40, 0x316}},
}
func TestInsert(t *testing.T) {
runTests(t, "TestInsert", NFD, insert, insertTests)
}
var decompositionNFDTest = []TestCase{
{[]rune{0xC0}, []rune{0x41, 0x300}},
{[]rune{0xAC00}, []rune{0x1100, 0x1161}},
{[]rune{0x01C4}, []rune{0x01C4}},
{[]rune{0x320E}, []rune{0x320E}},
{[]rune("음ẻ과"), []rune{0x110B, 0x1173, 0x11B7, 0x65, 0x309, 0x1100, 0x116A}},
}
var decompositionNFKDTest = []TestCase{
{[]rune{0xC0}, []rune{0x41, 0x300}},
{[]rune{0xAC00}, []rune{0x1100, 0x1161}},
{[]rune{0x01C4}, []rune{0x44, 0x5A, 0x030C}},
{[]rune{0x320E}, []rune{0x28, 0x1100, 0x1161, 0x29}},
}
func TestDecomposition(t *testing.T) {
runTests(t, "TestDecompositionNFD", NFD, insert, decompositionNFDTest)
runTests(t, "TestDecompositionNFKD", NFKD, insert, decompositionNFKDTest)
}
var compositionTest = []TestCase{
{[]rune{0x41, 0x300}, []rune{0xC0}},
{[]rune{0x41, 0x316}, []rune{0x41, 0x316}},
{[]rune{0x41, 0x300, 0x35D}, []rune{0xC0, 0x35D}},
{[]rune{0x41, 0x316, 0x300}, []rune{0xC0, 0x316}},
// blocking starter
{[]rune{0x41, 0x316, 0x40, 0x300}, []rune{0x41, 0x316, 0x40, 0x300}},
{[]rune{0x1100, 0x1161}, []rune{0xAC00}},
// parenthesized Hangul, alternate between ASCII and Hangul.
{[]rune{0x28, 0x1100, 0x1161, 0x29}, []rune{0x28, 0xAC00, 0x29}},
}
func TestComposition(t *testing.T) {
runTests(t, "TestComposition", NFC, insert, compositionTest)
}

View File

@@ -1,174 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
// This file contains Form-specific logic and wrappers for data in tables.go.
// Rune info is stored in a separate trie per composing form. A composing form
// and its corresponding decomposing form share the same trie. Each trie maps
// a rune to a uint16. The values take two forms. For v >= 0x8000:
// bits
// 0..7: ccc
// 8..11: qcInfo (see below). isYesD is always true (no decomposition).
// 15: 1
// For v < 0x8000, the respective rune has a decomposition and v is an index
// into a byte array of UTF-8 decomposition sequences and additional info and
// has the form:
// <header> <decomp_byte>* [<tccc> [<lccc>]]
// The header contains the number of bytes in the decomposition (excluding this
// length byte). The two most significant bits of this length byte correspond
// to bits 2 and 3 of qcInfo (see below). The byte sequence itself starts at v+1.
// The byte sequence is followed by a trailing and leading CCC if the values
// for these are not zero. The value of v determines which ccc are appended
// to the sequences. For v < firstCCC, there are none, for v >= firstCCC,
// the sequence is followed by a trailing ccc, and for v >= firstLeadingCC
// there is an additional leading ccc.
const (
qcInfoMask = 0xF // to clear all but the relevant bits in a qcInfo
headerLenMask = 0x3F // extract the length value from the header byte
headerFlagsMask = 0xC0 // extract the qcInfo bits from the header byte
)
// runeInfo is a representation for the data stored in charinfoTrie.
type runeInfo struct {
pos uint8 // start position in reorderBuffer; used in composition.go
size uint8 // length of UTF-8 encoding of this rune
ccc uint8 // leading canonical combining class (ccc if not decomposition)
tccc uint8 // trailing canonical combining class (ccc if not decomposition)
flags qcInfo // quick check flags
index uint16
}
// functions dispatchable per form
type lookupFunc func(b input, i int) runeInfo
// formInfo holds Form-specific functions and tables.
type formInfo struct {
form Form
composing, compatibility bool // form type
info lookupFunc
}
var formTable []*formInfo
func init() {
formTable = make([]*formInfo, 4)
for i := range formTable {
f := &formInfo{}
formTable[i] = f
f.form = Form(i)
if Form(i) == NFKD || Form(i) == NFKC {
f.compatibility = true
f.info = lookupInfoNFKC
} else {
f.info = lookupInfoNFC
}
if Form(i) == NFC || Form(i) == NFKC {
f.composing = true
}
}
}
// We do not distinguish between boundaries for NFC, NFD, etc. to avoid
// unexpected behavior for the user. For example, in NFD, there is a boundary
// after 'a'. However, a might combine with modifiers, so from the application's
// perspective it is not a good boundary. We will therefore always use the
// boundaries for the combining variants.
func (i runeInfo) boundaryBefore() bool {
if i.ccc == 0 && !i.combinesBackward() {
return true
}
// We assume that the CCC of the first character in a decomposition
// is always non-zero if different from info.ccc and that we can return
// false at this point. This is verified by maketables.
return false
}
func (i runeInfo) boundaryAfter() bool {
return i.isInert()
}
// We pack quick check data in 4 bits:
// 0: NFD_QC Yes (0) or No (1). No also means there is a decomposition.
// 1..2: NFC_QC Yes(00), No (10), or Maybe (11)
// 3: Combines forward (0 == false, 1 == true)
//
// When all 4 bits are zero, the character is inert, meaning it is never
// influenced by normalization.
type qcInfo uint8
func (i runeInfo) isYesC() bool { return i.flags&0x4 == 0 }
func (i runeInfo) isYesD() bool { return i.flags&0x1 == 0 }
func (i runeInfo) combinesForward() bool { return i.flags&0x8 != 0 }
func (i runeInfo) combinesBackward() bool { return i.flags&0x2 != 0 } // == isMaybe
func (i runeInfo) hasDecomposition() bool { return i.flags&0x1 != 0 } // == isNoD
func (r runeInfo) isInert() bool {
return r.flags&0xf == 0 && r.ccc == 0
}
func (r runeInfo) decomposition() []byte {
if r.index == 0 {
return nil
}
p := r.index
n := decomps[p] & 0x3F
p++
return decomps[p : p+uint16(n)]
}
// Recomposition
// We use 32-bit keys instead of 64-bit for the two codepoint keys.
// This clips off the bits of three entries, but we know this will not
// result in a collision. In the unlikely event that changes to
// UnicodeData.txt introduce collisions, the compiler will catch it.
// Note that the recomposition map for NFC and NFKC are identical.
// combine returns the combined rune or 0 if it doesn't exist.
func combine(a, b rune) rune {
key := uint32(uint16(a))<<16 + uint32(uint16(b))
return recompMap[key]
}
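
To make the 32-bit key concrete (a standalone sketch; the mapped value is the standard canonical composition, not looked up in recompMap here): composing 'A' (U+0041) with COMBINING GRAVE ACCENT (U+0300) uses the key 0x00410300, which the generated recompMap resolves to U+00C0.

package main

import "fmt"

func main() {
	a, b := rune(0x41), rune(0x300) // 'A' + COMBINING GRAVE ACCENT
	// Same key construction as combine: first rune in the high 16 bits, second in the low 16 bits.
	key := uint32(uint16(a))<<16 + uint32(uint16(b))
	fmt.Printf("%#08x\n", key) // 0x00410300 -> recompMap yields U+00C0 ('À')
}
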
func lookupInfoNFC(b input, i int) runeInfo {
v, sz := b.charinfoNFC(i)
return compInfo(v, sz)
}
func lookupInfoNFKC(b input, i int) runeInfo {
v, sz := b.charinfoNFKC(i)
return compInfo(v, sz)
}
// compInfo converts the information contained in v and sz
// to a runeInfo. See the comment at the top of the file
// for more information on the format.
func compInfo(v uint16, sz int) runeInfo {
if v == 0 {
return runeInfo{size: uint8(sz)}
} else if v >= 0x8000 {
return runeInfo{
size: uint8(sz),
ccc: uint8(v),
tccc: uint8(v),
flags: qcInfo(v>>8) & qcInfoMask,
}
}
// has decomposition
h := decomps[v]
f := (qcInfo(h&headerFlagsMask) >> 4) | 0x1
ri := runeInfo{size: uint8(sz), flags: f, index: v}
if v >= firstCCC {
v += uint16(h&headerLenMask) + 1
ri.tccc = decomps[v]
if v >= firstLeadingCCC {
ri.ccc = decomps[v+1]
}
}
return ri
}
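
A worked decode of an inline (v >= 0x8000) trie value, following the same bit layout compInfo uses. The packed value below is hypothetical but shaped like the entry for a combining mark such as U+0300: ccc 230 in the low byte, qcInfo 0x6 (NFC "Maybe", i.e. combines backward) in bits 8..11, and bit 15 set.

package main

import "fmt"

func main() {
	v := uint16(0x8000 | 0x6<<8 | 230) // hypothetical packed value: 0x86e6
	ccc := uint8(v)                    // low 8 bits, as in compInfo
	qc := (v >> 8) & 0xF               // masked with qcInfoMask
	fmt.Printf("v=%#04x ccc=%d qcInfo=%#x combinesBackward=%v\n", v, ccc, qc, qc&0x2 != 0)
	// v=0x86e6 ccc=230 qcInfo=0x6 combinesBackward=true
}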

View File

@@ -1,96 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import "unicode/utf8"
type input interface {
skipASCII(p, max int) int
skipNonStarter(p int) int
appendSlice(buf []byte, s, e int) []byte
copySlice(buf []byte, s, e int)
charinfoNFC(p int) (uint16, int)
charinfoNFKC(p int) (uint16, int)
hangul(p int) rune
}
type inputString string
func (s inputString) skipASCII(p, max int) int {
for ; p < max && s[p] < utf8.RuneSelf; p++ {
}
return p
}
func (s inputString) skipNonStarter(p int) int {
for ; p < len(s) && !utf8.RuneStart(s[p]); p++ {
}
return p
}
func (s inputString) appendSlice(buf []byte, b, e int) []byte {
for i := b; i < e; i++ {
buf = append(buf, s[i])
}
return buf
}
func (s inputString) copySlice(buf []byte, b, e int) {
copy(buf, s[b:e])
}
func (s inputString) charinfoNFC(p int) (uint16, int) {
return nfcTrie.lookupString(string(s[p:]))
}
func (s inputString) charinfoNFKC(p int) (uint16, int) {
return nfkcTrie.lookupString(string(s[p:]))
}
func (s inputString) hangul(p int) rune {
if !isHangulString(string(s[p:])) {
return 0
}
rune, _ := utf8.DecodeRuneInString(string(s[p:]))
return rune
}
type inputBytes []byte
func (s inputBytes) skipASCII(p, max int) int {
for ; p < max && s[p] < utf8.RuneSelf; p++ {
}
return p
}
func (s inputBytes) skipNonStarter(p int) int {
for ; p < len(s) && !utf8.RuneStart(s[p]); p++ {
}
return p
}
func (s inputBytes) appendSlice(buf []byte, b, e int) []byte {
return append(buf, s[b:e]...)
}
func (s inputBytes) copySlice(buf []byte, b, e int) {
copy(buf, s[b:e])
}
func (s inputBytes) charinfoNFC(p int) (uint16, int) {
return nfcTrie.lookup(s[p:])
}
func (s inputBytes) charinfoNFKC(p int) (uint16, int) {
return nfkcTrie.lookup(s[p:])
}
func (s inputBytes) hangul(p int) rune {
if !isHangul(s[p:]) {
return 0
}
rune, _ := utf8.DecodeRune(s[p:])
return rune
}

View File

@@ -1,286 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
const MaxSegmentSize = maxByteBufferSize
// An Iter iterates over a string or byte slice, while normalizing it
// to a given Form.
type Iter struct {
rb reorderBuffer
info runeInfo // first character saved from previous iteration
next iterFunc // implementation of next depends on form
p int // current position in input source
outStart int // start of current segment in output buffer
inStart int // start of current segment in input source
maxp int // position in output buffer after which not to start a new segment
maxseg int // for tracking an excess of combining characters
tccc uint8
done bool
}
type iterFunc func(*Iter, []byte) int
// SetInput initializes i to iterate over src after normalizing it to Form f.
func (i *Iter) SetInput(f Form, src []byte) {
i.rb.init(f, src)
if i.rb.f.composing {
i.next = nextComposed
} else {
i.next = nextDecomposed
}
i.p = 0
if i.done = len(src) == 0; !i.done {
i.info = i.rb.f.info(i.rb.src, i.p)
}
}
// SetInputString initializes i to iterate over src after normalizing it to Form f.
func (i *Iter) SetInputString(f Form, src string) {
i.rb.initString(f, src)
if i.rb.f.composing {
i.next = nextComposed
} else {
i.next = nextDecomposed
}
i.p = 0
if i.done = len(src) == 0; !i.done {
i.info = i.rb.f.info(i.rb.src, i.p)
}
}
// Pos returns the byte position at which the next call to Next will commence processing.
func (i *Iter) Pos() int {
return i.p
}
// Done returns true if there is no more input to process.
func (i *Iter) Done() bool {
return i.done
}
// Next writes f(i.input[i.Pos():n]...) to buffer buf, where n is the
// largest boundary of i.input such that the result fits in buf.
// It returns the number of bytes written to buf.
// len(buf) should be at least MaxSegmentSize.
// Done must be false before calling Next.
func (i *Iter) Next(buf []byte) int {
return i.next(i, buf)
}
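
A minimal sketch of driving the iterator, mirroring the pattern used by this package's own iter tests (the "exp/norm" import path is an assumption):

package main

import (
	"fmt"

	"exp/norm" // assumed import path for this package
)

func main() {
	var it norm.Iter
	it.SetInputString(norm.NFD, "áé")
	buf := make([]byte, norm.MaxSegmentSize) // Next needs at least MaxSegmentSize bytes
	var out []byte
	for !it.Done() {
		n := it.Next(buf)
		out = append(out, buf[:n]...)
	}
	fmt.Printf("%+q\n", out) // "a\u0301e\u0301"
}
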
func (i *Iter) initNext(outn, inStart int) {
i.outStart = 0
i.inStart = inStart
i.maxp = outn - MaxSegmentSize
i.maxseg = MaxSegmentSize
}
// setStart resets the start of the new segment to the given position.
// It returns true if there is not enough room for the new segment.
func (i *Iter) setStart(outp, inp int) bool {
if outp > i.maxp {
return true
}
i.outStart = outp
i.inStart = inp
i.maxseg = outp + MaxSegmentSize
return false
}
func min(a, b int) int {
if a < b {
return a
}
return b
}
// nextDecomposed is the implementation of Next for forms NFD and NFKD.
func nextDecomposed(i *Iter, out []byte) int {
var outp int
i.initNext(len(out), i.p)
doFast:
inCopyStart, outCopyStart := i.p, outp // invariant xCopyStart <= i.xStart
for {
if sz := int(i.info.size); sz <= 1 {
// ASCII or illegal byte. Either way, advance by 1.
i.p++
outp++
max := min(i.rb.nsrc, len(out)-outp+i.p)
if np := i.rb.src.skipASCII(i.p, max); np > i.p {
outp += np - i.p
i.p = np
if i.p >= i.rb.nsrc {
break
}
// ASCII may combine with consecutive runes.
if i.setStart(outp-1, i.p-1) {
i.p--
outp--
i.info.size = 1
break
}
}
} else if d := i.info.decomposition(); d != nil {
i.rb.src.copySlice(out[outCopyStart:], inCopyStart, i.p)
p := outp + len(d)
if p > i.maxseg && i.setStart(outp, i.p) {
return outp
}
copy(out[outp:], d)
outp = p
i.p += sz
inCopyStart, outCopyStart = i.p, outp
} else if r := i.rb.src.hangul(i.p); r != 0 {
i.rb.src.copySlice(out[outCopyStart:], inCopyStart, i.p)
for {
outp += decomposeHangul(out[outp:], r)
i.p += hangulUTF8Size
if r = i.rb.src.hangul(i.p); r == 0 {
break
}
if i.setStart(outp, i.p) {
return outp
}
}
inCopyStart, outCopyStart = i.p, outp
} else {
p := outp + sz
if p > i.maxseg && i.setStart(outp, i.p) {
break
}
outp = p
i.p += sz
}
if i.p >= i.rb.nsrc {
break
}
prevCC := i.info.tccc
i.info = i.rb.f.info(i.rb.src, i.p)
if cc := i.info.ccc; cc == 0 {
if i.setStart(outp, i.p) {
break
}
} else if cc < prevCC {
goto doNorm
}
}
if inCopyStart != i.p {
i.rb.src.copySlice(out[outCopyStart:], inCopyStart, i.p)
}
i.done = i.p >= i.rb.nsrc
return outp
doNorm:
// Insert what we have decomposed so far in the reorderBuffer.
// As we will only reorder, there will always be enough room.
i.rb.src.copySlice(out[outCopyStart:], inCopyStart, i.p)
if !i.rb.insertDecomposed(out[i.outStart:outp]) {
// Start over to prevent decompositions from crossing segment boundaries.
// This is a rare occurrence.
i.p = i.inStart
i.info = i.rb.f.info(i.rb.src, i.p)
}
outp = i.outStart
for {
if !i.rb.insert(i.rb.src, i.p, i.info) {
break
}
if i.p += int(i.info.size); i.p >= i.rb.nsrc {
outp += i.rb.flushCopy(out[outp:])
i.done = true
return outp
}
i.info = i.rb.f.info(i.rb.src, i.p)
if i.info.ccc == 0 {
break
}
}
// new segment or too many combining characters: exit normalization
if outp += i.rb.flushCopy(out[outp:]); i.setStart(outp, i.p) {
return outp
}
goto doFast
}
// nextComposed is the implementation of Next for forms NFC and NFKC.
func nextComposed(i *Iter, out []byte) int {
var outp int
i.initNext(len(out), i.p)
doFast:
inCopyStart, outCopyStart := i.p, outp // invariant xCopyStart <= i.xStart
var prevCC uint8
for {
if !i.info.isYesC() {
goto doNorm
}
if cc := i.info.ccc; cc == 0 {
if i.setStart(outp, i.p) {
break
}
} else if cc < prevCC {
goto doNorm
}
prevCC = i.info.tccc
sz := int(i.info.size)
if sz == 0 {
sz = 1 // illegal rune: copy byte-by-byte
}
p := outp + sz
if p > i.maxseg && i.setStart(outp, i.p) {
break
}
outp = p
i.p += sz
max := min(i.rb.nsrc, len(out)-outp+i.p)
if np := i.rb.src.skipASCII(i.p, max); np > i.p {
outp += np - i.p
i.p = np
if i.p >= i.rb.nsrc {
break
}
// ASCII may combine with consecutive runes.
if i.setStart(outp-1, i.p-1) {
i.p--
outp--
i.info = runeInfo{size: 1}
break
}
}
if i.p >= i.rb.nsrc {
break
}
i.info = i.rb.f.info(i.rb.src, i.p)
}
if inCopyStart != i.p {
i.rb.src.copySlice(out[outCopyStart:], inCopyStart, i.p)
}
i.done = i.p >= i.rb.nsrc
return outp
doNorm:
i.rb.src.copySlice(out[outCopyStart:], inCopyStart, i.inStart)
outp, i.p = i.outStart, i.inStart
i.info = i.rb.f.info(i.rb.src, i.p)
for {
if !i.rb.insert(i.rb.src, i.p, i.info) {
break
}
if i.p += int(i.info.size); i.p >= i.rb.nsrc {
i.rb.compose()
outp += i.rb.flushCopy(out[outp:])
i.done = true
return outp
}
i.info = i.rb.f.info(i.rb.src, i.p)
if i.info.boundaryBefore() {
break
}
}
i.rb.compose()
if outp += i.rb.flushCopy(out[outp:]); i.setStart(outp, i.p) {
return outp
}
goto doFast
}

View File

@@ -1,186 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import (
"strings"
"testing"
)
var iterBufSizes = []int{
MaxSegmentSize,
1.5 * MaxSegmentSize,
2 * MaxSegmentSize,
3 * MaxSegmentSize,
100 * MaxSegmentSize,
}
func doIterNorm(f Form, buf []byte, s string) []byte {
acc := []byte{}
i := Iter{}
i.SetInputString(f, s)
for !i.Done() {
n := i.Next(buf)
acc = append(acc, buf[:n]...)
}
return acc
}
func runIterTests(t *testing.T, name string, f Form, tests []AppendTest, norm bool) {
for i, test := range tests {
in := test.left + test.right
gold := test.out
if norm {
gold = string(f.AppendString(nil, test.out))
}
for _, sz := range iterBufSizes {
buf := make([]byte, sz)
out := string(doIterNorm(f, buf, in))
if len(out) != len(gold) {
const msg = "%s:%d:%d: length is %d; want %d"
t.Errorf(msg, name, i, sz, len(out), len(gold))
}
if out != gold {
// Find first rune that differs and show context.
ir := []rune(out)
ig := []rune(gold)
for j := 0; j < len(ir) && j < len(ig); j++ {
if ir[j] == ig[j] {
continue
}
if j -= 3; j < 0 {
j = 0
}
for e := j + 7; j < e && j < len(ir) && j < len(ig); j++ {
const msg = "%s:%d:%d: runeAt(%d) = %U; want %U"
t.Errorf(msg, name, i, sz, j, ir[j], ig[j])
}
break
}
}
}
}
}
func rep(r rune, n int) string {
return strings.Repeat(string(r), n)
}
var iterTests = []AppendTest{
{"", ascii, ascii},
{"", txt_all, txt_all},
{"", "a" + rep(0x0300, MaxSegmentSize/2), "a" + rep(0x0300, MaxSegmentSize/2)},
}
var iterTestsD = []AppendTest{
{ // segment overflow on unchanged character
"",
"a" + rep(0x0300, MaxSegmentSize/2) + "\u0316",
"a" + rep(0x0300, MaxSegmentSize/2-1) + "\u0316\u0300",
},
{ // segment overflow on unchanged character + start value
"",
"a" + rep(0x0300, MaxSegmentSize/2+maxCombiningChars+4) + "\u0316",
"a" + rep(0x0300, MaxSegmentSize/2+maxCombiningChars) + "\u0316" + rep(0x300, 4),
},
{ // segment overflow on decomposition
"",
"a" + rep(0x0300, MaxSegmentSize/2-1) + "\u0340",
"a" + rep(0x0300, MaxSegmentSize/2),
},
{ // segment overflow on decomposition + start value
"",
"a" + rep(0x0300, MaxSegmentSize/2-1) + "\u0340" + rep(0x300, maxCombiningChars+4) + "\u0320",
"a" + rep(0x0300, MaxSegmentSize/2-1) + rep(0x300, maxCombiningChars+1) + "\u0320" + rep(0x300, 4),
},
{ // start value after ASCII overflow
"",
rep('a', MaxSegmentSize) + rep(0x300, maxCombiningChars+2) + "\u0320",
rep('a', MaxSegmentSize) + rep(0x300, maxCombiningChars) + "\u0320\u0300\u0300",
},
{ // start value after Hangul overflow
"",
rep(0xAC00, MaxSegmentSize/6) + rep(0x300, maxCombiningChars+2) + "\u0320",
strings.Repeat("\u1100\u1161", MaxSegmentSize/6) + rep(0x300, maxCombiningChars-1) + "\u0320" + rep(0x300, 3),
},
{ // start value after cc=0
"",
"您您" + rep(0x300, maxCombiningChars+4) + "\u0320",
"您您" + rep(0x300, maxCombiningChars) + "\u0320" + rep(0x300, 4),
},
{ // start value after normalization
"",
"\u0300\u0320a" + rep(0x300, maxCombiningChars+4) + "\u0320",
"\u0320\u0300a" + rep(0x300, maxCombiningChars) + "\u0320" + rep(0x300, 4),
},
}
var iterTestsC = []AppendTest{
{ // ordering of non-composing combining characters
"",
"\u0305\u0316",
"\u0316\u0305",
},
{ // segment overflow
"",
"a" + rep(0x0305, MaxSegmentSize/2+4) + "\u0316",
"a" + rep(0x0305, MaxSegmentSize/2-1) + "\u0316" + rep(0x305, 5),
},
}
func TestIterNextD(t *testing.T) {
runIterTests(t, "IterNextD1", NFKD, appendTests, true)
runIterTests(t, "IterNextD2", NFKD, iterTests, true)
runIterTests(t, "IterNextD3", NFKD, iterTestsD, false)
}
func TestIterNextC(t *testing.T) {
runIterTests(t, "IterNextC1", NFKC, appendTests, true)
runIterTests(t, "IterNextC2", NFKC, iterTests, true)
runIterTests(t, "IterNextC3", NFKC, iterTestsC, false)
}
type SegmentTest struct {
in string
out []string
}
var segmentTests = []SegmentTest{
{rep('a', MaxSegmentSize), []string{rep('a', MaxSegmentSize), ""}},
{rep('a', MaxSegmentSize+2), []string{rep('a', MaxSegmentSize-1), "aaa", ""}},
{rep('a', MaxSegmentSize) + "\u0300aa", []string{rep('a', MaxSegmentSize-1), "a\u0300", "aa", ""}},
}
// Note that, by design, segmentation is equal for composing and decomposing forms.
func TestIterSegmentation(t *testing.T) {
segmentTest(t, "SegmentTestD", NFD, segmentTests)
segmentTest(t, "SegmentTestC", NFC, segmentTests)
}
func segmentTest(t *testing.T, name string, f Form, tests []SegmentTest) {
iter := Iter{}
for i, tt := range segmentTests {
buf := make([]byte, MaxSegmentSize)
iter.SetInputString(f, tt.in)
for j, seg := range tt.out {
if seg == "" {
if !iter.Done() {
n := iter.Next(buf)
res := string(buf[:n])
t.Errorf(`%s:%d:%d: expected Done()==true, found segment "%s"`, name, i, j, res)
}
continue
}
if iter.Done() {
t.Errorf("%s:%d:%d: Done()==true, want false", name, i, j)
}
n := iter.Next(buf)
seg = f.String(seg)
if res := string(buf[:n]); res != seg {
t.Errorf(`%s:%d:%d: segment was "%s" (%d); want "%s" (%d)`, name, i, j, res, len(res), seg, len(seg))
}
}
}
}

View File

@@ -1,902 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// Normalization table generator.
// Data read from the web.
// See forminfo.go for a description of the trie values associated with each rune.
package main
import (
"bufio"
"bytes"
"flag"
"fmt"
"io"
"log"
"net/http"
"os"
"regexp"
"sort"
"strconv"
"strings"
)
func main() {
flag.Parse()
loadUnicodeData()
loadCompositionExclusions()
completeCharFields(FCanonical)
completeCharFields(FCompatibility)
verifyComputed()
printChars()
makeTables()
testDerived()
}
var url = flag.String("url",
"http://www.unicode.org/Public/6.0.0/ucd/",
"URL of Unicode database directory")
var tablelist = flag.String("tables",
"all",
"comma-separated list of which tables to generate; "+
"can be 'decomp', 'recomp', 'info' and 'all'")
var test = flag.Bool("test",
false,
"test existing tables; can be used to compare web data with package data")
var verbose = flag.Bool("verbose",
false,
"write data to stdout as it is parsed")
var localFiles = flag.Bool("local",
false,
"data files have been copied to the current directory; for debugging only")
var logger = log.New(os.Stderr, "", log.Lshortfile)
// UnicodeData.txt has form:
// 0037;DIGIT SEVEN;Nd;0;EN;;7;7;7;N;;;;;
// 007A;LATIN SMALL LETTER Z;Ll;0;L;;;;;N;;;005A;;005A
// See http://unicode.org/reports/tr44/ for full explanation
// The fields:
const (
FCodePoint = iota
FName
FGeneralCategory
FCanonicalCombiningClass
FBidiClass
FDecompMapping
FDecimalValue
FDigitValue
FNumericValue
FBidiMirrored
FUnicode1Name
FISOComment
FSimpleUppercaseMapping
FSimpleLowercaseMapping
FSimpleTitlecaseMapping
NumField
MaxChar = 0x10FFFF // anything above this shouldn't exist
)
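
As a concrete illustration of the field layout parsed below (the sample line is abridged from UnicodeData.txt; only the fields this generator actually reads are shown):

package main

import (
	"fmt"
	"strings"
)

func main() {
	// U+00C0 LATIN CAPITAL LETTER A WITH GRAVE decomposes canonically to 0041 0300.
	line := "00C0;LATIN CAPITAL LETTER A WITH GRAVE;Lu;0;L;0041 0300;;;;N;;;;00E0;"
	field := strings.Split(line, ";")
	fmt.Println("code point:", field[0])    // FCodePoint
	fmt.Println("ccc:", field[3])           // FCanonicalCombiningClass
	fmt.Println("decomposition:", field[5]) // FDecompMapping
}
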
// Quick Check properties of runes allow us to quickly
// determine whether a rune may occur in a normal form.
// For a given normal form, a rune may be guaranteed to occur
// verbatim (QC=Yes), may or may not combine with another
// rune (QC=Maybe), or may not occur (QC=No).
type QCResult int
const (
QCUnknown QCResult = iota
QCYes
QCNo
QCMaybe
)
func (r QCResult) String() string {
switch r {
case QCYes:
return "Yes"
case QCNo:
return "No"
case QCMaybe:
return "Maybe"
}
return "***UNKNOWN***"
}
const (
FCanonical = iota // NFC or NFD
FCompatibility // NFKC or NFKD
FNumberOfFormTypes
)
const (
MComposed = iota // NFC or NFKC
MDecomposed // NFD or NFKD
MNumberOfModes
)
// This contains only the properties we're interested in.
type Char struct {
name string
codePoint rune // if zero, this index is not a valid code point.
ccc uint8 // canonical combining class
excludeInComp bool // from CompositionExclusions.txt
compatDecomp bool // it has a compatibility expansion
forms [FNumberOfFormTypes]FormInfo // For FCanonical and FCompatibility
state State
}
var chars = make([]Char, MaxChar+1)
func (c Char) String() string {
buf := new(bytes.Buffer)
fmt.Fprintf(buf, "%U [%s]:\n", c.codePoint, c.name)
fmt.Fprintf(buf, " ccc: %v\n", c.ccc)
fmt.Fprintf(buf, " excludeInComp: %v\n", c.excludeInComp)
fmt.Fprintf(buf, " compatDecomp: %v\n", c.compatDecomp)
fmt.Fprintf(buf, " state: %v\n", c.state)
fmt.Fprintf(buf, " NFC:\n")
fmt.Fprint(buf, c.forms[FCanonical])
fmt.Fprintf(buf, " NFKC:\n")
fmt.Fprint(buf, c.forms[FCompatibility])
return buf.String()
}
// In UnicodeData.txt, some ranges are marked like this:
// 3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
// 4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
// parseCharacter keeps a state variable indicating the weirdness.
type State int
const (
SNormal State = iota // known to be zero for the type
SFirst
SLast
SMissing
)
var lastChar = rune('\u0000')
func (c Char) isValid() bool {
return c.codePoint != 0 && c.state != SMissing
}
type FormInfo struct {
quickCheck [MNumberOfModes]QCResult // index: MComposed or MDecomposed
verified [MNumberOfModes]bool // index: MComposed or MDecomposed
combinesForward bool // May combine with rune on the right
combinesBackward bool // May combine with rune on the left
isOneWay bool // Never appears in result
inDecomp bool // Some decompositions result in this char.
decomp Decomposition
expandedDecomp Decomposition
}
func (f FormInfo) String() string {
buf := bytes.NewBuffer(make([]byte, 0))
fmt.Fprintf(buf, " quickCheck[C]: %v\n", f.quickCheck[MComposed])
fmt.Fprintf(buf, " quickCheck[D]: %v\n", f.quickCheck[MDecomposed])
fmt.Fprintf(buf, " cmbForward: %v\n", f.combinesForward)
fmt.Fprintf(buf, " cmbBackward: %v\n", f.combinesBackward)
fmt.Fprintf(buf, " isOneWay: %v\n", f.isOneWay)
fmt.Fprintf(buf, " inDecomp: %v\n", f.inDecomp)
fmt.Fprintf(buf, " decomposition: %X\n", f.decomp)
fmt.Fprintf(buf, " expandedDecomp: %X\n", f.expandedDecomp)
return buf.String()
}
type Decomposition []rune
func openReader(file string) (input io.ReadCloser) {
if *localFiles {
f, err := os.Open(file)
if err != nil {
logger.Fatal(err)
}
input = f
} else {
path := *url + file
resp, err := http.Get(path)
if err != nil {
logger.Fatal(err)
}
if resp.StatusCode != 200 {
logger.Fatal("bad GET status for "+file, resp.Status)
}
input = resp.Body
}
return
}
func parseDecomposition(s string, skipfirst bool) (a []rune, e error) {
decomp := strings.Split(s, " ")
if len(decomp) > 0 && skipfirst {
decomp = decomp[1:]
}
for _, d := range decomp {
point, err := strconv.ParseUint(d, 16, 64)
if err != nil {
return a, err
}
a = append(a, rune(point))
}
return a, nil
}
func parseCharacter(line string) {
field := strings.Split(line, ";")
if len(field) != NumField {
logger.Fatalf("%5s: %d fields (expected %d)\n", line, len(field), NumField)
}
x, err := strconv.ParseUint(field[FCodePoint], 16, 64)
point := int(x)
if err != nil {
logger.Fatalf("%.5s...: %s", line, err)
}
if point == 0 {
return // not interesting and we use 0 as unset
}
if point > MaxChar {
logger.Fatalf("%5s: Rune %X > MaxChar (%X)", line, point, MaxChar)
return
}
state := SNormal
switch {
case strings.Index(field[FName], ", First>") > 0:
state = SFirst
case strings.Index(field[FName], ", Last>") > 0:
state = SLast
}
firstChar := lastChar + 1
lastChar = rune(point)
if state != SLast {
firstChar = lastChar
}
x, err = strconv.ParseUint(field[FCanonicalCombiningClass], 10, 64)
if err != nil {
logger.Fatalf("%U: bad ccc field: %s", int(x), err)
}
ccc := uint8(x)
decmap := field[FDecompMapping]
exp, e := parseDecomposition(decmap, false)
isCompat := false
if e != nil {
if len(decmap) > 0 {
exp, e = parseDecomposition(decmap, true)
if e != nil {
logger.Fatalf(`%U: bad decomp |%v|: "%s"`, int(x), decmap, e)
}
isCompat = true
}
}
for i := firstChar; i <= lastChar; i++ {
char := &chars[i]
char.name = field[FName]
char.codePoint = i
char.forms[FCompatibility].decomp = exp
if !isCompat {
char.forms[FCanonical].decomp = exp
} else {
char.compatDecomp = true
}
if len(decmap) > 0 {
char.forms[FCompatibility].decomp = exp
}
char.ccc = ccc
char.state = SMissing
if i == lastChar {
char.state = state
}
}
return
}
func loadUnicodeData() {
f := openReader("UnicodeData.txt")
defer f.Close()
input := bufio.NewReader(f)
for {
line, err := input.ReadString('\n')
if err != nil {
if err == io.EOF {
break
}
logger.Fatal(err)
}
parseCharacter(line[0 : len(line)-1])
}
}
var singlePointRe = regexp.MustCompile(`^([0-9A-F]+) *$`)
// CompositionExclusions.txt has form:
// 0958 # ...
// See http://unicode.org/reports/tr44/ for full explanation
func parseExclusion(line string) int {
comment := strings.Index(line, "#")
if comment >= 0 {
line = line[0:comment]
}
if len(line) == 0 {
return 0
}
matches := singlePointRe.FindStringSubmatch(line)
if len(matches) != 2 {
logger.Fatalf("%s: %d matches (expected 1)\n", line, len(matches))
}
point, err := strconv.ParseUint(matches[1], 16, 64)
if err != nil {
logger.Fatalf("%.5s...: %s", line, err)
}
return int(point)
}
func loadCompositionExclusions() {
f := openReader("CompositionExclusions.txt")
defer f.Close()
input := bufio.NewReader(f)
for {
line, err := input.ReadString('\n')
if err != nil {
if err == io.EOF {
break
}
logger.Fatal(err)
}
point := parseExclusion(line[0 : len(line)-1])
if point == 0 {
continue
}
c := &chars[point]
if c.excludeInComp {
logger.Fatalf("%U: Duplicate entry in exclusions.", c.codePoint)
}
c.excludeInComp = true
}
}
// hasCompatDecomp returns true if any of the recursive
// decompositions contains a compatibility expansion.
// In this case, the character may not occur in NFK*.
func hasCompatDecomp(r rune) bool {
c := &chars[r]
if c.compatDecomp {
return true
}
for _, d := range c.forms[FCompatibility].decomp {
if hasCompatDecomp(d) {
return true
}
}
return false
}
// Hangul related constants.
const (
HangulBase = 0xAC00
HangulEnd = 0xD7A4 // hangulBase + Jamo combinations (19 * 21 * 28)
JamoLBase = 0x1100
JamoLEnd = 0x1113
JamoVBase = 0x1161
JamoVEnd = 0x1176
JamoTBase = 0x11A8
JamoTEnd = 0x11C3
)
func isHangul(r rune) bool {
return HangulBase <= r && r < HangulEnd
}
func ccc(r rune) uint8 {
return chars[r].ccc
}
// Insert a rune in a buffer, ordered by Canonical Combining Class.
func insertOrdered(b Decomposition, r rune) Decomposition {
n := len(b)
b = append(b, 0)
cc := ccc(r)
if cc > 0 {
// Insertion sort: shift entries with a larger CCC up to make room.
for ; n > 0; n-- {
if ccc(b[n-1]) <= cc {
break
}
b[n] = b[n-1]
}
}
b[n] = r
return b
}
// Recursively decompose.
func decomposeRecursive(form int, r rune, d Decomposition) Decomposition {
if isHangul(r) {
return d
}
dcomp := chars[r].forms[form].decomp
if len(dcomp) == 0 {
return insertOrdered(d, r)
}
for _, c := range dcomp {
d = decomposeRecursive(form, c, d)
}
return d
}
func completeCharFields(form int) {
// Phase 0: pre-expand decomposition.
for i := range chars {
f := &chars[i].forms[form]
if len(f.decomp) == 0 {
continue
}
exp := make(Decomposition, 0)
for _, c := range f.decomp {
exp = decomposeRecursive(form, c, exp)
}
f.expandedDecomp = exp
}
// Phase 1: composition exclusion, mark decomposition.
for i := range chars {
c := &chars[i]
f := &c.forms[form]
// Marks script-specific exclusions and version-restricted characters.
f.isOneWay = c.excludeInComp
// Singletons
f.isOneWay = f.isOneWay || len(f.decomp) == 1
// Non-starter decompositions
if len(f.decomp) > 1 {
chk := c.ccc != 0 || chars[f.decomp[0]].ccc != 0
f.isOneWay = f.isOneWay || chk
}
// Runes that decompose into more than two runes.
f.isOneWay = f.isOneWay || len(f.decomp) > 2
if form == FCompatibility {
f.isOneWay = f.isOneWay || hasCompatDecomp(c.codePoint)
}
for _, r := range f.decomp {
chars[r].forms[form].inDecomp = true
}
}
// Phase 2: forward and backward combining.
for i := range chars {
c := &chars[i]
f := &c.forms[form]
if !f.isOneWay && len(f.decomp) == 2 {
f0 := &chars[f.decomp[0]].forms[form]
f1 := &chars[f.decomp[1]].forms[form]
if !f0.isOneWay {
f0.combinesForward = true
}
if !f1.isOneWay {
f1.combinesBackward = true
}
}
}
// Phase 3: quick check values.
for i := range chars {
c := &chars[i]
f := &c.forms[form]
switch {
case len(f.decomp) > 0:
f.quickCheck[MDecomposed] = QCNo
case isHangul(rune(i)):
f.quickCheck[MDecomposed] = QCNo
default:
f.quickCheck[MDecomposed] = QCYes
}
switch {
case f.isOneWay:
f.quickCheck[MComposed] = QCNo
case (i & 0xffff00) == JamoLBase:
f.quickCheck[MComposed] = QCYes
if JamoLBase <= i && i < JamoLEnd {
f.combinesForward = true
}
if JamoVBase <= i && i < JamoVEnd {
f.quickCheck[MComposed] = QCMaybe
f.combinesBackward = true
f.combinesForward = true
}
if JamoTBase <= i && i < JamoTEnd {
f.quickCheck[MComposed] = QCMaybe
f.combinesBackward = true
}
case !f.combinesBackward:
f.quickCheck[MComposed] = QCYes
default:
f.quickCheck[MComposed] = QCMaybe
}
}
}
func printBytes(b []byte, name string) {
fmt.Printf("// %s: %d bytes\n", name, len(b))
fmt.Printf("var %s = [...]byte {", name)
for i, c := range b {
switch {
case i%64 == 0:
fmt.Printf("\n// Bytes %x - %x\n", i, i+63)
case i%8 == 0:
fmt.Printf("\n")
}
fmt.Printf("0x%.2X, ", c)
}
fmt.Print("\n}\n\n")
}
// See forminfo.go for format.
func makeEntry(f *FormInfo) uint16 {
e := uint16(0)
if f.combinesForward {
e |= 0x8
}
if f.quickCheck[MDecomposed] == QCNo {
e |= 0x1
}
switch f.quickCheck[MComposed] {
case QCYes:
case QCNo:
e |= 0x4
case QCMaybe:
e |= 0x6
default:
log.Fatalf("Illegal quickcheck value %v.", f.quickCheck[MComposed])
}
return e
}
// decompSet keeps track of unique decompositions, grouped by whether
// the decomposition is followed by a trailing and/or leading CCC.
type decompSet [4]map[string]bool
func makeDecompSet() decompSet {
m := decompSet{}
for i := range m {
m[i] = make(map[string]bool)
}
return m
}
func (m *decompSet) insert(key int, s string) {
m[key][s] = true
}
func printCharInfoTables() int {
mkstr := func(r rune, f *FormInfo) (int, string) {
d := f.expandedDecomp
s := string([]rune(d))
if max := 1 << 6; len(s) >= max {
const msg = "%U: too many bytes in decomposition: %d >= %d"
logger.Fatalf(msg, r, len(s), max)
}
head := uint8(len(s))
if f.quickCheck[MComposed] != QCYes {
head |= 0x40
}
if f.combinesForward {
head |= 0x80
}
s = string([]byte{head}) + s
lccc := ccc(d[0])
tccc := ccc(d[len(d)-1])
if tccc < lccc && lccc != 0 {
const msg = "%U: lccc (%d) must be <= tccc (%d)"
logger.Fatalf(msg, r, lccc, tccc)
}
index := 0
if tccc > 0 || lccc > 0 {
s += string([]byte{tccc})
index = 1
if lccc > 0 {
s += string([]byte{lccc})
index |= 2
}
}
return index, s
}
decompSet := makeDecompSet()
// Store the uniqued decompositions in a byte buffer,
// preceded by their byte length.
for _, c := range chars {
for _, f := range c.forms {
if len(f.expandedDecomp) == 0 {
continue
}
if f.combinesBackward {
logger.Fatalf("%U: combinesBackward and decompose", c.codePoint)
}
index, s := mkstr(c.codePoint, &f)
decompSet.insert(index, s)
}
}
decompositions := bytes.NewBuffer(make([]byte, 0, 10000))
size := 0
positionMap := make(map[string]uint16)
decompositions.WriteString("\000")
cname := []string{"firstCCC", "firstLeadingCCC", "", "lastDecomp"}
fmt.Println("const (")
for i, m := range decompSet {
sa := []string{}
for s := range m {
sa = append(sa, s)
}
sort.Strings(sa)
for _, s := range sa {
p := decompositions.Len()
decompositions.WriteString(s)
positionMap[s] = uint16(p)
}
if cname[i] != "" {
fmt.Printf("%s = 0x%X\n", cname[i], decompositions.Len())
}
}
fmt.Println("maxDecomp = 0x8000")
fmt.Println(")")
b := decompositions.Bytes()
printBytes(b, "decomps")
size += len(b)
varnames := []string{"nfc", "nfkc"}
for i := 0; i < FNumberOfFormTypes; i++ {
trie := newNode()
for r, c := range chars {
f := c.forms[i]
d := f.expandedDecomp
if len(d) != 0 {
_, key := mkstr(c.codePoint, &f)
trie.insert(rune(r), positionMap[key])
if c.ccc != ccc(d[0]) {
// We assume the lead ccc of a decomposition !=0 in this case.
if ccc(d[0]) == 0 {
logger.Fatalf("Expected leading CCC to be non-zero; ccc is %d", c.ccc)
}
}
} else if v := makeEntry(&f)<<8 | uint16(c.ccc); v != 0 {
trie.insert(c.codePoint, 0x8000|v)
}
}
size += trie.printTables(varnames[i])
}
return size
}
func contains(sa []string, s string) bool {
for _, a := range sa {
if a == s {
return true
}
}
return false
}
// Extract the version number from the URL.
func version() string {
// From http://www.unicode.org/standard/versions/#Version_Numbering:
// for the later Unicode versions, data files are located in
// versioned directories.
fields := strings.Split(*url, "/")
for _, f := range fields {
if match, _ := regexp.MatchString(`[0-9]\.[0-9]\.[0-9]`, f); match {
return f
}
}
logger.Fatal("unknown version")
return "Unknown"
}
const fileHeader = `// Generated by running
// maketables --tables=%s --url=%s
// DO NOT EDIT
package norm
`
func makeTables() {
size := 0
if *tablelist == "" {
return
}
list := strings.Split(*tablelist, ",")
if *tablelist == "all" {
list = []string{"recomp", "info"}
}
fmt.Printf(fileHeader, *tablelist, *url)
fmt.Println("// Version is the Unicode edition from which the tables are derived.")
fmt.Printf("const Version = %q\n\n", version())
if contains(list, "info") {
size += printCharInfoTables()
}
if contains(list, "recomp") {
// Note that we use 32 bit keys, instead of 64 bit.
// This clips the bits of three entries, but we know
// this won't cause a collision. The compiler will catch
// any changes made to UnicodeData.txt that introduces
// a collision.
// Note that the recomposition map for NFC and NFKC
// are identical.
// Recomposition map
nrentries := 0
for _, c := range chars {
f := c.forms[FCanonical]
if !f.isOneWay && len(f.decomp) > 0 {
nrentries++
}
}
sz := nrentries * 8
size += sz
fmt.Printf("// recompMap: %d bytes (entries only)\n", sz)
fmt.Println("var recompMap = map[uint32]rune{")
for i, c := range chars {
f := c.forms[FCanonical]
d := f.decomp
if !f.isOneWay && len(d) > 0 {
key := uint32(uint16(d[0]))<<16 + uint32(uint16(d[1]))
fmt.Printf("0x%.8X: 0x%.4X,\n", key, i)
}
}
fmt.Printf("}\n\n")
}
fmt.Printf("// Total size of tables: %dKB (%d bytes)\n", (size+512)/1024, size)
}
func printChars() {
if *verbose {
for _, c := range chars {
if !c.isValid() || c.state == SMissing {
continue
}
fmt.Println(c)
}
}
}
// verifyComputed does various consistency tests.
func verifyComputed() {
for i, c := range chars {
for _, f := range c.forms {
isNo := (f.quickCheck[MDecomposed] == QCNo)
if (len(f.decomp) > 0) != isNo && !isHangul(rune(i)) {
log.Fatalf("%U: NF*D must be no if rune decomposes", i)
}
isMaybe := f.quickCheck[MComposed] == QCMaybe
if f.combinesBackward != isMaybe {
log.Fatalf("%U: NF*C must be maybe if combinesBackward", i)
}
}
nfc := c.forms[FCanonical]
nfkc := c.forms[FCompatibility]
if nfc.combinesBackward != nfkc.combinesBackward {
logger.Fatalf("%U: Cannot combine combinesBackward\n", c.codePoint)
}
}
}
var qcRe = regexp.MustCompile(`([0-9A-F\.]+) *; (NF.*_QC); ([YNM]) #.*`)
// Use values in DerivedNormalizationProps.txt to compare against the
// values we computed.
// DerivedNormalizationProps.txt has form:
// 00C0..00C5 ; NFD_QC; N # ...
// 0374 ; NFD_QC; N # ...
// See http://unicode.org/reports/tr44/ for full explanation
func testDerived() {
if !*test {
return
}
f := openReader("DerivedNormalizationProps.txt")
defer f.Close()
input := bufio.NewReader(f)
for {
line, err := input.ReadString('\n')
if err != nil {
if err == io.EOF {
break
}
logger.Fatal(err)
}
qc := qcRe.FindStringSubmatch(line)
if qc == nil {
continue
}
rng := strings.Split(qc[1], "..")
i, err := strconv.ParseUint(rng[0], 16, 64)
if err != nil {
log.Fatal(err)
}
j := i
if len(rng) > 1 {
j, err = strconv.ParseUint(rng[1], 16, 64)
if err != nil {
log.Fatal(err)
}
}
var ftype, mode int
qt := strings.TrimSpace(qc[2])
switch qt {
case "NFC_QC":
ftype, mode = FCanonical, MComposed
case "NFD_QC":
ftype, mode = FCanonical, MDecomposed
case "NFKC_QC":
ftype, mode = FCompatibility, MComposed
case "NFKD_QC":
ftype, mode = FCompatibility, MDecomposed
default:
log.Fatalf(`Unexpected quick check type "%s"`, qt)
}
var qr QCResult
switch qc[3] {
case "Y":
qr = QCYes
case "N":
qr = QCNo
case "M":
qr = QCMaybe
default:
log.Fatalf(`Unexpected quick check value "%s"`, qc[3])
}
var lastFailed bool
// Verify current
for ; i <= j; i++ {
c := &chars[int(i)]
c.forms[ftype].verified[mode] = true
curqr := c.forms[ftype].quickCheck[mode]
if curqr != qr {
if !lastFailed {
logger.Printf("%s: %.4X..%.4X -- %s\n",
qt, int(i), int(j), line[0:50])
}
logger.Printf("%U: FAILED %s (was %v need %v)\n",
int(i), qt, curqr, qr)
lastFailed = true
}
}
}
// Any unspecified value must be QCYes. Verify this.
for i, c := range chars {
for j, fd := range c.forms {
for k, qr := range fd.quickCheck {
if !fd.verified[k] && qr != QCYes {
m := "%U: FAIL F:%d M:%d (was %v need Yes) %s\n"
logger.Printf(m, i, j, k, qr, c.name)
}
}
}
}
}

View File

@@ -1,45 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// Generate test data for trie code.
package main
import (
"fmt"
)
func main() {
printTestTables()
}
// We take the smallest, largest and an arbitrary value for each
// of the UTF-8 sequence lengths.
var testRunes = []rune{
0x01, 0x0C, 0x7F, // 1-byte sequences
0x80, 0x100, 0x7FF, // 2-byte sequences
0x800, 0x999, 0xFFFF, // 3-byte sequences
0x10000, 0x10101, 0x10FFFF, // 4-byte sequences
0x200, 0x201, 0x202, 0x210, 0x215, // five entries in one sparse block
}
const fileHeader = `// Generated by running
// maketesttables
// DO NOT EDIT
package norm
`
func printTestTables() {
fmt.Print(fileHeader)
fmt.Printf("var testRunes = %#v\n\n", testRunes)
t := newNode()
for i, r := range testRunes {
t.insert(r, uint16(i))
}
t.printTables("testdata")
}

View File

@@ -1,14 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm_test
import (
"testing"
)
func TestPlaceHolder(t *testing.T) {
// Does nothing, just allows the Makefile to be canonical
// while waiting for the package itself to be written.
}

View File

@@ -1,478 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package norm contains types and functions for normalizing Unicode strings.
package norm
import "unicode/utf8"
// A Form denotes a canonical representation of Unicode code points.
// The Unicode-defined normalization and equivalence forms are:
//
// NFC Unicode Normalization Form C
// NFD Unicode Normalization Form D
// NFKC Unicode Normalization Form KC
// NFKD Unicode Normalization Form KD
//
// For a Form f, this documentation uses the notation f(x) to mean
// the bytes or string x converted to the given form.
// A position n in x is called a boundary if conversion to the form can
// proceed independently on both sides:
// f(x) == append(f(x[0:n]), f(x[n:])...)
//
// References: http://unicode.org/reports/tr15/ and
// http://unicode.org/notes/tn5/.
type Form int
const (
NFC Form = iota
NFD
NFKC
NFKD
)
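// exampleForms is a minimal sketch of how the Form values above might be
// used; it assumes only identifiers defined in this package (norm).
func exampleForms() {
	composed := NFC.String("e\u0301")  // "\u00e9": 'e' plus combining acute composes
	decomposed := NFD.String("\u00e9") // "e\u0301": the precomposed rune decomposes
	_ = NFC.IsNormalString(composed)   // true: already in normal form C
	_ = NFD.IsNormalString(decomposed) // true: already in normal form D
}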
// Bytes returns f(b). May return b if f(b) = b.
func (f Form) Bytes(b []byte) []byte {
rb := reorderBuffer{}
rb.init(f, b)
n := quickSpan(&rb, 0)
if n == len(b) {
return b
}
out := make([]byte, n, len(b))
copy(out, b[0:n])
return doAppend(&rb, out, n)
}
// String returns f(s).
func (f Form) String(s string) string {
rb := reorderBuffer{}
rb.initString(f, s)
n := quickSpan(&rb, 0)
if n == len(s) {
return s
}
out := make([]byte, n, len(s))
copy(out, s[0:n])
return string(doAppend(&rb, out, n))
}
// IsNormal returns true if b == f(b).
func (f Form) IsNormal(b []byte) bool {
rb := reorderBuffer{}
rb.init(f, b)
bp := quickSpan(&rb, 0)
if bp == len(b) {
return true
}
for bp < len(b) {
decomposeSegment(&rb, bp)
if rb.f.composing {
rb.compose()
}
for i := 0; i < rb.nrune; i++ {
info := rb.rune[i]
if bp+int(info.size) > len(b) {
return false
}
p := info.pos
pe := p + info.size
for ; p < pe; p++ {
if b[bp] != rb.byte[p] {
return false
}
bp++
}
}
rb.reset()
bp = quickSpan(&rb, bp)
}
return true
}
// IsNormalString returns true if s == f(s).
func (f Form) IsNormalString(s string) bool {
rb := reorderBuffer{}
rb.initString(f, s)
bp := quickSpan(&rb, 0)
if bp == len(s) {
return true
}
for bp < len(s) {
decomposeSegment(&rb, bp)
if rb.f.composing {
rb.compose()
}
for i := 0; i < rb.nrune; i++ {
info := rb.rune[i]
if bp+int(info.size) > len(s) {
return false
}
p := info.pos
pe := p + info.size
for ; p < pe; p++ {
if s[bp] != rb.byte[p] {
return false
}
bp++
}
}
rb.reset()
bp = quickSpan(&rb, bp)
}
return true
}
// patchTail fixes a case where a rune may be incorrectly normalized
// if it is followed by illegal continuation bytes. It returns the
// patched buffer and whether there were trailing continuation bytes.
func patchTail(rb *reorderBuffer, buf []byte) ([]byte, bool) {
info, p := lastRuneStart(&rb.f, buf)
if p == -1 || info.size == 0 {
return buf, false
}
end := p + int(info.size)
extra := len(buf) - end
if extra > 0 {
// Potentially allocating memory. However, this only
// happens with ill-formed UTF-8.
x := make([]byte, 0)
x = append(x, buf[len(buf)-extra:]...)
buf = decomposeToLastBoundary(rb, buf[:end])
if rb.f.composing {
rb.compose()
}
buf = rb.flush(buf)
return append(buf, x...), true
}
return buf, false
}
func appendQuick(rb *reorderBuffer, dst []byte, i int) ([]byte, int) {
if rb.nsrc == i {
return dst, i
}
end := quickSpan(rb, i)
return rb.src.appendSlice(dst, i, end), end
}
// Append returns f(append(out, b...)).
// The buffer out must be nil, empty, or equal to f(out).
func (f Form) Append(out []byte, src ...byte) []byte {
if len(src) == 0 {
return out
}
rb := reorderBuffer{}
rb.init(f, src)
return doAppend(&rb, out, 0)
}
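// exampleAppend is a minimal sketch of the contract documented for Append:
// out starts nil (and therefore equals f(out)) and stays normalized as
// successive chunks are appended.
func exampleAppend(chunks [][]byte) []byte {
	var out []byte // nil satisfies the precondition out == NFC(out)
	for _, c := range chunks {
		out = NFC.Append(out, c...) // re-normalizes across each chunk boundary
	}
	return out
}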
func doAppend(rb *reorderBuffer, out []byte, p int) []byte {
src, n := rb.src, rb.nsrc
doMerge := len(out) > 0
if q := src.skipNonStarter(p); q > p {
// Move leading non-starters to destination.
out = src.appendSlice(out, p, q)
buf, endsInError := patchTail(rb, out)
if endsInError {
out = buf
doMerge = false // no need to merge, ends with illegal UTF-8
} else {
out = decomposeToLastBoundary(rb, buf) // force decomposition
}
p = q
}
fd := &rb.f
if doMerge {
var info runeInfo
if p < n {
info = fd.info(src, p)
if p == 0 && !info.boundaryBefore() {
out = decomposeToLastBoundary(rb, out)
}
}
if info.size == 0 || info.boundaryBefore() {
if fd.composing {
rb.compose()
}
out = rb.flush(out)
if info.size == 0 {
// Append incomplete UTF-8 encoding.
return src.appendSlice(out, p, n)
}
}
}
if rb.nrune == 0 {
out, p = appendQuick(rb, out, p)
}
for p < n {
p = decomposeSegment(rb, p)
if fd.composing {
rb.compose()
}
out = rb.flush(out)
out, p = appendQuick(rb, out, p)
}
return out
}
// AppendString returns f(append(out, []byte(s))).
// The buffer out must be nil, empty, or equal to f(out).
func (f Form) AppendString(out []byte, src string) []byte {
if len(src) == 0 {
return out
}
rb := reorderBuffer{}
rb.initString(f, src)
return doAppend(&rb, out, 0)
}
// QuickSpan returns a boundary n such that b[0:n] == f(b[0:n]).
// It is not guaranteed to return the largest such n.
func (f Form) QuickSpan(b []byte) int {
rb := reorderBuffer{}
rb.init(f, b)
n := quickSpan(&rb, 0)
return n
}
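// exampleQuickSpan is a minimal sketch showing how QuickSpan can be used to
// copy an already-normalized prefix verbatim and normalize only the rest,
// which is essentially what Bytes does above.
func exampleQuickSpan(b []byte) []byte {
	n := NFC.QuickSpan(b) // b[0:n] is guaranteed to equal NFC(b[0:n])
	out := append([]byte(nil), b[:n]...)
	return NFC.Append(out, b[n:]...) // out == NFC(out), so Append's precondition holds
}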
func quickSpan(rb *reorderBuffer, i int) int {
var lastCC uint8
var nc int
lastSegStart := i
src, n := rb.src, rb.nsrc
for i < n {
if j := src.skipASCII(i, n); i != j {
i = j
lastSegStart = i - 1
lastCC = 0
nc = 0
continue
}
info := rb.f.info(src, i)
if info.size == 0 {
// include incomplete runes
return n
}
cc := info.ccc
if rb.f.composing {
if !info.isYesC() {
break
}
} else {
if !info.isYesD() {
break
}
}
if cc == 0 {
lastSegStart = i
nc = 0
} else {
if nc >= maxCombiningChars {
lastSegStart = i
lastCC = cc
nc = 1
} else {
if lastCC > cc {
return lastSegStart
}
nc++
}
}
lastCC = cc
i += int(info.size)
}
if i == n {
return n
}
if rb.f.composing {
return lastSegStart
}
return i
}
// QuickSpanString returns a boundary n such that s[0:n] == f(s[0:n]).
// It is not guaranteed to return the largest such n.
func (f Form) QuickSpanString(s string) int {
rb := reorderBuffer{}
rb.initString(f, s)
return quickSpan(&rb, 0)
}
// FirstBoundary returns the position i of the first boundary in b
// or -1 if b contains no boundary.
func (f Form) FirstBoundary(b []byte) int {
rb := reorderBuffer{}
rb.init(f, b)
return firstBoundary(&rb)
}
func firstBoundary(rb *reorderBuffer) int {
src, nsrc := rb.src, rb.nsrc
i := src.skipNonStarter(0)
if i >= nsrc {
return -1
}
fd := &rb.f
info := fd.info(src, i)
for n := 0; info.size != 0 && !info.boundaryBefore(); {
i += int(info.size)
if n++; n >= maxCombiningChars {
return i
}
if i >= nsrc {
if !info.boundaryAfter() {
return -1
}
return nsrc
}
info = fd.info(src, i)
}
if info.size == 0 {
return -1
}
return i
}
// FirstBoundaryInString returns the position i of the first boundary in s
// or -1 if s contains no boundary.
func (f Form) FirstBoundaryInString(s string) int {
rb := reorderBuffer{}
rb.initString(f, s)
return firstBoundary(&rb)
}
// LastBoundary returns the position i of the last boundary in b
// or -1 if b contains no boundary.
func (f Form) LastBoundary(b []byte) int {
return lastBoundary(formTable[f], b)
}
func lastBoundary(fd *formInfo, b []byte) int {
i := len(b)
info, p := lastRuneStart(fd, b)
if p == -1 {
return -1
}
if info.size == 0 { // ends with incomplete rune
if p == 0 { // starts with incomplete rune
return -1
}
i = p
info, p = lastRuneStart(fd, b[:i])
if p == -1 { // incomplete UTF-8 encoding or non-starter bytes without a starter
return i
}
}
if p+int(info.size) != i { // trailing non-starter bytes: illegal UTF-8
return i
}
if info.boundaryAfter() {
return i
}
i = p
for n := 0; i >= 0 && !info.boundaryBefore(); {
info, p = lastRuneStart(fd, b[:i])
if n++; n >= maxCombiningChars {
return len(b)
}
if p+int(info.size) != i {
if p == -1 { // no boundary found
return -1
}
return i // boundary after an illegal UTF-8 encoding
}
i = p
}
return i
}
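// exampleSplitAtBoundary is a minimal sketch of how LastBoundary supports
// chunked processing: splitting at a boundary lets each half be normalized
// independently, per the boundary property f(x) == append(f(head), f(tail)...).
func exampleSplitAtBoundary(buf []byte) (head, tail []byte) {
	i := NFC.LastBoundary(buf)
	if i < 0 {
		return nil, buf // no boundary yet: carry everything over to the next chunk
	}
	return buf[:i], buf[i:]
}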
// decomposeSegment scans the first segment in src into rb.
// It returns the number of bytes consumed from src.
// TODO(mpvl): consider inserting U+034f (Combining Grapheme Joiner)
// when we detect a sequence of 30+ non-starter chars.
func decomposeSegment(rb *reorderBuffer, sp int) int {
// Force one character to be consumed.
info := rb.f.info(rb.src, sp)
if info.size == 0 {
return 0
}
for rb.insert(rb.src, sp, info) {
sp += int(info.size)
if sp >= rb.nsrc {
break
}
info = rb.f.info(rb.src, sp)
bound := info.boundaryBefore()
if bound || info.size == 0 {
break
}
}
return sp
}
// lastRuneStart returns the runeInfo and position of the last
// rune in buf or the zero runeInfo and -1 if no rune was found.
func lastRuneStart(fd *formInfo, buf []byte) (runeInfo, int) {
p := len(buf) - 1
for ; p >= 0 && !utf8.RuneStart(buf[p]); p-- {
}
if p < 0 {
return runeInfo{}, -1
}
return fd.info(inputBytes(buf), p), p
}
// decomposeToLastBoundary finds an open segment at the end of the buffer
// and scans it into rb. Returns the buffer minus the last segment.
func decomposeToLastBoundary(rb *reorderBuffer, buf []byte) []byte {
fd := &rb.f
info, i := lastRuneStart(fd, buf)
if int(info.size) != len(buf)-i {
// illegal trailing continuation bytes
return buf
}
if info.boundaryAfter() {
return buf
}
var add [maxBackRunes]runeInfo // stores runeInfo in reverse order
add[0] = info
padd := 1
n := 1
p := len(buf) - int(info.size)
for ; p >= 0 && !info.boundaryBefore(); p -= int(info.size) {
info, i = lastRuneStart(fd, buf[:p])
if int(info.size) != p-i {
break
}
// Check that decomposition doesn't result in overflow.
if info.hasDecomposition() {
if isHangul(buf) {
i += int(info.size)
n++
} else {
dcomp := info.decomposition()
for i := 0; i < len(dcomp); {
inf := rb.f.info(inputBytes(dcomp), i)
i += int(inf.size)
n++
}
}
} else {
n++
}
if n > maxBackRunes {
break
}
add[padd] = info
padd++
}
pp := p
for padd--; padd >= 0; padd-- {
info = add[padd]
rb.insert(inputBytes(buf), pp, info)
pp += int(info.size)
}
return buf[:p]
}

View File

@@ -1,724 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import (
"bytes"
"strings"
"testing"
)
type PositionTest struct {
input string
pos int
buffer string // expected contents of reorderBuffer, if applicable
}
type positionFunc func(rb *reorderBuffer, s string) int
func runPosTests(t *testing.T, name string, f Form, fn positionFunc, tests []PositionTest) {
rb := reorderBuffer{}
rb.init(f, nil)
for i, test := range tests {
rb.reset()
rb.src = inputString(test.input)
rb.nsrc = len(test.input)
pos := fn(&rb, test.input)
if pos != test.pos {
t.Errorf("%s:%d: position is %d; want %d", name, i, pos, test.pos)
}
runes := []rune(test.buffer)
if rb.nrune != len(runes) {
t.Errorf("%s:%d: reorder buffer lenght is %d; want %d", name, i, rb.nrune, len(runes))
continue
}
for j, want := range runes {
found := rune(rb.runeAt(j))
if found != want {
t.Errorf("%s:%d: rune at %d is %U; want %U", name, i, j, found, want)
}
}
}
}
var decomposeSegmentTests = []PositionTest{
// illegal runes
{"\xC0", 0, ""},
{"\u00E0\x80", 2, "\u0061\u0300"},
// starter
{"a", 1, "a"},
{"ab", 1, "a"},
// starter + composing
{"a\u0300", 3, "a\u0300"},
{"a\u0300b", 3, "a\u0300"},
// with decomposition
{"\u00C0", 2, "A\u0300"},
{"\u00C0b", 2, "A\u0300"},
// long
{strings.Repeat("\u0300", 31), 62, strings.Repeat("\u0300", 31)},
// ends with incomplete UTF-8 encoding
{"\xCC", 0, ""},
{"\u0300\xCC", 2, "\u0300"},
}
func decomposeSegmentF(rb *reorderBuffer, s string) int {
rb.src = inputString(s)
rb.nsrc = len(s)
return decomposeSegment(rb, 0)
}
func TestDecomposeSegment(t *testing.T) {
runPosTests(t, "TestDecomposeSegment", NFC, decomposeSegmentF, decomposeSegmentTests)
}
var firstBoundaryTests = []PositionTest{
// no boundary
{"", -1, ""},
{"\u0300", -1, ""},
{"\x80\x80", -1, ""},
// illegal runes
{"\xff", 0, ""},
{"\u0300\xff", 2, ""},
{"\u0300\xc0\x80\x80", 2, ""},
// boundaries
{"a", 0, ""},
{"\u0300a", 2, ""},
// Hangul
{"\u1103\u1161", 0, ""},
{"\u110B\u1173\u11B7", 0, ""},
{"\u1161\u110B\u1173\u11B7", 3, ""},
{"\u1173\u11B7\u1103\u1161", 6, ""},
// too many combining characters.
{strings.Repeat("\u0300", maxCombiningChars-1), -1, ""},
{strings.Repeat("\u0300", maxCombiningChars), 60, ""},
{strings.Repeat("\u0300", maxCombiningChars+1), 60, ""},
}
func firstBoundaryF(rb *reorderBuffer, s string) int {
return rb.f.form.FirstBoundary([]byte(s))
}
func firstBoundaryStringF(rb *reorderBuffer, s string) int {
return rb.f.form.FirstBoundaryInString(s)
}
func TestFirstBoundary(t *testing.T) {
runPosTests(t, "TestFirstBoundary", NFC, firstBoundaryF, firstBoundaryTests)
runPosTests(t, "TestFirstBoundaryInString", NFC, firstBoundaryStringF, firstBoundaryTests)
}
var decomposeToLastTests = []PositionTest{
// ends with inert character
{"Hello!", 6, ""},
{"\u0632", 2, ""},
{"a\u0301\u0635", 5, ""},
// ends with non-inert starter
{"a", 0, "a"},
{"a\u0301a", 3, "a"},
{"a\u0301\u03B9", 3, "\u03B9"},
{"a\u0327", 0, "a\u0327"},
// illegal runes
{"\xFF", 1, ""},
{"aa\xFF", 3, ""},
{"\xC0\x80\x80", 3, ""},
{"\xCC\x80\x80", 3, ""},
// ends with incomplete UTF-8 encoding
{"a\xCC", 2, ""},
// ends with combining characters
{"\u0300\u0301", 0, "\u0300\u0301"},
{"a\u0300\u0301", 0, "a\u0300\u0301"},
{"a\u0301\u0308", 0, "a\u0301\u0308"},
{"a\u0308\u0301", 0, "a\u0308\u0301"},
{"aaaa\u0300\u0301", 3, "a\u0300\u0301"},
{"\u0300a\u0300\u0301", 2, "a\u0300\u0301"},
{"\u00C0", 0, "A\u0300"},
{"a\u00C0", 1, "A\u0300"},
// decomposing
{"a\u0300\uFDC0", 3, "\u0645\u062C\u064A"},
{"\uFDC0" + strings.Repeat("\u0300", 26), 0, "\u0645\u062C\u064A" + strings.Repeat("\u0300", 26)},
// Hangul
{"a\u1103", 1, "\u1103"},
{"a\u110B", 1, "\u110B"},
{"a\u110B\u1173", 1, "\u110B\u1173"},
// See comment in composition.go:compBoundaryAfter.
{"a\u110B\u1173\u11B7", 1, "\u110B\u1173\u11B7"},
{"a\uC73C", 1, "\u110B\u1173"},
{"다음", 3, "\u110B\u1173\u11B7"},
{"다", 0, "\u1103\u1161"},
{"\u1103\u1161\u110B\u1173\u11B7", 6, "\u110B\u1173\u11B7"},
{"\u110B\u1173\u11B7\u1103\u1161", 9, "\u1103\u1161"},
{"다음음", 6, "\u110B\u1173\u11B7"},
{"음다다", 6, "\u1103\u1161"},
// buffer overflow
{"a" + strings.Repeat("\u0300", 30), 3, strings.Repeat("\u0300", 29)},
{"\uFDFA" + strings.Repeat("\u0300", 14), 3, strings.Repeat("\u0300", 14)},
// weird UTF-8
{"a\u0300\u11B7", 0, "a\u0300\u11B7"},
}
func decomposeToLast(rb *reorderBuffer, s string) int {
buf := decomposeToLastBoundary(rb, []byte(s))
return len(buf)
}
func TestDecomposeToLastBoundary(t *testing.T) {
runPosTests(t, "TestDecomposeToLastBoundary", NFKC, decomposeToLast, decomposeToLastTests)
}
var lastBoundaryTests = []PositionTest{
// ends with inert character
{"Hello!", 6, ""},
{"\u0632", 2, ""},
// ends with non-inert starter
{"a", 0, ""},
// illegal runes
{"\xff", 1, ""},
{"aa\xff", 3, ""},
{"a\xff\u0300", 1, ""},
{"\xc0\x80\x80", 3, ""},
{"\xc0\x80\x80\u0300", 3, ""},
// ends with incomplete UTF-8 encoding
{"\xCC", -1, ""},
{"\xE0\x80", -1, ""},
{"\xF0\x80\x80", -1, ""},
{"a\xCC", 0, ""},
{"\x80\xCC", 1, ""},
{"\xCC\xCC", 1, ""},
// ends with combining characters
{"a\u0300\u0301", 0, ""},
{"aaaa\u0300\u0301", 3, ""},
{"\u0300a\u0300\u0301", 2, ""},
{"\u00C0", 0, ""},
{"a\u00C0", 1, ""},
// decomposition may recombine
{"\u0226", 0, ""},
// no boundary
{"", -1, ""},
{"\u0300\u0301", -1, ""},
{"\u0300", -1, ""},
{"\x80\x80", -1, ""},
{"\x80\x80\u0301", -1, ""},
// Hangul
{"다음", 3, ""},
{"다", 0, ""},
{"\u1103\u1161\u110B\u1173\u11B7", 6, ""},
{"\u110B\u1173\u11B7\u1103\u1161", 9, ""},
// too many combining characters.
{strings.Repeat("\u0300", maxCombiningChars-1), -1, ""},
{strings.Repeat("\u0300", maxCombiningChars), 60, ""},
{strings.Repeat("\u0300", maxCombiningChars+1), 62, ""},
}
func lastBoundaryF(rb *reorderBuffer, s string) int {
return rb.f.form.LastBoundary([]byte(s))
}
func TestLastBoundary(t *testing.T) {
runPosTests(t, "TestLastBoundary", NFC, lastBoundaryF, lastBoundaryTests)
}
var quickSpanTests = []PositionTest{
{"", 0, ""},
// starters
{"a", 1, ""},
{"abc", 3, ""},
{"\u043Eb", 3, ""},
// incomplete last rune.
{"\xCC", 1, ""},
{"a\xCC", 2, ""},
// incorrectly ordered combining characters
{"\u0300\u0316", 0, ""},
{"\u0300\u0316cd", 0, ""},
// have a maximum number of combining characters.
{strings.Repeat("\u035D", 30) + "\u035B", 62, ""},
{"a" + strings.Repeat("\u035D", 30) + "\u035B", 63, ""},
{"Ɵ" + strings.Repeat("\u035D", 30) + "\u035B", 64, ""},
{"aa" + strings.Repeat("\u035D", 30) + "\u035B", 64, ""},
}
var quickSpanNFDTests = []PositionTest{
// needs decomposing
{"\u00C0", 0, ""},
{"abc\u00C0", 3, ""},
// correctly ordered combining characters
{"\u0300", 2, ""},
{"ab\u0300", 4, ""},
{"ab\u0300cd", 6, ""},
{"\u0300cd", 4, ""},
{"\u0316\u0300", 4, ""},
{"ab\u0316\u0300", 6, ""},
{"ab\u0316\u0300cd", 8, ""},
{"ab\u0316\u0300\u00C0", 6, ""},
{"\u0316\u0300cd", 6, ""},
{"\u043E\u0308b", 5, ""},
// incorrectly ordered combining characters
{"ab\u0300\u0316", 1, ""}, // TODO: we could skip 'b' as well.
{"ab\u0300\u0316cd", 1, ""},
// Hangul
{"같은", 0, ""},
}
var quickSpanNFCTests = []PositionTest{
// okay composed
{"\u00C0", 2, ""},
{"abc\u00C0", 5, ""},
// correctly ordered combining characters
{"ab\u0300", 1, ""},
{"ab\u0300cd", 1, ""},
{"ab\u0316\u0300", 1, ""},
{"ab\u0316\u0300cd", 1, ""},
{"\u00C0\u035D", 4, ""},
// we do not special case leading combining characters
{"\u0300cd", 0, ""},
{"\u0300", 0, ""},
{"\u0316\u0300", 0, ""},
{"\u0316\u0300cd", 0, ""},
// incorrectly ordered combining characters
{"ab\u0300\u0316", 1, ""},
{"ab\u0300\u0316cd", 1, ""},
// Hangul
{"같은", 6, ""},
}
func doQuickSpan(rb *reorderBuffer, s string) int {
return rb.f.form.QuickSpan([]byte(s))
}
func doQuickSpanString(rb *reorderBuffer, s string) int {
return rb.f.form.QuickSpanString(s)
}
func TestQuickSpan(t *testing.T) {
runPosTests(t, "TestQuickSpanNFD1", NFD, doQuickSpan, quickSpanTests)
runPosTests(t, "TestQuickSpanNFD2", NFD, doQuickSpan, quickSpanNFDTests)
runPosTests(t, "TestQuickSpanNFC1", NFC, doQuickSpan, quickSpanTests)
runPosTests(t, "TestQuickSpanNFC2", NFC, doQuickSpan, quickSpanNFCTests)
runPosTests(t, "TestQuickSpanStringNFD1", NFD, doQuickSpanString, quickSpanTests)
runPosTests(t, "TestQuickSpanStringNFD2", NFD, doQuickSpanString, quickSpanNFDTests)
runPosTests(t, "TestQuickSpanStringNFC1", NFC, doQuickSpanString, quickSpanTests)
runPosTests(t, "TestQuickSpanStringNFC2", NFC, doQuickSpanString, quickSpanNFCTests)
}
var isNormalTests = []PositionTest{
{"", 1, ""},
// illegal runes
{"\xff", 1, ""},
// starters
{"a", 1, ""},
{"abc", 1, ""},
{"\u043Eb", 1, ""},
// incorrectly ordered combining characters
{"\u0300\u0316", 0, ""},
{"ab\u0300\u0316", 0, ""},
{"ab\u0300\u0316cd", 0, ""},
{"\u0300\u0316cd", 0, ""},
}
var isNormalNFDTests = []PositionTest{
// needs decomposing
{"\u00C0", 0, ""},
{"abc\u00C0", 0, ""},
// correctly ordered combining characters
{"\u0300", 1, ""},
{"ab\u0300", 1, ""},
{"ab\u0300cd", 1, ""},
{"\u0300cd", 1, ""},
{"\u0316\u0300", 1, ""},
{"ab\u0316\u0300", 1, ""},
{"ab\u0316\u0300cd", 1, ""},
{"\u0316\u0300cd", 1, ""},
{"\u043E\u0308b", 1, ""},
// Hangul
{"같은", 0, ""},
}
var isNormalNFCTests = []PositionTest{
// okay composed
{"\u00C0", 1, ""},
{"abc\u00C0", 1, ""},
// need reordering
{"a\u0300", 0, ""},
{"a\u0300cd", 0, ""},
{"a\u0316\u0300", 0, ""},
{"a\u0316\u0300cd", 0, ""},
// correctly ordered combining characters
{"ab\u0300", 1, ""},
{"ab\u0300cd", 1, ""},
{"ab\u0316\u0300", 1, ""},
{"ab\u0316\u0300cd", 1, ""},
{"\u00C0\u035D", 1, ""},
{"\u0300", 1, ""},
{"\u0316\u0300cd", 1, ""},
// Hangul
{"같은", 1, ""},
}
func isNormalF(rb *reorderBuffer, s string) int {
if rb.f.form.IsNormal([]byte(s)) {
return 1
}
return 0
}
func TestIsNormal(t *testing.T) {
runPosTests(t, "TestIsNormalNFD1", NFD, isNormalF, isNormalTests)
runPosTests(t, "TestIsNormalNFD2", NFD, isNormalF, isNormalNFDTests)
runPosTests(t, "TestIsNormalNFC1", NFC, isNormalF, isNormalTests)
runPosTests(t, "TestIsNormalNFC2", NFC, isNormalF, isNormalNFCTests)
}
type AppendTest struct {
left string
right string
out string
}
type appendFunc func(f Form, out []byte, s string) []byte
func runAppendTests(t *testing.T, name string, f Form, fn appendFunc, tests []AppendTest) {
for i, test := range tests {
out := []byte(test.left)
out = fn(f, out, test.right)
outs := string(out)
if len(outs) != len(test.out) {
t.Errorf("%s:%d: length is %d; want %d", name, i, len(outs), len(test.out))
}
if outs != test.out {
// Find first rune that differs and show context.
ir := []rune(outs)
ig := []rune(test.out)
for j := 0; j < len(ir) && j < len(ig); j++ {
if ir[j] == ig[j] {
continue
}
if j -= 3; j < 0 {
j = 0
}
for e := j + 7; j < e && j < len(ir) && j < len(ig); j++ {
t.Errorf("%s:%d: runeAt(%d) = %U; want %U", name, i, j, ir[j], ig[j])
}
break
}
}
}
}
var appendTests = []AppendTest{
// empty buffers
{"", "", ""},
{"a", "", "a"},
{"", "a", "a"},
{"", "\u0041\u0307\u0304", "\u01E0"},
// segment split across buffers
{"", "a\u0300b", "\u00E0b"},
{"a", "\u0300b", "\u00E0b"},
{"a", "\u0300\u0316", "\u00E0\u0316"},
{"a", "\u0316\u0300", "\u00E0\u0316"},
{"a", "\u0300a\u0300", "\u00E0\u00E0"},
{"a", "\u0300a\u0300a\u0300", "\u00E0\u00E0\u00E0"},
{"a", "\u0300aaa\u0300aaa\u0300", "\u00E0aa\u00E0aa\u00E0"},
{"a\u0300", "\u0327", "\u00E0\u0327"},
{"a\u0327", "\u0300", "\u00E0\u0327"},
{"a\u0316", "\u0300", "\u00E0\u0316"},
{"\u0041\u0307", "\u0304", "\u01E0"},
// Hangul
{"", "\u110B\u1173", "\uC73C"},
{"", "\u1103\u1161", "\uB2E4"},
{"", "\u110B\u1173\u11B7", "\uC74C"},
{"", "\u320E", "\x28\uAC00\x29"},
{"", "\x28\u1100\u1161\x29", "\x28\uAC00\x29"},
{"\u1103", "\u1161", "\uB2E4"},
{"\u110B", "\u1173\u11B7", "\uC74C"},
{"\u110B\u1173", "\u11B7", "\uC74C"},
{"\uC73C", "\u11B7", "\uC74C"},
// UTF-8 encoding split across buffers
{"a\xCC", "\x80", "\u00E0"},
{"a\xCC", "\x80b", "\u00E0b"},
{"a\xCC", "\x80a\u0300", "\u00E0\u00E0"},
{"a\xCC", "\x80\x80", "\u00E0\x80"},
{"a\xCC", "\x80\xCC", "\u00E0\xCC"},
{"a\u0316\xCC", "\x80a\u0316\u0300", "\u00E0\u0316\u00E0\u0316"},
// ending in incomplete UTF-8 encoding
{"", "\xCC", "\xCC"},
{"a", "\xCC", "a\xCC"},
{"a", "b\xCC", "ab\xCC"},
{"\u0226", "\xCC", "\u0226\xCC"},
// illegal runes
{"", "\x80", "\x80"},
{"", "\x80\x80\x80", "\x80\x80\x80"},
{"", "\xCC\x80\x80\x80", "\xCC\x80\x80\x80"},
{"", "a\x80", "a\x80"},
{"", "a\x80\x80\x80", "a\x80\x80\x80"},
{"", "a\x80\x80\x80\x80\x80\x80", "a\x80\x80\x80\x80\x80\x80"},
{"a", "\x80\x80\x80", "a\x80\x80\x80"},
// overflow
{"", strings.Repeat("\x80", 33), strings.Repeat("\x80", 33)},
{strings.Repeat("\x80", 33), "", strings.Repeat("\x80", 33)},
{strings.Repeat("\x80", 33), strings.Repeat("\x80", 33), strings.Repeat("\x80", 66)},
// overflow of combining characters
{strings.Repeat("\u0300", 33), "", strings.Repeat("\u0300", 33)},
// weird UTF-8
{"\u00E0\xE1", "\x86", "\u00E0\xE1\x86"},
{"a\u0300\u11B7", "\u0300", "\u00E0\u11B7\u0300"},
{"a\u0300\u11B7\u0300", "\u0300", "\u00E0\u11B7\u0300\u0300"},
{"\u0300", "\xF8\x80\x80\x80\x80\u0300", "\u0300\xF8\x80\x80\x80\x80\u0300"},
{"\u0300", "\xFC\x80\x80\x80\x80\x80\u0300", "\u0300\xFC\x80\x80\x80\x80\x80\u0300"},
{"\xF8\x80\x80\x80\x80\u0300", "\u0300", "\xF8\x80\x80\x80\x80\u0300\u0300"},
{"\xFC\x80\x80\x80\x80\x80\u0300", "\u0300", "\xFC\x80\x80\x80\x80\x80\u0300\u0300"},
{"\xF8\x80\x80\x80", "\x80\u0300\u0300", "\xF8\x80\x80\x80\x80\u0300\u0300"},
}
func appendF(f Form, out []byte, s string) []byte {
return f.Append(out, []byte(s)...)
}
func appendStringF(f Form, out []byte, s string) []byte {
return f.AppendString(out, s)
}
func bytesF(f Form, out []byte, s string) []byte {
buf := []byte{}
buf = append(buf, out...)
buf = append(buf, s...)
return f.Bytes(buf)
}
func stringF(f Form, out []byte, s string) []byte {
outs := string(out) + s
return []byte(f.String(outs))
}
func TestAppend(t *testing.T) {
runAppendTests(t, "TestAppend", NFKC, appendF, appendTests)
runAppendTests(t, "TestAppendString", NFKC, appendStringF, appendTests)
runAppendTests(t, "TestBytes", NFKC, bytesF, appendTests)
runAppendTests(t, "TestString", NFKC, stringF, appendTests)
}
func appendBench(f Form, in []byte) func() {
buf := make([]byte, 0, 4*len(in))
return func() {
f.Append(buf, in...)
}
}
func iterBench(f Form, in []byte) func() {
buf := make([]byte, 4*len(in))
iter := Iter{}
return func() {
iter.SetInput(f, in)
for !iter.Done() {
iter.Next(buf)
}
}
}
func appendBenchmarks(bm []func(), f Form, in []byte) []func() {
//bm = append(bm, appendBench(f, in))
bm = append(bm, iterBench(f, in))
return bm
}
func doFormBenchmark(b *testing.B, inf, f Form, s string) {
b.StopTimer()
in := inf.Bytes([]byte(s))
bm := appendBenchmarks(nil, f, in)
b.SetBytes(int64(len(in) * len(bm)))
b.StartTimer()
for i := 0; i < b.N; i++ {
for _, fn := range bm {
fn()
}
}
}
var ascii = strings.Repeat("There is nothing to change here! ", 500)
func BenchmarkNormalizeAsciiNFC(b *testing.B) {
doFormBenchmark(b, NFC, NFC, ascii)
}
func BenchmarkNormalizeAsciiNFD(b *testing.B) {
doFormBenchmark(b, NFC, NFD, ascii)
}
func BenchmarkNormalizeAsciiNFKC(b *testing.B) {
doFormBenchmark(b, NFC, NFKC, ascii)
}
func BenchmarkNormalizeAsciiNFKD(b *testing.B) {
doFormBenchmark(b, NFC, NFKD, ascii)
}
func BenchmarkNormalizeNFC2NFC(b *testing.B) {
doFormBenchmark(b, NFC, NFC, txt_all)
}
func BenchmarkNormalizeNFC2NFD(b *testing.B) {
doFormBenchmark(b, NFC, NFD, txt_all)
}
func BenchmarkNormalizeNFD2NFC(b *testing.B) {
doFormBenchmark(b, NFD, NFC, txt_all)
}
func BenchmarkNormalizeNFD2NFD(b *testing.B) {
doFormBenchmark(b, NFD, NFD, txt_all)
}
// Hangul is often special-cased, so we test it separately.
func BenchmarkNormalizeHangulNFC2NFC(b *testing.B) {
doFormBenchmark(b, NFC, NFC, txt_kr)
}
func BenchmarkNormalizeHangulNFC2NFD(b *testing.B) {
doFormBenchmark(b, NFC, NFD, txt_kr)
}
func BenchmarkNormalizeHangulNFD2NFC(b *testing.B) {
doFormBenchmark(b, NFD, NFC, txt_kr)
}
func BenchmarkNormalizeHangulNFD2NFD(b *testing.B) {
doFormBenchmark(b, NFD, NFD, txt_kr)
}
var forms = []Form{NFC, NFD, NFKC, NFKD}
func doTextBenchmark(b *testing.B, s string) {
b.StopTimer()
in := []byte(s)
bm := []func(){}
for _, f := range forms {
bm = appendBenchmarks(bm, f, in)
}
b.SetBytes(int64(len(s) * len(bm)))
b.StartTimer()
for i := 0; i < b.N; i++ {
for _, f := range bm {
f()
}
}
}
func BenchmarkCanonicalOrdering(b *testing.B) {
doTextBenchmark(b, txt_canon)
}
func BenchmarkExtendedLatin(b *testing.B) {
doTextBenchmark(b, txt_vn)
}
func BenchmarkMiscTwoByteUtf8(b *testing.B) {
doTextBenchmark(b, twoByteUtf8)
}
func BenchmarkMiscThreeByteUtf8(b *testing.B) {
doTextBenchmark(b, threeByteUtf8)
}
func BenchmarkHangul(b *testing.B) {
doTextBenchmark(b, txt_kr)
}
func BenchmarkJapanese(b *testing.B) {
doTextBenchmark(b, txt_jp)
}
func BenchmarkChinese(b *testing.B) {
doTextBenchmark(b, txt_cn)
}
func BenchmarkOverflow(b *testing.B) {
doTextBenchmark(b, overflow)
}
var overflow = string(bytes.Repeat([]byte("\u035D"), 4096)) + "\u035B"
// Tests sampled from the Canonical ordering tests (Part 2) of
// http://unicode.org/Public/UNIDATA/NormalizationTest.txt
const txt_canon = `\u0061\u0315\u0300\u05AE\u0300\u0062 \u0061\u0300\u0315\u0300\u05AE\u0062
\u0061\u0302\u0315\u0300\u05AE\u0062 \u0061\u0307\u0315\u0300\u05AE\u0062
\u0061\u0315\u0300\u05AE\u030A\u0062 \u0061\u059A\u0316\u302A\u031C\u0062
\u0061\u032E\u059A\u0316\u302A\u0062 \u0061\u0338\u093C\u0334\u0062
\u0061\u059A\u0316\u302A\u0339 \u0061\u0341\u0315\u0300\u05AE\u0062
\u0061\u0348\u059A\u0316\u302A\u0062 \u0061\u0361\u0345\u035D\u035C\u0062
\u0061\u0366\u0315\u0300\u05AE\u0062 \u0061\u0315\u0300\u05AE\u0486\u0062
\u0061\u05A4\u059A\u0316\u302A\u0062 \u0061\u0315\u0300\u05AE\u0613\u0062
\u0061\u0315\u0300\u05AE\u0615\u0062 \u0061\u0617\u0315\u0300\u05AE\u0062
\u0061\u0619\u0618\u064D\u064E\u0062 \u0061\u0315\u0300\u05AE\u0654\u0062
\u0061\u0315\u0300\u05AE\u06DC\u0062 \u0061\u0733\u0315\u0300\u05AE\u0062
\u0061\u0744\u059A\u0316\u302A\u0062 \u0061\u0315\u0300\u05AE\u0745\u0062
\u0061\u09CD\u05B0\u094D\u3099\u0062 \u0061\u0E38\u0E48\u0E38\u0C56\u0062
\u0061\u0EB8\u0E48\u0E38\u0E49\u0062 \u0061\u0F72\u0F71\u0EC8\u0F71\u0062
\u0061\u1039\u05B0\u094D\u3099\u0062 \u0061\u05B0\u094D\u3099\u1A60\u0062
\u0061\u3099\u093C\u0334\u1BE6\u0062 \u0061\u3099\u093C\u0334\u1C37\u0062
\u0061\u1CD9\u059A\u0316\u302A\u0062 \u0061\u2DED\u0315\u0300\u05AE\u0062
\u0061\u2DEF\u0315\u0300\u05AE\u0062 \u0061\u302D\u302E\u059A\u0316\u0062`
// Taken from http://creativecommons.org/licenses/by-sa/3.0/vn/
const txt_vn = `Với các điều kiện sau: Ghi nhận công của tác giả.
Nếu bạn sử dụng, chuyển đổi, hoặc xây dựng dự án từ
nội dung được chia sẻ này, bạn phải áp dụng giấy phép này hoặc
một giấy phép khác có các điều khoản tương tự như giấy phép này
cho dự án của bạn. Hiểu rằng: Miễn — Bất kỳ các điều kiện nào
trên đây cũng có thể được miễn bỏ nếu bạn được sự cho phép của
người sở hữu bản quyền. Phạm vi công chúng — Khi tác phẩm hoặc
bất kỳ chương nào của tác phẩm đã trong vùng dành cho công
chúng theo quy định của pháp luật thì tình trạng của nó không
bị ảnh hưởng bởi giấy phép trong bất kỳ trường hợp nào.`
// Taken from http://creativecommons.org/licenses/by-sa/1.0/deed.ru
const txt_ru = `При обязательном соблюдении следующих условий:
Attribution — Вы должны атрибутировать произведение (указывать
автора и источник) в порядке, предусмотренном автором или
лицензиаром (но только так, чтобы никоим образом не подразумевалось,
что они поддерживают вас или использование вами данного произведения).
Υπό τις ακόλουθες προϋποθέσεις:`
// Taken from http://creativecommons.org/licenses/by-sa/3.0/gr/
const txt_gr = `Αναφορά Δημιουργού — Θα πρέπει να κάνετε την αναφορά στο έργο με τον
τρόπο που έχει οριστεί από το δημιουργό ή το χορηγούντο την άδεια
(χωρίς όμως να εννοείται με οποιονδήποτε τρόπο ότι εγκρίνουν εσάς ή
τη χρήση του έργου από εσάς). Παρόμοια Διανομή — Εάν αλλοιώσετε,
τροποποιήσετε ή δημιουργήσετε περαιτέρω βασισμένοι στο έργο θα
μπορείτε να διανέμετε το έργο που θα προκύψει μόνο με την ίδια ή
παρόμοια άδεια.`
// Taken from http://creativecommons.org/licenses/by-sa/3.0/deed.ar
const txt_ar = `بموجب الشروط التالية نسب المصنف — يجب عليك أن
تنسب العمل بالطريقة التي تحددها المؤلف أو المرخص (ولكن ليس بأي حال من
الأحوال أن توحي وتقترح بتحول أو استخدامك للعمل).
المشاركة على قدم المساواة — إذا كنت يعدل ، والتغيير ، أو الاستفادة
من هذا العمل ، قد ينتج عن توزيع العمل إلا في ظل تشابه او تطابق فى واحد
لهذا الترخيص.`
// Taken from http://creativecommons.org/licenses/by-sa/1.0/il/
const txt_il = `בכפוף לתנאים הבאים: ייחוס — עליך לייחס את היצירה (לתת קרדיט) באופן
המצויין על-ידי היוצר או מעניק הרישיון (אך לא בשום אופן המרמז על כך
שהם תומכים בך או בשימוש שלך ביצירה). שיתוף זהה — אם תחליט/י לשנות,
לעבד או ליצור יצירה נגזרת בהסתמך על יצירה זו, תוכל/י להפיץ את יצירתך
החדשה רק תחת אותו הרישיון או רישיון דומה לרישיון זה.`
const twoByteUtf8 = txt_ru + txt_gr + txt_ar + txt_il
// Taken from http://creativecommons.org/licenses/by-sa/2.0/kr/
const txt_kr = `다음과 같은 조건을 따라야 합니다: 저작자표시
(Attribution) — 저작자나 이용허락자가 정한 방법으로 저작물의
원저작자를 표시하여야 합니다(그러나 원저작자가 이용자나 이용자의
이용을 보증하거나 추천한다는 의미로 표시해서는 안됩니다).
동일조건변경허락 — 이 저작물을 이용하여 만든 이차적 저작물에는 본
라이선스와 동일한 라이선스를 적용해야 합니다.`
// Taken from http://creativecommons.org/licenses/by-sa/3.0/th/
const txt_th = `ภายใต้เงื่อนไข ดังต่อไปนี้ : แสดงที่มา — คุณต้องแสดงที่
มาของงานดังกล่าว ตามรูปแบบที่ผู้สร้างสรรค์หรือผู้อนุญาตกำหนด (แต่
ไม่ใช่ในลักษณะที่ว่า พวกเขาสนับสนุนคุณหรือสนับสนุนการที่
คุณนำงานไปใช้) อนุญาตแบบเดียวกัน — หากคุณดัดแปลง เปลี่ยนรูป หรื
อต่อเติมงานนี้ คุณต้องใช้สัญญาอนุญาตแบบเดียวกันหรือแบบที่เหมื
อนกับสัญญาอนุญาตที่ใช้กับงานนี้เท่านั้น`
const threeByteUtf8 = txt_th
// Taken from http://creativecommons.org/licenses/by-sa/2.0/jp/
const txt_jp = `あなたの従うべき条件は以下の通りです。
表示 — あなたは原著作者のクレジットを表示しなければなりません。
継承 — もしあなたがこの作品を改変、変形または加工した場合、
あなたはその結果生じた作品をこの作品と同一の許諾条件の下でのみ
頒布することができます。`
// http://creativecommons.org/licenses/by-sa/2.5/cn/
const txt_cn = `您可以自由: 复制、发行、展览、表演、放映、
广播或通过信息网络传播本作品 创作演绎作品
对本作品进行商业性使用 惟须遵守下列条件:
署名 — 您必须按照作者或者许可人指定的方式对作品进行署名。
相同方式共享 — 如果您改变、转换本作品或者以本作品为基础进行创作,
您只能采用与本协议相同的许可协议发布基于本作品的演绎作品。`
const txt_cjk = txt_cn + txt_jp + txt_kr
const txt_all = txt_vn + twoByteUtf8 + threeByteUtf8 + txt_cjk

View File

@@ -1,309 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
import (
"bufio"
"bytes"
"exp/norm"
"flag"
"fmt"
"io"
"log"
"net/http"
"os"
"path"
"regexp"
"runtime"
"strconv"
"strings"
"time"
"unicode/utf8"
)
func main() {
flag.Parse()
loadTestData()
CharacterByCharacterTests()
StandardTests()
PerformanceTest()
if errorCount == 0 {
fmt.Println("PASS")
}
}
const file = "NormalizationTest.txt"
var url = flag.String("url",
"http://www.unicode.org/Public/6.0.0/ucd/"+file,
"URL of Unicode database directory")
var localFiles = flag.Bool("local",
false,
"data files have been copied to the current directory; for debugging only")
var logger = log.New(os.Stderr, "", log.Lshortfile)
// This regression test runs the test set in NormalizationTest.txt
// (taken from http://www.unicode.org/Public/6.0.0/ucd/).
//
// NormalizationTest.txt has form:
// @Part0 # Specific cases
// #
// 1E0A;1E0A;0044 0307;1E0A;0044 0307; # (Ḋ; Ḋ; D◌̇; Ḋ; D◌̇; ) LATIN CAPITAL LETTER D WITH DOT ABOVE
// 1E0C;1E0C;0044 0323;1E0C;0044 0323; # (Ḍ; Ḍ; D◌̣; Ḍ; D◌̣; ) LATIN CAPITAL LETTER D WITH DOT BELOW
//
// Each test has 5 columns (c1, c2, c3, c4, c5), where
// (c1, c2, c3, c4, c5) == (c1, NFC(c1), NFD(c1), NFKC(c1), NFKD(c1))
//
// CONFORMANCE:
// 1. The following invariants must be true for all conformant implementations
//
// NFC
// c2 == NFC(c1) == NFC(c2) == NFC(c3)
// c4 == NFC(c4) == NFC(c5)
//
// NFD
// c3 == NFD(c1) == NFD(c2) == NFD(c3)
// c5 == NFD(c4) == NFD(c5)
//
// NFKC
// c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
//
// NFKD
// c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
//
// 2. For every code point X assigned in this version of Unicode that is not
// specifically listed in Part 1, the following invariants must be true
// for all conformant implementations:
//
// X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X)
//
// Column types.
const (
cRaw = iota
cNFC
cNFD
cNFKC
cNFKD
cMaxColumns
)
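// checkInvariants is a minimal sketch, not taken from the test file itself:
// for one parsed row c1..c5 it spells out the NFC invariants listed in the
// conformance comment above; the NFD, NFKC and NFKD groups follow the same
// pattern and are exercised by doConformanceTests below.
func checkInvariants(c [cMaxColumns]string) bool {
	nfc := norm.NFC
	return c[cNFC] == nfc.String(c[cRaw]) &&
		c[cNFC] == nfc.String(c[cNFC]) &&
		c[cNFC] == nfc.String(c[cNFD]) &&
		c[cNFKC] == nfc.String(c[cNFKC]) &&
		c[cNFKC] == nfc.String(c[cNFKD])
}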
// Holds data from NormalizationTest.txt
var part []Part
type Part struct {
name string
number int
tests []Test
}
type Test struct {
name string
partnr int
number int
r rune // used for character by character test
cols [cMaxColumns]string // Each has 5 entries, see below.
}
func (t Test) Name() string {
if t.number < 0 {
return part[t.partnr].name
}
return fmt.Sprintf("%s:%d", part[t.partnr].name, t.number)
}
var partRe = regexp.MustCompile(`@Part(\d) # (.*)\n$`)
var testRe = regexp.MustCompile(`^` + strings.Repeat(`([\dA-F ]+);`, 5) + ` # (.*)\n?$`)
var counter int
// Load the data from NormalizationTest.txt.
func loadTestData() {
if *localFiles {
pwd, _ := os.Getwd()
*url = "file://" + path.Join(pwd, file)
}
t := &http.Transport{}
t.RegisterProtocol("file", http.NewFileTransport(http.Dir("/")))
c := &http.Client{Transport: t}
resp, err := c.Get(*url)
if err != nil {
logger.Fatal(err)
}
if resp.StatusCode != 200 {
logger.Fatal("bad GET status for "+file, resp.Status)
}
f := resp.Body
defer f.Close()
input := bufio.NewReader(f)
for {
line, err := input.ReadString('\n')
if err != nil {
if err == io.EOF {
break
}
logger.Fatal(err)
}
if len(line) == 0 || line[0] == '#' {
continue
}
m := partRe.FindStringSubmatch(line)
if m != nil {
if len(m) < 3 {
logger.Fatal("Failed to parse Part: ", line)
}
i, err := strconv.Atoi(m[1])
if err != nil {
logger.Fatal(err)
}
name := m[2]
part = append(part, Part{name: name[:len(name)-1], number: i})
continue
}
m = testRe.FindStringSubmatch(line)
if m == nil || len(m) < 7 {
logger.Fatalf(`Failed to parse: "%s" result: %#v`, line, m)
}
test := Test{name: m[6], partnr: len(part) - 1, number: counter}
counter++
for j := 1; j < len(m)-1; j++ {
for _, split := range strings.Split(m[j], " ") {
r, err := strconv.ParseUint(split, 16, 64)
if err != nil {
logger.Fatal(err)
}
if test.r == 0 {
// save for CharacterByCharacterTests
test.r = rune(r)
}
var buf [utf8.UTFMax]byte
sz := utf8.EncodeRune(buf[:], rune(r))
test.cols[j-1] += string(buf[:sz])
}
}
part := &part[len(part)-1]
part.tests = append(part.tests, test)
}
}
var fstr = []string{"NFC", "NFD", "NFKC", "NFKD"}
var errorCount int
func cmpResult(t *Test, name string, f norm.Form, gold, test, result string) {
if gold != result {
errorCount++
if errorCount > 20 {
return
}
st, sr, sg := []rune(test), []rune(result), []rune(gold)
logger.Printf("%s:%s: %s(%X)=%X; want:%X: %s",
t.Name(), name, fstr[f], st, sr, sg, t.name)
}
}
func cmpIsNormal(t *Test, name string, f norm.Form, test string, result, want bool) {
if result != want {
errorCount++
if errorCount > 20 {
return
}
logger.Printf("%s:%s: %s(%X)=%v; want: %v", t.Name(), name, fstr[f], []rune(test), result, want)
}
}
func doTest(t *Test, f norm.Form, gold, test string) {
result := f.Bytes([]byte(test))
cmpResult(t, "Bytes", f, gold, test, string(result))
sresult := f.String(test)
cmpResult(t, "String", f, gold, test, sresult)
buf := make([]byte, norm.MaxSegmentSize)
acc := []byte{}
i := norm.Iter{}
i.SetInputString(f, test)
for !i.Done() {
n := i.Next(buf)
acc = append(acc, buf[:n]...)
}
cmpResult(t, "Iter.Next", f, gold, test, string(acc))
for i := range test {
out := f.Append(f.Bytes([]byte(test[:i])), []byte(test[i:])...)
cmpResult(t, fmt.Sprintf(":Append:%d", i), f, gold, test, string(out))
}
cmpIsNormal(t, "IsNormal", f, test, f.IsNormal([]byte(test)), test == gold)
}
func doConformanceTests(t *Test, partn int) {
for i := 0; i <= 2; i++ {
doTest(t, norm.NFC, t.cols[1], t.cols[i])
doTest(t, norm.NFD, t.cols[2], t.cols[i])
doTest(t, norm.NFKC, t.cols[3], t.cols[i])
doTest(t, norm.NFKD, t.cols[4], t.cols[i])
}
for i := 3; i <= 4; i++ {
doTest(t, norm.NFC, t.cols[3], t.cols[i])
doTest(t, norm.NFD, t.cols[4], t.cols[i])
doTest(t, norm.NFKC, t.cols[3], t.cols[i])
doTest(t, norm.NFKD, t.cols[4], t.cols[i])
}
}
func CharacterByCharacterTests() {
tests := part[1].tests
var last rune = 0
for i := 0; i <= len(tests); i++ { // last one is special case
var r rune
if i == len(tests) {
r = 0x2FA1E // Don't have to go to 0x10FFFF
} else {
r = tests[i].r
}
for last++; last < r; last++ {
// Check all characters that were not explicitly listed in the test.
t := &Test{partnr: 1, number: -1}
char := string(last)
doTest(t, norm.NFC, char, char)
doTest(t, norm.NFD, char, char)
doTest(t, norm.NFKC, char, char)
doTest(t, norm.NFKD, char, char)
}
if i < len(tests) {
doConformanceTests(&tests[i], 1)
}
}
}
func StandardTests() {
for _, j := range []int{0, 2, 3} {
for _, test := range part[j].tests {
doConformanceTests(&test, j)
}
}
}
// PerformanceTest verifies that normalization is O(n). If any of the
// code does not properly check for maxCombiningChars, normalization
// may exhibit O(n**2) behavior.
func PerformanceTest() {
runtime.GOMAXPROCS(2)
success := make(chan bool, 1)
go func() {
buf := bytes.Repeat([]byte("\u035D"), 1024*1024)
buf = append(buf, "\u035B"...)
norm.NFC.Append(nil, buf...)
success <- true
}()
timeout := time.After(1 * time.Second)
select {
case <-success:
// test completed before the timeout
case <-timeout:
errorCount++
logger.Printf(`unexpectedly long time to complete PerformanceTest`)
}
}

View File

@@ -1,126 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import "io"
type normWriter struct {
rb reorderBuffer
w io.Writer
buf []byte
}
// Write implements the standard write interface. If the last characters are
// not at a normalization boundary, the bytes will be buffered for the next
// write. The remaining bytes will be written on close.
func (w *normWriter) Write(data []byte) (n int, err error) {
// Process data in pieces to keep w.buf size bounded.
const chunk = 4000
for len(data) > 0 {
// Normalize into w.buf.
m := len(data)
if m > chunk {
m = chunk
}
w.rb.src = inputBytes(data[:m])
w.rb.nsrc = m
w.buf = doAppend(&w.rb, w.buf, 0)
data = data[m:]
n += m
// Write out complete prefix, save remainder.
// Note that lastBoundary looks back at most 30 runes.
i := lastBoundary(&w.rb.f, w.buf)
if i == -1 {
i = 0
}
if i > 0 {
if _, err = w.w.Write(w.buf[:i]); err != nil {
break
}
bn := copy(w.buf, w.buf[i:])
w.buf = w.buf[:bn]
}
}
return n, err
}
// Close forces data that remains in the buffer to be written.
func (w *normWriter) Close() error {
if len(w.buf) > 0 {
_, err := w.w.Write(w.buf)
if err != nil {
return err
}
}
return nil
}
// Writer returns a new writer that implements Write(b)
// by writing f(b) to w. The returned writer may use an
// internal buffer to maintain state across Write calls.
// Calling its Close method writes any buffered data to w.
func (f Form) Writer(w io.Writer) io.WriteCloser {
wr := &normWriter{rb: reorderBuffer{}, w: w}
wr.rb.init(f, nil)
return wr
}
type normReader struct {
rb reorderBuffer
r io.Reader
inbuf []byte
outbuf []byte
bufStart int
lastBoundary int
err error
}
// Read implements the standard read interface.
func (r *normReader) Read(p []byte) (int, error) {
for {
if r.lastBoundary-r.bufStart > 0 {
n := copy(p, r.outbuf[r.bufStart:r.lastBoundary])
r.bufStart += n
if r.lastBoundary-r.bufStart > 0 {
return n, nil
}
return n, r.err
}
if r.err != nil {
return 0, r.err
}
outn := copy(r.outbuf, r.outbuf[r.lastBoundary:])
r.outbuf = r.outbuf[0:outn]
r.bufStart = 0
n, err := r.r.Read(r.inbuf)
r.rb.src = inputBytes(r.inbuf[0:n])
r.rb.nsrc, r.err = n, err
if n > 0 {
r.outbuf = doAppend(&r.rb, r.outbuf, 0)
}
if err == io.EOF {
r.lastBoundary = len(r.outbuf)
} else {
r.lastBoundary = lastBoundary(&r.rb.f, r.outbuf)
if r.lastBoundary == -1 {
r.lastBoundary = 0
}
}
}
panic("should not reach here")
}
// Reader returns a new reader that implements Read
// by reading data from r and returning f(data).
func (f Form) Reader(r io.Reader) io.Reader {
const chunk = 4000
buf := make([]byte, chunk)
rr := &normReader{rb: reorderBuffer{}, r: r, inbuf: buf}
rr.rb.init(f, buf)
return rr
}
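// exampleReadWrite is a minimal sketch of the intended use of the Writer and
// Reader wrappers defined above: normalization is applied transparently while
// streaming between an io.Reader and an io.Writer.
func exampleReadWrite(dst io.Writer, src io.Reader) error {
	w := NFC.Writer(dst) // dst receives NFC(data) as it is written
	if _, err := io.Copy(w, src); err != nil {
		return err
	}
	return w.Close() // flush bytes still buffered while waiting for a boundary
}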

View File

@@ -1,68 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import (
"bytes"
"fmt"
"strings"
"testing"
)
var ioTests = []AppendTest{
{"", strings.Repeat("a\u0316\u0300", 6), strings.Repeat("\u00E0\u0316", 6)},
{"", strings.Repeat("a\u0300\u0316", 4000), strings.Repeat("\u00E0\u0316", 4000)},
{"", strings.Repeat("\x80\x80", 4000), strings.Repeat("\x80\x80", 4000)},
{"", "\u0041\u0307\u0304", "\u01E0"},
}
var bufSizes = []int{1, 2, 3, 4, 5, 6, 7, 8, 100, 101, 102, 103, 4000, 4001, 4002, 4003}
func readFunc(size int) appendFunc {
return func(f Form, out []byte, s string) []byte {
out = append(out, s...)
r := f.Reader(bytes.NewBuffer(out))
buf := make([]byte, size)
result := []byte{}
for n, err := 0, error(nil); err == nil; {
n, err = r.Read(buf)
result = append(result, buf[:n]...)
}
return result
}
}
func TestReader(t *testing.T) {
for _, s := range bufSizes {
name := fmt.Sprintf("TestReader%da", s)
runAppendTests(t, name, NFKC, readFunc(s), appendTests)
name = fmt.Sprintf("TestReader%db", s)
runAppendTests(t, name, NFKC, readFunc(s), ioTests)
}
}
func writeFunc(size int) appendFunc {
return func(f Form, out []byte, s string) []byte {
in := append(out, s...)
result := new(bytes.Buffer)
w := f.Writer(result)
buf := make([]byte, size)
for n := 0; len(in) > 0; in = in[n:] {
n = copy(buf, in)
_, _ = w.Write(buf[:n])
}
w.Close()
return result.Bytes()
}
}
func TestWriter(t *testing.T) {
for _, s := range bufSizes {
name := fmt.Sprintf("TestWriter%da", s)
runAppendTests(t, name, NFKC, writeFunc(s), appendTests)
name = fmt.Sprintf("TestWriter%db", s)
runAppendTests(t, name, NFKC, writeFunc(s), ioTests)
}
}

File diff suppressed because it is too large.

View File

@@ -1,237 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
type valueRange struct {
value uint16 // header: value:stride
lo, hi byte // header: lo:n
}
type trie struct {
index []uint8
values []uint16
sparse []valueRange
sparseOffset []uint16
cutoff uint8 // indices >= cutoff are sparse
}
// lookupValue determines the type of block n and looks up the value for b.
// For n < t.cutoff, the block is a simple lookup table. Otherwise, the block
// is a list of ranges with an accompanying value. Given a matching range r,
// the value for b is given by r.value + (b - r.lo) * stride.
func (t *trie) lookupValue(n uint8, b byte) uint16 {
if n < t.cutoff {
return t.values[uint16(n)<<6+uint16(b&maskx)]
}
offset := t.sparseOffset[n-t.cutoff]
header := t.sparse[offset]
lo := offset + 1
hi := lo + uint16(header.lo)
for lo < hi {
m := lo + (hi-lo)/2
r := t.sparse[m]
if r.lo <= b && b <= r.hi {
return r.value + uint16(b-r.lo)*header.value
}
if b < r.lo {
hi = m
} else {
lo = m + 1
}
}
return 0
}
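// exampleSparseLookup is a minimal sketch of the sparse-block layout described
// above: a header entry (value = stride, lo = number of ranges) followed by
// the ranges themselves, searched by binary search in lookupValue.
func exampleSparseLookup() uint16 {
	t := trie{
		cutoff:       1,
		sparseOffset: []uint16{0},
		sparse: []valueRange{
			{value: 1, lo: 2},               // header: stride 1, two ranges follow
			{value: 10, lo: 0x80, hi: 0x8F}, // bytes 0x80..0x8F map to 10..25
			{value: 40, lo: 0xA0, hi: 0xA3}, // bytes 0xA0..0xA3 map to 40..43
		},
	}
	return t.lookupValue(1, 0x82) // 12 == 10 + (0x82-0x80)*1
}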
const (
t1 = 0x00 // 0000 0000
tx = 0x80 // 1000 0000
t2 = 0xC0 // 1100 0000
t3 = 0xE0 // 1110 0000
t4 = 0xF0 // 1111 0000
t5 = 0xF8 // 1111 1000
t6 = 0xFC // 1111 1100
te = 0xFE // 1111 1110
maskx = 0x3F // 0011 1111
mask2 = 0x1F // 0001 1111
mask3 = 0x0F // 0000 1111
mask4 = 0x07 // 0000 0111
)
// lookup returns the trie value for the first UTF-8 encoding in s and
// the width in bytes of this encoding. The size will be 0 if s does not
// hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *trie) lookup(s []byte) (v uint16, sz int) {
c0 := s[0]
switch {
case c0 < tx:
return t.values[c0], 1
case c0 < t2:
return 0, 1
case c0 < t3:
if len(s) < 2 {
return 0, 0
}
i := t.index[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
return t.lookupValue(i, c1), 2
case c0 < t4:
if len(s) < 3 {
return 0, 0
}
i := t.index[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := uint16(i)<<6 + uint16(c1)&maskx
i = t.index[o]
c2 := s[2]
if c2 < tx || t2 <= c2 {
return 0, 2
}
return t.lookupValue(i, c2), 3
case c0 < t5:
if len(s) < 4 {
return 0, 0
}
i := t.index[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := uint16(i)<<6 + uint16(c1)&maskx
i = t.index[o]
c2 := s[2]
if c2 < tx || t2 <= c2 {
return 0, 2
}
o = uint16(i)<<6 + uint16(c2)&maskx
i = t.index[o]
c3 := s[3]
if c3 < tx || t2 <= c3 {
return 0, 3
}
return t.lookupValue(i, c3), 4
}
// Illegal rune
return 0, 1
}
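// exampleScan is a minimal sketch of how lookup is meant to be driven: it
// walks a byte slice one UTF-8 encoding at a time, treating a zero size as an
// incomplete trailing encoding.
func exampleScan(t *trie, b []byte) []uint16 {
	var values []uint16
	for i := 0; i < len(b); {
		v, sz := t.lookup(b[i:])
		if sz == 0 {
			break // not enough bytes left to complete the encoding
		}
		values = append(values, v)
		i += sz
	}
	return values
}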
// lookupString returns the trie value for the first UTF-8 encoding in s and
// the width in bytes of this encoding. The size will be 0 if s does not
// hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *trie) lookupString(s string) (v uint16, sz int) {
c0 := s[0]
switch {
case c0 < tx:
return t.values[c0], 1
case c0 < t2:
return 0, 1
case c0 < t3:
if len(s) < 2 {
return 0, 0
}
i := t.index[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
return t.lookupValue(i, c1), 2
case c0 < t4:
if len(s) < 3 {
return 0, 0
}
i := t.index[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := uint16(i)<<6 + uint16(c1)&maskx
i = t.index[o]
c2 := s[2]
if c2 < tx || t2 <= c2 {
return 0, 2
}
return t.lookupValue(i, c2), 3
case c0 < t5:
if len(s) < 4 {
return 0, 0
}
i := t.index[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := uint16(i)<<6 + uint16(c1)&maskx
i = t.index[o]
c2 := s[2]
if c2 < tx || t2 <= c2 {
return 0, 2
}
o = uint16(i)<<6 + uint16(c2)&maskx
i = t.index[o]
c3 := s[3]
if c3 < tx || t2 <= c3 {
return 0, 3
}
return t.lookupValue(i, c3), 4
}
// Illegal rune
return 0, 1
}
// lookupUnsafe returns the trie value for the first UTF-8 encoding in s.
// s must hold a full encoding.
func (t *trie) lookupUnsafe(s []byte) uint16 {
c0 := s[0]
if c0 < tx {
return t.values[c0]
}
if c0 < t2 {
return 0
}
i := t.index[c0]
if c0 < t3 {
return t.lookupValue(i, s[1])
}
i = t.index[uint16(i)<<6+uint16(s[1])&maskx]
if c0 < t4 {
return t.lookupValue(i, s[2])
}
i = t.index[uint16(i)<<6+uint16(s[2])&maskx]
if c0 < t5 {
return t.lookupValue(i, s[3])
}
return 0
}
// lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s.
// s must hold a full encoding.
func (t *trie) lookupStringUnsafe(s string) uint16 {
c0 := s[0]
if c0 < tx {
return t.values[c0]
}
if c0 < t2 {
return 0
}
i := t.index[c0]
if c0 < t3 {
return t.lookupValue(i, s[1])
}
i = t.index[uint16(i)<<6+uint16(s[1])&maskx]
if c0 < t4 {
return t.lookupValue(i, s[2])
}
i = t.index[uint16(i)<<6+uint16(s[2])&maskx]
if c0 < t5 {
return t.lookupValue(i, s[3])
}
return 0
}

View File

@@ -1,148 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import (
"testing"
"unicode/utf8"
)
// Test data is located in triedata_test.go; generated by maketesttables.
var testdata = testdataTrie
type rangeTest struct {
block uint8
lookup byte
result uint16
table []valueRange
offsets []uint16
}
var range1Off = []uint16{0, 2}
var range1 = []valueRange{
{0, 1, 0},
{1, 0x80, 0x80},
{0, 2, 0},
{1, 0x80, 0x80},
{9, 0xff, 0xff},
}
var rangeTests = []rangeTest{
{10, 0x80, 1, range1, range1Off},
{10, 0x00, 0, range1, range1Off},
{11, 0x80, 1, range1, range1Off},
{11, 0xff, 9, range1, range1Off},
{11, 0x00, 0, range1, range1Off},
}
func TestLookupSparse(t *testing.T) {
for i, test := range rangeTests {
n := trie{sparse: test.table, sparseOffset: test.offsets, cutoff: 10}
v := n.lookupValue(test.block, test.lookup)
if v != test.result {
t.Errorf("LookupSparse:%d: found %X; want %X", i, v, test.result)
}
}
}
// Test cases for illegal runes.
type trietest struct {
size int
bytes []byte
}
var tests = []trietest{
// illegal runes
{1, []byte{0x80}},
{1, []byte{0xFF}},
{1, []byte{t2, tx - 1}},
{1, []byte{t2, t2}},
{2, []byte{t3, tx, tx - 1}},
{2, []byte{t3, tx, t2}},
{1, []byte{t3, tx - 1, tx}},
{3, []byte{t4, tx, tx, tx - 1}},
{3, []byte{t4, tx, tx, t2}},
{1, []byte{t4, t2, tx, tx - 1}},
{2, []byte{t4, tx, t2, tx - 1}},
// short runes
{0, []byte{t2}},
{0, []byte{t3, tx}},
{0, []byte{t4, tx, tx}},
// we only support UTF-8 up to utf8.UTFMax bytes (4 bytes)
{1, []byte{t5, tx, tx, tx, tx}},
{1, []byte{t6, tx, tx, tx, tx, tx}},
}
func mkUTF8(r rune) ([]byte, int) {
var b [utf8.UTFMax]byte
sz := utf8.EncodeRune(b[:], r)
return b[:sz], sz
}
func TestLookup(t *testing.T) {
for i, tt := range testRunes {
b, szg := mkUTF8(tt)
v, szt := testdata.lookup(b)
if int(v) != i {
t.Errorf("lookup(%U): found value %#x, expected %#x", tt, v, i)
}
if szt != szg {
t.Errorf("lookup(%U): found size %d, expected %d", tt, szt, szg)
}
}
for i, tt := range tests {
v, sz := testdata.lookup(tt.bytes)
if int(v) != 0 {
t.Errorf("lookup of illegal rune, case %d: found value %#x, expected 0", i, v)
}
if sz != tt.size {
t.Errorf("lookup of illegal rune, case %d: found size %d, expected %d", i, sz, tt.size)
}
}
}
func TestLookupUnsafe(t *testing.T) {
for i, tt := range testRunes {
b, _ := mkUTF8(tt)
v := testdata.lookupUnsafe(b)
if int(v) != i {
t.Errorf("lookupUnsafe(%U): found value %#x, expected %#x", i, v, i)
}
}
}
func TestLookupString(t *testing.T) {
for i, tt := range testRunes {
b, szg := mkUTF8(tt)
v, szt := testdata.lookupString(string(b))
if int(v) != i {
t.Errorf("lookup(%U): found value %#x, expected %#x", i, v, i)
}
if szt != szg {
t.Errorf("lookup(%U): found size %d, expected %d", i, szt, szg)
}
}
for i, tt := range tests {
v, sz := testdata.lookupString(string(tt.bytes))
if int(v) != 0 {
t.Errorf("lookup of illegal rune, case %d: found value %#x, expected 0", i, v)
}
if sz != tt.size {
t.Errorf("lookup of illegal rune, case %d: found size %d, expected %d", i, sz, tt.size)
}
}
}
func TestLookupStringUnsafe(t *testing.T) {
for i, tt := range testRunes {
b, _ := mkUTF8(tt)
v := testdata.lookupStringUnsafe(string(b))
if int(v) != i {
t.Errorf("lookupUnsafe(%U): found value %#x, expected %#x", i, v, i)
}
}
}

Some files were not shown because too many files have changed in this diff.